diff options
Diffstat (limited to 'src')
171 files changed, 41671 insertions, 13056 deletions
diff --git a/src/.gitignore b/src/.gitignore index fc94e82c..1a30573c 100644 --- a/src/.gitignore +++ b/src/.gitignore | |||
@@ -4,4 +4,4 @@ lj_ffdef.h | |||
4 | lj_libdef.h | 4 | lj_libdef.h |
5 | lj_recdef.h | 5 | lj_recdef.h |
6 | lj_folddef.h | 6 | lj_folddef.h |
7 | lj_vm.s | 7 | lj_vm.[sS] |
diff --git a/src/Makefile b/src/Makefile index c4d0b14d..30d64be2 100644 --- a/src/Makefile +++ b/src/Makefile | |||
@@ -11,8 +11,8 @@ | |||
11 | ############################################################################## | 11 | ############################################################################## |
12 | 12 | ||
13 | MAJVER= 2 | 13 | MAJVER= 2 |
14 | MINVER= 0 | 14 | MINVER= 1 |
15 | RELVER= 5 | 15 | RELVER= 0 |
16 | ABIVER= 5.1 | 16 | ABIVER= 5.1 |
17 | NODOTABIVER= 51 | 17 | NODOTABIVER= 51 |
18 | 18 | ||
@@ -44,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer | |||
44 | # | 44 | # |
45 | # Target-specific compiler options: | 45 | # Target-specific compiler options: |
46 | # | 46 | # |
47 | # x86 only: it's recommended to compile at least for i686. Better yet, | ||
48 | # compile for an architecture that has SSE2, too (-msse -msse2). | ||
49 | # | ||
50 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute | 47 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute |
51 | # the binaries to a different machine you could also use: -march=native | 48 | # the binaries to a different machine you could also use: -march=native |
52 | # | 49 | # |
53 | CCOPT_x86= -march=i686 | 50 | CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse |
54 | CCOPT_x64= | 51 | CCOPT_x64= |
55 | CCOPT_arm= | 52 | CCOPT_arm= |
53 | CCOPT_arm64= | ||
56 | CCOPT_ppc= | 54 | CCOPT_ppc= |
57 | CCOPT_ppcspe= | ||
58 | CCOPT_mips= | 55 | CCOPT_mips= |
59 | # | 56 | # |
60 | CCDEBUG= | 57 | CCDEBUG= |
@@ -113,6 +110,9 @@ XCFLAGS= | |||
113 | #XCFLAGS+= -DLUAJIT_NUMMODE=1 | 110 | #XCFLAGS+= -DLUAJIT_NUMMODE=1 |
114 | #XCFLAGS+= -DLUAJIT_NUMMODE=2 | 111 | #XCFLAGS+= -DLUAJIT_NUMMODE=2 |
115 | # | 112 | # |
113 | # Disable LJ_GC64 mode for x64. | ||
114 | #XCFLAGS+= -DLUAJIT_DISABLE_GC64 | ||
115 | # | ||
116 | ############################################################################## | 116 | ############################################################################## |
117 | 117 | ||
118 | ############################################################################## | 118 | ############################################################################## |
@@ -124,15 +124,14 @@ XCFLAGS= | |||
124 | # | 124 | # |
125 | # Use the system provided memory allocator (realloc) instead of the | 125 | # Use the system provided memory allocator (realloc) instead of the |
126 | # bundled memory allocator. This is slower, but sometimes helpful for | 126 | # bundled memory allocator. This is slower, but sometimes helpful for |
127 | # debugging. This option cannot be enabled on x64, since realloc usually | 127 | # debugging. This option cannot be enabled on x64 without GC64, since |
128 | # doesn't return addresses in the right address range. | 128 | # realloc usually doesn't return addresses in the right address range. |
129 | # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and | 129 | # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and |
130 | # the only way to get useful results from it for all other architectures. | 130 | # the only way to get useful results from it for all other architectures. |
131 | #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC | 131 | #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC |
132 | # | 132 | # |
133 | # This define is required to run LuaJIT under Valgrind. The Valgrind | 133 | # This define is required to run LuaJIT under Valgrind. The Valgrind |
134 | # header files must be installed. You should enable debug information, too. | 134 | # header files must be installed. You should enable debug information, too. |
135 | # Use --suppressions=lj.supp to avoid some false positives. | ||
136 | #XCFLAGS+= -DLUAJIT_USE_VALGRIND | 135 | #XCFLAGS+= -DLUAJIT_USE_VALGRIND |
137 | # | 136 | # |
138 | # This is the client for the GDB JIT API. GDB 7.0 or higher is required | 137 | # This is the client for the GDB JIT API. GDB 7.0 or higher is required |
@@ -189,7 +188,8 @@ endif | |||
189 | # make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows | 188 | # make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows |
190 | # make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- | 189 | # make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- |
191 | 190 | ||
192 | CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) | 191 | ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) |
192 | CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) | ||
193 | LDOPTIONS= $(CCDEBUG) $(LDFLAGS) | 193 | LDOPTIONS= $(CCDEBUG) $(LDFLAGS) |
194 | 194 | ||
195 | HOST_CC= $(CC) | 195 | HOST_CC= $(CC) |
@@ -229,6 +229,7 @@ TARGET_XLDFLAGS= | |||
229 | TARGET_XLIBS= -lm | 229 | TARGET_XLIBS= -lm |
230 | TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) | 230 | TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) |
231 | TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) | 231 | TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) |
232 | TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) | ||
232 | TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) | 233 | TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) |
233 | TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) | 234 | TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) |
234 | TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) | 235 | TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) |
@@ -243,17 +244,29 @@ else | |||
243 | ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) | 244 | ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) |
244 | TARGET_LJARCH= arm | 245 | TARGET_LJARCH= arm |
245 | else | 246 | else |
247 | ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) | ||
248 | ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) | ||
249 | TARGET_ARCH= -D__AARCH64EB__=1 | ||
250 | endif | ||
251 | TARGET_LJARCH= arm64 | ||
252 | else | ||
246 | ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) | 253 | ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) |
254 | ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) | ||
255 | TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE | ||
256 | else | ||
257 | TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE | ||
258 | endif | ||
247 | TARGET_LJARCH= ppc | 259 | TARGET_LJARCH= ppc |
248 | else | 260 | else |
249 | ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH))) | ||
250 | TARGET_LJARCH= ppcspe | ||
251 | else | ||
252 | ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) | 261 | ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) |
253 | ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) | 262 | ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) |
254 | TARGET_ARCH= -D__MIPSEL__=1 | 263 | TARGET_ARCH= -D__MIPSEL__=1 |
255 | endif | 264 | endif |
256 | TARGET_LJARCH= mips | 265 | ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) |
266 | TARGET_LJARCH= mips64 | ||
267 | else | ||
268 | TARGET_LJARCH= mips | ||
269 | endif | ||
257 | else | 270 | else |
258 | $(error Unsupported target architecture) | 271 | $(error Unsupported target architecture) |
259 | endif | 272 | endif |
@@ -267,6 +280,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) | |||
267 | TARGET_SYS= PS3 | 280 | TARGET_SYS= PS3 |
268 | TARGET_ARCH+= -D__CELLOS_LV2__ | 281 | TARGET_ARCH+= -D__CELLOS_LV2__ |
269 | TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC | 282 | TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC |
283 | TARGET_XLIBS+= -lpthread | ||
270 | endif | 284 | endif |
271 | 285 | ||
272 | TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) | 286 | TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) |
@@ -306,20 +320,27 @@ ifeq (Darwin,$(TARGET_SYS)) | |||
306 | $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) | 320 | $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) |
307 | endif | 321 | endif |
308 | TARGET_STRIP+= -x | 322 | TARGET_STRIP+= -x |
323 | TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL | ||
309 | TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC | 324 | TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC |
310 | TARGET_DYNXLDOPTS= | 325 | TARGET_DYNXLDOPTS= |
311 | TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) | 326 | TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) |
312 | ifeq (x64,$(TARGET_LJARCH)) | ||
313 | TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000 | ||
314 | TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000 | ||
315 | endif | ||
316 | else | 327 | else |
317 | ifeq (iOS,$(TARGET_SYS)) | 328 | ifeq (iOS,$(TARGET_SYS)) |
318 | TARGET_STRIP+= -x | 329 | TARGET_STRIP+= -x |
319 | TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC | 330 | TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC |
320 | TARGET_DYNXLDOPTS= | 331 | TARGET_DYNXLDOPTS= |
321 | TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) | 332 | TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) |
333 | ifeq (arm64,$(TARGET_LJARCH)) | ||
334 | TARGET_XCFLAGS+= -fno-omit-frame-pointer | ||
335 | endif | ||
322 | else | 336 | else |
337 | ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) | ||
338 | # Find out whether the target toolchain always generates unwind tables. | ||
339 | TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o) | ||
340 | ifneq (,$(findstring E,$(TARGET_TESTUNWIND))) | ||
341 | TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL | ||
342 | endif | ||
343 | endif | ||
323 | ifneq (SunOS,$(TARGET_SYS)) | 344 | ifneq (SunOS,$(TARGET_SYS)) |
324 | ifneq (PS3,$(TARGET_SYS)) | 345 | ifneq (PS3,$(TARGET_SYS)) |
325 | TARGET_XLDFLAGS+= -Wl,-E | 346 | TARGET_XLDFLAGS+= -Wl,-E |
@@ -346,7 +367,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS)) | |||
346 | HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX | 367 | HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX |
347 | else | 368 | else |
348 | ifeq (iOS,$(TARGET_SYS)) | 369 | ifeq (iOS,$(TARGET_SYS)) |
349 | HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX | 370 | HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1 |
350 | else | 371 | else |
351 | HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER | 372 | HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER |
352 | endif | 373 | endif |
@@ -379,6 +400,11 @@ DASM_XFLAGS= | |||
379 | DASM_AFLAGS= | 400 | DASM_AFLAGS= |
380 | DASM_ARCH= $(TARGET_LJARCH) | 401 | DASM_ARCH= $(TARGET_LJARCH) |
381 | 402 | ||
403 | ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) | ||
404 | DASM_AFLAGS+= -D ENDIAN_LE | ||
405 | else | ||
406 | DASM_AFLAGS+= -D ENDIAN_BE | ||
407 | endif | ||
382 | ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) | 408 | ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) |
383 | DASM_AFLAGS+= -D P64 | 409 | DASM_AFLAGS+= -D P64 |
384 | endif | 410 | endif |
@@ -411,19 +437,19 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs | |||
411 | ifeq (Windows,$(TARGET_SYS)) | 437 | ifeq (Windows,$(TARGET_SYS)) |
412 | DASM_AFLAGS+= -D WIN | 438 | DASM_AFLAGS+= -D WIN |
413 | endif | 439 | endif |
414 | ifeq (x86,$(TARGET_LJARCH)) | ||
415 | ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH))) | ||
416 | DASM_AFLAGS+= -D SSE | ||
417 | endif | ||
418 | else | ||
419 | ifeq (x64,$(TARGET_LJARCH)) | 440 | ifeq (x64,$(TARGET_LJARCH)) |
420 | DASM_ARCH= x86 | 441 | ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH))) |
442 | DASM_ARCH= x86 | ||
443 | endif | ||
421 | else | 444 | else |
422 | ifeq (arm,$(TARGET_LJARCH)) | 445 | ifeq (arm,$(TARGET_LJARCH)) |
423 | ifeq (iOS,$(TARGET_SYS)) | 446 | ifeq (iOS,$(TARGET_SYS)) |
424 | DASM_AFLAGS+= -D IOS | 447 | DASM_AFLAGS+= -D IOS |
425 | endif | 448 | endif |
426 | else | 449 | else |
450 | ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) | ||
451 | DASM_AFLAGS+= -D MIPSR6 | ||
452 | endif | ||
427 | ifeq (ppc,$(TARGET_LJARCH)) | 453 | ifeq (ppc,$(TARGET_LJARCH)) |
428 | ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) | 454 | ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) |
429 | DASM_AFLAGS+= -D SQRT | 455 | DASM_AFLAGS+= -D SQRT |
@@ -431,7 +457,7 @@ ifeq (ppc,$(TARGET_LJARCH)) | |||
431 | ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) | 457 | ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) |
432 | DASM_AFLAGS+= -D ROUND | 458 | DASM_AFLAGS+= -D ROUND |
433 | endif | 459 | endif |
434 | ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH))) | 460 | ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH))) |
435 | DASM_AFLAGS+= -D GPR64 | 461 | DASM_AFLAGS+= -D GPR64 |
436 | endif | 462 | endif |
437 | ifeq (PS3,$(TARGET_SYS)) | 463 | ifeq (PS3,$(TARGET_SYS)) |
@@ -440,7 +466,6 @@ ifeq (ppc,$(TARGET_LJARCH)) | |||
440 | endif | 466 | endif |
441 | endif | 467 | endif |
442 | endif | 468 | endif |
443 | endif | ||
444 | 469 | ||
445 | DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) | 470 | DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) |
446 | DASM_DASC= vm_$(DASM_ARCH).dasc | 471 | DASM_DASC= vm_$(DASM_ARCH).dasc |
@@ -453,19 +478,22 @@ BUILDVM_X= $(BUILDVM_T) | |||
453 | HOST_O= $(MINILUA_O) $(BUILDVM_O) | 478 | HOST_O= $(MINILUA_O) $(BUILDVM_O) |
454 | HOST_T= $(MINILUA_T) $(BUILDVM_T) | 479 | HOST_T= $(MINILUA_T) $(BUILDVM_T) |
455 | 480 | ||
456 | LJVM_S= lj_vm.s | 481 | LJVM_S= lj_vm.S |
457 | LJVM_O= lj_vm.o | 482 | LJVM_O= lj_vm.o |
458 | LJVM_BOUT= $(LJVM_S) | 483 | LJVM_BOUT= $(LJVM_S) |
459 | LJVM_MODE= elfasm | 484 | LJVM_MODE= elfasm |
460 | 485 | ||
461 | LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ | 486 | LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ |
462 | lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o | 487 | lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \ |
488 | lib_buffer.o | ||
463 | LJLIB_C= $(LJLIB_O:.o=.c) | 489 | LJLIB_C= $(LJLIB_O:.o=.c) |
464 | 490 | ||
465 | LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ | 491 | LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ |
466 | lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ | 492 | lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ |
467 | lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ | 493 | lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ |
468 | lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ | 494 | lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \ |
495 | lj_api.o lj_profile.o \ | ||
496 | lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ | ||
469 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ | 497 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ |
470 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ | 498 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ |
471 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ | 499 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ |
@@ -580,12 +608,15 @@ E= @echo | |||
580 | default all: $(TARGET_T) | 608 | default all: $(TARGET_T) |
581 | 609 | ||
582 | amalg: | 610 | amalg: |
583 | @grep "^[+|]" ljamalg.c | ||
584 | $(MAKE) all "LJCORE_O=ljamalg.o" | 611 | $(MAKE) all "LJCORE_O=ljamalg.o" |
585 | 612 | ||
586 | clean: | 613 | clean: |
587 | $(HOST_RM) $(ALL_RM) | 614 | $(HOST_RM) $(ALL_RM) |
588 | 615 | ||
616 | libbc: | ||
617 | ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C) | ||
618 | $(MAKE) all | ||
619 | |||
589 | depend: | 620 | depend: |
590 | @for file in $(ALL_HDRGEN); do \ | 621 | @for file in $(ALL_HDRGEN); do \ |
591 | test -f $$file || touch $$file; \ | 622 | test -f $$file || touch $$file; \ |
@@ -600,7 +631,7 @@ depend: | |||
600 | test -s $$file || $(HOST_RM) $$file; \ | 631 | test -s $$file || $(HOST_RM) $$file; \ |
601 | done | 632 | done |
602 | 633 | ||
603 | .PHONY: default all amalg clean depend | 634 | .PHONY: default all amalg clean libbc depend |
604 | 635 | ||
605 | ############################################################################## | 636 | ############################################################################## |
606 | # Rules for generated files. | 637 | # Rules for generated files. |
@@ -610,7 +641,7 @@ $(MINILUA_T): $(MINILUA_O) | |||
610 | $(E) "HOSTLINK $@" | 641 | $(E) "HOSTLINK $@" |
611 | $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) | 642 | $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) |
612 | 643 | ||
613 | host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) lj_arch.h lua.h luaconf.h | 644 | host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h |
614 | $(E) "DYNASM $@" | 645 | $(E) "DYNASM $@" |
615 | $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) | 646 | $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) |
616 | 647 | ||
@@ -657,10 +688,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c | |||
657 | $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< | 688 | $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< |
658 | $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< | 689 | $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< |
659 | 690 | ||
660 | %.o: %.s | 691 | %.o: %.S |
661 | $(E) "ASM $@" | 692 | $(E) "ASM $@" |
662 | $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< | 693 | $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $< |
663 | $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< | 694 | $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $< |
664 | 695 | ||
665 | $(LUAJIT_O): | 696 | $(LUAJIT_O): |
666 | $(E) "CC $@" | 697 | $(E) "CC $@" |
diff --git a/src/Makefile.dep b/src/Makefile.dep index 9e14d617..1ad6701a 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
@@ -1,66 +1,79 @@ | |||
1 | lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | 1 | lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ |
2 | lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ | 2 | lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ |
3 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h | 3 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h |
4 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 4 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
5 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ | 5 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \ |
6 | lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ | 6 | lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ |
7 | lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ | 7 | lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \ |
8 | lj_lib.h lj_libdef.h | 8 | lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h |
9 | lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 9 | lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
10 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h | 10 | lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ |
11 | lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ | ||
12 | lj_ffdef.h lj_lib.h lj_libdef.h | ||
13 | lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | ||
14 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ | ||
15 | lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \ | ||
16 | lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h | ||
11 | lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 17 | lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
12 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ | 18 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ |
13 | lj_libdef.h | 19 | lj_libdef.h |
14 | lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 20 | lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
15 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ | 21 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ |
16 | lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ | 22 | lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ |
17 | lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h | 23 | lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \ |
24 | lj_libdef.h | ||
18 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h | 25 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h |
19 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 26 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
20 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ | 27 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ |
21 | lj_ffdef.h lj_lib.h lj_libdef.h | 28 | lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h |
22 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ | 29 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
23 | lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ | 30 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ |
24 | lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ | 31 | lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ |
25 | lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ | 32 | lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ |
26 | lj_libdef.h | 33 | lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h |
27 | lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 34 | lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
28 | lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h | 35 | lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h |
29 | lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 36 | lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
30 | lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h | 37 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ |
38 | lj_libdef.h | ||
31 | lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 39 | lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
32 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h | 40 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h |
33 | lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 41 | lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
34 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ | 42 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ |
35 | lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ | 43 | lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \ |
36 | lj_lib.h lj_libdef.h | 44 | lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h |
37 | lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 45 | lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
38 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ | 46 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ |
39 | lj_libdef.h | 47 | lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h |
40 | lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h | 48 | lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \ |
49 | lj_prng.h | ||
41 | lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 50 | lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
42 | lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ | 51 | lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ |
43 | lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ | 52 | lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ |
44 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h | 53 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h |
45 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 54 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
46 | lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ | 55 | lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \ |
47 | lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ | 56 | lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ |
48 | lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ | 57 | lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ |
49 | lj_asm_*.h | 58 | lj_emit_*.h lj_asm_*.h |
59 | lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h | ||
50 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ | 60 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ |
51 | lj_bcdef.h | 61 | lj_bcdef.h |
52 | lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 62 | lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
53 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ | 63 | lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \ |
54 | lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h | 64 | lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \ |
65 | lj_strfmt.h | ||
55 | lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 66 | lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
56 | lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ | 67 | lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \ |
57 | lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h | 68 | lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h |
69 | lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | ||
70 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h | ||
58 | lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 71 | lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
59 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ | 72 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \ |
60 | lj_cdata.h lj_carith.h | 73 | lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h |
61 | lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 74 | lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
62 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ | 75 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \ |
63 | lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ | 76 | lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ |
64 | lj_traceerr.h | 77 | lj_traceerr.h |
65 | lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ | 78 | lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ |
66 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ | 79 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ |
@@ -68,110 +81,127 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ | |||
68 | lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ | 81 | lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ |
69 | lj_traceerr.h lj_vm.h | 82 | lj_traceerr.h lj_vm.h |
70 | lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 83 | lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
71 | lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ | 84 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \ |
72 | lj_ccallback.h | 85 | lj_cdata.h lj_cconv.h lj_ccallback.h |
73 | lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 86 | lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
74 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ | 87 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h |
75 | lj_cdata.h | ||
76 | lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h | 88 | lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h |
77 | lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 89 | lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
78 | lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ | 90 | lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ |
79 | lj_cdata.h lj_clib.h | 91 | lj_cdata.h lj_clib.h lj_strfmt.h |
80 | lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 92 | lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
81 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ | 93 | lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \ |
82 | lj_bc.h lj_vm.h lj_char.h lj_strscan.h | 94 | lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h |
83 | lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 95 | lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
84 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ | 96 | lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \ |
85 | lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ | 97 | lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ |
86 | lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 98 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ |
87 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ | 99 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ |
88 | lj_crecord.h | 100 | lj_crecord.h lj_strfmt.h |
89 | lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 101 | lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
90 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h | 102 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ |
103 | lj_ccallback.h lj_buf.h | ||
91 | lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 104 | lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
92 | lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ | 105 | lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ |
93 | lj_bc.h lj_vm.h lj_jit.h lj_ir.h | 106 | lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h |
94 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 107 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
95 | lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ | 108 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \ |
96 | lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ | 109 | lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \ |
97 | lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ | 110 | lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \ |
98 | lj_vm.h luajit.h | 111 | lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h |
99 | lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ | 112 | lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ |
100 | lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ | 113 | lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ |
101 | lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ | 114 | lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ |
102 | lj_traceerr.h lj_vm.h | 115 | lj_traceerr.h lj_vm.h lj_strfmt.h |
103 | lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 116 | lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
104 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ | 117 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \ |
105 | lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 118 | lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ |
106 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ | 119 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \ |
107 | lj_vm.h lj_strscan.h lj_recdef.h | 120 | lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h |
108 | lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 121 | lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
109 | lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ | 122 | lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ |
110 | lj_traceerr.h lj_vm.h | 123 | lj_traceerr.h lj_vm.h |
111 | lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 124 | lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
112 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ | 125 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ |
113 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ | 126 | lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \ |
114 | lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h | 127 | lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h |
115 | lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 128 | lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
116 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ | 129 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \ |
117 | lj_ir.h lj_dispatch.h | 130 | lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h |
118 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 131 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
119 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 132 | lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ |
120 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ | 133 | lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ |
121 | lj_vm.h lj_strscan.h lj_lib.h | 134 | lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h |
122 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 135 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
123 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ | 136 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ |
124 | lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h | 137 | lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ |
138 | lj_strfmt.h | ||
125 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ | 139 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ |
126 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ | 140 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ |
127 | lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h | 141 | lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \ |
142 | lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h | ||
128 | lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | 143 | lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ |
129 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ | 144 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ |
130 | lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h | 145 | lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h |
131 | lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 146 | lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
132 | lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ | 147 | lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ |
133 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h | 148 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h |
134 | lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 149 | lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
135 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ | 150 | lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ |
136 | lj_vm.h lj_strscan.h | 151 | lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h |
137 | lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h | 152 | lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h |
138 | lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 153 | lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
139 | lj_ir.h lj_jit.h lj_iropt.h | 154 | lj_ir.h lj_jit.h lj_iropt.h |
140 | lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 155 | lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
141 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ | 156 | lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \ |
142 | lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ | 157 | lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \ |
143 | lj_strscan.h lj_folddef.h | 158 | lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h |
144 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 159 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
145 | lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 160 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \ |
146 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h | 161 | lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ |
162 | lj_vm.h | ||
147 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 163 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
148 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h | 164 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h |
149 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 165 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
150 | lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ | 166 | lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ |
151 | lj_traceerr.h lj_vm.h lj_strscan.h | 167 | lj_traceerr.h lj_vm.h lj_strscan.h |
152 | lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 168 | lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
153 | lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h | 169 | lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h |
154 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ | 170 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ |
155 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ | 171 | lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ |
156 | lj_iropt.h lj_vm.h | 172 | lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h |
157 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 173 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
158 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ | 174 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ |
159 | lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h | 175 | lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ |
176 | lj_vm.h lj_vmevent.h | ||
177 | lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h | ||
178 | lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
179 | lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ | ||
180 | lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h | ||
160 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 181 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
161 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ | 182 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ |
162 | lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ | 183 | lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ |
163 | lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ | 184 | lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ |
164 | lj_ffrecord.h lj_snap.h lj_vm.h | 185 | lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h |
186 | lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \ | ||
187 | lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ | ||
188 | lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h | ||
165 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 189 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
166 | lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ | 190 | lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ |
167 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ | 191 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ |
168 | lj_target_*.h lj_ctype.h lj_cdata.h | 192 | lj_target_*.h lj_ctype.h lj_cdata.h |
169 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 193 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
170 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ | 194 | lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ |
171 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ | 195 | lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ |
172 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h | 196 | lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \ |
197 | lj_alloc.h luajit.h | ||
173 | lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 198 | lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
174 | lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h | 199 | lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h |
200 | lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
201 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \ | ||
202 | lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h | ||
203 | lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ | ||
204 | lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h | ||
175 | lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 205 | lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
176 | lj_char.h lj_strscan.h | 206 | lj_char.h lj_strscan.h |
177 | lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 207 | lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
@@ -180,35 +210,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
180 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ | 210 | lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ |
181 | lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ | 211 | lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ |
182 | lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ | 212 | lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ |
183 | lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h | 213 | lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h |
184 | lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 214 | lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
185 | lj_gc.h lj_udata.h | 215 | lj_gc.h lj_err.h lj_errmsg.h lj_udata.h |
186 | lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 216 | lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
187 | lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ | 217 | lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ |
188 | lj_vm.h lj_vmevent.h | 218 | lj_vm.h lj_vmevent.h |
189 | lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 219 | lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
190 | lj_ir.h lj_vm.h | 220 | lj_ir.h lj_vm.h |
191 | ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ | 221 | ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \ |
192 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ | 222 | lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \ |
193 | lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ | 223 | lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \ |
194 | lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ | 224 | lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ |
195 | lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ | 225 | lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \ |
196 | lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ | 226 | lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \ |
197 | lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ | 227 | lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \ |
198 | luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ | 228 | lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \ |
199 | lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ | 229 | lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \ |
200 | lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ | 230 | lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \ |
201 | lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ | 231 | lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ |
202 | lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ | 232 | lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ |
203 | lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ | 233 | lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ |
204 | lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ | 234 | lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ |
205 | lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ | 235 | lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ |
206 | lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ | 236 | lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ |
237 | lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ | ||
238 | lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ | ||
207 | lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ | 239 | lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ |
208 | lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ | 240 | lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ |
209 | lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ | 241 | lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ |
210 | lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ | 242 | lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ |
211 | lib_init.c | 243 | lib_buffer.c lib_init.c |
212 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h | 244 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h |
213 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ | 245 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ |
214 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ | 246 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ |
@@ -220,7 +252,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \ | |||
220 | host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ | 252 | host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ |
221 | luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h | 253 | luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h |
222 | host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ | 254 | host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ |
223 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h | 255 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \ |
256 | host/buildvm_libbc.h | ||
224 | host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ | 257 | host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ |
225 | luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h | 258 | luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h |
226 | host/minilua.o: host/minilua.c | 259 | host/minilua.o: host/minilua.c |
diff --git a/src/host/buildvm.c b/src/host/buildvm.c index a12245fd..7348bd5b 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c | |||
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); | |||
59 | #include "../dynasm/dasm_x86.h" | 59 | #include "../dynasm/dasm_x86.h" |
60 | #elif LJ_TARGET_ARM | 60 | #elif LJ_TARGET_ARM |
61 | #include "../dynasm/dasm_arm.h" | 61 | #include "../dynasm/dasm_arm.h" |
62 | #elif LJ_TARGET_ARM64 | ||
63 | #include "../dynasm/dasm_arm64.h" | ||
62 | #elif LJ_TARGET_PPC | 64 | #elif LJ_TARGET_PPC |
63 | #include "../dynasm/dasm_ppc.h" | 65 | #include "../dynasm/dasm_ppc.h" |
64 | #elif LJ_TARGET_PPCSPE | ||
65 | #include "../dynasm/dasm_ppc.h" | ||
66 | #elif LJ_TARGET_MIPS | 66 | #elif LJ_TARGET_MIPS |
67 | #include "../dynasm/dasm_mips.h" | 67 | #include "../dynasm/dasm_mips.h" |
68 | #else | 68 | #else |
@@ -110,11 +110,11 @@ static const char *sym_decorate(BuildCtx *ctx, | |||
110 | if (p) { | 110 | if (p) { |
111 | #if LJ_TARGET_X86ORX64 | 111 | #if LJ_TARGET_X86ORX64 |
112 | if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) | 112 | if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) |
113 | name[0] = '@'; | 113 | name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ |
114 | else | 114 | else |
115 | *p = '\0'; | 115 | *p = '\0'; |
116 | #elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE | 116 | #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE |
117 | /* Keep @plt. */ | 117 | /* Keep @plt etc. */ |
118 | #else | 118 | #else |
119 | *p = '\0'; | 119 | *p = '\0'; |
120 | #endif | 120 | #endif |
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx) | |||
179 | ctx->nreloc = 0; | 179 | ctx->nreloc = 0; |
180 | 180 | ||
181 | ctx->globnames = globnames; | 181 | ctx->globnames = globnames; |
182 | ctx->extnames = extnames; | ||
182 | ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); | 183 | ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); |
183 | ctx->nrelocsym = 0; | 184 | ctx->nrelocsym = 0; |
184 | for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; | 185 | for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; |
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx) | |||
320 | char buf[80]; | 321 | char buf[80]; |
321 | int i; | 322 | int i; |
322 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); | 323 | fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); |
323 | fprintf(ctx->fp, "module(...)\n\n"); | 324 | fprintf(ctx->fp, "return {\n\n"); |
324 | 325 | ||
325 | fprintf(ctx->fp, "bcnames = \""); | 326 | fprintf(ctx->fp, "bcnames = \""); |
326 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); | 327 | for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); |
327 | fprintf(ctx->fp, "\"\n\n"); | 328 | fprintf(ctx->fp, "\",\n\n"); |
328 | 329 | ||
329 | fprintf(ctx->fp, "irnames = \""); | 330 | fprintf(ctx->fp, "irnames = \""); |
330 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); | 331 | for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); |
331 | fprintf(ctx->fp, "\"\n\n"); | 332 | fprintf(ctx->fp, "\",\n\n"); |
332 | 333 | ||
333 | fprintf(ctx->fp, "irfpm = { [0]="); | 334 | fprintf(ctx->fp, "irfpm = { [0]="); |
334 | for (i = 0; irfpm_names[i]; i++) | 335 | for (i = 0; irfpm_names[i]; i++) |
335 | fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); | 336 | fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); |
336 | fprintf(ctx->fp, "}\n\n"); | 337 | fprintf(ctx->fp, "},\n\n"); |
337 | 338 | ||
338 | fprintf(ctx->fp, "irfield = { [0]="); | 339 | fprintf(ctx->fp, "irfield = { [0]="); |
339 | for (i = 0; irfield_names[i]; i++) { | 340 | for (i = 0; irfield_names[i]; i++) { |
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx) | |||
343 | if (p) *p = '.'; | 344 | if (p) *p = '.'; |
344 | fprintf(ctx->fp, "\"%s\", ", buf); | 345 | fprintf(ctx->fp, "\"%s\", ", buf); |
345 | } | 346 | } |
346 | fprintf(ctx->fp, "}\n\n"); | 347 | fprintf(ctx->fp, "},\n\n"); |
347 | 348 | ||
348 | fprintf(ctx->fp, "ircall = {\n[0]="); | 349 | fprintf(ctx->fp, "ircall = {\n[0]="); |
349 | for (i = 0; ircall_names[i]; i++) | 350 | for (i = 0; ircall_names[i]; i++) |
350 | fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); | 351 | fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); |
351 | fprintf(ctx->fp, "}\n\n"); | 352 | fprintf(ctx->fp, "},\n\n"); |
352 | 353 | ||
353 | fprintf(ctx->fp, "traceerr = {\n[0]="); | 354 | fprintf(ctx->fp, "traceerr = {\n[0]="); |
354 | for (i = 0; trace_errors[i]; i++) | 355 | for (i = 0; trace_errors[i]; i++) |
355 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); | 356 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); |
356 | fprintf(ctx->fp, "}\n\n"); | 357 | fprintf(ctx->fp, "},\n\n"); |
357 | } | 358 | } |
358 | 359 | ||
359 | /* -- Argument parsing ---------------------------------------------------- */ | 360 | /* -- Argument parsing ---------------------------------------------------- */ |
@@ -490,6 +491,7 @@ int main(int argc, char **argv) | |||
490 | case BUILD_vmdef: | 491 | case BUILD_vmdef: |
491 | emit_vmdef(ctx); | 492 | emit_vmdef(ctx); |
492 | emit_lib(ctx); | 493 | emit_lib(ctx); |
494 | fprintf(ctx->fp, "}\n\n"); | ||
493 | break; | 495 | break; |
494 | case BUILD_ffdef: | 496 | case BUILD_ffdef: |
495 | case BUILD_libdef: | 497 | case BUILD_libdef: |
diff --git a/src/host/buildvm.h b/src/host/buildvm.h index 3b3110fb..18cd8848 100644 --- a/src/host/buildvm.h +++ b/src/host/buildvm.h | |||
@@ -82,6 +82,7 @@ typedef struct BuildCtx { | |||
82 | const char *beginsym; | 82 | const char *beginsym; |
83 | /* Strings generated by DynASM. */ | 83 | /* Strings generated by DynASM. */ |
84 | const char *const *globnames; | 84 | const char *const *globnames; |
85 | const char *const *extnames; | ||
85 | const char *dasm_ident; | 86 | const char *dasm_ident; |
86 | const char *dasm_arch; | 87 | const char *dasm_arch; |
87 | /* Relocations. */ | 88 | /* Relocations. */ |
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 390abbdd..7baa011f 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c | |||
@@ -51,8 +51,8 @@ static const char *const jccnames[] = { | |||
51 | "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" | 51 | "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" |
52 | }; | 52 | }; |
53 | 53 | ||
54 | /* Emit relocation for the incredibly stupid OSX assembler. */ | 54 | /* Emit x86/x64 text relocations. */ |
55 | static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, | 55 | static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n, |
56 | const char *sym) | 56 | const char *sym) |
57 | { | 57 | { |
58 | const char *opname = NULL; | 58 | const char *opname = NULL; |
@@ -71,6 +71,20 @@ err: | |||
71 | exit(1); | 71 | exit(1); |
72 | } | 72 | } |
73 | emit_asm_bytes(ctx, cp, n); | 73 | emit_asm_bytes(ctx, cp, n); |
74 | if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) { | ||
75 | /* Various fixups for external symbols outside of our binary. */ | ||
76 | if (ctx->mode == BUILD_elfasm) { | ||
77 | if (LJ_32) | ||
78 | fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym); | ||
79 | fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym); | ||
80 | if (LJ_32) | ||
81 | fprintf(ctx->fp, "#endif\n"); | ||
82 | return; | ||
83 | } else if (LJ_32 && ctx->mode == BUILD_machasm) { | ||
84 | fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym); | ||
85 | return; | ||
86 | } | ||
87 | } | ||
74 | fprintf(ctx->fp, "\t%s %s\n", opname, sym); | 88 | fprintf(ctx->fp, "\t%s %s\n", opname, sym); |
75 | } | 89 | } |
76 | #else | 90 | #else |
@@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) | |||
79 | { | 93 | { |
80 | int i; | 94 | int i; |
81 | for (i = 0; i < n; i += 4) { | 95 | for (i = 0; i < n; i += 4) { |
96 | uint32_t ins = *(uint32_t *)(p+i); | ||
97 | #if LJ_TARGET_ARM64 && LJ_BE | ||
98 | ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ | ||
99 | #endif | ||
82 | if ((i & 15) == 0) | 100 | if ((i & 15) == 0) |
83 | fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); | 101 | fprintf(ctx->fp, "\t.long 0x%08x", ins); |
84 | else | 102 | else |
85 | fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); | 103 | fprintf(ctx->fp, ",0x%08x", ins); |
86 | if ((i & 15) == 12) putc('\n', ctx->fp); | 104 | if ((i & 15) == 12) putc('\n', ctx->fp); |
87 | } | 105 | } |
88 | if ((n & 15) != 0) putc('\n', ctx->fp); | 106 | if ((n & 15) != 0) putc('\n', ctx->fp); |
@@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, | |||
107 | ins, sym); | 125 | ins, sym); |
108 | exit(1); | 126 | exit(1); |
109 | } | 127 | } |
110 | #elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE | 128 | #elif LJ_TARGET_ARM64 |
129 | if ((ins >> 26) == 0x25u) { | ||
130 | fprintf(ctx->fp, "\tbl %s\n", sym); | ||
131 | } else { | ||
132 | fprintf(stderr, | ||
133 | "Error: unsupported opcode %08x for %s symbol relocation.\n", | ||
134 | ins, sym); | ||
135 | exit(1); | ||
136 | } | ||
137 | #elif LJ_TARGET_PPC | ||
111 | #if LJ_TARGET_PS3 | 138 | #if LJ_TARGET_PS3 |
112 | #define TOCPREFIX "." | 139 | #define TOCPREFIX "." |
113 | #else | 140 | #else |
@@ -228,11 +255,20 @@ void emit_asm(BuildCtx *ctx) | |||
228 | 255 | ||
229 | #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND | 256 | #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND |
230 | /* This should really be moved into buildvm_arm.dasc. */ | 257 | /* This should really be moved into buildvm_arm.dasc. */ |
258 | #if LJ_ARCH_HASFPU | ||
259 | fprintf(ctx->fp, | ||
260 | ".fnstart\n" | ||
261 | ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n" | ||
262 | ".vsave {d8-d15}\n" | ||
263 | ".save {r4}\n" | ||
264 | ".pad #28\n"); | ||
265 | #else | ||
231 | fprintf(ctx->fp, | 266 | fprintf(ctx->fp, |
232 | ".fnstart\n" | 267 | ".fnstart\n" |
233 | ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" | 268 | ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" |
234 | ".pad #28\n"); | 269 | ".pad #28\n"); |
235 | #endif | 270 | #endif |
271 | #endif | ||
236 | #if LJ_TARGET_MIPS | 272 | #if LJ_TARGET_MIPS |
237 | fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); | 273 | fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); |
238 | #endif | 274 | #endif |
@@ -255,8 +291,9 @@ void emit_asm(BuildCtx *ctx) | |||
255 | BuildReloc *r = &ctx->reloc[rel]; | 291 | BuildReloc *r = &ctx->reloc[rel]; |
256 | int n = r->ofs - ofs; | 292 | int n = r->ofs - ofs; |
257 | #if LJ_TARGET_X86ORX64 | 293 | #if LJ_TARGET_X86ORX64 |
258 | if (ctx->mode == BUILD_machasm && r->type != 0) { | 294 | if (r->type != 0 && |
259 | emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); | 295 | (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) { |
296 | emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); | ||
260 | } else { | 297 | } else { |
261 | emit_asm_bytes(ctx, ctx->code+ofs, n); | 298 | emit_asm_bytes(ctx, ctx->code+ofs, n); |
262 | emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); | 299 | emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); |
@@ -290,10 +327,7 @@ void emit_asm(BuildCtx *ctx) | |||
290 | #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) | 327 | #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) |
291 | fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); | 328 | fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); |
292 | #endif | 329 | #endif |
293 | #if LJ_TARGET_PPCSPE | 330 | #if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP |
294 | /* Soft-float ABI + SPE. */ | ||
295 | fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n"); | ||
296 | #elif LJ_TARGET_PPC && !LJ_TARGET_PS3 | ||
297 | /* Hard-float ABI. */ | 331 | /* Hard-float ABI. */ |
298 | fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); | 332 | fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); |
299 | #endif | 333 | #endif |
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c index a9829d0d..b125ea12 100644 --- a/src/host/buildvm_lib.c +++ b/src/host/buildvm_lib.c | |||
@@ -5,7 +5,9 @@ | |||
5 | 5 | ||
6 | #include "buildvm.h" | 6 | #include "buildvm.h" |
7 | #include "lj_obj.h" | 7 | #include "lj_obj.h" |
8 | #include "lj_bc.h" | ||
8 | #include "lj_lib.h" | 9 | #include "lj_lib.h" |
10 | #include "buildvm_libbc.h" | ||
9 | 11 | ||
10 | /* Context for library definitions. */ | 12 | /* Context for library definitions. */ |
11 | static uint8_t obuf[8192]; | 13 | static uint8_t obuf[8192]; |
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg) | |||
151 | regfunc = REGFUNC_OK; | 153 | regfunc = REGFUNC_OK; |
152 | } | 154 | } |
153 | 155 | ||
/*
** Decode one unsigned LEB128 value starting at p.
** The decoded value is stored in *vv. Returns a pointer to the first byte
** after the encoded value.
*/
static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
{
  uint32_t v = *p++;
  if (v >= 0x80) {
    int sh = 0;
    uint8_t b;
    v &= 0x7f;
    do {
      b = *p++;
      sh += 7;
      /* Shift as unsigned: a 7 bit group shifted by 28 overflows a signed int.
      ** Groups that start beyond bit 31 cannot be represented and are ignored,
      ** which also avoids an undefined shift by 32 or more. */
      if (sh < 32)
	v |= (uint32_t)(b & 0x7f) << sh;
    } while (b >= 0x80);  /* High bit set means another group follows. */
  }
  *vv = v;
  return p;
}
166 | |||
167 | static void libdef_fixupbc(uint8_t *p) | ||
168 | { | ||
169 | uint32_t i, sizebc; | ||
170 | p += 4; | ||
171 | p = libdef_uleb128(p, &sizebc); | ||
172 | p = libdef_uleb128(p, &sizebc); | ||
173 | p = libdef_uleb128(p, &sizebc); | ||
174 | for (i = 0; i < sizebc; i++, p += 4) { | ||
175 | uint8_t op = p[libbc_endian ? 3 : 0]; | ||
176 | uint8_t ra = p[libbc_endian ? 2 : 1]; | ||
177 | uint8_t rc = p[libbc_endian ? 1 : 2]; | ||
178 | uint8_t rb = p[libbc_endian ? 0 : 3]; | ||
179 | if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) { | ||
180 | op = BC_ISNUM; rc++; | ||
181 | } | ||
182 | p[LJ_ENDIAN_SELECT(0, 3)] = op; | ||
183 | p[LJ_ENDIAN_SELECT(1, 2)] = ra; | ||
184 | p[LJ_ENDIAN_SELECT(2, 1)] = rc; | ||
185 | p[LJ_ENDIAN_SELECT(3, 0)] = rb; | ||
186 | } | ||
187 | } | ||
188 | |||
189 | static void libdef_lua(BuildCtx *ctx, char *p, int arg) | ||
190 | { | ||
191 | UNUSED(arg); | ||
192 | if (ctx->mode == BUILD_libdef) { | ||
193 | int i; | ||
194 | for (i = 0; libbc_map[i].name != NULL; i++) { | ||
195 | if (!strcmp(libbc_map[i].name, p)) { | ||
196 | int ofs = libbc_map[i].ofs; | ||
197 | int len = libbc_map[i+1].ofs - ofs; | ||
198 | obuf[2]++; /* Bump hash table size. */ | ||
199 | *optr++ = LIBINIT_LUA; | ||
200 | libdef_name(p, 0); | ||
201 | memcpy(optr, libbc_code + ofs, len); | ||
202 | libdef_fixupbc(optr); | ||
203 | optr += len; | ||
204 | return; | ||
205 | } | ||
206 | } | ||
207 | fprintf(stderr, "Error: missing libbc definition for %s\n", p); | ||
208 | exit(1); | ||
209 | } | ||
210 | } | ||
211 | |||
154 | static uint32_t find_rec(char *name) | 212 | static uint32_t find_rec(char *name) |
155 | { | 213 | { |
156 | char *p = (char *)obuf; | 214 | char *p = (char *)obuf; |
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = { | |||
277 | { "CF(", ")", libdef_func, LIBINIT_CF }, | 335 | { "CF(", ")", libdef_func, LIBINIT_CF }, |
278 | { "ASM(", ")", libdef_func, LIBINIT_ASM }, | 336 | { "ASM(", ")", libdef_func, LIBINIT_ASM }, |
279 | { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, | 337 | { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, |
338 | { "LUA(", ")", libdef_lua, 0 }, | ||
280 | { "REC(", ")", libdef_rec, 0 }, | 339 | { "REC(", ")", libdef_rec, 0 }, |
281 | { "PUSH(", ")", libdef_push, 0 }, | 340 | { "PUSH(", ")", libdef_push, 0 }, |
282 | { "SET(", ")", libdef_set, 0 }, | 341 | { "SET(", ")", libdef_set, 0 }, |
@@ -326,6 +385,8 @@ void emit_lib(BuildCtx *ctx) | |||
326 | ok = LJ_HASJIT; | 385 | ok = LJ_HASJIT; |
327 | else if (!strcmp(buf, "#if LJ_HASFFI\n")) | 386 | else if (!strcmp(buf, "#if LJ_HASFFI\n")) |
328 | ok = LJ_HASFFI; | 387 | ok = LJ_HASFFI; |
388 | else if (!strcmp(buf, "#if LJ_HASBUFFER\n")) | ||
389 | ok = LJ_HASBUFFER; | ||
329 | if (!ok) { | 390 | if (!ok) { |
330 | int lvl = 1; | 391 | int lvl = 1; |
331 | while (fgets(buf, sizeof(buf), fp) != NULL) { | 392 | while (fgets(buf, sizeof(buf), fp) != NULL) { |
@@ -373,7 +434,7 @@ void emit_lib(BuildCtx *ctx) | |||
373 | "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", | 434 | "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", |
374 | ffasmfunc); | 435 | ffasmfunc); |
375 | } else if (ctx->mode == BUILD_vmdef) { | 436 | } else if (ctx->mode == BUILD_vmdef) { |
376 | fprintf(ctx->fp, "}\n\n"); | 437 | fprintf(ctx->fp, "},\n\n"); |
377 | } else if (ctx->mode == BUILD_bcdef) { | 438 | } else if (ctx->mode == BUILD_bcdef) { |
378 | int i; | 439 | int i; |
379 | fprintf(ctx->fp, "\n};\n\n"); | 440 | fprintf(ctx->fp, "\n};\n\n"); |
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h new file mode 100644 index 00000000..b2600bd5 --- /dev/null +++ b/src/host/buildvm_libbc.h | |||
@@ -0,0 +1,56 @@ | |||
1 | /* This is a generated file. DO NOT EDIT! */ | ||
2 | |||
3 | static const int libbc_endian = 0; | ||
4 | |||
5 | static const uint8_t libbc_code[] = { | ||
6 | #if LJ_FR2 | ||
7 | 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, | ||
8 | 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, | ||
9 | 16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, | ||
10 | 0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1, | ||
11 | 128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2, | ||
12 | 0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7, | ||
13 | 0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, | ||
14 | 0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, | ||
15 | 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, | ||
16 | 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, | ||
17 | 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, | ||
18 | 2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, | ||
19 | 3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, | ||
20 | 0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, | ||
21 | 41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, | ||
22 | 18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, | ||
23 | 6,252,127,76,4,2,0,0 | ||
24 | #else | ||
25 | 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, | ||
26 | 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, | ||
27 | 16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, | ||
28 | 0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1, | ||
29 | 128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2, | ||
30 | 0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0, | ||
31 | 0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, | ||
32 | 0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, | ||
33 | 8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, | ||
34 | 0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, | ||
35 | 0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, | ||
36 | 2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, | ||
37 | 3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, | ||
38 | 0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, | ||
39 | 41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, | ||
40 | 18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, | ||
41 | 6,252,127,76,4,2,0,0 | ||
42 | #endif | ||
43 | }; | ||
44 | |||
45 | static const struct { const char *name; int ofs; } libbc_map[] = { | ||
46 | {"math_deg",0}, | ||
47 | {"math_rad",25}, | ||
48 | {"string_len",50}, | ||
49 | {"table_foreachi",69}, | ||
50 | {"table_foreach",136}, | ||
51 | {"table_getn",207}, | ||
52 | {"table_remove",226}, | ||
53 | {"table_move",355}, | ||
54 | {NULL,502} | ||
55 | }; | ||
56 | |||
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c index 97c0698e..b030f234 100644 --- a/src/host/buildvm_peobj.c +++ b/src/host/buildvm_peobj.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "buildvm.h" | 9 | #include "buildvm.h" |
10 | #include "lj_bc.h" | 10 | #include "lj_bc.h" |
11 | 11 | ||
12 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC | 12 | #if LJ_TARGET_X86ORX64 |
13 | 13 | ||
14 | /* Context for PE object emitter. */ | 14 | /* Context for PE object emitter. */ |
15 | static char *strtab; | 15 | static char *strtab; |
@@ -93,12 +93,6 @@ typedef struct PEsymaux { | |||
93 | #define PEOBJ_RELOC_ADDR32NB 0x03 | 93 | #define PEOBJ_RELOC_ADDR32NB 0x03 |
94 | #define PEOBJ_RELOC_OFS 0 | 94 | #define PEOBJ_RELOC_OFS 0 |
95 | #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ | 95 | #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ |
96 | #elif LJ_TARGET_PPC | ||
97 | #define PEOBJ_ARCH_TARGET 0x01f2 | ||
98 | #define PEOBJ_RELOC_REL32 0x06 | ||
99 | #define PEOBJ_RELOC_DIR32 0x02 | ||
100 | #define PEOBJ_RELOC_OFS (-4) | ||
101 | #define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */ | ||
102 | #endif | 96 | #endif |
103 | 97 | ||
104 | /* Section numbers (0-based). */ | 98 | /* Section numbers (0-based). */ |
@@ -109,6 +103,8 @@ enum { | |||
109 | #if LJ_TARGET_X64 | 103 | #if LJ_TARGET_X64 |
110 | PEOBJ_SECT_PDATA, | 104 | PEOBJ_SECT_PDATA, |
111 | PEOBJ_SECT_XDATA, | 105 | PEOBJ_SECT_XDATA, |
106 | #elif LJ_TARGET_X86 | ||
107 | PEOBJ_SECT_SXDATA, | ||
112 | #endif | 108 | #endif |
113 | PEOBJ_SECT_RDATA_Z, | 109 | PEOBJ_SECT_RDATA_Z, |
114 | PEOBJ_NSECTIONS | 110 | PEOBJ_NSECTIONS |
@@ -208,6 +204,13 @@ void emit_peobj(BuildCtx *ctx) | |||
208 | sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; | 204 | sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; |
209 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ | 205 | /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ |
210 | pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; | 206 | pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; |
207 | #elif LJ_TARGET_X86 | ||
208 | memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1); | ||
209 | pesect[PEOBJ_SECT_SXDATA].ofs = sofs; | ||
210 | sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4); | ||
211 | pesect[PEOBJ_SECT_SXDATA].relocofs = sofs; | ||
212 | /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */ | ||
213 | pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240; | ||
211 | #endif | 214 | #endif |
212 | 215 | ||
213 | memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); | 216 | memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); |
@@ -232,7 +235,7 @@ void emit_peobj(BuildCtx *ctx) | |||
232 | nrsym = ctx->nrelocsym; | 235 | nrsym = ctx->nrelocsym; |
233 | pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; | 236 | pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; |
234 | #if LJ_TARGET_X64 | 237 | #if LJ_TARGET_X64 |
235 | pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ | 238 | pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ |
236 | #endif | 239 | #endif |
237 | 240 | ||
238 | /* Write PE object header and all sections. */ | 241 | /* Write PE object header and all sections. */ |
@@ -242,15 +245,8 @@ void emit_peobj(BuildCtx *ctx) | |||
242 | /* Write .text section. */ | 245 | /* Write .text section. */ |
243 | host_endian.u = 1; | 246 | host_endian.u = 1; |
244 | if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { | 247 | if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { |
245 | #if LJ_TARGET_PPC | ||
246 | uint32_t *p = (uint32_t *)ctx->code; | ||
247 | int n = (int)(ctx->codesz >> 2); | ||
248 | for (i = 0; i < n; i++, p++) | ||
249 | *p = lj_bswap(*p); /* Byteswap .text section. */ | ||
250 | #else | ||
251 | fprintf(stderr, "Error: different byte order for host and target\n"); | 248 | fprintf(stderr, "Error: different byte order for host and target\n"); |
252 | exit(1); | 249 | exit(1); |
253 | #endif | ||
254 | } | 250 | } |
255 | owrite(ctx, ctx->code, ctx->codesz); | 251 | owrite(ctx, ctx->code, ctx->codesz); |
256 | for (i = 0; i < ctx->nreloc; i++) { | 252 | for (i = 0; i < ctx->nreloc; i++) { |
@@ -312,6 +308,19 @@ void emit_peobj(BuildCtx *ctx) | |||
312 | reloc.type = PEOBJ_RELOC_ADDR32NB; | 308 | reloc.type = PEOBJ_RELOC_ADDR32NB; |
313 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); | 309 | owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); |
314 | } | 310 | } |
311 | #elif LJ_TARGET_X86 | ||
312 | /* Write .sxdata section. */ | ||
313 | for (i = 0; i < nrsym; i++) { | ||
314 | if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) { | ||
315 | uint32_t symidx = 1+2+i; | ||
316 | owrite(ctx, &symidx, 4); | ||
317 | break; | ||
318 | } | ||
319 | } | ||
320 | if (i == nrsym) { | ||
321 | fprintf(stderr, "Error: extern lj_err_unwind_win not used\n"); | ||
322 | exit(1); | ||
323 | } | ||
315 | #endif | 324 | #endif |
316 | 325 | ||
317 | /* Write .rdata$Z section. */ | 326 | /* Write .rdata$Z section. */ |
@@ -333,8 +342,10 @@ void emit_peobj(BuildCtx *ctx) | |||
333 | #if LJ_TARGET_X64 | 342 | #if LJ_TARGET_X64 |
334 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); | 343 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); |
335 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); | 344 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); |
336 | emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, | 345 | emit_peobj_sym(ctx, "lj_err_unwind_win", 0, |
337 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); | 346 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); |
347 | #elif LJ_TARGET_X86 | ||
348 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA); | ||
338 | #endif | 349 | #endif |
339 | 350 | ||
340 | emit_peobj_sym(ctx, ctx->beginsym, 0, | 351 | emit_peobj_sym(ctx, ctx->beginsym, 0, |
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua new file mode 100644 index 00000000..f6dabbec --- /dev/null +++ b/src/host/genlibbc.lua | |||
@@ -0,0 +1,197 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- Lua script to dump the bytecode of the library functions written in Lua. | ||
3 | -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. | ||
4 | ---------------------------------------------------------------------------- | ||
5 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
6 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
7 | ---------------------------------------------------------------------------- | ||
8 | |||
9 | local ffi = require("ffi") | ||
10 | local bit = require("bit") | ||
11 | local vmdef = require("jit.vmdef") | ||
12 | local bcnames = vmdef.bcnames | ||
13 | |||
14 | local format = string.format | ||
15 | |||
16 | local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1) | ||
17 | |||
-- Print the command line synopsis to stderr and exit with status 1.
-- The program name is taken from arg[0] when available.
local function usage(arg)
  local prog = arg and arg[0] or "genlibbc"
  io.stderr:write("Usage: ", prog, " [-o buildvm_libbc.h] lib_*.c\n")
  os.exit(1)
end
23 | |||
-- Parse the command line in 'arg'.
-- Returns the output file name: the value following a leading "-o", or "-"
-- when no "-o" is given. A leading "-o <file>" pair is removed from 'arg', so
-- that only the input file names remain. Calls usage() when there are no
-- arguments or when "-o" has no value.
local function parse_arg(arg)
  if not (arg and arg[1]) then
    usage(arg)
  end
  if arg[1] ~= "-o" then
    return "-"
  end
  local outfile = arg[2]
  if not outfile then usage(arg) end
  -- Drop "-o" and its value.
  table.remove(arg, 1)
  table.remove(arg, 1)
  return outfile
end
37 | |||
-- Read all files listed in 'names' and return their contents concatenated
-- in list order. Raises an error (with the message from the I/O library)
-- if a file cannot be opened or read.
local function read_files(names)
  local parts = {}
  for _,name in ipairs(names) do
    local fp = assert(io.open(name))
    -- Check the read result, too: it is nil plus a message on a read error.
    parts[#parts+1] = assert(fp:read("*a"))
    fp:close()
  end
  -- Join once at the end instead of growing a string per file.
  return table.concat(parts)
end
47 | |||
-- Turn the annotated source of one library function into plain Lua.
-- Every CHECK_<type>(var) is replaced by an assignment of a unique marker
-- number to var, and every PAIRS(var) is replaced by "nil, var, 0".
-- Returns the code prefixed with "return " plus a fixup table, which maps
-- each marker number to { "CHECK", <type> } and has PAIRS set to true if any
-- PAIRS() was seen.
local function transform_lua(code)
  local fixup = {}
  local marker = -30000

  local function on_check(tp, var)
    marker = marker + 1
    fixup[marker] = { "CHECK", tp }
    return string.format("%s=%d", var, marker)
  end

  local function on_pairs(var)
    fixup.PAIRS = true
    return string.format("nil, %s, 0", var)
  end

  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", on_check)
  code = string.gsub(code, "PAIRS%((.-)%)", on_pairs)
  return "return "..code, fixup
end
62 | |||
-- Decode one unsigned LEB128 value at byte pointer p.
-- Returns the pointer advanced past the encoded value, and the value.
-- Plain arithmetic is used instead of bit operations: bit.lshift() returns a
-- signed 32 bit result, which would turn values with bit 31 set negative.
local function read_uleb128(p)
  local v = p[0]; p = p + 1
  if v >= 128 then
    local scale = 128  -- Weight of the next 7 bit group.
    v = v - 128
    repeat
      local r = p[0]
      v = v + (r % 128) * scale
      scale = scale * 128
      p = p + 1
    until r < 128  -- A clear high bit marks the last group.
  end
  return p, v
end
76 | |||
-- Type numbers stored in the C operand of the generated ISTYPE/ISNUM
-- instructions, keyed by the CHECK_<type> suffix. ORDER LJ_T
local name2itype = {
  str = 5, func = 9, tab = 12, int = 14, num = 15
}

-- Map bytecode names to opcode numbers. bcnames is split into consecutive
-- 6 character fields, one per opcode, with the padding spaces removed.
local BC = {}
do
  local count = #bcnames/6
  for opcode=0,count-1 do
    local padded = string.sub(bcnames, opcode*6+1, opcode*6+6)
    BC[string.gsub(padded, " ", "")] = opcode
  end
end

-- Byte offsets of the op, A, C and B fields inside a 4 byte instruction.
-- They are mirrored when the dump is big-endian.
local xop, xra, xrc, xrb
if isbe then
  xop, xra, xrc, xrb = 3, 2, 1, 0
else
  xop, xra, xrc, xrb = 0, 1, 2, 3
end
88 | |||
-- Patch the bytecode of a dumped function according to 'fixup' and return
-- the patched prototype as a string (without the dump header and without
-- the leading length field).
--   * KSHORT with a marker number from the fixup table becomes ISNUM or
--     ISTYPE, with the type number from name2itype in the C operand.
--     Registers checked with CHECK_tab are remembered.
--   * TGETV/TSETV whose B operand is such a remembered register become
--     TGETR/TSETR.
--   * ITERC becomes ITERN if the source used PAIRS().
local function fixup_dump(dump, fixup)
  -- Keep 'buf' referenced for the whole function: p and start point into it.
  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
  local protolen, ninsn
  -- Skip the first 5 bytes of the dump.
  -- NOTE(review): presumably the dump header -- confirm against the format.
  local p = buf+5
  p, protolen = read_uleb128(p)
  local start = p
  -- Skip 4 fixed bytes and two ULEB128 fields; the third is the number of
  -- instructions.
  p = read_uleb128(p + 4)
  p = read_uleb128(p)
  p, ninsn = read_uleb128(p)
  local rawtab = {}
  for _=1,ninsn do
    local op = p[xop]
    if op == BC.KSHORT then
      -- Sign-extend the 16 bit D operand.
      local d = bit.arshift(bit.lshift(p[xrc] + 256*p[xrb], 16), 16)
      local f = fixup[d]
      if f then
        if f[1] ~= "CHECK" then
          error("unhandled fixup type: "..f[1])
        end
        local tp = f[2]
        if tp == "tab" then rawtab[p[xra]] = true end
        p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
        p[xrb] = 0
        p[xrc] = name2itype[tp]
      end
    elseif op == BC.TGETV and rawtab[p[xrb]] then
      p[xop] = BC.TGETR
    elseif op == BC.TSETV and rawtab[p[xrb]] then
      p[xop] = BC.TSETR
    elseif op == BC.ITERC and fixup.PAIRS then
      p[xop] = BC.ITERN
    end
    p = p + 4
  end
  return ffi.string(start, protolen)
end
134 | |||
-- Collect all library functions defined as LJLIB_LUA(name) /* code */ in
-- 'src'. Each code snippet is transformed, compiled, dumped (stripped) and
-- patched. The result holds the function names in order of appearance in its
-- array part, and the patched dump of each function under its name.
local function find_defs(src)
  local defs = {}
  local pattern = "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/"
  for name, code in string.gmatch(src, pattern) do
    local tcode, fixup = transform_lua(code)
    -- Compile with an empty environment table; the chunk returns the function.
    local chunk = assert(load(tcode, "", nil, {}))
    local func = chunk()
    defs[name] = fixup_dump(string.dump(func, true), fixup)
    defs[#defs+1] = name
  end
  return defs
end
146 | |||
-- Build the text of the generated C header from 'defs' (names in the array
-- part, dumps keyed by name). The header defines libbc_endian, the
-- libbc_code byte array holding all dumps back to back plus a final 0, and
-- libbc_map with the start offset of each dump plus a NULL end entry holding
-- the total length.
local function gen_header(defs)
  local out = {}
  local function emit(x) out[#out+1] = x end

  emit("/* This is a generated file. DO NOT EDIT! */\n\n")
  emit("static const int libbc_endian = ") emit(isbe and 1 or 0) emit(";\n\n")

  -- All dumps, in definition order.
  local code = ""
  for _,name in ipairs(defs) do
    code = code .. defs[name]
  end

  emit("static const uint8_t libbc_code[] = {\n")
  local col = 0
  for i=1,#code do
    local byte = code:byte(i)
    emit(byte); emit(",")
    -- Digits plus the comma; break the line once it is 75 columns or more.
    col = col + #tostring(byte) + 1
    if col >= 75 then col = 0; emit("\n") end
  end
  emit("0\n};\n\n")

  emit("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
  local ofs = 0
  for _,name in ipairs(defs) do
    emit('{"'); emit(name); emit('",'); emit(ofs) emit('},\n')
    ofs = ofs + #defs[name]
  end
  emit("{NULL,"); emit(ofs); emit("}\n};\n\n")
  return table.concat(out)
end
174 | |||
-- Write 'data' to the file 'name', or to stdout if name is "-".
-- An existing file whose contents already equal 'data' is left untouched.
-- Raises an error if writing fails.
local function write_file(name, data)
  if name == "-" then
    assert(io.write(data))
    assert(io.flush())
    return
  end
  local existing = io.open(name)
  if existing then
    local old = existing:read("*a")
    existing:close()
    if data == old then return end
  end
  local out = assert(io.open(name, "w"))
  assert(out:write(data))
  assert(out:close())
end
191 | |||
-- Script entry point: parse the command line, scan the input files for
-- LJLIB_LUA definitions and write the generated header.
-- parse_arg() must run first, since it removes the "-o" option from 'arg'
-- before the remaining entries are read as input files.
local target = parse_arg(arg)
local sources = read_files(arg)
local header = gen_header(find_defs(sources))
write_file(target, header)
197 | |||
diff --git a/src/jit/bc.lua b/src/jit/bc.lua index d4c6d4a6..8d0844c0 100644 --- a/src/jit/bc.lua +++ b/src/jit/bc.lua | |||
@@ -41,7 +41,7 @@ | |||
41 | 41 | ||
42 | -- Cache some library functions and objects. | 42 | -- Cache some library functions and objects. |
43 | local jit = require("jit") | 43 | local jit = require("jit") |
44 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 44 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
45 | local jutil = require("jit.util") | 45 | local jutil = require("jit.util") |
46 | local vmdef = require("jit.vmdef") | 46 | local vmdef = require("jit.vmdef") |
47 | local bit = require("bit") | 47 | local bit = require("bit") |
@@ -179,13 +179,12 @@ local function bcliston(outfile) | |||
179 | end | 179 | end |
180 | 180 | ||
181 | -- Public module functions. | 181 | -- Public module functions. |
182 | module(...) | 182 | return { |
183 | 183 | line = bcline, | |
184 | line = bcline | 184 | dump = bcdump, |
185 | dump = bcdump | 185 | targets = bctargets, |
186 | targets = bctargets | 186 | on = bcliston, |
187 | 187 | off = bclistoff, | |
188 | on = bcliston | 188 | start = bcliston -- For -j command line option. |
189 | off = bclistoff | 189 | } |
190 | start = bcliston -- For -j command line option. | ||
191 | 190 | ||
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 086d5f88..6227d136 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua | |||
@@ -11,12 +11,16 @@ | |||
11 | ------------------------------------------------------------------------------ | 11 | ------------------------------------------------------------------------------ |
12 | 12 | ||
13 | local jit = require("jit") | 13 | local jit = require("jit") |
14 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 14 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
15 | local bit = require("bit") | 15 | local bit = require("bit") |
16 | 16 | ||
17 | -- Symbol name prefix for LuaJIT bytecode. | 17 | -- Symbol name prefix for LuaJIT bytecode. |
18 | local LJBC_PREFIX = "luaJIT_BC_" | 18 | local LJBC_PREFIX = "luaJIT_BC_" |
19 | 19 | ||
20 | local type, assert = type, assert | ||
21 | local format = string.format | ||
22 | local tremove, tconcat = table.remove, table.concat | ||
23 | |||
20 | ------------------------------------------------------------------------------ | 24 | ------------------------------------------------------------------------------ |
21 | 25 | ||
22 | local function usage() | 26 | local function usage() |
@@ -56,6 +60,11 @@ local function savefile(name, mode) | |||
56 | return check(io.open(name, mode)) | 60 | return check(io.open(name, mode)) |
57 | end | 61 | end |
58 | 62 | ||
-- Declare the C runtime function _setmode() through the given FFI library
-- and call it for file descriptor 1 (stdout) with mode 0x8000.
local function set_stdout_binary(ffi)
  local STDOUT_FD = 1
  local MODE_BINARY = 0x8000 -- NOTE(review): presumably _O_BINARY -- confirm.
  ffi.cdef[[int _setmode(int fd, int mode);]]
  ffi.C._setmode(STDOUT_FD, MODE_BINARY)
end
67 | |||
59 | ------------------------------------------------------------------------------ | 68 | ------------------------------------------------------------------------------ |
60 | 69 | ||
61 | local map_type = { | 70 | local map_type = { |
@@ -63,8 +72,18 @@ local map_type = { | |||
63 | } | 72 | } |
64 | 73 | ||
65 | local map_arch = { | 74 | local map_arch = { |
66 | x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, | 75 | x86 = { e = "le", b = 32, m = 3, p = 0x14c, }, |
67 | mips = true, mipsel = true, | 76 | x64 = { e = "le", b = 64, m = 62, p = 0x8664, }, |
77 | arm = { e = "le", b = 32, m = 40, p = 0x1c0, }, | ||
78 | arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, }, | ||
79 | arm64be = { e = "be", b = 64, m = 183, }, | ||
80 | ppc = { e = "be", b = 32, m = 20, }, | ||
81 | mips = { e = "be", b = 32, m = 8, f = 0x50001006, }, | ||
82 | mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, }, | ||
83 | mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, }, | ||
84 | mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, | ||
85 | mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, | ||
86 | mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, | ||
68 | } | 87 | } |
69 | 88 | ||
70 | local map_os = { | 89 | local map_os = { |
@@ -73,33 +92,33 @@ local map_os = { | |||
73 | } | 92 | } |
74 | 93 | ||
75 | local function checkarg(str, map, err) | 94 | local function checkarg(str, map, err) |
76 | str = string.lower(str) | 95 | str = str:lower() |
77 | local s = check(map[str], "unknown ", err) | 96 | local s = check(map[str], "unknown ", err) |
78 | return s == true and str or s | 97 | return type(s) == "string" and s or str |
79 | end | 98 | end |
80 | 99 | ||
81 | local function detecttype(str) | 100 | local function detecttype(str) |
82 | local ext = string.match(string.lower(str), "%.(%a+)$") | 101 | local ext = str:lower():match("%.(%a+)$") |
83 | return map_type[ext] or "raw" | 102 | return map_type[ext] or "raw" |
84 | end | 103 | end |
85 | 104 | ||
86 | local function checkmodname(str) | 105 | local function checkmodname(str) |
87 | check(string.match(str, "^[%w_.%-]+$"), "bad module name") | 106 | check(str:match("^[%w_.%-]+$"), "bad module name") |
88 | return string.gsub(str, "[%.%-]", "_") | 107 | return str:gsub("[%.%-]", "_") |
89 | end | 108 | end |
90 | 109 | ||
91 | local function detectmodname(str) | 110 | local function detectmodname(str) |
92 | if type(str) == "string" then | 111 | if type(str) == "string" then |
93 | local tail = string.match(str, "[^/\\]+$") | 112 | local tail = str:match("[^/\\]+$") |
94 | if tail then str = tail end | 113 | if tail then str = tail end |
95 | local head = string.match(str, "^(.*)%.[^.]*$") | 114 | local head = str:match("^(.*)%.[^.]*$") |
96 | if head then str = head end | 115 | if head then str = head end |
97 | str = string.match(str, "^[%w_.%-]+") | 116 | str = str:match("^[%w_.%-]+") |
98 | else | 117 | else |
99 | str = nil | 118 | str = nil |
100 | end | 119 | end |
101 | check(str, "cannot derive module name, use -n name") | 120 | check(str, "cannot derive module name, use -n name") |
102 | return string.gsub(str, "[%.%-]", "_") | 121 | return str:gsub("[%.%-]", "_") |
103 | end | 122 | end |
104 | 123 | ||
105 | ------------------------------------------------------------------------------ | 124 | ------------------------------------------------------------------------------ |
@@ -111,6 +130,11 @@ local function bcsave_tail(fp, output, s) | |||
111 | end | 130 | end |
112 | 131 | ||
113 | local function bcsave_raw(output, s) | 132 | local function bcsave_raw(output, s) |
133 | if output == "-" and jit.os == "Windows" then | ||
134 | local ok, ffi = pcall(require, "ffi") | ||
135 | check(ok, "FFI library required to write binary file to stdout") | ||
136 | set_stdout_binary(ffi) | ||
137 | end | ||
114 | local fp = savefile(output, "wb") | 138 | local fp = savefile(output, "wb") |
115 | bcsave_tail(fp, output, s) | 139 | bcsave_tail(fp, output, s) |
116 | end | 140 | end |
@@ -118,19 +142,19 @@ end | |||
118 | local function bcsave_c(ctx, output, s) | 142 | local function bcsave_c(ctx, output, s) |
119 | local fp = savefile(output, "w") | 143 | local fp = savefile(output, "w") |
120 | if ctx.type == "c" then | 144 | if ctx.type == "c" then |
121 | fp:write(string.format([[ | 145 | fp:write(format([[ |
122 | #ifdef __cplusplus | 146 | #ifdef __cplusplus |
123 | extern "C" | 147 | extern "C" |
124 | #endif | 148 | #endif |
125 | #ifdef _WIN32 | 149 | #ifdef _WIN32 |
126 | __declspec(dllexport) | 150 | __declspec(dllexport) |
127 | #endif | 151 | #endif |
128 | const char %s%s[] = { | 152 | const unsigned char %s%s[] = { |
129 | ]], LJBC_PREFIX, ctx.modname)) | 153 | ]], LJBC_PREFIX, ctx.modname)) |
130 | else | 154 | else |
131 | fp:write(string.format([[ | 155 | fp:write(format([[ |
132 | #define %s%s_SIZE %d | 156 | #define %s%s_SIZE %d |
133 | static const char %s%s[] = { | 157 | static const unsigned char %s%s[] = { |
134 | ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) | 158 | ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) |
135 | end | 159 | end |
136 | local t, n, m = {}, 0, 0 | 160 | local t, n, m = {}, 0, 0 |
@@ -138,13 +162,13 @@ static const char %s%s[] = { | |||
138 | local b = tostring(string.byte(s, i)) | 162 | local b = tostring(string.byte(s, i)) |
139 | m = m + #b + 1 | 163 | m = m + #b + 1 |
140 | if m > 78 then | 164 | if m > 78 then |
141 | fp:write(table.concat(t, ",", 1, n), ",\n") | 165 | fp:write(tconcat(t, ",", 1, n), ",\n") |
142 | n, m = 0, #b + 1 | 166 | n, m = 0, #b + 1 |
143 | end | 167 | end |
144 | n = n + 1 | 168 | n = n + 1 |
145 | t[n] = b | 169 | t[n] = b |
146 | end | 170 | end |
147 | bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") | 171 | bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n") |
148 | end | 172 | end |
149 | 173 | ||
150 | local function bcsave_elfobj(ctx, output, s, ffi) | 174 | local function bcsave_elfobj(ctx, output, s, ffi) |
@@ -199,12 +223,8 @@ typedef struct { | |||
199 | } ELF64obj; | 223 | } ELF64obj; |
200 | ]] | 224 | ]] |
201 | local symname = LJBC_PREFIX..ctx.modname | 225 | local symname = LJBC_PREFIX..ctx.modname |
202 | local is64, isbe = false, false | 226 | local ai = assert(map_arch[ctx.arch]) |
203 | if ctx.arch == "x64" then | 227 | local is64, isbe = ai.b == 64, ai.e == "be" |
204 | is64 = true | ||
205 | elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then | ||
206 | isbe = true | ||
207 | end | ||
208 | 228 | ||
209 | -- Handle different host/target endianess. | 229 | -- Handle different host/target endianess. |
210 | local function f32(x) return x end | 230 | local function f32(x) return x end |
@@ -237,10 +257,8 @@ typedef struct { | |||
237 | hdr.eendian = isbe and 2 or 1 | 257 | hdr.eendian = isbe and 2 or 1 |
238 | hdr.eversion = 1 | 258 | hdr.eversion = 1 |
239 | hdr.type = f16(1) | 259 | hdr.type = f16(1) |
240 | hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) | 260 | hdr.machine = f16(ai.m) |
241 | if ctx.arch == "mips" or ctx.arch == "mipsel" then | 261 | hdr.flags = f32(ai.f or 0) |
242 | hdr.flags = f32(0x50001006) | ||
243 | end | ||
244 | hdr.version = f32(1) | 262 | hdr.version = f32(1) |
245 | hdr.shofs = fofs(ffi.offsetof(o, "sect")) | 263 | hdr.shofs = fofs(ffi.offsetof(o, "sect")) |
246 | hdr.ehsize = f16(ffi.sizeof(hdr)) | 264 | hdr.ehsize = f16(ffi.sizeof(hdr)) |
@@ -336,12 +354,8 @@ typedef struct { | |||
336 | } PEobj; | 354 | } PEobj; |
337 | ]] | 355 | ]] |
338 | local symname = LJBC_PREFIX..ctx.modname | 356 | local symname = LJBC_PREFIX..ctx.modname |
339 | local is64 = false | 357 | local ai = assert(map_arch[ctx.arch]) |
340 | if ctx.arch == "x86" then | 358 | local is64 = ai.b == 64 |
341 | symname = "_"..symname | ||
342 | elseif ctx.arch == "x64" then | ||
343 | is64 = true | ||
344 | end | ||
345 | local symexport = " /EXPORT:"..symname..",DATA " | 359 | local symexport = " /EXPORT:"..symname..",DATA " |
346 | 360 | ||
347 | -- The file format is always little-endian. Swap if the host is big-endian. | 361 | -- The file format is always little-endian. Swap if the host is big-endian. |
@@ -355,7 +369,7 @@ typedef struct { | |||
355 | -- Create PE object and fill in header. | 369 | -- Create PE object and fill in header. |
356 | local o = ffi.new("PEobj") | 370 | local o = ffi.new("PEobj") |
357 | local hdr = o.hdr | 371 | local hdr = o.hdr |
358 | hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) | 372 | hdr.arch = f16(assert(ai.p)) |
359 | hdr.nsects = f16(2) | 373 | hdr.nsects = f16(2) |
360 | hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) | 374 | hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) |
361 | hdr.nsyms = f32(6) | 375 | hdr.nsyms = f32(6) |
@@ -477,13 +491,13 @@ typedef struct { | |||
477 | } mach_obj_64; | 491 | } mach_obj_64; |
478 | typedef struct { | 492 | typedef struct { |
479 | mach_fat_header fat; | 493 | mach_fat_header fat; |
480 | mach_fat_arch fat_arch[4]; | 494 | mach_fat_arch fat_arch[2]; |
481 | struct { | 495 | struct { |
482 | mach_header hdr; | 496 | mach_header hdr; |
483 | mach_segment_command seg; | 497 | mach_segment_command seg; |
484 | mach_section sec; | 498 | mach_section sec; |
485 | mach_symtab_command sym; | 499 | mach_symtab_command sym; |
486 | } arch[4]; | 500 | } arch[2]; |
487 | mach_nlist sym_entry; | 501 | mach_nlist sym_entry; |
488 | uint8_t space[4096]; | 502 | uint8_t space[4096]; |
489 | } mach_fat_obj; | 503 | } mach_fat_obj; |
@@ -494,6 +508,8 @@ typedef struct { | |||
494 | is64, align, mobj = true, 8, "mach_obj_64" | 508 | is64, align, mobj = true, 8, "mach_obj_64" |
495 | elseif ctx.arch == "arm" then | 509 | elseif ctx.arch == "arm" then |
496 | isfat, mobj = true, "mach_fat_obj" | 510 | isfat, mobj = true, "mach_fat_obj" |
511 | elseif ctx.arch == "arm64" then | ||
512 | is64, align, isfat, mobj = true, 8, true, "mach_fat_obj" | ||
497 | else | 513 | else |
498 | check(ctx.arch == "x86", "unsupported architecture for OSX") | 514 | check(ctx.arch == "x86", "unsupported architecture for OSX") |
499 | end | 515 | end |
@@ -503,8 +519,8 @@ typedef struct { | |||
503 | -- Create Mach-O object and fill in header. | 519 | -- Create Mach-O object and fill in header. |
504 | local o = ffi.new(mobj) | 520 | local o = ffi.new(mobj) |
505 | local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) | 521 | local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) |
506 | local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] | 522 | local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch] |
507 | local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] | 523 | local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch] |
508 | if isfat then | 524 | if isfat then |
509 | o.fat.magic = be32(0xcafebabe) | 525 | o.fat.magic = be32(0xcafebabe) |
510 | o.fat.nfat_arch = be32(#cpusubtype) | 526 | o.fat.nfat_arch = be32(#cpusubtype) |
@@ -562,6 +578,9 @@ end | |||
562 | local function bcsave_obj(ctx, output, s) | 578 | local function bcsave_obj(ctx, output, s) |
563 | local ok, ffi = pcall(require, "ffi") | 579 | local ok, ffi = pcall(require, "ffi") |
564 | check(ok, "FFI library required to write this file type") | 580 | check(ok, "FFI library required to write this file type") |
581 | if output == "-" and jit.os == "Windows" then | ||
582 | set_stdout_binary(ffi) | ||
583 | end | ||
565 | if ctx.os == "windows" then | 584 | if ctx.os == "windows" then |
566 | return bcsave_peobj(ctx, output, s, ffi) | 585 | return bcsave_peobj(ctx, output, s, ffi) |
567 | elseif ctx.os == "osx" then | 586 | elseif ctx.os == "osx" then |
@@ -603,16 +622,16 @@ local function docmd(...) | |||
603 | local n = 1 | 622 | local n = 1 |
604 | local list = false | 623 | local list = false |
605 | local ctx = { | 624 | local ctx = { |
606 | strip = true, arch = jit.arch, os = string.lower(jit.os), | 625 | strip = true, arch = jit.arch, os = jit.os:lower(), |
607 | type = false, modname = false, | 626 | type = false, modname = false, |
608 | } | 627 | } |
609 | while n <= #arg do | 628 | while n <= #arg do |
610 | local a = arg[n] | 629 | local a = arg[n] |
611 | if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then | 630 | if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then |
612 | table.remove(arg, n) | 631 | tremove(arg, n) |
613 | if a == "--" then break end | 632 | if a == "--" then break end |
614 | for m=2,#a do | 633 | for m=2,#a do |
615 | local opt = string.sub(a, m, m) | 634 | local opt = a:sub(m, m) |
616 | if opt == "l" then | 635 | if opt == "l" then |
617 | list = true | 636 | list = true |
618 | elseif opt == "s" then | 637 | elseif opt == "s" then |
@@ -625,13 +644,13 @@ local function docmd(...) | |||
625 | if n ~= 1 then usage() end | 644 | if n ~= 1 then usage() end |
626 | arg[1] = check(loadstring(arg[1])) | 645 | arg[1] = check(loadstring(arg[1])) |
627 | elseif opt == "n" then | 646 | elseif opt == "n" then |
628 | ctx.modname = checkmodname(table.remove(arg, n)) | 647 | ctx.modname = checkmodname(tremove(arg, n)) |
629 | elseif opt == "t" then | 648 | elseif opt == "t" then |
630 | ctx.type = checkarg(table.remove(arg, n), map_type, "file type") | 649 | ctx.type = checkarg(tremove(arg, n), map_type, "file type") |
631 | elseif opt == "a" then | 650 | elseif opt == "a" then |
632 | ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") | 651 | ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture") |
633 | elseif opt == "o" then | 652 | elseif opt == "o" then |
634 | ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") | 653 | ctx.os = checkarg(tremove(arg, n), map_os, "OS name") |
635 | else | 654 | else |
636 | usage() | 655 | usage() |
637 | end | 656 | end |
@@ -653,7 +672,7 @@ end | |||
653 | ------------------------------------------------------------------------------ | 672 | ------------------------------------------------------------------------------ |
654 | 673 | ||
655 | -- Public module functions. | 674 | -- Public module functions. |
656 | module(...) | 675 | return { |
657 | 676 | start = docmd -- Process -b command line option. | |
658 | start = docmd -- Process -b command line option. | 677 | } |
659 | 678 | ||
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua index d572a5c3..18ab68df 100644 --- a/src/jit/dis_arm.lua +++ b/src/jit/dis_arm.lua | |||
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len) | |||
658 | end | 658 | end |
659 | 659 | ||
660 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 660 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
661 | local function create_(code, addr, out) | 661 | local function create(code, addr, out) |
662 | local ctx = {} | 662 | local ctx = {} |
663 | ctx.code = code | 663 | ctx.code = code |
664 | ctx.addr = addr or 0 | 664 | ctx.addr = addr or 0 |
@@ -670,20 +670,20 @@ local function create_(code, addr, out) | |||
670 | end | 670 | end |
671 | 671 | ||
672 | -- Simple API: disassemble code (a string) at address and output via out. | 672 | -- Simple API: disassemble code (a string) at address and output via out. |
673 | local function disass_(code, addr, out) | 673 | local function disass(code, addr, out) |
674 | create_(code, addr, out):disass() | 674 | create(code, addr, out):disass() |
675 | end | 675 | end |
676 | 676 | ||
677 | -- Return register name for RID. | 677 | -- Return register name for RID. |
678 | local function regname_(r) | 678 | local function regname(r) |
679 | if r < 16 then return map_gpr[r] end | 679 | if r < 16 then return map_gpr[r] end |
680 | return "d"..(r-16) | 680 | return "d"..(r-16) |
681 | end | 681 | end |
682 | 682 | ||
683 | -- Public module functions. | 683 | -- Public module functions. |
684 | module(...) | 684 | return { |
685 | 685 | create = create, | |
686 | create = create_ | 686 | disass = disass, |
687 | disass = disass_ | 687 | regname = regname |
688 | regname = regname_ | 688 | } |
689 | 689 | ||
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua new file mode 100644 index 00000000..531584a1 --- /dev/null +++ b/src/jit/dis_arm64.lua | |||
@@ -0,0 +1,1216 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT ARM64 disassembler module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | -- | ||
7 | -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
8 | -- Sponsored by Cisco Systems, Inc. | ||
9 | ---------------------------------------------------------------------------- | ||
10 | -- This is a helper module used by the LuaJIT machine code dumper module. | ||
11 | -- | ||
12 | -- It disassembles most user-mode AArch64 instructions. | ||
13 | -- NYI: Advanced SIMD and VFP instructions. | ||
14 | ------------------------------------------------------------------------------ | ||
15 | |||
16 | local type = type | ||
17 | local sub, byte, format = string.sub, string.byte, string.format | ||
18 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | ||
19 | local concat = table.concat | ||
20 | local bit = require("bit") | ||
21 | local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex | ||
22 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift | ||
23 | local ror = bit.ror | ||
24 | |||
25 | ------------------------------------------------------------------------------ | ||
26 | -- Opcode maps | ||
27 | ------------------------------------------------------------------------------ | ||
28 | |||
29 | local map_adr = { -- PC-relative addressing. | ||
30 | shift = 31, mask = 1, | ||
31 | [0] = "adrDBx", "adrpDBx" | ||
32 | } | ||
33 | |||
34 | local map_addsubi = { -- Add/subtract immediate. | ||
35 | shift = 29, mask = 3, | ||
36 | [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg", | ||
37 | } | ||
38 | |||
39 | local map_logi = { -- Logical immediate. | ||
40 | shift = 31, mask = 1, | ||
41 | [0] = { | ||
42 | shift = 22, mask = 1, | ||
43 | [0] = { | ||
44 | shift = 29, mask = 3, | ||
45 | [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" | ||
46 | }, | ||
47 | false -- unallocated | ||
48 | }, | ||
49 | { | ||
50 | shift = 29, mask = 3, | ||
51 | [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" | ||
52 | } | ||
53 | } | ||
54 | |||
55 | local map_movwi = { -- Move wide immediate. | ||
56 | shift = 31, mask = 1, | ||
57 | [0] = { | ||
58 | shift = 22, mask = 1, | ||
59 | [0] = { | ||
60 | shift = 29, mask = 3, | ||
61 | [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" | ||
62 | }, false -- unallocated | ||
63 | }, | ||
64 | { | ||
65 | shift = 29, mask = 3, | ||
66 | [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" | ||
67 | }, | ||
68 | } | ||
69 | |||
70 | local map_bitf = { -- Bitfield. | ||
71 | shift = 31, mask = 1, | ||
72 | [0] = { | ||
73 | shift = 22, mask = 1, | ||
74 | [0] = { | ||
75 | shift = 29, mask = 3, | ||
76 | [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w", | ||
77 | "bfm|bfi|bfxilDN13w", | ||
78 | "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w" | ||
79 | } | ||
80 | }, | ||
81 | { | ||
82 | shift = 22, mask = 1, | ||
83 | { | ||
84 | shift = 29, mask = 3, | ||
85 | [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x", | ||
86 | "bfm|bfi|bfxilDN13x", | ||
87 | "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x" | ||
88 | } | ||
89 | } | ||
90 | } | ||
91 | |||
92 | local map_datai = { -- Data processing - immediate. | ||
93 | shift = 23, mask = 7, | ||
94 | [0] = map_adr, map_adr, map_addsubi, false, | ||
95 | map_logi, map_movwi, map_bitf, | ||
96 | { | ||
97 | shift = 15, mask = 0x1c0c1, | ||
98 | [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x", | ||
99 | [0x10081] = "extr|rorDNM4x" | ||
100 | } | ||
101 | } | ||
102 | |||
103 | local map_logsr = { -- Logical, shifted register. | ||
104 | shift = 31, mask = 1, | ||
105 | [0] = { | ||
106 | shift = 15, mask = 1, | ||
107 | [0] = { | ||
108 | shift = 29, mask = 3, | ||
109 | [0] = { | ||
110 | shift = 21, mask = 7, | ||
111 | [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", | ||
112 | "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" | ||
113 | }, | ||
114 | { | ||
115 | shift = 21, mask = 7, | ||
116 | [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", | ||
117 | "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" | ||
118 | }, | ||
119 | { | ||
120 | shift = 21, mask = 7, | ||
121 | [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", | ||
122 | "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" | ||
123 | }, | ||
124 | { | ||
125 | shift = 21, mask = 7, | ||
126 | [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", | ||
127 | "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" | ||
128 | } | ||
129 | }, | ||
130 | false -- unallocated | ||
131 | }, | ||
132 | { | ||
133 | shift = 29, mask = 3, | ||
134 | [0] = { | ||
135 | shift = 21, mask = 7, | ||
136 | [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", | ||
137 | "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" | ||
138 | }, | ||
139 | { | ||
140 | shift = 21, mask = 7, | ||
141 | [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", | ||
142 | "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" | ||
143 | }, | ||
144 | { | ||
145 | shift = 21, mask = 7, | ||
146 | [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", | ||
147 | "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" | ||
148 | }, | ||
149 | { | ||
150 | shift = 21, mask = 7, | ||
151 | [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", | ||
152 | "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | |||
157 | local map_assh = { | ||
158 | shift = 31, mask = 1, | ||
159 | [0] = { | ||
160 | shift = 15, mask = 1, | ||
161 | [0] = { | ||
162 | shift = 29, mask = 3, | ||
163 | [0] = { | ||
164 | shift = 22, mask = 3, | ||
165 | [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" | ||
166 | }, | ||
167 | { | ||
168 | shift = 22, mask = 3, | ||
169 | [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", | ||
170 | "adds|cmnD0NMSg", "adds|cmnD0NMg" | ||
171 | }, | ||
172 | { | ||
173 | shift = 22, mask = 3, | ||
174 | [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" | ||
175 | }, | ||
176 | { | ||
177 | shift = 22, mask = 3, | ||
178 | [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", | ||
179 | "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" | ||
180 | }, | ||
181 | }, | ||
182 | false -- unallocated | ||
183 | }, | ||
184 | { | ||
185 | shift = 29, mask = 3, | ||
186 | [0] = { | ||
187 | shift = 22, mask = 3, | ||
188 | [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" | ||
189 | }, | ||
190 | { | ||
191 | shift = 22, mask = 3, | ||
192 | [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg", | ||
193 | "adds|cmnD0NMg" | ||
194 | }, | ||
195 | { | ||
196 | shift = 22, mask = 3, | ||
197 | [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" | ||
198 | }, | ||
199 | { | ||
200 | shift = 22, mask = 3, | ||
201 | [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", | ||
202 | "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" | ||
203 | } | ||
204 | } | ||
205 | } | ||
206 | |||
207 | local map_addsubsh = { -- Add/subtract, shifted register. | ||
208 | shift = 22, mask = 3, | ||
209 | [0] = map_assh, map_assh, map_assh | ||
210 | } | ||
211 | |||
212 | local map_addsubex = { -- Add/subtract, extended register. | ||
213 | shift = 22, mask = 3, | ||
214 | [0] = { | ||
215 | shift = 29, mask = 3, | ||
216 | [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg", | ||
217 | } | ||
218 | } | ||
219 | |||
220 | local map_addsubc = { -- Add/subtract, with carry. | ||
221 | shift = 10, mask = 63, | ||
222 | [0] = { | ||
223 | shift = 29, mask = 3, | ||
224 | [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg", | ||
225 | } | ||
226 | } | ||
227 | |||
228 | local map_ccomp = { | ||
229 | shift = 4, mask = 1, | ||
230 | [0] = { | ||
231 | shift = 10, mask = 3, | ||
232 | [0] = { -- Conditional compare register. | ||
233 | shift = 29, mask = 3, | ||
234 | "ccmnNMVCg", false, "ccmpNMVCg", | ||
235 | }, | ||
236 | [2] = { -- Conditional compare immediate. | ||
237 | shift = 29, mask = 3, | ||
238 | "ccmnN5VCg", false, "ccmpN5VCg", | ||
239 | } | ||
240 | } | ||
241 | } | ||
242 | |||
243 | local map_csel = { -- Conditional select. | ||
244 | shift = 11, mask = 1, | ||
245 | [0] = { | ||
246 | shift = 10, mask = 1, | ||
247 | [0] = { | ||
248 | shift = 29, mask = 3, | ||
249 | [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false, | ||
250 | }, | ||
251 | { | ||
252 | shift = 29, mask = 3, | ||
253 | [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false, | ||
254 | } | ||
255 | } | ||
256 | } | ||
257 | |||
258 | local map_data1s = { -- Data processing, 1 source. | ||
259 | shift = 29, mask = 1, | ||
260 | [0] = { | ||
261 | shift = 31, mask = 1, | ||
262 | [0] = { | ||
263 | shift = 10, mask = 0x7ff, | ||
264 | [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg" | ||
265 | }, | ||
266 | { | ||
267 | shift = 10, mask = 0x7ff, | ||
268 | [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg" | ||
269 | } | ||
270 | } | ||
271 | } | ||
272 | |||
273 | local map_data2s = { -- Data processing, 2 sources. | ||
274 | shift = 29, mask = 1, | ||
275 | [0] = { | ||
276 | shift = 10, mask = 63, | ||
277 | false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg", | ||
278 | "lsrDNMg", "asrDNMg", "rorDNMg" | ||
279 | } | ||
280 | } | ||
281 | |||
282 | local map_data3s = { -- Data processing, 3 sources. | ||
283 | shift = 29, mask = 7, | ||
284 | [0] = { | ||
285 | shift = 21, mask = 7, | ||
286 | [0] = { | ||
287 | shift = 15, mask = 1, | ||
288 | [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g" | ||
289 | } | ||
290 | }, false, false, false, | ||
291 | { | ||
292 | shift = 15, mask = 1, | ||
293 | [0] = { | ||
294 | shift = 21, mask = 7, | ||
295 | [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false, | ||
296 | false, "umaddl|umullDxNMwA0x", "umulhDNMx" | ||
297 | }, | ||
298 | { | ||
299 | shift = 21, mask = 7, | ||
300 | [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false, | ||
301 | false, "umsubl|umneglDxNMwA0x" | ||
302 | } | ||
303 | } | ||
304 | } | ||
305 | |||
306 | local map_datar = { -- Data processing, register. | ||
307 | shift = 28, mask = 1, | ||
308 | [0] = { | ||
309 | shift = 24, mask = 1, | ||
310 | [0] = map_logsr, | ||
311 | { | ||
312 | shift = 21, mask = 1, | ||
313 | [0] = map_addsubsh, map_addsubex | ||
314 | } | ||
315 | }, | ||
316 | { | ||
317 | shift = 21, mask = 15, | ||
318 | [0] = map_addsubc, false, map_ccomp, false, map_csel, false, | ||
319 | { | ||
320 | shift = 30, mask = 1, | ||
321 | [0] = map_data2s, map_data1s | ||
322 | }, | ||
323 | false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s, | ||
324 | map_data3s, map_data3s, map_data3s | ||
325 | } | ||
326 | } | ||
327 | |||
328 | local map_lrl = { -- Load register, literal. | ||
329 | shift = 26, mask = 1, | ||
330 | [0] = { | ||
331 | shift = 30, mask = 3, | ||
332 | [0] = "ldrDwB", "ldrDxB", "ldrswDxB" | ||
333 | }, | ||
334 | { | ||
335 | shift = 30, mask = 3, | ||
336 | [0] = "ldrDsB", "ldrDdB" | ||
337 | } | ||
338 | } | ||
339 | |||
340 | local map_lsriind = { -- Load/store register, immediate pre/post-indexed. | ||
341 | shift = 30, mask = 3, | ||
342 | [0] = { | ||
343 | shift = 26, mask = 1, | ||
344 | [0] = { | ||
345 | shift = 22, mask = 3, | ||
346 | [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL" | ||
347 | } | ||
348 | }, | ||
349 | { | ||
350 | shift = 26, mask = 1, | ||
351 | [0] = { | ||
352 | shift = 22, mask = 3, | ||
353 | [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL" | ||
354 | } | ||
355 | }, | ||
356 | { | ||
357 | shift = 26, mask = 1, | ||
358 | [0] = { | ||
359 | shift = 22, mask = 3, | ||
360 | [0] = "strDwzL", "ldrDwzL", "ldrswDxzL" | ||
361 | }, | ||
362 | { | ||
363 | shift = 22, mask = 3, | ||
364 | [0] = "strDszL", "ldrDszL" | ||
365 | } | ||
366 | }, | ||
367 | { | ||
368 | shift = 26, mask = 1, | ||
369 | [0] = { | ||
370 | shift = 22, mask = 3, | ||
371 | [0] = "strDxzL", "ldrDxzL" | ||
372 | }, | ||
373 | { | ||
374 | shift = 22, mask = 3, | ||
375 | [0] = "strDdzL", "ldrDdzL" | ||
376 | } | ||
377 | } | ||
378 | } | ||
379 | |||
380 | local map_lsriro = { | ||
381 | shift = 21, mask = 1, | ||
382 | [0] = { -- Load/store register immediate. | ||
383 | shift = 10, mask = 3, | ||
384 | [0] = { -- Unscaled immediate. | ||
385 | shift = 26, mask = 1, | ||
386 | [0] = { | ||
387 | shift = 30, mask = 3, | ||
388 | [0] = { | ||
389 | shift = 22, mask = 3, | ||
390 | [0] = "sturbDwK", "ldurbDwK" | ||
391 | }, | ||
392 | { | ||
393 | shift = 22, mask = 3, | ||
394 | [0] = "sturhDwK", "ldurhDwK" | ||
395 | }, | ||
396 | { | ||
397 | shift = 22, mask = 3, | ||
398 | [0] = "sturDwK", "ldurDwK" | ||
399 | }, | ||
400 | { | ||
401 | shift = 22, mask = 3, | ||
402 | [0] = "sturDxK", "ldurDxK" | ||
403 | } | ||
404 | } | ||
405 | }, map_lsriind, false, map_lsriind | ||
406 | }, | ||
407 | { -- Load/store register, register offset. | ||
408 | shift = 10, mask = 3, | ||
409 | [2] = { | ||
410 | shift = 26, mask = 1, | ||
411 | [0] = { | ||
412 | shift = 30, mask = 3, | ||
413 | [0] = { | ||
414 | shift = 22, mask = 3, | ||
415 | [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO" | ||
416 | }, | ||
417 | { | ||
418 | shift = 22, mask = 3, | ||
419 | [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO" | ||
420 | }, | ||
421 | { | ||
422 | shift = 22, mask = 3, | ||
423 | [0] = "strDwO", "ldrDwO", "ldrswDxO" | ||
424 | }, | ||
425 | { | ||
426 | shift = 22, mask = 3, | ||
427 | [0] = "strDxO", "ldrDxO" | ||
428 | } | ||
429 | }, | ||
430 | { | ||
431 | shift = 30, mask = 3, | ||
432 | [2] = { | ||
433 | shift = 22, mask = 3, | ||
434 | [0] = "strDsO", "ldrDsO" | ||
435 | }, | ||
436 | [3] = { | ||
437 | shift = 22, mask = 3, | ||
438 | [0] = "strDdO", "ldrDdO" | ||
439 | } | ||
440 | } | ||
441 | } | ||
442 | } | ||
443 | } | ||
444 | |||
445 | local map_lsp = { -- Load/store register pair, offset. | ||
446 | shift = 22, mask = 1, | ||
447 | [0] = { | ||
448 | shift = 30, mask = 3, | ||
449 | [0] = { | ||
450 | shift = 26, mask = 1, | ||
451 | [0] = "stpDzAzwP", "stpDzAzsP", | ||
452 | }, | ||
453 | { | ||
454 | shift = 26, mask = 1, | ||
455 | "stpDzAzdP" | ||
456 | }, | ||
457 | { | ||
458 | shift = 26, mask = 1, | ||
459 | [0] = "stpDzAzxP" | ||
460 | } | ||
461 | }, | ||
462 | { | ||
463 | shift = 30, mask = 3, | ||
464 | [0] = { | ||
465 | shift = 26, mask = 1, | ||
466 | [0] = "ldpDzAzwP", "ldpDzAzsP", | ||
467 | }, | ||
468 | { | ||
469 | shift = 26, mask = 1, | ||
470 | [0] = "ldpswDAxP", "ldpDzAzdP" | ||
471 | }, | ||
472 | { | ||
473 | shift = 26, mask = 1, | ||
474 | [0] = "ldpDzAzxP" | ||
475 | } | ||
476 | } | ||
477 | } | ||
478 | |||
479 | local map_ls = { -- Loads and stores. | ||
480 | shift = 24, mask = 0x31, | ||
481 | [0x10] = map_lrl, [0x30] = map_lsriro, | ||
482 | [0x20] = { | ||
483 | shift = 23, mask = 3, | ||
484 | map_lsp, map_lsp, map_lsp | ||
485 | }, | ||
486 | [0x21] = { | ||
487 | shift = 23, mask = 3, | ||
488 | map_lsp, map_lsp, map_lsp | ||
489 | }, | ||
490 | [0x31] = { | ||
491 | shift = 26, mask = 1, | ||
492 | [0] = { | ||
493 | shift = 30, mask = 3, | ||
494 | [0] = { | ||
495 | shift = 22, mask = 3, | ||
496 | [0] = "strbDwzU", "ldrbDwzU" | ||
497 | }, | ||
498 | { | ||
499 | shift = 22, mask = 3, | ||
500 | [0] = "strhDwzU", "ldrhDwzU" | ||
501 | }, | ||
502 | { | ||
503 | shift = 22, mask = 3, | ||
504 | [0] = "strDwzU", "ldrDwzU" | ||
505 | }, | ||
506 | { | ||
507 | shift = 22, mask = 3, | ||
508 | [0] = "strDxzU", "ldrDxzU" | ||
509 | } | ||
510 | }, | ||
511 | { | ||
512 | shift = 30, mask = 3, | ||
513 | [2] = { | ||
514 | shift = 22, mask = 3, | ||
515 | [0] = "strDszU", "ldrDszU" | ||
516 | }, | ||
517 | [3] = { | ||
518 | shift = 22, mask = 3, | ||
519 | [0] = "strDdzU", "ldrDdzU" | ||
520 | } | ||
521 | } | ||
522 | }, | ||
523 | } | ||
524 | |||
525 | local map_datafp = { -- Data processing, SIMD and FP. | ||
526 | shift = 28, mask = 7, | ||
527 | { -- 001 | ||
528 | shift = 24, mask = 1, | ||
529 | [0] = { | ||
530 | shift = 21, mask = 1, | ||
531 | { | ||
532 | shift = 10, mask = 3, | ||
533 | [0] = { | ||
534 | shift = 12, mask = 1, | ||
535 | [0] = { | ||
536 | shift = 13, mask = 1, | ||
537 | [0] = { | ||
538 | shift = 14, mask = 1, | ||
539 | [0] = { | ||
540 | shift = 15, mask = 1, | ||
541 | [0] = { -- FP/int conversion. | ||
542 | shift = 31, mask = 1, | ||
543 | [0] = { | ||
544 | shift = 16, mask = 0xff, | ||
545 | [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs", | ||
546 | [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw", | ||
547 | [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs", | ||
548 | [0x26] = "fmovDwNs", [0x27] = "fmovDsNw", | ||
549 | [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs", | ||
550 | [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs", | ||
551 | [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs", | ||
552 | [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd", | ||
553 | [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw", | ||
554 | [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd", | ||
555 | [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd", | ||
556 | [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd", | ||
557 | [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd" | ||
558 | }, | ||
559 | { | ||
560 | shift = 16, mask = 0xff, | ||
561 | [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs", | ||
562 | [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx", | ||
563 | [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs", | ||
564 | [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs", | ||
565 | [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs", | ||
566 | [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs", | ||
567 | [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd", | ||
568 | [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx", | ||
569 | [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd", | ||
570 | [0x66] = "fmovDxNd", [0x67] = "fmovDdNx", | ||
571 | [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd", | ||
572 | [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd", | ||
573 | [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd" | ||
574 | } | ||
575 | } | ||
576 | }, | ||
577 | { -- FP data-processing, 1 source. | ||
578 | shift = 31, mask = 1, | ||
579 | [0] = { | ||
580 | shift = 22, mask = 3, | ||
581 | [0] = { | ||
582 | shift = 15, mask = 63, | ||
583 | [0] = "fmovDNf", "fabsDNf", "fnegDNf", | ||
584 | "fsqrtDNf", false, "fcvtDdNs", false, false, | ||
585 | "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", | ||
586 | "frintaDNf", false, "frintxDNf", "frintiDNf", | ||
587 | }, | ||
588 | { | ||
589 | shift = 15, mask = 63, | ||
590 | [0] = "fmovDNf", "fabsDNf", "fnegDNf", | ||
591 | "fsqrtDNf", "fcvtDsNd", false, false, false, | ||
592 | "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", | ||
593 | "frintaDNf", false, "frintxDNf", "frintiDNf", | ||
594 | } | ||
595 | } | ||
596 | } | ||
597 | }, | ||
598 | { -- FP compare. | ||
599 | shift = 31, mask = 1, | ||
600 | [0] = { | ||
601 | shift = 14, mask = 3, | ||
602 | [0] = { | ||
603 | shift = 23, mask = 1, | ||
604 | [0] = { | ||
605 | shift = 0, mask = 31, | ||
606 | [0] = "fcmpNMf", [8] = "fcmpNZf", | ||
607 | [16] = "fcmpeNMf", [24] = "fcmpeNZf", | ||
608 | } | ||
609 | } | ||
610 | } | ||
611 | } | ||
612 | }, | ||
613 | { -- FP immediate. | ||
614 | shift = 31, mask = 1, | ||
615 | [0] = { | ||
616 | shift = 5, mask = 31, | ||
617 | [0] = { | ||
618 | shift = 23, mask = 1, | ||
619 | [0] = "fmovDFf" | ||
620 | } | ||
621 | } | ||
622 | } | ||
623 | }, | ||
624 | { -- FP conditional compare. | ||
625 | shift = 31, mask = 1, | ||
626 | [0] = { | ||
627 | shift = 23, mask = 1, | ||
628 | [0] = { | ||
629 | shift = 4, mask = 1, | ||
630 | [0] = "fccmpNMVCf", "fccmpeNMVCf" | ||
631 | } | ||
632 | } | ||
633 | }, | ||
634 | { -- FP data-processing, 2 sources. | ||
635 | shift = 31, mask = 1, | ||
636 | [0] = { | ||
637 | shift = 23, mask = 1, | ||
638 | [0] = { | ||
639 | shift = 12, mask = 15, | ||
640 | [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf", | ||
641 | "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf", | ||
642 | "fnmulDNMf" | ||
643 | } | ||
644 | } | ||
645 | }, | ||
646 | { -- FP conditional select. | ||
647 | shift = 31, mask = 1, | ||
648 | [0] = { | ||
649 | shift = 23, mask = 1, | ||
650 | [0] = "fcselDNMCf" | ||
651 | } | ||
652 | } | ||
653 | } | ||
654 | }, | ||
655 | { -- FP data-processing, 3 sources. | ||
656 | shift = 31, mask = 1, | ||
657 | [0] = { | ||
658 | shift = 15, mask = 1, | ||
659 | [0] = { | ||
660 | shift = 21, mask = 5, | ||
661 | [0] = "fmaddDNMAf", "fnmaddDNMAf" | ||
662 | }, | ||
663 | { | ||
664 | shift = 21, mask = 5, | ||
665 | [0] = "fmsubDNMAf", "fnmsubDNMAf" | ||
666 | } | ||
667 | } | ||
668 | } | ||
669 | } | ||
670 | } | ||
671 | |||
-- Decode tree for branches, exception generating and system instructions.
-- Every level is indexed with (op >> shift) & mask; strings are leaves of
-- the form <name><operand pattern>, false marks an undecodable slot.
local map_br = {
  shift = 29, mask = 7,
  [0] = "bB",
  [1] = { -- Compare & branch, immediate.
    shift = 24, mask = 3,
    [0] = "cbzDBg",
    [1] = "cbnzDBg",
    [2] = "tbzDTBw",
    [3] = "tbnzDTBw"
  },
  [2] = { -- Conditional branch, immediate.
    shift = 24, mask = 3,
    [0] = {
      shift = 4, mask = 1,
      [0] = {
        shift = 0, mask = 15,
        [0] = "beqB", [1] = "bneB", [2] = "bhsB", [3] = "bloB",
        [4] = "bmiB", [5] = "bplB", [6] = "bvsB", [7] = "bvcB",
        [8] = "bhiB", [9] = "blsB", [10] = "bgeB", [11] = "bltB",
        [12] = "bgtB", [13] = "bleB", [14] = "balB"
      }
    }
  },
  [3] = false,
  [4] = "blB",
  [5] = { -- Compare & branch, immediate.
    shift = 24, mask = 3,
    [0] = "cbzDBg",
    [1] = "cbnzDBg",
    [2] = "tbzDTBx",
    [3] = "tbnzDTBx"
  },
  [6] = {
    shift = 24, mask = 3,
    [0] = { -- Exception generation.
      shift = 0, mask = 0xe0001f,
      [0x200000] = "brkW"
    },
    [1] = { -- System instructions.
      shift = 0, mask = 0x3fffff,
      [0x03201f] = "nop"
    },
    [2] = { -- Unconditional branch, register.
      shift = 0, mask = 0xfffc1f,
      [0x1f0000] = "brNx",
      [0x3f0000] = "blrNx",
      [0x5f0000] = "retNx"
    },
  }
}
711 | |||
-- Root of the decode tree: opcode bits 25-28 pick the instruction class map.
local map_init = {
  shift = 25, mask = 15,
  [0] = false,
  [1] = false,
  [2] = false,
  [3] = false,
  [4] = map_ls,
  [5] = map_datar,
  [6] = map_ls,
  [7] = map_datafp,
  [8] = map_datai,
  [9] = map_datai,
  [10] = map_br,
  [11] = map_br,
  [12] = map_ls,
  [13] = map_datar,
  [14] = map_ls,
  [15] = map_datafp
}
717 | |||
718 | ------------------------------------------------------------------------------ | ||
719 | |||
-- Register name tables, keyed by register class letter, then register number.
local map_regs = { x = {}, w = {}, d = {}, s = {} }

-- Default name is <class letter><number> for all 32 registers of each class.
for class, names in pairs(map_regs) do
  for i = 0, 31 do
    names[i] = class..i
  end
end
-- Register 31 of the integer classes is shown as the stack pointer.
map_regs.x[31] = "sp"
map_regs.w[31] = "wsp"
732 | |||
-- Condition code names, indexed by the 4 bit condition field.
local map_cond = {
  [0] = "eq", [1] = "ne", [2] = "cs", [3] = "cc",
  [4] = "mi", [5] = "pl", [6] = "vs", [7] = "vc",
  [8] = "hi", [9] = "ls", [10] = "ge", [11] = "lt",
  [12] = "gt", [13] = "le", [14] = "al",
}

-- Shift operation names, indexed by the 2 bit shift type field.
local map_shift = { [0] = "lsl", [1] = "lsr", [2] = "asr", }

-- Register extension names, indexed by the 3 bit option field.
local map_extend = {
  [0] = "uxtb", [1] = "uxth", [2] = "uxtw", [3] = "uxtx",
  [4] = "sxtb", [5] = "sxth", [6] = "sxtw", [7] = "sxtx",
}
743 | |||
744 | ------------------------------------------------------------------------------ | ||
745 | |||
-- Write one formatted line for the current instruction via ctx.out:
-- address, raw opcode (only if ctx.hexdump > 0), mnemonic and operands.
-- If ctx.rel has an entry in ctx.symtab, the symbol is appended.
-- Advances ctx.pos by one instruction (4 bytes).
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local sym = ctx.rel and ctx.symtab[ctx.rel]
  local extra = sym and "\t->"..sym or ""
  local args = concat(operands, ", ")
  local line
  if ctx.hexdump > 0 then
    line = format("%08x %s %-5s %s%s\n",
                  ctx.addr+pos, tohex(ctx.op), text, args, extra)
  else
    line = format("%08x %-5s %s%s\n", ctx.addr+pos, text, args, extra)
  end
  ctx.out(line)
  ctx.pos = pos + 4
end
765 | |||
-- Fallback for undecodable opcodes: emit the raw word as a .long directive.
local function unknown(ctx)
  local raw = "0x"..tohex(ctx.op)
  return putop(ctx, ".long", { raw })
end
770 | |||
-- Look up a register name for operand letter p. The register class is the
-- first of x/w/d/s that follows p in the pattern (skipping alphanumerics).
local function match_reg(p, pat, regnum)
  local class = match(pat, p.."%w-([xwds])")
  return map_regs[class][regnum]
end
774 | |||
-- Format a value as hex digits. Negative values go through tohex (fixed
-- 8 digit form), everything else is printed without leading zeros.
local function fmt_hex32(x)
  if x < 0 then return tohex(x) end
  return format("%x", x)
end
782 | |||
-- Replication multipliers, indexed by len (1..5) as computed below.
local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }

-- Decode the 13 bit logical immediate (bit 22 plus the immr and imms
-- fields) of an opcode and return it as a string of hex digits.
local function decode_imm13(op)
  local imms = band(rshift(op, 10), 63)
  local immr = band(rshift(op, 16), 63)

  if band(op, 0x00400000) ~= 0 then
    -- Bit 22 set: build the value as a lo/hi pair of 32 bit words.
    local lo, hi = -1, 0
    if imms < 32 then
      lo = rshift(-1, 31-imms)
    else
      hi = rshift(-1, 63-imms)
    end
    if immr ~= 0 then
      -- Rotate across both words: rotate each half, then exchange the
      -- bits that crossed the word boundary.
      lo, hi = ror(lo, immr), ror(hi, immr)
      local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
      lo, hi = bxor(lo, x), bxor(hi, x)
      if immr >= 32 then lo, hi = hi, lo end
    end
    if hi == 0 then return fmt_hex32(lo) end
    return fmt_hex32(hi)..tohex(lo)
  end

  -- Bit 22 clear: the upper bits of imms select len.
  local len
  if imms >= 60 then len = 1
  elseif imms >= 56 then len = 2
  elseif imms >= 48 then len = 3
  elseif imms >= 32 then len = 4
  else len = 5 end

  local l = lshift(1, len)-1
  local s = band(imms, l)
  local r = band(immr, l)
  local imm = ror(rshift(-1, 31-s), r)
  if len ~= 5 then
    imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l)
  end
  -- Replicate the element across the 32 bit word.
  imm = imm * imm13_rep[len]
  local ix = fmt_hex32(imm)
  if rshift(op, 31) == 0 then return ix end
  -- Bit 31 set: the 32 bit pattern is printed twice.
  return ix..tohex(imm)
end
821 | |||
-- Extract the signed PC-relative byte offset from an opcode. The field
-- layout is chosen by the instruction name.
local function parse_immpc(op, name)
  if name == "b" or name == "bl" then
    -- Signed field in bits 0-25, scaled by 4.
    return arshift(lshift(op, 6), 4)
  end
  if name == "tbz" or name == "tbnz" then
    -- Signed field in bits 5-18, scaled by 4.
    return lshift(arshift(lshift(op, 13), 18), 2)
  end
  -- Signed field in bits 5-23, scaled by 4.
  local ofs = lshift(arshift(lshift(op, 8), 13), 2)
  if name == "adr" or name == "adrp" then
    -- Bits 29-30 supply the two low bits of the offset.
    return bor(ofs, band(rshift(op, 29), 3))
  end
  return ofs
end
835 | |||
-- Expand the 8 bit floating-point immediate in opcode bits 13-20 into a
-- number: sign from bit 20, exponent from bits 17-19, fraction from
-- bits 13-16.
local function parse_fpimm8(op)
  local mant = 16 + band(rshift(op, 13), 15)
  local e = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
  local value = mant * 2^e
  if band(op, 0x100000) ~= 0 then value = -value end
  return value
end
842 | |||
-- Decide whether a bitfield move should be shown with the bitfield-extract
-- alias (name a2 at the call site) instead of one of the other aliases.
--   sf    zero for 32 bit operands, non-zero for 64 bit operands
--   uns   zero for the signed variant, non-zero for the unsigned variant
--   imms  imms field of the opcode
--   immr  immr field of the opcode
-- Returns true if the extract alias applies.
local function prefer_bfx(sf, uns, imms, immr)
  -- The plain shift aliases apply only when imms is the top bit of the
  -- operand width: 31 for 32 bit, 63 for 64 bit. Testing for both values
  -- regardless of sf wrongly rejected 64 bit extracts ending at bit 31.
  local topbit = sf ~= 0 and 63 or 31
  if imms < immr or imms == topbit then
    return false
  end
  if immr == 0 then
    -- Byte/halfword extensions of 32 bit operands have their own aliases.
    if sf == 0 and (imms == 7 or imms == 15) then
      return false
    end
    -- Same for signed byte/halfword/word extensions of 64 bit operands.
    if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then
      return false
    end
  end
  return true
end
857 | |||
-- Disassemble a single instruction.
-- Reads the 32 bit little-endian opcode at ctx.pos from ctx.code, walks the
-- decode tree starting at map_init and then interprets the operand pattern
-- of the matching leaf one character at a time. The resulting line is
-- written via putop (which also advances ctx.pos); undecodable opcodes are
-- handed to unknown().
local function disass_ins(ctx)
  local pos = ctx.pos
  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
  local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
  local operands = {}
  local suffix = ""
  local last, name, pat
  local map_reg
  ctx.op = op
  ctx.rel = nil
  last = nil
  local opat
  opat = map_init[band(rshift(op, 25), 15)]
  -- Descend through the nested maps until a leaf string is reached.
  while type(opat) ~= "string" do
    if not opat then return unknown(ctx) end
    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
  end
  -- Leaf layout: <name>[|<altname>[|<altname>...]][.<suffix>]<pattern>
  name, pat = match(opat, "^([a-z0-9]*)(.*)")
  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
  if altname then pat = pat2 end
  if sub(pat, 1, 1) == "." then
    local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
    suffix = suffix..s2
    pat = p2
  end

  -- "g"/"f" in the pattern select the register bank for all of D/N/M/A:
  -- "g" picks x or w from bit 31, "f" picks d or s from bit 22.
  local rt = match(pat, "[gf]")
  if rt then
    if rt == "g" then
      map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
    else
      map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
    end
  end

  local second0, immr

  for p in gmatch(pat, ".") do
    local x = nil
    if p == "D" then
      -- Register in bits 0-4.
      local regnum = band(op, 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "N" then
      -- Register in bits 5-9.
      local regnum = band(rshift(op, 5), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "M" then
      -- Register in bits 16-20.
      local regnum = band(rshift(op, 16), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "A" then
      -- Register in bits 10-14.
      local regnum = band(rshift(op, 10), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "B" then
      -- PC-relative target; remembered in ctx.rel for the symbol lookup.
      local addr = ctx.addr + pos + parse_immpc(op, name)
      ctx.rel = addr
      x = "0x"..tohex(addr)
    elseif p == "T" then
      -- Bit number: bit 31 supplies bit 5, bits 19-23 the low five bits.
      x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
    elseif p == "V" then
      x = band(op, 15)
    elseif p == "C" then
      x = map_cond[band(rshift(op, 12), 15)]
    elseif p == "c" then
      -- Condition operand with optional aliases when both sources match.
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      local cond = band(rshift(op, 12), 15)
      local invc = bxor(cond, 1)
      x = map_cond[cond]
      if altname and cond ~= 14 and cond ~= 15 then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if rn == rm then
          local n = #operands
          operands[n] = nil
          x = map_cond[invc]
          if rn ~= 31 then
            if a1 then name = a1 else name = altname end
          else
            operands[n-1] = nil
            name = a2
          end
        end
      end
    elseif p == "W" then
      x = band(rshift(op, 5), 0xffff)
    elseif p == "Y" then
      x = band(rshift(op, 5), 0xffff)
      local hw = band(rshift(op, 21), 3)
      if altname and (hw == 0 or x ~= 0) then
        name = altname
      end
    elseif p == "L" then
      -- Signed 9 bit offset; bit 11 set selects the pre-indexed form.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if band(op, 0x800) ~= 0 then
        x = "["..rn..", #"..imm9.."]!"
      else
        x = "["..rn.."], #"..imm9
      end
    elseif p == "U" then
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local sz = band(rshift(op, 30), 3)
      -- The 12 bit offset in bits 10-21 is unsigned: extract it with a
      -- logical shift so that values with bit 21 set stay positive.
      local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
      if imm12 ~= 0 then
        x = "["..rn..", #"..imm12.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "K" then
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if imm9 ~= 0 then
        x = "["..rn..", #"..imm9.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "O" then
      -- Register offset addressing; bit 13 picks a w or x index register.
      local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
      local m = band(rshift(op, 13), 1)
      if m == 0 then
        rm = map_regs.w[band(rshift(op, 16), 31)]
      else
        rm = map_regs.x[band(rshift(op, 16), 31)]
      end
      x = "["..rn..", "..rm
      local opt = band(rshift(op, 13), 7)
      local s = band(rshift(op, 12), 1)
      local sz = band(rshift(op, 30), 3)
      -- extension to be applied
      if opt == 3 then
        if s == 0 then x = x.."]"
        else x = x..", lsl #"..sz.."]" end
      elseif opt == 2 or opt == 6 or opt == 7 then
        if s == 0 then x = x..", "..map_extend[opt].."]"
        else x = x..", "..map_extend[opt].." #"..sz.."]" end
      else
        x = x.."]"
      end
    elseif p == "P" then
      -- Load/store pair: the signed 7 bit offset is scaled by the register
      -- size. opcv is the top six opcode bits (opc:101:V). 0x1b is the
      -- 8 byte FP pair and 0x2a the 8 byte integer pair; only 0x2b (16 byte
      -- FP pair) and above scale by 16.
      local opcv, sh = rshift(op, 26), 2
      if opcv >= 0x2b then sh = 4 elseif opcv >= 0x1b then sh = 3 end
      local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local ind = band(rshift(op, 23), 3)
      if ind == 1 then
        x = "["..rn.."], #"..imm7
      elseif ind == 2 then
        if imm7 == 0 then
          x = "["..rn.."]"
        else
          x = "["..rn..", #"..imm7.."]"
        end
      elseif ind == 3 then
        x = "["..rn..", #"..imm7.."]!"
      end
    elseif p == "I" then
      -- 12 bit immediate with optional lsl #12.
      local shf = band(rshift(op, 22), 3)
      local imm12 = band(rshift(op, 10), 0x0fff)
      local rn, rd = band(rshift(op, 5), 31), band(op, 31)
      if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
        name = altname
        x = nil
      elseif shf == 0 then
        x = imm12
      elseif shf == 1 then
        x = imm12..", lsl #12"
      end
    elseif p == "i" then
      x = "#0x"..decode_imm13(op)
    elseif p == "1" then
      -- immr field; kept for the following "2"/"3" handlers.
      immr = band(rshift(op, 16), 63)
      x = immr
    elseif p == "2" then
      -- imms field of a bitfield move; picks one of up to six aliases.
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2, a3, a4, a5, a6 =
          match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
        local sf = band(rshift(op, 26), 32)
        local uns = band(rshift(op, 30), 1)
        if prefer_bfx(sf, uns, x, immr) then
          name = a2
          x = x - immr + 1
        elseif immr == 0 and x == 7 then
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a6
          x = nil
        elseif immr == 0 and x == 15 then
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a5
          x = nil
        elseif x == 31 or x == 63 then
          if x == 31 and immr == 0 and name == "sbfm" then
            name = a4
            local n = #operands
            operands[n] = nil
            if sf ~= 0 then
              operands[n-1] = gsub(operands[n-1], "x", "w")
            end
            last = operands[n-1]
          else
            name = a3
          end
          x = nil
        elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
          name = a4
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = nil
        elseif x < immr then
          name = a1
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        end
      end
    elseif p == "3" then
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if x < immr then
          name = a1
          local sf = band(rshift(op, 26), 32)
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        else
          name = a2
          x = x - immr + 1
        end
      end
    elseif p == "4" then
      x = band(rshift(op, 10), 63)
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      if altname and rn == rm then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        name = altname
      end
    elseif p == "5" then
      x = band(rshift(op, 16), 31)
    elseif p == "S" then
      -- Shifted register operand; a zero amount is omitted.
      x = band(rshift(op, 10), 63)
      if x == 0 then x = nil
      else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
    elseif p == "X" then
      local opt = band(rshift(op, 13), 7)
      -- Width specifier <R>.
      if opt ~= 3 and opt ~= 7 then
        last = map_regs.w[band(rshift(op, 16), 31)]
        operands[#operands] = last
      end
      x = band(rshift(op, 10), 7)
      -- Extension.
      if opt == 2 + band(rshift(op, 31), 1) and
         band(rshift(op, second0 and 5 or 0), 31) == 31 then
        if x == 0 then x = nil
        else x = "lsl #"..x end
      else
        if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
        else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
      end
    elseif p == "R" then
      x = band(rshift(op,21), 3)
      if x == 0 then x = nil
      else x = "lsl #"..x*16 end
    elseif p == "z" then
      -- Show register 31 as the zero register for the preceding operand.
      local n = #operands
      if operands[n] == "sp" then operands[n] = "xzr"
      elseif operands[n] == "wsp" then operands[n] = "wzr"
      end
    elseif p == "Z" then
      x = 0
    elseif p == "F" then
      x = parse_fpimm8(op)
    elseif p == "g" or p == "f" or p == "x" or p == "w" or
           p == "d" or p == "s" then
      -- These are handled in D/N/M/A.
    elseif p == "0" then
      -- Drop a preceding register 31 operand and switch to the alias.
      if last == "sp" or last == "wsp" then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        if altname then
          local a1, a2 = match(altname, "([^|]*)|(.*)")
          if not a1 then
            name = altname
          elseif second0 then
            name, altname = a2, a1
          else
            name, altname = a1, a2
          end
        end
      end
      second0 = true
    else
      assert(false)
    end
    if x then
      last = x
      if type(x) == "number" then x = "#"..x end
      operands[#operands+1] = x
    end
  end

  return putop(ctx, name..suffix, operands)
end
1175 | |||
1176 | ------------------------------------------------------------------------------ | ||
1177 | |||
-- Disassemble a block of code: len bytes starting at offset ofs (default 0).
-- Without len, runs to the end of ctx.code.
local function disass_block(ctx, ofs, len)
  ofs = ofs or 0
  local stop
  if len then
    stop = ofs + len
  else
    stop = #ctx.code
  end
  ctx.pos = ofs
  ctx.rel = nil
  -- disass_ins advances ctx.pos past each instruction it emits.
  while ctx.pos < stop do
    disass_ins(ctx)
  end
end
1186 | |||
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-- addr defaults to 0 and out defaults to io.write.
local function create(code, addr, out)
  return {
    code = code,
    addr = addr or 0,
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 8,
  }
end
1198 | |||
-- Simple API: disassemble code (a string) at address and output via out.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
1203 | |||
-- Return register name for RID: ids below 32 map to the x registers,
-- all others to the d registers (offset by 32).
local function regname(r)
  local bank, idx = map_regs.d, r-32
  if r < 32 then bank, idx = map_regs.x, r end
  return bank[idx]
end

-- Public module functions.
local exports = {}
exports.create = create
exports.disass = disass
exports.regname = regname
return exports
1216 | |||
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua new file mode 100644 index 00000000..7337f5b7 --- /dev/null +++ b/src/jit/dis_arm64be.lua | |||
@@ -0,0 +1,12 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT ARM64BE disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- ARM64 instructions are always little-endian. So just forward to the | ||
8 | -- common ARM64 disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
-- Derive the package prefix from this module's own name and load the
-- little-endian disassembler from the same package.
local prefix = string.match(..., ".*%.") or ""
return require(prefix.."dis_arm64")
12 | |||
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua index ed65702a..05dc30fd 100644 --- a/src/jit/dis_mips.lua +++ b/src/jit/dis_mips.lua | |||
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex | |||
19 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift | 19 | local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift |
20 | 20 | ||
21 | ------------------------------------------------------------------------------ | 21 | ------------------------------------------------------------------------------ |
22 | -- Primary and extended opcode maps | 22 | -- Extended opcode maps common to all MIPS releases |
23 | ------------------------------------------------------------------------------ | 23 | ------------------------------------------------------------------------------ |
24 | 24 | ||
25 | local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } | ||
26 | local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } | 25 | local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } |
27 | local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } | 26 | local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } |
28 | 27 | ||
28 | local map_cop0 = { | ||
29 | shift = 25, mask = 1, | ||
30 | [0] = { | ||
31 | shift = 21, mask = 15, | ||
32 | [0] = "mfc0TDW", [4] = "mtc0TDW", | ||
33 | [10] = "rdpgprDT", | ||
34 | [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, | ||
35 | [14] = "wrpgprDT", | ||
36 | }, { | ||
37 | shift = 0, mask = 63, | ||
38 | [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", | ||
39 | [24] = "eret", [31] = "deret", | ||
40 | [32] = "wait", | ||
41 | }, | ||
42 | } | ||
43 | |||
44 | ------------------------------------------------------------------------------ | ||
45 | -- Primary and extended opcode maps for MIPS R1-R5 | ||
46 | ------------------------------------------------------------------------------ | ||
47 | |||
48 | local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } | ||
49 | |||
29 | local map_special = { | 50 | local map_special = { |
30 | shift = 0, mask = 63, | 51 | shift = 0, mask = 63, |
31 | [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, | 52 | [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, |
@@ -34,15 +55,17 @@ local map_special = { | |||
34 | "jrS", "jalrD1S", "movzDST", "movnDST", | 55 | "jrS", "jalrD1S", "movzDST", "movnDST", |
35 | "syscallY", "breakY", false, "sync", | 56 | "syscallY", "breakY", false, "sync", |
36 | "mfhiD", "mthiS", "mfloD", "mtloS", | 57 | "mfhiD", "mthiS", "mfloD", "mtloS", |
37 | false, false, false, false, | 58 | "dsllvDST", false, "dsrlvDST", "dsravDST", |
38 | "multST", "multuST", "divST", "divuST", | 59 | "multST", "multuST", "divST", "divuST", |
39 | false, false, false, false, | 60 | "dmultST", "dmultuST", "ddivST", "ddivuST", |
40 | "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", | 61 | "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", |
41 | "andDST", "orDST", "xorDST", "nor|notDST0", | 62 | "andDST", "or|moveDST0", "xorDST", "nor|notDST0", |
42 | false, false, "sltDST", "sltuDST", | 63 | false, false, "sltDST", "sltuDST", |
43 | false, false, false, false, | 64 | "daddDST", "dadduDST", "dsubDST", "dsubuDST", |
44 | "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", | 65 | "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", |
45 | "teqSTZ", false, "tneSTZ", | 66 | "teqSTZ", false, "tneSTZ", false, |
67 | "dsllDTA", false, "dsrlDTA", "dsraDTA", | ||
68 | "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", | ||
46 | } | 69 | } |
47 | 70 | ||
48 | local map_special2 = { | 71 | local map_special2 = { |
@@ -60,11 +83,17 @@ local map_bshfl = { | |||
60 | [24] = "sehDT", | 83 | [24] = "sehDT", |
61 | } | 84 | } |
62 | 85 | ||
86 | local map_dbshfl = { | ||
87 | shift = 6, mask = 31, | ||
88 | [2] = "dsbhDT", | ||
89 | [5] = "dshdDT", | ||
90 | } | ||
91 | |||
63 | local map_special3 = { | 92 | local map_special3 = { |
64 | shift = 0, mask = 63, | 93 | shift = 0, mask = 63, |
65 | [0] = "extTSAK", [4] = "insTSAL", | 94 | [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", |
66 | [32] = map_bshfl, | 95 | [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", |
67 | [59] = "rdhwrTD", | 96 | [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD", |
68 | } | 97 | } |
69 | 98 | ||
70 | local map_regimm = { | 99 | local map_regimm = { |
@@ -79,22 +108,6 @@ local map_regimm = { | |||
79 | false, false, false, "synciSO", | 108 | false, false, false, "synciSO", |
80 | } | 109 | } |
81 | 110 | ||
82 | local map_cop0 = { | ||
83 | shift = 25, mask = 1, | ||
84 | [0] = { | ||
85 | shift = 21, mask = 15, | ||
86 | [0] = "mfc0TDW", [4] = "mtc0TDW", | ||
87 | [10] = "rdpgprDT", | ||
88 | [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, | ||
89 | [14] = "wrpgprDT", | ||
90 | }, { | ||
91 | shift = 0, mask = 63, | ||
92 | [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", | ||
93 | [24] = "eret", [31] = "deret", | ||
94 | [32] = "wait", | ||
95 | }, | ||
96 | } | ||
97 | |||
98 | local map_cop1s = { | 111 | local map_cop1s = { |
99 | shift = 0, mask = 63, | 112 | shift = 0, mask = 63, |
100 | [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", | 113 | [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", |
@@ -178,8 +191,8 @@ local map_cop1bc = { | |||
178 | 191 | ||
179 | local map_cop1 = { | 192 | local map_cop1 = { |
180 | shift = 21, mask = 31, | 193 | shift = 21, mask = 31, |
181 | [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", | 194 | [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", |
182 | "mtc1TG", false, "ctc1TG", "mthc1TG", | 195 | "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", |
183 | map_cop1bc, false, false, false, | 196 | map_cop1bc, false, false, false, |
184 | false, false, false, false, | 197 | false, false, false, false, |
185 | map_cop1s, map_cop1d, false, false, | 198 | map_cop1s, map_cop1d, false, false, |
@@ -213,16 +226,218 @@ local map_pri = { | |||
213 | "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", | 226 | "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", |
214 | map_cop0, map_cop1, false, map_cop1x, | 227 | map_cop0, map_cop1, false, map_cop1x, |
215 | "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", | 228 | "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", |
216 | false, false, false, false, | 229 | "daddiTSI", "daddiuTSI", false, false, |
217 | map_special2, false, false, map_special3, | 230 | map_special2, "jalxJ", false, map_special3, |
218 | "lbTSO", "lhTSO", "lwlTSO", "lwTSO", | 231 | "lbTSO", "lhTSO", "lwlTSO", "lwTSO", |
219 | "lbuTSO", "lhuTSO", "lwrTSO", false, | 232 | "lbuTSO", "lhuTSO", "lwrTSO", false, |
220 | "sbTSO", "shTSO", "swlTSO", "swTSO", | 233 | "sbTSO", "shTSO", "swlTSO", "swTSO", |
221 | false, false, "swrTSO", "cacheNSO", | 234 | false, false, "swrTSO", "cacheNSO", |
222 | "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", | 235 | "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", |
223 | false, "ldc1HSO", "ldc2TSO", false, | 236 | false, "ldc1HSO", "ldc2TSO", "ldTSO", |
224 | "scTSO", "swc1HSO", "swc2TSO", false, | 237 | "scTSO", "swc1HSO", "swc2TSO", false, |
225 | false, "sdc1HSO", "sdc2TSO", false, | 238 | false, "sdc1HSO", "sdc2TSO", "sdTSO", |
239 | } | ||
240 | |||
241 | ------------------------------------------------------------------------------ | ||
242 | -- Primary and extended opcode maps for MIPS R6 | ||
243 | ------------------------------------------------------------------------------ | ||
244 | |||
245 | local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" } | ||
246 | local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" } | ||
247 | local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" } | ||
248 | local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" } | ||
249 | local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" } | ||
250 | local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" } | ||
251 | local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" } | ||
252 | local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" } | ||
253 | |||
254 | local map_special_r6 = { | ||
255 | shift = 0, mask = 63, | ||
256 | [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, | ||
257 | false, map_srl, "sraDTA", | ||
258 | "sllvDTS", false, map_srlv, "sravDTS", | ||
259 | "jrS", "jalrD1S", false, false, | ||
260 | "syscallY", "breakY", false, "sync", | ||
261 | "clzDS", "cloDS", "dclzDS", "dcloDS", | ||
262 | "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST", | ||
263 | map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6, | ||
264 | map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6, | ||
265 | "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", | ||
266 | "andDST", "or|moveDST0", "xorDST", "nor|notDST0", | ||
267 | false, false, "sltDST", "sltuDST", | ||
268 | "daddDST", "dadduDST", "dsubDST", "dsubuDST", | ||
269 | "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", | ||
270 | "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST", | ||
271 | "dsllDTA", false, "dsrlDTA", "dsraDTA", | ||
272 | "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", | ||
273 | } | ||
274 | |||
275 | local map_bshfl_r6 = { | ||
276 | shift = 9, mask = 3, | ||
277 | [1] = "alignDSTa", | ||
278 | _ = { | ||
279 | shift = 6, mask = 31, | ||
280 | [0] = "bitswapDT", | ||
281 | [2] = "wsbhDT", | ||
282 | [16] = "sebDT", | ||
283 | [24] = "sehDT", | ||
284 | } | ||
285 | } | ||
286 | |||
287 | local map_dbshfl_r6 = { | ||
288 | shift = 9, mask = 3, | ||
289 | [1] = "dalignDSTa", | ||
290 | _ = { | ||
291 | shift = 6, mask = 31, | ||
292 | [0] = "dbitswapDT", | ||
293 | [2] = "dsbhDT", | ||
294 | [5] = "dshdDT", | ||
295 | } | ||
296 | } | ||
297 | |||
298 | local map_special3_r6 = { | ||
299 | shift = 0, mask = 63, | ||
300 | [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", | ||
301 | [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", | ||
302 | [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD", | ||
303 | } | ||
304 | |||
305 | local map_regimm_r6 = { | ||
306 | shift = 16, mask = 31, | ||
307 | [0] = "bltzSB", [1] = "bgezSB", | ||
308 | [6] = "dahiSI", [30] = "datiSI", | ||
309 | [23] = "sigrieI", [31] = "synciSO", | ||
310 | } | ||
311 | |||
312 | local map_pcrel_r6 = { | ||
313 | shift = 19, mask = 3, | ||
314 | [0] = "addiupcS2", "lwpcS2", "lwupcS2", { | ||
315 | shift = 18, mask = 1, | ||
316 | [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" } | ||
317 | } | ||
318 | } | ||
319 | |||
320 | local map_cop1s_r6 = { | ||
321 | shift = 0, mask = 63, | ||
322 | [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", | ||
323 | "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG", | ||
324 | "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG", | ||
325 | "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG", | ||
326 | "sel.sFGH", false, false, false, | ||
327 | "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH", | ||
328 | "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG", | ||
329 | "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH", | ||
330 | false, "cvt.d.sFG", false, false, | ||
331 | "cvt.w.sFG", "cvt.l.sFG", | ||
332 | } | ||
333 | |||
334 | local map_cop1d_r6 = { | ||
335 | shift = 0, mask = 63, | ||
336 | [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH", | ||
337 | "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG", | ||
338 | "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG", | ||
339 | "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG", | ||
340 | "sel.dFGH", false, false, false, | ||
341 | "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH", | ||
342 | "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG", | ||
343 | "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH", | ||
344 | "cvt.s.dFG", false, false, false, | ||
345 | "cvt.w.dFG", "cvt.l.dFG", | ||
346 | } | ||
347 | |||
348 | local map_cop1w_r6 = { | ||
349 | shift = 0, mask = 63, | ||
350 | [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH", | ||
351 | "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH", | ||
352 | "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH", | ||
353 | "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH", | ||
354 | false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH", | ||
355 | false, false, false, false, | ||
356 | false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH", | ||
357 | false, false, false, false, | ||
358 | "cvt.s.wFG", "cvt.d.wFG", | ||
359 | } | ||
360 | |||
361 | local map_cop1l_r6 = { | ||
362 | shift = 0, mask = 63, | ||
363 | [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH", | ||
364 | "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH", | ||
365 | "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH", | ||
366 | "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH", | ||
367 | false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH", | ||
368 | false, false, false, false, | ||
369 | false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH", | ||
370 | false, false, false, false, | ||
371 | "cvt.s.lFG", "cvt.d.lFG", | ||
372 | } | ||
373 | |||
374 | local map_cop1_r6 = { | ||
375 | shift = 21, mask = 31, | ||
376 | [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", | ||
377 | "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", | ||
378 | false, "bc1eqzHB", false, false, | ||
379 | false, "bc1nezHB", false, false, | ||
380 | map_cop1s_r6, map_cop1d_r6, false, false, | ||
381 | map_cop1w_r6, map_cop1l_r6, | ||
382 | } | ||
383 | |||
384 | local function maprs_popTS(rs, rt) | ||
385 | if rt == 0 then return 0 elseif rs == 0 then return 1 | ||
386 | elseif rs == rt then return 2 else return 3 end | ||
387 | end | ||
388 | |||
389 | local map_pop06_r6 = { | ||
390 | maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB" | ||
391 | } | ||
392 | local map_pop07_r6 = { | ||
393 | maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB" | ||
394 | } | ||
395 | local map_pop26_r6 = { | ||
396 | maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB" | ||
397 | } | ||
398 | local map_pop27_r6 = { | ||
399 | maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB" | ||
400 | } | ||
401 | |||
402 | local function maprs_popS(rs, rt) | ||
403 | if rs == 0 then return 0 else return 1 end | ||
404 | end | ||
405 | |||
406 | local map_pop66_r6 = { | ||
407 | maprs = maprs_popS, [0] = "jicTI", "beqzcSb" | ||
408 | } | ||
409 | local map_pop76_r6 = { | ||
410 | maprs = maprs_popS, [0] = "jialcTI", "bnezcSb" | ||
411 | } | ||
412 | |||
413 | local function maprs_popST(rs, rt) | ||
414 | if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end | ||
415 | end | ||
416 | |||
417 | local map_pop10_r6 = { | ||
418 | maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB" | ||
419 | } | ||
420 | local map_pop30_r6 = { | ||
421 | maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB" | ||
422 | } | ||
423 | |||
424 | local map_pri_r6 = { | ||
425 | [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ", | ||
426 | "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6, | ||
427 | map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI", | ||
428 | "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U", | ||
429 | map_cop0, map_cop1_r6, false, false, | ||
430 | false, false, map_pop26_r6, map_pop27_r6, | ||
431 | map_pop30_r6, "daddiuTSI", false, false, | ||
432 | false, "dauiTSI", false, map_special3_r6, | ||
433 | "lbTSO", "lhTSO", false, "lwTSO", | ||
434 | "lbuTSO", "lhuTSO", false, false, | ||
435 | "sbTSO", "shTSO", false, "swTSO", | ||
436 | false, false, false, false, | ||
437 | false, "lwc1HSO", "bc#", false, | ||
438 | false, "ldc1HSO", map_pop66_r6, "ldTSO", | ||
439 | false, "swc1HSO", "balc#", map_pcrel_r6, | ||
440 | false, "sdc1HSO", map_pop76_r6, "sdTSO", | ||
226 | } | 441 | } |
227 | 442 | ||
228 | ------------------------------------------------------------------------------ | 443 | ------------------------------------------------------------------------------ |
@@ -279,10 +494,14 @@ local function disass_ins(ctx) | |||
279 | ctx.op = op | 494 | ctx.op = op |
280 | ctx.rel = nil | 495 | ctx.rel = nil |
281 | 496 | ||
282 | local opat = map_pri[rshift(op, 26)] | 497 | local opat = ctx.map_pri[rshift(op, 26)] |
283 | while type(opat) ~= "string" do | 498 | while type(opat) ~= "string" do |
284 | if not opat then return unknown(ctx) end | 499 | if not opat then return unknown(ctx) end |
285 | opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ | 500 | if opat.maprs then |
501 | opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))] | ||
502 | else | ||
503 | opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ | ||
504 | end | ||
286 | end | 505 | end |
287 | local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") | 506 | local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") |
288 | local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") | 507 | local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") |
@@ -306,6 +525,10 @@ local function disass_ins(ctx) | |||
306 | x = "f"..band(rshift(op, 21), 31) | 525 | x = "f"..band(rshift(op, 21), 31) |
307 | elseif p == "A" then | 526 | elseif p == "A" then |
308 | x = band(rshift(op, 6), 31) | 527 | x = band(rshift(op, 6), 31) |
528 | elseif p == "a" then | ||
529 | x = band(rshift(op, 6), 7) | ||
530 | elseif p == "E" then | ||
531 | x = band(rshift(op, 6), 31) + 32 | ||
309 | elseif p == "M" then | 532 | elseif p == "M" then |
310 | x = band(rshift(op, 11), 31) | 533 | x = band(rshift(op, 11), 31) |
311 | elseif p == "N" then | 534 | elseif p == "N" then |
@@ -315,10 +538,18 @@ local function disass_ins(ctx) | |||
315 | if x == 0 then x = nil end | 538 | if x == 0 then x = nil end |
316 | elseif p == "K" then | 539 | elseif p == "K" then |
317 | x = band(rshift(op, 11), 31) + 1 | 540 | x = band(rshift(op, 11), 31) + 1 |
541 | elseif p == "P" then | ||
542 | x = band(rshift(op, 11), 31) + 33 | ||
318 | elseif p == "L" then | 543 | elseif p == "L" then |
319 | x = band(rshift(op, 11), 31) - last + 1 | 544 | x = band(rshift(op, 11), 31) - last + 1 |
545 | elseif p == "Q" then | ||
546 | x = band(rshift(op, 11), 31) - last + 33 | ||
320 | elseif p == "I" then | 547 | elseif p == "I" then |
321 | x = arshift(lshift(op, 16), 16) | 548 | x = arshift(lshift(op, 16), 16) |
549 | elseif p == "2" then | ||
550 | x = arshift(lshift(op, 13), 11) | ||
551 | elseif p == "3" then | ||
552 | x = arshift(lshift(op, 14), 11) | ||
322 | elseif p == "U" then | 553 | elseif p == "U" then |
323 | x = band(op, 0xffff) | 554 | x = band(op, 0xffff) |
324 | elseif p == "O" then | 555 | elseif p == "O" then |
@@ -328,13 +559,22 @@ local function disass_ins(ctx) | |||
328 | local index = map_gpr[band(rshift(op, 16), 31)] | 559 | local index = map_gpr[band(rshift(op, 16), 31)] |
329 | operands[#operands] = format("%s(%s)", index, last) | 560 | operands[#operands] = format("%s(%s)", index, last) |
330 | elseif p == "B" then | 561 | elseif p == "B" then |
331 | x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 | 562 | x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4 |
563 | ctx.rel = x | ||
564 | x = format("0x%08x", x) | ||
565 | elseif p == "b" then | ||
566 | x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4 | ||
332 | ctx.rel = x | 567 | ctx.rel = x |
333 | x = "0x"..tohex(x) | 568 | x = format("0x%08x", x) |
569 | elseif p == "#" then | ||
570 | x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4 | ||
571 | ctx.rel = x | ||
572 | x = format("0x%08x", x) | ||
334 | elseif p == "J" then | 573 | elseif p == "J" then |
335 | x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 | 574 | local a = ctx.addr + ctx.pos |
575 | x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4 | ||
336 | ctx.rel = x | 576 | ctx.rel = x |
337 | x = "0x"..tohex(x) | 577 | x = format("0x%08x", x) |
338 | elseif p == "V" then | 578 | elseif p == "V" then |
339 | x = band(rshift(op, 8), 7) | 579 | x = band(rshift(op, 8), 7) |
340 | if x == 0 then x = nil end | 580 | if x == 0 then x = nil end |
@@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len) | |||
384 | end | 624 | end |
385 | 625 | ||
386 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 626 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
387 | local function create_(code, addr, out) | 627 | local function create(code, addr, out) |
388 | local ctx = {} | 628 | local ctx = {} |
389 | ctx.code = code | 629 | ctx.code = code |
390 | ctx.addr = addr or 0 | 630 | ctx.addr = addr or 0 |
@@ -393,36 +633,62 @@ local function create_(code, addr, out) | |||
393 | ctx.disass = disass_block | 633 | ctx.disass = disass_block |
394 | ctx.hexdump = 8 | 634 | ctx.hexdump = 8 |
395 | ctx.get = get_be | 635 | ctx.get = get_be |
636 | ctx.map_pri = map_pri | ||
637 | return ctx | ||
638 | end | ||
639 | |||
640 | local function create_el(code, addr, out) | ||
641 | local ctx = create(code, addr, out) | ||
642 | ctx.get = get_le | ||
643 | return ctx | ||
644 | end | ||
645 | |||
646 | local function create_r6(code, addr, out) | ||
647 | local ctx = create(code, addr, out) | ||
648 | ctx.map_pri = map_pri_r6 | ||
396 | return ctx | 649 | return ctx |
397 | end | 650 | end |
398 | 651 | ||
399 | local function create_el_(code, addr, out) | 652 | local function create_r6_el(code, addr, out) |
400 | local ctx = create_(code, addr, out) | 653 | local ctx = create(code, addr, out) |
401 | ctx.get = get_le | 654 | ctx.get = get_le |
655 | ctx.map_pri = map_pri_r6 | ||
402 | return ctx | 656 | return ctx |
403 | end | 657 | end |
404 | 658 | ||
405 | -- Simple API: disassemble code (a string) at address and output via out. | 659 | -- Simple API: disassemble code (a string) at address and output via out. |
406 | local function disass_(code, addr, out) | 660 | local function disass(code, addr, out) |
407 | create_(code, addr, out):disass() | 661 | create(code, addr, out):disass() |
662 | end | ||
663 | |||
664 | local function disass_el(code, addr, out) | ||
665 | create_el(code, addr, out):disass() | ||
408 | end | 666 | end |
409 | 667 | ||
410 | local function disass_el_(code, addr, out) | 668 | local function disass_r6(code, addr, out) |
411 | create_el_(code, addr, out):disass() | 669 | create_r6(code, addr, out):disass() |
670 | end | ||
671 | |||
672 | local function disass_r6_el(code, addr, out) | ||
673 | create_r6_el(code, addr, out):disass() | ||
412 | end | 674 | end |
413 | 675 | ||
414 | -- Return register name for RID. | 676 | -- Return register name for RID. |
415 | local function regname_(r) | 677 | local function regname(r) |
416 | if r < 32 then return map_gpr[r] end | 678 | if r < 32 then return map_gpr[r] end |
417 | return "f"..(r-32) | 679 | return "f"..(r-32) |
418 | end | 680 | end |
419 | 681 | ||
420 | -- Public module functions. | 682 | -- Public module functions. |
421 | module(...) | 683 | return { |
422 | 684 | create = create, | |
423 | create = create_ | 685 | create_el = create_el, |
424 | create_el = create_el_ | 686 | create_r6 = create_r6, |
425 | disass = disass_ | 687 | create_r6_el = create_r6_el, |
426 | disass_el = disass_el_ | 688 | disass = disass, |
427 | regname = regname_ | 689 | disass_el = disass_el, |
690 | disass_r6 = disass_r6, | ||
691 | disass_r6_el = disass_r6_el, | ||
692 | regname = regname | ||
693 | } | ||
428 | 694 | ||
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua new file mode 100644 index 00000000..1236e524 --- /dev/null +++ b/src/jit/dis_mips64.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64 disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the big-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create, | ||
14 | disass = dis_mips.disass, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua new file mode 100644 index 00000000..7c478d2d --- /dev/null +++ b/src/jit/dis_mips64el.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64EL disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the little-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create_el, | ||
14 | disass = dis_mips.disass_el, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua new file mode 100644 index 00000000..c5789ce4 --- /dev/null +++ b/src/jit/dis_mips64r6.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64R6 disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the r6 big-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create_r6, | ||
14 | disass = dis_mips.disass_r6, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua new file mode 100644 index 00000000..f67f6240 --- /dev/null +++ b/src/jit/dis_mips64r6el.lua | |||
@@ -0,0 +1,17 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT MIPS64R6EL disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- This module just exports the r6 little-endian functions from the | ||
8 | -- MIPS disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") | ||
12 | return { | ||
13 | create = dis_mips.create_r6_el, | ||
14 | disass = dis_mips.disass_r6_el, | ||
15 | regname = dis_mips.regname | ||
16 | } | ||
17 | |||
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua index 4f75ca32..a4fa6c60 100644 --- a/src/jit/dis_mipsel.lua +++ b/src/jit/dis_mipsel.lua | |||
@@ -8,13 +8,10 @@ | |||
8 | -- MIPS disassembler module. All the interesting stuff is there. | 8 | -- MIPS disassembler module. All the interesting stuff is there. |
9 | ------------------------------------------------------------------------------ | 9 | ------------------------------------------------------------------------------ |
10 | 10 | ||
11 | local require = require | 11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") |
12 | 12 | return { | |
13 | module(...) | 13 | create = dis_mips.create_el, |
14 | 14 | disass = dis_mips.disass_el, | |
15 | local dis_mips = require(_PACKAGE.."dis_mips") | 15 | regname = dis_mips.regname |
16 | 16 | } | |
17 | create = dis_mips.create_el | ||
18 | disass = dis_mips.disass_el | ||
19 | regname = dis_mips.regname | ||
20 | 17 | ||
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua index 6d3adfe0..8f65f25a 100644 --- a/src/jit/dis_ppc.lua +++ b/src/jit/dis_ppc.lua | |||
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len) | |||
560 | end | 560 | end |
561 | 561 | ||
562 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 562 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
563 | local function create_(code, addr, out) | 563 | local function create(code, addr, out) |
564 | local ctx = {} | 564 | local ctx = {} |
565 | ctx.code = code | 565 | ctx.code = code |
566 | ctx.addr = addr or 0 | 566 | ctx.addr = addr or 0 |
@@ -572,20 +572,20 @@ local function create_(code, addr, out) | |||
572 | end | 572 | end |
573 | 573 | ||
574 | -- Simple API: disassemble code (a string) at address and output via out. | 574 | -- Simple API: disassemble code (a string) at address and output via out. |
575 | local function disass_(code, addr, out) | 575 | local function disass(code, addr, out) |
576 | create_(code, addr, out):disass() | 576 | create(code, addr, out):disass() |
577 | end | 577 | end |
578 | 578 | ||
579 | -- Return register name for RID. | 579 | -- Return register name for RID. |
580 | local function regname_(r) | 580 | local function regname(r) |
581 | if r < 32 then return map_gpr[r] end | 581 | if r < 32 then return map_gpr[r] end |
582 | return "f"..(r-32) | 582 | return "f"..(r-32) |
583 | end | 583 | end |
584 | 584 | ||
585 | -- Public module functions. | 585 | -- Public module functions. |
586 | module(...) | 586 | return { |
587 | 587 | create = create, | |
588 | create = create_ | 588 | disass = disass, |
589 | disass = disass_ | 589 | regname = regname |
590 | regname = regname_ | 590 | } |
591 | 591 | ||
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua index 5a11c2cc..d076c6ae 100644 --- a/src/jit/dis_x64.lua +++ b/src/jit/dis_x64.lua | |||
@@ -8,13 +8,10 @@ | |||
8 | -- x86/x64 disassembler module. All the interesting stuff is there. | 8 | -- x86/x64 disassembler module. All the interesting stuff is there. |
9 | ------------------------------------------------------------------------------ | 9 | ------------------------------------------------------------------------------ |
10 | 10 | ||
11 | local require = require | 11 | local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86") |
12 | 12 | return { | |
13 | module(...) | 13 | create = dis_x86.create64, |
14 | 14 | disass = dis_x86.disass64, | |
15 | local dis_x86 = require(_PACKAGE.."dis_x86") | 15 | regname = dis_x86.regname64 |
16 | 16 | } | |
17 | create = dis_x86.create64 | ||
18 | disass = dis_x86.disass64 | ||
19 | regname = dis_x86.regname64 | ||
20 | 17 | ||
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index 25f60c77..84492fff 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua | |||
@@ -15,19 +15,20 @@ | |||
15 | -- Intel and AMD manuals. The supported instruction set is quite extensive | 15 | -- Intel and AMD manuals. The supported instruction set is quite extensive |
16 | -- and reflects what a current generation Intel or AMD CPU implements in | 16 | -- and reflects what a current generation Intel or AMD CPU implements in |
17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, | 17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, |
18 | -- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) | 18 | -- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor |
19 | -- instructions. | 19 | -- (VMX/SVM) instructions. |
20 | -- | 20 | -- |
21 | -- Notes: | 21 | -- Notes: |
22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. | 22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. |
23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. | 23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. |
24 | -- * The public API may change when more architectures are added. | ||
25 | ------------------------------------------------------------------------------ | 24 | ------------------------------------------------------------------------------ |
26 | 25 | ||
27 | local type = type | 26 | local type = type |
28 | local sub, byte, format = string.sub, string.byte, string.format | 27 | local sub, byte, format = string.sub, string.byte, string.format |
29 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | 28 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub |
30 | local lower, rep = string.lower, string.rep | 29 | local lower, rep = string.lower, string.rep |
30 | local bit = require("bit") | ||
31 | local tohex = bit.tohex | ||
31 | 32 | ||
32 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. | 33 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. |
33 | local map_opc1_32 = { | 34 | local map_opc1_32 = { |
@@ -76,7 +77,7 @@ local map_opc1_32 = { | |||
76 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", | 77 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", |
77 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", | 78 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", |
78 | --Cx | 79 | --Cx |
79 | "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", | 80 | "shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi", |
80 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", | 81 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", |
81 | --Dx | 82 | --Dx |
82 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", | 83 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", |
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({ | |||
101 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", | 102 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", |
102 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", | 103 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", |
103 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", | 104 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", |
104 | [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, | 105 | [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false, |
105 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, | 106 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, |
106 | }, { __index = map_opc1_32 }) | 107 | }, { __index = map_opc1_32 }) |
107 | 108 | ||
@@ -112,12 +113,12 @@ local map_opc2 = { | |||
112 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", | 113 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", |
113 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", | 114 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", |
114 | --1x | 115 | --1x |
115 | "movupsXrm|movssXrm|movupdXrm|movsdXrm", | 116 | "movupsXrm|movssXrvm|movupdXrm|movsdXrvm", |
116 | "movupsXmr|movssXmr|movupdXmr|movsdXmr", | 117 | "movupsXmr|movssXmvr|movupdXmr|movsdXmvr", |
117 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", | 118 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", |
118 | "movlpsXmr||movlpdXmr", | 119 | "movlpsXmr||movlpdXmr", |
119 | "unpcklpsXrm||unpcklpdXrm", | 120 | "unpcklpsXrvm||unpcklpdXrvm", |
120 | "unpckhpsXrm||unpckhpdXrm", | 121 | "unpckhpsXrvm||unpckhpdXrvm", |
121 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", | 122 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", |
122 | "movhpsXmr||movhpdXmr", | 123 | "movhpsXmr||movhpdXmr", |
123 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", | 124 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", |
@@ -126,7 +127,7 @@ local map_opc2 = { | |||
126 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, | 127 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, |
127 | "movapsXrm||movapdXrm", | 128 | "movapsXrm||movapdXrm", |
128 | "movapsXmr||movapdXmr", | 129 | "movapsXmr||movapdXmr", |
129 | "cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", | 130 | "cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt", |
130 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", | 131 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", |
131 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", | 132 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", |
132 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", | 133 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", |
@@ -142,27 +143,27 @@ local map_opc2 = { | |||
142 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", | 143 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", |
143 | --5x | 144 | --5x |
144 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", | 145 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", |
145 | "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", | 146 | "rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm", |
146 | "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", | 147 | "andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm", |
147 | "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", | 148 | "orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm", |
148 | "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", | 149 | "addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm", |
149 | "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", | 150 | "cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm", |
150 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", | 151 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", |
151 | "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", | 152 | "subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm", |
152 | "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", | 153 | "divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm", |
153 | --6x | 154 | --6x |
154 | "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", | 155 | "punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm", |
155 | "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", | 156 | "pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm", |
156 | "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", | 157 | "punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm", |
157 | "||punpcklqdqXrm","||punpckhqdqXrm", | 158 | "||punpcklqdqXrvm","||punpckhqdqXrvm", |
158 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", | 159 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", |
159 | --7x | 160 | --7x |
160 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", | 161 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", |
161 | "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", | 162 | "pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", |
162 | "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", | 163 | "pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", |
163 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", | 164 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", |
164 | nil,nil, | 165 | nil,nil, |
165 | "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", | 166 | "||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm", |
166 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", | 167 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", |
167 | --8x | 168 | --8x |
168 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", | 169 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", |
@@ -180,27 +181,27 @@ nil,nil, | |||
180 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", | 181 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", |
181 | --Cx | 182 | --Cx |
182 | "xaddBmr","xaddVmr", | 183 | "xaddBmr","xaddVmr", |
183 | "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", | 184 | "cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|", |
184 | "pinsrwPrWmu","pextrwDrPmu", | 185 | "pinsrwPrvWmu","pextrwDrPmu", |
185 | "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", | 186 | "shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp", |
186 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", | 187 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", |
187 | --Dx | 188 | --Dx |
188 | "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", | 189 | "||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm", |
189 | "paddqPrm","pmullwPrm", | 190 | "paddqPrvm","pmullwPrvm", |
190 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", | 191 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", |
191 | "psubusbPrm","psubuswPrm","pminubPrm","pandPrm", | 192 | "psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm", |
192 | "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", | 193 | "paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm", |
193 | --Ex | 194 | --Ex |
194 | "pavgbPrm","psrawPrm","psradPrm","pavgwPrm", | 195 | "pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm", |
195 | "pmulhuwPrm","pmulhwPrm", | 196 | "pmulhuwPrvm","pmulhwPrvm", |
196 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", | 197 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", |
197 | "psubsbPrm","psubswPrm","pminswPrm","porPrm", | 198 | "psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm", |
198 | "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", | 199 | "paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm", |
199 | --Fx | 200 | --Fx |
200 | "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", | 201 | "|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm", |
201 | "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", | 202 | "pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$", |
202 | "psubbPrm","psubwPrm","psubdPrm","psubqPrm", | 203 | "psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm", |
203 | "paddbPrm","paddwPrm","padddPrm","ud", | 204 | "paddbPrvm","paddwPrvm","padddPrvm","ud", |
204 | } | 205 | } |
205 | assert(map_opc2[255] == "ud") | 206 | assert(map_opc2[255] == "ud") |
206 | 207 | ||
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud") | |||
208 | local map_opc3 = { | 209 | local map_opc3 = { |
209 | ["38"] = { -- [66] 0f 38 xx | 210 | ["38"] = { -- [66] 0f 38 xx |
210 | --0x | 211 | --0x |
211 | [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", | 212 | [0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm", |
212 | "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", | 213 | "pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm", |
213 | "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", | 214 | "psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm", |
214 | nil,nil,nil,nil, | 215 | "||permilpsXrvm","||permilpdXrvm",nil,nil, |
215 | --1x | 216 | --1x |
216 | "||pblendvbXrma",nil,nil,nil, | 217 | "||pblendvbXrma",nil,nil,nil, |
217 | "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", | 218 | "||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm", |
218 | nil,nil,nil,nil, | 219 | "||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil, |
219 | "pabsbPrm","pabswPrm","pabsdPrm",nil, | 220 | "pabsbPrm","pabswPrm","pabsdPrm",nil, |
220 | --2x | 221 | --2x |
221 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", | 222 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", |
222 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, | 223 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, |
223 | "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", | 224 | "||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm", |
224 | nil,nil,nil,nil, | 225 | "||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr", |
225 | --3x | 226 | --3x |
226 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", | 227 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", |
227 | "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", | 228 | "||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm", |
228 | "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", | 229 | "||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm", |
229 | "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", | 230 | "||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm", |
230 | --4x | 231 | --4x |
231 | "||pmulddXrm","||phminposuwXrm", | 232 | "||pmulddXrvm","||phminposuwXrm",nil,nil, |
233 | nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", | ||
234 | --5x | ||
235 | [0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm", | ||
236 | [0x5a] = "||broadcasti128XrlXm", | ||
237 | --7x | ||
238 | [0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm", | ||
239 | --8x | ||
240 | [0x8c] = "||pmaskmovXrvVSm", | ||
241 | [0x8e] = "||pmaskmovVSmXvr", | ||
242 | --9x | ||
243 | [0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm", | ||
244 | [0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm", | ||
245 | [0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm", | ||
246 | [0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm", | ||
247 | [0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm", | ||
248 | --Ax | ||
249 | [0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm", | ||
250 | [0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm", | ||
251 | [0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm", | ||
252 | [0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm", | ||
253 | [0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm", | ||
254 | --Bx | ||
255 | [0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm", | ||
256 | [0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm", | ||
257 | [0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm", | ||
258 | [0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm", | ||
259 | [0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm", | ||
260 | --Dx | ||
261 | [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", | ||
262 | [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", | ||
232 | --Fx | 263 | --Fx |
233 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", | 264 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", |
265 | [0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv", | ||
234 | }, | 266 | }, |
235 | 267 | ||
236 | ["3a"] = { -- [66] 0f 3a xx | 268 | ["3a"] = { -- [66] 0f 3a xx |
237 | --0x | 269 | --0x |
238 | [0x00]=nil,nil,nil,nil,nil,nil,nil,nil, | 270 | [0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil, |
239 | "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", | 271 | "||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil, |
240 | "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", | 272 | "||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu", |
273 | "||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu", | ||
241 | --1x | 274 | --1x |
242 | nil,nil,nil,nil, | 275 | nil,nil,nil,nil, |
243 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", | 276 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", |
244 | nil,nil,nil,nil,nil,nil,nil,nil, | 277 | "||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil, |
278 | nil,nil,nil,nil, | ||
245 | --2x | 279 | --2x |
246 | "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, | 280 | "||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil, |
281 | --3x | ||
282 | [0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru", | ||
247 | --4x | 283 | --4x |
248 | [0x40] = "||dppsXrmu", | 284 | [0x40] = "||dppsXrvmu", |
249 | [0x41] = "||dppdXrmu", | 285 | [0x41] = "||dppdXrvmu", |
250 | [0x42] = "||mpsadbwXrmu", | 286 | [0x42] = "||mpsadbwXrvmu", |
287 | [0x44] = "||pclmulqdqXrvmu", | ||
288 | [0x46] = "||perm2i128Xrvmu", | ||
289 | [0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", | ||
290 | [0x4c] = "||pblendvbXrvmb", | ||
251 | --6x | 291 | --6x |
252 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", | 292 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", |
253 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", | 293 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", |
294 | [0xdf] = "||aeskeygenassistXrmu", | ||
295 | --Fx | ||
296 | [0xf0] = "||| rorxVrmu", | ||
254 | }, | 297 | }, |
255 | } | 298 | } |
256 | 299 | ||
@@ -354,17 +397,19 @@ local map_regs = { | |||
354 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! | 397 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! |
355 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", | 398 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", |
356 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, | 399 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, |
400 | Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", | ||
401 | "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" }, | ||
357 | } | 402 | } |
358 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } | 403 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } |
359 | 404 | ||
360 | -- Maps for size names. | 405 | -- Maps for size names. |
361 | local map_sz2n = { | 406 | local map_sz2n = { |
362 | B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, | 407 | B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32, |
363 | } | 408 | } |
364 | local map_sz2prefix = { | 409 | local map_sz2prefix = { |
365 | B = "byte", W = "word", D = "dword", | 410 | B = "byte", W = "word", D = "dword", |
366 | Q = "qword", | 411 | Q = "qword", |
367 | M = "qword", X = "xword", | 412 | M = "qword", X = "xword", Y = "yword", |
368 | F = "dword", G = "qword", -- No need for sizes/register names for these two. | 413 | F = "dword", G = "qword", -- No need for sizes/register names for these two. |
369 | } | 414 | } |
370 | 415 | ||
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands) | |||
387 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end | 432 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end |
388 | if ctx.rex then | 433 | if ctx.rex then |
389 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. | 434 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. |
390 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") | 435 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "").. |
391 | if t ~= "" then text = "rex."..t.." "..text end | 436 | (ctx.vexl and "l" or "") |
437 | if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end | ||
438 | if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "") | ||
439 | elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end | ||
392 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false | 440 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false |
393 | ctx.rex = false | 441 | ctx.rex = false; ctx.vexl = false; ctx.vexv = false |
394 | end | 442 | end |
395 | if ctx.seg then | 443 | if ctx.seg then |
396 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") | 444 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") |
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands) | |||
405 | end | 453 | end |
406 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) | 454 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) |
407 | ctx.mrm = false | 455 | ctx.mrm = false |
456 | ctx.vexv = false | ||
408 | ctx.start = pos | 457 | ctx.start = pos |
409 | ctx.imm = nil | 458 | ctx.imm = nil |
410 | end | 459 | end |
@@ -413,7 +462,7 @@ end | |||
413 | local function clearprefixes(ctx) | 462 | local function clearprefixes(ctx) |
414 | ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false | 463 | ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false |
415 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false | 464 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false |
416 | ctx.rex = false; ctx.a32 = false | 465 | ctx.rex = false; ctx.a32 = false; ctx.vexl = false |
417 | end | 466 | end |
418 | 467 | ||
419 | -- Fallback for incomplete opcodes at the end. | 468 | -- Fallback for incomplete opcodes at the end. |
@@ -450,9 +499,9 @@ end | |||
450 | -- Process pattern string and generate the operands. | 499 | -- Process pattern string and generate the operands. |
451 | local function putpat(ctx, name, pat) | 500 | local function putpat(ctx, name, pat) |
452 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp | 501 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp |
453 | local code, pos, stop = ctx.code, ctx.pos, ctx.stop | 502 | local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl |
454 | 503 | ||
455 | -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz | 504 | -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz |
456 | for p in gmatch(pat, ".") do | 505 | for p in gmatch(pat, ".") do |
457 | local x = nil | 506 | local x = nil |
458 | if p == "V" or p == "U" then | 507 | if p == "V" or p == "U" then |
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat) | |||
467 | elseif p == "B" then | 516 | elseif p == "B" then |
468 | sz = "B" | 517 | sz = "B" |
469 | regs = ctx.rex and map_regs.B64 or map_regs.B | 518 | regs = ctx.rex and map_regs.B64 or map_regs.B |
470 | elseif match(p, "[WDQMXFG]") then | 519 | elseif match(p, "[WDQMXYFG]") then |
471 | sz = p | 520 | sz = p |
521 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
472 | regs = map_regs[sz] | 522 | regs = map_regs[sz] |
473 | elseif p == "P" then | 523 | elseif p == "P" then |
474 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false | 524 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false |
525 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
475 | regs = map_regs[sz] | 526 | regs = map_regs[sz] |
527 | elseif p == "H" then | ||
528 | name = name..(ctx.rexw and "d" or "s") | ||
529 | ctx.rexw = false | ||
476 | elseif p == "S" then | 530 | elseif p == "S" then |
477 | name = name..lower(sz) | 531 | name = name..lower(sz) |
478 | elseif p == "s" then | 532 | elseif p == "s" then |
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat) | |||
484 | local imm = getimm(ctx, pos, 1); if not imm then return end | 538 | local imm = getimm(ctx, pos, 1); if not imm then return end |
485 | x = format("0x%02x", imm) | 539 | x = format("0x%02x", imm) |
486 | pos = pos+1 | 540 | pos = pos+1 |
541 | elseif p == "b" then | ||
542 | local imm = getimm(ctx, pos, 1); if not imm then return end | ||
543 | x = regs[imm/16+1] | ||
544 | pos = pos+1 | ||
487 | elseif p == "w" then | 545 | elseif p == "w" then |
488 | local imm = getimm(ctx, pos, 2); if not imm then return end | 546 | local imm = getimm(ctx, pos, 2); if not imm then return end |
489 | x = format("0x%x", imm) | 547 | x = format("0x%x", imm) |
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat) | |||
532 | local lo = imm % 0x1000000 | 590 | local lo = imm % 0x1000000 |
533 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) | 591 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) |
534 | else | 592 | else |
535 | x = format("0x%08x", imm) | 593 | x = "0x"..tohex(imm) |
536 | end | 594 | end |
537 | elseif p == "R" then | 595 | elseif p == "R" then |
538 | local r = byte(code, pos-1, pos-1)%8 | 596 | local r = byte(code, pos-1, pos-1)%8 |
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat) | |||
616 | else | 674 | else |
617 | x = "CR"..sp | 675 | x = "CR"..sp |
618 | end | 676 | end |
677 | elseif p == "v" then | ||
678 | if ctx.vexv then | ||
679 | x = regs[ctx.vexv+1]; ctx.vexv = false | ||
680 | end | ||
619 | elseif p == "y" then x = "DR"..sp | 681 | elseif p == "y" then x = "DR"..sp |
620 | elseif p == "z" then x = "TR"..sp | 682 | elseif p == "z" then x = "TR"..sp |
683 | elseif p == "l" then vexl = false | ||
621 | elseif p == "t" then | 684 | elseif p == "t" then |
622 | else | 685 | else |
623 | error("bad pattern `"..pat.."'") | 686 | error("bad pattern `"..pat.."'") |
@@ -692,7 +755,8 @@ map_act = { | |||
692 | B = putpat, W = putpat, D = putpat, Q = putpat, | 755 | B = putpat, W = putpat, D = putpat, Q = putpat, |
693 | V = putpat, U = putpat, T = putpat, | 756 | V = putpat, U = putpat, T = putpat, |
694 | M = putpat, X = putpat, P = putpat, | 757 | M = putpat, X = putpat, P = putpat, |
695 | F = putpat, G = putpat, | 758 | F = putpat, G = putpat, Y = putpat, |
759 | H = putpat, | ||
696 | 760 | ||
697 | -- Collect prefixes. | 761 | -- Collect prefixes. |
698 | [":"] = function(ctx, name, pat) | 762 | [":"] = function(ctx, name, pat) |
@@ -753,15 +817,68 @@ map_act = { | |||
753 | 817 | ||
754 | -- REX prefix. | 818 | -- REX prefix. |
755 | rex = function(ctx, name, pat) | 819 | rex = function(ctx, name, pat) |
756 | if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. | 820 | if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. |
757 | for p in gmatch(pat, ".") do ctx["rex"..p] = true end | 821 | for p in gmatch(pat, ".") do ctx["rex"..p] = true end |
758 | ctx.rex = true | 822 | ctx.rex = "rex" |
823 | end, | ||
824 | |||
825 | -- VEX prefix. | ||
826 | vex = function(ctx, name, pat) | ||
827 | if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. | ||
828 | ctx.rex = "vex" | ||
829 | local pos = ctx.pos | ||
830 | if ctx.mrm then | ||
831 | ctx.mrm = nil | ||
832 | pos = pos-1 | ||
833 | end | ||
834 | local b = byte(ctx.code, pos, pos) | ||
835 | if not b then return incomplete(ctx) end | ||
836 | pos = pos+1 | ||
837 | if b < 128 then ctx.rexr = true end | ||
838 | local m = 1 | ||
839 | if pat == "3" then | ||
840 | m = b%32; b = (b-m)/32 | ||
841 | local nb = b%2; b = (b-nb)/2 | ||
842 | if nb == 0 then ctx.rexb = true end | ||
843 | local nx = b%2 | ||
844 | if nx == 0 then ctx.rexx = true end | ||
845 | b = byte(ctx.code, pos, pos) | ||
846 | if not b then return incomplete(ctx) end | ||
847 | pos = pos+1 | ||
848 | if b >= 128 then ctx.rexw = true end | ||
849 | end | ||
850 | ctx.pos = pos | ||
851 | local map | ||
852 | if m == 1 then map = map_opc2 | ||
853 | elseif m == 2 then map = map_opc3["38"] | ||
854 | elseif m == 3 then map = map_opc3["3a"] | ||
855 | else return unknown(ctx) end | ||
856 | local p = b%4; b = (b-p)/4 | ||
857 | if p == 1 then ctx.o16 = "o16" | ||
858 | elseif p == 2 then ctx.rep = "rep" | ||
859 | elseif p == 3 then ctx.rep = "repne" end | ||
860 | local l = b%2; b = (b-l)/2 | ||
861 | if l ~= 0 then ctx.vexl = true end | ||
862 | ctx.vexv = (-1-b)%16 | ||
863 | return dispatchmap(ctx, map) | ||
759 | end, | 864 | end, |
760 | 865 | ||
761 | -- Special case for nop with REX prefix. | 866 | -- Special case for nop with REX prefix. |
762 | nop = function(ctx, name, pat) | 867 | nop = function(ctx, name, pat) |
763 | return dispatch(ctx, ctx.rex and pat or "nop") | 868 | return dispatch(ctx, ctx.rex and pat or "nop") |
764 | end, | 869 | end, |
870 | |||
871 | -- Special case for 0F 77. | ||
872 | emms = function(ctx, name, pat) | ||
873 | if ctx.rex ~= "vex" then | ||
874 | return putop(ctx, "emms") | ||
875 | elseif ctx.vexl then | ||
876 | ctx.vexl = false | ||
877 | return putop(ctx, "zeroall") | ||
878 | else | ||
879 | return putop(ctx, "zeroupper") | ||
880 | end | ||
881 | end, | ||
765 | } | 882 | } |
766 | 883 | ||
767 | ------------------------------------------------------------------------------ | 884 | ------------------------------------------------------------------------------ |
@@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len) | |||
782 | end | 899 | end |
783 | 900 | ||
784 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 901 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
785 | local function create_(code, addr, out) | 902 | local function create(code, addr, out) |
786 | local ctx = {} | 903 | local ctx = {} |
787 | ctx.code = code | 904 | ctx.code = code |
788 | ctx.addr = (addr or 0) - 1 | 905 | ctx.addr = (addr or 0) - 1 |
@@ -796,8 +913,8 @@ local function create_(code, addr, out) | |||
796 | return ctx | 913 | return ctx |
797 | end | 914 | end |
798 | 915 | ||
799 | local function create64_(code, addr, out) | 916 | local function create64(code, addr, out) |
800 | local ctx = create_(code, addr, out) | 917 | local ctx = create(code, addr, out) |
801 | ctx.x64 = true | 918 | ctx.x64 = true |
802 | ctx.map1 = map_opc1_64 | 919 | ctx.map1 = map_opc1_64 |
803 | ctx.aregs = map_regs.Q | 920 | ctx.aregs = map_regs.Q |
@@ -805,32 +922,32 @@ local function create64_(code, addr, out) | |||
805 | end | 922 | end |
806 | 923 | ||
807 | -- Simple API: disassemble code (a string) at address and output via out. | 924 | -- Simple API: disassemble code (a string) at address and output via out. |
808 | local function disass_(code, addr, out) | 925 | local function disass(code, addr, out) |
809 | create_(code, addr, out):disass() | 926 | create(code, addr, out):disass() |
810 | end | 927 | end |
811 | 928 | ||
812 | local function disass64_(code, addr, out) | 929 | local function disass64(code, addr, out) |
813 | create64_(code, addr, out):disass() | 930 | create64(code, addr, out):disass() |
814 | end | 931 | end |
815 | 932 | ||
816 | -- Return register name for RID. | 933 | -- Return register name for RID. |
817 | local function regname_(r) | 934 | local function regname(r) |
818 | if r < 8 then return map_regs.D[r+1] end | 935 | if r < 8 then return map_regs.D[r+1] end |
819 | return map_regs.X[r-7] | 936 | return map_regs.X[r-7] |
820 | end | 937 | end |
821 | 938 | ||
822 | local function regname64_(r) | 939 | local function regname64(r) |
823 | if r < 16 then return map_regs.Q[r+1] end | 940 | if r < 16 then return map_regs.Q[r+1] end |
824 | return map_regs.X[r-15] | 941 | return map_regs.X[r-15] |
825 | end | 942 | end |
826 | 943 | ||
827 | -- Public module functions. | 944 | -- Public module functions. |
828 | module(...) | 945 | return { |
829 | 946 | create = create, | |
830 | create = create_ | 947 | create64 = create64, |
831 | create64 = create64_ | 948 | disass = disass, |
832 | disass = disass_ | 949 | disass64 = disass64, |
833 | disass64 = disass64_ | 950 | regname = regname, |
834 | regname = regname_ | 951 | regname64 = regname64 |
835 | regname64 = regname64_ | 952 | } |
836 | 953 | ||
diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 344fa926..18e7a4b7 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua | |||
@@ -55,7 +55,7 @@ | |||
55 | 55 | ||
56 | -- Cache some library functions and objects. | 56 | -- Cache some library functions and objects. |
57 | local jit = require("jit") | 57 | local jit = require("jit") |
58 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 58 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
59 | local jutil = require("jit.util") | 59 | local jutil = require("jit.util") |
60 | local vmdef = require("jit.vmdef") | 60 | local vmdef = require("jit.vmdef") |
61 | local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc | 61 | local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc |
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek | |||
63 | local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap | 63 | local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap |
64 | local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr | 64 | local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr |
65 | local bit = require("bit") | 65 | local bit = require("bit") |
66 | local band, shr = bit.band, bit.rshift | 66 | local band, shr, tohex = bit.band, bit.rshift, bit.tohex |
67 | local sub, gsub, format = string.sub, string.gsub, string.format | 67 | local sub, gsub, format = string.sub, string.gsub, string.format |
68 | local byte, rep = string.byte, string.rep | 68 | local byte, rep = string.byte, string.rep |
69 | local type, tostring = type, tostring | 69 | local type, tostring = type, tostring |
@@ -85,12 +85,13 @@ local nexitsym = 0 | |||
85 | local function fillsymtab_tr(tr, nexit) | 85 | local function fillsymtab_tr(tr, nexit) |
86 | local t = {} | 86 | local t = {} |
87 | symtabmt.__index = t | 87 | symtabmt.__index = t |
88 | if jit.arch == "mips" or jit.arch == "mipsel" then | 88 | if jit.arch:sub(1, 4) == "mips" then |
89 | t[traceexitstub(tr, 0)] = "exit" | 89 | t[traceexitstub(tr, 0)] = "exit" |
90 | return | 90 | return |
91 | end | 91 | end |
92 | for i=0,nexit-1 do | 92 | for i=0,nexit-1 do |
93 | local addr = traceexitstub(tr, i) | 93 | local addr = traceexitstub(tr, i) |
94 | if addr < 0 then addr = addr + 2^32 end | ||
94 | t[addr] = tostring(i) | 95 | t[addr] = tostring(i) |
95 | end | 96 | end |
96 | local addr = traceexitstub(tr, nexit) | 97 | local addr = traceexitstub(tr, nexit) |
@@ -101,10 +102,15 @@ end | |||
101 | local function fillsymtab(tr, nexit) | 102 | local function fillsymtab(tr, nexit) |
102 | local t = symtab | 103 | local t = symtab |
103 | if nexitsym == 0 then | 104 | if nexitsym == 0 then |
105 | local maskaddr = jit.arch == "arm" and -2 | ||
104 | local ircall = vmdef.ircall | 106 | local ircall = vmdef.ircall |
105 | for i=0,#ircall do | 107 | for i=0,#ircall do |
106 | local addr = ircalladdr(i) | 108 | local addr = ircalladdr(i) |
107 | if addr ~= 0 then t[addr] = ircall[i] end | 109 | if addr ~= 0 then |
110 | if maskaddr then addr = band(addr, maskaddr) end | ||
111 | if addr < 0 then addr = addr + 2^32 end | ||
112 | t[addr] = ircall[i] | ||
113 | end | ||
108 | end | 114 | end |
109 | end | 115 | end |
110 | if nexitsym == 1000000 then -- Per-trace exit stubs. | 116 | if nexitsym == 1000000 then -- Per-trace exit stubs. |
@@ -118,6 +124,7 @@ local function fillsymtab(tr, nexit) | |||
118 | nexit = 1000000 | 124 | nexit = 1000000 |
119 | break | 125 | break |
120 | end | 126 | end |
127 | if addr < 0 then addr = addr + 2^32 end | ||
121 | t[addr] = tostring(i) | 128 | t[addr] = tostring(i) |
122 | end | 129 | end |
123 | nexitsym = nexit | 130 | nexitsym = nexit |
@@ -136,6 +143,7 @@ local function dump_mcode(tr) | |||
136 | local mcode, addr, loop = tracemc(tr) | 143 | local mcode, addr, loop = tracemc(tr) |
137 | if not mcode then return end | 144 | if not mcode then return end |
138 | if not disass then disass = require("jit.dis_"..jit.arch) end | 145 | if not disass then disass = require("jit.dis_"..jit.arch) end |
146 | if addr < 0 then addr = addr + 2^32 end | ||
139 | out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") | 147 | out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") |
140 | local ctx = disass.create(mcode, addr, dumpwrite) | 148 | local ctx = disass.create(mcode, addr, dumpwrite) |
141 | ctx.hexdump = 0 | 149 | ctx.hexdump = 0 |
@@ -211,8 +219,10 @@ local function colorize_text(s) | |||
211 | return s | 219 | return s |
212 | end | 220 | end |
213 | 221 | ||
214 | local function colorize_ansi(s, t) | 222 | local function colorize_ansi(s, t, extra) |
215 | return format(colortype_ansi[t], s) | 223 | local out = format(colortype_ansi[t], s) |
224 | if extra then out = "\027[3m"..out end | ||
225 | return out | ||
216 | end | 226 | end |
217 | 227 | ||
218 | local irtype_ansi = setmetatable({}, | 228 | local irtype_ansi = setmetatable({}, |
@@ -221,9 +231,10 @@ local irtype_ansi = setmetatable({}, | |||
221 | 231 | ||
222 | local html_escape = { ["<"] = "<", [">"] = ">", ["&"] = "&", } | 232 | local html_escape = { ["<"] = "<", [">"] = ">", ["&"] = "&", } |
223 | 233 | ||
224 | local function colorize_html(s, t) | 234 | local function colorize_html(s, t, extra) |
225 | s = gsub(s, "[<>&]", html_escape) | 235 | s = gsub(s, "[<>&]", html_escape) |
226 | return format('<span class="irt_%s">%s</span>', irtype_text[t], s) | 236 | return format('<span class="irt_%s%s">%s</span>', |
237 | irtype_text[t], extra and " irt_extra" or "", s) | ||
227 | end | 238 | end |
228 | 239 | ||
229 | local irtype_html = setmetatable({}, | 240 | local irtype_html = setmetatable({}, |
@@ -248,6 +259,7 @@ span.irt_tab { color: #c00000; } | |||
248 | span.irt_udt, span.irt_lud { color: #00c0c0; } | 259 | span.irt_udt, span.irt_lud { color: #00c0c0; } |
249 | span.irt_num { color: #4040c0; } | 260 | span.irt_num { color: #4040c0; } |
250 | span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } | 261 | span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } |
262 | span.irt_extra { font-style: italic; } | ||
251 | </style> | 263 | </style> |
252 | ]] | 264 | ]] |
253 | 265 | ||
@@ -263,6 +275,7 @@ local litname = { | |||
263 | if band(mode, 8) ~= 0 then s = s.."C" end | 275 | if band(mode, 8) ~= 0 then s = s.."C" end |
264 | if band(mode, 16) ~= 0 then s = s.."R" end | 276 | if band(mode, 16) ~= 0 then s = s.."R" end |
265 | if band(mode, 32) ~= 0 then s = s.."I" end | 277 | if band(mode, 32) ~= 0 then s = s.."I" end |
278 | if band(mode, 64) ~= 0 then s = s.."K" end | ||
266 | t[mode] = s | 279 | t[mode] = s |
267 | return s | 280 | return s |
268 | end}), | 281 | end}), |
@@ -270,16 +283,20 @@ local litname = { | |||
270 | ["CONV "] = setmetatable({}, { __index = function(t, mode) | 283 | ["CONV "] = setmetatable({}, { __index = function(t, mode) |
271 | local s = irtype[band(mode, 31)] | 284 | local s = irtype[band(mode, 31)] |
272 | s = irtype[band(shr(mode, 5), 31)].."."..s | 285 | s = irtype[band(shr(mode, 5), 31)].."."..s |
273 | if band(mode, 0x400) ~= 0 then s = s.." trunc" | 286 | if band(mode, 0x800) ~= 0 then s = s.." sext" end |
274 | elseif band(mode, 0x800) ~= 0 then s = s.." sext" end | ||
275 | local c = shr(mode, 12) | 287 | local c = shr(mode, 12) |
276 | if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end | 288 | if c == 1 then s = s.." none" |
289 | elseif c == 2 then s = s.." index" | ||
290 | elseif c == 3 then s = s.." check" end | ||
277 | t[mode] = s | 291 | t[mode] = s |
278 | return s | 292 | return s |
279 | end}), | 293 | end}), |
280 | ["FLOAD "] = vmdef.irfield, | 294 | ["FLOAD "] = vmdef.irfield, |
281 | ["FREF "] = vmdef.irfield, | 295 | ["FREF "] = vmdef.irfield, |
282 | ["FPMATH"] = vmdef.irfpm, | 296 | ["FPMATH"] = vmdef.irfpm, |
297 | ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" }, | ||
298 | ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" }, | ||
299 | ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" }, | ||
283 | } | 300 | } |
284 | 301 | ||
285 | local function ctlsub(c) | 302 | local function ctlsub(c) |
@@ -303,15 +320,19 @@ local function fmtfunc(func, pc) | |||
303 | end | 320 | end |
304 | end | 321 | end |
305 | 322 | ||
306 | local function formatk(tr, idx) | 323 | local function formatk(tr, idx, sn) |
307 | local k, t, slot = tracek(tr, idx) | 324 | local k, t, slot = tracek(tr, idx) |
308 | local tn = type(k) | 325 | local tn = type(k) |
309 | local s | 326 | local s |
310 | if tn == "number" then | 327 | if tn == "number" then |
311 | if k == 2^52+2^51 then | 328 | if t < 12 then |
329 | s = k == 0 and "NULL" or format("[0x%08x]", k) | ||
330 | elseif band(sn or 0, 0x30000) ~= 0 then | ||
331 | s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" | ||
332 | elseif k == 2^52+2^51 then | ||
312 | s = "bias" | 333 | s = "bias" |
313 | else | 334 | else |
314 | s = format("%+.14g", k) | 335 | s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) |
315 | end | 336 | end |
316 | elseif tn == "string" then | 337 | elseif tn == "string" then |
317 | s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) | 338 | s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) |
@@ -329,10 +350,12 @@ local function formatk(tr, idx) | |||
329 | elseif t == 21 then -- int64_t | 350 | elseif t == 21 then -- int64_t |
330 | s = sub(tostring(k), 1, -3) | 351 | s = sub(tostring(k), 1, -3) |
331 | if sub(s, 1, 1) ~= "-" then s = "+"..s end | 352 | if sub(s, 1, 1) ~= "-" then s = "+"..s end |
353 | elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL) | ||
354 | return "----" -- Special case for LJ_FR2 slot 1. | ||
332 | else | 355 | else |
333 | s = tostring(k) -- For primitives. | 356 | s = tostring(k) -- For primitives. |
334 | end | 357 | end |
335 | s = colorize(format("%-4s", s), t) | 358 | s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0) |
336 | if slot then | 359 | if slot then |
337 | s = format("%s @%d", s, slot) | 360 | s = format("%s @%d", s, slot) |
338 | end | 361 | end |
@@ -347,12 +370,12 @@ local function printsnap(tr, snap) | |||
347 | n = n + 1 | 370 | n = n + 1 |
348 | local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS | 371 | local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS |
349 | if ref < 0 then | 372 | if ref < 0 then |
350 | out:write(formatk(tr, ref)) | 373 | out:write(formatk(tr, ref, sn)) |
351 | elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM | 374 | elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM |
352 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) | 375 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) |
353 | else | 376 | else |
354 | local m, ot, op1, op2 = traceir(tr, ref) | 377 | local m, ot, op1, op2 = traceir(tr, ref) |
355 | out:write(colorize(format("%04d", ref), band(ot, 31))) | 378 | out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0)) |
356 | end | 379 | end |
357 | out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME | 380 | out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME |
358 | else | 381 | else |
@@ -545,7 +568,7 @@ local function dump_trace(what, tr, func, pc, otr, oex) | |||
545 | if what == "start" then | 568 | if what == "start" then |
546 | if dumpmode.H then out:write('<pre class="ljdump">\n') end | 569 | if dumpmode.H then out:write('<pre class="ljdump">\n') end |
547 | out:write("---- TRACE ", tr, " ", what) | 570 | out:write("---- TRACE ", tr, " ", what) |
548 | if otr then out:write(" ", otr, "/", oex) end | 571 | if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end |
549 | out:write(" ", fmtfunc(func, pc), "\n") | 572 | out:write(" ", fmtfunc(func, pc), "\n") |
550 | elseif what == "stop" or what == "abort" then | 573 | elseif what == "stop" or what == "abort" then |
551 | out:write("---- TRACE ", tr, " ", what) | 574 | out:write("---- TRACE ", tr, " ", what) |
@@ -595,23 +618,26 @@ end | |||
595 | 618 | ||
596 | ------------------------------------------------------------------------------ | 619 | ------------------------------------------------------------------------------ |
597 | 620 | ||
621 | local gpr64 = jit.arch:match("64") | ||
622 | local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel" | ||
623 | |||
598 | -- Dump taken trace exits. | 624 | -- Dump taken trace exits. |
599 | local function dump_texit(tr, ex, ngpr, nfpr, ...) | 625 | local function dump_texit(tr, ex, ngpr, nfpr, ...) |
600 | out:write("---- TRACE ", tr, " exit ", ex, "\n") | 626 | out:write("---- TRACE ", tr, " exit ", ex, "\n") |
601 | if dumpmode.X then | 627 | if dumpmode.X then |
602 | local regs = {...} | 628 | local regs = {...} |
603 | if jit.arch == "x64" then | 629 | if gpr64 then |
604 | for i=1,ngpr do | 630 | for i=1,ngpr do |
605 | out:write(format(" %016x", regs[i])) | 631 | out:write(format(" %016x", regs[i])) |
606 | if i % 4 == 0 then out:write("\n") end | 632 | if i % 4 == 0 then out:write("\n") end |
607 | end | 633 | end |
608 | else | 634 | else |
609 | for i=1,ngpr do | 635 | for i=1,ngpr do |
610 | out:write(format(" %08x", regs[i])) | 636 | out:write(" ", tohex(regs[i])) |
611 | if i % 8 == 0 then out:write("\n") end | 637 | if i % 8 == 0 then out:write("\n") end |
612 | end | 638 | end |
613 | end | 639 | end |
614 | if jit.arch == "mips" or jit.arch == "mipsel" then | 640 | if fprmips32 then |
615 | for i=1,nfpr,2 do | 641 | for i=1,nfpr,2 do |
616 | out:write(format(" %+17.14g", regs[ngpr+i])) | 642 | out:write(format(" %+17.14g", regs[ngpr+i])) |
617 | if i % 8 == 7 then out:write("\n") end | 643 | if i % 8 == 7 then out:write("\n") end |
@@ -692,9 +718,9 @@ local function dumpon(opt, outfile) | |||
692 | end | 718 | end |
693 | 719 | ||
694 | -- Public module functions. | 720 | -- Public module functions. |
695 | module(...) | 721 | return { |
696 | 722 | on = dumpon, | |
697 | on = dumpon | 723 | off = dumpoff, |
698 | off = dumpoff | 724 | start = dumpon -- For -j command line option. |
699 | start = dumpon -- For -j command line option. | 725 | } |
700 | 726 | ||
diff --git a/src/jit/p.lua b/src/jit/p.lua new file mode 100644 index 00000000..f225c312 --- /dev/null +++ b/src/jit/p.lua | |||
@@ -0,0 +1,312 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT profiler. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module is a simple command line interface to the built-in | ||
9 | -- low-overhead profiler of LuaJIT. | ||
10 | -- | ||
11 | -- The lower-level API of the profiler is accessible via the "jit.profile" | ||
12 | -- module or the luaJIT_profile_* C API. | ||
13 | -- | ||
14 | -- Example usage: | ||
15 | -- | ||
16 | -- luajit -jp myapp.lua | ||
17 | -- luajit -jp=s myapp.lua | ||
18 | -- luajit -jp=-s myapp.lua | ||
19 | -- luajit -jp=vl myapp.lua | ||
20 | -- luajit -jp=G,profile.txt myapp.lua | ||
21 | -- | ||
22 | -- The following dump features are available: | ||
23 | -- | ||
24 | -- f Stack dump: function name, otherwise module:line. Default mode. | ||
25 | -- F Stack dump: ditto, but always prepend module. | ||
26 | -- l Stack dump: module:line. | ||
27 | -- <number> stack dump depth (callee < caller). Default: 1. | ||
28 | -- -<number> Inverse stack dump depth (caller > callee). | ||
29 | -- s Split stack dump after first stack level. Implies abs(depth) >= 2. | ||
30 | -- p Show full path for module names. | ||
31 | -- v Show VM states. Can be combined with stack dumps, e.g. vf or fv. | ||
32 | -- z Show zones. Can be combined with stack dumps, e.g. zf or fz. | ||
33 | -- r Show raw sample counts. Default: show percentages. | ||
34 | -- a Annotate excerpts from source code files. | ||
35 | -- A Annotate complete source code files. | ||
36 | -- G Produce raw output suitable for graphical tools (e.g. flame graphs). | ||
37 | -- m<number> Minimum sample percentage to be shown. Default: 3. | ||
38 | -- i<number> Sampling interval in milliseconds. Default: 10. | ||
39 | -- | ||
40 | ---------------------------------------------------------------------------- | ||
41 | |||
42 | -- Cache some library functions and objects. | ||
43 | local jit = require("jit") | ||
44 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") | ||
45 | local profile = require("jit.profile") | ||
46 | local vmdef = require("jit.vmdef") | ||
47 | local math = math | ||
48 | local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor | ||
49 | local sort, format = table.sort, string.format | ||
50 | local stdout = io.stdout | ||
51 | local zone -- Load jit.zone module on demand. | ||
52 | |||
53 | -- Output file handle. | ||
54 | local out | ||
55 | |||
56 | ------------------------------------------------------------------------------ | ||
57 | |||
58 | local prof_ud | ||
59 | local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth | ||
60 | local prof_ann, prof_count1, prof_count2, prof_samples | ||
61 | |||
62 | local map_vmmode = { | ||
63 | N = "Compiled", | ||
64 | I = "Interpreted", | ||
65 | C = "C code", | ||
66 | G = "Garbage Collector", | ||
67 | J = "JIT Compiler", | ||
68 | } | ||
69 | |||
70 | -- Profiler callback. | ||
71 | local function prof_cb(th, samples, vmmode) | ||
72 | prof_samples = prof_samples + samples | ||
73 | local key_stack, key_stack2, key_state | ||
74 | -- Collect keys for sample. | ||
75 | if prof_states then | ||
76 | if prof_states == "v" then | ||
77 | key_state = map_vmmode[vmmode] or vmmode | ||
78 | else | ||
79 | key_state = zone:get() or "(none)" | ||
80 | end | ||
81 | end | ||
82 | if prof_fmt then | ||
83 | key_stack = profile.dumpstack(th, prof_fmt, prof_depth) | ||
84 | key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x) | ||
85 | return vmdef.ffnames[tonumber(x)] | ||
86 | end) | ||
87 | if prof_split == 2 then | ||
88 | local k1, k2 = key_stack:match("(.-) [<>] (.*)") | ||
89 | if k2 then key_stack, key_stack2 = k1, k2 end | ||
90 | elseif prof_split == 3 then | ||
91 | key_stack2 = profile.dumpstack(th, "l", 1) | ||
92 | end | ||
93 | end | ||
94 | -- Order keys. | ||
95 | local k1, k2 | ||
96 | if prof_split == 1 then | ||
97 | if key_state then | ||
98 | k1 = key_state | ||
99 | if key_stack then k2 = key_stack end | ||
100 | end | ||
101 | elseif key_stack then | ||
102 | k1 = key_stack | ||
103 | if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end | ||
104 | end | ||
105 | -- Coalesce samples in one or two levels. | ||
106 | if k1 then | ||
107 | local t1 = prof_count1 | ||
108 | t1[k1] = (t1[k1] or 0) + samples | ||
109 | if k2 then | ||
110 | local t2 = prof_count2 | ||
111 | local t3 = t2[k1] | ||
112 | if not t3 then t3 = {}; t2[k1] = t3 end | ||
113 | t3[k2] = (t3[k2] or 0) + samples | ||
114 | end | ||
115 | end | ||
116 | end | ||
117 | |||
118 | ------------------------------------------------------------------------------ | ||
119 | |||
120 | -- Show top N list. | ||
121 | local function prof_top(count1, count2, samples, indent) | ||
122 | local t, n = {}, 0 | ||
123 | for k in pairs(count1) do | ||
124 | n = n + 1 | ||
125 | t[n] = k | ||
126 | end | ||
127 | sort(t, function(a, b) return count1[a] > count1[b] end) | ||
128 | for i=1,n do | ||
129 | local k = t[i] | ||
130 | local v = count1[k] | ||
131 | local pct = floor(v*100/samples + 0.5) | ||
132 | if pct < prof_min then break end | ||
133 | if not prof_raw then | ||
134 | out:write(format("%s%2d%% %s\n", indent, pct, k)) | ||
135 | elseif prof_raw == "r" then | ||
136 | out:write(format("%s%5d %s\n", indent, v, k)) | ||
137 | else | ||
138 | out:write(format("%s %d\n", k, v)) | ||
139 | end | ||
140 | if count2 then | ||
141 | local r = count2[k] | ||
142 | if r then | ||
143 | prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and " -- " or | ||
144 | (prof_depth < 0 and " -> " or " <- ")) | ||
145 | end | ||
146 | end | ||
147 | end | ||
148 | end | ||
149 | |||
150 | -- Annotate source code | ||
151 | local function prof_annotate(count1, samples) | ||
152 | local files = {} | ||
153 | local ms = 0 | ||
154 | for k, v in pairs(count1) do | ||
155 | local pct = floor(v*100/samples + 0.5) | ||
156 | ms = math.max(ms, v) | ||
157 | if pct >= prof_min then | ||
158 | local file, line = k:match("^(.*):(%d+)$") | ||
159 | if not file then file = k; line = 0 end | ||
160 | local fl = files[file] | ||
161 | if not fl then fl = {}; files[file] = fl; files[#files+1] = file end | ||
162 | line = tonumber(line) | ||
163 | fl[line] = prof_raw and v or pct | ||
164 | end | ||
165 | end | ||
166 | sort(files) | ||
167 | local fmtv, fmtn = " %3d%% | %s\n", " | %s\n" | ||
168 | if prof_raw then | ||
169 | local n = math.max(5, math.ceil(math.log10(ms))) | ||
170 | fmtv = "%"..n.."d | %s\n" | ||
171 | fmtn = (" "):rep(n).." | %s\n" | ||
172 | end | ||
173 | local ann = prof_ann | ||
174 | for _, file in ipairs(files) do | ||
175 | local f0 = file:byte() | ||
176 | if f0 == 40 or f0 == 91 then | ||
177 | out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file)) | ||
178 | break | ||
179 | end | ||
180 | local fp, err = io.open(file) | ||
181 | if not fp then | ||
182 | out:write(format("====== ERROR: %s: %s\n", file, err)) | ||
183 | break | ||
184 | end | ||
185 | out:write(format("\n====== %s ======\n", file)) | ||
186 | local fl = files[file] | ||
187 | local n, show = 1, false | ||
188 | if ann ~= 0 then | ||
189 | for i=1,ann do | ||
190 | if fl[i] then show = true; out:write("@@ 1 @@\n"); break end | ||
191 | end | ||
192 | end | ||
193 | for line in fp:lines() do | ||
194 | if line:byte() == 27 then | ||
195 | out:write("[Cannot annotate bytecode file]\n") | ||
196 | break | ||
197 | end | ||
198 | local v = fl[n] | ||
199 | if ann ~= 0 then | ||
200 | local v2 = fl[n+ann] | ||
201 | if show then | ||
202 | if v2 then show = n+ann elseif v then show = n | ||
203 | elseif show+ann < n then show = false end | ||
204 | elseif v2 then | ||
205 | show = n+ann | ||
206 | out:write(format("@@ %d @@\n", n)) | ||
207 | end | ||
208 | if not show then goto next end | ||
209 | end | ||
210 | if v then | ||
211 | out:write(format(fmtv, v, line)) | ||
212 | else | ||
213 | out:write(format(fmtn, line)) | ||
214 | end | ||
215 | ::next:: | ||
216 | n = n + 1 | ||
217 | end | ||
218 | fp:close() | ||
219 | end | ||
220 | end | ||
221 | |||
222 | ------------------------------------------------------------------------------ | ||
223 | |||
224 | -- Finish profiling and dump result. | ||
225 | local function prof_finish() | ||
226 | if prof_ud then | ||
227 | profile.stop() | ||
228 | local samples = prof_samples | ||
229 | if samples == 0 then | ||
230 | if prof_raw ~= true then out:write("[No samples collected]\n") end | ||
231 | return | ||
232 | end | ||
233 | if prof_ann then | ||
234 | prof_annotate(prof_count1, samples) | ||
235 | else | ||
236 | prof_top(prof_count1, prof_count2, samples, "") | ||
237 | end | ||
238 | prof_count1 = nil | ||
239 | prof_count2 = nil | ||
240 | prof_ud = nil | ||
241 | if out ~= stdout then out:close() end | ||
242 | end | ||
243 | end | ||
244 | |||
245 | -- Start profiling. | ||
246 | local function prof_start(mode) | ||
247 | local interval = "" | ||
248 | mode = mode:gsub("i%d*", function(s) interval = s; return "" end) | ||
249 | prof_min = 3 | ||
250 | mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end) | ||
251 | prof_depth = 1 | ||
252 | mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end) | ||
253 | local m = {} | ||
254 | for c in mode:gmatch(".") do m[c] = c end | ||
255 | prof_states = m.z or m.v | ||
256 | if prof_states == "z" then zone = require("jit.zone") end | ||
257 | local scope = m.l or m.f or m.F or (prof_states and "" or "f") | ||
258 | local flags = (m.p or "") | ||
259 | prof_raw = m.r | ||
260 | if m.s then | ||
261 | prof_split = 2 | ||
262 | if prof_depth == -1 or m["-"] then prof_depth = -2 | ||
263 | elseif prof_depth == 1 then prof_depth = 2 end | ||
264 | elseif mode:find("[fF].*l") then | ||
265 | scope = "l" | ||
266 | prof_split = 3 | ||
267 | else | ||
268 | prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0 | ||
269 | end | ||
270 | prof_ann = m.A and 0 or (m.a and 3) | ||
271 | if prof_ann then | ||
272 | scope = "l" | ||
273 | prof_fmt = "pl" | ||
274 | prof_split = 0 | ||
275 | prof_depth = 1 | ||
276 | elseif m.G and scope ~= "" then | ||
277 | prof_fmt = flags..scope.."Z;" | ||
278 | prof_depth = -100 | ||
279 | prof_raw = true | ||
280 | prof_min = 0 | ||
281 | elseif scope == "" then | ||
282 | prof_fmt = false | ||
283 | else | ||
284 | local sc = prof_split == 3 and m.f or m.F or scope | ||
285 | prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ") | ||
286 | end | ||
287 | prof_count1 = {} | ||
288 | prof_count2 = {} | ||
289 | prof_samples = 0 | ||
290 | profile.start(scope:lower()..interval, prof_cb) | ||
291 | prof_ud = newproxy(true) | ||
292 | getmetatable(prof_ud).__gc = prof_finish | ||
293 | end | ||
294 | |||
295 | ------------------------------------------------------------------------------ | ||
296 | |||
297 | local function start(mode, outfile) | ||
298 | if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end | ||
299 | if outfile then | ||
300 | out = outfile == "-" and stdout or assert(io.open(outfile, "w")) | ||
301 | else | ||
302 | out = stdout | ||
303 | end | ||
304 | prof_start(mode or "f") | ||
305 | end | ||
306 | |||
307 | -- Public module functions. | ||
308 | return { | ||
309 | start = start, -- For -j command line option. | ||
310 | stop = prof_finish | ||
311 | } | ||
312 | |||
diff --git a/src/jit/v.lua b/src/jit/v.lua index 9624688b..ac8b19db 100644 --- a/src/jit/v.lua +++ b/src/jit/v.lua | |||
@@ -59,7 +59,7 @@ | |||
59 | 59 | ||
60 | -- Cache some library functions and objects. | 60 | -- Cache some library functions and objects. |
61 | local jit = require("jit") | 61 | local jit = require("jit") |
62 | assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") | 62 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
63 | local jutil = require("jit.util") | 63 | local jutil = require("jit.util") |
64 | local vmdef = require("jit.vmdef") | 64 | local vmdef = require("jit.vmdef") |
65 | local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo | 65 | local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo |
@@ -99,7 +99,7 @@ end | |||
99 | local function dump_trace(what, tr, func, pc, otr, oex) | 99 | local function dump_trace(what, tr, func, pc, otr, oex) |
100 | if what == "start" then | 100 | if what == "start" then |
101 | startloc = fmtfunc(func, pc) | 101 | startloc = fmtfunc(func, pc) |
102 | startex = otr and "("..otr.."/"..oex..") " or "" | 102 | startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or "" |
103 | else | 103 | else |
104 | if what == "abort" then | 104 | if what == "abort" then |
105 | local loc = fmtfunc(func, pc) | 105 | local loc = fmtfunc(func, pc) |
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex) | |||
116 | if ltype == "interpreter" then | 116 | if ltype == "interpreter" then |
117 | out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", | 117 | out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", |
118 | tr, startex, startloc)) | 118 | tr, startex, startloc)) |
119 | elseif ltype == "stitch" then | ||
120 | out:write(format("[TRACE %3s %s%s %s %s]\n", | ||
121 | tr, startex, startloc, ltype, fmtfunc(func, pc))) | ||
119 | elseif link == tr or link == 0 then | 122 | elseif link == tr or link == 0 then |
120 | out:write(format("[TRACE %3s %s%s %s]\n", | 123 | out:write(format("[TRACE %3s %s%s %s]\n", |
121 | tr, startex, startloc, ltype)) | 124 | tr, startex, startloc, ltype)) |
@@ -159,9 +162,9 @@ local function dumpon(outfile) | |||
159 | end | 162 | end |
160 | 163 | ||
161 | -- Public module functions. | 164 | -- Public module functions. |
162 | module(...) | 165 | return { |
163 | 166 | on = dumpon, | |
164 | on = dumpon | 167 | off = dumpoff, |
165 | off = dumpoff | 168 | start = dumpon -- For -j command line option. |
166 | start = dumpon -- For -j command line option. | 169 | } |
167 | 170 | ||
diff --git a/src/jit/zone.lua b/src/jit/zone.lua new file mode 100644 index 00000000..1308cb74 --- /dev/null +++ b/src/jit/zone.lua | |||
@@ -0,0 +1,45 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT profiler zones. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module implements a simple hierarchical zone model. | ||
9 | -- | ||
10 | -- Example usage: | ||
11 | -- | ||
12 | -- local zone = require("jit.zone") | ||
13 | -- zone("AI") | ||
14 | -- ... | ||
15 | -- zone("A*") | ||
16 | -- ... | ||
17 | -- print(zone:get()) --> "A*" | ||
18 | -- ... | ||
19 | -- zone() | ||
20 | -- ... | ||
21 | -- print(zone:get()) --> "AI" | ||
22 | -- ... | ||
23 | -- zone() | ||
24 | -- | ||
25 | ---------------------------------------------------------------------------- | ||
26 | |||
27 | local remove = table.remove | ||
28 | |||
29 | return setmetatable({ | ||
30 | flush = function(t) | ||
31 | for i=#t,1,-1 do t[i] = nil end | ||
32 | end, | ||
33 | get = function(t) | ||
34 | return t[#t] | ||
35 | end | ||
36 | }, { | ||
37 | __call = function(t, zone) | ||
38 | if zone then | ||
39 | t[#t+1] = zone | ||
40 | else | ||
41 | return (assert(remove(t), "empty zone stack")) | ||
42 | end | ||
43 | end | ||
44 | }) | ||
45 | |||
diff --git a/src/lauxlib.h b/src/lauxlib.h index fed1491b..a44f0272 100644 --- a/src/lauxlib.h +++ b/src/lauxlib.h | |||
@@ -15,9 +15,6 @@ | |||
15 | #include "lua.h" | 15 | #include "lua.h" |
16 | 16 | ||
17 | 17 | ||
18 | #define luaL_getn(L,i) ((int)lua_objlen(L, i)) | ||
19 | #define luaL_setn(L,i,j) ((void)0) /* no op! */ | ||
20 | |||
21 | /* extra error code for `luaL_load' */ | 18 | /* extra error code for `luaL_load' */ |
22 | #define LUA_ERRFILE (LUA_ERRERR+1) | 19 | #define LUA_ERRFILE (LUA_ERRERR+1) |
23 | 20 | ||
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); | |||
58 | LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, | 55 | LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, |
59 | const char *const lst[]); | 56 | const char *const lst[]); |
60 | 57 | ||
58 | /* pre-defined references */ | ||
59 | #define LUA_NOREF (-2) | ||
60 | #define LUA_REFNIL (-1) | ||
61 | |||
61 | LUALIB_API int (luaL_ref) (lua_State *L, int t); | 62 | LUALIB_API int (luaL_ref) (lua_State *L, int t); |
62 | LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); | 63 | LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); |
63 | 64 | ||
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz, | |||
84 | const char *name, const char *mode); | 85 | const char *name, const char *mode); |
85 | LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, | 86 | LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, |
86 | int level); | 87 | int level); |
88 | LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup); | ||
89 | LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname, | ||
90 | int sizehint); | ||
91 | LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); | ||
92 | LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); | ||
87 | 93 | ||
88 | 94 | ||
89 | /* | 95 | /* |
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, | |||
113 | 119 | ||
114 | #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) | 120 | #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) |
115 | 121 | ||
122 | /* From Lua 5.2. */ | ||
123 | #define luaL_newlibtable(L, l) \ | ||
124 | lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1) | ||
125 | #define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0)) | ||
126 | |||
116 | /* | 127 | /* |
117 | ** {====================================================== | 128 | ** {====================================================== |
118 | ** Generic Buffer manipulation | 129 | ** Generic Buffer manipulation |
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); | |||
147 | 158 | ||
148 | /* }====================================================== */ | 159 | /* }====================================================== */ |
149 | 160 | ||
150 | |||
151 | /* compatibility with ref system */ | ||
152 | |||
153 | /* pre-defined references */ | ||
154 | #define LUA_NOREF (-2) | ||
155 | #define LUA_REFNIL (-1) | ||
156 | |||
157 | #define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \ | ||
158 | (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) | ||
159 | |||
160 | #define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) | ||
161 | |||
162 | #define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) | ||
163 | |||
164 | |||
165 | #define luaL_reg luaL_Reg | ||
166 | |||
167 | #endif | 161 | #endif |
diff --git a/src/lib_aux.c b/src/lib_aux.c index 14dd57e3..b8e56436 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c | |||
@@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx, | |||
107 | static int libsize(const luaL_Reg *l) | 107 | static int libsize(const luaL_Reg *l) |
108 | { | 108 | { |
109 | int size = 0; | 109 | int size = 0; |
110 | for (; l->name; l++) size++; | 110 | for (; l && l->name; l++) size++; |
111 | return size; | 111 | return size; |
112 | } | 112 | } |
113 | 113 | ||
114 | LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint) | ||
115 | { | ||
116 | luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); | ||
117 | lua_getfield(L, -1, modname); | ||
118 | if (!lua_istable(L, -1)) { | ||
119 | lua_pop(L, 1); | ||
120 | if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL) | ||
121 | lj_err_callerv(L, LJ_ERR_BADMODN, modname); | ||
122 | lua_pushvalue(L, -1); | ||
123 | lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */ | ||
124 | } | ||
125 | lua_remove(L, -2); /* Remove _LOADED table. */ | ||
126 | } | ||
127 | |||
114 | LUALIB_API void luaL_openlib(lua_State *L, const char *libname, | 128 | LUALIB_API void luaL_openlib(lua_State *L, const char *libname, |
115 | const luaL_Reg *l, int nup) | 129 | const luaL_Reg *l, int nup) |
116 | { | 130 | { |
117 | lj_lib_checkfpu(L); | 131 | lj_lib_checkfpu(L); |
118 | if (libname) { | 132 | if (libname) { |
119 | int size = libsize(l); | 133 | luaL_pushmodule(L, libname, libsize(l)); |
120 | /* check whether lib already exists */ | 134 | lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */ |
121 | luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); | ||
122 | lua_getfield(L, -1, libname); /* get _LOADED[libname] */ | ||
123 | if (!lua_istable(L, -1)) { /* not found? */ | ||
124 | lua_pop(L, 1); /* remove previous result */ | ||
125 | /* try global variable (and create one if it does not exist) */ | ||
126 | if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) | ||
127 | lj_err_callerv(L, LJ_ERR_BADMODN, libname); | ||
128 | lua_pushvalue(L, -1); | ||
129 | lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ | ||
130 | } | ||
131 | lua_remove(L, -2); /* remove _LOADED table */ | ||
132 | lua_insert(L, -(nup+1)); /* move library table to below upvalues */ | ||
133 | } | 135 | } |
134 | for (; l->name; l++) { | 136 | if (l) |
135 | int i; | 137 | luaL_setfuncs(L, l, nup); |
136 | for (i = 0; i < nup; i++) /* copy upvalues to the top */ | 138 | else |
137 | lua_pushvalue(L, -nup); | 139 | lua_pop(L, nup); /* Remove upvalues. */ |
138 | lua_pushcclosure(L, l->func, nup); | ||
139 | lua_setfield(L, -(nup+2), l->name); | ||
140 | } | ||
141 | lua_pop(L, nup); /* remove upvalues */ | ||
142 | } | 140 | } |
143 | 141 | ||
144 | LUALIB_API void luaL_register(lua_State *L, const char *libname, | 142 | LUALIB_API void luaL_register(lua_State *L, const char *libname, |
@@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname, | |||
147 | luaL_openlib(L, libname, l, 0); | 145 | luaL_openlib(L, libname, l, 0); |
148 | } | 146 | } |
149 | 147 | ||
148 | LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) | ||
149 | { | ||
150 | luaL_checkstack(L, nup, "too many upvalues"); | ||
151 | for (; l->name; l++) { | ||
152 | int i; | ||
153 | for (i = 0; i < nup; i++) /* Copy upvalues to the top. */ | ||
154 | lua_pushvalue(L, -nup); | ||
155 | lua_pushcclosure(L, l->func, nup); | ||
156 | lua_setfield(L, -(nup + 2), l->name); | ||
157 | } | ||
158 | lua_pop(L, nup); /* Remove upvalues. */ | ||
159 | } | ||
160 | |||
150 | LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, | 161 | LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, |
151 | const char *p, const char *r) | 162 | const char *p, const char *r) |
152 | { | 163 | { |
@@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) | |||
207 | 218 | ||
208 | LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) | 219 | LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) |
209 | { | 220 | { |
210 | while (l--) | 221 | if (l <= bufffree(B)) { |
211 | luaL_addchar(B, *s++); | 222 | memcpy(B->p, s, l); |
223 | B->p += l; | ||
224 | } else { | ||
225 | emptybuffer(B); | ||
226 | lua_pushlstring(B->L, s, l); | ||
227 | B->lvl++; | ||
228 | adjuststack(B); | ||
229 | } | ||
212 | } | 230 | } |
213 | 231 | ||
214 | LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) | 232 | LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) |
@@ -302,7 +320,7 @@ static int panic(lua_State *L) | |||
302 | 320 | ||
303 | #ifdef LUAJIT_USE_SYSMALLOC | 321 | #ifdef LUAJIT_USE_SYSMALLOC |
304 | 322 | ||
305 | #if LJ_64 && !defined(LUAJIT_USE_VALGRIND) | 323 | #if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) |
306 | #error "Must use builtin allocator for 64 bit target" | 324 | #error "Must use builtin allocator for 64 bit target" |
307 | #endif | 325 | #endif |
308 | 326 | ||
@@ -327,23 +345,19 @@ LUALIB_API lua_State *luaL_newstate(void) | |||
327 | 345 | ||
328 | #else | 346 | #else |
329 | 347 | ||
330 | #include "lj_alloc.h" | ||
331 | |||
332 | LUALIB_API lua_State *luaL_newstate(void) | 348 | LUALIB_API lua_State *luaL_newstate(void) |
333 | { | 349 | { |
334 | lua_State *L; | 350 | lua_State *L; |
335 | void *ud = lj_alloc_create(); | 351 | #if LJ_64 && !LJ_GC64 |
336 | if (ud == NULL) return NULL; | 352 | L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL); |
337 | #if LJ_64 | ||
338 | L = lj_state_newstate(lj_alloc_f, ud); | ||
339 | #else | 353 | #else |
340 | L = lua_newstate(lj_alloc_f, ud); | 354 | L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL); |
341 | #endif | 355 | #endif |
342 | if (L) G(L)->panic = panic; | 356 | if (L) G(L)->panic = panic; |
343 | return L; | 357 | return L; |
344 | } | 358 | } |
345 | 359 | ||
346 | #if LJ_64 | 360 | #if LJ_64 && !LJ_GC64 |
347 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | 361 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) |
348 | { | 362 | { |
349 | UNUSED(f); UNUSED(ud); | 363 | UNUSED(f); UNUSED(ud); |
diff --git a/src/lib_base.c b/src/lib_base.c index 6c96e8d5..98ec67c7 100644 --- a/src/lib_base.c +++ b/src/lib_base.c | |||
@@ -19,10 +19,12 @@ | |||
19 | #include "lj_gc.h" | 19 | #include "lj_gc.h" |
20 | #include "lj_err.h" | 20 | #include "lj_err.h" |
21 | #include "lj_debug.h" | 21 | #include "lj_debug.h" |
22 | #include "lj_buf.h" | ||
22 | #include "lj_str.h" | 23 | #include "lj_str.h" |
23 | #include "lj_tab.h" | 24 | #include "lj_tab.h" |
24 | #include "lj_meta.h" | 25 | #include "lj_meta.h" |
25 | #include "lj_state.h" | 26 | #include "lj_state.h" |
27 | #include "lj_frame.h" | ||
26 | #if LJ_HASFFI | 28 | #if LJ_HASFFI |
27 | #include "lj_ctype.h" | 29 | #include "lj_ctype.h" |
28 | #include "lj_cconv.h" | 30 | #include "lj_cconv.h" |
@@ -32,6 +34,7 @@ | |||
32 | #include "lj_dispatch.h" | 34 | #include "lj_dispatch.h" |
33 | #include "lj_char.h" | 35 | #include "lj_char.h" |
34 | #include "lj_strscan.h" | 36 | #include "lj_strscan.h" |
37 | #include "lj_strfmt.h" | ||
35 | #include "lj_lib.h" | 38 | #include "lj_lib.h" |
36 | 39 | ||
37 | /* -- Base library: checks ------------------------------------------------ */ | 40 | /* -- Base library: checks ------------------------------------------------ */ |
@@ -40,13 +43,13 @@ | |||
40 | 43 | ||
41 | LJLIB_ASM(assert) LJLIB_REC(.) | 44 | LJLIB_ASM(assert) LJLIB_REC(.) |
42 | { | 45 | { |
43 | GCstr *s; | ||
44 | lj_lib_checkany(L, 1); | 46 | lj_lib_checkany(L, 1); |
45 | s = lj_lib_optstr(L, 2); | 47 | if (L->top == L->base+1) |
46 | if (s) | ||
47 | lj_err_callermsg(L, strdata(s)); | ||
48 | else | ||
49 | lj_err_caller(L, LJ_ERR_ASSERT); | 48 | lj_err_caller(L, LJ_ERR_ASSERT); |
49 | else if (tvisstr(L->base+1) || tvisnumber(L->base+1)) | ||
50 | lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2))); | ||
51 | else | ||
52 | lj_err_run(L); | ||
50 | return FFH_UNREACHABLE; | 53 | return FFH_UNREACHABLE; |
51 | } | 54 | } |
52 | 55 | ||
@@ -73,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.) | |||
73 | /* This solves a circular dependency problem -- change FF_next_N as needed. */ | 76 | /* This solves a circular dependency problem -- change FF_next_N as needed. */ |
74 | LJ_STATIC_ASSERT((int)FF_next == FF_next_N); | 77 | LJ_STATIC_ASSERT((int)FF_next == FF_next_N); |
75 | 78 | ||
76 | LJLIB_ASM(next) | 79 | LJLIB_ASM(next) LJLIB_REC(.) |
77 | { | 80 | { |
78 | lj_lib_checktab(L, 1); | 81 | lj_lib_checktab(L, 1); |
82 | lj_err_msg(L, LJ_ERR_NEXTIDX); | ||
79 | return FFH_UNREACHABLE; | 83 | return FFH_UNREACHABLE; |
80 | } | 84 | } |
81 | 85 | ||
@@ -86,10 +90,11 @@ static int ffh_pairs(lua_State *L, MMS mm) | |||
86 | cTValue *mo = lj_meta_lookup(L, o, mm); | 90 | cTValue *mo = lj_meta_lookup(L, o, mm); |
87 | if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { | 91 | if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { |
88 | L->top = o+1; /* Only keep one argument. */ | 92 | L->top = o+1; /* Only keep one argument. */ |
89 | copyTV(L, L->base-1, mo); /* Replace callable. */ | 93 | copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */ |
90 | return FFH_TAILCALL; | 94 | return FFH_TAILCALL; |
91 | } else { | 95 | } else { |
92 | if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); | 96 | if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); |
97 | if (LJ_FR2) { copyTV(L, o-1, o); o--; } | ||
93 | setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); | 98 | setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); |
94 | if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); | 99 | if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); |
95 | return FFH_RES(3); | 100 | return FFH_RES(3); |
@@ -100,7 +105,7 @@ static int ffh_pairs(lua_State *L, MMS mm) | |||
100 | #endif | 105 | #endif |
101 | 106 | ||
102 | LJLIB_PUSH(lastcl) | 107 | LJLIB_PUSH(lastcl) |
103 | LJLIB_ASM(pairs) | 108 | LJLIB_ASM(pairs) LJLIB_REC(xpairs 0) |
104 | { | 109 | { |
105 | return ffh_pairs(L, MM_pairs); | 110 | return ffh_pairs(L, MM_pairs); |
106 | } | 111 | } |
@@ -113,7 +118,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.) | |||
113 | } | 118 | } |
114 | 119 | ||
115 | LJLIB_PUSH(lastcl) | 120 | LJLIB_PUSH(lastcl) |
116 | LJLIB_ASM(ipairs) LJLIB_REC(.) | 121 | LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1) |
117 | { | 122 | { |
118 | return ffh_pairs(L, MM_ipairs); | 123 | return ffh_pairs(L, MM_ipairs); |
119 | } | 124 | } |
@@ -131,11 +136,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.) | |||
131 | lj_err_caller(L, LJ_ERR_PROTMT); | 136 | lj_err_caller(L, LJ_ERR_PROTMT); |
132 | setgcref(t->metatable, obj2gco(mt)); | 137 | setgcref(t->metatable, obj2gco(mt)); |
133 | if (mt) { lj_gc_objbarriert(L, t, mt); } | 138 | if (mt) { lj_gc_objbarriert(L, t, mt); } |
134 | settabV(L, L->base-1, t); | 139 | settabV(L, L->base-1-LJ_FR2, t); |
135 | return FFH_RES(1); | 140 | return FFH_RES(1); |
136 | } | 141 | } |
137 | 142 | ||
138 | LJLIB_CF(getfenv) | 143 | LJLIB_CF(getfenv) LJLIB_REC(.) |
139 | { | 144 | { |
140 | GCfunc *fn; | 145 | GCfunc *fn; |
141 | cTValue *o = L->base; | 146 | cTValue *o = L->base; |
@@ -144,6 +149,7 @@ LJLIB_CF(getfenv) | |||
144 | o = lj_debug_frame(L, level, &level); | 149 | o = lj_debug_frame(L, level, &level); |
145 | if (o == NULL) | 150 | if (o == NULL) |
146 | lj_err_arg(L, 1, LJ_ERR_INVLVL); | 151 | lj_err_arg(L, 1, LJ_ERR_INVLVL); |
152 | if (LJ_FR2) o--; | ||
147 | } | 153 | } |
148 | fn = &gcval(o)->fn; | 154 | fn = &gcval(o)->fn; |
149 | settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); | 155 | settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); |
@@ -165,6 +171,7 @@ LJLIB_CF(setfenv) | |||
165 | o = lj_debug_frame(L, level, &level); | 171 | o = lj_debug_frame(L, level, &level); |
166 | if (o == NULL) | 172 | if (o == NULL) |
167 | lj_err_arg(L, 1, LJ_ERR_INVLVL); | 173 | lj_err_arg(L, 1, LJ_ERR_INVLVL); |
174 | if (LJ_FR2) o--; | ||
168 | } | 175 | } |
169 | fn = &gcval(o)->fn; | 176 | fn = &gcval(o)->fn; |
170 | if (!isluafunc(fn)) | 177 | if (!isluafunc(fn)) |
@@ -259,7 +266,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
259 | if (base == 10) { | 266 | if (base == 10) { |
260 | TValue *o = lj_lib_checkany(L, 1); | 267 | TValue *o = lj_lib_checkany(L, 1); |
261 | if (lj_strscan_numberobj(o)) { | 268 | if (lj_strscan_numberobj(o)) { |
262 | copyTV(L, L->base-1, o); | 269 | copyTV(L, L->base-1-LJ_FR2, o); |
263 | return FFH_RES(1); | 270 | return FFH_RES(1); |
264 | } | 271 | } |
265 | #if LJ_HASFFI | 272 | #if LJ_HASFFI |
@@ -272,11 +279,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
272 | ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { | 279 | ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { |
273 | int32_t i; | 280 | int32_t i; |
274 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); | 281 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); |
275 | setintV(L->base-1, i); | 282 | setintV(L->base-1-LJ_FR2, i); |
276 | return FFH_RES(1); | 283 | return FFH_RES(1); |
277 | } | 284 | } |
278 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), | 285 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), |
279 | (uint8_t *)&(L->base-1)->n, o, 0); | 286 | (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0); |
280 | return FFH_RES(1); | 287 | return FFH_RES(1); |
281 | } | 288 | } |
282 | } | 289 | } |
@@ -284,53 +291,46 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
284 | } else { | 291 | } else { |
285 | const char *p = strdata(lj_lib_checkstr(L, 1)); | 292 | const char *p = strdata(lj_lib_checkstr(L, 1)); |
286 | char *ep; | 293 | char *ep; |
294 | unsigned int neg = 0; | ||
287 | unsigned long ul; | 295 | unsigned long ul; |
288 | if (base < 2 || base > 36) | 296 | if (base < 2 || base > 36) |
289 | lj_err_arg(L, 2, LJ_ERR_BASERNG); | 297 | lj_err_arg(L, 2, LJ_ERR_BASERNG); |
290 | ul = strtoul(p, &ep, base); | 298 | while (lj_char_isspace((unsigned char)(*p))) p++; |
291 | if (p != ep) { | 299 | if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } |
292 | while (lj_char_isspace((unsigned char)(*ep))) ep++; | 300 | if (lj_char_isalnum((unsigned char)(*p))) { |
293 | if (*ep == '\0') { | 301 | ul = strtoul(p, &ep, base); |
294 | if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) | 302 | if (p != ep) { |
295 | setintV(L->base-1, (int32_t)ul); | 303 | while (lj_char_isspace((unsigned char)(*ep))) ep++; |
296 | else | 304 | if (*ep == '\0') { |
297 | setnumV(L->base-1, (lua_Number)ul); | 305 | if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { |
298 | return FFH_RES(1); | 306 | if (neg) ul = (unsigned long)-(long)ul; |
307 | setintV(L->base-1-LJ_FR2, (int32_t)ul); | ||
308 | } else { | ||
309 | lua_Number n = (lua_Number)ul; | ||
310 | if (neg) n = -n; | ||
311 | setnumV(L->base-1-LJ_FR2, n); | ||
312 | } | ||
313 | return FFH_RES(1); | ||
314 | } | ||
299 | } | 315 | } |
300 | } | 316 | } |
301 | } | 317 | } |
302 | setnilV(L->base-1); | 318 | setnilV(L->base-1-LJ_FR2); |
303 | return FFH_RES(1); | 319 | return FFH_RES(1); |
304 | } | 320 | } |
305 | 321 | ||
306 | LJLIB_PUSH("nil") | ||
307 | LJLIB_PUSH("false") | ||
308 | LJLIB_PUSH("true") | ||
309 | LJLIB_ASM(tostring) LJLIB_REC(.) | 322 | LJLIB_ASM(tostring) LJLIB_REC(.) |
310 | { | 323 | { |
311 | TValue *o = lj_lib_checkany(L, 1); | 324 | TValue *o = lj_lib_checkany(L, 1); |
312 | cTValue *mo; | 325 | cTValue *mo; |
313 | L->top = o+1; /* Only keep one argument. */ | 326 | L->top = o+1; /* Only keep one argument. */ |
314 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | 327 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { |
315 | copyTV(L, L->base-1, mo); /* Replace callable. */ | 328 | copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */ |
316 | return FFH_TAILCALL; | 329 | return FFH_TAILCALL; |
317 | } else { | ||
318 | GCstr *s; | ||
319 | if (tvisnumber(o)) { | ||
320 | s = lj_str_fromnumber(L, o); | ||
321 | } else if (tvispri(o)) { | ||
322 | s = strV(lj_lib_upvalue(L, -(int32_t)itype(o))); | ||
323 | } else { | ||
324 | if (tvisfunc(o) && isffunc(funcV(o))) | ||
325 | lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid); | ||
326 | else | ||
327 | lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1)); | ||
328 | /* Note: lua_pushfstring calls the GC which may invalidate o. */ | ||
329 | s = strV(L->top-1); | ||
330 | } | ||
331 | setstrV(L, L->base-1, s); | ||
332 | return FFH_RES(1); | ||
333 | } | 330 | } |
331 | lj_gc_check(L); | ||
332 | setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base)); | ||
333 | return FFH_RES(1); | ||
334 | } | 334 | } |
335 | 335 | ||
336 | /* -- Base library: throw and catch errors -------------------------------- */ | 336 | /* -- Base library: throw and catch errors -------------------------------- */ |
@@ -359,7 +359,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) | |||
359 | 359 | ||
360 | static int load_aux(lua_State *L, int status, int envarg) | 360 | static int load_aux(lua_State *L, int status, int envarg) |
361 | { | 361 | { |
362 | if (status == 0) { | 362 | if (status == LUA_OK) { |
363 | if (tvistab(L->base+envarg-1)) { | 363 | if (tvistab(L->base+envarg-1)) { |
364 | GCfunc *fn = funcV(L->top-1); | 364 | GCfunc *fn = funcV(L->top-1); |
365 | GCtab *t = tabV(L->base+envarg-1); | 365 | GCtab *t = tabV(L->base+envarg-1); |
@@ -408,10 +408,22 @@ LJLIB_CF(load) | |||
408 | GCstr *name = lj_lib_optstr(L, 2); | 408 | GCstr *name = lj_lib_optstr(L, 2); |
409 | GCstr *mode = lj_lib_optstr(L, 3); | 409 | GCstr *mode = lj_lib_optstr(L, 3); |
410 | int status; | 410 | int status; |
411 | if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) { | 411 | if (L->base < L->top && |
412 | GCstr *s = lj_lib_checkstr(L, 1); | 412 | (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) { |
413 | const char *s; | ||
414 | MSize len; | ||
415 | if (tvisbuf(L->base)) { | ||
416 | SBufExt *sbx = bufV(L->base); | ||
417 | s = sbx->r; | ||
418 | len = sbufxlen(sbx); | ||
419 | if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */ | ||
420 | } else { | ||
421 | GCstr *str = lj_lib_checkstr(L, 1); | ||
422 | s = strdata(str); | ||
423 | len = str->len; | ||
424 | } | ||
413 | lua_settop(L, 4); /* Ensure env arg exists. */ | 425 | lua_settop(L, 4); /* Ensure env arg exists. */ |
414 | status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s), | 426 | status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s, |
415 | mode ? strdata(mode) : NULL); | 427 | mode ? strdata(mode) : NULL); |
416 | } else { | 428 | } else { |
417 | lj_lib_checkfunc(L, 1); | 429 | lj_lib_checkfunc(L, 1); |
@@ -432,7 +444,7 @@ LJLIB_CF(dofile) | |||
432 | GCstr *fname = lj_lib_optstr(L, 1); | 444 | GCstr *fname = lj_lib_optstr(L, 1); |
433 | setnilV(L->top); | 445 | setnilV(L->top); |
434 | L->top = L->base+1; | 446 | L->top = L->base+1; |
435 | if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) | 447 | if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK) |
436 | lua_error(L); | 448 | lua_error(L); |
437 | lua_call(L, 0, LUA_MULTRET); | 449 | lua_call(L, 0, LUA_MULTRET); |
438 | return (int)(L->top - L->base) - 1; | 450 | return (int)(L->top - L->base) - 1; |
@@ -442,20 +454,20 @@ LJLIB_CF(dofile) | |||
442 | 454 | ||
443 | LJLIB_CF(gcinfo) | 455 | LJLIB_CF(gcinfo) |
444 | { | 456 | { |
445 | setintV(L->top++, (G(L)->gc.total >> 10)); | 457 | setintV(L->top++, (int32_t)(G(L)->gc.total >> 10)); |
446 | return 1; | 458 | return 1; |
447 | } | 459 | } |
448 | 460 | ||
449 | LJLIB_CF(collectgarbage) | 461 | LJLIB_CF(collectgarbage) |
450 | { | 462 | { |
451 | int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ | 463 | int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ |
452 | "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); | 464 | "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning"); |
453 | int32_t data = lj_lib_optint(L, 2, 0); | 465 | int32_t data = lj_lib_optint(L, 2, 0); |
454 | if (opt == LUA_GCCOUNT) { | 466 | if (opt == LUA_GCCOUNT) { |
455 | setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); | 467 | setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); |
456 | } else { | 468 | } else { |
457 | int res = lua_gc(L, opt, data); | 469 | int res = lua_gc(L, opt, data); |
458 | if (opt == LUA_GCSTEP) | 470 | if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING) |
459 | setboolV(L->top, res); | 471 | setboolV(L->top, res); |
460 | else | 472 | else |
461 | setintV(L->top, res); | 473 | setintV(L->top, res); |
@@ -507,23 +519,14 @@ LJLIB_CF(print) | |||
507 | tv = L->top-1; | 519 | tv = L->top-1; |
508 | } | 520 | } |
509 | shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) && | 521 | shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) && |
510 | !gcrefu(basemt_it(G(L), LJ_TNUMX)); | 522 | !gcrefu(basemt_it(G(L), LJ_TNUMX)); |
511 | for (i = 0; i < nargs; i++) { | 523 | for (i = 0; i < nargs; i++) { |
524 | cTValue *o = &L->base[i]; | ||
512 | const char *str; | 525 | const char *str; |
513 | size_t size; | 526 | size_t size; |
514 | cTValue *o = &L->base[i]; | 527 | MSize len; |
515 | if (shortcut && tvisstr(o)) { | 528 | if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) { |
516 | str = strVdata(o); | 529 | size = len; |
517 | size = strV(o)->len; | ||
518 | } else if (shortcut && tvisint(o)) { | ||
519 | char buf[LJ_STR_INTBUF]; | ||
520 | char *p = lj_str_bufint(buf, intV(o)); | ||
521 | size = (size_t)(buf+LJ_STR_INTBUF-p); | ||
522 | str = p; | ||
523 | } else if (shortcut && tvisnum(o)) { | ||
524 | char buf[LJ_STR_NUMBUF]; | ||
525 | size = lj_str_bufnum(buf, o); | ||
526 | str = buf; | ||
527 | } else { | 530 | } else { |
528 | copyTV(L, L->top+1, o); | 531 | copyTV(L, L->top+1, o); |
529 | copyTV(L, L->top, L->top-1); | 532 | copyTV(L, L->top, L->top-1); |
@@ -560,8 +563,8 @@ LJLIB_CF(coroutine_status) | |||
560 | co = threadV(L->base); | 563 | co = threadV(L->base); |
561 | if (co == L) s = "running"; | 564 | if (co == L) s = "running"; |
562 | else if (co->status == LUA_YIELD) s = "suspended"; | 565 | else if (co->status == LUA_YIELD) s = "suspended"; |
563 | else if (co->status != 0) s = "dead"; | 566 | else if (co->status != LUA_OK) s = "dead"; |
564 | else if (co->base > tvref(co->stack)+1) s = "normal"; | 567 | else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal"; |
565 | else if (co->top == co->base) s = "dead"; | 568 | else if (co->top == co->base) s = "dead"; |
566 | else s = "suspended"; | 569 | else s = "suspended"; |
567 | lua_pushstring(L, s); | 570 | lua_pushstring(L, s); |
@@ -581,6 +584,12 @@ LJLIB_CF(coroutine_running) | |||
581 | #endif | 584 | #endif |
582 | } | 585 | } |
583 | 586 | ||
587 | LJLIB_CF(coroutine_isyieldable) | ||
588 | { | ||
589 | setboolV(L->top++, cframe_canyield(L->cframe)); | ||
590 | return 1; | ||
591 | } | ||
592 | |||
584 | LJLIB_CF(coroutine_create) | 593 | LJLIB_CF(coroutine_create) |
585 | { | 594 | { |
586 | lua_State *L1; | 595 | lua_State *L1; |
@@ -600,11 +609,11 @@ LJLIB_ASM(coroutine_yield) | |||
600 | static int ffh_resume(lua_State *L, lua_State *co, int wrap) | 609 | static int ffh_resume(lua_State *L, lua_State *co, int wrap) |
601 | { | 610 | { |
602 | if (co->cframe != NULL || co->status > LUA_YIELD || | 611 | if (co->cframe != NULL || co->status > LUA_YIELD || |
603 | (co->status == 0 && co->top == co->base)) { | 612 | (co->status == LUA_OK && co->top == co->base)) { |
604 | ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; | 613 | ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; |
605 | if (wrap) lj_err_caller(L, em); | 614 | if (wrap) lj_err_caller(L, em); |
606 | setboolV(L->base-1, 0); | 615 | setboolV(L->base-1-LJ_FR2, 0); |
607 | setstrV(L, L->base, lj_err_str(L, em)); | 616 | setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); |
608 | return FFH_RES(2); | 617 | return FFH_RES(2); |
609 | } | 618 | } |
610 | lj_state_growstack(co, (MSize)(L->top - L->base)); | 619 | lj_state_growstack(co, (MSize)(L->top - L->base)); |
@@ -645,9 +654,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn); | |||
645 | 654 | ||
646 | LJLIB_CF(coroutine_wrap) | 655 | LJLIB_CF(coroutine_wrap) |
647 | { | 656 | { |
657 | GCfunc *fn; | ||
648 | lj_cf_coroutine_create(L); | 658 | lj_cf_coroutine_create(L); |
649 | lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); | 659 | fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); |
650 | setpc_wrap_aux(L, funcV(L->top-1)); | 660 | setpc_wrap_aux(L, fn); |
651 | return 1; | 661 | return 1; |
652 | } | 662 | } |
653 | 663 | ||
diff --git a/src/lib_bit.c b/src/lib_bit.c index 9e75eef3..38c0f578 100644 --- a/src/lib_bit.c +++ b/src/lib_bit.c | |||
@@ -12,26 +12,99 @@ | |||
12 | 12 | ||
13 | #include "lj_obj.h" | 13 | #include "lj_obj.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_str.h" | 15 | #include "lj_buf.h" |
16 | #include "lj_strscan.h" | ||
17 | #include "lj_strfmt.h" | ||
18 | #if LJ_HASFFI | ||
19 | #include "lj_ctype.h" | ||
20 | #include "lj_cdata.h" | ||
21 | #include "lj_cconv.h" | ||
22 | #include "lj_carith.h" | ||
23 | #endif | ||
24 | #include "lj_ff.h" | ||
16 | #include "lj_lib.h" | 25 | #include "lj_lib.h" |
17 | 26 | ||
18 | /* ------------------------------------------------------------------------ */ | 27 | /* ------------------------------------------------------------------------ */ |
19 | 28 | ||
20 | #define LJLIB_MODULE_bit | 29 | #define LJLIB_MODULE_bit |
21 | 30 | ||
22 | LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) | 31 | #if LJ_HASFFI |
32 | static int bit_result64(lua_State *L, CTypeID id, uint64_t x) | ||
23 | { | 33 | { |
34 | GCcdata *cd = lj_cdata_new_(L, id, 8); | ||
35 | *(uint64_t *)cdataptr(cd) = x; | ||
36 | setcdataV(L, L->base-1-LJ_FR2, cd); | ||
37 | return FFH_RES(1); | ||
38 | } | ||
39 | #else | ||
40 | static int32_t bit_checkbit(lua_State *L, int narg) | ||
41 | { | ||
42 | TValue *o = L->base + narg-1; | ||
43 | if (!(o < L->top && lj_strscan_numberobj(o))) | ||
44 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
45 | if (LJ_LIKELY(tvisint(o))) { | ||
46 | return intV(o); | ||
47 | } else { | ||
48 | int32_t i = lj_num2bit(numV(o)); | ||
49 | if (LJ_DUALNUM) setintV(o, i); | ||
50 | return i; | ||
51 | } | ||
52 | } | ||
53 | #endif | ||
54 | |||
55 | LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit) | ||
56 | { | ||
57 | #if LJ_HASFFI | ||
58 | CTypeID id = 0; | ||
59 | setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id)); | ||
60 | return FFH_RES(1); | ||
61 | #else | ||
62 | lj_lib_checknumber(L, 1); | ||
63 | return FFH_RETRY; | ||
64 | #endif | ||
65 | } | ||
66 | |||
67 | LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) | ||
68 | { | ||
69 | #if LJ_HASFFI | ||
70 | CTypeID id = 0; | ||
71 | uint64_t x = lj_carith_check64(L, 1, &id); | ||
72 | return id ? bit_result64(L, id, ~x) : FFH_RETRY; | ||
73 | #else | ||
24 | lj_lib_checknumber(L, 1); | 74 | lj_lib_checknumber(L, 1); |
25 | return FFH_RETRY; | 75 | return FFH_RETRY; |
76 | #endif | ||
77 | } | ||
78 | |||
79 | LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) | ||
80 | { | ||
81 | #if LJ_HASFFI | ||
82 | CTypeID id = 0; | ||
83 | uint64_t x = lj_carith_check64(L, 1, &id); | ||
84 | return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY; | ||
85 | #else | ||
86 | lj_lib_checknumber(L, 1); | ||
87 | return FFH_RETRY; | ||
88 | #endif | ||
26 | } | 89 | } |
27 | LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) | ||
28 | LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) | ||
29 | 90 | ||
30 | LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) | 91 | LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) |
31 | { | 92 | { |
93 | #if LJ_HASFFI | ||
94 | CTypeID id = 0, id2 = 0; | ||
95 | uint64_t x = lj_carith_check64(L, 1, &id); | ||
96 | int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2); | ||
97 | if (id) { | ||
98 | x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift); | ||
99 | return bit_result64(L, id, x); | ||
100 | } | ||
101 | if (id2) setintV(L->base+1, sh); | ||
102 | return FFH_RETRY; | ||
103 | #else | ||
32 | lj_lib_checknumber(L, 1); | 104 | lj_lib_checknumber(L, 1); |
33 | lj_lib_checkbit(L, 2); | 105 | bit_checkbit(L, 2); |
34 | return FFH_RETRY; | 106 | return FFH_RETRY; |
107 | #endif | ||
35 | } | 108 | } |
36 | LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) | 109 | LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) |
37 | LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) | 110 | LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) |
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) | |||
40 | 113 | ||
41 | LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) | 114 | LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) |
42 | { | 115 | { |
116 | #if LJ_HASFFI | ||
117 | CTypeID id = 0; | ||
118 | TValue *o = L->base, *top = L->top; | ||
119 | int i = 0; | ||
120 | do { lj_carith_check64(L, ++i, &id); } while (++o < top); | ||
121 | if (id) { | ||
122 | CTState *cts = ctype_cts(L); | ||
123 | CType *ct = ctype_get(cts, id); | ||
124 | int op = curr_func(L)->c.ffid - (int)FF_bit_bor; | ||
125 | uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0; | ||
126 | o = L->base; | ||
127 | do { | ||
128 | lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0); | ||
129 | if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x; | ||
130 | } while (++o < top); | ||
131 | return bit_result64(L, id, y); | ||
132 | } | ||
133 | return FFH_RETRY; | ||
134 | #else | ||
43 | int i = 0; | 135 | int i = 0; |
44 | do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); | 136 | do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); |
45 | return FFH_RETRY; | 137 | return FFH_RETRY; |
138 | #endif | ||
46 | } | 139 | } |
47 | LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) | 140 | LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) |
48 | LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) | 141 | LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) |
49 | 142 | ||
50 | /* ------------------------------------------------------------------------ */ | 143 | /* ------------------------------------------------------------------------ */ |
51 | 144 | ||
52 | LJLIB_CF(bit_tohex) | 145 | LJLIB_CF(bit_tohex) LJLIB_REC(.) |
53 | { | 146 | { |
54 | uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); | 147 | #if LJ_HASFFI |
55 | int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); | 148 | CTypeID id = 0, id2 = 0; |
56 | const char *hexdigits = "0123456789abcdef"; | 149 | uint64_t b = lj_carith_check64(L, 1, &id); |
57 | char buf[8]; | 150 | int32_t n = L->base+1>=L->top ? (id ? 16 : 8) : |
58 | if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } | 151 | (int32_t)lj_carith_check64(L, 2, &id2); |
59 | if (n > 8) n = 8; | 152 | #else |
60 | for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } | 153 | uint32_t b = (uint32_t)bit_checkbit(L, 1); |
61 | lua_pushlstring(L, buf, (size_t)n); | 154 | int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2); |
155 | #endif | ||
156 | SBuf *sb = lj_buf_tmp_(L); | ||
157 | SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); | ||
158 | if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } | ||
159 | sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); | ||
160 | #if LJ_HASFFI | ||
161 | if (n < 16) b &= ((uint64_t)1 << 4*n)-1; | ||
162 | #else | ||
163 | if (n < 8) b &= (1u << 4*n)-1; | ||
164 | #endif | ||
165 | sb = lj_strfmt_putfxint(sb, sf, b); | ||
166 | setstrV(L, L->top-1, lj_buf_str(L, sb)); | ||
167 | lj_gc_check(L); | ||
62 | return 1; | 168 | return 1; |
63 | } | 169 | } |
64 | 170 | ||
diff --git a/src/lib_buffer.c b/src/lib_buffer.c new file mode 100644 index 00000000..9aa725b6 --- /dev/null +++ b/src/lib_buffer.c | |||
@@ -0,0 +1,356 @@ | |||
1 | /* | ||
2 | ** Buffer library. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lib_buffer_c | ||
7 | #define LUA_LIB | ||
8 | |||
9 | #include "lua.h" | ||
10 | #include "lauxlib.h" | ||
11 | #include "lualib.h" | ||
12 | |||
13 | #include "lj_obj.h" | ||
14 | |||
15 | #if LJ_HASBUFFER | ||
16 | #include "lj_gc.h" | ||
17 | #include "lj_err.h" | ||
18 | #include "lj_buf.h" | ||
19 | #include "lj_str.h" | ||
20 | #include "lj_tab.h" | ||
21 | #include "lj_udata.h" | ||
22 | #include "lj_meta.h" | ||
23 | #if LJ_HASFFI | ||
24 | #include "lj_ctype.h" | ||
25 | #include "lj_cdata.h" | ||
26 | #include "lj_cconv.h" | ||
27 | #endif | ||
28 | #include "lj_strfmt.h" | ||
29 | #include "lj_serialize.h" | ||
30 | #include "lj_lib.h" | ||
31 | |||
32 | /* -- Helper functions ---------------------------------------------------- */ | ||
33 | |||
34 | /* Check that the first argument is a string buffer. */ | ||
35 | static SBufExt *buffer_tobuf(lua_State *L) | ||
36 | { | ||
37 | if (!(L->base < L->top && tvisbuf(L->base))) | ||
38 | lj_err_argtype(L, 1, "buffer"); | ||
39 | return bufV(L->base); | ||
40 | } | ||
41 | |||
42 | /* Ditto, but for writers. */ | ||
43 | static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L) | ||
44 | { | ||
45 | SBufExt *sbx = buffer_tobuf(L); | ||
46 | setsbufXL_(sbx, L); | ||
47 | return sbx; | ||
48 | } | ||
49 | |||
50 | #define buffer_toudata(sbx) ((GCudata *)(sbx)-1) | ||
51 | |||
52 | /* -- Buffer methods ------------------------------------------------------ */ | ||
53 | |||
54 | #define LJLIB_MODULE_buffer_method | ||
55 | |||
56 | LJLIB_CF(buffer_method_free) | ||
57 | { | ||
58 | SBufExt *sbx = buffer_tobuf(L); | ||
59 | lj_bufx_free(L, sbx); | ||
60 | L->top = L->base+1; /* Chain buffer object. */ | ||
61 | return 1; | ||
62 | } | ||
63 | |||
64 | LJLIB_CF(buffer_method_reset) LJLIB_REC(.) | ||
65 | { | ||
66 | SBufExt *sbx = buffer_tobuf(L); | ||
67 | lj_bufx_reset(sbx); | ||
68 | L->top = L->base+1; /* Chain buffer object. */ | ||
69 | return 1; | ||
70 | } | ||
71 | |||
72 | LJLIB_CF(buffer_method_skip) LJLIB_REC(.) | ||
73 | { | ||
74 | SBufExt *sbx = buffer_tobuf(L); | ||
75 | MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); | ||
76 | MSize len = sbufxlen(sbx); | ||
77 | if (n < len) { | ||
78 | sbx->r += n; | ||
79 | } else { | ||
80 | sbx->r = sbx->w = sbx->b; | ||
81 | } | ||
82 | L->top = L->base+1; /* Chain buffer object. */ | ||
83 | return 1; | ||
84 | } | ||
85 | |||
86 | LJLIB_CF(buffer_method_set) LJLIB_REC(.) | ||
87 | { | ||
88 | SBufExt *sbx = buffer_tobuf(L); | ||
89 | GCobj *ref; | ||
90 | const char *p; | ||
91 | MSize len; | ||
92 | #if LJ_HASFFI | ||
93 | if (tviscdata(L->base+1)) { | ||
94 | CTState *cts = ctype_cts(L); | ||
95 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, | ||
96 | L->base+1, CCF_ARG(2)); | ||
97 | len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); | ||
98 | } else | ||
99 | #endif | ||
100 | { | ||
101 | GCstr *str = lj_lib_checkstrx(L, 2); | ||
102 | p = strdata(str); | ||
103 | len = str->len; | ||
104 | } | ||
105 | lj_bufx_free(L, sbx); | ||
106 | lj_bufx_set_cow(L, sbx, p, len); | ||
107 | ref = gcV(L->base+1); | ||
108 | setgcref(sbx->cowref, ref); | ||
109 | lj_gc_objbarrier(L, buffer_toudata(sbx), ref); | ||
110 | L->top = L->base+1; /* Chain buffer object. */ | ||
111 | return 1; | ||
112 | } | ||
113 | |||
114 | LJLIB_CF(buffer_method_put) LJLIB_REC(.) | ||
115 | { | ||
116 | SBufExt *sbx = buffer_tobufw(L); | ||
117 | ptrdiff_t arg, narg = L->top - L->base; | ||
118 | for (arg = 1; arg < narg; arg++) { | ||
119 | cTValue *o = &L->base[arg], *mo = NULL; | ||
120 | retry: | ||
121 | if (tvisstr(o)) { | ||
122 | lj_buf_putstr((SBuf *)sbx, strV(o)); | ||
123 | } else if (tvisint(o)) { | ||
124 | lj_strfmt_putint((SBuf *)sbx, intV(o)); | ||
125 | } else if (tvisnum(o)) { | ||
126 | lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o)); | ||
127 | } else if (tvisbuf(o)) { | ||
128 | SBufExt *sbx2 = bufV(o); | ||
129 | if (sbx2 == sbx) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF); | ||
130 | lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2)); | ||
131 | } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | ||
132 | /* Call __tostring metamethod inline. */ | ||
133 | copyTV(L, L->top++, mo); | ||
134 | copyTV(L, L->top++, o); | ||
135 | lua_call(L, 1, 1); | ||
136 | o = &L->base[arg]; /* The stack may have been reallocated. */ | ||
137 | copyTV(L, &L->base[arg], L->top-1); | ||
138 | L->top = L->base + narg; | ||
139 | goto retry; /* Retry with the result. */ | ||
140 | } else { | ||
141 | lj_err_argtype(L, arg+1, "string/number/__tostring"); | ||
142 | } | ||
143 | /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */ | ||
144 | } | ||
145 | L->top = L->base+1; /* Chain buffer object. */ | ||
146 | lj_gc_check(L); | ||
147 | return 1; | ||
148 | } | ||
149 | |||
150 | LJLIB_CF(buffer_method_putf) LJLIB_REC(.) | ||
151 | { | ||
152 | SBufExt *sbx = buffer_tobufw(L); | ||
153 | lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2); | ||
154 | L->top = L->base+1; /* Chain buffer object. */ | ||
155 | lj_gc_check(L); | ||
156 | return 1; | ||
157 | } | ||
158 | |||
159 | LJLIB_CF(buffer_method_get) LJLIB_REC(.) | ||
160 | { | ||
161 | SBufExt *sbx = buffer_tobuf(L); | ||
162 | ptrdiff_t arg, narg = L->top - L->base; | ||
163 | if (narg == 1) { | ||
164 | narg++; | ||
165 | setnilV(L->top++); /* get() is the same as get(nil). */ | ||
166 | } | ||
167 | for (arg = 1; arg < narg; arg++) { | ||
168 | TValue *o = &L->base[arg]; | ||
169 | MSize n = tvisnil(o) ? LJ_MAX_BUF : | ||
170 | (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF); | ||
171 | MSize len = sbufxlen(sbx); | ||
172 | if (n > len) n = len; | ||
173 | setstrV(L, o, lj_str_new(L, sbx->r, n)); | ||
174 | sbx->r += n; | ||
175 | } | ||
176 | if (sbx->r == sbx->w) sbx->r = sbx->w = sbx->b; | ||
177 | lj_gc_check(L); | ||
178 | return narg-1; | ||
179 | } | ||
180 | |||
181 | #if LJ_HASFFI | ||
182 | LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.) | ||
183 | { | ||
184 | SBufExt *sbx = buffer_tobufw(L); | ||
185 | const char *p; | ||
186 | MSize len; | ||
187 | if (tviscdata(L->base+1)) { | ||
188 | CTState *cts = ctype_cts(L); | ||
189 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, | ||
190 | L->base+1, CCF_ARG(2)); | ||
191 | } else { | ||
192 | lj_err_argtype(L, 2, "cdata"); | ||
193 | } | ||
194 | len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); | ||
195 | lj_buf_putmem((SBuf *)sbx, p, len); | ||
196 | L->top = L->base+1; /* Chain buffer object. */ | ||
197 | return 1; | ||
198 | } | ||
199 | |||
200 | LJLIB_CF(buffer_method_reserve) LJLIB_REC(.) | ||
201 | { | ||
202 | SBufExt *sbx = buffer_tobufw(L); | ||
203 | MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); | ||
204 | GCcdata *cd; | ||
205 | lj_buf_more((SBuf *)sbx, sz); | ||
206 | ctype_loadffi(L); | ||
207 | cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); | ||
208 | *(void **)cdataptr(cd) = sbx->w; | ||
209 | setcdataV(L, L->top++, cd); | ||
210 | setintV(L->top++, sbufleft(sbx)); | ||
211 | return 2; | ||
212 | } | ||
213 | |||
214 | LJLIB_CF(buffer_method_commit) LJLIB_REC(.) | ||
215 | { | ||
216 | SBufExt *sbx = buffer_tobuf(L); | ||
217 | MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); | ||
218 | if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG); | ||
219 | sbx->w += len; | ||
220 | L->top = L->base+1; /* Chain buffer object. */ | ||
221 | return 1; | ||
222 | } | ||
223 | |||
224 | LJLIB_CF(buffer_method_ref) LJLIB_REC(.) | ||
225 | { | ||
226 | SBufExt *sbx = buffer_tobuf(L); | ||
227 | GCcdata *cd; | ||
228 | ctype_loadffi(L); | ||
229 | cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); | ||
230 | *(void **)cdataptr(cd) = sbx->r; | ||
231 | setcdataV(L, L->top++, cd); | ||
232 | setintV(L->top++, sbufxlen(sbx)); | ||
233 | return 2; | ||
234 | } | ||
235 | #endif | ||
236 | |||
237 | LJLIB_CF(buffer_method_encode) LJLIB_REC(.) | ||
238 | { | ||
239 | SBufExt *sbx = buffer_tobufw(L); | ||
240 | cTValue *o = lj_lib_checkany(L, 2); | ||
241 | lj_serialize_put(sbx, o); | ||
242 | lj_gc_check(L); | ||
243 | L->top = L->base+1; /* Chain buffer object. */ | ||
244 | return 1; | ||
245 | } | ||
246 | |||
247 | LJLIB_CF(buffer_method_decode) LJLIB_REC(.) | ||
248 | { | ||
249 | SBufExt *sbx = buffer_tobufw(L); | ||
250 | setnilV(L->top++); | ||
251 | sbx->r = lj_serialize_get(sbx, L->top-1); | ||
252 | lj_gc_check(L); | ||
253 | return 1; | ||
254 | } | ||
255 | |||
256 | LJLIB_CF(buffer_method___gc) | ||
257 | { | ||
258 | SBufExt *sbx = buffer_tobuf(L); | ||
259 | lj_bufx_free(L, sbx); | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | LJLIB_CF(buffer_method___tostring) LJLIB_REC(.) | ||
264 | { | ||
265 | SBufExt *sbx = buffer_tobuf(L); | ||
266 | setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx))); | ||
267 | lj_gc_check(L); | ||
268 | return 1; | ||
269 | } | ||
270 | |||
271 | LJLIB_CF(buffer_method___len) LJLIB_REC(.) | ||
272 | { | ||
273 | SBufExt *sbx = buffer_tobuf(L); | ||
274 | setintV(L->top-1, (int32_t)sbufxlen(sbx)); | ||
275 | return 1; | ||
276 | } | ||
277 | |||
278 | LJLIB_PUSH("buffer") LJLIB_SET(__metatable) | ||
279 | LJLIB_PUSH(top-1) LJLIB_SET(__index) | ||
280 | |||
281 | /* -- Buffer library functions -------------------------------------------- */ | ||
282 | |||
283 | #define LJLIB_MODULE_buffer | ||
284 | |||
285 | LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ | ||
286 | |||
287 | LJLIB_CF(buffer_new) | ||
288 | { | ||
289 | MSize sz = 0; | ||
290 | int targ = 1; | ||
291 | GCtab *env, *dict_str = NULL, *dict_mt = NULL; | ||
292 | GCudata *ud; | ||
293 | SBufExt *sbx; | ||
294 | if (L->base < L->top && !tvistab(L->base)) { | ||
295 | targ = 2; | ||
296 | if (!tvisnil(L->base)) | ||
297 | sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF); | ||
298 | } | ||
299 | if (L->base+targ-1 < L->top) { | ||
300 | GCtab *options = lj_lib_checktab(L, targ); | ||
301 | cTValue *opt_dict, *opt_mt; | ||
302 | opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict")); | ||
303 | if (opt_dict && tvistab(opt_dict)) { | ||
304 | dict_str = tabV(opt_dict); | ||
305 | lj_serialize_dict_prep_str(L, dict_str); | ||
306 | } | ||
307 | opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable")); | ||
308 | if (opt_mt && tvistab(opt_mt)) { | ||
309 | dict_mt = tabV(opt_mt); | ||
310 | lj_serialize_dict_prep_mt(L, dict_mt); | ||
311 | } | ||
312 | } | ||
313 | env = tabref(curr_func(L)->c.env); | ||
314 | ud = lj_udata_new(L, sizeof(SBufExt), env); | ||
315 | ud->udtype = UDTYPE_BUFFER; | ||
316 | /* NOBARRIER: The GCudata is new (marked white). */ | ||
317 | setgcref(ud->metatable, obj2gco(env)); | ||
318 | setudataV(L, L->top++, ud); | ||
319 | sbx = (SBufExt *)uddata(ud); | ||
320 | lj_bufx_init(L, sbx); | ||
321 | setgcref(sbx->dict_str, obj2gco(dict_str)); | ||
322 | setgcref(sbx->dict_mt, obj2gco(dict_mt)); | ||
323 | if (sz > 0) lj_buf_need2((SBuf *)sbx, sz); | ||
324 | return 1; | ||
325 | } | ||
326 | |||
327 | LJLIB_CF(buffer_encode) LJLIB_REC(.) | ||
328 | { | ||
329 | cTValue *o = lj_lib_checkany(L, 1); | ||
330 | setstrV(L, L->top++, lj_serialize_encode(L, o)); | ||
331 | lj_gc_check(L); | ||
332 | return 1; | ||
333 | } | ||
334 | |||
335 | LJLIB_CF(buffer_decode) LJLIB_REC(.) | ||
336 | { | ||
337 | GCstr *str = lj_lib_checkstrx(L, 1); | ||
338 | setnilV(L->top++); | ||
339 | lj_serialize_decode(L, L->top-1, str); | ||
340 | return 1; | ||
341 | } | ||
342 | |||
343 | /* ------------------------------------------------------------------------ */ | ||
344 | |||
345 | #include "lj_libdef.h" | ||
346 | |||
347 | int luaopen_string_buffer(lua_State *L) | ||
348 | { | ||
349 | LJ_LIB_REG(L, NULL, buffer_method); | ||
350 | lua_getfield(L, -1, "__tostring"); | ||
351 | lua_setfield(L, -2, "tostring"); | ||
352 | LJ_LIB_REG(L, NULL, buffer); | ||
353 | return 1; | ||
354 | } | ||
355 | |||
356 | #endif | ||
diff --git a/src/lib_debug.c b/src/lib_debug.c index e7d8d24a..3af7a353 100644 --- a/src/lib_debug.c +++ b/src/lib_debug.c | |||
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry) | |||
29 | return 1; | 29 | return 1; |
30 | } | 30 | } |
31 | 31 | ||
32 | LJLIB_CF(debug_getmetatable) | 32 | LJLIB_CF(debug_getmetatable) LJLIB_REC(.) |
33 | { | 33 | { |
34 | lj_lib_checkany(L, 1); | 34 | lj_lib_checkany(L, 1); |
35 | if (!lua_getmetatable(L, 1)) { | 35 | if (!lua_getmetatable(L, 1)) { |
@@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid) | |||
231 | int32_t n = lj_lib_checkint(L, 2) - 1; | 231 | int32_t n = lj_lib_checkint(L, 2) - 1; |
232 | if ((uint32_t)n >= fn->l.nupvalues) | 232 | if ((uint32_t)n >= fn->l.nupvalues) |
233 | lj_err_arg(L, 2, LJ_ERR_IDXRNG); | 233 | lj_err_arg(L, 2, LJ_ERR_IDXRNG); |
234 | setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : | 234 | lua_pushlightuserdata(L, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : |
235 | (void *)&fn->c.upvalue[n]); | 235 | (void *)&fn->c.upvalue[n]); |
236 | return 1; | 236 | return 1; |
237 | } | 237 | } |
238 | 238 | ||
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue) | |||
283 | 283 | ||
284 | /* ------------------------------------------------------------------------ */ | 284 | /* ------------------------------------------------------------------------ */ |
285 | 285 | ||
286 | static const char KEY_HOOK = 'h'; | 286 | #define KEY_HOOK (U64x(80000000,00000000)|'h') |
287 | 287 | ||
288 | static void hookf(lua_State *L, lua_Debug *ar) | 288 | static void hookf(lua_State *L, lua_Debug *ar) |
289 | { | 289 | { |
290 | static const char *const hooknames[] = | 290 | static const char *const hooknames[] = |
291 | {"call", "return", "line", "count", "tail return"}; | 291 | {"call", "return", "line", "count", "tail return"}; |
292 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | 292 | (L->top++)->u64 = KEY_HOOK; |
293 | lua_rawget(L, LUA_REGISTRYINDEX); | 293 | lua_rawget(L, LUA_REGISTRYINDEX); |
294 | if (lua_isfunction(L, -1)) { | 294 | if (lua_isfunction(L, -1)) { |
295 | lua_pushstring(L, hooknames[(int)ar->event]); | 295 | lua_pushstring(L, hooknames[(int)ar->event]); |
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook) | |||
334 | count = luaL_optint(L, arg+3, 0); | 334 | count = luaL_optint(L, arg+3, 0); |
335 | func = hookf; mask = makemask(smask, count); | 335 | func = hookf; mask = makemask(smask, count); |
336 | } | 336 | } |
337 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | 337 | (L->top++)->u64 = KEY_HOOK; |
338 | lua_pushvalue(L, arg+1); | 338 | lua_pushvalue(L, arg+1); |
339 | lua_rawset(L, LUA_REGISTRYINDEX); | 339 | lua_rawset(L, LUA_REGISTRYINDEX); |
340 | lua_sethook(L, func, mask, count); | 340 | lua_sethook(L, func, mask, count); |
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook) | |||
349 | if (hook != NULL && hook != hookf) { /* external hook? */ | 349 | if (hook != NULL && hook != hookf) { /* external hook? */ |
350 | lua_pushliteral(L, "external hook"); | 350 | lua_pushliteral(L, "external hook"); |
351 | } else { | 351 | } else { |
352 | lua_pushlightuserdata(L, (void *)&KEY_HOOK); | 352 | (L->top++)->u64 = KEY_HOOK; |
353 | lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ | 353 | lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ |
354 | } | 354 | } |
355 | lua_pushstring(L, unmakemask(mask, buff)); | 355 | lua_pushstring(L, unmakemask(mask, buff)); |
diff --git a/src/lib_ffi.c b/src/lib_ffi.c index cc8d419d..ac2b2742 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "lj_ccall.h" | 29 | #include "lj_ccall.h" |
30 | #include "lj_ccallback.h" | 30 | #include "lj_ccallback.h" |
31 | #include "lj_clib.h" | 31 | #include "lj_clib.h" |
32 | #include "lj_strfmt.h" | ||
32 | #include "lj_ff.h" | 33 | #include "lj_ff.h" |
33 | #include "lj_lib.h" | 34 | #include "lj_lib.h" |
34 | 35 | ||
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm) | |||
137 | } | 138 | } |
138 | } | 139 | } |
139 | copyTV(L, base, L->top); | 140 | copyTV(L, base, L->top); |
140 | tv = L->top-1; | 141 | tv = L->top-1-LJ_FR2; |
141 | } | 142 | } |
142 | return lj_meta_tailcall(L, tv); | 143 | return lj_meta_tailcall(L, tv); |
143 | } | 144 | } |
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring) | |||
318 | } | 319 | } |
319 | } | 320 | } |
320 | } | 321 | } |
321 | lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); | 322 | lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); |
322 | checkgc: | 323 | checkgc: |
323 | lj_gc_check(L); | 324 | lj_gc_check(L); |
324 | return 1; | 325 | return 1; |
@@ -504,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.) | |||
504 | } | 505 | } |
505 | if (sz == CTSIZE_INVALID) | 506 | if (sz == CTSIZE_INVALID) |
506 | lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); | 507 | lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); |
507 | if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) | 508 | cd = lj_cdata_newx(cts, id, sz, info); |
508 | cd = lj_cdata_new(cts, id, sz); | ||
509 | else | ||
510 | cd = lj_cdata_newv(cts, id, sz, ctype_align(info)); | ||
511 | setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ | 509 | setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ |
512 | lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), | 510 | lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), |
513 | o, (MSize)(L->top - o)); /* Initialize cdata. */ | 511 | o, (MSize)(L->top - o)); /* Initialize cdata. */ |
@@ -558,6 +556,32 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.) | |||
558 | return 1; | 556 | return 1; |
559 | } | 557 | } |
560 | 558 | ||
559 | /* Internal and unsupported API. */ | ||
560 | LJLIB_CF(ffi_typeinfo) | ||
561 | { | ||
562 | CTState *cts = ctype_cts(L); | ||
563 | CTypeID id = (CTypeID)ffi_checkint(L, 1); | ||
564 | if (id > 0 && id < cts->top) { | ||
565 | CType *ct = ctype_get(cts, id); | ||
566 | GCtab *t; | ||
567 | lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */ | ||
568 | t = tabV(L->top-1); | ||
569 | setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info); | ||
570 | if (ct->size != CTSIZE_INVALID) | ||
571 | setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size); | ||
572 | if (ct->sib) | ||
573 | setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib); | ||
574 | if (gcref(ct->name)) { | ||
575 | GCstr *s = gco2str(gcref(ct->name)); | ||
576 | if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s)); | ||
577 | setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s); | ||
578 | } | ||
579 | lj_gc_check(L); | ||
580 | return 1; | ||
581 | } | ||
582 | return 0; | ||
583 | } | ||
584 | |||
561 | LJLIB_CF(ffi_istype) LJLIB_REC(.) | 585 | LJLIB_CF(ffi_istype) LJLIB_REC(.) |
562 | { | 586 | { |
563 | CTState *cts = ctype_cts(L); | 587 | CTState *cts = ctype_cts(L); |
@@ -697,44 +721,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.) | |||
697 | return 0; | 721 | return 0; |
698 | } | 722 | } |
699 | 723 | ||
700 | #define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) | ||
701 | |||
702 | /* Test ABI string. */ | 724 | /* Test ABI string. */ |
703 | LJLIB_CF(ffi_abi) LJLIB_REC(.) | 725 | LJLIB_CF(ffi_abi) LJLIB_REC(.) |
704 | { | 726 | { |
705 | GCstr *s = lj_lib_checkstr(L, 1); | 727 | GCstr *s = lj_lib_checkstr(L, 1); |
706 | int b = 0; | 728 | int b = lj_cparse_case(s, |
707 | switch (s->hash) { | ||
708 | #if LJ_64 | 729 | #if LJ_64 |
709 | case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ | 730 | "\00564bit" |
710 | #else | 731 | #else |
711 | case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ | 732 | "\00532bit" |
712 | #endif | 733 | #endif |
713 | #if LJ_ARCH_HASFPU | 734 | #if LJ_ARCH_HASFPU |
714 | case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ | 735 | "\003fpu" |
715 | #endif | 736 | #endif |
716 | #if LJ_ABI_SOFTFP | 737 | #if LJ_ABI_SOFTFP |
717 | case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ | 738 | "\006softfp" |
718 | #else | 739 | #else |
719 | case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ | 740 | "\006hardfp" |
720 | #endif | 741 | #endif |
721 | #if LJ_ABI_EABI | 742 | #if LJ_ABI_EABI |
722 | case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ | 743 | "\004eabi" |
723 | #endif | 744 | #endif |
724 | #if LJ_ABI_WIN | 745 | #if LJ_ABI_WIN |
725 | case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ | 746 | "\003win" |
726 | #endif | 747 | #endif |
727 | case H_(3af93066,1f001464): b = 1; break; /* le/be */ | 748 | #if LJ_TARGET_UWP |
728 | default: | 749 | "\003uwp" |
729 | break; | 750 | #endif |
730 | } | 751 | #if LJ_LE |
752 | "\002le" | ||
753 | #else | ||
754 | "\002be" | ||
755 | #endif | ||
756 | #if LJ_GC64 | ||
757 | "\004gc64" | ||
758 | #endif | ||
759 | ) >= 0; | ||
731 | setboolV(L->top-1, b); | 760 | setboolV(L->top-1, b); |
732 | setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ | 761 | setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ |
733 | return 1; | 762 | return 1; |
734 | } | 763 | } |
735 | 764 | ||
736 | #undef H_ | ||
737 | |||
738 | LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ | 765 | LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ |
739 | 766 | ||
740 | LJLIB_CF(ffi_metatype) | 767 | LJLIB_CF(ffi_metatype) |
@@ -768,19 +795,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.) | |||
768 | GCcdata *cd = ffi_checkcdata(L, 1); | 795 | GCcdata *cd = ffi_checkcdata(L, 1); |
769 | TValue *fin = lj_lib_checkany(L, 2); | 796 | TValue *fin = lj_lib_checkany(L, 2); |
770 | CTState *cts = ctype_cts(L); | 797 | CTState *cts = ctype_cts(L); |
771 | GCtab *t = cts->finalizer; | ||
772 | CType *ct = ctype_raw(cts, cd->ctypeid); | 798 | CType *ct = ctype_raw(cts, cd->ctypeid); |
773 | if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || | 799 | if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || |
774 | ctype_isrefarray(ct->info))) | 800 | ctype_isrefarray(ct->info))) |
775 | lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); | 801 | lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); |
776 | if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ | 802 | lj_cdata_setfin(L, cd, gcval(fin), itype(fin)); |
777 | copyTV(L, lj_tab_set(L, t, L->base), fin); | ||
778 | lj_gc_anybarriert(L, t); | ||
779 | if (!tvisnil(fin)) | ||
780 | cd->marked |= LJ_GC_CDATA_FIN; | ||
781 | else | ||
782 | cd->marked &= ~LJ_GC_CDATA_FIN; | ||
783 | } | ||
784 | L->top = L->base+1; /* Pass through the cdata object. */ | 803 | L->top = L->base+1; /* Pass through the cdata object. */ |
785 | return 1; | 804 | return 1; |
786 | } | 805 | } |
diff --git a/src/lib_io.c b/src/lib_io.c index d5786e5d..a3278ab2 100644 --- a/src/lib_io.c +++ b/src/lib_io.c | |||
@@ -19,8 +19,10 @@ | |||
19 | #include "lj_obj.h" | 19 | #include "lj_obj.h" |
20 | #include "lj_gc.h" | 20 | #include "lj_gc.h" |
21 | #include "lj_err.h" | 21 | #include "lj_err.h" |
22 | #include "lj_buf.h" | ||
22 | #include "lj_str.h" | 23 | #include "lj_str.h" |
23 | #include "lj_state.h" | 24 | #include "lj_state.h" |
25 | #include "lj_strfmt.h" | ||
24 | #include "lj_ff.h" | 26 | #include "lj_ff.h" |
25 | #include "lj_lib.h" | 27 | #include "lj_lib.h" |
26 | 28 | ||
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode) | |||
84 | IOFileUD *iof = io_file_new(L); | 86 | IOFileUD *iof = io_file_new(L); |
85 | iof->fp = fopen(fname, mode); | 87 | iof->fp = fopen(fname, mode); |
86 | if (iof->fp == NULL) | 88 | if (iof->fp == NULL) |
87 | luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); | 89 | luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno))); |
88 | return iof; | 90 | return iof; |
89 | } | 91 | } |
90 | 92 | ||
@@ -97,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) | |||
97 | int stat = -1; | 99 | int stat = -1; |
98 | #if LJ_TARGET_POSIX | 100 | #if LJ_TARGET_POSIX |
99 | stat = pclose(iof->fp); | 101 | stat = pclose(iof->fp); |
100 | #elif LJ_TARGET_WINDOWS | 102 | #elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP |
101 | stat = _pclose(iof->fp); | 103 | stat = _pclose(iof->fp); |
102 | #else | ||
103 | lua_assert(0); | ||
104 | return 0; | ||
105 | #endif | 104 | #endif |
106 | #if LJ_52 | 105 | #if LJ_52 |
107 | iof->fp = NULL; | 106 | iof->fp = NULL; |
@@ -110,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) | |||
110 | ok = (stat != -1); | 109 | ok = (stat != -1); |
111 | #endif | 110 | #endif |
112 | } else { | 111 | } else { |
113 | lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); | 112 | lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF, |
113 | "close of unknown FILE* type"); | ||
114 | setnilV(L->top++); | 114 | setnilV(L->top++); |
115 | lua_pushliteral(L, "cannot close standard file"); | 115 | lua_pushliteral(L, "cannot close standard file"); |
116 | return 2; | 116 | return 2; |
@@ -145,7 +145,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop) | |||
145 | MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; | 145 | MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; |
146 | char *buf; | 146 | char *buf; |
147 | for (;;) { | 147 | for (;;) { |
148 | buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); | 148 | buf = lj_buf_tmp(L, m); |
149 | if (fgets(buf+n, m-n, fp) == NULL) break; | 149 | if (fgets(buf+n, m-n, fp) == NULL) break; |
150 | n += (MSize)strlen(buf+n); | 150 | n += (MSize)strlen(buf+n); |
151 | ok |= n; | 151 | ok |= n; |
@@ -161,7 +161,7 @@ static void io_file_readall(lua_State *L, FILE *fp) | |||
161 | { | 161 | { |
162 | MSize m, n; | 162 | MSize m, n; |
163 | for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { | 163 | for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { |
164 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); | 164 | char *buf = lj_buf_tmp(L, m); |
165 | n += (MSize)fread(buf+n, 1, m-n, fp); | 165 | n += (MSize)fread(buf+n, 1, m-n, fp); |
166 | if (n != m) { | 166 | if (n != m) { |
167 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); | 167 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); |
@@ -174,7 +174,7 @@ static void io_file_readall(lua_State *L, FILE *fp) | |||
174 | static int io_file_readlen(lua_State *L, FILE *fp, MSize m) | 174 | static int io_file_readlen(lua_State *L, FILE *fp, MSize m) |
175 | { | 175 | { |
176 | if (m) { | 176 | if (m) { |
177 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); | 177 | char *buf = lj_buf_tmp(L, m); |
178 | MSize n = (MSize)fread(buf, 1, m, fp); | 178 | MSize n = (MSize)fread(buf, 1, m, fp); |
179 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); | 179 | setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); |
180 | lj_gc_check(L); | 180 | lj_gc_check(L); |
@@ -202,13 +202,12 @@ static int io_file_read(lua_State *L, IOFileUD *iof, int start) | |||
202 | for (n = start; nargs-- && ok; n++) { | 202 | for (n = start; nargs-- && ok; n++) { |
203 | if (tvisstr(L->base+n)) { | 203 | if (tvisstr(L->base+n)) { |
204 | const char *p = strVdata(L->base+n); | 204 | const char *p = strVdata(L->base+n); |
205 | if (p[0] != '*') | 205 | if (p[0] == '*') p++; |
206 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); | 206 | if (p[0] == 'n') |
207 | if (p[1] == 'n') | ||
208 | ok = io_file_readnum(L, fp); | 207 | ok = io_file_readnum(L, fp); |
209 | else if ((p[1] & ~0x20) == 'L') | 208 | else if ((p[0] & ~0x20) == 'L') |
210 | ok = io_file_readline(L, fp, (p[1] == 'l')); | 209 | ok = io_file_readline(L, fp, (p[0] == 'l')); |
211 | else if (p[1] == 'a') | 210 | else if (p[0] == 'a') |
212 | io_file_readall(L, fp); | 211 | io_file_readall(L, fp); |
213 | else | 212 | else |
214 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); | 213 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); |
@@ -232,19 +231,11 @@ static int io_file_write(lua_State *L, IOFileUD *iof, int start) | |||
232 | cTValue *tv; | 231 | cTValue *tv; |
233 | int status = 1; | 232 | int status = 1; |
234 | for (tv = L->base+start; tv < L->top; tv++) { | 233 | for (tv = L->base+start; tv < L->top; tv++) { |
235 | if (tvisstr(tv)) { | 234 | MSize len; |
236 | MSize len = strV(tv)->len; | 235 | const char *p = lj_strfmt_wstrnum(L, tv, &len); |
237 | status = status && (fwrite(strVdata(tv), 1, len, fp) == len); | 236 | if (!p) |
238 | } else if (tvisint(tv)) { | ||
239 | char buf[LJ_STR_INTBUF]; | ||
240 | char *p = lj_str_bufint(buf, intV(tv)); | ||
241 | size_t len = (size_t)(buf+LJ_STR_INTBUF-p); | ||
242 | status = status && (fwrite(p, 1, len, fp) == len); | ||
243 | } else if (tvisnum(tv)) { | ||
244 | status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0); | ||
245 | } else { | ||
246 | lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); | 237 | lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); |
247 | } | 238 | status = status && (fwrite(p, 1, len, fp) == len); |
248 | } | 239 | } |
249 | if (LJ_52 && status) { | 240 | if (LJ_52 && status) { |
250 | L->top = L->base+1; | 241 | L->top = L->base+1; |
@@ -319,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) | |||
319 | return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); | 310 | return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); |
320 | } | 311 | } |
321 | 312 | ||
313 | #if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24 | ||
314 | /* The Android NDK is such an unmatched marvel of engineering. */ | ||
315 | extern int fseeko32(FILE *, long int, int) __asm__("fseeko"); | ||
316 | extern long int ftello32(FILE *) __asm__("ftello"); | ||
317 | #define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence))) | ||
318 | #define ftello(fp) (ftello32((fp))) | ||
319 | #endif | ||
320 | |||
322 | LJLIB_CF(io_method_seek) | 321 | LJLIB_CF(io_method_seek) |
323 | { | 322 | { |
324 | FILE *fp = io_tofile(L)->fp; | 323 | FILE *fp = io_tofile(L)->fp; |
@@ -419,7 +418,7 @@ LJLIB_CF(io_open) | |||
419 | 418 | ||
420 | LJLIB_CF(io_popen) | 419 | LJLIB_CF(io_popen) |
421 | { | 420 | { |
422 | #if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS | 421 | #if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP) |
423 | const char *fname = strdata(lj_lib_checkstr(L, 1)); | 422 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
424 | GCstr *s = lj_lib_optstr(L, 2); | 423 | GCstr *s = lj_lib_optstr(L, 2); |
425 | const char *mode = s ? strdata(s) : "r"; | 424 | const char *mode = s ? strdata(s) : "r"; |
diff --git a/src/lib_jit.c b/src/lib_jit.c index 83ee0984..2867d420 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
@@ -10,13 +10,17 @@ | |||
10 | #include "lauxlib.h" | 10 | #include "lauxlib.h" |
11 | #include "lualib.h" | 11 | #include "lualib.h" |
12 | 12 | ||
13 | #include "lj_arch.h" | ||
14 | #include "lj_obj.h" | 13 | #include "lj_obj.h" |
14 | #include "lj_gc.h" | ||
15 | #include "lj_err.h" | 15 | #include "lj_err.h" |
16 | #include "lj_debug.h" | 16 | #include "lj_debug.h" |
17 | #include "lj_str.h" | 17 | #include "lj_str.h" |
18 | #include "lj_tab.h" | 18 | #include "lj_tab.h" |
19 | #include "lj_state.h" | ||
19 | #include "lj_bc.h" | 20 | #include "lj_bc.h" |
21 | #if LJ_HASFFI | ||
22 | #include "lj_ctype.h" | ||
23 | #endif | ||
20 | #if LJ_HASJIT | 24 | #if LJ_HASJIT |
21 | #include "lj_ir.h" | 25 | #include "lj_ir.h" |
22 | #include "lj_jit.h" | 26 | #include "lj_jit.h" |
@@ -24,6 +28,7 @@ | |||
24 | #include "lj_iropt.h" | 28 | #include "lj_iropt.h" |
25 | #include "lj_target.h" | 29 | #include "lj_target.h" |
26 | #endif | 30 | #endif |
31 | #include "lj_trace.h" | ||
27 | #include "lj_dispatch.h" | 32 | #include "lj_dispatch.h" |
28 | #include "lj_vm.h" | 33 | #include "lj_vm.h" |
29 | #include "lj_vmevent.h" | 34 | #include "lj_vmevent.h" |
@@ -99,8 +104,8 @@ LJLIB_CF(jit_status) | |||
99 | jit_State *J = L2J(L); | 104 | jit_State *J = L2J(L); |
100 | L->top = L->base; | 105 | L->top = L->base; |
101 | setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); | 106 | setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); |
102 | flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); | 107 | flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); |
103 | flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); | 108 | flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); |
104 | return (int)(L->top - L->base); | 109 | return (int)(L->top - L->base); |
105 | #else | 110 | #else |
106 | setboolV(L->top++, 0); | 111 | setboolV(L->top++, 0); |
@@ -108,6 +113,13 @@ LJLIB_CF(jit_status) | |||
108 | #endif | 113 | #endif |
109 | } | 114 | } |
110 | 115 | ||
116 | LJLIB_CF(jit_security) | ||
117 | { | ||
118 | int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING); | ||
119 | setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3)); | ||
120 | return 1; | ||
121 | } | ||
122 | |||
111 | LJLIB_CF(jit_attach) | 123 | LJLIB_CF(jit_attach) |
112 | { | 124 | { |
113 | #ifdef LUAJIT_DISABLE_VMEVENT | 125 | #ifdef LUAJIT_DISABLE_VMEVENT |
@@ -222,7 +234,7 @@ LJLIB_CF(jit_util_funcbc) | |||
222 | if (pc < pt->sizebc) { | 234 | if (pc < pt->sizebc) { |
223 | BCIns ins = proto_bc(pt)[pc]; | 235 | BCIns ins = proto_bc(pt)[pc]; |
224 | BCOp op = bc_op(ins); | 236 | BCOp op = bc_op(ins); |
225 | lua_assert(op < BC__MAX); | 237 | lj_assertL(op < BC__MAX, "bad bytecode op %d", op); |
226 | setintV(L->top, ins); | 238 | setintV(L->top, ins); |
227 | setintV(L->top+1, lj_bc_mode[op]); | 239 | setintV(L->top+1, lj_bc_mode[op]); |
228 | L->top += 2; | 240 | L->top += 2; |
@@ -280,7 +292,7 @@ static GCtrace *jit_checktrace(lua_State *L) | |||
280 | /* Names of link types. ORDER LJ_TRLINK */ | 292 | /* Names of link types. ORDER LJ_TRLINK */ |
281 | static const char *const jit_trlinkname[] = { | 293 | static const char *const jit_trlinkname[] = { |
282 | "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", | 294 | "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", |
283 | "interpreter", "return" | 295 | "interpreter", "return", "stitch" |
284 | }; | 296 | }; |
285 | 297 | ||
286 | /* local info = jit.util.traceinfo(tr) */ | 298 | /* local info = jit.util.traceinfo(tr) */ |
@@ -333,6 +345,9 @@ LJLIB_CF(jit_util_tracek) | |||
333 | slot = ir->op2; | 345 | slot = ir->op2; |
334 | ir = &T->ir[ir->op1]; | 346 | ir = &T->ir[ir->op1]; |
335 | } | 347 | } |
348 | #if LJ_HASFFI | ||
349 | if (ir->o == IR_KINT64) ctype_loadffi(L); | ||
350 | #endif | ||
336 | lj_ir_kvalue(L, L->top-2, ir); | 351 | lj_ir_kvalue(L, L->top-2, ir); |
337 | setintV(L->top-1, (int32_t)irt_type(ir->t)); | 352 | setintV(L->top-1, (int32_t)irt_type(ir->t)); |
338 | if (slot == -1) | 353 | if (slot == -1) |
@@ -417,6 +432,12 @@ LJLIB_CF(jit_util_ircalladdr) | |||
417 | 432 | ||
418 | #include "lj_libdef.h" | 433 | #include "lj_libdef.h" |
419 | 434 | ||
435 | static int luaopen_jit_util(lua_State *L) | ||
436 | { | ||
437 | LJ_LIB_REG(L, NULL, jit_util); | ||
438 | return 1; | ||
439 | } | ||
440 | |||
420 | /* -- jit.opt module ------------------------------------------------------ */ | 441 | /* -- jit.opt module ------------------------------------------------------ */ |
421 | 442 | ||
422 | #if LJ_HASJIT | 443 | #if LJ_HASJIT |
@@ -453,7 +474,7 @@ static int jitopt_flag(jit_State *J, const char *str) | |||
453 | str += str[2] == '-' ? 3 : 2; | 474 | str += str[2] == '-' ? 3 : 2; |
454 | set = 0; | 475 | set = 0; |
455 | } | 476 | } |
456 | for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { | 477 | for (opt = JIT_F_OPT; ; opt <<= 1) { |
457 | size_t len = *(const uint8_t *)lst; | 478 | size_t len = *(const uint8_t *)lst; |
458 | if (len == 0) | 479 | if (len == 0) |
459 | break; | 480 | break; |
@@ -473,7 +494,7 @@ static int jitopt_param(jit_State *J, const char *str) | |||
473 | int i; | 494 | int i; |
474 | for (i = 0; i < JIT_P__MAX; i++) { | 495 | for (i = 0; i < JIT_P__MAX; i++) { |
475 | size_t len = *(const uint8_t *)lst; | 496 | size_t len = *(const uint8_t *)lst; |
476 | lua_assert(len != 0); | 497 | lj_assertJ(len != 0, "bad JIT_P_STRING"); |
477 | if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { | 498 | if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { |
478 | int32_t n = 0; | 499 | int32_t n = 0; |
479 | const char *p = &str[len+1]; | 500 | const char *p = &str[len+1]; |
@@ -514,6 +535,104 @@ LJLIB_CF(jit_opt_start) | |||
514 | 535 | ||
515 | #endif | 536 | #endif |
516 | 537 | ||
538 | /* -- jit.profile module -------------------------------------------------- */ | ||
539 | |||
540 | #if LJ_HASPROFILE | ||
541 | |||
542 | #define LJLIB_MODULE_jit_profile | ||
543 | |||
544 | /* Not loaded by default, use: local profile = require("jit.profile") */ | ||
545 | |||
546 | #define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t') | ||
547 | #define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f') | ||
548 | |||
549 | static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, | ||
550 | int vmstate) | ||
551 | { | ||
552 | TValue key; | ||
553 | cTValue *tv; | ||
554 | key.u64 = KEY_PROFILE_FUNC; | ||
555 | tv = lj_tab_get(L, tabV(registry(L)), &key); | ||
556 | if (tvisfunc(tv)) { | ||
557 | char vmst = (char)vmstate; | ||
558 | int status; | ||
559 | setfuncV(L2, L2->top++, funcV(tv)); | ||
560 | setthreadV(L2, L2->top++, L); | ||
561 | setintV(L2->top++, samples); | ||
562 | setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1)); | ||
563 | status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */ | ||
564 | if (status) { | ||
565 | if (G(L2)->panic) G(L2)->panic(L2); | ||
566 | exit(EXIT_FAILURE); | ||
567 | } | ||
568 | lj_trace_abort(G(L2)); | ||
569 | } | ||
570 | } | ||
571 | |||
572 | /* profile.start(mode, cb) */ | ||
573 | LJLIB_CF(jit_profile_start) | ||
574 | { | ||
575 | GCtab *registry = tabV(registry(L)); | ||
576 | GCstr *mode = lj_lib_optstr(L, 1); | ||
577 | GCfunc *func = lj_lib_checkfunc(L, 2); | ||
578 | lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ | ||
579 | TValue key; | ||
580 | /* Anchor thread and function in registry. */ | ||
581 | key.u64 = KEY_PROFILE_THREAD; | ||
582 | setthreadV(L, lj_tab_set(L, registry, &key), L2); | ||
583 | key.u64 = KEY_PROFILE_FUNC; | ||
584 | setfuncV(L, lj_tab_set(L, registry, &key), func); | ||
585 | lj_gc_anybarriert(L, registry); | ||
586 | luaJIT_profile_start(L, mode ? strdata(mode) : "", | ||
587 | (luaJIT_profile_callback)jit_profile_callback, L2); | ||
588 | return 0; | ||
589 | } | ||
590 | |||
591 | /* profile.stop() */ | ||
592 | LJLIB_CF(jit_profile_stop) | ||
593 | { | ||
594 | GCtab *registry; | ||
595 | TValue key; | ||
596 | luaJIT_profile_stop(L); | ||
597 | registry = tabV(registry(L)); | ||
598 | key.u64 = KEY_PROFILE_THREAD; | ||
599 | setnilV(lj_tab_set(L, registry, &key)); | ||
600 | key.u64 = KEY_PROFILE_FUNC; | ||
601 | setnilV(lj_tab_set(L, registry, &key)); | ||
602 | lj_gc_anybarriert(L, registry); | ||
603 | return 0; | ||
604 | } | ||
605 | |||
606 | /* dump = profile.dumpstack([thread,] fmt, depth) */ | ||
607 | LJLIB_CF(jit_profile_dumpstack) | ||
608 | { | ||
609 | lua_State *L2 = L; | ||
610 | int arg = 0; | ||
611 | size_t len; | ||
612 | int depth; | ||
613 | GCstr *fmt; | ||
614 | const char *p; | ||
615 | if (L->top > L->base && tvisthread(L->base)) { | ||
616 | L2 = threadV(L->base); | ||
617 | arg = 1; | ||
618 | } | ||
619 | fmt = lj_lib_checkstr(L, arg+1); | ||
620 | depth = lj_lib_checkint(L, arg+2); | ||
621 | p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len); | ||
622 | lua_pushlstring(L, p, len); | ||
623 | return 1; | ||
624 | } | ||
625 | |||
626 | #include "lj_libdef.h" | ||
627 | |||
628 | static int luaopen_jit_profile(lua_State *L) | ||
629 | { | ||
630 | LJ_LIB_REG(L, NULL, jit_profile); | ||
631 | return 1; | ||
632 | } | ||
633 | |||
634 | #endif | ||
635 | |||
517 | /* -- JIT compiler initialization ----------------------------------------- */ | 636 | /* -- JIT compiler initialization ----------------------------------------- */ |
518 | 637 | ||
519 | #if LJ_HASJIT | 638 | #if LJ_HASJIT |
@@ -524,66 +643,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) | |||
524 | #undef JIT_PARAMINIT | 643 | #undef JIT_PARAMINIT |
525 | 0 | 644 | 0 |
526 | }; | 645 | }; |
527 | #endif | ||
528 | 646 | ||
529 | #if LJ_TARGET_ARM && LJ_TARGET_LINUX | 647 | #if LJ_TARGET_ARM && LJ_TARGET_LINUX |
530 | #include <sys/utsname.h> | 648 | #include <sys/utsname.h> |
531 | #endif | 649 | #endif |
532 | 650 | ||
533 | /* Arch-dependent CPU detection. */ | 651 | /* Arch-dependent CPU feature detection. */ |
534 | static uint32_t jit_cpudetect(lua_State *L) | 652 | static uint32_t jit_cpudetect(void) |
535 | { | 653 | { |
536 | uint32_t flags = 0; | 654 | uint32_t flags = 0; |
537 | #if LJ_TARGET_X86ORX64 | 655 | #if LJ_TARGET_X86ORX64 |
656 | |||
538 | uint32_t vendor[4]; | 657 | uint32_t vendor[4]; |
539 | uint32_t features[4]; | 658 | uint32_t features[4]; |
540 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { | 659 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { |
541 | #if !LJ_HASJIT | ||
542 | #define JIT_F_CMOV 1 | ||
543 | #define JIT_F_SSE2 2 | ||
544 | #endif | ||
545 | flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; | ||
546 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; | ||
547 | #if LJ_HASJIT | ||
548 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; | 660 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; |
549 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; | 661 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; |
550 | if (vendor[2] == 0x6c65746e) { /* Intel. */ | 662 | if (vendor[0] >= 7) { |
551 | if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ | 663 | uint32_t xfeatures[4]; |
552 | flags |= JIT_F_P4; /* Currently unused. */ | 664 | lj_vm_cpuid(7, xfeatures); |
553 | else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ | 665 | flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; |
554 | flags |= JIT_F_LEA_AGU; | ||
555 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ | ||
556 | uint32_t fam = (features[0] & 0x0ff00f00); | ||
557 | if (fam == 0x00000f00) /* K8. */ | ||
558 | flags |= JIT_F_SPLIT_XMM; | ||
559 | if (fam >= 0x00000f00) /* K8, K10. */ | ||
560 | flags |= JIT_F_PREFER_IMUL; | ||
561 | } | 666 | } |
562 | #endif | ||
563 | } | 667 | } |
564 | /* Check for required instruction set support on x86 (unnecessary on x64). */ | 668 | /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ |
565 | #if LJ_TARGET_X86 | 669 | |
566 | #if !defined(LUAJIT_CPU_NOCMOV) | ||
567 | if (!(flags & JIT_F_CMOV)) | ||
568 | luaL_error(L, "CPU not supported"); | ||
569 | #endif | ||
570 | #if defined(LUAJIT_CPU_SSE2) | ||
571 | if (!(flags & JIT_F_SSE2)) | ||
572 | luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); | ||
573 | #endif | ||
574 | #endif | ||
575 | #elif LJ_TARGET_ARM | 670 | #elif LJ_TARGET_ARM |
576 | #if LJ_HASJIT | 671 | |
577 | int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ | 672 | int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ |
578 | #if LJ_TARGET_LINUX | 673 | #if LJ_TARGET_LINUX |
579 | if (ver < 70) { /* Runtime ARM CPU detection. */ | 674 | if (ver < 70) { /* Runtime ARM CPU detection. */ |
580 | struct utsname ut; | 675 | struct utsname ut; |
581 | uname(&ut); | 676 | uname(&ut); |
582 | if (strncmp(ut.machine, "armv", 4) == 0) { | 677 | if (strncmp(ut.machine, "armv", 4) == 0) { |
583 | if (ut.machine[4] >= '7') | 678 | if (ut.machine[4] >= '8') ver = 80; |
584 | ver = 70; | 679 | else if (ut.machine[4] == '7') ver = 70; |
585 | else if (ut.machine[4] == '6') | 680 | else if (ut.machine[4] == '6') ver = 60; |
586 | ver = 60; | ||
587 | } | 681 | } |
588 | } | 682 | } |
589 | #endif | 683 | #endif |
@@ -591,74 +685,77 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
591 | ver >= 61 ? JIT_F_ARMV6T2_ : | 685 | ver >= 61 ? JIT_F_ARMV6T2_ : |
592 | ver >= 60 ? JIT_F_ARMV6_ : 0; | 686 | ver >= 60 ? JIT_F_ARMV6_ : 0; |
593 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; | 687 | flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; |
594 | #endif | 688 | |
689 | #elif LJ_TARGET_ARM64 | ||
690 | |||
691 | /* No optional CPU features to detect (for now). */ | ||
692 | |||
595 | #elif LJ_TARGET_PPC | 693 | #elif LJ_TARGET_PPC |
596 | #if LJ_HASJIT | 694 | |
597 | #if LJ_ARCH_SQRT | 695 | #if LJ_ARCH_SQRT |
598 | flags |= JIT_F_SQRT; | 696 | flags |= JIT_F_SQRT; |
599 | #endif | 697 | #endif |
600 | #if LJ_ARCH_ROUND | 698 | #if LJ_ARCH_ROUND |
601 | flags |= JIT_F_ROUND; | 699 | flags |= JIT_F_ROUND; |
602 | #endif | 700 | #endif |
603 | #endif | 701 | |
604 | #elif LJ_TARGET_PPCSPE | ||
605 | /* Nothing to do. */ | ||
606 | #elif LJ_TARGET_MIPS | 702 | #elif LJ_TARGET_MIPS |
607 | #if LJ_HASJIT | 703 | |
608 | /* Compile-time MIPS CPU detection. */ | 704 | /* Compile-time MIPS CPU detection. */ |
609 | #if LJ_ARCH_VERSION >= 20 | 705 | #if LJ_ARCH_VERSION >= 20 |
610 | flags |= JIT_F_MIPS32R2; | 706 | flags |= JIT_F_MIPSXXR2; |
611 | #endif | 707 | #endif |
612 | /* Runtime MIPS CPU detection. */ | 708 | /* Runtime MIPS CPU detection. */ |
613 | #if defined(__GNUC__) | 709 | #if defined(__GNUC__) |
614 | if (!(flags & JIT_F_MIPS32R2)) { | 710 | if (!(flags & JIT_F_MIPSXXR2)) { |
615 | int x; | 711 | int x; |
712 | #ifdef __mips16 | ||
713 | x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ | ||
714 | #else | ||
616 | /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ | 715 | /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ |
617 | __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); | 716 | __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); |
618 | if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ | ||
619 | } | ||
620 | #endif | 717 | #endif |
718 | if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ | ||
719 | } | ||
621 | #endif | 720 | #endif |
721 | |||
622 | #else | 722 | #else |
623 | #error "Missing CPU detection for this architecture" | 723 | #error "Missing CPU detection for this architecture" |
624 | #endif | 724 | #endif |
625 | UNUSED(L); | ||
626 | return flags; | 725 | return flags; |
627 | } | 726 | } |
628 | 727 | ||
629 | /* Initialize JIT compiler. */ | 728 | /* Initialize JIT compiler. */ |
630 | static void jit_init(lua_State *L) | 729 | static void jit_init(lua_State *L) |
631 | { | 730 | { |
632 | uint32_t flags = jit_cpudetect(L); | ||
633 | #if LJ_HASJIT | ||
634 | jit_State *J = L2J(L); | 731 | jit_State *J = L2J(L); |
635 | #if LJ_TARGET_X86 | 732 | J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; |
636 | /* Silently turn off the JIT compiler on CPUs without SSE2. */ | ||
637 | if ((flags & JIT_F_SSE2)) | ||
638 | #endif | ||
639 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; | ||
640 | memcpy(J->param, jit_param_default, sizeof(J->param)); | 733 | memcpy(J->param, jit_param_default, sizeof(J->param)); |
641 | lj_dispatch_update(G(L)); | 734 | lj_dispatch_update(G(L)); |
642 | #else | ||
643 | UNUSED(flags); | ||
644 | #endif | ||
645 | } | 735 | } |
736 | #endif | ||
646 | 737 | ||
647 | LUALIB_API int luaopen_jit(lua_State *L) | 738 | LUALIB_API int luaopen_jit(lua_State *L) |
648 | { | 739 | { |
740 | #if LJ_HASJIT | ||
741 | jit_init(L); | ||
742 | #endif | ||
649 | lua_pushliteral(L, LJ_OS_NAME); | 743 | lua_pushliteral(L, LJ_OS_NAME); |
650 | lua_pushliteral(L, LJ_ARCH_NAME); | 744 | lua_pushliteral(L, LJ_ARCH_NAME); |
651 | lua_pushinteger(L, LUAJIT_VERSION_NUM); | 745 | lua_pushinteger(L, LUAJIT_VERSION_NUM); |
652 | lua_pushliteral(L, LUAJIT_VERSION); | 746 | lua_pushliteral(L, LUAJIT_VERSION); |
653 | LJ_LIB_REG(L, LUA_JITLIBNAME, jit); | 747 | LJ_LIB_REG(L, LUA_JITLIBNAME, jit); |
748 | #if LJ_HASPROFILE | ||
749 | lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile, | ||
750 | tabref(L->env)); | ||
751 | #endif | ||
654 | #ifndef LUAJIT_DISABLE_JITUTIL | 752 | #ifndef LUAJIT_DISABLE_JITUTIL |
655 | LJ_LIB_REG(L, "jit.util", jit_util); | 753 | lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env)); |
656 | #endif | 754 | #endif |
657 | #if LJ_HASJIT | 755 | #if LJ_HASJIT |
658 | LJ_LIB_REG(L, "jit.opt", jit_opt); | 756 | LJ_LIB_REG(L, "jit.opt", jit_opt); |
659 | #endif | 757 | #endif |
660 | L->top -= 2; | 758 | L->top -= 2; |
661 | jit_init(L); | ||
662 | return 1; | 759 | return 1; |
663 | } | 760 | } |
664 | 761 | ||
diff --git a/src/lib_math.c b/src/lib_math.c index 56fb091b..b677bbcd 100644 --- a/src/lib_math.c +++ b/src/lib_math.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include "lj_obj.h" | 15 | #include "lj_obj.h" |
16 | #include "lj_lib.h" | 16 | #include "lj_lib.h" |
17 | #include "lj_vm.h" | 17 | #include "lj_vm.h" |
18 | #include "lj_prng.h" | ||
18 | 19 | ||
19 | /* ------------------------------------------------------------------------ */ | 20 | /* ------------------------------------------------------------------------ */ |
20 | 21 | ||
@@ -33,25 +34,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT) | |||
33 | lj_lib_checknum(L, 1); | 34 | lj_lib_checknum(L, 1); |
34 | return FFH_RETRY; | 35 | return FFH_RETRY; |
35 | } | 36 | } |
36 | LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) | 37 | LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10) |
37 | LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) | 38 | LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp) |
38 | LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) | 39 | LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin) |
39 | LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) | 40 | LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos) |
40 | LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) | 41 | LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan) |
41 | LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) | 42 | LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin) |
42 | LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) | 43 | LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos) |
43 | LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) | 44 | LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan) |
44 | LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) | 45 | LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh) |
45 | LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) | 46 | LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh) |
46 | LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) | 47 | LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh) |
47 | LJLIB_ASM_(math_frexp) | 48 | LJLIB_ASM_(math_frexp) |
48 | LJLIB_ASM_(math_modf) LJLIB_REC(.) | 49 | LJLIB_ASM_(math_modf) |
49 | |||
50 | LJLIB_PUSH(57.29577951308232) | ||
51 | LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) | ||
52 | |||
53 | LJLIB_PUSH(0.017453292519943295) | ||
54 | LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad) | ||
55 | 50 | ||
56 | LJLIB_ASM(math_log) LJLIB_REC(math_log) | 51 | LJLIB_ASM(math_log) LJLIB_REC(math_log) |
57 | { | 52 | { |
@@ -63,12 +58,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log) | |||
63 | #else | 58 | #else |
64 | x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); | 59 | x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); |
65 | #endif | 60 | #endif |
66 | setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */ | 61 | setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */ |
67 | return FFH_RES(1); | 62 | return FFH_RES(1); |
68 | } | 63 | } |
69 | return FFH_RETRY; | 64 | return FFH_RETRY; |
70 | } | 65 | } |
71 | 66 | ||
67 | LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */ | ||
68 | LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */ | ||
69 | |||
72 | LJLIB_ASM(math_atan2) LJLIB_REC(.) | 70 | LJLIB_ASM(math_atan2) LJLIB_REC(.) |
73 | { | 71 | { |
74 | lj_lib_checknum(L, 1); | 72 | lj_lib_checknum(L, 1); |
@@ -108,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge) | |||
108 | ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. | 106 | ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. |
109 | */ | 107 | */ |
110 | 108 | ||
111 | /* PRNG state. */ | ||
112 | struct RandomState { | ||
113 | uint64_t gen[4]; /* State of the 4 LFSR generators. */ | ||
114 | int valid; /* State is valid. */ | ||
115 | }; | ||
116 | |||
117 | /* Union needed for bit-pattern conversion between uint64_t and double. */ | 109 | /* Union needed for bit-pattern conversion between uint64_t and double. */ |
118 | typedef union { uint64_t u64; double d; } U64double; | 110 | typedef union { uint64_t u64; double d; } U64double; |
119 | 111 | ||
120 | /* Update generator i and compute a running xor of all states. */ | 112 | /* PRNG seeding function. */ |
121 | #define TW223_GEN(i, k, q, s) \ | 113 | static void random_seed(PRNGState *rs, double d) |
122 | z = rs->gen[i]; \ | ||
123 | z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ | ||
124 | r ^= z; rs->gen[i] = z; | ||
125 | |||
126 | /* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ | ||
127 | LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) | ||
128 | { | ||
129 | uint64_t z, r = 0; | ||
130 | TW223_GEN(0, 63, 31, 18) | ||
131 | TW223_GEN(1, 58, 19, 28) | ||
132 | TW223_GEN(2, 55, 24, 7) | ||
133 | TW223_GEN(3, 47, 21, 8) | ||
134 | return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); | ||
135 | } | ||
136 | |||
137 | /* PRNG initialization function. */ | ||
138 | static void random_init(RandomState *rs, double d) | ||
139 | { | 114 | { |
140 | uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ | 115 | uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ |
141 | int i; | 116 | int i; |
@@ -144,24 +119,22 @@ static void random_init(RandomState *rs, double d) | |||
144 | uint32_t m = 1u << (r&255); | 119 | uint32_t m = 1u << (r&255); |
145 | r >>= 8; | 120 | r >>= 8; |
146 | u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; | 121 | u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; |
147 | if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ | 122 | if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */ |
148 | rs->gen[i] = u.u64; | 123 | rs->u[i] = u.u64; |
149 | } | 124 | } |
150 | rs->valid = 1; | ||
151 | for (i = 0; i < 10; i++) | 125 | for (i = 0; i < 10; i++) |
152 | lj_math_random_step(rs); | 126 | (void)lj_prng_u64(rs); |
153 | } | 127 | } |
154 | 128 | ||
155 | /* PRNG extract function. */ | 129 | /* PRNG extract function. */ |
156 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ | 130 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ |
157 | LJLIB_CF(math_random) LJLIB_REC(.) | 131 | LJLIB_CF(math_random) LJLIB_REC(.) |
158 | { | 132 | { |
159 | int n = (int)(L->top - L->base); | 133 | int n = (int)(L->top - L->base); |
160 | RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | 134 | PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); |
161 | U64double u; | 135 | U64double u; |
162 | double d; | 136 | double d; |
163 | if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); | 137 | u.u64 = lj_prng_u64d(rs); |
164 | u.u64 = lj_math_random_step(rs); | ||
165 | d = u.d - 1.0; | 138 | d = u.d - 1.0; |
166 | if (n > 0) { | 139 | if (n > 0) { |
167 | #if LJ_DUALNUM | 140 | #if LJ_DUALNUM |
@@ -206,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.) | |||
206 | } | 179 | } |
207 | 180 | ||
208 | /* PRNG seed function. */ | 181 | /* PRNG seed function. */ |
209 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ | 182 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ |
210 | LJLIB_CF(math_randomseed) | 183 | LJLIB_CF(math_randomseed) |
211 | { | 184 | { |
212 | RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | 185 | PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); |
213 | random_init(rs, lj_lib_checknum(L, 1)); | 186 | random_seed(rs, lj_lib_checknum(L, 1)); |
214 | return 0; | 187 | return 0; |
215 | } | 188 | } |
216 | 189 | ||
@@ -220,14 +193,9 @@ LJLIB_CF(math_randomseed) | |||
220 | 193 | ||
221 | LUALIB_API int luaopen_math(lua_State *L) | 194 | LUALIB_API int luaopen_math(lua_State *L) |
222 | { | 195 | { |
223 | RandomState *rs; | 196 | PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState)); |
224 | rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); | 197 | lj_prng_seed_fixed(rs); |
225 | rs->valid = 0; /* Use lazy initialization to save some time on startup. */ | ||
226 | LJ_LIB_REG(L, LUA_MATHLIBNAME, math); | 198 | LJ_LIB_REG(L, LUA_MATHLIBNAME, math); |
227 | #if defined(LUA_COMPAT_MOD) && !LJ_52 | ||
228 | lua_getfield(L, -1, "fmod"); | ||
229 | lua_setfield(L, -2, "mod"); | ||
230 | #endif | ||
231 | return 1; | 199 | return 1; |
232 | } | 200 | } |
233 | 201 | ||
diff --git a/src/lib_os.c b/src/lib_os.c index 7ad7dfaf..a61ded40 100644 --- a/src/lib_os.c +++ b/src/lib_os.c | |||
@@ -17,7 +17,10 @@ | |||
17 | #include "lualib.h" | 17 | #include "lualib.h" |
18 | 18 | ||
19 | #include "lj_obj.h" | 19 | #include "lj_obj.h" |
20 | #include "lj_gc.h" | ||
20 | #include "lj_err.h" | 21 | #include "lj_err.h" |
22 | #include "lj_buf.h" | ||
23 | #include "lj_str.h" | ||
21 | #include "lj_lib.h" | 24 | #include "lj_lib.h" |
22 | 25 | ||
23 | #if LJ_TARGET_POSIX | 26 | #if LJ_TARGET_POSIX |
@@ -188,7 +191,7 @@ LJLIB_CF(os_date) | |||
188 | #endif | 191 | #endif |
189 | } | 192 | } |
190 | if (stm == NULL) { /* Invalid date? */ | 193 | if (stm == NULL) { /* Invalid date? */ |
191 | setnilV(L->top-1); | 194 | setnilV(L->top++); |
192 | } else if (strcmp(s, "*t") == 0) { | 195 | } else if (strcmp(s, "*t") == 0) { |
193 | lua_createtable(L, 0, 9); /* 9 = number of fields */ | 196 | lua_createtable(L, 0, 9); /* 9 = number of fields */ |
194 | setfield(L, "sec", stm->tm_sec); | 197 | setfield(L, "sec", stm->tm_sec); |
@@ -200,23 +203,25 @@ LJLIB_CF(os_date) | |||
200 | setfield(L, "wday", stm->tm_wday+1); | 203 | setfield(L, "wday", stm->tm_wday+1); |
201 | setfield(L, "yday", stm->tm_yday+1); | 204 | setfield(L, "yday", stm->tm_yday+1); |
202 | setboolfield(L, "isdst", stm->tm_isdst); | 205 | setboolfield(L, "isdst", stm->tm_isdst); |
203 | } else { | 206 | } else if (*s) { |
204 | char cc[3]; | 207 | SBuf *sb = &G(L)->tmpbuf; |
205 | luaL_Buffer b; | 208 | MSize sz = 0, retry = 4; |
206 | cc[0] = '%'; cc[2] = '\0'; | 209 | const char *q; |
207 | luaL_buffinit(L, &b); | 210 | for (q = s; *q; q++) |
208 | for (; *s; s++) { | 211 | sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */ |
209 | if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ | 212 | setsbufL(sb, L); |
210 | luaL_addchar(&b, *s); | 213 | while (retry--) { /* Limit growth for invalid format or empty result. */ |
211 | } else { | 214 | char *buf = lj_buf_need(sb, sz); |
212 | size_t reslen; | 215 | size_t len = strftime(buf, sbufsz(sb), s, stm); |
213 | char buff[200]; /* Should be big enough for any conversion result. */ | 216 | if (len) { |
214 | cc[1] = *(++s); | 217 | setstrV(L, L->top++, lj_str_new(L, buf, len)); |
215 | reslen = strftime(buff, sizeof(buff), cc, stm); | 218 | lj_gc_check(L); |
216 | luaL_addlstring(&b, buff, reslen); | 219 | break; |
217 | } | 220 | } |
221 | sz += (sz|1); | ||
218 | } | 222 | } |
219 | luaL_pushresult(&b); | 223 | } else { |
224 | setstrV(L, L->top++, &G(L)->strempty); | ||
220 | } | 225 | } |
221 | return 1; | 226 | return 1; |
222 | } | 227 | } |
diff --git a/src/lib_package.c b/src/lib_package.c index a9c1ca48..1f8b0677 100644 --- a/src/lib_package.c +++ b/src/lib_package.c | |||
@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym) | |||
76 | BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); | 76 | BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | #if LJ_TARGET_UWP | ||
80 | void *LJ_WIN_LOADLIBA(const char *path) | ||
81 | { | ||
82 | DWORD err = GetLastError(); | ||
83 | wchar_t wpath[256]; | ||
84 | HANDLE lib = NULL; | ||
85 | if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) { | ||
86 | lib = LoadPackagedLibrary(wpath, 0); | ||
87 | } | ||
88 | SetLastError(err); | ||
89 | return lib; | ||
90 | } | ||
91 | #endif | ||
92 | |||
79 | #undef setprogdir | 93 | #undef setprogdir |
80 | 94 | ||
81 | static void setprogdir(lua_State *L) | 95 | static void setprogdir(lua_State *L) |
@@ -96,9 +110,17 @@ static void setprogdir(lua_State *L) | |||
96 | static void pusherror(lua_State *L) | 110 | static void pusherror(lua_State *L) |
97 | { | 111 | { |
98 | DWORD error = GetLastError(); | 112 | DWORD error = GetLastError(); |
113 | #if LJ_TARGET_XBOXONE | ||
114 | wchar_t wbuffer[128]; | ||
115 | char buffer[128*2]; | ||
116 | if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, | ||
117 | NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) && | ||
118 | WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL)) | ||
119 | #else | ||
99 | char buffer[128]; | 120 | char buffer[128]; |
100 | if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, | 121 | if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, |
101 | NULL, error, 0, buffer, sizeof(buffer), NULL)) | 122 | NULL, error, 0, buffer, sizeof(buffer), NULL)) |
123 | #endif | ||
102 | lua_pushstring(L, buffer); | 124 | lua_pushstring(L, buffer); |
103 | else | 125 | else |
104 | lua_pushfstring(L, "system error %d\n", error); | 126 | lua_pushfstring(L, "system error %d\n", error); |
@@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib) | |||
111 | 133 | ||
112 | static void *ll_load(lua_State *L, const char *path, int gl) | 134 | static void *ll_load(lua_State *L, const char *path, int gl) |
113 | { | 135 | { |
114 | HINSTANCE lib = LoadLibraryA(path); | 136 | HINSTANCE lib = LJ_WIN_LOADLIBA(path); |
115 | if (lib == NULL) pusherror(L); | 137 | if (lib == NULL) pusherror(L); |
116 | UNUSED(gl); | 138 | UNUSED(gl); |
117 | return lib; | 139 | return lib; |
@@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) | |||
124 | return f; | 146 | return f; |
125 | } | 147 | } |
126 | 148 | ||
149 | #if LJ_TARGET_UWP | ||
150 | EXTERN_C IMAGE_DOS_HEADER __ImageBase; | ||
151 | #endif | ||
152 | |||
127 | static const char *ll_bcsym(void *lib, const char *sym) | 153 | static const char *ll_bcsym(void *lib, const char *sym) |
128 | { | 154 | { |
129 | if (lib) { | 155 | if (lib) { |
130 | return (const char *)GetProcAddress((HINSTANCE)lib, sym); | 156 | return (const char *)GetProcAddress((HINSTANCE)lib, sym); |
131 | } else { | 157 | } else { |
158 | #if LJ_TARGET_UWP | ||
159 | return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym); | ||
160 | #else | ||
132 | HINSTANCE h = GetModuleHandleA(NULL); | 161 | HINSTANCE h = GetModuleHandleA(NULL); |
133 | const char *p = (const char *)GetProcAddress(h, sym); | 162 | const char *p = (const char *)GetProcAddress(h, sym); |
134 | if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, | 163 | if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, |
135 | (const char *)ll_bcsym, &h)) | 164 | (const char *)ll_bcsym, &h)) |
136 | p = (const char *)GetProcAddress(h, sym); | 165 | p = (const char *)GetProcAddress(h, sym); |
137 | return p; | 166 | return p; |
167 | #endif | ||
138 | } | 168 | } |
139 | } | 169 | } |
140 | 170 | ||
@@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path) | |||
185 | lua_pop(L, 1); | 215 | lua_pop(L, 1); |
186 | plib = (void **)lua_newuserdata(L, sizeof(void *)); | 216 | plib = (void **)lua_newuserdata(L, sizeof(void *)); |
187 | *plib = NULL; | 217 | *plib = NULL; |
188 | luaL_getmetatable(L, "_LOADLIB"); | 218 | luaL_setmetatable(L, "_LOADLIB"); |
189 | lua_setmetatable(L, -2); | ||
190 | lua_pushfstring(L, "LOADLIB: %s", path); | 219 | lua_pushfstring(L, "LOADLIB: %s", path); |
191 | lua_pushvalue(L, -2); | 220 | lua_pushvalue(L, -2); |
192 | lua_settable(L, LUA_REGISTRYINDEX); | 221 | lua_settable(L, LUA_REGISTRYINDEX); |
@@ -396,8 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L) | |||
396 | 425 | ||
397 | /* ------------------------------------------------------------------------ */ | 426 | /* ------------------------------------------------------------------------ */ |
398 | 427 | ||
399 | static const int sentinel_ = 0; | 428 | #define KEY_SENTINEL (U64x(80000000,00000000)|'s') |
400 | #define sentinel ((void *)&sentinel_) | ||
401 | 429 | ||
402 | static int lj_cf_package_require(lua_State *L) | 430 | static int lj_cf_package_require(lua_State *L) |
403 | { | 431 | { |
@@ -407,7 +435,7 @@ static int lj_cf_package_require(lua_State *L) | |||
407 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); | 435 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); |
408 | lua_getfield(L, 2, name); | 436 | lua_getfield(L, 2, name); |
409 | if (lua_toboolean(L, -1)) { /* is it there? */ | 437 | if (lua_toboolean(L, -1)) { /* is it there? */ |
410 | if (lua_touserdata(L, -1) == sentinel) /* check loops */ | 438 | if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */ |
411 | luaL_error(L, "loop or previous error loading module " LUA_QS, name); | 439 | luaL_error(L, "loop or previous error loading module " LUA_QS, name); |
412 | return 1; /* package is already loaded */ | 440 | return 1; /* package is already loaded */ |
413 | } | 441 | } |
@@ -430,14 +458,14 @@ static int lj_cf_package_require(lua_State *L) | |||
430 | else | 458 | else |
431 | lua_pop(L, 1); | 459 | lua_pop(L, 1); |
432 | } | 460 | } |
433 | lua_pushlightuserdata(L, sentinel); | 461 | (L->top++)->u64 = KEY_SENTINEL; |
434 | lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ | 462 | lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ |
435 | lua_pushstring(L, name); /* pass name as argument to module */ | 463 | lua_pushstring(L, name); /* pass name as argument to module */ |
436 | lua_call(L, 1, 1); /* run loaded module */ | 464 | lua_call(L, 1, 1); /* run loaded module */ |
437 | if (!lua_isnil(L, -1)) /* non-nil return? */ | 465 | if (!lua_isnil(L, -1)) /* non-nil return? */ |
438 | lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ | 466 | lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ |
439 | lua_getfield(L, 2, name); | 467 | lua_getfield(L, 2, name); |
440 | if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */ | 468 | if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */ |
441 | lua_pushboolean(L, 1); /* use true as result */ | 469 | lua_pushboolean(L, 1); /* use true as result */ |
442 | lua_pushvalue(L, -1); /* extra copy to be returned */ | 470 | lua_pushvalue(L, -1); /* extra copy to be returned */ |
443 | lua_setfield(L, 2, name); /* _LOADED[name] = true */ | 471 | lua_setfield(L, 2, name); /* _LOADED[name] = true */ |
@@ -487,29 +515,19 @@ static void modinit(lua_State *L, const char *modname) | |||
487 | static int lj_cf_package_module(lua_State *L) | 515 | static int lj_cf_package_module(lua_State *L) |
488 | { | 516 | { |
489 | const char *modname = luaL_checkstring(L, 1); | 517 | const char *modname = luaL_checkstring(L, 1); |
490 | int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ | 518 | int lastarg = (int)(L->top - L->base); |
491 | lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); | 519 | luaL_pushmodule(L, modname, 1); |
492 | lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ | ||
493 | if (!lua_istable(L, -1)) { /* not found? */ | ||
494 | lua_pop(L, 1); /* remove previous result */ | ||
495 | /* try global variable (and create one if it does not exist) */ | ||
496 | if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) | ||
497 | lj_err_callerv(L, LJ_ERR_BADMODN, modname); | ||
498 | lua_pushvalue(L, -1); | ||
499 | lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ | ||
500 | } | ||
501 | /* check whether table already has a _NAME field */ | ||
502 | lua_getfield(L, -1, "_NAME"); | 520 | lua_getfield(L, -1, "_NAME"); |
503 | if (!lua_isnil(L, -1)) { /* is table an initialized module? */ | 521 | if (!lua_isnil(L, -1)) { /* Module already initialized? */ |
504 | lua_pop(L, 1); | 522 | lua_pop(L, 1); |
505 | } else { /* no; initialize it */ | 523 | } else { |
506 | lua_pop(L, 1); | 524 | lua_pop(L, 1); |
507 | modinit(L, modname); | 525 | modinit(L, modname); |
508 | } | 526 | } |
509 | lua_pushvalue(L, -1); | 527 | lua_pushvalue(L, -1); |
510 | setfenv(L); | 528 | setfenv(L); |
511 | dooptions(L, loaded - 1); | 529 | dooptions(L, lastarg); |
512 | return 0; | 530 | return LJ_52; |
513 | } | 531 | } |
514 | 532 | ||
515 | static int lj_cf_package_seeall(lua_State *L) | 533 | static int lj_cf_package_seeall(lua_State *L) |
@@ -580,13 +598,16 @@ LUALIB_API int luaopen_package(lua_State *L) | |||
580 | lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); | 598 | lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); |
581 | lua_setfield(L, -2, "__gc"); | 599 | lua_setfield(L, -2, "__gc"); |
582 | luaL_register(L, LUA_LOADLIBNAME, package_lib); | 600 | luaL_register(L, LUA_LOADLIBNAME, package_lib); |
583 | lua_pushvalue(L, -1); | 601 | lua_copy(L, -1, LUA_ENVIRONINDEX); |
584 | lua_replace(L, LUA_ENVIRONINDEX); | ||
585 | lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); | 602 | lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); |
586 | for (i = 0; package_loaders[i] != NULL; i++) { | 603 | for (i = 0; package_loaders[i] != NULL; i++) { |
587 | lj_lib_pushcf(L, package_loaders[i], 1); | 604 | lj_lib_pushcf(L, package_loaders[i], 1); |
588 | lua_rawseti(L, -2, i+1); | 605 | lua_rawseti(L, -2, i+1); |
589 | } | 606 | } |
607 | #if LJ_52 | ||
608 | lua_pushvalue(L, -1); | ||
609 | lua_setfield(L, -3, "searchers"); | ||
610 | #endif | ||
590 | lua_setfield(L, -2, "loaders"); | 611 | lua_setfield(L, -2, "loaders"); |
591 | lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); | 612 | lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); |
592 | noenv = lua_toboolean(L, -1); | 613 | noenv = lua_toboolean(L, -1); |
diff --git a/src/lib_string.c b/src/lib_string.c index 60bb8088..79aeddfc 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
@@ -6,8 +6,6 @@ | |||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | 6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lib_string_c | 9 | #define lib_string_c |
12 | #define LUA_LIB | 10 | #define LUA_LIB |
13 | 11 | ||
@@ -18,6 +16,7 @@ | |||
18 | #include "lj_obj.h" | 16 | #include "lj_obj.h" |
19 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
20 | #include "lj_err.h" | 18 | #include "lj_err.h" |
19 | #include "lj_buf.h" | ||
21 | #include "lj_str.h" | 20 | #include "lj_str.h" |
22 | #include "lj_tab.h" | 21 | #include "lj_tab.h" |
23 | #include "lj_meta.h" | 22 | #include "lj_meta.h" |
@@ -25,17 +24,19 @@ | |||
25 | #include "lj_ff.h" | 24 | #include "lj_ff.h" |
26 | #include "lj_bcdump.h" | 25 | #include "lj_bcdump.h" |
27 | #include "lj_char.h" | 26 | #include "lj_char.h" |
27 | #include "lj_strfmt.h" | ||
28 | #include "lj_lib.h" | 28 | #include "lj_lib.h" |
29 | 29 | ||
30 | /* ------------------------------------------------------------------------ */ | 30 | /* ------------------------------------------------------------------------ */ |
31 | 31 | ||
32 | #define LJLIB_MODULE_string | 32 | #define LJLIB_MODULE_string |
33 | 33 | ||
34 | LJLIB_ASM(string_len) LJLIB_REC(.) | 34 | LJLIB_LUA(string_len) /* |
35 | { | 35 | function(s) |
36 | lj_lib_checkstr(L, 1); | 36 | CHECK_str(s) |
37 | return FFH_RETRY; | 37 | return #s |
38 | } | 38 | end |
39 | */ | ||
39 | 40 | ||
40 | LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) | 41 | LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) |
41 | { | 42 | { |
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) | |||
57 | lj_state_checkstack(L, (MSize)n); | 58 | lj_state_checkstack(L, (MSize)n); |
58 | p = (const unsigned char *)strdata(s) + start; | 59 | p = (const unsigned char *)strdata(s) + start; |
59 | for (i = 0; i < n; i++) | 60 | for (i = 0; i < n; i++) |
60 | setintV(L->base + i-1, p[i]); | 61 | setintV(L->base + i-1-LJ_FR2, p[i]); |
61 | return FFH_RES(n); | 62 | return FFH_RES(n); |
62 | } | 63 | } |
63 | 64 | ||
64 | LJLIB_ASM(string_char) | 65 | LJLIB_ASM(string_char) LJLIB_REC(.) |
65 | { | 66 | { |
66 | int i, nargs = (int)(L->top - L->base); | 67 | int i, nargs = (int)(L->top - L->base); |
67 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); | 68 | char *buf = lj_buf_tmp(L, (MSize)nargs); |
68 | for (i = 1; i <= nargs; i++) { | 69 | for (i = 1; i <= nargs; i++) { |
69 | int32_t k = lj_lib_checkint(L, i); | 70 | int32_t k = lj_lib_checkint(L, i); |
70 | if (!checku8(k)) | 71 | if (!checku8(k)) |
71 | lj_err_arg(L, i, LJ_ERR_BADVAL); | 72 | lj_err_arg(L, i, LJ_ERR_BADVAL); |
72 | buf[i-1] = (char)k; | 73 | buf[i-1] = (char)k; |
73 | } | 74 | } |
74 | setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); | 75 | setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs)); |
75 | return FFH_RES(1); | 76 | return FFH_RES(1); |
76 | } | 77 | } |
77 | 78 | ||
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1) | |||
83 | return FFH_RETRY; | 84 | return FFH_RETRY; |
84 | } | 85 | } |
85 | 86 | ||
86 | LJLIB_ASM(string_rep) | 87 | LJLIB_CF(string_rep) LJLIB_REC(.) |
87 | { | 88 | { |
88 | GCstr *s = lj_lib_checkstr(L, 1); | 89 | GCstr *s = lj_lib_checkstr(L, 1); |
89 | int32_t k = lj_lib_checkint(L, 2); | 90 | int32_t rep = lj_lib_checkint(L, 2); |
90 | GCstr *sep = lj_lib_optstr(L, 3); | 91 | GCstr *sep = lj_lib_optstr(L, 3); |
91 | int32_t len = (int32_t)s->len; | 92 | SBuf *sb = lj_buf_tmp_(L); |
92 | global_State *g = G(L); | 93 | if (sep && rep > 1) { |
93 | int64_t tlen; | 94 | GCstr *s2 = lj_buf_cat2str(L, sep, s); |
94 | const char *src; | 95 | lj_buf_reset(sb); |
95 | char *buf; | 96 | lj_buf_putstr(sb, s); |
96 | if (k <= 0) { | 97 | s = s2; |
97 | empty: | 98 | rep--; |
98 | setstrV(L, L->base-1, &g->strempty); | ||
99 | return FFH_RES(1); | ||
100 | } | ||
101 | if (sep) { | ||
102 | tlen = (int64_t)len + sep->len; | ||
103 | if (tlen > LJ_MAX_STR) | ||
104 | lj_err_caller(L, LJ_ERR_STROV); | ||
105 | tlen *= k; | ||
106 | if (tlen > LJ_MAX_STR) | ||
107 | lj_err_caller(L, LJ_ERR_STROV); | ||
108 | } else { | ||
109 | tlen = (int64_t)k * len; | ||
110 | if (tlen > LJ_MAX_STR) | ||
111 | lj_err_caller(L, LJ_ERR_STROV); | ||
112 | } | ||
113 | if (tlen == 0) goto empty; | ||
114 | buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen); | ||
115 | src = strdata(s); | ||
116 | if (sep) { | ||
117 | tlen -= sep->len; /* Ignore trailing separator. */ | ||
118 | if (k > 1) { /* Paste one string and one separator. */ | ||
119 | int32_t i; | ||
120 | i = 0; while (i < len) *buf++ = src[i++]; | ||
121 | src = strdata(sep); len = sep->len; | ||
122 | i = 0; while (i < len) *buf++ = src[i++]; | ||
123 | src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */ | ||
124 | } | ||
125 | } | 99 | } |
126 | do { | 100 | sb = lj_buf_putstr_rep(sb, s, rep); |
127 | int32_t i = 0; | 101 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
128 | do { *buf++ = src[i++]; } while (i < len); | 102 | lj_gc_check(L); |
129 | } while (--k > 0); | 103 | return 1; |
130 | setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen)); | ||
131 | return FFH_RES(1); | ||
132 | } | 104 | } |
133 | 105 | ||
134 | LJLIB_ASM(string_reverse) | 106 | LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse) |
135 | { | 107 | { |
136 | GCstr *s = lj_lib_checkstr(L, 1); | 108 | lj_lib_checkstr(L, 1); |
137 | lj_str_needbuf(L, &G(L)->tmpbuf, s->len); | ||
138 | return FFH_RETRY; | 109 | return FFH_RETRY; |
139 | } | 110 | } |
140 | LJLIB_ASM_(string_lower) | 111 | LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower) |
141 | LJLIB_ASM_(string_upper) | 112 | LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper) |
142 | 113 | ||
143 | /* ------------------------------------------------------------------------ */ | 114 | /* ------------------------------------------------------------------------ */ |
144 | 115 | ||
145 | static int writer_buf(lua_State *L, const void *p, size_t size, void *b) | 116 | static int writer_buf(lua_State *L, const void *p, size_t size, void *sb) |
146 | { | 117 | { |
147 | luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); | 118 | lj_buf_putmem((SBuf *)sb, p, (MSize)size); |
148 | UNUSED(L); | 119 | UNUSED(L); |
149 | return 0; | 120 | return 0; |
150 | } | 121 | } |
@@ -153,19 +124,19 @@ LJLIB_CF(string_dump) | |||
153 | { | 124 | { |
154 | GCfunc *fn = lj_lib_checkfunc(L, 1); | 125 | GCfunc *fn = lj_lib_checkfunc(L, 1); |
155 | int strip = L->base+1 < L->top && tvistruecond(L->base+1); | 126 | int strip = L->base+1 < L->top && tvistruecond(L->base+1); |
156 | luaL_Buffer b; | 127 | SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */ |
157 | L->top = L->base+1; | 128 | L->top = L->base+1; |
158 | luaL_buffinit(L, &b); | 129 | if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip)) |
159 | if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip)) | ||
160 | lj_err_caller(L, LJ_ERR_STRDUMP); | 130 | lj_err_caller(L, LJ_ERR_STRDUMP); |
161 | luaL_pushresult(&b); | 131 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
132 | lj_gc_check(L); | ||
162 | return 1; | 133 | return 1; |
163 | } | 134 | } |
164 | 135 | ||
165 | /* ------------------------------------------------------------------------ */ | 136 | /* ------------------------------------------------------------------------ */ |
166 | 137 | ||
167 | /* macro to `unsign' a character */ | 138 | /* macro to `unsign' a character */ |
168 | #define uchar(c) ((unsigned char)(c)) | 139 | #define uchar(c) ((unsigned char)(c)) |
169 | 140 | ||
170 | #define CAP_UNFINISHED (-1) | 141 | #define CAP_UNFINISHED (-1) |
171 | #define CAP_POSITION (-2) | 142 | #define CAP_POSITION (-2) |
@@ -183,7 +154,6 @@ typedef struct MatchState { | |||
183 | } MatchState; | 154 | } MatchState; |
184 | 155 | ||
185 | #define L_ESC '%' | 156 | #define L_ESC '%' |
186 | #define SPECIALS "^$*+?.([%-" | ||
187 | 157 | ||
188 | static int check_capture(MatchState *ms, int l) | 158 | static int check_capture(MatchState *ms, int l) |
189 | { | 159 | { |
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p) | |||
450 | return s; | 420 | return s; |
451 | } | 421 | } |
452 | 422 | ||
453 | static const char *lmemfind(const char *s1, size_t l1, | ||
454 | const char *s2, size_t l2) | ||
455 | { | ||
456 | if (l2 == 0) { | ||
457 | return s1; /* empty strings are everywhere */ | ||
458 | } else if (l2 > l1) { | ||
459 | return NULL; /* avoids a negative `l1' */ | ||
460 | } else { | ||
461 | const char *init; /* to search for a `*s2' inside `s1' */ | ||
462 | l2--; /* 1st char will be checked by `memchr' */ | ||
463 | l1 = l1-l2; /* `s2' cannot be found after that */ | ||
464 | while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { | ||
465 | init++; /* 1st char is already checked */ | ||
466 | if (memcmp(init, s2+1, l2) == 0) { | ||
467 | return init-1; | ||
468 | } else { /* correct `l1' and `s1' to try again */ | ||
469 | l1 -= (size_t)(init-s1); | ||
470 | s1 = init; | ||
471 | } | ||
472 | } | ||
473 | return NULL; /* not found */ | ||
474 | } | ||
475 | } | ||
476 | |||
477 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) | 423 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) |
478 | { | 424 | { |
479 | if (i >= ms->level) { | 425 | if (i >= ms->level) { |
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e) | |||
501 | return nlevels; /* number of strings pushed */ | 447 | return nlevels; /* number of strings pushed */ |
502 | } | 448 | } |
503 | 449 | ||
504 | static ptrdiff_t posrelat(ptrdiff_t pos, size_t len) | ||
505 | { | ||
506 | /* relative string position: negative means back from end */ | ||
507 | if (pos < 0) pos += (ptrdiff_t)len + 1; | ||
508 | return (pos >= 0) ? pos : 0; | ||
509 | } | ||
510 | |||
511 | static int str_find_aux(lua_State *L, int find) | 450 | static int str_find_aux(lua_State *L, int find) |
512 | { | 451 | { |
513 | size_t l1, l2; | 452 | GCstr *s = lj_lib_checkstr(L, 1); |
514 | const char *s = luaL_checklstring(L, 1, &l1); | 453 | GCstr *p = lj_lib_checkstr(L, 2); |
515 | const char *p = luaL_checklstring(L, 2, &l2); | 454 | int32_t start = lj_lib_optint(L, 3, 1); |
516 | ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; | 455 | MSize st; |
517 | if (init < 0) { | 456 | if (start < 0) start += (int32_t)s->len; else start--; |
518 | init = 0; | 457 | if (start < 0) start = 0; |
519 | } else if ((size_t)(init) > l1) { | 458 | st = (MSize)start; |
459 | if (st > s->len) { | ||
520 | #if LJ_52 | 460 | #if LJ_52 |
521 | setnilV(L->top-1); | 461 | setnilV(L->top-1); |
522 | return 1; | 462 | return 1; |
523 | #else | 463 | #else |
524 | init = (ptrdiff_t)l1; | 464 | st = s->len; |
525 | #endif | 465 | #endif |
526 | } | 466 | } |
527 | if (find && (lua_toboolean(L, 4) || /* explicit request? */ | 467 | if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) || |
528 | strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ | 468 | !lj_str_haspattern(p))) { /* Search for fixed string. */ |
529 | /* do a plain search */ | 469 | const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len); |
530 | const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); | 470 | if (q) { |
531 | if (s2) { | 471 | setintV(L->top-2, (int32_t)(q-strdata(s)) + 1); |
532 | lua_pushinteger(L, s2-s+1); | 472 | setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len); |
533 | lua_pushinteger(L, s2-s+(ptrdiff_t)l2); | ||
534 | return 2; | 473 | return 2; |
535 | } | 474 | } |
536 | } else { | 475 | } else { /* Search for pattern. */ |
537 | MatchState ms; | 476 | MatchState ms; |
538 | int anchor = (*p == '^') ? (p++, 1) : 0; | 477 | const char *pstr = strdata(p); |
539 | const char *s1=s+init; | 478 | const char *sstr = strdata(s) + st; |
479 | int anchor = 0; | ||
480 | if (*pstr == '^') { pstr++; anchor = 1; } | ||
540 | ms.L = L; | 481 | ms.L = L; |
541 | ms.src_init = s; | 482 | ms.src_init = strdata(s); |
542 | ms.src_end = s+l1; | 483 | ms.src_end = strdata(s) + s->len; |
543 | do { | 484 | do { /* Loop through string and try to match the pattern. */ |
544 | const char *res; | 485 | const char *q; |
545 | ms.level = ms.depth = 0; | 486 | ms.level = ms.depth = 0; |
546 | if ((res=match(&ms, s1, p)) != NULL) { | 487 | q = match(&ms, sstr, pstr); |
488 | if (q) { | ||
547 | if (find) { | 489 | if (find) { |
548 | lua_pushinteger(L, s1-s+1); /* start */ | 490 | setintV(L->top++, (int32_t)(sstr-(strdata(s)-1))); |
549 | lua_pushinteger(L, res-s); /* end */ | 491 | setintV(L->top++, (int32_t)(q-strdata(s))); |
550 | return push_captures(&ms, NULL, 0) + 2; | 492 | return push_captures(&ms, NULL, NULL) + 2; |
551 | } else { | 493 | } else { |
552 | return push_captures(&ms, s1, res); | 494 | return push_captures(&ms, sstr, q); |
553 | } | 495 | } |
554 | } | 496 | } |
555 | } while (s1++ < ms.src_end && !anchor); | 497 | } while (sstr++ < ms.src_end && !anchor); |
556 | } | 498 | } |
557 | lua_pushnil(L); /* not found */ | 499 | setnilV(L->top-1); /* Not found. */ |
558 | return 1; | 500 | return 1; |
559 | } | 501 | } |
560 | 502 | ||
561 | LJLIB_CF(string_find) | 503 | LJLIB_CF(string_find) LJLIB_REC(.) |
562 | { | 504 | { |
563 | return str_find_aux(L, 1); | 505 | return str_find_aux(L, 1); |
564 | } | 506 | } |
@@ -698,222 +640,16 @@ LJLIB_CF(string_gsub) | |||
698 | 640 | ||
699 | /* ------------------------------------------------------------------------ */ | 641 | /* ------------------------------------------------------------------------ */ |
700 | 642 | ||
701 | /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ | 643 | LJLIB_CF(string_format) LJLIB_REC(.) |
702 | #define MAX_FMTITEM 512 | ||
703 | /* valid flags in a format specification */ | ||
704 | #define FMT_FLAGS "-+ #0" | ||
705 | /* | ||
706 | ** maximum size of each format specification (such as '%-099.99d') | ||
707 | ** (+10 accounts for %99.99x plus margin of error) | ||
708 | */ | ||
709 | #define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) | ||
710 | |||
711 | static void addquoted(lua_State *L, luaL_Buffer *b, int arg) | ||
712 | { | ||
713 | GCstr *str = lj_lib_checkstr(L, arg); | ||
714 | int32_t len = (int32_t)str->len; | ||
715 | const char *s = strdata(str); | ||
716 | luaL_addchar(b, '"'); | ||
717 | while (len--) { | ||
718 | uint32_t c = uchar(*s); | ||
719 | if (c == '"' || c == '\\' || c == '\n') { | ||
720 | luaL_addchar(b, '\\'); | ||
721 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ | ||
722 | uint32_t d; | ||
723 | luaL_addchar(b, '\\'); | ||
724 | if (c >= 100 || lj_char_isdigit(uchar(s[1]))) { | ||
725 | luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100; | ||
726 | goto tens; | ||
727 | } else if (c >= 10) { | ||
728 | tens: | ||
729 | d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d); | ||
730 | } | ||
731 | c += '0'; | ||
732 | } | ||
733 | luaL_addchar(b, c); | ||
734 | s++; | ||
735 | } | ||
736 | luaL_addchar(b, '"'); | ||
737 | } | ||
738 | |||
739 | static const char *scanformat(lua_State *L, const char *strfrmt, char *form) | ||
740 | { | ||
741 | const char *p = strfrmt; | ||
742 | while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ | ||
743 | if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) | ||
744 | lj_err_caller(L, LJ_ERR_STRFMTR); | ||
745 | if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ | ||
746 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | ||
747 | if (*p == '.') { | ||
748 | p++; | ||
749 | if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */ | ||
750 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | ||
751 | } | ||
752 | if (lj_char_isdigit(uchar(*p))) | ||
753 | lj_err_caller(L, LJ_ERR_STRFMTW); | ||
754 | *(form++) = '%'; | ||
755 | strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); | ||
756 | form += p - strfrmt + 1; | ||
757 | *form = '\0'; | ||
758 | return p; | ||
759 | } | ||
760 | |||
761 | static void addintlen(char *form) | ||
762 | { | ||
763 | size_t l = strlen(form); | ||
764 | char spec = form[l - 1]; | ||
765 | strcpy(form + l - 1, LUA_INTFRMLEN); | ||
766 | form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; | ||
767 | form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; | ||
768 | } | ||
769 | |||
770 | static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) | ||
771 | { | ||
772 | if (sizeof(LUA_INTFRM_T) == 4) { | ||
773 | return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); | ||
774 | } else { | ||
775 | cTValue *o; | ||
776 | lj_lib_checknumber(L, arg); | ||
777 | o = L->base+arg-1; | ||
778 | if (tvisint(o)) | ||
779 | return (LUA_INTFRM_T)intV(o); | ||
780 | else | ||
781 | return (LUA_INTFRM_T)numV(o); | ||
782 | } | ||
783 | } | ||
784 | |||
785 | static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) | ||
786 | { | 644 | { |
787 | if (sizeof(LUA_INTFRM_T) == 4) { | 645 | int retry = 0; |
788 | return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); | 646 | SBuf *sb; |
789 | } else { | 647 | do { |
790 | cTValue *o; | 648 | sb = lj_buf_tmp_(L); |
791 | lj_lib_checknumber(L, arg); | 649 | retry = lj_strfmt_putarg(L, sb, 1, -retry); |
792 | o = L->base+arg-1; | 650 | } while (retry > 0); |
793 | if (tvisint(o)) | 651 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
794 | return (unsigned LUA_INTFRM_T)intV(o); | 652 | lj_gc_check(L); |
795 | else if ((int32_t)o->u32.hi < 0) | ||
796 | return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o); | ||
797 | else | ||
798 | return (unsigned LUA_INTFRM_T)numV(o); | ||
799 | } | ||
800 | } | ||
801 | |||
802 | static GCstr *meta_tostring(lua_State *L, int arg) | ||
803 | { | ||
804 | TValue *o = L->base+arg-1; | ||
805 | cTValue *mo; | ||
806 | lua_assert(o < L->top); /* Caller already checks for existence. */ | ||
807 | if (LJ_LIKELY(tvisstr(o))) | ||
808 | return strV(o); | ||
809 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | ||
810 | copyTV(L, L->top++, mo); | ||
811 | copyTV(L, L->top++, o); | ||
812 | lua_call(L, 1, 1); | ||
813 | L->top--; | ||
814 | if (tvisstr(L->top)) | ||
815 | return strV(L->top); | ||
816 | o = L->base+arg-1; | ||
817 | copyTV(L, o, L->top); | ||
818 | } | ||
819 | if (tvisnumber(o)) { | ||
820 | return lj_str_fromnumber(L, o); | ||
821 | } else if (tvisnil(o)) { | ||
822 | return lj_str_newlit(L, "nil"); | ||
823 | } else if (tvisfalse(o)) { | ||
824 | return lj_str_newlit(L, "false"); | ||
825 | } else if (tvistrue(o)) { | ||
826 | return lj_str_newlit(L, "true"); | ||
827 | } else { | ||
828 | if (tvisfunc(o) && isffunc(funcV(o))) | ||
829 | lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid); | ||
830 | else | ||
831 | lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg)); | ||
832 | L->top--; | ||
833 | return strV(L->top); | ||
834 | } | ||
835 | } | ||
836 | |||
837 | LJLIB_CF(string_format) | ||
838 | { | ||
839 | int arg = 1, top = (int)(L->top - L->base); | ||
840 | GCstr *fmt = lj_lib_checkstr(L, arg); | ||
841 | const char *strfrmt = strdata(fmt); | ||
842 | const char *strfrmt_end = strfrmt + fmt->len; | ||
843 | luaL_Buffer b; | ||
844 | luaL_buffinit(L, &b); | ||
845 | while (strfrmt < strfrmt_end) { | ||
846 | if (*strfrmt != L_ESC) { | ||
847 | luaL_addchar(&b, *strfrmt++); | ||
848 | } else if (*++strfrmt == L_ESC) { | ||
849 | luaL_addchar(&b, *strfrmt++); /* %% */ | ||
850 | } else { /* format item */ | ||
851 | char form[MAX_FMTSPEC]; /* to store the format (`%...') */ | ||
852 | char buff[MAX_FMTITEM]; /* to store the formatted item */ | ||
853 | int n = 0; | ||
854 | if (++arg > top) | ||
855 | luaL_argerror(L, arg, lj_obj_typename[0]); | ||
856 | strfrmt = scanformat(L, strfrmt, form); | ||
857 | switch (*strfrmt++) { | ||
858 | case 'c': | ||
859 | n = sprintf(buff, form, lj_lib_checkint(L, arg)); | ||
860 | break; | ||
861 | case 'd': case 'i': | ||
862 | addintlen(form); | ||
863 | n = sprintf(buff, form, num2intfrm(L, arg)); | ||
864 | break; | ||
865 | case 'o': case 'u': case 'x': case 'X': | ||
866 | addintlen(form); | ||
867 | n = sprintf(buff, form, num2uintfrm(L, arg)); | ||
868 | break; | ||
869 | case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { | ||
870 | TValue tv; | ||
871 | tv.n = lj_lib_checknum(L, arg); | ||
872 | if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { | ||
873 | /* Canonicalize output of non-finite values. */ | ||
874 | char *p, nbuf[LJ_STR_NUMBUF]; | ||
875 | size_t len = lj_str_bufnum(nbuf, &tv); | ||
876 | if (strfrmt[-1] < 'a') { | ||
877 | nbuf[len-3] = nbuf[len-3] - 0x20; | ||
878 | nbuf[len-2] = nbuf[len-2] - 0x20; | ||
879 | nbuf[len-1] = nbuf[len-1] - 0x20; | ||
880 | } | ||
881 | nbuf[len] = '\0'; | ||
882 | for (p = form; *p < 'A' && *p != '.'; p++) ; | ||
883 | *p++ = 's'; *p = '\0'; | ||
884 | n = sprintf(buff, form, nbuf); | ||
885 | break; | ||
886 | } | ||
887 | n = sprintf(buff, form, (double)tv.n); | ||
888 | break; | ||
889 | } | ||
890 | case 'q': | ||
891 | addquoted(L, &b, arg); | ||
892 | continue; | ||
893 | case 'p': | ||
894 | lj_str_pushf(L, "%p", lua_topointer(L, arg)); | ||
895 | luaL_addvalue(&b); | ||
896 | continue; | ||
897 | case 's': { | ||
898 | GCstr *str = meta_tostring(L, arg); | ||
899 | if (!strchr(form, '.') && str->len >= 100) { | ||
900 | /* no precision and string is too long to be formatted; | ||
901 | keep original string */ | ||
902 | setstrV(L, L->top++, str); | ||
903 | luaL_addvalue(&b); | ||
904 | continue; | ||
905 | } | ||
906 | n = sprintf(buff, form, strdata(str)); | ||
907 | break; | ||
908 | } | ||
909 | default: | ||
910 | lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); | ||
911 | break; | ||
912 | } | ||
913 | luaL_addlstring(&b, buff, n); | ||
914 | } | ||
915 | } | ||
916 | luaL_pushresult(&b); | ||
917 | return 1; | 653 | return 1; |
918 | } | 654 | } |
919 | 655 | ||
@@ -926,16 +662,15 @@ LUALIB_API int luaopen_string(lua_State *L) | |||
926 | GCtab *mt; | 662 | GCtab *mt; |
927 | global_State *g; | 663 | global_State *g; |
928 | LJ_LIB_REG(L, LUA_STRLIBNAME, string); | 664 | LJ_LIB_REG(L, LUA_STRLIBNAME, string); |
929 | #if defined(LUA_COMPAT_GFIND) && !LJ_52 | ||
930 | lua_getfield(L, -1, "gmatch"); | ||
931 | lua_setfield(L, -2, "gfind"); | ||
932 | #endif | ||
933 | mt = lj_tab_new(L, 0, 1); | 665 | mt = lj_tab_new(L, 0, 1); |
934 | /* NOBARRIER: basemt is a GC root. */ | 666 | /* NOBARRIER: basemt is a GC root. */ |
935 | g = G(L); | 667 | g = G(L); |
936 | setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); | 668 | setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); |
937 | settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); | 669 | settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); |
938 | mt->nomm = (uint8_t)(~(1u<<MM_index)); | 670 | mt->nomm = (uint8_t)(~(1u<<MM_index)); |
671 | #if LJ_HASBUFFER | ||
672 | lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1)); | ||
673 | #endif | ||
939 | return 1; | 674 | return 1; |
940 | } | 675 | } |
941 | 676 | ||
diff --git a/src/lib_table.c b/src/lib_table.c index dc89116f..a723326a 100644 --- a/src/lib_table.c +++ b/src/lib_table.c | |||
@@ -16,57 +16,43 @@ | |||
16 | #include "lj_obj.h" | 16 | #include "lj_obj.h" |
17 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
18 | #include "lj_err.h" | 18 | #include "lj_err.h" |
19 | #include "lj_buf.h" | ||
19 | #include "lj_tab.h" | 20 | #include "lj_tab.h" |
21 | #include "lj_ff.h" | ||
20 | #include "lj_lib.h" | 22 | #include "lj_lib.h" |
21 | 23 | ||
22 | /* ------------------------------------------------------------------------ */ | 24 | /* ------------------------------------------------------------------------ */ |
23 | 25 | ||
24 | #define LJLIB_MODULE_table | 26 | #define LJLIB_MODULE_table |
25 | 27 | ||
26 | LJLIB_CF(table_foreachi) | 28 | LJLIB_LUA(table_foreachi) /* |
27 | { | 29 | function(t, f) |
28 | GCtab *t = lj_lib_checktab(L, 1); | 30 | CHECK_tab(t) |
29 | GCfunc *func = lj_lib_checkfunc(L, 2); | 31 | CHECK_func(f) |
30 | MSize i, n = lj_tab_len(t); | 32 | for i=1,#t do |
31 | for (i = 1; i <= n; i++) { | 33 | local r = f(i, t[i]) |
32 | cTValue *val; | 34 | if r ~= nil then return r end |
33 | setfuncV(L, L->top, func); | 35 | end |
34 | setintV(L->top+1, i); | 36 | end |
35 | val = lj_tab_getint(t, (int32_t)i); | 37 | */ |
36 | if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); } | ||
37 | L->top += 3; | ||
38 | lua_call(L, 2, 1); | ||
39 | if (!tvisnil(L->top-1)) | ||
40 | return 1; | ||
41 | L->top--; | ||
42 | } | ||
43 | return 0; | ||
44 | } | ||
45 | 38 | ||
46 | LJLIB_CF(table_foreach) | 39 | LJLIB_LUA(table_foreach) /* |
47 | { | 40 | function(t, f) |
48 | GCtab *t = lj_lib_checktab(L, 1); | 41 | CHECK_tab(t) |
49 | GCfunc *func = lj_lib_checkfunc(L, 2); | 42 | CHECK_func(f) |
50 | L->top = L->base+3; | 43 | for k, v in PAIRS(t) do |
51 | setnilV(L->top-1); | 44 | local r = f(k, v) |
52 | while (lj_tab_next(L, t, L->top-1)) { | 45 | if r ~= nil then return r end |
53 | copyTV(L, L->top+2, L->top); | 46 | end |
54 | copyTV(L, L->top+1, L->top-1); | 47 | end |
55 | setfuncV(L, L->top, func); | 48 | */ |
56 | L->top += 3; | ||
57 | lua_call(L, 2, 1); | ||
58 | if (!tvisnil(L->top-1)) | ||
59 | return 1; | ||
60 | L->top--; | ||
61 | } | ||
62 | return 0; | ||
63 | } | ||
64 | 49 | ||
65 | LJLIB_ASM(table_getn) LJLIB_REC(.) | 50 | LJLIB_LUA(table_getn) /* |
66 | { | 51 | function(t) |
67 | lj_lib_checktab(L, 1); | 52 | CHECK_tab(t) |
68 | return FFH_UNREACHABLE; | 53 | return #t |
69 | } | 54 | end |
55 | */ | ||
70 | 56 | ||
71 | LJLIB_CF(table_maxn) | 57 | LJLIB_CF(table_maxn) |
72 | { | 58 | { |
@@ -119,52 +105,67 @@ LJLIB_CF(table_insert) LJLIB_REC(.) | |||
119 | return 0; | 105 | return 0; |
120 | } | 106 | } |
121 | 107 | ||
122 | LJLIB_CF(table_remove) LJLIB_REC(.) | 108 | LJLIB_LUA(table_remove) /* |
123 | { | 109 | function(t, pos) |
124 | GCtab *t = lj_lib_checktab(L, 1); | 110 | CHECK_tab(t) |
125 | int32_t e = (int32_t)lj_tab_len(t); | 111 | local len = #t |
126 | int32_t pos = lj_lib_optint(L, 2, e); | 112 | if pos == nil then |
127 | if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ | 113 | if len ~= 0 then |
128 | return 0; | 114 | local old = t[len] |
129 | lua_rawgeti(L, 1, pos); /* Get previous value. */ | 115 | t[len] = nil |
130 | /* NOBARRIER: This just moves existing elements around. */ | 116 | return old |
131 | for (; pos < e; pos++) { | 117 | end |
132 | cTValue *src = lj_tab_getint(t, pos+1); | 118 | else |
133 | TValue *dst = lj_tab_setint(L, t, pos); | 119 | CHECK_int(pos) |
134 | if (src) { | 120 | if pos >= 1 and pos <= len then |
135 | copyTV(L, dst, src); | 121 | local old = t[pos] |
136 | } else { | 122 | for i=pos+1,len do |
137 | setnilV(dst); | 123 | t[i-1] = t[i] |
138 | } | 124 | end |
139 | } | 125 | t[len] = nil |
140 | setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ | 126 | return old |
141 | return 1; /* Return previous value. */ | 127 | end |
142 | } | 128 | end |
129 | end | ||
130 | */ | ||
131 | |||
132 | LJLIB_LUA(table_move) /* | ||
133 | function(a1, f, e, t, a2) | ||
134 | CHECK_tab(a1) | ||
135 | CHECK_int(f) | ||
136 | CHECK_int(e) | ||
137 | CHECK_int(t) | ||
138 | if a2 == nil then a2 = a1 end | ||
139 | CHECK_tab(a2) | ||
140 | if e >= f then | ||
141 | local d = t - f | ||
142 | if t > e or t <= f or a2 ~= a1 then | ||
143 | for i=f,e do a2[i+d] = a1[i] end | ||
144 | else | ||
145 | for i=e,f,-1 do a2[i+d] = a1[i] end | ||
146 | end | ||
147 | end | ||
148 | return a2 | ||
149 | end | ||
150 | */ | ||
143 | 151 | ||
144 | LJLIB_CF(table_concat) | 152 | LJLIB_CF(table_concat) LJLIB_REC(.) |
145 | { | 153 | { |
146 | luaL_Buffer b; | ||
147 | GCtab *t = lj_lib_checktab(L, 1); | 154 | GCtab *t = lj_lib_checktab(L, 1); |
148 | GCstr *sep = lj_lib_optstr(L, 2); | 155 | GCstr *sep = lj_lib_optstr(L, 2); |
149 | MSize seplen = sep ? sep->len : 0; | ||
150 | int32_t i = lj_lib_optint(L, 3, 1); | 156 | int32_t i = lj_lib_optint(L, 3, 1); |
151 | int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? | 157 | int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? |
152 | lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); | 158 | lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); |
153 | luaL_buffinit(L, &b); | 159 | SBuf *sb = lj_buf_tmp_(L); |
154 | if (i <= e) { | 160 | SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e); |
155 | for (;;) { | 161 | if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */ |
156 | cTValue *o; | 162 | int32_t idx = (int32_t)(intptr_t)sb->w; |
157 | lua_rawgeti(L, 1, i); | 163 | cTValue *o = lj_tab_getint(t, idx); |
158 | o = L->top-1; | 164 | lj_err_callerv(L, LJ_ERR_TABCAT, |
159 | if (!(tvisstr(o) || tvisnumber(o))) | 165 | lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx); |
160 | lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i); | ||
161 | luaL_addvalue(&b); | ||
162 | if (i++ == e) break; | ||
163 | if (seplen) | ||
164 | luaL_addlstring(&b, strdata(sep), seplen); | ||
165 | } | ||
166 | } | 166 | } |
167 | luaL_pushresult(&b); | 167 | setstrV(L, L->top-1, lj_buf_str(L, sbx)); |
168 | lj_gc_check(L); | ||
168 | return 1; | 169 | return 1; |
169 | } | 170 | } |
170 | 171 | ||
@@ -284,6 +285,30 @@ LJLIB_CF(table_pack) | |||
284 | } | 285 | } |
285 | #endif | 286 | #endif |
286 | 287 | ||
288 | LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.) | ||
289 | { | ||
290 | int32_t a = lj_lib_checkint(L, 1); | ||
291 | int32_t h = lj_lib_checkint(L, 2); | ||
292 | lua_createtable(L, a, h); | ||
293 | return 1; | ||
294 | } | ||
295 | |||
296 | LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.) | ||
297 | { | ||
298 | lj_tab_clear(lj_lib_checktab(L, 1)); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | static int luaopen_table_new(lua_State *L) | ||
303 | { | ||
304 | return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new"); | ||
305 | } | ||
306 | |||
307 | static int luaopen_table_clear(lua_State *L) | ||
308 | { | ||
309 | return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear"); | ||
310 | } | ||
311 | |||
287 | /* ------------------------------------------------------------------------ */ | 312 | /* ------------------------------------------------------------------------ */ |
288 | 313 | ||
289 | #include "lj_libdef.h" | 314 | #include "lj_libdef.h" |
@@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L) | |||
295 | lua_getglobal(L, "unpack"); | 320 | lua_getglobal(L, "unpack"); |
296 | lua_setfield(L, -2, "unpack"); | 321 | lua_setfield(L, -2, "unpack"); |
297 | #endif | 322 | #endif |
323 | lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1)); | ||
324 | lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1)); | ||
298 | return 1; | 325 | return 1; |
299 | } | 326 | } |
300 | 327 | ||
diff --git a/src/lj.supp b/src/lj.supp deleted file mode 100644 index 217f7c89..00000000 --- a/src/lj.supp +++ /dev/null | |||
@@ -1,41 +0,0 @@ | |||
1 | # Valgrind suppression file for LuaJIT 2.0. | ||
2 | { | ||
3 | Optimized string compare | ||
4 | Memcheck:Addr4 | ||
5 | fun:lj_str_cmp | ||
6 | } | ||
7 | { | ||
8 | Optimized string compare | ||
9 | Memcheck:Addr1 | ||
10 | fun:lj_str_cmp | ||
11 | } | ||
12 | { | ||
13 | Optimized string compare | ||
14 | Memcheck:Addr4 | ||
15 | fun:lj_str_new | ||
16 | } | ||
17 | { | ||
18 | Optimized string compare | ||
19 | Memcheck:Addr1 | ||
20 | fun:lj_str_new | ||
21 | } | ||
22 | { | ||
23 | Optimized string compare | ||
24 | Memcheck:Cond | ||
25 | fun:lj_str_new | ||
26 | } | ||
27 | { | ||
28 | Optimized string compare | ||
29 | Memcheck:Addr4 | ||
30 | fun:str_fastcmp | ||
31 | } | ||
32 | { | ||
33 | Optimized string compare | ||
34 | Memcheck:Addr1 | ||
35 | fun:str_fastcmp | ||
36 | } | ||
37 | { | ||
38 | Optimized string compare | ||
39 | Memcheck:Cond | ||
40 | fun:str_fastcmp | ||
41 | } | ||
diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 9adaa0e5..165203fa 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "lj_def.h" | 31 | #include "lj_def.h" |
32 | #include "lj_arch.h" | 32 | #include "lj_arch.h" |
33 | #include "lj_alloc.h" | 33 | #include "lj_alloc.h" |
34 | #include "lj_prng.h" | ||
34 | 35 | ||
35 | #ifndef LUAJIT_USE_SYSMALLOC | 36 | #ifndef LUAJIT_USE_SYSMALLOC |
36 | 37 | ||
@@ -72,15 +73,58 @@ | |||
72 | 73 | ||
73 | #define IS_DIRECT_BIT (SIZE_T_ONE) | 74 | #define IS_DIRECT_BIT (SIZE_T_ONE) |
74 | 75 | ||
76 | |||
77 | /* Determine system-specific block allocation method. */ | ||
75 | #if LJ_TARGET_WINDOWS | 78 | #if LJ_TARGET_WINDOWS |
76 | 79 | ||
77 | #define WIN32_LEAN_AND_MEAN | 80 | #define WIN32_LEAN_AND_MEAN |
78 | #include <windows.h> | 81 | #include <windows.h> |
79 | 82 | ||
83 | #define LJ_ALLOC_VIRTUALALLOC 1 | ||
84 | |||
85 | #if LJ_64 && !LJ_GC64 | ||
86 | #define LJ_ALLOC_NTAVM 1 | ||
87 | #endif | ||
88 | |||
89 | #else | ||
90 | |||
91 | #include <errno.h> | ||
92 | /* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ | ||
93 | #include <sys/mman.h> | ||
94 | |||
95 | #define LJ_ALLOC_MMAP 1 | ||
96 | |||
80 | #if LJ_64 | 97 | #if LJ_64 |
81 | 98 | ||
99 | #define LJ_ALLOC_MMAP_PROBE 1 | ||
100 | |||
101 | #if LJ_GC64 | ||
102 | #define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ | ||
103 | #elif LJ_TARGET_X64 && LJ_HASJIT | ||
104 | /* Due to limitations in the x64 compiler backend. */ | ||
105 | #define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ | ||
106 | #else | ||
107 | #define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ | ||
108 | #endif | ||
109 | |||
110 | #endif | ||
111 | |||
112 | #if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) | ||
113 | #define LJ_ALLOC_MMAP32 1 | ||
114 | #endif | ||
115 | |||
116 | #if LJ_TARGET_LINUX | ||
117 | #define LJ_ALLOC_MREMAP 1 | ||
118 | #endif | ||
119 | |||
120 | #endif | ||
121 | |||
122 | |||
123 | #if LJ_ALLOC_VIRTUALALLOC | ||
124 | |||
125 | #if LJ_ALLOC_NTAVM | ||
82 | /* Undocumented, but hey, that's what we all love so much about Windows. */ | 126 | /* Undocumented, but hey, that's what we all love so much about Windows. */ |
83 | typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, | 127 | typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits, |
84 | size_t *size, ULONG alloctype, ULONG prot); | 128 | size_t *size, ULONG alloctype, ULONG prot); |
85 | static PNTAVM ntavm; | 129 | static PNTAVM ntavm; |
86 | 130 | ||
@@ -89,14 +133,15 @@ static PNTAVM ntavm; | |||
89 | */ | 133 | */ |
90 | #define NTAVM_ZEROBITS 1 | 134 | #define NTAVM_ZEROBITS 1 |
91 | 135 | ||
92 | static void INIT_MMAP(void) | 136 | static void init_mmap(void) |
93 | { | 137 | { |
94 | ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), | 138 | ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), |
95 | "NtAllocateVirtualMemory"); | 139 | "NtAllocateVirtualMemory"); |
96 | } | 140 | } |
141 | #define INIT_MMAP() init_mmap() | ||
97 | 142 | ||
98 | /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ | 143 | /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ |
99 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 144 | static void *mmap_plain(size_t size) |
100 | { | 145 | { |
101 | DWORD olderr = GetLastError(); | 146 | DWORD olderr = GetLastError(); |
102 | void *ptr = NULL; | 147 | void *ptr = NULL; |
@@ -107,7 +152,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) | |||
107 | } | 152 | } |
108 | 153 | ||
109 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ | 154 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ |
110 | static LJ_AINLINE void *DIRECT_MMAP(size_t size) | 155 | static void *direct_mmap(size_t size) |
111 | { | 156 | { |
112 | DWORD olderr = GetLastError(); | 157 | DWORD olderr = GetLastError(); |
113 | void *ptr = NULL; | 158 | void *ptr = NULL; |
@@ -119,31 +164,32 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) | |||
119 | 164 | ||
120 | #else | 165 | #else |
121 | 166 | ||
122 | #define INIT_MMAP() ((void)0) | ||
123 | |||
124 | /* Win32 MMAP via VirtualAlloc */ | 167 | /* Win32 MMAP via VirtualAlloc */ |
125 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 168 | static void *mmap_plain(size_t size) |
126 | { | 169 | { |
127 | DWORD olderr = GetLastError(); | 170 | DWORD olderr = GetLastError(); |
128 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); | 171 | void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); |
129 | SetLastError(olderr); | 172 | SetLastError(olderr); |
130 | return ptr ? ptr : MFAIL; | 173 | return ptr ? ptr : MFAIL; |
131 | } | 174 | } |
132 | 175 | ||
133 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ | 176 | /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ |
134 | static LJ_AINLINE void *DIRECT_MMAP(size_t size) | 177 | static void *direct_mmap(size_t size) |
135 | { | 178 | { |
136 | DWORD olderr = GetLastError(); | 179 | DWORD olderr = GetLastError(); |
137 | void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, | 180 | void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, |
138 | PAGE_READWRITE); | 181 | PAGE_READWRITE); |
139 | SetLastError(olderr); | 182 | SetLastError(olderr); |
140 | return ptr ? ptr : MFAIL; | 183 | return ptr ? ptr : MFAIL; |
141 | } | 184 | } |
142 | 185 | ||
143 | #endif | 186 | #endif |
144 | 187 | ||
188 | #define CALL_MMAP(prng, size) mmap_plain(size) | ||
189 | #define DIRECT_MMAP(prng, size) direct_mmap(size) | ||
190 | |||
145 | /* This function supports releasing coalesed segments */ | 191 | /* This function supports releasing coalesed segments */ |
146 | static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | 192 | static int CALL_MUNMAP(void *ptr, size_t size) |
147 | { | 193 | { |
148 | DWORD olderr = GetLastError(); | 194 | DWORD olderr = GetLastError(); |
149 | MEMORY_BASIC_INFORMATION minfo; | 195 | MEMORY_BASIC_INFORMATION minfo; |
@@ -163,10 +209,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
163 | return 0; | 209 | return 0; |
164 | } | 210 | } |
165 | 211 | ||
166 | #else | 212 | #elif LJ_ALLOC_MMAP |
167 | |||
168 | #include <errno.h> | ||
169 | #include <sys/mman.h> | ||
170 | 213 | ||
171 | #define MMAP_PROT (PROT_READ|PROT_WRITE) | 214 | #define MMAP_PROT (PROT_READ|PROT_WRITE) |
172 | #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) | 215 | #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) |
@@ -174,105 +217,134 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
174 | #endif | 217 | #endif |
175 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) | 218 | #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) |
176 | 219 | ||
177 | #if LJ_64 | 220 | #if LJ_ALLOC_MMAP_PROBE |
178 | /* 64 bit mode needs special support for allocating memory in the lower 2GB. */ | ||
179 | 221 | ||
180 | #if defined(MAP_32BIT) | 222 | #ifdef MAP_TRYFIXED |
181 | 223 | #define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED) | |
182 | #if defined(__sun__) | ||
183 | #define MMAP_REGION_START ((uintptr_t)0x1000) | ||
184 | #else | 224 | #else |
185 | /* Actually this only gives us max. 1GB in current Linux kernels. */ | 225 | #define MMAP_FLAGS_PROBE MMAP_FLAGS |
186 | #define MMAP_REGION_START ((uintptr_t)0) | ||
187 | #endif | 226 | #endif |
188 | 227 | ||
189 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 228 | #define LJ_ALLOC_MMAP_PROBE_MAX 30 |
229 | #define LJ_ALLOC_MMAP_PROBE_LINEAR 5 | ||
230 | |||
231 | #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) | ||
232 | |||
233 | static void *mmap_probe(PRNGState *rs, size_t size) | ||
190 | { | 234 | { |
235 | /* Hint for next allocation. Doesn't need to be thread-safe. */ | ||
236 | static uintptr_t hint_addr = 0; | ||
191 | int olderr = errno; | 237 | int olderr = errno; |
192 | void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); | 238 | int retry; |
239 | for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { | ||
240 | void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); | ||
241 | uintptr_t addr = (uintptr_t)p; | ||
242 | if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER && | ||
243 | ((addr + size) >> LJ_ALLOC_MBITS) == 0) { | ||
244 | /* We got a suitable address. Bump the hint address. */ | ||
245 | hint_addr = addr + size; | ||
246 | errno = olderr; | ||
247 | return p; | ||
248 | } | ||
249 | if (p != MFAIL) { | ||
250 | munmap(p, size); | ||
251 | } else if (errno == ENOMEM) { | ||
252 | return MFAIL; | ||
253 | } | ||
254 | if (hint_addr) { | ||
255 | /* First, try linear probing. */ | ||
256 | if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { | ||
257 | hint_addr += 0x1000000; | ||
258 | if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) | ||
259 | hint_addr = 0; | ||
260 | continue; | ||
261 | } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { | ||
262 | /* Next, try a no-hint probe to get back an ASLR address. */ | ||
263 | hint_addr = 0; | ||
264 | continue; | ||
265 | } | ||
266 | } | ||
267 | /* Finally, try pseudo-random probing. */ | ||
268 | do { | ||
269 | hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE); | ||
270 | } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); | ||
271 | } | ||
193 | errno = olderr; | 272 | errno = olderr; |
194 | return ptr; | 273 | return MFAIL; |
195 | } | 274 | } |
196 | 275 | ||
197 | #elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN | 276 | #endif |
277 | |||
278 | #if LJ_ALLOC_MMAP32 | ||
198 | 279 | ||
199 | /* OSX and FreeBSD mmap() use a naive first-fit linear search. | 280 | #if LJ_TARGET_SOLARIS |
200 | ** That's perfect for us. Except that -pagezero_size must be set for OSX, | 281 | #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) |
201 | ** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs | ||
202 | ** to be reduced to 250MB on FreeBSD. | ||
203 | */ | ||
204 | #if LJ_TARGET_OSX || defined(__DragonFly__) | ||
205 | #define MMAP_REGION_START ((uintptr_t)0x10000) | ||
206 | #elif LJ_TARGET_PS4 | ||
207 | #define MMAP_REGION_START ((uintptr_t)0x4000) | ||
208 | #else | 282 | #else |
209 | #define MMAP_REGION_START ((uintptr_t)0x10000000) | 283 | #define LJ_ALLOC_MMAP32_START ((uintptr_t)0) |
210 | #endif | 284 | #endif |
211 | #define MMAP_REGION_END ((uintptr_t)0x80000000) | ||
212 | 285 | ||
213 | #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 | 286 | #if LJ_ALLOC_MMAP_PROBE |
214 | #include <sys/resource.h> | 287 | static void *mmap_map32(PRNGState *rs, size_t size) |
288 | #else | ||
289 | static void *mmap_map32(size_t size) | ||
215 | #endif | 290 | #endif |
216 | |||
217 | static LJ_AINLINE void *CALL_MMAP(size_t size) | ||
218 | { | 291 | { |
219 | int olderr = errno; | 292 | #if LJ_ALLOC_MMAP_PROBE |
220 | /* Hint for next allocation. Doesn't need to be thread-safe. */ | 293 | static int fallback = 0; |
221 | static uintptr_t alloc_hint = MMAP_REGION_START; | 294 | if (fallback) |
222 | int retry = 0; | 295 | return mmap_probe(rs, size); |
223 | #if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 | ||
224 | static int rlimit_modified = 0; | ||
225 | if (LJ_UNLIKELY(rlimit_modified == 0)) { | ||
226 | struct rlimit rlim; | ||
227 | rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; | ||
228 | setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ | ||
229 | rlimit_modified = 1; | ||
230 | } | ||
231 | #endif | 296 | #endif |
232 | for (;;) { | 297 | { |
233 | void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); | 298 | int olderr = errno; |
234 | if ((uintptr_t)p >= MMAP_REGION_START && | 299 | void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); |
235 | (uintptr_t)p + size < MMAP_REGION_END) { | 300 | errno = olderr; |
236 | alloc_hint = (uintptr_t)p + size; | 301 | /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */ |
237 | errno = olderr; | 302 | #if LJ_ALLOC_MMAP_PROBE |
238 | return p; | 303 | if (ptr == MFAIL) { |
304 | fallback = 1; | ||
305 | return mmap_probe(rs, size); | ||
239 | } | 306 | } |
240 | if (p != CMFAIL) munmap(p, size); | ||
241 | #if defined(__sun__) || defined(__DragonFly__) | ||
242 | alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ | ||
243 | if (alloc_hint + size < MMAP_REGION_END) continue; | ||
244 | #endif | 307 | #endif |
245 | if (retry) break; | 308 | return ptr; |
246 | retry = 1; | ||
247 | alloc_hint = MMAP_REGION_START; | ||
248 | } | 309 | } |
249 | errno = olderr; | ||
250 | return CMFAIL; | ||
251 | } | 310 | } |
252 | 311 | ||
253 | #else | ||
254 | |||
255 | #error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS" | ||
256 | |||
257 | #endif | 312 | #endif |
258 | 313 | ||
314 | #if LJ_ALLOC_MMAP32 | ||
315 | #if LJ_ALLOC_MMAP_PROBE | ||
316 | #define CALL_MMAP(prng, size) mmap_map32(prng, size) | ||
259 | #else | 317 | #else |
260 | 318 | #define CALL_MMAP(prng, size) mmap_map32(size) | |
261 | /* 32 bit mode is easy. */ | 319 | #endif |
262 | static LJ_AINLINE void *CALL_MMAP(size_t size) | 320 | #elif LJ_ALLOC_MMAP_PROBE |
321 | #define CALL_MMAP(prng, size) mmap_probe(prng, size) | ||
322 | #else | ||
323 | static void *mmap_plain(size_t size) | ||
263 | { | 324 | { |
264 | int olderr = errno; | 325 | int olderr = errno; |
265 | void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); | 326 | void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); |
266 | errno = olderr; | 327 | errno = olderr; |
267 | return ptr; | 328 | return ptr; |
268 | } | 329 | } |
269 | 330 | #define CALL_MMAP(prng, size) mmap_plain(size) | |
270 | #endif | 331 | #endif |
271 | 332 | ||
272 | #define INIT_MMAP() ((void)0) | 333 | #if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 |
273 | #define DIRECT_MMAP(s) CALL_MMAP(s) | 334 | |
335 | #include <sys/resource.h> | ||
274 | 336 | ||
275 | static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | 337 | static void init_mmap(void) |
338 | { | ||
339 | struct rlimit rlim; | ||
340 | rlim.rlim_cur = rlim.rlim_max = 0x10000; | ||
341 | setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ | ||
342 | } | ||
343 | #define INIT_MMAP() init_mmap() | ||
344 | |||
345 | #endif | ||
346 | |||
347 | static int CALL_MUNMAP(void *ptr, size_t size) | ||
276 | { | 348 | { |
277 | int olderr = errno; | 349 | int olderr = errno; |
278 | int ret = munmap(ptr, size); | 350 | int ret = munmap(ptr, size); |
@@ -280,10 +352,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) | |||
280 | return ret; | 352 | return ret; |
281 | } | 353 | } |
282 | 354 | ||
283 | #if LJ_TARGET_LINUX | 355 | #if LJ_ALLOC_MREMAP |
284 | /* Need to define _GNU_SOURCE to get the mremap prototype. */ | 356 | /* Need to define _GNU_SOURCE to get the mremap prototype. */ |
285 | static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | 357 | static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) |
286 | int flags) | ||
287 | { | 358 | { |
288 | int olderr = errno; | 359 | int olderr = errno; |
289 | ptr = mremap(ptr, osz, nsz, flags); | 360 | ptr = mremap(ptr, osz, nsz, flags); |
@@ -294,7 +365,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | |||
294 | #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) | 365 | #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) |
295 | #define CALL_MREMAP_NOMOVE 0 | 366 | #define CALL_MREMAP_NOMOVE 0 |
296 | #define CALL_MREMAP_MAYMOVE 1 | 367 | #define CALL_MREMAP_MAYMOVE 1 |
297 | #if LJ_64 | 368 | #if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) |
298 | #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE | 369 | #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE |
299 | #else | 370 | #else |
300 | #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE | 371 | #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE |
@@ -303,6 +374,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, | |||
303 | 374 | ||
304 | #endif | 375 | #endif |
305 | 376 | ||
377 | |||
378 | #ifndef INIT_MMAP | ||
379 | #define INIT_MMAP() ((void)0) | ||
380 | #endif | ||
381 | |||
382 | #ifndef DIRECT_MMAP | ||
383 | #define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s) | ||
384 | #endif | ||
385 | |||
306 | #ifndef CALL_MREMAP | 386 | #ifndef CALL_MREMAP |
307 | #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) | 387 | #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) |
308 | #endif | 388 | #endif |
@@ -459,6 +539,7 @@ struct malloc_state { | |||
459 | mchunkptr smallbins[(NSMALLBINS+1)*2]; | 539 | mchunkptr smallbins[(NSMALLBINS+1)*2]; |
460 | tbinptr treebins[NTREEBINS]; | 540 | tbinptr treebins[NTREEBINS]; |
461 | msegment seg; | 541 | msegment seg; |
542 | PRNGState *prng; | ||
462 | }; | 543 | }; |
463 | 544 | ||
464 | typedef struct malloc_state *mstate; | 545 | typedef struct malloc_state *mstate; |
@@ -516,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss) | |||
516 | noncontiguous segments are added. | 597 | noncontiguous segments are added. |
517 | */ | 598 | */ |
518 | #define TOP_FOOT_SIZE\ | 599 | #define TOP_FOOT_SIZE\ |
519 | (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) | 600 | (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) |
520 | 601 | ||
521 | /* ---------------------------- Indexing Bins ---------------------------- */ | 602 | /* ---------------------------- Indexing Bins ---------------------------- */ |
522 | 603 | ||
@@ -741,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss) | |||
741 | 822 | ||
742 | /* ----------------------- Direct-mmapping chunks ----------------------- */ | 823 | /* ----------------------- Direct-mmapping chunks ----------------------- */ |
743 | 824 | ||
744 | static void *direct_alloc(size_t nb) | 825 | static void *direct_alloc(mstate m, size_t nb) |
745 | { | 826 | { |
746 | size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); | 827 | size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); |
747 | if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ | 828 | if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ |
748 | char *mm = (char *)(DIRECT_MMAP(mmsize)); | 829 | char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize)); |
749 | if (mm != CMFAIL) { | 830 | if (mm != CMFAIL) { |
750 | size_t offset = align_offset(chunk2mem(mm)); | 831 | size_t offset = align_offset(chunk2mem(mm)); |
751 | size_t psize = mmsize - offset - DIRECT_FOOT_PAD; | 832 | size_t psize = mmsize - offset - DIRECT_FOOT_PAD; |
@@ -757,6 +838,7 @@ static void *direct_alloc(size_t nb) | |||
757 | return chunk2mem(p); | 838 | return chunk2mem(p); |
758 | } | 839 | } |
759 | } | 840 | } |
841 | UNUSED(m); | ||
760 | return NULL; | 842 | return NULL; |
761 | } | 843 | } |
762 | 844 | ||
@@ -905,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb) | |||
905 | 987 | ||
906 | /* Directly map large chunks */ | 988 | /* Directly map large chunks */ |
907 | if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { | 989 | if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { |
908 | void *mem = direct_alloc(nb); | 990 | void *mem = direct_alloc(m, nb); |
909 | if (mem != 0) | 991 | if (mem != 0) |
910 | return mem; | 992 | return mem; |
911 | } | 993 | } |
@@ -914,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb) | |||
914 | size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; | 996 | size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; |
915 | size_t rsize = granularity_align(req); | 997 | size_t rsize = granularity_align(req); |
916 | if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ | 998 | if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ |
917 | char *mp = (char *)(CALL_MMAP(rsize)); | 999 | char *mp = (char *)(CALL_MMAP(m->prng, rsize)); |
918 | if (mp != CMFAIL) { | 1000 | if (mp != CMFAIL) { |
919 | tbase = mp; | 1001 | tbase = mp; |
920 | tsize = rsize; | 1002 | tsize = rsize; |
@@ -1141,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb) | |||
1141 | 1223 | ||
1142 | /* ----------------------------------------------------------------------- */ | 1224 | /* ----------------------------------------------------------------------- */ |
1143 | 1225 | ||
1144 | void *lj_alloc_create(void) | 1226 | void *lj_alloc_create(PRNGState *rs) |
1145 | { | 1227 | { |
1146 | size_t tsize = DEFAULT_GRANULARITY; | 1228 | size_t tsize = DEFAULT_GRANULARITY; |
1147 | char *tbase; | 1229 | char *tbase; |
1148 | INIT_MMAP(); | 1230 | INIT_MMAP(); |
1149 | tbase = (char *)(CALL_MMAP(tsize)); | 1231 | UNUSED(rs); |
1232 | tbase = (char *)(CALL_MMAP(rs, tsize)); | ||
1150 | if (tbase != CMFAIL) { | 1233 | if (tbase != CMFAIL) { |
1151 | size_t msize = pad_request(sizeof(struct malloc_state)); | 1234 | size_t msize = pad_request(sizeof(struct malloc_state)); |
1152 | mchunkptr mn; | 1235 | mchunkptr mn; |
@@ -1165,6 +1248,12 @@ void *lj_alloc_create(void) | |||
1165 | return NULL; | 1248 | return NULL; |
1166 | } | 1249 | } |
1167 | 1250 | ||
1251 | void lj_alloc_setprng(void *msp, PRNGState *rs) | ||
1252 | { | ||
1253 | mstate ms = (mstate)msp; | ||
1254 | ms->prng = rs; | ||
1255 | } | ||
1256 | |||
1168 | void lj_alloc_destroy(void *msp) | 1257 | void lj_alloc_destroy(void *msp) |
1169 | { | 1258 | { |
1170 | mstate ms = (mstate)msp; | 1259 | mstate ms = (mstate)msp; |
diff --git a/src/lj_alloc.h b/src/lj_alloc.h index f87a7cf3..669f50b7 100644 --- a/src/lj_alloc.h +++ b/src/lj_alloc.h | |||
@@ -9,7 +9,8 @@ | |||
9 | #include "lj_def.h" | 9 | #include "lj_def.h" |
10 | 10 | ||
11 | #ifndef LUAJIT_USE_SYSMALLOC | 11 | #ifndef LUAJIT_USE_SYSMALLOC |
12 | LJ_FUNC void *lj_alloc_create(void); | 12 | LJ_FUNC void *lj_alloc_create(PRNGState *rs); |
13 | LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs); | ||
13 | LJ_FUNC void lj_alloc_destroy(void *msp); | 14 | LJ_FUNC void lj_alloc_destroy(void *msp); |
14 | LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); | 15 | LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); |
15 | #endif | 16 | #endif |
diff --git a/src/lj_api.c b/src/lj_api.c index 04a41792..d869ebf8 100644 --- a/src/lj_api.c +++ b/src/lj_api.c | |||
@@ -24,11 +24,12 @@ | |||
24 | #include "lj_trace.h" | 24 | #include "lj_trace.h" |
25 | #include "lj_vm.h" | 25 | #include "lj_vm.h" |
26 | #include "lj_strscan.h" | 26 | #include "lj_strscan.h" |
27 | #include "lj_strfmt.h" | ||
27 | 28 | ||
28 | /* -- Common helper functions --------------------------------------------- */ | 29 | /* -- Common helper functions --------------------------------------------- */ |
29 | 30 | ||
30 | #define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) | 31 | #define lj_checkapi_slot(idx) \ |
31 | #define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) | 32 | lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx)) |
32 | 33 | ||
33 | static TValue *index2adr(lua_State *L, int idx) | 34 | static TValue *index2adr(lua_State *L, int idx) |
34 | { | 35 | { |
@@ -36,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx) | |||
36 | TValue *o = L->base + (idx - 1); | 37 | TValue *o = L->base + (idx - 1); |
37 | return o < L->top ? o : niltv(L); | 38 | return o < L->top ? o : niltv(L); |
38 | } else if (idx > LUA_REGISTRYINDEX) { | 39 | } else if (idx > LUA_REGISTRYINDEX) { |
39 | api_check(L, idx != 0 && -idx <= L->top - L->base); | 40 | lj_checkapi(idx != 0 && -idx <= L->top - L->base, |
41 | "bad stack slot %d", idx); | ||
40 | return L->top + idx; | 42 | return L->top + idx; |
41 | } else if (idx == LUA_GLOBALSINDEX) { | 43 | } else if (idx == LUA_GLOBALSINDEX) { |
42 | TValue *o = &G(L)->tmptv; | 44 | TValue *o = &G(L)->tmptv; |
@@ -46,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx) | |||
46 | return registry(L); | 48 | return registry(L); |
47 | } else { | 49 | } else { |
48 | GCfunc *fn = curr_func(L); | 50 | GCfunc *fn = curr_func(L); |
49 | api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); | 51 | lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn), |
52 | "calling frame is not a C function"); | ||
50 | if (idx == LUA_ENVIRONINDEX) { | 53 | if (idx == LUA_ENVIRONINDEX) { |
51 | TValue *o = &G(L)->tmptv; | 54 | TValue *o = &G(L)->tmptv; |
52 | settabV(L, o, tabref(fn->c.env)); | 55 | settabV(L, o, tabref(fn->c.env)); |
@@ -58,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx) | |||
58 | } | 61 | } |
59 | } | 62 | } |
60 | 63 | ||
61 | static TValue *stkindex2adr(lua_State *L, int idx) | 64 | static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx) |
65 | { | ||
66 | TValue *o = index2adr(L, idx); | ||
67 | lj_checkapi(o != niltv(L), "invalid stack slot %d", idx); | ||
68 | return o; | ||
69 | } | ||
70 | |||
71 | static TValue *index2adr_stack(lua_State *L, int idx) | ||
62 | { | 72 | { |
63 | if (idx > 0) { | 73 | if (idx > 0) { |
64 | TValue *o = L->base + (idx - 1); | 74 | TValue *o = L->base + (idx - 1); |
75 | if (o < L->top) { | ||
76 | return o; | ||
77 | } else { | ||
78 | lj_checkapi(0, "invalid stack slot %d", idx); | ||
79 | return niltv(L); | ||
80 | } | ||
65 | return o < L->top ? o : niltv(L); | 81 | return o < L->top ? o : niltv(L); |
66 | } else { | 82 | } else { |
67 | api_check(L, idx != 0 && -idx <= L->top - L->base); | 83 | lj_checkapi(idx != 0 && -idx <= L->top - L->base, |
84 | "invalid stack slot %d", idx); | ||
68 | return L->top + idx; | 85 | return L->top + idx; |
69 | } | 86 | } |
70 | } | 87 | } |
@@ -98,17 +115,24 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg) | |||
98 | lj_err_callerv(L, LJ_ERR_STKOVM, msg); | 115 | lj_err_callerv(L, LJ_ERR_STKOVM, msg); |
99 | } | 116 | } |
100 | 117 | ||
101 | LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) | 118 | LUA_API void lua_xmove(lua_State *L, lua_State *to, int n) |
102 | { | 119 | { |
103 | TValue *f, *t; | 120 | TValue *f, *t; |
104 | if (from == to) return; | 121 | if (L == to) return; |
105 | api_checknelems(from, n); | 122 | lj_checkapi_slot(n); |
106 | api_check(from, G(from) == G(to)); | 123 | lj_checkapi(G(L) == G(to), "move across global states"); |
107 | lj_state_checkstack(to, (MSize)n); | 124 | lj_state_checkstack(to, (MSize)n); |
108 | f = from->top; | 125 | f = L->top; |
109 | t = to->top = to->top + n; | 126 | t = to->top = to->top + n; |
110 | while (--n >= 0) copyTV(to, --t, --f); | 127 | while (--n >= 0) copyTV(to, --t, --f); |
111 | from->top = f; | 128 | L->top = f; |
129 | } | ||
130 | |||
131 | LUA_API const lua_Number *lua_version(lua_State *L) | ||
132 | { | ||
133 | static const lua_Number version = LUA_VERSION_NUM; | ||
134 | UNUSED(L); | ||
135 | return &version; | ||
112 | } | 136 | } |
113 | 137 | ||
114 | /* -- Stack manipulation -------------------------------------------------- */ | 138 | /* -- Stack manipulation -------------------------------------------------- */ |
@@ -121,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L) | |||
121 | LUA_API void lua_settop(lua_State *L, int idx) | 145 | LUA_API void lua_settop(lua_State *L, int idx) |
122 | { | 146 | { |
123 | if (idx >= 0) { | 147 | if (idx >= 0) { |
124 | api_check(L, idx <= tvref(L->maxstack) - L->base); | 148 | lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx); |
125 | if (L->base + idx > L->top) { | 149 | if (L->base + idx > L->top) { |
126 | if (L->base + idx >= tvref(L->maxstack)) | 150 | if (L->base + idx >= tvref(L->maxstack)) |
127 | lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); | 151 | lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); |
@@ -130,51 +154,58 @@ LUA_API void lua_settop(lua_State *L, int idx) | |||
130 | L->top = L->base + idx; | 154 | L->top = L->base + idx; |
131 | } | 155 | } |
132 | } else { | 156 | } else { |
133 | api_check(L, -(idx+1) <= (L->top - L->base)); | 157 | lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx); |
134 | L->top += idx+1; /* Shrinks top (idx < 0). */ | 158 | L->top += idx+1; /* Shrinks top (idx < 0). */ |
135 | } | 159 | } |
136 | } | 160 | } |
137 | 161 | ||
138 | LUA_API void lua_remove(lua_State *L, int idx) | 162 | LUA_API void lua_remove(lua_State *L, int idx) |
139 | { | 163 | { |
140 | TValue *p = stkindex2adr(L, idx); | 164 | TValue *p = index2adr_stack(L, idx); |
141 | api_checkvalidindex(L, p); | ||
142 | while (++p < L->top) copyTV(L, p-1, p); | 165 | while (++p < L->top) copyTV(L, p-1, p); |
143 | L->top--; | 166 | L->top--; |
144 | } | 167 | } |
145 | 168 | ||
146 | LUA_API void lua_insert(lua_State *L, int idx) | 169 | LUA_API void lua_insert(lua_State *L, int idx) |
147 | { | 170 | { |
148 | TValue *q, *p = stkindex2adr(L, idx); | 171 | TValue *q, *p = index2adr_stack(L, idx); |
149 | api_checkvalidindex(L, p); | ||
150 | for (q = L->top; q > p; q--) copyTV(L, q, q-1); | 172 | for (q = L->top; q > p; q--) copyTV(L, q, q-1); |
151 | copyTV(L, p, L->top); | 173 | copyTV(L, p, L->top); |
152 | } | 174 | } |
153 | 175 | ||
154 | LUA_API void lua_replace(lua_State *L, int idx) | 176 | static void copy_slot(lua_State *L, TValue *f, int idx) |
155 | { | 177 | { |
156 | api_checknelems(L, 1); | ||
157 | if (idx == LUA_GLOBALSINDEX) { | 178 | if (idx == LUA_GLOBALSINDEX) { |
158 | api_check(L, tvistab(L->top-1)); | 179 | lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); |
159 | /* NOBARRIER: A thread (i.e. L) is never black. */ | 180 | /* NOBARRIER: A thread (i.e. L) is never black. */ |
160 | setgcref(L->env, obj2gco(tabV(L->top-1))); | 181 | setgcref(L->env, obj2gco(tabV(f))); |
161 | } else if (idx == LUA_ENVIRONINDEX) { | 182 | } else if (idx == LUA_ENVIRONINDEX) { |
162 | GCfunc *fn = curr_func(L); | 183 | GCfunc *fn = curr_func(L); |
163 | if (fn->c.gct != ~LJ_TFUNC) | 184 | if (fn->c.gct != ~LJ_TFUNC) |
164 | lj_err_msg(L, LJ_ERR_NOENV); | 185 | lj_err_msg(L, LJ_ERR_NOENV); |
165 | api_check(L, tvistab(L->top-1)); | 186 | lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); |
166 | setgcref(fn->c.env, obj2gco(tabV(L->top-1))); | 187 | setgcref(fn->c.env, obj2gco(tabV(f))); |
167 | lj_gc_barrier(L, fn, L->top-1); | 188 | lj_gc_barrier(L, fn, f); |
168 | } else { | 189 | } else { |
169 | TValue *o = index2adr(L, idx); | 190 | TValue *o = index2adr_check(L, idx); |
170 | api_checkvalidindex(L, o); | 191 | copyTV(L, o, f); |
171 | copyTV(L, o, L->top-1); | ||
172 | if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ | 192 | if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ |
173 | lj_gc_barrier(L, curr_func(L), L->top-1); | 193 | lj_gc_barrier(L, curr_func(L), f); |
174 | } | 194 | } |
195 | } | ||
196 | |||
197 | LUA_API void lua_replace(lua_State *L, int idx) | ||
198 | { | ||
199 | lj_checkapi_slot(1); | ||
200 | copy_slot(L, L->top - 1, idx); | ||
175 | L->top--; | 201 | L->top--; |
176 | } | 202 | } |
177 | 203 | ||
204 | LUA_API void lua_copy(lua_State *L, int fromidx, int toidx) | ||
205 | { | ||
206 | copy_slot(L, index2adr(L, fromidx), toidx); | ||
207 | } | ||
208 | |||
178 | LUA_API void lua_pushvalue(lua_State *L, int idx) | 209 | LUA_API void lua_pushvalue(lua_State *L, int idx) |
179 | { | 210 | { |
180 | copyTV(L, L->top, index2adr(L, idx)); | 211 | copyTV(L, L->top, index2adr(L, idx)); |
@@ -188,7 +219,7 @@ LUA_API int lua_type(lua_State *L, int idx) | |||
188 | cTValue *o = index2adr(L, idx); | 219 | cTValue *o = index2adr(L, idx); |
189 | if (tvisnumber(o)) { | 220 | if (tvisnumber(o)) { |
190 | return LUA_TNUMBER; | 221 | return LUA_TNUMBER; |
191 | #if LJ_64 | 222 | #if LJ_64 && !LJ_GC64 |
192 | } else if (tvislightud(o)) { | 223 | } else if (tvislightud(o)) { |
193 | return LUA_TLIGHTUSERDATA; | 224 | return LUA_TLIGHTUSERDATA; |
194 | #endif | 225 | #endif |
@@ -201,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx) | |||
201 | #else | 232 | #else |
202 | int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); | 233 | int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); |
203 | #endif | 234 | #endif |
204 | lua_assert(tt != LUA_TNIL || tvisnil(o)); | 235 | lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion"); |
205 | return tt; | 236 | return tt; |
206 | } | 237 | } |
207 | } | 238 | } |
@@ -268,7 +299,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) | |||
268 | return 0; | 299 | return 0; |
269 | } else if (tvispri(o1)) { | 300 | } else if (tvispri(o1)) { |
270 | return o1 != niltv(L) && o2 != niltv(L); | 301 | return o1 != niltv(L) && o2 != niltv(L); |
271 | #if LJ_64 | 302 | #if LJ_64 && !LJ_GC64 |
272 | } else if (tvislightud(o1)) { | 303 | } else if (tvislightud(o1)) { |
273 | return o1->u64 == o2->u64; | 304 | return o1->u64 == o2->u64; |
274 | #endif | 305 | #endif |
@@ -283,8 +314,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) | |||
283 | } else { | 314 | } else { |
284 | L->top = base+2; | 315 | L->top = base+2; |
285 | lj_vm_call(L, base, 1+1); | 316 | lj_vm_call(L, base, 1+1); |
286 | L->top -= 2; | 317 | L->top -= 2+LJ_FR2; |
287 | return tvistruecond(L->top+1); | 318 | return tvistruecond(L->top+1+LJ_FR2); |
288 | } | 319 | } |
289 | } | 320 | } |
290 | } | 321 | } |
@@ -306,8 +337,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2) | |||
306 | } else { | 337 | } else { |
307 | L->top = base+2; | 338 | L->top = base+2; |
308 | lj_vm_call(L, base, 1+1); | 339 | lj_vm_call(L, base, 1+1); |
309 | L->top -= 2; | 340 | L->top -= 2+LJ_FR2; |
310 | return tvistruecond(L->top+1); | 341 | return tvistruecond(L->top+1+LJ_FR2); |
311 | } | 342 | } |
312 | } | 343 | } |
313 | } | 344 | } |
@@ -324,6 +355,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) | |||
324 | return 0; | 355 | return 0; |
325 | } | 356 | } |
326 | 357 | ||
358 | LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok) | ||
359 | { | ||
360 | cTValue *o = index2adr(L, idx); | ||
361 | TValue tmp; | ||
362 | if (LJ_LIKELY(tvisnumber(o))) { | ||
363 | if (ok) *ok = 1; | ||
364 | return numberVnum(o); | ||
365 | } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) { | ||
366 | if (ok) *ok = 1; | ||
367 | return numV(&tmp); | ||
368 | } else { | ||
369 | if (ok) *ok = 0; | ||
370 | return 0; | ||
371 | } | ||
372 | } | ||
373 | |||
327 | LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) | 374 | LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) |
328 | { | 375 | { |
329 | cTValue *o = index2adr(L, idx); | 376 | cTValue *o = index2adr(L, idx); |
@@ -361,9 +408,38 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) | |||
361 | if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) | 408 | if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) |
362 | return 0; | 409 | return 0; |
363 | if (tvisint(&tmp)) | 410 | if (tvisint(&tmp)) |
364 | return (lua_Integer)intV(&tmp); | 411 | return intV(&tmp); |
412 | n = numV(&tmp); | ||
413 | } | ||
414 | #if LJ_64 | ||
415 | return (lua_Integer)n; | ||
416 | #else | ||
417 | return lj_num2int(n); | ||
418 | #endif | ||
419 | } | ||
420 | |||
421 | LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) | ||
422 | { | ||
423 | cTValue *o = index2adr(L, idx); | ||
424 | TValue tmp; | ||
425 | lua_Number n; | ||
426 | if (LJ_LIKELY(tvisint(o))) { | ||
427 | if (ok) *ok = 1; | ||
428 | return intV(o); | ||
429 | } else if (LJ_LIKELY(tvisnum(o))) { | ||
430 | n = numV(o); | ||
431 | } else { | ||
432 | if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) { | ||
433 | if (ok) *ok = 0; | ||
434 | return 0; | ||
435 | } | ||
436 | if (tvisint(&tmp)) { | ||
437 | if (ok) *ok = 1; | ||
438 | return intV(&tmp); | ||
439 | } | ||
365 | n = numV(&tmp); | 440 | n = numV(&tmp); |
366 | } | 441 | } |
442 | if (ok) *ok = 1; | ||
367 | #if LJ_64 | 443 | #if LJ_64 |
368 | return (lua_Integer)n; | 444 | return (lua_Integer)n; |
369 | #else | 445 | #else |
@@ -434,7 +510,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len) | |||
434 | } else if (tvisnumber(o)) { | 510 | } else if (tvisnumber(o)) { |
435 | lj_gc_check(L); | 511 | lj_gc_check(L); |
436 | o = index2adr(L, idx); /* GC may move the stack. */ | 512 | o = index2adr(L, idx); /* GC may move the stack. */ |
437 | s = lj_str_fromnumber(L, o); | 513 | s = lj_strfmt_number(L, o); |
438 | setstrV(L, o, s); | 514 | setstrV(L, o, s); |
439 | } else { | 515 | } else { |
440 | if (len != NULL) *len = 0; | 516 | if (len != NULL) *len = 0; |
@@ -453,7 +529,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len) | |||
453 | } else if (tvisnumber(o)) { | 529 | } else if (tvisnumber(o)) { |
454 | lj_gc_check(L); | 530 | lj_gc_check(L); |
455 | o = index2adr(L, idx); /* GC may move the stack. */ | 531 | o = index2adr(L, idx); /* GC may move the stack. */ |
456 | s = lj_str_fromnumber(L, o); | 532 | s = lj_strfmt_number(L, o); |
457 | setstrV(L, o, s); | 533 | setstrV(L, o, s); |
458 | } else { | 534 | } else { |
459 | lj_err_argt(L, idx, LUA_TSTRING); | 535 | lj_err_argt(L, idx, LUA_TSTRING); |
@@ -475,7 +551,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx, | |||
475 | } else if (tvisnumber(o)) { | 551 | } else if (tvisnumber(o)) { |
476 | lj_gc_check(L); | 552 | lj_gc_check(L); |
477 | o = index2adr(L, idx); /* GC may move the stack. */ | 553 | o = index2adr(L, idx); /* GC may move the stack. */ |
478 | s = lj_str_fromnumber(L, o); | 554 | s = lj_strfmt_number(L, o); |
479 | setstrV(L, o, s); | 555 | setstrV(L, o, s); |
480 | } else { | 556 | } else { |
481 | lj_err_argt(L, idx, LUA_TSTRING); | 557 | lj_err_argt(L, idx, LUA_TSTRING); |
@@ -507,7 +583,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx) | |||
507 | } else if (tvisudata(o)) { | 583 | } else if (tvisudata(o)) { |
508 | return udataV(o)->len; | 584 | return udataV(o)->len; |
509 | } else if (tvisnumber(o)) { | 585 | } else if (tvisnumber(o)) { |
510 | GCstr *s = lj_str_fromnumber(L, o); | 586 | GCstr *s = lj_strfmt_number(L, o); |
511 | setstrV(L, o, s); | 587 | setstrV(L, o, s); |
512 | return s->len; | 588 | return s->len; |
513 | } else { | 589 | } else { |
@@ -532,7 +608,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx) | |||
532 | if (tvisudata(o)) | 608 | if (tvisudata(o)) |
533 | return uddata(udataV(o)); | 609 | return uddata(udataV(o)); |
534 | else if (tvislightud(o)) | 610 | else if (tvislightud(o)) |
535 | return lightudV(o); | 611 | return lightudV(G(L), o); |
536 | else | 612 | else |
537 | return NULL; | 613 | return NULL; |
538 | } | 614 | } |
@@ -545,17 +621,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx) | |||
545 | 621 | ||
546 | LUA_API const void *lua_topointer(lua_State *L, int idx) | 622 | LUA_API const void *lua_topointer(lua_State *L, int idx) |
547 | { | 623 | { |
548 | cTValue *o = index2adr(L, idx); | 624 | return lj_obj_ptr(G(L), index2adr(L, idx)); |
549 | if (tvisudata(o)) | ||
550 | return uddata(udataV(o)); | ||
551 | else if (tvislightud(o)) | ||
552 | return lightudV(o); | ||
553 | else if (tviscdata(o)) | ||
554 | return cdataptr(cdataV(o)); | ||
555 | else if (tvisgcv(o)) | ||
556 | return gcV(o); | ||
557 | else | ||
558 | return NULL; | ||
559 | } | 625 | } |
560 | 626 | ||
561 | /* -- Stack setters (object creation) ------------------------------------- */ | 627 | /* -- Stack setters (object creation) ------------------------------------- */ |
@@ -606,7 +672,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt, | |||
606 | va_list argp) | 672 | va_list argp) |
607 | { | 673 | { |
608 | lj_gc_check(L); | 674 | lj_gc_check(L); |
609 | return lj_str_pushvf(L, fmt, argp); | 675 | return lj_strfmt_pushvf(L, fmt, argp); |
610 | } | 676 | } |
611 | 677 | ||
612 | LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) | 678 | LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) |
@@ -615,7 +681,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) | |||
615 | va_list argp; | 681 | va_list argp; |
616 | lj_gc_check(L); | 682 | lj_gc_check(L); |
617 | va_start(argp, fmt); | 683 | va_start(argp, fmt); |
618 | ret = lj_str_pushvf(L, fmt, argp); | 684 | ret = lj_strfmt_pushvf(L, fmt, argp); |
619 | va_end(argp); | 685 | va_end(argp); |
620 | return ret; | 686 | return ret; |
621 | } | 687 | } |
@@ -624,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n) | |||
624 | { | 690 | { |
625 | GCfunc *fn; | 691 | GCfunc *fn; |
626 | lj_gc_check(L); | 692 | lj_gc_check(L); |
627 | api_checknelems(L, n); | 693 | lj_checkapi_slot(n); |
628 | fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); | 694 | fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); |
629 | fn->c.f = f; | 695 | fn->c.f = f; |
630 | L->top -= n; | 696 | L->top -= n; |
631 | while (n--) | 697 | while (n--) |
632 | copyTV(L, &fn->c.upvalue[n], L->top+n); | 698 | copyTV(L, &fn->c.upvalue[n], L->top+n); |
633 | setfuncV(L, L->top, fn); | 699 | setfuncV(L, L->top, fn); |
634 | lua_assert(iswhite(obj2gco(fn))); | 700 | lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white"); |
635 | incr_top(L); | 701 | incr_top(L); |
636 | } | 702 | } |
637 | 703 | ||
@@ -643,16 +709,17 @@ LUA_API void lua_pushboolean(lua_State *L, int b) | |||
643 | 709 | ||
644 | LUA_API void lua_pushlightuserdata(lua_State *L, void *p) | 710 | LUA_API void lua_pushlightuserdata(lua_State *L, void *p) |
645 | { | 711 | { |
646 | setlightudV(L->top, checklightudptr(L, p)); | 712 | #if LJ_64 |
713 | p = lj_lightud_intern(L, p); | ||
714 | #endif | ||
715 | setrawlightudV(L->top, p); | ||
647 | incr_top(L); | 716 | incr_top(L); |
648 | } | 717 | } |
649 | 718 | ||
650 | LUA_API void lua_createtable(lua_State *L, int narray, int nrec) | 719 | LUA_API void lua_createtable(lua_State *L, int narray, int nrec) |
651 | { | 720 | { |
652 | GCtab *t; | ||
653 | lj_gc_check(L); | 721 | lj_gc_check(L); |
654 | t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); | 722 | settabV(L, L->top, lj_tab_new_ah(L, narray, nrec)); |
655 | settabV(L, L->top, t); | ||
656 | incr_top(L); | 723 | incr_top(L); |
657 | } | 724 | } |
658 | 725 | ||
@@ -703,7 +770,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size) | |||
703 | 770 | ||
704 | LUA_API void lua_concat(lua_State *L, int n) | 771 | LUA_API void lua_concat(lua_State *L, int n) |
705 | { | 772 | { |
706 | api_checknelems(L, n); | 773 | lj_checkapi_slot(n); |
707 | if (n >= 2) { | 774 | if (n >= 2) { |
708 | n--; | 775 | n--; |
709 | do { | 776 | do { |
@@ -715,8 +782,8 @@ LUA_API void lua_concat(lua_State *L, int n) | |||
715 | n -= (int)(L->top - top); | 782 | n -= (int)(L->top - top); |
716 | L->top = top+2; | 783 | L->top = top+2; |
717 | lj_vm_call(L, top, 1+1); | 784 | lj_vm_call(L, top, 1+1); |
718 | L->top--; | 785 | L->top -= 1+LJ_FR2; |
719 | copyTV(L, L->top-1, L->top); | 786 | copyTV(L, L->top-1, L->top+LJ_FR2); |
720 | } while (--n > 0); | 787 | } while (--n > 0); |
721 | } else if (n == 0) { /* Push empty string. */ | 788 | } else if (n == 0) { /* Push empty string. */ |
722 | setstrV(L, L->top, &G(L)->strempty); | 789 | setstrV(L, L->top, &G(L)->strempty); |
@@ -729,30 +796,28 @@ LUA_API void lua_concat(lua_State *L, int n) | |||
729 | 796 | ||
730 | LUA_API void lua_gettable(lua_State *L, int idx) | 797 | LUA_API void lua_gettable(lua_State *L, int idx) |
731 | { | 798 | { |
732 | cTValue *v, *t = index2adr(L, idx); | 799 | cTValue *t = index2adr_check(L, idx); |
733 | api_checkvalidindex(L, t); | 800 | cTValue *v = lj_meta_tget(L, t, L->top-1); |
734 | v = lj_meta_tget(L, t, L->top-1); | ||
735 | if (v == NULL) { | 801 | if (v == NULL) { |
736 | L->top += 2; | 802 | L->top += 2; |
737 | lj_vm_call(L, L->top-2, 1+1); | 803 | lj_vm_call(L, L->top-2, 1+1); |
738 | L->top -= 2; | 804 | L->top -= 2+LJ_FR2; |
739 | v = L->top+1; | 805 | v = L->top+1+LJ_FR2; |
740 | } | 806 | } |
741 | copyTV(L, L->top-1, v); | 807 | copyTV(L, L->top-1, v); |
742 | } | 808 | } |
743 | 809 | ||
744 | LUA_API void lua_getfield(lua_State *L, int idx, const char *k) | 810 | LUA_API void lua_getfield(lua_State *L, int idx, const char *k) |
745 | { | 811 | { |
746 | cTValue *v, *t = index2adr(L, idx); | 812 | cTValue *v, *t = index2adr_check(L, idx); |
747 | TValue key; | 813 | TValue key; |
748 | api_checkvalidindex(L, t); | ||
749 | setstrV(L, &key, lj_str_newz(L, k)); | 814 | setstrV(L, &key, lj_str_newz(L, k)); |
750 | v = lj_meta_tget(L, t, &key); | 815 | v = lj_meta_tget(L, t, &key); |
751 | if (v == NULL) { | 816 | if (v == NULL) { |
752 | L->top += 2; | 817 | L->top += 2; |
753 | lj_vm_call(L, L->top-2, 1+1); | 818 | lj_vm_call(L, L->top-2, 1+1); |
754 | L->top -= 2; | 819 | L->top -= 2+LJ_FR2; |
755 | v = L->top+1; | 820 | v = L->top+1+LJ_FR2; |
756 | } | 821 | } |
757 | copyTV(L, L->top, v); | 822 | copyTV(L, L->top, v); |
758 | incr_top(L); | 823 | incr_top(L); |
@@ -761,14 +826,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k) | |||
761 | LUA_API void lua_rawget(lua_State *L, int idx) | 826 | LUA_API void lua_rawget(lua_State *L, int idx) |
762 | { | 827 | { |
763 | cTValue *t = index2adr(L, idx); | 828 | cTValue *t = index2adr(L, idx); |
764 | api_check(L, tvistab(t)); | 829 | lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); |
765 | copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); | 830 | copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); |
766 | } | 831 | } |
767 | 832 | ||
768 | LUA_API void lua_rawgeti(lua_State *L, int idx, int n) | 833 | LUA_API void lua_rawgeti(lua_State *L, int idx, int n) |
769 | { | 834 | { |
770 | cTValue *v, *t = index2adr(L, idx); | 835 | cTValue *v, *t = index2adr(L, idx); |
771 | api_check(L, tvistab(t)); | 836 | lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); |
772 | v = lj_tab_getint(tabV(t), n); | 837 | v = lj_tab_getint(tabV(t), n); |
773 | if (v) { | 838 | if (v) { |
774 | copyTV(L, L->top, v); | 839 | copyTV(L, L->top, v); |
@@ -810,8 +875,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field) | |||
810 | 875 | ||
811 | LUA_API void lua_getfenv(lua_State *L, int idx) | 876 | LUA_API void lua_getfenv(lua_State *L, int idx) |
812 | { | 877 | { |
813 | cTValue *o = index2adr(L, idx); | 878 | cTValue *o = index2adr_check(L, idx); |
814 | api_checkvalidindex(L, o); | ||
815 | if (tvisfunc(o)) { | 879 | if (tvisfunc(o)) { |
816 | settabV(L, L->top, tabref(funcV(o)->c.env)); | 880 | settabV(L, L->top, tabref(funcV(o)->c.env)); |
817 | } else if (tvisudata(o)) { | 881 | } else if (tvisudata(o)) { |
@@ -828,12 +892,14 @@ LUA_API int lua_next(lua_State *L, int idx) | |||
828 | { | 892 | { |
829 | cTValue *t = index2adr(L, idx); | 893 | cTValue *t = index2adr(L, idx); |
830 | int more; | 894 | int more; |
831 | api_check(L, tvistab(t)); | 895 | lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); |
832 | more = lj_tab_next(L, tabV(t), L->top-1); | 896 | more = lj_tab_next(tabV(t), L->top-1, L->top-1); |
833 | if (more) { | 897 | if (more > 0) { |
834 | incr_top(L); /* Return new key and value slot. */ | 898 | incr_top(L); /* Return new key and value slot. */ |
835 | } else { /* End of traversal. */ | 899 | } else if (!more) { /* End of traversal. */ |
836 | L->top--; /* Remove key slot. */ | 900 | L->top--; /* Remove key slot. */ |
901 | } else { | ||
902 | lj_err_msg(L, LJ_ERR_NEXTIDX); | ||
837 | } | 903 | } |
838 | return more; | 904 | return more; |
839 | } | 905 | } |
@@ -854,7 +920,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n) | |||
854 | { | 920 | { |
855 | GCfunc *fn = funcV(index2adr(L, idx)); | 921 | GCfunc *fn = funcV(index2adr(L, idx)); |
856 | n--; | 922 | n--; |
857 | api_check(L, (uint32_t)n < fn->l.nupvalues); | 923 | lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n); |
858 | return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : | 924 | return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : |
859 | (void *)&fn->c.upvalue[n]; | 925 | (void *)&fn->c.upvalue[n]; |
860 | } | 926 | } |
@@ -864,13 +930,15 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) | |||
864 | GCfunc *fn1 = funcV(index2adr(L, idx1)); | 930 | GCfunc *fn1 = funcV(index2adr(L, idx1)); |
865 | GCfunc *fn2 = funcV(index2adr(L, idx2)); | 931 | GCfunc *fn2 = funcV(index2adr(L, idx2)); |
866 | n1--; n2--; | 932 | n1--; n2--; |
867 | api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues); | 933 | lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1); |
868 | api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues); | 934 | lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2); |
935 | lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1); | ||
936 | lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1); | ||
869 | setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); | 937 | setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); |
870 | lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); | 938 | lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); |
871 | } | 939 | } |
872 | 940 | ||
873 | LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | 941 | LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname) |
874 | { | 942 | { |
875 | cTValue *o = index2adr(L, idx); | 943 | cTValue *o = index2adr(L, idx); |
876 | if (tvisudata(o)) { | 944 | if (tvisudata(o)) { |
@@ -879,8 +947,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | |||
879 | if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) | 947 | if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) |
880 | return uddata(ud); | 948 | return uddata(ud); |
881 | } | 949 | } |
882 | lj_err_argtype(L, idx, tname); | 950 | return NULL; /* value is not a userdata with a metatable */ |
883 | return NULL; /* unreachable */ | 951 | } |
952 | |||
953 | LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | ||
954 | { | ||
955 | void *p = luaL_testudata(L, idx, tname); | ||
956 | if (!p) lj_err_argtype(L, idx, tname); | ||
957 | return p; | ||
884 | } | 958 | } |
885 | 959 | ||
886 | /* -- Object setters ------------------------------------------------------ */ | 960 | /* -- Object setters ------------------------------------------------------ */ |
@@ -888,19 +962,19 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) | |||
888 | LUA_API void lua_settable(lua_State *L, int idx) | 962 | LUA_API void lua_settable(lua_State *L, int idx) |
889 | { | 963 | { |
890 | TValue *o; | 964 | TValue *o; |
891 | cTValue *t = index2adr(L, idx); | 965 | cTValue *t = index2adr_check(L, idx); |
892 | api_checknelems(L, 2); | 966 | lj_checkapi_slot(2); |
893 | api_checkvalidindex(L, t); | ||
894 | o = lj_meta_tset(L, t, L->top-2); | 967 | o = lj_meta_tset(L, t, L->top-2); |
895 | if (o) { | 968 | if (o) { |
896 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ | 969 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ |
897 | copyTV(L, o, L->top-1); | ||
898 | L->top -= 2; | 970 | L->top -= 2; |
971 | copyTV(L, o, L->top+1); | ||
899 | } else { | 972 | } else { |
900 | L->top += 3; | 973 | TValue *base = L->top; |
901 | copyTV(L, L->top-1, L->top-6); | 974 | copyTV(L, base+2, base-3-2*LJ_FR2); |
902 | lj_vm_call(L, L->top-3, 0+1); | 975 | L->top = base+3; |
903 | L->top -= 3; | 976 | lj_vm_call(L, base, 0+1); |
977 | L->top -= 3+LJ_FR2; | ||
904 | } | 978 | } |
905 | } | 979 | } |
906 | 980 | ||
@@ -908,20 +982,19 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k) | |||
908 | { | 982 | { |
909 | TValue *o; | 983 | TValue *o; |
910 | TValue key; | 984 | TValue key; |
911 | cTValue *t = index2adr(L, idx); | 985 | cTValue *t = index2adr_check(L, idx); |
912 | api_checknelems(L, 1); | 986 | lj_checkapi_slot(1); |
913 | api_checkvalidindex(L, t); | ||
914 | setstrV(L, &key, lj_str_newz(L, k)); | 987 | setstrV(L, &key, lj_str_newz(L, k)); |
915 | o = lj_meta_tset(L, t, &key); | 988 | o = lj_meta_tset(L, t, &key); |
916 | if (o) { | 989 | if (o) { |
917 | L->top--; | ||
918 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ | 990 | /* NOBARRIER: lj_meta_tset ensures the table is not black. */ |
919 | copyTV(L, o, L->top); | 991 | copyTV(L, o, --L->top); |
920 | } else { | 992 | } else { |
921 | L->top += 3; | 993 | TValue *base = L->top; |
922 | copyTV(L, L->top-1, L->top-6); | 994 | copyTV(L, base+2, base-3-2*LJ_FR2); |
923 | lj_vm_call(L, L->top-3, 0+1); | 995 | L->top = base+3; |
924 | L->top -= 2; | 996 | lj_vm_call(L, base, 0+1); |
997 | L->top -= 2+LJ_FR2; | ||
925 | } | 998 | } |
926 | } | 999 | } |
927 | 1000 | ||
@@ -929,7 +1002,7 @@ LUA_API void lua_rawset(lua_State *L, int idx) | |||
929 | { | 1002 | { |
930 | GCtab *t = tabV(index2adr(L, idx)); | 1003 | GCtab *t = tabV(index2adr(L, idx)); |
931 | TValue *dst, *key; | 1004 | TValue *dst, *key; |
932 | api_checknelems(L, 2); | 1005 | lj_checkapi_slot(2); |
933 | key = L->top-2; | 1006 | key = L->top-2; |
934 | dst = lj_tab_set(L, t, key); | 1007 | dst = lj_tab_set(L, t, key); |
935 | copyTV(L, dst, key+1); | 1008 | copyTV(L, dst, key+1); |
@@ -941,7 +1014,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n) | |||
941 | { | 1014 | { |
942 | GCtab *t = tabV(index2adr(L, idx)); | 1015 | GCtab *t = tabV(index2adr(L, idx)); |
943 | TValue *dst, *src; | 1016 | TValue *dst, *src; |
944 | api_checknelems(L, 1); | 1017 | lj_checkapi_slot(1); |
945 | dst = lj_tab_setint(L, t, n); | 1018 | dst = lj_tab_setint(L, t, n); |
946 | src = L->top-1; | 1019 | src = L->top-1; |
947 | copyTV(L, dst, src); | 1020 | copyTV(L, dst, src); |
@@ -953,13 +1026,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) | |||
953 | { | 1026 | { |
954 | global_State *g; | 1027 | global_State *g; |
955 | GCtab *mt; | 1028 | GCtab *mt; |
956 | cTValue *o = index2adr(L, idx); | 1029 | cTValue *o = index2adr_check(L, idx); |
957 | api_checknelems(L, 1); | 1030 | lj_checkapi_slot(1); |
958 | api_checkvalidindex(L, o); | ||
959 | if (tvisnil(L->top-1)) { | 1031 | if (tvisnil(L->top-1)) { |
960 | mt = NULL; | 1032 | mt = NULL; |
961 | } else { | 1033 | } else { |
962 | api_check(L, tvistab(L->top-1)); | 1034 | lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); |
963 | mt = tabV(L->top-1); | 1035 | mt = tabV(L->top-1); |
964 | } | 1036 | } |
965 | g = G(L); | 1037 | g = G(L); |
@@ -988,13 +1060,18 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) | |||
988 | return 1; | 1060 | return 1; |
989 | } | 1061 | } |
990 | 1062 | ||
1063 | LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) | ||
1064 | { | ||
1065 | lua_getfield(L, LUA_REGISTRYINDEX, tname); | ||
1066 | lua_setmetatable(L, -2); | ||
1067 | } | ||
1068 | |||
991 | LUA_API int lua_setfenv(lua_State *L, int idx) | 1069 | LUA_API int lua_setfenv(lua_State *L, int idx) |
992 | { | 1070 | { |
993 | cTValue *o = index2adr(L, idx); | 1071 | cTValue *o = index2adr_check(L, idx); |
994 | GCtab *t; | 1072 | GCtab *t; |
995 | api_checknelems(L, 1); | 1073 | lj_checkapi_slot(1); |
996 | api_checkvalidindex(L, o); | 1074 | lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); |
997 | api_check(L, tvistab(L->top-1)); | ||
998 | t = tabV(L->top-1); | 1075 | t = tabV(L->top-1); |
999 | if (tvisfunc(o)) { | 1076 | if (tvisfunc(o)) { |
1000 | setgcref(funcV(o)->c.env, obj2gco(t)); | 1077 | setgcref(funcV(o)->c.env, obj2gco(t)); |
@@ -1017,7 +1094,7 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) | |||
1017 | TValue *val; | 1094 | TValue *val; |
1018 | GCobj *o; | 1095 | GCobj *o; |
1019 | const char *name; | 1096 | const char *name; |
1020 | api_checknelems(L, 1); | 1097 | lj_checkapi_slot(1); |
1021 | name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); | 1098 | name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); |
1022 | if (name) { | 1099 | if (name) { |
1023 | L->top--; | 1100 | L->top--; |
@@ -1029,11 +1106,25 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) | |||
1029 | 1106 | ||
1030 | /* -- Calls --------------------------------------------------------------- */ | 1107 | /* -- Calls --------------------------------------------------------------- */ |
1031 | 1108 | ||
1109 | #if LJ_FR2 | ||
1110 | static TValue *api_call_base(lua_State *L, int nargs) | ||
1111 | { | ||
1112 | TValue *o = L->top, *base = o - nargs; | ||
1113 | L->top = o+1; | ||
1114 | for (; o > base; o--) copyTV(L, o, o-1); | ||
1115 | setnilV(o); | ||
1116 | return o+1; | ||
1117 | } | ||
1118 | #else | ||
1119 | #define api_call_base(L, nargs) (L->top - (nargs)) | ||
1120 | #endif | ||
1121 | |||
1032 | LUA_API void lua_call(lua_State *L, int nargs, int nresults) | 1122 | LUA_API void lua_call(lua_State *L, int nargs, int nresults) |
1033 | { | 1123 | { |
1034 | api_check(L, L->status == 0 || L->status == LUA_ERRERR); | 1124 | lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, |
1035 | api_checknelems(L, nargs+1); | 1125 | "thread called in wrong state %d", L->status); |
1036 | lj_vm_call(L, L->top - nargs, nresults+1); | 1126 | lj_checkapi_slot(nargs+1); |
1127 | lj_vm_call(L, api_call_base(L, nargs), nresults+1); | ||
1037 | } | 1128 | } |
1038 | 1129 | ||
1039 | LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | 1130 | LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) |
@@ -1042,16 +1133,16 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | |||
1042 | uint8_t oldh = hook_save(g); | 1133 | uint8_t oldh = hook_save(g); |
1043 | ptrdiff_t ef; | 1134 | ptrdiff_t ef; |
1044 | int status; | 1135 | int status; |
1045 | api_check(L, L->status == 0 || L->status == LUA_ERRERR); | 1136 | lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, |
1046 | api_checknelems(L, nargs+1); | 1137 | "thread called in wrong state %d", L->status); |
1138 | lj_checkapi_slot(nargs+1); | ||
1047 | if (errfunc == 0) { | 1139 | if (errfunc == 0) { |
1048 | ef = 0; | 1140 | ef = 0; |
1049 | } else { | 1141 | } else { |
1050 | cTValue *o = stkindex2adr(L, errfunc); | 1142 | cTValue *o = index2adr_stack(L, errfunc); |
1051 | api_checkvalidindex(L, o); | ||
1052 | ef = savestack(L, o); | 1143 | ef = savestack(L, o); |
1053 | } | 1144 | } |
1054 | status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); | 1145 | status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef); |
1055 | if (status) hook_restore(g, oldh); | 1146 | if (status) hook_restore(g, oldh); |
1056 | return status; | 1147 | return status; |
1057 | } | 1148 | } |
@@ -1059,12 +1150,17 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) | |||
1059 | static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) | 1150 | static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) |
1060 | { | 1151 | { |
1061 | GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); | 1152 | GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); |
1153 | TValue *top = L->top; | ||
1062 | fn->c.f = func; | 1154 | fn->c.f = func; |
1063 | setfuncV(L, L->top, fn); | 1155 | setfuncV(L, top++, fn); |
1064 | setlightudV(L->top+1, checklightudptr(L, ud)); | 1156 | if (LJ_FR2) setnilV(top++); |
1157 | #if LJ_64 | ||
1158 | ud = lj_lightud_intern(L, ud); | ||
1159 | #endif | ||
1160 | setrawlightudV(top++, ud); | ||
1065 | cframe_nres(L->cframe) = 1+0; /* Zero results. */ | 1161 | cframe_nres(L->cframe) = 1+0; /* Zero results. */ |
1066 | L->top += 2; | 1162 | L->top = top; |
1067 | return L->top-1; /* Now call the newly allocated C function. */ | 1163 | return top-1; /* Now call the newly allocated C function. */ |
1068 | } | 1164 | } |
1069 | 1165 | ||
1070 | LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) | 1166 | LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) |
@@ -1072,7 +1168,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) | |||
1072 | global_State *g = G(L); | 1168 | global_State *g = G(L); |
1073 | uint8_t oldh = hook_save(g); | 1169 | uint8_t oldh = hook_save(g); |
1074 | int status; | 1170 | int status; |
1075 | api_check(L, L->status == 0 || L->status == LUA_ERRERR); | 1171 | lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, |
1172 | "thread called in wrong state %d", L->status); | ||
1076 | status = lj_vm_cpcall(L, func, ud, cpcall); | 1173 | status = lj_vm_cpcall(L, func, ud, cpcall); |
1077 | if (status) hook_restore(g, oldh); | 1174 | if (status) hook_restore(g, oldh); |
1078 | return status; | 1175 | return status; |
@@ -1081,10 +1178,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) | |||
1081 | LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) | 1178 | LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) |
1082 | { | 1179 | { |
1083 | if (luaL_getmetafield(L, idx, field)) { | 1180 | if (luaL_getmetafield(L, idx, field)) { |
1084 | TValue *base = L->top--; | 1181 | TValue *top = L->top--; |
1085 | copyTV(L, base, index2adr(L, idx)); | 1182 | if (LJ_FR2) setnilV(top++); |
1086 | L->top = base+1; | 1183 | copyTV(L, top++, index2adr(L, idx)); |
1087 | lj_vm_call(L, base, 1+1); | 1184 | L->top = top; |
1185 | lj_vm_call(L, top-1, 1+1); | ||
1088 | return 1; | 1186 | return 1; |
1089 | } | 1187 | } |
1090 | return 0; | 1188 | return 0; |
@@ -1092,6 +1190,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) | |||
1092 | 1190 | ||
1093 | /* -- Coroutine yield and resume ------------------------------------------ */ | 1191 | /* -- Coroutine yield and resume ------------------------------------------ */ |
1094 | 1192 | ||
1193 | LUA_API int lua_isyieldable(lua_State *L) | ||
1194 | { | ||
1195 | return cframe_canyield(L->cframe); | ||
1196 | } | ||
1197 | |||
1095 | LUA_API int lua_yield(lua_State *L, int nresults) | 1198 | LUA_API int lua_yield(lua_State *L, int nresults) |
1096 | { | 1199 | { |
1097 | void *cf = L->cframe; | 1200 | void *cf = L->cframe; |
@@ -1111,13 +1214,16 @@ LUA_API int lua_yield(lua_State *L, int nresults) | |||
1111 | } else { /* Yield from hook: add a pseudo-frame. */ | 1214 | } else { /* Yield from hook: add a pseudo-frame. */ |
1112 | TValue *top = L->top; | 1215 | TValue *top = L->top; |
1113 | hook_leave(g); | 1216 | hook_leave(g); |
1114 | top->u64 = cframe_multres(cf); | 1217 | (top++)->u64 = cframe_multres(cf); |
1115 | setcont(top+1, lj_cont_hook); | 1218 | setcont(top, lj_cont_hook); |
1116 | setframe_pc(top+1, cframe_pc(cf)-1); | 1219 | if (LJ_FR2) top++; |
1117 | setframe_gc(top+2, obj2gco(L)); | 1220 | setframe_pc(top, cframe_pc(cf)-1); |
1118 | setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT); | 1221 | top++; |
1119 | L->top = L->base = top+3; | 1222 | setframe_gc(top, obj2gco(L), LJ_TTHREAD); |
1120 | #if LJ_TARGET_X64 | 1223 | if (LJ_FR2) top++; |
1224 | setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); | ||
1225 | L->top = L->base = top+1; | ||
1226 | #if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS | ||
1121 | lj_err_throw(L, LUA_YIELD); | 1227 | lj_err_throw(L, LUA_YIELD); |
1122 | #else | 1228 | #else |
1123 | L->cframe = NULL; | 1229 | L->cframe = NULL; |
@@ -1133,7 +1239,9 @@ LUA_API int lua_yield(lua_State *L, int nresults) | |||
1133 | LUA_API int lua_resume(lua_State *L, int nargs) | 1239 | LUA_API int lua_resume(lua_State *L, int nargs) |
1134 | { | 1240 | { |
1135 | if (L->cframe == NULL && L->status <= LUA_YIELD) | 1241 | if (L->cframe == NULL && L->status <= LUA_YIELD) |
1136 | return lj_vm_resume(L, L->top - nargs, 0, 0); | 1242 | return lj_vm_resume(L, |
1243 | L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs, | ||
1244 | 0, 0); | ||
1137 | L->top = L->base; | 1245 | L->top = L->base; |
1138 | setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); | 1246 | setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); |
1139 | incr_top(L); | 1247 | incr_top(L); |
@@ -1163,7 +1271,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data) | |||
1163 | res = (int)(g->gc.total & 0x3ff); | 1271 | res = (int)(g->gc.total & 0x3ff); |
1164 | break; | 1272 | break; |
1165 | case LUA_GCSTEP: { | 1273 | case LUA_GCSTEP: { |
1166 | MSize a = (MSize)data << 10; | 1274 | GCSize a = (GCSize)data << 10; |
1167 | g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; | 1275 | g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; |
1168 | while (g->gc.total >= g->gc.threshold) | 1276 | while (g->gc.total >= g->gc.threshold) |
1169 | if (lj_gc_step(L) > 0) { | 1277 | if (lj_gc_step(L) > 0) { |
@@ -1180,6 +1288,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data) | |||
1180 | res = (int)(g->gc.stepmul); | 1288 | res = (int)(g->gc.stepmul); |
1181 | g->gc.stepmul = (MSize)data; | 1289 | g->gc.stepmul = (MSize)data; |
1182 | break; | 1290 | break; |
1291 | case LUA_GCISRUNNING: | ||
1292 | res = (g->gc.threshold != LJ_MAX_MEM); | ||
1293 | break; | ||
1183 | default: | 1294 | default: |
1184 | res = -1; /* Invalid option. */ | 1295 | res = -1; /* Invalid option. */ |
1185 | } | 1296 | } |
diff --git a/src/lj_arch.h b/src/lj_arch.h index db46f886..cc4eae72 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -8,6 +8,8 @@ | |||
8 | 8 | ||
9 | #include "lua.h" | 9 | #include "lua.h" |
10 | 10 | ||
11 | /* -- Target definitions -------------------------------------------------- */ | ||
12 | |||
11 | /* Target endianess. */ | 13 | /* Target endianess. */ |
12 | #define LUAJIT_LE 0 | 14 | #define LUAJIT_LE 0 |
13 | #define LUAJIT_BE 1 | 15 | #define LUAJIT_BE 1 |
@@ -19,12 +21,16 @@ | |||
19 | #define LUAJIT_ARCH_x64 2 | 21 | #define LUAJIT_ARCH_x64 2 |
20 | #define LUAJIT_ARCH_ARM 3 | 22 | #define LUAJIT_ARCH_ARM 3 |
21 | #define LUAJIT_ARCH_arm 3 | 23 | #define LUAJIT_ARCH_arm 3 |
22 | #define LUAJIT_ARCH_PPC 4 | 24 | #define LUAJIT_ARCH_ARM64 4 |
23 | #define LUAJIT_ARCH_ppc 4 | 25 | #define LUAJIT_ARCH_arm64 4 |
24 | #define LUAJIT_ARCH_PPCSPE 5 | 26 | #define LUAJIT_ARCH_PPC 5 |
25 | #define LUAJIT_ARCH_ppcspe 5 | 27 | #define LUAJIT_ARCH_ppc 5 |
26 | #define LUAJIT_ARCH_MIPS 6 | 28 | #define LUAJIT_ARCH_MIPS 6 |
27 | #define LUAJIT_ARCH_mips 6 | 29 | #define LUAJIT_ARCH_mips 6 |
30 | #define LUAJIT_ARCH_MIPS32 6 | ||
31 | #define LUAJIT_ARCH_mips32 6 | ||
32 | #define LUAJIT_ARCH_MIPS64 7 | ||
33 | #define LUAJIT_ARCH_mips64 7 | ||
28 | 34 | ||
29 | /* Target OS. */ | 35 | /* Target OS. */ |
30 | #define LUAJIT_OS_OTHER 0 | 36 | #define LUAJIT_OS_OTHER 0 |
@@ -34,6 +40,14 @@ | |||
34 | #define LUAJIT_OS_BSD 4 | 40 | #define LUAJIT_OS_BSD 4 |
35 | #define LUAJIT_OS_POSIX 5 | 41 | #define LUAJIT_OS_POSIX 5 |
36 | 42 | ||
43 | /* Number mode. */ | ||
44 | #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ | ||
45 | #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ | ||
46 | #define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ | ||
47 | #define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ | ||
48 | |||
49 | /* -- Target detection ---------------------------------------------------- */ | ||
50 | |||
37 | /* Select native target if no target defined. */ | 51 | /* Select native target if no target defined. */ |
38 | #ifndef LUAJIT_TARGET | 52 | #ifndef LUAJIT_TARGET |
39 | 53 | ||
@@ -43,14 +57,14 @@ | |||
43 | #define LUAJIT_TARGET LUAJIT_ARCH_X64 | 57 | #define LUAJIT_TARGET LUAJIT_ARCH_X64 |
44 | #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) | 58 | #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) |
45 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM | 59 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM |
60 | #elif defined(__aarch64__) | ||
61 | #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 | ||
46 | #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) | 62 | #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) |
47 | #ifdef __NO_FPRS__ | ||
48 | #define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE | ||
49 | #else | ||
50 | #define LUAJIT_TARGET LUAJIT_ARCH_PPC | 63 | #define LUAJIT_TARGET LUAJIT_ARCH_PPC |
51 | #endif | 64 | #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) |
65 | #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 | ||
52 | #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) | 66 | #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) |
53 | #define LUAJIT_TARGET LUAJIT_ARCH_MIPS | 67 | #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 |
54 | #else | 68 | #else |
55 | #error "No support for this architecture (yet)" | 69 | #error "No support for this architecture (yet)" |
56 | #endif | 70 | #endif |
@@ -65,12 +79,16 @@ | |||
65 | #elif defined(__linux__) | 79 | #elif defined(__linux__) |
66 | #define LUAJIT_OS LUAJIT_OS_LINUX | 80 | #define LUAJIT_OS LUAJIT_OS_LINUX |
67 | #elif defined(__MACH__) && defined(__APPLE__) | 81 | #elif defined(__MACH__) && defined(__APPLE__) |
82 | #include "TargetConditionals.h" | ||
68 | #define LUAJIT_OS LUAJIT_OS_OSX | 83 | #define LUAJIT_OS LUAJIT_OS_OSX |
69 | #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ | 84 | #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ |
70 | defined(__NetBSD__) || defined(__OpenBSD__) || \ | 85 | defined(__NetBSD__) || defined(__OpenBSD__) || \ |
71 | defined(__DragonFly__)) && !defined(__ORBIS__) | 86 | defined(__DragonFly__)) && !defined(__ORBIS__) |
72 | #define LUAJIT_OS LUAJIT_OS_BSD | 87 | #define LUAJIT_OS LUAJIT_OS_BSD |
73 | #elif (defined(__sun__) && defined(__svr4__)) | 88 | #elif (defined(__sun__) && defined(__svr4__)) |
89 | #define LJ_TARGET_SOLARIS 1 | ||
90 | #define LUAJIT_OS LUAJIT_OS_POSIX | ||
91 | #elif defined(__HAIKU__) | ||
74 | #define LUAJIT_OS LUAJIT_OS_POSIX | 92 | #define LUAJIT_OS LUAJIT_OS_POSIX |
75 | #elif defined(__CYGWIN__) | 93 | #elif defined(__CYGWIN__) |
76 | #define LJ_TARGET_CYGWIN 1 | 94 | #define LJ_TARGET_CYGWIN 1 |
@@ -99,10 +117,16 @@ | |||
99 | #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) | 117 | #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) |
100 | #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) | 118 | #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) |
101 | #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) | 119 | #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) |
102 | #define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM) | 120 | #define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD) |
103 | #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) | 121 | #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) |
104 | #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX | 122 | #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX |
105 | 123 | ||
124 | #if TARGET_OS_IPHONE | ||
125 | #define LJ_TARGET_IOS 1 | ||
126 | #else | ||
127 | #define LJ_TARGET_IOS 0 | ||
128 | #endif | ||
129 | |||
106 | #ifdef __CELLOS_LV2__ | 130 | #ifdef __CELLOS_LV2__ |
107 | #define LJ_TARGET_PS3 1 | 131 | #define LJ_TARGET_PS3 1 |
108 | #define LJ_TARGET_CONSOLE 1 | 132 | #define LJ_TARGET_CONSOLE 1 |
@@ -125,10 +149,20 @@ | |||
125 | #define LJ_TARGET_CONSOLE 1 | 149 | #define LJ_TARGET_CONSOLE 1 |
126 | #endif | 150 | #endif |
127 | 151 | ||
128 | #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ | 152 | #ifdef _DURANGO |
129 | #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ | 153 | #define LJ_TARGET_XBOXONE 1 |
130 | #define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ | 154 | #define LJ_TARGET_CONSOLE 1 |
131 | #define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ | 155 | #define LJ_TARGET_GC64 1 |
156 | #endif | ||
157 | |||
158 | #ifdef _UWP | ||
159 | #define LJ_TARGET_UWP 1 | ||
160 | #if LUAJIT_TARGET == LUAJIT_ARCH_X64 | ||
161 | #define LJ_TARGET_GC64 1 | ||
162 | #endif | ||
163 | #endif | ||
164 | |||
165 | /* -- Arch-specific settings ---------------------------------------------- */ | ||
132 | 166 | ||
133 | /* Set target architecture properties. */ | 167 | /* Set target architecture properties. */ |
134 | #if LUAJIT_TARGET == LUAJIT_ARCH_X86 | 168 | #if LUAJIT_TARGET == LUAJIT_ARCH_X86 |
@@ -136,14 +170,10 @@ | |||
136 | #define LJ_ARCH_NAME "x86" | 170 | #define LJ_ARCH_NAME "x86" |
137 | #define LJ_ARCH_BITS 32 | 171 | #define LJ_ARCH_BITS 32 |
138 | #define LJ_ARCH_ENDIAN LUAJIT_LE | 172 | #define LJ_ARCH_ENDIAN LUAJIT_LE |
139 | #if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN | ||
140 | #define LJ_ABI_WIN 1 | ||
141 | #else | ||
142 | #define LJ_ABI_WIN 0 | ||
143 | #endif | ||
144 | #define LJ_TARGET_X86 1 | 173 | #define LJ_TARGET_X86 1 |
145 | #define LJ_TARGET_X86ORX64 1 | 174 | #define LJ_TARGET_X86ORX64 1 |
146 | #define LJ_TARGET_EHRETREG 0 | 175 | #define LJ_TARGET_EHRETREG 0 |
176 | #define LJ_TARGET_EHRAREG 8 | ||
147 | #define LJ_TARGET_MASKSHIFT 1 | 177 | #define LJ_TARGET_MASKSHIFT 1 |
148 | #define LJ_TARGET_MASKROT 1 | 178 | #define LJ_TARGET_MASKROT 1 |
149 | #define LJ_TARGET_UNALIGNED 1 | 179 | #define LJ_TARGET_UNALIGNED 1 |
@@ -154,19 +184,20 @@ | |||
154 | #define LJ_ARCH_NAME "x64" | 184 | #define LJ_ARCH_NAME "x64" |
155 | #define LJ_ARCH_BITS 64 | 185 | #define LJ_ARCH_BITS 64 |
156 | #define LJ_ARCH_ENDIAN LUAJIT_LE | 186 | #define LJ_ARCH_ENDIAN LUAJIT_LE |
157 | #if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN | ||
158 | #define LJ_ABI_WIN 1 | ||
159 | #else | ||
160 | #define LJ_ABI_WIN 0 | ||
161 | #endif | ||
162 | #define LJ_TARGET_X64 1 | 187 | #define LJ_TARGET_X64 1 |
163 | #define LJ_TARGET_X86ORX64 1 | 188 | #define LJ_TARGET_X86ORX64 1 |
164 | #define LJ_TARGET_EHRETREG 0 | 189 | #define LJ_TARGET_EHRETREG 0 |
190 | #define LJ_TARGET_EHRAREG 16 | ||
165 | #define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */ | 191 | #define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */ |
166 | #define LJ_TARGET_MASKSHIFT 1 | 192 | #define LJ_TARGET_MASKSHIFT 1 |
167 | #define LJ_TARGET_MASKROT 1 | 193 | #define LJ_TARGET_MASKROT 1 |
168 | #define LJ_TARGET_UNALIGNED 1 | 194 | #define LJ_TARGET_UNALIGNED 1 |
169 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL | 195 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL |
196 | #ifndef LUAJIT_DISABLE_GC64 | ||
197 | #define LJ_TARGET_GC64 1 | ||
198 | #elif LJ_TARGET_OSX | ||
199 | #error "macOS requires GC64 -- don't disable it" | ||
200 | #endif | ||
170 | 201 | ||
171 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM | 202 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM |
172 | 203 | ||
@@ -182,40 +213,105 @@ | |||
182 | #define LJ_ABI_EABI 1 | 213 | #define LJ_ABI_EABI 1 |
183 | #define LJ_TARGET_ARM 1 | 214 | #define LJ_TARGET_ARM 1 |
184 | #define LJ_TARGET_EHRETREG 0 | 215 | #define LJ_TARGET_EHRETREG 0 |
216 | #define LJ_TARGET_EHRAREG 14 | ||
185 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ | 217 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ |
186 | #define LJ_TARGET_MASKSHIFT 0 | 218 | #define LJ_TARGET_MASKSHIFT 0 |
187 | #define LJ_TARGET_MASKROT 1 | 219 | #define LJ_TARGET_MASKROT 1 |
188 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 220 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
189 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | 221 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
190 | 222 | ||
191 | #if __ARM_ARCH_8__ || __ARM_ARCH_8A__ | 223 | #if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ |
192 | #define LJ_ARCH_VERSION 80 | 224 | #define LJ_ARCH_VERSION 80 |
193 | #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ | 225 | #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ |
194 | #define LJ_ARCH_VERSION 70 | 226 | #define LJ_ARCH_VERSION 70 |
195 | #elif __ARM_ARCH_6T2__ | 227 | #elif __ARM_ARCH_6T2__ |
196 | #define LJ_ARCH_VERSION 61 | 228 | #define LJ_ARCH_VERSION 61 |
197 | #elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ | 229 | #elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ |
198 | #define LJ_ARCH_VERSION 60 | 230 | #define LJ_ARCH_VERSION 60 |
199 | #else | 231 | #else |
200 | #define LJ_ARCH_VERSION 50 | 232 | #define LJ_ARCH_VERSION 50 |
201 | #endif | 233 | #endif |
202 | 234 | ||
235 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 | ||
236 | |||
237 | #define LJ_ARCH_BITS 64 | ||
238 | #if defined(__AARCH64EB__) | ||
239 | #define LJ_ARCH_NAME "arm64be" | ||
240 | #define LJ_ARCH_ENDIAN LUAJIT_BE | ||
241 | #else | ||
242 | #define LJ_ARCH_NAME "arm64" | ||
243 | #define LJ_ARCH_ENDIAN LUAJIT_LE | ||
244 | #endif | ||
245 | #define LJ_TARGET_ARM64 1 | ||
246 | #define LJ_TARGET_EHRETREG 0 | ||
247 | #define LJ_TARGET_EHRAREG 30 | ||
248 | #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ | ||
249 | #define LJ_TARGET_MASKSHIFT 1 | ||
250 | #define LJ_TARGET_MASKROT 1 | ||
251 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | ||
252 | #define LJ_TARGET_GC64 1 | ||
253 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | ||
254 | |||
255 | #define LJ_ARCH_VERSION 80 | ||
256 | |||
203 | #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC | 257 | #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC |
204 | 258 | ||
205 | #define LJ_ARCH_NAME "ppc" | 259 | #ifndef LJ_ARCH_ENDIAN |
260 | #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ | ||
261 | #define LJ_ARCH_ENDIAN LUAJIT_LE | ||
262 | #else | ||
263 | #define LJ_ARCH_ENDIAN LUAJIT_BE | ||
264 | #endif | ||
265 | #endif | ||
266 | |||
206 | #if _LP64 | 267 | #if _LP64 |
207 | #define LJ_ARCH_BITS 64 | 268 | #define LJ_ARCH_BITS 64 |
269 | #if LJ_ARCH_ENDIAN == LUAJIT_LE | ||
270 | #define LJ_ARCH_NAME "ppc64le" | ||
271 | #else | ||
272 | #define LJ_ARCH_NAME "ppc64" | ||
273 | #endif | ||
208 | #else | 274 | #else |
209 | #define LJ_ARCH_BITS 32 | 275 | #define LJ_ARCH_BITS 32 |
276 | #define LJ_ARCH_NAME "ppc" | ||
277 | |||
278 | #if !defined(LJ_ARCH_HASFPU) | ||
279 | #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) | ||
280 | #define LJ_ARCH_HASFPU 0 | ||
281 | #else | ||
282 | #define LJ_ARCH_HASFPU 1 | ||
210 | #endif | 283 | #endif |
211 | #define LJ_ARCH_ENDIAN LUAJIT_BE | 284 | #endif |
285 | |||
286 | #if !defined(LJ_ABI_SOFTFP) | ||
287 | #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) | ||
288 | #define LJ_ABI_SOFTFP 1 | ||
289 | #else | ||
290 | #define LJ_ABI_SOFTFP 0 | ||
291 | #endif | ||
292 | #endif | ||
293 | #endif | ||
294 | |||
295 | #if LJ_ABI_SOFTFP | ||
296 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | ||
297 | #else | ||
298 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE | ||
299 | #endif | ||
300 | |||
212 | #define LJ_TARGET_PPC 1 | 301 | #define LJ_TARGET_PPC 1 |
213 | #define LJ_TARGET_EHRETREG 3 | 302 | #define LJ_TARGET_EHRETREG 3 |
303 | #define LJ_TARGET_EHRAREG 65 | ||
214 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ | 304 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ |
215 | #define LJ_TARGET_MASKSHIFT 0 | 305 | #define LJ_TARGET_MASKSHIFT 0 |
216 | #define LJ_TARGET_MASKROT 1 | 306 | #define LJ_TARGET_MASKROT 1 |
217 | #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ | 307 | #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ |
218 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE | 308 | |
309 | #if LJ_TARGET_CONSOLE | ||
310 | #define LJ_ARCH_PPC32ON64 1 | ||
311 | #define LJ_ARCH_NOFFI 1 | ||
312 | #elif LJ_ARCH_BITS == 64 | ||
313 | #error "No support for PPC64" | ||
314 | #endif | ||
219 | 315 | ||
220 | #if _ARCH_PWR7 | 316 | #if _ARCH_PWR7 |
221 | #define LJ_ARCH_VERSION 70 | 317 | #define LJ_ARCH_VERSION 70 |
@@ -230,10 +326,6 @@ | |||
230 | #else | 326 | #else |
231 | #define LJ_ARCH_VERSION 0 | 327 | #define LJ_ARCH_VERSION 0 |
232 | #endif | 328 | #endif |
233 | #if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE | ||
234 | #define LJ_ARCH_PPC64 1 | ||
235 | #define LJ_ARCH_NOFFI 1 | ||
236 | #endif | ||
237 | #if _ARCH_PPCSQ | 329 | #if _ARCH_PPCSQ |
238 | #define LJ_ARCH_SQRT 1 | 330 | #define LJ_ARCH_SQRT 1 |
239 | #endif | 331 | #endif |
@@ -247,44 +339,80 @@ | |||
247 | #define LJ_ARCH_XENON 1 | 339 | #define LJ_ARCH_XENON 1 |
248 | #endif | 340 | #endif |
249 | 341 | ||
250 | #elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE | 342 | #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 |
251 | |||
252 | #define LJ_ARCH_NAME "ppcspe" | ||
253 | #define LJ_ARCH_BITS 32 | ||
254 | #define LJ_ARCH_ENDIAN LUAJIT_BE | ||
255 | #ifndef LJ_ABI_SOFTFP | ||
256 | #define LJ_ABI_SOFTFP 1 | ||
257 | #endif | ||
258 | #define LJ_ABI_EABI 1 | ||
259 | #define LJ_TARGET_PPCSPE 1 | ||
260 | #define LJ_TARGET_EHRETREG 3 | ||
261 | #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ | ||
262 | #define LJ_TARGET_MASKSHIFT 0 | ||
263 | #define LJ_TARGET_MASKROT 1 | ||
264 | #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ | ||
265 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE | ||
266 | #define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */ | ||
267 | #define LJ_ARCH_NOJIT 1 | ||
268 | |||
269 | #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS | ||
270 | 343 | ||
271 | #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) | 344 | #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) |
345 | #if __mips_isa_rev >= 6 | ||
346 | #define LJ_TARGET_MIPSR6 1 | ||
347 | #define LJ_TARGET_UNALIGNED 1 | ||
348 | #endif | ||
349 | #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 | ||
350 | #if LJ_TARGET_MIPSR6 | ||
351 | #define LJ_ARCH_NAME "mips32r6el" | ||
352 | #else | ||
272 | #define LJ_ARCH_NAME "mipsel" | 353 | #define LJ_ARCH_NAME "mipsel" |
354 | #endif | ||
355 | #else | ||
356 | #if LJ_TARGET_MIPSR6 | ||
357 | #define LJ_ARCH_NAME "mips64r6el" | ||
358 | #else | ||
359 | #define LJ_ARCH_NAME "mips64el" | ||
360 | #endif | ||
361 | #endif | ||
273 | #define LJ_ARCH_ENDIAN LUAJIT_LE | 362 | #define LJ_ARCH_ENDIAN LUAJIT_LE |
274 | #else | 363 | #else |
364 | #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 | ||
365 | #if LJ_TARGET_MIPSR6 | ||
366 | #define LJ_ARCH_NAME "mips32r6" | ||
367 | #else | ||
275 | #define LJ_ARCH_NAME "mips" | 368 | #define LJ_ARCH_NAME "mips" |
369 | #endif | ||
370 | #else | ||
371 | #if LJ_TARGET_MIPSR6 | ||
372 | #define LJ_ARCH_NAME "mips64r6" | ||
373 | #else | ||
374 | #define LJ_ARCH_NAME "mips64" | ||
375 | #endif | ||
376 | #endif | ||
276 | #define LJ_ARCH_ENDIAN LUAJIT_BE | 377 | #define LJ_ARCH_ENDIAN LUAJIT_BE |
277 | #endif | 378 | #endif |
379 | |||
380 | #if !defined(LJ_ARCH_HASFPU) | ||
381 | #ifdef __mips_soft_float | ||
382 | #define LJ_ARCH_HASFPU 0 | ||
383 | #else | ||
384 | #define LJ_ARCH_HASFPU 1 | ||
385 | #endif | ||
386 | #endif | ||
387 | |||
388 | #if !defined(LJ_ABI_SOFTFP) | ||
389 | #ifdef __mips_soft_float | ||
390 | #define LJ_ABI_SOFTFP 1 | ||
391 | #else | ||
392 | #define LJ_ABI_SOFTFP 0 | ||
393 | #endif | ||
394 | #endif | ||
395 | |||
396 | #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 | ||
278 | #define LJ_ARCH_BITS 32 | 397 | #define LJ_ARCH_BITS 32 |
398 | #define LJ_TARGET_MIPS32 1 | ||
399 | #else | ||
400 | #define LJ_ARCH_BITS 64 | ||
401 | #define LJ_TARGET_MIPS64 1 | ||
402 | #define LJ_TARGET_GC64 1 | ||
403 | #endif | ||
279 | #define LJ_TARGET_MIPS 1 | 404 | #define LJ_TARGET_MIPS 1 |
280 | #define LJ_TARGET_EHRETREG 4 | 405 | #define LJ_TARGET_EHRETREG 4 |
406 | #define LJ_TARGET_EHRAREG 31 | ||
281 | #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ | 407 | #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ |
282 | #define LJ_TARGET_MASKSHIFT 1 | 408 | #define LJ_TARGET_MASKSHIFT 1 |
283 | #define LJ_TARGET_MASKROT 1 | 409 | #define LJ_TARGET_MASKROT 1 |
284 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 410 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
285 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE | 411 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
286 | 412 | ||
287 | #if _MIPS_ARCH_MIPS32R2 | 413 | #if LJ_TARGET_MIPSR6 |
414 | #define LJ_ARCH_VERSION 60 | ||
415 | #elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 | ||
288 | #define LJ_ARCH_VERSION 20 | 416 | #define LJ_ARCH_VERSION 20 |
289 | #else | 417 | #else |
290 | #define LJ_ARCH_VERSION 10 | 418 | #define LJ_ARCH_VERSION 10 |
@@ -294,9 +422,7 @@ | |||
294 | #error "No target architecture defined" | 422 | #error "No target architecture defined" |
295 | #endif | 423 | #endif |
296 | 424 | ||
297 | #ifndef LJ_PAGESIZE | 425 | /* -- Checks for requirements --------------------------------------------- */ |
298 | #define LJ_PAGESIZE 4096 | ||
299 | #endif | ||
300 | 426 | ||
301 | /* Check for minimum required compiler versions. */ | 427 | /* Check for minimum required compiler versions. */ |
302 | #if defined(__GNUC__) | 428 | #if defined(__GNUC__) |
@@ -312,6 +438,16 @@ | |||
312 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) | 438 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) |
313 | #error "Need at least GCC 4.2 or newer" | 439 | #error "Need at least GCC 4.2 or newer" |
314 | #endif | 440 | #endif |
441 | #elif LJ_TARGET_ARM64 | ||
442 | #if __clang__ | ||
443 | #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__) | ||
444 | #error "Need at least Clang 3.5 or newer" | ||
445 | #endif | ||
446 | #else | ||
447 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8) | ||
448 | #error "Need at least GCC 4.8 or newer" | ||
449 | #endif | ||
450 | #endif | ||
315 | #elif !LJ_TARGET_PS3 | 451 | #elif !LJ_TARGET_PS3 |
316 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) | 452 | #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) |
317 | #error "Need at least GCC 4.3 or newer" | 453 | #error "Need at least GCC 4.3 or newer" |
@@ -335,26 +471,35 @@ | |||
335 | #if !(__ARM_EABI__ || LJ_TARGET_IOS) | 471 | #if !(__ARM_EABI__ || LJ_TARGET_IOS) |
336 | #error "Only ARM EABI or iOS 3.0+ ABI is supported" | 472 | #error "Only ARM EABI or iOS 3.0+ ABI is supported" |
337 | #endif | 473 | #endif |
338 | #elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE | 474 | #elif LJ_TARGET_ARM64 |
339 | #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) | 475 | #if defined(_ILP32) |
340 | #error "No support for PowerPC CPUs without double-precision FPU" | 476 | #error "No support for ILP32 model on ARM64" |
341 | #endif | 477 | #endif |
478 | #elif LJ_TARGET_PPC | ||
342 | #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) | 479 | #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) |
343 | #error "No support for little-endian PowerPC" | 480 | #error "No support for little-endian PPC32" |
481 | #endif | ||
482 | #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) | ||
483 | #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" | ||
344 | #endif | 484 | #endif |
345 | #if defined(_LP64) | 485 | #elif LJ_TARGET_MIPS32 |
346 | #error "No support for PowerPC 64 bit mode" | 486 | #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) |
487 | #error "Only o32 ABI supported for MIPS32" | ||
347 | #endif | 488 | #endif |
348 | #elif LJ_TARGET_MIPS | 489 | #if LJ_TARGET_MIPSR6 |
349 | #if defined(__mips_soft_float) | 490 | /* Not that useful, since most available r6 CPUs are 64 bit. */ |
350 | #error "No support for MIPS CPUs without FPU" | 491 | #error "No support for MIPS32R6" |
351 | #endif | 492 | #endif |
352 | #if defined(_LP64) | 493 | #elif LJ_TARGET_MIPS64 |
353 | #error "No support for MIPS64" | 494 | #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) |
495 | /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ | ||
496 | #error "Only n64 ABI supported for MIPS64" | ||
354 | #endif | 497 | #endif |
355 | #endif | 498 | #endif |
356 | #endif | 499 | #endif |
357 | 500 | ||
501 | /* -- Derived defines ----------------------------------------------------- */ | ||
502 | |||
358 | /* Enable or disable the dual-number mode for the VM. */ | 503 | /* Enable or disable the dual-number mode for the VM. */ |
359 | #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ | 504 | #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ |
360 | (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) | 505 | (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) |
@@ -376,6 +521,20 @@ | |||
376 | #endif | 521 | #endif |
377 | #endif | 522 | #endif |
378 | 523 | ||
524 | /* 64 bit GC references. */ | ||
525 | #if LJ_TARGET_GC64 | ||
526 | #define LJ_GC64 1 | ||
527 | #else | ||
528 | #define LJ_GC64 0 | ||
529 | #endif | ||
530 | |||
531 | /* 2-slot frame info. */ | ||
532 | #if LJ_GC64 | ||
533 | #define LJ_FR2 1 | ||
534 | #else | ||
535 | #define LJ_FR2 0 | ||
536 | #endif | ||
537 | |||
379 | /* Disable or enable the JIT compiler. */ | 538 | /* Disable or enable the JIT compiler. */ |
380 | #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) | 539 | #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) |
381 | #define LJ_HASJIT 0 | 540 | #define LJ_HASJIT 0 |
@@ -390,6 +549,28 @@ | |||
390 | #define LJ_HASFFI 1 | 549 | #define LJ_HASFFI 1 |
391 | #endif | 550 | #endif |
392 | 551 | ||
552 | /* Disable or enable the string buffer extension. */ | ||
553 | #if defined(LUAJIT_DISABLE_BUFFER) | ||
554 | #define LJ_HASBUFFER 0 | ||
555 | #else | ||
556 | #define LJ_HASBUFFER 1 | ||
557 | #endif | ||
558 | |||
559 | #if defined(LUAJIT_DISABLE_PROFILE) | ||
560 | #define LJ_HASPROFILE 0 | ||
561 | #elif LJ_TARGET_POSIX | ||
562 | #define LJ_HASPROFILE 1 | ||
563 | #define LJ_PROFILE_SIGPROF 1 | ||
564 | #elif LJ_TARGET_PS3 | ||
565 | #define LJ_HASPROFILE 1 | ||
566 | #define LJ_PROFILE_PTHREAD 1 | ||
567 | #elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360 | ||
568 | #define LJ_HASPROFILE 1 | ||
569 | #define LJ_PROFILE_WTHREAD 1 | ||
570 | #else | ||
571 | #define LJ_HASPROFILE 0 | ||
572 | #endif | ||
573 | |||
393 | #ifndef LJ_ARCH_HASFPU | 574 | #ifndef LJ_ARCH_HASFPU |
394 | #define LJ_ARCH_HASFPU 1 | 575 | #define LJ_ARCH_HASFPU 1 |
395 | #endif | 576 | #endif |
@@ -397,6 +578,7 @@ | |||
397 | #define LJ_ABI_SOFTFP 0 | 578 | #define LJ_ABI_SOFTFP 0 |
398 | #endif | 579 | #endif |
399 | #define LJ_SOFTFP (!LJ_ARCH_HASFPU) | 580 | #define LJ_SOFTFP (!LJ_ARCH_HASFPU) |
581 | #define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) | ||
400 | 582 | ||
401 | #if LJ_ARCH_ENDIAN == LUAJIT_BE | 583 | #if LJ_ARCH_ENDIAN == LUAJIT_BE |
402 | #define LJ_LE 0 | 584 | #define LJ_LE 0 |
@@ -422,26 +604,52 @@ | |||
422 | #define LJ_TARGET_UNALIGNED 0 | 604 | #define LJ_TARGET_UNALIGNED 0 |
423 | #endif | 605 | #endif |
424 | 606 | ||
425 | /* Various workarounds for embedded operating systems. */ | 607 | #ifndef LJ_PAGESIZE |
426 | #if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 | 608 | #define LJ_PAGESIZE 4096 |
427 | #define LUAJIT_NO_LOG2 | ||
428 | #endif | 609 | #endif |
429 | #if defined(__symbian__) | 610 | |
430 | #define LUAJIT_NO_EXP2 | 611 | /* Various workarounds for embedded operating systems or weak C runtimes. */ |
612 | #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS | ||
613 | #define LUAJIT_NO_LOG2 | ||
431 | #endif | 614 | #endif |
432 | #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) | 615 | #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) |
433 | #define LJ_NO_SYSTEM 1 | 616 | #define LJ_NO_SYSTEM 1 |
434 | #endif | 617 | #endif |
435 | 618 | ||
436 | #if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ | 619 | #if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN |
437 | /* NYI: no support for compact unwind specification, yet. */ | 620 | #define LJ_ABI_WIN 1 |
438 | #define LUAJIT_NO_UNWIND 1 | 621 | #else |
622 | #define LJ_ABI_WIN 0 | ||
439 | #endif | 623 | #endif |
440 | 624 | ||
441 | #if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 | 625 | #if LJ_TARGET_WINDOWS |
626 | #if LJ_TARGET_UWP | ||
627 | #define LJ_WIN_VALLOC VirtualAllocFromApp | ||
628 | #define LJ_WIN_VPROTECT VirtualProtectFromApp | ||
629 | extern void *LJ_WIN_LOADLIBA(const char *path); | ||
630 | #else | ||
631 | #define LJ_WIN_VALLOC VirtualAlloc | ||
632 | #define LJ_WIN_VPROTECT VirtualProtect | ||
633 | #define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0) | ||
634 | #endif | ||
635 | #endif | ||
636 | |||
637 | #if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 | ||
442 | #define LJ_NO_UNWIND 1 | 638 | #define LJ_NO_UNWIND 1 |
443 | #endif | 639 | #endif |
444 | 640 | ||
641 | #if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__)))) | ||
642 | #define LJ_UNWIND_EXT 1 | ||
643 | #else | ||
644 | #define LJ_UNWIND_EXT 0 | ||
645 | #endif | ||
646 | |||
647 | #if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86) | ||
648 | #define LJ_UNWIND_JIT 1 | ||
649 | #else | ||
650 | #define LJ_UNWIND_JIT 0 | ||
651 | #endif | ||
652 | |||
445 | /* Compatibility with Lua 5.1 vs. 5.2. */ | 653 | /* Compatibility with Lua 5.1 vs. 5.2. */ |
446 | #ifdef LUAJIT_ENABLE_LUA52COMPAT | 654 | #ifdef LUAJIT_ENABLE_LUA52COMPAT |
447 | #define LJ_52 1 | 655 | #define LJ_52 1 |
@@ -449,4 +657,46 @@ | |||
449 | #define LJ_52 0 | 657 | #define LJ_52 0 |
450 | #endif | 658 | #endif |
451 | 659 | ||
660 | /* -- VM security --------------------------------------------------------- */ | ||
661 | |||
662 | /* Don't make any changes here. Instead build with: | ||
663 | ** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value" | ||
664 | ** | ||
665 | ** Important note to distro maintainers: DO NOT change the defaults for a | ||
666 | ** regular distro build -- neither upwards, nor downwards! | ||
667 | ** These build-time configurable security flags are intended for embedders | ||
668 | ** who may have specific needs wrt. security vs. performance. | ||
669 | */ | ||
670 | |||
671 | /* Security defaults. */ | ||
672 | #ifndef LUAJIT_SECURITY_PRNG | ||
673 | /* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */ | ||
674 | #define LUAJIT_SECURITY_PRNG 1 | ||
675 | #endif | ||
676 | |||
677 | #ifndef LUAJIT_SECURITY_STRHASH | ||
678 | /* String hash: 0 = sparse only, 1 = sparse + dense. */ | ||
679 | #define LUAJIT_SECURITY_STRHASH 1 | ||
680 | #endif | ||
681 | |||
682 | #ifndef LUAJIT_SECURITY_STRID | ||
683 | /* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */ | ||
684 | #define LUAJIT_SECURITY_STRID 1 | ||
685 | #endif | ||
686 | |||
687 | #ifndef LUAJIT_SECURITY_MCODE | ||
688 | /* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */ | ||
689 | #define LUAJIT_SECURITY_MCODE 1 | ||
690 | #endif | ||
691 | |||
692 | #define LJ_SECURITY_MODE \ | ||
693 | ( 0u \ | ||
694 | | ((LUAJIT_SECURITY_PRNG & 3) << 0) \ | ||
695 | | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \ | ||
696 | | ((LUAJIT_SECURITY_STRID & 3) << 4) \ | ||
697 | | ((LUAJIT_SECURITY_MCODE & 3) << 6) \ | ||
698 | ) | ||
699 | #define LJ_SECURITY_MODESTRING \ | ||
700 | "\004prng\007strhash\005strid\005mcode" | ||
701 | |||
452 | #endif | 702 | #endif |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9ff9215f..7abafbf4 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_buf.h" | ||
14 | #include "lj_str.h" | 15 | #include "lj_str.h" |
15 | #include "lj_tab.h" | 16 | #include "lj_tab.h" |
16 | #include "lj_frame.h" | 17 | #include "lj_frame.h" |
@@ -71,6 +72,7 @@ typedef struct ASMState { | |||
71 | IRRef snaprename; /* Rename highwater mark for snapshot check. */ | 72 | IRRef snaprename; /* Rename highwater mark for snapshot check. */ |
72 | SnapNo snapno; /* Current snapshot number. */ | 73 | SnapNo snapno; /* Current snapshot number. */ |
73 | SnapNo loopsnapno; /* Loop snapshot number. */ | 74 | SnapNo loopsnapno; /* Loop snapshot number. */ |
75 | int snapalloc; /* Current snapshot needs allocation. */ | ||
74 | BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ | 76 | BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ |
75 | 77 | ||
76 | IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ | 78 | IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ |
@@ -85,18 +87,25 @@ typedef struct ASMState { | |||
85 | 87 | ||
86 | MCode *mcbot; /* Bottom of reserved MCode. */ | 88 | MCode *mcbot; /* Bottom of reserved MCode. */ |
87 | MCode *mctop; /* Top of generated MCode. */ | 89 | MCode *mctop; /* Top of generated MCode. */ |
90 | MCode *mctoporig; /* Original top of generated MCode. */ | ||
88 | MCode *mcloop; /* Pointer to loop MCode (or NULL). */ | 91 | MCode *mcloop; /* Pointer to loop MCode (or NULL). */ |
89 | MCode *invmcp; /* Points to invertible loop branch (or NULL). */ | 92 | MCode *invmcp; /* Points to invertible loop branch (or NULL). */ |
90 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ | 93 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ |
91 | MCode *realign; /* Realign loop if not NULL. */ | 94 | MCode *realign; /* Realign loop if not NULL. */ |
92 | 95 | ||
93 | #ifdef RID_NUM_KREF | 96 | #ifdef RID_NUM_KREF |
94 | int32_t krefk[RID_NUM_KREF]; | 97 | intptr_t krefk[RID_NUM_KREF]; |
95 | #endif | 98 | #endif |
96 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ | 99 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ |
97 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ | 100 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ |
98 | } ASMState; | 101 | } ASMState; |
99 | 102 | ||
103 | #ifdef LUA_USE_ASSERT | ||
104 | #define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__) | ||
105 | #else | ||
106 | #define lj_assertA(c, ...) ((void)as) | ||
107 | #endif | ||
108 | |||
100 | #define IR(ref) (&as->ir[(ref)]) | 109 | #define IR(ref) (&as->ir[(ref)]) |
101 | 110 | ||
102 | #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ | 111 | #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ |
@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as) | |||
128 | #ifdef LUA_USE_ASSERT | 137 | #ifdef LUA_USE_ASSERT |
129 | if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { | 138 | if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { |
130 | IRIns *ir = IR(as->curins+1); | 139 | IRIns *ir = IR(as->curins+1); |
131 | fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, | 140 | lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp, |
132 | as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); | 141 | as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); |
133 | lua_assert(0); | ||
134 | } | 142 | } |
135 | #endif | 143 | #endif |
136 | if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); | 144 | if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); |
@@ -144,7 +152,7 @@ static LJ_AINLINE void checkmclim(ASMState *as) | |||
144 | #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) | 152 | #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) |
145 | #define ra_krefk(as, ref) (as->krefk[(ref)]) | 153 | #define ra_krefk(as, ref) (as->krefk[(ref)]) |
146 | 154 | ||
147 | static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) | 155 | static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k) |
148 | { | 156 | { |
149 | IRRef ref = (IRRef)(r - RID_MIN_KREF); | 157 | IRRef ref = (IRRef)(r - RID_MIN_KREF); |
150 | as->krefk[ref] = k; | 158 | as->krefk[ref] = k; |
@@ -171,6 +179,8 @@ IRFLDEF(FLOFS) | |||
171 | #include "lj_emit_x86.h" | 179 | #include "lj_emit_x86.h" |
172 | #elif LJ_TARGET_ARM | 180 | #elif LJ_TARGET_ARM |
173 | #include "lj_emit_arm.h" | 181 | #include "lj_emit_arm.h" |
182 | #elif LJ_TARGET_ARM64 | ||
183 | #include "lj_emit_arm64.h" | ||
174 | #elif LJ_TARGET_PPC | 184 | #elif LJ_TARGET_PPC |
175 | #include "lj_emit_ppc.h" | 185 | #include "lj_emit_ppc.h" |
176 | #elif LJ_TARGET_MIPS | 186 | #elif LJ_TARGET_MIPS |
@@ -179,6 +189,12 @@ IRFLDEF(FLOFS) | |||
179 | #error "Missing instruction emitter for target CPU" | 189 | #error "Missing instruction emitter for target CPU" |
180 | #endif | 190 | #endif |
181 | 191 | ||
192 | /* Generic load/store of register from/to stack slot. */ | ||
193 | #define emit_spload(as, ir, r, ofs) \ | ||
194 | emit_loadofs(as, ir, (r), RID_SP, (ofs)) | ||
195 | #define emit_spstore(as, ir, r, ofs) \ | ||
196 | emit_storeofs(as, ir, (r), RID_SP, (ofs)) | ||
197 | |||
182 | /* -- Register allocator debugging ---------------------------------------- */ | 198 | /* -- Register allocator debugging ---------------------------------------- */ |
183 | 199 | ||
184 | /* #define LUAJIT_DEBUG_RA */ | 200 | /* #define LUAJIT_DEBUG_RA */ |
@@ -236,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
236 | *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; | 252 | *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; |
237 | } else { | 253 | } else { |
238 | *p++ = '?'; | 254 | *p++ = '?'; |
239 | lua_assert(0); | 255 | lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt); |
240 | } | 256 | } |
241 | } else if (e[1] == 'f' || e[1] == 'i') { | 257 | } else if (e[1] == 'f' || e[1] == 'i') { |
242 | IRRef ref; | 258 | IRRef ref; |
@@ -254,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
254 | } else if (e[1] == 'x') { | 270 | } else if (e[1] == 'x') { |
255 | p += sprintf(p, "%08x", va_arg(argp, int32_t)); | 271 | p += sprintf(p, "%08x", va_arg(argp, int32_t)); |
256 | } else { | 272 | } else { |
257 | lua_assert(0); | 273 | lj_assertA(0, "bad debug format code"); |
258 | } | 274 | } |
259 | fmt = e+2; | 275 | fmt = e+2; |
260 | } | 276 | } |
@@ -313,37 +329,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
313 | Reg r; | 329 | Reg r; |
314 | if (ra_iskref(ref)) { | 330 | if (ra_iskref(ref)) { |
315 | r = ra_krefreg(ref); | 331 | r = ra_krefreg(ref); |
316 | lua_assert(!rset_test(as->freeset, r)); | 332 | lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r); |
317 | ra_free(as, r); | 333 | ra_free(as, r); |
318 | ra_modified(as, r); | 334 | ra_modified(as, r); |
335 | #if LJ_64 | ||
336 | emit_loadu64(as, r, ra_krefk(as, ref)); | ||
337 | #else | ||
319 | emit_loadi(as, r, ra_krefk(as, ref)); | 338 | emit_loadi(as, r, ra_krefk(as, ref)); |
339 | #endif | ||
320 | return r; | 340 | return r; |
321 | } | 341 | } |
322 | ir = IR(ref); | 342 | ir = IR(ref); |
323 | r = ir->r; | 343 | r = ir->r; |
324 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 344 | lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref); |
345 | lj_assertA(!ra_hasspill(ir->s), | ||
346 | "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s); | ||
325 | ra_free(as, r); | 347 | ra_free(as, r); |
326 | ra_modified(as, r); | 348 | ra_modified(as, r); |
327 | ir->r = RID_INIT; /* Do not keep any hint. */ | 349 | ir->r = RID_INIT; /* Do not keep any hint. */ |
328 | RA_DBGX((as, "remat $i $r", ir, r)); | 350 | RA_DBGX((as, "remat $i $r", ir, r)); |
329 | #if !LJ_SOFTFP | 351 | #if !LJ_SOFTFP32 |
330 | if (ir->o == IR_KNUM) { | 352 | if (ir->o == IR_KNUM) { |
331 | emit_loadn(as, r, ir_knum(ir)); | 353 | emit_loadk64(as, r, ir); |
332 | } else | 354 | } else |
333 | #endif | 355 | #endif |
334 | if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { | 356 | if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { |
335 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ | 357 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ |
336 | emit_getgl(as, r, jit_base); | 358 | emit_getgl(as, r, jit_base); |
337 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { | 359 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { |
338 | lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ | 360 | /* REF_NIL stores ASMREF_L register. */ |
339 | emit_getgl(as, r, jit_L); | 361 | lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L"); |
362 | emit_getgl(as, r, cur_L); | ||
340 | #if LJ_64 | 363 | #if LJ_64 |
341 | } else if (ir->o == IR_KINT64) { | 364 | } else if (ir->o == IR_KINT64) { |
342 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 365 | emit_loadu64(as, r, ir_kint64(ir)->u64); |
366 | #if LJ_GC64 | ||
367 | } else if (ir->o == IR_KGC) { | ||
368 | emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); | ||
369 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
370 | emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); | ||
371 | #endif | ||
343 | #endif | 372 | #endif |
344 | } else { | 373 | } else { |
345 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 374 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || |
346 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); | 375 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, |
376 | "rematk of bad IR op %d", ir->o); | ||
347 | emit_loadi(as, r, ir->i); | 377 | emit_loadi(as, r, ir->i); |
348 | } | 378 | } |
349 | return r; | 379 | return r; |
@@ -353,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
353 | static int32_t ra_spill(ASMState *as, IRIns *ir) | 383 | static int32_t ra_spill(ASMState *as, IRIns *ir) |
354 | { | 384 | { |
355 | int32_t slot = ir->s; | 385 | int32_t slot = ir->s; |
356 | lua_assert(ir >= as->ir + REF_TRUE); | 386 | lj_assertA(ir >= as->ir + REF_TRUE, |
387 | "spill of K%03d", REF_BIAS - (int)(ir - as->ir)); | ||
357 | if (!ra_hasspill(slot)) { | 388 | if (!ra_hasspill(slot)) { |
358 | if (irt_is64(ir->t)) { | 389 | if (irt_is64(ir->t)) { |
359 | slot = as->evenspill; | 390 | slot = as->evenspill; |
@@ -378,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) | |||
378 | { | 409 | { |
379 | IRIns *ir = IR(ref); | 410 | IRIns *ir = IR(ref); |
380 | Reg r = ir->r; | 411 | Reg r = ir->r; |
381 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 412 | lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1); |
413 | lj_assertA(!ra_hasspill(ir->s), | ||
414 | "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s); | ||
382 | ra_free(as, r); | 415 | ra_free(as, r); |
383 | ra_modified(as, r); | 416 | ra_modified(as, r); |
384 | ir->r = RID_INIT; | 417 | ir->r = RID_INIT; |
@@ -394,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref) | |||
394 | IRIns *ir = IR(ref); | 427 | IRIns *ir = IR(ref); |
395 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ | 428 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ |
396 | Reg r = ir->r; | 429 | Reg r = ir->r; |
397 | lua_assert(ra_hasreg(r)); | 430 | lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS); |
398 | ra_sethint(ir->r, r); /* Keep hint. */ | 431 | ra_sethint(ir->r, r); /* Keep hint. */ |
399 | ra_free(as, r); | 432 | ra_free(as, r); |
400 | if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ | 433 | if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ |
@@ -423,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow) | |||
423 | { | 456 | { |
424 | IRRef ref; | 457 | IRRef ref; |
425 | RegCost cost = ~(RegCost)0; | 458 | RegCost cost = ~(RegCost)0; |
426 | lua_assert(allow != RSET_EMPTY); | 459 | lj_assertA(allow != RSET_EMPTY, "evict from empty set"); |
427 | if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { | 460 | if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { |
428 | GPRDEF(MINCOST) | 461 | GPRDEF(MINCOST) |
429 | } else { | 462 | } else { |
430 | FPRDEF(MINCOST) | 463 | FPRDEF(MINCOST) |
431 | } | 464 | } |
432 | ref = regcost_ref(cost); | 465 | ref = regcost_ref(cost); |
433 | lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); | 466 | lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins), |
467 | "evict of out-of-range IR %04d", ref - REF_BIAS); | ||
434 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ | 468 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ |
435 | if (!irref_isk(ref) && (as->weakset & allow)) { | 469 | if (!irref_isk(ref) && (as->weakset & allow)) { |
436 | IRIns *ir = IR(ref); | 470 | IRIns *ir = IR(ref); |
@@ -512,7 +546,7 @@ static void ra_evictk(ASMState *as) | |||
512 | 546 | ||
513 | #ifdef RID_NUM_KREF | 547 | #ifdef RID_NUM_KREF |
514 | /* Allocate a register for a constant. */ | 548 | /* Allocate a register for a constant. */ |
515 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | 549 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) |
516 | { | 550 | { |
517 | /* First try to find a register which already holds the same constant. */ | 551 | /* First try to find a register which already holds the same constant. */ |
518 | RegSet pick, work = ~as->freeset & RSET_GPR; | 552 | RegSet pick, work = ~as->freeset & RSET_GPR; |
@@ -521,9 +555,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | |||
521 | IRRef ref; | 555 | IRRef ref; |
522 | r = rset_pickbot(work); | 556 | r = rset_pickbot(work); |
523 | ref = regcost_ref(as->cost[r]); | 557 | ref = regcost_ref(as->cost[r]); |
558 | #if LJ_64 | ||
559 | if (ref < ASMREF_L) { | ||
560 | if (ra_iskref(ref)) { | ||
561 | if (k == ra_krefk(as, ref)) | ||
562 | return r; | ||
563 | } else { | ||
564 | IRIns *ir = IR(ref); | ||
565 | if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || | ||
566 | #if LJ_GC64 | ||
567 | (ir->o == IR_KINT && k == ir->i) || | ||
568 | (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || | ||
569 | ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && | ||
570 | k == (intptr_t)ir_kptr(ir)) | ||
571 | #else | ||
572 | (ir->o != IR_KINT64 && k == ir->i) | ||
573 | #endif | ||
574 | ) | ||
575 | return r; | ||
576 | } | ||
577 | } | ||
578 | #else | ||
524 | if (ref < ASMREF_L && | 579 | if (ref < ASMREF_L && |
525 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) | 580 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) |
526 | return r; | 581 | return r; |
582 | #endif | ||
527 | rset_clear(work, r); | 583 | rset_clear(work, r); |
528 | } | 584 | } |
529 | pick = as->freeset & allow; | 585 | pick = as->freeset & allow; |
@@ -543,7 +599,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | |||
543 | } | 599 | } |
544 | 600 | ||
545 | /* Allocate a specific register for a constant. */ | 601 | /* Allocate a specific register for a constant. */ |
546 | static void ra_allockreg(ASMState *as, int32_t k, Reg r) | 602 | static void ra_allockreg(ASMState *as, intptr_t k, Reg r) |
547 | { | 603 | { |
548 | Reg kr = ra_allock(as, k, RID2RSET(r)); | 604 | Reg kr = ra_allock(as, k, RID2RSET(r)); |
549 | if (kr != r) { | 605 | if (kr != r) { |
@@ -566,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) | |||
566 | IRIns *ir = IR(ref); | 622 | IRIns *ir = IR(ref); |
567 | RegSet pick = as->freeset & allow; | 623 | RegSet pick = as->freeset & allow; |
568 | Reg r; | 624 | Reg r; |
569 | lua_assert(ra_noreg(ir->r)); | 625 | lj_assertA(ra_noreg(ir->r), |
626 | "IR %04d already has reg %d", ref - REF_BIAS, ir->r); | ||
570 | if (pick) { | 627 | if (pick) { |
571 | /* First check register hint from propagation or PHI. */ | 628 | /* First check register hint from propagation or PHI. */ |
572 | if (ra_hashint(ir->r)) { | 629 | if (ra_hashint(ir->r)) { |
@@ -613,15 +670,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) | |||
613 | return r; | 670 | return r; |
614 | } | 671 | } |
615 | 672 | ||
673 | /* Add a register rename to the IR. */ | ||
674 | static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno) | ||
675 | { | ||
676 | IRRef ren; | ||
677 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno); | ||
678 | ren = tref_ref(lj_ir_emit(as->J)); | ||
679 | as->J->cur.ir[ren].r = (uint8_t)down; | ||
680 | as->J->cur.ir[ren].s = SPS_NONE; | ||
681 | } | ||
682 | |||
616 | /* Rename register allocation and emit move. */ | 683 | /* Rename register allocation and emit move. */ |
617 | static void ra_rename(ASMState *as, Reg down, Reg up) | 684 | static void ra_rename(ASMState *as, Reg down, Reg up) |
618 | { | 685 | { |
619 | IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); | 686 | IRRef ref = regcost_ref(as->cost[up] = as->cost[down]); |
620 | IRIns *ir = IR(ref); | 687 | IRIns *ir = IR(ref); |
621 | ir->r = (uint8_t)up; | 688 | ir->r = (uint8_t)up; |
622 | as->cost[down] = 0; | 689 | as->cost[down] = 0; |
623 | lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); | 690 | lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR), |
624 | lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); | 691 | "rename between GPR/FPR %d and %d", down, up); |
692 | lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down); | ||
693 | lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up); | ||
625 | ra_free(as, down); /* 'down' is free ... */ | 694 | ra_free(as, down); /* 'down' is free ... */ |
626 | ra_modified(as, down); | 695 | ra_modified(as, down); |
627 | rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ | 696 | rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ |
@@ -629,11 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up) | |||
629 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); | 698 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); |
630 | emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ | 699 | emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ |
631 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ | 700 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ |
632 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); | 701 | /* |
633 | ren = tref_ref(lj_ir_emit(as->J)); | 702 | ** The rename is effective at the subsequent (already emitted) exit |
634 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | 703 | ** branch. This is for the current snapshot (as->snapno). Except if we |
635 | IR(ren)->r = (uint8_t)down; | 704 | ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1), |
636 | IR(ren)->s = SPS_NONE; | 705 | ** then it belongs to the next snapshot. |
706 | ** See also the discussion at asm_snap_checkrename(). | ||
707 | */ | ||
708 | ra_addrename(as, down, ref, as->snapno + as->snapalloc); | ||
637 | } | 709 | } |
638 | } | 710 | } |
639 | 711 | ||
@@ -666,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) | |||
666 | { | 738 | { |
667 | Reg dest = ra_dest(as, ir, RID2RSET(r)); | 739 | Reg dest = ra_dest(as, ir, RID2RSET(r)); |
668 | if (dest != r) { | 740 | if (dest != r) { |
669 | lua_assert(rset_test(as->freeset, r)); | 741 | lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r); |
670 | ra_modified(as, r); | 742 | ra_modified(as, r); |
671 | emit_movrr(as, ir, dest, r); | 743 | emit_movrr(as, ir, dest, r); |
672 | } | 744 | } |
@@ -683,20 +755,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) | |||
683 | if (ra_noreg(left)) { | 755 | if (ra_noreg(left)) { |
684 | if (irref_isk(lref)) { | 756 | if (irref_isk(lref)) { |
685 | if (ir->o == IR_KNUM) { | 757 | if (ir->o == IR_KNUM) { |
686 | cTValue *tv = ir_knum(ir); | ||
687 | /* FP remat needs a load except for +0. Still better than eviction. */ | 758 | /* FP remat needs a load except for +0. Still better than eviction. */ |
688 | if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { | 759 | if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { |
689 | emit_loadn(as, dest, tv); | 760 | emit_loadk64(as, dest, ir); |
690 | return; | 761 | return; |
691 | } | 762 | } |
692 | #if LJ_64 | 763 | #if LJ_64 |
693 | } else if (ir->o == IR_KINT64) { | 764 | } else if (ir->o == IR_KINT64) { |
694 | emit_loadu64(as, dest, ir_kint64(ir)->u64); | 765 | emit_loadk64(as, dest, ir); |
766 | return; | ||
767 | #if LJ_GC64 | ||
768 | } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
769 | emit_loadk64(as, dest, ir); | ||
695 | return; | 770 | return; |
696 | #endif | 771 | #endif |
697 | } else { | 772 | #endif |
698 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 773 | } else if (ir->o != IR_KPRI) { |
699 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); | 774 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || |
775 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, | ||
776 | "K%03d has bad IR op %d", REF_BIAS - lref, ir->o); | ||
700 | emit_loadi(as, dest, ir->i); | 777 | emit_loadi(as, dest, ir->i); |
701 | return; | 778 | return; |
702 | } | 779 | } |
@@ -741,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref) | |||
741 | } | 818 | } |
742 | #endif | 819 | #endif |
743 | 820 | ||
744 | #if !LJ_64 | ||
745 | /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ | 821 | /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ |
746 | static void ra_destpair(ASMState *as, IRIns *ir) | 822 | static void ra_destpair(ASMState *as, IRIns *ir) |
747 | { | 823 | { |
748 | Reg destlo = ir->r, desthi = (ir+1)->r; | 824 | Reg destlo = ir->r, desthi = (ir+1)->r; |
825 | IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir; | ||
749 | /* First spill unrelated refs blocking the destination registers. */ | 826 | /* First spill unrelated refs blocking the destination registers. */ |
750 | if (!rset_test(as->freeset, RID_RETLO) && | 827 | if (!rset_test(as->freeset, RID_RETLO) && |
751 | destlo != RID_RETLO && desthi != RID_RETLO) | 828 | destlo != RID_RETLO && desthi != RID_RETLO) |
@@ -769,29 +846,28 @@ static void ra_destpair(ASMState *as, IRIns *ir) | |||
769 | /* Check for conflicts and shuffle the registers as needed. */ | 846 | /* Check for conflicts and shuffle the registers as needed. */ |
770 | if (destlo == RID_RETHI) { | 847 | if (destlo == RID_RETHI) { |
771 | if (desthi == RID_RETLO) { | 848 | if (desthi == RID_RETLO) { |
772 | #if LJ_TARGET_X86 | 849 | #if LJ_TARGET_X86ORX64 |
773 | *--as->mcp = XI_XCHGa + RID_RETHI; | 850 | *--as->mcp = REX_64IR(irx, XI_XCHGa + RID_RETHI); |
774 | #else | 851 | #else |
775 | emit_movrr(as, ir, RID_RETHI, RID_TMP); | 852 | emit_movrr(as, irx, RID_RETHI, RID_TMP); |
776 | emit_movrr(as, ir, RID_RETLO, RID_RETHI); | 853 | emit_movrr(as, irx, RID_RETLO, RID_RETHI); |
777 | emit_movrr(as, ir, RID_TMP, RID_RETLO); | 854 | emit_movrr(as, irx, RID_TMP, RID_RETLO); |
778 | #endif | 855 | #endif |
779 | } else { | 856 | } else { |
780 | emit_movrr(as, ir, RID_RETHI, RID_RETLO); | 857 | emit_movrr(as, irx, RID_RETHI, RID_RETLO); |
781 | if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); | 858 | if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); |
782 | } | 859 | } |
783 | } else if (desthi == RID_RETLO) { | 860 | } else if (desthi == RID_RETLO) { |
784 | emit_movrr(as, ir, RID_RETLO, RID_RETHI); | 861 | emit_movrr(as, irx, RID_RETLO, RID_RETHI); |
785 | if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); | 862 | if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); |
786 | } else { | 863 | } else { |
787 | if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); | 864 | if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); |
788 | if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); | 865 | if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); |
789 | } | 866 | } |
790 | /* Restore spill slots (if any). */ | 867 | /* Restore spill slots (if any). */ |
791 | if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); | 868 | if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); |
792 | if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); | 869 | if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); |
793 | } | 870 | } |
794 | #endif | ||
795 | 871 | ||
796 | /* -- Snapshot handling --------- ----------------------------------------- */ | 872 | /* -- Snapshot handling --------- ----------------------------------------- */ |
797 | 873 | ||
@@ -841,11 +917,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) | |||
841 | #endif | 917 | #endif |
842 | { /* Allocate stored values for TNEW, TDUP and CNEW. */ | 918 | { /* Allocate stored values for TNEW, TDUP and CNEW. */ |
843 | IRIns *irs; | 919 | IRIns *irs; |
844 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); | 920 | lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW, |
921 | "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o); | ||
845 | for (irs = IR(as->snapref-1); irs > ir; irs--) | 922 | for (irs = IR(as->snapref-1); irs > ir; irs--) |
846 | if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { | 923 | if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { |
847 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | 924 | lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE || |
848 | irs->o == IR_FSTORE || irs->o == IR_XSTORE); | 925 | irs->o == IR_FSTORE || irs->o == IR_XSTORE, |
926 | "sunk store IR %04d has bad op %d", | ||
927 | (int)(irs - as->ir) - REF_BIAS, irs->o); | ||
849 | asm_snap_alloc1(as, irs->op2); | 928 | asm_snap_alloc1(as, irs->op2); |
850 | if (LJ_32 && (irs+1)->o == IR_HIOP) | 929 | if (LJ_32 && (irs+1)->o == IR_HIOP) |
851 | asm_snap_alloc1(as, (irs+1)->op2); | 930 | asm_snap_alloc1(as, (irs+1)->op2); |
@@ -881,9 +960,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) | |||
881 | } | 960 | } |
882 | 961 | ||
883 | /* Allocate refs escaping to a snapshot. */ | 962 | /* Allocate refs escaping to a snapshot. */ |
884 | static void asm_snap_alloc(ASMState *as) | 963 | static void asm_snap_alloc(ASMState *as, int snapno) |
885 | { | 964 | { |
886 | SnapShot *snap = &as->T->snap[as->snapno]; | 965 | SnapShot *snap = &as->T->snap[snapno]; |
887 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 966 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
888 | MSize n, nent = snap->nent; | 967 | MSize n, nent = snap->nent; |
889 | as->snapfilt1 = as->snapfilt2 = 0; | 968 | as->snapfilt1 = as->snapfilt2 = 0; |
@@ -893,7 +972,9 @@ static void asm_snap_alloc(ASMState *as) | |||
893 | if (!irref_isk(ref)) { | 972 | if (!irref_isk(ref)) { |
894 | asm_snap_alloc1(as, ref); | 973 | asm_snap_alloc1(as, ref); |
895 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { | 974 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { |
896 | lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); | 975 | lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP, |
976 | "snap %d[%d] points to bad SOFTFP IR %04d", | ||
977 | snapno, n, ref - REF_BIAS); | ||
897 | asm_snap_alloc1(as, ref+1); | 978 | asm_snap_alloc1(as, ref+1); |
898 | } | 979 | } |
899 | } | 980 | } |
@@ -919,67 +1000,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren) | |||
919 | return 0; /* Not found. */ | 1000 | return 0; /* Not found. */ |
920 | } | 1001 | } |
921 | 1002 | ||
922 | /* Prepare snapshot for next guard instruction. */ | 1003 | /* Prepare snapshot for next guard or throwing instruction. */ |
923 | static void asm_snap_prep(ASMState *as) | 1004 | static void asm_snap_prep(ASMState *as) |
924 | { | 1005 | { |
925 | if (as->curins < as->snapref) { | 1006 | if (as->snapalloc) { |
926 | do { | 1007 | /* Alloc on first invocation for each snapshot. */ |
927 | if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ | 1008 | as->snapalloc = 0; |
928 | as->snapno--; | 1009 | asm_snap_alloc(as, as->snapno); |
929 | as->snapref = as->T->snap[as->snapno].ref; | ||
930 | } while (as->curins < as->snapref); | ||
931 | asm_snap_alloc(as); | ||
932 | as->snaprename = as->T->nins; | 1010 | as->snaprename = as->T->nins; |
933 | } else { | 1011 | } else { |
934 | /* Process any renames above the highwater mark. */ | 1012 | /* Check any renames above the highwater mark. */ |
935 | for (; as->snaprename < as->T->nins; as->snaprename++) { | 1013 | for (; as->snaprename < as->T->nins; as->snaprename++) { |
936 | IRIns *ir = IR(as->snaprename); | 1014 | IRIns *ir = &as->T->ir[as->snaprename]; |
937 | if (asm_snap_checkrename(as, ir->op1)) | 1015 | if (asm_snap_checkrename(as, ir->op1)) |
938 | ir->op2 = REF_BIAS-1; /* Kill rename. */ | 1016 | ir->op2 = REF_BIAS-1; /* Kill rename. */ |
939 | } | 1017 | } |
940 | } | 1018 | } |
941 | } | 1019 | } |
942 | 1020 | ||
943 | /* -- Miscellaneous helpers ----------------------------------------------- */ | 1021 | /* Move to previous snapshot when we cross the current snapshot ref. */ |
944 | 1022 | static void asm_snap_prev(ASMState *as) | |
945 | /* Collect arguments from CALL* and CARG instructions. */ | ||
946 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
947 | const CCallInfo *ci, IRRef *args) | ||
948 | { | 1023 | { |
949 | uint32_t n = CCI_NARGS(ci); | 1024 | if (as->curins < as->snapref) { |
950 | lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ | 1025 | uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp); |
951 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | 1026 | if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV); |
952 | while (n-- > 1) { | 1027 | do { |
953 | ir = IR(ir->op1); | 1028 | if (as->snapno == 0) return; |
954 | lua_assert(ir->o == IR_CARG); | 1029 | as->snapno--; |
955 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | 1030 | as->snapref = as->T->snap[as->snapno].ref; |
1031 | as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */ | ||
1032 | } while (as->curins < as->snapref); /* May have no ins in between. */ | ||
1033 | as->snapalloc = 1; | ||
956 | } | 1034 | } |
957 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
958 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
959 | } | 1035 | } |
960 | 1036 | ||
961 | /* Reconstruct CCallInfo flags for CALLX*. */ | 1037 | /* Fixup snapshot mcode offsets. */ |
962 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | 1038 | static void asm_snap_fixup_mcofs(ASMState *as) |
963 | { | 1039 | { |
964 | uint32_t nargs = 0; | 1040 | uint32_t sz = (uint32_t)(as->mctoporig - as->mcp); |
965 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | 1041 | SnapShot *snap = as->T->snap; |
966 | IRIns *ira = IR(ir->op1); | 1042 | SnapNo i; |
967 | nargs++; | 1043 | for (i = as->T->nsnap-1; i > 0; i--) { |
968 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | 1044 | /* Compute offset from mcode start and store in correct snapshot. */ |
1045 | snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs); | ||
969 | } | 1046 | } |
970 | #if LJ_HASFFI | 1047 | snap[0].mcofs = 0; |
971 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
972 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
973 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
974 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
975 | #if LJ_TARGET_X86 | ||
976 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
977 | #endif | ||
978 | } | ||
979 | #endif | ||
980 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
981 | } | 1048 | } |
982 | 1049 | ||
1050 | /* -- Miscellaneous helpers ----------------------------------------------- */ | ||
1051 | |||
983 | /* Calculate stack adjustment. */ | 1052 | /* Calculate stack adjustment. */ |
984 | static int32_t asm_stack_adjust(ASMState *as) | 1053 | static int32_t asm_stack_adjust(ASMState *as) |
985 | { | 1054 | { |
@@ -989,21 +1058,26 @@ static int32_t asm_stack_adjust(ASMState *as) | |||
989 | } | 1058 | } |
990 | 1059 | ||
991 | /* Must match with hash*() in lj_tab.c. */ | 1060 | /* Must match with hash*() in lj_tab.c. */ |
992 | static uint32_t ir_khash(IRIns *ir) | 1061 | static uint32_t ir_khash(ASMState *as, IRIns *ir) |
993 | { | 1062 | { |
994 | uint32_t lo, hi; | 1063 | uint32_t lo, hi; |
1064 | UNUSED(as); | ||
995 | if (irt_isstr(ir->t)) { | 1065 | if (irt_isstr(ir->t)) { |
996 | return ir_kstr(ir)->hash; | 1066 | return ir_kstr(ir)->sid; |
997 | } else if (irt_isnum(ir->t)) { | 1067 | } else if (irt_isnum(ir->t)) { |
998 | lo = ir_knum(ir)->u32.lo; | 1068 | lo = ir_knum(ir)->u32.lo; |
999 | hi = ir_knum(ir)->u32.hi << 1; | 1069 | hi = ir_knum(ir)->u32.hi << 1; |
1000 | } else if (irt_ispri(ir->t)) { | 1070 | } else if (irt_ispri(ir->t)) { |
1001 | lua_assert(!irt_isnil(ir->t)); | 1071 | lj_assertA(!irt_isnil(ir->t), "hash of nil key"); |
1002 | return irt_type(ir->t)-IRT_FALSE; | 1072 | return irt_type(ir->t)-IRT_FALSE; |
1003 | } else { | 1073 | } else { |
1004 | lua_assert(irt_isgcv(ir->t)); | 1074 | lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t)); |
1005 | lo = u32ptr(ir_kgc(ir)); | 1075 | lo = u32ptr(ir_kgc(ir)); |
1076 | #if LJ_GC64 | ||
1077 | hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); | ||
1078 | #else | ||
1006 | hi = lo + HASH_BIAS; | 1079 | hi = lo + HASH_BIAS; |
1080 | #endif | ||
1007 | } | 1081 | } |
1008 | return hashrot(lo, hi); | 1082 | return hashrot(lo, hi); |
1009 | } | 1083 | } |
@@ -1017,6 +1091,7 @@ static void asm_snew(ASMState *as, IRIns *ir) | |||
1017 | { | 1091 | { |
1018 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; | 1092 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; |
1019 | IRRef args[3]; | 1093 | IRRef args[3]; |
1094 | asm_snap_prep(as); | ||
1020 | args[0] = ASMREF_L; /* lua_State *L */ | 1095 | args[0] = ASMREF_L; /* lua_State *L */ |
1021 | args[1] = ir->op1; /* const char *str */ | 1096 | args[1] = ir->op1; /* const char *str */ |
1022 | args[2] = ir->op2; /* size_t len */ | 1097 | args[2] = ir->op2; /* size_t len */ |
@@ -1029,6 +1104,7 @@ static void asm_tnew(ASMState *as, IRIns *ir) | |||
1029 | { | 1104 | { |
1030 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; | 1105 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; |
1031 | IRRef args[2]; | 1106 | IRRef args[2]; |
1107 | asm_snap_prep(as); | ||
1032 | args[0] = ASMREF_L; /* lua_State *L */ | 1108 | args[0] = ASMREF_L; /* lua_State *L */ |
1033 | args[1] = ASMREF_TMP1; /* uint32_t ahsize */ | 1109 | args[1] = ASMREF_TMP1; /* uint32_t ahsize */ |
1034 | as->gcsteps++; | 1110 | as->gcsteps++; |
@@ -1041,6 +1117,7 @@ static void asm_tdup(ASMState *as, IRIns *ir) | |||
1041 | { | 1117 | { |
1042 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; | 1118 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; |
1043 | IRRef args[2]; | 1119 | IRRef args[2]; |
1120 | asm_snap_prep(as); | ||
1044 | args[0] = ASMREF_L; /* lua_State *L */ | 1121 | args[0] = ASMREF_L; /* lua_State *L */ |
1045 | args[1] = ir->op1; /* const GCtab *kt */ | 1122 | args[1] = ir->op1; /* const GCtab *kt */ |
1046 | as->gcsteps++; | 1123 | as->gcsteps++; |
@@ -1064,6 +1141,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir) | |||
1064 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ | 1141 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ |
1065 | } | 1142 | } |
1066 | 1143 | ||
1144 | /* -- Buffer operations --------------------------------------------------- */ | ||
1145 | |||
1146 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode); | ||
1147 | #if LJ_HASBUFFER | ||
1148 | static void asm_bufhdr_write(ASMState *as, Reg sb); | ||
1149 | #endif | ||
1150 | |||
1151 | static void asm_bufhdr(ASMState *as, IRIns *ir) | ||
1152 | { | ||
1153 | Reg sb = ra_dest(as, ir, RSET_GPR); | ||
1154 | switch (ir->op2) { | ||
1155 | case IRBUFHDR_RESET: { | ||
1156 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
1157 | IRIns irbp; | ||
1158 | irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */ | ||
1159 | emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w)); | ||
1160 | emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b)); | ||
1161 | break; | ||
1162 | } | ||
1163 | case IRBUFHDR_APPEND: { | ||
1164 | /* Rematerialize const buffer pointer instead of likely spill. */ | ||
1165 | IRIns *irp = IR(ir->op1); | ||
1166 | if (!(ra_hasreg(irp->r) || irp == ir-1 || | ||
1167 | (irp == ir-2 && !ra_used(ir-1)))) { | ||
1168 | while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET)) | ||
1169 | irp = IR(irp->op1); | ||
1170 | if (irref_isk(irp->op1)) { | ||
1171 | ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); | ||
1172 | ir = irp; | ||
1173 | } | ||
1174 | } | ||
1175 | break; | ||
1176 | } | ||
1177 | #if LJ_HASBUFFER | ||
1178 | case IRBUFHDR_WRITE: | ||
1179 | asm_bufhdr_write(as, sb); | ||
1180 | break; | ||
1181 | #endif | ||
1182 | default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break; | ||
1183 | } | ||
1184 | #if LJ_TARGET_X86ORX64 | ||
1185 | ra_left(as, sb, ir->op1); | ||
1186 | #else | ||
1187 | ra_leftov(as, sb, ir->op1); | ||
1188 | #endif | ||
1189 | } | ||
1190 | |||
1191 | static void asm_bufput(ASMState *as, IRIns *ir) | ||
1192 | { | ||
1193 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; | ||
1194 | IRRef args[3]; | ||
1195 | IRIns *irs; | ||
1196 | int kchar = -129; | ||
1197 | args[0] = ir->op1; /* SBuf * */ | ||
1198 | args[1] = ir->op2; /* GCstr * */ | ||
1199 | irs = IR(ir->op2); | ||
1200 | lj_assertA(irt_isstr(irs->t), | ||
1201 | "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS); | ||
1202 | if (irs->o == IR_KGC) { | ||
1203 | GCstr *s = ir_kstr(irs); | ||
1204 | if (s->len == 1) { /* Optimize put of single-char string constant. */ | ||
1205 | kchar = (int8_t)strdata(s)[0]; /* Signed! */ | ||
1206 | args[1] = ASMREF_TMP1; /* int, truncated to char */ | ||
1207 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1208 | } | ||
1209 | } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) { | ||
1210 | if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */ | ||
1211 | if (irs->op2 == IRTOSTR_NUM) { | ||
1212 | args[1] = ASMREF_TMP1; /* TValue * */ | ||
1213 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; | ||
1214 | } else { | ||
1215 | lj_assertA(irt_isinteger(IR(irs->op1)->t), | ||
1216 | "TOSTR of non-numeric IR %04d", irs->op1); | ||
1217 | args[1] = irs->op1; /* int */ | ||
1218 | if (irs->op2 == IRTOSTR_INT) | ||
1219 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; | ||
1220 | else | ||
1221 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1222 | } | ||
1223 | } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */ | ||
1224 | args[1] = irs->op1; /* const void * */ | ||
1225 | args[2] = irs->op2; /* MSize */ | ||
1226 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem]; | ||
1227 | } | ||
1228 | } | ||
1229 | asm_setupresult(as, ir, ci); /* SBuf * */ | ||
1230 | asm_gencall(as, ci, args); | ||
1231 | if (args[1] == ASMREF_TMP1) { | ||
1232 | Reg tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
1233 | if (kchar == -129) | ||
1234 | asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1); | ||
1235 | else | ||
1236 | ra_allockreg(as, kchar, tmp); | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | static void asm_bufstr(ASMState *as, IRIns *ir) | ||
1241 | { | ||
1242 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; | ||
1243 | IRRef args[1]; | ||
1244 | args[0] = ir->op1; /* SBuf *sb */ | ||
1245 | as->gcsteps++; | ||
1246 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1247 | asm_gencall(as, ci, args); | ||
1248 | } | ||
1249 | |||
1250 | /* -- Type conversions ---------------------------------------------------- */ | ||
1251 | |||
1252 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
1253 | { | ||
1254 | const CCallInfo *ci; | ||
1255 | IRRef args[2]; | ||
1256 | asm_snap_prep(as); | ||
1257 | args[0] = ASMREF_L; | ||
1258 | as->gcsteps++; | ||
1259 | if (ir->op2 == IRTOSTR_NUM) { | ||
1260 | args[1] = ASMREF_TMP1; /* cTValue * */ | ||
1261 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num]; | ||
1262 | } else { | ||
1263 | args[1] = ir->op1; /* int32_t k */ | ||
1264 | if (ir->op2 == IRTOSTR_INT) | ||
1265 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int]; | ||
1266 | else | ||
1267 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char]; | ||
1268 | } | ||
1269 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1270 | asm_gencall(as, ci, args); | ||
1271 | if (ir->op2 == IRTOSTR_NUM) | ||
1272 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1); | ||
1273 | } | ||
1274 | |||
1275 | #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 | ||
1276 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
1277 | { | ||
1278 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
1279 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
1280 | IRCallID id; | ||
1281 | IRRef args[2]; | ||
1282 | lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, | ||
1283 | "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); | ||
1284 | args[LJ_BE] = (ir-1)->op1; | ||
1285 | args[LJ_LE] = ir->op1; | ||
1286 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
1287 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
1288 | ir--; | ||
1289 | } else { | ||
1290 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
1291 | } | ||
1292 | { | ||
1293 | #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP | ||
1294 | CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; | ||
1295 | cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
1296 | #else | ||
1297 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1298 | #endif | ||
1299 | asm_setupresult(as, ir, ci); | ||
1300 | asm_gencall(as, ci, args); | ||
1301 | } | ||
1302 | } | ||
1303 | #endif | ||
1304 | |||
1305 | /* -- Memory references --------------------------------------------------- */ | ||
1306 | |||
1307 | static void asm_newref(ASMState *as, IRIns *ir) | ||
1308 | { | ||
1309 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
1310 | IRRef args[3]; | ||
1311 | if (ir->r == RID_SINK) | ||
1312 | return; | ||
1313 | asm_snap_prep(as); | ||
1314 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1315 | args[1] = ir->op1; /* GCtab *t */ | ||
1316 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
1317 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
1318 | asm_gencall(as, ci, args); | ||
1319 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1); | ||
1320 | } | ||
1321 | |||
1322 | static void asm_tmpref(ASMState *as, IRIns *ir) | ||
1323 | { | ||
1324 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1325 | asm_tvptr(as, r, ir->op1, ir->op2); | ||
1326 | } | ||
1327 | |||
1328 | static void asm_lref(ASMState *as, IRIns *ir) | ||
1329 | { | ||
1330 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1331 | #if LJ_TARGET_X86ORX64 | ||
1332 | ra_left(as, r, ASMREF_L); | ||
1333 | #else | ||
1334 | ra_leftov(as, r, ASMREF_L); | ||
1335 | #endif | ||
1336 | } | ||
1337 | |||
1338 | /* -- Calls --------------------------------------------------------------- */ | ||
1339 | |||
1340 | /* Collect arguments from CALL* and CARG instructions. */ | ||
1341 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
1342 | const CCallInfo *ci, IRRef *args) | ||
1343 | { | ||
1344 | uint32_t n = CCI_XNARGS(ci); | ||
1345 | /* Account for split args. */ | ||
1346 | lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n); | ||
1347 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
1348 | while (n-- > 1) { | ||
1349 | ir = IR(ir->op1); | ||
1350 | lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree"); | ||
1351 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | ||
1352 | } | ||
1353 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
1354 | lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree"); | ||
1355 | } | ||
1356 | |||
1357 | /* Reconstruct CCallInfo flags for CALLX*. */ | ||
1358 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | ||
1359 | { | ||
1360 | uint32_t nargs = 0; | ||
1361 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | ||
1362 | IRIns *ira = IR(ir->op1); | ||
1363 | nargs++; | ||
1364 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | ||
1365 | } | ||
1366 | #if LJ_HASFFI | ||
1367 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
1368 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
1369 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
1370 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
1371 | #if LJ_TARGET_X86 | ||
1372 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
1373 | #endif | ||
1374 | } | ||
1375 | #endif | ||
1376 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
1377 | } | ||
1378 | |||
1379 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
1380 | { | ||
1381 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1382 | IRRef args[2]; | ||
1383 | args[0] = ir->op1; | ||
1384 | args[1] = ir->op2; | ||
1385 | asm_setupresult(as, ir, ci); | ||
1386 | asm_gencall(as, ci, args); | ||
1387 | } | ||
1388 | |||
1389 | static void asm_call(ASMState *as, IRIns *ir) | ||
1390 | { | ||
1391 | IRRef args[CCI_NARGS_MAX]; | ||
1392 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
1393 | asm_collectargs(as, ir, ci, args); | ||
1394 | asm_setupresult(as, ir, ci); | ||
1395 | asm_gencall(as, ci, args); | ||
1396 | } | ||
1397 | |||
1067 | /* -- PHI and loop handling ----------------------------------------------- */ | 1398 | /* -- PHI and loop handling ----------------------------------------------- */ |
1068 | 1399 | ||
1069 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ | 1400 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ |
@@ -1249,12 +1580,7 @@ static void asm_phi_fixup(ASMState *as) | |||
1249 | irt_clearmark(ir->t); | 1580 | irt_clearmark(ir->t); |
1250 | /* Left PHI gained a spill slot before the loop? */ | 1581 | /* Left PHI gained a spill slot before the loop? */ |
1251 | if (ra_hasspill(ir->s)) { | 1582 | if (ra_hasspill(ir->s)) { |
1252 | IRRef ren; | 1583 | ra_addrename(as, r, lref, as->loopsnapno); |
1253 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); | ||
1254 | ren = tref_ref(lj_ir_emit(as->J)); | ||
1255 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | ||
1256 | IR(ren)->r = (uint8_t)r; | ||
1257 | IR(ren)->s = SPS_NONE; | ||
1258 | } | 1584 | } |
1259 | } | 1585 | } |
1260 | rset_clear(work, r); | 1586 | rset_clear(work, r); |
@@ -1329,6 +1655,8 @@ static void asm_loop(ASMState *as) | |||
1329 | #include "lj_asm_x86.h" | 1655 | #include "lj_asm_x86.h" |
1330 | #elif LJ_TARGET_ARM | 1656 | #elif LJ_TARGET_ARM |
1331 | #include "lj_asm_arm.h" | 1657 | #include "lj_asm_arm.h" |
1658 | #elif LJ_TARGET_ARM64 | ||
1659 | #include "lj_asm_arm64.h" | ||
1332 | #elif LJ_TARGET_PPC | 1660 | #elif LJ_TARGET_PPC |
1333 | #include "lj_asm_ppc.h" | 1661 | #include "lj_asm_ppc.h" |
1334 | #elif LJ_TARGET_MIPS | 1662 | #elif LJ_TARGET_MIPS |
@@ -1337,6 +1665,204 @@ static void asm_loop(ASMState *as) | |||
1337 | #error "Missing assembler for target CPU" | 1665 | #error "Missing assembler for target CPU" |
1338 | #endif | 1666 | #endif |
1339 | 1667 | ||
1668 | /* -- Common instruction helpers ------------------------------------------ */ | ||
1669 | |||
1670 | #if !LJ_SOFTFP32 | ||
1671 | #if !LJ_TARGET_X86ORX64 | ||
1672 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1673 | #define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
1674 | #endif | ||
1675 | |||
1676 | static void asm_pow(ASMState *as, IRIns *ir) | ||
1677 | { | ||
1678 | #if LJ_64 && LJ_HASFFI | ||
1679 | if (!irt_isnum(ir->t)) | ||
1680 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
1681 | IRCALL_lj_carith_powu64); | ||
1682 | else | ||
1683 | #endif | ||
1684 | if (irt_isnum(IR(ir->op2)->t)) | ||
1685 | asm_callid(as, ir, IRCALL_pow); | ||
1686 | else | ||
1687 | asm_fppowi(as, ir); | ||
1688 | } | ||
1689 | |||
1690 | static void asm_div(ASMState *as, IRIns *ir) | ||
1691 | { | ||
1692 | #if LJ_64 && LJ_HASFFI | ||
1693 | if (!irt_isnum(ir->t)) | ||
1694 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
1695 | IRCALL_lj_carith_divu64); | ||
1696 | else | ||
1697 | #endif | ||
1698 | asm_fpdiv(as, ir); | ||
1699 | } | ||
1700 | #endif | ||
1701 | |||
1702 | static void asm_mod(ASMState *as, IRIns *ir) | ||
1703 | { | ||
1704 | #if LJ_64 && LJ_HASFFI | ||
1705 | if (!irt_isint(ir->t)) | ||
1706 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
1707 | IRCALL_lj_carith_modu64); | ||
1708 | else | ||
1709 | #endif | ||
1710 | asm_callid(as, ir, IRCALL_lj_vm_modi); | ||
1711 | } | ||
1712 | |||
1713 | static void asm_fuseequal(ASMState *as, IRIns *ir) | ||
1714 | { | ||
1715 | /* Fuse HREF + EQ/NE. */ | ||
1716 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1717 | as->curins--; | ||
1718 | asm_href(as, ir-1, (IROp)ir->o); | ||
1719 | } else { | ||
1720 | asm_equal(as, ir); | ||
1721 | } | ||
1722 | } | ||
1723 | |||
1724 | static void asm_alen(ASMState *as, IRIns *ir) | ||
1725 | { | ||
1726 | asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len : | ||
1727 | IRCALL_lj_tab_len_hint); | ||
1728 | } | ||
1729 | |||
1730 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1731 | |||
1732 | /* Assemble a single instruction. */ | ||
1733 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1734 | { | ||
1735 | switch ((IROp)ir->o) { | ||
1736 | /* Miscellaneous ops. */ | ||
1737 | case IR_LOOP: asm_loop(as); break; | ||
1738 | case IR_NOP: case IR_XBAR: | ||
1739 | lj_assertA(!ra_used(ir), | ||
1740 | "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS); | ||
1741 | break; | ||
1742 | case IR_USE: | ||
1743 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1744 | case IR_PHI: asm_phi(as, ir); break; | ||
1745 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1746 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1747 | case IR_PROF: asm_prof(as, ir); break; | ||
1748 | |||
1749 | /* Guarded assertions. */ | ||
1750 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1751 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1752 | case IR_ABC: | ||
1753 | asm_comp(as, ir); | ||
1754 | break; | ||
1755 | case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break; | ||
1756 | |||
1757 | case IR_RETF: asm_retf(as, ir); break; | ||
1758 | |||
1759 | /* Bit ops. */ | ||
1760 | case IR_BNOT: asm_bnot(as, ir); break; | ||
1761 | case IR_BSWAP: asm_bswap(as, ir); break; | ||
1762 | case IR_BAND: asm_band(as, ir); break; | ||
1763 | case IR_BOR: asm_bor(as, ir); break; | ||
1764 | case IR_BXOR: asm_bxor(as, ir); break; | ||
1765 | case IR_BSHL: asm_bshl(as, ir); break; | ||
1766 | case IR_BSHR: asm_bshr(as, ir); break; | ||
1767 | case IR_BSAR: asm_bsar(as, ir); break; | ||
1768 | case IR_BROL: asm_brol(as, ir); break; | ||
1769 | case IR_BROR: asm_bror(as, ir); break; | ||
1770 | |||
1771 | /* Arithmetic ops. */ | ||
1772 | case IR_ADD: asm_add(as, ir); break; | ||
1773 | case IR_SUB: asm_sub(as, ir); break; | ||
1774 | case IR_MUL: asm_mul(as, ir); break; | ||
1775 | case IR_MOD: asm_mod(as, ir); break; | ||
1776 | case IR_NEG: asm_neg(as, ir); break; | ||
1777 | #if LJ_SOFTFP32 | ||
1778 | case IR_DIV: case IR_POW: case IR_ABS: | ||
1779 | case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
1780 | /* Unused for LJ_SOFTFP32. */ | ||
1781 | lj_assertA(0, "IR %04d with unused op %d", | ||
1782 | (int)(ir - as->ir) - REF_BIAS, ir->o); | ||
1783 | break; | ||
1784 | #else | ||
1785 | case IR_DIV: asm_div(as, ir); break; | ||
1786 | case IR_POW: asm_pow(as, ir); break; | ||
1787 | case IR_ABS: asm_abs(as, ir); break; | ||
1788 | case IR_LDEXP: asm_ldexp(as, ir); break; | ||
1789 | case IR_FPMATH: asm_fpmath(as, ir); break; | ||
1790 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
1791 | #endif | ||
1792 | case IR_MIN: asm_min(as, ir); break; | ||
1793 | case IR_MAX: asm_max(as, ir); break; | ||
1794 | |||
1795 | /* Overflow-checking arithmetic ops. */ | ||
1796 | case IR_ADDOV: asm_addov(as, ir); break; | ||
1797 | case IR_SUBOV: asm_subov(as, ir); break; | ||
1798 | case IR_MULOV: asm_mulov(as, ir); break; | ||
1799 | |||
1800 | /* Memory references. */ | ||
1801 | case IR_AREF: asm_aref(as, ir); break; | ||
1802 | case IR_HREF: asm_href(as, ir, 0); break; | ||
1803 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
1804 | case IR_NEWREF: asm_newref(as, ir); break; | ||
1805 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
1806 | case IR_FREF: asm_fref(as, ir); break; | ||
1807 | case IR_TMPREF: asm_tmpref(as, ir); break; | ||
1808 | case IR_STRREF: asm_strref(as, ir); break; | ||
1809 | case IR_LREF: asm_lref(as, ir); break; | ||
1810 | |||
1811 | /* Loads and stores. */ | ||
1812 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1813 | asm_ahuvload(as, ir); | ||
1814 | break; | ||
1815 | case IR_FLOAD: asm_fload(as, ir); break; | ||
1816 | case IR_XLOAD: asm_xload(as, ir); break; | ||
1817 | case IR_SLOAD: asm_sload(as, ir); break; | ||
1818 | case IR_ALEN: asm_alen(as, ir); break; | ||
1819 | |||
1820 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
1821 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
1822 | case IR_XSTORE: asm_xstore(as, ir); break; | ||
1823 | |||
1824 | /* Allocations. */ | ||
1825 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
1826 | case IR_TNEW: asm_tnew(as, ir); break; | ||
1827 | case IR_TDUP: asm_tdup(as, ir); break; | ||
1828 | case IR_CNEW: case IR_CNEWI: | ||
1829 | #if LJ_HASFFI | ||
1830 | asm_cnew(as, ir); | ||
1831 | #else | ||
1832 | lj_assertA(0, "IR %04d with unused op %d", | ||
1833 | (int)(ir - as->ir) - REF_BIAS, ir->o); | ||
1834 | #endif | ||
1835 | break; | ||
1836 | |||
1837 | /* Buffer operations. */ | ||
1838 | case IR_BUFHDR: asm_bufhdr(as, ir); break; | ||
1839 | case IR_BUFPUT: asm_bufput(as, ir); break; | ||
1840 | case IR_BUFSTR: asm_bufstr(as, ir); break; | ||
1841 | |||
1842 | /* Write barriers. */ | ||
1843 | case IR_TBAR: asm_tbar(as, ir); break; | ||
1844 | case IR_OBAR: asm_obar(as, ir); break; | ||
1845 | |||
1846 | /* Type conversions. */ | ||
1847 | case IR_CONV: asm_conv(as, ir); break; | ||
1848 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
1849 | case IR_STRTO: asm_strto(as, ir); break; | ||
1850 | |||
1851 | /* Calls. */ | ||
1852 | case IR_CALLA: | ||
1853 | as->gcsteps++; | ||
1854 | /* fallthrough */ | ||
1855 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
1856 | case IR_CALLXS: asm_callx(as, ir); break; | ||
1857 | case IR_CARG: break; | ||
1858 | |||
1859 | default: | ||
1860 | setintV(&as->J->errinfo, ir->o); | ||
1861 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1862 | break; | ||
1863 | } | ||
1864 | } | ||
1865 | |||
1340 | /* -- Head of trace ------------------------------------------------------- */ | 1866 | /* -- Head of trace ------------------------------------------------------- */ |
1341 | 1867 | ||
1342 | /* Head of a root trace. */ | 1868 | /* Head of a root trace. */ |
@@ -1373,8 +1899,7 @@ static void asm_head_side(ASMState *as) | |||
1373 | 1899 | ||
1374 | if (as->snapno && as->topslot > as->parent->topslot) { | 1900 | if (as->snapno && as->topslot > as->parent->topslot) { |
1375 | /* Force snap #0 alloc to prevent register overwrite in stack check. */ | 1901 | /* Force snap #0 alloc to prevent register overwrite in stack check. */ |
1376 | as->snapno = 0; | 1902 | asm_snap_alloc(as, 0); |
1377 | asm_snap_alloc(as); | ||
1378 | } | 1903 | } |
1379 | allow = asm_head_side_base(as, irp, allow); | 1904 | allow = asm_head_side_base(as, irp, allow); |
1380 | 1905 | ||
@@ -1382,8 +1907,10 @@ static void asm_head_side(ASMState *as) | |||
1382 | for (i = as->stopins; i > REF_BASE; i--) { | 1907 | for (i = as->stopins; i > REF_BASE; i--) { |
1383 | IRIns *ir = IR(i); | 1908 | IRIns *ir = IR(i); |
1384 | RegSP rs; | 1909 | RegSP rs; |
1385 | lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || | 1910 | lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || |
1386 | (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); | 1911 | (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL, |
1912 | "IR %04d has bad parent op %d", | ||
1913 | (int)(ir - as->ir) - REF_BIAS, ir->o); | ||
1387 | rs = as->parentmap[i - REF_FIRST]; | 1914 | rs = as->parentmap[i - REF_FIRST]; |
1388 | if (ra_hasreg(ir->r)) { | 1915 | if (ra_hasreg(ir->r)) { |
1389 | rset_clear(allow, ir->r); | 1916 | rset_clear(allow, ir->r); |
@@ -1535,7 +2062,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) | |||
1535 | SnapEntry sn = map[n-1]; | 2062 | SnapEntry sn = map[n-1]; |
1536 | if ((sn & SNAP_FRAME)) { | 2063 | if ((sn & SNAP_FRAME)) { |
1537 | *gotframe = 1; | 2064 | *gotframe = 1; |
1538 | return snap_slot(sn); | 2065 | return snap_slot(sn) - LJ_FR2; |
1539 | } | 2066 | } |
1540 | } | 2067 | } |
1541 | return 0; | 2068 | return 0; |
@@ -1555,19 +2082,23 @@ static void asm_tail_link(ASMState *as) | |||
1555 | 2082 | ||
1556 | if (as->T->link == 0) { | 2083 | if (as->T->link == 0) { |
1557 | /* Setup fixed registers for exit to interpreter. */ | 2084 | /* Setup fixed registers for exit to interpreter. */ |
1558 | const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); | 2085 | const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); |
1559 | int32_t mres; | 2086 | int32_t mres; |
1560 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ | 2087 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ |
1561 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; | 2088 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; |
1562 | if (bc_isret(bc_op(*retpc))) | 2089 | if (bc_isret(bc_op(*retpc))) |
1563 | pc = retpc; | 2090 | pc = retpc; |
1564 | } | 2091 | } |
2092 | #if LJ_GC64 | ||
2093 | emit_loadu64(as, RID_LPC, u64ptr(pc)); | ||
2094 | #else | ||
1565 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); | 2095 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); |
1566 | ra_allockreg(as, i32ptr(pc), RID_LPC); | 2096 | ra_allockreg(as, i32ptr(pc), RID_LPC); |
1567 | mres = (int32_t)(snap->nslots - baseslot); | 2097 | #endif |
2098 | mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); | ||
1568 | switch (bc_op(*pc)) { | 2099 | switch (bc_op(*pc)) { |
1569 | case BC_CALLM: case BC_CALLMT: | 2100 | case BC_CALLM: case BC_CALLMT: |
1570 | mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; | 2101 | mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; |
1571 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; | 2102 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; |
1572 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; | 2103 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; |
1573 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; | 2104 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; |
@@ -1579,6 +2110,11 @@ static void asm_tail_link(ASMState *as) | |||
1579 | } | 2110 | } |
1580 | emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); | 2111 | emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); |
1581 | 2112 | ||
2113 | if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */ | ||
2114 | setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); | ||
2115 | IR(as->J->ktrace)->o = IR_KGC; | ||
2116 | } | ||
2117 | |||
1582 | /* Sync the interpreter state with the on-trace state. */ | 2118 | /* Sync the interpreter state with the on-trace state. */ |
1583 | asm_stack_restore(as, snap); | 2119 | asm_stack_restore(as, snap); |
1584 | 2120 | ||
@@ -1602,22 +2138,32 @@ static void asm_setup_regsp(ASMState *as) | |||
1602 | #endif | 2138 | #endif |
1603 | 2139 | ||
1604 | ra_setup(as); | 2140 | ra_setup(as); |
2141 | #if LJ_TARGET_ARM64 | ||
2142 | ra_setkref(as, RID_GL, (intptr_t)J2G(as->J)); | ||
2143 | #endif | ||
1605 | 2144 | ||
1606 | /* Clear reg/sp for constants. */ | 2145 | /* Clear reg/sp for constants. */ |
1607 | for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) | 2146 | for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { |
1608 | ir->prev = REGSP_INIT; | 2147 | ir->prev = REGSP_INIT; |
2148 | if (irt_is64(ir->t) && ir->o != IR_KNULL) { | ||
2149 | #if LJ_GC64 | ||
2150 | /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ | ||
2151 | ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ | ||
2152 | #else | ||
2153 | /* Make life easier for backends by putting address of constant in i. */ | ||
2154 | ir->i = (int32_t)(intptr_t)(ir+1); | ||
2155 | #endif | ||
2156 | ir++; | ||
2157 | } | ||
2158 | } | ||
1609 | 2159 | ||
1610 | /* REF_BASE is used for implicit references to the BASE register. */ | 2160 | /* REF_BASE is used for implicit references to the BASE register. */ |
1611 | lastir->prev = REGSP_HINT(RID_BASE); | 2161 | lastir->prev = REGSP_HINT(RID_BASE); |
1612 | 2162 | ||
1613 | ir = IR(nins-1); | ||
1614 | if (ir->o == IR_RENAME) { | ||
1615 | do { ir--; nins--; } while (ir->o == IR_RENAME); | ||
1616 | T->nins = nins; /* Remove any renames left over from ASM restart. */ | ||
1617 | } | ||
1618 | as->snaprename = nins; | 2163 | as->snaprename = nins; |
1619 | as->snapref = nins; | 2164 | as->snapref = nins; |
1620 | as->snapno = T->nsnap; | 2165 | as->snapno = T->nsnap; |
2166 | as->snapalloc = 0; | ||
1621 | 2167 | ||
1622 | as->stopins = REF_BASE; | 2168 | as->stopins = REF_BASE; |
1623 | as->orignins = nins; | 2169 | as->orignins = nins; |
@@ -1627,7 +2173,7 @@ static void asm_setup_regsp(ASMState *as) | |||
1627 | ir = IR(REF_FIRST); | 2173 | ir = IR(REF_FIRST); |
1628 | if (as->parent) { | 2174 | if (as->parent) { |
1629 | uint16_t *p; | 2175 | uint16_t *p; |
1630 | lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); | 2176 | lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir); |
1631 | if (lastir - ir > LJ_MAX_JSLOTS) | 2177 | if (lastir - ir > LJ_MAX_JSLOTS) |
1632 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | 2178 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); |
1633 | as->stopins = (IRRef)((lastir-1) - as->ir); | 2179 | as->stopins = (IRRef)((lastir-1) - as->ir); |
@@ -1666,6 +2212,10 @@ static void asm_setup_regsp(ASMState *as) | |||
1666 | ir->prev = (uint16_t)REGSP_HINT((rload & 15)); | 2212 | ir->prev = (uint16_t)REGSP_HINT((rload & 15)); |
1667 | rload = lj_ror(rload, 4); | 2213 | rload = lj_ror(rload, 4); |
1668 | continue; | 2214 | continue; |
2215 | case IR_TMPREF: | ||
2216 | if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4) | ||
2217 | as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */ | ||
2218 | break; | ||
1669 | #endif | 2219 | #endif |
1670 | case IR_CALLXS: { | 2220 | case IR_CALLXS: { |
1671 | CCallInfo ci; | 2221 | CCallInfo ci; |
@@ -1675,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as) | |||
1675 | as->modset |= RSET_SCRATCH; | 2225 | as->modset |= RSET_SCRATCH; |
1676 | continue; | 2226 | continue; |
1677 | } | 2227 | } |
1678 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { | 2228 | case IR_CALLL: |
2229 | /* lj_vm_next needs two TValues on the stack. */ | ||
2230 | #if LJ_TARGET_X64 && LJ_ABI_WIN | ||
2231 | if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4) | ||
2232 | as->evenspill = SPS_FIRST + 4; | ||
2233 | #else | ||
2234 | if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4) | ||
2235 | as->evenspill = 4; | ||
2236 | #endif | ||
2237 | /* fallthrough */ | ||
2238 | case IR_CALLN: case IR_CALLA: case IR_CALLS: { | ||
1679 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | 2239 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; |
1680 | ir->prev = asm_setup_call_slots(as, ir, ci); | 2240 | ir->prev = asm_setup_call_slots(as, ir, ci); |
1681 | if (inloop) | 2241 | if (inloop) |
@@ -1683,7 +2243,6 @@ static void asm_setup_regsp(ASMState *as) | |||
1683 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; | 2243 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; |
1684 | continue; | 2244 | continue; |
1685 | } | 2245 | } |
1686 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | ||
1687 | case IR_HIOP: | 2246 | case IR_HIOP: |
1688 | switch ((ir-1)->o) { | 2247 | switch ((ir-1)->o) { |
1689 | #if LJ_SOFTFP && LJ_TARGET_ARM | 2248 | #if LJ_SOFTFP && LJ_TARGET_ARM |
@@ -1694,15 +2253,15 @@ static void asm_setup_regsp(ASMState *as) | |||
1694 | } | 2253 | } |
1695 | break; | 2254 | break; |
1696 | #endif | 2255 | #endif |
1697 | #if !LJ_SOFTFP && LJ_NEED_FP64 | 2256 | #if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI |
1698 | case IR_CONV: | 2257 | case IR_CONV: |
1699 | if (irt_isfp((ir-1)->t)) { | 2258 | if (irt_isfp((ir-1)->t)) { |
1700 | ir->prev = REGSP_HINT(RID_FPRET); | 2259 | ir->prev = REGSP_HINT(RID_FPRET); |
1701 | continue; | 2260 | continue; |
1702 | } | 2261 | } |
1703 | /* fallthrough */ | ||
1704 | #endif | 2262 | #endif |
1705 | case IR_CALLN: case IR_CALLXS: | 2263 | /* fallthrough */ |
2264 | case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: | ||
1706 | #if LJ_SOFTFP | 2265 | #if LJ_SOFTFP |
1707 | case IR_MIN: case IR_MAX: | 2266 | case IR_MIN: case IR_MAX: |
1708 | #endif | 2267 | #endif |
@@ -1713,18 +2272,29 @@ static void asm_setup_regsp(ASMState *as) | |||
1713 | break; | 2272 | break; |
1714 | } | 2273 | } |
1715 | break; | 2274 | break; |
1716 | #endif | ||
1717 | #if LJ_SOFTFP | 2275 | #if LJ_SOFTFP |
1718 | case IR_MIN: case IR_MAX: | 2276 | case IR_MIN: case IR_MAX: |
1719 | if ((ir+1)->o != IR_HIOP) break; | 2277 | if ((ir+1)->o != IR_HIOP) break; |
1720 | #endif | 2278 | #endif |
1721 | /* fallthrough */ | 2279 | /* fallthrough */ |
1722 | /* C calls evict all scratch regs and return results in RID_RET. */ | 2280 | /* C calls evict all scratch regs and return results in RID_RET. */ |
1723 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: | 2281 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: |
1724 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) | 2282 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) |
1725 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ | 2283 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ |
2284 | #if LJ_TARGET_X86 && LJ_HASFFI | ||
2285 | if (0) { | ||
2286 | case IR_CNEW: | ||
2287 | if (ir->op2 != REF_NIL && as->evenspill < 4) | ||
2288 | as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ | ||
2289 | } | ||
1726 | /* fallthrough */ | 2290 | /* fallthrough */ |
1727 | case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: | 2291 | #else |
2292 | /* fallthrough */ | ||
2293 | case IR_CNEW: | ||
2294 | #endif | ||
2295 | /* fallthrough */ | ||
2296 | case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: | ||
2297 | case IR_BUFSTR: | ||
1728 | ir->prev = REGSP_HINT(RID_RET); | 2298 | ir->prev = REGSP_HINT(RID_RET); |
1729 | if (inloop) | 2299 | if (inloop) |
1730 | as->modset = RSET_SCRATCH; | 2300 | as->modset = RSET_SCRATCH; |
@@ -1733,58 +2303,73 @@ static void asm_setup_regsp(ASMState *as) | |||
1733 | if (inloop) | 2303 | if (inloop) |
1734 | as->modset = RSET_SCRATCH; | 2304 | as->modset = RSET_SCRATCH; |
1735 | break; | 2305 | break; |
1736 | #if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP | 2306 | #if !LJ_SOFTFP |
1737 | case IR_ATAN2: case IR_LDEXP: | 2307 | #if !LJ_TARGET_X86ORX64 |
2308 | case IR_LDEXP: | ||
2309 | #endif | ||
1738 | #endif | 2310 | #endif |
2311 | /* fallthrough */ | ||
1739 | case IR_POW: | 2312 | case IR_POW: |
1740 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { | 2313 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1741 | #if LJ_TARGET_X86ORX64 | ||
1742 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1743 | if (inloop) | 2314 | if (inloop) |
1744 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | 2315 | as->modset |= RSET_SCRATCH; |
2316 | #if LJ_TARGET_X86 | ||
2317 | if (irt_isnum(IR(ir->op2)->t)) { | ||
2318 | if (as->evenspill < 4) /* Leave room to call pow(). */ | ||
2319 | as->evenspill = 4; | ||
2320 | } | ||
2321 | break; | ||
1745 | #else | 2322 | #else |
1746 | ir->prev = REGSP_HINT(RID_FPRET); | 2323 | ir->prev = REGSP_HINT(RID_FPRET); |
1747 | if (inloop) | ||
1748 | as->modset |= RSET_SCRATCH; | ||
1749 | #endif | ||
1750 | continue; | 2324 | continue; |
2325 | #endif | ||
1751 | } | 2326 | } |
1752 | /* fallthrough */ /* for integer POW */ | 2327 | /* fallthrough */ /* for integer POW */ |
1753 | case IR_DIV: case IR_MOD: | 2328 | case IR_DIV: case IR_MOD: |
1754 | if (!irt_isnum(ir->t)) { | 2329 | if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) { |
1755 | ir->prev = REGSP_HINT(RID_RET); | 2330 | ir->prev = REGSP_HINT(RID_RET); |
1756 | if (inloop) | 2331 | if (inloop) |
1757 | as->modset |= (RSET_SCRATCH & RSET_GPR); | 2332 | as->modset |= (RSET_SCRATCH & RSET_GPR); |
1758 | continue; | 2333 | continue; |
1759 | } | 2334 | } |
1760 | break; | 2335 | break; |
1761 | case IR_FPMATH: | 2336 | #if LJ_64 && LJ_SOFTFP |
1762 | #if LJ_TARGET_X86ORX64 | 2337 | case IR_ADD: case IR_SUB: case IR_MUL: |
1763 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ | 2338 | if (irt_isnum(ir->t)) { |
1764 | ir->prev = REGSP_HINT(RID_XMM0); | 2339 | ir->prev = REGSP_HINT(RID_RET); |
1765 | #if !LJ_64 | ||
1766 | if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ | ||
1767 | as->evenspill = 4; | ||
1768 | #endif | ||
1769 | if (inloop) | ||
1770 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
1771 | continue; | ||
1772 | } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { | ||
1773 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1774 | if (inloop) | 2340 | if (inloop) |
1775 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | 2341 | as->modset |= (RSET_SCRATCH & RSET_GPR); |
1776 | continue; | 2342 | continue; |
1777 | } | 2343 | } |
1778 | break; | 2344 | break; |
1779 | #else | 2345 | #endif |
1780 | ir->prev = REGSP_HINT(RID_FPRET); | 2346 | case IR_FPMATH: |
2347 | #if LJ_TARGET_X86ORX64 | ||
2348 | if (ir->op2 <= IRFPM_TRUNC) { | ||
2349 | if (!(as->flags & JIT_F_SSE4_1)) { | ||
2350 | ir->prev = REGSP_HINT(RID_XMM0); | ||
2351 | if (inloop) | ||
2352 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
2353 | continue; | ||
2354 | } | ||
2355 | break; | ||
2356 | } | ||
2357 | #endif | ||
1781 | if (inloop) | 2358 | if (inloop) |
1782 | as->modset |= RSET_SCRATCH; | 2359 | as->modset |= RSET_SCRATCH; |
2360 | #if LJ_TARGET_X86 | ||
2361 | break; | ||
2362 | #else | ||
2363 | ir->prev = REGSP_HINT(RID_FPRET); | ||
1783 | continue; | 2364 | continue; |
1784 | #endif | 2365 | #endif |
1785 | #if LJ_TARGET_X86ORX64 | 2366 | #if LJ_TARGET_X86ORX64 |
1786 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ | 2367 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ |
1787 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 2368 | case IR_BSHL: case IR_BSHR: case IR_BSAR: |
2369 | if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ | ||
2370 | break; | ||
2371 | /* fallthrough */ | ||
2372 | case IR_BROL: case IR_BROR: | ||
1788 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { | 2373 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { |
1789 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); | 2374 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); |
1790 | if (inloop) | 2375 | if (inloop) |
@@ -1828,16 +2413,26 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1828 | { | 2413 | { |
1829 | ASMState as_; | 2414 | ASMState as_; |
1830 | ASMState *as = &as_; | 2415 | ASMState *as = &as_; |
1831 | MCode *origtop; | 2416 | |
2417 | /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ | ||
2418 | { | ||
2419 | IRRef nins = T->nins; | ||
2420 | IRIns *ir = &T->ir[nins-1]; | ||
2421 | if (ir->o == IR_NOP || ir->o == IR_RENAME) { | ||
2422 | do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME); | ||
2423 | T->nins = nins; | ||
2424 | } | ||
2425 | } | ||
1832 | 2426 | ||
1833 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ | 2427 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ |
1834 | J->cur.nins = lj_ir_nextins(J); | 2428 | /* This also allows one RENAME to be added without reallocating curfinal. */ |
1835 | lj_ir_nop(&J->cur.ir[J->cur.nins]); | 2429 | as->orignins = lj_ir_nextins(J); |
2430 | lj_ir_nop(&J->cur.ir[as->orignins]); | ||
1836 | 2431 | ||
1837 | /* Setup initial state. Copy some fields to reduce indirections. */ | 2432 | /* Setup initial state. Copy some fields to reduce indirections. */ |
1838 | as->J = J; | 2433 | as->J = J; |
1839 | as->T = T; | 2434 | as->T = T; |
1840 | as->ir = T->ir; | 2435 | J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ |
1841 | as->flags = J->flags; | 2436 | as->flags = J->flags; |
1842 | as->loopref = J->loopref; | 2437 | as->loopref = J->loopref; |
1843 | as->realign = NULL; | 2438 | as->realign = NULL; |
@@ -1845,17 +2440,46 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1845 | as->parent = J->parent ? traceref(J, J->parent) : NULL; | 2440 | as->parent = J->parent ? traceref(J, J->parent) : NULL; |
1846 | 2441 | ||
1847 | /* Reserve MCode memory. */ | 2442 | /* Reserve MCode memory. */ |
1848 | as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); | 2443 | as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot); |
1849 | as->mcp = as->mctop; | 2444 | as->mcp = as->mctop; |
1850 | as->mclim = as->mcbot + MCLIM_REDZONE; | 2445 | as->mclim = as->mcbot + MCLIM_REDZONE; |
1851 | asm_setup_target(as); | 2446 | asm_setup_target(as); |
1852 | 2447 | ||
1853 | do { | 2448 | /* |
2449 | ** This is a loop, because the MCode may have to be (re-)assembled | ||
2450 | ** multiple times: | ||
2451 | ** | ||
2452 | ** 1. as->realign is set (and the assembly aborted), if the arch-specific | ||
2453 | ** backend wants the MCode to be aligned differently. | ||
2454 | ** | ||
2455 | ** This is currently only the case on x86/x64, where small loops get | ||
2456 | ** an aligned loop body plus a short branch. Not much effort is wasted, | ||
2457 | ** because the abort happens very quickly and only once. | ||
2458 | ** | ||
2459 | ** 2. The IR is immovable, since the MCode embeds pointers to various | ||
2460 | ** constants inside the IR. But RENAMEs may need to be added to the IR | ||
2461 | ** during assembly, which might grow and reallocate the IR. We check | ||
2462 | ** at the end if the IR (in J->cur.ir) has actually grown, resize the | ||
2463 | ** copy (in J->curfinal.ir) and try again. | ||
2464 | ** | ||
2465 | ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have | ||
2466 | ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to | ||
2467 | ** always have one spare slot in the IR (see above), which means we | ||
2468 | ** have to redo the assembly for only ~2% of all traces. | ||
2469 | ** | ||
2470 | ** Very, very rarely, this needs to be done repeatedly, since the | ||
2471 | ** location of constants inside the IR (actually, reachability from | ||
2472 | ** a global pointer) may affect register allocation and thus the | ||
2473 | ** number of RENAMEs. | ||
2474 | */ | ||
2475 | for (;;) { | ||
1854 | as->mcp = as->mctop; | 2476 | as->mcp = as->mctop; |
1855 | #ifdef LUA_USE_ASSERT | 2477 | #ifdef LUA_USE_ASSERT |
1856 | as->mcp_prev = as->mcp; | 2478 | as->mcp_prev = as->mcp; |
1857 | #endif | 2479 | #endif |
1858 | as->curins = T->nins; | 2480 | as->ir = J->curfinal->ir; /* Use the copied IR. */ |
2481 | as->curins = J->cur.nins = as->orignins; | ||
2482 | |||
1859 | RA_DBG_START(); | 2483 | RA_DBG_START(); |
1860 | RA_DBGX((as, "===== STOP =====")); | 2484 | RA_DBGX((as, "===== STOP =====")); |
1861 | 2485 | ||
@@ -1874,7 +2498,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1874 | /* Assemble a trace in linear backwards order. */ | 2498 | /* Assemble a trace in linear backwards order. */ |
1875 | for (as->curins--; as->curins > as->stopins; as->curins--) { | 2499 | for (as->curins--; as->curins > as->stopins; as->curins--) { |
1876 | IRIns *ir = IR(as->curins); | 2500 | IRIns *ir = IR(as->curins); |
1877 | lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ | 2501 | /* 64 bit types handled by SPLIT for 32 bit archs. */ |
2502 | lj_assertA(!(LJ_32 && irt_isint64(ir->t)), | ||
2503 | "IR %04d has unsplit 64 bit type", | ||
2504 | (int)(ir - as->ir) - REF_BIAS); | ||
2505 | asm_snap_prev(as); | ||
1878 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) | 2506 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) |
1879 | continue; /* Dead-code elimination can be soooo easy. */ | 2507 | continue; /* Dead-code elimination can be soooo easy. */ |
1880 | if (irt_isguard(ir->t)) | 2508 | if (irt_isguard(ir->t)) |
@@ -1883,22 +2511,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1883 | checkmclim(as); | 2511 | checkmclim(as); |
1884 | asm_ir(as, ir); | 2512 | asm_ir(as, ir); |
1885 | } | 2513 | } |
1886 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ | ||
1887 | 2514 | ||
1888 | /* Emit head of trace. */ | 2515 | if (as->realign && J->curfinal->nins >= T->nins) |
1889 | RA_DBG_REF(); | 2516 | continue; /* Retry in case only the MCode needs to be realigned. */ |
1890 | checkmclim(as); | 2517 | |
1891 | if (as->gcsteps > 0) { | 2518 | /* Emit head of trace. */ |
1892 | as->curins = as->T->snap[0].ref; | 2519 | RA_DBG_REF(); |
1893 | asm_snap_prep(as); /* The GC check is a guard. */ | 2520 | checkmclim(as); |
1894 | asm_gc_check(as); | 2521 | if (as->gcsteps > 0) { |
2522 | as->curins = as->T->snap[0].ref; | ||
2523 | asm_snap_prep(as); /* The GC check is a guard. */ | ||
2524 | asm_gc_check(as); | ||
2525 | as->curins = as->stopins; | ||
2526 | } | ||
2527 | ra_evictk(as); | ||
2528 | if (as->parent) | ||
2529 | asm_head_side(as); | ||
2530 | else | ||
2531 | asm_head_root(as); | ||
2532 | asm_phi_fixup(as); | ||
2533 | |||
2534 | if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ | ||
2535 | lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth"); | ||
2536 | memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, | ||
2537 | (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ | ||
2538 | T->nins = J->curfinal->nins; | ||
2539 | /* Fill mcofs of any unprocessed snapshots. */ | ||
2540 | as->curins = REF_FIRST; | ||
2541 | asm_snap_prev(as); | ||
2542 | break; /* Done. */ | ||
2543 | } | ||
2544 | |||
2545 | /* Otherwise try again with a bigger IR. */ | ||
2546 | lj_trace_free(J2G(J), J->curfinal); | ||
2547 | J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ | ||
2548 | J->curfinal = lj_trace_alloc(J->L, T); | ||
2549 | as->realign = NULL; | ||
1895 | } | 2550 | } |
1896 | ra_evictk(as); | ||
1897 | if (as->parent) | ||
1898 | asm_head_side(as); | ||
1899 | else | ||
1900 | asm_head_root(as); | ||
1901 | asm_phi_fixup(as); | ||
1902 | 2551 | ||
1903 | RA_DBGX((as, "===== START ====")); | 2552 | RA_DBGX((as, "===== START ====")); |
1904 | RA_DBG_FLUSH(); | 2553 | RA_DBG_FLUSH(); |
@@ -1908,10 +2557,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1908 | /* Set trace entry point before fixing up tail to allow link to self. */ | 2557 | /* Set trace entry point before fixing up tail to allow link to self. */ |
1909 | T->mcode = as->mcp; | 2558 | T->mcode = as->mcp; |
1910 | T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; | 2559 | T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; |
1911 | if (!as->loopref) | 2560 | if (as->loopref) |
2561 | asm_loop_tail_fixup(as); | ||
2562 | else | ||
1912 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ | 2563 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ |
1913 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); | 2564 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); |
1914 | lj_mcode_sync(T->mcode, origtop); | 2565 | asm_snap_fixup_mcofs(as); |
2566 | #if LJ_TARGET_MCODE_FIXUP | ||
2567 | asm_mcode_fixup(T->mcode, T->szmcode); | ||
2568 | #endif | ||
2569 | lj_mcode_sync(T->mcode, as->mctoporig); | ||
1915 | } | 2570 | } |
1916 | 2571 | ||
1917 | #undef IR | 2572 | #undef IR |
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 262fa59e..ded63913 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow) | |||
41 | } | 41 | } |
42 | } | 42 | } |
43 | } | 43 | } |
44 | lua_assert(rset_test(RSET_GPREVEN, r)); | 44 | lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r); |
45 | ra_modified(as, r); | 45 | ra_modified(as, r); |
46 | ra_modified(as, r+1); | 46 | ra_modified(as, r+1); |
47 | RA_DBGX((as, "scratchpair $r $r", r, r+1)); | 47 | RA_DBGX((as, "scratchpair $r $r", r, r+1)); |
@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, | |||
185 | *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ | 185 | *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ |
186 | return ra_allock(as, (ofs & ~255), allow); | 186 | return ra_allock(as, (ofs & ~255), allow); |
187 | } | 187 | } |
188 | } else if (ir->o == IR_TMPREF) { | ||
189 | *ofsp = 0; | ||
190 | return RID_SP; | ||
188 | } | 191 | } |
189 | } | 192 | } |
190 | *ofsp = 0; | 193 | *ofsp = 0; |
@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, | |||
269 | return; | 272 | return; |
270 | } | 273 | } |
271 | } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { | 274 | } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { |
272 | lua_assert(ofs == 0); | 275 | lj_assertA(ofs == 0, "bad usage"); |
273 | ofs = (int32_t)sizeof(GCstr); | 276 | ofs = (int32_t)sizeof(GCstr); |
274 | if (irref_isk(ir->op2)) { | 277 | if (irref_isk(ir->op2)) { |
275 | ofs += IR(ir->op2)->i; | 278 | ofs += IR(ir->op2)->i; |
@@ -338,7 +341,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) | |||
338 | /* Generate a call to a C function. */ | 341 | /* Generate a call to a C function. */ |
339 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 342 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
340 | { | 343 | { |
341 | uint32_t n, nargs = CCI_NARGS(ci); | 344 | uint32_t n, nargs = CCI_XNARGS(ci); |
342 | int32_t ofs = 0; | 345 | int32_t ofs = 0; |
343 | #if LJ_SOFTFP | 346 | #if LJ_SOFTFP |
344 | Reg gpr = REGARG_FIRSTGPR; | 347 | Reg gpr = REGARG_FIRSTGPR; |
@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
389 | as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); | 392 | as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); |
390 | if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; | 393 | if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; |
391 | if (gpr <= REGARG_LASTGPR) { | 394 | if (gpr <= REGARG_LASTGPR) { |
392 | lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ | 395 | lj_assertA(rset_test(as->freeset, gpr), |
396 | "reg %d not free", gpr); /* Must have been evicted. */ | ||
393 | if (irt_isnum(ir->t)) { | 397 | if (irt_isnum(ir->t)) { |
394 | lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ | 398 | lj_assertA(rset_test(as->freeset, gpr+1), |
399 | "reg %d not free", gpr+1); /* Ditto. */ | ||
395 | emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); | 400 | emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); |
396 | gpr += 2; | 401 | gpr += 2; |
397 | } else { | 402 | } else { |
@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
408 | #endif | 413 | #endif |
409 | { | 414 | { |
410 | if (gpr <= REGARG_LASTGPR) { | 415 | if (gpr <= REGARG_LASTGPR) { |
411 | lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ | 416 | lj_assertA(rset_test(as->freeset, gpr), |
417 | "reg %d not free", gpr); /* Must have been evicted. */ | ||
412 | if (ref) ra_leftov(as, gpr, ref); | 418 | if (ref) ra_leftov(as, gpr, ref); |
413 | gpr++; | 419 | gpr++; |
414 | } else { | 420 | } else { |
@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
433 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ | 439 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ |
434 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 440 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
435 | if (ra_used(ir)) { | 441 | if (ra_used(ir)) { |
436 | lua_assert(!irt_ispri(ir->t)); | 442 | lj_assertA(!irt_ispri(ir->t), "PRI dest"); |
437 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { | 443 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
438 | if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { | 444 | if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { |
439 | Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); | 445 | Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); |
@@ -453,15 +459,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
453 | UNUSED(ci); | 459 | UNUSED(ci); |
454 | } | 460 | } |
455 | 461 | ||
456 | static void asm_call(ASMState *as, IRIns *ir) | ||
457 | { | ||
458 | IRRef args[CCI_NARGS_MAX]; | ||
459 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
460 | asm_collectargs(as, ir, ci, args); | ||
461 | asm_setupresult(as, ir, ci); | ||
462 | asm_gencall(as, ci, args); | ||
463 | } | ||
464 | |||
465 | static void asm_callx(ASMState *as, IRIns *ir) | 462 | static void asm_callx(ASMState *as, IRIns *ir) |
466 | { | 463 | { |
467 | IRRef args[CCI_NARGS_MAX*2]; | 464 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -490,7 +487,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
490 | { | 487 | { |
491 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 488 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
492 | void *pc = ir_kptr(IR(ir->op2)); | 489 | void *pc = ir_kptr(IR(ir->op2)); |
493 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 490 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
494 | as->topslot -= (BCReg)delta; | 491 | as->topslot -= (BCReg)delta; |
495 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 492 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
496 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 493 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
@@ -504,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
504 | emit_lso(as, ARMI_LDR, RID_TMP, base, -4); | 501 | emit_lso(as, ARMI_LDR, RID_TMP, base, -4); |
505 | } | 502 | } |
506 | 503 | ||
504 | /* -- Buffer operations --------------------------------------------------- */ | ||
505 | |||
506 | #if LJ_HASBUFFER | ||
507 | static void asm_bufhdr_write(ASMState *as, Reg sb) | ||
508 | { | ||
509 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
510 | IRIns irgc; | ||
511 | int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L); | ||
512 | irgc.ot = IRT(0, IRT_PGC); /* GC type. */ | ||
513 | emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); | ||
514 | if ((as->flags & JIT_F_ARMV6T2)) { | ||
515 | emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp); | ||
516 | } else { | ||
517 | emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp); | ||
518 | emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp); | ||
519 | } | ||
520 | emit_lso(as, ARMI_LDR, RID_TMP, | ||
521 | ra_allock(as, (addr & ~4095), | ||
522 | rset_exclude(rset_exclude(RSET_GPR, sb), tmp)), | ||
523 | (addr & 4095)); | ||
524 | emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); | ||
525 | } | ||
526 | #endif | ||
527 | |||
507 | /* -- Type conversions ---------------------------------------------------- */ | 528 | /* -- Type conversions ---------------------------------------------------- */ |
508 | 529 | ||
509 | #if !LJ_SOFTFP | 530 | #if !LJ_SOFTFP |
@@ -539,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
539 | #endif | 560 | #endif |
540 | IRRef lref = ir->op1; | 561 | IRRef lref = ir->op1; |
541 | /* 64 bit integer conversions are handled by SPLIT. */ | 562 | /* 64 bit integer conversions are handled by SPLIT. */ |
542 | lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); | 563 | lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64), |
564 | "IR %04d has unsplit 64 bit type", | ||
565 | (int)(ir - as->ir) - REF_BIAS); | ||
543 | #if LJ_SOFTFP | 566 | #if LJ_SOFTFP |
544 | /* FP conversions are handled by SPLIT. */ | 567 | /* FP conversions are handled by SPLIT. */ |
545 | lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); | 568 | lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), |
569 | "IR %04d has FP type", | ||
570 | (int)(ir - as->ir) - REF_BIAS); | ||
546 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | 571 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ |
547 | #else | 572 | #else |
548 | lua_assert(irt_type(ir->t) != st); | 573 | lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); |
549 | if (irt_isfp(ir->t)) { | 574 | if (irt_isfp(ir->t)) { |
550 | Reg dest = ra_dest(as, ir, RSET_FPR); | 575 | Reg dest = ra_dest(as, ir, RSET_FPR); |
551 | if (stfp) { /* FP to FP conversion. */ | 576 | if (stfp) { /* FP to FP conversion. */ |
@@ -562,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
562 | } else if (stfp) { /* FP to integer conversion. */ | 587 | } else if (stfp) { /* FP to integer conversion. */ |
563 | if (irt_isguard(ir->t)) { | 588 | if (irt_isguard(ir->t)) { |
564 | /* Checked conversions are only supported from number to int. */ | 589 | /* Checked conversions are only supported from number to int. */ |
565 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | 590 | lj_assertA(irt_isint(ir->t) && st == IRT_NUM, |
591 | "bad type for checked CONV"); | ||
566 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | 592 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); |
567 | } else { | 593 | } else { |
568 | Reg left = ra_alloc1(as, lref, RSET_FPR); | 594 | Reg left = ra_alloc1(as, lref, RSET_FPR); |
@@ -581,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
581 | Reg dest = ra_dest(as, ir, RSET_GPR); | 607 | Reg dest = ra_dest(as, ir, RSET_GPR); |
582 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 608 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
583 | Reg left = ra_alloc1(as, lref, RSET_GPR); | 609 | Reg left = ra_alloc1(as, lref, RSET_GPR); |
584 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | 610 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); |
585 | if ((as->flags & JIT_F_ARMV6)) { | 611 | if ((as->flags & JIT_F_ARMV6)) { |
586 | ARMIns ai = st == IRT_I8 ? ARMI_SXTB : | 612 | ARMIns ai = st == IRT_I8 ? ARMI_SXTB : |
587 | st == IRT_U8 ? ARMI_UXTB : | 613 | st == IRT_U8 ? ARMI_UXTB : |
@@ -601,31 +627,6 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
601 | } | 627 | } |
602 | } | 628 | } |
603 | 629 | ||
604 | #if !LJ_SOFTFP && LJ_HASFFI | ||
605 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
606 | { | ||
607 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
608 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
609 | IRCallID id; | ||
610 | CCallInfo ci; | ||
611 | IRRef args[2]; | ||
612 | args[0] = (ir-1)->op1; | ||
613 | args[1] = ir->op1; | ||
614 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
615 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
616 | ir--; | ||
617 | } else { | ||
618 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
619 | } | ||
620 | ci = lj_ir_callinfo[id]; | ||
621 | #if !LJ_ABI_SOFTFP | ||
622 | ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
623 | #endif | ||
624 | asm_setupresult(as, ir, &ci); | ||
625 | asm_gencall(as, &ci, args); | ||
626 | } | ||
627 | #endif | ||
628 | |||
629 | static void asm_strto(ASMState *as, IRIns *ir) | 630 | static void asm_strto(ASMState *as, IRIns *ir) |
630 | { | 631 | { |
631 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 632 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
@@ -689,60 +690,61 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
689 | emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); | 690 | emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); |
690 | } | 691 | } |
691 | 692 | ||
693 | /* -- Memory references --------------------------------------------------- */ | ||
694 | |||
692 | /* Get pointer to TValue. */ | 695 | /* Get pointer to TValue. */ |
693 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 696 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) |
694 | { | 697 | { |
695 | IRIns *ir = IR(ref); | 698 | if ((mode & IRTMPREF_IN1)) { |
696 | if (irt_isnum(ir->t)) { | 699 | IRIns *ir = IR(ref); |
697 | if (irref_isk(ref)) { | 700 | if (irt_isnum(ir->t)) { |
698 | /* Use the number constant itself as a TValue. */ | 701 | if ((mode & IRTMPREF_OUT1)) { |
699 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | 702 | #if LJ_SOFTFP |
700 | } else { | 703 | lj_assertA(irref_isk(ref), "unsplit FP op"); |
704 | emit_dm(as, ARMI_MOV, dest, RID_SP); | ||
705 | emit_lso(as, ARMI_STR, | ||
706 | ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), | ||
707 | RID_SP, 0); | ||
708 | emit_lso(as, ARMI_STR, | ||
709 | ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), | ||
710 | RID_SP, 4); | ||
711 | #else | ||
712 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
713 | emit_dm(as, ARMI_MOV, dest, RID_SP); | ||
714 | emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0); | ||
715 | #endif | ||
716 | } else if (irref_isk(ref)) { | ||
717 | /* Use the number constant itself as a TValue. */ | ||
718 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | ||
719 | } else { | ||
701 | #if LJ_SOFTFP | 720 | #if LJ_SOFTFP |
702 | lua_assert(0); | 721 | lj_assertA(0, "unsplit FP op"); |
703 | #else | 722 | #else |
704 | /* Otherwise force a spill and use the spill slot. */ | 723 | /* Otherwise force a spill and use the spill slot. */ |
705 | emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); | 724 | emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); |
706 | #endif | 725 | #endif |
726 | } | ||
727 | } else { | ||
728 | /* Otherwise use [sp] and [sp+4] to hold the TValue. | ||
729 | ** This assumes the following call has max. 4 args. | ||
730 | */ | ||
731 | Reg type; | ||
732 | emit_dm(as, ARMI_MOV, dest, RID_SP); | ||
733 | if (!irt_ispri(ir->t)) { | ||
734 | Reg src = ra_alloc1(as, ref, RSET_GPR); | ||
735 | emit_lso(as, ARMI_STR, src, RID_SP, 0); | ||
736 | } | ||
737 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) | ||
738 | type = ra_alloc1(as, ref+1, RSET_GPR); | ||
739 | else | ||
740 | type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); | ||
741 | emit_lso(as, ARMI_STR, type, RID_SP, 4); | ||
707 | } | 742 | } |
708 | } else { | 743 | } else { |
709 | /* Otherwise use [sp] and [sp+4] to hold the TValue. */ | ||
710 | RegSet allow = rset_exclude(RSET_GPR, dest); | ||
711 | Reg type; | ||
712 | emit_dm(as, ARMI_MOV, dest, RID_SP); | 744 | emit_dm(as, ARMI_MOV, dest, RID_SP); |
713 | if (!irt_ispri(ir->t)) { | ||
714 | Reg src = ra_alloc1(as, ref, allow); | ||
715 | emit_lso(as, ARMI_STR, src, RID_SP, 0); | ||
716 | } | ||
717 | if ((ir+1)->o == IR_HIOP) | ||
718 | type = ra_alloc1(as, ref+1, allow); | ||
719 | else | ||
720 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
721 | emit_lso(as, ARMI_STR, type, RID_SP, 4); | ||
722 | } | 745 | } |
723 | } | 746 | } |
724 | 747 | ||
725 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
726 | { | ||
727 | IRRef args[2]; | ||
728 | args[0] = ASMREF_L; | ||
729 | as->gcsteps++; | ||
730 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
731 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
732 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
733 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
734 | asm_gencall(as, ci, args); | ||
735 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
736 | } else { | ||
737 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | ||
738 | args[1] = ir->op1; /* int32_t k */ | ||
739 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
740 | asm_gencall(as, ci, args); | ||
741 | } | ||
742 | } | ||
743 | |||
744 | /* -- Memory references --------------------------------------------------- */ | ||
745 | |||
746 | static void asm_aref(ASMState *as, IRIns *ir) | 748 | static void asm_aref(ASMState *as, IRIns *ir) |
747 | { | 749 | { |
748 | Reg dest = ra_dest(as, ir, RSET_GPR); | 750 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -864,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
864 | *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); | 866 | *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); |
865 | 867 | ||
866 | /* Load main position relative to tab->node into dest. */ | 868 | /* Load main position relative to tab->node into dest. */ |
867 | khash = irref_isk(refkey) ? ir_khash(irkey) : 1; | 869 | khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1; |
868 | if (khash == 0) { | 870 | if (khash == 0) { |
869 | emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); | 871 | emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); |
870 | } else { | 872 | } else { |
871 | emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); | 873 | emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); |
872 | emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); | 874 | emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); |
873 | if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ | 875 | if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */ |
874 | emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); | 876 | emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); |
875 | emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); | 877 | emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); |
876 | emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); | 878 | emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid)); |
877 | emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); | 879 | emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); |
878 | } else if (irref_isk(refkey)) { | 880 | } else if (irref_isk(refkey)) { |
879 | emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, | 881 | emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, |
@@ -920,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
920 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | 922 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); |
921 | Reg key = RID_NONE, type = RID_TMP, idx = node; | 923 | Reg key = RID_NONE, type = RID_TMP, idx = node; |
922 | RegSet allow = rset_exclude(RSET_GPR, node); | 924 | RegSet allow = rset_exclude(RSET_GPR, node); |
923 | lua_assert(ofs % sizeof(Node) == 0); | 925 | lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); |
924 | if (ofs > 4095) { | 926 | if (ofs > 4095) { |
925 | idx = dest; | 927 | idx = dest; |
926 | rset_clear(allow, dest); | 928 | rset_clear(allow, dest); |
@@ -960,20 +962,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
960 | emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); | 962 | emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); |
961 | } | 963 | } |
962 | 964 | ||
963 | static void asm_newref(ASMState *as, IRIns *ir) | ||
964 | { | ||
965 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
966 | IRRef args[3]; | ||
967 | if (ir->r == RID_SINK) | ||
968 | return; | ||
969 | args[0] = ASMREF_L; /* lua_State *L */ | ||
970 | args[1] = ir->op1; /* GCtab *t */ | ||
971 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
972 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
973 | asm_gencall(as, ci, args); | ||
974 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
975 | } | ||
976 | |||
977 | static void asm_uref(ASMState *as, IRIns *ir) | 965 | static void asm_uref(ASMState *as, IRIns *ir) |
978 | { | 966 | { |
979 | Reg dest = ra_dest(as, ir, RSET_GPR); | 967 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1001,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir) | |||
1001 | static void asm_fref(ASMState *as, IRIns *ir) | 989 | static void asm_fref(ASMState *as, IRIns *ir) |
1002 | { | 990 | { |
1003 | UNUSED(as); UNUSED(ir); | 991 | UNUSED(as); UNUSED(ir); |
1004 | lua_assert(!ra_used(ir)); | 992 | lj_assertA(!ra_used(ir), "unfused FREF"); |
1005 | } | 993 | } |
1006 | 994 | ||
1007 | static void asm_strref(ASMState *as, IRIns *ir) | 995 | static void asm_strref(ASMState *as, IRIns *ir) |
@@ -1038,25 +1026,27 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
1038 | 1026 | ||
1039 | /* -- Loads and stores ---------------------------------------------------- */ | 1027 | /* -- Loads and stores ---------------------------------------------------- */ |
1040 | 1028 | ||
1041 | static ARMIns asm_fxloadins(IRIns *ir) | 1029 | static ARMIns asm_fxloadins(ASMState *as, IRIns *ir) |
1042 | { | 1030 | { |
1031 | UNUSED(as); | ||
1043 | switch (irt_type(ir->t)) { | 1032 | switch (irt_type(ir->t)) { |
1044 | case IRT_I8: return ARMI_LDRSB; | 1033 | case IRT_I8: return ARMI_LDRSB; |
1045 | case IRT_U8: return ARMI_LDRB; | 1034 | case IRT_U8: return ARMI_LDRB; |
1046 | case IRT_I16: return ARMI_LDRSH; | 1035 | case IRT_I16: return ARMI_LDRSH; |
1047 | case IRT_U16: return ARMI_LDRH; | 1036 | case IRT_U16: return ARMI_LDRH; |
1048 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; | 1037 | case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D; |
1049 | case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ | 1038 | case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ |
1050 | default: return ARMI_LDR; | 1039 | default: return ARMI_LDR; |
1051 | } | 1040 | } |
1052 | } | 1041 | } |
1053 | 1042 | ||
1054 | static ARMIns asm_fxstoreins(IRIns *ir) | 1043 | static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) |
1055 | { | 1044 | { |
1045 | UNUSED(as); | ||
1056 | switch (irt_type(ir->t)) { | 1046 | switch (irt_type(ir->t)) { |
1057 | case IRT_I8: case IRT_U8: return ARMI_STRB; | 1047 | case IRT_I8: case IRT_U8: return ARMI_STRB; |
1058 | case IRT_I16: case IRT_U16: return ARMI_STRH; | 1048 | case IRT_I16: case IRT_U16: return ARMI_STRH; |
1059 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; | 1049 | case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D; |
1060 | case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ | 1050 | case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ |
1061 | default: return ARMI_STR; | 1051 | default: return ARMI_STR; |
1062 | } | 1052 | } |
@@ -1065,17 +1055,23 @@ static ARMIns asm_fxstoreins(IRIns *ir) | |||
1065 | static void asm_fload(ASMState *as, IRIns *ir) | 1055 | static void asm_fload(ASMState *as, IRIns *ir) |
1066 | { | 1056 | { |
1067 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1057 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1068 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | 1058 | ARMIns ai = asm_fxloadins(as, ir); |
1069 | ARMIns ai = asm_fxloadins(ir); | 1059 | Reg idx; |
1070 | int32_t ofs; | 1060 | int32_t ofs; |
1071 | if (ir->op2 == IRFL_TAB_ARRAY) { | 1061 | if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ |
1072 | ofs = asm_fuseabase(as, ir->op1); | 1062 | idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR); |
1073 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 1063 | ofs = 0; |
1074 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | 1064 | } else { |
1075 | return; | 1065 | idx = ra_alloc1(as, ir->op1, RSET_GPR); |
1066 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
1067 | ofs = asm_fuseabase(as, ir->op1); | ||
1068 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
1069 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | ||
1070 | return; | ||
1071 | } | ||
1076 | } | 1072 | } |
1073 | ofs = field_ofs[ir->op2]; | ||
1077 | } | 1074 | } |
1078 | ofs = field_ofs[ir->op2]; | ||
1079 | if ((ai & 0x04000000)) | 1075 | if ((ai & 0x04000000)) |
1080 | emit_lso(as, ai, dest, idx, ofs); | 1076 | emit_lso(as, ai, dest, idx, ofs); |
1081 | else | 1077 | else |
@@ -1089,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
1089 | IRIns *irf = IR(ir->op1); | 1085 | IRIns *irf = IR(ir->op1); |
1090 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | 1086 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); |
1091 | int32_t ofs = field_ofs[irf->op2]; | 1087 | int32_t ofs = field_ofs[irf->op2]; |
1092 | ARMIns ai = asm_fxstoreins(ir); | 1088 | ARMIns ai = asm_fxstoreins(as, ir); |
1093 | if ((ai & 0x04000000)) | 1089 | if ((ai & 0x04000000)) |
1094 | emit_lso(as, ai, src, idx, ofs); | 1090 | emit_lso(as, ai, src, idx, ofs); |
1095 | else | 1091 | else |
@@ -1101,20 +1097,22 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
1101 | { | 1097 | { |
1102 | Reg dest = ra_dest(as, ir, | 1098 | Reg dest = ra_dest(as, ir, |
1103 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | 1099 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
1104 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 1100 | lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); |
1105 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 1101 | asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); |
1106 | } | 1102 | } |
1107 | 1103 | ||
1108 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 1104 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
1109 | { | 1105 | { |
1110 | if (ir->r != RID_SINK) { | 1106 | if (ir->r != RID_SINK) { |
1111 | Reg src = ra_alloc1(as, ir->op2, | 1107 | Reg src = ra_alloc1(as, ir->op2, |
1112 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | 1108 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
1113 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 1109 | asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, |
1114 | rset_exclude(RSET_GPR, src), ofs); | 1110 | rset_exclude(RSET_GPR, src), ofs); |
1115 | } | 1111 | } |
1116 | } | 1112 | } |
1117 | 1113 | ||
1114 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
1115 | |||
1118 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1116 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
1119 | { | 1117 | { |
1120 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); | 1118 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); |
@@ -1127,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
1127 | rset_clear(allow, type); | 1125 | rset_clear(allow, type); |
1128 | } | 1126 | } |
1129 | if (ra_used(ir)) { | 1127 | if (ra_used(ir)) { |
1130 | lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || | 1128 | lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
1131 | irt_isint(ir->t) || irt_isaddr(ir->t)); | 1129 | irt_isint(ir->t) || irt_isaddr(ir->t), |
1130 | "bad load type %d", irt_type(ir->t)); | ||
1132 | dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); | 1131 | dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); |
1133 | rset_clear(allow, dest); | 1132 | rset_clear(allow, dest); |
1134 | } | 1133 | } |
1135 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow, | 1134 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow, |
1136 | (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); | 1135 | (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); |
1136 | if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; | ||
1137 | if (!hiop || type == RID_NONE) { | 1137 | if (!hiop || type == RID_NONE) { |
1138 | rset_clear(allow, idx); | 1138 | rset_clear(allow, idx); |
1139 | if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && | 1139 | if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && |
@@ -1194,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1194 | IRType t = hiop ? IRT_NUM : irt_type(ir->t); | 1194 | IRType t = hiop ? IRT_NUM : irt_type(ir->t); |
1195 | Reg dest = RID_NONE, type = RID_NONE, base; | 1195 | Reg dest = RID_NONE, type = RID_NONE, base; |
1196 | RegSet allow = RSET_GPR; | 1196 | RegSet allow = RSET_GPR; |
1197 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1197 | lj_assertA(!(ir->op2 & IRSLOAD_PARENT), |
1198 | lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1198 | "bad parent SLOAD"); /* Handled by asm_head_side(). */ |
1199 | lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), | ||
1200 | "inconsistent SLOAD variant"); | ||
1199 | #if LJ_SOFTFP | 1201 | #if LJ_SOFTFP |
1200 | lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ | 1202 | lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), |
1203 | "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ | ||
1201 | if (hiop && ra_used(ir+1)) { | 1204 | if (hiop && ra_used(ir+1)) { |
1202 | type = ra_dest(as, ir+1, allow); | 1205 | type = ra_dest(as, ir+1, allow); |
1203 | rset_clear(allow, type); | 1206 | rset_clear(allow, type); |
@@ -1213,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1213 | Reg tmp = RID_NONE; | 1216 | Reg tmp = RID_NONE; |
1214 | if ((ir->op2 & IRSLOAD_CONVERT)) | 1217 | if ((ir->op2 & IRSLOAD_CONVERT)) |
1215 | tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); | 1218 | tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); |
1216 | lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || | 1219 | lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
1217 | irt_isint(ir->t) || irt_isaddr(ir->t)); | 1220 | irt_isint(ir->t) || irt_isaddr(ir->t), |
1221 | "bad SLOAD type %d", irt_type(ir->t)); | ||
1218 | dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); | 1222 | dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); |
1219 | rset_clear(allow, dest); | 1223 | rset_clear(allow, dest); |
1220 | base = ra_alloc1(as, REF_BASE, allow); | 1224 | base = ra_alloc1(as, REF_BASE, allow); |
@@ -1272,19 +1276,17 @@ dotypecheck: | |||
1272 | static void asm_cnew(ASMState *as, IRIns *ir) | 1276 | static void asm_cnew(ASMState *as, IRIns *ir) |
1273 | { | 1277 | { |
1274 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1278 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1275 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1279 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1276 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1280 | CTSize sz; |
1277 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1281 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1278 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1282 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1279 | IRRef args[2]; | 1283 | IRRef args[4]; |
1280 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | 1284 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); |
1281 | RegSet drop = RSET_SCRATCH; | 1285 | RegSet drop = RSET_SCRATCH; |
1282 | lua_assert(sz != CTSIZE_INVALID); | 1286 | lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), |
1287 | "bad CNEW/CNEWI operands"); | ||
1283 | 1288 | ||
1284 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1285 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1286 | as->gcsteps++; | 1289 | as->gcsteps++; |
1287 | |||
1288 | if (ra_hasreg(ir->r)) | 1290 | if (ra_hasreg(ir->r)) |
1289 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1291 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1290 | ra_evictset(as, drop); | 1292 | ra_evictset(as, drop); |
@@ -1294,10 +1296,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1294 | /* Initialize immutable cdata object. */ | 1296 | /* Initialize immutable cdata object. */ |
1295 | if (ir->o == IR_CNEWI) { | 1297 | if (ir->o == IR_CNEWI) { |
1296 | int32_t ofs = sizeof(GCcdata); | 1298 | int32_t ofs = sizeof(GCcdata); |
1297 | lua_assert(sz == 4 || sz == 8); | 1299 | lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); |
1298 | if (sz == 8) { | 1300 | if (sz == 8) { |
1299 | ofs += 4; ir++; | 1301 | ofs += 4; ir++; |
1300 | lua_assert(ir->o == IR_HIOP); | 1302 | lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI"); |
1301 | } | 1303 | } |
1302 | for (;;) { | 1304 | for (;;) { |
1303 | Reg r = ra_alloc1(as, ir->op2, allow); | 1305 | Reg r = ra_alloc1(as, ir->op2, allow); |
@@ -1306,22 +1308,32 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1306 | if (ofs == sizeof(GCcdata)) break; | 1308 | if (ofs == sizeof(GCcdata)) break; |
1307 | ofs -= 4; ir--; | 1309 | ofs -= 4; ir--; |
1308 | } | 1310 | } |
1311 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1312 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1313 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1314 | args[1] = ir->op1; /* CTypeID id */ | ||
1315 | args[2] = ir->op2; /* CTSize sz */ | ||
1316 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1317 | asm_gencall(as, ci, args); | ||
1318 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1319 | return; | ||
1309 | } | 1320 | } |
1321 | |||
1310 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1322 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1311 | { | 1323 | { |
1312 | uint32_t k = emit_isk12(ARMI_MOV, ctypeid); | 1324 | uint32_t k = emit_isk12(ARMI_MOV, id); |
1313 | Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); | 1325 | Reg r = k ? RID_R1 : ra_allock(as, id, allow); |
1314 | emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); | 1326 | emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); |
1315 | emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); | 1327 | emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); |
1316 | emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); | 1328 | emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); |
1317 | if (k) emit_d(as, ARMI_MOV^k, RID_R1); | 1329 | if (k) emit_d(as, ARMI_MOV^k, RID_R1); |
1318 | } | 1330 | } |
1331 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1332 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1319 | asm_gencall(as, ci, args); | 1333 | asm_gencall(as, ci, args); |
1320 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1334 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1321 | ra_releasetmp(as, ASMREF_TMP1)); | 1335 | ra_releasetmp(as, ASMREF_TMP1)); |
1322 | } | 1336 | } |
1323 | #else | ||
1324 | #define asm_cnew(as, ir) ((void)0) | ||
1325 | #endif | 1337 | #endif |
1326 | 1338 | ||
1327 | /* -- Write barriers ------------------------------------------------------ */ | 1339 | /* -- Write barriers ------------------------------------------------------ */ |
@@ -1353,7 +1365,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1353 | MCLabel l_end; | 1365 | MCLabel l_end; |
1354 | Reg obj, val, tmp; | 1366 | Reg obj, val, tmp; |
1355 | /* No need for other object barriers (yet). */ | 1367 | /* No need for other object barriers (yet). */ |
1356 | lua_assert(IR(ir->op1)->o == IR_UREFC); | 1368 | lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); |
1357 | ra_evictset(as, RSET_SCRATCH); | 1369 | ra_evictset(as, RSET_SCRATCH); |
1358 | l_end = emit_label(as); | 1370 | l_end = emit_label(as); |
1359 | args[0] = ASMREF_TMP1; /* global_State *g */ | 1371 | args[0] = ASMREF_TMP1; /* global_State *g */ |
@@ -1392,23 +1404,36 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) | |||
1392 | emit_dm(as, ai, (dest & 15), (left & 15)); | 1404 | emit_dm(as, ai, (dest & 15), (left & 15)); |
1393 | } | 1405 | } |
1394 | 1406 | ||
1395 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1407 | static void asm_callround(ASMState *as, IRIns *ir, int id) |
1396 | { | 1408 | { |
1397 | IRIns *irp = IR(ir->op1); | 1409 | /* The modified regs must match with the *.dasc implementation. */ |
1398 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1410 | RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| |
1399 | IRIns *irpp = IR(irp->op1); | 1411 | RID2RSET(RID_R3)|RID2RSET(RID_R12); |
1400 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1412 | RegSet of; |
1401 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | 1413 | Reg dest, src; |
1402 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1414 | ra_evictset(as, drop); |
1403 | IRRef args[2]; | 1415 | dest = ra_dest(as, ir, RSET_FPR); |
1404 | args[0] = irpp->op1; | 1416 | emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); |
1405 | args[1] = irp->op2; | 1417 | emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : |
1406 | asm_setupresult(as, ir, ci); | 1418 | id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : |
1407 | asm_gencall(as, ci, args); | 1419 | (void *)lj_vm_trunc_sf); |
1408 | return 1; | 1420 | /* Workaround to protect argument GPRs from being used for remat. */ |
1409 | } | 1421 | of = as->freeset; |
1410 | } | 1422 | as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); |
1411 | return 0; | 1423 | as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); |
1424 | src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ | ||
1425 | as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); | ||
1426 | emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); | ||
1427 | } | ||
1428 | |||
1429 | static void asm_fpmath(ASMState *as, IRIns *ir) | ||
1430 | { | ||
1431 | if (ir->op2 <= IRFPM_TRUNC) | ||
1432 | asm_callround(as, ir, ir->op2); | ||
1433 | else if (ir->op2 == IRFPM_SQRT) | ||
1434 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
1435 | else | ||
1436 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
1412 | } | 1437 | } |
1413 | #endif | 1438 | #endif |
1414 | 1439 | ||
@@ -1474,19 +1499,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) | |||
1474 | asm_intop(as, ir, asm_drop_cmp0(as, ai)); | 1499 | asm_intop(as, ir, asm_drop_cmp0(as, ai)); |
1475 | } | 1500 | } |
1476 | 1501 | ||
1477 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) | ||
1478 | { | ||
1479 | ai = asm_drop_cmp0(as, ai); | ||
1480 | if (ir->op2 == 0) { | ||
1481 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1482 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); | ||
1483 | emit_d(as, ai^m, dest); | ||
1484 | } else { | ||
1485 | /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ | ||
1486 | asm_intop(as, ir, ai); | ||
1487 | } | ||
1488 | } | ||
1489 | |||
1490 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) | 1502 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) |
1491 | { | 1503 | { |
1492 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1504 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1552,6 +1564,15 @@ static void asm_mul(ASMState *as, IRIns *ir) | |||
1552 | asm_intmul(as, ir); | 1564 | asm_intmul(as, ir); |
1553 | } | 1565 | } |
1554 | 1566 | ||
1567 | #define asm_addov(as, ir) asm_add(as, ir) | ||
1568 | #define asm_subov(as, ir) asm_sub(as, ir) | ||
1569 | #define asm_mulov(as, ir) asm_mul(as, ir) | ||
1570 | |||
1571 | #if !LJ_SOFTFP | ||
1572 | #define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) | ||
1573 | #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) | ||
1574 | #endif | ||
1575 | |||
1555 | static void asm_neg(ASMState *as, IRIns *ir) | 1576 | static void asm_neg(ASMState *as, IRIns *ir) |
1556 | { | 1577 | { |
1557 | #if !LJ_SOFTFP | 1578 | #if !LJ_SOFTFP |
@@ -1563,41 +1584,22 @@ static void asm_neg(ASMState *as, IRIns *ir) | |||
1563 | asm_intneg(as, ir, ARMI_RSB); | 1584 | asm_intneg(as, ir, ARMI_RSB); |
1564 | } | 1585 | } |
1565 | 1586 | ||
1566 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | 1587 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) |
1567 | { | 1588 | { |
1568 | const CCallInfo *ci = &lj_ir_callinfo[id]; | 1589 | ai = asm_drop_cmp0(as, ai); |
1569 | IRRef args[2]; | 1590 | if (ir->op2 == 0) { |
1570 | args[0] = ir->op1; | 1591 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1571 | args[1] = ir->op2; | 1592 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); |
1572 | asm_setupresult(as, ir, ci); | 1593 | emit_d(as, ai^m, dest); |
1573 | asm_gencall(as, ci, args); | 1594 | } else { |
1595 | /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ | ||
1596 | asm_intop(as, ir, ai); | ||
1597 | } | ||
1574 | } | 1598 | } |
1575 | 1599 | ||
1576 | #if !LJ_SOFTFP | 1600 | #define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) |
1577 | static void asm_callround(ASMState *as, IRIns *ir, int id) | ||
1578 | { | ||
1579 | /* The modified regs must match with the *.dasc implementation. */ | ||
1580 | RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| | ||
1581 | RID2RSET(RID_R3)|RID2RSET(RID_R12); | ||
1582 | RegSet of; | ||
1583 | Reg dest, src; | ||
1584 | ra_evictset(as, drop); | ||
1585 | dest = ra_dest(as, ir, RSET_FPR); | ||
1586 | emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); | ||
1587 | emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : | ||
1588 | id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : | ||
1589 | (void *)lj_vm_trunc_sf); | ||
1590 | /* Workaround to protect argument GPRs from being used for remat. */ | ||
1591 | of = as->freeset; | ||
1592 | as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); | ||
1593 | as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); | ||
1594 | src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ | ||
1595 | as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); | ||
1596 | emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); | ||
1597 | } | ||
1598 | #endif | ||
1599 | 1601 | ||
1600 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1602 | static void asm_bswap(ASMState *as, IRIns *ir) |
1601 | { | 1603 | { |
1602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1604 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1603 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 1605 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
@@ -1614,6 +1616,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1614 | } | 1616 | } |
1615 | } | 1617 | } |
1616 | 1618 | ||
1619 | #define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) | ||
1620 | #define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) | ||
1621 | #define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) | ||
1622 | |||
1617 | static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) | 1623 | static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) |
1618 | { | 1624 | { |
1619 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | 1625 | if (irref_isk(ir->op2)) { /* Constant shifts. */ |
@@ -1631,6 +1637,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) | |||
1631 | } | 1637 | } |
1632 | } | 1638 | } |
1633 | 1639 | ||
1640 | #define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) | ||
1641 | #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) | ||
1642 | #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) | ||
1643 | #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) | ||
1644 | #define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") | ||
1645 | |||
1634 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) | 1646 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) |
1635 | { | 1647 | { |
1636 | uint32_t kcmp = 0, kmov = 0; | 1648 | uint32_t kcmp = 0, kmov = 0; |
@@ -1704,6 +1716,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) | |||
1704 | asm_intmin_max(as, ir, cc); | 1716 | asm_intmin_max(as, ir, cc); |
1705 | } | 1717 | } |
1706 | 1718 | ||
1719 | #define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) | ||
1720 | #define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) | ||
1721 | |||
1707 | /* -- Comparisons --------------------------------------------------------- */ | 1722 | /* -- Comparisons --------------------------------------------------------- */ |
1708 | 1723 | ||
1709 | /* Map of comparisons to flags. ORDER IR. */ | 1724 | /* Map of comparisons to flags. ORDER IR. */ |
@@ -1777,7 +1792,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir) | |||
1777 | Reg left; | 1792 | Reg left; |
1778 | uint32_t m; | 1793 | uint32_t m; |
1779 | int cmpprev0 = 0; | 1794 | int cmpprev0 = 0; |
1780 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); | 1795 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), |
1796 | "bad comparison data type %d", irt_type(ir->t)); | ||
1781 | if (asm_swapops(as, lref, rref)) { | 1797 | if (asm_swapops(as, lref, rref)) { |
1782 | Reg tmp = lref; lref = rref; rref = tmp; | 1798 | Reg tmp = lref; lref = rref; rref = tmp; |
1783 | if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ | 1799 | if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ |
@@ -1819,6 +1835,18 @@ notst: | |||
1819 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ | 1835 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ |
1820 | } | 1836 | } |
1821 | 1837 | ||
1838 | static void asm_comp(ASMState *as, IRIns *ir) | ||
1839 | { | ||
1840 | #if !LJ_SOFTFP | ||
1841 | if (irt_isnum(ir->t)) | ||
1842 | asm_fpcomp(as, ir); | ||
1843 | else | ||
1844 | #endif | ||
1845 | asm_intcomp(as, ir); | ||
1846 | } | ||
1847 | |||
1848 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1849 | |||
1822 | #if LJ_HASFFI | 1850 | #if LJ_HASFFI |
1823 | /* 64 bit integer comparisons. */ | 1851 | /* 64 bit integer comparisons. */ |
1824 | static void asm_int64comp(ASMState *as, IRIns *ir) | 1852 | static void asm_int64comp(ASMState *as, IRIns *ir) |
@@ -1857,15 +1885,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir) | |||
1857 | } | 1885 | } |
1858 | #endif | 1886 | #endif |
1859 | 1887 | ||
1860 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | 1888 | /* -- Split register ops -------------------------------------------------- */ |
1861 | 1889 | ||
1862 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 1890 | /* Hiword op of a split 32/32 bit op. Previous op is the loword op. */ |
1863 | static void asm_hiop(ASMState *as, IRIns *ir) | 1891 | static void asm_hiop(ASMState *as, IRIns *ir) |
1864 | { | 1892 | { |
1865 | #if LJ_HASFFI || LJ_SOFTFP | ||
1866 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 1893 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
1867 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 1894 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
1868 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 1895 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
1896 | #if LJ_HASFFI || LJ_SOFTFP | ||
1869 | if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ | 1897 | if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ |
1870 | as->curins--; /* Always skip the loword comparison. */ | 1898 | as->curins--; /* Always skip the loword comparison. */ |
1871 | #if LJ_SOFTFP | 1899 | #if LJ_SOFTFP |
@@ -1882,7 +1910,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1882 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | 1910 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { |
1883 | as->curins--; /* Always skip the loword min/max. */ | 1911 | as->curins--; /* Always skip the loword min/max. */ |
1884 | if (uselo || usehi) | 1912 | if (uselo || usehi) |
1885 | asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); | 1913 | asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); |
1886 | return; | 1914 | return; |
1887 | #elif LJ_HASFFI | 1915 | #elif LJ_HASFFI |
1888 | } else if ((ir-1)->o == IR_CONV) { | 1916 | } else if ((ir-1)->o == IR_CONV) { |
@@ -1893,9 +1921,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1893 | #endif | 1921 | #endif |
1894 | } else if ((ir-1)->o == IR_XSTORE) { | 1922 | } else if ((ir-1)->o == IR_XSTORE) { |
1895 | if ((ir-1)->r != RID_SINK) | 1923 | if ((ir-1)->r != RID_SINK) |
1896 | asm_xstore(as, ir, 4); | 1924 | asm_xstore_(as, ir, 4); |
1897 | return; | 1925 | return; |
1898 | } | 1926 | } |
1927 | #endif | ||
1899 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1928 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
1900 | switch ((ir-1)->o) { | 1929 | switch ((ir-1)->o) { |
1901 | #if LJ_HASFFI | 1930 | #if LJ_HASFFI |
@@ -1914,6 +1943,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1914 | asm_intneg(as, ir, ARMI_RSC); | 1943 | asm_intneg(as, ir, ARMI_RSC); |
1915 | asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); | 1944 | asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); |
1916 | break; | 1945 | break; |
1946 | case IR_CNEWI: | ||
1947 | /* Nothing to do here. Handled by lo op itself. */ | ||
1948 | break; | ||
1917 | #endif | 1949 | #endif |
1918 | #if LJ_SOFTFP | 1950 | #if LJ_SOFTFP |
1919 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | 1951 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: |
@@ -1921,24 +1953,26 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1921 | if (!uselo) | 1953 | if (!uselo) |
1922 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ | 1954 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ |
1923 | break; | 1955 | break; |
1956 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: | ||
1957 | /* Nothing to do here. Handled by lo op itself. */ | ||
1958 | break; | ||
1924 | #endif | 1959 | #endif |
1925 | case IR_CALLN: | 1960 | case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: |
1926 | case IR_CALLS: | ||
1927 | case IR_CALLXS: | ||
1928 | if (!uselo) | 1961 | if (!uselo) |
1929 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 1962 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ |
1930 | break; | 1963 | break; |
1931 | #if LJ_SOFTFP | 1964 | default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; |
1932 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: | ||
1933 | #endif | ||
1934 | case IR_CNEWI: | ||
1935 | /* Nothing to do here. Handled by lo op itself. */ | ||
1936 | break; | ||
1937 | default: lua_assert(0); break; | ||
1938 | } | 1965 | } |
1939 | #else | 1966 | } |
1940 | UNUSED(as); UNUSED(ir); lua_assert(0); | 1967 | |
1941 | #endif | 1968 | /* -- Profiling ----------------------------------------------------------- */ |
1969 | |||
1970 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1971 | { | ||
1972 | UNUSED(ir); | ||
1973 | asm_guardcc(as, CC_NE); | ||
1974 | emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP); | ||
1975 | emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); | ||
1942 | } | 1976 | } |
1943 | 1977 | ||
1944 | /* -- Stack handling ------------------------------------------------------ */ | 1978 | /* -- Stack handling ------------------------------------------------------ */ |
@@ -1952,7 +1986,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1952 | if (irp) { | 1986 | if (irp) { |
1953 | if (!ra_hasspill(irp->s)) { | 1987 | if (!ra_hasspill(irp->s)) { |
1954 | pbase = irp->r; | 1988 | pbase = irp->r; |
1955 | lua_assert(ra_hasreg(pbase)); | 1989 | lj_assertA(ra_hasreg(pbase), "base reg lost"); |
1956 | } else if (allow) { | 1990 | } else if (allow) { |
1957 | pbase = rset_pickbot(allow); | 1991 | pbase = rset_pickbot(allow); |
1958 | } else { | 1992 | } else { |
@@ -1964,13 +1998,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1964 | } | 1998 | } |
1965 | emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); | 1999 | emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); |
1966 | k = emit_isk12(0, (int32_t)(8*topslot)); | 2000 | k = emit_isk12(0, (int32_t)(8*topslot)); |
1967 | lua_assert(k); | 2001 | lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); |
1968 | emit_n(as, ARMI_CMP^k, RID_TMP); | 2002 | emit_n(as, ARMI_CMP^k, RID_TMP); |
1969 | emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); | 2003 | emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); |
1970 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, | 2004 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, |
1971 | (int32_t)offsetof(lua_State, maxstack)); | 2005 | (int32_t)offsetof(lua_State, maxstack)); |
1972 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ | 2006 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ |
1973 | int32_t i = i32ptr(&J2G(as->J)->jit_L); | 2007 | int32_t i = i32ptr(&J2G(as->J)->cur_L); |
1974 | if (ra_hasspill(irp->s)) | 2008 | if (ra_hasspill(irp->s)) |
1975 | emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); | 2009 | emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); |
1976 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); | 2010 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); |
@@ -1978,7 +2012,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1978 | emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ | 2012 | emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ |
1979 | emit_loadi(as, RID_TMP, (i & ~4095)); | 2013 | emit_loadi(as, RID_TMP, (i & ~4095)); |
1980 | } else { | 2014 | } else { |
1981 | emit_getgl(as, RID_TMP, jit_L); | 2015 | emit_getgl(as, RID_TMP, cur_L); |
1982 | } | 2016 | } |
1983 | } | 2017 | } |
1984 | 2018 | ||
@@ -2001,7 +2035,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2001 | #if LJ_SOFTFP | 2035 | #if LJ_SOFTFP |
2002 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); | 2036 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); |
2003 | Reg tmp; | 2037 | Reg tmp; |
2004 | lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ | 2038 | /* LJ_SOFTFP: must be a number constant. */ |
2039 | lj_assertA(irref_isk(ref), "unsplit FP op"); | ||
2005 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, | 2040 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, |
2006 | rset_exclude(RSET_GPREVEN, RID_BASE)); | 2041 | rset_exclude(RSET_GPREVEN, RID_BASE)); |
2007 | emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); | 2042 | emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); |
@@ -2015,7 +2050,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2015 | } else { | 2050 | } else { |
2016 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); | 2051 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); |
2017 | Reg type; | 2052 | Reg type; |
2018 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | 2053 | lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), |
2054 | "restore of IR type %d", irt_type(ir->t)); | ||
2019 | if (!irt_ispri(ir->t)) { | 2055 | if (!irt_ispri(ir->t)) { |
2020 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); | 2056 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); |
2021 | emit_lso(as, ARMI_STR, src, RID_BASE, ofs); | 2057 | emit_lso(as, ARMI_STR, src, RID_BASE, ofs); |
@@ -2028,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2028 | } else if ((sn & SNAP_SOFTFPNUM)) { | 2064 | } else if ((sn & SNAP_SOFTFPNUM)) { |
2029 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); | 2065 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); |
2030 | #endif | 2066 | #endif |
2067 | } else if ((sn & SNAP_KEYINDEX)) { | ||
2068 | type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); | ||
2031 | } else { | 2069 | } else { |
2032 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); | 2070 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); |
2033 | } | 2071 | } |
@@ -2035,7 +2073,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2035 | } | 2073 | } |
2036 | checkmclim(as); | 2074 | checkmclim(as); |
2037 | } | 2075 | } |
2038 | lua_assert(map + nent == flinks); | 2076 | lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); |
2039 | } | 2077 | } |
2040 | 2078 | ||
2041 | /* -- GC handling --------------------------------------------------------- */ | 2079 | /* -- GC handling --------------------------------------------------------- */ |
@@ -2089,15 +2127,21 @@ static void asm_loop_fixup(ASMState *as) | |||
2089 | } | 2127 | } |
2090 | } | 2128 | } |
2091 | 2129 | ||
2130 | /* Fixup the tail of the loop. */ | ||
2131 | static void asm_loop_tail_fixup(ASMState *as) | ||
2132 | { | ||
2133 | UNUSED(as); /* Nothing to do. */ | ||
2134 | } | ||
2135 | |||
2092 | /* -- Head of trace ------------------------------------------------------- */ | 2136 | /* -- Head of trace ------------------------------------------------------- */ |
2093 | 2137 | ||
2094 | /* Reload L register from g->jit_L. */ | 2138 | /* Reload L register from g->cur_L. */ |
2095 | static void asm_head_lreg(ASMState *as) | 2139 | static void asm_head_lreg(ASMState *as) |
2096 | { | 2140 | { |
2097 | IRIns *ir = IR(ASMREF_L); | 2141 | IRIns *ir = IR(ASMREF_L); |
2098 | if (ra_used(ir)) { | 2142 | if (ra_used(ir)) { |
2099 | Reg r = ra_dest(as, ir, RSET_GPR); | 2143 | Reg r = ra_dest(as, ir, RSET_GPR); |
2100 | emit_getgl(as, r, jit_L); | 2144 | emit_getgl(as, r, cur_L); |
2101 | ra_evictk(as); | 2145 | ra_evictk(as); |
2102 | } | 2146 | } |
2103 | } | 2147 | } |
@@ -2125,7 +2169,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | |||
2125 | rset_clear(allow, ra_dest(as, ir, allow)); | 2169 | rset_clear(allow, ra_dest(as, ir, allow)); |
2126 | } else { | 2170 | } else { |
2127 | Reg r = irp->r; | 2171 | Reg r = irp->r; |
2128 | lua_assert(ra_hasreg(r)); | 2172 | lj_assertA(ra_hasreg(r), "base reg lost"); |
2129 | rset_clear(allow, r); | 2173 | rset_clear(allow, r); |
2130 | if (r != ir->r && !rset_test(as->freeset, r)) | 2174 | if (r != ir->r && !rset_test(as->freeset, r)) |
2131 | ra_restore(as, regcost_ref(as->cost[r])); | 2175 | ra_restore(as, regcost_ref(as->cost[r])); |
@@ -2147,7 +2191,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
2147 | } else { | 2191 | } else { |
2148 | /* Patch stack adjustment. */ | 2192 | /* Patch stack adjustment. */ |
2149 | uint32_t k = emit_isk12(ARMI_ADD, spadj); | 2193 | uint32_t k = emit_isk12(ARMI_ADD, spadj); |
2150 | lua_assert(k); | 2194 | lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); |
2151 | p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); | 2195 | p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); |
2152 | } | 2196 | } |
2153 | /* Patch exit branch. */ | 2197 | /* Patch exit branch. */ |
@@ -2168,143 +2212,13 @@ static void asm_tail_prep(ASMState *as) | |||
2168 | *p = 0; /* Prevent load/store merging. */ | 2212 | *p = 0; /* Prevent load/store merging. */ |
2169 | } | 2213 | } |
2170 | 2214 | ||
2171 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
2172 | |||
2173 | /* Assemble a single instruction. */ | ||
2174 | static void asm_ir(ASMState *as, IRIns *ir) | ||
2175 | { | ||
2176 | switch ((IROp)ir->o) { | ||
2177 | /* Miscellaneous ops. */ | ||
2178 | case IR_LOOP: asm_loop(as); break; | ||
2179 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
2180 | case IR_USE: | ||
2181 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
2182 | case IR_PHI: asm_phi(as, ir); break; | ||
2183 | case IR_HIOP: asm_hiop(as, ir); break; | ||
2184 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
2185 | |||
2186 | /* Guarded assertions. */ | ||
2187 | case IR_EQ: case IR_NE: | ||
2188 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
2189 | as->curins--; | ||
2190 | asm_href(as, ir-1, (IROp)ir->o); | ||
2191 | break; | ||
2192 | } | ||
2193 | /* fallthrough */ | ||
2194 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
2195 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
2196 | case IR_ABC: | ||
2197 | #if !LJ_SOFTFP | ||
2198 | if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; } | ||
2199 | #endif | ||
2200 | asm_intcomp(as, ir); | ||
2201 | break; | ||
2202 | |||
2203 | case IR_RETF: asm_retf(as, ir); break; | ||
2204 | |||
2205 | /* Bit ops. */ | ||
2206 | case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break; | ||
2207 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2208 | |||
2209 | case IR_BAND: asm_bitop(as, ir, ARMI_AND); break; | ||
2210 | case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break; | ||
2211 | case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break; | ||
2212 | |||
2213 | case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break; | ||
2214 | case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break; | ||
2215 | case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break; | ||
2216 | case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break; | ||
2217 | case IR_BROL: lua_assert(0); break; | ||
2218 | |||
2219 | /* Arithmetic ops. */ | ||
2220 | case IR_ADD: case IR_ADDOV: asm_add(as, ir); break; | ||
2221 | case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break; | ||
2222 | case IR_MUL: case IR_MULOV: asm_mul(as, ir); break; | ||
2223 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2224 | case IR_NEG: asm_neg(as, ir); break; | ||
2225 | |||
2226 | #if LJ_SOFTFP | ||
2227 | case IR_DIV: case IR_POW: case IR_ABS: | ||
2228 | case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
2229 | lua_assert(0); /* Unused for LJ_SOFTFP. */ | ||
2230 | break; | ||
2231 | #else | ||
2232 | case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break; | ||
2233 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2234 | case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break; | ||
2235 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2236 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2237 | case IR_FPMATH: | ||
2238 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2239 | break; | ||
2240 | if (ir->op2 <= IRFPM_TRUNC) | ||
2241 | asm_callround(as, ir, ir->op2); | ||
2242 | else if (ir->op2 == IRFPM_SQRT) | ||
2243 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
2244 | else | ||
2245 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2246 | break; | ||
2247 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2248 | #endif | ||
2249 | |||
2250 | case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break; | ||
2251 | case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break; | ||
2252 | |||
2253 | /* Memory references. */ | ||
2254 | case IR_AREF: asm_aref(as, ir); break; | ||
2255 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2256 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2257 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2258 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2259 | case IR_FREF: asm_fref(as, ir); break; | ||
2260 | case IR_STRREF: asm_strref(as, ir); break; | ||
2261 | |||
2262 | /* Loads and stores. */ | ||
2263 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2264 | asm_ahuvload(as, ir); | ||
2265 | break; | ||
2266 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2267 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2268 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2269 | |||
2270 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2271 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2272 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2273 | |||
2274 | /* Allocations. */ | ||
2275 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2276 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2277 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2278 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2279 | |||
2280 | /* Write barriers. */ | ||
2281 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2282 | case IR_OBAR: asm_obar(as, ir); break; | ||
2283 | |||
2284 | /* Type conversions. */ | ||
2285 | case IR_CONV: asm_conv(as, ir); break; | ||
2286 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2287 | case IR_STRTO: asm_strto(as, ir); break; | ||
2288 | |||
2289 | /* Calls. */ | ||
2290 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2291 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2292 | case IR_CARG: break; | ||
2293 | |||
2294 | default: | ||
2295 | setintV(&as->J->errinfo, ir->o); | ||
2296 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2297 | break; | ||
2298 | } | ||
2299 | } | ||
2300 | |||
2301 | /* -- Trace setup --------------------------------------------------------- */ | 2215 | /* -- Trace setup --------------------------------------------------------- */ |
2302 | 2216 | ||
2303 | /* Ensure there are enough stack slots for call arguments. */ | 2217 | /* Ensure there are enough stack slots for call arguments. */ |
2304 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2218 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2305 | { | 2219 | { |
2306 | IRRef args[CCI_NARGS_MAX*2]; | 2220 | IRRef args[CCI_NARGS_MAX*2]; |
2307 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2221 | uint32_t i, nargs = CCI_XNARGS(ci); |
2308 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; | 2222 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; |
2309 | asm_collectargs(as, ir, ci, args); | 2223 | asm_collectargs(as, ir, ci, args); |
2310 | for (i = 0; i < nargs; i++) { | 2224 | for (i = 0; i < nargs; i++) { |
@@ -2360,7 +2274,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2360 | if (!cstart) cstart = p; | 2274 | if (!cstart) cstart = p; |
2361 | } | 2275 | } |
2362 | } | 2276 | } |
2363 | lua_assert(cstart != NULL); | 2277 | lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); |
2364 | lj_mcode_sync(cstart, cend); | 2278 | lj_mcode_sync(cstart, cend); |
2365 | lj_mcode_patch(J, mcarea, 1); | 2279 | lj_mcode_patch(J, mcarea, 1); |
2366 | } | 2280 | } |
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h new file mode 100644 index 00000000..eb31b006 --- /dev/null +++ b/src/lj_asm_arm64.h | |||
@@ -0,0 +1,2070 @@ | |||
1 | /* | ||
2 | ** ARM64 IR assembler (SSA IR -> machine code). | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | ** Sponsored by Cisco Systems, Inc. | ||
7 | */ | ||
8 | |||
9 | /* -- Register allocator extensions --------------------------------------- */ | ||
10 | |||
11 | /* Allocate a register with a hint. */ | ||
12 | static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) | ||
13 | { | ||
14 | Reg r = IR(ref)->r; | ||
15 | if (ra_noreg(r)) { | ||
16 | if (!ra_hashint(r) && !iscrossref(as, ref)) | ||
17 | ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ | ||
18 | r = ra_allocref(as, ref, allow); | ||
19 | } | ||
20 | ra_noweak(as, r); | ||
21 | return r; | ||
22 | } | ||
23 | |||
24 | /* Allocate two source registers for three-operand instructions. */ | ||
25 | static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) | ||
26 | { | ||
27 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
28 | Reg left = irl->r, right = irr->r; | ||
29 | if (ra_hasreg(left)) { | ||
30 | ra_noweak(as, left); | ||
31 | if (ra_noreg(right)) | ||
32 | right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); | ||
33 | else | ||
34 | ra_noweak(as, right); | ||
35 | } else if (ra_hasreg(right)) { | ||
36 | ra_noweak(as, right); | ||
37 | left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); | ||
38 | } else if (ra_hashint(right)) { | ||
39 | right = ra_allocref(as, ir->op2, allow); | ||
40 | left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); | ||
41 | } else { | ||
42 | left = ra_allocref(as, ir->op1, allow); | ||
43 | right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); | ||
44 | } | ||
45 | return left | (right << 8); | ||
46 | } | ||
47 | |||
48 | /* -- Guard handling ------------------------------------------------------ */ | ||
49 | |||
50 | /* Setup all needed exit stubs. */ | ||
51 | static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | ||
52 | { | ||
53 | ExitNo i; | ||
54 | MCode *mxp = as->mctop; | ||
55 | if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) | ||
56 | asm_mclimit(as); | ||
57 | /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ | ||
58 | for (i = nexits-1; (int32_t)i >= 0; i--) | ||
59 | *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); | ||
60 | *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); | ||
61 | mxp--; | ||
62 | *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); | ||
63 | *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); | ||
64 | as->mctop = mxp; | ||
65 | } | ||
66 | |||
67 | static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) | ||
68 | { | ||
69 | /* Keep this in-sync with exitstub_trace_addr(). */ | ||
70 | return as->mctop + exitno + 3; | ||
71 | } | ||
72 | |||
73 | /* Emit conditional branch to exit for guard. */ | ||
74 | static void asm_guardcc(ASMState *as, A64CC cc) | ||
75 | { | ||
76 | MCode *target = asm_exitstub_addr(as, as->snapno); | ||
77 | MCode *p = as->mcp; | ||
78 | if (LJ_UNLIKELY(p == as->invmcp)) { | ||
79 | as->loopinv = 1; | ||
80 | *p = A64I_B | A64F_S26(target-p); | ||
81 | emit_cond_branch(as, cc^1, p-1); | ||
82 | return; | ||
83 | } | ||
84 | emit_cond_branch(as, cc, target); | ||
85 | } | ||
86 | |||
87 | /* Emit test and branch instruction to exit for guard. */ | ||
88 | static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) | ||
89 | { | ||
90 | MCode *target = asm_exitstub_addr(as, as->snapno); | ||
91 | MCode *p = as->mcp; | ||
92 | if (LJ_UNLIKELY(p == as->invmcp)) { | ||
93 | as->loopinv = 1; | ||
94 | *p = A64I_B | A64F_S26(target-p); | ||
95 | emit_tnb(as, ai^0x01000000u, r, bit, p-1); | ||
96 | return; | ||
97 | } | ||
98 | emit_tnb(as, ai, r, bit, target); | ||
99 | } | ||
100 | |||
101 | /* Emit compare and branch instruction to exit for guard. */ | ||
102 | static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) | ||
103 | { | ||
104 | MCode *target = asm_exitstub_addr(as, as->snapno); | ||
105 | MCode *p = as->mcp; | ||
106 | if (LJ_UNLIKELY(p == as->invmcp)) { | ||
107 | as->loopinv = 1; | ||
108 | *p = A64I_B | A64F_S26(target-p); | ||
109 | emit_cnb(as, ai^0x01000000u, r, p-1); | ||
110 | return; | ||
111 | } | ||
112 | emit_cnb(as, ai, r, target); | ||
113 | } | ||
114 | |||
115 | /* -- Operand fusion ------------------------------------------------------ */ | ||
116 | |||
117 | /* Limit linear search to this distance. Avoids O(n^2) behavior. */ | ||
118 | #define CONFLICT_SEARCH_LIM 31 | ||
119 | |||
120 | static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | ||
121 | { | ||
122 | if (irref_isk(ref)) { | ||
123 | IRIns *ir = IR(ref); | ||
124 | if (ir->o == IR_KNULL || !irt_is64(ir->t)) { | ||
125 | *k = ir->i; | ||
126 | return 1; | ||
127 | } else if (checki32((int64_t)ir_k64(ir)->u64)) { | ||
128 | *k = (int32_t)ir_k64(ir)->u64; | ||
129 | return 1; | ||
130 | } | ||
131 | } | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* Check if there's no conflicting instruction between curins and ref. */ | ||
136 | static int noconflict(ASMState *as, IRRef ref, IROp conflict) | ||
137 | { | ||
138 | IRIns *ir = as->ir; | ||
139 | IRRef i = as->curins; | ||
140 | if (i > ref + CONFLICT_SEARCH_LIM) | ||
141 | return 0; /* Give up, ref is too far away. */ | ||
142 | while (--i > ref) | ||
143 | if (ir[i].o == conflict) | ||
144 | return 0; /* Conflict found. */ | ||
145 | return 1; /* Ok, no conflict. */ | ||
146 | } | ||
147 | |||
148 | /* Fuse the array base of colocated arrays. */ | ||
149 | static int32_t asm_fuseabase(ASMState *as, IRRef ref) | ||
150 | { | ||
151 | IRIns *ir = IR(ref); | ||
152 | if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && | ||
153 | !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) | ||
154 | return (int32_t)sizeof(GCtab); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | #define FUSE_REG 0x40000000 | ||
159 | |||
160 | /* Fuse array/hash/upvalue reference into register+offset operand. */ | ||
161 | static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, | ||
162 | A64Ins ins) | ||
163 | { | ||
164 | IRIns *ir = IR(ref); | ||
165 | if (ra_noreg(ir->r)) { | ||
166 | if (ir->o == IR_AREF) { | ||
167 | if (mayfuse(as, ref)) { | ||
168 | if (irref_isk(ir->op2)) { | ||
169 | IRRef tab = IR(ir->op1)->op1; | ||
170 | int32_t ofs = asm_fuseabase(as, tab); | ||
171 | IRRef refa = ofs ? tab : ir->op1; | ||
172 | ofs += 8*IR(ir->op2)->i; | ||
173 | if (emit_checkofs(ins, ofs)) { | ||
174 | *ofsp = ofs; | ||
175 | return ra_alloc1(as, refa, allow); | ||
176 | } | ||
177 | } else { | ||
178 | Reg base = ra_alloc1(as, ir->op1, allow); | ||
179 | *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); | ||
180 | return base; | ||
181 | } | ||
182 | } | ||
183 | } else if (ir->o == IR_HREFK) { | ||
184 | if (mayfuse(as, ref)) { | ||
185 | int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); | ||
186 | if (emit_checkofs(ins, ofs)) { | ||
187 | *ofsp = ofs; | ||
188 | return ra_alloc1(as, ir->op1, allow); | ||
189 | } | ||
190 | } | ||
191 | } else if (ir->o == IR_UREFC) { | ||
192 | if (irref_isk(ir->op1)) { | ||
193 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
194 | GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; | ||
195 | int64_t ofs = glofs(as, &uv->tv); | ||
196 | if (emit_checkofs(ins, ofs)) { | ||
197 | *ofsp = (int32_t)ofs; | ||
198 | return RID_GL; | ||
199 | } | ||
200 | } | ||
201 | } else if (ir->o == IR_TMPREF) { | ||
202 | *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv); | ||
203 | return RID_GL; | ||
204 | } | ||
205 | } | ||
206 | *ofsp = 0; | ||
207 | return ra_alloc1(as, ref, allow); | ||
208 | } | ||
209 | |||
210 | /* Fuse m operand into arithmetic/logic instructions. */ | ||
211 | static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) | ||
212 | { | ||
213 | IRIns *ir = IR(ref); | ||
214 | if (ra_hasreg(ir->r)) { | ||
215 | ra_noweak(as, ir->r); | ||
216 | return A64F_M(ir->r); | ||
217 | } else if (irref_isk(ref)) { | ||
218 | uint32_t m; | ||
219 | int64_t k = get_k64val(as, ref); | ||
220 | if ((ai & 0x1f000000) == 0x0a000000) | ||
221 | m = emit_isk13(k, irt_is64(ir->t)); | ||
222 | else | ||
223 | m = emit_isk12(k); | ||
224 | if (m) | ||
225 | return m; | ||
226 | } else if (mayfuse(as, ref)) { | ||
227 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || | ||
228 | (ir->o == IR_ADD && ir->op1 == ir->op2)) { | ||
229 | A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : | ||
230 | ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; | ||
231 | int shift = ir->o == IR_ADD ? 1 : | ||
232 | (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); | ||
233 | IRIns *irl = IR(ir->op1); | ||
234 | if (sh == A64SH_LSL && | ||
235 | irl->o == IR_CONV && | ||
236 | irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && | ||
237 | shift <= 4 && | ||
238 | canfuse(as, irl)) { | ||
239 | Reg m = ra_alloc1(as, irl->op1, allow); | ||
240 | return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); | ||
241 | } else { | ||
242 | Reg m = ra_alloc1(as, ir->op1, allow); | ||
243 | return A64F_M(m) | A64F_SH(sh, shift); | ||
244 | } | ||
245 | } else if (ir->o == IR_CONV && | ||
246 | ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { | ||
247 | Reg m = ra_alloc1(as, ir->op1, allow); | ||
248 | return A64F_M(m) | A64F_EX(A64EX_SXTW); | ||
249 | } | ||
250 | } | ||
251 | return A64F_M(ra_allocref(as, ref, allow)); | ||
252 | } | ||
253 | |||
254 | /* Fuse XLOAD/XSTORE reference into load/store operand. */ | ||
255 | static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, | ||
256 | RegSet allow) | ||
257 | { | ||
258 | IRIns *ir = IR(ref); | ||
259 | Reg base; | ||
260 | int32_t ofs = 0; | ||
261 | if (ra_noreg(ir->r) && canfuse(as, ir)) { | ||
262 | if (ir->o == IR_ADD) { | ||
263 | if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { | ||
264 | ref = ir->op1; | ||
265 | } else { | ||
266 | Reg rn, rm; | ||
267 | IRRef lref = ir->op1, rref = ir->op2; | ||
268 | IRIns *irl = IR(lref); | ||
269 | if (mayfuse(as, irl->op1)) { | ||
270 | unsigned int shift = 4; | ||
271 | if (irl->o == IR_BSHL && irref_isk(irl->op2)) { | ||
272 | shift = (IR(irl->op2)->i & 63); | ||
273 | } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { | ||
274 | shift = 1; | ||
275 | } | ||
276 | if ((ai >> 30) == shift) { | ||
277 | lref = irl->op1; | ||
278 | irl = IR(lref); | ||
279 | ai |= A64I_LS_SH; | ||
280 | } | ||
281 | } | ||
282 | if (irl->o == IR_CONV && | ||
283 | irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && | ||
284 | canfuse(as, irl)) { | ||
285 | lref = irl->op1; | ||
286 | ai |= A64I_LS_SXTWx; | ||
287 | } else { | ||
288 | ai |= A64I_LS_LSLx; | ||
289 | } | ||
290 | rm = ra_alloc1(as, lref, allow); | ||
291 | rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); | ||
292 | emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm); | ||
293 | return; | ||
294 | } | ||
295 | } else if (ir->o == IR_STRREF) { | ||
296 | if (asm_isk32(as, ir->op2, &ofs)) { | ||
297 | ref = ir->op1; | ||
298 | } else if (asm_isk32(as, ir->op1, &ofs)) { | ||
299 | ref = ir->op2; | ||
300 | } else { | ||
301 | Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2; | ||
302 | Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1; | ||
303 | Reg rn = ra_alloc1(as, refv, allow); | ||
304 | IRIns *irr = IR(refk); | ||
305 | uint32_t m; | ||
306 | if (irr+1 == ir && !ra_used(irr) && | ||
307 | irr->o == IR_ADD && irref_isk(irr->op2)) { | ||
308 | ofs = sizeof(GCstr) + IR(irr->op2)->i; | ||
309 | if (emit_checkofs(ai, ofs)) { | ||
310 | Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); | ||
311 | m = A64F_M(rm) | A64F_EX(A64EX_SXTW); | ||
312 | goto skipopm; | ||
313 | } | ||
314 | } | ||
315 | m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn)); | ||
316 | ofs = sizeof(GCstr); | ||
317 | skipopm: | ||
318 | emit_lso(as, ai, rd, rd, ofs); | ||
319 | emit_dn(as, A64I_ADDx^m, rd, rn); | ||
320 | return; | ||
321 | } | ||
322 | ofs += sizeof(GCstr); | ||
323 | if (!emit_checkofs(ai, ofs)) { | ||
324 | Reg rn = ra_alloc1(as, ref, allow); | ||
325 | Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); | ||
326 | emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); | ||
327 | return; | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | base = ra_alloc1(as, ref, allow); | ||
332 | emit_lso(as, ai, (rd & 31), base, ofs); | ||
333 | } | ||
334 | |||
335 | /* Fuse FP multiply-add/sub. */ | ||
336 | static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) | ||
337 | { | ||
338 | IRRef lref = ir->op1, rref = ir->op2; | ||
339 | IRIns *irm; | ||
340 | if (lref != rref && | ||
341 | ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && | ||
342 | ra_noreg(irm->r)) || | ||
343 | (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && | ||
344 | (rref = lref, ai = air, ra_noreg(irm->r))))) { | ||
345 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
346 | Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); | ||
347 | Reg left = ra_alloc2(as, irm, | ||
348 | rset_exclude(rset_exclude(RSET_FPR, dest), add)); | ||
349 | Reg right = (left >> 8); left &= 255; | ||
350 | emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); | ||
351 | return 1; | ||
352 | } | ||
353 | return 0; | ||
354 | } | ||
355 | |||
356 | /* Fuse BAND + BSHL/BSHR into UBFM. */ | ||
357 | static int asm_fuseandshift(ASMState *as, IRIns *ir) | ||
358 | { | ||
359 | IRIns *irl = IR(ir->op1); | ||
360 | lj_assertA(ir->o == IR_BAND, "bad usage"); | ||
361 | if (canfuse(as, irl) && irref_isk(ir->op2)) { | ||
362 | uint64_t mask = get_k64val(as, ir->op2); | ||
363 | if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { | ||
364 | int32_t shmask = irt_is64(irl->t) ? 63 : 31; | ||
365 | int32_t shift = (IR(irl->op2)->i & shmask); | ||
366 | int32_t imms = shift; | ||
367 | if (irl->o == IR_BSHL) { | ||
368 | mask >>= shift; | ||
369 | shift = (shmask-shift+1) & shmask; | ||
370 | imms = 0; | ||
371 | } | ||
372 | if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ | ||
373 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
374 | Reg left = ra_alloc1(as, irl->op1, RSET_GPR); | ||
375 | A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; | ||
376 | imms += 63 - emit_clz64(mask); | ||
377 | if (imms > shmask) imms = shmask; | ||
378 | emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); | ||
379 | return 1; | ||
380 | } | ||
381 | } | ||
382 | } | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | /* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */ | ||
387 | static int asm_fuseorshift(ASMState *as, IRIns *ir) | ||
388 | { | ||
389 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
390 | lj_assertA(ir->o == IR_BOR, "bad usage"); | ||
391 | if (canfuse(as, irl) && canfuse(as, irr) && | ||
392 | ((irl->o == IR_BSHR && irr->o == IR_BSHL) || | ||
393 | (irl->o == IR_BSHL && irr->o == IR_BSHR))) { | ||
394 | if (irref_isk(irl->op2) && irref_isk(irr->op2)) { | ||
395 | IRRef lref = irl->op1, rref = irr->op1; | ||
396 | uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; | ||
397 | if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ | ||
398 | uint32_t tmp2; | ||
399 | IRRef tmp1 = lref; lref = rref; rref = tmp1; | ||
400 | tmp2 = lshift; lshift = rshift; rshift = tmp2; | ||
401 | } | ||
402 | if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { | ||
403 | A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; | ||
404 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
405 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
406 | Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); | ||
407 | emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); | ||
408 | return 1; | ||
409 | } | ||
410 | } | ||
411 | } | ||
412 | return 0; | ||
413 | } | ||
414 | |||
415 | /* -- Calls --------------------------------------------------------------- */ | ||
416 | |||
417 | /* Generate a call to a C function. */ | ||
418 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | ||
419 | { | ||
420 | uint32_t n, nargs = CCI_XNARGS(ci); | ||
421 | int32_t ofs = 0; | ||
422 | Reg gpr, fpr = REGARG_FIRSTFPR; | ||
423 | if ((void *)ci->func) | ||
424 | emit_call(as, (void *)ci->func); | ||
425 | for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) | ||
426 | as->cost[gpr] = REGCOST(~0u, ASMREF_L); | ||
427 | gpr = REGARG_FIRSTGPR; | ||
428 | for (n = 0; n < nargs; n++) { /* Setup args. */ | ||
429 | IRRef ref = args[n]; | ||
430 | IRIns *ir = IR(ref); | ||
431 | if (ref) { | ||
432 | if (irt_isfp(ir->t)) { | ||
433 | if (fpr <= REGARG_LASTFPR) { | ||
434 | lj_assertA(rset_test(as->freeset, fpr), | ||
435 | "reg %d not free", fpr); /* Must have been evicted. */ | ||
436 | ra_leftov(as, fpr, ref); | ||
437 | fpr++; | ||
438 | } else { | ||
439 | Reg r = ra_alloc1(as, ref, RSET_FPR); | ||
440 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); | ||
441 | ofs += 8; | ||
442 | } | ||
443 | } else { | ||
444 | if (gpr <= REGARG_LASTGPR) { | ||
445 | lj_assertA(rset_test(as->freeset, gpr), | ||
446 | "reg %d not free", gpr); /* Must have been evicted. */ | ||
447 | ra_leftov(as, gpr, ref); | ||
448 | gpr++; | ||
449 | } else { | ||
450 | Reg r = ra_alloc1(as, ref, RSET_GPR); | ||
451 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); | ||
452 | ofs += 8; | ||
453 | } | ||
454 | } | ||
455 | } | ||
456 | } | ||
457 | } | ||
458 | |||
459 | /* Setup result reg/sp for call. Evict scratch regs. */ | ||
460 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | ||
461 | { | ||
462 | RegSet drop = RSET_SCRATCH; | ||
463 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | ||
464 | if (ra_hasreg(ir->r)) | ||
465 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
466 | if (hiop && ra_hasreg((ir+1)->r)) | ||
467 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ | ||
468 | ra_evictset(as, drop); /* Evictions must be performed first. */ | ||
469 | if (ra_used(ir)) { | ||
470 | lj_assertA(!irt_ispri(ir->t), "PRI dest"); | ||
471 | if (irt_isfp(ir->t)) { | ||
472 | if (ci->flags & CCI_CASTU64) { | ||
473 | Reg dest = ra_dest(as, ir, RSET_FPR) & 31; | ||
474 | emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, | ||
475 | dest, RID_RET); | ||
476 | } else { | ||
477 | ra_destreg(as, ir, RID_FPRET); | ||
478 | } | ||
479 | } else if (hiop) { | ||
480 | ra_destpair(as, ir); | ||
481 | } else { | ||
482 | ra_destreg(as, ir, RID_RET); | ||
483 | } | ||
484 | } | ||
485 | UNUSED(ci); | ||
486 | } | ||
487 | |||
488 | static void asm_callx(ASMState *as, IRIns *ir) | ||
489 | { | ||
490 | IRRef args[CCI_NARGS_MAX*2]; | ||
491 | CCallInfo ci; | ||
492 | IRRef func; | ||
493 | IRIns *irf; | ||
494 | ci.flags = asm_callx_flags(as, ir); | ||
495 | asm_collectargs(as, ir, &ci, args); | ||
496 | asm_setupresult(as, ir, &ci); | ||
497 | func = ir->op2; irf = IR(func); | ||
498 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } | ||
499 | if (irref_isk(func)) { /* Call to constant address. */ | ||
500 | ci.func = (ASMFunction)(ir_k64(irf)->u64); | ||
501 | } else { /* Need a non-argument register for indirect calls. */ | ||
502 | Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); | ||
503 | emit_n(as, A64I_BLR, freg); | ||
504 | ci.func = (ASMFunction)(void *)0; | ||
505 | } | ||
506 | asm_gencall(as, &ci, args); | ||
507 | } | ||
508 | |||
509 | /* -- Returns ------------------------------------------------------------- */ | ||
510 | |||
511 | /* Return to lower frame. Guard that it goes to the right spot. */ | ||
512 | static void asm_retf(ASMState *as, IRIns *ir) | ||
513 | { | ||
514 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | ||
515 | void *pc = ir_kptr(IR(ir->op2)); | ||
516 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); | ||
517 | as->topslot -= (BCReg)delta; | ||
518 | if ((int32_t)as->topslot < 0) as->topslot = 0; | ||
519 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | ||
520 | /* Need to force a spill on REF_BASE now to update the stack slot. */ | ||
521 | emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); | ||
522 | emit_setgl(as, base, jit_base); | ||
523 | emit_addptr(as, base, -8*delta); | ||
524 | asm_guardcc(as, CC_NE); | ||
525 | emit_nm(as, A64I_CMPx, RID_TMP, | ||
526 | ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); | ||
527 | emit_lso(as, A64I_LDRx, RID_TMP, base, -8); | ||
528 | } | ||
529 | |||
530 | /* -- Buffer operations --------------------------------------------------- */ | ||
531 | |||
#if LJ_HASBUFFER
/* Rewrite the L field of the SBuf pointed to by register sb.
** The old field value is loaded into a scratch register, cur_L is fetched
** into RID_TMP, a BFM merges bits of the old value into RID_TMP (width
** derived from SBUF_MASK_FLAG) and the result is stored back to the field.
*/
static void asm_bufhdr_write(ASMState *as, Reg sb)
{
  Reg oldval = ra_scratch(as, rset_exclude(RSET_GPR, sb));
  IRIns irptr;
  A64Ins bfm = A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0);
  irptr.ot = IRT(0, IRT_PGC);  /* GC type. */
  emit_storeofs(as, &irptr, RID_TMP, sb, offsetof(SBuf, L));
  emit_dn(as, bfm, RID_TMP, oldval);
  emit_getgl(as, RID_TMP, cur_L);
  emit_loadofs(as, &irptr, oldval, sb, offsetof(SBuf, L));
}
#endif
544 | |||
545 | /* -- Type conversions ---------------------------------------------------- */ | ||
546 | |||
547 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | ||
548 | { | ||
549 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | ||
550 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
551 | asm_guardcc(as, CC_NE); | ||
552 | emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); | ||
553 | emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); | ||
554 | emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); | ||
555 | } | ||
556 | |||
557 | static void asm_tobit(ASMState *as, IRIns *ir) | ||
558 | { | ||
559 | RegSet allow = RSET_FPR; | ||
560 | Reg left = ra_alloc1(as, ir->op1, allow); | ||
561 | Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); | ||
562 | Reg tmp = ra_scratch(as, rset_clear(allow, right)); | ||
563 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
564 | emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); | ||
565 | emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); | ||
566 | } | ||
567 | |||
/* Assemble an IR CONV instruction.
** The source type is taken from ir->op2 (IRCONV_SRCMASK), the result type
** from ir->t. Four cases are handled in this order:
** 1. FP result: FP to FP or integer to FP conversion.
** 2. FP source, integer result: guarded conversions go via asm_tointg().
** 3. 8/16 bit integer source: sign or zero extension to 32 bit.
** 4. Remaining integer conversions: casts, 32 to 64 bit extension and
**    64 to 32 bit truncation.
*/
568 | static void asm_conv(ASMState *as, IRIns *ir) | ||
569 | { | ||
570 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
571 | int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); | ||
572 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | ||
573 | IRRef lref = ir->op1; | ||
574 | lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); | ||
575 | if (irt_isfp(ir->t)) { | ||
576 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
577 | if (stfp) { /* FP to FP conversion. */ | ||
578 | emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, | ||
579 | (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); | ||
580 | } else { /* Integer to FP conversion. */ | ||
581 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
/* Pick the instruction by result width (float/double), source width */
/* (32/64 bit) and source signedness. */
582 | A64Ins ai = irt_isfloat(ir->t) ? | ||
583 | (((IRT_IS64 >> st) & 1) ? | ||
584 | (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : | ||
585 | (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : | ||
586 | (((IRT_IS64 >> st) & 1) ? | ||
587 | (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : | ||
588 | (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); | ||
589 | emit_dn(as, ai, (dest & 31), left); | ||
590 | } | ||
591 | } else if (stfp) { /* FP to integer conversion. */ | ||
592 | if (irt_isguard(ir->t)) { | ||
593 | /* Checked conversions are only supported from number to int. */ | ||
594 | lj_assertA(irt_isint(ir->t) && st == IRT_NUM, | ||
595 | "bad type for checked CONV"); | ||
596 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | ||
597 | } else { | ||
598 | Reg left = ra_alloc1(as, lref, RSET_FPR); | ||
599 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
/* Pick the instruction by result width (32/64 bit), source type */
/* (double/float) and result signedness. */
600 | A64Ins ai = irt_is64(ir->t) ? | ||
601 | (st == IRT_NUM ? | ||
602 | (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : | ||
603 | (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : | ||
604 | (st == IRT_NUM ? | ||
605 | (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : | ||
606 | (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); | ||
607 | emit_dn(as, ai, dest, (left & 31)); | ||
608 | } | ||
609 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | ||
610 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
611 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
612 | A64Ins ai = st == IRT_I8 ? A64I_SXTBw : | ||
613 | st == IRT_U8 ? A64I_UXTBw : | ||
614 | st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; | ||
615 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); | ||
616 | emit_dn(as, ai, dest, left); | ||
617 | } else { | ||
618 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
619 | if (irt_is64(ir->t)) { | ||
620 | if (st64 || !(ir->op2 & IRCONV_SEXT)) { | ||
621 | /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ | ||
622 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | ||
623 | } else { /* 32 to 64 bit sign extension. */ | ||
624 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
625 | emit_dn(as, A64I_SXTW, dest, left); | ||
626 | } | ||
627 | } else { | ||
628 | if (st64 && !(ir->op2 & IRCONV_NONE)) { | ||
629 | /* This is either a 32 bit reg/reg mov which zeroes the hiword | ||
630 | ** or a load of the loword from a 64 bit address. | ||
631 | */ | ||
632 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
633 | emit_dm(as, A64I_MOVw, dest, left); | ||
634 | } else { /* 32/32 bit no-op (cast). */ | ||
635 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | ||
636 | } | ||
637 | } | ||
638 | } | ||
639 | } | ||
640 | |||
641 | static void asm_strto(ASMState *as, IRIns *ir) | ||
642 | { | ||
643 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | ||
644 | IRRef args[2]; | ||
645 | Reg dest = 0, tmp; | ||
646 | int destused = ra_used(ir); | ||
647 | int32_t ofs = 0; | ||
648 | ra_evictset(as, RSET_SCRATCH); | ||
649 | if (destused) { | ||
650 | if (ra_hasspill(ir->s)) { | ||
651 | ofs = sps_scale(ir->s); | ||
652 | destused = 0; | ||
653 | if (ra_hasreg(ir->r)) { | ||
654 | ra_free(as, ir->r); | ||
655 | ra_modified(as, ir->r); | ||
656 | emit_spload(as, ir, ir->r, ofs); | ||
657 | } | ||
658 | } else { | ||
659 | dest = ra_dest(as, ir, RSET_FPR); | ||
660 | } | ||
661 | } | ||
662 | if (destused) | ||
663 | emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); | ||
664 | asm_guardcnb(as, A64I_CBZ, RID_RET); | ||
665 | args[0] = ir->op1; /* GCstr *str */ | ||
666 | args[1] = ASMREF_TMP1; /* TValue *n */ | ||
667 | asm_gencall(as, ci, args); | ||
668 | tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
669 | emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); | ||
670 | } | ||
671 | |||
672 | /* -- Memory references --------------------------------------------------- */ | ||
673 | |||
674 | /* Store tagged value for ref at base+ofs. */ | ||
675 | static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) | ||
676 | { | ||
677 | RegSet allow = rset_exclude(RSET_GPR, base); | ||
678 | IRIns *ir = IR(ref); | ||
679 | lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), | ||
680 | "store of IR type %d", irt_type(ir->t)); | ||
681 | if (irref_isk(ref)) { | ||
682 | TValue k; | ||
683 | lj_ir_kvalue(as->J->L, &k, ir); | ||
684 | emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); | ||
685 | } else { | ||
686 | Reg src = ra_alloc1(as, ref, allow); | ||
687 | rset_clear(allow, src); | ||
688 | if (irt_isinteger(ir->t)) { | ||
689 | Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); | ||
690 | emit_lso(as, A64I_STRx, RID_TMP, base, ofs); | ||
691 | emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); | ||
692 | } else { | ||
693 | Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
694 | emit_lso(as, A64I_STRx, RID_TMP, base, ofs); | ||
695 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); | ||
696 | } | ||
697 | } | ||
698 | } | ||
699 | |||
700 | /* Get pointer to TValue. */ | ||
701 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) | ||
702 | { | ||
703 | if ((mode & IRTMPREF_IN1)) { | ||
704 | IRIns *ir = IR(ref); | ||
705 | if (irt_isnum(ir->t)) { | ||
706 | if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { | ||
707 | /* Use the number constant itself as a TValue. */ | ||
708 | ra_allockreg(as, i64ptr(ir_knum(ir)), dest); | ||
709 | return; | ||
710 | } | ||
711 | emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0); | ||
712 | } else { | ||
713 | asm_tvstore64(as, dest, 0, ref); | ||
714 | } | ||
715 | } | ||
716 | /* g->tmptv holds the TValue(s). */ | ||
717 | emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL); | ||
718 | } | ||
719 | |||
720 | static void asm_aref(ASMState *as, IRIns *ir) | ||
721 | { | ||
722 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
723 | Reg idx, base; | ||
724 | if (irref_isk(ir->op2)) { | ||
725 | IRRef tab = IR(ir->op1)->op1; | ||
726 | int32_t ofs = asm_fuseabase(as, tab); | ||
727 | IRRef refa = ofs ? tab : ir->op1; | ||
728 | uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); | ||
729 | if (k) { | ||
730 | base = ra_alloc1(as, refa, RSET_GPR); | ||
731 | emit_dn(as, A64I_ADDx^k, dest, base); | ||
732 | return; | ||
733 | } | ||
734 | } | ||
735 | base = ra_alloc1(as, ir->op1, RSET_GPR); | ||
736 | idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); | ||
737 | emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); | ||
738 | } | ||
739 | |||
740 | /* Inlined hash lookup. Specialized for key type and for const keys. | ||
741 | ** The equivalent C code is: | ||
742 | ** Node *n = hashkey(t, key); | ||
743 | ** do { | ||
744 | ** if (lj_obj_equal(&n->key, key)) return &n->val; | ||
745 | ** } while ((n = nextnode(n))); | ||
746 | ** return niltv(L); | ||
747 | */ | ||
/* Assemble HREF (inlined hash chain lookup).
** ir->op1 is the table, ir->op2 the key. merge selects how the "found" and
** "not found" outcomes are handled: with IR_NE the not-found path is an
** unconditional guard exit, with IR_EQ the found path is a guard exit,
** otherwise a found key branches to l_end and a missing key loads the
** niltv address into dest (if the result is used).
** NOTE(review): the emit_* calls below appear in reverse of execution order
** (as->mcp is decremented to reserve the loop branch, which is patched once
** the loop body has been emitted) -- read each section bottom-up.
*/
748 | static void asm_href(ASMState *as, IRIns *ir, IROp merge) | ||
749 | { | ||
750 | RegSet allow = RSET_GPR; | ||
751 | int destused = ra_used(ir); | ||
752 | Reg dest = ra_dest(as, ir, allow); | ||
753 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | ||
754 | Reg key = 0, tmp = RID_TMP; | ||
755 | Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE; | ||
756 | IRRef refkey = ir->op2; | ||
757 | IRIns *irkey = IR(refkey); | ||
758 | int isk = irref_isk(ir->op2); | ||
759 | IRType1 kt = irkey->t; | ||
760 | uint32_t k = 0; | ||
761 | uint32_t khash; | ||
762 | MCLabel l_end, l_loop, l_next; | ||
763 | rset_clear(allow, tab); | ||
764 | |||
/* Allocate the key: a register for non-constant keys (FPR for numbers), */
/* or either a 12 bit immediate (k != 0) or a constant register otherwise. */
765 | if (!isk) { | ||
766 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); | ||
767 | rset_clear(allow, key); | ||
768 | if (!irt_isstr(kt)) { | ||
769 | tmp = ra_scratch(as, allow); | ||
770 | rset_clear(allow, tmp); | ||
771 | } | ||
772 | } else if (irt_isnum(kt)) { | ||
773 | int64_t val = (int64_t)ir_knum(irkey)->u64; | ||
774 | if (!(k = emit_isk12(val))) { | ||
775 | key = ra_allock(as, val, allow); | ||
776 | rset_clear(allow, key); | ||
777 | } | ||
778 | } else if (!irt_ispri(kt)) { | ||
779 | if (!(k = emit_isk12(irkey->i))) { | ||
780 | key = ra_alloc1(as, refkey, allow); | ||
781 | rset_clear(allow, key); | ||
782 | } | ||
783 | } | ||
784 | |||
785 | /* Allocate constants early. */ | ||
786 | if (irt_isnum(kt)) { | ||
787 | if (!isk) { | ||
788 | tisnum = ra_allock(as, LJ_TISNUM << 15, allow); | ||
789 | ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); | ||
790 | rset_clear(allow, tisnum); | ||
791 | } | ||
792 | } else if (irt_isaddr(kt)) { | ||
793 | if (isk) { | ||
/* Constant GC key: precompute the full tagged 64 bit key value. */
794 | int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; | ||
795 | scr = ra_allock(as, kk, allow); | ||
796 | } else { | ||
797 | scr = ra_scratch(as, allow); | ||
798 | } | ||
799 | rset_clear(allow, scr); | ||
800 | } else { | ||
801 | lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); | ||
802 | type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow); | ||
803 | scr = ra_scratch(as, rset_clear(allow, type)); | ||
804 | rset_clear(allow, scr); | ||
805 | } | ||
806 | |||
807 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ | ||
808 | l_end = emit_label(as); | ||
809 | as->invmcp = NULL; | ||
810 | if (merge == IR_NE) | ||
811 | asm_guardcc(as, CC_AL); | ||
812 | else if (destused) | ||
813 | emit_loada(as, dest, niltvg(J2G(as->J))); | ||
814 | |||
815 | /* Follow hash chain until the end. */ | ||
/* One instruction slot is reserved at l_loop for the loop branch, which is */
/* filled in below once the branch distance is known. */
816 | l_loop = --as->mcp; | ||
817 | emit_n(as, A64I_CMPx^A64I_K12^0, dest); | ||
818 | emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); | ||
819 | l_next = emit_label(as); | ||
820 | |||
821 | /* Type and value comparison. */ | ||
822 | if (merge == IR_EQ) | ||
823 | asm_guardcc(as, CC_EQ); | ||
824 | else | ||
825 | emit_cond_branch(as, CC_EQ, l_end); | ||
826 | |||
827 | if (irt_isnum(kt)) { | ||
828 | if (isk) { | ||
829 | /* Assumes -0.0 is already canonicalized to +0.0. */ | ||
830 | if (k) | ||
831 | emit_n(as, A64I_CMPx^k, tmp); | ||
832 | else | ||
833 | emit_nm(as, A64I_CMPx, key, tmp); | ||
834 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); | ||
835 | } else { | ||
836 | emit_nm(as, A64I_FCMPd, key, ftmp); | ||
837 | emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); | ||
838 | emit_cond_branch(as, CC_LO, l_next); | ||
839 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); | ||
840 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); | ||
841 | } | ||
842 | } else if (irt_isaddr(kt)) { | ||
843 | if (isk) { | ||
844 | emit_nm(as, A64I_CMPx, scr, tmp); | ||
845 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); | ||
846 | } else { | ||
847 | emit_nm(as, A64I_CMPx, tmp, scr); | ||
848 | emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); | ||
849 | } | ||
850 | } else { | ||
851 | emit_nm(as, A64I_CMPx, scr, type); | ||
852 | emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); | ||
853 | } | ||
854 | |||
/* Patch the reserved slot: conditional branch (NE) from l_loop to the */
/* current start of the emitted loop body. */
855 | *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; | ||
856 | if (!isk && irt_isaddr(kt)) { | ||
/* Build the tagged key in tmp: key + (type << 47). */
857 | type = ra_allock(as, (int32_t)irt_toitype(kt), allow); | ||
858 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); | ||
859 | rset_clear(allow, type); | ||
860 | } | ||
861 | /* Load main position relative to tab->node into dest. */ | ||
/* Non-constant keys use khash = 1 to force the generic path below. */
862 | khash = isk ? ir_khash(as, irkey) : 1; | ||
863 | if (khash == 0) { | ||
864 | emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); | ||
865 | } else { | ||
/* dest = tab->node + (hash & hmask) * 24: the index is tripled first */
/* (dest + dest<<1), then added shifted left by 3. */
866 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); | ||
867 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); | ||
868 | emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); | ||
869 | if (isk) { | ||
870 | Reg tmphash = ra_allock(as, khash, allow); | ||
871 | emit_dnm(as, A64I_ANDw, dest, dest, tmphash); | ||
872 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); | ||
873 | } else if (irt_isstr(kt)) { | ||
874 | /* Fetch of str->sid is cheaper than ra_allock. */ | ||
875 | emit_dnm(as, A64I_ANDw, dest, dest, tmp); | ||
876 | emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); | ||
877 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); | ||
878 | } else { /* Must match with hash*() in lj_tab.c. */ | ||
879 | emit_dnm(as, A64I_ANDw, dest, dest, tmp); | ||
880 | emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); | ||
881 | emit_dnm(as, A64I_SUBw, dest, dest, tmp); | ||
882 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); | ||
883 | emit_dnm(as, A64I_EORw, dest, dest, tmp); | ||
884 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); | ||
885 | emit_dnm(as, A64I_SUBw, tmp, tmp, dest); | ||
886 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); | ||
887 | emit_dnm(as, A64I_EORw, tmp, tmp, dest); | ||
888 | if (irt_isnum(kt)) { | ||
889 | emit_dnm(as, A64I_ADDw, dest, dest, dest); | ||
890 | emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); | ||
891 | emit_dm(as, A64I_MOVw, tmp, dest); | ||
892 | emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); | ||
893 | } else { | ||
894 | checkmclim(as); | ||
895 | emit_dm(as, A64I_MOVw, tmp, key); | ||
896 | emit_dnm(as, A64I_EORw, dest, dest, | ||
897 | ra_allock(as, irt_toitype(kt) << 15, allow)); | ||
898 | emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); | ||
899 | emit_dm(as, A64I_MOVx, dest, key); | ||
900 | } | ||
901 | } | ||
902 | } | ||
903 | } | ||
904 | |||
905 | static void asm_hrefk(ASMState *as, IRIns *ir) | ||
906 | { | ||
907 | IRIns *kslot = IR(ir->op2); | ||
908 | IRIns *irkey = IR(kslot->op1); | ||
909 | int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); | ||
910 | int32_t kofs = ofs + (int32_t)offsetof(Node, key); | ||
911 | int bigofs = !emit_checkofs(A64I_LDRx, ofs); | ||
912 | Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; | ||
913 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | ||
914 | Reg key, idx = node; | ||
915 | RegSet allow = rset_exclude(RSET_GPR, node); | ||
916 | uint64_t k; | ||
917 | lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); | ||
918 | if (bigofs) { | ||
919 | idx = dest; | ||
920 | rset_clear(allow, dest); | ||
921 | kofs = (int32_t)offsetof(Node, key); | ||
922 | } else if (ra_hasreg(dest)) { | ||
923 | emit_opk(as, A64I_ADDx, dest, node, ofs, allow); | ||
924 | } | ||
925 | asm_guardcc(as, CC_NE); | ||
926 | if (irt_ispri(irkey->t)) { | ||
927 | k = ~((int64_t)~irt_toitype(irkey->t) << 47); | ||
928 | } else if (irt_isnum(irkey->t)) { | ||
929 | k = ir_knum(irkey)->u64; | ||
930 | } else { | ||
931 | k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); | ||
932 | } | ||
933 | key = ra_scratch(as, allow); | ||
934 | emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); | ||
935 | emit_lso(as, A64I_LDRx, key, idx, kofs); | ||
936 | if (bigofs) | ||
937 | emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); | ||
938 | } | ||
939 | |||
940 | static void asm_uref(ASMState *as, IRIns *ir) | ||
941 | { | ||
942 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
943 | if (irref_isk(ir->op1)) { | ||
944 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
945 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | ||
946 | emit_lsptr(as, A64I_LDRx, dest, v); | ||
947 | } else { | ||
948 | Reg uv = ra_scratch(as, RSET_GPR); | ||
949 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | ||
950 | if (ir->o == IR_UREFC) { | ||
951 | asm_guardcc(as, CC_NE); | ||
952 | emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); | ||
953 | emit_opk(as, A64I_ADDx, dest, uv, | ||
954 | (int32_t)offsetof(GCupval, tv), RSET_GPR); | ||
955 | emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); | ||
956 | } else { | ||
957 | emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); | ||
958 | } | ||
959 | emit_lso(as, A64I_LDRx, uv, func, | ||
960 | (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); | ||
961 | } | ||
962 | } | ||
963 | |||
964 | static void asm_fref(ASMState *as, IRIns *ir) | ||
965 | { | ||
966 | UNUSED(as); UNUSED(ir); | ||
967 | lj_assertA(!ra_used(ir), "unfused FREF"); | ||
968 | } | ||
969 | |||
970 | static void asm_strref(ASMState *as, IRIns *ir) | ||
971 | { | ||
972 | RegSet allow = RSET_GPR; | ||
973 | Reg dest = ra_dest(as, ir, allow); | ||
974 | Reg base = ra_alloc1(as, ir->op1, allow); | ||
975 | IRIns *irr = IR(ir->op2); | ||
976 | int32_t ofs = sizeof(GCstr); | ||
977 | uint32_t m; | ||
978 | rset_clear(allow, base); | ||
979 | if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { | ||
980 | emit_dn(as, A64I_ADDx^m, dest, base); | ||
981 | } else { | ||
982 | emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); | ||
983 | emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); | ||
984 | } | ||
985 | } | ||
986 | |||
987 | /* -- Loads and stores ---------------------------------------------------- */ | ||
988 | |||
989 | static A64Ins asm_fxloadins(IRIns *ir) | ||
990 | { | ||
991 | switch (irt_type(ir->t)) { | ||
992 | case IRT_I8: return A64I_LDRB ^ A64I_LS_S; | ||
993 | case IRT_U8: return A64I_LDRB; | ||
994 | case IRT_I16: return A64I_LDRH ^ A64I_LS_S; | ||
995 | case IRT_U16: return A64I_LDRH; | ||
996 | case IRT_NUM: return A64I_LDRd; | ||
997 | case IRT_FLOAT: return A64I_LDRs; | ||
998 | default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; | ||
999 | } | ||
1000 | } | ||
1001 | |||
1002 | static A64Ins asm_fxstoreins(IRIns *ir) | ||
1003 | { | ||
1004 | switch (irt_type(ir->t)) { | ||
1005 | case IRT_I8: case IRT_U8: return A64I_STRB; | ||
1006 | case IRT_I16: case IRT_U16: return A64I_STRH; | ||
1007 | case IRT_NUM: return A64I_STRd; | ||
1008 | case IRT_FLOAT: return A64I_STRs; | ||
1009 | default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; | ||
1010 | } | ||
1011 | } | ||
1012 | |||
1013 | static void asm_fload(ASMState *as, IRIns *ir) | ||
1014 | { | ||
1015 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1016 | Reg idx; | ||
1017 | A64Ins ai = asm_fxloadins(ir); | ||
1018 | int32_t ofs; | ||
1019 | if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ | ||
1020 | idx = RID_GL; | ||
1021 | ofs = (ir->op2 << 2) - GG_OFS(g); | ||
1022 | } else { | ||
1023 | idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1024 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
1025 | ofs = asm_fuseabase(as, ir->op1); | ||
1026 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
1027 | emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); | ||
1028 | return; | ||
1029 | } | ||
1030 | } | ||
1031 | ofs = field_ofs[ir->op2]; | ||
1032 | } | ||
1033 | emit_lso(as, ai, (dest & 31), idx, ofs); | ||
1034 | } | ||
1035 | |||
1036 | static void asm_fstore(ASMState *as, IRIns *ir) | ||
1037 | { | ||
1038 | if (ir->r != RID_SINK) { | ||
1039 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
1040 | IRIns *irf = IR(ir->op1); | ||
1041 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | ||
1042 | int32_t ofs = field_ofs[irf->op2]; | ||
1043 | emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); | ||
1044 | } | ||
1045 | } | ||
1046 | |||
1047 | static void asm_xload(ASMState *as, IRIns *ir) | ||
1048 | { | ||
1049 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
1050 | lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); | ||
1051 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); | ||
1052 | } | ||
1053 | |||
1054 | static void asm_xstore(ASMState *as, IRIns *ir) | ||
1055 | { | ||
1056 | if (ir->r != RID_SINK) { | ||
1057 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
1058 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
1059 | rset_exclude(RSET_GPR, src)); | ||
1060 | } | ||
1061 | } | ||
1062 | |||
/* Assemble a type-checked load of a tagged value through the reference in
** ir->op1 (fused via asm_fuseahuref). For IR_VLOAD, ir->op2 adds an extra
** offset of 8 bytes per unit.
** The 64 bit tagged value is loaded into tmp, its type is checked by a
** guard and, if the result is used, the payload is unpacked into dest:
** GC addresses are masked with LJ_GCVMASK, numbers are moved to an FPR and
** integers are reduced to their low 32 bits.
*/
1063 | static void asm_ahuvload(ASMState *as, IRIns *ir) | ||
1064 | { | ||
1065 | Reg idx, tmp, type; | ||
1066 | int32_t ofs = 0; | ||
1067 | RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | ||
1068 | lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || | ||
1069 | irt_isint(ir->t), | ||
1070 | "bad load type %d", irt_type(ir->t)); | ||
1071 | if (ra_used(ir)) { | ||
1072 | Reg dest = ra_dest(as, ir, allow); | ||
/* Numbers need a separate GPR for the raw load; otherwise dest is reused. */
1073 | tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; | ||
1074 | if (irt_isaddr(ir->t)) { | ||
1075 | emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); | ||
1076 | } else if (irt_isnum(ir->t)) { | ||
1077 | emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); | ||
1078 | } else if (irt_isint(ir->t)) { | ||
1079 | emit_dm(as, A64I_MOVw, dest, dest); | ||
1080 | } | ||
1081 | } else { | ||
1082 | tmp = ra_scratch(as, gpr); | ||
1083 | } | ||
1084 | type = ra_scratch(as, rset_clear(gpr, tmp)); | ||
1085 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); | ||
1086 | if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; | ||
1087 | /* Always do the type check, even if the load result is unused. */ | ||
1088 | asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); | ||
1089 | if (irt_type(ir->t) >= IRT_NUM) { | ||
1090 | lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), | ||
1091 | "bad load type %d", irt_type(ir->t)); | ||
1092 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), | ||
1093 | ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); | ||
1094 | } else if (irt_isaddr(ir->t)) { | ||
/* Shift the tag down (ASR by 47) and compare it with the expected itype. */
1095 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); | ||
1096 | emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); | ||
1097 | } else if (irt_isnil(ir->t)) { | ||
1098 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); | ||
1099 | } else { | ||
/* NOTE(review): unlike the number case above, idx is not excluded from the */
/* register set used for this constant -- confirm it cannot clash with idx */
/* (or with a fused index register when FUSE_REG is set). */
1100 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), | ||
1101 | ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp); | ||
1102 | } | ||
/* Load the tagged value: register-indexed form if asm_fuseahuref returned */
/* an index register in ofs (FUSE_REG), immediate offset form otherwise. */
1103 | if (ofs & FUSE_REG) | ||
1104 | emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); | ||
1105 | else | ||
1106 | emit_lso(as, A64I_LDRx, tmp, idx, ofs); | ||
1107 | } | ||
1108 | |||
/* Assemble a store of a tagged value through the reference in ir->op1
** (fused via asm_fuseahuref). Nothing is emitted for a sunk store.
** Numbers are stored directly from an FPR. Primitives store a constant
** register holding the tag pattern. All other values are combined with
** their type tag in RID_TMP, which is then stored.
*/
1109 | static void asm_ahustore(ASMState *as, IRIns *ir) | ||
1110 | { | ||
1111 | if (ir->r != RID_SINK) { | ||
1112 | RegSet allow = RSET_GPR; | ||
1113 | Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; | ||
1114 | int32_t ofs = 0; | ||
1115 | if (irt_isnum(ir->t)) { | ||
1116 | src = ra_alloc1(as, ir->op2, RSET_FPR); | ||
1117 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); | ||
1118 | if (ofs & FUSE_REG) | ||
1119 | emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); | ||
1120 | else | ||
1121 | emit_lso(as, A64I_STRd, (src & 31), idx, ofs); | ||
1122 | } else { | ||
1123 | if (!irt_ispri(ir->t)) { | ||
1124 | src = ra_alloc1(as, ir->op2, allow); | ||
1125 | rset_clear(allow, src); | ||
/* Integers use a pre-shifted tag constant; other types use the raw itype, */
/* which is shifted left by 47 as part of the add below. */
1126 | if (irt_isinteger(ir->t)) | ||
1127 | type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow); | ||
1128 | else | ||
1129 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
1130 | } else { | ||
/* Primitive: the constant register itself is what gets stored. */
1131 | tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); | ||
1132 | } | ||
1133 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), | ||
1134 | A64I_STRx); | ||
1135 | if (ofs & FUSE_REG) | ||
1136 | emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); | ||
1137 | else | ||
1138 | emit_lso(as, A64I_STRx, tmp, idx, ofs); | ||
1139 | if (ra_hasreg(src)) { | ||
1140 | if (irt_isinteger(ir->t)) { | ||
1141 | emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); | ||
1142 | } else { | ||
1143 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type); | ||
1144 | } | ||
1145 | } | ||
1146 | } | ||
1147 | } | ||
1148 | } | ||
1149 | |||
/* Assemble SLOAD: load a stack slot relative to BASE.
** The slot number is ir->op1; its byte offset from BASE is 8*(op1-2).
** ir->op2 carries flags: IRSLOAD_TYPECHECK requests a guarded type check
** on the loaded tagged value, IRSLOAD_CONVERT a number<->integer conversion
** of the loaded value. Parent SLOADs are not handled here (see the assert).
** The local copy t of the result type is rewritten during conversion so
** that the type check below tests the type actually stored in the slot.
*/
1150 | static void asm_sload(ASMState *as, IRIns *ir) | ||
1151 | { | ||
1152 | int32_t ofs = 8*((int32_t)ir->op1-2); | ||
1153 | IRType1 t = ir->t; | ||
1154 | Reg dest = RID_NONE, base; | ||
1155 | RegSet allow = RSET_GPR; | ||
1156 | lj_assertA(!(ir->op2 & IRSLOAD_PARENT), | ||
1157 | "bad parent SLOAD"); /* Handled by asm_head_side(). */ | ||
1158 | lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), | ||
1159 | "inconsistent SLOAD variant"); | ||
1160 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | ||
/* Guarded number to integer conversion: load into a scratch FPR and let */
/* asm_tointg() emit the checked conversion. */
1161 | dest = ra_scratch(as, RSET_FPR); | ||
1162 | asm_tointg(as, ir, dest); | ||
1163 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | ||
1164 | } else if (ra_used(ir)) { | ||
1165 | Reg tmp = RID_NONE; | ||
1166 | if ((ir->op2 & IRSLOAD_CONVERT)) | ||
1167 | tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); | ||
1168 | lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), | ||
1169 | "bad SLOAD type %d", irt_type(t)); | ||
1170 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); | ||
1171 | base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); | ||
1172 | if (irt_isaddr(t)) { | ||
1173 | emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); | ||
1174 | } else if ((ir->op2 & IRSLOAD_CONVERT)) { | ||
1175 | if (irt_isint(t)) { | ||
1176 | emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); | ||
1177 | /* If value is already loaded for type check, move it to FPR. */ | ||
1178 | if ((ir->op2 & IRSLOAD_TYPECHECK)) | ||
1179 | emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); | ||
1180 | else | ||
1181 | dest = tmp; | ||
1182 | t.irt = IRT_NUM; /* Check for original type. */ | ||
1183 | } else { | ||
1184 | emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); | ||
1185 | dest = tmp; | ||
1186 | t.irt = IRT_INT; /* Check for original type. */ | ||
1187 | } | ||
1188 | } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1189 | emit_dm(as, A64I_MOVw, dest, dest); | ||
1190 | } | ||
1191 | goto dotypecheck; | ||
1192 | } | ||
1193 | base = ra_alloc1(as, REF_BASE, allow); | ||
1194 | dotypecheck: | ||
1195 | rset_clear(allow, base); | ||
1196 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1197 | Reg tmp; | ||
/* Load the tagged value into a GPR: reuse dest if it is one. */
1198 | if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { | ||
1199 | tmp = dest; | ||
1200 | } else { | ||
1201 | tmp = ra_scratch(as, allow); | ||
1202 | rset_clear(allow, tmp); | ||
1203 | } | ||
1204 | if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) | ||
1205 | emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); | ||
1206 | /* Need type check, even if the load result is unused. */ | ||
1207 | asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); | ||
1208 | if (irt_type(t) >= IRT_NUM) { | ||
1209 | lj_assertA(irt_isinteger(t) || irt_isnum(t), | ||
1210 | "bad SLOAD type %d", irt_type(t)); | ||
1211 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), | ||
1212 | ra_allock(as, LJ_TISNUM << 15, allow), tmp); | ||
1213 | } else if (irt_isnil(t)) { | ||
1214 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); | ||
1215 | } else if (irt_ispri(t)) { | ||
1216 | emit_nm(as, A64I_CMPx, | ||
1217 | ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); | ||
1218 | } else { | ||
/* Shift the tag down (ASR by 47) and compare it with the expected itype. */
1219 | Reg type = ra_scratch(as, allow); | ||
1220 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); | ||
1221 | emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); | ||
1222 | } | ||
1223 | emit_lso(as, A64I_LDRx, tmp, base, ofs); | ||
1224 | return; | ||
1225 | } | ||
/* No type check: load directly with the width matching the slot type. */
1226 | if (ra_hasreg(dest)) { | ||
1227 | emit_lso(as, irt_isnum(t) ? A64I_LDRd : | ||
1228 | (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, | ||
1229 | ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0))); | ||
1230 | } | ||
1231 | } | ||
1232 | |||
1233 | /* -- Allocations --------------------------------------------------------- */ | ||
1234 | |||
1235 | #if LJ_HASFFI | ||
/* Assemble CNEW/CNEWI: allocate a cdata object of ctype IR(ir->op1)->i.
** - CNEWI: allocate via lj_mem_newgco and store the 4 or 8 byte value from
**   ir->op2 at offset sizeof(GCcdata) of the result.
** - CNEW with op2 != REF_NIL: call lj_cdata_newv(L, id, sz, align) and return.
** - Otherwise: allocate sz+sizeof(GCcdata) bytes via lj_mem_newgco.
** For the lj_mem_newgco paths the gct and ctypeid header fields are stored
** by the generated code. Bumps as->gcsteps.
*/
1236 | static void asm_cnew(ASMState *as, IRIns *ir) | ||
1237 | { | ||
1238 | CTState *cts = ctype_ctsG(J2G(as->J)); | ||
1239 | CTypeID id = (CTypeID)IR(ir->op1)->i; | ||
1240 | CTSize sz; | ||
1241 | CTInfo info = lj_ctype_info(cts, id, &sz); | ||
1242 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | ||
1243 | IRRef args[4]; | ||
1244 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1245 | lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), | ||
1246 | "bad CNEW/CNEWI operands"); | ||
1247 | |||
1248 | as->gcsteps++; | ||
1249 | asm_setupresult(as, ir, ci); /* GCcdata * */ | ||
1250 | /* Initialize immutable cdata object. */ | ||
1251 | if (ir->o == IR_CNEWI) { | ||
1252 | int32_t ofs = sizeof(GCcdata); | ||
1253 | Reg r = ra_alloc1(as, ir->op2, allow); | ||
1254 | lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); | ||
1255 | emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); | ||
1256 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1257 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1258 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1259 | args[1] = ir->op1; /* CTypeID id */ | ||
1260 | args[2] = ir->op2; /* CTSize sz */ | ||
1261 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1262 | asm_gencall(as, ci, args); | ||
/* The alignment argument is materialized as a constant in the TMP1 register. */
1263 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1264 | return; | ||
1265 | } | ||
1266 | |||
1267 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | ||
1268 | { | ||
/* Ids below 65536 are loaded into RID_X1 with a single MOVZ; larger ones use a constant register. */
1269 | Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); | ||
1270 | emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); | ||
1271 | emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); | ||
1272 | emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); | ||
1273 | if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1); | ||
1274 | } | ||
1275 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1276 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1277 | asm_gencall(as, ci, args); | ||
/* Size argument: payload size plus the GCcdata header. */
1278 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | ||
1279 | ra_releasetmp(as, ASMREF_TMP1)); | ||
1280 | } | ||
1281 | #endif | ||
1282 | |||
1283 | /* -- Write barriers ------------------------------------------------------ */ | ||
1284 | |||
/* Assemble TBAR, the write barrier for the table in ir->op1.
** Emits a load of tab->marked, a test of LJ_GC_BLACK with a CC_EQ branch to
** l_end, and a sequence that clears LJ_GC_BLACK in the mark byte, reads
** gc.grayagain into `link`, sets gc.grayagain to tab and stores `link` into
** tab->gclist.
** NOTE(review): calls are listed last-instruction-first, presumably because
** code is emitted backwards -- confirm against the emit_* helpers.
*/
1285 | static void asm_tbar(ASMState *as, IRIns *ir) | ||
1286 | { | ||
1287 | Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1288 | Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); | ||
1289 | Reg mark = RID_TMP; | ||
1290 | MCLabel l_end = emit_label(as); | ||
1291 | emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); | ||
1292 | emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); | ||
1293 | emit_setgl(as, tab, gc.grayagain); | ||
1294 | emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); | ||
1295 | emit_getgl(as, link, gc.grayagain); | ||
/* Branch to l_end when the LJ_GC_BLACK test yields EQ (bit clear). */
1296 | emit_cond_branch(as, CC_EQ, l_end); | ||
1297 | emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); | ||
1298 | emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); | ||
1299 | } | ||
1300 | |||
/* Assemble OBAR, the object write barrier; only UREFC operands are accepted.
** Emits loads of the `marked` bytes of the stored value (ir->op2) and of the
** upvalue (addressed relative to its tv field), two tests (LJ_GC_WHITES on
** the value, LJ_GC_BLACK on the upvalue) that each branch to l_end on EQ,
** and a call to lj_gc_barrieruv(g, tv).
** NOTE(review): calls are listed last-instruction-first, presumably because
** code is emitted backwards -- confirm against the emit_* helpers.
*/
1301 | static void asm_obar(ASMState *as, IRIns *ir) | ||
1302 | { | ||
1303 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; | ||
1304 | IRRef args[2]; | ||
1305 | MCLabel l_end; | ||
1306 | RegSet allow = RSET_GPR; | ||
1307 | Reg obj, val, tmp; | ||
1308 | /* No need for other object barriers (yet). */ | ||
1309 | lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); | ||
/* Scratch registers are evicted up front because a call is generated below. */
1310 | ra_evictset(as, RSET_SCRATCH); | ||
1311 | l_end = emit_label(as); | ||
1312 | args[0] = ASMREF_TMP1; /* global_State *g */ | ||
1313 | args[1] = ir->op1; /* TValue *tv */ | ||
1314 | asm_gencall(as, ci, args); | ||
1315 | emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); | ||
/* obj is read from the IR after asm_gencall; presumably the call setup allocated it -- confirm. */
1316 | obj = IR(ir->op1)->r; | ||
1317 | tmp = ra_scratch(as, rset_exclude(allow, obj)); | ||
1318 | emit_cond_branch(as, CC_EQ, l_end); | ||
1319 | emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); | ||
1320 | emit_cond_branch(as, CC_EQ, l_end); | ||
1321 | emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); | ||
1322 | val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); | ||
/* obj points at the upvalue's tv field, so `marked` is addressed relative to it. */
1323 | emit_lso(as, A64I_LDRB, tmp, obj, | ||
1324 | (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); | ||
1325 | emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); | ||
1326 | } | ||
1327 | |||
1328 | /* -- Arithmetic and logic operations ------------------------------------- */ | ||
1329 | |||
1330 | static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) | ||
1331 | { | ||
1332 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1333 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
1334 | right = (left >> 8); left &= 255; | ||
1335 | emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); | ||
1336 | } | ||
1337 | |||
1338 | static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) | ||
1339 | { | ||
1340 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
1341 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); | ||
1342 | emit_dn(as, ai, (dest & 31), (left & 31)); | ||
1343 | } | ||
1344 | |||
1345 | static void asm_fpmath(ASMState *as, IRIns *ir) | ||
1346 | { | ||
1347 | IRFPMathOp fpm = (IRFPMathOp)ir->op2; | ||
1348 | if (fpm == IRFPM_SQRT) { | ||
1349 | asm_fpunary(as, ir, A64I_FSQRTd); | ||
1350 | } else if (fpm <= IRFPM_TRUNC) { | ||
1351 | asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : | ||
1352 | fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); | ||
1353 | } else { | ||
1354 | asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); | ||
1355 | } | ||
1356 | } | ||
1357 | |||
1358 | static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) | ||
1359 | { | ||
1360 | IRIns *ir; | ||
1361 | if (irref_isk(rref)) | ||
1362 | return 0; /* Don't swap constants to the left. */ | ||
1363 | if (irref_isk(lref)) | ||
1364 | return 1; /* But swap constants to the right. */ | ||
1365 | ir = IR(rref); | ||
1366 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
1367 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
1368 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
1369 | return 0; /* Don't swap fusable operands to the left. */ | ||
1370 | ir = IR(lref); | ||
1371 | if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || | ||
1372 | (ir->o == IR_ADD && ir->op1 == ir->op2) || | ||
1373 | (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) | ||
1374 | return 1; /* But swap fusable operands to the right. */ | ||
1375 | return 0; /* Otherwise don't swap. */ | ||
1376 | } | ||
1377 | |||
/* Assemble a two-operand integer instruction `ai` (32 bit form passed in).
** - Operands may be exchanged via asm_swapops(), except for SUB.
** - A64I_X is or'ed in for 64 bit result types.
** - The right operand is encoded through asm_fuseopm().
** - Guarded IR (e.g. ADDOV) gets a CC_VS guard and the flag-setting A64I_S bit.
*/
1378 | static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai) | ||
1379 | { | ||
1380 | IRRef lref = ir->op1, rref = ir->op2; | ||
1381 | Reg left, dest = ra_dest(as, ir, RSET_GPR); | ||
1382 | uint32_t m; | ||
/* A64I_S is masked off so the flag-setting SUB variant is excluded from swapping, too. */
1383 | if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { | ||
1384 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
1385 | } | ||
1386 | left = ra_hintalloc(as, lref, dest, RSET_GPR); | ||
1387 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1388 | m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); | ||
1389 | if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ | ||
1390 | asm_guardcc(as, CC_VS); | ||
1391 | ai |= A64I_S; | ||
1392 | } | ||
1393 | emit_dn(as, ai^m, dest, left); | ||
1394 | } | ||
1395 | |||
/* Like asm_intop(), but first checks whether as->flagmcp equals the current
** code position as->mcp. If so, the instruction at mcp is skipped (mcp++),
** flagmcp is reset and A64I_S is set so this op produces the flags itself.
** asm_intcomp() is where flagmcp gets set, right after emitting a compare.
*/
1396 | static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) | ||
1397 | { | ||
1398 | if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ | ||
1399 | as->flagmcp = NULL; | ||
1400 | as->mcp++; | ||
1401 | ai |= A64I_S; | ||
1402 | } | ||
1403 | asm_intop(as, ir, ai); | ||
1404 | } | ||
1405 | |||
1406 | static void asm_intneg(ASMState *as, IRIns *ir) | ||
1407 | { | ||
1408 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1409 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1410 | emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); | ||
1411 | } | ||
1412 | |||
1413 | /* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ | ||
/* Assemble integer multiplication.
** Unguarded: a single MULw/MULx. Guarded (MULOV): an SMULL producing a
** 64 bit product, followed by a compare of bits 32+ of the product
** (ASRx #32 into RID_TMP) against the 32 bit result shifted ASR #31, with a
** CC_NE guard, and finally a MOVw dest,dest to zero-extend.
** left is allocated excluding dest, and right excluding left.
** NOTE(review): calls are listed last-instruction-first -- confirm emission order.
*/
1414 | static void asm_intmul(ASMState *as, IRIns *ir) | ||
1415 | { | ||
1416 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1417 | Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); | ||
1418 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1419 | if (irt_isguard(ir->t)) { /* IR_MULOV */ | ||
1420 | asm_guardcc(as, CC_NE); | ||
1421 | emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ | ||
1422 | emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); | ||
1423 | emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); | ||
1424 | emit_dnm(as, A64I_SMULL, dest, right, left); | ||
1425 | } else { | ||
1426 | emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); | ||
1427 | } | ||
1428 | } | ||
1429 | |||
1430 | static void asm_add(ASMState *as, IRIns *ir) | ||
1431 | { | ||
1432 | if (irt_isnum(ir->t)) { | ||
1433 | if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) | ||
1434 | asm_fparith(as, ir, A64I_FADDd); | ||
1435 | return; | ||
1436 | } | ||
1437 | asm_intop_s(as, ir, A64I_ADDw); | ||
1438 | } | ||
1439 | |||
1440 | static void asm_sub(ASMState *as, IRIns *ir) | ||
1441 | { | ||
1442 | if (irt_isnum(ir->t)) { | ||
1443 | if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) | ||
1444 | asm_fparith(as, ir, A64I_FSUBd); | ||
1445 | return; | ||
1446 | } | ||
1447 | asm_intop_s(as, ir, A64I_SUBw); | ||
1448 | } | ||
1449 | |||
1450 | static void asm_mul(ASMState *as, IRIns *ir) | ||
1451 | { | ||
1452 | if (irt_isnum(ir->t)) { | ||
1453 | asm_fparith(as, ir, A64I_FMULd); | ||
1454 | return; | ||
1455 | } | ||
1456 | asm_intmul(as, ir); | ||
1457 | } | ||
1458 | |||
/* The overflow-checking ops share the plain handlers. The integer paths
** (asm_intop, asm_intmul) add the guard when irt_isguard(ir->t) is set.
*/
1459 | #define asm_addov(as, ir) asm_add(as, ir) | ||
1460 | #define asm_subov(as, ir) asm_sub(as, ir) | ||
1461 | #define asm_mulov(as, ir) asm_mul(as, ir) | ||
1462 | |||
/* FP division and absolute value map directly onto one FP instruction. */
1463 | #define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd) | ||
1464 | #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) | ||
1465 | |||
1466 | static void asm_neg(ASMState *as, IRIns *ir) | ||
1467 | { | ||
1468 | if (irt_isnum(ir->t)) { | ||
1469 | asm_fpunary(as, ir, A64I_FNEGd); | ||
1470 | return; | ||
1471 | } | ||
1472 | asm_intneg(as, ir); | ||
1473 | } | ||
1474 | |||
/* Assemble BAND. First tries asm_fuseandshift(); if that declines, falls
** back to asm_intop() with ANDw. When as->flagmcp equals as->mcp, the
** instruction at mcp is skipped and ANDSw is used instead, so the AND
** sets the flags itself.
*/
1475 | static void asm_band(ASMState *as, IRIns *ir) | ||
1476 | { | ||
1477 | A64Ins ai = A64I_ANDw; | ||
1478 | if (asm_fuseandshift(as, ir)) | ||
1479 | return; | ||
1480 | if (as->flagmcp == as->mcp) { | ||
1481 | /* Try to drop cmp r, #0. */ | ||
1482 | as->flagmcp = NULL; | ||
1483 | as->mcp++; | ||
1484 | ai = A64I_ANDSw; | ||
1485 | } | ||
1486 | asm_intop(as, ir, ai); | ||
1487 | } | ||
1488 | |||
/* Shared handler for BOR/BXOR with base instruction `ai` (ORRw or EORw).
** If exactly one side is a fusable BNOT and the other side is not a
** constant, the BNOT is folded in: A64I_ON is or'ed into the opcode and the
** BNOT's own operand becomes the right-hand operand. Otherwise this is a
** plain asm_intop().
*/
1489 | static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) | ||
1490 | { | ||
1491 | IRRef lref = ir->op1, rref = ir->op2; | ||
1492 | IRIns *irl = IR(lref), *irr = IR(rref); | ||
1493 | if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || | ||
1494 | (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { | ||
1495 | Reg left, dest = ra_dest(as, ir, RSET_GPR); | ||
1496 | uint32_t m; | ||
/* Move a left-hand BNOT to the right so that rref always names the BNOT. */
1497 | if (irl->o == IR_BNOT) { | ||
1498 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
1499 | } | ||
1500 | left = ra_alloc1(as, lref, RSET_GPR); | ||
1501 | ai |= A64I_ON; | ||
1502 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
/* Fuse the operand of the BNOT, not the BNOT itself. */
1503 | m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); | ||
1504 | emit_dn(as, ai^m, dest, left); | ||
1505 | } else { | ||
1506 | asm_intop(as, ir, ai); | ||
1507 | } | ||
1508 | } | ||
1509 | |||
1510 | static void asm_bor(ASMState *as, IRIns *ir) | ||
1511 | { | ||
1512 | if (asm_fuseorshift(as, ir)) | ||
1513 | return; | ||
1514 | asm_borbxor(as, ir, A64I_ORRw); | ||
1515 | } | ||
1516 | |||
/* BXOR uses the shared BOR/BXOR handler with EORw. */
1517 | #define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) | ||
1518 | |||
/* Assemble BNOT as MVN. The operand is encoded through asm_fuseopm()
** and A64I_X is or'ed in for 64 bit types. Note that asm_fuseopm() is called
** with the 32 bit opcode here, before the X bit is added.
*/
1519 | static void asm_bnot(ASMState *as, IRIns *ir) | ||
1520 | { | ||
1521 | A64Ins ai = A64I_MVNw; | ||
1522 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1523 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); | ||
1524 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1525 | emit_d(as, ai^m, dest); | ||
1526 | } | ||
1527 | |||
/* Assemble BSWAP as a REV instruction, REVx for 64 bit types, else REVw. */
1528 | static void asm_bswap(ASMState *as, IRIns *ir) | ||
1529 | { | ||
1530 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1531 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1532 | emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); | ||
1533 | } | ||
1534 | |||
/* Assemble a shift or rotate.
** ai is the 32 bit bitfield/extract opcode used for constant shift counts
** (UBFMw, SBFMw or EXTRw, see the asm_bsh* macros); sh is the shift kind.
** Constant counts are masked to the operand width and encoded in the
** IMMS/IMMR fields. Variable counts use the A64I_SHRw/x register form with
** the kind in A64F_BSH(sh).
*/
1535 | static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) | ||
1536 | { | ||
1537 | int32_t shmask = irt_is64(ir->t) ? 63 : 31; | ||
1538 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | ||
1539 | Reg left, dest = ra_dest(as, ir, RSET_GPR); | ||
1540 | int32_t shift = (IR(ir->op2)->i & shmask); | ||
1541 | IRIns *irl = IR(ir->op1); | ||
/* Switch to the 64 bit opcode by adding the UBFMx-UBFMw difference. */
1542 | if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; | ||
1543 | |||
1544 | /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ | ||
1545 | if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { | ||
1546 | if (irl->o == IR_BSHL && irref_isk(irl->op2)) { | ||
1547 | int32_t shift2 = (IR(irl->op2)->i & shmask); | ||
1548 | shift = ((shift - shift2) & shmask); | ||
1549 | shmask -= shift2; | ||
/* From here on ir refers to the inner BSHL, so its op1 becomes the source. */
1550 | ir = irl; | ||
1551 | } | ||
1552 | } | ||
1553 | |||
1554 | left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1555 | switch (sh) { | ||
1556 | case A64SH_LSL: | ||
1557 | emit_dn(as, ai | A64F_IMMS(shmask-shift) | | ||
1558 | A64F_IMMR((shmask-shift+1)&shmask), dest, left); | ||
1559 | break; | ||
1560 | case A64SH_LSR: case A64SH_ASR: | ||
1561 | emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); | ||
1562 | break; | ||
1563 | case A64SH_ROR: | ||
/* Rotate: the same register is passed as both source operands. */
1564 | emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); | ||
1565 | break; | ||
1566 | } | ||
1567 | } else { /* Variable-length shifts. */ | ||
1568 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1569 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1570 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1571 | emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); | ||
1572 | } | ||
1573 | } | ||
1574 | |||
/* Shift/rotate handlers: each passes the 32 bit constant-shift opcode and
** the shift kind to asm_bitshift(). BROL is not expected to reach the
** backend and only asserts.
*/
1575 | #define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) | ||
1576 | #define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) | ||
1577 | #define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) | ||
1578 | #define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) | ||
1579 | #define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") | ||
1580 | |||
/* Assemble integer MIN/MAX as a 32 bit compare of left and right plus a
** CSELw with condition cc that picks left or right into dest.
** NOTE(review): the CSEL is listed before the CMP -- presumably emission is
** backwards so the CMP executes first; confirm against the emit helpers.
*/
1581 | static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) | ||
1582 | { | ||
1583 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1584 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1585 | Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1586 | emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); | ||
1587 | emit_nm(as, A64I_CMPw, left, right); | ||
1588 | } | ||
1589 | |||
/* Assemble FP MIN/MAX as an FCMPd of left and right plus an FCSELd with
** condition fcc. Register numbers are reduced to 5 bits up front. Note the
** select receives (right, left) while the compare receives (left, right).
*/
1590 | static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) | ||
1591 | { | ||
1592 | Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); | ||
1593 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
/* ra_alloc2 returns both registers packed: left in bits 0-7, right in bits 8+. */
1594 | right = ((left >> 8) & 31); left &= 31; | ||
1595 | emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left); | ||
1596 | emit_nm(as, A64I_FCMPd, left, right); | ||
1597 | } | ||
1598 | |||
1599 | static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) | ||
1600 | { | ||
1601 | if (irt_isnum(ir->t)) | ||
1602 | asm_fpmin_max(as, ir, fcc); | ||
1603 | else | ||
1604 | asm_intmin_max(as, ir, cc); | ||
1605 | } | ||
1606 | |||
/* MIN/MAX handlers: the first condition is used by the integer CSEL, the
** second by the FP FCSEL (see asm_min_max).
*/
1607 | #define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL) | ||
1608 | #define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE) | ||
1609 | |||
1610 | /* -- Comparisons --------------------------------------------------------- */ | ||
1611 | |||
1612 | /* Map of comparisons to flags. ORDER IR. */ | ||
/* One byte per comparison op, indexed by IR opcode up to IR_ABC.
** Low nibble: condition for integer compares (read with & 15 in asm_intcomp).
** High nibble: condition for FP compares (read with >> 4 in asm_fpcomp).
** Both are handed to asm_guardcc(). The entries look like the inverse of the
** op (e.g. LT -> CC_GE), presumably the condition on which the guard exits
** -- confirm against asm_guardcc.
*/
1613 | static const uint8_t asm_compmap[IR_ABC+1] = { | ||
1614 | /* op FP swp int cc FP cc */ | ||
1615 | /* LT */ CC_GE + (CC_HS << 4), | ||
1616 | /* GE x */ CC_LT + (CC_HI << 4), | ||
1617 | /* LE */ CC_GT + (CC_HI << 4), | ||
1618 | /* GT x */ CC_LE + (CC_HS << 4), | ||
1619 | /* ULT x */ CC_HS + (CC_LS << 4), | ||
1620 | /* UGE */ CC_LO + (CC_LO << 4), | ||
1621 | /* ULE x */ CC_HI + (CC_LO << 4), | ||
1622 | /* UGT */ CC_LS + (CC_LS << 4), | ||
1623 | /* EQ */ CC_NE + (CC_NE << 4), | ||
1624 | /* NE */ CC_EQ + (CC_EQ << 4), | ||
1625 | /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ | ||
1626 | }; | ||
1627 | |||
1628 | /* FP comparisons. */ | ||
/* Assemble a guarded FP comparison.
** swp is derived from bits of the opcode; presumably it selects the ops
** marked 'x' in the "FP swp" column of asm_compmap -- confirm against the
** IR opcode order. With swp set the two operands are exchanged.
** A non-swapped compare against a constant with all-zero bits (+0.0) uses
** FCMPZd, which needs no second register.
*/
1629 | static void asm_fpcomp(ASMState *as, IRIns *ir) | ||
1630 | { | ||
1631 | Reg left, right; | ||
1632 | A64Ins ai; | ||
1633 | int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); | ||
1634 | if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { | ||
1635 | left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); | ||
1636 | right = 0; | ||
1637 | ai = A64I_FCMPZd; | ||
1638 | } else { | ||
/* ra_alloc2 returns both registers packed: op1 in bits 0-7, op2 in bits 8+. */
1639 | left = ra_alloc2(as, ir, RSET_FPR); | ||
1640 | if (swp) { | ||
1641 | right = (left & 31); left = ((left >> 8) & 31); | ||
1642 | } else { | ||
1643 | right = ((left >> 8) & 31); left &= 31; | ||
1644 | } | ||
1645 | ai = A64I_FCMPd; | ||
1646 | } | ||
/* The FP condition sits in the high nibble of the asm_compmap entry. */
1647 | asm_guardcc(as, (asm_compmap[ir->o] >> 4)); | ||
1648 | emit_nm(as, ai, left, right); | ||
1649 | } | ||
1650 | |||
1651 | /* Integer comparisons. */ | ||
1652 | static void asm_intcomp(ASMState *as, IRIns *ir) | ||
1653 | { | ||
1654 | A64CC oldcc, cc = (asm_compmap[ir->o] & 15); | ||
1655 | A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; | ||
1656 | IRRef lref = ir->op1, rref = ir->op2; | ||
1657 | Reg left; | ||
1658 | uint32_t m; | ||
1659 | int cmpprev0 = 0; | ||
1660 | lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || | ||
1661 | irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), | ||
1662 | "bad comparison data type %d", irt_type(ir->t)); | ||
1663 | if (asm_swapops(as, lref, rref)) { | ||
1664 | IRRef tmp = lref; lref = rref; rref = tmp; | ||
1665 | if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ | ||
1666 | else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ | ||
1667 | } | ||
1668 | oldcc = cc; | ||
1669 | if (irref_isk(rref) && get_k64val(as, rref) == 0) { | ||
1670 | IRIns *irl = IR(lref); | ||
1671 | if (cc == CC_GE) cc = CC_PL; | ||
1672 | else if (cc == CC_LT) cc = CC_MI; | ||
1673 | else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ | ||
1674 | cmpprev0 = (irl+1 == ir); | ||
1675 | /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ | ||
1676 | if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { | ||
1677 | IRRef blref = irl->op1, brref = irl->op2; | ||
1678 | uint32_t m2 = 0; | ||
1679 | Reg bleft; | ||
1680 | if (asm_swapops(as, blref, brref)) { | ||
1681 | Reg tmp = blref; blref = brref; brref = tmp; | ||
1682 | } | ||
1683 | if (irref_isk(brref)) { | ||
1684 | uint64_t k = get_k64val(as, brref); | ||
1685 | if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { | ||
1686 | asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, | ||
1687 | ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); | ||
1688 | return; | ||
1689 | } | ||
1690 | m2 = emit_isk13(k, irt_is64(irl->t)); | ||
1691 | } | ||
1692 | bleft = ra_alloc1(as, blref, RSET_GPR); | ||
1693 | ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); | ||
1694 | if (!m2) | ||
1695 | m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); | ||
1696 | asm_guardcc(as, cc); | ||
1697 | emit_n(as, ai^m2, bleft); | ||
1698 | return; | ||
1699 | } | ||
1700 | if (cc == CC_EQ || cc == CC_NE) { | ||
1701 | /* Combine cmp-bcc into cbz/cbnz. */ | ||
1702 | ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; | ||
1703 | if (irt_is64(ir->t)) ai |= A64I_X; | ||
1704 | asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); | ||
1705 | return; | ||
1706 | } | ||
1707 | } | ||
1708 | nocombine: | ||
1709 | left = ra_alloc1(as, lref, RSET_GPR); | ||
1710 | m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); | ||
1711 | asm_guardcc(as, cc); | ||
1712 | emit_n(as, ai^m, left); | ||
1713 | /* Signed comparison with zero and referencing previous ins? */ | ||
1714 | if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) | ||
1715 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ | ||
1716 | } | ||
1717 | |||
1718 | static void asm_comp(ASMState *as, IRIns *ir) | ||
1719 | { | ||
1720 | if (irt_isnum(ir->t)) | ||
1721 | asm_fpcomp(as, ir); | ||
1722 | else | ||
1723 | asm_intcomp(as, ir); | ||
1724 | } | ||
1725 | |||
/* Equality comparisons use the same handler as ordered comparisons. */
1726 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1727 | |||
1728 | /* -- Split register ops -------------------------------------------------- */ | ||
1729 | |||
1730 | /* Hiword op of a split 64/64 bit op. Previous op is the loword op. */ | ||
/* Handle HIOP, whose loword partner is the preceding instruction (ir-1).
** Nothing is emitted for an unused hiword. For the call ops, an unused
** loword is still allocated to RID_RETLO so the call counts as used. Any
** other loword op triggers an assertion.
*/
1731 | static void asm_hiop(ASMState *as, IRIns *ir) | ||
1732 | { | ||
1733 | /* HIOP is marked as a store because it needs its own DCE logic. */ | ||
1734 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | ||
/* With DCE disabled both halves are treated as used. */
1735 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | ||
1736 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | ||
1737 | switch ((ir-1)->o) { | ||
1738 | case IR_CALLN: | ||
1739 | case IR_CALLL: | ||
1740 | case IR_CALLS: | ||
1741 | case IR_CALLXS: | ||
1742 | if (!uselo) | ||
1743 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | ||
1744 | break; | ||
1745 | default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; | ||
1746 | } | ||
1747 | } | ||
1748 | |||
1749 | /* -- Profiling ----------------------------------------------------------- */ | ||
1750 | |||
/* Assemble PROF: load the hookmask byte of the global state into RID_TMP,
** test it against HOOK_PROFILE and attach a CC_NE guard. The IR instruction
** itself is not inspected.
** NOTE(review): calls are listed last-instruction-first -- confirm emission order.
*/
1751 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1752 | { | ||
/* HOOK_PROFILE must be encodable as a logical immediate; k == 0 means it is not. */
1753 | uint32_t k = emit_isk13(HOOK_PROFILE, 0); | ||
1754 | lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13"); | ||
1755 | UNUSED(ir); | ||
1756 | asm_guardcc(as, CC_NE); | ||
1757 | emit_n(as, A64I_TSTw^k, RID_TMP); | ||
1758 | emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); | ||
1759 | } | ||
1760 | |||
1761 | /* -- Stack handling ------------------------------------------------------ */ | ||
1762 | |||
1763 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | ||
/* Emit the Lua stack overflow check.
**   topslot  highest slot needed; 8*topslot must fit a 12 bit immediate.
**   irp      parent BASE instruction for side traces, NULL otherwise.
**   allow    registers that may be used as a temporary for the base.
**   exitno   exit whose stub is branched to when the check fails.
** The code loads cur_L and its maxstack, subtracts the base pointer and
** compares the difference against 8*topslot, branching to the exit stub on
** CC_LS. If the parent base is spilled and no register is allowed, RID_RET
** is borrowed and saved/restored through [sp].
** NOTE(review): calls are listed last-instruction-first -- confirm emission order.
*/
1764 | static void asm_stack_check(ASMState *as, BCReg topslot, | ||
1765 | IRIns *irp, RegSet allow, ExitNo exitno) | ||
1766 | { | ||
1767 | Reg pbase; | ||
1768 | uint32_t k; | ||
/* Pick the register that holds the base pointer for the subtraction. */
1769 | if (irp) { | ||
1770 | if (!ra_hasspill(irp->s)) { | ||
1771 | pbase = irp->r; | ||
1772 | lj_assertA(ra_hasreg(pbase), "base reg lost"); | ||
1773 | } else if (allow) { | ||
1774 | pbase = rset_pickbot(allow); | ||
1775 | } else { | ||
1776 | pbase = RID_RET; | ||
1777 | emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ | ||
1778 | } | ||
1779 | } else { | ||
1780 | pbase = RID_BASE; | ||
1781 | } | ||
1782 | emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); | ||
1783 | k = emit_isk12((8*topslot)); | ||
1784 | lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); | ||
1785 | emit_n(as, A64I_CMPx^k, RID_TMP); | ||
1786 | emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); | ||
1787 | emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, | ||
1788 | (int32_t)offsetof(lua_State, maxstack)); | ||
1789 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ | ||
/* A spilled parent base is reloaded from its spill slot into pbase. */
1790 | if (ra_hasspill(irp->s)) | ||
1791 | emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); | ||
1792 | emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); | ||
1793 | if (ra_hasspill(irp->s) && !allow) | ||
1794 | emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ | ||
1795 | } else { | ||
1796 | emit_getgl(as, RID_TMP, cur_L); | ||
1797 | } | ||
1798 | } | ||
1799 | |||
1800 | /* Restore Lua stack from on-trace state. */ | ||
/* Emit stores that write every snapshot entry back to its Lua stack slot.
** Each entry is stored at RID_BASE + 8*(slot-1-LJ_FR2). Entries flagged
** SNAP_NORESTORE are skipped. SNAP_KEYINDEX entries are written as two
** 32 bit words (value, then LJ_KEYINDEX at +4), numbers as an FP store,
** and all other values through asm_tvstore64(). checkmclim() runs after
** every entry.
*/
1801 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | ||
1802 | { | ||
1803 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | ||
/* flinks is only needed by the consistency assertion at the end. */
1804 | #ifdef LUA_USE_ASSERT | ||
1805 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
1806 | #endif | ||
1807 | MSize n, nent = snap->nent; | ||
1808 | /* Store the value of all modified slots to the Lua stack. */ | ||
1809 | for (n = 0; n < nent; n++) { | ||
1810 | SnapEntry sn = map[n]; | ||
1811 | BCReg s = snap_slot(sn); | ||
1812 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); | ||
1813 | IRRef ref = snap_ref(sn); | ||
1814 | IRIns *ir = IR(ref); | ||
1815 | if ((sn & SNAP_NORESTORE)) | ||
1816 | continue; | ||
1817 | if ((sn & SNAP_KEYINDEX)) { | ||
1818 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
1819 | Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) : | ||
1820 | ra_alloc1(as, ref, allow); | ||
1821 | rset_clear(allow, r); | ||
1822 | emit_lso(as, A64I_STRw, r, RID_BASE, ofs); | ||
1823 | emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4); | ||
1824 | } else if (irt_isnum(ir->t)) { | ||
1825 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
1826 | emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); | ||
1827 | } else { | ||
1828 | asm_tvstore64(as, RID_BASE, ofs, ref); | ||
1829 | } | ||
1830 | checkmclim(as); | ||
1831 | } | ||
1832 | lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); | ||
1833 | } | ||
1834 | |||
1835 | /* -- GC handling --------------------------------------------------------- */ | ||
1836 | |||
1837 | /* Marker to prevent patching the GC check exit. */ | ||
/* Encodes an ORRx with RID_TMP as destination and both sources. It is
** written into the code stream by asm_gc_check() next to the GC exit guard.
*/
1838 | #define ARM64_NOPATCH_GC_CHECK \ | ||
1839 | (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP)) | ||
1840 | |||
1841 | /* Check GC threshold and do one or more GC steps. */ | ||
/* Emit the GC check: load gc.total and gc.threshold, compare them and branch
** to l_end on CC_LS; otherwise call lj_gc_step_jit(g, steps) with the step
** count accumulated in as->gcsteps, and guard on the call's result being
** zero (CBNZ on RID_RET). The ARM64_NOPATCH_GC_CHECK marker word is written
** directly into the code stream next to that guard. Resets as->gcsteps.
** NOTE(review): calls are listed last-instruction-first -- confirm emission order.
*/
1842 | static void asm_gc_check(ASMState *as) | ||
1843 | { | ||
1844 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; | ||
1845 | IRRef args[2]; | ||
1846 | MCLabel l_end; | ||
1847 | Reg tmp2; | ||
1848 | ra_evictset(as, RSET_SCRATCH); | ||
1849 | l_end = emit_label(as); | ||
1850 | /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ | ||
1851 | asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ | ||
/* Raw write of the marker word; mcp is pre-decremented, i.e. code grows downward. */
1852 | *--as->mcp = ARM64_NOPATCH_GC_CHECK; | ||
1853 | args[0] = ASMREF_TMP1; /* global_State *g */ | ||
1854 | args[1] = ASMREF_TMP2; /* MSize steps */ | ||
1855 | asm_gencall(as, ci, args); | ||
1856 | emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); | ||
1857 | tmp2 = ra_releasetmp(as, ASMREF_TMP2); | ||
1858 | emit_loadi(as, tmp2, as->gcsteps); | ||
1859 | /* Jump around GC step if GC total < GC threshold. */ | ||
1860 | emit_cond_branch(as, CC_LS, l_end); | ||
1861 | emit_nm(as, A64I_CMPx, RID_TMP, tmp2); | ||
1862 | emit_getgl(as, tmp2, gc.threshold); | ||
1863 | emit_getgl(as, RID_TMP, gc.total); | ||
1864 | as->gcsteps = 0; | ||
1865 | checkmclim(as); | ||
1866 | } | ||
1867 | |||
1868 | /* -- Loop handling ------------------------------------------------------- */ | ||
1869 | |||
1870 | /* Fixup the loop branch. */ | ||
1871 | static void asm_loop_fixup(ASMState *as) | ||
1872 | { | ||
1873 | MCode *p = as->mctop; | ||
1874 | MCode *target = as->mcp; | ||
1875 | if (as->loopinv) { /* Inverted loop branch? */ | ||
1876 | uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu; | ||
1877 | ptrdiff_t delta = target - (p - 2); | ||
1878 | /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ | ||
1879 | p[-2] |= ((uint32_t)delta & mask) << 5; | ||
1880 | } else { | ||
1881 | ptrdiff_t delta = target - (p - 1); | ||
1882 | p[-1] = A64I_B | A64F_S26(delta); | ||
1883 | } | ||
1884 | } | ||
1885 | |||
1886 | /* Fixup the tail of the loop. */ | ||
/* Intentionally empty in this backend; `as` is only marked as unused. */
1887 | static void asm_loop_tail_fixup(ASMState *as) | ||
1888 | { | ||
1889 | UNUSED(as); /* Nothing to do. */ | ||
1890 | } | ||
1891 | |||
1892 | /* -- Head of trace ------------------------------------------------------- */ | ||
1893 | |||
1894 | /* Reload L register from g->cur_L. */ | ||
1895 | static void asm_head_lreg(ASMState *as) | ||
1896 | { | ||
1897 | IRIns *ir = IR(ASMREF_L); | ||
1898 | if (ra_used(ir)) { | ||
1899 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1900 | emit_getgl(as, r, cur_L); | ||
1901 | ra_evictk(as); | ||
1902 | } | ||
1903 | } | ||
1904 | |||
1905 | /* Coalesce BASE register for a root trace. */ | ||
/* Root trace head: after handling the L register, make REF_BASE end up in
** RID_BASE. If BASE currently lives in a register that is in as->modset, or
** BASE is marked, it is spilled first.
*/
1906 | static void asm_head_root_base(ASMState *as) | ||
1907 | { | ||
1908 | IRIns *ir; | ||
1909 | asm_head_lreg(as); | ||
1910 | ir = IR(REF_BASE); | ||
1911 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | ||
1912 | ra_spill(as, ir); | ||
1913 | ra_destreg(as, ir, RID_BASE); | ||
1914 | } | ||
1915 | |||
1916 | /* Coalesce BASE register for a side trace. */ | ||
/* Side trace head: tie REF_BASE to wherever the parent trace (irp) keeps
** its base. Returns `allow` with the register used for BASE removed.
** - Parent base spilled: BASE gets any register from `allow`.
** - Parent base in register r: BASE is forced into r; if r is occupied by
**   another ref, that ref is restored first.
** As for root traces, BASE is spilled first if its register is in
** as->modset or BASE is marked.
*/
1917 | static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | ||
1918 | { | ||
1919 | IRIns *ir; | ||
1920 | asm_head_lreg(as); | ||
1921 | ir = IR(REF_BASE); | ||
1922 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | ||
1923 | ra_spill(as, ir); | ||
1924 | if (ra_hasspill(irp->s)) { | ||
1925 | rset_clear(allow, ra_dest(as, ir, allow)); | ||
1926 | } else { | ||
1927 | Reg r = irp->r; | ||
1928 | lj_assertA(ra_hasreg(r), "base reg lost"); | ||
1929 | rset_clear(allow, r); | ||
/* r is neither BASE's current register nor free: restore its current occupant. */
1930 | if (r != ir->r && !rset_test(as->freeset, r)) | ||
1931 | ra_restore(as, regcost_ref(as->cost[r])); | ||
1932 | ra_destreg(as, ir, r); | ||
1933 | } | ||
1934 | return allow; | ||
1935 | } | ||
1936 | |||
1937 | /* -- Tail of trace ------------------------------------------------------- */ | ||
1938 | |||
1939 | /* Fixup the tail code. */ | ||
/* Patch the two words reserved at the end of the trace.
**   lnk  trace number to link to, or 0 to exit to the interpreter.
** With a non-zero stack adjustment, the word at p[-2] becomes
** ADD sp, sp, #spadj (which must fit a 12 bit immediate). With none, the
** top of the code is lowered by one word instead. The final word at p[-1]
** becomes a branch to the linked trace's mcode or to lj_vm_exit_interp.
*/
1940 | static void asm_tail_fixup(ASMState *as, TraceNo lnk) | ||
1941 | { | ||
1942 | MCode *p = as->mctop; | ||
1943 | MCode *target; | ||
1944 | /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ | ||
1945 | int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); | ||
1946 | if (spadj == 0) { | ||
/* No adjustment needed: p and as->mctop both move down by one word. */
1947 | *--p = A64I_LE(A64I_NOP); | ||
1948 | as->mctop = p; | ||
1949 | } else { | ||
1950 | /* Patch stack adjustment. */ | ||
1951 | uint32_t k = emit_isk12(spadj); | ||
1952 | lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); | ||
1953 | p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); | ||
1954 | } | ||
1955 | /* Patch exit branch. */ | ||
1956 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; | ||
/* The branch lives at p-1, so its offset is target-(p-1). */
1957 | p[-1] = A64I_B | A64F_S26((target-p)+1); | ||
1958 | } | ||
1959 | |||
1960 | /* Prepare tail of code. */ | ||
1961 | static void asm_tail_prep(ASMState *as) | ||
1962 | { | ||
1963 | MCode *p = as->mctop - 1; /* Leave room for exit branch. */ | ||
1964 | if (as->loopref) { | ||
1965 | as->invmcp = as->mcp = p; | ||
1966 | } else { | ||
1967 | as->mcp = p-1; /* Leave room for stack pointer adjustment. */ | ||
1968 | as->invmcp = NULL; | ||
1969 | } | ||
1970 | *p = 0; /* Prevent load/store merging. */ | ||
1971 | } | ||
1972 | |||
1973 | /* -- Trace setup --------------------------------------------------------- */ | ||
1974 | |||
1975 | /* Ensure there are enough stack slots for call arguments. */ | ||
1976 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | ||
1977 | { | ||
1978 | IRRef args[CCI_NARGS_MAX*2]; | ||
1979 | uint32_t i, nargs = CCI_XNARGS(ci); | ||
1980 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | ||
1981 | asm_collectargs(as, ir, ci, args); | ||
1982 | for (i = 0; i < nargs; i++) { | ||
1983 | if (args[i] && irt_isfp(IR(args[i])->t)) { | ||
1984 | if (nfpr > 0) nfpr--; else nslots += 2; | ||
1985 | } else { | ||
1986 | if (ngpr > 0) ngpr--; else nslots += 2; | ||
1987 | } | ||
1988 | } | ||
1989 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | ||
1990 | as->evenspill = nslots; | ||
1991 | return REGSP_HINT(RID_RET); | ||
1992 | } | ||
1993 | |||
1994 | static void asm_setup_target(ASMState *as) | ||
1995 | { | ||
1996 | /* May need extra exit for asm_stack_check on side traces. */ | ||
1997 | asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); | ||
1998 | } | ||
1999 | |||
2000 | #if LJ_BE | ||
2001 | /* ARM64 instructions are always little-endian. Swap for ARM64BE. */ | ||
2002 | static void asm_mcode_fixup(MCode *mcode, MSize size) | ||
2003 | { | ||
2004 | MCode *pe = (MCode *)((char *)mcode + size); | ||
2005 | while (mcode < pe) { | ||
2006 | MCode ins = *mcode; | ||
2007 | *mcode++ = lj_bswap(ins); | ||
2008 | } | ||
2009 | } | ||
2010 | #define LJ_TARGET_MCODE_FIXUP 1 | ||
2011 | #endif | ||
2012 | |||
2013 | /* -- Trace patching ------------------------------------------------------ */ | ||
2014 | |||
2015 | /* Patch exit jumps of existing machine code to a new target. */ | ||
2016 | void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | ||
2017 | { | ||
2018 | MCode *p = T->mcode; | ||
2019 | MCode *pe = (MCode *)((char *)p + T->szmcode); | ||
2020 | MCode *cstart = NULL; | ||
2021 | MCode *mcarea = lj_mcode_patch(J, p, 0); | ||
2022 | MCode *px = exitstub_trace_addr(T, exitno); | ||
2023 | int patchlong = 1; | ||
2024 | /* Note: this assumes a trace exit is only ever patched once. */ | ||
2025 | for (; p < pe; p++) { | ||
2026 | /* Look for exitstub branch, replace with branch to target. */ | ||
2027 | ptrdiff_t delta = target - p; | ||
2028 | MCode ins = A64I_LE(*p); | ||
2029 | if ((ins & 0xff000000u) == 0x54000000u && | ||
2030 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { | ||
2031 | /* Patch bcc, if within range. */ | ||
2032 | if (A64F_S_OK(delta, 19)) { | ||
2033 | *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); | ||
2034 | if (!cstart) cstart = p; | ||
2035 | } | ||
2036 | } else if ((ins & 0xfc000000u) == 0x14000000u && | ||
2037 | ((ins ^ (px-p)) & 0x03ffffffu) == 0) { | ||
2038 | /* Patch b. */ | ||
2039 | lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); | ||
2040 | *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta)); | ||
2041 | if (!cstart) cstart = p; | ||
2042 | } else if ((ins & 0x7e000000u) == 0x34000000u && | ||
2043 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { | ||
2044 | /* Patch cbz/cbnz, if within range. */ | ||
2045 | if (p[-1] == ARM64_NOPATCH_GC_CHECK) { | ||
2046 | patchlong = 0; | ||
2047 | } else if (A64F_S_OK(delta, 19)) { | ||
2048 | *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); | ||
2049 | if (!cstart) cstart = p; | ||
2050 | } | ||
2051 | } else if ((ins & 0x7e000000u) == 0x36000000u && | ||
2052 | ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { | ||
2053 | /* Patch tbz/tbnz, if within range. */ | ||
2054 | if (A64F_S_OK(delta, 14)) { | ||
2055 | *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta)); | ||
2056 | if (!cstart) cstart = p; | ||
2057 | } | ||
2058 | } | ||
2059 | } | ||
2060 | /* Always patch long-range branch in exit stub itself. Except, if we can't. */ | ||
2061 | if (patchlong) { | ||
2062 | ptrdiff_t delta = target - px; | ||
2063 | lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); | ||
2064 | *px = A64I_B | A64F_S26(delta); | ||
2065 | if (!cstart) cstart = px; | ||
2066 | } | ||
2067 | if (cstart) lj_mcode_sync(cstart, px+1); | ||
2068 | lj_mcode_patch(J, mcarea, 1); | ||
2069 | } | ||
2070 | |||
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index c0e491a6..db42b8f3 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) | |||
23 | { | 23 | { |
24 | Reg r = IR(ref)->r; | 24 | Reg r = IR(ref)->r; |
25 | if (ra_noreg(r)) { | 25 | if (ra_noreg(r)) { |
26 | if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) | 26 | if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) |
27 | return RID_ZERO; | 27 | return RID_ZERO; |
28 | r = ra_allocref(as, ref, allow); | 28 | r = ra_allocref(as, ref, allow); |
29 | } else { | 29 | } else { |
@@ -64,17 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) | |||
64 | /* Setup spare long-range jump slots per mcarea. */ | 64 | /* Setup spare long-range jump slots per mcarea. */ |
65 | static void asm_sparejump_setup(ASMState *as) | 65 | static void asm_sparejump_setup(ASMState *as) |
66 | { | 66 | { |
67 | MCode *mxp = as->mcbot; | 67 | MCode *mxp = as->mctop; |
68 | if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { | 68 | if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { |
69 | lua_assert(MIPSI_NOP == 0); | 69 | mxp -= MIPS_SPAREJUMP*2; |
70 | lj_assertA(MIPSI_NOP == 0, "bad NOP"); | ||
70 | memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); | 71 | memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); |
71 | mxp += MIPS_SPAREJUMP*2; | 72 | as->mctop = mxp; |
72 | lua_assert(mxp < as->mctop); | 73 | } |
73 | lj_mcode_sync(as->mcbot, mxp); | 74 | } |
74 | lj_mcode_commitbot(as->J, mxp); | 75 | |
75 | as->mcbot = mxp; | 76 | static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) |
76 | as->mclim = as->mcbot + MCLIM_REDZONE; | 77 | { |
78 | MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); | ||
79 | int slot = MIPS_SPAREJUMP; | ||
80 | while (slot--) { | ||
81 | mxp -= 2; | ||
82 | if (*mxp == tjump) { | ||
83 | return mxp; | ||
84 | } else if (*mxp == MIPSI_NOP) { | ||
85 | *mxp = tjump; | ||
86 | return mxp; | ||
87 | } | ||
77 | } | 88 | } |
89 | return NULL; | ||
78 | } | 90 | } |
79 | 91 | ||
80 | /* Setup exit stub after the end of each trace. */ | 92 | /* Setup exit stub after the end of each trace. */ |
@@ -84,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as) | |||
84 | /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ | 96 | /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ |
85 | *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; | 97 | *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; |
86 | *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); | 98 | *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); |
87 | lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); | 99 | lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, |
100 | "branch target out of range"); | ||
88 | *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; | 101 | *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; |
89 | as->mctop = mxp; | 102 | as->mctop = mxp; |
90 | } | 103 | } |
@@ -101,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) | |||
101 | as->invmcp = NULL; | 114 | as->invmcp = NULL; |
102 | as->loopinv = 1; | 115 | as->loopinv = 1; |
103 | as->mcp = p+1; | 116 | as->mcp = p+1; |
117 | #if !LJ_TARGET_MIPSR6 | ||
104 | mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ | 118 | mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ |
119 | #else | ||
120 | mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : | ||
121 | (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */ | ||
122 | #endif | ||
105 | target = p; /* Patch target later in asm_loop_fixup. */ | 123 | target = p; /* Patch target later in asm_loop_fixup. */ |
106 | } | 124 | } |
107 | emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); | 125 | emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); |
@@ -165,9 +183,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |||
165 | } else if (ir->o == IR_UREFC) { | 183 | } else if (ir->o == IR_UREFC) { |
166 | if (irref_isk(ir->op1)) { | 184 | if (irref_isk(ir->op1)) { |
167 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 185 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
168 | int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); | 186 | intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; |
169 | int32_t jgl = (intptr_t)J2G(as->J); | 187 | intptr_t jgl = (intptr_t)J2G(as->J); |
170 | if ((uint32_t)(ofs-jgl) < 65536) { | 188 | if ((uintptr_t)(ofs-jgl) < 65536) { |
171 | *ofsp = ofs-jgl-32768; | 189 | *ofsp = ofs-jgl-32768; |
172 | return RID_JGL; | 190 | return RID_JGL; |
173 | } else { | 191 | } else { |
@@ -175,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |||
175 | return ra_allock(as, ofs-(int16_t)ofs, allow); | 193 | return ra_allock(as, ofs-(int16_t)ofs, allow); |
176 | } | 194 | } |
177 | } | 195 | } |
196 | } else if (ir->o == IR_TMPREF) { | ||
197 | *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); | ||
198 | return RID_JGL; | ||
178 | } | 199 | } |
179 | } | 200 | } |
180 | *ofsp = 0; | 201 | *ofsp = 0; |
@@ -189,20 +210,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, | |||
189 | Reg base; | 210 | Reg base; |
190 | if (ra_noreg(ir->r) && canfuse(as, ir)) { | 211 | if (ra_noreg(ir->r) && canfuse(as, ir)) { |
191 | if (ir->o == IR_ADD) { | 212 | if (ir->o == IR_ADD) { |
192 | int32_t ofs2; | 213 | intptr_t ofs2; |
193 | if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { | 214 | if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), |
215 | checki16(ofs2))) { | ||
194 | ref = ir->op1; | 216 | ref = ir->op1; |
195 | ofs = ofs2; | 217 | ofs = (int32_t)ofs2; |
196 | } | 218 | } |
197 | } else if (ir->o == IR_STRREF) { | 219 | } else if (ir->o == IR_STRREF) { |
198 | int32_t ofs2 = 65536; | 220 | intptr_t ofs2 = 65536; |
199 | lua_assert(ofs == 0); | 221 | lj_assertA(ofs == 0, "bad usage"); |
200 | ofs = (int32_t)sizeof(GCstr); | 222 | ofs = (int32_t)sizeof(GCstr); |
201 | if (irref_isk(ir->op2)) { | 223 | if (irref_isk(ir->op2)) { |
202 | ofs2 = ofs + IR(ir->op2)->i; | 224 | ofs2 = ofs + get_kval(as, ir->op2); |
203 | ref = ir->op1; | 225 | ref = ir->op1; |
204 | } else if (irref_isk(ir->op1)) { | 226 | } else if (irref_isk(ir->op1)) { |
205 | ofs2 = ofs + IR(ir->op1)->i; | 227 | ofs2 = ofs + get_kval(as, ir->op1); |
206 | ref = ir->op2; | 228 | ref = ir->op2; |
207 | } | 229 | } |
208 | if (!checki16(ofs2)) { | 230 | if (!checki16(ofs2)) { |
@@ -210,7 +232,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, | |||
210 | Reg right, left = ra_alloc2(as, ir, allow); | 232 | Reg right, left = ra_alloc2(as, ir, allow); |
211 | right = (left >> 8); left &= 255; | 233 | right = (left >> 8); left &= 255; |
212 | emit_hsi(as, mi, rt, RID_TMP, ofs); | 234 | emit_hsi(as, mi, rt, RID_TMP, ofs); |
213 | emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); | 235 | emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); |
214 | return; | 236 | return; |
215 | } | 237 | } |
216 | ofs = ofs2; | 238 | ofs = ofs2; |
@@ -225,29 +247,43 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, | |||
225 | /* Generate a call to a C function. */ | 247 | /* Generate a call to a C function. */ |
226 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 248 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
227 | { | 249 | { |
228 | uint32_t n, nargs = CCI_NARGS(ci); | 250 | uint32_t n, nargs = CCI_XNARGS(ci); |
229 | int32_t ofs = 16; | 251 | int32_t ofs = LJ_32 ? 16 : 0; |
252 | #if LJ_SOFTFP | ||
253 | Reg gpr = REGARG_FIRSTGPR; | ||
254 | #else | ||
230 | Reg gpr, fpr = REGARG_FIRSTFPR; | 255 | Reg gpr, fpr = REGARG_FIRSTFPR; |
256 | #endif | ||
231 | if ((void *)ci->func) | 257 | if ((void *)ci->func) |
232 | emit_call(as, (void *)ci->func); | 258 | emit_call(as, (void *)ci->func, 1); |
259 | #if !LJ_SOFTFP | ||
233 | for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) | 260 | for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) |
234 | as->cost[gpr] = REGCOST(~0u, ASMREF_L); | 261 | as->cost[gpr] = REGCOST(~0u, ASMREF_L); |
235 | gpr = REGARG_FIRSTGPR; | 262 | gpr = REGARG_FIRSTGPR; |
263 | #endif | ||
236 | for (n = 0; n < nargs; n++) { /* Setup args. */ | 264 | for (n = 0; n < nargs; n++) { /* Setup args. */ |
237 | IRRef ref = args[n]; | 265 | IRRef ref = args[n]; |
238 | if (ref) { | 266 | if (ref) { |
239 | IRIns *ir = IR(ref); | 267 | IRIns *ir = IR(ref); |
268 | #if !LJ_SOFTFP | ||
240 | if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && | 269 | if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && |
241 | !(ci->flags & CCI_VARARG)) { | 270 | !(ci->flags & CCI_VARARG)) { |
242 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ | 271 | lj_assertA(rset_test(as->freeset, fpr), |
272 | "reg %d not free", fpr); /* Already evicted. */ | ||
243 | ra_leftov(as, fpr, ref); | 273 | ra_leftov(as, fpr, ref); |
244 | fpr += 2; | 274 | fpr += LJ_32 ? 2 : 1; |
245 | gpr += irt_isnum(ir->t) ? 2 : 1; | 275 | gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; |
246 | } else { | 276 | } else |
277 | #endif | ||
278 | { | ||
279 | #if LJ_32 && !LJ_SOFTFP | ||
247 | fpr = REGARG_LASTFPR+1; | 280 | fpr = REGARG_LASTFPR+1; |
248 | if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; | 281 | #endif |
282 | if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; | ||
249 | if (gpr <= REGARG_LASTGPR) { | 283 | if (gpr <= REGARG_LASTGPR) { |
250 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ | 284 | lj_assertA(rset_test(as->freeset, gpr), |
285 | "reg %d not free", gpr); /* Already evicted. */ | ||
286 | #if !LJ_SOFTFP | ||
251 | if (irt_isfp(ir->t)) { | 287 | if (irt_isfp(ir->t)) { |
252 | RegSet of = as->freeset; | 288 | RegSet of = as->freeset; |
253 | Reg r; | 289 | Reg r; |
@@ -256,31 +292,56 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
256 | r = ra_alloc1(as, ref, RSET_FPR); | 292 | r = ra_alloc1(as, ref, RSET_FPR); |
257 | as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); | 293 | as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); |
258 | if (irt_isnum(ir->t)) { | 294 | if (irt_isnum(ir->t)) { |
295 | #if LJ_32 | ||
259 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); | 296 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); |
260 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); | 297 | emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); |
261 | lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ | 298 | lj_assertA(rset_test(as->freeset, gpr+1), |
299 | "reg %d not free", gpr+1); /* Already evicted. */ | ||
262 | gpr += 2; | 300 | gpr += 2; |
301 | #else | ||
302 | emit_tg(as, MIPSI_DMFC1, gpr, r); | ||
303 | gpr++; fpr++; | ||
304 | #endif | ||
263 | } else if (irt_isfloat(ir->t)) { | 305 | } else if (irt_isfloat(ir->t)) { |
264 | emit_tg(as, MIPSI_MFC1, gpr, r); | 306 | emit_tg(as, MIPSI_MFC1, gpr, r); |
265 | gpr++; | 307 | gpr++; |
308 | #if LJ_64 | ||
309 | fpr++; | ||
310 | #endif | ||
266 | } | 311 | } |
267 | } else { | 312 | } else |
313 | #endif | ||
314 | { | ||
268 | ra_leftov(as, gpr, ref); | 315 | ra_leftov(as, gpr, ref); |
269 | gpr++; | 316 | gpr++; |
317 | #if LJ_64 && !LJ_SOFTFP | ||
318 | fpr++; | ||
319 | #endif | ||
270 | } | 320 | } |
271 | } else { | 321 | } else { |
272 | Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 322 | Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); |
323 | #if LJ_32 | ||
273 | if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; | 324 | if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; |
274 | emit_spstore(as, ir, r, ofs); | 325 | emit_spstore(as, ir, r, ofs); |
275 | ofs += irt_isnum(ir->t) ? 8 : 4; | 326 | ofs += irt_isnum(ir->t) ? 8 : 4; |
327 | #else | ||
328 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0)); | ||
329 | ofs += 8; | ||
330 | #endif | ||
276 | } | 331 | } |
277 | } | 332 | } |
278 | } else { | 333 | } else { |
334 | #if !LJ_SOFTFP | ||
279 | fpr = REGARG_LASTFPR+1; | 335 | fpr = REGARG_LASTFPR+1; |
280 | if (gpr <= REGARG_LASTGPR) | 336 | #endif |
337 | if (gpr <= REGARG_LASTGPR) { | ||
281 | gpr++; | 338 | gpr++; |
282 | else | 339 | #if LJ_64 && !LJ_SOFTFP |
283 | ofs += 4; | 340 | fpr++; |
341 | #endif | ||
342 | } else { | ||
343 | ofs += LJ_32 ? 4 : 8; | ||
344 | } | ||
284 | } | 345 | } |
285 | checkmclim(as); | 346 | checkmclim(as); |
286 | } | 347 | } |
@@ -291,28 +352,38 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
291 | { | 352 | { |
292 | RegSet drop = RSET_SCRATCH; | 353 | RegSet drop = RSET_SCRATCH; |
293 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | 354 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); |
355 | #if !LJ_SOFTFP | ||
294 | if ((ci->flags & CCI_NOFPRCLOBBER)) | 356 | if ((ci->flags & CCI_NOFPRCLOBBER)) |
295 | drop &= ~RSET_FPR; | 357 | drop &= ~RSET_FPR; |
358 | #endif | ||
296 | if (ra_hasreg(ir->r)) | 359 | if (ra_hasreg(ir->r)) |
297 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 360 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
298 | if (hiop && ra_hasreg((ir+1)->r)) | 361 | if (hiop && ra_hasreg((ir+1)->r)) |
299 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ | 362 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ |
300 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 363 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
301 | if (ra_used(ir)) { | 364 | if (ra_used(ir)) { |
302 | lua_assert(!irt_ispri(ir->t)); | 365 | lj_assertA(!irt_ispri(ir->t), "PRI dest"); |
303 | if (irt_isfp(ir->t)) { | 366 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
304 | if ((ci->flags & CCI_CASTU64)) { | 367 | if ((ci->flags & CCI_CASTU64)) { |
305 | int32_t ofs = sps_scale(ir->s); | 368 | int32_t ofs = sps_scale(ir->s); |
306 | Reg dest = ir->r; | 369 | Reg dest = ir->r; |
307 | if (ra_hasreg(dest)) { | 370 | if (ra_hasreg(dest)) { |
308 | ra_free(as, dest); | 371 | ra_free(as, dest); |
309 | ra_modified(as, dest); | 372 | ra_modified(as, dest); |
373 | #if LJ_32 | ||
310 | emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); | 374 | emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); |
311 | emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); | 375 | emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); |
376 | #else | ||
377 | emit_tg(as, MIPSI_DMTC1, RID_RET, dest); | ||
378 | #endif | ||
312 | } | 379 | } |
313 | if (ofs) { | 380 | if (ofs) { |
381 | #if LJ_32 | ||
314 | emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); | 382 | emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); |
315 | emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); | 383 | emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); |
384 | #else | ||
385 | emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); | ||
386 | #endif | ||
316 | } | 387 | } |
317 | } else { | 388 | } else { |
318 | ra_destreg(as, ir, RID_FPRET); | 389 | ra_destreg(as, ir, RID_FPRET); |
@@ -325,15 +396,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
325 | } | 396 | } |
326 | } | 397 | } |
327 | 398 | ||
328 | static void asm_call(ASMState *as, IRIns *ir) | ||
329 | { | ||
330 | IRRef args[CCI_NARGS_MAX]; | ||
331 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
332 | asm_collectargs(as, ir, ci, args); | ||
333 | asm_setupresult(as, ir, ci); | ||
334 | asm_gencall(as, ci, args); | ||
335 | } | ||
336 | |||
337 | static void asm_callx(ASMState *as, IRIns *ir) | 399 | static void asm_callx(ASMState *as, IRIns *ir) |
338 | { | 400 | { |
339 | IRRef args[CCI_NARGS_MAX*2]; | 401 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -346,7 +408,7 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
346 | func = ir->op2; irf = IR(func); | 408 | func = ir->op2; irf = IR(func); |
347 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } | 409 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } |
348 | if (irref_isk(func)) { /* Call to constant address. */ | 410 | if (irref_isk(func)) { /* Call to constant address. */ |
349 | ci.func = (ASMFunction)(void *)(irf->i); | 411 | ci.func = (ASMFunction)(void *)get_kval(as, func); |
350 | } else { /* Need specific register for indirect calls. */ | 412 | } else { /* Need specific register for indirect calls. */ |
351 | Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); | 413 | Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); |
352 | MCode *p = as->mcp; | 414 | MCode *p = as->mcp; |
@@ -361,27 +423,23 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
361 | asm_gencall(as, &ci, args); | 423 | asm_gencall(as, &ci, args); |
362 | } | 424 | } |
363 | 425 | ||
364 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | 426 | #if !LJ_SOFTFP |
365 | { | ||
366 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
367 | IRRef args[2]; | ||
368 | args[0] = ir->op1; | ||
369 | args[1] = ir->op2; | ||
370 | asm_setupresult(as, ir, ci); | ||
371 | asm_gencall(as, ci, args); | ||
372 | } | ||
373 | |||
374 | static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) | 427 | static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) |
375 | { | 428 | { |
376 | /* The modified regs must match with the *.dasc implementation. */ | 429 | /* The modified regs must match with the *.dasc implementation. */ |
377 | RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| | 430 | RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| |
378 | RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); | 431 | RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) |
432 | #if LJ_TARGET_MIPSR6 | ||
433 | |RID2RSET(RID_F21) | ||
434 | #endif | ||
435 | ; | ||
379 | if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); | 436 | if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); |
380 | ra_evictset(as, drop); | 437 | ra_evictset(as, drop); |
381 | ra_destreg(as, ir, RID_FPRET); | 438 | ra_destreg(as, ir, RID_FPRET); |
382 | emit_call(as, (void *)lj_ir_callinfo[id].func); | 439 | emit_call(as, (void *)lj_ir_callinfo[id].func, 0); |
383 | ra_leftov(as, REGARG_FIRSTFPR, ir->op1); | 440 | ra_leftov(as, REGARG_FIRSTFPR, ir->op1); |
384 | } | 441 | } |
442 | #endif | ||
385 | 443 | ||
386 | /* -- Returns ------------------------------------------------------------- */ | 444 | /* -- Returns ------------------------------------------------------------- */ |
387 | 445 | ||
@@ -390,25 +448,52 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
390 | { | 448 | { |
391 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 449 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
392 | void *pc = ir_kptr(IR(ir->op2)); | 450 | void *pc = ir_kptr(IR(ir->op2)); |
393 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 451 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
394 | as->topslot -= (BCReg)delta; | 452 | as->topslot -= (BCReg)delta; |
395 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 453 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
396 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 454 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
397 | emit_setgl(as, base, jit_base); | 455 | emit_setgl(as, base, jit_base); |
398 | emit_addptr(as, base, -8*delta); | 456 | emit_addptr(as, base, -8*delta); |
399 | asm_guard(as, MIPSI_BNE, RID_TMP, | 457 | asm_guard(as, MIPSI_BNE, RID_TMP, |
400 | ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); | 458 | ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); |
401 | emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); | 459 | emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); |
402 | } | 460 | } |
403 | 461 | ||
462 | /* -- Buffer operations --------------------------------------------------- */ | ||
463 | |||
464 | #if LJ_HASBUFFER | ||
465 | static void asm_bufhdr_write(ASMState *as, Reg sb) | ||
466 | { | ||
467 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
468 | IRIns irgc; | ||
469 | irgc.ot = IRT(0, IRT_PGC); /* GC type. */ | ||
470 | emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); | ||
471 | if ((as->flags & JIT_F_MIPSXXR2)) { | ||
472 | emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp, | ||
473 | lj_fls(SBUF_MASK_FLAG), 0); | ||
474 | } else { | ||
475 | emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp); | ||
476 | emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG); | ||
477 | } | ||
478 | emit_getgl(as, RID_TMP, cur_L); | ||
479 | emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); | ||
480 | } | ||
481 | #endif | ||
482 | |||
404 | /* -- Type conversions ---------------------------------------------------- */ | 483 | /* -- Type conversions ---------------------------------------------------- */ |
405 | 484 | ||
485 | #if !LJ_SOFTFP | ||
406 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | 486 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) |
407 | { | 487 | { |
408 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 488 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
409 | Reg dest = ra_dest(as, ir, RSET_GPR); | 489 | Reg dest = ra_dest(as, ir, RSET_GPR); |
490 | #if !LJ_TARGET_MIPSR6 | ||
410 | asm_guard(as, MIPSI_BC1F, 0, 0); | 491 | asm_guard(as, MIPSI_BC1F, 0, 0); |
411 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); | 492 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); |
493 | #else | ||
494 | asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31)); | ||
495 | emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left); | ||
496 | #endif | ||
412 | emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); | 497 | emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); |
413 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 498 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
414 | emit_fg(as, MIPSI_CVT_W_D, tmp, left); | 499 | emit_fg(as, MIPSI_CVT_W_D, tmp, left); |
@@ -424,15 +509,57 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
424 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 509 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
425 | emit_fgh(as, MIPSI_ADD_D, tmp, left, right); | 510 | emit_fgh(as, MIPSI_ADD_D, tmp, left, right); |
426 | } | 511 | } |
512 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
513 | static void asm_tointg(ASMState *as, IRIns *ir, Reg r) | ||
514 | { | ||
515 | /* The modified regs must match with the *.dasc implementation. */ | ||
516 | RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| | ||
517 | RID2RSET(RID_R1)|RID2RSET(RID_R12); | ||
518 | if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); | ||
519 | ra_evictset(as, drop); | ||
520 | /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ | ||
521 | ra_destreg(as, ir, RID_RET); | ||
522 | asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); | ||
523 | emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); | ||
524 | if (r == RID_NONE) | ||
525 | ra_leftov(as, REGARG_FIRSTGPR, ir->op1); | ||
526 | else if (r != REGARG_FIRSTGPR) | ||
527 | emit_move(as, REGARG_FIRSTGPR, r); | ||
528 | } | ||
529 | |||
530 | static void asm_tobit(ASMState *as, IRIns *ir) | ||
531 | { | ||
532 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
533 | emit_dta(as, MIPSI_SLL, dest, dest, 0); | ||
534 | asm_callid(as, ir, IRCALL_lj_vm_tobit); | ||
535 | } | ||
536 | #endif | ||
427 | 537 | ||
428 | static void asm_conv(ASMState *as, IRIns *ir) | 538 | static void asm_conv(ASMState *as, IRIns *ir) |
429 | { | 539 | { |
430 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 540 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
541 | #if !LJ_SOFTFP32 | ||
431 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 542 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
543 | #endif | ||
544 | #if LJ_64 | ||
545 | int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); | ||
546 | #endif | ||
432 | IRRef lref = ir->op1; | 547 | IRRef lref = ir->op1; |
433 | lua_assert(irt_type(ir->t) != st); | 548 | #if LJ_32 |
434 | lua_assert(!(irt_isint64(ir->t) || | 549 | /* 64 bit integer conversions are handled by SPLIT. */ |
435 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ | 550 | lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), |
551 | "IR %04d has unsplit 64 bit type", | ||
552 | (int)(ir - as->ir) - REF_BIAS); | ||
553 | #endif | ||
554 | #if LJ_SOFTFP32 | ||
555 | /* FP conversions are handled by SPLIT. */ | ||
556 | lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), | ||
557 | "IR %04d has FP type", | ||
558 | (int)(ir - as->ir) - REF_BIAS); | ||
559 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | ||
560 | #else | ||
561 | lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); | ||
562 | #if !LJ_SOFTFP | ||
436 | if (irt_isfp(ir->t)) { | 563 | if (irt_isfp(ir->t)) { |
437 | Reg dest = ra_dest(as, ir, RSET_FPR); | 564 | Reg dest = ra_dest(as, ir, RSET_FPR); |
438 | if (stfp) { /* FP to FP conversion. */ | 565 | if (stfp) { /* FP to FP conversion. */ |
@@ -448,27 +575,56 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
448 | emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); | 575 | emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); |
449 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); | 576 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); |
450 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | 577 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), |
451 | (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), | 578 | (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); |
452 | RSET_GPR); | ||
453 | emit_tg(as, MIPSI_MTC1, RID_TMP, dest); | 579 | emit_tg(as, MIPSI_MTC1, RID_TMP, dest); |
454 | emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); | 580 | emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); |
455 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); | 581 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); |
582 | #if LJ_64 | ||
583 | } else if(st == IRT_U64) { /* U64 to FP conversion. */ | ||
584 | /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ | ||
585 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
586 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | ||
587 | MCLabel l_end = emit_label(as); | ||
588 | if (irt_isfloat(ir->t)) { | ||
589 | emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); | ||
590 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], | ||
591 | rset_exclude(RSET_GPR, left)); | ||
592 | emit_fg(as, MIPSI_CVT_S_L, dest, dest); | ||
593 | } else { | ||
594 | emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); | ||
595 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], | ||
596 | rset_exclude(RSET_GPR, left)); | ||
597 | emit_fg(as, MIPSI_CVT_D_L, dest, dest); | ||
598 | } | ||
599 | emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); | ||
600 | emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); | ||
601 | emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); | ||
602 | #endif | ||
456 | } else { /* Integer to FP conversion. */ | 603 | } else { /* Integer to FP conversion. */ |
457 | Reg left = ra_alloc1(as, lref, RSET_GPR); | 604 | Reg left = ra_alloc1(as, lref, RSET_GPR); |
605 | #if LJ_32 | ||
458 | emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, | 606 | emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, |
459 | dest, dest); | 607 | dest, dest); |
460 | emit_tg(as, MIPSI_MTC1, left, dest); | 608 | emit_tg(as, MIPSI_MTC1, left, dest); |
609 | #else | ||
610 | MIPSIns mi = irt_isfloat(ir->t) ? | ||
611 | (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : | ||
612 | (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); | ||
613 | emit_fg(as, mi, dest, dest); | ||
614 | emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); | ||
615 | #endif | ||
461 | } | 616 | } |
462 | } else if (stfp) { /* FP to integer conversion. */ | 617 | } else if (stfp) { /* FP to integer conversion. */ |
463 | if (irt_isguard(ir->t)) { | 618 | if (irt_isguard(ir->t)) { |
464 | /* Checked conversions are only supported from number to int. */ | 619 | /* Checked conversions are only supported from number to int. */ |
465 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | 620 | lj_assertA(irt_isint(ir->t) && st == IRT_NUM, |
621 | "bad type for checked CONV"); | ||
466 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | 622 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); |
467 | } else { | 623 | } else { |
468 | Reg dest = ra_dest(as, ir, RSET_GPR); | 624 | Reg dest = ra_dest(as, ir, RSET_GPR); |
469 | Reg left = ra_alloc1(as, lref, RSET_FPR); | 625 | Reg left = ra_alloc1(as, lref, RSET_FPR); |
470 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 626 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
471 | if (irt_isu32(ir->t)) { | 627 | if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ |
472 | /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ | 628 | /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ |
473 | emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); | 629 | emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); |
474 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); | 630 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); |
@@ -479,25 +635,112 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
479 | tmp, left, tmp); | 635 | tmp, left, tmp); |
480 | if (st == IRT_FLOAT) | 636 | if (st == IRT_FLOAT) |
481 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | 637 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), |
482 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), | 638 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
483 | RSET_GPR); | ||
484 | else | 639 | else |
485 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | 640 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), |
486 | (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), | 641 | (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); |
487 | RSET_GPR); | 642 | #if LJ_64 |
643 | } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ | ||
644 | MCLabel l_end; | ||
645 | emit_tg(as, MIPSI_DMFC1, dest, tmp); | ||
646 | l_end = emit_label(as); | ||
647 | /* For inputs >= 2^63 add -2^64 and convert again. */ | ||
648 | if (st == IRT_NUM) { | ||
649 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); | ||
650 | emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); | ||
651 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | ||
652 | (void *)&as->J->k64[LJ_K64_M2P64], | ||
653 | rset_exclude(RSET_GPR, dest)); | ||
654 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ | ||
655 | #if !LJ_TARGET_MIPSR6 | ||
656 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | ||
657 | emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); | ||
658 | #else | ||
659 | emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); | ||
660 | emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp); | ||
661 | #endif | ||
662 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | ||
663 | (void *)&as->J->k64[LJ_K64_2P63], | ||
664 | rset_exclude(RSET_GPR, dest)); | ||
665 | } else { | ||
666 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); | ||
667 | emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); | ||
668 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | ||
669 | (void *)&as->J->k32[LJ_K32_M2P64], | ||
670 | rset_exclude(RSET_GPR, dest)); | ||
671 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ | ||
672 | #if !LJ_TARGET_MIPSR6 | ||
673 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | ||
674 | emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); | ||
675 | #else | ||
676 | emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); | ||
677 | emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp); | ||
678 | #endif | ||
679 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | ||
680 | (void *)&as->J->k32[LJ_K32_2P63], | ||
681 | rset_exclude(RSET_GPR, dest)); | ||
682 | } | ||
683 | #endif | ||
488 | } else { | 684 | } else { |
685 | #if LJ_32 | ||
489 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 686 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
490 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, | 687 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, |
491 | tmp, left); | 688 | tmp, left); |
689 | #else | ||
690 | MIPSIns mi = irt_is64(ir->t) ? | ||
691 | (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : | ||
692 | (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); | ||
693 | emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); | ||
694 | emit_fg(as, mi, left, left); | ||
695 | #endif | ||
492 | } | 696 | } |
493 | } | 697 | } |
494 | } else { | 698 | } else |
699 | #else | ||
700 | if (irt_isfp(ir->t)) { | ||
701 | #if LJ_64 && LJ_HASFFI | ||
702 | if (stfp) { /* FP to FP conversion. */ | ||
703 | asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : | ||
704 | IRCALL_softfp_d2f); | ||
705 | } else { /* Integer to FP conversion. */ | ||
706 | IRCallID cid = ((IRT_IS64 >> st) & 1) ? | ||
707 | (irt_isnum(ir->t) ? | ||
708 | (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : | ||
709 | (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : | ||
710 | (irt_isnum(ir->t) ? | ||
711 | (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : | ||
712 | (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); | ||
713 | asm_callid(as, ir, cid); | ||
714 | } | ||
715 | #else | ||
716 | asm_callid(as, ir, IRCALL_softfp_i2d); | ||
717 | #endif | ||
718 | } else if (stfp) { /* FP to integer conversion. */ | ||
719 | if (irt_isguard(ir->t)) { | ||
720 | /* Checked conversions are only supported from number to int. */ | ||
721 | lj_assertA(irt_isint(ir->t) && st == IRT_NUM, | ||
722 | "bad type for checked CONV"); | ||
723 | asm_tointg(as, ir, RID_NONE); | ||
724 | } else { | ||
725 | IRCallID cid = irt_is64(ir->t) ? | ||
726 | ((st == IRT_NUM) ? | ||
727 | (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : | ||
728 | (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : | ||
729 | ((st == IRT_NUM) ? | ||
730 | (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : | ||
731 | (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); | ||
732 | asm_callid(as, ir, cid); | ||
733 | } | ||
734 | } else | ||
735 | #endif | ||
736 | #endif | ||
737 | { | ||
495 | Reg dest = ra_dest(as, ir, RSET_GPR); | 738 | Reg dest = ra_dest(as, ir, RSET_GPR); |
496 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 739 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
497 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 740 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
498 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | 741 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); |
499 | if ((ir->op2 & IRCONV_SEXT)) { | 742 | if ((ir->op2 & IRCONV_SEXT)) { |
500 | if ((as->flags & JIT_F_MIPS32R2)) { | 743 | if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { |
501 | emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); | 744 | emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); |
502 | } else { | 745 | } else { |
503 | uint32_t shift = st == IRT_I8 ? 24 : 16; | 746 | uint32_t shift = st == IRT_I8 ? 24 : 16; |
@@ -509,94 +752,171 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
509 | (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); | 752 | (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); |
510 | } | 753 | } |
511 | } else { /* 32/64 bit integer conversions. */ | 754 | } else { /* 32/64 bit integer conversions. */ |
755 | #if LJ_32 | ||
512 | /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ | 756 | /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ |
513 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | 757 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ |
758 | #else | ||
759 | if (irt_is64(ir->t)) { | ||
760 | if (st64) { | ||
761 | /* 64/64 bit no-op (cast)*/ | ||
762 | ra_leftov(as, dest, lref); | ||
763 | } else { | ||
764 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
765 | if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ | ||
766 | emit_dta(as, MIPSI_SLL, dest, left, 0); | ||
767 | } else { /* 32 to 64 bit zero extension. */ | ||
768 | emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); | ||
769 | } | ||
770 | } | ||
771 | } else { | ||
772 | if (st64 && !(ir->op2 & IRCONV_NONE)) { | ||
773 | /* This is either a 32 bit reg/reg mov which zeroes the hiword | ||
774 | ** or a load of the loword from a 64 bit address. | ||
775 | */ | ||
776 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
777 | emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); | ||
778 | } else { /* 32/32 bit no-op (cast). */ | ||
779 | /* Do nothing, but may need to move regs. */ | ||
780 | ra_leftov(as, dest, lref); | ||
781 | } | ||
782 | } | ||
783 | #endif | ||
514 | } | 784 | } |
515 | } | 785 | } |
516 | } | 786 | } |
517 | 787 | ||
518 | #if LJ_HASFFI | ||
519 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
520 | { | ||
521 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
522 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
523 | IRCallID id; | ||
524 | const CCallInfo *ci; | ||
525 | IRRef args[2]; | ||
526 | args[LJ_BE?0:1] = ir->op1; | ||
527 | args[LJ_BE?1:0] = (ir-1)->op1; | ||
528 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
529 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
530 | ir--; | ||
531 | } else { | ||
532 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
533 | } | ||
534 | ci = &lj_ir_callinfo[id]; | ||
535 | asm_setupresult(as, ir, ci); | ||
536 | asm_gencall(as, ci, args); | ||
537 | } | ||
538 | #endif | ||
539 | |||
540 | static void asm_strto(ASMState *as, IRIns *ir) | 788 | static void asm_strto(ASMState *as, IRIns *ir) |
541 | { | 789 | { |
542 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 790 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
543 | IRRef args[2]; | 791 | IRRef args[2]; |
792 | int32_t ofs = 0; | ||
793 | #if LJ_SOFTFP32 | ||
794 | ra_evictset(as, RSET_SCRATCH); | ||
795 | if (ra_used(ir)) { | ||
796 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | ||
797 | (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { | ||
798 | int i; | ||
799 | for (i = 0; i < 2; i++) { | ||
800 | Reg r = (ir+i)->r; | ||
801 | if (ra_hasreg(r)) { | ||
802 | ra_free(as, r); | ||
803 | ra_modified(as, r); | ||
804 | emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); | ||
805 | } | ||
806 | } | ||
807 | ofs = sps_scale(ir->s & ~1); | ||
808 | } else { | ||
809 | Reg rhi = ra_dest(as, ir+1, RSET_GPR); | ||
810 | Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); | ||
811 | emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4)); | ||
812 | emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0)); | ||
813 | } | ||
814 | } | ||
815 | #else | ||
544 | RegSet drop = RSET_SCRATCH; | 816 | RegSet drop = RSET_SCRATCH; |
545 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ | 817 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ |
546 | ra_evictset(as, drop); | 818 | ra_evictset(as, drop); |
819 | ofs = sps_scale(ir->s); | ||
820 | #endif | ||
547 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ | 821 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ |
548 | args[0] = ir->op1; /* GCstr *str */ | 822 | args[0] = ir->op1; /* GCstr *str */ |
549 | args[1] = ASMREF_TMP1; /* TValue *n */ | 823 | args[1] = ASMREF_TMP1; /* TValue *n */ |
550 | asm_gencall(as, ci, args); | 824 | asm_gencall(as, ci, args); |
551 | /* Store the result to the spill slot or temp slots. */ | 825 | /* Store the result to the spill slot or temp slots. */ |
552 | emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), | 826 | emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), |
553 | RID_SP, sps_scale(ir->s)); | 827 | RID_SP, ofs); |
554 | } | 828 | } |
555 | 829 | ||
556 | /* Get pointer to TValue. */ | 830 | /* -- Memory references --------------------------------------------------- */ |
557 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 831 | |
832 | #if LJ_64 | ||
833 | /* Store tagged value for ref at base+ofs. */ | ||
834 | static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) | ||
558 | { | 835 | { |
836 | RegSet allow = rset_exclude(RSET_GPR, base); | ||
559 | IRIns *ir = IR(ref); | 837 | IRIns *ir = IR(ref); |
560 | if (irt_isnum(ir->t)) { | 838 | lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), |
561 | if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ | 839 | "store of IR type %d", irt_type(ir->t)); |
562 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | 840 | if (irref_isk(ref)) { |
563 | else /* Otherwise force a spill and use the spill slot. */ | 841 | TValue k; |
564 | emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); | 842 | lj_ir_kvalue(as->J->L, &k, ir); |
843 | emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); | ||
565 | } else { | 844 | } else { |
566 | /* Otherwise use g->tmptv to hold the TValue. */ | 845 | Reg src = ra_alloc1(as, ref, allow); |
567 | RegSet allow = rset_exclude(RSET_GPR, dest); | 846 | Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, |
568 | Reg type; | 847 | rset_exclude(allow, src)); |
569 | emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768); | 848 | emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); |
570 | if (!irt_ispri(ir->t)) { | 849 | if (irt_isinteger(ir->t)) { |
571 | Reg src = ra_alloc1(as, ref, allow); | 850 | emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); |
572 | emit_setgl(as, src, tmptv.gcr); | 851 | emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); |
852 | } else { | ||
853 | emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); | ||
573 | } | 854 | } |
574 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
575 | emit_setgl(as, type, tmptv.it); | ||
576 | } | 855 | } |
577 | } | 856 | } |
857 | #endif | ||
578 | 858 | ||
579 | static void asm_tostr(ASMState *as, IRIns *ir) | 859 | /* Get pointer to TValue. */ |
860 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) | ||
580 | { | 861 | { |
581 | IRRef args[2]; | 862 | int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); |
582 | args[0] = ASMREF_L; | 863 | if ((mode & IRTMPREF_IN1)) { |
583 | as->gcsteps++; | 864 | IRIns *ir = IR(ref); |
584 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | 865 | if (irt_isnum(ir->t)) { |
585 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | 866 | if ((mode & IRTMPREF_OUT1)) { |
586 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | 867 | #if LJ_SOFTFP |
587 | asm_setupresult(as, ir, ci); /* GCstr * */ | 868 | emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); |
588 | asm_gencall(as, ci, args); | 869 | #if LJ_64 |
589 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | 870 | emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64); |
871 | #else | ||
872 | lj_assertA(irref_isk(ref), "unsplit FP op"); | ||
873 | emit_setgl(as, | ||
874 | ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), | ||
875 | tmptv.u32.lo); | ||
876 | emit_setgl(as, | ||
877 | ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), | ||
878 | tmptv.u32.hi); | ||
879 | #endif | ||
880 | #else | ||
881 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
882 | emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); | ||
883 | emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs); | ||
884 | #endif | ||
885 | } else if (irref_isk(ref)) { | ||
886 | /* Use the number constant itself as a TValue. */ | ||
887 | ra_allockreg(as, igcptr(ir_knum(ir)), dest); | ||
888 | } else { | ||
889 | #if LJ_SOFTFP32 | ||
890 | lj_assertA(0, "unsplit FP op"); | ||
891 | #else | ||
892 | /* Otherwise force a spill and use the spill slot. */ | ||
893 | emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); | ||
894 | #endif | ||
895 | } | ||
896 | } else { | ||
897 | /* Otherwise use g->tmptv to hold the TValue. */ | ||
898 | #if LJ_32 | ||
899 | Reg type; | ||
900 | emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs); | ||
901 | if (!irt_ispri(ir->t)) { | ||
902 | Reg src = ra_alloc1(as, ref, RSET_GPR); | ||
903 | emit_setgl(as, src, tmptv.gcr); | ||
904 | } | ||
905 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) | ||
906 | type = ra_alloc1(as, ref+1, RSET_GPR); | ||
907 | else | ||
908 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR); | ||
909 | emit_setgl(as, type, tmptv.it); | ||
910 | #else | ||
911 | asm_tvstore64(as, dest, 0, ref); | ||
912 | emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs); | ||
913 | #endif | ||
914 | } | ||
590 | } else { | 915 | } else { |
591 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | 916 | emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); |
592 | args[1] = ir->op1; /* int32_t k */ | ||
593 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
594 | asm_gencall(as, ci, args); | ||
595 | } | 917 | } |
596 | } | 918 | } |
597 | 919 | ||
598 | /* -- Memory references --------------------------------------------------- */ | ||
599 | |||
600 | static void asm_aref(ASMState *as, IRIns *ir) | 920 | static void asm_aref(ASMState *as, IRIns *ir) |
601 | { | 921 | { |
602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 922 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -608,14 +928,18 @@ static void asm_aref(ASMState *as, IRIns *ir) | |||
608 | ofs += 8*IR(ir->op2)->i; | 928 | ofs += 8*IR(ir->op2)->i; |
609 | if (checki16(ofs)) { | 929 | if (checki16(ofs)) { |
610 | base = ra_alloc1(as, refa, RSET_GPR); | 930 | base = ra_alloc1(as, refa, RSET_GPR); |
611 | emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); | 931 | emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); |
612 | return; | 932 | return; |
613 | } | 933 | } |
614 | } | 934 | } |
615 | base = ra_alloc1(as, ir->op1, RSET_GPR); | 935 | base = ra_alloc1(as, ir->op1, RSET_GPR); |
616 | idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); | 936 | idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); |
617 | emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); | 937 | #if !LJ_TARGET_MIPSR6 |
938 | emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); | ||
618 | emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); | 939 | emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); |
940 | #else | ||
941 | emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base); | ||
942 | #endif | ||
619 | } | 943 | } |
620 | 944 | ||
621 | /* Inlined hash lookup. Specialized for key type and for const keys. | 945 | /* Inlined hash lookup. Specialized for key type and for const keys. |
@@ -626,21 +950,25 @@ static void asm_aref(ASMState *as, IRIns *ir) | |||
626 | ** } while ((n = nextnode(n))); | 950 | ** } while ((n = nextnode(n))); |
627 | ** return niltv(L); | 951 | ** return niltv(L); |
628 | */ | 952 | */ |
629 | static void asm_href(ASMState *as, IRIns *ir) | 953 | static void asm_href(ASMState *as, IRIns *ir, IROp merge) |
630 | { | 954 | { |
631 | RegSet allow = RSET_GPR; | 955 | RegSet allow = RSET_GPR; |
632 | int destused = ra_used(ir); | 956 | int destused = ra_used(ir); |
633 | Reg dest = ra_dest(as, ir, allow); | 957 | Reg dest = ra_dest(as, ir, allow); |
634 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | 958 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); |
635 | Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; | 959 | Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; |
960 | #if LJ_64 | ||
961 | Reg cmp64 = RID_NONE; | ||
962 | #endif | ||
636 | IRRef refkey = ir->op2; | 963 | IRRef refkey = ir->op2; |
637 | IRIns *irkey = IR(refkey); | 964 | IRIns *irkey = IR(refkey); |
965 | int isk = irref_isk(refkey); | ||
638 | IRType1 kt = irkey->t; | 966 | IRType1 kt = irkey->t; |
639 | uint32_t khash; | 967 | uint32_t khash; |
640 | MCLabel l_end, l_loop, l_next; | 968 | MCLabel l_end, l_loop, l_next; |
641 | 969 | ||
642 | rset_clear(allow, tab); | 970 | rset_clear(allow, tab); |
643 | if (irt_isnum(kt)) { | 971 | if (!LJ_SOFTFP && irt_isnum(kt)) { |
644 | key = ra_alloc1(as, refkey, RSET_FPR); | 972 | key = ra_alloc1(as, refkey, RSET_FPR); |
645 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); | 973 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); |
646 | } else { | 974 | } else { |
@@ -648,31 +976,76 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
648 | key = ra_alloc1(as, refkey, allow); | 976 | key = ra_alloc1(as, refkey, allow); |
649 | rset_clear(allow, key); | 977 | rset_clear(allow, key); |
650 | } | 978 | } |
651 | type = ra_allock(as, irt_toitype(irkey->t), allow); | 979 | #if LJ_32 |
652 | rset_clear(allow, type); | 980 | if (LJ_SOFTFP && irkey[1].o == IR_HIOP) { |
981 | if (ra_hasreg((irkey+1)->r)) { | ||
982 | type = tmpnum = (irkey+1)->r; | ||
983 | tmp1 = ra_scratch(as, allow); | ||
984 | rset_clear(allow, tmp1); | ||
985 | ra_noweak(as, tmpnum); | ||
986 | } else { | ||
987 | type = tmpnum = ra_allocref(as, refkey+1, allow); | ||
988 | } | ||
989 | rset_clear(allow, tmpnum); | ||
990 | } else { | ||
991 | type = ra_allock(as, (int32_t)irt_toitype(kt), allow); | ||
992 | rset_clear(allow, type); | ||
993 | } | ||
994 | #endif | ||
653 | } | 995 | } |
654 | tmp2 = ra_scratch(as, allow); | 996 | tmp2 = ra_scratch(as, allow); |
655 | rset_clear(allow, tmp2); | 997 | rset_clear(allow, tmp2); |
998 | #if LJ_64 | ||
999 | if (LJ_SOFTFP || !irt_isnum(kt)) { | ||
1000 | /* Allocate cmp64 register used for 64-bit comparisons */ | ||
1001 | if (LJ_SOFTFP && irt_isnum(kt)) { | ||
1002 | cmp64 = key; | ||
1003 | } else if (!isk && irt_isaddr(kt)) { | ||
1004 | cmp64 = tmp2; | ||
1005 | } else { | ||
1006 | int64_t k; | ||
1007 | if (isk && irt_isaddr(kt)) { | ||
1008 | k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; | ||
1009 | } else { | ||
1010 | lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); | ||
1011 | k = ~((int64_t)~irt_toitype(kt) << 47); | ||
1012 | } | ||
1013 | cmp64 = ra_allock(as, k, allow); | ||
1014 | rset_clear(allow, cmp64); | ||
1015 | } | ||
1016 | } | ||
1017 | #endif | ||
656 | 1018 | ||
657 | /* Key not found in chain: load niltv. */ | 1019 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ |
658 | l_end = emit_label(as); | 1020 | l_end = emit_label(as); |
659 | if (destused) | 1021 | as->invmcp = NULL; |
1022 | if (merge == IR_NE) | ||
1023 | asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO); | ||
1024 | else if (destused) | ||
660 | emit_loada(as, dest, niltvg(J2G(as->J))); | 1025 | emit_loada(as, dest, niltvg(J2G(as->J))); |
661 | else | ||
662 | *--as->mcp = MIPSI_NOP; | ||
663 | /* Follow hash chain until the end. */ | 1026 | /* Follow hash chain until the end. */ |
664 | emit_move(as, dest, tmp1); | 1027 | emit_move(as, dest, tmp1); |
665 | l_loop = --as->mcp; | 1028 | l_loop = --as->mcp; |
666 | emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); | 1029 | emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); |
667 | l_next = emit_label(as); | 1030 | l_next = emit_label(as); |
668 | 1031 | ||
669 | /* Type and value comparison. */ | 1032 | /* Type and value comparison. */ |
670 | if (irt_isnum(kt)) { | 1033 | if (merge == IR_EQ) { /* Must match asm_guard(). */ |
1034 | emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); | ||
1035 | l_end = asm_exitstub_addr(as); | ||
1036 | } | ||
1037 | if (!LJ_SOFTFP && irt_isnum(kt)) { | ||
1038 | #if !LJ_TARGET_MIPSR6 | ||
671 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | 1039 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); |
672 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); | 1040 | emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); |
673 | emit_tg(as, MIPSI_MFC1, tmp1, key+1); | 1041 | #else |
1042 | emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end); | ||
1043 | emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key); | ||
1044 | #endif | ||
1045 | *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ | ||
674 | emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); | 1046 | emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); |
675 | emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); | 1047 | emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); |
1048 | #if LJ_32 | ||
676 | emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); | 1049 | emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); |
677 | } else { | 1050 | } else { |
678 | if (irt_ispri(kt)) { | 1051 | if (irt_ispri(kt)) { |
@@ -685,36 +1058,52 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
685 | } | 1058 | } |
686 | emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); | 1059 | emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); |
687 | *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); | 1060 | *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); |
1061 | #else | ||
1062 | emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); | ||
1063 | emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); | ||
1064 | emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); | ||
1065 | } else { | ||
1066 | emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); | ||
1067 | emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); | ||
1068 | } | ||
1069 | *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); | ||
1070 | if (!isk && irt_isaddr(kt)) { | ||
1071 | type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); | ||
1072 | emit_dst(as, MIPSI_DADDU, tmp2, key, type); | ||
1073 | rset_clear(allow, type); | ||
1074 | } | ||
1075 | #endif | ||
688 | 1076 | ||
689 | /* Load main position relative to tab->node into dest. */ | 1077 | /* Load main position relative to tab->node into dest. */ |
690 | khash = irref_isk(refkey) ? ir_khash(irkey) : 1; | 1078 | khash = isk ? ir_khash(as, irkey) : 1; |
691 | if (khash == 0) { | 1079 | if (khash == 0) { |
692 | emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); | 1080 | emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); |
693 | } else { | 1081 | } else { |
694 | Reg tmphash = tmp1; | 1082 | Reg tmphash = tmp1; |
695 | if (irref_isk(refkey)) | 1083 | if (isk) |
696 | tmphash = ra_allock(as, khash, allow); | 1084 | tmphash = ra_allock(as, khash, allow); |
697 | emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); | 1085 | emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); |
698 | lua_assert(sizeof(Node) == 24); | 1086 | lj_assertA(sizeof(Node) == 24, "bad Node size"); |
699 | emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); | 1087 | emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); |
700 | emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); | 1088 | emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); |
701 | emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); | 1089 | emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); |
702 | emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); | 1090 | emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); |
703 | emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); | 1091 | emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); |
704 | emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); | 1092 | emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); |
705 | if (irref_isk(refkey)) { | 1093 | if (isk) { |
706 | /* Nothing to do. */ | 1094 | /* Nothing to do. */ |
707 | } else if (irt_isstr(kt)) { | 1095 | } else if (irt_isstr(kt)) { |
708 | emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); | 1096 | emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); |
709 | } else { /* Must match with hash*() in lj_tab.c. */ | 1097 | } else { /* Must match with hash*() in lj_tab.c. */ |
710 | emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); | 1098 | emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); |
711 | emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); | 1099 | emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); |
712 | emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); | 1100 | emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); |
713 | emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); | 1101 | emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); |
714 | emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); | 1102 | emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); |
715 | if (irt_isnum(kt)) { | 1103 | #if LJ_32 |
1104 | if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { | ||
716 | emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); | 1105 | emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); |
717 | if ((as->flags & JIT_F_MIPS32R2)) { | 1106 | if ((as->flags & JIT_F_MIPSXXR2)) { |
718 | emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); | 1107 | emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); |
719 | } else { | 1108 | } else { |
720 | emit_dst(as, MIPSI_OR, dest, dest, tmp1); | 1109 | emit_dst(as, MIPSI_OR, dest, dest, tmp1); |
@@ -722,13 +1111,35 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
722 | emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); | 1111 | emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); |
723 | } | 1112 | } |
724 | emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); | 1113 | emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); |
1114 | #if LJ_SOFTFP | ||
1115 | emit_ds(as, MIPSI_MOVE, tmp1, type); | ||
1116 | emit_ds(as, MIPSI_MOVE, tmp2, key); | ||
1117 | #else | ||
725 | emit_tg(as, MIPSI_MFC1, tmp2, key); | 1118 | emit_tg(as, MIPSI_MFC1, tmp2, key); |
726 | emit_tg(as, MIPSI_MFC1, tmp1, key+1); | 1119 | emit_tg(as, MIPSI_MFC1, tmp1, key+1); |
1120 | #endif | ||
727 | } else { | 1121 | } else { |
728 | emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); | 1122 | emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); |
729 | emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); | 1123 | emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); |
730 | emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); | 1124 | emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); |
731 | } | 1125 | } |
1126 | #else | ||
1127 | emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); | ||
1128 | emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); | ||
1129 | if (irt_isnum(kt)) { | ||
1130 | emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); | ||
1131 | emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); | ||
1132 | emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); | ||
1133 | #if !LJ_SOFTFP | ||
1134 | emit_tg(as, MIPSI_DMFC1, tmp1, key); | ||
1135 | #endif | ||
1136 | } else { | ||
1137 | checkmclim(as); | ||
1138 | emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); | ||
1139 | emit_dta(as, MIPSI_SLL, tmp2, key, 0); | ||
1140 | emit_dst(as, MIPSI_DADDU, tmp1, key, type); | ||
1141 | } | ||
1142 | #endif | ||
732 | } | 1143 | } |
733 | } | 1144 | } |
734 | } | 1145 | } |
@@ -741,17 +1152,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
741 | int32_t kofs = ofs + (int32_t)offsetof(Node, key); | 1152 | int32_t kofs = ofs + (int32_t)offsetof(Node, key); |
742 | Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; | 1153 | Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; |
743 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | 1154 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); |
744 | Reg key = RID_NONE, type = RID_TMP, idx = node; | ||
745 | RegSet allow = rset_exclude(RSET_GPR, node); | 1155 | RegSet allow = rset_exclude(RSET_GPR, node); |
1156 | Reg idx = node; | ||
1157 | #if LJ_32 | ||
1158 | Reg key = RID_NONE, type = RID_TMP; | ||
746 | int32_t lo, hi; | 1159 | int32_t lo, hi; |
747 | lua_assert(ofs % sizeof(Node) == 0); | 1160 | #else |
1161 | Reg key = ra_scratch(as, allow); | ||
1162 | int64_t k; | ||
1163 | #endif | ||
1164 | lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); | ||
748 | if (ofs > 32736) { | 1165 | if (ofs > 32736) { |
749 | idx = dest; | 1166 | idx = dest; |
750 | rset_clear(allow, dest); | 1167 | rset_clear(allow, dest); |
751 | kofs = (int32_t)offsetof(Node, key); | 1168 | kofs = (int32_t)offsetof(Node, key); |
752 | } else if (ra_hasreg(dest)) { | 1169 | } else if (ra_hasreg(dest)) { |
753 | emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); | 1170 | emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); |
754 | } | 1171 | } |
1172 | #if LJ_32 | ||
755 | if (!irt_ispri(irkey->t)) { | 1173 | if (!irt_ispri(irkey->t)) { |
756 | key = ra_scratch(as, allow); | 1174 | key = ra_scratch(as, allow); |
757 | rset_clear(allow, key); | 1175 | rset_clear(allow, key); |
@@ -770,22 +1188,20 @@ nolo: | |||
770 | asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); | 1188 | asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); |
771 | if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); | 1189 | if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); |
772 | emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); | 1190 | emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); |
773 | if (ofs > 32736) | 1191 | #else |
774 | emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); | 1192 | if (irt_ispri(irkey->t)) { |
775 | } | 1193 | lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); |
776 | 1194 | k = ~((int64_t)~irt_toitype(irkey->t) << 47); | |
777 | static void asm_newref(ASMState *as, IRIns *ir) | 1195 | } else if (irt_isnum(irkey->t)) { |
778 | { | 1196 | k = (int64_t)ir_knum(irkey)->u64; |
779 | if (ir->r != RID_SINK) { | 1197 | } else { |
780 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 1198 | k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); |
781 | IRRef args[3]; | ||
782 | args[0] = ASMREF_L; /* lua_State *L */ | ||
783 | args[1] = ir->op1; /* GCtab *t */ | ||
784 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
785 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
786 | asm_gencall(as, ci, args); | ||
787 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
788 | } | 1199 | } |
1200 | asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); | ||
1201 | emit_tsi(as, MIPSI_LD, key, idx, kofs); | ||
1202 | #endif | ||
1203 | if (ofs > 32736) | ||
1204 | emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); | ||
789 | } | 1205 | } |
790 | 1206 | ||
791 | static void asm_uref(ASMState *as, IRIns *ir) | 1207 | static void asm_uref(ASMState *as, IRIns *ir) |
@@ -794,30 +1210,31 @@ static void asm_uref(ASMState *as, IRIns *ir) | |||
794 | if (irref_isk(ir->op1)) { | 1210 | if (irref_isk(ir->op1)) { |
795 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 1211 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
796 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 1212 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
797 | emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); | 1213 | emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); |
798 | } else { | 1214 | } else { |
799 | Reg uv = ra_scratch(as, RSET_GPR); | 1215 | Reg uv = ra_scratch(as, RSET_GPR); |
800 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 1216 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); |
801 | if (ir->o == IR_UREFC) { | 1217 | if (ir->o == IR_UREFC) { |
802 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); | 1218 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); |
803 | emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); | 1219 | emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); |
804 | emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); | 1220 | emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); |
805 | } else { | 1221 | } else { |
806 | emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); | 1222 | emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); |
807 | } | 1223 | } |
808 | emit_tsi(as, MIPSI_LW, uv, func, | 1224 | emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + |
809 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | 1225 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); |
810 | } | 1226 | } |
811 | } | 1227 | } |
812 | 1228 | ||
813 | static void asm_fref(ASMState *as, IRIns *ir) | 1229 | static void asm_fref(ASMState *as, IRIns *ir) |
814 | { | 1230 | { |
815 | UNUSED(as); UNUSED(ir); | 1231 | UNUSED(as); UNUSED(ir); |
816 | lua_assert(!ra_used(ir)); | 1232 | lj_assertA(!ra_used(ir), "unfused FREF"); |
817 | } | 1233 | } |
818 | 1234 | ||
819 | static void asm_strref(ASMState *as, IRIns *ir) | 1235 | static void asm_strref(ASMState *as, IRIns *ir) |
820 | { | 1236 | { |
1237 | #if LJ_32 | ||
821 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1238 | Reg dest = ra_dest(as, ir, RSET_GPR); |
822 | IRRef ref = ir->op2, refk = ir->op1; | 1239 | IRRef ref = ir->op2, refk = ir->op1; |
823 | int32_t ofs = (int32_t)sizeof(GCstr); | 1240 | int32_t ofs = (int32_t)sizeof(GCstr); |
@@ -849,49 +1266,79 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
849 | else | 1266 | else |
850 | emit_dst(as, MIPSI_ADDU, dest, r, | 1267 | emit_dst(as, MIPSI_ADDU, dest, r, |
851 | ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); | 1268 | ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); |
1269 | #else | ||
1270 | RegSet allow = RSET_GPR; | ||
1271 | Reg dest = ra_dest(as, ir, allow); | ||
1272 | Reg base = ra_alloc1(as, ir->op1, allow); | ||
1273 | IRIns *irr = IR(ir->op2); | ||
1274 | int32_t ofs = sizeof(GCstr); | ||
1275 | rset_clear(allow, base); | ||
1276 | if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { | ||
1277 | emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); | ||
1278 | } else { | ||
1279 | emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); | ||
1280 | emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); | ||
1281 | } | ||
1282 | #endif | ||
852 | } | 1283 | } |
853 | 1284 | ||
854 | /* -- Loads and stores ---------------------------------------------------- */ | 1285 | /* -- Loads and stores ---------------------------------------------------- */ |
855 | 1286 | ||
856 | static MIPSIns asm_fxloadins(IRIns *ir) | 1287 | static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir) |
857 | { | 1288 | { |
1289 | UNUSED(as); | ||
858 | switch (irt_type(ir->t)) { | 1290 | switch (irt_type(ir->t)) { |
859 | case IRT_I8: return MIPSI_LB; | 1291 | case IRT_I8: return MIPSI_LB; |
860 | case IRT_U8: return MIPSI_LBU; | 1292 | case IRT_U8: return MIPSI_LBU; |
861 | case IRT_I16: return MIPSI_LH; | 1293 | case IRT_I16: return MIPSI_LH; |
862 | case IRT_U16: return MIPSI_LHU; | 1294 | case IRT_U16: return MIPSI_LHU; |
863 | case IRT_NUM: return MIPSI_LDC1; | 1295 | case IRT_NUM: |
864 | case IRT_FLOAT: return MIPSI_LWC1; | 1296 | lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); |
865 | default: return MIPSI_LW; | 1297 | if (!LJ_SOFTFP) return MIPSI_LDC1; |
1298 | /* fallthrough */ | ||
1299 | case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; | ||
1300 | /* fallthrough */ | ||
1301 | default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; | ||
866 | } | 1302 | } |
867 | } | 1303 | } |
868 | 1304 | ||
869 | static MIPSIns asm_fxstoreins(IRIns *ir) | 1305 | static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir) |
870 | { | 1306 | { |
1307 | UNUSED(as); | ||
871 | switch (irt_type(ir->t)) { | 1308 | switch (irt_type(ir->t)) { |
872 | case IRT_I8: case IRT_U8: return MIPSI_SB; | 1309 | case IRT_I8: case IRT_U8: return MIPSI_SB; |
873 | case IRT_I16: case IRT_U16: return MIPSI_SH; | 1310 | case IRT_I16: case IRT_U16: return MIPSI_SH; |
874 | case IRT_NUM: return MIPSI_SDC1; | 1311 | case IRT_NUM: |
875 | case IRT_FLOAT: return MIPSI_SWC1; | 1312 | lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); |
876 | default: return MIPSI_SW; | 1313 | if (!LJ_SOFTFP) return MIPSI_SDC1; |
1314 | /* fallthrough */ | ||
1315 | case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; | ||
1316 | /* fallthrough */ | ||
1317 | default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; | ||
877 | } | 1318 | } |
878 | } | 1319 | } |
879 | 1320 | ||
880 | static void asm_fload(ASMState *as, IRIns *ir) | 1321 | static void asm_fload(ASMState *as, IRIns *ir) |
881 | { | 1322 | { |
882 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1323 | Reg dest = ra_dest(as, ir, RSET_GPR); |
883 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | 1324 | MIPSIns mi = asm_fxloadins(as, ir); |
884 | MIPSIns mi = asm_fxloadins(ir); | 1325 | Reg idx; |
885 | int32_t ofs; | 1326 | int32_t ofs; |
886 | if (ir->op2 == IRFL_TAB_ARRAY) { | 1327 | if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ |
887 | ofs = asm_fuseabase(as, ir->op1); | 1328 | idx = RID_JGL; |
888 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 1329 | ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); |
889 | emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); | 1330 | } else { |
890 | return; | 1331 | idx = ra_alloc1(as, ir->op1, RSET_GPR); |
1332 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
1333 | ofs = asm_fuseabase(as, ir->op1); | ||
1334 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
1335 | emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); | ||
1336 | return; | ||
1337 | } | ||
891 | } | 1338 | } |
1339 | ofs = field_ofs[ir->op2]; | ||
892 | } | 1340 | } |
893 | ofs = field_ofs[ir->op2]; | 1341 | lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); |
894 | lua_assert(!irt_isfp(ir->t)); | ||
895 | emit_tsi(as, mi, dest, idx, ofs); | 1342 | emit_tsi(as, mi, dest, idx, ofs); |
896 | } | 1343 | } |
897 | 1344 | ||
@@ -902,51 +1349,90 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
902 | IRIns *irf = IR(ir->op1); | 1349 | IRIns *irf = IR(ir->op1); |
903 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | 1350 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); |
904 | int32_t ofs = field_ofs[irf->op2]; | 1351 | int32_t ofs = field_ofs[irf->op2]; |
905 | MIPSIns mi = asm_fxstoreins(ir); | 1352 | MIPSIns mi = asm_fxstoreins(as, ir); |
906 | lua_assert(!irt_isfp(ir->t)); | 1353 | lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); |
907 | emit_tsi(as, mi, src, idx, ofs); | 1354 | emit_tsi(as, mi, src, idx, ofs); |
908 | } | 1355 | } |
909 | } | 1356 | } |
910 | 1357 | ||
911 | static void asm_xload(ASMState *as, IRIns *ir) | 1358 | static void asm_xload(ASMState *as, IRIns *ir) |
912 | { | 1359 | { |
913 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 1360 | Reg dest = ra_dest(as, ir, |
914 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 1361 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
915 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 1362 | lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), |
1363 | "unaligned XLOAD"); | ||
1364 | asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); | ||
916 | } | 1365 | } |
917 | 1366 | ||
918 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 1367 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
919 | { | 1368 | { |
920 | if (ir->r != RID_SINK) { | 1369 | if (ir->r != RID_SINK) { |
921 | Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 1370 | Reg src = ra_alloc1z(as, ir->op2, |
922 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 1371 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
1372 | asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, | ||
923 | rset_exclude(RSET_GPR, src), ofs); | 1373 | rset_exclude(RSET_GPR, src), ofs); |
924 | } | 1374 | } |
925 | } | 1375 | } |
926 | 1376 | ||
1377 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
1378 | |||
927 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1379 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
928 | { | 1380 | { |
929 | IRType1 t = ir->t; | 1381 | int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); |
930 | Reg dest = RID_NONE, type = RID_TMP, idx; | 1382 | Reg dest = RID_NONE, type = RID_TMP, idx; |
931 | RegSet allow = RSET_GPR; | 1383 | RegSet allow = RSET_GPR; |
932 | int32_t ofs = 0; | 1384 | int32_t ofs = 0; |
1385 | IRType1 t = ir->t; | ||
1386 | if (hiop) { | ||
1387 | t.irt = IRT_NUM; | ||
1388 | if (ra_used(ir+1)) { | ||
1389 | type = ra_dest(as, ir+1, allow); | ||
1390 | rset_clear(allow, type); | ||
1391 | } | ||
1392 | } | ||
933 | if (ra_used(ir)) { | 1393 | if (ra_used(ir)) { |
934 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1394 | lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || |
935 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1395 | irt_isint(ir->t) || irt_isaddr(ir->t), |
1396 | "bad load type %d", irt_type(ir->t)); | ||
1397 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
936 | rset_clear(allow, dest); | 1398 | rset_clear(allow, dest); |
1399 | #if LJ_64 | ||
1400 | if (irt_isaddr(t)) | ||
1401 | emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); | ||
1402 | else if (irt_isint(t)) | ||
1403 | emit_dta(as, MIPSI_SLL, dest, dest, 0); | ||
1404 | #endif | ||
937 | } | 1405 | } |
938 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1406 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
1407 | if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; | ||
939 | rset_clear(allow, idx); | 1408 | rset_clear(allow, idx); |
940 | if (irt_isnum(t)) { | 1409 | if (irt_isnum(t)) { |
941 | asm_guard(as, MIPSI_BEQ, type, RID_ZERO); | 1410 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); |
942 | emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM); | 1411 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); |
943 | if (ra_hasreg(dest)) | ||
944 | emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); | ||
945 | } else { | 1412 | } else { |
946 | asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow)); | 1413 | asm_guard(as, MIPSI_BNE, type, |
947 | if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); | 1414 | ra_allock(as, (int32_t)irt_toitype(t), allow)); |
1415 | } | ||
1416 | #if LJ_32 | ||
1417 | if (ra_hasreg(dest)) { | ||
1418 | if (!LJ_SOFTFP && irt_isnum(t)) | ||
1419 | emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); | ||
1420 | else | ||
1421 | emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); | ||
948 | } | 1422 | } |
949 | emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); | 1423 | emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); |
1424 | #else | ||
1425 | if (ra_hasreg(dest)) { | ||
1426 | if (!LJ_SOFTFP && irt_isnum(t)) { | ||
1427 | emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); | ||
1428 | dest = type; | ||
1429 | } | ||
1430 | } else { | ||
1431 | dest = type; | ||
1432 | } | ||
1433 | emit_dta(as, MIPSI_DSRA32, type, dest, 15); | ||
1434 | emit_tsi(as, MIPSI_LD, dest, idx, ofs); | ||
1435 | #endif | ||
950 | } | 1436 | } |
951 | 1437 | ||
952 | static void asm_ahustore(ASMState *as, IRIns *ir) | 1438 | static void asm_ahustore(ASMState *as, IRIns *ir) |
@@ -956,81 +1442,180 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
956 | int32_t ofs = 0; | 1442 | int32_t ofs = 0; |
957 | if (ir->r == RID_SINK) | 1443 | if (ir->r == RID_SINK) |
958 | return; | 1444 | return; |
959 | if (irt_isnum(ir->t)) { | 1445 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
960 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 1446 | src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); |
1447 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | ||
1448 | emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); | ||
961 | } else { | 1449 | } else { |
1450 | #if LJ_32 | ||
962 | if (!irt_ispri(ir->t)) { | 1451 | if (!irt_ispri(ir->t)) { |
963 | src = ra_alloc1(as, ir->op2, allow); | 1452 | src = ra_alloc1(as, ir->op2, allow); |
964 | rset_clear(allow, src); | 1453 | rset_clear(allow, src); |
965 | } | 1454 | } |
966 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 1455 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
1456 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
1457 | else | ||
1458 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
967 | rset_clear(allow, type); | 1459 | rset_clear(allow, type); |
968 | } | 1460 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
969 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | ||
970 | if (irt_isnum(ir->t)) { | ||
971 | emit_hsi(as, MIPSI_SDC1, src, idx, ofs); | ||
972 | } else { | ||
973 | if (ra_hasreg(src)) | 1461 | if (ra_hasreg(src)) |
974 | emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); | 1462 | emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); |
975 | emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); | 1463 | emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); |
1464 | #else | ||
1465 | Reg tmp = RID_TMP; | ||
1466 | if (irt_ispri(ir->t)) { | ||
1467 | tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); | ||
1468 | rset_clear(allow, tmp); | ||
1469 | } else { | ||
1470 | src = ra_alloc1(as, ir->op2, allow); | ||
1471 | rset_clear(allow, src); | ||
1472 | type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); | ||
1473 | rset_clear(allow, type); | ||
1474 | } | ||
1475 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | ||
1476 | emit_tsi(as, MIPSI_SD, tmp, idx, ofs); | ||
1477 | if (ra_hasreg(src)) { | ||
1478 | if (irt_isinteger(ir->t)) { | ||
1479 | emit_dst(as, MIPSI_DADDU, tmp, tmp, type); | ||
1480 | emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); | ||
1481 | } else { | ||
1482 | emit_dst(as, MIPSI_DADDU, tmp, src, type); | ||
1483 | } | ||
1484 | } | ||
1485 | #endif | ||
976 | } | 1486 | } |
977 | } | 1487 | } |
978 | 1488 | ||
979 | static void asm_sload(ASMState *as, IRIns *ir) | 1489 | static void asm_sload(ASMState *as, IRIns *ir) |
980 | { | 1490 | { |
981 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
982 | IRType1 t = ir->t; | ||
983 | Reg dest = RID_NONE, type = RID_NONE, base; | 1491 | Reg dest = RID_NONE, type = RID_NONE, base; |
984 | RegSet allow = RSET_GPR; | 1492 | RegSet allow = RSET_GPR; |
985 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1493 | IRType1 t = ir->t; |
986 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1494 | #if LJ_32 |
987 | lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 1495 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); |
1496 | int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); | ||
1497 | if (hiop) | ||
1498 | t.irt = IRT_NUM; | ||
1499 | #else | ||
1500 | int32_t ofs = 8*((int32_t)ir->op1-2); | ||
1501 | #endif | ||
1502 | lj_assertA(!(ir->op2 & IRSLOAD_PARENT), | ||
1503 | "bad parent SLOAD"); /* Handled by asm_head_side(). */ | ||
1504 | lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), | ||
1505 | "inconsistent SLOAD variant"); | ||
1506 | #if LJ_SOFTFP32 | ||
1507 | lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), | ||
1508 | "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ | ||
1509 | if (hiop && ra_used(ir+1)) { | ||
1510 | type = ra_dest(as, ir+1, allow); | ||
1511 | rset_clear(allow, type); | ||
1512 | } | ||
1513 | #else | ||
988 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1514 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
989 | dest = ra_scratch(as, RSET_FPR); | 1515 | dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); |
990 | asm_tointg(as, ir, dest); | 1516 | asm_tointg(as, ir, dest); |
991 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1517 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
992 | } else if (ra_used(ir)) { | 1518 | } else |
993 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1519 | #endif |
994 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1520 | if (ra_used(ir)) { |
1521 | lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || | ||
1522 | irt_isint(ir->t) || irt_isaddr(ir->t), | ||
1523 | "bad SLOAD type %d", irt_type(ir->t)); | ||
1524 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
995 | rset_clear(allow, dest); | 1525 | rset_clear(allow, dest); |
996 | base = ra_alloc1(as, REF_BASE, allow); | 1526 | base = ra_alloc1(as, REF_BASE, allow); |
997 | rset_clear(allow, base); | 1527 | rset_clear(allow, base); |
998 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1528 | if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { |
999 | if (irt_isint(t)) { | 1529 | if (irt_isint(t)) { |
1000 | Reg tmp = ra_scratch(as, RSET_FPR); | 1530 | Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); |
1531 | #if LJ_SOFTFP | ||
1532 | ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); | ||
1533 | ra_destreg(as, ir, RID_RET); | ||
1534 | emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); | ||
1535 | if (tmp != REGARG_FIRSTGPR) | ||
1536 | emit_move(as, REGARG_FIRSTGPR, tmp); | ||
1537 | #else | ||
1001 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 1538 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
1002 | emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); | 1539 | emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); |
1540 | #endif | ||
1003 | dest = tmp; | 1541 | dest = tmp; |
1004 | t.irt = IRT_NUM; /* Check for original type. */ | 1542 | t.irt = IRT_NUM; /* Check for original type. */ |
1005 | } else { | 1543 | } else { |
1006 | Reg tmp = ra_scratch(as, RSET_GPR); | 1544 | Reg tmp = ra_scratch(as, RSET_GPR); |
1545 | #if LJ_SOFTFP | ||
1546 | ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); | ||
1547 | ra_destreg(as, ir, RID_RET); | ||
1548 | emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); | ||
1549 | emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); | ||
1550 | #else | ||
1007 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); | 1551 | emit_fg(as, MIPSI_CVT_D_W, dest, dest); |
1008 | emit_tg(as, MIPSI_MTC1, tmp, dest); | 1552 | emit_tg(as, MIPSI_MTC1, tmp, dest); |
1553 | #endif | ||
1009 | dest = tmp; | 1554 | dest = tmp; |
1010 | t.irt = IRT_INT; /* Check for original type. */ | 1555 | t.irt = IRT_INT; /* Check for original type. */ |
1011 | } | 1556 | } |
1012 | } | 1557 | } |
1558 | #if LJ_64 | ||
1559 | else if (irt_isaddr(t)) { | ||
1560 | /* Clear type from pointers. */ | ||
1561 | emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); | ||
1562 | } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1563 | /* Sign-extend integers. */ | ||
1564 | emit_dta(as, MIPSI_SLL, dest, dest, 0); | ||
1565 | } | ||
1566 | #endif | ||
1013 | goto dotypecheck; | 1567 | goto dotypecheck; |
1014 | } | 1568 | } |
1015 | base = ra_alloc1(as, REF_BASE, allow); | 1569 | base = ra_alloc1(as, REF_BASE, allow); |
1016 | rset_clear(allow, base); | 1570 | rset_clear(allow, base); |
1017 | dotypecheck: | 1571 | dotypecheck: |
1018 | if (irt_isnum(t)) { | 1572 | #if LJ_32 |
1019 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1573 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1020 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); | 1574 | if (ra_noreg(type)) |
1021 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); | ||
1022 | type = RID_TMP; | 1575 | type = RID_TMP; |
1023 | } | 1576 | if (irt_isnum(t)) { |
1024 | if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | 1577 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); |
1025 | } else { | 1578 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); |
1026 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1579 | } else { |
1027 | Reg ktype = ra_allock(as, irt_toitype(t), allow); | 1580 | Reg ktype = ra_allock(as, irt_toitype(t), allow); |
1028 | asm_guard(as, MIPSI_BNE, RID_TMP, ktype); | 1581 | asm_guard(as, MIPSI_BNE, type, ktype); |
1029 | type = RID_TMP; | ||
1030 | } | 1582 | } |
1031 | if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); | ||
1032 | } | 1583 | } |
1033 | if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); | 1584 | if (ra_hasreg(dest)) { |
1585 | if (!LJ_SOFTFP && irt_isnum(t)) | ||
1586 | emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | ||
1587 | else | ||
1588 | emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); | ||
1589 | } | ||
1590 | if (ra_hasreg(type)) | ||
1591 | emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); | ||
1592 | #else | ||
1593 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1594 | type = dest < RID_MAX_GPR ? dest : RID_TMP; | ||
1595 | if (irt_ispri(t)) { | ||
1596 | asm_guard(as, MIPSI_BNE, type, | ||
1597 | ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); | ||
1598 | } else { | ||
1599 | if (irt_isnum(t)) { | ||
1600 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); | ||
1601 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); | ||
1602 | if (!LJ_SOFTFP && ra_hasreg(dest)) | ||
1603 | emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | ||
1604 | } else { | ||
1605 | asm_guard(as, MIPSI_BNE, RID_TMP, | ||
1606 | ra_allock(as, (int32_t)irt_toitype(t), allow)); | ||
1607 | } | ||
1608 | emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); | ||
1609 | } | ||
1610 | emit_tsi(as, MIPSI_LD, type, base, ofs); | ||
1611 | } else if (ra_hasreg(dest)) { | ||
1612 | if (!LJ_SOFTFP && irt_isnum(t)) | ||
1613 | emit_hsi(as, MIPSI_LDC1, dest, base, ofs); | ||
1614 | else | ||
1615 | emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, | ||
1616 | ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0)); | ||
1617 | } | ||
1618 | #endif | ||
1034 | } | 1619 | } |
1035 | 1620 | ||
1036 | /* -- Allocations --------------------------------------------------------- */ | 1621 | /* -- Allocations --------------------------------------------------------- */ |
@@ -1039,19 +1624,16 @@ dotypecheck: | |||
1039 | static void asm_cnew(ASMState *as, IRIns *ir) | 1624 | static void asm_cnew(ASMState *as, IRIns *ir) |
1040 | { | 1625 | { |
1041 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1626 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1042 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1627 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1043 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1628 | CTSize sz; |
1044 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1629 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1045 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1630 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1046 | IRRef args[2]; | 1631 | IRRef args[4]; |
1047 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1048 | RegSet drop = RSET_SCRATCH; | 1632 | RegSet drop = RSET_SCRATCH; |
1049 | lua_assert(sz != CTSIZE_INVALID); | 1633 | lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), |
1634 | "bad CNEW/CNEWI operands"); | ||
1050 | 1635 | ||
1051 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1052 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1053 | as->gcsteps++; | 1636 | as->gcsteps++; |
1054 | |||
1055 | if (ra_hasreg(ir->r)) | 1637 | if (ra_hasreg(ir->r)) |
1056 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1638 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1057 | ra_evictset(as, drop); | 1639 | ra_evictset(as, drop); |
@@ -1060,11 +1642,12 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1060 | 1642 | ||
1061 | /* Initialize immutable cdata object. */ | 1643 | /* Initialize immutable cdata object. */ |
1062 | if (ir->o == IR_CNEWI) { | 1644 | if (ir->o == IR_CNEWI) { |
1645 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1646 | #if LJ_32 | ||
1063 | int32_t ofs = sizeof(GCcdata); | 1647 | int32_t ofs = sizeof(GCcdata); |
1064 | lua_assert(sz == 4 || sz == 8); | ||
1065 | if (sz == 8) { | 1648 | if (sz == 8) { |
1066 | ofs += 4; | 1649 | ofs += 4; |
1067 | lua_assert((ir+1)->o == IR_HIOP); | 1650 | lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); |
1068 | if (LJ_LE) ir++; | 1651 | if (LJ_LE) ir++; |
1069 | } | 1652 | } |
1070 | for (;;) { | 1653 | for (;;) { |
@@ -1074,18 +1657,33 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1074 | if (ofs == sizeof(GCcdata)) break; | 1657 | if (ofs == sizeof(GCcdata)) break; |
1075 | ofs -= 4; if (LJ_BE) ir++; else ir--; | 1658 | ofs -= 4; if (LJ_BE) ir++; else ir--; |
1076 | } | 1659 | } |
1660 | #else | ||
1661 | emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow), | ||
1662 | RID_RET, sizeof(GCcdata)); | ||
1663 | #endif | ||
1664 | lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); | ||
1665 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1666 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1667 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1668 | args[1] = ir->op1; /* CTypeID id */ | ||
1669 | args[2] = ir->op2; /* CTSize sz */ | ||
1670 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1671 | asm_gencall(as, ci, args); | ||
1672 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1673 | return; | ||
1077 | } | 1674 | } |
1675 | |||
1078 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1676 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1079 | emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); | 1677 | emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); |
1080 | emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); | 1678 | emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); |
1081 | emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); | 1679 | emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); |
1082 | emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ | 1680 | emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ |
1681 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1682 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1083 | asm_gencall(as, ci, args); | 1683 | asm_gencall(as, ci, args); |
1084 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1684 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1085 | ra_releasetmp(as, ASMREF_TMP1)); | 1685 | ra_releasetmp(as, ASMREF_TMP1)); |
1086 | } | 1686 | } |
1087 | #else | ||
1088 | #define asm_cnew(as, ir) ((void)0) | ||
1089 | #endif | 1687 | #endif |
1090 | 1688 | ||
1091 | /* -- Write barriers ------------------------------------------------------ */ | 1689 | /* -- Write barriers ------------------------------------------------------ */ |
@@ -1096,7 +1694,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
1096 | Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); | 1694 | Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); |
1097 | Reg link = RID_TMP; | 1695 | Reg link = RID_TMP; |
1098 | MCLabel l_end = emit_label(as); | 1696 | MCLabel l_end = emit_label(as); |
1099 | emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); | 1697 | emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); |
1100 | emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); | 1698 | emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); |
1101 | emit_setgl(as, tab, gc.grayagain); | 1699 | emit_setgl(as, tab, gc.grayagain); |
1102 | emit_getgl(as, link, gc.grayagain); | 1700 | emit_getgl(as, link, gc.grayagain); |
@@ -1113,13 +1711,13 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1113 | MCLabel l_end; | 1711 | MCLabel l_end; |
1114 | Reg obj, val, tmp; | 1712 | Reg obj, val, tmp; |
1115 | /* No need for other object barriers (yet). */ | 1713 | /* No need for other object barriers (yet). */ |
1116 | lua_assert(IR(ir->op1)->o == IR_UREFC); | 1714 | lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); |
1117 | ra_evictset(as, RSET_SCRATCH); | 1715 | ra_evictset(as, RSET_SCRATCH); |
1118 | l_end = emit_label(as); | 1716 | l_end = emit_label(as); |
1119 | args[0] = ASMREF_TMP1; /* global_State *g */ | 1717 | args[0] = ASMREF_TMP1; /* global_State *g */ |
1120 | args[1] = ir->op1; /* TValue *tv */ | 1718 | args[1] = ir->op1; /* TValue *tv */ |
1121 | asm_gencall(as, ci, args); | 1719 | asm_gencall(as, ci, args); |
1122 | emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); | 1720 | emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); |
1123 | obj = IR(ir->op1)->r; | 1721 | obj = IR(ir->op1)->r; |
1124 | tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); | 1722 | tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); |
1125 | emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); | 1723 | emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); |
@@ -1134,6 +1732,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1134 | 1732 | ||
1135 | /* -- Arithmetic and logic operations ------------------------------------- */ | 1733 | /* -- Arithmetic and logic operations ------------------------------------- */ |
1136 | 1734 | ||
1735 | #if !LJ_SOFTFP | ||
1137 | static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) | 1736 | static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) |
1138 | { | 1737 | { |
1139 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1738 | Reg dest = ra_dest(as, ir, RSET_FPR); |
@@ -1148,83 +1747,147 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) | |||
1148 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); | 1747 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); |
1149 | emit_fg(as, mi, dest, left); | 1748 | emit_fg(as, mi, dest, left); |
1150 | } | 1749 | } |
1750 | #endif | ||
1151 | 1751 | ||
1152 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1752 | #if !LJ_SOFTFP32 |
1153 | { | 1753 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1154 | IRIns *irp = IR(ir->op1); | 1754 | { |
1155 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1755 | #if !LJ_SOFTFP |
1156 | IRIns *irpp = IR(irp->op1); | 1756 | if (ir->op2 <= IRFPM_TRUNC) |
1157 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1757 | asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1158 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | 1758 | else if (ir->op2 == IRFPM_SQRT) |
1159 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1759 | asm_fpunary(as, ir, MIPSI_SQRT_D); |
1160 | IRRef args[2]; | 1760 | else |
1161 | args[0] = irpp->op1; | 1761 | #endif |
1162 | args[1] = irp->op2; | 1762 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1163 | asm_setupresult(as, ir, ci); | ||
1164 | asm_gencall(as, ci, args); | ||
1165 | return 1; | ||
1166 | } | ||
1167 | } | ||
1168 | return 0; | ||
1169 | } | 1763 | } |
1764 | #endif | ||
1765 | |||
1766 | #if !LJ_SOFTFP | ||
1767 | #define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) | ||
1768 | #define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) | ||
1769 | #define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) | ||
1770 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
1771 | #define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) | ||
1772 | #define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) | ||
1773 | #define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) | ||
1774 | #endif | ||
1170 | 1775 | ||
1171 | static void asm_add(ASMState *as, IRIns *ir) | 1776 | static void asm_add(ASMState *as, IRIns *ir) |
1172 | { | 1777 | { |
1173 | if (irt_isnum(ir->t)) { | 1778 | IRType1 t = ir->t; |
1174 | asm_fparith(as, ir, MIPSI_ADD_D); | 1779 | #if !LJ_SOFTFP32 |
1175 | } else { | 1780 | if (irt_isnum(t)) { |
1781 | asm_fpadd(as, ir); | ||
1782 | } else | ||
1783 | #endif | ||
1784 | { | ||
1785 | /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */ | ||
1176 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1786 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1177 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1787 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1178 | if (irref_isk(ir->op2)) { | 1788 | if (irref_isk(ir->op2)) { |
1179 | int32_t k = IR(ir->op2)->i; | 1789 | intptr_t k = get_kval(as, ir->op2); |
1180 | if (checki16(k)) { | 1790 | if (checki16(k)) { |
1181 | emit_tsi(as, MIPSI_ADDIU, dest, left, k); | 1791 | emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, |
1792 | left, k); | ||
1182 | return; | 1793 | return; |
1183 | } | 1794 | } |
1184 | } | 1795 | } |
1185 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | 1796 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); |
1186 | emit_dst(as, MIPSI_ADDU, dest, left, right); | 1797 | emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, |
1798 | left, right); | ||
1187 | } | 1799 | } |
1188 | } | 1800 | } |
1189 | 1801 | ||
1190 | static void asm_sub(ASMState *as, IRIns *ir) | 1802 | static void asm_sub(ASMState *as, IRIns *ir) |
1191 | { | 1803 | { |
1804 | #if !LJ_SOFTFP32 | ||
1192 | if (irt_isnum(ir->t)) { | 1805 | if (irt_isnum(ir->t)) { |
1193 | asm_fparith(as, ir, MIPSI_SUB_D); | 1806 | asm_fpsub(as, ir); |
1194 | } else { | 1807 | } else |
1808 | #endif | ||
1809 | { | ||
1195 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1810 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1196 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 1811 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1197 | right = (left >> 8); left &= 255; | 1812 | right = (left >> 8); left &= 255; |
1198 | emit_dst(as, MIPSI_SUBU, dest, left, right); | 1813 | emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, |
1814 | left, right); | ||
1199 | } | 1815 | } |
1200 | } | 1816 | } |
1201 | 1817 | ||
1202 | static void asm_mul(ASMState *as, IRIns *ir) | 1818 | static void asm_mul(ASMState *as, IRIns *ir) |
1203 | { | 1819 | { |
1820 | #if !LJ_SOFTFP32 | ||
1204 | if (irt_isnum(ir->t)) { | 1821 | if (irt_isnum(ir->t)) { |
1205 | asm_fparith(as, ir, MIPSI_MUL_D); | 1822 | asm_fpmul(as, ir); |
1206 | } else { | 1823 | } else |
1824 | #endif | ||
1825 | { | ||
1207 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1826 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1208 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 1827 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1209 | right = (left >> 8); left &= 255; | 1828 | right = (left >> 8); left &= 255; |
1210 | emit_dst(as, MIPSI_MUL, dest, left, right); | 1829 | if (LJ_64 && irt_is64(ir->t)) { |
1830 | #if !LJ_TARGET_MIPSR6 | ||
1831 | emit_dst(as, MIPSI_MFLO, dest, 0, 0); | ||
1832 | emit_dst(as, MIPSI_DMULT, 0, left, right); | ||
1833 | #else | ||
1834 | emit_dst(as, MIPSI_DMUL, dest, left, right); | ||
1835 | #endif | ||
1836 | } else { | ||
1837 | emit_dst(as, MIPSI_MUL, dest, left, right); | ||
1838 | } | ||
1211 | } | 1839 | } |
1212 | } | 1840 | } |
1213 | 1841 | ||
1842 | #if !LJ_SOFTFP32 | ||
1843 | static void asm_fpdiv(ASMState *as, IRIns *ir) | ||
1844 | { | ||
1845 | #if !LJ_SOFTFP | ||
1846 | asm_fparith(as, ir, MIPSI_DIV_D); | ||
1847 | #else | ||
1848 | asm_callid(as, ir, IRCALL_softfp_div); | ||
1849 | #endif | ||
1850 | } | ||
1851 | #endif | ||
1852 | |||
1214 | static void asm_neg(ASMState *as, IRIns *ir) | 1853 | static void asm_neg(ASMState *as, IRIns *ir) |
1215 | { | 1854 | { |
1855 | #if !LJ_SOFTFP | ||
1216 | if (irt_isnum(ir->t)) { | 1856 | if (irt_isnum(ir->t)) { |
1217 | asm_fpunary(as, ir, MIPSI_NEG_D); | 1857 | asm_fpunary(as, ir, MIPSI_NEG_D); |
1218 | } else { | 1858 | } else |
1859 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
1860 | if (irt_isnum(ir->t)) { | ||
1861 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1862 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1863 | emit_dst(as, MIPSI_XOR, dest, left, | ||
1864 | ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); | ||
1865 | } else | ||
1866 | #endif | ||
1867 | { | ||
1219 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1868 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1220 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1869 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1221 | emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); | 1870 | emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, |
1871 | RID_ZERO, left); | ||
1222 | } | 1872 | } |
1223 | } | 1873 | } |
1224 | 1874 | ||
1875 | #if !LJ_SOFTFP | ||
1876 | #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) | ||
1877 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
1878 | static void asm_abs(ASMState *as, IRIns *ir) | ||
1879 | { | ||
1880 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1881 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1882 | emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); | ||
1883 | } | ||
1884 | #endif | ||
1885 | |||
1225 | static void asm_arithov(ASMState *as, IRIns *ir) | 1886 | static void asm_arithov(ASMState *as, IRIns *ir) |
1226 | { | 1887 | { |
1888 | /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ | ||
1227 | Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); | 1889 | Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); |
1890 | lj_assertA(!irt_is64(ir->t), "bad usage"); | ||
1228 | if (irref_isk(ir->op2)) { | 1891 | if (irref_isk(ir->op2)) { |
1229 | int k = IR(ir->op2)->i; | 1892 | int k = IR(ir->op2)->i; |
1230 | if (ir->o == IR_SUBOV) k = -k; | 1893 | if (ir->o == IR_SUBOV) k = -k; |
@@ -1255,16 +1918,29 @@ static void asm_arithov(ASMState *as, IRIns *ir) | |||
1255 | emit_move(as, RID_TMP, dest == left ? left : right); | 1918 | emit_move(as, RID_TMP, dest == left ? left : right); |
1256 | } | 1919 | } |
1257 | 1920 | ||
1921 | #define asm_addov(as, ir) asm_arithov(as, ir) | ||
1922 | #define asm_subov(as, ir) asm_arithov(as, ir) | ||
1923 | |||
1258 | static void asm_mulov(ASMState *as, IRIns *ir) | 1924 | static void asm_mulov(ASMState *as, IRIns *ir) |
1259 | { | 1925 | { |
1260 | #if LJ_DUALNUM | 1926 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1261 | #error "NYI: MULOV" | 1927 | Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); |
1928 | right = (left >> 8); left &= 255; | ||
1929 | tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), | ||
1930 | right), dest)); | ||
1931 | asm_guard(as, MIPSI_BNE, RID_TMP, tmp); | ||
1932 | emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); | ||
1933 | #if !LJ_TARGET_MIPSR6 | ||
1934 | emit_dst(as, MIPSI_MFHI, tmp, 0, 0); | ||
1935 | emit_dst(as, MIPSI_MFLO, dest, 0, 0); | ||
1936 | emit_dst(as, MIPSI_MULT, 0, left, right); | ||
1262 | #else | 1937 | #else |
1263 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ | 1938 | emit_dst(as, MIPSI_MUL, dest, left, right); |
1939 | emit_dst(as, MIPSI_MUH, tmp, left, right); | ||
1264 | #endif | 1940 | #endif |
1265 | } | 1941 | } |
1266 | 1942 | ||
1267 | #if LJ_HASFFI | 1943 | #if LJ_32 && LJ_HASFFI |
1268 | static void asm_add64(ASMState *as, IRIns *ir) | 1944 | static void asm_add64(ASMState *as, IRIns *ir) |
1269 | { | 1945 | { |
1270 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1946 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1348,7 +2024,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) | |||
1348 | } | 2024 | } |
1349 | #endif | 2025 | #endif |
1350 | 2026 | ||
1351 | static void asm_bitnot(ASMState *as, IRIns *ir) | 2027 | static void asm_bnot(ASMState *as, IRIns *ir) |
1352 | { | 2028 | { |
1353 | Reg left, right, dest = ra_dest(as, ir, RSET_GPR); | 2029 | Reg left, right, dest = ra_dest(as, ir, RSET_GPR); |
1354 | IRIns *irl = IR(ir->op1); | 2030 | IRIns *irl = IR(ir->op1); |
@@ -1362,11 +2038,12 @@ static void asm_bitnot(ASMState *as, IRIns *ir) | |||
1362 | emit_dst(as, MIPSI_NOR, dest, left, right); | 2038 | emit_dst(as, MIPSI_NOR, dest, left, right); |
1363 | } | 2039 | } |
1364 | 2040 | ||
1365 | static void asm_bitswap(ASMState *as, IRIns *ir) | 2041 | static void asm_bswap(ASMState *as, IRIns *ir) |
1366 | { | 2042 | { |
1367 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2043 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1368 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 2044 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
1369 | if ((as->flags & JIT_F_MIPS32R2)) { | 2045 | #if LJ_32 |
2046 | if ((as->flags & JIT_F_MIPSXXR2)) { | ||
1370 | emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); | 2047 | emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); |
1371 | emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); | 2048 | emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); |
1372 | } else { | 2049 | } else { |
@@ -1381,6 +2058,15 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1381 | emit_dta(as, MIPSI_SRL, tmp, left, 24); | 2058 | emit_dta(as, MIPSI_SRL, tmp, left, 24); |
1382 | emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); | 2059 | emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); |
1383 | } | 2060 | } |
2061 | #else | ||
2062 | if (irt_is64(ir->t)) { | ||
2063 | emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); | ||
2064 | emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); | ||
2065 | } else { | ||
2066 | emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); | ||
2067 | emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); | ||
2068 | } | ||
2069 | #endif | ||
1384 | } | 2070 | } |
1385 | 2071 | ||
1386 | static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | 2072 | static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) |
@@ -1388,7 +2074,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | |||
1388 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2074 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1389 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 2075 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1390 | if (irref_isk(ir->op2)) { | 2076 | if (irref_isk(ir->op2)) { |
1391 | int32_t k = IR(ir->op2)->i; | 2077 | intptr_t k = get_kval(as, ir->op2); |
1392 | if (checku16(k)) { | 2078 | if (checku16(k)) { |
1393 | emit_tsi(as, mik, dest, left, k); | 2079 | emit_tsi(as, mik, dest, left, k); |
1394 | return; | 2080 | return; |
@@ -1398,22 +2084,34 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | |||
1398 | emit_dst(as, mi, dest, left, right); | 2084 | emit_dst(as, mi, dest, left, right); |
1399 | } | 2085 | } |
1400 | 2086 | ||
2087 | #define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI) | ||
2088 | #define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI) | ||
2089 | #define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI) | ||
2090 | |||
1401 | static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) | 2091 | static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) |
1402 | { | 2092 | { |
1403 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2093 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1404 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | 2094 | if (irref_isk(ir->op2)) { /* Constant shifts. */ |
1405 | uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); | 2095 | uint32_t shift = (uint32_t)IR(ir->op2)->i; |
1406 | emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); | 2096 | if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; |
2097 | emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), | ||
2098 | (shift & 31)); | ||
1407 | } else { | 2099 | } else { |
1408 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 2100 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1409 | right = (left >> 8); left &= 255; | 2101 | right = (left >> 8); left &= 255; |
2102 | if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; | ||
1410 | emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ | 2103 | emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ |
1411 | } | 2104 | } |
1412 | } | 2105 | } |
1413 | 2106 | ||
1414 | static void asm_bitror(ASMState *as, IRIns *ir) | 2107 | #define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) |
2108 | #define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) | ||
2109 | #define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) | ||
2110 | #define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") | ||
2111 | |||
2112 | static void asm_bror(ASMState *as, IRIns *ir) | ||
1415 | { | 2113 | { |
1416 | if ((as->flags & JIT_F_MIPS32R2)) { | 2114 | if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { |
1417 | asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); | 2115 | asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); |
1418 | } else { | 2116 | } else { |
1419 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2117 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1432,55 +2130,182 @@ static void asm_bitror(ASMState *as, IRIns *ir) | |||
1432 | } | 2130 | } |
1433 | } | 2131 | } |
1434 | 2132 | ||
2133 | #if LJ_SOFTFP | ||
2134 | static void asm_sfpmin_max(ASMState *as, IRIns *ir) | ||
2135 | { | ||
2136 | CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; | ||
2137 | #if LJ_64 | ||
2138 | IRRef args[2]; | ||
2139 | args[0] = ir->op1; | ||
2140 | args[1] = ir->op2; | ||
2141 | #else | ||
2142 | IRRef args[4]; | ||
2143 | args[0^LJ_BE] = ir->op1; | ||
2144 | args[1^LJ_BE] = (ir+1)->op1; | ||
2145 | args[2^LJ_BE] = ir->op2; | ||
2146 | args[3^LJ_BE] = (ir+1)->op2; | ||
2147 | #endif | ||
2148 | asm_setupresult(as, ir, &ci); | ||
2149 | emit_call(as, (void *)ci.func, 0); | ||
2150 | ci.func = NULL; | ||
2151 | asm_gencall(as, &ci, args); | ||
2152 | } | ||
2153 | #endif | ||
2154 | |||
1435 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | 2155 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) |
1436 | { | 2156 | { |
1437 | if (irt_isnum(ir->t)) { | 2157 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
2158 | #if LJ_SOFTFP | ||
2159 | asm_sfpmin_max(as, ir); | ||
2160 | #else | ||
1438 | Reg dest = ra_dest(as, ir, RSET_FPR); | 2161 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1439 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 2162 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1440 | right = (left >> 8); left &= 255; | 2163 | right = (left >> 8); left &= 255; |
2164 | #if !LJ_TARGET_MIPSR6 | ||
1441 | if (dest == left) { | 2165 | if (dest == left) { |
1442 | emit_fg(as, MIPSI_MOVT_D, dest, right); | 2166 | emit_fg(as, MIPSI_MOVF_D, dest, right); |
1443 | } else { | 2167 | } else { |
1444 | emit_fg(as, MIPSI_MOVF_D, dest, left); | 2168 | emit_fg(as, MIPSI_MOVT_D, dest, left); |
1445 | if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); | 2169 | if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); |
1446 | } | 2170 | } |
1447 | emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); | 2171 | emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right); |
2172 | #else | ||
2173 | emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); | ||
2174 | #endif | ||
2175 | #endif | ||
1448 | } else { | 2176 | } else { |
1449 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2177 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1450 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); | 2178 | Reg right, left = ra_alloc2(as, ir, RSET_GPR); |
1451 | right = (left >> 8); left &= 255; | 2179 | right = (left >> 8); left &= 255; |
1452 | if (dest == left) { | 2180 | if (left == right) { |
1453 | emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); | 2181 | if (dest != left) emit_move(as, dest, left); |
1454 | } else { | 2182 | } else { |
1455 | emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); | 2183 | #if !LJ_TARGET_MIPSR6 |
1456 | if (dest != right) emit_move(as, dest, right); | 2184 | if (dest == left) { |
2185 | emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); | ||
2186 | } else { | ||
2187 | emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); | ||
2188 | if (dest != right) emit_move(as, dest, right); | ||
2189 | } | ||
2190 | #else | ||
2191 | emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); | ||
2192 | if (dest != right) { | ||
2193 | emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP); | ||
2194 | emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP); | ||
2195 | } else { | ||
2196 | emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP); | ||
2197 | emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP); | ||
2198 | } | ||
2199 | #endif | ||
2200 | emit_dst(as, MIPSI_SLT, RID_TMP, | ||
2201 | ismax ? left : right, ismax ? right : left); | ||
1457 | } | 2202 | } |
1458 | emit_dst(as, MIPSI_SLT, RID_TMP, | ||
1459 | ismax ? left : right, ismax ? right : left); | ||
1460 | } | 2203 | } |
1461 | } | 2204 | } |
1462 | 2205 | ||
2206 | #define asm_min(as, ir) asm_min_max(as, ir, 0) | ||
2207 | #define asm_max(as, ir) asm_min_max(as, ir, 1) | ||
2208 | |||
1463 | /* -- Comparisons --------------------------------------------------------- */ | 2209 | /* -- Comparisons --------------------------------------------------------- */ |
1464 | 2210 | ||
2211 | #if LJ_SOFTFP | ||
2212 | /* SFP comparisons. */ | ||
2213 | static void asm_sfpcomp(ASMState *as, IRIns *ir) | ||
2214 | { | ||
2215 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
2216 | RegSet drop = RSET_SCRATCH; | ||
2217 | Reg r; | ||
2218 | #if LJ_64 | ||
2219 | IRRef args[2]; | ||
2220 | args[0] = ir->op1; | ||
2221 | args[1] = ir->op2; | ||
2222 | #else | ||
2223 | IRRef args[4]; | ||
2224 | args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; | ||
2225 | args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; | ||
2226 | #endif | ||
2227 | |||
2228 | for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { | ||
2229 | if (!rset_test(as->freeset, r) && | ||
2230 | regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) | ||
2231 | rset_clear(drop, r); | ||
2232 | } | ||
2233 | ra_evictset(as, drop); | ||
2234 | |||
2235 | asm_setupresult(as, ir, ci); | ||
2236 | |||
2237 | switch ((IROp)ir->o) { | ||
2238 | case IR_LT: | ||
2239 | asm_guard(as, MIPSI_BGEZ, RID_RET, 0); | ||
2240 | break; | ||
2241 | case IR_ULT: | ||
2242 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2243 | emit_loadi(as, RID_TMP, 1); | ||
2244 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); | ||
2245 | break; | ||
2246 | case IR_GE: | ||
2247 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2248 | emit_loadi(as, RID_TMP, 2); | ||
2249 | asm_guard(as, MIPSI_BLTZ, RID_RET, 0); | ||
2250 | break; | ||
2251 | case IR_LE: | ||
2252 | asm_guard(as, MIPSI_BGTZ, RID_RET, 0); | ||
2253 | break; | ||
2254 | case IR_GT: | ||
2255 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2256 | emit_loadi(as, RID_TMP, 2); | ||
2257 | asm_guard(as, MIPSI_BLEZ, RID_RET, 0); | ||
2258 | break; | ||
2259 | case IR_UGE: | ||
2260 | asm_guard(as, MIPSI_BLTZ, RID_RET, 0); | ||
2261 | break; | ||
2262 | case IR_ULE: | ||
2263 | asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); | ||
2264 | emit_loadi(as, RID_TMP, 1); | ||
2265 | break; | ||
2266 | case IR_UGT: case IR_ABC: | ||
2267 | asm_guard(as, MIPSI_BLEZ, RID_RET, 0); | ||
2268 | break; | ||
2269 | case IR_EQ: case IR_NE: | ||
2270 | asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO); | ||
2271 | default: | ||
2272 | break; | ||
2273 | } | ||
2274 | asm_gencall(as, ci, args); | ||
2275 | } | ||
2276 | #endif | ||
2277 | |||
1465 | static void asm_comp(ASMState *as, IRIns *ir) | 2278 | static void asm_comp(ASMState *as, IRIns *ir) |
1466 | { | 2279 | { |
1467 | /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ | 2280 | /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ |
1468 | IROp op = ir->o; | 2281 | IROp op = ir->o; |
1469 | if (irt_isnum(ir->t)) { | 2282 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
2283 | #if LJ_SOFTFP | ||
2284 | asm_sfpcomp(as, ir); | ||
2285 | #else | ||
2286 | #if !LJ_TARGET_MIPSR6 | ||
1470 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 2287 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1471 | right = (left >> 8); left &= 255; | 2288 | right = (left >> 8); left &= 255; |
1472 | asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); | 2289 | asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); |
1473 | emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); | 2290 | emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); |
2291 | #else | ||
2292 | Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); | ||
2293 | right = (left >> 8); left &= 255; | ||
2294 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); | ||
2295 | asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); | ||
2296 | emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); | ||
2297 | #endif | ||
2298 | #endif | ||
1474 | } else { | 2299 | } else { |
1475 | Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); | 2300 | Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); |
1476 | if (op == IR_ABC) op = IR_UGT; | 2301 | if (op == IR_ABC) op = IR_UGT; |
1477 | if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { | 2302 | if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { |
1478 | MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : | 2303 | MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : |
1479 | ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); | 2304 | ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); |
1480 | asm_guard(as, mi, left, 0); | 2305 | asm_guard(as, mi, left, 0); |
1481 | } else { | 2306 | } else { |
1482 | if (irref_isk(ir->op2)) { | 2307 | if (irref_isk(ir->op2)) { |
1483 | int32_t k = IR(ir->op2)->i; | 2308 | intptr_t k = get_kval(as, ir->op2); |
1484 | if ((op&2)) k++; | 2309 | if ((op&2)) k++; |
1485 | if (checki16(k)) { | 2310 | if (checki16(k)) { |
1486 | asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); | 2311 | asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); |
@@ -1497,19 +2322,28 @@ static void asm_comp(ASMState *as, IRIns *ir) | |||
1497 | } | 2322 | } |
1498 | } | 2323 | } |
1499 | 2324 | ||
1500 | static void asm_compeq(ASMState *as, IRIns *ir) | 2325 | static void asm_equal(ASMState *as, IRIns *ir) |
1501 | { | 2326 | { |
1502 | Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); | 2327 | Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? |
2328 | RSET_FPR : RSET_GPR); | ||
1503 | right = (left >> 8); left &= 255; | 2329 | right = (left >> 8); left &= 255; |
1504 | if (irt_isnum(ir->t)) { | 2330 | if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { |
2331 | #if LJ_SOFTFP | ||
2332 | asm_sfpcomp(as, ir); | ||
2333 | #elif !LJ_TARGET_MIPSR6 | ||
1505 | asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); | 2334 | asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); |
1506 | emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); | 2335 | emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); |
2336 | #else | ||
2337 | Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); | ||
2338 | asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); | ||
2339 | emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right); | ||
2340 | #endif | ||
1507 | } else { | 2341 | } else { |
1508 | asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); | 2342 | asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); |
1509 | } | 2343 | } |
1510 | } | 2344 | } |
1511 | 2345 | ||
1512 | #if LJ_HASFFI | 2346 | #if LJ_32 && LJ_HASFFI |
1513 | /* 64 bit integer comparisons. */ | 2347 | /* 64 bit integer comparisons. */ |
1514 | static void asm_comp64(ASMState *as, IRIns *ir) | 2348 | static void asm_comp64(ASMState *as, IRIns *ir) |
1515 | { | 2349 | { |
@@ -1546,54 +2380,99 @@ static void asm_comp64eq(ASMState *as, IRIns *ir) | |||
1546 | } | 2380 | } |
1547 | #endif | 2381 | #endif |
1548 | 2382 | ||
1549 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | 2383 | /* -- Split register ops -------------------------------------------------- */ |
1550 | 2384 | ||
1551 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 2385 | /* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */ |
1552 | static void asm_hiop(ASMState *as, IRIns *ir) | 2386 | static void asm_hiop(ASMState *as, IRIns *ir) |
1553 | { | 2387 | { |
1554 | #if LJ_HASFFI | ||
1555 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 2388 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
1556 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 2389 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
1557 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 2390 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
2391 | #if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) | ||
1558 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 2392 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
1559 | as->curins--; /* Always skip the CONV. */ | 2393 | as->curins--; /* Always skip the CONV. */ |
2394 | #if LJ_HASFFI && !LJ_SOFTFP | ||
1560 | if (usehi || uselo) | 2395 | if (usehi || uselo) |
1561 | asm_conv64(as, ir); | 2396 | asm_conv64(as, ir); |
1562 | return; | 2397 | return; |
2398 | #endif | ||
1563 | } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ | 2399 | } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ |
1564 | as->curins--; /* Always skip the loword comparison. */ | 2400 | as->curins--; /* Always skip the loword comparison. */ |
2401 | #if LJ_SOFTFP | ||
2402 | if (!irt_isint(ir->t)) { | ||
2403 | asm_sfpcomp(as, ir-1); | ||
2404 | return; | ||
2405 | } | ||
2406 | #endif | ||
2407 | #if LJ_HASFFI | ||
1565 | asm_comp64(as, ir); | 2408 | asm_comp64(as, ir); |
2409 | #endif | ||
1566 | return; | 2410 | return; |
1567 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 2411 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
1568 | as->curins--; /* Always skip the loword comparison. */ | 2412 | as->curins--; /* Always skip the loword comparison. */ |
2413 | #if LJ_SOFTFP | ||
2414 | if (!irt_isint(ir->t)) { | ||
2415 | asm_sfpcomp(as, ir-1); | ||
2416 | return; | ||
2417 | } | ||
2418 | #endif | ||
2419 | #if LJ_HASFFI | ||
1569 | asm_comp64eq(as, ir); | 2420 | asm_comp64eq(as, ir); |
2421 | #endif | ||
2422 | return; | ||
2423 | #if LJ_SOFTFP | ||
2424 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | ||
2425 | as->curins--; /* Always skip the loword min/max. */ | ||
2426 | if (uselo || usehi) | ||
2427 | asm_sfpmin_max(as, ir-1); | ||
1570 | return; | 2428 | return; |
2429 | #endif | ||
1571 | } else if ((ir-1)->o == IR_XSTORE) { | 2430 | } else if ((ir-1)->o == IR_XSTORE) { |
1572 | as->curins--; /* Handle both stores here. */ | 2431 | as->curins--; /* Handle both stores here. */ |
1573 | if ((ir-1)->r != RID_SINK) { | 2432 | if ((ir-1)->r != RID_SINK) { |
1574 | asm_xstore(as, ir, LJ_LE ? 4 : 0); | 2433 | asm_xstore_(as, ir, LJ_LE ? 4 : 0); |
1575 | asm_xstore(as, ir-1, LJ_LE ? 0 : 4); | 2434 | asm_xstore_(as, ir-1, LJ_LE ? 0 : 4); |
1576 | } | 2435 | } |
1577 | return; | 2436 | return; |
1578 | } | 2437 | } |
2438 | #endif | ||
1579 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 2439 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
1580 | switch ((ir-1)->o) { | 2440 | switch ((ir-1)->o) { |
2441 | #if LJ_32 && LJ_HASFFI | ||
1581 | case IR_ADD: as->curins--; asm_add64(as, ir); break; | 2442 | case IR_ADD: as->curins--; asm_add64(as, ir); break; |
1582 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; | 2443 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; |
1583 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; | 2444 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; |
1584 | case IR_CALLN: | 2445 | case IR_CNEWI: |
1585 | case IR_CALLXS: | 2446 | /* Nothing to do here. Handled by lo op itself. */ |
2447 | break; | ||
2448 | #endif | ||
2449 | #if LJ_32 && LJ_SOFTFP | ||
2450 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2451 | case IR_STRTO: | ||
1586 | if (!uselo) | 2452 | if (!uselo) |
1587 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 2453 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ |
1588 | break; | 2454 | break; |
1589 | case IR_CNEWI: | 2455 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: |
1590 | /* Nothing to do here. Handled by lo op itself. */ | 2456 | /* Nothing to do here. Handled by lo op itself. */ |
1591 | break; | 2457 | break; |
1592 | default: lua_assert(0); break; | ||
1593 | } | ||
1594 | #else | ||
1595 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ | ||
1596 | #endif | 2458 | #endif |
2459 | case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: | ||
2460 | if (!uselo) | ||
2461 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | ||
2462 | break; | ||
2463 | default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; | ||
2464 | } | ||
2465 | } | ||
2466 | |||
2467 | /* -- Profiling ----------------------------------------------------------- */ | ||
2468 | |||
2469 | static void asm_prof(ASMState *as, IRIns *ir) | ||
2470 | { | ||
2471 | UNUSED(ir); | ||
2472 | asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); | ||
2473 | emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); | ||
2474 | emit_lsglptr(as, MIPSI_LBU, RID_TMP, | ||
2475 | (int32_t)offsetof(global_State, hookmask)); | ||
1597 | } | 2476 | } |
1598 | 2477 | ||
1599 | /* -- Stack handling ------------------------------------------------------ */ | 2478 | /* -- Stack handling ------------------------------------------------------ */ |
@@ -1606,47 +2485,70 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1606 | Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; | 2485 | Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; |
1607 | ExitNo oldsnap = as->snapno; | 2486 | ExitNo oldsnap = as->snapno; |
1608 | rset_clear(allow, pbase); | 2487 | rset_clear(allow, pbase); |
2488 | #if LJ_32 | ||
1609 | tmp = allow ? rset_pickbot(allow) : | 2489 | tmp = allow ? rset_pickbot(allow) : |
1610 | (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); | 2490 | (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); |
2491 | #else | ||
2492 | tmp = allow ? rset_pickbot(allow) : RID_RET; | ||
2493 | #endif | ||
1611 | as->snapno = exitno; | 2494 | as->snapno = exitno; |
1612 | asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); | 2495 | asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); |
1613 | as->snapno = oldsnap; | 2496 | as->snapno = oldsnap; |
1614 | if (allow == RSET_EMPTY) /* Restore temp. register. */ | 2497 | if (allow == RSET_EMPTY) /* Restore temp. register. */ |
1615 | emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); | 2498 | emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); |
1616 | else | 2499 | else |
1617 | ra_modified(as, tmp); | 2500 | ra_modified(as, tmp); |
1618 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); | 2501 | emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); |
1619 | emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); | 2502 | emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); |
1620 | emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); | 2503 | emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); |
1621 | if (pbase == RID_TMP) | 2504 | if (pbase == RID_TMP) |
1622 | emit_getgl(as, RID_TMP, jit_base); | 2505 | emit_getgl(as, RID_TMP, jit_base); |
1623 | emit_getgl(as, tmp, jit_L); | 2506 | emit_getgl(as, tmp, cur_L); |
1624 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 2507 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
1625 | emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); | 2508 | emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); |
1626 | } | 2509 | } |
1627 | 2510 | ||
1628 | /* Restore Lua stack from on-trace state. */ | 2511 | /* Restore Lua stack from on-trace state. */ |
1629 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | 2512 | static void asm_stack_restore(ASMState *as, SnapShot *snap) |
1630 | { | 2513 | { |
1631 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2514 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
1632 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; | 2515 | #if LJ_32 || defined(LUA_USE_ASSERT) |
2516 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
2517 | #endif | ||
1633 | MSize n, nent = snap->nent; | 2518 | MSize n, nent = snap->nent; |
1634 | /* Store the value of all modified slots to the Lua stack. */ | 2519 | /* Store the value of all modified slots to the Lua stack. */ |
1635 | for (n = 0; n < nent; n++) { | 2520 | for (n = 0; n < nent; n++) { |
1636 | SnapEntry sn = map[n]; | 2521 | SnapEntry sn = map[n]; |
1637 | BCReg s = snap_slot(sn); | 2522 | BCReg s = snap_slot(sn); |
1638 | int32_t ofs = 8*((int32_t)s-1); | 2523 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); |
1639 | IRRef ref = snap_ref(sn); | 2524 | IRRef ref = snap_ref(sn); |
1640 | IRIns *ir = IR(ref); | 2525 | IRIns *ir = IR(ref); |
1641 | if ((sn & SNAP_NORESTORE)) | 2526 | if ((sn & SNAP_NORESTORE)) |
1642 | continue; | 2527 | continue; |
1643 | if (irt_isnum(ir->t)) { | 2528 | if (irt_isnum(ir->t)) { |
2529 | #if LJ_SOFTFP32 | ||
2530 | Reg tmp; | ||
2531 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
2532 | /* LJ_SOFTFP: must be a number constant. */ | ||
2533 | lj_assertA(irref_isk(ref), "unsplit FP op"); | ||
2534 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); | ||
2535 | emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); | ||
2536 | if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); | ||
2537 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); | ||
2538 | emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); | ||
2539 | #elif LJ_SOFTFP /* && LJ_64 */ | ||
2540 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
2541 | emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); | ||
2542 | #else | ||
1644 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2543 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
1645 | emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); | 2544 | emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); |
2545 | #endif | ||
1646 | } else { | 2546 | } else { |
1647 | Reg type; | 2547 | #if LJ_32 |
1648 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | 2548 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); |
1649 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | 2549 | Reg type; |
2550 | lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), | ||
2551 | "restore of IR type %d", irt_type(ir->t)); | ||
1650 | if (!irt_ispri(ir->t)) { | 2552 | if (!irt_ispri(ir->t)) { |
1651 | Reg src = ra_alloc1(as, ref, allow); | 2553 | Reg src = ra_alloc1(as, ref, allow); |
1652 | rset_clear(allow, src); | 2554 | rset_clear(allow, src); |
@@ -1655,14 +2557,23 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1655 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2557 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
1656 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ | 2558 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ |
1657 | type = ra_allock(as, (int32_t)(*flinks--), allow); | 2559 | type = ra_allock(as, (int32_t)(*flinks--), allow); |
2560 | #if LJ_SOFTFP | ||
2561 | } else if ((sn & SNAP_SOFTFPNUM)) { | ||
2562 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | ||
2563 | #endif | ||
2564 | } else if ((sn & SNAP_KEYINDEX)) { | ||
2565 | type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); | ||
1658 | } else { | 2566 | } else { |
1659 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2567 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
1660 | } | 2568 | } |
1661 | emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); | 2569 | emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); |
2570 | #else | ||
2571 | asm_tvstore64(as, RID_BASE, ofs, ref); | ||
2572 | #endif | ||
1662 | } | 2573 | } |
1663 | checkmclim(as); | 2574 | checkmclim(as); |
1664 | } | 2575 | } |
1665 | lua_assert(map + nent == flinks); | 2576 | lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); |
1666 | } | 2577 | } |
1667 | 2578 | ||
1668 | /* -- GC handling --------------------------------------------------------- */ | 2579 | /* -- GC handling --------------------------------------------------------- */ |
@@ -1686,7 +2597,7 @@ static void asm_gc_check(ASMState *as) | |||
1686 | args[1] = ASMREF_TMP2; /* MSize steps */ | 2597 | args[1] = ASMREF_TMP2; /* MSize steps */ |
1687 | asm_gencall(as, ci, args); | 2598 | asm_gencall(as, ci, args); |
1688 | l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ | 2599 | l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ |
1689 | emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); | 2600 | emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); |
1690 | tmp = ra_releasetmp(as, ASMREF_TMP2); | 2601 | tmp = ra_releasetmp(as, ASMREF_TMP2); |
1691 | emit_loadi(as, tmp, as->gcsteps); | 2602 | emit_loadi(as, tmp, as->gcsteps); |
1692 | /* Jump around GC step if GC total < GC threshold. */ | 2603 | /* Jump around GC step if GC total < GC threshold. */ |
@@ -1714,6 +2625,12 @@ static void asm_loop_fixup(ASMState *as) | |||
1714 | } | 2625 | } |
1715 | } | 2626 | } |
1716 | 2627 | ||
2628 | /* Fixup the tail of the loop. */ | ||
2629 | static void asm_loop_tail_fixup(ASMState *as) | ||
2630 | { | ||
2631 | if (as->loopinv) as->mctop--; | ||
2632 | } | ||
2633 | |||
1717 | /* -- Head of trace ------------------------------------------------------- */ | 2634 | /* -- Head of trace ------------------------------------------------------- */ |
1718 | 2635 | ||
1719 | /* Coalesce BASE register for a root trace. */ | 2636 | /* Coalesce BASE register for a root trace. */ |
@@ -1721,7 +2638,6 @@ static void asm_head_root_base(ASMState *as) | |||
1721 | { | 2638 | { |
1722 | IRIns *ir = IR(REF_BASE); | 2639 | IRIns *ir = IR(REF_BASE); |
1723 | Reg r = ir->r; | 2640 | Reg r = ir->r; |
1724 | if (as->loopinv) as->mctop--; | ||
1725 | if (ra_hasreg(r)) { | 2641 | if (ra_hasreg(r)) { |
1726 | ra_free(as, r); | 2642 | ra_free(as, r); |
1727 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) | 2643 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) |
@@ -1736,7 +2652,6 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | |||
1736 | { | 2652 | { |
1737 | IRIns *ir = IR(REF_BASE); | 2653 | IRIns *ir = IR(REF_BASE); |
1738 | Reg r = ir->r; | 2654 | Reg r = ir->r; |
1739 | if (as->loopinv) as->mctop--; | ||
1740 | if (ra_hasreg(r)) { | 2655 | if (ra_hasreg(r)) { |
1741 | ra_free(as, r); | 2656 | ra_free(as, r); |
1742 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) | 2657 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) |
@@ -1761,7 +2676,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
1761 | MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; | 2676 | MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; |
1762 | int32_t spadj = as->T->spadjust; | 2677 | int32_t spadj = as->T->spadjust; |
1763 | MCode *p = as->mctop-1; | 2678 | MCode *p = as->mctop-1; |
1764 | *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; | 2679 | *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; |
1765 | p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); | 2680 | p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); |
1766 | } | 2681 | } |
1767 | 2682 | ||
@@ -1772,139 +2687,26 @@ static void asm_tail_prep(ASMState *as) | |||
1772 | as->invmcp = as->loopref ? as->mcp : NULL; | 2687 | as->invmcp = as->loopref ? as->mcp : NULL; |
1773 | } | 2688 | } |
1774 | 2689 | ||
1775 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1776 | |||
1777 | /* Assemble a single instruction. */ | ||
1778 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1779 | { | ||
1780 | switch ((IROp)ir->o) { | ||
1781 | /* Miscellaneous ops. */ | ||
1782 | case IR_LOOP: asm_loop(as); break; | ||
1783 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
1784 | case IR_USE: | ||
1785 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1786 | case IR_PHI: asm_phi(as, ir); break; | ||
1787 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1788 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1789 | |||
1790 | /* Guarded assertions. */ | ||
1791 | case IR_EQ: case IR_NE: asm_compeq(as, ir); break; | ||
1792 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1793 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1794 | case IR_ABC: | ||
1795 | asm_comp(as, ir); | ||
1796 | break; | ||
1797 | |||
1798 | case IR_RETF: asm_retf(as, ir); break; | ||
1799 | |||
1800 | /* Bit ops. */ | ||
1801 | case IR_BNOT: asm_bitnot(as, ir); break; | ||
1802 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
1803 | |||
1804 | case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break; | ||
1805 | case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break; | ||
1806 | case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break; | ||
1807 | |||
1808 | case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break; | ||
1809 | case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break; | ||
1810 | case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break; | ||
1811 | case IR_BROL: lua_assert(0); break; | ||
1812 | case IR_BROR: asm_bitror(as, ir); break; | ||
1813 | |||
1814 | /* Arithmetic ops. */ | ||
1815 | case IR_ADD: asm_add(as, ir); break; | ||
1816 | case IR_SUB: asm_sub(as, ir); break; | ||
1817 | case IR_MUL: asm_mul(as, ir); break; | ||
1818 | case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break; | ||
1819 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
1820 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
1821 | case IR_NEG: asm_neg(as, ir); break; | ||
1822 | |||
1823 | case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break; | ||
1824 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
1825 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
1826 | case IR_MIN: asm_min_max(as, ir, 0); break; | ||
1827 | case IR_MAX: asm_min_max(as, ir, 1); break; | ||
1828 | case IR_FPMATH: | ||
1829 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
1830 | break; | ||
1831 | if (ir->op2 <= IRFPM_TRUNC) | ||
1832 | asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
1833 | else if (ir->op2 == IRFPM_SQRT) | ||
1834 | asm_fpunary(as, ir, MIPSI_SQRT_D); | ||
1835 | else | ||
1836 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
1837 | break; | ||
1838 | |||
1839 | /* Overflow-checking arithmetic ops. */ | ||
1840 | case IR_ADDOV: asm_arithov(as, ir); break; | ||
1841 | case IR_SUBOV: asm_arithov(as, ir); break; | ||
1842 | case IR_MULOV: asm_mulov(as, ir); break; | ||
1843 | |||
1844 | /* Memory references. */ | ||
1845 | case IR_AREF: asm_aref(as, ir); break; | ||
1846 | case IR_HREF: asm_href(as, ir); break; | ||
1847 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
1848 | case IR_NEWREF: asm_newref(as, ir); break; | ||
1849 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
1850 | case IR_FREF: asm_fref(as, ir); break; | ||
1851 | case IR_STRREF: asm_strref(as, ir); break; | ||
1852 | |||
1853 | /* Loads and stores. */ | ||
1854 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1855 | asm_ahuvload(as, ir); | ||
1856 | break; | ||
1857 | case IR_FLOAD: asm_fload(as, ir); break; | ||
1858 | case IR_XLOAD: asm_xload(as, ir); break; | ||
1859 | case IR_SLOAD: asm_sload(as, ir); break; | ||
1860 | |||
1861 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
1862 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
1863 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
1864 | |||
1865 | /* Allocations. */ | ||
1866 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
1867 | case IR_TNEW: asm_tnew(as, ir); break; | ||
1868 | case IR_TDUP: asm_tdup(as, ir); break; | ||
1869 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
1870 | |||
1871 | /* Write barriers. */ | ||
1872 | case IR_TBAR: asm_tbar(as, ir); break; | ||
1873 | case IR_OBAR: asm_obar(as, ir); break; | ||
1874 | |||
1875 | /* Type conversions. */ | ||
1876 | case IR_CONV: asm_conv(as, ir); break; | ||
1877 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
1878 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
1879 | case IR_STRTO: asm_strto(as, ir); break; | ||
1880 | |||
1881 | /* Calls. */ | ||
1882 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
1883 | case IR_CALLXS: asm_callx(as, ir); break; | ||
1884 | case IR_CARG: break; | ||
1885 | |||
1886 | default: | ||
1887 | setintV(&as->J->errinfo, ir->o); | ||
1888 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1889 | break; | ||
1890 | } | ||
1891 | } | ||
1892 | |||
1893 | /* -- Trace setup --------------------------------------------------------- */ | 2690 | /* -- Trace setup --------------------------------------------------------- */ |
1894 | 2691 | ||
1895 | /* Ensure there are enough stack slots for call arguments. */ | 2692 | /* Ensure there are enough stack slots for call arguments. */ |
1896 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2693 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
1897 | { | 2694 | { |
1898 | IRRef args[CCI_NARGS_MAX*2]; | 2695 | IRRef args[CCI_NARGS_MAX*2]; |
1899 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2696 | uint32_t i, nargs = CCI_XNARGS(ci); |
2697 | #if LJ_32 | ||
1900 | int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | 2698 | int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; |
2699 | #else | ||
2700 | int nslots = 0, ngpr = REGARG_NUMGPR; | ||
2701 | #endif | ||
1901 | asm_collectargs(as, ir, ci, args); | 2702 | asm_collectargs(as, ir, ci, args); |
1902 | for (i = 0; i < nargs; i++) { | 2703 | for (i = 0; i < nargs; i++) { |
1903 | if (args[i] && irt_isfp(IR(args[i])->t) && | 2704 | #if LJ_32 |
2705 | if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && | ||
1904 | nfpr > 0 && !(ci->flags & CCI_VARARG)) { | 2706 | nfpr > 0 && !(ci->flags & CCI_VARARG)) { |
1905 | nfpr--; | 2707 | nfpr--; |
1906 | ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; | 2708 | ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; |
1907 | } else if (args[i] && irt_isnum(IR(args[i])->t)) { | 2709 | } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { |
1908 | nfpr = 0; | 2710 | nfpr = 0; |
1909 | ngpr = ngpr & ~1; | 2711 | ngpr = ngpr & ~1; |
1910 | if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; | 2712 | if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; |
@@ -1912,6 +2714,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
1912 | nfpr = 0; | 2714 | nfpr = 0; |
1913 | if (ngpr > 0) ngpr--; else nslots++; | 2715 | if (ngpr > 0) ngpr--; else nslots++; |
1914 | } | 2716 | } |
2717 | #else | ||
2718 | if (ngpr > 0) ngpr--; else nslots += 2; | ||
2719 | #endif | ||
1915 | } | 2720 | } |
1916 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | 2721 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ |
1917 | as->evenspill = nslots; | 2722 | as->evenspill = nslots; |
@@ -1942,35 +2747,35 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
1942 | if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && | 2747 | if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && |
1943 | ((p[-1] & 0xf0000000u) == MIPSI_BEQ || | 2748 | ((p[-1] & 0xf0000000u) == MIPSI_BEQ || |
1944 | (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || | 2749 | (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || |
1945 | (p[-1] & 0xffe00000u) == MIPSI_BC1F) && | 2750 | #if !LJ_TARGET_MIPSR6 |
1946 | p[-2] != MIPS_NOPATCH_GC_CHECK) { | 2751 | (p[-1] & 0xffe00000u) == MIPSI_BC1F |
2752 | #else | ||
2753 | (p[-1] & 0xff600000u) == MIPSI_BC1EQZ | ||
2754 | #endif | ||
2755 | ) && p[-2] != MIPS_NOPATCH_GC_CHECK) { | ||
1947 | ptrdiff_t delta = target - p; | 2756 | ptrdiff_t delta = target - p; |
1948 | if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ | 2757 | if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ |
1949 | patchbranch: | 2758 | patchbranch: |
1950 | p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); | 2759 | p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); |
1951 | *p = MIPSI_NOP; /* Replace the load of the exit number. */ | 2760 | *p = MIPSI_NOP; /* Replace the load of the exit number. */ |
1952 | cstop = p; | 2761 | cstop = p+1; |
1953 | if (!cstart) cstart = p-1; | 2762 | if (!cstart) cstart = p-1; |
1954 | } else { /* Branch out of range. Use spare jump slot in mcarea. */ | 2763 | } else { /* Branch out of range. Use spare jump slot in mcarea. */ |
1955 | int i; | 2764 | MCode *mcjump = asm_sparejump_use(mcarea, tjump); |
1956 | for (i = (int)(sizeof(MCLink)/sizeof(MCode)); | 2765 | if (mcjump) { |
1957 | i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2); | 2766 | lj_mcode_sync(mcjump, mcjump+1); |
1958 | i += 2) { | 2767 | delta = mcjump - p; |
1959 | if (mcarea[i] == tjump) { | 2768 | if (((delta + 0x8000) >> 16) == 0) { |
1960 | delta = mcarea+i - p; | ||
1961 | goto patchbranch; | ||
1962 | } else if (mcarea[i] == MIPSI_NOP) { | ||
1963 | mcarea[i] = tjump; | ||
1964 | cstart = mcarea+i; | ||
1965 | delta = mcarea+i - p; | ||
1966 | goto patchbranch; | 2769 | goto patchbranch; |
2770 | } else { | ||
2771 | lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); | ||
1967 | } | 2772 | } |
1968 | } | 2773 | } |
1969 | /* Ignore jump slot overflow. Child trace is simply not attached. */ | 2774 | /* Ignore jump slot overflow. Child trace is simply not attached. */ |
1970 | } | 2775 | } |
1971 | } else if (p+1 == pe) { | 2776 | } else if (p+1 == pe) { |
1972 | /* Patch NOP after code for inverted loop branch. Use of J is ok. */ | 2777 | /* Patch NOP after code for inverted loop branch. Use of J is ok. */ |
1973 | lua_assert(p[1] == MIPSI_NOP); | 2778 | lj_assertJ(p[1] == MIPSI_NOP, "expected NOP"); |
1974 | p[1] = tjump; | 2779 | p[1] = tjump; |
1975 | *p = MIPSI_NOP; /* Replace the load of the exit number. */ | 2780 | *p = MIPSI_NOP; /* Replace the load of the exit number. */ |
1976 | cstop = p+2; | 2781 | cstop = p+2; |
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 5fd35d2e..5ea4d47d 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |||
156 | return ra_allock(as, ofs-(int16_t)ofs, allow); | 156 | return ra_allock(as, ofs-(int16_t)ofs, allow); |
157 | } | 157 | } |
158 | } | 158 | } |
159 | } else if (ir->o == IR_TMPREF) { | ||
160 | *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); | ||
161 | return RID_JGL; | ||
159 | } | 162 | } |
160 | } | 163 | } |
161 | *ofsp = 0; | 164 | *ofsp = 0; |
@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
181 | return; | 184 | return; |
182 | } | 185 | } |
183 | } else if (ir->o == IR_STRREF) { | 186 | } else if (ir->o == IR_STRREF) { |
184 | lua_assert(ofs == 0); | 187 | lj_assertA(ofs == 0, "bad usage"); |
185 | ofs = (int32_t)sizeof(GCstr); | 188 | ofs = (int32_t)sizeof(GCstr); |
186 | if (irref_isk(ir->op2)) { | 189 | if (irref_isk(ir->op2)) { |
187 | ofs += IR(ir->op2)->i; | 190 | ofs += IR(ir->op2)->i; |
@@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
226 | emit_tab(as, pi, rt, left, right); | 229 | emit_tab(as, pi, rt, left, right); |
227 | } | 230 | } |
228 | 231 | ||
232 | #if !LJ_SOFTFP | ||
229 | /* Fuse to multiply-add/sub instruction. */ | 233 | /* Fuse to multiply-add/sub instruction. */ |
230 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | 234 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) |
231 | { | 235 | { |
@@ -245,24 +249,30 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | |||
245 | } | 249 | } |
246 | return 0; | 250 | return 0; |
247 | } | 251 | } |
252 | #endif | ||
248 | 253 | ||
249 | /* -- Calls --------------------------------------------------------------- */ | 254 | /* -- Calls --------------------------------------------------------------- */ |
250 | 255 | ||
251 | /* Generate a call to a C function. */ | 256 | /* Generate a call to a C function. */ |
252 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 257 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
253 | { | 258 | { |
254 | uint32_t n, nargs = CCI_NARGS(ci); | 259 | uint32_t n, nargs = CCI_XNARGS(ci); |
255 | int32_t ofs = 8; | 260 | int32_t ofs = 8; |
256 | Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; | 261 | Reg gpr = REGARG_FIRSTGPR; |
262 | #if !LJ_SOFTFP | ||
263 | Reg fpr = REGARG_FIRSTFPR; | ||
264 | #endif | ||
257 | if ((void *)ci->func) | 265 | if ((void *)ci->func) |
258 | emit_call(as, (void *)ci->func); | 266 | emit_call(as, (void *)ci->func); |
259 | for (n = 0; n < nargs; n++) { /* Setup args. */ | 267 | for (n = 0; n < nargs; n++) { /* Setup args. */ |
260 | IRRef ref = args[n]; | 268 | IRRef ref = args[n]; |
261 | if (ref) { | 269 | if (ref) { |
262 | IRIns *ir = IR(ref); | 270 | IRIns *ir = IR(ref); |
271 | #if !LJ_SOFTFP | ||
263 | if (irt_isfp(ir->t)) { | 272 | if (irt_isfp(ir->t)) { |
264 | if (fpr <= REGARG_LASTFPR) { | 273 | if (fpr <= REGARG_LASTFPR) { |
265 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ | 274 | lj_assertA(rset_test(as->freeset, fpr), |
275 | "reg %d not free", fpr); /* Already evicted. */ | ||
266 | ra_leftov(as, fpr, ref); | 276 | ra_leftov(as, fpr, ref); |
267 | fpr++; | 277 | fpr++; |
268 | } else { | 278 | } else { |
@@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
271 | emit_spstore(as, ir, r, ofs); | 281 | emit_spstore(as, ir, r, ofs); |
272 | ofs += irt_isnum(ir->t) ? 8 : 4; | 282 | ofs += irt_isnum(ir->t) ? 8 : 4; |
273 | } | 283 | } |
274 | } else { | 284 | } else |
285 | #endif | ||
286 | { | ||
275 | if (gpr <= REGARG_LASTGPR) { | 287 | if (gpr <= REGARG_LASTGPR) { |
276 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ | 288 | lj_assertA(rset_test(as->freeset, gpr), |
289 | "reg %d not free", gpr); /* Already evicted. */ | ||
277 | ra_leftov(as, gpr, ref); | 290 | ra_leftov(as, gpr, ref); |
278 | gpr++; | 291 | gpr++; |
279 | } else { | 292 | } else { |
@@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
290 | } | 303 | } |
291 | checkmclim(as); | 304 | checkmclim(as); |
292 | } | 305 | } |
306 | #if !LJ_SOFTFP | ||
293 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ | 307 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ |
294 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); | 308 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); |
309 | #endif | ||
295 | } | 310 | } |
296 | 311 | ||
297 | /* Setup result reg/sp for call. Evict scratch regs. */ | 312 | /* Setup result reg/sp for call. Evict scratch regs. */ |
@@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
299 | { | 314 | { |
300 | RegSet drop = RSET_SCRATCH; | 315 | RegSet drop = RSET_SCRATCH; |
301 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | 316 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); |
317 | #if !LJ_SOFTFP | ||
302 | if ((ci->flags & CCI_NOFPRCLOBBER)) | 318 | if ((ci->flags & CCI_NOFPRCLOBBER)) |
303 | drop &= ~RSET_FPR; | 319 | drop &= ~RSET_FPR; |
320 | #endif | ||
304 | if (ra_hasreg(ir->r)) | 321 | if (ra_hasreg(ir->r)) |
305 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 322 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
306 | if (hiop && ra_hasreg((ir+1)->r)) | 323 | if (hiop && ra_hasreg((ir+1)->r)) |
307 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ | 324 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ |
308 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 325 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
309 | if (ra_used(ir)) { | 326 | if (ra_used(ir)) { |
310 | lua_assert(!irt_ispri(ir->t)); | 327 | lj_assertA(!irt_ispri(ir->t), "PRI dest"); |
311 | if (irt_isfp(ir->t)) { | 328 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
312 | if ((ci->flags & CCI_CASTU64)) { | 329 | if ((ci->flags & CCI_CASTU64)) { |
313 | /* Use spill slot or temp slots. */ | 330 | /* Use spill slot or temp slots. */ |
314 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | 331 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; |
@@ -331,15 +348,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
331 | } | 348 | } |
332 | } | 349 | } |
333 | 350 | ||
334 | static void asm_call(ASMState *as, IRIns *ir) | ||
335 | { | ||
336 | IRRef args[CCI_NARGS_MAX]; | ||
337 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
338 | asm_collectargs(as, ir, ci, args); | ||
339 | asm_setupresult(as, ir, ci); | ||
340 | asm_gencall(as, ci, args); | ||
341 | } | ||
342 | |||
343 | static void asm_callx(ASMState *as, IRIns *ir) | 351 | static void asm_callx(ASMState *as, IRIns *ir) |
344 | { | 352 | { |
345 | IRRef args[CCI_NARGS_MAX*2]; | 353 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -352,7 +360,7 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
352 | func = ir->op2; irf = IR(func); | 360 | func = ir->op2; irf = IR(func); |
353 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } | 361 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } |
354 | if (irref_isk(func)) { /* Call to constant address. */ | 362 | if (irref_isk(func)) { /* Call to constant address. */ |
355 | ci.func = (ASMFunction)(void *)(irf->i); | 363 | ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); |
356 | } else { /* Need a non-argument register for indirect calls. */ | 364 | } else { /* Need a non-argument register for indirect calls. */ |
357 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); | 365 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); |
358 | Reg freg = ra_alloc1(as, func, allow); | 366 | Reg freg = ra_alloc1(as, func, allow); |
@@ -363,16 +371,6 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
363 | asm_gencall(as, &ci, args); | 371 | asm_gencall(as, &ci, args); |
364 | } | 372 | } |
365 | 373 | ||
366 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
367 | { | ||
368 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
369 | IRRef args[2]; | ||
370 | args[0] = ir->op1; | ||
371 | args[1] = ir->op2; | ||
372 | asm_setupresult(as, ir, ci); | ||
373 | asm_gencall(as, ci, args); | ||
374 | } | ||
375 | |||
376 | /* -- Returns ------------------------------------------------------------- */ | 374 | /* -- Returns ------------------------------------------------------------- */ |
377 | 375 | ||
378 | /* Return to lower frame. Guard that it goes to the right spot. */ | 376 | /* Return to lower frame. Guard that it goes to the right spot. */ |
@@ -380,7 +378,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
380 | { | 378 | { |
381 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 379 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
382 | void *pc = ir_kptr(IR(ir->op2)); | 380 | void *pc = ir_kptr(IR(ir->op2)); |
383 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 381 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
384 | as->topslot -= (BCReg)delta; | 382 | as->topslot -= (BCReg)delta; |
385 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 383 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
386 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 384 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
@@ -392,8 +390,24 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
392 | emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); | 390 | emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); |
393 | } | 391 | } |
394 | 392 | ||
393 | /* -- Buffer operations --------------------------------------------------- */ | ||
394 | |||
395 | #if LJ_HASBUFFER | ||
396 | static void asm_bufhdr_write(ASMState *as, Reg sb) | ||
397 | { | ||
398 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
399 | IRIns irgc; | ||
400 | irgc.ot = IRT(0, IRT_PGC); /* GC type. */ | ||
401 | emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); | ||
402 | emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31); | ||
403 | emit_getgl(as, RID_TMP, cur_L); | ||
404 | emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); | ||
405 | } | ||
406 | #endif | ||
407 | |||
395 | /* -- Type conversions ---------------------------------------------------- */ | 408 | /* -- Type conversions ---------------------------------------------------- */ |
396 | 409 | ||
410 | #if !LJ_SOFTFP | ||
397 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | 411 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) |
398 | { | 412 | { |
399 | RegSet allow = RSET_FPR; | 413 | RegSet allow = RSET_FPR; |
@@ -410,8 +424,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
410 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); | 424 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); |
411 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 425 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
412 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 426 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
413 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 427 | (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); |
414 | RSET_GPR); | ||
415 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 428 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
416 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 429 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
417 | } | 430 | } |
@@ -427,15 +440,27 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
427 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 440 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
428 | emit_fab(as, PPCI_FADD, tmp, left, right); | 441 | emit_fab(as, PPCI_FADD, tmp, left, right); |
429 | } | 442 | } |
443 | #endif | ||
430 | 444 | ||
431 | static void asm_conv(ASMState *as, IRIns *ir) | 445 | static void asm_conv(ASMState *as, IRIns *ir) |
432 | { | 446 | { |
433 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 447 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
448 | #if !LJ_SOFTFP | ||
434 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 449 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
450 | #endif | ||
435 | IRRef lref = ir->op1; | 451 | IRRef lref = ir->op1; |
436 | lua_assert(irt_type(ir->t) != st); | 452 | /* 64 bit integer conversions are handled by SPLIT. */ |
437 | lua_assert(!(irt_isint64(ir->t) || | 453 | lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), |
438 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ | 454 | "IR %04d has unsplit 64 bit type", |
455 | (int)(ir - as->ir) - REF_BIAS); | ||
456 | #if LJ_SOFTFP | ||
457 | /* FP conversions are handled by SPLIT. */ | ||
458 | lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), | ||
459 | "IR %04d has FP type", | ||
460 | (int)(ir - as->ir) - REF_BIAS); | ||
461 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | ||
462 | #else | ||
463 | lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); | ||
439 | if (irt_isfp(ir->t)) { | 464 | if (irt_isfp(ir->t)) { |
440 | Reg dest = ra_dest(as, ir, RSET_FPR); | 465 | Reg dest = ra_dest(as, ir, RSET_FPR); |
441 | if (stfp) { /* FP to FP conversion. */ | 466 | if (stfp) { /* FP to FP conversion. */ |
@@ -450,13 +475,11 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
450 | Reg left = ra_alloc1(as, lref, allow); | 475 | Reg left = ra_alloc1(as, lref, allow); |
451 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); | 476 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); |
452 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 477 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
453 | const float *kbias; | ||
454 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); | 478 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); |
455 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 479 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
456 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 480 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
457 | kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); | 481 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
458 | if (st == IRT_U32) kbias++; | 482 | &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], |
459 | emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, | ||
460 | rset_clear(allow, hibias)); | 483 | rset_clear(allow, hibias)); |
461 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, | 484 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, |
462 | RID_SP, SPOFS_TMPLO); | 485 | RID_SP, SPOFS_TMPLO); |
@@ -466,7 +489,8 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
466 | } else if (stfp) { /* FP to integer conversion. */ | 489 | } else if (stfp) { /* FP to integer conversion. */ |
467 | if (irt_isguard(ir->t)) { | 490 | if (irt_isguard(ir->t)) { |
468 | /* Checked conversions are only supported from number to int. */ | 491 | /* Checked conversions are only supported from number to int. */ |
469 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | 492 | lj_assertA(irt_isint(ir->t) && st == IRT_NUM, |
493 | "bad type for checked CONV"); | ||
470 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | 494 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); |
471 | } else { | 495 | } else { |
472 | Reg dest = ra_dest(as, ir, RSET_GPR); | 496 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -489,19 +513,20 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
489 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); | 513 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); |
490 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); | 514 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); |
491 | emit_lsptr(as, PPCI_LFS, (tmp & 31), | 515 | emit_lsptr(as, PPCI_LFS, (tmp & 31), |
492 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), | 516 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
493 | RSET_GPR); | ||
494 | } else { | 517 | } else { |
495 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 518 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
496 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 519 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
497 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 520 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
498 | } | 521 | } |
499 | } | 522 | } |
500 | } else { | 523 | } else |
524 | #endif | ||
525 | { | ||
501 | Reg dest = ra_dest(as, ir, RSET_GPR); | 526 | Reg dest = ra_dest(as, ir, RSET_GPR); |
502 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 527 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
503 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 528 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
504 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | 529 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); |
505 | if ((ir->op2 & IRCONV_SEXT)) | 530 | if ((ir->op2 & IRCONV_SEXT)) |
506 | emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); | 531 | emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); |
507 | else | 532 | else |
@@ -513,90 +538,102 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
513 | } | 538 | } |
514 | } | 539 | } |
515 | 540 | ||
516 | #if LJ_HASFFI | ||
517 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
518 | { | ||
519 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
520 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
521 | IRCallID id; | ||
522 | const CCallInfo *ci; | ||
523 | IRRef args[2]; | ||
524 | args[0] = ir->op1; | ||
525 | args[1] = (ir-1)->op1; | ||
526 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
527 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
528 | ir--; | ||
529 | } else { | ||
530 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
531 | } | ||
532 | ci = &lj_ir_callinfo[id]; | ||
533 | asm_setupresult(as, ir, ci); | ||
534 | asm_gencall(as, ci, args); | ||
535 | } | ||
536 | #endif | ||
537 | |||
538 | static void asm_strto(ASMState *as, IRIns *ir) | 541 | static void asm_strto(ASMState *as, IRIns *ir) |
539 | { | 542 | { |
540 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 543 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
541 | IRRef args[2]; | 544 | IRRef args[2]; |
542 | int32_t ofs; | 545 | int32_t ofs = SPOFS_TMP; |
546 | #if LJ_SOFTFP | ||
547 | ra_evictset(as, RSET_SCRATCH); | ||
548 | if (ra_used(ir)) { | ||
549 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | ||
550 | (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { | ||
551 | int i; | ||
552 | for (i = 0; i < 2; i++) { | ||
553 | Reg r = (ir+i)->r; | ||
554 | if (ra_hasreg(r)) { | ||
555 | ra_free(as, r); | ||
556 | ra_modified(as, r); | ||
557 | emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); | ||
558 | } | ||
559 | } | ||
560 | ofs = sps_scale(ir->s & ~1); | ||
561 | } else { | ||
562 | Reg rhi = ra_dest(as, ir+1, RSET_GPR); | ||
563 | Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); | ||
564 | emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); | ||
565 | emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); | ||
566 | } | ||
567 | } | ||
568 | #else | ||
543 | RegSet drop = RSET_SCRATCH; | 569 | RegSet drop = RSET_SCRATCH; |
544 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ | 570 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ |
545 | ra_evictset(as, drop); | 571 | ra_evictset(as, drop); |
572 | if (ir->s) ofs = sps_scale(ir->s); | ||
573 | #endif | ||
546 | asm_guardcc(as, CC_EQ); | 574 | asm_guardcc(as, CC_EQ); |
547 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ | 575 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ |
548 | args[0] = ir->op1; /* GCstr *str */ | 576 | args[0] = ir->op1; /* GCstr *str */ |
549 | args[1] = ASMREF_TMP1; /* TValue *n */ | 577 | args[1] = ASMREF_TMP1; /* TValue *n */ |
550 | asm_gencall(as, ci, args); | 578 | asm_gencall(as, ci, args); |
551 | /* Store the result to the spill slot or temp slots. */ | 579 | /* Store the result to the spill slot or temp slots. */ |
552 | ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | ||
553 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); | 580 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); |
554 | } | 581 | } |
555 | 582 | ||
583 | /* -- Memory references --------------------------------------------------- */ | ||
584 | |||
556 | /* Get pointer to TValue. */ | 585 | /* Get pointer to TValue. */ |
557 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 586 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) |
558 | { | 587 | { |
559 | IRIns *ir = IR(ref); | 588 | int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); |
560 | if (irt_isnum(ir->t)) { | 589 | if ((mode & IRTMPREF_IN1)) { |
561 | if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ | 590 | IRIns *ir = IR(ref); |
562 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | 591 | if (irt_isnum(ir->t)) { |
563 | else /* Otherwise force a spill and use the spill slot. */ | 592 | if ((mode & IRTMPREF_OUT1)) { |
564 | emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); | 593 | #if LJ_SOFTFP |
565 | } else { | 594 | lj_assertA(irref_isk(ref), "unsplit FP op"); |
566 | /* Otherwise use g->tmptv to hold the TValue. */ | 595 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); |
567 | RegSet allow = rset_exclude(RSET_GPR, dest); | 596 | emit_setgl(as, |
568 | Reg type; | 597 | ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), |
569 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); | 598 | tmptv.u32.lo); |
570 | if (!irt_ispri(ir->t)) { | 599 | emit_setgl(as, |
571 | Reg src = ra_alloc1(as, ref, allow); | 600 | ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), |
572 | emit_setgl(as, src, tmptv.gcr); | 601 | tmptv.u32.hi); |
602 | #else | ||
603 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
604 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); | ||
605 | emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs); | ||
606 | #endif | ||
607 | } else if (irref_isk(ref)) { | ||
608 | /* Use the number constant itself as a TValue. */ | ||
609 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | ||
610 | } else { | ||
611 | #if LJ_SOFTFP | ||
612 | lj_assertA(0, "unsplit FP op"); | ||
613 | #else | ||
614 | /* Otherwise force a spill and use the spill slot. */ | ||
615 | emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); | ||
616 | #endif | ||
617 | } | ||
618 | } else { | ||
619 | /* Otherwise use g->tmptv to hold the TValue. */ | ||
620 | Reg type; | ||
621 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); | ||
622 | if (!irt_ispri(ir->t)) { | ||
623 | Reg src = ra_alloc1(as, ref, RSET_GPR); | ||
624 | emit_setgl(as, src, tmptv.gcr); | ||
625 | } | ||
626 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) | ||
627 | type = ra_alloc1(as, ref+1, RSET_GPR); | ||
628 | else | ||
629 | type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); | ||
630 | emit_setgl(as, type, tmptv.it); | ||
573 | } | 631 | } |
574 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
575 | emit_setgl(as, type, tmptv.it); | ||
576 | } | ||
577 | } | ||
578 | |||
579 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
580 | { | ||
581 | IRRef args[2]; | ||
582 | args[0] = ASMREF_L; | ||
583 | as->gcsteps++; | ||
584 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
585 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
586 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
587 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
588 | asm_gencall(as, ci, args); | ||
589 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
590 | } else { | 632 | } else { |
591 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | 633 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); |
592 | args[1] = ir->op1; /* int32_t k */ | ||
593 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
594 | asm_gencall(as, ci, args); | ||
595 | } | 634 | } |
596 | } | 635 | } |
597 | 636 | ||
598 | /* -- Memory references --------------------------------------------------- */ | ||
599 | |||
600 | static void asm_aref(ASMState *as, IRIns *ir) | 637 | static void asm_aref(ASMState *as, IRIns *ir) |
601 | { | 638 | { |
602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 639 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -636,11 +673,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
636 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; | 673 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; |
637 | IRRef refkey = ir->op2; | 674 | IRRef refkey = ir->op2; |
638 | IRIns *irkey = IR(refkey); | 675 | IRIns *irkey = IR(refkey); |
676 | int isk = irref_isk(refkey); | ||
639 | IRType1 kt = irkey->t; | 677 | IRType1 kt = irkey->t; |
640 | uint32_t khash; | 678 | uint32_t khash; |
641 | MCLabel l_end, l_loop, l_next; | 679 | MCLabel l_end, l_loop, l_next; |
642 | 680 | ||
643 | rset_clear(allow, tab); | 681 | rset_clear(allow, tab); |
682 | #if LJ_SOFTFP | ||
683 | if (!isk) { | ||
684 | key = ra_alloc1(as, refkey, allow); | ||
685 | rset_clear(allow, key); | ||
686 | if (irkey[1].o == IR_HIOP) { | ||
687 | if (ra_hasreg((irkey+1)->r)) { | ||
688 | tmpnum = (irkey+1)->r; | ||
689 | ra_noweak(as, tmpnum); | ||
690 | } else { | ||
691 | tmpnum = ra_allocref(as, refkey+1, allow); | ||
692 | } | ||
693 | rset_clear(allow, tmpnum); | ||
694 | } | ||
695 | } | ||
696 | #else | ||
644 | if (irt_isnum(kt)) { | 697 | if (irt_isnum(kt)) { |
645 | key = ra_alloc1(as, refkey, RSET_FPR); | 698 | key = ra_alloc1(as, refkey, RSET_FPR); |
646 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); | 699 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); |
@@ -650,6 +703,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
650 | key = ra_alloc1(as, refkey, allow); | 703 | key = ra_alloc1(as, refkey, allow); |
651 | rset_clear(allow, key); | 704 | rset_clear(allow, key); |
652 | } | 705 | } |
706 | #endif | ||
653 | tmp2 = ra_scratch(as, allow); | 707 | tmp2 = ra_scratch(as, allow); |
654 | rset_clear(allow, tmp2); | 708 | rset_clear(allow, tmp2); |
655 | 709 | ||
@@ -672,7 +726,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
672 | asm_guardcc(as, CC_EQ); | 726 | asm_guardcc(as, CC_EQ); |
673 | else | 727 | else |
674 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); | 728 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); |
675 | if (irt_isnum(kt)) { | 729 | if (!LJ_SOFTFP && irt_isnum(kt)) { |
676 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); | 730 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); |
677 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); | 731 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); |
678 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); | 732 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); |
@@ -682,7 +736,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
682 | emit_ab(as, PPCI_CMPW, tmp2, key); | 736 | emit_ab(as, PPCI_CMPW, tmp2, key); |
683 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); | 737 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); |
684 | } | 738 | } |
685 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | 739 | if (LJ_SOFTFP && ra_hasreg(tmpnum)) |
740 | emit_ab(as, PPCI_CMPW, tmp1, tmpnum); | ||
741 | else | ||
742 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | ||
686 | if (!irt_ispri(kt)) | 743 | if (!irt_ispri(kt)) |
687 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); | 744 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); |
688 | } | 745 | } |
@@ -691,35 +748,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
691 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); | 748 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); |
692 | 749 | ||
693 | /* Load main position relative to tab->node into dest. */ | 750 | /* Load main position relative to tab->node into dest. */ |
694 | khash = irref_isk(refkey) ? ir_khash(irkey) : 1; | 751 | khash = isk ? ir_khash(as, irkey) : 1; |
695 | if (khash == 0) { | 752 | if (khash == 0) { |
696 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 753 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
697 | } else { | 754 | } else { |
698 | Reg tmphash = tmp1; | 755 | Reg tmphash = tmp1; |
699 | if (irref_isk(refkey)) | 756 | if (isk) |
700 | tmphash = ra_allock(as, khash, allow); | 757 | tmphash = ra_allock(as, khash, allow); |
701 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); | 758 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); |
702 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); | 759 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); |
703 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); | 760 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); |
704 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 761 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
705 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); | 762 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); |
706 | if (irref_isk(refkey)) { | 763 | if (isk) { |
707 | /* Nothing to do. */ | 764 | /* Nothing to do. */ |
708 | } else if (irt_isstr(kt)) { | 765 | } else if (irt_isstr(kt)) { |
709 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); | 766 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid)); |
710 | } else { /* Must match with hash*() in lj_tab.c. */ | 767 | } else { /* Must match with hash*() in lj_tab.c. */ |
711 | emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); | 768 | emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); |
712 | emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); | 769 | emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); |
713 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); | 770 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); |
714 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); | 771 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); |
715 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); | 772 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); |
716 | if (irt_isnum(kt)) { | 773 | if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { |
774 | #if LJ_SOFTFP | ||
775 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | ||
776 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | ||
777 | emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); | ||
778 | #else | ||
717 | int32_t ofs = ra_spill(as, irkey); | 779 | int32_t ofs = ra_spill(as, irkey); |
718 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); | 780 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); |
719 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 781 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
720 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); | 782 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); |
721 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); | 783 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); |
722 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); | 784 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); |
785 | #endif | ||
723 | } else { | 786 | } else { |
724 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | 787 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); |
725 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 788 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
@@ -740,7 +803,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
740 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | 803 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); |
741 | Reg key = RID_NONE, type = RID_TMP, idx = node; | 804 | Reg key = RID_NONE, type = RID_TMP, idx = node; |
742 | RegSet allow = rset_exclude(RSET_GPR, node); | 805 | RegSet allow = rset_exclude(RSET_GPR, node); |
743 | lua_assert(ofs % sizeof(Node) == 0); | 806 | lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); |
744 | if (ofs > 32736) { | 807 | if (ofs > 32736) { |
745 | idx = dest; | 808 | idx = dest; |
746 | rset_clear(allow, dest); | 809 | rset_clear(allow, dest); |
@@ -773,20 +836,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
773 | } | 836 | } |
774 | } | 837 | } |
775 | 838 | ||
776 | static void asm_newref(ASMState *as, IRIns *ir) | ||
777 | { | ||
778 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
779 | IRRef args[3]; | ||
780 | if (ir->r == RID_SINK) | ||
781 | return; | ||
782 | args[0] = ASMREF_L; /* lua_State *L */ | ||
783 | args[1] = ir->op1; /* GCtab *t */ | ||
784 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
785 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
786 | asm_gencall(as, ci, args); | ||
787 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
788 | } | ||
789 | |||
790 | static void asm_uref(ASMState *as, IRIns *ir) | 839 | static void asm_uref(ASMState *as, IRIns *ir) |
791 | { | 840 | { |
792 | Reg dest = ra_dest(as, ir, RSET_GPR); | 841 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -813,7 +862,7 @@ static void asm_uref(ASMState *as, IRIns *ir) | |||
813 | static void asm_fref(ASMState *as, IRIns *ir) | 862 | static void asm_fref(ASMState *as, IRIns *ir) |
814 | { | 863 | { |
815 | UNUSED(as); UNUSED(ir); | 864 | UNUSED(as); UNUSED(ir); |
816 | lua_assert(!ra_used(ir)); | 865 | lj_assertA(!ra_used(ir), "unfused FREF"); |
817 | } | 866 | } |
818 | 867 | ||
819 | static void asm_strref(ASMState *as, IRIns *ir) | 868 | static void asm_strref(ASMState *as, IRIns *ir) |
@@ -853,26 +902,28 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
853 | 902 | ||
854 | /* -- Loads and stores ---------------------------------------------------- */ | 903 | /* -- Loads and stores ---------------------------------------------------- */ |
855 | 904 | ||
856 | static PPCIns asm_fxloadins(IRIns *ir) | 905 | static PPCIns asm_fxloadins(ASMState *as, IRIns *ir) |
857 | { | 906 | { |
907 | UNUSED(as); | ||
858 | switch (irt_type(ir->t)) { | 908 | switch (irt_type(ir->t)) { |
859 | case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ | 909 | case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ |
860 | case IRT_U8: return PPCI_LBZ; | 910 | case IRT_U8: return PPCI_LBZ; |
861 | case IRT_I16: return PPCI_LHA; | 911 | case IRT_I16: return PPCI_LHA; |
862 | case IRT_U16: return PPCI_LHZ; | 912 | case IRT_U16: return PPCI_LHZ; |
863 | case IRT_NUM: return PPCI_LFD; | 913 | case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD; |
864 | case IRT_FLOAT: return PPCI_LFS; | 914 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; |
865 | default: return PPCI_LWZ; | 915 | default: return PPCI_LWZ; |
866 | } | 916 | } |
867 | } | 917 | } |
868 | 918 | ||
869 | static PPCIns asm_fxstoreins(IRIns *ir) | 919 | static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir) |
870 | { | 920 | { |
921 | UNUSED(as); | ||
871 | switch (irt_type(ir->t)) { | 922 | switch (irt_type(ir->t)) { |
872 | case IRT_I8: case IRT_U8: return PPCI_STB; | 923 | case IRT_I8: case IRT_U8: return PPCI_STB; |
873 | case IRT_I16: case IRT_U16: return PPCI_STH; | 924 | case IRT_I16: case IRT_U16: return PPCI_STH; |
874 | case IRT_NUM: return PPCI_STFD; | 925 | case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD; |
875 | case IRT_FLOAT: return PPCI_STFS; | 926 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; |
876 | default: return PPCI_STW; | 927 | default: return PPCI_STW; |
877 | } | 928 | } |
878 | } | 929 | } |
@@ -880,18 +931,24 @@ static PPCIns asm_fxstoreins(IRIns *ir) | |||
880 | static void asm_fload(ASMState *as, IRIns *ir) | 931 | static void asm_fload(ASMState *as, IRIns *ir) |
881 | { | 932 | { |
882 | Reg dest = ra_dest(as, ir, RSET_GPR); | 933 | Reg dest = ra_dest(as, ir, RSET_GPR); |
883 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | 934 | PPCIns pi = asm_fxloadins(as, ir); |
884 | PPCIns pi = asm_fxloadins(ir); | 935 | Reg idx; |
885 | int32_t ofs; | 936 | int32_t ofs; |
886 | if (ir->op2 == IRFL_TAB_ARRAY) { | 937 | if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ |
887 | ofs = asm_fuseabase(as, ir->op1); | 938 | idx = RID_JGL; |
888 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 939 | ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); |
889 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | 940 | } else { |
890 | return; | 941 | idx = ra_alloc1(as, ir->op1, RSET_GPR); |
942 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
943 | ofs = asm_fuseabase(as, ir->op1); | ||
944 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
945 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | ||
946 | return; | ||
947 | } | ||
891 | } | 948 | } |
949 | ofs = field_ofs[ir->op2]; | ||
892 | } | 950 | } |
893 | ofs = field_ofs[ir->op2]; | 951 | lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8"); |
894 | lua_assert(!irt_isi8(ir->t)); | ||
895 | emit_tai(as, pi, dest, idx, ofs); | 952 | emit_tai(as, pi, dest, idx, ofs); |
896 | } | 953 | } |
897 | 954 | ||
@@ -902,21 +959,22 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
902 | IRIns *irf = IR(ir->op1); | 959 | IRIns *irf = IR(ir->op1); |
903 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | 960 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); |
904 | int32_t ofs = field_ofs[irf->op2]; | 961 | int32_t ofs = field_ofs[irf->op2]; |
905 | PPCIns pi = asm_fxstoreins(ir); | 962 | PPCIns pi = asm_fxstoreins(as, ir); |
906 | emit_tai(as, pi, src, idx, ofs); | 963 | emit_tai(as, pi, src, idx, ofs); |
907 | } | 964 | } |
908 | } | 965 | } |
909 | 966 | ||
910 | static void asm_xload(ASMState *as, IRIns *ir) | 967 | static void asm_xload(ASMState *as, IRIns *ir) |
911 | { | 968 | { |
912 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 969 | Reg dest = ra_dest(as, ir, |
913 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 970 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
971 | lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); | ||
914 | if (irt_isi8(ir->t)) | 972 | if (irt_isi8(ir->t)) |
915 | emit_as(as, PPCI_EXTSB, dest, dest); | 973 | emit_as(as, PPCI_EXTSB, dest, dest); |
916 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 974 | asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); |
917 | } | 975 | } |
918 | 976 | ||
919 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 977 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
920 | { | 978 | { |
921 | IRIns *irb; | 979 | IRIns *irb; |
922 | if (ir->r == RID_SINK) | 980 | if (ir->r == RID_SINK) |
@@ -927,36 +985,54 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | |||
927 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); | 985 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); |
928 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); | 986 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); |
929 | } else { | 987 | } else { |
930 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 988 | Reg src = ra_alloc1(as, ir->op2, |
931 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 989 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
990 | asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, | ||
932 | rset_exclude(RSET_GPR, src), ofs); | 991 | rset_exclude(RSET_GPR, src), ofs); |
933 | } | 992 | } |
934 | } | 993 | } |
935 | 994 | ||
995 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
996 | |||
936 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 997 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
937 | { | 998 | { |
938 | IRType1 t = ir->t; | 999 | IRType1 t = ir->t; |
939 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; | 1000 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; |
940 | RegSet allow = RSET_GPR; | 1001 | RegSet allow = RSET_GPR; |
941 | int32_t ofs = AHUREF_LSX; | 1002 | int32_t ofs = AHUREF_LSX; |
1003 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { | ||
1004 | t.irt = IRT_NUM; | ||
1005 | if (ra_used(ir+1)) { | ||
1006 | type = ra_dest(as, ir+1, allow); | ||
1007 | rset_clear(allow, type); | ||
1008 | } | ||
1009 | ofs = 0; | ||
1010 | } | ||
942 | if (ra_used(ir)) { | 1011 | if (ra_used(ir)) { |
943 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1012 | lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
944 | if (!irt_isnum(t)) ofs = 0; | 1013 | irt_isint(ir->t) || irt_isaddr(ir->t), |
945 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1014 | "bad load type %d", irt_type(ir->t)); |
1015 | if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; | ||
1016 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
946 | rset_clear(allow, dest); | 1017 | rset_clear(allow, dest); |
947 | } | 1018 | } |
948 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1019 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
1020 | if (ir->o == IR_VLOAD) { | ||
1021 | ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 : | ||
1022 | ir->op2 ? 8 * ir->op2 : AHUREF_LSX; | ||
1023 | } | ||
949 | if (irt_isnum(t)) { | 1024 | if (irt_isnum(t)) { |
950 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); | 1025 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); |
951 | asm_guardcc(as, CC_GE); | 1026 | asm_guardcc(as, CC_GE); |
952 | emit_ab(as, PPCI_CMPLW, type, tisnum); | 1027 | emit_ab(as, PPCI_CMPLW, type, tisnum); |
953 | if (ra_hasreg(dest)) { | 1028 | if (ra_hasreg(dest)) { |
954 | if (ofs == AHUREF_LSX) { | 1029 | if (!LJ_SOFTFP && ofs == AHUREF_LSX) { |
955 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, | 1030 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, |
956 | (idx&255)), (idx>>8))); | 1031 | (idx&255)), (idx>>8))); |
957 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); | 1032 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); |
958 | } else { | 1033 | } else { |
959 | emit_fai(as, PPCI_LFD, dest, idx, ofs); | 1034 | emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, |
1035 | ofs+4*LJ_SOFTFP); | ||
960 | } | 1036 | } |
961 | } | 1037 | } |
962 | } else { | 1038 | } else { |
@@ -979,7 +1055,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
979 | int32_t ofs = AHUREF_LSX; | 1055 | int32_t ofs = AHUREF_LSX; |
980 | if (ir->r == RID_SINK) | 1056 | if (ir->r == RID_SINK) |
981 | return; | 1057 | return; |
982 | if (irt_isnum(ir->t)) { | 1058 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
983 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 1059 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
984 | } else { | 1060 | } else { |
985 | if (!irt_ispri(ir->t)) { | 1061 | if (!irt_ispri(ir->t)) { |
@@ -987,11 +1063,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
987 | rset_clear(allow, src); | 1063 | rset_clear(allow, src); |
988 | ofs = 0; | 1064 | ofs = 0; |
989 | } | 1065 | } |
990 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 1066 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
1067 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
1068 | else | ||
1069 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
991 | rset_clear(allow, type); | 1070 | rset_clear(allow, type); |
992 | } | 1071 | } |
993 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1072 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
994 | if (irt_isnum(ir->t)) { | 1073 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
995 | if (ofs == AHUREF_LSX) { | 1074 | if (ofs == AHUREF_LSX) { |
996 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); | 1075 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); |
997 | emit_slwi(as, RID_TMP, (idx>>8), 3); | 1076 | emit_slwi(as, RID_TMP, (idx>>8), 3); |
@@ -1016,21 +1095,39 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1016 | IRType1 t = ir->t; | 1095 | IRType1 t = ir->t; |
1017 | Reg dest = RID_NONE, type = RID_NONE, base; | 1096 | Reg dest = RID_NONE, type = RID_NONE, base; |
1018 | RegSet allow = RSET_GPR; | 1097 | RegSet allow = RSET_GPR; |
1019 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1098 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); |
1020 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1099 | if (hiop) |
1021 | lua_assert(LJ_DUALNUM || | 1100 | t.irt = IRT_NUM; |
1022 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 1101 | lj_assertA(!(ir->op2 & IRSLOAD_PARENT), |
1102 | "bad parent SLOAD"); /* Handled by asm_head_side(). */ | ||
1103 | lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), | ||
1104 | "inconsistent SLOAD variant"); | ||
1105 | lj_assertA(LJ_DUALNUM || | ||
1106 | !irt_isint(t) || | ||
1107 | (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), | ||
1108 | "bad SLOAD type"); | ||
1109 | #if LJ_SOFTFP | ||
1110 | lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), | ||
1111 | "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ | ||
1112 | if (hiop && ra_used(ir+1)) { | ||
1113 | type = ra_dest(as, ir+1, allow); | ||
1114 | rset_clear(allow, type); | ||
1115 | } | ||
1116 | #else | ||
1023 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1117 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
1024 | dest = ra_scratch(as, RSET_FPR); | 1118 | dest = ra_scratch(as, RSET_FPR); |
1025 | asm_tointg(as, ir, dest); | 1119 | asm_tointg(as, ir, dest); |
1026 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1120 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
1027 | } else if (ra_used(ir)) { | 1121 | } else |
1028 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1122 | #endif |
1029 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1123 | if (ra_used(ir)) { |
1124 | lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), | ||
1125 | "bad SLOAD type %d", irt_type(ir->t)); | ||
1126 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
1030 | rset_clear(allow, dest); | 1127 | rset_clear(allow, dest); |
1031 | base = ra_alloc1(as, REF_BASE, allow); | 1128 | base = ra_alloc1(as, REF_BASE, allow); |
1032 | rset_clear(allow, base); | 1129 | rset_clear(allow, base); |
1033 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1130 | if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { |
1034 | if (irt_isint(t)) { | 1131 | if (irt_isint(t)) { |
1035 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 1132 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
1036 | dest = ra_scratch(as, RSET_FPR); | 1133 | dest = ra_scratch(as, RSET_FPR); |
@@ -1044,7 +1141,7 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1044 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 1141 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
1045 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 1142 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
1046 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 1143 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
1047 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 1144 | (void *)&as->J->k32[LJ_K32_2P52_2P31], |
1048 | rset_clear(allow, hibias)); | 1145 | rset_clear(allow, hibias)); |
1049 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); | 1146 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); |
1050 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); | 1147 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); |
@@ -1062,10 +1159,13 @@ dotypecheck: | |||
1062 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1159 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1063 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); | 1160 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); |
1064 | asm_guardcc(as, CC_GE); | 1161 | asm_guardcc(as, CC_GE); |
1065 | emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); | 1162 | #if !LJ_SOFTFP |
1066 | type = RID_TMP; | 1163 | type = RID_TMP; |
1164 | #endif | ||
1165 | emit_ab(as, PPCI_CMPLW, type, tisnum); | ||
1067 | } | 1166 | } |
1068 | if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); | 1167 | if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, |
1168 | base, ofs-(LJ_SOFTFP?0:4)); | ||
1069 | } else { | 1169 | } else { |
1070 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1170 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1071 | asm_guardcc(as, CC_NE); | 1171 | asm_guardcc(as, CC_NE); |
@@ -1083,19 +1183,16 @@ dotypecheck: | |||
1083 | static void asm_cnew(ASMState *as, IRIns *ir) | 1183 | static void asm_cnew(ASMState *as, IRIns *ir) |
1084 | { | 1184 | { |
1085 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1185 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1086 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1186 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1087 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1187 | CTSize sz; |
1088 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1188 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1089 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1189 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1090 | IRRef args[2]; | 1190 | IRRef args[4]; |
1091 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1092 | RegSet drop = RSET_SCRATCH; | 1191 | RegSet drop = RSET_SCRATCH; |
1093 | lua_assert(sz != CTSIZE_INVALID); | 1192 | lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), |
1193 | "bad CNEW/CNEWI operands"); | ||
1094 | 1194 | ||
1095 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1096 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1097 | as->gcsteps++; | 1195 | as->gcsteps++; |
1098 | |||
1099 | if (ra_hasreg(ir->r)) | 1196 | if (ra_hasreg(ir->r)) |
1100 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1197 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1101 | ra_evictset(as, drop); | 1198 | ra_evictset(as, drop); |
@@ -1104,11 +1201,12 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1104 | 1201 | ||
1105 | /* Initialize immutable cdata object. */ | 1202 | /* Initialize immutable cdata object. */ |
1106 | if (ir->o == IR_CNEWI) { | 1203 | if (ir->o == IR_CNEWI) { |
1204 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1107 | int32_t ofs = sizeof(GCcdata); | 1205 | int32_t ofs = sizeof(GCcdata); |
1108 | lua_assert(sz == 4 || sz == 8); | 1206 | lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); |
1109 | if (sz == 8) { | 1207 | if (sz == 8) { |
1110 | ofs += 4; | 1208 | ofs += 4; |
1111 | lua_assert((ir+1)->o == IR_HIOP); | 1209 | lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); |
1112 | } | 1210 | } |
1113 | for (;;) { | 1211 | for (;;) { |
1114 | Reg r = ra_alloc1(as, ir->op2, allow); | 1212 | Reg r = ra_alloc1(as, ir->op2, allow); |
@@ -1117,18 +1215,28 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1117 | if (ofs == sizeof(GCcdata)) break; | 1215 | if (ofs == sizeof(GCcdata)) break; |
1118 | ofs -= 4; ir++; | 1216 | ofs -= 4; ir++; |
1119 | } | 1217 | } |
1218 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1219 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1220 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1221 | args[1] = ir->op1; /* CTypeID id */ | ||
1222 | args[2] = ir->op2; /* CTSize sz */ | ||
1223 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1224 | asm_gencall(as, ci, args); | ||
1225 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1226 | return; | ||
1120 | } | 1227 | } |
1228 | |||
1121 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1229 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1122 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); | 1230 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); |
1123 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); | 1231 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); |
1124 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); | 1232 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); |
1125 | emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ | 1233 | emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ |
1234 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1235 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1126 | asm_gencall(as, ci, args); | 1236 | asm_gencall(as, ci, args); |
1127 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1237 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1128 | ra_releasetmp(as, ASMREF_TMP1)); | 1238 | ra_releasetmp(as, ASMREF_TMP1)); |
1129 | } | 1239 | } |
1130 | #else | ||
1131 | #define asm_cnew(as, ir) ((void)0) | ||
1132 | #endif | 1240 | #endif |
1133 | 1241 | ||
1134 | /* -- Write barriers ------------------------------------------------------ */ | 1242 | /* -- Write barriers ------------------------------------------------------ */ |
@@ -1142,7 +1250,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
1142 | emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); | 1250 | emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); |
1143 | emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); | 1251 | emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); |
1144 | emit_setgl(as, tab, gc.grayagain); | 1252 | emit_setgl(as, tab, gc.grayagain); |
1145 | lua_assert(LJ_GC_BLACK == 0x04); | 1253 | lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK"); |
1146 | emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ | 1254 | emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ |
1147 | emit_getgl(as, link, gc.grayagain); | 1255 | emit_getgl(as, link, gc.grayagain); |
1148 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); | 1256 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); |
@@ -1157,7 +1265,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1157 | MCLabel l_end; | 1265 | MCLabel l_end; |
1158 | Reg obj, val, tmp; | 1266 | Reg obj, val, tmp; |
1159 | /* No need for other object barriers (yet). */ | 1267 | /* No need for other object barriers (yet). */ |
1160 | lua_assert(IR(ir->op1)->o == IR_UREFC); | 1268 | lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); |
1161 | ra_evictset(as, RSET_SCRATCH); | 1269 | ra_evictset(as, RSET_SCRATCH); |
1162 | l_end = emit_label(as); | 1270 | l_end = emit_label(as); |
1163 | args[0] = ASMREF_TMP1; /* global_State *g */ | 1271 | args[0] = ASMREF_TMP1; /* global_State *g */ |
@@ -1178,6 +1286,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1178 | 1286 | ||
1179 | /* -- Arithmetic and logic operations ------------------------------------- */ | 1287 | /* -- Arithmetic and logic operations ------------------------------------- */ |
1180 | 1288 | ||
1289 | #if !LJ_SOFTFP | ||
1181 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) | 1290 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) |
1182 | { | 1291 | { |
1183 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1292 | Reg dest = ra_dest(as, ir, RSET_FPR); |
@@ -1196,31 +1305,24 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) | |||
1196 | emit_fb(as, pi, dest, left); | 1305 | emit_fb(as, pi, dest, left); |
1197 | } | 1306 | } |
1198 | 1307 | ||
1199 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1308 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1200 | { | 1309 | { |
1201 | IRIns *irp = IR(ir->op1); | 1310 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) |
1202 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1311 | asm_fpunary(as, ir, PPCI_FSQRT); |
1203 | IRIns *irpp = IR(irp->op1); | 1312 | else |
1204 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1313 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1205 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
1206 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | ||
1207 | IRRef args[2]; | ||
1208 | args[0] = irpp->op1; | ||
1209 | args[1] = irp->op2; | ||
1210 | asm_setupresult(as, ir, ci); | ||
1211 | asm_gencall(as, ci, args); | ||
1212 | return 1; | ||
1213 | } | ||
1214 | } | ||
1215 | return 0; | ||
1216 | } | 1314 | } |
1315 | #endif | ||
1217 | 1316 | ||
1218 | static void asm_add(ASMState *as, IRIns *ir) | 1317 | static void asm_add(ASMState *as, IRIns *ir) |
1219 | { | 1318 | { |
1319 | #if !LJ_SOFTFP | ||
1220 | if (irt_isnum(ir->t)) { | 1320 | if (irt_isnum(ir->t)) { |
1221 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) | 1321 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) |
1222 | asm_fparith(as, ir, PPCI_FADD); | 1322 | asm_fparith(as, ir, PPCI_FADD); |
1223 | } else { | 1323 | } else |
1324 | #endif | ||
1325 | { | ||
1224 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1326 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1225 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1327 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1226 | PPCIns pi; | 1328 | PPCIns pi; |
@@ -1259,10 +1361,13 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
1259 | 1361 | ||
1260 | static void asm_sub(ASMState *as, IRIns *ir) | 1362 | static void asm_sub(ASMState *as, IRIns *ir) |
1261 | { | 1363 | { |
1364 | #if !LJ_SOFTFP | ||
1262 | if (irt_isnum(ir->t)) { | 1365 | if (irt_isnum(ir->t)) { |
1263 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) | 1366 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) |
1264 | asm_fparith(as, ir, PPCI_FSUB); | 1367 | asm_fparith(as, ir, PPCI_FSUB); |
1265 | } else { | 1368 | } else |
1369 | #endif | ||
1370 | { | ||
1266 | PPCIns pi = PPCI_SUBF; | 1371 | PPCIns pi = PPCI_SUBF; |
1267 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1372 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1268 | Reg left, right; | 1373 | Reg left, right; |
@@ -1288,9 +1393,12 @@ static void asm_sub(ASMState *as, IRIns *ir) | |||
1288 | 1393 | ||
1289 | static void asm_mul(ASMState *as, IRIns *ir) | 1394 | static void asm_mul(ASMState *as, IRIns *ir) |
1290 | { | 1395 | { |
1396 | #if !LJ_SOFTFP | ||
1291 | if (irt_isnum(ir->t)) { | 1397 | if (irt_isnum(ir->t)) { |
1292 | asm_fparith(as, ir, PPCI_FMUL); | 1398 | asm_fparith(as, ir, PPCI_FMUL); |
1293 | } else { | 1399 | } else |
1400 | #endif | ||
1401 | { | ||
1294 | PPCIns pi = PPCI_MULLW; | 1402 | PPCIns pi = PPCI_MULLW; |
1295 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1403 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1296 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1404 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
@@ -1312,11 +1420,16 @@ static void asm_mul(ASMState *as, IRIns *ir) | |||
1312 | } | 1420 | } |
1313 | } | 1421 | } |
1314 | 1422 | ||
1423 | #define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV) | ||
1424 | |||
1315 | static void asm_neg(ASMState *as, IRIns *ir) | 1425 | static void asm_neg(ASMState *as, IRIns *ir) |
1316 | { | 1426 | { |
1427 | #if !LJ_SOFTFP | ||
1317 | if (irt_isnum(ir->t)) { | 1428 | if (irt_isnum(ir->t)) { |
1318 | asm_fpunary(as, ir, PPCI_FNEG); | 1429 | asm_fpunary(as, ir, PPCI_FNEG); |
1319 | } else { | 1430 | } else |
1431 | #endif | ||
1432 | { | ||
1320 | Reg dest, left; | 1433 | Reg dest, left; |
1321 | PPCIns pi = PPCI_NEG; | 1434 | PPCIns pi = PPCI_NEG; |
1322 | if (as->flagmcp == as->mcp) { | 1435 | if (as->flagmcp == as->mcp) { |
@@ -1330,6 +1443,8 @@ static void asm_neg(ASMState *as, IRIns *ir) | |||
1330 | } | 1443 | } |
1331 | } | 1444 | } |
1332 | 1445 | ||
1446 | #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) | ||
1447 | |||
1333 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | 1448 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) |
1334 | { | 1449 | { |
1335 | Reg dest, left, right; | 1450 | Reg dest, left, right; |
@@ -1345,6 +1460,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | |||
1345 | emit_tab(as, pi|PPCF_DOT, dest, left, right); | 1460 | emit_tab(as, pi|PPCF_DOT, dest, left, right); |
1346 | } | 1461 | } |
1347 | 1462 | ||
1463 | #define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) | ||
1464 | #define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) | ||
1465 | #define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO) | ||
1466 | |||
1348 | #if LJ_HASFFI | 1467 | #if LJ_HASFFI |
1349 | static void asm_add64(ASMState *as, IRIns *ir) | 1468 | static void asm_add64(ASMState *as, IRIns *ir) |
1350 | { | 1469 | { |
@@ -1424,7 +1543,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) | |||
1424 | } | 1543 | } |
1425 | #endif | 1544 | #endif |
1426 | 1545 | ||
1427 | static void asm_bitnot(ASMState *as, IRIns *ir) | 1546 | static void asm_bnot(ASMState *as, IRIns *ir) |
1428 | { | 1547 | { |
1429 | Reg dest, left, right; | 1548 | Reg dest, left, right; |
1430 | PPCIns pi = PPCI_NOR; | 1549 | PPCIns pi = PPCI_NOR; |
@@ -1451,7 +1570,7 @@ nofuse: | |||
1451 | emit_asb(as, pi, dest, left, right); | 1570 | emit_asb(as, pi, dest, left, right); |
1452 | } | 1571 | } |
1453 | 1572 | ||
1454 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1573 | static void asm_bswap(ASMState *as, IRIns *ir) |
1455 | { | 1574 | { |
1456 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1575 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1457 | IRIns *irx; | 1576 | IRIns *irx; |
@@ -1472,32 +1591,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1472 | } | 1591 | } |
1473 | } | 1592 | } |
1474 | 1593 | ||
1475 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | ||
1476 | { | ||
1477 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1478 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1479 | if (irref_isk(ir->op2)) { | ||
1480 | int32_t k = IR(ir->op2)->i; | ||
1481 | Reg tmp = left; | ||
1482 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { | ||
1483 | if (!checku16(k)) { | ||
1484 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); | ||
1485 | if ((k & 0xffff) == 0) return; | ||
1486 | } | ||
1487 | emit_asi(as, pik, dest, left, k); | ||
1488 | return; | ||
1489 | } | ||
1490 | } | ||
1491 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1492 | if (as->flagmcp == as->mcp) { | ||
1493 | as->flagmcp = NULL; | ||
1494 | as->mcp++; | ||
1495 | pi |= PPCF_DOT; | ||
1496 | } | ||
1497 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1498 | emit_asb(as, pi, dest, left, right); | ||
1499 | } | ||
1500 | |||
1501 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ | 1594 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ |
1502 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) | 1595 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) |
1503 | { | 1596 | { |
@@ -1528,7 +1621,7 @@ nofuse: | |||
1528 | *--as->mcp = pi | PPCF_T(left); | 1621 | *--as->mcp = pi | PPCF_T(left); |
1529 | } | 1622 | } |
1530 | 1623 | ||
1531 | static void asm_bitand(ASMState *as, IRIns *ir) | 1624 | static void asm_band(ASMState *as, IRIns *ir) |
1532 | { | 1625 | { |
1533 | Reg dest, left, right; | 1626 | Reg dest, left, right; |
1534 | IRRef lref = ir->op1; | 1627 | IRRef lref = ir->op1; |
@@ -1583,6 +1676,35 @@ static void asm_bitand(ASMState *as, IRIns *ir) | |||
1583 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); | 1676 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); |
1584 | } | 1677 | } |
1585 | 1678 | ||
1679 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) /* Bitwise op: pi is used for the register form, pik for the immediate form. */ | ||
1680 | { | ||
1681 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1682 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1683 | if (irref_isk(ir->op2)) { /* Constant right operand: try the immediate form(s). */ | ||
1684 | int32_t k = IR(ir->op2)->i; | ||
1685 | Reg tmp = left; | ||
1686 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { /* One immediate suffices, or (with tmp = dest) two are chained through dest when sectref is 0. */ | ||
1687 | if (!checku16(k)) { | ||
1688 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); /* Variant of pik for the upper 16 bits; presumably the "shifted" opcode -- TODO confirm. */ | ||
1689 | if ((k & 0xffff) == 0) return; /* Low half is zero: nothing more to emit. */ | ||
1690 | } | ||
1691 | emit_asi(as, pik, dest, left, k); | ||
1692 | return; | ||
1693 | } | ||
1694 | } | ||
1695 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1696 | if (as->flagmcp == as->mcp) { /* When flagmcp equals the current mcp: clear it, advance mcp by one and use the PPCF_DOT form instead. */ | ||
1697 | as->flagmcp = NULL; | ||
1698 | as->mcp++; | ||
1699 | pi |= PPCF_DOT; | ||
1700 | } | ||
1701 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1702 | emit_asb(as, pi, dest, left, right); | ||
1703 | } | ||
1704 | |||
1705 | #define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) | ||
1706 | #define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) | ||
1707 | |||
1586 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | 1708 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) |
1587 | { | 1709 | { |
1588 | Reg dest, left; | 1710 | Reg dest, left; |
@@ -1608,9 +1730,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | |||
1608 | } | 1730 | } |
1609 | } | 1731 | } |
1610 | 1732 | ||
1733 | #define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) | ||
1734 | #define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) | ||
1735 | #define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) | ||
1736 | #define asm_brol(as, ir) \ | ||
1737 | asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ | ||
1738 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) | ||
1739 | #define asm_bror(as, ir) lj_assertA(0, "unexpected BROR") | ||
1740 | |||
1741 | #if LJ_SOFTFP | ||
1742 | static void asm_sfpmin_max(ASMState *as, IRIns *ir) /* Soft-float MIN/MAX over the op pair ir and ir+1: calls softfp_cmp and moves either the left or the right operand words into the two destinations. */ | ||
1743 | { | ||
1744 | CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; /* Local copy of the call descriptor. */ | ||
1745 | IRRef args[4]; | ||
1746 | MCLabel l_right, l_end; | ||
1747 | Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); | ||
1748 | Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); /* Packed pair: left register in the low 8 bits, right register above (unpacked below). */ | ||
1749 | Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); | ||
1750 | PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; /* Branch condition differs between IR_MIN and any other op handled here. */ | ||
1751 | righthi = (lefthi >> 8); lefthi &= 255; | ||
1752 | rightlo = (leftlo >> 8); leftlo &= 255; | ||
1753 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; /* Index XOR LJ_BE swaps the order within each argument pair when LJ_BE is 1. */ | ||
1754 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1755 | l_end = emit_label(as); | ||
1756 | if (desthi != righthi) emit_mr(as, desthi, righthi); /* Moves are skipped when source and destination registers coincide. */ | ||
1757 | if (destlo != rightlo) emit_mr(as, destlo, rightlo); | ||
1758 | l_right = emit_label(as); | ||
1759 | if (l_end != l_right) emit_jmp(as, l_end); /* Jump is only emitted if the labels differ, i.e. at least one move to the right operand was emitted. */ | ||
1760 | if (desthi != lefthi) emit_mr(as, desthi, lefthi); | ||
1761 | if (destlo != leftlo) emit_mr(as, destlo, leftlo); | ||
1762 | if (l_right == as->mcp+1) { /* NOTE(review): looks like the case where only the jump was emitted since l_right; it is dropped and the branch retargeted to l_end -- confirm. */ | ||
1763 | cond ^= 4; l_right = l_end; ++as->mcp; /* NOTE(review): XOR 4 presumably inverts the condition code -- confirm. */ | ||
1764 | } | ||
1765 | emit_condbranch(as, PPCI_BC, cond, l_right); | ||
1766 | ra_evictset(as, RSET_SCRATCH); | ||
1767 | emit_cmpi(as, RID_RET, 1); /* Compare the call's return register against 1. */ | ||
1768 | asm_gencall(as, &ci, args); | ||
1769 | } | ||
1770 | #endif | ||
1771 | |||
1611 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | 1772 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) |
1612 | { | 1773 | { |
1613 | if (irt_isnum(ir->t)) { | 1774 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1614 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1775 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1615 | Reg tmp = dest; | 1776 | Reg tmp = dest; |
1616 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1777 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
@@ -1618,9 +1779,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | |||
1618 | if (tmp == left || tmp == right) | 1779 | if (tmp == left || tmp == right) |
1619 | tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, | 1780 | tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, |
1620 | dest), left), right)); | 1781 | dest), left), right)); |
1621 | emit_facb(as, PPCI_FSEL, dest, tmp, | 1782 | emit_facb(as, PPCI_FSEL, dest, tmp, left, right); |
1622 | ismax ? left : right, ismax ? right : left); | 1783 | emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left); |
1623 | emit_fab(as, PPCI_FSUB, tmp, left, right); | ||
1624 | } else { | 1784 | } else { |
1625 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1785 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1626 | Reg tmp1 = RID_TMP, tmp2 = dest; | 1786 | Reg tmp1 = RID_TMP, tmp2 = dest; |
@@ -1638,6 +1798,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | |||
1638 | } | 1798 | } |
1639 | } | 1799 | } |
1640 | 1800 | ||
1801 | #define asm_min(as, ir) asm_min_max(as, ir, 0) | ||
1802 | #define asm_max(as, ir) asm_min_max(as, ir, 1) | ||
1803 | |||
1641 | /* -- Comparisons --------------------------------------------------------- */ | 1804 | /* -- Comparisons --------------------------------------------------------- */ |
1642 | 1805 | ||
1643 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ | 1806 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ |
@@ -1695,7 +1858,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) | |||
1695 | static void asm_comp(ASMState *as, IRIns *ir) | 1858 | static void asm_comp(ASMState *as, IRIns *ir) |
1696 | { | 1859 | { |
1697 | PPCCC cc = asm_compmap[ir->o]; | 1860 | PPCCC cc = asm_compmap[ir->o]; |
1698 | if (irt_isnum(ir->t)) { | 1861 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1699 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1862 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1700 | right = (left >> 8); left &= 255; | 1863 | right = (left >> 8); left &= 255; |
1701 | asm_guardcc(as, (cc >> 4)); | 1864 | asm_guardcc(as, (cc >> 4)); |
@@ -1714,6 +1877,46 @@ static void asm_comp(ASMState *as, IRIns *ir) | |||
1714 | } | 1877 | } |
1715 | } | 1878 | } |
1716 | 1879 | ||
1880 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1881 | |||
1882 | #if LJ_SOFTFP | ||
1883 | /* SFP comparisons: emits a softfp_cmp call on the operands of ir and ir+1, then guard(s) on the value in RID_RET. */ | ||
1884 | static void asm_sfpcomp(ASMState *as, IRIns *ir) | ||
1885 | { | ||
1886 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
1887 | RegSet drop = RSET_SCRATCH; | ||
1888 | Reg r; | ||
1889 | IRRef args[4]; | ||
1890 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; /* Index XOR LJ_BE swaps the order within each argument pair when LJ_BE is 1. */ | ||
1891 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1892 | | ||
1893 | for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { /* Check the four registers starting at REGARG_FIRSTGPR. */ | ||
1894 | if (!rset_test(as->freeset, r) && | ||
1895 | regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) | ||
1896 | rset_clear(drop, r); /* In use and its recorded ref equals the matching argument: exclude it from the eviction set. */ | ||
1897 | } | ||
1898 | ra_evictset(as, drop); | ||
1899 | asm_setupresult(as, ir, ci); | ||
1900 | switch ((IROp)ir->o) { | ||
1901 | case IR_ULT: | ||
1902 | asm_guardcc(as, CC_EQ); | ||
1903 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* fallthrough (no break here; presumably intended so IR_ULT also gets the IR_ULE guard -- confirm) */ | ||
1904 | case IR_ULE: | ||
1905 | asm_guardcc(as, CC_EQ); | ||
1906 | emit_ai(as, PPCI_CMPWI, RID_RET, 1); | ||
1907 | break; | ||
1908 | case IR_GE: case IR_GT: | ||
1909 | asm_guardcc(as, CC_EQ); | ||
1910 | emit_ai(as, PPCI_CMPWI, RID_RET, 2); /* fallthrough (no break here; presumably intended so GE/GT also get the default guard -- confirm) */ | ||
1911 | default: | ||
1912 | asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); /* Guard condition taken from the low 4 bits of the op's asm_compmap entry. */ | ||
1913 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); | ||
1914 | break; | ||
1915 | } | ||
1916 | asm_gencall(as, ci, args); | ||
1917 | } | ||
1918 | #endif | ||
1919 | |||
1717 | #if LJ_HASFFI | 1920 | #if LJ_HASFFI |
1718 | /* 64 bit integer comparisons. */ | 1921 | /* 64 bit integer comparisons. */ |
1719 | static void asm_comp64(ASMState *as, IRIns *ir) | 1922 | static void asm_comp64(ASMState *as, IRIns *ir) |
@@ -1738,50 +1941,87 @@ static void asm_comp64(ASMState *as, IRIns *ir) | |||
1738 | } | 1941 | } |
1739 | #endif | 1942 | #endif |
1740 | 1943 | ||
1741 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | 1944 | /* -- Split register ops -------------------------------------------------- */ |
1742 | 1945 | ||
1743 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 1946 | /* Hiword op of a split 32/32 bit op. Previous op is the loword op. */ |
1744 | static void asm_hiop(ASMState *as, IRIns *ir) | 1947 | static void asm_hiop(ASMState *as, IRIns *ir) |
1745 | { | 1948 | { |
1746 | #if LJ_HASFFI | ||
1747 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 1949 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
1748 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 1950 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
1749 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 1951 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
1952 | #if LJ_HASFFI || LJ_SOFTFP | ||
1750 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 1953 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
1751 | as->curins--; /* Always skip the CONV. */ | 1954 | as->curins--; /* Always skip the CONV. */ |
1955 | #if LJ_HASFFI && !LJ_SOFTFP | ||
1752 | if (usehi || uselo) | 1956 | if (usehi || uselo) |
1753 | asm_conv64(as, ir); | 1957 | asm_conv64(as, ir); |
1754 | return; | 1958 | return; |
1959 | #endif | ||
1755 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 1960 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
1756 | as->curins--; /* Always skip the loword comparison. */ | 1961 | as->curins--; /* Always skip the loword comparison. */ |
1962 | #if LJ_SOFTFP | ||
1963 | if (!irt_isint(ir->t)) { | ||
1964 | asm_sfpcomp(as, ir-1); | ||
1965 | return; | ||
1966 | } | ||
1967 | #endif | ||
1968 | #if LJ_HASFFI | ||
1757 | asm_comp64(as, ir); | 1969 | asm_comp64(as, ir); |
1970 | #endif | ||
1758 | return; | 1971 | return; |
1972 | #if LJ_SOFTFP | ||
1973 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | ||
1974 | as->curins--; /* Always skip the loword min/max. */ | ||
1975 | if (uselo || usehi) | ||
1976 | asm_sfpmin_max(as, ir-1); | ||
1977 | return; | ||
1978 | #endif | ||
1759 | } else if ((ir-1)->o == IR_XSTORE) { | 1979 | } else if ((ir-1)->o == IR_XSTORE) { |
1760 | as->curins--; /* Handle both stores here. */ | 1980 | as->curins--; /* Handle both stores here. */ |
1761 | if ((ir-1)->r != RID_SINK) { | 1981 | if ((ir-1)->r != RID_SINK) { |
1762 | asm_xstore(as, ir, 0); | 1982 | asm_xstore_(as, ir, 0); |
1763 | asm_xstore(as, ir-1, 4); | 1983 | asm_xstore_(as, ir-1, 4); |
1764 | } | 1984 | } |
1765 | return; | 1985 | return; |
1766 | } | 1986 | } |
1987 | #endif | ||
1767 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1988 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
1768 | switch ((ir-1)->o) { | 1989 | switch ((ir-1)->o) { |
1990 | #if LJ_HASFFI | ||
1769 | case IR_ADD: as->curins--; asm_add64(as, ir); break; | 1991 | case IR_ADD: as->curins--; asm_add64(as, ir); break; |
1770 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; | 1992 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; |
1771 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; | 1993 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; |
1772 | case IR_CALLN: | 1994 | case IR_CNEWI: |
1773 | case IR_CALLXS: | 1995 | /* Nothing to do here. Handled by lo op itself. */ |
1996 | break; | ||
1997 | #endif | ||
1998 | #if LJ_SOFTFP | ||
1999 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2000 | case IR_STRTO: | ||
1774 | if (!uselo) | 2001 | if (!uselo) |
1775 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 2002 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ |
1776 | break; | 2003 | break; |
1777 | case IR_CNEWI: | 2004 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: |
1778 | /* Nothing to do here. Handled by lo op itself. */ | 2005 | /* Nothing to do here. Handled by lo op itself. */ |
1779 | break; | 2006 | break; |
1780 | default: lua_assert(0); break; | ||
1781 | } | ||
1782 | #else | ||
1783 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ | ||
1784 | #endif | 2007 | #endif |
2008 | case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: | ||
2009 | if (!uselo) | ||
2010 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | ||
2011 | break; | ||
2012 | default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; | ||
2013 | } | ||
2014 | } | ||
2015 | |||
2016 | /* -- Profiling ----------------------------------------------------------- */ | ||
2017 | |||
2018 | static void asm_prof(ASMState *as, IRIns *ir) /* Emits a CC_NE guard on global_State.hookmask masked with HOOK_PROFILE; ir is unused. */ | ||
2019 | { | ||
2020 | UNUSED(ir); | ||
2021 | asm_guardcc(as, CC_NE); | ||
2022 | emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); /* AND RID_TMP with HOOK_PROFILE, result back in RID_TMP. */ | ||
2023 | emit_lsglptr(as, PPCI_LBZ, RID_TMP, /* PPCI_LBZ load of the hookmask field into RID_TMP. */ | ||
2024 | (int32_t)offsetof(global_State, hookmask)); | ||
1785 | } | 2025 | } |
1786 | 2026 | ||
1787 | /* -- Stack handling ------------------------------------------------------ */ | 2027 | /* -- Stack handling ------------------------------------------------------ */ |
@@ -1805,7 +2045,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1805 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); | 2045 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); |
1806 | if (pbase == RID_TMP) | 2046 | if (pbase == RID_TMP) |
1807 | emit_getgl(as, RID_TMP, jit_base); | 2047 | emit_getgl(as, RID_TMP, jit_base); |
1808 | emit_getgl(as, tmp, jit_L); | 2048 | emit_getgl(as, tmp, cur_L); |
1809 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 2049 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
1810 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); | 2050 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); |
1811 | } | 2051 | } |
@@ -1826,12 +2066,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1826 | if ((sn & SNAP_NORESTORE)) | 2066 | if ((sn & SNAP_NORESTORE)) |
1827 | continue; | 2067 | continue; |
1828 | if (irt_isnum(ir->t)) { | 2068 | if (irt_isnum(ir->t)) { |
2069 | #if LJ_SOFTFP | ||
2070 | Reg tmp; | ||
2071 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
2072 | /* LJ_SOFTFP: must be a number constant. */ | ||
2073 | lj_assertA(irref_isk(ref), "unsplit FP op"); | ||
2074 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); | ||
2075 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); | ||
2076 | if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); | ||
2077 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); | ||
2078 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); | ||
2079 | #else | ||
1829 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2080 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
1830 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); | 2081 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); |
2082 | #endif | ||
1831 | } else { | 2083 | } else { |
1832 | Reg type; | 2084 | Reg type; |
1833 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | 2085 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); |
1834 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | 2086 | lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), |
2087 | "restore of IR type %d", irt_type(ir->t)); | ||
1835 | if (!irt_ispri(ir->t)) { | 2088 | if (!irt_ispri(ir->t)) { |
1836 | Reg src = ra_alloc1(as, ref, allow); | 2089 | Reg src = ra_alloc1(as, ref, allow); |
1837 | rset_clear(allow, src); | 2090 | rset_clear(allow, src); |
@@ -1840,6 +2093,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1840 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2093 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
1841 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ | 2094 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ |
1842 | type = ra_allock(as, (int32_t)(*flinks--), allow); | 2095 | type = ra_allock(as, (int32_t)(*flinks--), allow); |
2096 | #if LJ_SOFTFP | ||
2097 | } else if ((sn & SNAP_SOFTFPNUM)) { | ||
2098 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | ||
2099 | #endif | ||
2100 | } else if ((sn & SNAP_KEYINDEX)) { | ||
2101 | type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); | ||
1843 | } else { | 2102 | } else { |
1844 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2103 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
1845 | } | 2104 | } |
@@ -1847,7 +2106,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1847 | } | 2106 | } |
1848 | checkmclim(as); | 2107 | checkmclim(as); |
1849 | } | 2108 | } |
1850 | lua_assert(map + nent == flinks); | 2109 | lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); |
1851 | } | 2110 | } |
1852 | 2111 | ||
1853 | /* -- GC handling --------------------------------------------------------- */ | 2112 | /* -- GC handling --------------------------------------------------------- */ |
@@ -1898,6 +2157,12 @@ static void asm_loop_fixup(ASMState *as) | |||
1898 | } | 2157 | } |
1899 | } | 2158 | } |
1900 | 2159 | ||
2160 | /* Fixup the tail of the loop. */ | ||
2161 | static void asm_loop_tail_fixup(ASMState *as) | ||
2162 | { | ||
2163 | UNUSED(as); /* Nothing to do. */ | ||
2164 | } | ||
2165 | |||
1901 | /* -- Head of trace ------------------------------------------------------- */ | 2166 | /* -- Head of trace ------------------------------------------------------- */ |
1902 | 2167 | ||
1903 | /* Coalesce BASE register for a root trace. */ | 2168 | /* Coalesce BASE register for a root trace. */ |
@@ -1949,7 +2214,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
1949 | as->mctop = p; | 2214 | as->mctop = p; |
1950 | } else { | 2215 | } else { |
1951 | /* Patch stack adjustment. */ | 2216 | /* Patch stack adjustment. */ |
1952 | lua_assert(checki16(CFRAME_SIZE+spadj)); | 2217 | lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); |
1953 | p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); | 2218 | p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); |
1954 | p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; | 2219 | p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; |
1955 | } | 2220 | } |
@@ -1970,147 +2235,25 @@ static void asm_tail_prep(ASMState *as) | |||
1970 | } | 2235 | } |
1971 | } | 2236 | } |
1972 | 2237 | ||
1973 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1974 | |||
1975 | /* Assemble a single instruction. */ | ||
1976 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1977 | { | ||
1978 | switch ((IROp)ir->o) { | ||
1979 | /* Miscellaneous ops. */ | ||
1980 | case IR_LOOP: asm_loop(as); break; | ||
1981 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
1982 | case IR_USE: | ||
1983 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1984 | case IR_PHI: asm_phi(as, ir); break; | ||
1985 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1986 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1987 | |||
1988 | /* Guarded assertions. */ | ||
1989 | case IR_EQ: case IR_NE: | ||
1990 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1991 | as->curins--; | ||
1992 | asm_href(as, ir-1, (IROp)ir->o); | ||
1993 | break; | ||
1994 | } | ||
1995 | /* fallthrough */ | ||
1996 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1997 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1998 | case IR_ABC: | ||
1999 | asm_comp(as, ir); | ||
2000 | break; | ||
2001 | |||
2002 | case IR_RETF: asm_retf(as, ir); break; | ||
2003 | |||
2004 | /* Bit ops. */ | ||
2005 | case IR_BNOT: asm_bitnot(as, ir); break; | ||
2006 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2007 | |||
2008 | case IR_BAND: asm_bitand(as, ir); break; | ||
2009 | case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break; | ||
2010 | case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break; | ||
2011 | |||
2012 | case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break; | ||
2013 | case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break; | ||
2014 | case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break; | ||
2015 | case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), | ||
2016 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break; | ||
2017 | case IR_BROR: lua_assert(0); break; | ||
2018 | |||
2019 | /* Arithmetic ops. */ | ||
2020 | case IR_ADD: asm_add(as, ir); break; | ||
2021 | case IR_SUB: asm_sub(as, ir); break; | ||
2022 | case IR_MUL: asm_mul(as, ir); break; | ||
2023 | case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break; | ||
2024 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2025 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2026 | case IR_NEG: asm_neg(as, ir); break; | ||
2027 | |||
2028 | case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break; | ||
2029 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2030 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2031 | case IR_MIN: asm_min_max(as, ir, 0); break; | ||
2032 | case IR_MAX: asm_min_max(as, ir, 1); break; | ||
2033 | case IR_FPMATH: | ||
2034 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2035 | break; | ||
2036 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) | ||
2037 | asm_fpunary(as, ir, PPCI_FSQRT); | ||
2038 | else | ||
2039 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2040 | break; | ||
2041 | |||
2042 | /* Overflow-checking arithmetic ops. */ | ||
2043 | case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break; | ||
2044 | case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break; | ||
2045 | case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break; | ||
2046 | |||
2047 | /* Memory references. */ | ||
2048 | case IR_AREF: asm_aref(as, ir); break; | ||
2049 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2050 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2051 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2052 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2053 | case IR_FREF: asm_fref(as, ir); break; | ||
2054 | case IR_STRREF: asm_strref(as, ir); break; | ||
2055 | |||
2056 | /* Loads and stores. */ | ||
2057 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2058 | asm_ahuvload(as, ir); | ||
2059 | break; | ||
2060 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2061 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2062 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2063 | |||
2064 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2065 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2066 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2067 | |||
2068 | /* Allocations. */ | ||
2069 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2070 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2071 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2072 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2073 | |||
2074 | /* Write barriers. */ | ||
2075 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2076 | case IR_OBAR: asm_obar(as, ir); break; | ||
2077 | |||
2078 | /* Type conversions. */ | ||
2079 | case IR_CONV: asm_conv(as, ir); break; | ||
2080 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2081 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2082 | case IR_STRTO: asm_strto(as, ir); break; | ||
2083 | |||
2084 | /* Calls. */ | ||
2085 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2086 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2087 | case IR_CARG: break; | ||
2088 | |||
2089 | default: | ||
2090 | setintV(&as->J->errinfo, ir->o); | ||
2091 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2092 | break; | ||
2093 | } | ||
2094 | } | ||
2095 | |||
2096 | /* -- Trace setup --------------------------------------------------------- */ | 2238 | /* -- Trace setup --------------------------------------------------------- */ |
2097 | 2239 | ||
2098 | /* Ensure there are enough stack slots for call arguments. */ | 2240 | /* Ensure there are enough stack slots for call arguments. */ |
2099 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2241 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2100 | { | 2242 | { |
2101 | IRRef args[CCI_NARGS_MAX*2]; | 2243 | IRRef args[CCI_NARGS_MAX*2]; |
2102 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2244 | uint32_t i, nargs = CCI_XNARGS(ci); |
2103 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | 2245 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; |
2104 | asm_collectargs(as, ir, ci, args); | 2246 | asm_collectargs(as, ir, ci, args); |
2105 | for (i = 0; i < nargs; i++) | 2247 | for (i = 0; i < nargs; i++) |
2106 | if (args[i] && irt_isfp(IR(args[i])->t)) { | 2248 | if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { |
2107 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; | 2249 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; |
2108 | } else { | 2250 | } else { |
2109 | if (ngpr > 0) ngpr--; else nslots++; | 2251 | if (ngpr > 0) ngpr--; else nslots++; |
2110 | } | 2252 | } |
2111 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | 2253 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ |
2112 | as->evenspill = nslots; | 2254 | as->evenspill = nslots; |
2113 | return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); | 2255 | return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : |
2256 | REGSP_HINT(RID_RET); | ||
2114 | } | 2257 | } |
2115 | 2258 | ||
2116 | static void asm_setup_target(ASMState *as) | 2259 | static void asm_setup_target(ASMState *as) |
@@ -2150,7 +2293,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2150 | } else if ((ins & 0xfc000000u) == PPCI_B && | 2293 | } else if ((ins & 0xfc000000u) == PPCI_B && |
2151 | ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { | 2294 | ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { |
2152 | ptrdiff_t delta = (char *)target - (char *)p; | 2295 | ptrdiff_t delta = (char *)target - (char *)p; |
2153 | lua_assert(((delta + 0x02000000) >> 26) == 0); | 2296 | lj_assertJ(((delta + 0x02000000) >> 26) == 0, |
2297 | "branch target out of range"); | ||
2154 | *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); | 2298 | *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); |
2155 | if (!cstart) cstart = p; | 2299 | if (!cstart) cstart = p; |
2156 | } | 2300 | } |
@@ -2158,7 +2302,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2158 | /* Always patch long-range branch in exit stub itself. Except, if we can't. */ | 2302 | /* Always patch long-range branch in exit stub itself. Except, if we can't. */ |
2159 | if (patchlong) { | 2303 | if (patchlong) { |
2160 | ptrdiff_t delta = (char *)target - (char *)px - clearso; | 2304 | ptrdiff_t delta = (char *)target - (char *)px - clearso; |
2161 | lua_assert(((delta + 0x02000000) >> 26) == 0); | 2305 | lj_assertJ(((delta + 0x02000000) >> 26) == 0, |
2306 | "branch target out of range"); | ||
2162 | *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); | 2307 | *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); |
2163 | } | 2308 | } |
2164 | if (!cstart) cstart = px; | 2309 | if (!cstart) cstart = px; |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 8b529086..1ef7c38f 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -21,15 +21,17 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) | |||
21 | } | 21 | } |
22 | /* Push the high byte of the exitno for each exit stub group. */ | 22 | /* Push the high byte of the exitno for each exit stub group. */ |
23 | *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); | 23 | *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); |
24 | #if !LJ_GC64 | ||
24 | /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ | 25 | /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ |
25 | *mxp++ = XI_MOVmi; | 26 | *mxp++ = XI_MOVmi; |
26 | *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); | 27 | *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); |
27 | *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | 28 | *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); |
28 | *mxp++ = 2*sizeof(void *); | 29 | *mxp++ = 2*sizeof(void *); |
29 | *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; | 30 | *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; |
31 | #endif | ||
30 | /* Jump to exit handler which fills in the ExitState. */ | 32 | /* Jump to exit handler which fills in the ExitState. */ |
31 | *mxp++ = XI_JMP; mxp += 4; | 33 | *mxp++ = XI_JMP; mxp += 4; |
32 | *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); | 34 | *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); |
33 | /* Commit the code for this group (even if assembly fails later on). */ | 35 | /* Commit the code for this group (even if assembly fails later on). */ |
34 | lj_mcode_commitbot(as->J, mxp); | 36 | lj_mcode_commitbot(as->J, mxp); |
35 | as->mcbot = mxp; | 37 | as->mcbot = mxp; |
@@ -58,14 +60,18 @@ static void asm_guardcc(ASMState *as, int cc) | |||
58 | MCode *p = as->mcp; | 60 | MCode *p = as->mcp; |
59 | if (LJ_UNLIKELY(p == as->invmcp)) { | 61 | if (LJ_UNLIKELY(p == as->invmcp)) { |
60 | as->loopinv = 1; | 62 | as->loopinv = 1; |
61 | *(int32_t *)(p+1) = jmprel(p+5, target); | 63 | *(int32_t *)(p+1) = jmprel(as->J, p+5, target); |
62 | target = p; | 64 | target = p; |
63 | cc ^= 1; | 65 | cc ^= 1; |
64 | if (as->realign) { | 66 | if (as->realign) { |
67 | if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) | ||
68 | as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */ | ||
65 | emit_sjcc(as, cc, target); | 69 | emit_sjcc(as, cc, target); |
66 | return; | 70 | return; |
67 | } | 71 | } |
68 | } | 72 | } |
73 | if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) | ||
74 | as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */ | ||
69 | emit_jcc(as, cc, target); | 75 | emit_jcc(as, cc, target); |
70 | } | 76 | } |
71 | 77 | ||
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | |||
79 | { | 85 | { |
80 | if (irref_isk(ref)) { | 86 | if (irref_isk(ref)) { |
81 | IRIns *ir = IR(ref); | 87 | IRIns *ir = IR(ref); |
88 | #if LJ_GC64 | ||
89 | if (ir->o == IR_KNULL || !irt_is64(ir->t)) { | ||
90 | *k = ir->i; | ||
91 | return 1; | ||
92 | } else if (checki32((int64_t)ir_k64(ir)->u64)) { | ||
93 | *k = (int32_t)ir_k64(ir)->u64; | ||
94 | return 1; | ||
95 | } | ||
96 | #else | ||
82 | if (ir->o != IR_KINT64) { | 97 | if (ir->o != IR_KINT64) { |
83 | *k = ir->i; | 98 | *k = ir->i; |
84 | return 1; | 99 | return 1; |
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | |||
86 | *k = (int32_t)ir_kint64(ir)->u64; | 101 | *k = (int32_t)ir_kint64(ir)->u64; |
87 | return 1; | 102 | return 1; |
88 | } | 103 | } |
104 | #endif | ||
89 | } | 105 | } |
90 | return 0; | 106 | return 0; |
91 | } | 107 | } |
@@ -115,7 +131,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) | |||
115 | as->mrm.ofs = 0; | 131 | as->mrm.ofs = 0; |
116 | if (irb->o == IR_FLOAD) { | 132 | if (irb->o == IR_FLOAD) { |
117 | IRIns *ira = IR(irb->op1); | 133 | IRIns *ira = IR(irb->op1); |
118 | lua_assert(irb->op2 == IRFL_TAB_ARRAY); | 134 | lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); |
119 | /* We can avoid the FLOAD of t->array for colocated arrays. */ | 135 | /* We can avoid the FLOAD of t->array for colocated arrays. */ |
120 | if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && | 136 | if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && |
121 | !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { | 137 | !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { |
@@ -134,7 +150,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) | |||
134 | static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) | 150 | static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) |
135 | { | 151 | { |
136 | IRIns *irx; | 152 | IRIns *irx; |
137 | lua_assert(ir->o == IR_AREF); | 153 | lj_assertA(ir->o == IR_AREF, "expected AREF"); |
138 | as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); | 154 | as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); |
139 | irx = IR(ir->op2); | 155 | irx = IR(ir->op2); |
140 | if (irref_isk(ir->op2)) { | 156 | if (irref_isk(ir->op2)) { |
@@ -185,14 +201,32 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) | |||
185 | if (irref_isk(ir->op1)) { | 201 | if (irref_isk(ir->op1)) { |
186 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 202 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
187 | GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; | 203 | GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; |
204 | #if LJ_GC64 | ||
205 | int64_t ofs = dispofs(as, &uv->tv); | ||
206 | if (checki32(ofs) && checki32(ofs+4)) { | ||
207 | as->mrm.ofs = (int32_t)ofs; | ||
208 | as->mrm.base = RID_DISPATCH; | ||
209 | as->mrm.idx = RID_NONE; | ||
210 | return; | ||
211 | } | ||
212 | #else | ||
188 | as->mrm.ofs = ptr2addr(&uv->tv); | 213 | as->mrm.ofs = ptr2addr(&uv->tv); |
189 | as->mrm.base = as->mrm.idx = RID_NONE; | 214 | as->mrm.base = as->mrm.idx = RID_NONE; |
190 | return; | 215 | return; |
216 | #endif | ||
191 | } | 217 | } |
192 | break; | 218 | break; |
219 | case IR_TMPREF: | ||
220 | #if LJ_GC64 | ||
221 | as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv); | ||
222 | as->mrm.base = RID_DISPATCH; | ||
223 | as->mrm.idx = RID_NONE; | ||
224 | #else | ||
225 | as->mrm.ofs = igcptr(&J2G(as->J)->tmptv); | ||
226 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
227 | #endif | ||
228 | return; | ||
193 | default: | 229 | default: |
194 | lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO || | ||
195 | ir->o == IR_KKPTR); | ||
196 | break; | 230 | break; |
197 | } | 231 | } |
198 | } | 232 | } |
@@ -204,26 +238,53 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) | |||
204 | /* Fuse FLOAD/FREF reference into memory operand. */ | 238 | /* Fuse FLOAD/FREF reference into memory operand. */ |
205 | static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) | 239 | static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) |
206 | { | 240 | { |
207 | lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); | 241 | lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF, |
208 | as->mrm.ofs = field_ofs[ir->op2]; | 242 | "bad IR op %d", ir->o); |
209 | as->mrm.idx = RID_NONE; | 243 | as->mrm.idx = RID_NONE; |
244 | if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ | ||
245 | #if LJ_GC64 | ||
246 | as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); | ||
247 | as->mrm.base = RID_DISPATCH; | ||
248 | #else | ||
249 | as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J)); | ||
250 | as->mrm.base = RID_NONE; | ||
251 | #endif | ||
252 | return; | ||
253 | } | ||
254 | as->mrm.ofs = field_ofs[ir->op2]; | ||
210 | if (irref_isk(ir->op1)) { | 255 | if (irref_isk(ir->op1)) { |
211 | as->mrm.ofs += IR(ir->op1)->i; | 256 | IRIns *op1 = IR(ir->op1); |
257 | #if LJ_GC64 | ||
258 | if (ir->op1 == REF_NIL) { | ||
259 | as->mrm.ofs -= GG_OFS(dispatch); | ||
260 | as->mrm.base = RID_DISPATCH; | ||
261 | return; | ||
262 | } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) { | ||
263 | intptr_t ofs = dispofs(as, ir_kptr(op1)); | ||
264 | if (checki32(as->mrm.ofs + ofs)) { | ||
265 | as->mrm.ofs += (int32_t)ofs; | ||
266 | as->mrm.base = RID_DISPATCH; | ||
267 | return; | ||
268 | } | ||
269 | } | ||
270 | #else | ||
271 | as->mrm.ofs += op1->i; | ||
212 | as->mrm.base = RID_NONE; | 272 | as->mrm.base = RID_NONE; |
213 | } else { | 273 | return; |
214 | as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); | 274 | #endif |
215 | } | 275 | } |
276 | as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); | ||
216 | } | 277 | } |
217 | 278 | ||
218 | /* Fuse string reference into memory operand. */ | 279 | /* Fuse string reference into memory operand. */ |
219 | static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) | 280 | static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) |
220 | { | 281 | { |
221 | IRIns *irr; | 282 | IRIns *irr; |
222 | lua_assert(ir->o == IR_STRREF); | 283 | lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o); |
223 | as->mrm.base = as->mrm.idx = RID_NONE; | 284 | as->mrm.base = as->mrm.idx = RID_NONE; |
224 | as->mrm.scale = XM_SCALE1; | 285 | as->mrm.scale = XM_SCALE1; |
225 | as->mrm.ofs = sizeof(GCstr); | 286 | as->mrm.ofs = sizeof(GCstr); |
226 | if (irref_isk(ir->op1)) { | 287 | if (!LJ_GC64 && irref_isk(ir->op1)) { |
227 | as->mrm.ofs += IR(ir->op1)->i; | 288 | as->mrm.ofs += IR(ir->op1)->i; |
228 | } else { | 289 | } else { |
229 | Reg r = ra_alloc1(as, ir->op1, allow); | 290 | Reg r = ra_alloc1(as, ir->op1, allow); |
@@ -255,10 +316,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) | |||
255 | IRIns *ir = IR(ref); | 316 | IRIns *ir = IR(ref); |
256 | as->mrm.idx = RID_NONE; | 317 | as->mrm.idx = RID_NONE; |
257 | if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | 318 | if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { |
319 | #if LJ_GC64 | ||
320 | intptr_t ofs = dispofs(as, ir_kptr(ir)); | ||
321 | if (checki32(ofs)) { | ||
322 | as->mrm.ofs = (int32_t)ofs; | ||
323 | as->mrm.base = RID_DISPATCH; | ||
324 | return; | ||
325 | } | ||
326 | } if (0) { | ||
327 | #else | ||
258 | as->mrm.ofs = ir->i; | 328 | as->mrm.ofs = ir->i; |
259 | as->mrm.base = RID_NONE; | 329 | as->mrm.base = RID_NONE; |
260 | } else if (ir->o == IR_STRREF) { | 330 | } else if (ir->o == IR_STRREF) { |
261 | asm_fusestrref(as, ir, allow); | 331 | asm_fusestrref(as, ir, allow); |
332 | #endif | ||
262 | } else { | 333 | } else { |
263 | as->mrm.ofs = 0; | 334 | as->mrm.ofs = 0; |
264 | if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { | 335 | if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { |
@@ -301,7 +372,47 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) | |||
301 | } | 372 | } |
302 | } | 373 | } |
303 | 374 | ||
304 | /* Fuse load into memory operand. */ | 375 | /* Fuse load of 64 bit IR constant into memory operand. */ |
376 | static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) | ||
377 | { | ||
378 | const uint64_t *k = &ir_k64(ir)->u64; | ||
379 | if (!LJ_GC64 || checki32((intptr_t)k)) { | ||
380 | as->mrm.ofs = ptr2addr(k); | ||
381 | as->mrm.base = RID_NONE; | ||
382 | #if LJ_GC64 | ||
383 | } else if (checki32(dispofs(as, k))) { | ||
384 | as->mrm.ofs = (int32_t)dispofs(as, k); | ||
385 | as->mrm.base = RID_DISPATCH; | ||
386 | } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) && | ||
387 | checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { | ||
388 | as->mrm.ofs = (int32_t)mcpofs(as, k); | ||
389 | as->mrm.base = RID_RIP; | ||
390 | } else { /* Intern 64 bit constant at bottom of mcode. */ | ||
391 | if (ir->i) { | ||
392 | lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), | ||
393 | "bad interned 64 bit constant"); | ||
394 | } else { | ||
395 | while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; | ||
396 | *(uint64_t*)as->mcbot = *k; | ||
397 | ir->i = (int32_t)(as->mctop - as->mcbot); | ||
398 | as->mcbot += 8; | ||
399 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
400 | lj_mcode_commitbot(as->J, as->mcbot); | ||
401 | } | ||
402 | as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); | ||
403 | as->mrm.base = RID_RIP; | ||
404 | #endif | ||
405 | } | ||
406 | as->mrm.idx = RID_NONE; | ||
407 | return RID_MRM; | ||
408 | } | ||
409 | |||
410 | /* Fuse load into memory operand. | ||
411 | ** | ||
412 | ** Important caveat: this may emit RIP-relative loads! So don't place any | ||
413 | ** code emitters between this function and the use of its result. | ||
414 | ** The only permitted exception is asm_guardcc(). | ||
415 | */ | ||
305 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | 416 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) |
306 | { | 417 | { |
307 | IRIns *ir = IR(ref); | 418 | IRIns *ir = IR(ref); |
@@ -319,27 +430,36 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
319 | } | 430 | } |
320 | if (ir->o == IR_KNUM) { | 431 | if (ir->o == IR_KNUM) { |
321 | RegSet avail = as->freeset & ~as->modset & RSET_FPR; | 432 | RegSet avail = as->freeset & ~as->modset & RSET_FPR; |
322 | lua_assert(allow != RSET_EMPTY); | 433 | lj_assertA(allow != RSET_EMPTY, "no register allowed"); |
323 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ | 434 | if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ |
324 | as->mrm.ofs = ptr2addr(ir_knum(ir)); | 435 | return asm_fuseloadk64(as, ir); |
325 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
326 | return RID_MRM; | ||
327 | } | ||
328 | } else if (ref == REF_BASE || ir->o == IR_KINT64) { | 436 | } else if (ref == REF_BASE || ir->o == IR_KINT64) { |
329 | RegSet avail = as->freeset & ~as->modset & RSET_GPR; | 437 | RegSet avail = as->freeset & ~as->modset & RSET_GPR; |
330 | lua_assert(allow != RSET_EMPTY); | 438 | lj_assertA(allow != RSET_EMPTY, "no register allowed"); |
331 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ | 439 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ |
332 | as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir)); | 440 | if (ref == REF_BASE) { |
333 | as->mrm.base = as->mrm.idx = RID_NONE; | 441 | #if LJ_GC64 |
334 | return RID_MRM; | 442 | as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base); |
443 | as->mrm.base = RID_DISPATCH; | ||
444 | #else | ||
445 | as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base); | ||
446 | as->mrm.base = RID_NONE; | ||
447 | #endif | ||
448 | as->mrm.idx = RID_NONE; | ||
449 | return RID_MRM; | ||
450 | } else { | ||
451 | return asm_fuseloadk64(as, ir); | ||
452 | } | ||
335 | } | 453 | } |
336 | } else if (mayfuse(as, ref)) { | 454 | } else if (mayfuse(as, ref)) { |
337 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; | 455 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; |
338 | if (ir->o == IR_SLOAD) { | 456 | if (ir->o == IR_SLOAD) { |
339 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && | 457 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && |
340 | noconflict(as, ref, IR_RETF, 0)) { | 458 | noconflict(as, ref, IR_RETF, 0) && |
459 | !(LJ_GC64 && irt_isaddr(ir->t))) { | ||
341 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); | 460 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); |
342 | as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); | 461 | as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + |
462 | (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
343 | as->mrm.idx = RID_NONE; | 463 | as->mrm.idx = RID_NONE; |
344 | return RID_MRM; | 464 | return RID_MRM; |
345 | } | 465 | } |
@@ -351,7 +471,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
351 | return RID_MRM; | 471 | return RID_MRM; |
352 | } | 472 | } |
353 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { | 473 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { |
354 | if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { | 474 | if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && |
475 | !(LJ_GC64 && irt_isaddr(ir->t))) { | ||
355 | asm_fuseahuref(as, ir->op1, xallow); | 476 | asm_fuseahuref(as, ir->op1, xallow); |
356 | return RID_MRM; | 477 | return RID_MRM; |
357 | } | 478 | } |
@@ -364,11 +485,16 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
364 | asm_fusexref(as, ir->op1, xallow); | 485 | asm_fusexref(as, ir->op1, xallow); |
365 | return RID_MRM; | 486 | return RID_MRM; |
366 | } | 487 | } |
367 | } else if (ir->o == IR_VLOAD) { | 488 | } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { |
368 | asm_fuseahuref(as, ir->op1, xallow); | 489 | asm_fuseahuref(as, ir->op1, xallow); |
490 | as->mrm.ofs += 8 * ir->op2; | ||
369 | return RID_MRM; | 491 | return RID_MRM; |
370 | } | 492 | } |
371 | } | 493 | } |
494 | if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) { | ||
495 | asm_fusefref(as, ir, RSET_EMPTY); | ||
496 | return RID_MRM; | ||
497 | } | ||
372 | if (!(as->freeset & allow) && !emit_canremat(ref) && | 498 | if (!(as->freeset & allow) && !emit_canremat(ref) && |
373 | (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) | 499 | (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) |
374 | goto fusespill; | 500 | goto fusespill; |
@@ -392,7 +518,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64) | |||
392 | /* Count the required number of stack slots for a call. */ | 518 | /* Count the required number of stack slots for a call. */ |
393 | static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) | 519 | static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) |
394 | { | 520 | { |
395 | uint32_t i, nargs = CCI_NARGS(ci); | 521 | uint32_t i, nargs = CCI_XNARGS(ci); |
396 | int nslots = 0; | 522 | int nslots = 0; |
397 | #if LJ_64 | 523 | #if LJ_64 |
398 | if (LJ_ABI_WIN) { | 524 | if (LJ_ABI_WIN) { |
@@ -425,7 +551,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
425 | /* Generate a call to a C function. */ | 551 | /* Generate a call to a C function. */ |
426 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 552 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
427 | { | 553 | { |
428 | uint32_t n, nargs = CCI_NARGS(ci); | 554 | uint32_t n, nargs = CCI_XNARGS(ci); |
429 | int32_t ofs = STACKARG_OFS; | 555 | int32_t ofs = STACKARG_OFS; |
430 | #if LJ_64 | 556 | #if LJ_64 |
431 | uint32_t gprs = REGARG_GPRS; | 557 | uint32_t gprs = REGARG_GPRS; |
@@ -485,13 +611,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
485 | if (r) { /* Argument is in a register. */ | 611 | if (r) { /* Argument is in a register. */ |
486 | if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { | 612 | if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { |
487 | #if LJ_64 | 613 | #if LJ_64 |
488 | if (ir->o == IR_KINT64) | 614 | if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64) |
489 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 615 | emit_loadu64(as, r, ir_k64(ir)->u64); |
490 | else | 616 | else |
491 | #endif | 617 | #endif |
492 | emit_loadi(as, r, ir->i); | 618 | emit_loadi(as, r, ir->i); |
493 | } else { | 619 | } else { |
494 | lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ | 620 | /* Must have been evicted. */ |
621 | lj_assertA(rset_test(as->freeset, r), "reg %d not free", r); | ||
495 | if (ra_hasreg(ir->r)) { | 622 | if (ra_hasreg(ir->r)) { |
496 | ra_noweak(as, ir->r); | 623 | ra_noweak(as, ir->r); |
497 | emit_movrr(as, ir, r, ir->r); | 624 | emit_movrr(as, ir, r, ir->r); |
@@ -500,7 +627,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
500 | } | 627 | } |
501 | } | 628 | } |
502 | } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ | 629 | } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ |
503 | lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ | 630 | lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)), |
631 | "unexpected float constant"); | ||
504 | if (LJ_32 && (ofs & 4) && irref_isk(ref)) { | 632 | if (LJ_32 && (ofs & 4) && irref_isk(ref)) { |
505 | /* Split stores for unaligned FP consts. */ | 633 | /* Split stores for unaligned FP consts. */ |
506 | emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); | 634 | emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); |
@@ -531,7 +659,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
531 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | 659 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) |
532 | { | 660 | { |
533 | RegSet drop = RSET_SCRATCH; | 661 | RegSet drop = RSET_SCRATCH; |
534 | int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | 662 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); |
535 | if ((ci->flags & CCI_NOFPRCLOBBER)) | 663 | if ((ci->flags & CCI_NOFPRCLOBBER)) |
536 | drop &= ~RSET_FPR; | 664 | drop &= ~RSET_FPR; |
537 | if (ra_hasreg(ir->r)) | 665 | if (ra_hasreg(ir->r)) |
@@ -560,7 +688,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
560 | if (ra_hasreg(dest)) { | 688 | if (ra_hasreg(dest)) { |
561 | ra_free(as, dest); | 689 | ra_free(as, dest); |
562 | ra_modified(as, dest); | 690 | ra_modified(as, dest); |
563 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | 691 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, |
564 | dest, RID_ESP, ofs); | 692 | dest, RID_ESP, ofs); |
565 | } | 693 | } |
566 | if ((ci->flags & CCI_CASTU64)) { | 694 | if ((ci->flags & CCI_CASTU64)) { |
@@ -571,12 +699,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
571 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); | 699 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); |
572 | } | 700 | } |
573 | #endif | 701 | #endif |
574 | #if LJ_32 | ||
575 | } else if (hiop) { | 702 | } else if (hiop) { |
576 | ra_destpair(as, ir); | 703 | ra_destpair(as, ir); |
577 | #endif | ||
578 | } else { | 704 | } else { |
579 | lua_assert(!irt_ispri(ir->t)); | 705 | lj_assertA(!irt_ispri(ir->t), "PRI dest"); |
580 | ra_destreg(as, ir, RID_RET); | 706 | ra_destreg(as, ir, RID_RET); |
581 | } | 707 | } |
582 | } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { | 708 | } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { |
@@ -584,15 +710,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
584 | } | 710 | } |
585 | } | 711 | } |
586 | 712 | ||
587 | static void asm_call(ASMState *as, IRIns *ir) | ||
588 | { | ||
589 | IRRef args[CCI_NARGS_MAX]; | ||
590 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
591 | asm_collectargs(as, ir, ci, args); | ||
592 | asm_setupresult(as, ir, ci); | ||
593 | asm_gencall(as, ci, args); | ||
594 | } | ||
595 | |||
596 | /* Return a constant function pointer or NULL for indirect calls. */ | 713 | /* Return a constant function pointer or NULL for indirect calls. */ |
597 | static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) | 714 | static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) |
598 | { | 715 | { |
@@ -651,16 +768,39 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
651 | static void asm_retf(ASMState *as, IRIns *ir) | 768 | static void asm_retf(ASMState *as, IRIns *ir) |
652 | { | 769 | { |
653 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 770 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
771 | #if LJ_FR2 | ||
772 | Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); | ||
773 | #endif | ||
654 | void *pc = ir_kptr(IR(ir->op2)); | 774 | void *pc = ir_kptr(IR(ir->op2)); |
655 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 775 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
656 | as->topslot -= (BCReg)delta; | 776 | as->topslot -= (BCReg)delta; |
657 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 777 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
658 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 778 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
659 | emit_setgl(as, base, jit_base); | 779 | emit_setgl(as, base, jit_base); |
660 | emit_addptr(as, base, -8*delta); | 780 | emit_addptr(as, base, -8*delta); |
661 | asm_guardcc(as, CC_NE); | 781 | asm_guardcc(as, CC_NE); |
782 | #if LJ_FR2 | ||
783 | emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); | ||
784 | emit_loadu64(as, rpc, u64ptr(pc)); | ||
785 | #else | ||
662 | emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); | 786 | emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); |
787 | #endif | ||
788 | } | ||
789 | |||
790 | /* -- Buffer operations --------------------------------------------------- */ | ||
791 | |||
792 | #if LJ_HASBUFFER | ||
793 | static void asm_bufhdr_write(ASMState *as, Reg sb) | ||
794 | { | ||
795 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
796 | IRIns irgc; | ||
797 | irgc.ot = IRT(0, IRT_PGC); /* GC type. */ | ||
798 | emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); | ||
799 | emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L); | ||
800 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG); | ||
801 | emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); | ||
663 | } | 802 | } |
803 | #endif | ||
664 | 804 | ||
665 | /* -- Type conversions ---------------------------------------------------- */ | 805 | /* -- Type conversions ---------------------------------------------------- */ |
666 | 806 | ||
@@ -672,8 +812,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
672 | asm_guardcc(as, CC_NE); | 812 | asm_guardcc(as, CC_NE); |
673 | emit_rr(as, XO_UCOMISD, left, tmp); | 813 | emit_rr(as, XO_UCOMISD, left, tmp); |
674 | emit_rr(as, XO_CVTSI2SD, tmp, dest); | 814 | emit_rr(as, XO_CVTSI2SD, tmp, dest); |
675 | if (!(as->flags & JIT_F_SPLIT_XMM)) | 815 | emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ |
676 | emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ | ||
677 | emit_rr(as, XO_CVTTSD2SI, dest, left); | 816 | emit_rr(as, XO_CVTTSD2SI, dest, left); |
678 | /* Can't fuse since left is needed twice. */ | 817 | /* Can't fuse since left is needed twice. */ |
679 | } | 818 | } |
@@ -684,8 +823,9 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
684 | Reg tmp = ra_noreg(IR(ir->op1)->r) ? | 823 | Reg tmp = ra_noreg(IR(ir->op1)->r) ? |
685 | ra_alloc1(as, ir->op1, RSET_FPR) : | 824 | ra_alloc1(as, ir->op1, RSET_FPR) : |
686 | ra_scratch(as, RSET_FPR); | 825 | ra_scratch(as, RSET_FPR); |
687 | Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); | 826 | Reg right; |
688 | emit_rr(as, XO_MOVDto, tmp, dest); | 827 | emit_rr(as, XO_MOVDto, tmp, dest); |
828 | right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); | ||
689 | emit_mrm(as, XO_ADDSD, tmp, right); | 829 | emit_mrm(as, XO_ADDSD, tmp, right); |
690 | ra_left(as, tmp, ir->op1); | 830 | ra_left(as, tmp, ir->op1); |
691 | } | 831 | } |
@@ -696,8 +836,10 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
696 | int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); | 836 | int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); |
697 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 837 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
698 | IRRef lref = ir->op1; | 838 | IRRef lref = ir->op1; |
699 | lua_assert(irt_type(ir->t) != st); | 839 | lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); |
700 | lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ | 840 | lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)), |
841 | "IR %04d has unsplit 64 bit type", | ||
842 | (int)(ir - as->ir) - REF_BIAS); | ||
701 | if (irt_isfp(ir->t)) { | 843 | if (irt_isfp(ir->t)) { |
702 | Reg dest = ra_dest(as, ir, RSET_FPR); | 844 | Reg dest = ra_dest(as, ir, RSET_FPR); |
703 | if (stfp) { /* FP to FP conversion. */ | 845 | if (stfp) { /* FP to FP conversion. */ |
@@ -706,13 +848,13 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
706 | if (left == dest) return; /* Avoid the XO_XORPS. */ | 848 | if (left == dest) return; /* Avoid the XO_XORPS. */ |
707 | } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ | 849 | } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ |
708 | /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ | 850 | /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ |
709 | cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); | 851 | cTValue *k = &as->J->k64[LJ_K64_TOBIT]; |
710 | Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 852 | Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
711 | if (irt_isfloat(ir->t)) | 853 | if (irt_isfloat(ir->t)) |
712 | emit_rr(as, XO_CVTSD2SS, dest, dest); | 854 | emit_rr(as, XO_CVTSD2SS, dest, dest); |
713 | emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ | 855 | emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ |
714 | emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ | 856 | emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ |
715 | emit_loadn(as, bias, k); | 857 | emit_rma(as, XO_MOVSD, bias, k); |
716 | emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); | 858 | emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); |
717 | return; | 859 | return; |
718 | } else { /* Integer to FP conversion. */ | 860 | } else { /* Integer to FP conversion. */ |
@@ -721,7 +863,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
721 | asm_fuseloadm(as, lref, RSET_GPR, st64); | 863 | asm_fuseloadm(as, lref, RSET_GPR, st64); |
722 | if (LJ_64 && st == IRT_U64) { | 864 | if (LJ_64 && st == IRT_U64) { |
723 | MCLabel l_end = emit_label(as); | 865 | MCLabel l_end = emit_label(as); |
724 | const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); | 866 | cTValue *k = &as->J->k64[LJ_K64_2P64]; |
725 | emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ | 867 | emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ |
726 | emit_sjcc(as, CC_NS, l_end); | 868 | emit_sjcc(as, CC_NS, l_end); |
727 | emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ | 869 | emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ |
@@ -729,18 +871,16 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
729 | emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, | 871 | emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, |
730 | dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); | 872 | dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); |
731 | } | 873 | } |
732 | if (!(as->flags & JIT_F_SPLIT_XMM)) | 874 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ |
733 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ | ||
734 | } else if (stfp) { /* FP to integer conversion. */ | 875 | } else if (stfp) { /* FP to integer conversion. */ |
735 | if (irt_isguard(ir->t)) { | 876 | if (irt_isguard(ir->t)) { |
736 | /* Checked conversions are only supported from number to int. */ | 877 | /* Checked conversions are only supported from number to int. */ |
737 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | 878 | lj_assertA(irt_isint(ir->t) && st == IRT_NUM, |
879 | "bad type for checked CONV"); | ||
738 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | 880 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); |
739 | } else { | 881 | } else { |
740 | Reg dest = ra_dest(as, ir, RSET_GPR); | 882 | Reg dest = ra_dest(as, ir, RSET_GPR); |
741 | x86Op op = st == IRT_NUM ? | 883 | x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; |
742 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) : | ||
743 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI); | ||
744 | if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { | 884 | if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { |
745 | /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ | 885 | /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ |
746 | /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ | 886 | /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ |
@@ -751,30 +891,27 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
751 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); | 891 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); |
752 | emit_rr(as, op, dest|REX_64, tmp); | 892 | emit_rr(as, op, dest|REX_64, tmp); |
753 | if (st == IRT_NUM) | 893 | if (st == IRT_NUM) |
754 | emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, | 894 | emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); |
755 | LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000))); | ||
756 | else | 895 | else |
757 | emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, | 896 | emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); |
758 | LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000))); | ||
759 | emit_sjcc(as, CC_NS, l_end); | 897 | emit_sjcc(as, CC_NS, l_end); |
760 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ | 898 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ |
761 | emit_rr(as, op, dest|REX_64, tmp); | 899 | emit_rr(as, op, dest|REX_64, tmp); |
762 | ra_left(as, tmp, lref); | 900 | ra_left(as, tmp, lref); |
763 | } else { | 901 | } else { |
764 | Reg left = asm_fuseload(as, lref, RSET_FPR); | ||
765 | if (LJ_64 && irt_isu32(ir->t)) | 902 | if (LJ_64 && irt_isu32(ir->t)) |
766 | emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ | 903 | emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ |
767 | emit_mrm(as, op, | 904 | emit_mrm(as, op, |
768 | dest|((LJ_64 && | 905 | dest|((LJ_64 && |
769 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), | 906 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), |
770 | left); | 907 | asm_fuseload(as, lref, RSET_FPR)); |
771 | } | 908 | } |
772 | } | 909 | } |
773 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 910 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
774 | Reg left, dest = ra_dest(as, ir, RSET_GPR); | 911 | Reg left, dest = ra_dest(as, ir, RSET_GPR); |
775 | RegSet allow = RSET_GPR; | 912 | RegSet allow = RSET_GPR; |
776 | x86Op op; | 913 | x86Op op; |
777 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | 914 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); |
778 | if (st == IRT_I8) { | 915 | if (st == IRT_I8) { |
779 | op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; | 916 | op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; |
780 | } else if (st == IRT_U8) { | 917 | } else if (st == IRT_U8) { |
@@ -808,7 +945,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
808 | } | 945 | } |
809 | } else { | 946 | } else { |
810 | Reg dest = ra_dest(as, ir, RSET_GPR); | 947 | Reg dest = ra_dest(as, ir, RSET_GPR); |
811 | if (st64) { | 948 | if (st64 && !(ir->op2 & IRCONV_NONE)) { |
812 | Reg left = asm_fuseload(as, lref, RSET_GPR); | 949 | Reg left = asm_fuseload(as, lref, RSET_GPR); |
813 | /* This is either a 32 bit reg/reg mov which zeroes the hiword | 950 | /* This is either a 32 bit reg/reg mov which zeroes the hiword |
814 | ** or a load of the loword from a 64 bit address. | 951 | ** or a load of the loword from a 64 bit address. |
@@ -834,20 +971,18 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) | |||
834 | if (ra_hasreg(dest)) { | 971 | if (ra_hasreg(dest)) { |
835 | ra_free(as, dest); | 972 | ra_free(as, dest); |
836 | ra_modified(as, dest); | 973 | ra_modified(as, dest); |
837 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | 974 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs); |
838 | dest, RID_ESP, ofs); | ||
839 | } | 975 | } |
840 | emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, | 976 | emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, |
841 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); | 977 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); |
842 | if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { | 978 | if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { |
843 | /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ | 979 | /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ |
844 | MCLabel l_end = emit_label(as); | 980 | MCLabel l_end = emit_label(as); |
845 | emit_rma(as, XO_FADDq, XOg_FADDq, | 981 | emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); |
846 | lj_ir_k64_find(as->J, U64x(43f00000,00000000))); | ||
847 | emit_sjcc(as, CC_NS, l_end); | 982 | emit_sjcc(as, CC_NS, l_end); |
848 | emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ | 983 | emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ |
849 | } else { | 984 | } else { |
850 | lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); | 985 | lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV"); |
851 | } | 986 | } |
852 | emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); | 987 | emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); |
853 | /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ | 988 | /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ |
@@ -861,9 +996,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
861 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | 996 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); |
862 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | 997 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); |
863 | Reg lo, hi; | 998 | Reg lo, hi; |
864 | lua_assert(st == IRT_NUM || st == IRT_FLOAT); | 999 | lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); |
865 | lua_assert(dt == IRT_I64 || dt == IRT_U64); | 1000 | lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); |
866 | lua_assert(((ir-1)->op2 & IRCONV_TRUNC)); | ||
867 | hi = ra_dest(as, ir, RSET_GPR); | 1001 | hi = ra_dest(as, ir, RSET_GPR); |
868 | lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); | 1002 | lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); |
869 | if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); | 1003 | if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); |
@@ -884,8 +1018,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
884 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); | 1018 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); |
885 | else | 1019 | else |
886 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); | 1020 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); |
887 | emit_rma(as, XO_FADDq, XOg_FADDq, | 1021 | emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); |
888 | lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); | ||
889 | emit_sjcc(as, CC_NS, l_pop); | 1022 | emit_sjcc(as, CC_NS, l_pop); |
890 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ | 1023 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ |
891 | } | 1024 | } |
@@ -906,6 +1039,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
906 | st == IRT_NUM ? XOg_FLDq: XOg_FLDd, | 1039 | st == IRT_NUM ? XOg_FLDq: XOg_FLDd, |
907 | asm_fuseload(as, ir->op1, RSET_EMPTY)); | 1040 | asm_fuseload(as, ir->op1, RSET_EMPTY)); |
908 | } | 1041 | } |
1042 | |||
1043 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
1044 | { | ||
1045 | if (irt_isfp(ir->t)) | ||
1046 | asm_conv_fp_int64(as, ir); | ||
1047 | else | ||
1048 | asm_conv_int64_fp(as, ir); | ||
1049 | } | ||
909 | #endif | 1050 | #endif |
910 | 1051 | ||
911 | static void asm_strto(ASMState *as, IRIns *ir) | 1052 | static void asm_strto(ASMState *as, IRIns *ir) |
@@ -927,54 +1068,61 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
927 | RID_ESP, sps_scale(ir->s)); | 1068 | RID_ESP, sps_scale(ir->s)); |
928 | } | 1069 | } |
929 | 1070 | ||
930 | static void asm_tostr(ASMState *as, IRIns *ir) | 1071 | /* -- Memory references --------------------------------------------------- */ |
1072 | |||
1073 | /* Get pointer to TValue. */ | ||
1074 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) | ||
931 | { | 1075 | { |
932 | IRIns *irl = IR(ir->op1); | 1076 | if ((mode & IRTMPREF_IN1)) { |
933 | IRRef args[2]; | 1077 | IRIns *ir = IR(ref); |
934 | args[0] = ASMREF_L; | 1078 | if (irt_isnum(ir->t)) { |
935 | as->gcsteps++; | 1079 | if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { |
936 | if (irt_isnum(irl->t)) { | 1080 | /* Use the number constant itself as a TValue. */ |
937 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | 1081 | emit_loada(as, dest, ir_knum(ir)); |
938 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | 1082 | return; |
939 | asm_setupresult(as, ir, ci); /* GCstr * */ | 1083 | } |
940 | asm_gencall(as, ci, args); | 1084 | emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0); |
941 | emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64, | 1085 | } else { |
942 | RID_ESP, ra_spill(as, irl)); | 1086 | #if LJ_GC64 |
943 | } else { | 1087 | if (irref_isk(ref)) { |
944 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | 1088 | TValue k; |
945 | args[1] = ir->op1; /* int32_t k */ | 1089 | lj_ir_kvalue(as->J->L, &k, ir); |
946 | asm_setupresult(as, ir, ci); /* GCstr * */ | 1090 | emit_movmroi(as, dest, 4, k.u32.hi); |
947 | asm_gencall(as, ci, args); | 1091 | emit_movmroi(as, dest, 0, k.u32.lo); |
1092 | } else { | ||
1093 | /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ | ||
1094 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); | ||
1095 | if (irt_is64(ir->t)) { | ||
1096 | emit_u32(as, irt_toitype(ir->t) << 15); | ||
1097 | emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); | ||
1098 | } else { | ||
1099 | emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); | ||
1100 | } | ||
1101 | emit_movtomro(as, REX_64IR(ir, src), dest, 0); | ||
1102 | } | ||
1103 | #else | ||
1104 | if (!irref_isk(ref)) { | ||
1105 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); | ||
1106 | emit_movtomro(as, REX_64IR(ir, src), dest, 0); | ||
1107 | } else if (!irt_ispri(ir->t)) { | ||
1108 | emit_movmroi(as, dest, 0, ir->i); | ||
1109 | } | ||
1110 | if (!(LJ_64 && irt_islightud(ir->t))) | ||
1111 | emit_movmroi(as, dest, 4, irt_toitype(ir->t)); | ||
1112 | #endif | ||
1113 | } | ||
948 | } | 1114 | } |
1115 | emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */ | ||
949 | } | 1116 | } |
950 | 1117 | ||
951 | /* -- Memory references --------------------------------------------------- */ | ||
952 | |||
953 | static void asm_aref(ASMState *as, IRIns *ir) | 1118 | static void asm_aref(ASMState *as, IRIns *ir) |
954 | { | 1119 | { |
955 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1120 | Reg dest = ra_dest(as, ir, RSET_GPR); |
956 | asm_fusearef(as, ir, RSET_GPR); | 1121 | asm_fusearef(as, ir, RSET_GPR); |
957 | if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) | 1122 | if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) |
958 | emit_mrm(as, XO_LEA, dest, RID_MRM); | 1123 | emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); |
959 | else if (as->mrm.base != dest) | 1124 | else if (as->mrm.base != dest) |
960 | emit_rr(as, XO_MOV, dest, as->mrm.base); | 1125 | emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base); |
961 | } | ||
962 | |||
963 | /* Merge NE(HREF, niltv) check. */ | ||
964 | static MCode *merge_href_niltv(ASMState *as, IRIns *ir) | ||
965 | { | ||
966 | /* Assumes nothing else generates NE of HREF. */ | ||
967 | if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins && | ||
968 | ra_hasreg(ir->r)) { | ||
969 | MCode *p = as->mcp; | ||
970 | p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6; | ||
971 | /* Ensure no loop branch inversion happened. */ | ||
972 | if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) { | ||
973 | as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */ | ||
974 | return p + *(int32_t *)(p-4); /* Return exit address. */ | ||
975 | } | ||
976 | } | ||
977 | return NULL; | ||
978 | } | 1126 | } |
979 | 1127 | ||
980 | /* Inlined hash lookup. Specialized for key type and for const keys. | 1128 | /* Inlined hash lookup. Specialized for key type and for const keys. |
@@ -985,10 +1133,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir) | |||
985 | ** } while ((n = nextnode(n))); | 1133 | ** } while ((n = nextnode(n))); |
986 | ** return niltv(L); | 1134 | ** return niltv(L); |
987 | */ | 1135 | */ |
988 | static void asm_href(ASMState *as, IRIns *ir) | 1136 | static void asm_href(ASMState *as, IRIns *ir, IROp merge) |
989 | { | 1137 | { |
990 | MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */ | ||
991 | RegSet allow = RSET_GPR; | 1138 | RegSet allow = RSET_GPR; |
1139 | int destused = ra_used(ir); | ||
992 | Reg dest = ra_dest(as, ir, allow); | 1140 | Reg dest = ra_dest(as, ir, allow); |
993 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | 1141 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); |
994 | Reg key = RID_NONE, tmp = RID_NONE; | 1142 | Reg key = RID_NONE, tmp = RID_NONE; |
@@ -1001,28 +1149,26 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1001 | if (!isk) { | 1149 | if (!isk) { |
1002 | rset_clear(allow, tab); | 1150 | rset_clear(allow, tab); |
1003 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); | 1151 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); |
1004 | if (!irt_isstr(kt)) | 1152 | if (LJ_GC64 || !irt_isstr(kt)) |
1005 | tmp = ra_scratch(as, rset_exclude(allow, key)); | 1153 | tmp = ra_scratch(as, rset_exclude(allow, key)); |
1006 | } | 1154 | } |
1007 | 1155 | ||
1008 | /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ | 1156 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ |
1009 | l_end = emit_label(as); | 1157 | l_end = emit_label(as); |
1010 | if (nilexit && ir[1].o == IR_NE) { | 1158 | if (merge == IR_NE) |
1011 | emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ | 1159 | asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */ |
1012 | nilexit = NULL; | 1160 | else if (destused) |
1013 | } else { | ||
1014 | emit_loada(as, dest, niltvg(J2G(as->J))); | 1161 | emit_loada(as, dest, niltvg(J2G(as->J))); |
1015 | } | ||
1016 | 1162 | ||
1017 | /* Follow hash chain until the end. */ | 1163 | /* Follow hash chain until the end. */ |
1018 | l_loop = emit_sjcc_label(as, CC_NZ); | 1164 | l_loop = emit_sjcc_label(as, CC_NZ); |
1019 | emit_rr(as, XO_TEST, dest, dest); | 1165 | emit_rr(as, XO_TEST, dest|REX_GC64, dest); |
1020 | emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); | 1166 | emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next)); |
1021 | l_next = emit_label(as); | 1167 | l_next = emit_label(as); |
1022 | 1168 | ||
1023 | /* Type and value comparison. */ | 1169 | /* Type and value comparison. */ |
1024 | if (nilexit) | 1170 | if (merge == IR_EQ) |
1025 | emit_jcc(as, CC_E, nilexit); | 1171 | asm_guardcc(as, CC_E); |
1026 | else | 1172 | else |
1027 | emit_sjcc(as, CC_E, l_end); | 1173 | emit_sjcc(as, CC_E, l_end); |
1028 | if (irt_isnum(kt)) { | 1174 | if (irt_isnum(kt)) { |
@@ -1038,7 +1184,7 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1038 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); | 1184 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); |
1039 | emit_sjcc(as, CC_AE, l_next); | 1185 | emit_sjcc(as, CC_AE, l_next); |
1040 | /* The type check avoids NaN penalties and complaints from Valgrind. */ | 1186 | /* The type check avoids NaN penalties and complaints from Valgrind. */ |
1041 | #if LJ_64 | 1187 | #if LJ_64 && !LJ_GC64 |
1042 | emit_u32(as, LJ_TISNUM); | 1188 | emit_u32(as, LJ_TISNUM); |
1043 | emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); | 1189 | emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); |
1044 | #else | 1190 | #else |
@@ -1046,13 +1192,31 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1046 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); | 1192 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); |
1047 | #endif | 1193 | #endif |
1048 | } | 1194 | } |
1049 | #if LJ_64 | 1195 | #if LJ_64 && !LJ_GC64 |
1050 | } else if (irt_islightud(kt)) { | 1196 | } else if (irt_islightud(kt)) { |
1051 | emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); | 1197 | emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); |
1052 | #endif | 1198 | #endif |
1199 | #if LJ_GC64 | ||
1200 | } else if (irt_isaddr(kt)) { | ||
1201 | if (isk) { | ||
1202 | TValue k; | ||
1203 | k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; | ||
1204 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), | ||
1205 | k.u32.lo); | ||
1206 | emit_sjcc(as, CC_NE, l_next); | ||
1207 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), | ||
1208 | k.u32.hi); | ||
1209 | } else { | ||
1210 | emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); | ||
1211 | } | ||
1212 | } else { | ||
1213 | lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); | ||
1214 | emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); | ||
1215 | emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); | ||
1216 | #else | ||
1053 | } else { | 1217 | } else { |
1054 | if (!irt_ispri(kt)) { | 1218 | if (!irt_ispri(kt)) { |
1055 | lua_assert(irt_isaddr(kt)); | 1219 | lj_assertA(irt_isaddr(kt), "bad HREF key type"); |
1056 | if (isk) | 1220 | if (isk) |
1057 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), | 1221 | emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), |
1058 | ptr2addr(ir_kgc(irkey))); | 1222 | ptr2addr(ir_kgc(irkey))); |
@@ -1060,31 +1224,33 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1060 | emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); | 1224 | emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); |
1061 | emit_sjcc(as, CC_NE, l_next); | 1225 | emit_sjcc(as, CC_NE, l_next); |
1062 | } | 1226 | } |
1063 | lua_assert(!irt_isnil(kt)); | 1227 | lj_assertA(!irt_isnil(kt), "bad HREF key type"); |
1064 | emit_i8(as, irt_toitype(kt)); | 1228 | emit_i8(as, irt_toitype(kt)); |
1065 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); | 1229 | emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); |
1230 | #endif | ||
1066 | } | 1231 | } |
1067 | emit_sfixup(as, l_loop); | 1232 | emit_sfixup(as, l_loop); |
1068 | checkmclim(as); | 1233 | checkmclim(as); |
1234 | #if LJ_GC64 | ||
1235 | if (!isk && irt_isaddr(kt)) { | ||
1236 | emit_rr(as, XO_OR, tmp|REX_64, key); | ||
1237 | emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); | ||
1238 | } | ||
1239 | #endif | ||
1069 | 1240 | ||
1070 | /* Load main position relative to tab->node into dest. */ | 1241 | /* Load main position relative to tab->node into dest. */ |
1071 | khash = isk ? ir_khash(irkey) : 1; | 1242 | khash = isk ? ir_khash(as, irkey) : 1; |
1072 | if (khash == 0) { | 1243 | if (khash == 0) { |
1073 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); | 1244 | emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); |
1074 | } else { | 1245 | } else { |
1075 | emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); | 1246 | emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); |
1076 | if ((as->flags & JIT_F_PREFER_IMUL)) { | 1247 | emit_shifti(as, XOg_SHL, dest, 3); |
1077 | emit_i8(as, sizeof(Node)); | 1248 | emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); |
1078 | emit_rr(as, XO_IMULi8, dest, dest); | ||
1079 | } else { | ||
1080 | emit_shifti(as, XOg_SHL, dest, 3); | ||
1081 | emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); | ||
1082 | } | ||
1083 | if (isk) { | 1249 | if (isk) { |
1084 | emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); | 1250 | emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); |
1085 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); | 1251 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); |
1086 | } else if (irt_isstr(kt)) { | 1252 | } else if (irt_isstr(kt)) { |
1087 | emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); | 1253 | emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid)); |
1088 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); | 1254 | emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); |
1089 | } else { /* Must match with hashrot() in lj_tab.c. */ | 1255 | } else { /* Must match with hashrot() in lj_tab.c. */ |
1090 | emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); | 1256 | emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); |
@@ -1107,7 +1273,19 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
1107 | #endif | 1273 | #endif |
1108 | } else { | 1274 | } else { |
1109 | emit_rr(as, XO_MOV, tmp, key); | 1275 | emit_rr(as, XO_MOV, tmp, key); |
1276 | #if LJ_GC64 | ||
1277 | checkmclim(as); | ||
1278 | emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); | ||
1279 | if ((as->flags & JIT_F_BMI2)) { | ||
1280 | emit_i8(as, 32); | ||
1281 | emit_mrm(as, XV_RORX|VEX_64, dest, key); | ||
1282 | } else { | ||
1283 | emit_shifti(as, XOg_SHR|REX_64, dest, 32); | ||
1284 | emit_rr(as, XO_MOV, dest|REX_64, key|REX_64); | ||
1285 | } | ||
1286 | #else | ||
1110 | emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); | 1287 | emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); |
1288 | #endif | ||
1111 | } | 1289 | } |
1112 | } | 1290 | } |
1113 | } | 1291 | } |
@@ -1123,15 +1301,15 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1123 | #if !LJ_64 | 1301 | #if !LJ_64 |
1124 | MCLabel l_exit; | 1302 | MCLabel l_exit; |
1125 | #endif | 1303 | #endif |
1126 | lua_assert(ofs % sizeof(Node) == 0); | 1304 | lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); |
1127 | if (ra_hasreg(dest)) { | 1305 | if (ra_hasreg(dest)) { |
1128 | if (ofs != 0) { | 1306 | if (ofs != 0) { |
1129 | if (dest == node && !(as->flags & JIT_F_LEA_AGU)) | 1307 | if (dest == node) |
1130 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); | 1308 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); |
1131 | else | 1309 | else |
1132 | emit_rmro(as, XO_LEA, dest, node, ofs); | 1310 | emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); |
1133 | } else if (dest != node) { | 1311 | } else if (dest != node) { |
1134 | emit_rr(as, XO_MOV, dest, node); | 1312 | emit_rr(as, XO_MOV, dest|REX_GC64, node); |
1135 | } | 1313 | } |
1136 | } | 1314 | } |
1137 | asm_guardcc(as, CC_NE); | 1315 | asm_guardcc(as, CC_NE); |
@@ -1140,16 +1318,28 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1140 | Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); | 1318 | Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); |
1141 | emit_rmro(as, XO_CMP, key|REX_64, node, | 1319 | emit_rmro(as, XO_CMP, key|REX_64, node, |
1142 | ofs + (int32_t)offsetof(Node, key.u64)); | 1320 | ofs + (int32_t)offsetof(Node, key.u64)); |
1143 | lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); | 1321 | lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t), |
1322 | "bad HREFK key type"); | ||
1144 | /* Assumes -0.0 is already canonicalized to +0.0. */ | 1323 | /* Assumes -0.0 is already canonicalized to +0.0. */ |
1145 | emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : | 1324 | emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : |
1325 | #if LJ_GC64 | ||
1326 | ((uint64_t)irt_toitype(irkey->t) << 47) | | ||
1327 | (uint64_t)ir_kgc(irkey)); | ||
1328 | #else | ||
1146 | ((uint64_t)irt_toitype(irkey->t) << 32) | | 1329 | ((uint64_t)irt_toitype(irkey->t) << 32) | |
1147 | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); | 1330 | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); |
1331 | #endif | ||
1148 | } else { | 1332 | } else { |
1149 | lua_assert(!irt_isnil(irkey->t)); | 1333 | lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); |
1334 | #if LJ_GC64 | ||
1335 | emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); | ||
1336 | emit_rmro(as, XO_ARITHi, XOg_CMP, node, | ||
1337 | ofs + (int32_t)offsetof(Node, key.it)); | ||
1338 | #else | ||
1150 | emit_i8(as, irt_toitype(irkey->t)); | 1339 | emit_i8(as, irt_toitype(irkey->t)); |
1151 | emit_rmro(as, XO_ARITHi8, XOg_CMP, node, | 1340 | emit_rmro(as, XO_ARITHi8, XOg_CMP, node, |
1152 | ofs + (int32_t)offsetof(Node, key.it)); | 1341 | ofs + (int32_t)offsetof(Node, key.it)); |
1342 | #endif | ||
1153 | } | 1343 | } |
1154 | #else | 1344 | #else |
1155 | l_exit = emit_label(as); | 1345 | l_exit = emit_label(as); |
@@ -1164,13 +1354,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1164 | (int32_t)ir_knum(irkey)->u32.hi); | 1354 | (int32_t)ir_knum(irkey)->u32.hi); |
1165 | } else { | 1355 | } else { |
1166 | if (!irt_ispri(irkey->t)) { | 1356 | if (!irt_ispri(irkey->t)) { |
1167 | lua_assert(irt_isgcv(irkey->t)); | 1357 | lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type"); |
1168 | emit_gmroi(as, XG_ARITHi(XOg_CMP), node, | 1358 | emit_gmroi(as, XG_ARITHi(XOg_CMP), node, |
1169 | ofs + (int32_t)offsetof(Node, key.gcr), | 1359 | ofs + (int32_t)offsetof(Node, key.gcr), |
1170 | ptr2addr(ir_kgc(irkey))); | 1360 | ptr2addr(ir_kgc(irkey))); |
1171 | emit_sjcc(as, CC_NE, l_exit); | 1361 | emit_sjcc(as, CC_NE, l_exit); |
1172 | } | 1362 | } |
1173 | lua_assert(!irt_isnil(irkey->t)); | 1363 | lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); |
1174 | emit_i8(as, irt_toitype(irkey->t)); | 1364 | emit_i8(as, irt_toitype(irkey->t)); |
1175 | emit_rmro(as, XO_ARITHi8, XOg_CMP, node, | 1365 | emit_rmro(as, XO_ARITHi8, XOg_CMP, node, |
1176 | ofs + (int32_t)offsetof(Node, key.it)); | 1366 | ofs + (int32_t)offsetof(Node, key.it)); |
@@ -1178,61 +1368,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1178 | #endif | 1368 | #endif |
1179 | } | 1369 | } |
1180 | 1370 | ||
1181 | static void asm_newref(ASMState *as, IRIns *ir) | ||
1182 | { | ||
1183 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
1184 | IRRef args[3]; | ||
1185 | IRIns *irkey; | ||
1186 | Reg tmp; | ||
1187 | if (ir->r == RID_SINK) | ||
1188 | return; | ||
1189 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1190 | args[1] = ir->op1; /* GCtab *t */ | ||
1191 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
1192 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
1193 | asm_gencall(as, ci, args); | ||
1194 | tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
1195 | irkey = IR(ir->op2); | ||
1196 | if (irt_isnum(irkey->t)) { | ||
1197 | /* For numbers use the constant itself or a spill slot as a TValue. */ | ||
1198 | if (irref_isk(ir->op2)) | ||
1199 | emit_loada(as, tmp, ir_knum(irkey)); | ||
1200 | else | ||
1201 | emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey)); | ||
1202 | } else { | ||
1203 | /* Otherwise use g->tmptv to hold the TValue. */ | ||
1204 | if (!irref_isk(ir->op2)) { | ||
1205 | Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp)); | ||
1206 | emit_movtomro(as, REX_64IR(irkey, src), tmp, 0); | ||
1207 | } else if (!irt_ispri(irkey->t)) { | ||
1208 | emit_movmroi(as, tmp, 0, irkey->i); | ||
1209 | } | ||
1210 | if (!(LJ_64 && irt_islightud(irkey->t))) | ||
1211 | emit_movmroi(as, tmp, 4, irt_toitype(irkey->t)); | ||
1212 | emit_loada(as, tmp, &J2G(as->J)->tmptv); | ||
1213 | } | ||
1214 | } | ||
1215 | |||
1216 | static void asm_uref(ASMState *as, IRIns *ir) | 1371 | static void asm_uref(ASMState *as, IRIns *ir) |
1217 | { | 1372 | { |
1218 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1373 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1219 | if (irref_isk(ir->op1)) { | 1374 | if (irref_isk(ir->op1)) { |
1220 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 1375 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
1221 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 1376 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
1222 | emit_rma(as, XO_MOV, dest, v); | 1377 | emit_rma(as, XO_MOV, dest|REX_GC64, v); |
1223 | } else { | 1378 | } else { |
1224 | Reg uv = ra_scratch(as, RSET_GPR); | 1379 | Reg uv = ra_scratch(as, RSET_GPR); |
1225 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 1380 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); |
1226 | if (ir->o == IR_UREFC) { | 1381 | if (ir->o == IR_UREFC) { |
1227 | emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); | 1382 | emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); |
1228 | asm_guardcc(as, CC_NE); | 1383 | asm_guardcc(as, CC_NE); |
1229 | emit_i8(as, 1); | 1384 | emit_i8(as, 1); |
1230 | emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); | 1385 | emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); |
1231 | } else { | 1386 | } else { |
1232 | emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); | 1387 | emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); |
1233 | } | 1388 | } |
1234 | emit_rmro(as, XO_MOV, uv, func, | 1389 | emit_rmro(as, XO_MOV, uv|REX_GC64, func, |
1235 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | 1390 | (int32_t)offsetof(GCfuncL, uvptr) + |
1391 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); | ||
1236 | } | 1392 | } |
1237 | } | 1393 | } |
1238 | 1394 | ||
@@ -1250,9 +1406,9 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
1250 | if (as->mrm.base == RID_NONE) | 1406 | if (as->mrm.base == RID_NONE) |
1251 | emit_loadi(as, dest, as->mrm.ofs); | 1407 | emit_loadi(as, dest, as->mrm.ofs); |
1252 | else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) | 1408 | else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) |
1253 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); | 1409 | emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs); |
1254 | else | 1410 | else |
1255 | emit_mrm(as, XO_LEA, dest, RID_MRM); | 1411 | emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); |
1256 | } | 1412 | } |
1257 | 1413 | ||
1258 | /* -- Loads and stores ---------------------------------------------------- */ | 1414 | /* -- Loads and stores ---------------------------------------------------- */ |
@@ -1271,19 +1427,23 @@ static void asm_fxload(ASMState *as, IRIns *ir) | |||
1271 | case IRT_U8: xo = XO_MOVZXb; break; | 1427 | case IRT_U8: xo = XO_MOVZXb; break; |
1272 | case IRT_I16: xo = XO_MOVSXw; break; | 1428 | case IRT_I16: xo = XO_MOVSXw; break; |
1273 | case IRT_U16: xo = XO_MOVZXw; break; | 1429 | case IRT_U16: xo = XO_MOVZXw; break; |
1274 | case IRT_NUM: xo = XMM_MOVRM(as); break; | 1430 | case IRT_NUM: xo = XO_MOVSD; break; |
1275 | case IRT_FLOAT: xo = XO_MOVSS; break; | 1431 | case IRT_FLOAT: xo = XO_MOVSS; break; |
1276 | default: | 1432 | default: |
1277 | if (LJ_64 && irt_is64(ir->t)) | 1433 | if (LJ_64 && irt_is64(ir->t)) |
1278 | dest |= REX_64; | 1434 | dest |= REX_64; |
1279 | else | 1435 | else |
1280 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); | 1436 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), |
1437 | "unsplit 64 bit load"); | ||
1281 | xo = XO_MOV; | 1438 | xo = XO_MOV; |
1282 | break; | 1439 | break; |
1283 | } | 1440 | } |
1284 | emit_mrm(as, xo, dest, RID_MRM); | 1441 | emit_mrm(as, xo, dest, RID_MRM); |
1285 | } | 1442 | } |
1286 | 1443 | ||
1444 | #define asm_fload(as, ir) asm_fxload(as, ir) | ||
1445 | #define asm_xload(as, ir) asm_fxload(as, ir) | ||
1446 | |||
1287 | static void asm_fxstore(ASMState *as, IRIns *ir) | 1447 | static void asm_fxstore(ASMState *as, IRIns *ir) |
1288 | { | 1448 | { |
1289 | RegSet allow = RSET_GPR; | 1449 | RegSet allow = RSET_GPR; |
@@ -1318,14 +1478,17 @@ static void asm_fxstore(ASMState *as, IRIns *ir) | |||
1318 | case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; | 1478 | case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; |
1319 | case IRT_NUM: xo = XO_MOVSDto; break; | 1479 | case IRT_NUM: xo = XO_MOVSDto; break; |
1320 | case IRT_FLOAT: xo = XO_MOVSSto; break; | 1480 | case IRT_FLOAT: xo = XO_MOVSSto; break; |
1321 | #if LJ_64 | 1481 | #if LJ_64 && !LJ_GC64 |
1322 | case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ | 1482 | case IRT_LIGHTUD: |
1483 | /* NYI: mask 64 bit lightuserdata. */ | ||
1484 | lj_assertA(0, "store of lightuserdata"); | ||
1323 | #endif | 1485 | #endif |
1324 | default: | 1486 | default: |
1325 | if (LJ_64 && irt_is64(ir->t)) | 1487 | if (LJ_64 && irt_is64(ir->t)) |
1326 | src |= REX_64; | 1488 | src |= REX_64; |
1327 | else | 1489 | else |
1328 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); | 1490 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), |
1491 | "unsplit 64 bit store"); | ||
1329 | xo = XO_MOVto; | 1492 | xo = XO_MOVto; |
1330 | break; | 1493 | break; |
1331 | } | 1494 | } |
@@ -1339,15 +1502,18 @@ static void asm_fxstore(ASMState *as, IRIns *ir) | |||
1339 | emit_i8(as, k); | 1502 | emit_i8(as, k); |
1340 | emit_mrm(as, XO_MOVmib, 0, RID_MRM); | 1503 | emit_mrm(as, XO_MOVmib, 0, RID_MRM); |
1341 | } else { | 1504 | } else { |
1342 | lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || | 1505 | lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || |
1343 | irt_isaddr(ir->t)); | 1506 | irt_isaddr(ir->t), "bad store type"); |
1344 | emit_i32(as, k); | 1507 | emit_i32(as, k); |
1345 | emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); | 1508 | emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); |
1346 | } | 1509 | } |
1347 | } | 1510 | } |
1348 | } | 1511 | } |
1349 | 1512 | ||
1350 | #if LJ_64 | 1513 | #define asm_fstore(as, ir) asm_fxstore(as, ir) |
1514 | #define asm_xstore(as, ir) asm_fxstore(as, ir) | ||
1515 | |||
1516 | #if LJ_64 && !LJ_GC64 | ||
1351 | static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) | 1517 | static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) |
1352 | { | 1518 | { |
1353 | if (ra_used(ir) || typecheck) { | 1519 | if (ra_used(ir) || typecheck) { |
@@ -1369,13 +1535,18 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) | |||
1369 | 1535 | ||
1370 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1536 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
1371 | { | 1537 | { |
1372 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || | 1538 | #if LJ_GC64 |
1373 | (LJ_DUALNUM && irt_isint(ir->t))); | 1539 | Reg tmp = RID_NONE; |
1374 | #if LJ_64 | 1540 | #endif |
1541 | lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || | ||
1542 | (LJ_DUALNUM && irt_isint(ir->t)), | ||
1543 | "bad load type %d", irt_type(ir->t)); | ||
1544 | #if LJ_64 && !LJ_GC64 | ||
1375 | if (irt_islightud(ir->t)) { | 1545 | if (irt_islightud(ir->t)) { |
1376 | Reg dest = asm_load_lightud64(as, ir, 1); | 1546 | Reg dest = asm_load_lightud64(as, ir, 1); |
1377 | if (ra_hasreg(dest)) { | 1547 | if (ra_hasreg(dest)) { |
1378 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1548 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
1549 | if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; | ||
1379 | emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); | 1550 | emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); |
1380 | } | 1551 | } |
1381 | return; | 1552 | return; |
@@ -1385,20 +1556,67 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
1385 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | 1556 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; |
1386 | Reg dest = ra_dest(as, ir, allow); | 1557 | Reg dest = ra_dest(as, ir, allow); |
1387 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1558 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
1388 | emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); | 1559 | if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; |
1560 | #if LJ_GC64 | ||
1561 | if (irt_isaddr(ir->t)) { | ||
1562 | emit_shifti(as, XOg_SHR|REX_64, dest, 17); | ||
1563 | asm_guardcc(as, CC_NE); | ||
1564 | emit_i8(as, irt_toitype(ir->t)); | ||
1565 | emit_rr(as, XO_ARITHi8, XOg_CMP, dest); | ||
1566 | emit_i8(as, XI_O16); | ||
1567 | if ((as->flags & JIT_F_BMI2)) { | ||
1568 | emit_i8(as, 47); | ||
1569 | emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM); | ||
1570 | } else { | ||
1571 | emit_shifti(as, XOg_ROR|REX_64, dest, 47); | ||
1572 | emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); | ||
1573 | } | ||
1574 | return; | ||
1575 | } else | ||
1576 | #endif | ||
1577 | emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); | ||
1389 | } else { | 1578 | } else { |
1390 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1579 | RegSet gpr = RSET_GPR; |
1580 | #if LJ_GC64 | ||
1581 | if (irt_isaddr(ir->t)) { | ||
1582 | tmp = ra_scratch(as, RSET_GPR); | ||
1583 | gpr = rset_exclude(gpr, tmp); | ||
1584 | } | ||
1585 | #endif | ||
1586 | asm_fuseahuref(as, ir->op1, gpr); | ||
1587 | if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; | ||
1391 | } | 1588 | } |
1392 | /* Always do the type check, even if the load result is unused. */ | 1589 | /* Always do the type check, even if the load result is unused. */ |
1393 | as->mrm.ofs += 4; | 1590 | as->mrm.ofs += 4; |
1394 | asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); | 1591 | asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); |
1395 | if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { | 1592 | if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { |
1396 | lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); | 1593 | lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), |
1594 | "bad load type %d", irt_type(ir->t)); | ||
1595 | #if LJ_GC64 | ||
1596 | emit_u32(as, LJ_TISNUM << 15); | ||
1597 | #else | ||
1397 | emit_u32(as, LJ_TISNUM); | 1598 | emit_u32(as, LJ_TISNUM); |
1599 | #endif | ||
1600 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); | ||
1601 | #if LJ_GC64 | ||
1602 | } else if (irt_isaddr(ir->t)) { | ||
1603 | as->mrm.ofs -= 4; | ||
1604 | emit_i8(as, irt_toitype(ir->t)); | ||
1605 | emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp); | ||
1606 | emit_shifti(as, XOg_SAR|REX_64, tmp, 47); | ||
1607 | emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM); | ||
1608 | } else if (irt_isnil(ir->t)) { | ||
1609 | as->mrm.ofs -= 4; | ||
1610 | emit_i8(as, -1); | ||
1611 | emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM); | ||
1612 | } else { | ||
1613 | emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); | ||
1398 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); | 1614 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); |
1615 | #else | ||
1399 | } else { | 1616 | } else { |
1400 | emit_i8(as, irt_toitype(ir->t)); | 1617 | emit_i8(as, irt_toitype(ir->t)); |
1401 | emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); | 1618 | emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); |
1619 | #endif | ||
1402 | } | 1620 | } |
1403 | } | 1621 | } |
1404 | 1622 | ||
@@ -1410,12 +1628,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
1410 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); | 1628 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); |
1411 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1629 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
1412 | emit_mrm(as, XO_MOVSDto, src, RID_MRM); | 1630 | emit_mrm(as, XO_MOVSDto, src, RID_MRM); |
1413 | #if LJ_64 | 1631 | #if LJ_64 && !LJ_GC64 |
1414 | } else if (irt_islightud(ir->t)) { | 1632 | } else if (irt_islightud(ir->t)) { |
1415 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | 1633 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); |
1416 | asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); | 1634 | asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); |
1417 | emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); | 1635 | emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); |
1418 | #endif | 1636 | #endif |
1637 | #if LJ_GC64 | ||
1638 | } else if (irref_isk(ir->op2)) { | ||
1639 | TValue k; | ||
1640 | lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); | ||
1641 | asm_fuseahuref(as, ir->op1, RSET_GPR); | ||
1642 | if (tvisnil(&k)) { | ||
1643 | emit_i32(as, -1); | ||
1644 | emit_mrm(as, XO_MOVmi, REX_64, RID_MRM); | ||
1645 | } else { | ||
1646 | emit_u32(as, k.u32.lo); | ||
1647 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | ||
1648 | as->mrm.ofs += 4; | ||
1649 | emit_u32(as, k.u32.hi); | ||
1650 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | ||
1651 | } | ||
1652 | #endif | ||
1419 | } else { | 1653 | } else { |
1420 | IRIns *irr = IR(ir->op2); | 1654 | IRIns *irr = IR(ir->op2); |
1421 | RegSet allow = RSET_GPR; | 1655 | RegSet allow = RSET_GPR; |
@@ -1426,34 +1660,56 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
1426 | } | 1660 | } |
1427 | asm_fuseahuref(as, ir->op1, allow); | 1661 | asm_fuseahuref(as, ir->op1, allow); |
1428 | if (ra_hasreg(src)) { | 1662 | if (ra_hasreg(src)) { |
1663 | #if LJ_GC64 | ||
1664 | if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { | ||
1665 | /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ | ||
1666 | as->mrm.ofs += 4; | ||
1667 | emit_u32(as, irt_toitype(ir->t) << 15); | ||
1668 | emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); | ||
1669 | as->mrm.ofs -= 4; | ||
1670 | emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); | ||
1671 | return; | ||
1672 | } | ||
1673 | #endif | ||
1429 | emit_mrm(as, XO_MOVto, src, RID_MRM); | 1674 | emit_mrm(as, XO_MOVto, src, RID_MRM); |
1430 | } else if (!irt_ispri(irr->t)) { | 1675 | } else if (!irt_ispri(irr->t)) { |
1431 | lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); | 1676 | lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)), |
1677 | "bad store type"); | ||
1432 | emit_i32(as, irr->i); | 1678 | emit_i32(as, irr->i); |
1433 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | 1679 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); |
1434 | } | 1680 | } |
1435 | as->mrm.ofs += 4; | 1681 | as->mrm.ofs += 4; |
1682 | #if LJ_GC64 | ||
1683 | lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type"); | ||
1684 | emit_i32(as, LJ_TNUMX << 15); | ||
1685 | #else | ||
1436 | emit_i32(as, (int32_t)irt_toitype(ir->t)); | 1686 | emit_i32(as, (int32_t)irt_toitype(ir->t)); |
1687 | #endif | ||
1437 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | 1688 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); |
1438 | } | 1689 | } |
1439 | } | 1690 | } |
1440 | 1691 | ||
1441 | static void asm_sload(ASMState *as, IRIns *ir) | 1692 | static void asm_sload(ASMState *as, IRIns *ir) |
1442 | { | 1693 | { |
1443 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | 1694 | int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + |
1695 | (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
1444 | IRType1 t = ir->t; | 1696 | IRType1 t = ir->t; |
1445 | Reg base; | 1697 | Reg base; |
1446 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1698 | lj_assertA(!(ir->op2 & IRSLOAD_PARENT), |
1447 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1699 | "bad parent SLOAD"); /* Handled by asm_head_side(). */ |
1448 | lua_assert(LJ_DUALNUM || | 1700 | lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), |
1449 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 1701 | "inconsistent SLOAD variant"); |
1702 | lj_assertA(LJ_DUALNUM || | ||
1703 | !irt_isint(t) || | ||
1704 | (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), | ||
1705 | "bad SLOAD type"); | ||
1450 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1706 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
1451 | Reg left = ra_scratch(as, RSET_FPR); | 1707 | Reg left = ra_scratch(as, RSET_FPR); |
1452 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ | 1708 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ |
1453 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 1709 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
1454 | emit_rmro(as, XMM_MOVRM(as), left, base, ofs); | 1710 | emit_rmro(as, XO_MOVSD, left, base, ofs); |
1455 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1711 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
1456 | #if LJ_64 | 1712 | #if LJ_64 && !LJ_GC64 |
1457 | } else if (irt_islightud(t)) { | 1713 | } else if (irt_islightud(t)) { |
1458 | Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); | 1714 | Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); |
1459 | if (ra_hasreg(dest)) { | 1715 | if (ra_hasreg(dest)) { |
@@ -1466,14 +1722,43 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1466 | RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; | 1722 | RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; |
1467 | Reg dest = ra_dest(as, ir, allow); | 1723 | Reg dest = ra_dest(as, ir, allow); |
1468 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 1724 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
1469 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1725 | lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), |
1726 | "bad SLOAD type %d", irt_type(t)); | ||
1470 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1727 | if ((ir->op2 & IRSLOAD_CONVERT)) { |
1471 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ | 1728 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ |
1472 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); | 1729 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); |
1473 | } else if (irt_isnum(t)) { | ||
1474 | emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); | ||
1475 | } else { | 1730 | } else { |
1476 | emit_rmro(as, XO_MOV, dest, base, ofs); | 1731 | #if LJ_GC64 |
1732 | if (irt_isaddr(t)) { | ||
1733 | /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: | ||
1734 | ** | ||
1735 | ** mov r64, [addr] rorx r64, [addr], 47 | ||
1736 | ** ror r64, 47 | ||
1737 | ** cmp r16, itype cmp r16, itype | ||
1738 | ** jne ->exit jne ->exit | ||
1739 | ** shr r64, 16 shr r64, 16 | ||
1740 | */ | ||
1741 | emit_shifti(as, XOg_SHR|REX_64, dest, 17); | ||
1742 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | ||
1743 | asm_guardcc(as, CC_NE); | ||
1744 | emit_i8(as, irt_toitype(t)); | ||
1745 | emit_rr(as, XO_ARITHi8, XOg_CMP, dest); | ||
1746 | emit_i8(as, XI_O16); | ||
1747 | } | ||
1748 | if ((as->flags & JIT_F_BMI2)) { | ||
1749 | emit_i8(as, 47); | ||
1750 | emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs); | ||
1751 | } else { | ||
1752 | if ((ir->op2 & IRSLOAD_TYPECHECK)) | ||
1753 | emit_shifti(as, XOg_ROR|REX_64, dest, 47); | ||
1754 | else | ||
1755 | emit_shifti(as, XOg_SHL|REX_64, dest, 17); | ||
1756 | emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); | ||
1757 | } | ||
1758 | return; | ||
1759 | } else | ||
1760 | #endif | ||
1761 | emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); | ||
1477 | } | 1762 | } |
1478 | } else { | 1763 | } else { |
1479 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) | 1764 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) |
@@ -1484,12 +1769,44 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1484 | /* Need type check, even if the load result is unused. */ | 1769 | /* Need type check, even if the load result is unused. */ |
1485 | asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); | 1770 | asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); |
1486 | if (LJ_64 && irt_type(t) >= IRT_NUM) { | 1771 | if (LJ_64 && irt_type(t) >= IRT_NUM) { |
1487 | lua_assert(irt_isinteger(t) || irt_isnum(t)); | 1772 | lj_assertA(irt_isinteger(t) || irt_isnum(t), |
1773 | "bad SLOAD type %d", irt_type(t)); | ||
1774 | #if LJ_GC64 | ||
1775 | emit_u32(as, LJ_TISNUM << 15); | ||
1776 | #else | ||
1488 | emit_u32(as, LJ_TISNUM); | 1777 | emit_u32(as, LJ_TISNUM); |
1778 | #endif | ||
1489 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); | 1779 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); |
1780 | #if LJ_GC64 | ||
1781 | } else if (irt_isnil(t)) { | ||
1782 | /* LJ_GC64 type check for nil: | ||
1783 | ** | ||
1784 | ** cmp qword [addr], -1 | ||
1785 | ** jne ->exit | ||
1786 | */ | ||
1787 | emit_i8(as, -1); | ||
1788 | emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs); | ||
1789 | } else if (irt_ispri(t)) { | ||
1790 | emit_u32(as, (irt_toitype(t) << 15) | 0x7fff); | ||
1791 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); | ||
1792 | } else { | ||
1793 | /* LJ_GC64 type check only: | ||
1794 | ** | ||
1795 | ** mov r64, [addr] | ||
1796 | ** sar r64, 47 | ||
1797 | ** cmp r32, itype | ||
1798 | ** jne ->exit | ||
1799 | */ | ||
1800 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base)); | ||
1801 | emit_i8(as, irt_toitype(t)); | ||
1802 | emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); | ||
1803 | emit_shifti(as, XOg_SAR|REX_64, tmp, 47); | ||
1804 | emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); | ||
1805 | #else | ||
1490 | } else { | 1806 | } else { |
1491 | emit_i8(as, irt_toitype(t)); | 1807 | emit_i8(as, irt_toitype(t)); |
1492 | emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); | 1808 | emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); |
1809 | #endif | ||
1493 | } | 1810 | } |
1494 | } | 1811 | } |
1495 | } | 1812 | } |
@@ -1500,15 +1817,14 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1500 | static void asm_cnew(ASMState *as, IRIns *ir) | 1817 | static void asm_cnew(ASMState *as, IRIns *ir) |
1501 | { | 1818 | { |
1502 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1819 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1503 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1820 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1504 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1821 | CTSize sz; |
1505 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1822 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1506 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1823 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1507 | IRRef args[2]; | 1824 | IRRef args[4]; |
1508 | lua_assert(sz != CTSIZE_INVALID); | 1825 | lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), |
1826 | "bad CNEW/CNEWI operands"); | ||
1509 | 1827 | ||
1510 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1511 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1512 | as->gcsteps++; | 1828 | as->gcsteps++; |
1513 | asm_setupresult(as, ir, ci); /* GCcdata * */ | 1829 | asm_setupresult(as, ir, ci); /* GCcdata * */ |
1514 | 1830 | ||
@@ -1519,8 +1835,9 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1519 | Reg r64 = sz == 8 ? REX_64 : 0; | 1835 | Reg r64 = sz == 8 ? REX_64 : 0; |
1520 | if (irref_isk(ir->op2)) { | 1836 | if (irref_isk(ir->op2)) { |
1521 | IRIns *irk = IR(ir->op2); | 1837 | IRIns *irk = IR(ir->op2); |
1522 | uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : | 1838 | uint64_t k = (irk->o == IR_KINT64 || |
1523 | (uint64_t)(uint32_t)irk->i; | 1839 | (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ? |
1840 | ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i; | ||
1524 | if (sz == 4 || checki32((int64_t)k)) { | 1841 | if (sz == 4 || checki32((int64_t)k)) { |
1525 | emit_i32(as, (int32_t)k); | 1842 | emit_i32(as, (int32_t)k); |
1526 | emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); | 1843 | emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); |
@@ -1536,7 +1853,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1536 | int32_t ofs = sizeof(GCcdata); | 1853 | int32_t ofs = sizeof(GCcdata); |
1537 | if (sz == 8) { | 1854 | if (sz == 8) { |
1538 | ofs += 4; ir++; | 1855 | ofs += 4; ir++; |
1539 | lua_assert(ir->o == IR_HIOP); | 1856 | lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP"); |
1540 | } | 1857 | } |
1541 | do { | 1858 | do { |
1542 | if (irref_isk(ir->op2)) { | 1859 | if (irref_isk(ir->op2)) { |
@@ -1550,21 +1867,30 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1550 | ofs -= 4; ir--; | 1867 | ofs -= 4; ir--; |
1551 | } while (1); | 1868 | } while (1); |
1552 | #endif | 1869 | #endif |
1553 | lua_assert(sz == 4 || sz == 8); | 1870 | lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); |
1871 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1872 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1873 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1874 | args[1] = ir->op1; /* CTypeID id */ | ||
1875 | args[2] = ir->op2; /* CTSize sz */ | ||
1876 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1877 | asm_gencall(as, ci, args); | ||
1878 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1879 | return; | ||
1554 | } | 1880 | } |
1555 | 1881 | ||
1556 | /* Combine initialization of marked, gct and ctypeid. */ | 1882 | /* Combine initialization of marked, gct and ctypeid. */ |
1557 | emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); | 1883 | emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); |
1558 | emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, | 1884 | emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, |
1559 | (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); | 1885 | (int32_t)((~LJ_TCDATA<<8)+(id<<16))); |
1560 | emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); | 1886 | emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); |
1561 | emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); | 1887 | emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); |
1562 | 1888 | ||
1889 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1890 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1563 | asm_gencall(as, ci, args); | 1891 | asm_gencall(as, ci, args); |
1564 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); | 1892 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); |
1565 | } | 1893 | } |
1566 | #else | ||
1567 | #define asm_cnew(as, ir) ((void)0) | ||
1568 | #endif | 1894 | #endif |
1569 | 1895 | ||
1570 | /* -- Write barriers ------------------------------------------------------ */ | 1896 | /* -- Write barriers ------------------------------------------------------ */ |
@@ -1574,7 +1900,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
1574 | Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); | 1900 | Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); |
1575 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); | 1901 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); |
1576 | MCLabel l_end = emit_label(as); | 1902 | MCLabel l_end = emit_label(as); |
1577 | emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); | 1903 | emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist)); |
1578 | emit_setgl(as, tab, gc.grayagain); | 1904 | emit_setgl(as, tab, gc.grayagain); |
1579 | emit_getgl(as, tmp, gc.grayagain); | 1905 | emit_getgl(as, tmp, gc.grayagain); |
1580 | emit_i8(as, ~LJ_GC_BLACK); | 1906 | emit_i8(as, ~LJ_GC_BLACK); |
@@ -1591,7 +1917,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1591 | MCLabel l_end; | 1917 | MCLabel l_end; |
1592 | Reg obj; | 1918 | Reg obj; |
1593 | /* No need for other object barriers (yet). */ | 1919 | /* No need for other object barriers (yet). */ |
1594 | lua_assert(IR(ir->op1)->o == IR_UREFC); | 1920 | lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); |
1595 | ra_evictset(as, RSET_SCRATCH); | 1921 | ra_evictset(as, RSET_SCRATCH); |
1596 | l_end = emit_label(as); | 1922 | l_end = emit_label(as); |
1597 | args[0] = ASMREF_TMP1; /* global_State *g */ | 1923 | args[0] = ASMREF_TMP1; /* global_State *g */ |
@@ -1637,36 +1963,9 @@ static void asm_x87load(ASMState *as, IRRef ref) | |||
1637 | } | 1963 | } |
1638 | } | 1964 | } |
1639 | 1965 | ||
1640 | /* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */ | ||
1641 | static int fpmjoin_pow(ASMState *as, IRIns *ir) | ||
1642 | { | ||
1643 | IRIns *irp = IR(ir->op1); | ||
1644 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | ||
1645 | IRIns *irpp = IR(irp->op1); | ||
1646 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | ||
1647 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
1648 | /* The modified regs must match with the *.dasc implementation. */ | ||
1649 | RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
1650 | IRIns *irx; | ||
1651 | if (ra_hasreg(ir->r)) | ||
1652 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
1653 | ra_evictset(as, drop); | ||
1654 | ra_destreg(as, ir, RID_XMM0); | ||
1655 | emit_call(as, lj_vm_pow_sse); | ||
1656 | irx = IR(irpp->op1); | ||
1657 | if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1) | ||
1658 | irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */ | ||
1659 | ra_left(as, RID_XMM0, irpp->op1); | ||
1660 | ra_left(as, RID_XMM1, irp->op2); | ||
1661 | return 1; | ||
1662 | } | ||
1663 | } | ||
1664 | return 0; | ||
1665 | } | ||
1666 | |||
1667 | static void asm_fpmath(ASMState *as, IRIns *ir) | 1966 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1668 | { | 1967 | { |
1669 | IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; | 1968 | IRFPMathOp fpm = (IRFPMathOp)ir->op2; |
1670 | if (fpm == IRFPM_SQRT) { | 1969 | if (fpm == IRFPM_SQRT) { |
1671 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1970 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1672 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | 1971 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); |
@@ -1697,51 +1996,25 @@ static void asm_fpmath(ASMState *as, IRIns *ir) | |||
1697 | fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); | 1996 | fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); |
1698 | ra_left(as, RID_XMM0, ir->op1); | 1997 | ra_left(as, RID_XMM0, ir->op1); |
1699 | } | 1998 | } |
1700 | } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { | 1999 | } else { |
1701 | /* Rejoined to pow(). */ | 2000 | asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); |
1702 | } else { /* Handle x87 ops. */ | 2001 | } |
1703 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | 2002 | } |
1704 | Reg dest = ir->r; | 2003 | |
1705 | if (ra_hasreg(dest)) { | 2004 | static void asm_ldexp(ASMState *as, IRIns *ir) |
1706 | ra_free(as, dest); | 2005 | { |
1707 | ra_modified(as, dest); | 2006 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ |
1708 | emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); | 2007 | Reg dest = ir->r; |
1709 | } | 2008 | if (ra_hasreg(dest)) { |
1710 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | 2009 | ra_free(as, dest); |
1711 | switch (fpm) { /* st0 = lj_vm_*(st0) */ | 2010 | ra_modified(as, dest); |
1712 | case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; | 2011 | emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); |
1713 | case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break; | ||
1714 | case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; | ||
1715 | case IRFPM_COS: emit_x87op(as, XI_FCOS); break; | ||
1716 | case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; | ||
1717 | case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10: | ||
1718 | /* Note: the use of fyl2xp1 would be pointless here. When computing | ||
1719 | ** log(1.0+eps) the precision is already lost after 1.0 is added. | ||
1720 | ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense. | ||
1721 | */ | ||
1722 | emit_x87op(as, XI_FYL2X); break; | ||
1723 | case IRFPM_OTHER: | ||
1724 | switch (ir->o) { | ||
1725 | case IR_ATAN2: | ||
1726 | emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; | ||
1727 | case IR_LDEXP: | ||
1728 | emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; | ||
1729 | default: lua_assert(0); break; | ||
1730 | } | ||
1731 | break; | ||
1732 | default: lua_assert(0); break; | ||
1733 | } | ||
1734 | asm_x87load(as, ir->op1); | ||
1735 | switch (fpm) { | ||
1736 | case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break; | ||
1737 | case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break; | ||
1738 | case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break; | ||
1739 | case IRFPM_OTHER: | ||
1740 | if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2); | ||
1741 | break; | ||
1742 | default: break; | ||
1743 | } | ||
1744 | } | 2012 | } |
2013 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
2014 | emit_x87op(as, XI_FPOP1); | ||
2015 | emit_x87op(as, XI_FSCALE); | ||
2016 | asm_x87load(as, ir->op1); | ||
2017 | asm_x87load(as, ir->op2); | ||
1745 | } | 2018 | } |
1746 | 2019 | ||
1747 | static void asm_fppowi(ASMState *as, IRIns *ir) | 2020 | static void asm_fppowi(ASMState *as, IRIns *ir) |
@@ -1757,33 +2030,11 @@ static void asm_fppowi(ASMState *as, IRIns *ir) | |||
1757 | ra_left(as, RID_EAX, ir->op2); | 2030 | ra_left(as, RID_EAX, ir->op2); |
1758 | } | 2031 | } |
1759 | 2032 | ||
1760 | #if LJ_64 && LJ_HASFFI | ||
1761 | static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id) | ||
1762 | { | ||
1763 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1764 | IRRef args[2]; | ||
1765 | args[0] = ir->op1; | ||
1766 | args[1] = ir->op2; | ||
1767 | asm_setupresult(as, ir, ci); | ||
1768 | asm_gencall(as, ci, args); | ||
1769 | } | ||
1770 | #endif | ||
1771 | |||
1772 | static void asm_intmod(ASMState *as, IRIns *ir) | ||
1773 | { | ||
1774 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi]; | ||
1775 | IRRef args[2]; | ||
1776 | args[0] = ir->op1; | ||
1777 | args[1] = ir->op2; | ||
1778 | asm_setupresult(as, ir, ci); | ||
1779 | asm_gencall(as, ci, args); | ||
1780 | } | ||
1781 | |||
1782 | static int asm_swapops(ASMState *as, IRIns *ir) | 2033 | static int asm_swapops(ASMState *as, IRIns *ir) |
1783 | { | 2034 | { |
1784 | IRIns *irl = IR(ir->op1); | 2035 | IRIns *irl = IR(ir->op1); |
1785 | IRIns *irr = IR(ir->op2); | 2036 | IRIns *irr = IR(ir->op2); |
1786 | lua_assert(ra_noreg(irr->r)); | 2037 | lj_assertA(ra_noreg(irr->r), "bad usage"); |
1787 | if (!irm_iscomm(lj_ir_mode[ir->o])) | 2038 | if (!irm_iscomm(lj_ir_mode[ir->o])) |
1788 | return 0; /* Can't swap non-commutative operations. */ | 2039 | return 0; /* Can't swap non-commutative operations. */ |
1789 | if (irref_isk(ir->op2)) | 2040 | if (irref_isk(ir->op2)) |
@@ -1955,11 +2206,28 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
1955 | { | 2206 | { |
1956 | if (irt_isnum(ir->t)) | 2207 | if (irt_isnum(ir->t)) |
1957 | asm_fparith(as, ir, XO_ADDSD); | 2208 | asm_fparith(as, ir, XO_ADDSD); |
1958 | else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || | 2209 | else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) |
1959 | irt_is64(ir->t) || !asm_lea(as, ir)) | ||
1960 | asm_intarith(as, ir, XOg_ADD); | 2210 | asm_intarith(as, ir, XOg_ADD); |
1961 | } | 2211 | } |
1962 | 2212 | ||
2213 | static void asm_sub(ASMState *as, IRIns *ir) | ||
2214 | { | ||
2215 | if (irt_isnum(ir->t)) | ||
2216 | asm_fparith(as, ir, XO_SUBSD); | ||
2217 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ | ||
2218 | asm_intarith(as, ir, XOg_SUB); | ||
2219 | } | ||
2220 | |||
2221 | static void asm_mul(ASMState *as, IRIns *ir) | ||
2222 | { | ||
2223 | if (irt_isnum(ir->t)) | ||
2224 | asm_fparith(as, ir, XO_MULSD); | ||
2225 | else | ||
2226 | asm_intarith(as, ir, XOg_X_IMUL); | ||
2227 | } | ||
2228 | |||
2229 | #define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD) | ||
2230 | |||
1963 | static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) | 2231 | static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) |
1964 | { | 2232 | { |
1965 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2233 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1967,7 +2235,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) | |||
1967 | ra_left(as, dest, ir->op1); | 2235 | ra_left(as, dest, ir->op1); |
1968 | } | 2236 | } |
1969 | 2237 | ||
1970 | static void asm_min_max(ASMState *as, IRIns *ir, int cc) | 2238 | static void asm_neg(ASMState *as, IRIns *ir) |
2239 | { | ||
2240 | if (irt_isnum(ir->t)) | ||
2241 | asm_fparith(as, ir, XO_XORPS); | ||
2242 | else | ||
2243 | asm_neg_not(as, ir, XOg_NEG); | ||
2244 | } | ||
2245 | |||
2246 | #define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS) | ||
2247 | |||
2248 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) | ||
1971 | { | 2249 | { |
1972 | Reg right, dest = ra_dest(as, ir, RSET_GPR); | 2250 | Reg right, dest = ra_dest(as, ir, RSET_GPR); |
1973 | IRRef lref = ir->op1, rref = ir->op2; | 2251 | IRRef lref = ir->op1, rref = ir->op2; |
@@ -1978,7 +2256,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc) | |||
1978 | ra_left(as, dest, lref); | 2256 | ra_left(as, dest, lref); |
1979 | } | 2257 | } |
1980 | 2258 | ||
1981 | static void asm_bitswap(ASMState *as, IRIns *ir) | 2259 | static void asm_min(ASMState *as, IRIns *ir) |
2260 | { | ||
2261 | if (irt_isnum(ir->t)) | ||
2262 | asm_fparith(as, ir, XO_MINSD); | ||
2263 | else | ||
2264 | asm_intmin_max(as, ir, CC_G); | ||
2265 | } | ||
2266 | |||
2267 | static void asm_max(ASMState *as, IRIns *ir) | ||
2268 | { | ||
2269 | if (irt_isnum(ir->t)) | ||
2270 | asm_fparith(as, ir, XO_MAXSD); | ||
2271 | else | ||
2272 | asm_intmin_max(as, ir, CC_L); | ||
2273 | } | ||
2274 | |||
2275 | /* Note: don't use LEA for overflow-checking arithmetic! */ | ||
2276 | #define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD) | ||
2277 | #define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB) | ||
2278 | #define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL) | ||
2279 | |||
2280 | #define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT) | ||
2281 | |||
2282 | static void asm_bswap(ASMState *as, IRIns *ir) | ||
1982 | { | 2283 | { |
1983 | Reg dest = ra_dest(as, ir, RSET_GPR); | 2284 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1984 | as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), | 2285 | as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), |
@@ -1986,7 +2287,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1986 | ra_left(as, dest, ir->op1); | 2287 | ra_left(as, dest, ir->op1); |
1987 | } | 2288 | } |
1988 | 2289 | ||
1989 | static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | 2290 | #define asm_band(as, ir) asm_intarith(as, ir, XOg_AND) |
2291 | #define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR) | ||
2292 | #define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR) | ||
2293 | |||
2294 | static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv) | ||
1990 | { | 2295 | { |
1991 | IRRef rref = ir->op2; | 2296 | IRRef rref = ir->op2; |
1992 | IRIns *irr = IR(rref); | 2297 | IRIns *irr = IR(rref); |
@@ -1995,17 +2300,33 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | |||
1995 | int shift; | 2300 | int shift; |
1996 | dest = ra_dest(as, ir, RSET_GPR); | 2301 | dest = ra_dest(as, ir, RSET_GPR); |
1997 | shift = irr->i & (irt_is64(ir->t) ? 63 : 31); | 2302 | shift = irr->i & (irt_is64(ir->t) ? 63 : 31); |
2303 | if (!xv && shift && (as->flags & JIT_F_BMI2)) { | ||
2304 | Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t)); | ||
2305 | if (left != dest) { /* BMI2 rotate right by constant. */ | ||
2306 | emit_i8(as, xs == XOg_ROL ? -shift : shift); | ||
2307 | emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left); | ||
2308 | return; | ||
2309 | } | ||
2310 | } | ||
1998 | switch (shift) { | 2311 | switch (shift) { |
1999 | case 0: break; | 2312 | case 0: break; |
2000 | case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; | 2313 | case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; |
2001 | default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; | 2314 | default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; |
2002 | } | 2315 | } |
2316 | } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */ | ||
2317 | Reg left, right; | ||
2318 | dest = ra_dest(as, ir, RSET_GPR); | ||
2319 | right = ra_alloc1(as, rref, RSET_GPR); | ||
2320 | left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right), | ||
2321 | irt_is64(ir->t)); | ||
2322 | emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left); | ||
2323 | return; | ||
2003 | } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ | 2324 | } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ |
2004 | Reg right; | 2325 | Reg right; |
2005 | dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); | 2326 | dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); |
2006 | if (dest == RID_ECX) { | 2327 | if (dest == RID_ECX) { |
2007 | dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX)); | 2328 | dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX)); |
2008 | emit_rr(as, XO_MOV, RID_ECX, dest); | 2329 | emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest); |
2009 | } | 2330 | } |
2010 | right = irr->r; | 2331 | right = irr->r; |
2011 | if (ra_noreg(right)) | 2332 | if (ra_noreg(right)) |
@@ -2025,6 +2346,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | |||
2025 | */ | 2346 | */ |
2026 | } | 2347 | } |
2027 | 2348 | ||
2349 | #define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX) | ||
2350 | #define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX) | ||
2351 | #define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX) | ||
2352 | #define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0) | ||
2353 | #define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0) | ||
2354 | |||
2028 | /* -- Comparisons --------------------------------------------------------- */ | 2355 | /* -- Comparisons --------------------------------------------------------- */ |
2029 | 2356 | ||
2030 | /* Virtual flags for unordered FP comparisons. */ | 2357 | /* Virtual flags for unordered FP comparisons. */ |
@@ -2051,8 +2378,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = { | |||
2051 | }; | 2378 | }; |
2052 | 2379 | ||
2053 | /* FP and integer comparisons. */ | 2380 | /* FP and integer comparisons. */ |
2054 | static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | 2381 | static void asm_comp(ASMState *as, IRIns *ir) |
2055 | { | 2382 | { |
2383 | uint32_t cc = asm_compmap[ir->o]; | ||
2056 | if (irt_isnum(ir->t)) { | 2384 | if (irt_isnum(ir->t)) { |
2057 | IRRef lref = ir->op1; | 2385 | IRRef lref = ir->op1; |
2058 | IRRef rref = ir->op2; | 2386 | IRRef rref = ir->op2; |
@@ -2073,7 +2401,6 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
2073 | cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ | 2401 | cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ |
2074 | } | 2402 | } |
2075 | left = ra_alloc1(as, lref, RSET_FPR); | 2403 | left = ra_alloc1(as, lref, RSET_FPR); |
2076 | right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); | ||
2077 | l_around = emit_label(as); | 2404 | l_around = emit_label(as); |
2078 | asm_guardcc(as, cc >> 4); | 2405 | asm_guardcc(as, cc >> 4); |
2079 | if (cc & VCC_P) { /* Extra CC_P branch required? */ | 2406 | if (cc & VCC_P) { /* Extra CC_P branch required? */ |
@@ -2090,14 +2417,16 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
2090 | emit_jcc(as, CC_P, as->mcp); | 2417 | emit_jcc(as, CC_P, as->mcp); |
2091 | } | 2418 | } |
2092 | } | 2419 | } |
2420 | right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); | ||
2093 | emit_mrm(as, XO_UCOMISD, left, right); | 2421 | emit_mrm(as, XO_UCOMISD, left, right); |
2094 | } else { | 2422 | } else { |
2095 | IRRef lref = ir->op1, rref = ir->op2; | 2423 | IRRef lref = ir->op1, rref = ir->op2; |
2096 | IROp leftop = (IROp)(IR(lref)->o); | 2424 | IROp leftop = (IROp)(IR(lref)->o); |
2097 | Reg r64 = REX_64IR(ir, 0); | 2425 | Reg r64 = REX_64IR(ir, 0); |
2098 | int32_t imm = 0; | 2426 | int32_t imm = 0; |
2099 | lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || | 2427 | lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || |
2100 | irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); | 2428 | irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), |
2429 | "bad comparison data type %d", irt_type(ir->t)); | ||
2101 | /* Swap constants (only for ABC) and fusable loads to the right. */ | 2430 | /* Swap constants (only for ABC) and fusable loads to the right. */ |
2102 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { | 2431 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { |
2103 | if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ | 2432 | if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ |
@@ -2179,7 +2508,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
2179 | /* Use test r,r instead of cmp r,0. */ | 2508 | /* Use test r,r instead of cmp r,0. */ |
2180 | x86Op xo = XO_TEST; | 2509 | x86Op xo = XO_TEST; |
2181 | if (irt_isu8(ir->t)) { | 2510 | if (irt_isu8(ir->t)) { |
2182 | lua_assert(ir->o == IR_EQ || ir->o == IR_NE); | 2511 | lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage"); |
2183 | xo = XO_TESTb; | 2512 | xo = XO_TESTb; |
2184 | if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { | 2513 | if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { |
2185 | if (LJ_64) { | 2514 | if (LJ_64) { |
@@ -2207,6 +2536,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
2207 | } | 2536 | } |
2208 | } | 2537 | } |
2209 | 2538 | ||
2539 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
2540 | |||
2210 | #if LJ_32 && LJ_HASFFI | 2541 | #if LJ_32 && LJ_HASFFI |
2211 | /* 64 bit integer comparisons in 32 bit mode. */ | 2542 | /* 64 bit integer comparisons in 32 bit mode. */ |
2212 | static void asm_comp_int64(ASMState *as, IRIns *ir) | 2543 | static void asm_comp_int64(ASMState *as, IRIns *ir) |
@@ -2279,23 +2610,19 @@ static void asm_comp_int64(ASMState *as, IRIns *ir) | |||
2279 | } | 2610 | } |
2280 | #endif | 2611 | #endif |
2281 | 2612 | ||
2282 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | 2613 | /* -- Split register ops -------------------------------------------------- */ |
2283 | 2614 | ||
2284 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 2615 | /* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */ |
2285 | static void asm_hiop(ASMState *as, IRIns *ir) | 2616 | static void asm_hiop(ASMState *as, IRIns *ir) |
2286 | { | 2617 | { |
2287 | #if LJ_32 && LJ_HASFFI | ||
2288 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 2618 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
2289 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 2619 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
2290 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 2620 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
2621 | #if LJ_32 && LJ_HASFFI | ||
2291 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 2622 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
2292 | if (usehi || uselo) { | ||
2293 | if (irt_isfp(ir->t)) | ||
2294 | asm_conv_fp_int64(as, ir); | ||
2295 | else | ||
2296 | asm_conv_int64_fp(as, ir); | ||
2297 | } | ||
2298 | as->curins--; /* Always skip the CONV. */ | 2623 | as->curins--; /* Always skip the CONV. */ |
2624 | if (usehi || uselo) | ||
2625 | asm_conv64(as, ir); | ||
2299 | return; | 2626 | return; |
2300 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 2627 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
2301 | asm_comp_int64(as, ir); | 2628 | asm_comp_int64(as, ir); |
@@ -2305,8 +2632,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
2305 | asm_fxstore(as, ir); | 2632 | asm_fxstore(as, ir); |
2306 | return; | 2633 | return; |
2307 | } | 2634 | } |
2635 | #endif | ||
2308 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 2636 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
2309 | switch ((ir-1)->o) { | 2637 | switch ((ir-1)->o) { |
2638 | #if LJ_32 && LJ_HASFFI | ||
2310 | case IR_ADD: | 2639 | case IR_ADD: |
2311 | as->flagmcp = NULL; | 2640 | as->flagmcp = NULL; |
2312 | as->curins--; | 2641 | as->curins--; |
@@ -2329,19 +2658,26 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
2329 | asm_neg_not(as, ir-1, XOg_NEG); | 2658 | asm_neg_not(as, ir-1, XOg_NEG); |
2330 | break; | 2659 | break; |
2331 | } | 2660 | } |
2332 | case IR_CALLN: | ||
2333 | case IR_CALLXS: | ||
2334 | if (!uselo) | ||
2335 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | ||
2336 | break; | ||
2337 | case IR_CNEWI: | 2661 | case IR_CNEWI: |
2338 | /* Nothing to do here. Handled by CNEWI itself. */ | 2662 | /* Nothing to do here. Handled by CNEWI itself. */ |
2339 | break; | 2663 | break; |
2340 | default: lua_assert(0); break; | ||
2341 | } | ||
2342 | #else | ||
2343 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ | ||
2344 | #endif | 2664 | #endif |
2665 | case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: | ||
2666 | if (!uselo) | ||
2667 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | ||
2668 | break; | ||
2669 | default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; | ||
2670 | } | ||
2671 | } | ||
2672 | |||
2673 | /* -- Profiling ----------------------------------------------------------- */ | ||
2674 | |||
2675 | static void asm_prof(ASMState *as, IRIns *ir) | ||
2676 | { | ||
2677 | UNUSED(ir); | ||
2678 | asm_guardcc(as, CC_NE); | ||
2679 | emit_i8(as, HOOK_PROFILE); | ||
2680 | emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask); | ||
2345 | } | 2681 | } |
2346 | 2682 | ||
2347 | /* -- Stack handling ------------------------------------------------------ */ | 2683 | /* -- Stack handling ------------------------------------------------------ */ |
@@ -2358,14 +2694,19 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
2358 | emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); | 2694 | emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); |
2359 | else | 2695 | else |
2360 | ra_modified(as, r); | 2696 | ra_modified(as, r); |
2361 | emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); | 2697 | emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot)); |
2362 | if (ra_hasreg(pbase) && pbase != r) | 2698 | if (ra_hasreg(pbase) && pbase != r) |
2363 | emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); | 2699 | emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); |
2364 | else | 2700 | else |
2701 | #if LJ_GC64 | ||
2702 | emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, | ||
2703 | (int32_t)dispofs(as, &J2G(as->J)->jit_base)); | ||
2704 | #else | ||
2365 | emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, | 2705 | emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, |
2366 | ptr2addr(&J2G(as->J)->jit_base)); | 2706 | ptr2addr(&J2G(as->J)->jit_base)); |
2367 | emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); | 2707 | #endif |
2368 | emit_getgl(as, r, jit_L); | 2708 | emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); |
2709 | emit_getgl(as, r, cur_L); | ||
2369 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 2710 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
2370 | emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); | 2711 | emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); |
2371 | } | 2712 | } |
@@ -2374,40 +2715,79 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
2374 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | 2715 | static void asm_stack_restore(ASMState *as, SnapShot *snap) |
2375 | { | 2716 | { |
2376 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2717 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
2377 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; | 2718 | #if !LJ_FR2 || defined(LUA_USE_ASSERT) |
2719 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
2720 | #endif | ||
2378 | MSize n, nent = snap->nent; | 2721 | MSize n, nent = snap->nent; |
2379 | /* Store the value of all modified slots to the Lua stack. */ | 2722 | /* Store the value of all modified slots to the Lua stack. */ |
2380 | for (n = 0; n < nent; n++) { | 2723 | for (n = 0; n < nent; n++) { |
2381 | SnapEntry sn = map[n]; | 2724 | SnapEntry sn = map[n]; |
2382 | BCReg s = snap_slot(sn); | 2725 | BCReg s = snap_slot(sn); |
2383 | int32_t ofs = 8*((int32_t)s-1); | 2726 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); |
2384 | IRRef ref = snap_ref(sn); | 2727 | IRRef ref = snap_ref(sn); |
2385 | IRIns *ir = IR(ref); | 2728 | IRIns *ir = IR(ref); |
2386 | if ((sn & SNAP_NORESTORE)) | 2729 | if ((sn & SNAP_NORESTORE)) |
2387 | continue; | 2730 | continue; |
2388 | if (irt_isnum(ir->t)) { | 2731 | if ((sn & SNAP_KEYINDEX)) { |
2732 | emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX); | ||
2733 | if (irref_isk(ref)) { | ||
2734 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
2735 | } else { | ||
2736 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
2737 | emit_movtomro(as, src, RID_BASE, ofs); | ||
2738 | } | ||
2739 | } else if (irt_isnum(ir->t)) { | ||
2389 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2740 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
2390 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | 2741 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); |
2391 | } else { | 2742 | } else { |
2392 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || | 2743 | lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || |
2393 | (LJ_DUALNUM && irt_isinteger(ir->t))); | 2744 | (LJ_DUALNUM && irt_isinteger(ir->t)), |
2745 | "restore of IR type %d", irt_type(ir->t)); | ||
2394 | if (!irref_isk(ref)) { | 2746 | if (!irref_isk(ref)) { |
2395 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | 2747 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); |
2748 | #if LJ_GC64 | ||
2749 | if (irt_is64(ir->t)) { | ||
2750 | /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ | ||
2751 | emit_u32(as, irt_toitype(ir->t) << 15); | ||
2752 | emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4); | ||
2753 | } else if (LJ_DUALNUM && irt_isinteger(ir->t)) { | ||
2754 | emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15); | ||
2755 | } else { | ||
2756 | emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); | ||
2757 | } | ||
2758 | #endif | ||
2396 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); | 2759 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); |
2760 | #if LJ_GC64 | ||
2761 | } else { | ||
2762 | TValue k; | ||
2763 | lj_ir_kvalue(as->J->L, &k, ir); | ||
2764 | if (tvisnil(&k)) { | ||
2765 | emit_i32(as, -1); | ||
2766 | emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs); | ||
2767 | } else { | ||
2768 | emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); | ||
2769 | emit_movmroi(as, RID_BASE, ofs, k.u32.lo); | ||
2770 | } | ||
2771 | #else | ||
2397 | } else if (!irt_ispri(ir->t)) { | 2772 | } else if (!irt_ispri(ir->t)) { |
2398 | emit_movmroi(as, RID_BASE, ofs, ir->i); | 2773 | emit_movmroi(as, RID_BASE, ofs, ir->i); |
2774 | #endif | ||
2399 | } | 2775 | } |
2400 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2776 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
2777 | #if !LJ_FR2 | ||
2401 | if (s != 0) /* Do not overwrite link to previous frame. */ | 2778 | if (s != 0) /* Do not overwrite link to previous frame. */ |
2402 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); | 2779 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); |
2780 | #endif | ||
2781 | #if !LJ_GC64 | ||
2403 | } else { | 2782 | } else { |
2404 | if (!(LJ_64 && irt_islightud(ir->t))) | 2783 | if (!(LJ_64 && irt_islightud(ir->t))) |
2405 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | 2784 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); |
2785 | #endif | ||
2406 | } | 2786 | } |
2407 | } | 2787 | } |
2408 | checkmclim(as); | 2788 | checkmclim(as); |
2409 | } | 2789 | } |
2410 | lua_assert(map + nent == flinks); | 2790 | lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); |
2411 | } | 2791 | } |
2412 | 2792 | ||
2413 | /* -- GC handling --------------------------------------------------------- */ | 2793 | /* -- GC handling --------------------------------------------------------- */ |
@@ -2428,11 +2808,15 @@ static void asm_gc_check(ASMState *as) | |||
2428 | args[1] = ASMREF_TMP2; /* MSize steps */ | 2808 | args[1] = ASMREF_TMP2; /* MSize steps */ |
2429 | asm_gencall(as, ci, args); | 2809 | asm_gencall(as, ci, args); |
2430 | tmp = ra_releasetmp(as, ASMREF_TMP1); | 2810 | tmp = ra_releasetmp(as, ASMREF_TMP1); |
2811 | #if LJ_GC64 | ||
2812 | emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); | ||
2813 | #else | ||
2431 | emit_loada(as, tmp, J2G(as->J)); | 2814 | emit_loada(as, tmp, J2G(as->J)); |
2815 | #endif | ||
2432 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); | 2816 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); |
2433 | /* Jump around GC step if GC total < GC threshold. */ | 2817 | /* Jump around GC step if GC total < GC threshold. */ |
2434 | emit_sjcc(as, CC_B, l_end); | 2818 | emit_sjcc(as, CC_B, l_end); |
2435 | emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); | 2819 | emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold); |
2436 | emit_getgl(as, tmp, gc.total); | 2820 | emit_getgl(as, tmp, gc.total); |
2437 | as->gcsteps = 0; | 2821 | as->gcsteps = 0; |
2438 | checkmclim(as); | 2822 | checkmclim(as); |
@@ -2447,16 +2831,16 @@ static void asm_loop_fixup(ASMState *as) | |||
2447 | MCode *target = as->mcp; | 2831 | MCode *target = as->mcp; |
2448 | if (as->realign) { /* Realigned loops use short jumps. */ | 2832 | if (as->realign) { /* Realigned loops use short jumps. */ |
2449 | as->realign = NULL; /* Stop another retry. */ | 2833 | as->realign = NULL; /* Stop another retry. */ |
2450 | lua_assert(((intptr_t)target & 15) == 0); | 2834 | lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed"); |
2451 | if (as->loopinv) { /* Inverted loop branch? */ | 2835 | if (as->loopinv) { /* Inverted loop branch? */ |
2452 | p -= 5; | 2836 | p -= 5; |
2453 | p[0] = XI_JMP; | 2837 | p[0] = XI_JMP; |
2454 | lua_assert(target - p >= -128); | 2838 | lj_assertA(target - p >= -128, "loop realign failed"); |
2455 | p[-1] = (MCode)(target - p); /* Patch sjcc. */ | 2839 | p[-1] = (MCode)(target - p); /* Patch sjcc. */ |
2456 | if (as->loopinv == 2) | 2840 | if (as->loopinv == 2) |
2457 | p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ | 2841 | p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ |
2458 | } else { | 2842 | } else { |
2459 | lua_assert(target - p >= -128); | 2843 | lj_assertA(target - p >= -128, "loop realign failed"); |
2460 | p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ | 2844 | p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ |
2461 | p[-2] = XI_JMPs; | 2845 | p[-2] = XI_JMPs; |
2462 | } | 2846 | } |
@@ -2485,6 +2869,12 @@ static void asm_loop_fixup(ASMState *as) | |||
2485 | } | 2869 | } |
2486 | } | 2870 | } |
2487 | 2871 | ||
2872 | /* Fixup the tail of the loop. */ | ||
2873 | static void asm_loop_tail_fixup(ASMState *as) | ||
2874 | { | ||
2875 | UNUSED(as); /* Nothing to do. */ | ||
2876 | } | ||
2877 | |||
2488 | /* -- Head of trace ------------------------------------------------------- */ | 2878 | /* -- Head of trace ------------------------------------------------------- */ |
2489 | 2879 | ||
2490 | /* Coalesce BASE register for a root trace. */ | 2880 | /* Coalesce BASE register for a root trace. */ |
@@ -2497,7 +2887,7 @@ static void asm_head_root_base(ASMState *as) | |||
2497 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) | 2887 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) |
2498 | ir->r = RID_INIT; /* No inheritance for modified BASE register. */ | 2888 | ir->r = RID_INIT; /* No inheritance for modified BASE register. */ |
2499 | if (r != RID_BASE) | 2889 | if (r != RID_BASE) |
2500 | emit_rr(as, XO_MOV, r, RID_BASE); | 2890 | emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE); |
2501 | } | 2891 | } |
2502 | } | 2892 | } |
2503 | 2893 | ||
@@ -2513,8 +2903,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | |||
2513 | if (irp->r == r) { | 2903 | if (irp->r == r) { |
2514 | rset_clear(allow, r); /* Mark same BASE register as coalesced. */ | 2904 | rset_clear(allow, r); /* Mark same BASE register as coalesced. */ |
2515 | } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { | 2905 | } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { |
2906 | /* Move from coalesced parent reg. */ | ||
2516 | rset_clear(allow, irp->r); | 2907 | rset_clear(allow, irp->r); |
2517 | emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ | 2908 | emit_rr(as, XO_MOV, r|REX_GC64, irp->r); |
2518 | } else { | 2909 | } else { |
2519 | emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ | 2910 | emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ |
2520 | } | 2911 | } |
@@ -2532,7 +2923,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
2532 | MCode *target, *q; | 2923 | MCode *target, *q; |
2533 | int32_t spadj = as->T->spadjust; | 2924 | int32_t spadj = as->T->spadjust; |
2534 | if (spadj == 0) { | 2925 | if (spadj == 0) { |
2535 | p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); | 2926 | p -= LJ_64 ? 7 : 6; |
2536 | } else { | 2927 | } else { |
2537 | MCode *p1; | 2928 | MCode *p1; |
2538 | /* Patch stack adjustment. */ | 2929 | /* Patch stack adjustment. */ |
@@ -2544,24 +2935,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
2544 | p1 = p-9; | 2935 | p1 = p-9; |
2545 | *(int32_t *)p1 = spadj; | 2936 | *(int32_t *)p1 = spadj; |
2546 | } | 2937 | } |
2547 | if ((as->flags & JIT_F_LEA_AGU)) { | ||
2548 | #if LJ_64 | ||
2549 | p1[-4] = 0x48; | ||
2550 | #endif | ||
2551 | p1[-3] = (MCode)XI_LEA; | ||
2552 | p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); | ||
2553 | p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); | ||
2554 | } else { | ||
2555 | #if LJ_64 | 2938 | #if LJ_64 |
2556 | p1[-3] = 0x48; | 2939 | p1[-3] = 0x48; |
2557 | #endif | 2940 | #endif |
2558 | p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); | 2941 | p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); |
2559 | p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); | 2942 | p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); |
2560 | } | ||
2561 | } | 2943 | } |
2562 | /* Patch exit branch. */ | 2944 | /* Patch exit branch. */ |
2563 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; | 2945 | target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; |
2564 | *(int32_t *)(p-4) = jmprel(p, target); | 2946 | *(int32_t *)(p-4) = jmprel(as->J, p, target); |
2565 | p[-5] = XI_JMP; | 2947 | p[-5] = XI_JMP; |
2566 | /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ | 2948 | /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ |
2567 | for (q = as->mctop-1; q >= p; q--) | 2949 | for (q = as->mctop-1; q >= p; q--) |
@@ -2588,168 +2970,11 @@ static void asm_tail_prep(ASMState *as) | |||
2588 | as->invmcp = as->mcp = p; | 2970 | as->invmcp = as->mcp = p; |
2589 | } else { | 2971 | } else { |
2590 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ | 2972 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ |
2591 | as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); | 2973 | as->mcp = p - (LJ_64 ? 7 : 6); |
2592 | as->invmcp = NULL; | 2974 | as->invmcp = NULL; |
2593 | } | 2975 | } |
2594 | } | 2976 | } |
2595 | 2977 | ||
2596 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
2597 | |||
2598 | /* Assemble a single instruction. */ | ||
2599 | static void asm_ir(ASMState *as, IRIns *ir) | ||
2600 | { | ||
2601 | switch ((IROp)ir->o) { | ||
2602 | /* Miscellaneous ops. */ | ||
2603 | case IR_LOOP: asm_loop(as); break; | ||
2604 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
2605 | case IR_USE: | ||
2606 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
2607 | case IR_PHI: asm_phi(as, ir); break; | ||
2608 | case IR_HIOP: asm_hiop(as, ir); break; | ||
2609 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
2610 | |||
2611 | /* Guarded assertions. */ | ||
2612 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
2613 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
2614 | case IR_EQ: case IR_NE: case IR_ABC: | ||
2615 | asm_comp(as, ir, asm_compmap[ir->o]); | ||
2616 | break; | ||
2617 | |||
2618 | case IR_RETF: asm_retf(as, ir); break; | ||
2619 | |||
2620 | /* Bit ops. */ | ||
2621 | case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break; | ||
2622 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2623 | |||
2624 | case IR_BAND: asm_intarith(as, ir, XOg_AND); break; | ||
2625 | case IR_BOR: asm_intarith(as, ir, XOg_OR); break; | ||
2626 | case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break; | ||
2627 | |||
2628 | case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break; | ||
2629 | case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break; | ||
2630 | case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break; | ||
2631 | case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break; | ||
2632 | case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break; | ||
2633 | |||
2634 | /* Arithmetic ops. */ | ||
2635 | case IR_ADD: asm_add(as, ir); break; | ||
2636 | case IR_SUB: | ||
2637 | if (irt_isnum(ir->t)) | ||
2638 | asm_fparith(as, ir, XO_SUBSD); | ||
2639 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ | ||
2640 | asm_intarith(as, ir, XOg_SUB); | ||
2641 | break; | ||
2642 | case IR_MUL: | ||
2643 | if (irt_isnum(ir->t)) | ||
2644 | asm_fparith(as, ir, XO_MULSD); | ||
2645 | else | ||
2646 | asm_intarith(as, ir, XOg_X_IMUL); | ||
2647 | break; | ||
2648 | case IR_DIV: | ||
2649 | #if LJ_64 && LJ_HASFFI | ||
2650 | if (!irt_isnum(ir->t)) | ||
2651 | asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
2652 | IRCALL_lj_carith_divu64); | ||
2653 | else | ||
2654 | #endif | ||
2655 | asm_fparith(as, ir, XO_DIVSD); | ||
2656 | break; | ||
2657 | case IR_MOD: | ||
2658 | #if LJ_64 && LJ_HASFFI | ||
2659 | if (!irt_isint(ir->t)) | ||
2660 | asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
2661 | IRCALL_lj_carith_modu64); | ||
2662 | else | ||
2663 | #endif | ||
2664 | asm_intmod(as, ir); | ||
2665 | break; | ||
2666 | |||
2667 | case IR_NEG: | ||
2668 | if (irt_isnum(ir->t)) | ||
2669 | asm_fparith(as, ir, XO_XORPS); | ||
2670 | else | ||
2671 | asm_neg_not(as, ir, XOg_NEG); | ||
2672 | break; | ||
2673 | case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break; | ||
2674 | |||
2675 | case IR_MIN: | ||
2676 | if (irt_isnum(ir->t)) | ||
2677 | asm_fparith(as, ir, XO_MINSD); | ||
2678 | else | ||
2679 | asm_min_max(as, ir, CC_G); | ||
2680 | break; | ||
2681 | case IR_MAX: | ||
2682 | if (irt_isnum(ir->t)) | ||
2683 | asm_fparith(as, ir, XO_MAXSD); | ||
2684 | else | ||
2685 | asm_min_max(as, ir, CC_L); | ||
2686 | break; | ||
2687 | |||
2688 | case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: | ||
2689 | asm_fpmath(as, ir); | ||
2690 | break; | ||
2691 | case IR_POW: | ||
2692 | #if LJ_64 && LJ_HASFFI | ||
2693 | if (!irt_isnum(ir->t)) | ||
2694 | asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
2695 | IRCALL_lj_carith_powu64); | ||
2696 | else | ||
2697 | #endif | ||
2698 | asm_fppowi(as, ir); | ||
2699 | break; | ||
2700 | |||
2701 | /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ | ||
2702 | case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; | ||
2703 | case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break; | ||
2704 | case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break; | ||
2705 | |||
2706 | /* Memory references. */ | ||
2707 | case IR_AREF: asm_aref(as, ir); break; | ||
2708 | case IR_HREF: asm_href(as, ir); break; | ||
2709 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2710 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2711 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2712 | case IR_FREF: asm_fref(as, ir); break; | ||
2713 | case IR_STRREF: asm_strref(as, ir); break; | ||
2714 | |||
2715 | /* Loads and stores. */ | ||
2716 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2717 | asm_ahuvload(as, ir); | ||
2718 | break; | ||
2719 | case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; | ||
2720 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2721 | |||
2722 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2723 | case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; | ||
2724 | |||
2725 | /* Allocations. */ | ||
2726 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2727 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2728 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2729 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2730 | |||
2731 | /* Write barriers. */ | ||
2732 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2733 | case IR_OBAR: asm_obar(as, ir); break; | ||
2734 | |||
2735 | /* Type conversions. */ | ||
2736 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2737 | case IR_CONV: asm_conv(as, ir); break; | ||
2738 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2739 | case IR_STRTO: asm_strto(as, ir); break; | ||
2740 | |||
2741 | /* Calls. */ | ||
2742 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2743 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2744 | case IR_CARG: break; | ||
2745 | |||
2746 | default: | ||
2747 | setintV(&as->J->errinfo, ir->o); | ||
2748 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2749 | break; | ||
2750 | } | ||
2751 | } | ||
2752 | |||
2753 | /* -- Trace setup --------------------------------------------------------- */ | 2978 | /* -- Trace setup --------------------------------------------------------- */ |
2754 | 2979 | ||
2755 | /* Ensure there are enough stack slots for call arguments. */ | 2980 | /* Ensure there are enough stack slots for call arguments. */ |
@@ -2772,6 +2997,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
2772 | static void asm_setup_target(ASMState *as) | 2997 | static void asm_setup_target(ASMState *as) |
2773 | { | 2998 | { |
2774 | asm_exitstub_setup(as, as->T->nsnap); | 2999 | asm_exitstub_setup(as, as->T->nsnap); |
3000 | as->mrm.base = 0; | ||
2775 | } | 3001 | } |
2776 | 3002 | ||
2777 | /* -- Trace patching ------------------------------------------------------ */ | 3003 | /* -- Trace patching ------------------------------------------------------ */ |
@@ -2885,18 +3111,24 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2885 | MCode *px = exitstub_addr(J, exitno) - 6; | 3111 | MCode *px = exitstub_addr(J, exitno) - 6; |
2886 | MCode *pe = p+len-6; | 3112 | MCode *pe = p+len-6; |
2887 | MCode *pgc = NULL; | 3113 | MCode *pgc = NULL; |
2888 | uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); | 3114 | #if LJ_GC64 |
3115 | uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); | ||
3116 | #else | ||
3117 | uint32_t statei = u32ptr(&J2G(J)->vmstate); | ||
3118 | #endif | ||
2889 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) | 3119 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) |
2890 | *(int32_t *)(p+len-4) = jmprel(p+len, target); | 3120 | *(int32_t *)(p+len-4) = jmprel(J, p+len, target); |
2891 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ | 3121 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ |
2892 | for (; p < pe; p += asm_x86_inslen(p)) | 3122 | for (; p < pe; p += asm_x86_inslen(p)) { |
2893 | if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) | 3123 | intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; |
3124 | if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) | ||
2894 | break; | 3125 | break; |
2895 | lua_assert(p < pe); | 3126 | } |
3127 | lj_assertJ(p < pe, "instruction length decoder failed"); | ||
2896 | for (; p < pe; p += asm_x86_inslen(p)) { | 3128 | for (; p < pe; p += asm_x86_inslen(p)) { |
2897 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px && | 3129 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px && |
2898 | p != pgc) { | 3130 | p != pgc) { |
2899 | *(int32_t *)(p+2) = jmprel(p+6, target); | 3131 | *(int32_t *)(p+2) = jmprel(J, p+6, target); |
2900 | } else if (*p == XI_CALL && | 3132 | } else if (*p == XI_CALL && |
2901 | (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { | 3133 | (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { |
2902 | pgc = p+7; /* Do not patch GC check exit. */ | 3134 | pgc = p+7; /* Do not patch GC check exit. */ |
diff --git a/src/lj_assert.c b/src/lj_assert.c new file mode 100644 index 00000000..4b713b2b --- /dev/null +++ b/src/lj_assert.c | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | ** Internal assertions. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_assert_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) | ||
10 | |||
11 | #include <stdio.h> | ||
12 | |||
13 | #include "lj_obj.h" | ||
14 | |||
15 | void lj_assert_fail(global_State *g, const char *file, int line, | ||
16 | const char *func, const char *fmt, ...) | ||
17 | { | ||
18 | va_list argp; | ||
19 | va_start(argp, fmt); | ||
20 | fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func); | ||
21 | vfprintf(stderr, fmt, argp); | ||
22 | fputc('\n', stderr); | ||
23 | va_end(argp); | ||
24 | UNUSED(g); /* May be NULL. TODO: optionally dump state. */ | ||
25 | abort(); | ||
26 | } | ||
27 | |||
28 | #endif | ||
diff --git a/src/lj_bc.h b/src/lj_bc.h index 22c43caa..02356e5b 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h | |||
@@ -89,6 +89,8 @@ | |||
89 | _(ISFC, dst, ___, var, ___) \ | 89 | _(ISFC, dst, ___, var, ___) \ |
90 | _(IST, ___, ___, var, ___) \ | 90 | _(IST, ___, ___, var, ___) \ |
91 | _(ISF, ___, ___, var, ___) \ | 91 | _(ISF, ___, ___, var, ___) \ |
92 | _(ISTYPE, var, ___, lit, ___) \ | ||
93 | _(ISNUM, var, ___, lit, ___) \ | ||
92 | \ | 94 | \ |
93 | /* Unary ops. */ \ | 95 | /* Unary ops. */ \ |
94 | _(MOV, dst, ___, var, ___) \ | 96 | _(MOV, dst, ___, var, ___) \ |
@@ -143,10 +145,12 @@ | |||
143 | _(TGETV, dst, var, var, index) \ | 145 | _(TGETV, dst, var, var, index) \ |
144 | _(TGETS, dst, var, str, index) \ | 146 | _(TGETS, dst, var, str, index) \ |
145 | _(TGETB, dst, var, lit, index) \ | 147 | _(TGETB, dst, var, lit, index) \ |
148 | _(TGETR, dst, var, var, index) \ | ||
146 | _(TSETV, var, var, var, newindex) \ | 149 | _(TSETV, var, var, var, newindex) \ |
147 | _(TSETS, var, var, str, newindex) \ | 150 | _(TSETS, var, var, str, newindex) \ |
148 | _(TSETB, var, var, lit, newindex) \ | 151 | _(TSETB, var, var, lit, newindex) \ |
149 | _(TSETM, base, ___, num, newindex) \ | 152 | _(TSETM, base, ___, num, newindex) \ |
153 | _(TSETR, var, var, var, newindex) \ | ||
150 | \ | 154 | \ |
151 | /* Calls and vararg handling. T = tail call. */ \ | 155 | /* Calls and vararg handling. T = tail call. */ \ |
152 | _(CALLM, base, lit, lit, call) \ | 156 | _(CALLM, base, lit, lit, call) \ |
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h index 8ca62f80..69da16e9 100644 --- a/src/lj_bcdump.h +++ b/src/lj_bcdump.h | |||
@@ -36,14 +36,15 @@ | |||
36 | /* If you perform *any* kind of private modifications to the bytecode itself | 36 | /* If you perform *any* kind of private modifications to the bytecode itself |
37 | ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. | 37 | ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. |
38 | */ | 38 | */ |
39 | #define BCDUMP_VERSION 1 | 39 | #define BCDUMP_VERSION 2 |
40 | 40 | ||
41 | /* Compatibility flags. */ | 41 | /* Compatibility flags. */ |
42 | #define BCDUMP_F_BE 0x01 | 42 | #define BCDUMP_F_BE 0x01 |
43 | #define BCDUMP_F_STRIP 0x02 | 43 | #define BCDUMP_F_STRIP 0x02 |
44 | #define BCDUMP_F_FFI 0x04 | 44 | #define BCDUMP_F_FFI 0x04 |
45 | #define BCDUMP_F_FR2 0x08 | ||
45 | 46 | ||
46 | #define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1) | 47 | #define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1) |
47 | 48 | ||
48 | /* Type codes for the GC constants of a prototype. Plus length for strings. */ | 49 | /* Type codes for the GC constants of a prototype. Plus length for strings. */ |
49 | enum { | 50 | enum { |
@@ -61,6 +62,7 @@ enum { | |||
61 | 62 | ||
62 | LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, | 63 | LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, |
63 | void *data, int strip); | 64 | void *data, int strip); |
65 | LJ_FUNC GCproto *lj_bcread_proto(LexState *ls); | ||
64 | LJ_FUNC GCproto *lj_bcread(LexState *ls); | 66 | LJ_FUNC GCproto *lj_bcread(LexState *ls); |
65 | 67 | ||
66 | #endif | 68 | #endif |
diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 4a925f1c..2ce05707 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_buf.h" | ||
12 | #include "lj_str.h" | 13 | #include "lj_str.h" |
13 | #include "lj_tab.h" | 14 | #include "lj_tab.h" |
14 | #include "lj_bc.h" | 15 | #include "lj_bc.h" |
@@ -20,6 +21,7 @@ | |||
20 | #include "lj_lex.h" | 21 | #include "lj_lex.h" |
21 | #include "lj_bcdump.h" | 22 | #include "lj_bcdump.h" |
22 | #include "lj_state.h" | 23 | #include "lj_state.h" |
24 | #include "lj_strfmt.h" | ||
23 | 25 | ||
24 | /* Reuse some lexer fields for our own purposes. */ | 26 | /* Reuse some lexer fields for our own purposes. */ |
25 | #define bcread_flags(ls) ls->level | 27 | #define bcread_flags(ls) ls->level |
@@ -38,85 +40,74 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) | |||
38 | const char *name = ls->chunkarg; | 40 | const char *name = ls->chunkarg; |
39 | if (*name == BCDUMP_HEAD1) name = "(binary)"; | 41 | if (*name == BCDUMP_HEAD1) name = "(binary)"; |
40 | else if (*name == '@' || *name == '=') name++; | 42 | else if (*name == '@' || *name == '=') name++; |
41 | lj_str_pushf(L, "%s: %s", name, err2msg(em)); | 43 | lj_strfmt_pushf(L, "%s: %s", name, err2msg(em)); |
42 | lj_err_throw(L, LUA_ERRSYNTAX); | 44 | lj_err_throw(L, LUA_ERRSYNTAX); |
43 | } | 45 | } |
44 | 46 | ||
45 | /* Resize input buffer. */ | 47 | /* Refill buffer. */ |
46 | static void bcread_resize(LexState *ls, MSize len) | ||
47 | { | ||
48 | if (ls->sb.sz < len) { | ||
49 | MSize sz = ls->sb.sz * 2; | ||
50 | while (len > sz) sz = sz * 2; | ||
51 | lj_str_resizebuf(ls->L, &ls->sb, sz); | ||
52 | /* Caveat: this may change ls->sb.buf which may affect ls->p. */ | ||
53 | } | ||
54 | } | ||
55 | |||
56 | /* Refill buffer if needed. */ | ||
57 | static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) | 48 | static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) |
58 | { | 49 | { |
59 | lua_assert(len != 0); | 50 | lj_assertLS(len != 0, "empty refill"); |
60 | if (len > LJ_MAX_MEM || ls->current < 0) | 51 | if (len > LJ_MAX_BUF || ls->c < 0) |
61 | bcread_error(ls, LJ_ERR_BCBAD); | 52 | bcread_error(ls, LJ_ERR_BCBAD); |
62 | do { | 53 | do { |
63 | const char *buf; | 54 | const char *buf; |
64 | size_t size; | 55 | size_t sz; |
65 | if (ls->n) { /* Copy remainder to buffer. */ | 56 | char *p = ls->sb.b; |
66 | if (ls->sb.n) { /* Move down in buffer. */ | 57 | MSize n = (MSize)(ls->pe - ls->p); |
67 | lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); | 58 | if (n) { /* Copy remainder to buffer. */ |
68 | if (ls->n != ls->sb.n) | 59 | if (sbuflen(&ls->sb)) { /* Move down in buffer. */ |
69 | memmove(ls->sb.buf, ls->p, ls->n); | 60 | lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer"); |
61 | if (ls->p != p) memmove(p, ls->p, n); | ||
70 | } else { /* Copy from buffer provided by reader. */ | 62 | } else { /* Copy from buffer provided by reader. */ |
71 | bcread_resize(ls, len); | 63 | p = lj_buf_need(&ls->sb, len); |
72 | memcpy(ls->sb.buf, ls->p, ls->n); | 64 | memcpy(p, ls->p, n); |
73 | } | 65 | } |
74 | ls->p = ls->sb.buf; | 66 | ls->p = p; |
67 | ls->pe = p + n; | ||
75 | } | 68 | } |
76 | ls->sb.n = ls->n; | 69 | ls->sb.w = p + n; |
77 | buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ | 70 | buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */ |
78 | if (buf == NULL || size == 0) { /* EOF? */ | 71 | if (buf == NULL || sz == 0) { /* EOF? */ |
79 | if (need) bcread_error(ls, LJ_ERR_BCBAD); | 72 | if (need) bcread_error(ls, LJ_ERR_BCBAD); |
80 | ls->current = -1; /* Only bad if we get called again. */ | 73 | ls->c = -1; /* Only bad if we get called again. */ |
81 | break; | 74 | break; |
82 | } | 75 | } |
83 | if (size >= LJ_MAX_MEM - ls->sb.n) lj_err_mem(ls->L); | 76 | if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L); |
84 | if (ls->sb.n) { /* Append to buffer. */ | 77 | if (n) { /* Append to buffer. */ |
85 | MSize n = ls->sb.n + (MSize)size; | 78 | n += (MSize)sz; |
86 | bcread_resize(ls, n < len ? len : n); | 79 | p = lj_buf_need(&ls->sb, n < len ? len : n); |
87 | memcpy(ls->sb.buf + ls->sb.n, buf, size); | 80 | memcpy(ls->sb.w, buf, sz); |
88 | ls->n = ls->sb.n = n; | 81 | ls->sb.w = p + n; |
89 | ls->p = ls->sb.buf; | 82 | ls->p = p; |
83 | ls->pe = p + n; | ||
90 | } else { /* Return buffer provided by reader. */ | 84 | } else { /* Return buffer provided by reader. */ |
91 | ls->n = (MSize)size; | ||
92 | ls->p = buf; | 85 | ls->p = buf; |
86 | ls->pe = buf + sz; | ||
93 | } | 87 | } |
94 | } while (ls->n < len); | 88 | } while ((MSize)(ls->pe - ls->p) < len); |
95 | } | 89 | } |
96 | 90 | ||
97 | /* Need a certain number of bytes. */ | 91 | /* Need a certain number of bytes. */ |
98 | static LJ_AINLINE void bcread_need(LexState *ls, MSize len) | 92 | static LJ_AINLINE void bcread_need(LexState *ls, MSize len) |
99 | { | 93 | { |
100 | if (LJ_UNLIKELY(ls->n < len)) | 94 | if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) |
101 | bcread_fill(ls, len, 1); | 95 | bcread_fill(ls, len, 1); |
102 | } | 96 | } |
103 | 97 | ||
104 | /* Want to read up to a certain number of bytes, but may need less. */ | 98 | /* Want to read up to a certain number of bytes, but may need less. */ |
105 | static LJ_AINLINE void bcread_want(LexState *ls, MSize len) | 99 | static LJ_AINLINE void bcread_want(LexState *ls, MSize len) |
106 | { | 100 | { |
107 | if (LJ_UNLIKELY(ls->n < len)) | 101 | if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) |
108 | bcread_fill(ls, len, 0); | 102 | bcread_fill(ls, len, 0); |
109 | } | 103 | } |
110 | 104 | ||
111 | #define bcread_dec(ls) check_exp(ls->n > 0, ls->n--) | ||
112 | #define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len)) | ||
113 | |||
114 | /* Return memory block from buffer. */ | 105 | /* Return memory block from buffer. */ |
115 | static uint8_t *bcread_mem(LexState *ls, MSize len) | 106 | static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len) |
116 | { | 107 | { |
117 | uint8_t *p = (uint8_t *)ls->p; | 108 | uint8_t *p = (uint8_t *)ls->p; |
118 | bcread_consume(ls, len); | 109 | ls->p += len; |
119 | ls->p = (char *)p + len; | 110 | lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); |
120 | return p; | 111 | return p; |
121 | } | 112 | } |
122 | 113 | ||
@@ -129,25 +120,15 @@ static void bcread_block(LexState *ls, void *q, MSize len) | |||
129 | /* Read byte from buffer. */ | 120 | /* Read byte from buffer. */ |
130 | static LJ_AINLINE uint32_t bcread_byte(LexState *ls) | 121 | static LJ_AINLINE uint32_t bcread_byte(LexState *ls) |
131 | { | 122 | { |
132 | bcread_dec(ls); | 123 | lj_assertLS(ls->p < ls->pe, "buffer read overflow"); |
133 | return (uint32_t)(uint8_t)*ls->p++; | 124 | return (uint32_t)(uint8_t)*ls->p++; |
134 | } | 125 | } |
135 | 126 | ||
136 | /* Read ULEB128 value from buffer. */ | 127 | /* Read ULEB128 value from buffer. */ |
137 | static uint32_t bcread_uleb128(LexState *ls) | 128 | static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls) |
138 | { | 129 | { |
139 | const uint8_t *p = (const uint8_t *)ls->p; | 130 | uint32_t v = lj_buf_ruleb128(&ls->p); |
140 | uint32_t v = *p++; | 131 | lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); |
141 | if (LJ_UNLIKELY(v >= 0x80)) { | ||
142 | int sh = 0; | ||
143 | v &= 0x7f; | ||
144 | do { | ||
145 | v |= ((*p & 0x7f) << (sh += 7)); | ||
146 | bcread_dec(ls); | ||
147 | } while (*p++ >= 0x80); | ||
148 | } | ||
149 | bcread_dec(ls); | ||
150 | ls->p = (char *)p; | ||
151 | return v; | 132 | return v; |
152 | } | 133 | } |
153 | 134 | ||
@@ -161,11 +142,10 @@ static uint32_t bcread_uleb128_33(LexState *ls) | |||
161 | v &= 0x3f; | 142 | v &= 0x3f; |
162 | do { | 143 | do { |
163 | v |= ((*p & 0x7f) << (sh += 7)); | 144 | v |= ((*p & 0x7f) << (sh += 7)); |
164 | bcread_dec(ls); | ||
165 | } while (*p++ >= 0x80); | 145 | } while (*p++ >= 0x80); |
166 | } | 146 | } |
167 | bcread_dec(ls); | ||
168 | ls->p = (char *)p; | 147 | ls->p = (char *)p; |
148 | lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); | ||
169 | return v; | 149 | return v; |
170 | } | 150 | } |
171 | 151 | ||
@@ -212,8 +192,8 @@ static void bcread_ktabk(LexState *ls, TValue *o) | |||
212 | o->u32.lo = bcread_uleb128(ls); | 192 | o->u32.lo = bcread_uleb128(ls); |
213 | o->u32.hi = bcread_uleb128(ls); | 193 | o->u32.hi = bcread_uleb128(ls); |
214 | } else { | 194 | } else { |
215 | lua_assert(tp <= BCDUMP_KTAB_TRUE); | 195 | lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); |
216 | setitype(o, ~tp); | 196 | setpriV(o, ~tp); |
217 | } | 197 | } |
218 | } | 198 | } |
219 | 199 | ||
@@ -234,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls) | |||
234 | for (i = 0; i < nhash; i++) { | 214 | for (i = 0; i < nhash; i++) { |
235 | TValue key; | 215 | TValue key; |
236 | bcread_ktabk(ls, &key); | 216 | bcread_ktabk(ls, &key); |
237 | lua_assert(!tvisnil(&key)); | 217 | lj_assertLS(!tvisnil(&key), "nil key"); |
238 | bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); | 218 | bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); |
239 | } | 219 | } |
240 | } | 220 | } |
@@ -271,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) | |||
271 | #endif | 251 | #endif |
272 | } else { | 252 | } else { |
273 | lua_State *L = ls->L; | 253 | lua_State *L = ls->L; |
274 | lua_assert(tp == BCDUMP_KGC_CHILD); | 254 | lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp); |
275 | if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ | 255 | if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ |
276 | bcread_error(ls, LJ_ERR_BCBAD); | 256 | bcread_error(ls, LJ_ERR_BCBAD); |
277 | L->top--; | 257 | L->top--; |
@@ -327,25 +307,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv) | |||
327 | } | 307 | } |
328 | 308 | ||
329 | /* Read a prototype. */ | 309 | /* Read a prototype. */ |
330 | static GCproto *bcread_proto(LexState *ls) | 310 | GCproto *lj_bcread_proto(LexState *ls) |
331 | { | 311 | { |
332 | GCproto *pt; | 312 | GCproto *pt; |
333 | MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; | 313 | MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; |
334 | MSize ofsk, ofsuv, ofsdbg; | 314 | MSize ofsk, ofsuv, ofsdbg; |
335 | MSize sizedbg = 0; | 315 | MSize sizedbg = 0; |
336 | BCLine firstline = 0, numline = 0; | 316 | BCLine firstline = 0, numline = 0; |
337 | MSize len, startn; | ||
338 | |||
339 | /* Read length. */ | ||
340 | if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */ | ||
341 | ls->n--; ls->p++; | ||
342 | return NULL; | ||
343 | } | ||
344 | bcread_want(ls, 5); | ||
345 | len = bcread_uleb128(ls); | ||
346 | if (!len) return NULL; /* EOF */ | ||
347 | bcread_need(ls, len); | ||
348 | startn = ls->n; | ||
349 | 317 | ||
350 | /* Read prototype header. */ | 318 | /* Read prototype header. */ |
351 | flags = bcread_byte(ls); | 319 | flags = bcread_byte(ls); |
@@ -414,9 +382,6 @@ static GCproto *bcread_proto(LexState *ls) | |||
414 | setmref(pt->uvinfo, NULL); | 382 | setmref(pt->uvinfo, NULL); |
415 | setmref(pt->varinfo, NULL); | 383 | setmref(pt->varinfo, NULL); |
416 | } | 384 | } |
417 | |||
418 | if (len != startn - ls->n) | ||
419 | bcread_error(ls, LJ_ERR_BCBAD); | ||
420 | return pt; | 385 | return pt; |
421 | } | 386 | } |
422 | 387 | ||
@@ -430,14 +395,11 @@ static int bcread_header(LexState *ls) | |||
430 | bcread_byte(ls) != BCDUMP_VERSION) return 0; | 395 | bcread_byte(ls) != BCDUMP_VERSION) return 0; |
431 | bcread_flags(ls) = flags = bcread_uleb128(ls); | 396 | bcread_flags(ls) = flags = bcread_uleb128(ls); |
432 | if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; | 397 | if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; |
398 | if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; | ||
433 | if ((flags & BCDUMP_F_FFI)) { | 399 | if ((flags & BCDUMP_F_FFI)) { |
434 | #if LJ_HASFFI | 400 | #if LJ_HASFFI |
435 | lua_State *L = ls->L; | 401 | lua_State *L = ls->L; |
436 | if (!ctype_ctsG(G(L))) { | 402 | ctype_loadffi(L); |
437 | ptrdiff_t oldtop = savestack(L, L->top); | ||
438 | luaopen_ffi(L); /* Load FFI library on-demand. */ | ||
439 | L->top = restorestack(L, oldtop); | ||
440 | } | ||
441 | #else | 403 | #else |
442 | return 0; | 404 | return 0; |
443 | #endif | 405 | #endif |
@@ -456,19 +418,33 @@ static int bcread_header(LexState *ls) | |||
456 | GCproto *lj_bcread(LexState *ls) | 418 | GCproto *lj_bcread(LexState *ls) |
457 | { | 419 | { |
458 | lua_State *L = ls->L; | 420 | lua_State *L = ls->L; |
459 | lua_assert(ls->current == BCDUMP_HEAD1); | 421 | lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header"); |
460 | bcread_savetop(L, ls, L->top); | 422 | bcread_savetop(L, ls, L->top); |
461 | lj_str_resetbuf(&ls->sb); | 423 | lj_buf_reset(&ls->sb); |
462 | /* Check for a valid bytecode dump header. */ | 424 | /* Check for a valid bytecode dump header. */ |
463 | if (!bcread_header(ls)) | 425 | if (!bcread_header(ls)) |
464 | bcread_error(ls, LJ_ERR_BCFMT); | 426 | bcread_error(ls, LJ_ERR_BCFMT); |
465 | for (;;) { /* Process all prototypes in the bytecode dump. */ | 427 | for (;;) { /* Process all prototypes in the bytecode dump. */ |
466 | GCproto *pt = bcread_proto(ls); | 428 | GCproto *pt; |
467 | if (!pt) break; | 429 | MSize len; |
430 | const char *startp; | ||
431 | /* Read length. */ | ||
432 | if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */ | ||
433 | ls->p++; | ||
434 | break; | ||
435 | } | ||
436 | bcread_want(ls, 5); | ||
437 | len = bcread_uleb128(ls); | ||
438 | if (!len) break; /* EOF */ | ||
439 | bcread_need(ls, len); | ||
440 | startp = ls->p; | ||
441 | pt = lj_bcread_proto(ls); | ||
442 | if (ls->p != startp + len) | ||
443 | bcread_error(ls, LJ_ERR_BCBAD); | ||
468 | setprotoV(L, L->top, pt); | 444 | setprotoV(L, L->top, pt); |
469 | incr_top(L); | 445 | incr_top(L); |
470 | } | 446 | } |
471 | if ((ls->n && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) | 447 | if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) |
472 | bcread_error(ls, LJ_ERR_BCBAD); | 448 | bcread_error(ls, LJ_ERR_BCBAD); |
473 | /* Pop off last prototype. */ | 449 | /* Pop off last prototype. */ |
474 | L->top--; | 450 | L->top--; |
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index d836497e..2c70ff47 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c | |||
@@ -8,7 +8,7 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_str.h" | 11 | #include "lj_buf.h" |
12 | #include "lj_bc.h" | 12 | #include "lj_bc.h" |
13 | #if LJ_HASFFI | 13 | #if LJ_HASFFI |
14 | #include "lj_ctype.h" | 14 | #include "lj_ctype.h" |
@@ -17,99 +17,67 @@ | |||
17 | #include "lj_dispatch.h" | 17 | #include "lj_dispatch.h" |
18 | #include "lj_jit.h" | 18 | #include "lj_jit.h" |
19 | #endif | 19 | #endif |
20 | #include "lj_strfmt.h" | ||
20 | #include "lj_bcdump.h" | 21 | #include "lj_bcdump.h" |
21 | #include "lj_vm.h" | 22 | #include "lj_vm.h" |
22 | 23 | ||
23 | /* Context for bytecode writer. */ | 24 | /* Context for bytecode writer. */ |
24 | typedef struct BCWriteCtx { | 25 | typedef struct BCWriteCtx { |
25 | SBuf sb; /* Output buffer. */ | 26 | SBuf sb; /* Output buffer. */ |
26 | lua_State *L; /* Lua state. */ | ||
27 | GCproto *pt; /* Root prototype. */ | 27 | GCproto *pt; /* Root prototype. */ |
28 | lua_Writer wfunc; /* Writer callback. */ | 28 | lua_Writer wfunc; /* Writer callback. */ |
29 | void *wdata; /* Writer callback data. */ | 29 | void *wdata; /* Writer callback data. */ |
30 | int strip; /* Strip debug info. */ | 30 | int strip; /* Strip debug info. */ |
31 | int status; /* Status from writer callback. */ | 31 | int status; /* Status from writer callback. */ |
32 | #ifdef LUA_USE_ASSERT | ||
33 | global_State *g; | ||
34 | #endif | ||
32 | } BCWriteCtx; | 35 | } BCWriteCtx; |
33 | 36 | ||
34 | /* -- Output buffer handling ---------------------------------------------- */ | 37 | #ifdef LUA_USE_ASSERT |
35 | 38 | #define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__) | |
36 | /* Resize buffer if needed. */ | 39 | #else |
37 | static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len) | 40 | #define lj_assertBCW(c, ...) ((void)ctx) |
38 | { | 41 | #endif |
39 | MSize sz = ctx->sb.sz * 2; | ||
40 | while (ctx->sb.n + len > sz) sz = sz * 2; | ||
41 | lj_str_resizebuf(ctx->L, &ctx->sb, sz); | ||
42 | } | ||
43 | |||
44 | /* Need a certain amount of buffer space. */ | ||
45 | static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len) | ||
46 | { | ||
47 | if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz)) | ||
48 | bcwrite_resize(ctx, len); | ||
49 | } | ||
50 | |||
51 | /* Add memory block to buffer. */ | ||
52 | static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len) | ||
53 | { | ||
54 | uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n); | ||
55 | MSize i; | ||
56 | ctx->sb.n += len; | ||
57 | for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i]; | ||
58 | } | ||
59 | |||
60 | /* Add byte to buffer. */ | ||
61 | static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b) | ||
62 | { | ||
63 | ctx->sb.buf[ctx->sb.n++] = b; | ||
64 | } | ||
65 | |||
66 | /* Add ULEB128 value to buffer. */ | ||
67 | static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v) | ||
68 | { | ||
69 | MSize n = ctx->sb.n; | ||
70 | uint8_t *p = (uint8_t *)ctx->sb.buf; | ||
71 | for (; v >= 0x80; v >>= 7) | ||
72 | p[n++] = (uint8_t)((v & 0x7f) | 0x80); | ||
73 | p[n++] = (uint8_t)v; | ||
74 | ctx->sb.n = n; | ||
75 | } | ||
76 | 42 | ||
77 | /* -- Bytecode writer ----------------------------------------------------- */ | 43 | /* -- Bytecode writer ----------------------------------------------------- */ |
78 | 44 | ||
79 | /* Write a single constant key/value of a template table. */ | 45 | /* Write a single constant key/value of a template table. */ |
80 | static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) | 46 | static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) |
81 | { | 47 | { |
82 | bcwrite_need(ctx, 1+10); | 48 | char *p = lj_buf_more(&ctx->sb, 1+10); |
83 | if (tvisstr(o)) { | 49 | if (tvisstr(o)) { |
84 | const GCstr *str = strV(o); | 50 | const GCstr *str = strV(o); |
85 | MSize len = str->len; | 51 | MSize len = str->len; |
86 | bcwrite_need(ctx, 5+len); | 52 | p = lj_buf_more(&ctx->sb, 5+len); |
87 | bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); | 53 | p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len); |
88 | bcwrite_block(ctx, strdata(str), len); | 54 | p = lj_buf_wmem(p, strdata(str), len); |
89 | } else if (tvisint(o)) { | 55 | } else if (tvisint(o)) { |
90 | bcwrite_byte(ctx, BCDUMP_KTAB_INT); | 56 | *p++ = BCDUMP_KTAB_INT; |
91 | bcwrite_uleb128(ctx, intV(o)); | 57 | p = lj_strfmt_wuleb128(p, intV(o)); |
92 | } else if (tvisnum(o)) { | 58 | } else if (tvisnum(o)) { |
93 | if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ | 59 | if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ |
94 | lua_Number num = numV(o); | 60 | lua_Number num = numV(o); |
95 | int32_t k = lj_num2int(num); | 61 | int32_t k = lj_num2int(num); |
96 | if (num == (lua_Number)k) { /* -0 is never a constant. */ | 62 | if (num == (lua_Number)k) { /* -0 is never a constant. */ |
97 | bcwrite_byte(ctx, BCDUMP_KTAB_INT); | 63 | *p++ = BCDUMP_KTAB_INT; |
98 | bcwrite_uleb128(ctx, k); | 64 | p = lj_strfmt_wuleb128(p, k); |
65 | ctx->sb.w = p; | ||
99 | return; | 66 | return; |
100 | } | 67 | } |
101 | } | 68 | } |
102 | bcwrite_byte(ctx, BCDUMP_KTAB_NUM); | 69 | *p++ = BCDUMP_KTAB_NUM; |
103 | bcwrite_uleb128(ctx, o->u32.lo); | 70 | p = lj_strfmt_wuleb128(p, o->u32.lo); |
104 | bcwrite_uleb128(ctx, o->u32.hi); | 71 | p = lj_strfmt_wuleb128(p, o->u32.hi); |
105 | } else { | 72 | } else { |
106 | lua_assert(tvispri(o)); | 73 | lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); |
107 | bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); | 74 | *p++ = BCDUMP_KTAB_NIL+~itype(o); |
108 | } | 75 | } |
76 | ctx->sb.w = p; | ||
109 | } | 77 | } |
110 | 78 | ||
111 | /* Write a template table. */ | 79 | /* Write a template table. */ |
112 | static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) | 80 | static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) |
113 | { | 81 | { |
114 | MSize narray = 0, nhash = 0; | 82 | MSize narray = 0, nhash = 0; |
115 | if (t->asize > 0) { /* Determine max. length of array part. */ | 83 | if (t->asize > 0) { /* Determine max. length of array part. */ |
@@ -127,8 +95,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) | |||
127 | nhash += !tvisnil(&node[i].val); | 95 | nhash += !tvisnil(&node[i].val); |
128 | } | 96 | } |
129 | /* Write number of array slots and hash slots. */ | 97 | /* Write number of array slots and hash slots. */ |
130 | bcwrite_uleb128(ctx, narray); | 98 | p = lj_strfmt_wuleb128(p, narray); |
131 | bcwrite_uleb128(ctx, nhash); | 99 | p = lj_strfmt_wuleb128(p, nhash); |
100 | ctx->sb.w = p; | ||
132 | if (narray) { /* Write array entries (may contain nil). */ | 101 | if (narray) { /* Write array entries (may contain nil). */ |
133 | MSize i; | 102 | MSize i; |
134 | TValue *o = tvref(t->array); | 103 | TValue *o = tvref(t->array); |
@@ -155,12 +124,13 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) | |||
155 | for (i = 0; i < sizekgc; i++, kr++) { | 124 | for (i = 0; i < sizekgc; i++, kr++) { |
156 | GCobj *o = gcref(*kr); | 125 | GCobj *o = gcref(*kr); |
157 | MSize tp, need = 1; | 126 | MSize tp, need = 1; |
127 | char *p; | ||
158 | /* Determine constant type and needed size. */ | 128 | /* Determine constant type and needed size. */ |
159 | if (o->gch.gct == ~LJ_TSTR) { | 129 | if (o->gch.gct == ~LJ_TSTR) { |
160 | tp = BCDUMP_KGC_STR + gco2str(o)->len; | 130 | tp = BCDUMP_KGC_STR + gco2str(o)->len; |
161 | need = 5+gco2str(o)->len; | 131 | need = 5+gco2str(o)->len; |
162 | } else if (o->gch.gct == ~LJ_TPROTO) { | 132 | } else if (o->gch.gct == ~LJ_TPROTO) { |
163 | lua_assert((pt->flags & PROTO_CHILD)); | 133 | lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child"); |
164 | tp = BCDUMP_KGC_CHILD; | 134 | tp = BCDUMP_KGC_CHILD; |
165 | #if LJ_HASFFI | 135 | #if LJ_HASFFI |
166 | } else if (o->gch.gct == ~LJ_TCDATA) { | 136 | } else if (o->gch.gct == ~LJ_TCDATA) { |
@@ -171,34 +141,38 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) | |||
171 | } else if (id == CTID_UINT64) { | 141 | } else if (id == CTID_UINT64) { |
172 | tp = BCDUMP_KGC_U64; | 142 | tp = BCDUMP_KGC_U64; |
173 | } else { | 143 | } else { |
174 | lua_assert(id == CTID_COMPLEX_DOUBLE); | 144 | lj_assertBCW(id == CTID_COMPLEX_DOUBLE, |
145 | "bad cdata constant CTID %d", id); | ||
175 | tp = BCDUMP_KGC_COMPLEX; | 146 | tp = BCDUMP_KGC_COMPLEX; |
176 | } | 147 | } |
177 | #endif | 148 | #endif |
178 | } else { | 149 | } else { |
179 | lua_assert(o->gch.gct == ~LJ_TTAB); | 150 | lj_assertBCW(o->gch.gct == ~LJ_TTAB, |
151 | "bad constant GC type %d", o->gch.gct); | ||
180 | tp = BCDUMP_KGC_TAB; | 152 | tp = BCDUMP_KGC_TAB; |
181 | need = 1+2*5; | 153 | need = 1+2*5; |
182 | } | 154 | } |
183 | /* Write constant type. */ | 155 | /* Write constant type. */ |
184 | bcwrite_need(ctx, need); | 156 | p = lj_buf_more(&ctx->sb, need); |
185 | bcwrite_uleb128(ctx, tp); | 157 | p = lj_strfmt_wuleb128(p, tp); |
186 | /* Write constant data (if any). */ | 158 | /* Write constant data (if any). */ |
187 | if (tp >= BCDUMP_KGC_STR) { | 159 | if (tp >= BCDUMP_KGC_STR) { |
188 | bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); | 160 | p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len); |
189 | } else if (tp == BCDUMP_KGC_TAB) { | 161 | } else if (tp == BCDUMP_KGC_TAB) { |
190 | bcwrite_ktab(ctx, gco2tab(o)); | 162 | bcwrite_ktab(ctx, p, gco2tab(o)); |
163 | continue; | ||
191 | #if LJ_HASFFI | 164 | #if LJ_HASFFI |
192 | } else if (tp != BCDUMP_KGC_CHILD) { | 165 | } else if (tp != BCDUMP_KGC_CHILD) { |
193 | cTValue *p = (TValue *)cdataptr(gco2cd(o)); | 166 | cTValue *q = (TValue *)cdataptr(gco2cd(o)); |
194 | bcwrite_uleb128(ctx, p[0].u32.lo); | 167 | p = lj_strfmt_wuleb128(p, q[0].u32.lo); |
195 | bcwrite_uleb128(ctx, p[0].u32.hi); | 168 | p = lj_strfmt_wuleb128(p, q[0].u32.hi); |
196 | if (tp == BCDUMP_KGC_COMPLEX) { | 169 | if (tp == BCDUMP_KGC_COMPLEX) { |
197 | bcwrite_uleb128(ctx, p[1].u32.lo); | 170 | p = lj_strfmt_wuleb128(p, q[1].u32.lo); |
198 | bcwrite_uleb128(ctx, p[1].u32.hi); | 171 | p = lj_strfmt_wuleb128(p, q[1].u32.hi); |
199 | } | 172 | } |
200 | #endif | 173 | #endif |
201 | } | 174 | } |
175 | ctx->sb.w = p; | ||
202 | } | 176 | } |
203 | } | 177 | } |
204 | 178 | ||
@@ -207,7 +181,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) | |||
207 | { | 181 | { |
208 | MSize i, sizekn = pt->sizekn; | 182 | MSize i, sizekn = pt->sizekn; |
209 | cTValue *o = mref(pt->k, TValue); | 183 | cTValue *o = mref(pt->k, TValue); |
210 | bcwrite_need(ctx, 10*sizekn); | 184 | char *p = lj_buf_more(&ctx->sb, 10*sizekn); |
211 | for (i = 0; i < sizekn; i++, o++) { | 185 | for (i = 0; i < sizekn; i++, o++) { |
212 | int32_t k; | 186 | int32_t k; |
213 | if (tvisint(o)) { | 187 | if (tvisint(o)) { |
@@ -220,55 +194,55 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) | |||
220 | k = lj_num2int(num); | 194 | k = lj_num2int(num); |
221 | if (num == (lua_Number)k) { /* -0 is never a constant. */ | 195 | if (num == (lua_Number)k) { /* -0 is never a constant. */ |
222 | save_int: | 196 | save_int: |
223 | bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); | 197 | p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); |
224 | if (k < 0) { | 198 | if (k < 0) |
225 | char *p = &ctx->sb.buf[ctx->sb.n-1]; | 199 | p[-1] = (p[-1] & 7) | ((k>>27) & 0x18); |
226 | *p = (*p & 7) | ((k>>27) & 0x18); | ||
227 | } | ||
228 | continue; | 200 | continue; |
229 | } | 201 | } |
230 | } | 202 | } |
231 | bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); | 203 | p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); |
232 | if (o->u32.lo >= 0x80000000u) { | 204 | if (o->u32.lo >= 0x80000000u) |
233 | char *p = &ctx->sb.buf[ctx->sb.n-1]; | 205 | p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18); |
234 | *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); | 206 | p = lj_strfmt_wuleb128(p, o->u32.hi); |
235 | } | ||
236 | bcwrite_uleb128(ctx, o->u32.hi); | ||
237 | } | 207 | } |
238 | } | 208 | } |
209 | ctx->sb.w = p; | ||
239 | } | 210 | } |
240 | 211 | ||
241 | /* Write bytecode instructions. */ | 212 | /* Write bytecode instructions. */ |
242 | static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) | 213 | static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) |
243 | { | 214 | { |
244 | MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ | 215 | MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ |
245 | #if LJ_HASJIT | 216 | #if LJ_HASJIT |
246 | uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; | 217 | uint8_t *q = (uint8_t *)p; |
247 | #endif | 218 | #endif |
248 | bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); | 219 | p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); |
220 | UNUSED(ctx); | ||
249 | #if LJ_HASJIT | 221 | #if LJ_HASJIT |
250 | /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ | 222 | /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ |
251 | if ((pt->flags & PROTO_ILOOP) || pt->trace) { | 223 | if ((pt->flags & PROTO_ILOOP) || pt->trace) { |
252 | jit_State *J = L2J(ctx->L); | 224 | jit_State *J = L2J(sbufL(&ctx->sb)); |
253 | MSize i; | 225 | MSize i; |
254 | for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { | 226 | for (i = 0; i < nbc; i++, q += sizeof(BCIns)) { |
255 | BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; | 227 | BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)]; |
256 | if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || | 228 | if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || |
257 | op == BC_JFORI) { | 229 | op == BC_JFORI) { |
258 | p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); | 230 | q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); |
259 | } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { | 231 | } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { |
260 | BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); | 232 | BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8); |
261 | memcpy(p, &traceref(J, rd)->startins, 4); | 233 | memcpy(q, &traceref(J, rd)->startins, 4); |
262 | } | 234 | } |
263 | } | 235 | } |
264 | } | 236 | } |
265 | #endif | 237 | #endif |
238 | return p; | ||
266 | } | 239 | } |
267 | 240 | ||
268 | /* Write prototype. */ | 241 | /* Write prototype. */ |
269 | static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) | 242 | static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) |
270 | { | 243 | { |
271 | MSize sizedbg = 0; | 244 | MSize sizedbg = 0; |
245 | char *p; | ||
272 | 246 | ||
273 | /* Recursively write children of prototype. */ | 247 | /* Recursively write children of prototype. */ |
274 | if ((pt->flags & PROTO_CHILD)) { | 248 | if ((pt->flags & PROTO_CHILD)) { |
@@ -282,31 +256,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) | |||
282 | } | 256 | } |
283 | 257 | ||
284 | /* Start writing the prototype info to a buffer. */ | 258 | /* Start writing the prototype info to a buffer. */ |
285 | lj_str_resetbuf(&ctx->sb); | 259 | p = lj_buf_need(&ctx->sb, |
286 | ctx->sb.n = 5; /* Leave room for final size. */ | 260 | 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); |
287 | bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); | 261 | p += 5; /* Leave room for final size. */ |
288 | 262 | ||
289 | /* Write prototype header. */ | 263 | /* Write prototype header. */ |
290 | bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); | 264 | *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)); |
291 | bcwrite_byte(ctx, pt->numparams); | 265 | *p++ = pt->numparams; |
292 | bcwrite_byte(ctx, pt->framesize); | 266 | *p++ = pt->framesize; |
293 | bcwrite_byte(ctx, pt->sizeuv); | 267 | *p++ = pt->sizeuv; |
294 | bcwrite_uleb128(ctx, pt->sizekgc); | 268 | p = lj_strfmt_wuleb128(p, pt->sizekgc); |
295 | bcwrite_uleb128(ctx, pt->sizekn); | 269 | p = lj_strfmt_wuleb128(p, pt->sizekn); |
296 | bcwrite_uleb128(ctx, pt->sizebc-1); | 270 | p = lj_strfmt_wuleb128(p, pt->sizebc-1); |
297 | if (!ctx->strip) { | 271 | if (!ctx->strip) { |
298 | if (proto_lineinfo(pt)) | 272 | if (proto_lineinfo(pt)) |
299 | sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); | 273 | sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); |
300 | bcwrite_uleb128(ctx, sizedbg); | 274 | p = lj_strfmt_wuleb128(p, sizedbg); |
301 | if (sizedbg) { | 275 | if (sizedbg) { |
302 | bcwrite_uleb128(ctx, pt->firstline); | 276 | p = lj_strfmt_wuleb128(p, pt->firstline); |
303 | bcwrite_uleb128(ctx, pt->numline); | 277 | p = lj_strfmt_wuleb128(p, pt->numline); |
304 | } | 278 | } |
305 | } | 279 | } |
306 | 280 | ||
307 | /* Write bytecode instructions and upvalue refs. */ | 281 | /* Write bytecode instructions and upvalue refs. */ |
308 | bcwrite_bytecode(ctx, pt); | 282 | p = bcwrite_bytecode(ctx, p, pt); |
309 | bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); | 283 | p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2); |
284 | ctx->sb.w = p; | ||
310 | 285 | ||
311 | /* Write constants. */ | 286 | /* Write constants. */ |
312 | bcwrite_kgc(ctx, pt); | 287 | bcwrite_kgc(ctx, pt); |
@@ -314,18 +289,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) | |||
314 | 289 | ||
315 | /* Write debug info, if not stripped. */ | 290 | /* Write debug info, if not stripped. */ |
316 | if (sizedbg) { | 291 | if (sizedbg) { |
317 | bcwrite_need(ctx, sizedbg); | 292 | p = lj_buf_more(&ctx->sb, sizedbg); |
318 | bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); | 293 | p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg); |
294 | ctx->sb.w = p; | ||
319 | } | 295 | } |
320 | 296 | ||
321 | /* Pass buffer to writer function. */ | 297 | /* Pass buffer to writer function. */ |
322 | if (ctx->status == 0) { | 298 | if (ctx->status == 0) { |
323 | MSize n = ctx->sb.n - 5; | 299 | MSize n = sbuflen(&ctx->sb) - 5; |
324 | MSize nn = (lj_fls(n)+8)*9 >> 6; | 300 | MSize nn = (lj_fls(n)+8)*9 >> 6; |
325 | ctx->sb.n = 5 - nn; | 301 | char *q = ctx->sb.b + (5 - nn); |
326 | bcwrite_uleb128(ctx, n); /* Fill in final size. */ | 302 | p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */ |
327 | lua_assert(ctx->sb.n == 5); | 303 | lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write"); |
328 | ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); | 304 | ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata); |
329 | } | 305 | } |
330 | } | 306 | } |
331 | 307 | ||
@@ -335,20 +311,21 @@ static void bcwrite_header(BCWriteCtx *ctx) | |||
335 | GCstr *chunkname = proto_chunkname(ctx->pt); | 311 | GCstr *chunkname = proto_chunkname(ctx->pt); |
336 | const char *name = strdata(chunkname); | 312 | const char *name = strdata(chunkname); |
337 | MSize len = chunkname->len; | 313 | MSize len = chunkname->len; |
338 | lj_str_resetbuf(&ctx->sb); | 314 | char *p = lj_buf_need(&ctx->sb, 5+5+len); |
339 | bcwrite_need(ctx, 5+5+len); | 315 | *p++ = BCDUMP_HEAD1; |
340 | bcwrite_byte(ctx, BCDUMP_HEAD1); | 316 | *p++ = BCDUMP_HEAD2; |
341 | bcwrite_byte(ctx, BCDUMP_HEAD2); | 317 | *p++ = BCDUMP_HEAD3; |
342 | bcwrite_byte(ctx, BCDUMP_HEAD3); | 318 | *p++ = BCDUMP_VERSION; |
343 | bcwrite_byte(ctx, BCDUMP_VERSION); | 319 | *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) + |
344 | bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + | 320 | LJ_BE*BCDUMP_F_BE + |
345 | (LJ_BE ? BCDUMP_F_BE : 0) + | 321 | ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) + |
346 | ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0)); | 322 | LJ_FR2*BCDUMP_F_FR2; |
347 | if (!ctx->strip) { | 323 | if (!ctx->strip) { |
348 | bcwrite_uleb128(ctx, len); | 324 | p = lj_strfmt_wuleb128(p, len); |
349 | bcwrite_block(ctx, name, len); | 325 | p = lj_buf_wmem(p, name, len); |
350 | } | 326 | } |
351 | ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); | 327 | ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b, |
328 | (MSize)(p - ctx->sb.b), ctx->wdata); | ||
352 | } | 329 | } |
353 | 330 | ||
354 | /* Write footer of bytecode dump. */ | 331 | /* Write footer of bytecode dump. */ |
@@ -356,7 +333,7 @@ static void bcwrite_footer(BCWriteCtx *ctx) | |||
356 | { | 333 | { |
357 | if (ctx->status == 0) { | 334 | if (ctx->status == 0) { |
358 | uint8_t zero = 0; | 335 | uint8_t zero = 0; |
359 | ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); | 336 | ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata); |
360 | } | 337 | } |
361 | } | 338 | } |
362 | 339 | ||
@@ -364,8 +341,8 @@ static void bcwrite_footer(BCWriteCtx *ctx) | |||
364 | static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) | 341 | static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) |
365 | { | 342 | { |
366 | BCWriteCtx *ctx = (BCWriteCtx *)ud; | 343 | BCWriteCtx *ctx = (BCWriteCtx *)ud; |
367 | UNUSED(dummy); | 344 | UNUSED(L); UNUSED(dummy); |
368 | lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ | 345 | lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */ |
369 | bcwrite_header(ctx); | 346 | bcwrite_header(ctx); |
370 | bcwrite_proto(ctx, ctx->pt); | 347 | bcwrite_proto(ctx, ctx->pt); |
371 | bcwrite_footer(ctx); | 348 | bcwrite_footer(ctx); |
@@ -378,16 +355,18 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, | |||
378 | { | 355 | { |
379 | BCWriteCtx ctx; | 356 | BCWriteCtx ctx; |
380 | int status; | 357 | int status; |
381 | ctx.L = L; | ||
382 | ctx.pt = pt; | 358 | ctx.pt = pt; |
383 | ctx.wfunc = writer; | 359 | ctx.wfunc = writer; |
384 | ctx.wdata = data; | 360 | ctx.wdata = data; |
385 | ctx.strip = strip; | 361 | ctx.strip = strip; |
386 | ctx.status = 0; | 362 | ctx.status = 0; |
387 | lj_str_initbuf(&ctx.sb); | 363 | #ifdef LUA_USE_ASSERT |
364 | ctx.g = G(L); | ||
365 | #endif | ||
366 | lj_buf_init(L, &ctx.sb); | ||
388 | status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); | 367 | status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); |
389 | if (status == 0) status = ctx.status; | 368 | if (status == 0) status = ctx.status; |
390 | lj_str_freebuf(G(ctx.L), &ctx.sb); | 369 | lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb); |
391 | return status; | 370 | return status; |
392 | } | 371 | } |
393 | 372 | ||
diff --git a/src/lj_buf.c b/src/lj_buf.c new file mode 100644 index 00000000..5a03ea6a --- /dev/null +++ b/src/lj_buf.c | |||
@@ -0,0 +1,305 @@ | |||
1 | /* | ||
2 | ** Buffer handling. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_buf_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_gc.h" | ||
11 | #include "lj_err.h" | ||
12 | #include "lj_buf.h" | ||
13 | #include "lj_str.h" | ||
14 | #include "lj_tab.h" | ||
15 | #include "lj_strfmt.h" | ||
16 | |||
17 | /* -- Buffer management --------------------------------------------------- */ | ||
18 | |||
19 | static void buf_grow(SBuf *sb, MSize sz) | ||
20 | { | ||
21 | MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz; | ||
22 | char *b; | ||
23 | GCSize flag; | ||
24 | if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF; | ||
25 | while (nsz < sz) nsz += nsz; | ||
26 | flag = sbufflag(sb); | ||
27 | if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */ | ||
28 | lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW"); | ||
29 | b = (char *)lj_mem_new(sbufL(sb), nsz); | ||
30 | setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW); | ||
31 | setgcrefnull(sbufX(sb)->cowref); | ||
32 | memcpy(b, sb->b, osz); | ||
33 | } else { | ||
34 | b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz); | ||
35 | } | ||
36 | if ((flag & SBUF_FLAG_EXT)) { | ||
37 | sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */ | ||
38 | } | ||
39 | /* Adjust buffer pointers. */ | ||
40 | sb->b = b; | ||
41 | sb->w = b + len; | ||
42 | sb->e = b + nsz; | ||
43 | if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */ | ||
44 | SBuf *bsb = mref(sbufX(sb)->bsb, SBuf); | ||
45 | bsb->b = b; | ||
46 | bsb->w = b + len; | ||
47 | bsb->e = b + nsz; | ||
48 | } | ||
49 | } | ||
50 | |||
51 | LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) | ||
52 | { | ||
53 | lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow"); | ||
54 | if (LJ_UNLIKELY(sz > LJ_MAX_BUF)) | ||
55 | lj_err_mem(sbufL(sb)); | ||
56 | buf_grow(sb, sz); | ||
57 | return sb->b; | ||
58 | } | ||
59 | |||
60 | LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) | ||
61 | { | ||
62 | if (sbufisext(sb)) { | ||
63 | SBufExt *sbx = (SBufExt *)sb; | ||
64 | MSize len = sbufxlen(sbx); | ||
65 | if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) | ||
66 | lj_err_mem(sbufL(sbx)); | ||
67 | if (len + sz > sbufsz(sbx)) { /* Must grow. */ | ||
68 | buf_grow((SBuf *)sbx, len + sz); | ||
69 | } else if (sbufxslack(sbx) < (sbufsz(sbx) >> 3)) { | ||
70 | /* Also grow to avoid excessive compactions, if slack < size/8. */ | ||
71 | buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */ | ||
72 | return sbx->w; | ||
73 | } | ||
74 | if (sbx->r != sbx->b) { /* Compact by moving down. */ | ||
75 | memmove(sbx->b, sbx->r, len); | ||
76 | sbx->r = sbx->b; | ||
77 | sbx->w = sbx->b + len; | ||
78 | lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact"); | ||
79 | } | ||
80 | } else { | ||
81 | MSize len = sbuflen(sb); | ||
82 | lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow"); | ||
83 | if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) | ||
84 | lj_err_mem(sbufL(sb)); | ||
85 | buf_grow(sb, len + sz); | ||
86 | } | ||
87 | return sb->w; | ||
88 | } | ||
89 | |||
90 | void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb) | ||
91 | { | ||
92 | char *b = sb->b; | ||
93 | MSize osz = (MSize)(sb->e - b); | ||
94 | if (osz > 2*LJ_MIN_SBUF) { | ||
95 | MSize n = (MSize)(sb->w - b); | ||
96 | b = lj_mem_realloc(L, b, osz, (osz >> 1)); | ||
97 | sb->b = b; | ||
98 | sb->w = b + n; | ||
99 | sb->e = b + (osz >> 1); | ||
100 | } | ||
101 | lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt"); | ||
102 | } | ||
103 | |||
104 | char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) | ||
105 | { | ||
106 | SBuf *sb = &G(L)->tmpbuf; | ||
107 | setsbufL(sb, L); | ||
108 | return lj_buf_need(sb, sz); | ||
109 | } | ||
110 | |||
111 | #if LJ_HASBUFFER && LJ_HASJIT | ||
112 | void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref) | ||
113 | { | ||
114 | lua_State *L = sbufL(sbx); | ||
115 | lj_bufx_free(L, sbx); | ||
116 | lj_bufx_set_cow(L, sbx, p, len); | ||
117 | setgcref(sbx->cowref, ref); | ||
118 | lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref); | ||
119 | } | ||
120 | |||
121 | #if LJ_HASFFI | ||
122 | MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz) | ||
123 | { | ||
124 | lj_buf_more((SBuf *)sbx, sz); | ||
125 | return sbufleft(sbx); | ||
126 | } | ||
127 | #endif | ||
128 | #endif | ||
129 | |||
130 | /* -- Low-level buffer put operations ------------------------------------- */ | ||
131 | |||
132 | SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len) | ||
133 | { | ||
134 | char *w = lj_buf_more(sb, len); | ||
135 | w = lj_buf_wmem(w, q, len); | ||
136 | sb->w = w; | ||
137 | return sb; | ||
138 | } | ||
139 | |||
140 | #if LJ_HASJIT || LJ_HASFFI | ||
141 | static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c) | ||
142 | { | ||
143 | char *w = lj_buf_more2(sb, 1); | ||
144 | *w++ = (char)c; | ||
145 | sb->w = w; | ||
146 | return sb; | ||
147 | } | ||
148 | |||
149 | SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) | ||
150 | { | ||
151 | char *w = sb->w; | ||
152 | if (LJ_LIKELY(w < sb->e)) { | ||
153 | *w++ = (char)c; | ||
154 | sb->w = w; | ||
155 | return sb; | ||
156 | } | ||
157 | return lj_buf_putchar2(sb, c); | ||
158 | } | ||
159 | #endif | ||
160 | |||
161 | SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) | ||
162 | { | ||
163 | MSize len = s->len; | ||
164 | char *w = lj_buf_more(sb, len); | ||
165 | w = lj_buf_wmem(w, strdata(s), len); | ||
166 | sb->w = w; | ||
167 | return sb; | ||
168 | } | ||
169 | |||
170 | /* -- High-level buffer put operations ------------------------------------ */ | ||
171 | |||
172 | SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s) | ||
173 | { | ||
174 | MSize len = s->len; | ||
175 | char *w = lj_buf_more(sb, len), *e = w+len; | ||
176 | const char *q = strdata(s)+len-1; | ||
177 | while (w < e) | ||
178 | *w++ = *q--; | ||
179 | sb->w = w; | ||
180 | return sb; | ||
181 | } | ||
182 | |||
183 | SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s) | ||
184 | { | ||
185 | MSize len = s->len; | ||
186 | char *w = lj_buf_more(sb, len), *e = w+len; | ||
187 | const char *q = strdata(s); | ||
188 | for (; w < e; w++, q++) { | ||
189 | uint32_t c = *(unsigned char *)q; | ||
190 | #if LJ_TARGET_PPC | ||
191 | *w = c + ((c >= 'A' && c <= 'Z') << 5); | ||
192 | #else | ||
193 | if (c >= 'A' && c <= 'Z') c += 0x20; | ||
194 | *w = c; | ||
195 | #endif | ||
196 | } | ||
197 | sb->w = w; | ||
198 | return sb; | ||
199 | } | ||
200 | |||
201 | SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s) | ||
202 | { | ||
203 | MSize len = s->len; | ||
204 | char *w = lj_buf_more(sb, len), *e = w+len; | ||
205 | const char *q = strdata(s); | ||
206 | for (; w < e; w++, q++) { | ||
207 | uint32_t c = *(unsigned char *)q; | ||
208 | #if LJ_TARGET_PPC | ||
209 | *w = c - ((c >= 'a' && c <= 'z') << 5); | ||
210 | #else | ||
211 | if (c >= 'a' && c <= 'z') c -= 0x20; | ||
212 | *w = c; | ||
213 | #endif | ||
214 | } | ||
215 | sb->w = w; | ||
216 | return sb; | ||
217 | } | ||
218 | |||
219 | SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep) | ||
220 | { | ||
221 | MSize len = s->len; | ||
222 | if (rep > 0 && len) { | ||
223 | uint64_t tlen = (uint64_t)rep * len; | ||
224 | char *w; | ||
225 | if (LJ_UNLIKELY(tlen > LJ_MAX_STR)) | ||
226 | lj_err_mem(sbufL(sb)); | ||
227 | w = lj_buf_more(sb, (MSize)tlen); | ||
228 | if (len == 1) { /* Optimize a common case. */ | ||
229 | uint32_t c = strdata(s)[0]; | ||
230 | do { *w++ = c; } while (--rep > 0); | ||
231 | } else { | ||
232 | const char *e = strdata(s) + len; | ||
233 | do { | ||
234 | const char *q = strdata(s); | ||
235 | do { *w++ = *q++; } while (q < e); | ||
236 | } while (--rep > 0); | ||
237 | } | ||
238 | sb->w = w; | ||
239 | } | ||
240 | return sb; | ||
241 | } | ||
242 | |||
243 | SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) | ||
244 | { | ||
245 | MSize seplen = sep ? sep->len : 0; | ||
246 | if (i <= e) { | ||
247 | for (;;) { | ||
248 | cTValue *o = lj_tab_getint(t, i); | ||
249 | char *w; | ||
250 | if (!o) { | ||
251 | badtype: /* Error: bad element type. */ | ||
252 | sb->w = (char *)(intptr_t)i; /* Store failing index. */ | ||
253 | return NULL; | ||
254 | } else if (tvisstr(o)) { | ||
255 | MSize len = strV(o)->len; | ||
256 | w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); | ||
257 | } else if (tvisint(o)) { | ||
258 | w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); | ||
259 | } else if (tvisnum(o)) { | ||
260 | w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); | ||
261 | } else { | ||
262 | goto badtype; | ||
263 | } | ||
264 | if (i++ == e) { | ||
265 | sb->w = w; | ||
266 | break; | ||
267 | } | ||
268 | if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen); | ||
269 | sb->w = w; | ||
270 | } | ||
271 | } | ||
272 | return sb; | ||
273 | } | ||
274 | |||
275 | /* -- Miscellaneous buffer operations ------------------------------------- */ | ||
276 | |||
277 | GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb) | ||
278 | { | ||
279 | return lj_str_new(sbufL(sb), sb->b, sbuflen(sb)); | ||
280 | } | ||
281 | |||
282 | /* Concatenate two strings. */ | ||
283 | GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2) | ||
284 | { | ||
285 | MSize len1 = s1->len, len2 = s2->len; | ||
286 | char *buf = lj_buf_tmp(L, len1 + len2); | ||
287 | memcpy(buf, strdata(s1), len1); | ||
288 | memcpy(buf+len1, strdata(s2), len2); | ||
289 | return lj_str_new(L, buf, len1 + len2); | ||
290 | } | ||
291 | |||
292 | /* Read ULEB128 from buffer. */ | ||
293 | uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp) | ||
294 | { | ||
295 | const uint8_t *w = (const uint8_t *)*pp; | ||
296 | uint32_t v = *w++; | ||
297 | if (LJ_UNLIKELY(v >= 0x80)) { | ||
298 | int sh = 0; | ||
299 | v &= 0x7f; | ||
300 | do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80); | ||
301 | } | ||
302 | *pp = (const char *)w; | ||
303 | return v; | ||
304 | } | ||
305 | |||
diff --git a/src/lj_buf.h b/src/lj_buf.h new file mode 100644 index 00000000..76114201 --- /dev/null +++ b/src/lj_buf.h | |||
@@ -0,0 +1,198 @@ | |||
1 | /* | ||
2 | ** Buffer handling. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_BUF_H | ||
7 | #define _LJ_BUF_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_gc.h" | ||
11 | #include "lj_str.h" | ||
12 | |||
13 | /* Resizable string buffers. */ | ||
14 | |||
15 | /* The SBuf struct definition is in lj_obj.h: | ||
16 | ** char *w; Write pointer. | ||
17 | ** char *e; End pointer. | ||
18 | ** char *b; Base pointer. | ||
19 | ** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB. | ||
20 | */ | ||
21 | |||
22 | /* Extended string buffer. */ | ||
23 | typedef struct SBufExt { | ||
24 | SBufHeader; | ||
25 | union { | ||
26 | GCRef cowref; /* Copy-on-write object reference. */ | ||
27 | MRef bsb; /* Borrowed string buffer. */ | ||
28 | }; | ||
29 | char *r; /* Read pointer. */ | ||
30 | GCRef dict_str; /* Serialization string dictionary table. */ | ||
31 | GCRef dict_mt; /* Serialization metatable dictionary table. */ | ||
32 | int depth; /* Remaining recursion depth. */ | ||
33 | } SBufExt; | ||
34 | |||
/* Buffer geometry. All results are narrowed to MSize. */
#define sbufsz(sb)		((MSize)((sb)->e - (sb)->b))  /* Total size. */
#define sbuflen(sb)		((MSize)((sb)->w - (sb)->b))  /* Bytes written. */
#define sbufleft(sb)		((MSize)((sb)->e - (sb)->w))  /* Space left. */
#define sbufxlen(sbx)		((MSize)((sbx)->w - (sbx)->r))  /* Unread bytes. */
#define sbufxslack(sbx)		((MSize)((sbx)->r - (sbx)->b))  /* Already read. */

/* Flag bits kept in the 3 LSB of the L field. */
#define SBUF_MASK_FLAG		(7)
#define SBUF_MASK_L		(~(GCSize)SBUF_MASK_FLAG)
#define SBUF_FLAG_EXT		1	/* Extended string buffer. */
#define SBUF_FLAG_COW		2	/* Copy-on-write buffer. */
#define SBUF_FLAG_BORROW	4	/* Borrowed string buffer. */

/* Get the lua_State with the flag bits masked off. */
#define sbufL(sb) \
  ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L))
/* Set the lua_State: plain, with explicit flags, or keeping current flags. */
#define setsbufL(sb, l)		(setmref((sb)->L, (l)))
#define setsbufXL(sb, l, flag) \
  (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag)))
#define setsbufXL_(sb, l) \
  (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG)))

/* Note: sbufflag() yields the whole L word; test it against a flag mask. */
#define sbufflag(sb)		(mrefu((sb)->L))
#define sbufisext(sb)		(sbufflag((sb)) & SBUF_FLAG_EXT)
#define sbufiscow(sb)		(sbufflag((sb)) & SBUF_FLAG_COW)
#define sbufisborrow(sb)	(sbufflag((sb)) & SBUF_FLAG_BORROW)
#define sbufiscoworborrow(sb)	(sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
/* Checked cast from SBuf to SBufExt. */
#define sbufX(sb) \
  (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
#define setsbufflag(sb, flag)	(setmrefu((sb)->L, (flag)))

/* Buffer objects are userdata with udtype UDTYPE_BUFFER. */
#define tvisbuf(o) \
  (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER)
#define bufV(o)		check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o))))
67 | |||
68 | /* Buffer management */ | ||
69 | LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); | ||
70 | LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz); | ||
71 | LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb); | ||
72 | LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz); | ||
73 | |||
74 | static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb) | ||
75 | { | ||
76 | setsbufL(sb, L); | ||
77 | sb->w = sb->e = sb->b = NULL; | ||
78 | } | ||
79 | |||
80 | static LJ_AINLINE void lj_buf_reset(SBuf *sb) | ||
81 | { | ||
82 | sb->w = sb->b; | ||
83 | } | ||
84 | |||
85 | static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L) | ||
86 | { | ||
87 | SBuf *sb = &G(L)->tmpbuf; | ||
88 | setsbufL(sb, L); | ||
89 | lj_buf_reset(sb); | ||
90 | return sb; | ||
91 | } | ||
92 | |||
93 | static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb) | ||
94 | { | ||
95 | lj_assertG(!sbufisext(sb), "bad free of SBufExt"); | ||
96 | lj_mem_free(g, sb->b, sbufsz(sb)); | ||
97 | } | ||
98 | |||
99 | static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz) | ||
100 | { | ||
101 | if (LJ_UNLIKELY(sz > sbufsz(sb))) | ||
102 | return lj_buf_need2(sb, sz); | ||
103 | return sb->b; | ||
104 | } | ||
105 | |||
106 | static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz) | ||
107 | { | ||
108 | if (LJ_UNLIKELY(sz > sbufleft(sb))) | ||
109 | return lj_buf_more2(sb, sz); | ||
110 | return sb->w; | ||
111 | } | ||
112 | |||
113 | /* Extended buffer management */ | ||
114 | static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx) | ||
115 | { | ||
116 | memset(sbx, 0, sizeof(SBufExt)); | ||
117 | setsbufXL(sbx, L, SBUF_FLAG_EXT); | ||
118 | } | ||
119 | |||
120 | static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb) | ||
121 | { | ||
122 | setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW); | ||
123 | setmref(sbx->bsb, sb); | ||
124 | sbx->r = sbx->w = sbx->b = sb->b; | ||
125 | sbx->e = sb->e; | ||
126 | } | ||
127 | |||
128 | static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx, | ||
129 | const char *p, MSize len) | ||
130 | { | ||
131 | setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW); | ||
132 | sbx->r = sbx->b = (char *)p; | ||
133 | sbx->w = sbx->e = (char *)p + len; | ||
134 | } | ||
135 | |||
136 | static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx) | ||
137 | { | ||
138 | if (sbufiscow(sbx)) { | ||
139 | setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW)); | ||
140 | setgcrefnull(sbx->cowref); | ||
141 | sbx->b = sbx->e = NULL; | ||
142 | } | ||
143 | sbx->r = sbx->w = sbx->b; | ||
144 | } | ||
145 | |||
146 | static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx) | ||
147 | { | ||
148 | if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx)); | ||
149 | setsbufXL(sbx, L, SBUF_FLAG_EXT); | ||
150 | setgcrefnull(sbx->cowref); | ||
151 | sbx->r = sbx->w = sbx->b = sbx->e = NULL; | ||
152 | } | ||
153 | |||
154 | #if LJ_HASBUFFER && LJ_HASJIT | ||
155 | LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o); | ||
156 | #if LJ_HASFFI | ||
157 | LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz); | ||
158 | #endif | ||
159 | #endif | ||
160 | |||
161 | /* Low-level buffer put operations */ | ||
162 | LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len); | ||
163 | #if LJ_HASJIT || LJ_HASFFI | ||
164 | LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c); | ||
165 | #endif | ||
166 | LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s); | ||
167 | |||
168 | static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) | ||
169 | { | ||
170 | return (char *)memcpy(p, q, len) + len; | ||
171 | } | ||
172 | |||
173 | static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c) | ||
174 | { | ||
175 | char *w = lj_buf_more(sb, 1); | ||
176 | *w++ = (char)c; | ||
177 | sb->w = w; | ||
178 | } | ||
179 | |||
180 | /* High-level buffer put operations */ | ||
181 | LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s); | ||
182 | LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s); | ||
183 | LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s); | ||
184 | LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep); | ||
185 | LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, | ||
186 | int32_t i, int32_t e); | ||
187 | |||
188 | /* Miscellaneous buffer operations */ | ||
189 | LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb); | ||
190 | LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2); | ||
191 | LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp); | ||
192 | |||
193 | static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb) | ||
194 | { | ||
195 | return lj_str_new(L, sb->b, sbuflen(sb)); | ||
196 | } | ||
197 | |||
198 | #endif | ||
diff --git a/src/lj_carith.c b/src/lj_carith.c index 462dbae4..1a2a058f 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c | |||
@@ -11,10 +11,12 @@ | |||
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
13 | #include "lj_meta.h" | 13 | #include "lj_meta.h" |
14 | #include "lj_ir.h" | ||
14 | #include "lj_ctype.h" | 15 | #include "lj_ctype.h" |
15 | #include "lj_cconv.h" | 16 | #include "lj_cconv.h" |
16 | #include "lj_cdata.h" | 17 | #include "lj_cdata.h" |
17 | #include "lj_carith.h" | 18 | #include "lj_carith.h" |
19 | #include "lj_strscan.h" | ||
18 | 20 | ||
19 | /* -- C data arithmetic --------------------------------------------------- */ | 21 | /* -- C data arithmetic --------------------------------------------------- */ |
20 | 22 | ||
@@ -120,7 +122,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm) | |||
120 | setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); | 122 | setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); |
121 | return 1; | 123 | return 1; |
122 | } else { | 124 | } else { |
123 | lua_assert(mm == MM_le); | 125 | lj_assertL(mm == MM_le, "bad metamethod %d", mm); |
124 | setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); | 126 | setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); |
125 | return 1; | 127 | return 1; |
126 | } | 128 | } |
@@ -206,7 +208,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm) | |||
206 | *up = lj_carith_powu64(u0, u1); | 208 | *up = lj_carith_powu64(u0, u1); |
207 | break; | 209 | break; |
208 | case MM_unm: *up = (uint64_t)-(int64_t)u0; break; | 210 | case MM_unm: *up = (uint64_t)-(int64_t)u0; break; |
209 | default: lua_assert(0); break; | 211 | default: |
212 | lj_assertL(0, "bad metamethod %d", mm); | ||
213 | break; | ||
210 | } | 214 | } |
211 | lj_gc_check(L); | 215 | lj_gc_check(L); |
212 | return 1; | 216 | return 1; |
@@ -272,6 +276,81 @@ int lj_carith_op(lua_State *L, MMS mm) | |||
272 | return lj_carith_meta(L, cts, &ca, mm); | 276 | return lj_carith_meta(L, cts, &ca, mm); |
273 | } | 277 | } |
274 | 278 | ||
279 | /* -- 64 bit bit operations helpers --------------------------------------- */ | ||
280 | |||
281 | #if LJ_64 | ||
282 | #define B64DEF(name) \ | ||
283 | static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh) | ||
284 | #else | ||
285 | /* Not inlined on 32 bit archs, since some of these are quite lengthy. */ | ||
286 | #define B64DEF(name) \ | ||
287 | uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh) | ||
288 | #endif | ||
289 | |||
290 | B64DEF(shl64) { return x << (sh&63); } | ||
291 | B64DEF(shr64) { return x >> (sh&63); } | ||
292 | B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); } | ||
293 | B64DEF(rol64) { return lj_rol(x, (sh&63)); } | ||
294 | B64DEF(ror64) { return lj_ror(x, (sh&63)); } | ||
295 | |||
296 | #undef B64DEF | ||
297 | |||
298 | uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op) | ||
299 | { | ||
300 | switch (op) { | ||
301 | case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break; | ||
302 | case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break; | ||
303 | case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break; | ||
304 | case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break; | ||
305 | case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break; | ||
306 | default: | ||
307 | lj_assertX(0, "bad shift op %d", op); | ||
308 | break; | ||
309 | } | ||
310 | return x; | ||
311 | } | ||
312 | |||
313 | /* Equivalent to lj_lib_checkbit(), but handles cdata. */ | ||
314 | uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) | ||
315 | { | ||
316 | TValue *o = L->base + narg-1; | ||
317 | if (o >= L->top) { | ||
318 | err: | ||
319 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
320 | } else if (LJ_LIKELY(tvisnumber(o))) { | ||
321 | /* Handled below. */ | ||
322 | } else if (tviscdata(o)) { | ||
323 | CTState *cts = ctype_cts(L); | ||
324 | uint8_t *sp = (uint8_t *)cdataptr(cdataV(o)); | ||
325 | CTypeID sid = cdataV(o)->ctypeid; | ||
326 | CType *s = ctype_get(cts, sid); | ||
327 | uint64_t x; | ||
328 | if (ctype_isref(s->info)) { | ||
329 | sp = *(void **)sp; | ||
330 | sid = ctype_cid(s->info); | ||
331 | } | ||
332 | s = ctype_raw(cts, sid); | ||
333 | if (ctype_isenum(s->info)) s = ctype_child(cts, s); | ||
334 | if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == | ||
335 | CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8) | ||
336 | *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ | ||
337 | else if (!*id) | ||
338 | *id = CTID_INT64; /* Use int64_t, unless already set. */ | ||
339 | lj_cconv_ct_ct(cts, ctype_get(cts, *id), s, | ||
340 | (uint8_t *)&x, sp, CCF_ARG(narg)); | ||
341 | return x; | ||
342 | } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) { | ||
343 | goto err; | ||
344 | } | ||
345 | if (LJ_LIKELY(tvisint(o))) { | ||
346 | return (uint32_t)intV(o); | ||
347 | } else { | ||
348 | int32_t i = lj_num2bit(numV(o)); | ||
349 | if (LJ_DUALNUM) setintV(o, i); | ||
350 | return (uint32_t)i; | ||
351 | } | ||
352 | } | ||
353 | |||
275 | /* -- 64 bit integer arithmetic helpers ----------------------------------- */ | 354 | /* -- 64 bit integer arithmetic helpers ----------------------------------- */ |
276 | 355 | ||
277 | #if LJ_32 && LJ_HASJIT | 356 | #if LJ_32 && LJ_HASJIT |
diff --git a/src/lj_carith.h b/src/lj_carith.h index 269c60ea..9d6b1dc9 100644 --- a/src/lj_carith.h +++ b/src/lj_carith.h | |||
@@ -12,6 +12,16 @@ | |||
12 | 12 | ||
13 | LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); | 13 | LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); |
14 | 14 | ||
15 | #if LJ_32 | ||
16 | LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); | ||
17 | LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh); | ||
18 | LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh); | ||
19 | LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh); | ||
20 | LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh); | ||
21 | #endif | ||
22 | LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op); | ||
23 | LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id); | ||
24 | |||
15 | #if LJ_32 && LJ_HASJIT | 25 | #if LJ_32 && LJ_HASJIT |
16 | LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); | 26 | LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); |
17 | #endif | 27 | #endif |
diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 4a859c73..25f54dee 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | ||
13 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
14 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
15 | #include "lj_cconv.h" | 14 | #include "lj_cconv.h" |
@@ -291,56 +290,85 @@ | |||
291 | #define CCALL_HANDLE_RET \ | 290 | #define CCALL_HANDLE_RET \ |
292 | if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; | 291 | if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; |
293 | 292 | ||
294 | #elif LJ_TARGET_PPC | 293 | #elif LJ_TARGET_ARM64 |
295 | /* -- PPC calling conventions --------------------------------------------- */ | 294 | /* -- ARM64 calling conventions ------------------------------------------- */ |
296 | 295 | ||
297 | #define CCALL_HANDLE_STRUCTRET \ | 296 | #define CCALL_HANDLE_STRUCTRET \ |
298 | cc->retref = 1; /* Return all structs by reference. */ \ | 297 | cc->retref = !ccall_classify_struct(cts, ctr); \ |
299 | cc->gpr[ngpr++] = (GPRArg)dp; | 298 | if (cc->retref) cc->retp = dp; |
299 | |||
300 | #define CCALL_HANDLE_STRUCTRET2 \ | ||
301 | unsigned int cl = ccall_classify_struct(cts, ctr); \ | ||
302 | if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ | ||
303 | CTSize i = (cl >> 8) - 1; \ | ||
304 | do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \ | ||
305 | } else { \ | ||
306 | if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ | ||
307 | memcpy(dp, sp, ctr->size); \ | ||
308 | } | ||
300 | 309 | ||
301 | #define CCALL_HANDLE_COMPLEXRET \ | 310 | #define CCALL_HANDLE_COMPLEXRET \ |
302 | /* Complex values are returned in 2 or 4 GPRs. */ \ | 311 | /* Complex values are returned in one or two FPRs. */ \ |
303 | cc->retref = 0; | 312 | cc->retref = 0; |
304 | 313 | ||
305 | #define CCALL_HANDLE_COMPLEXRET2 \ | 314 | #define CCALL_HANDLE_COMPLEXRET2 \ |
306 | memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ | 315 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ |
316 | ((float *)dp)[0] = cc->fpr[0].f; \ | ||
317 | ((float *)dp)[1] = cc->fpr[1].f; \ | ||
318 | } else { /* Copy complex double from FPRs. */ \ | ||
319 | ((double *)dp)[0] = cc->fpr[0].d; \ | ||
320 | ((double *)dp)[1] = cc->fpr[1].d; \ | ||
321 | } | ||
307 | 322 | ||
308 | #define CCALL_HANDLE_STRUCTARG \ | 323 | #define CCALL_HANDLE_STRUCTARG \ |
309 | rp = cdataptr(lj_cdata_new(cts, did, sz)); \ | 324 | unsigned int cl = ccall_classify_struct(cts, d); \ |
310 | sz = CTSIZE_PTR; /* Pass all structs by reference. */ | 325 | if (cl == 0) { /* Pass struct by reference. */ \ |
326 | rp = cdataptr(lj_cdata_new(cts, did, sz)); \ | ||
327 | sz = CTSIZE_PTR; \ | ||
328 | } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \ | ||
329 | isfp = (cl & 4) ? 2 : 1; \ | ||
330 | } /* else: Pass struct in GPRs or on stack. */ | ||
311 | 331 | ||
312 | #define CCALL_HANDLE_COMPLEXARG \ | 332 | #define CCALL_HANDLE_COMPLEXARG \ |
313 | /* Pass complex by value in 2 or 4 GPRs. */ | 333 | /* Pass complex by value in separate (!) FPRs or on stack. */ \ |
334 | isfp = sz == 2*sizeof(float) ? 2 : 1; | ||
314 | 335 | ||
315 | #define CCALL_HANDLE_REGARG \ | 336 | #define CCALL_HANDLE_REGARG \ |
316 | if (isfp) { /* Try to pass argument in FPRs. */ \ | 337 | if (LJ_TARGET_OSX && isva) { \ |
317 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | 338 | /* IOS: All variadic arguments are on the stack. */ \ |
339 | } else if (isfp) { /* Try to pass argument in FPRs. */ \ | ||
340 | int n2 = ctype_isvector(d->info) ? 1 : \ | ||
341 | isfp == 1 ? n : (d->size >> (4-isfp)); \ | ||
342 | if (nfpr + n2 <= CCALL_NARG_FPR) { \ | ||
318 | dp = &cc->fpr[nfpr]; \ | 343 | dp = &cc->fpr[nfpr]; \ |
319 | nfpr += 1; \ | 344 | nfpr += n2; \ |
320 | d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | ||
321 | goto done; \ | 345 | goto done; \ |
346 | } else { \ | ||
347 | nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ | ||
348 | if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ | ||
322 | } \ | 349 | } \ |
323 | } else { /* Try to pass argument in GPRs. */ \ | 350 | } else { /* Try to pass argument in GPRs. */ \ |
324 | if (n > 1) { \ | 351 | if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ |
325 | lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ | 352 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ |
326 | if (ctype_isinteger(d->info)) \ | ||
327 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ | ||
328 | else if (ngpr + n > maxgpr) \ | ||
329 | ngpr = maxgpr; /* Prevent reordering. */ \ | ||
330 | } \ | ||
331 | if (ngpr + n <= maxgpr) { \ | 353 | if (ngpr + n <= maxgpr) { \ |
332 | dp = &cc->gpr[ngpr]; \ | 354 | dp = &cc->gpr[ngpr]; \ |
333 | ngpr += n; \ | 355 | ngpr += n; \ |
334 | goto done; \ | 356 | goto done; \ |
357 | } else { \ | ||
358 | ngpr = maxgpr; /* Prevent reordering. */ \ | ||
359 | if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ | ||
335 | } \ | 360 | } \ |
336 | } | 361 | } |
337 | 362 | ||
363 | #if LJ_BE | ||
338 | #define CCALL_HANDLE_RET \ | 364 | #define CCALL_HANDLE_RET \ |
339 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 365 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
340 | ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ | 366 | sp = (uint8_t *)&cc->fpr[0].f; |
367 | #endif | ||
341 | 368 | ||
342 | #elif LJ_TARGET_PPCSPE | 369 | |
343 | /* -- PPC/SPE calling conventions ----------------------------------------- */ | 370 | #elif LJ_TARGET_PPC |
371 | /* -- PPC calling conventions --------------------------------------------- */ | ||
344 | 372 | ||
345 | #define CCALL_HANDLE_STRUCTRET \ | 373 | #define CCALL_HANDLE_STRUCTRET \ |
346 | cc->retref = 1; /* Return all structs by reference. */ \ | 374 | cc->retref = 1; /* Return all structs by reference. */ \ |
@@ -360,12 +388,13 @@ | |||
360 | #define CCALL_HANDLE_COMPLEXARG \ | 388 | #define CCALL_HANDLE_COMPLEXARG \ |
361 | /* Pass complex by value in 2 or 4 GPRs. */ | 389 | /* Pass complex by value in 2 or 4 GPRs. */ |
362 | 390 | ||
363 | /* PPC/SPE has a softfp ABI. */ | 391 | #define CCALL_HANDLE_GPR \ |
364 | #define CCALL_HANDLE_REGARG \ | 392 | /* Try to pass argument in GPRs. */ \ |
365 | if (n > 1) { /* Doesn't fit in a single GPR? */ \ | 393 | if (n > 1) { \ |
366 | lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \ | 394 | /* int64_t or complex (float). */ \ |
367 | if (n == 2) \ | 395 | lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \ |
368 | ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \ | 396 | if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ |
397 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ | ||
369 | else if (ngpr + n > maxgpr) \ | 398 | else if (ngpr + n > maxgpr) \ |
370 | ngpr = maxgpr; /* Prevent reordering. */ \ | 399 | ngpr = maxgpr; /* Prevent reordering. */ \ |
371 | } \ | 400 | } \ |
@@ -373,10 +402,32 @@ | |||
373 | dp = &cc->gpr[ngpr]; \ | 402 | dp = &cc->gpr[ngpr]; \ |
374 | ngpr += n; \ | 403 | ngpr += n; \ |
375 | goto done; \ | 404 | goto done; \ |
405 | } \ | ||
406 | |||
407 | #if LJ_ABI_SOFTFP | ||
408 | #define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR | ||
409 | #else | ||
410 | #define CCALL_HANDLE_REGARG \ | ||
411 | if (isfp) { /* Try to pass argument in FPRs. */ \ | ||
412 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | ||
413 | dp = &cc->fpr[nfpr]; \ | ||
414 | nfpr += 1; \ | ||
415 | d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | ||
416 | goto done; \ | ||
417 | } \ | ||
418 | } else { \ | ||
419 | CCALL_HANDLE_GPR \ | ||
376 | } | 420 | } |
421 | #endif | ||
377 | 422 | ||
378 | #elif LJ_TARGET_MIPS | 423 | #if !LJ_ABI_SOFTFP |
379 | /* -- MIPS calling conventions -------------------------------------------- */ | 424 | #define CCALL_HANDLE_RET \ |
425 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | ||
426 | ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ | ||
427 | #endif | ||
428 | |||
429 | #elif LJ_TARGET_MIPS32 | ||
430 | /* -- MIPS o32 calling conventions ---------------------------------------- */ | ||
380 | 431 | ||
381 | #define CCALL_HANDLE_STRUCTRET \ | 432 | #define CCALL_HANDLE_STRUCTRET \ |
382 | cc->retref = 1; /* Return all structs by reference. */ \ | 433 | cc->retref = 1; /* Return all structs by reference. */ \ |
@@ -386,6 +437,18 @@ | |||
386 | /* Complex values are returned in 1 or 2 FPRs. */ \ | 437 | /* Complex values are returned in 1 or 2 FPRs. */ \ |
387 | cc->retref = 0; | 438 | cc->retref = 0; |
388 | 439 | ||
440 | #if LJ_ABI_SOFTFP | ||
441 | #define CCALL_HANDLE_COMPLEXRET2 \ | ||
442 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ | ||
443 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
444 | ((intptr_t *)dp)[1] = cc->gpr[1]; \ | ||
445 | } else { /* Copy complex double from GPRs. */ \ | ||
446 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
447 | ((intptr_t *)dp)[1] = cc->gpr[1]; \ | ||
448 | ((intptr_t *)dp)[2] = cc->gpr[2]; \ | ||
449 | ((intptr_t *)dp)[3] = cc->gpr[3]; \ | ||
450 | } | ||
451 | #else | ||
389 | #define CCALL_HANDLE_COMPLEXRET2 \ | 452 | #define CCALL_HANDLE_COMPLEXRET2 \ |
390 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ | 453 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ |
391 | ((float *)dp)[0] = cc->fpr[0].f; \ | 454 | ((float *)dp)[0] = cc->fpr[0].f; \ |
@@ -394,6 +457,7 @@ | |||
394 | ((double *)dp)[0] = cc->fpr[0].d; \ | 457 | ((double *)dp)[0] = cc->fpr[0].d; \ |
395 | ((double *)dp)[1] = cc->fpr[1].d; \ | 458 | ((double *)dp)[1] = cc->fpr[1].d; \ |
396 | } | 459 | } |
460 | #endif | ||
397 | 461 | ||
398 | #define CCALL_HANDLE_STRUCTARG \ | 462 | #define CCALL_HANDLE_STRUCTARG \ |
399 | /* Pass all structs by value in registers and/or on the stack. */ | 463 | /* Pass all structs by value in registers and/or on the stack. */ |
@@ -401,6 +465,22 @@ | |||
401 | #define CCALL_HANDLE_COMPLEXARG \ | 465 | #define CCALL_HANDLE_COMPLEXARG \ |
402 | /* Pass complex by value in 2 or 4 GPRs. */ | 466 | /* Pass complex by value in 2 or 4 GPRs. */ |
403 | 467 | ||
468 | #define CCALL_HANDLE_GPR \ | ||
469 | if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ | ||
470 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | ||
471 | if (ngpr < maxgpr) { \ | ||
472 | dp = &cc->gpr[ngpr]; \ | ||
473 | if (ngpr + n > maxgpr) { \ | ||
474 | nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ | ||
475 | if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ | ||
476 | ngpr = maxgpr; \ | ||
477 | } else { \ | ||
478 | ngpr += n; \ | ||
479 | } \ | ||
480 | goto done; \ | ||
481 | } | ||
482 | |||
483 | #if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ | ||
404 | #define CCALL_HANDLE_REGARG \ | 484 | #define CCALL_HANDLE_REGARG \ |
405 | if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ | 485 | if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ |
406 | /* Try to pass argument in FPRs. */ \ | 486 | /* Try to pass argument in FPRs. */ \ |
@@ -409,25 +489,91 @@ | |||
409 | goto done; \ | 489 | goto done; \ |
410 | } else { /* Try to pass argument in GPRs. */ \ | 490 | } else { /* Try to pass argument in GPRs. */ \ |
411 | nfpr = CCALL_NARG_FPR; \ | 491 | nfpr = CCALL_NARG_FPR; \ |
412 | if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ | 492 | CCALL_HANDLE_GPR \ |
413 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | 493 | } |
414 | if (ngpr < maxgpr) { \ | 494 | #else /* MIPS32 soft-float */ |
415 | dp = &cc->gpr[ngpr]; \ | 495 | #define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR |
416 | if (ngpr + n > maxgpr) { \ | 496 | #endif |
417 | nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ | 497 | |
418 | if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ | 498 | #if !LJ_ABI_SOFTFP |
419 | ngpr = maxgpr; \ | 499 | /* On MIPS64 soft-float, position of float return values is endian-dependent. */ |
420 | } else { \ | 500 | #define CCALL_HANDLE_RET \ |
421 | ngpr += n; \ | 501 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
422 | } \ | 502 | sp = (uint8_t *)&cc->fpr[0].f; |
423 | goto done; \ | 503 | #endif |
424 | } \ | 504 | |
505 | #elif LJ_TARGET_MIPS64 | ||
506 | /* -- MIPS n64 calling conventions ---------------------------------------- */ | ||
507 | |||
508 | #define CCALL_HANDLE_STRUCTRET \ | ||
509 | cc->retref = !(sz <= 16); \ | ||
510 | if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; | ||
511 | |||
512 | #define CCALL_HANDLE_STRUCTRET2 \ | ||
513 | ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); | ||
514 | |||
515 | #define CCALL_HANDLE_COMPLEXRET \ | ||
516 | /* Complex values are returned in 1 or 2 FPRs. */ \ | ||
517 | cc->retref = 0; | ||
518 | |||
519 | #if LJ_ABI_SOFTFP /* MIPS64 soft-float */ | ||
520 | |||
521 | #define CCALL_HANDLE_COMPLEXRET2 \ | ||
522 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ | ||
523 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
524 | } else { /* Copy complex double from GPRs. */ \ | ||
525 | ((intptr_t *)dp)[0] = cc->gpr[0]; \ | ||
526 | ((intptr_t *)dp)[1] = cc->gpr[1]; \ | ||
527 | } | ||
528 | |||
529 | #define CCALL_HANDLE_COMPLEXARG \ | ||
530 | /* Pass complex by value in 2 or 4 GPRs. */ | ||
531 | |||
532 | /* Position of soft-float 'float' return value depends on endianness. */ | ||
533 | #define CCALL_HANDLE_RET \ | ||
534 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | ||
535 | sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); | ||
536 | |||
537 | #else /* MIPS64 hard-float */ | ||
538 | |||
539 | #define CCALL_HANDLE_COMPLEXRET2 \ | ||
540 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ | ||
541 | ((float *)dp)[0] = cc->fpr[0].f; \ | ||
542 | ((float *)dp)[1] = cc->fpr[1].f; \ | ||
543 | } else { /* Copy complex double from FPRs. */ \ | ||
544 | ((double *)dp)[0] = cc->fpr[0].d; \ | ||
545 | ((double *)dp)[1] = cc->fpr[1].d; \ | ||
546 | } | ||
547 | |||
548 | #define CCALL_HANDLE_COMPLEXARG \ | ||
549 | if (sz == 2*sizeof(float)) { \ | ||
550 | isfp = 2; \ | ||
551 | if (ngpr < maxgpr) \ | ||
552 | sz *= 2; \ | ||
425 | } | 553 | } |
426 | 554 | ||
427 | #define CCALL_HANDLE_RET \ | 555 | #define CCALL_HANDLE_RET \ |
428 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 556 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
429 | sp = (uint8_t *)&cc->fpr[0].f; | 557 | sp = (uint8_t *)&cc->fpr[0].f; |
430 | 558 | ||
559 | #endif | ||
560 | |||
561 | #define CCALL_HANDLE_STRUCTARG \ | ||
562 | /* Pass all structs by value in registers and/or on the stack. */ | ||
563 | |||
564 | #define CCALL_HANDLE_REGARG \ | ||
565 | if (ngpr < maxgpr) { \ | ||
566 | dp = &cc->gpr[ngpr]; \ | ||
567 | if (ngpr + n > maxgpr) { \ | ||
568 | nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ | ||
569 | if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ | ||
570 | ngpr = maxgpr; \ | ||
571 | } else { \ | ||
572 | ngpr += n; \ | ||
573 | } \ | ||
574 | goto done; \ | ||
575 | } | ||
576 | |||
431 | #else | 577 | #else |
432 | #error "Missing calling convention definitions for this architecture" | 578 | #error "Missing calling convention definitions for this architecture" |
433 | #endif | 579 | #endif |
@@ -497,7 +643,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs) | |||
497 | ccall_classify_struct(cts, ct, rcl, ofs); | 643 | ccall_classify_struct(cts, ct, rcl, ofs); |
498 | } else { | 644 | } else { |
499 | int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; | 645 | int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; |
500 | lua_assert(ctype_hassize(ct->info)); | 646 | lj_assertCTS(ctype_hassize(ct->info), |
647 | "classify ctype %08x without size", ct->info); | ||
501 | if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ | 648 | if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ |
502 | rcl[(ofs >= 8)] |= cl; | 649 | rcl[(ofs >= 8)] |= cl; |
503 | } | 650 | } |
@@ -522,12 +669,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs) | |||
522 | } | 669 | } |
523 | 670 | ||
524 | /* Try to split up a small struct into registers. */ | 671 | /* Try to split up a small struct into registers. */ |
525 | static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl) | 672 | static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl) |
526 | { | 673 | { |
527 | MSize ngpr = cc->ngpr, nfpr = cc->nfpr; | 674 | MSize ngpr = cc->ngpr, nfpr = cc->nfpr; |
528 | uint32_t i; | 675 | uint32_t i; |
676 | UNUSED(cts); | ||
529 | for (i = 0; i < 2; i++) { | 677 | for (i = 0; i < 2; i++) { |
530 | lua_assert(!(rcl[i] & CCALL_RCL_MEM)); | 678 | lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg"); |
531 | if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ | 679 | if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ |
532 | if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */ | 680 | if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */ |
533 | cc->gpr[ngpr++] = dp[i]; | 681 | cc->gpr[ngpr++] = dp[i]; |
@@ -548,7 +696,8 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl, | |||
548 | dp[0] = dp[1] = 0; | 696 | dp[0] = dp[1] = 0; |
549 | /* Convert to temp. struct. */ | 697 | /* Convert to temp. struct. */ |
550 | lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); | 698 | lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); |
551 | if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */ | 699 | if (ccall_struct_reg(cc, cts, dp, rcl)) { |
700 | /* Register overflow? Pass on stack. */ | ||
552 | MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; | 701 | MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; |
553 | if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ | 702 | if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ |
554 | cc->nsp = nsp + n; | 703 | cc->nsp = nsp + n; |
@@ -621,6 +770,125 @@ noth: /* Not a homogeneous float/double aggregate. */ | |||
621 | 770 | ||
622 | #endif | 771 | #endif |
623 | 772 | ||
773 | /* -- ARM64 ABI struct classification ------------------------------------- */ | ||
774 | |||
775 | #if LJ_TARGET_ARM64 | ||
776 | |||
777 | /* Classify a struct based on its fields. */ | ||
778 | static unsigned int ccall_classify_struct(CTState *cts, CType *ct) | ||
779 | { | ||
780 | CTSize sz = ct->size; | ||
781 | unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); | ||
782 | while (ct->sib) { | ||
783 | CType *sct; | ||
784 | ct = ctype_get(cts, ct->sib); | ||
785 | if (ctype_isfield(ct->info)) { | ||
786 | sct = ctype_rawchild(cts, ct); | ||
787 | if (ctype_isfp(sct->info)) { | ||
788 | r |= sct->size; | ||
789 | if (!isu) n++; else if (n == 0) n = 1; | ||
790 | } else if (ctype_iscomplex(sct->info)) { | ||
791 | r |= (sct->size >> 1); | ||
792 | if (!isu) n += 2; else if (n < 2) n = 2; | ||
793 | } else if (ctype_isstruct(sct->info)) { | ||
794 | goto substruct; | ||
795 | } else { | ||
796 | goto noth; | ||
797 | } | ||
798 | } else if (ctype_isbitfield(ct->info)) { | ||
799 | goto noth; | ||
800 | } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { | ||
801 | sct = ctype_rawchild(cts, ct); | ||
802 | substruct: | ||
803 | if (sct->size > 0) { | ||
804 | unsigned int s = ccall_classify_struct(cts, sct); | ||
805 | if (s <= 1) goto noth; | ||
806 | r |= (s & 255); | ||
807 | if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); | ||
808 | } | ||
809 | } | ||
810 | } | ||
811 | if ((r == 4 || r == 8) && n <= 4) | ||
812 | return r + (n << 8); | ||
813 | noth: /* Not a homogeneous float/double aggregate. */ | ||
814 | return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ | ||
815 | } | ||
816 | |||
817 | #endif | ||
818 | |||
819 | /* -- MIPS64 ABI struct classification ---------------------------- */ | ||
820 | |||
821 | #if LJ_TARGET_MIPS64 | ||
822 | |||
823 | #define FTYPE_FLOAT 1 | ||
824 | #define FTYPE_DOUBLE 2 | ||
825 | |||
826 | /* Classify FP fields (max. 2) and their types. */ | ||
827 | static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) | ||
828 | { | ||
829 | int n = 0, ft = 0; | ||
830 | if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) | ||
831 | goto noth; | ||
832 | while (ct->sib) { | ||
833 | CType *sct; | ||
834 | ct = ctype_get(cts, ct->sib); | ||
835 | if (n == 2) { | ||
836 | goto noth; | ||
837 | } else if (ctype_isfield(ct->info)) { | ||
838 | sct = ctype_rawchild(cts, ct); | ||
839 | if (ctype_isfp(sct->info)) { | ||
840 | ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; | ||
841 | n++; | ||
842 | } else { | ||
843 | goto noth; | ||
844 | } | ||
845 | } else if (ctype_isbitfield(ct->info) || | ||
846 | ctype_isxattrib(ct->info, CTA_SUBTYPE)) { | ||
847 | goto noth; | ||
848 | } | ||
849 | } | ||
850 | if (n <= 2) | ||
851 | return ft; | ||
852 | noth: /* Not a homogeneous float/double aggregate. */ | ||
853 | return 0; /* Struct is in GPRs. */ | ||
854 | } | ||
855 | |||
856 | static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, | ||
857 | int ft) | ||
858 | { | ||
859 | if (LJ_ABI_SOFTFP ? ft : | ||
860 | ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { | ||
861 | int i, ofs = 0; | ||
862 | for (i = 0; ft != 0; i++, ft >>= 2) { | ||
863 | if ((ft & 3) == FTYPE_FLOAT) { | ||
864 | #if LJ_ABI_SOFTFP | ||
865 | /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ | ||
866 | memcpy((uint8_t *)dp + ofs, | ||
867 | (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4); | ||
868 | #else | ||
869 | *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; | ||
870 | #endif | ||
871 | ofs += 4; | ||
872 | } else { | ||
873 | ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ | ||
874 | #if LJ_ABI_SOFTFP | ||
875 | *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; | ||
876 | #else | ||
877 | *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; | ||
878 | #endif | ||
879 | ofs += 8; | ||
880 | } | ||
881 | } | ||
882 | } else { | ||
883 | #if !LJ_ABI_SOFTFP | ||
884 | if (ft) sp = (uint8_t *)&cc->fpr[0]; | ||
885 | #endif | ||
886 | memcpy(dp, sp, ctr->size); | ||
887 | } | ||
888 | } | ||
889 | |||
890 | #endif | ||
891 | |||
624 | /* -- Common C call handling ---------------------------------------------- */ | 892 | /* -- Common C call handling ---------------------------------------------- */ |
625 | 893 | ||
626 | /* Infer the destination CTypeID for a vararg argument. */ | 894 | /* Infer the destination CTypeID for a vararg argument. */ |
@@ -726,7 +994,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, | |||
726 | if (fid) { /* Get argument type from field. */ | 994 | if (fid) { /* Get argument type from field. */ |
727 | CType *ctf = ctype_get(cts, fid); | 995 | CType *ctf = ctype_get(cts, fid); |
728 | fid = ctf->sib; | 996 | fid = ctf->sib; |
729 | lua_assert(ctype_isfield(ctf->info)); | 997 | lj_assertL(ctype_isfield(ctf->info), "field expected"); |
730 | did = ctype_cid(ctf->info); | 998 | did = ctype_cid(ctf->info); |
731 | } else { | 999 | } else { |
732 | if (!(ct->info & CTF_VARARG)) | 1000 | if (!(ct->info & CTF_VARARG)) |
@@ -788,6 +1056,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, | |||
788 | *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : | 1056 | *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : |
789 | (int32_t)*(int16_t *)dp; | 1057 | (int32_t)*(int16_t *)dp; |
790 | } | 1058 | } |
1059 | #if LJ_TARGET_ARM64 && LJ_BE | ||
1060 | if (isfp && d->size == sizeof(float)) | ||
1061 | ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ | ||
1062 | #endif | ||
1063 | #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) | ||
1064 | if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) | ||
1065 | #if LJ_TARGET_MIPS64 | ||
1066 | || (isfp && nsp == 0) | ||
1067 | #endif | ||
1068 | ) && d->size <= 4) { | ||
1069 | *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ | ||
1070 | } | ||
1071 | #endif | ||
791 | #if LJ_TARGET_X64 && LJ_ABI_WIN | 1072 | #if LJ_TARGET_X64 && LJ_ABI_WIN |
792 | if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ | 1073 | if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ |
793 | if (nfpr == ngpr) | 1074 | if (nfpr == ngpr) |
@@ -803,13 +1084,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, | |||
803 | cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ | 1084 | cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ |
804 | cc->fpr[nfpr-2].d[1] = 0; | 1085 | cc->fpr[nfpr-2].d[1] = 0; |
805 | } | 1086 | } |
1087 | #elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) | ||
1088 | if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { | ||
1089 | /* Split float HFA or complex float into separate registers. */ | ||
1090 | CTSize i = (sz >> 2) - 1; | ||
1091 | do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); | ||
1092 | } | ||
806 | #else | 1093 | #else |
807 | UNUSED(isfp); | 1094 | UNUSED(isfp); |
808 | #endif | 1095 | #endif |
809 | } | 1096 | } |
810 | if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ | 1097 | if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ |
811 | 1098 | ||
812 | #if LJ_TARGET_X64 || LJ_TARGET_PPC | 1099 | #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) |
813 | cc->nfpr = nfpr; /* Required for vararg functions. */ | 1100 | cc->nfpr = nfpr; /* Required for vararg functions. */ |
814 | #endif | 1101 | #endif |
815 | cc->nsp = nsp; | 1102 | cc->nsp = nsp; |
@@ -844,7 +1131,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, | |||
844 | CCALL_HANDLE_COMPLEXRET2 | 1131 | CCALL_HANDLE_COMPLEXRET2 |
845 | return 1; /* One GC step. */ | 1132 | return 1; /* One GC step. */ |
846 | } | 1133 | } |
847 | if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) | 1134 | if (LJ_BE && ctr->size < CTSIZE_PTR && |
1135 | (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) | ||
848 | sp += (CTSIZE_PTR - ctr->size); | 1136 | sp += (CTSIZE_PTR - ctr->size); |
849 | #if CCALL_NUM_FPR | 1137 | #if CCALL_NUM_FPR |
850 | if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) | 1138 | if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) |
@@ -854,7 +1142,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, | |||
854 | CCALL_HANDLE_RET | 1142 | CCALL_HANDLE_RET |
855 | #endif | 1143 | #endif |
856 | /* No reference types end up here, so there's no need for the CTypeID. */ | 1144 | /* No reference types end up here, so there's no need for the CTypeID. */ |
857 | lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info))); | 1145 | lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)), |
1146 | "unexpected reference ctype"); | ||
858 | return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); | 1147 | return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); |
859 | } | 1148 | } |
860 | 1149 | ||
@@ -878,7 +1167,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) | |||
878 | lj_vm_ffi_call(&cc); | 1167 | lj_vm_ffi_call(&cc); |
879 | if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ | 1168 | if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ |
880 | TValue tv; | 1169 | TValue tv; |
881 | setlightudV(&tv, (void *)cc.func); | 1170 | tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000); |
882 | setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); | 1171 | setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); |
883 | } | 1172 | } |
884 | ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ | 1173 | ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ |
diff --git a/src/lj_ccall.h b/src/lj_ccall.h index b46483f1..0b3c5244 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h | |||
@@ -68,35 +68,56 @@ typedef union FPRArg { | |||
68 | float f[2]; | 68 | float f[2]; |
69 | } FPRArg; | 69 | } FPRArg; |
70 | 70 | ||
71 | #elif LJ_TARGET_PPC | 71 | #elif LJ_TARGET_ARM64 |
72 | 72 | ||
73 | #define CCALL_NARG_GPR 8 | 73 | #define CCALL_NARG_GPR 8 |
74 | #define CCALL_NRET_GPR 2 | ||
74 | #define CCALL_NARG_FPR 8 | 75 | #define CCALL_NARG_FPR 8 |
76 | #define CCALL_NRET_FPR 4 | ||
77 | #define CCALL_SPS_FREE 0 | ||
78 | |||
79 | typedef intptr_t GPRArg; | ||
80 | typedef union FPRArg { | ||
81 | double d; | ||
82 | struct { LJ_ENDIAN_LOHI(float f; , float g;) }; | ||
83 | struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; | ||
84 | } FPRArg; | ||
85 | |||
86 | #elif LJ_TARGET_PPC | ||
87 | |||
88 | #define CCALL_NARG_GPR 8 | ||
89 | #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) | ||
75 | #define CCALL_NRET_GPR 4 /* For complex double. */ | 90 | #define CCALL_NRET_GPR 4 /* For complex double. */ |
76 | #define CCALL_NRET_FPR 1 | 91 | #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) |
77 | #define CCALL_SPS_EXTRA 4 | 92 | #define CCALL_SPS_EXTRA 4 |
78 | #define CCALL_SPS_FREE 0 | 93 | #define CCALL_SPS_FREE 0 |
79 | 94 | ||
80 | typedef intptr_t GPRArg; | 95 | typedef intptr_t GPRArg; |
81 | typedef double FPRArg; | 96 | typedef double FPRArg; |
82 | 97 | ||
83 | #elif LJ_TARGET_PPCSPE | 98 | #elif LJ_TARGET_MIPS32 |
84 | 99 | ||
85 | #define CCALL_NARG_GPR 8 | 100 | #define CCALL_NARG_GPR 4 |
86 | #define CCALL_NARG_FPR 0 | 101 | #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) |
87 | #define CCALL_NRET_GPR 4 /* For softfp complex double. */ | 102 | #define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2) |
88 | #define CCALL_NRET_FPR 0 | 103 | #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) |
89 | #define CCALL_SPS_FREE 0 /* NYI */ | 104 | #define CCALL_SPS_EXTRA 7 |
105 | #define CCALL_SPS_FREE 1 | ||
90 | 106 | ||
91 | typedef intptr_t GPRArg; | 107 | typedef intptr_t GPRArg; |
108 | typedef union FPRArg { | ||
109 | double d; | ||
110 | struct { LJ_ENDIAN_LOHI(float f; , float g;) }; | ||
111 | } FPRArg; | ||
92 | 112 | ||
93 | #elif LJ_TARGET_MIPS | 113 | #elif LJ_TARGET_MIPS64 |
94 | 114 | ||
95 | #define CCALL_NARG_GPR 4 | 115 | /* FP args are positional and overlay the GPR array. */ |
96 | #define CCALL_NARG_FPR 2 | 116 | #define CCALL_NARG_GPR 8 |
117 | #define CCALL_NARG_FPR 0 | ||
97 | #define CCALL_NRET_GPR 2 | 118 | #define CCALL_NRET_GPR 2 |
98 | #define CCALL_NRET_FPR 2 | 119 | #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) |
99 | #define CCALL_SPS_EXTRA 7 | 120 | #define CCALL_SPS_EXTRA 3 |
100 | #define CCALL_SPS_FREE 1 | 121 | #define CCALL_SPS_FREE 1 |
101 | 122 | ||
102 | typedef intptr_t GPRArg; | 123 | typedef intptr_t GPRArg; |
@@ -145,6 +166,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { | |||
145 | uint8_t nfpr; /* Number of arguments in FPRs. */ | 166 | uint8_t nfpr; /* Number of arguments in FPRs. */ |
146 | #elif LJ_TARGET_X86 | 167 | #elif LJ_TARGET_X86 |
147 | uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ | 168 | uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ |
169 | #elif LJ_TARGET_ARM64 | ||
170 | void *retp; /* Aggregate return pointer in x8. */ | ||
148 | #elif LJ_TARGET_PPC | 171 | #elif LJ_TARGET_PPC |
149 | uint8_t nfpr; /* Number of arguments in FPRs. */ | 172 | uint8_t nfpr; /* Number of arguments in FPRs. */ |
150 | #endif | 173 | #endif |
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 5a6785c6..43e44305 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | #if LJ_OS_NOJIT | 28 | #if LJ_OS_NOJIT |
29 | 29 | ||
30 | /* Disabled callback support. */ | 30 | /* Callbacks disabled. */ |
31 | #define CALLBACK_SLOT2OFS(slot) (0*(slot)) | 31 | #define CALLBACK_SLOT2OFS(slot) (0*(slot)) |
32 | #define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) | 32 | #define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) |
33 | #define CALLBACK_MAX_SLOT 0 | 33 | #define CALLBACK_MAX_SLOT 0 |
@@ -35,7 +35,7 @@ | |||
35 | #elif LJ_TARGET_X86ORX64 | 35 | #elif LJ_TARGET_X86ORX64 |
36 | 36 | ||
37 | #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) | 37 | #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) |
38 | #define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5)) | 38 | #define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5)) |
39 | 39 | ||
40 | #define CALLBACK_SLOT2OFS(slot) \ | 40 | #define CALLBACK_SLOT2OFS(slot) \ |
41 | (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) | 41 | (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) |
@@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) | |||
54 | #elif LJ_TARGET_ARM | 54 | #elif LJ_TARGET_ARM |
55 | 55 | ||
56 | #define CALLBACK_MCODE_HEAD 32 | 56 | #define CALLBACK_MCODE_HEAD 32 |
57 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | 57 | |
58 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | 58 | #elif LJ_TARGET_ARM64 |
59 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | 59 | |
60 | #define CALLBACK_MCODE_HEAD 32 | ||
60 | 61 | ||
61 | #elif LJ_TARGET_PPC | 62 | #elif LJ_TARGET_PPC |
62 | 63 | ||
63 | #define CALLBACK_MCODE_HEAD 24 | 64 | #define CALLBACK_MCODE_HEAD 24 |
64 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | ||
65 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | ||
66 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | ||
67 | 65 | ||
68 | #elif LJ_TARGET_MIPS | 66 | #elif LJ_TARGET_MIPS32 |
69 | 67 | ||
70 | #define CALLBACK_MCODE_HEAD 24 | 68 | #define CALLBACK_MCODE_HEAD 20 |
71 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | 69 | |
72 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | 70 | #elif LJ_TARGET_MIPS64 |
73 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | 71 | |
72 | #define CALLBACK_MCODE_HEAD 52 | ||
74 | 73 | ||
75 | #else | 74 | #else |
76 | 75 | ||
@@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) | |||
81 | 80 | ||
82 | #endif | 81 | #endif |
83 | 82 | ||
83 | #ifndef CALLBACK_SLOT2OFS | ||
84 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | ||
85 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | ||
86 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | ||
87 | #endif | ||
88 | |||
84 | /* Convert callback slot number to callback function pointer. */ | 89 | /* Convert callback slot number to callback function pointer. */ |
85 | static void *callback_slot2ptr(CTState *cts, MSize slot) | 90 | static void *callback_slot2ptr(CTState *cts, MSize slot) |
86 | { | 91 | { |
@@ -102,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p) | |||
102 | /* Initialize machine code for callback function pointers. */ | 107 | /* Initialize machine code for callback function pointers. */ |
103 | #if LJ_OS_NOJIT | 108 | #if LJ_OS_NOJIT |
104 | /* Disabled callback support. */ | 109 | /* Disabled callback support. */ |
105 | #define callback_mcode_init(g, p) UNUSED(p) | 110 | #define callback_mcode_init(g, p) (p) |
106 | #elif LJ_TARGET_X86ORX64 | 111 | #elif LJ_TARGET_X86ORX64 |
107 | static void callback_mcode_init(global_State *g, uint8_t *page) | 112 | static void *callback_mcode_init(global_State *g, uint8_t *page) |
108 | { | 113 | { |
109 | uint8_t *p = page; | 114 | uint8_t *p = page; |
110 | uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; | 115 | uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; |
@@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page) | |||
119 | /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ | 124 | /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ |
120 | *p++ = XI_PUSH + RID_EBP; | 125 | *p++ = XI_PUSH + RID_EBP; |
121 | *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); | 126 | *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); |
127 | #if LJ_GC64 | ||
128 | *p++ = 0x48; *p++ = XI_MOVri | RID_EBP; | ||
129 | *(uint64_t *)p = (uint64_t)(g); p += 8; | ||
130 | #else | ||
122 | *p++ = XI_MOVri | RID_EBP; | 131 | *p++ = XI_MOVri | RID_EBP; |
123 | *(int32_t *)p = i32ptr(g); p += 4; | 132 | *(int32_t *)p = i32ptr(g); p += 4; |
133 | #endif | ||
124 | #if LJ_64 | 134 | #if LJ_64 |
125 | /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ | 135 | /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ |
126 | *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; | 136 | *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; |
@@ -133,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page) | |||
133 | *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); | 143 | *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); |
134 | } | 144 | } |
135 | } | 145 | } |
136 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | 146 | return p; |
137 | } | 147 | } |
138 | #elif LJ_TARGET_ARM | 148 | #elif LJ_TARGET_ARM |
139 | static void callback_mcode_init(global_State *g, uint32_t *page) | 149 | static void *callback_mcode_init(global_State *g, uint32_t *page) |
140 | { | 150 | { |
141 | uint32_t *p = page; | 151 | uint32_t *p = page; |
142 | void *target = (void *)lj_vm_ffi_callback; | 152 | void *target = (void *)lj_vm_ffi_callback; |
@@ -155,10 +165,30 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
155 | *p = ARMI_B | ((page-p-2) & 0x00ffffffu); | 165 | *p = ARMI_B | ((page-p-2) & 0x00ffffffu); |
156 | p++; | 166 | p++; |
157 | } | 167 | } |
158 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | 168 | return p; |
169 | } | ||
170 | #elif LJ_TARGET_ARM64 | ||
171 | static void *callback_mcode_init(global_State *g, uint32_t *page) | ||
172 | { | ||
173 | uint32_t *p = page; | ||
174 | void *target = (void *)lj_vm_ffi_callback; | ||
175 | MSize slot; | ||
176 | *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); | ||
177 | *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); | ||
178 | *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); | ||
179 | *p++ = A64I_LE(A64I_NOP); | ||
180 | ((void **)p)[0] = target; | ||
181 | ((void **)p)[1] = g; | ||
182 | p += 4; | ||
183 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { | ||
184 | *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); | ||
185 | *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); | ||
186 | p++; | ||
187 | } | ||
188 | return p; | ||
159 | } | 189 | } |
160 | #elif LJ_TARGET_PPC | 190 | #elif LJ_TARGET_PPC |
161 | static void callback_mcode_init(global_State *g, uint32_t *page) | 191 | static void *callback_mcode_init(global_State *g, uint32_t *page) |
162 | { | 192 | { |
163 | uint32_t *p = page; | 193 | uint32_t *p = page; |
164 | void *target = (void *)lj_vm_ffi_callback; | 194 | void *target = (void *)lj_vm_ffi_callback; |
@@ -174,30 +204,43 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
174 | *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); | 204 | *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); |
175 | p++; | 205 | p++; |
176 | } | 206 | } |
177 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | 207 | return p; |
178 | } | 208 | } |
179 | #elif LJ_TARGET_MIPS | 209 | #elif LJ_TARGET_MIPS |
180 | static void callback_mcode_init(global_State *g, uint32_t *page) | 210 | static void *callback_mcode_init(global_State *g, uint32_t *page) |
181 | { | 211 | { |
182 | uint32_t *p = page; | 212 | uint32_t *p = page; |
183 | void *target = (void *)lj_vm_ffi_callback; | 213 | uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; |
214 | uintptr_t ug = (uintptr_t)(void *)g; | ||
184 | MSize slot; | 215 | MSize slot; |
185 | *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; | 216 | #if LJ_TARGET_MIPS32 |
186 | *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); | 217 | *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16); |
187 | *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); | 218 | *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16); |
188 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); | 219 | #else |
220 | *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48); | ||
221 | *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48); | ||
222 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff); | ||
223 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff); | ||
224 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); | ||
225 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); | ||
226 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff); | ||
227 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff); | ||
228 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); | ||
229 | *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); | ||
230 | #endif | ||
231 | *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff); | ||
189 | *p++ = MIPSI_JR | MIPSF_S(RID_R3); | 232 | *p++ = MIPSI_JR | MIPSF_S(RID_R3); |
190 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); | 233 | *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff); |
191 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { | 234 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { |
192 | *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); | 235 | *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); |
193 | p++; | 236 | p++; |
194 | *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; | 237 | *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; |
195 | } | 238 | } |
196 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | 239 | return p; |
197 | } | 240 | } |
198 | #else | 241 | #else |
199 | /* Missing support for this architecture. */ | 242 | /* Missing support for this architecture. */ |
200 | #define callback_mcode_init(g, p) UNUSED(p) | 243 | #define callback_mcode_init(g, p) (p) |
201 | #endif | 244 | #endif |
202 | 245 | ||
203 | /* -- Machine code management --------------------------------------------- */ | 246 | /* -- Machine code management --------------------------------------------- */ |
@@ -213,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
213 | #ifndef MAP_ANONYMOUS | 256 | #ifndef MAP_ANONYMOUS |
214 | #define MAP_ANONYMOUS MAP_ANON | 257 | #define MAP_ANONYMOUS MAP_ANON |
215 | #endif | 258 | #endif |
259 | #ifdef PROT_MPROTECT | ||
260 | #define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC)) | ||
261 | #else | ||
262 | #define CCPROT_CREATE 0 | ||
263 | #endif | ||
216 | 264 | ||
217 | #endif | 265 | #endif |
218 | 266 | ||
@@ -220,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
220 | static void callback_mcode_new(CTState *cts) | 268 | static void callback_mcode_new(CTState *cts) |
221 | { | 269 | { |
222 | size_t sz = (size_t)CALLBACK_MCODE_SIZE; | 270 | size_t sz = (size_t)CALLBACK_MCODE_SIZE; |
223 | void *p; | 271 | void *p, *pe; |
224 | if (CALLBACK_MAX_SLOT == 0) | 272 | if (CALLBACK_MAX_SLOT == 0) |
225 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); | 273 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); |
226 | #if LJ_TARGET_WINDOWS | 274 | #if LJ_TARGET_WINDOWS |
227 | p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); | 275 | p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); |
228 | if (!p) | 276 | if (!p) |
229 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); | 277 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); |
230 | #elif LJ_TARGET_POSIX | 278 | #elif LJ_TARGET_POSIX |
231 | p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, | 279 | p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS, |
232 | -1, 0); | 280 | -1, 0); |
233 | if (p == MAP_FAILED) | 281 | if (p == MAP_FAILED) |
234 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); | 282 | lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); |
@@ -237,12 +285,15 @@ static void callback_mcode_new(CTState *cts) | |||
237 | p = lj_mem_new(cts->L, sz); | 285 | p = lj_mem_new(cts->L, sz); |
238 | #endif | 286 | #endif |
239 | cts->cb.mcode = p; | 287 | cts->cb.mcode = p; |
240 | callback_mcode_init(cts->g, p); | 288 | pe = callback_mcode_init(cts->g, p); |
289 | UNUSED(pe); | ||
290 | lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz, | ||
291 | "miscalculated CALLBACK_MAX_SLOT"); | ||
241 | lj_mcode_sync(p, (char *)p + sz); | 292 | lj_mcode_sync(p, (char *)p + sz); |
242 | #if LJ_TARGET_WINDOWS | 293 | #if LJ_TARGET_WINDOWS |
243 | { | 294 | { |
244 | DWORD oprot; | 295 | DWORD oprot; |
245 | VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); | 296 | LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); |
246 | } | 297 | } |
247 | #elif LJ_TARGET_POSIX | 298 | #elif LJ_TARGET_POSIX |
248 | mprotect(p, sz, (PROT_READ|PROT_EXEC)); | 299 | mprotect(p, sz, (PROT_READ|PROT_EXEC)); |
@@ -351,33 +402,78 @@ void lj_ccallback_mcode_free(CTState *cts) | |||
351 | goto done; \ | 402 | goto done; \ |
352 | } CALLBACK_HANDLE_REGARG_FP2 | 403 | } CALLBACK_HANDLE_REGARG_FP2 |
353 | 404 | ||
354 | #elif LJ_TARGET_PPC | 405 | #elif LJ_TARGET_ARM64 |
355 | 406 | ||
356 | #define CALLBACK_HANDLE_REGARG \ | 407 | #define CALLBACK_HANDLE_REGARG \ |
357 | if (isfp) { \ | 408 | if (isfp) { \ |
358 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | 409 | if (nfpr + n <= CCALL_NARG_FPR) { \ |
359 | sp = &cts->cb.fpr[nfpr++]; \ | 410 | sp = &cts->cb.fpr[nfpr]; \ |
360 | cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | 411 | nfpr += n; \ |
361 | goto done; \ | 412 | goto done; \ |
413 | } else { \ | ||
414 | nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ | ||
362 | } \ | 415 | } \ |
363 | } else { /* Try to pass argument in GPRs. */ \ | 416 | } else { \ |
364 | if (n > 1) { \ | 417 | if (!LJ_TARGET_OSX && n > 1) \ |
365 | lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ | 418 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ |
366 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ | ||
367 | } \ | ||
368 | if (ngpr + n <= maxgpr) { \ | 419 | if (ngpr + n <= maxgpr) { \ |
369 | sp = &cts->cb.gpr[ngpr]; \ | 420 | sp = &cts->cb.gpr[ngpr]; \ |
370 | ngpr += n; \ | 421 | ngpr += n; \ |
371 | goto done; \ | 422 | goto done; \ |
423 | } else { \ | ||
424 | ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \ | ||
425 | } \ | ||
426 | } | ||
427 | |||
428 | #elif LJ_TARGET_PPC | ||
429 | |||
430 | #define CALLBACK_HANDLE_GPR \ | ||
431 | if (n > 1) { \ | ||
432 | lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ | ||
433 | ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \ | ||
434 | "bad GPR type"); \ | ||
435 | ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ | ||
436 | } \ | ||
437 | if (ngpr + n <= maxgpr) { \ | ||
438 | sp = &cts->cb.gpr[ngpr]; \ | ||
439 | ngpr += n; \ | ||
440 | goto done; \ | ||
441 | } | ||
442 | |||
443 | #if LJ_ABI_SOFTFP | ||
444 | #define CALLBACK_HANDLE_REGARG \ | ||
445 | CALLBACK_HANDLE_GPR \ | ||
446 | UNUSED(isfp); | ||
447 | #else | ||
448 | #define CALLBACK_HANDLE_REGARG \ | ||
449 | if (isfp) { \ | ||
450 | if (nfpr + 1 <= CCALL_NARG_FPR) { \ | ||
451 | sp = &cts->cb.fpr[nfpr++]; \ | ||
452 | cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ | ||
453 | goto done; \ | ||
372 | } \ | 454 | } \ |
455 | } else { /* Try to pass argument in GPRs. */ \ | ||
456 | CALLBACK_HANDLE_GPR \ | ||
373 | } | 457 | } |
458 | #endif | ||
374 | 459 | ||
460 | #if !LJ_ABI_SOFTFP | ||
375 | #define CALLBACK_HANDLE_RET \ | 461 | #define CALLBACK_HANDLE_RET \ |
376 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 462 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
377 | *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ | 463 | *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ |
464 | #endif | ||
378 | 465 | ||
379 | #elif LJ_TARGET_MIPS | 466 | #elif LJ_TARGET_MIPS32 |
380 | 467 | ||
468 | #define CALLBACK_HANDLE_GPR \ | ||
469 | if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | ||
470 | if (ngpr + n <= maxgpr) { \ | ||
471 | sp = &cts->cb.gpr[ngpr]; \ | ||
472 | ngpr += n; \ | ||
473 | goto done; \ | ||
474 | } | ||
475 | |||
476 | #if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ | ||
381 | #define CALLBACK_HANDLE_REGARG \ | 477 | #define CALLBACK_HANDLE_REGARG \ |
382 | if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ | 478 | if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ |
383 | sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ | 479 | sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ |
@@ -385,13 +481,36 @@ void lj_ccallback_mcode_free(CTState *cts) | |||
385 | goto done; \ | 481 | goto done; \ |
386 | } else { /* Try to pass argument in GPRs. */ \ | 482 | } else { /* Try to pass argument in GPRs. */ \ |
387 | nfpr = CCALL_NARG_FPR; \ | 483 | nfpr = CCALL_NARG_FPR; \ |
388 | if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | 484 | CALLBACK_HANDLE_GPR \ |
389 | if (ngpr + n <= maxgpr) { \ | 485 | } |
390 | sp = &cts->cb.gpr[ngpr]; \ | 486 | #else /* MIPS32 soft-float */ |
391 | ngpr += n; \ | 487 | #define CALLBACK_HANDLE_REGARG \ |
392 | goto done; \ | 488 | CALLBACK_HANDLE_GPR \ |
393 | } \ | 489 | UNUSED(isfp); |
490 | #endif | ||
491 | |||
492 | #define CALLBACK_HANDLE_RET \ | ||
493 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | ||
494 | ((float *)dp)[1] = *(float *)dp; | ||
495 | |||
496 | #elif LJ_TARGET_MIPS64 | ||
497 | |||
498 | #if !LJ_ABI_SOFTFP /* MIPS64 hard-float */ | ||
499 | #define CALLBACK_HANDLE_REGARG \ | ||
500 | if (ngpr + n <= maxgpr) { \ | ||
501 | sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \ | ||
502 | ngpr += n; \ | ||
503 | goto done; \ | ||
394 | } | 504 | } |
505 | #else /* MIPS64 soft-float */ | ||
506 | #define CALLBACK_HANDLE_REGARG \ | ||
507 | if (ngpr + n <= maxgpr) { \ | ||
508 | UNUSED(isfp); \ | ||
509 | sp = (void*) &cts->cb.gpr[ngpr]; \ | ||
510 | ngpr += n; \ | ||
511 | goto done; \ | ||
512 | } | ||
513 | #endif | ||
395 | 514 | ||
396 | #define CALLBACK_HANDLE_RET \ | 515 | #define CALLBACK_HANDLE_RET \ |
397 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | 516 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ |
@@ -411,6 +530,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
411 | int gcsteps = 0; | 530 | int gcsteps = 0; |
412 | CType *ct; | 531 | CType *ct; |
413 | GCfunc *fn; | 532 | GCfunc *fn; |
533 | int fntp; | ||
414 | MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; | 534 | MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; |
415 | #if CCALL_NARG_FPR | 535 | #if CCALL_NARG_FPR |
416 | MSize nfpr = 0; | 536 | MSize nfpr = 0; |
@@ -421,18 +541,27 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
421 | 541 | ||
422 | if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { | 542 | if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { |
423 | ct = ctype_get(cts, id); | 543 | ct = ctype_get(cts, id); |
424 | rid = ctype_cid(ct->info); | 544 | rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */ |
425 | fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); | 545 | fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); |
546 | fntp = LJ_TFUNC; | ||
426 | } else { /* Must set up frame first, before throwing the error. */ | 547 | } else { /* Must set up frame first, before throwing the error. */ |
427 | ct = NULL; | 548 | ct = NULL; |
428 | rid = 0; | 549 | rid = 0; |
429 | fn = (GCfunc *)L; | 550 | fn = (GCfunc *)L; |
551 | fntp = LJ_TTHREAD; | ||
552 | } | ||
553 | /* Continuation returns from callback. */ | ||
554 | if (LJ_FR2) { | ||
555 | (o++)->u64 = LJ_CONT_FFI_CALLBACK; | ||
556 | (o++)->u64 = rid; | ||
557 | } else { | ||
558 | o->u32.lo = LJ_CONT_FFI_CALLBACK; | ||
559 | o->u32.hi = rid; | ||
560 | o++; | ||
430 | } | 561 | } |
431 | o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */ | 562 | setframe_gc(o, obj2gco(fn), fntp); |
432 | o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */ | 563 | if (LJ_FR2) o++; |
433 | o++; | 564 | setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT); |
434 | setframe_gc(o, obj2gco(fn)); | ||
435 | setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT); | ||
436 | L->top = L->base = ++o; | 565 | L->top = L->base = ++o; |
437 | if (!ct) | 566 | if (!ct) |
438 | lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); | 567 | lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); |
@@ -459,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
459 | CTSize sz; | 588 | CTSize sz; |
460 | int isfp; | 589 | int isfp; |
461 | MSize n; | 590 | MSize n; |
462 | lua_assert(ctype_isfield(ctf->info)); | 591 | lj_assertCTS(ctype_isfield(ctf->info), "field expected"); |
463 | cta = ctype_rawchild(cts, ctf); | 592 | cta = ctype_rawchild(cts, ctf); |
464 | isfp = ctype_isfp(cta->info); | 593 | isfp = ctype_isfp(cta->info); |
465 | sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); | 594 | sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); |
@@ -474,7 +603,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
474 | nsp += n; | 603 | nsp += n; |
475 | 604 | ||
476 | done: | 605 | done: |
477 | if (LJ_BE && cta->size < CTSIZE_PTR) | 606 | if (LJ_BE && cta->size < CTSIZE_PTR |
607 | #if LJ_TARGET_MIPS64 | ||
608 | && !(isfp && nsp) | ||
609 | #endif | ||
610 | ) | ||
478 | sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); | 611 | sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); |
479 | gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); | 612 | gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); |
480 | } | 613 | } |
@@ -483,9 +616,14 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
483 | L->top = o; | 616 | L->top = o; |
484 | #if LJ_TARGET_X86 | 617 | #if LJ_TARGET_X86 |
485 | /* Store stack adjustment for returns from non-cdecl callbacks. */ | 618 | /* Store stack adjustment for returns from non-cdecl callbacks. */ |
486 | if (ctype_cconv(ct->info) != CTCC_CDECL) | 619 | if (ctype_cconv(ct->info) != CTCC_CDECL) { |
620 | #if LJ_FR2 | ||
621 | (L->base-3)->u64 |= (nsp << (16+2)); | ||
622 | #else | ||
487 | (L->base-2)->u32.hi |= (nsp << (16+2)); | 623 | (L->base-2)->u32.hi |= (nsp << (16+2)); |
488 | #endif | 624 | #endif |
625 | } | ||
626 | #endif | ||
489 | while (gcsteps-- > 0) | 627 | while (gcsteps-- > 0) |
490 | lj_gc_check(L); | 628 | lj_gc_check(L); |
491 | } | 629 | } |
@@ -493,7 +631,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) | |||
493 | /* Convert Lua object to callback result. */ | 631 | /* Convert Lua object to callback result. */ |
494 | static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | 632 | static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) |
495 | { | 633 | { |
634 | #if LJ_FR2 | ||
635 | CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64); | ||
636 | #else | ||
496 | CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); | 637 | CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); |
638 | #endif | ||
497 | #if LJ_TARGET_X86 | 639 | #if LJ_TARGET_X86 |
498 | cts->cb.gpr[2] = 0; | 640 | cts->cb.gpr[2] = 0; |
499 | #endif | 641 | #endif |
@@ -503,6 +645,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | |||
503 | if (ctype_isfp(ctr->info)) | 645 | if (ctype_isfp(ctr->info)) |
504 | dp = (uint8_t *)&cts->cb.fpr[0]; | 646 | dp = (uint8_t *)&cts->cb.fpr[0]; |
505 | #endif | 647 | #endif |
648 | #if LJ_TARGET_ARM64 && LJ_BE | ||
649 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) | ||
650 | dp = (uint8_t *)&cts->cb.fpr[0].f[1]; | ||
651 | #endif | ||
506 | lj_cconv_ct_tv(cts, ctr, dp, o, 0); | 652 | lj_cconv_ct_tv(cts, ctr, dp, o, 0); |
507 | #ifdef CALLBACK_HANDLE_RET | 653 | #ifdef CALLBACK_HANDLE_RET |
508 | CALLBACK_HANDLE_RET | 654 | CALLBACK_HANDLE_RET |
@@ -516,6 +662,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | |||
516 | *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : | 662 | *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : |
517 | (int32_t)*(int16_t *)dp; | 663 | (int32_t)*(int16_t *)dp; |
518 | } | 664 | } |
665 | #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) | ||
666 | /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ | ||
667 | if (ctr->size <= 4 && | ||
668 | (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) | ||
669 | *(int64_t *)dp = (int64_t)*(int32_t *)dp; | ||
670 | #endif | ||
519 | #if LJ_TARGET_X86 | 671 | #if LJ_TARGET_X86 |
520 | if (ctype_isfp(ctr->info)) | 672 | if (ctype_isfp(ctr->info)) |
521 | cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; | 673 | cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; |
@@ -528,8 +680,8 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) | |||
528 | { | 680 | { |
529 | lua_State *L = cts->L; | 681 | lua_State *L = cts->L; |
530 | global_State *g = cts->g; | 682 | global_State *g = cts->g; |
531 | lua_assert(L != NULL); | 683 | lj_assertG(L != NULL, "uninitialized cts->L in callback"); |
532 | if (gcref(g->jit_L)) { | 684 | if (tvref(g->jit_base)) { |
533 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); | 685 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); |
534 | if (g->panic) g->panic(L); | 686 | if (g->panic) g->panic(L); |
535 | exit(EXIT_FAILURE); | 687 | exit(EXIT_FAILURE); |
@@ -562,9 +714,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o) | |||
562 | } | 714 | } |
563 | callback_conv_result(cts, L, o); | 715 | callback_conv_result(cts, L, o); |
564 | /* Finally drop C frame and continuation frame. */ | 716 | /* Finally drop C frame and continuation frame. */ |
565 | L->cframe = cframe_prev(L->cframe); | 717 | L->top -= 2+2*LJ_FR2; |
566 | L->top -= 2; | ||
567 | L->base = obase; | 718 | L->base = obase; |
719 | L->cframe = cframe_prev(L->cframe); | ||
568 | cts->cb.slot = 0; /* Blacklist C function that called the callback. */ | 720 | cts->cb.slot = 0; /* Blacklist C function that called the callback. */ |
569 | } | 721 | } |
570 | 722 | ||
@@ -613,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct) | |||
613 | CType *ctf = ctype_get(cts, fid); | 765 | CType *ctf = ctype_get(cts, fid); |
614 | if (!ctype_isattrib(ctf->info)) { | 766 | if (!ctype_isattrib(ctf->info)) { |
615 | CType *cta; | 767 | CType *cta; |
616 | lua_assert(ctype_isfield(ctf->info)); | 768 | lj_assertCTS(ctype_isfield(ctf->info), "field expected"); |
617 | cta = ctype_rawchild(cts, ctf); | 769 | cta = ctype_rawchild(cts, ctf); |
618 | if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || | 770 | if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || |
619 | (ctype_isnum(cta->info) && cta->size <= 8)) || | 771 | (ctype_isnum(cta->info) && cta->size <= 8)) || |
diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 8556952f..3bbfd3f1 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #if LJ_HASFFI | 8 | #if LJ_HASFFI |
9 | 9 | ||
10 | #include "lj_err.h" | 10 | #include "lj_err.h" |
11 | #include "lj_buf.h" | ||
11 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
12 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
13 | #include "lj_cdata.h" | 14 | #include "lj_cdata.h" |
@@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, | |||
122 | CTInfo dinfo = d->info, sinfo = s->info; | 123 | CTInfo dinfo = d->info, sinfo = s->info; |
123 | void *tmpptr; | 124 | void *tmpptr; |
124 | 125 | ||
125 | lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo)); | 126 | lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo), |
126 | lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo)); | 127 | "unresolved enum"); |
128 | lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo), | ||
129 | "unstripped attribute"); | ||
127 | 130 | ||
128 | if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) | 131 | if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) |
129 | goto err_conv; | 132 | goto err_conv; |
130 | 133 | ||
131 | /* Some basic sanity checks. */ | 134 | /* Some basic sanity checks. */ |
132 | lua_assert(!ctype_isnum(dinfo) || dsize > 0); | 135 | lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type"); |
133 | lua_assert(!ctype_isnum(sinfo) || ssize > 0); | 136 | lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type"); |
134 | lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4); | 137 | lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4, |
135 | lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4); | 138 | "bad size for bool type"); |
136 | lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize); | 139 | lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4, |
137 | lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize); | 140 | "bad size for bool type"); |
141 | lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize, | ||
142 | "bad size for integer type"); | ||
143 | lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize, | ||
144 | "bad size for integer type"); | ||
138 | 145 | ||
139 | switch (cconv_idx2(dinfo, sinfo)) { | 146 | switch (cconv_idx2(dinfo, sinfo)) { |
140 | /* Destination is a bool. */ | 147 | /* Destination is a bool. */ |
@@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, | |||
357 | if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s) | 364 | if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s) |
358 | goto err_conv; /* Must be exact same type. */ | 365 | goto err_conv; /* Must be exact same type. */ |
359 | copyval: /* Copy value. */ | 366 | copyval: /* Copy value. */ |
360 | lua_assert(dsize == ssize); | 367 | lj_assertCTS(dsize == ssize, "value copy with different sizes"); |
361 | memcpy(dp, sp, dsize); | 368 | memcpy(dp, sp, dsize); |
362 | break; | 369 | break; |
363 | 370 | ||
@@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, | |||
389 | lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, | 396 | lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, |
390 | (uint8_t *)&o->n, sp, 0); | 397 | (uint8_t *)&o->n, sp, 0); |
391 | /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ | 398 | /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ |
392 | lua_assert(tvisnum(o)); | 399 | lj_assertCTS(tvisnum(o), "non-canonical NaN passed"); |
393 | } | 400 | } |
394 | } else { | 401 | } else { |
395 | uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); | 402 | uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); |
@@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, | |||
406 | CTSize sz; | 413 | CTSize sz; |
407 | copyval: /* Copy value. */ | 414 | copyval: /* Copy value. */ |
408 | sz = s->size; | 415 | sz = s->size; |
409 | lua_assert(sz != CTSIZE_INVALID); | 416 | lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size"); |
410 | /* Attributes are stripped, qualifiers are kept (but mostly ignored). */ | 417 | /* Attributes are stripped, qualifiers are kept (but mostly ignored). */ |
411 | cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); | 418 | cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); |
412 | setcdataV(cts->L, o, cd); | 419 | setcdataV(cts->L, o, cd); |
@@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) | |||
421 | CTInfo info = s->info; | 428 | CTInfo info = s->info; |
422 | CTSize pos, bsz; | 429 | CTSize pos, bsz; |
423 | uint32_t val; | 430 | uint32_t val; |
424 | lua_assert(ctype_isbitfield(info)); | 431 | lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); |
425 | /* NYI: packed bitfields may cause misaligned reads. */ | 432 | /* NYI: packed bitfields may cause misaligned reads. */ |
426 | switch (ctype_bitcsz(info)) { | 433 | switch (ctype_bitcsz(info)) { |
427 | case 4: val = *(uint32_t *)sp; break; | 434 | case 4: val = *(uint32_t *)sp; break; |
428 | case 2: val = *(uint16_t *)sp; break; | 435 | case 2: val = *(uint16_t *)sp; break; |
429 | case 1: val = *(uint8_t *)sp; break; | 436 | case 1: val = *(uint8_t *)sp; break; |
430 | default: lua_assert(0); val = 0; break; | 437 | default: |
438 | lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); | ||
439 | val = 0; | ||
440 | break; | ||
431 | } | 441 | } |
432 | /* Check if a packed bitfield crosses a container boundary. */ | 442 | /* Check if a packed bitfield crosses a container boundary. */ |
433 | pos = ctype_bitpos(info); | 443 | pos = ctype_bitpos(info); |
434 | bsz = ctype_bitbsz(info); | 444 | bsz = ctype_bitbsz(info); |
435 | lua_assert(pos < 8*ctype_bitcsz(info)); | 445 | lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); |
436 | lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); | 446 | lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); |
437 | if (pos + bsz > 8*ctype_bitcsz(info)) | 447 | if (pos + bsz > 8*ctype_bitcsz(info)) |
438 | lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); | 448 | lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); |
439 | if (!(info & CTF_BOOL)) { | 449 | if (!(info & CTF_BOOL)) { |
@@ -448,8 +458,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) | |||
448 | setintV(o, (int32_t)val); | 458 | setintV(o, (int32_t)val); |
449 | } | 459 | } |
450 | } else { | 460 | } else { |
451 | lua_assert(bsz == 1); | 461 | uint32_t b = (val >> pos) & 1; |
452 | setboolV(o, (val >> pos) & 1); | 462 | lj_assertCTS(bsz == 1, "bad bool bitfield size"); |
463 | setboolV(o, b); | ||
464 | setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */ | ||
453 | } | 465 | } |
454 | return 0; /* No GC step needed. */ | 466 | return 0; /* No GC step needed. */ |
455 | } | 467 | } |
@@ -551,7 +563,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, | |||
551 | sid = cdataV(o)->ctypeid; | 563 | sid = cdataV(o)->ctypeid; |
552 | s = ctype_get(cts, sid); | 564 | s = ctype_get(cts, sid); |
553 | if (ctype_isref(s->info)) { /* Resolve reference for value. */ | 565 | if (ctype_isref(s->info)) { /* Resolve reference for value. */ |
554 | lua_assert(s->size == CTSIZE_PTR); | 566 | lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); |
555 | sp = *(void **)sp; | 567 | sp = *(void **)sp; |
556 | sid = ctype_cid(s->info); | 568 | sid = ctype_cid(s->info); |
557 | } | 569 | } |
@@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, | |||
571 | CType *cct = lj_ctype_getfield(cts, d, str, &ofs); | 583 | CType *cct = lj_ctype_getfield(cts, d, str, &ofs); |
572 | if (!cct || !ctype_isconstval(cct->info)) | 584 | if (!cct || !ctype_isconstval(cct->info)) |
573 | goto err_conv; | 585 | goto err_conv; |
574 | lua_assert(d->size == 4); | 586 | lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */ |
575 | sp = (uint8_t *)&cct->size; | 587 | sp = (uint8_t *)&cct->size; |
576 | sid = ctype_cid(cct->info); | 588 | sid = ctype_cid(cct->info); |
577 | } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */ | 589 | } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */ |
@@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, | |||
610 | tmpptr = uddata(ud); | 622 | tmpptr = uddata(ud); |
611 | if (ud->udtype == UDTYPE_IO_FILE) | 623 | if (ud->udtype == UDTYPE_IO_FILE) |
612 | tmpptr = *(void **)tmpptr; | 624 | tmpptr = *(void **)tmpptr; |
625 | else if (ud->udtype == UDTYPE_BUFFER) | ||
626 | tmpptr = ((SBufExt *)tmpptr)->r; | ||
613 | } else if (tvislightud(o)) { | 627 | } else if (tvislightud(o)) { |
614 | tmpptr = lightudV(o); | 628 | tmpptr = lightudV(cts->g, o); |
615 | } else if (tvisfunc(o)) { | 629 | } else if (tvisfunc(o)) { |
616 | void *p = lj_ccallback_new(cts, d, funcV(o)); | 630 | void *p = lj_ccallback_new(cts, d, funcV(o)); |
617 | if (p) { | 631 | if (p) { |
@@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) | |||
635 | CTInfo info = d->info; | 649 | CTInfo info = d->info; |
636 | CTSize pos, bsz; | 650 | CTSize pos, bsz; |
637 | uint32_t val, mask; | 651 | uint32_t val, mask; |
638 | lua_assert(ctype_isbitfield(info)); | 652 | lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); |
639 | if ((info & CTF_BOOL)) { | 653 | if ((info & CTF_BOOL)) { |
640 | uint8_t tmpbool; | 654 | uint8_t tmpbool; |
641 | lua_assert(ctype_bitbsz(info) == 1); | 655 | lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size"); |
642 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); | 656 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); |
643 | val = tmpbool; | 657 | val = tmpbool; |
644 | } else { | 658 | } else { |
@@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) | |||
647 | } | 661 | } |
648 | pos = ctype_bitpos(info); | 662 | pos = ctype_bitpos(info); |
649 | bsz = ctype_bitbsz(info); | 663 | bsz = ctype_bitbsz(info); |
650 | lua_assert(pos < 8*ctype_bitcsz(info)); | 664 | lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); |
651 | lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); | 665 | lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); |
652 | /* Check if a packed bitfield crosses a container boundary. */ | 666 | /* Check if a packed bitfield crosses a container boundary. */ |
653 | if (pos + bsz > 8*ctype_bitcsz(info)) | 667 | if (pos + bsz > 8*ctype_bitcsz(info)) |
654 | lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); | 668 | lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); |
@@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) | |||
659 | case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; | 673 | case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; |
660 | case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; | 674 | case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; |
661 | case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; | 675 | case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; |
662 | default: lua_assert(0); break; | 676 | default: |
677 | lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); | ||
678 | break; | ||
663 | } | 679 | } |
664 | } | 680 | } |
665 | 681 | ||
diff --git a/src/lj_cconv.h b/src/lj_cconv.h index 2d1cb273..45b0ca1e 100644 --- a/src/lj_cconv.h +++ b/src/lj_cconv.h | |||
@@ -27,13 +27,14 @@ enum { | |||
27 | static LJ_AINLINE uint32_t cconv_idx(CTInfo info) | 27 | static LJ_AINLINE uint32_t cconv_idx(CTInfo info) |
28 | { | 28 | { |
29 | uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ | 29 | uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ |
30 | lua_assert(ctype_type(info) <= CT_MAYCONVERT); | 30 | lj_assertX(ctype_type(info) <= CT_MAYCONVERT, |
31 | "cannot convert ctype %08x", info); | ||
31 | #if LJ_64 | 32 | #if LJ_64 |
32 | idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); | 33 | idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); |
33 | #else | 34 | #else |
34 | idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); | 35 | idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); |
35 | #endif | 36 | #endif |
36 | lua_assert(idx < 8); | 37 | lj_assertX(idx < 8, "cannot convert ctype %08x", info); |
37 | return idx; | 38 | return idx; |
38 | } | 39 | } |
39 | 40 | ||
diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 425e6bcf..01a74f5d 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | ||
13 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
14 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
15 | #include "lj_cconv.h" | 14 | #include "lj_cconv.h" |
@@ -27,20 +26,20 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id) | |||
27 | } | 26 | } |
28 | 27 | ||
29 | /* Allocate variable-sized or specially aligned C data object. */ | 28 | /* Allocate variable-sized or specially aligned C data object. */ |
30 | GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) | 29 | GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) |
31 | { | 30 | { |
32 | global_State *g; | 31 | global_State *g; |
33 | MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + | 32 | MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + |
34 | (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); | 33 | (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); |
35 | char *p = lj_mem_newt(cts->L, extra + sz, char); | 34 | char *p = lj_mem_newt(L, extra + sz, char); |
36 | uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); | 35 | uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); |
37 | uintptr_t almask = (1u << align) - 1u; | 36 | uintptr_t almask = (1u << align) - 1u; |
38 | GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); | 37 | GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); |
39 | lua_assert((char *)cd - p < 65536); | 38 | lj_assertL((char *)cd - p < 65536, "excessive cdata alignment"); |
40 | cdatav(cd)->offset = (uint16_t)((char *)cd - p); | 39 | cdatav(cd)->offset = (uint16_t)((char *)cd - p); |
41 | cdatav(cd)->extra = extra; | 40 | cdatav(cd)->extra = extra; |
42 | cdatav(cd)->len = sz; | 41 | cdatav(cd)->len = sz; |
43 | g = cts->g; | 42 | g = G(L); |
44 | setgcrefr(cd->nextgc, g->gc.root); | 43 | setgcrefr(cd->nextgc, g->gc.root); |
45 | setgcref(g->gc.root, obj2gco(cd)); | 44 | setgcref(g->gc.root, obj2gco(cd)); |
46 | newwhite(g, obj2gco(cd)); | 45 | newwhite(g, obj2gco(cd)); |
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) | |||
50 | return cd; | 49 | return cd; |
51 | } | 50 | } |
52 | 51 | ||
52 | /* Allocate arbitrary C data object. */ | ||
53 | GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info) | ||
54 | { | ||
55 | if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) | ||
56 | return lj_cdata_new(cts, id, sz); | ||
57 | else | ||
58 | return lj_cdata_newv(cts->L, id, sz, ctype_align(info)); | ||
59 | } | ||
60 | |||
53 | /* Free a C data object. */ | 61 | /* Free a C data object. */ |
54 | void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) | 62 | void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) |
55 | { | 63 | { |
@@ -68,29 +76,30 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) | |||
68 | } else if (LJ_LIKELY(!cdataisv(cd))) { | 76 | } else if (LJ_LIKELY(!cdataisv(cd))) { |
69 | CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); | 77 | CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); |
70 | CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; | 78 | CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; |
71 | lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || | 79 | lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || |
72 | ctype_isextern(ct->info)); | 80 | ctype_isextern(ct->info), "free of ctype without a size"); |
73 | lj_mem_free(g, cd, sizeof(GCcdata) + sz); | 81 | lj_mem_free(g, cd, sizeof(GCcdata) + sz); |
74 | } else { | 82 | } else { |
75 | lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); | 83 | lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); |
76 | } | 84 | } |
77 | } | 85 | } |
78 | 86 | ||
79 | TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) | 87 | void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it) |
80 | { | 88 | { |
81 | global_State *g = G(L); | 89 | GCtab *t = ctype_ctsG(G(L))->finalizer; |
82 | GCtab *t = ctype_ctsG(g)->finalizer; | ||
83 | if (gcref(t->metatable)) { | 90 | if (gcref(t->metatable)) { |
84 | /* Add cdata to finalizer table, if still enabled. */ | 91 | /* Add cdata to finalizer table, if still enabled. */ |
85 | TValue *tv, tmp; | 92 | TValue *tv, tmp; |
86 | setcdataV(L, &tmp, cd); | 93 | setcdataV(L, &tmp, cd); |
87 | lj_gc_anybarriert(L, t); | 94 | lj_gc_anybarriert(L, t); |
88 | tv = lj_tab_set(L, t, &tmp); | 95 | tv = lj_tab_set(L, t, &tmp); |
89 | cd->marked |= LJ_GC_CDATA_FIN; | 96 | if (it == LJ_TNIL) { |
90 | return tv; | 97 | setnilV(tv); |
91 | } else { | 98 | cd->marked &= ~LJ_GC_CDATA_FIN; |
92 | /* Otherwise return dummy TValue. */ | 99 | } else { |
93 | return &g->tmptv; | 100 | setgcV(L, tv, obj, it); |
101 | cd->marked |= LJ_GC_CDATA_FIN; | ||
102 | } | ||
94 | } | 103 | } |
95 | } | 104 | } |
96 | 105 | ||
@@ -106,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, | |||
106 | 115 | ||
107 | /* Resolve reference for cdata object. */ | 116 | /* Resolve reference for cdata object. */ |
108 | if (ctype_isref(ct->info)) { | 117 | if (ctype_isref(ct->info)) { |
109 | lua_assert(ct->size == CTSIZE_PTR); | 118 | lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized"); |
110 | p = *(uint8_t **)p; | 119 | p = *(uint8_t **)p; |
111 | ct = ctype_child(cts, ct); | 120 | ct = ctype_child(cts, ct); |
112 | } | 121 | } |
@@ -117,13 +126,19 @@ collect_attrib: | |||
117 | if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; | 126 | if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; |
118 | ct = ctype_child(cts, ct); | 127 | ct = ctype_child(cts, ct); |
119 | } | 128 | } |
120 | lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ | 129 | /* Interning rejects refs to refs. */ |
130 | lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref"); | ||
121 | 131 | ||
122 | if (tvisint(key)) { | 132 | if (tvisint(key)) { |
123 | idx = (ptrdiff_t)intV(key); | 133 | idx = (ptrdiff_t)intV(key); |
124 | goto integer_key; | 134 | goto integer_key; |
125 | } else if (tvisnum(key)) { /* Numeric key. */ | 135 | } else if (tvisnum(key)) { /* Numeric key. */ |
126 | idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key)); | 136 | #ifdef _MSC_VER |
137 | /* Workaround for MSVC bug. */ | ||
138 | volatile | ||
139 | #endif | ||
140 | lua_Number n = numV(key); | ||
141 | idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); | ||
127 | integer_key: | 142 | integer_key: |
128 | if (ctype_ispointer(ct->info)) { | 143 | if (ctype_ispointer(ct->info)) { |
129 | CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ | 144 | CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ |
@@ -198,7 +213,8 @@ collect_attrib: | |||
198 | static void cdata_getconst(CTState *cts, TValue *o, CType *ct) | 213 | static void cdata_getconst(CTState *cts, TValue *o, CType *ct) |
199 | { | 214 | { |
200 | CType *ctt = ctype_child(cts, ct); | 215 | CType *ctt = ctype_child(cts, ct); |
201 | lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); | 216 | lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, |
217 | "only 32 bit const supported"); /* NYI */ | ||
202 | /* Constants are already zero-extended/sign-extended to 32 bits. */ | 218 | /* Constants are already zero-extended/sign-extended to 32 bits. */ |
203 | if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) | 219 | if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) |
204 | setnumV(o, (lua_Number)(uint32_t)ct->size); | 220 | setnumV(o, (lua_Number)(uint32_t)ct->size); |
@@ -219,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp) | |||
219 | } | 235 | } |
220 | 236 | ||
221 | /* Get child type of pointer/array/field. */ | 237 | /* Get child type of pointer/array/field. */ |
222 | lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info)); | 238 | lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info), |
239 | "pointer or field expected"); | ||
223 | sid = ctype_cid(s->info); | 240 | sid = ctype_cid(s->info); |
224 | s = ctype_get(cts, sid); | 241 | s = ctype_get(cts, sid); |
225 | 242 | ||
226 | /* Resolve reference for field. */ | 243 | /* Resolve reference for field. */ |
227 | if (ctype_isref(s->info)) { | 244 | if (ctype_isref(s->info)) { |
228 | lua_assert(s->size == CTSIZE_PTR); | 245 | lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); |
229 | sp = *(uint8_t **)sp; | 246 | sp = *(uint8_t **)sp; |
230 | sid = ctype_cid(s->info); | 247 | sid = ctype_cid(s->info); |
231 | s = ctype_get(cts, sid); | 248 | s = ctype_get(cts, sid); |
@@ -252,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) | |||
252 | } | 269 | } |
253 | 270 | ||
254 | /* Get child type of pointer/array/field. */ | 271 | /* Get child type of pointer/array/field. */ |
255 | lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info)); | 272 | lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info), |
273 | "pointer or field expected"); | ||
256 | d = ctype_child(cts, d); | 274 | d = ctype_child(cts, d); |
257 | 275 | ||
258 | /* Resolve reference for field. */ | 276 | /* Resolve reference for field. */ |
259 | if (ctype_isref(d->info)) { | 277 | if (ctype_isref(d->info)) { |
260 | lua_assert(d->size == CTSIZE_PTR); | 278 | lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized"); |
261 | dp = *(uint8_t **)dp; | 279 | dp = *(uint8_t **)dp; |
262 | d = ctype_child(cts, d); | 280 | d = ctype_child(cts, d); |
263 | } | 281 | } |
@@ -272,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) | |||
272 | d = ctype_child(cts, d); | 290 | d = ctype_child(cts, d); |
273 | } | 291 | } |
274 | 292 | ||
275 | lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info)); | 293 | lj_assertCTS(ctype_hassize(d->info), "store to ctype without size"); |
294 | lj_assertCTS(!ctype_isvoid(d->info), "store to void type"); | ||
276 | 295 | ||
277 | if (((d->info|qual) & CTF_CONST)) { | 296 | if (((d->info|qual) & CTF_CONST)) { |
278 | err_const: | 297 | err_const: |
diff --git a/src/lj_cdata.h b/src/lj_cdata.h index 2a82a9d8..de52e8aa 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h | |||
@@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) | |||
18 | if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ | 18 | if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ |
19 | return ((void *)(uintptr_t)*(uint32_t *)p); | 19 | return ((void *)(uintptr_t)*(uint32_t *)p); |
20 | } else { | 20 | } else { |
21 | lua_assert(sz == CTSIZE_PTR); | 21 | lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); |
22 | return *(void **)p; | 22 | return *(void **)p; |
23 | } | 23 | } |
24 | } | 24 | } |
@@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v) | |||
29 | if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ | 29 | if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ |
30 | *(uint32_t *)p = (uint32_t)(uintptr_t)v; | 30 | *(uint32_t *)p = (uint32_t)(uintptr_t)v; |
31 | } else { | 31 | } else { |
32 | lua_assert(sz == CTSIZE_PTR); | 32 | lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); |
33 | *(void **)p = (void *)v; | 33 | *(void **)p = (void *)v; |
34 | } | 34 | } |
35 | } | 35 | } |
@@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz) | |||
40 | GCcdata *cd; | 40 | GCcdata *cd; |
41 | #ifdef LUA_USE_ASSERT | 41 | #ifdef LUA_USE_ASSERT |
42 | CType *ct = ctype_raw(cts, id); | 42 | CType *ct = ctype_raw(cts, id); |
43 | lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz); | 43 | lj_assertCTS((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz, |
44 | "inconsistent size of fixed-size cdata alloc"); | ||
44 | #endif | 45 | #endif |
45 | cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); | 46 | cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); |
46 | cd->gct = ~LJ_TCDATA; | 47 | cd->gct = ~LJ_TCDATA; |
@@ -58,11 +59,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz) | |||
58 | } | 59 | } |
59 | 60 | ||
60 | LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); | 61 | LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); |
61 | LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, | 62 | LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, |
62 | CTSize align); | 63 | CTSize align); |
64 | LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, | ||
65 | CTInfo info); | ||
63 | 66 | ||
64 | LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); | 67 | LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); |
65 | LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); | 68 | LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, |
69 | uint32_t it); | ||
66 | 70 | ||
67 | LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, | 71 | LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, |
68 | uint8_t **pp, CTInfo *qual); | 72 | uint8_t **pp, CTInfo *qual); |
diff --git a/src/lj_clib.c b/src/lj_clib.c index e0f274bb..57669a14 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include "lj_cconv.h" | 16 | #include "lj_cconv.h" |
17 | #include "lj_cdata.h" | 17 | #include "lj_cdata.h" |
18 | #include "lj_clib.h" | 18 | #include "lj_clib.h" |
19 | #include "lj_strfmt.h" | ||
19 | 20 | ||
20 | /* -- OS-specific functions ----------------------------------------------- */ | 21 | /* -- OS-specific functions ----------------------------------------------- */ |
21 | 22 | ||
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name) | |||
61 | #endif | 62 | #endif |
62 | ) { | 63 | ) { |
63 | if (!strchr(name, '.')) { | 64 | if (!strchr(name, '.')) { |
64 | name = lj_str_pushf(L, CLIB_SOEXT, name); | 65 | name = lj_strfmt_pushf(L, CLIB_SOEXT, name); |
65 | L->top--; | 66 | L->top--; |
66 | #if LJ_TARGET_CYGWIN | 67 | #if LJ_TARGET_CYGWIN |
67 | } else { | 68 | } else { |
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name) | |||
70 | } | 71 | } |
71 | if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && | 72 | if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && |
72 | name[2] == CLIB_SOPREFIX[2])) { | 73 | name[2] == CLIB_SOPREFIX[2])) { |
73 | name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); | 74 | name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name); |
74 | L->top--; | 75 | L->top--; |
75 | } | 76 | } |
76 | } | 77 | } |
@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); | |||
158 | /* Default libraries. */ | 159 | /* Default libraries. */ |
159 | enum { | 160 | enum { |
160 | CLIB_HANDLE_EXE, | 161 | CLIB_HANDLE_EXE, |
162 | #if !LJ_TARGET_UWP | ||
161 | CLIB_HANDLE_DLL, | 163 | CLIB_HANDLE_DLL, |
162 | CLIB_HANDLE_CRT, | 164 | CLIB_HANDLE_CRT, |
163 | CLIB_HANDLE_KERNEL32, | 165 | CLIB_HANDLE_KERNEL32, |
164 | CLIB_HANDLE_USER32, | 166 | CLIB_HANDLE_USER32, |
165 | CLIB_HANDLE_GDI32, | 167 | CLIB_HANDLE_GDI32, |
168 | #endif | ||
166 | CLIB_HANDLE_MAX | 169 | CLIB_HANDLE_MAX |
167 | }; | 170 | }; |
168 | 171 | ||
@@ -172,11 +175,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, | |||
172 | const char *name) | 175 | const char *name) |
173 | { | 176 | { |
174 | DWORD err = GetLastError(); | 177 | DWORD err = GetLastError(); |
178 | #if LJ_TARGET_XBOXONE | ||
179 | wchar_t wbuf[128]; | ||
180 | char buf[128*2]; | ||
181 | if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, | ||
182 | NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) || | ||
183 | !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL)) | ||
184 | #else | ||
175 | char buf[128]; | 185 | char buf[128]; |
176 | if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, | 186 | if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, |
177 | NULL, err, 0, buf, sizeof(buf), NULL)) | 187 | NULL, err, 0, buf, sizeof(buf), NULL)) |
188 | #endif | ||
178 | buf[0] = '\0'; | 189 | buf[0] = '\0'; |
179 | lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); | 190 | lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf)); |
180 | } | 191 | } |
181 | 192 | ||
182 | static int clib_needext(const char *s) | 193 | static int clib_needext(const char *s) |
@@ -191,7 +202,7 @@ static int clib_needext(const char *s) | |||
191 | static const char *clib_extname(lua_State *L, const char *name) | 202 | static const char *clib_extname(lua_State *L, const char *name) |
192 | { | 203 | { |
193 | if (clib_needext(name)) { | 204 | if (clib_needext(name)) { |
194 | name = lj_str_pushf(L, "%s.dll", name); | 205 | name = lj_strfmt_pushf(L, "%s.dll", name); |
195 | L->top--; | 206 | L->top--; |
196 | } | 207 | } |
197 | return name; | 208 | return name; |
@@ -200,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name) | |||
200 | static void *clib_loadlib(lua_State *L, const char *name, int global) | 211 | static void *clib_loadlib(lua_State *L, const char *name, int global) |
201 | { | 212 | { |
202 | DWORD oldwerr = GetLastError(); | 213 | DWORD oldwerr = GetLastError(); |
203 | void *h = (void *)LoadLibraryA(clib_extname(L, name)); | 214 | void *h = LJ_WIN_LOADLIBA(clib_extname(L, name)); |
204 | if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); | 215 | if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); |
205 | SetLastError(oldwerr); | 216 | SetLastError(oldwerr); |
206 | UNUSED(global); | 217 | UNUSED(global); |
@@ -210,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) | |||
210 | static void clib_unloadlib(CLibrary *cl) | 221 | static void clib_unloadlib(CLibrary *cl) |
211 | { | 222 | { |
212 | if (cl->handle == CLIB_DEFHANDLE) { | 223 | if (cl->handle == CLIB_DEFHANDLE) { |
224 | #if !LJ_TARGET_UWP | ||
213 | MSize i; | 225 | MSize i; |
214 | for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { | 226 | for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { |
215 | void *h = clib_def_handle[i]; | 227 | void *h = clib_def_handle[i]; |
@@ -218,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl) | |||
218 | FreeLibrary((HINSTANCE)h); | 230 | FreeLibrary((HINSTANCE)h); |
219 | } | 231 | } |
220 | } | 232 | } |
233 | #endif | ||
221 | } else if (cl->handle) { | 234 | } else if (cl->handle) { |
222 | FreeLibrary((HINSTANCE)cl->handle); | 235 | FreeLibrary((HINSTANCE)cl->handle); |
223 | } | 236 | } |
224 | } | 237 | } |
225 | 238 | ||
239 | #if LJ_TARGET_UWP | ||
240 | EXTERN_C IMAGE_DOS_HEADER __ImageBase; | ||
241 | #endif | ||
242 | |||
226 | static void *clib_getsym(CLibrary *cl, const char *name) | 243 | static void *clib_getsym(CLibrary *cl, const char *name) |
227 | { | 244 | { |
228 | void *p = NULL; | 245 | void *p = NULL; |
@@ -231,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) | |||
231 | for (i = 0; i < CLIB_HANDLE_MAX; i++) { | 248 | for (i = 0; i < CLIB_HANDLE_MAX; i++) { |
232 | HINSTANCE h = (HINSTANCE)clib_def_handle[i]; | 249 | HINSTANCE h = (HINSTANCE)clib_def_handle[i]; |
233 | if (!(void *)h) { /* Resolve default library handles (once). */ | 250 | if (!(void *)h) { /* Resolve default library handles (once). */ |
251 | #if LJ_TARGET_UWP | ||
252 | h = (HINSTANCE)&__ImageBase; | ||
253 | #else | ||
234 | switch (i) { | 254 | switch (i) { |
235 | case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; | 255 | case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; |
236 | case CLIB_HANDLE_DLL: | 256 | case CLIB_HANDLE_DLL: |
@@ -241,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name) | |||
241 | GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, | 261 | GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, |
242 | (const char *)&_fmode, &h); | 262 | (const char *)&_fmode, &h); |
243 | break; | 263 | break; |
244 | case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break; | 264 | case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break; |
245 | case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break; | 265 | case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break; |
246 | case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break; | 266 | case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break; |
247 | } | 267 | } |
248 | if (!h) continue; | 268 | if (!h) continue; |
269 | #endif | ||
249 | clib_def_handle[i] = (void *)h; | 270 | clib_def_handle[i] = (void *)h; |
250 | } | 271 | } |
251 | p = (void *)GetProcAddress(h, name); | 272 | p = (void *)GetProcAddress(h, name); |
@@ -264,7 +285,7 @@ static void *clib_getsym(CLibrary *cl, const char *name) | |||
264 | LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, | 285 | LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, |
265 | const char *name) | 286 | const char *name) |
266 | { | 287 | { |
267 | lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); | 288 | lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS")); |
268 | } | 289 | } |
269 | 290 | ||
270 | static void *clib_loadlib(lua_State *L, const char *name, int global) | 291 | static void *clib_loadlib(lua_State *L, const char *name, int global) |
@@ -329,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) | |||
329 | lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); | 350 | lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); |
330 | if (ctype_isconstval(ct->info)) { | 351 | if (ctype_isconstval(ct->info)) { |
331 | CType *ctt = ctype_child(cts, ct); | 352 | CType *ctt = ctype_child(cts, ct); |
332 | lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); | 353 | lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, |
354 | "only 32 bit const supported"); /* NYI */ | ||
333 | if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) | 355 | if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) |
334 | setnumV(tv, (lua_Number)(uint32_t)ct->size); | 356 | setnumV(tv, (lua_Number)(uint32_t)ct->size); |
335 | else | 357 | else |
@@ -341,14 +363,15 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) | |||
341 | #endif | 363 | #endif |
342 | void *p = clib_getsym(cl, sym); | 364 | void *p = clib_getsym(cl, sym); |
343 | GCcdata *cd; | 365 | GCcdata *cd; |
344 | lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); | 366 | lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info), |
367 | "unexpected ctype %08x in clib", ct->info); | ||
345 | #if LJ_TARGET_X86 && LJ_ABI_WIN | 368 | #if LJ_TARGET_X86 && LJ_ABI_WIN |
346 | /* Retry with decorated name for fastcall/stdcall functions. */ | 369 | /* Retry with decorated name for fastcall/stdcall functions. */ |
347 | if (!p && ctype_isfunc(ct->info)) { | 370 | if (!p && ctype_isfunc(ct->info)) { |
348 | CTInfo cconv = ctype_cconv(ct->info); | 371 | CTInfo cconv = ctype_cconv(ct->info); |
349 | if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { | 372 | if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { |
350 | CTSize sz = clib_func_argsize(cts, ct); | 373 | CTSize sz = clib_func_argsize(cts, ct); |
351 | const char *symd = lj_str_pushf(L, | 374 | const char *symd = lj_strfmt_pushf(L, |
352 | cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", | 375 | cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", |
353 | sym, sz); | 376 | sym, sz); |
354 | L->top--; | 377 | L->top--; |
diff --git a/src/lj_cparse.c b/src/lj_cparse.c index 9cd26d67..e364939d 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c | |||
@@ -9,13 +9,14 @@ | |||
9 | 9 | ||
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | 12 | #include "lj_buf.h" |
13 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
14 | #include "lj_cparse.h" | 14 | #include "lj_cparse.h" |
15 | #include "lj_frame.h" | 15 | #include "lj_frame.h" |
16 | #include "lj_vm.h" | 16 | #include "lj_vm.h" |
17 | #include "lj_char.h" | 17 | #include "lj_char.h" |
18 | #include "lj_strscan.h" | 18 | #include "lj_strscan.h" |
19 | #include "lj_strfmt.h" | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | ** Important note: this is NOT a validating C parser! This is a minimal | 22 | ** Important note: this is NOT a validating C parser! This is a minimal |
@@ -27,6 +28,30 @@ | |||
27 | ** If in doubt, please check the input against your favorite C compiler. | 28 | ** If in doubt, please check the input against your favorite C compiler. |
28 | */ | 29 | */ |
29 | 30 | ||
31 | #ifdef LUA_USE_ASSERT | ||
32 | #define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__)) | ||
33 | #else | ||
34 | #define lj_assertCP(c, ...) ((void)cp) | ||
35 | #endif | ||
36 | |||
37 | /* -- Miscellaneous ------------------------------------------------------- */ | ||
38 | |||
39 | /* Match string against a C literal. */ | ||
40 | #define cp_str_is(str, k) \ | ||
41 | ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1)) | ||
42 | |||
43 | /* Check string against a linear list of matches. */ | ||
44 | int lj_cparse_case(GCstr *str, const char *match) | ||
45 | { | ||
46 | MSize len; | ||
47 | int n; | ||
48 | for (n = 0; (len = (MSize)*match++); n++, match += len) { | ||
49 | if (str->len == len && !memcmp(match, strdata(str), len)) | ||
50 | return n; | ||
51 | } | ||
52 | return -1; | ||
53 | } | ||
54 | |||
30 | /* -- C lexer ------------------------------------------------------------- */ | 55 | /* -- C lexer ------------------------------------------------------------- */ |
31 | 56 | ||
32 | /* C lexer token names. */ | 57 | /* C lexer token names. */ |
@@ -42,13 +67,13 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em); | |||
42 | 67 | ||
43 | static const char *cp_tok2str(CPState *cp, CPToken tok) | 68 | static const char *cp_tok2str(CPState *cp, CPToken tok) |
44 | { | 69 | { |
45 | lua_assert(tok < CTOK_FIRSTDECL); | 70 | lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok); |
46 | if (tok > CTOK_OFS) | 71 | if (tok > CTOK_OFS) |
47 | return ctoknames[tok-CTOK_OFS-1]; | 72 | return ctoknames[tok-CTOK_OFS-1]; |
48 | else if (!lj_char_iscntrl(tok)) | 73 | else if (!lj_char_iscntrl(tok)) |
49 | return lj_str_pushf(cp->L, "%c", tok); | 74 | return lj_strfmt_pushf(cp->L, "%c", tok); |
50 | else | 75 | else |
51 | return lj_str_pushf(cp->L, "char(%d)", tok); | 76 | return lj_strfmt_pushf(cp->L, "char(%d)", tok); |
52 | } | 77 | } |
53 | 78 | ||
54 | /* End-of-line? */ | 79 | /* End-of-line? */ |
@@ -85,24 +110,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp) | |||
85 | return cp_get(cp); | 110 | return cp_get(cp); |
86 | } | 111 | } |
87 | 112 | ||
88 | /* Grow save buffer. */ | ||
89 | static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c) | ||
90 | { | ||
91 | MSize newsize; | ||
92 | if (cp->sb.sz >= CPARSE_MAX_BUF/2) | ||
93 | cp_err(cp, LJ_ERR_XELEM); | ||
94 | newsize = cp->sb.sz * 2; | ||
95 | lj_str_resizebuf(cp->L, &cp->sb, newsize); | ||
96 | cp->sb.buf[cp->sb.n++] = (char)c; | ||
97 | } | ||
98 | |||
99 | /* Save character in buffer. */ | 113 | /* Save character in buffer. */ |
100 | static LJ_AINLINE void cp_save(CPState *cp, CPChar c) | 114 | static LJ_AINLINE void cp_save(CPState *cp, CPChar c) |
101 | { | 115 | { |
102 | if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) | 116 | lj_buf_putb(&cp->sb, c); |
103 | cp_save_grow(cp, c); | ||
104 | else | ||
105 | cp->sb.buf[cp->sb.n++] = (char)c; | ||
106 | } | 117 | } |
107 | 118 | ||
108 | /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ | 119 | /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ |
@@ -122,20 +133,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...) | |||
122 | tokstr = NULL; | 133 | tokstr = NULL; |
123 | } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || | 134 | } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || |
124 | tok >= CTOK_FIRSTDECL) { | 135 | tok >= CTOK_FIRSTDECL) { |
125 | if (cp->sb.n == 0) cp_save(cp, '$'); | 136 | if (cp->sb.w == cp->sb.b) cp_save(cp, '$'); |
126 | cp_save(cp, '\0'); | 137 | cp_save(cp, '\0'); |
127 | tokstr = cp->sb.buf; | 138 | tokstr = cp->sb.b; |
128 | } else { | 139 | } else { |
129 | tokstr = cp_tok2str(cp, tok); | 140 | tokstr = cp_tok2str(cp, tok); |
130 | } | 141 | } |
131 | L = cp->L; | 142 | L = cp->L; |
132 | va_start(argp, em); | 143 | va_start(argp, em); |
133 | msg = lj_str_pushvf(L, err2msg(em), argp); | 144 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
134 | va_end(argp); | 145 | va_end(argp); |
135 | if (tokstr) | 146 | if (tokstr) |
136 | msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); | 147 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); |
137 | if (cp->linenumber > 1) | 148 | if (cp->linenumber > 1) |
138 | msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); | 149 | msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber); |
139 | lj_err_callermsg(L, msg); | 150 | lj_err_callermsg(L, msg); |
140 | } | 151 | } |
141 | 152 | ||
@@ -164,7 +175,8 @@ static CPToken cp_number(CPState *cp) | |||
164 | TValue o; | 175 | TValue o; |
165 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); | 176 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); |
166 | cp_save(cp, '\0'); | 177 | cp_save(cp, '\0'); |
167 | fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); | 178 | fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1, |
179 | &o, STRSCAN_OPT_C); | ||
168 | if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; | 180 | if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; |
169 | else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; | 181 | else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; |
170 | else if (!(cp->mode & CPARSE_MODE_SKIP)) | 182 | else if (!(cp->mode & CPARSE_MODE_SKIP)) |
@@ -177,7 +189,7 @@ static CPToken cp_number(CPState *cp) | |||
177 | static CPToken cp_ident(CPState *cp) | 189 | static CPToken cp_ident(CPState *cp) |
178 | { | 190 | { |
179 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); | 191 | do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); |
180 | cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); | 192 | cp->str = lj_buf_str(cp->L, &cp->sb); |
181 | cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); | 193 | cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); |
182 | if (ctype_type(cp->ct->info) == CT_KW) | 194 | if (ctype_type(cp->ct->info) == CT_KW) |
183 | return ctype_cid(cp->ct->info); | 195 | return ctype_cid(cp->ct->info); |
@@ -263,11 +275,11 @@ static CPToken cp_string(CPState *cp) | |||
263 | } | 275 | } |
264 | cp_get(cp); | 276 | cp_get(cp); |
265 | if (delim == '"') { | 277 | if (delim == '"') { |
266 | cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); | 278 | cp->str = lj_buf_str(cp->L, &cp->sb); |
267 | return CTOK_STRING; | 279 | return CTOK_STRING; |
268 | } else { | 280 | } else { |
269 | if (cp->sb.n != 1) cp_err_token(cp, '\''); | 281 | if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\''); |
270 | cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; | 282 | cp->val.i32 = (int32_t)(char)*cp->sb.b; |
271 | cp->val.id = CTID_INT32; | 283 | cp->val.id = CTID_INT32; |
272 | return CTOK_INTEGER; | 284 | return CTOK_INTEGER; |
273 | } | 285 | } |
@@ -296,7 +308,7 @@ static void cp_comment_cpp(CPState *cp) | |||
296 | /* Lexical scanner for C. Only a minimal subset is implemented. */ | 308 | /* Lexical scanner for C. Only a minimal subset is implemented. */ |
297 | static CPToken cp_next_(CPState *cp) | 309 | static CPToken cp_next_(CPState *cp) |
298 | { | 310 | { |
299 | lj_str_resetbuf(&cp->sb); | 311 | lj_buf_reset(&cp->sb); |
300 | for (;;) { | 312 | for (;;) { |
301 | if (lj_char_isident(cp->c)) | 313 | if (lj_char_isident(cp->c)) |
302 | return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); | 314 | return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); |
@@ -385,9 +397,8 @@ static void cp_init(CPState *cp) | |||
385 | cp->depth = 0; | 397 | cp->depth = 0; |
386 | cp->curpack = 0; | 398 | cp->curpack = 0; |
387 | cp->packstack[0] = 255; | 399 | cp->packstack[0] = 255; |
388 | lj_str_initbuf(&cp->sb); | 400 | lj_buf_init(cp->L, &cp->sb); |
389 | lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF); | 401 | lj_assertCP(cp->p != NULL, "uninitialized cp->p"); |
390 | lua_assert(cp->p != NULL); | ||
391 | cp_get(cp); /* Read-ahead first char. */ | 402 | cp_get(cp); /* Read-ahead first char. */ |
392 | cp->tok = 0; | 403 | cp->tok = 0; |
393 | cp->tmask = CPNS_DEFAULT; | 404 | cp->tmask = CPNS_DEFAULT; |
@@ -398,7 +409,7 @@ static void cp_init(CPState *cp) | |||
398 | static void cp_cleanup(CPState *cp) | 409 | static void cp_cleanup(CPState *cp) |
399 | { | 410 | { |
400 | global_State *g = G(cp->L); | 411 | global_State *g = G(cp->L); |
401 | lj_str_freebuf(g, &cp->sb); | 412 | lj_buf_free(g, &cp->sb); |
402 | } | 413 | } |
403 | 414 | ||
404 | /* Check and consume optional token. */ | 415 | /* Check and consume optional token. */ |
@@ -848,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) | |||
848 | /* The cid is already part of info for copies of pointers/functions. */ | 859 | /* The cid is already part of info for copies of pointers/functions. */ |
849 | idx = ct->next; | 860 | idx = ct->next; |
850 | if (ctype_istypedef(info)) { | 861 | if (ctype_istypedef(info)) { |
851 | lua_assert(id == 0); | 862 | lj_assertCP(id == 0, "typedef not at toplevel"); |
852 | id = ctype_cid(info); | 863 | id = ctype_cid(info); |
853 | /* Always refetch info/size, since struct/enum may have been completed. */ | 864 | /* Always refetch info/size, since struct/enum may have been completed. */ |
854 | cinfo = ctype_get(cp->cts, id)->info; | 865 | cinfo = ctype_get(cp->cts, id)->info; |
855 | csize = ctype_get(cp->cts, id)->size; | 866 | csize = ctype_get(cp->cts, id)->size; |
856 | lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo)); | 867 | lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo), |
868 | "typedef of bad type"); | ||
857 | } else if (ctype_isfunc(info)) { /* Intern function. */ | 869 | } else if (ctype_isfunc(info)) { /* Intern function. */ |
858 | CType *fct; | 870 | CType *fct; |
859 | CTypeID fid; | 871 | CTypeID fid; |
@@ -886,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) | |||
886 | /* Inherit csize/cinfo from original type. */ | 898 | /* Inherit csize/cinfo from original type. */ |
887 | } else { | 899 | } else { |
888 | if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ | 900 | if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ |
889 | lua_assert(id == 0); | 901 | lj_assertCP(id == 0, "number not at toplevel"); |
890 | if (!(info & CTF_BOOL)) { | 902 | if (!(info & CTF_BOOL)) { |
891 | CTSize msize = ctype_msizeP(decl->attr); | 903 | CTSize msize = ctype_msizeP(decl->attr); |
892 | CTSize vsize = ctype_vsizeP(decl->attr); | 904 | CTSize vsize = ctype_vsizeP(decl->attr); |
@@ -941,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) | |||
941 | info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); | 953 | info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); |
942 | info |= (cinfo & CTF_QUAL); /* Inherit qual. */ | 954 | info |= (cinfo & CTF_QUAL); /* Inherit qual. */ |
943 | } else { | 955 | } else { |
944 | lua_assert(ctype_isvoid(info)); | 956 | lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info); |
945 | } | 957 | } |
946 | csize = size; | 958 | csize = size; |
947 | cinfo = info+id; | 959 | cinfo = info+id; |
@@ -953,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) | |||
953 | 965 | ||
954 | /* -- C declaration parser ------------------------------------------------ */ | 966 | /* -- C declaration parser ------------------------------------------------ */ |
955 | 967 | ||
956 | #define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) | ||
957 | |||
958 | /* Reset declaration state to declaration specifier. */ | 968 | /* Reset declaration state to declaration specifier. */ |
959 | static void cp_decl_reset(CPDecl *decl) | 969 | static void cp_decl_reset(CPDecl *decl) |
960 | { | 970 | { |
@@ -1031,7 +1041,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl) | |||
1031 | if (cp->tok == CTOK_STRING) { | 1041 | if (cp->tok == CTOK_STRING) { |
1032 | GCstr *str = cp->str; | 1042 | GCstr *str = cp->str; |
1033 | while (cp_next(cp) == CTOK_STRING) { | 1043 | while (cp_next(cp) == CTOK_STRING) { |
1034 | lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); | 1044 | lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); |
1035 | cp->L->top--; | 1045 | cp->L->top--; |
1036 | str = strV(cp->L->top); | 1046 | str = strV(cp->L->top); |
1037 | } | 1047 | } |
@@ -1083,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) | |||
1083 | if (cp->tok == CTOK_IDENT) { | 1093 | if (cp->tok == CTOK_IDENT) { |
1084 | GCstr *attrstr = cp->str; | 1094 | GCstr *attrstr = cp->str; |
1085 | cp_next(cp); | 1095 | cp_next(cp); |
1086 | switch (attrstr->hash) { | 1096 | switch (lj_cparse_case(attrstr, |
1087 | case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ | 1097 | "\007aligned" "\013__aligned__" |
1098 | "\006packed" "\012__packed__" | ||
1099 | "\004mode" "\010__mode__" | ||
1100 | "\013vector_size" "\017__vector_size__" | ||
1101 | #if LJ_TARGET_X86 | ||
1102 | "\007regparm" "\013__regparm__" | ||
1103 | "\005cdecl" "\011__cdecl__" | ||
1104 | "\010thiscall" "\014__thiscall__" | ||
1105 | "\010fastcall" "\014__fastcall__" | ||
1106 | "\007stdcall" "\013__stdcall__" | ||
1107 | "\012sseregparm" "\016__sseregparm__" | ||
1108 | #endif | ||
1109 | )) { | ||
1110 | case 0: case 1: /* aligned */ | ||
1088 | cp_decl_align(cp, decl); | 1111 | cp_decl_align(cp, decl); |
1089 | break; | 1112 | break; |
1090 | case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ | 1113 | case 2: case 3: /* packed */ |
1091 | decl->attr |= CTFP_PACKED; | 1114 | decl->attr |= CTFP_PACKED; |
1092 | break; | 1115 | break; |
1093 | case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ | 1116 | case 4: case 5: /* mode */ |
1094 | cp_decl_mode(cp, decl); | 1117 | cp_decl_mode(cp, decl); |
1095 | break; | 1118 | break; |
1096 | case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ | 1119 | case 6: case 7: /* vector_size */ |
1097 | { | 1120 | { |
1098 | CTSize vsize = cp_decl_sizeattr(cp); | 1121 | CTSize vsize = cp_decl_sizeattr(cp); |
1099 | if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); | 1122 | if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); |
1100 | } | 1123 | } |
1101 | break; | 1124 | break; |
1102 | #if LJ_TARGET_X86 | 1125 | #if LJ_TARGET_X86 |
1103 | case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ | 1126 | case 8: case 9: /* regparm */ |
1104 | CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); | 1127 | CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); |
1105 | decl->fattr |= CTFP_CCONV; | 1128 | decl->fattr |= CTFP_CCONV; |
1106 | break; | 1129 | break; |
1107 | case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ | 1130 | case 10: case 11: /* cdecl */ |
1108 | CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); | 1131 | CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); |
1109 | decl->fattr |= CTFP_CCONV; | 1132 | decl->fattr |= CTFP_CCONV; |
1110 | break; | 1133 | break; |
1111 | case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ | 1134 | case 12: case 13: /* thiscall */ |
1112 | CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); | 1135 | CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); |
1113 | decl->fattr |= CTFP_CCONV; | 1136 | decl->fattr |= CTFP_CCONV; |
1114 | break; | 1137 | break; |
1115 | case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ | 1138 | case 14: case 15: /* fastcall */ |
1116 | CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); | 1139 | CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); |
1117 | decl->fattr |= CTFP_CCONV; | 1140 | decl->fattr |= CTFP_CCONV; |
1118 | break; | 1141 | break; |
1119 | case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ | 1142 | case 16: case 17: /* stdcall */ |
1120 | CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); | 1143 | CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); |
1121 | decl->fattr |= CTFP_CCONV; | 1144 | decl->fattr |= CTFP_CCONV; |
1122 | break; | 1145 | break; |
1123 | case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ | 1146 | case 18: case 19: /* sseregparm */ |
1124 | decl->fattr |= CTF_SSEREGPARM; | 1147 | decl->fattr |= CTF_SSEREGPARM; |
1125 | decl->fattr |= CTFP_CCONV; | 1148 | decl->fattr |= CTFP_CCONV; |
1126 | break; | 1149 | break; |
@@ -1152,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl) | |||
1152 | while (cp->tok == CTOK_IDENT) { | 1175 | while (cp->tok == CTOK_IDENT) { |
1153 | GCstr *attrstr = cp->str; | 1176 | GCstr *attrstr = cp->str; |
1154 | cp_next(cp); | 1177 | cp_next(cp); |
1155 | switch (attrstr->hash) { | 1178 | if (cp_str_is(attrstr, "align")) { |
1156 | case H_(bc2395fa,98f267f8): /* align */ | ||
1157 | cp_decl_align(cp, decl); | 1179 | cp_decl_align(cp, decl); |
1158 | break; | 1180 | } else { /* Ignore all other attributes. */ |
1159 | default: /* Ignore all other attributes. */ | ||
1160 | if (cp_opt(cp, '(')) { | 1181 | if (cp_opt(cp, '(')) { |
1161 | while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); | 1182 | while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); |
1162 | cp_check(cp, ')'); | 1183 | cp_check(cp, ')'); |
1163 | } | 1184 | } |
1164 | break; | ||
1165 | } | 1185 | } |
1166 | } | 1186 | } |
1167 | cp_check(cp, ')'); | 1187 | cp_check(cp, ')'); |
@@ -1572,7 +1592,7 @@ end_decl: | |||
1572 | cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); | 1592 | cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); |
1573 | sz = sizeof(int); | 1593 | sz = sizeof(int); |
1574 | } | 1594 | } |
1575 | lua_assert(sz != 0); | 1595 | lj_assertCP(sz != 0, "basic ctype with zero size"); |
1576 | info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ | 1596 | info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ |
1577 | info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */ | 1597 | info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */ |
1578 | cp_push(decl, info, sz); | 1598 | cp_push(decl, info, sz); |
@@ -1741,17 +1761,16 @@ static CTypeID cp_decl_abstract(CPState *cp) | |||
1741 | static void cp_pragma(CPState *cp, BCLine pragmaline) | 1761 | static void cp_pragma(CPState *cp, BCLine pragmaline) |
1742 | { | 1762 | { |
1743 | cp_next(cp); | 1763 | cp_next(cp); |
1744 | if (cp->tok == CTOK_IDENT && | 1764 | if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) { |
1745 | cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */ | ||
1746 | cp_next(cp); | 1765 | cp_next(cp); |
1747 | cp_check(cp, '('); | 1766 | cp_check(cp, '('); |
1748 | if (cp->tok == CTOK_IDENT) { | 1767 | if (cp->tok == CTOK_IDENT) { |
1749 | if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ | 1768 | if (cp_str_is(cp->str, "push")) { |
1750 | if (cp->curpack < CPARSE_MAX_PACKSTACK) { | 1769 | if (cp->curpack < CPARSE_MAX_PACKSTACK) { |
1751 | cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; | 1770 | cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; |
1752 | cp->curpack++; | 1771 | cp->curpack++; |
1753 | } | 1772 | } |
1754 | } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ | 1773 | } else if (cp_str_is(cp->str, "pop")) { |
1755 | if (cp->curpack > 0) cp->curpack--; | 1774 | if (cp->curpack > 0) cp->curpack--; |
1756 | } else { | 1775 | } else { |
1757 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); | 1776 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); |
@@ -1773,6 +1792,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline) | |||
1773 | } | 1792 | } |
1774 | } | 1793 | } |
1775 | 1794 | ||
1795 | /* Handle line number. */ | ||
1796 | static void cp_line(CPState *cp, BCLine hashline) | ||
1797 | { | ||
1798 | BCLine newline = cp->val.u32; | ||
1799 | /* TODO: Handle file name and include it in error messages. */ | ||
1800 | while (cp->tok != CTOK_EOF && cp->linenumber == hashline) | ||
1801 | cp_next(cp); | ||
1802 | cp->linenumber = newline; | ||
1803 | } | ||
1804 | |||
1776 | /* Parse multiple C declarations of types or extern identifiers. */ | 1805 | /* Parse multiple C declarations of types or extern identifiers. */ |
1777 | static void cp_decl_multi(CPState *cp) | 1806 | static void cp_decl_multi(CPState *cp) |
1778 | { | 1807 | { |
@@ -1785,12 +1814,21 @@ static void cp_decl_multi(CPState *cp) | |||
1785 | continue; | 1814 | continue; |
1786 | } | 1815 | } |
1787 | if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ | 1816 | if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ |
1788 | BCLine pragmaline = cp->linenumber; | 1817 | BCLine hashline = cp->linenumber; |
1789 | if (!(cp_next(cp) == CTOK_IDENT && | 1818 | CPToken tok = cp_next(cp); |
1790 | cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */ | 1819 | if (tok == CTOK_INTEGER) { |
1820 | cp_line(cp, hashline); | ||
1821 | continue; | ||
1822 | } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) { | ||
1823 | if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok); | ||
1824 | cp_line(cp, hashline); | ||
1825 | continue; | ||
1826 | } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) { | ||
1827 | cp_pragma(cp, hashline); | ||
1828 | continue; | ||
1829 | } else { | ||
1791 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); | 1830 | cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); |
1792 | cp_pragma(cp, pragmaline); | 1831 | } |
1793 | continue; | ||
1794 | } | 1832 | } |
1795 | scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); | 1833 | scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); |
1796 | if ((cp->tok == ';' || cp->tok == CTOK_EOF) && | 1834 | if ((cp->tok == ';' || cp->tok == CTOK_EOF) && |
@@ -1814,7 +1852,7 @@ static void cp_decl_multi(CPState *cp) | |||
1814 | /* Treat both static and extern function declarations as extern. */ | 1852 | /* Treat both static and extern function declarations as extern. */ |
1815 | ct = ctype_get(cp->cts, ctypeid); | 1853 | ct = ctype_get(cp->cts, ctypeid); |
1816 | /* We always get new anonymous functions (typedefs are copied). */ | 1854 | /* We always get new anonymous functions (typedefs are copied). */ |
1817 | lua_assert(gcref(ct->name) == NULL); | 1855 | lj_assertCP(gcref(ct->name) == NULL, "unexpected named function"); |
1818 | id = ctypeid; /* Just name it. */ | 1856 | id = ctypeid; /* Just name it. */ |
1819 | } else if ((scl & CDF_STATIC)) { /* Accept static constants. */ | 1857 | } else if ((scl & CDF_STATIC)) { /* Accept static constants. */ |
1820 | id = cp_decl_constinit(cp, &ct, ctypeid); | 1858 | id = cp_decl_constinit(cp, &ct, ctypeid); |
@@ -1856,8 +1894,6 @@ static void cp_decl_single(CPState *cp) | |||
1856 | if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); | 1894 | if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); |
1857 | } | 1895 | } |
1858 | 1896 | ||
1859 | #undef H_ | ||
1860 | |||
1861 | /* ------------------------------------------------------------------------ */ | 1897 | /* ------------------------------------------------------------------------ */ |
1862 | 1898 | ||
1863 | /* Protected callback for C parser. */ | 1899 | /* Protected callback for C parser. */ |
@@ -1873,7 +1909,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud) | |||
1873 | cp_decl_single(cp); | 1909 | cp_decl_single(cp); |
1874 | if (cp->param && cp->param != cp->L->top) | 1910 | if (cp->param && cp->param != cp->L->top) |
1875 | cp_err(cp, LJ_ERR_FFI_NUMPARAM); | 1911 | cp_err(cp, LJ_ERR_FFI_NUMPARAM); |
1876 | lua_assert(cp->depth == 0); | 1912 | lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth"); |
1877 | return NULL; | 1913 | return NULL; |
1878 | } | 1914 | } |
1879 | 1915 | ||
diff --git a/src/lj_cparse.h b/src/lj_cparse.h index df884497..c0f61edc 100644 --- a/src/lj_cparse.h +++ b/src/lj_cparse.h | |||
@@ -60,6 +60,8 @@ typedef struct CPState { | |||
60 | 60 | ||
61 | LJ_FUNC int lj_cparse(CPState *cp); | 61 | LJ_FUNC int lj_cparse(CPState *cp); |
62 | 62 | ||
63 | LJ_FUNC int lj_cparse_case(GCstr *str, const char *match); | ||
64 | |||
63 | #endif | 65 | #endif |
64 | 66 | ||
65 | #endif | 67 | #endif |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 3f3552a6..bc21d859 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c | |||
@@ -11,13 +11,13 @@ | |||
11 | #if LJ_HASJIT && LJ_HASFFI | 11 | #if LJ_HASJIT && LJ_HASFFI |
12 | 12 | ||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_str.h" | ||
15 | #include "lj_tab.h" | 14 | #include "lj_tab.h" |
16 | #include "lj_frame.h" | 15 | #include "lj_frame.h" |
17 | #include "lj_ctype.h" | 16 | #include "lj_ctype.h" |
18 | #include "lj_cdata.h" | 17 | #include "lj_cdata.h" |
19 | #include "lj_cparse.h" | 18 | #include "lj_cparse.h" |
20 | #include "lj_cconv.h" | 19 | #include "lj_cconv.h" |
20 | #include "lj_carith.h" | ||
21 | #include "lj_clib.h" | 21 | #include "lj_clib.h" |
22 | #include "lj_ccall.h" | 22 | #include "lj_ccall.h" |
23 | #include "lj_ff.h" | 23 | #include "lj_ff.h" |
@@ -31,6 +31,7 @@ | |||
31 | #include "lj_snap.h" | 31 | #include "lj_snap.h" |
32 | #include "lj_crecord.h" | 32 | #include "lj_crecord.h" |
33 | #include "lj_dispatch.h" | 33 | #include "lj_dispatch.h" |
34 | #include "lj_strfmt.h" | ||
34 | 35 | ||
35 | /* Some local macros to save typing. Undef'd at the end. */ | 36 | /* Some local macros to save typing. Undef'd at the end. */ |
36 | #define IR(ref) (&J->cur.ir[(ref)]) | 37 | #define IR(ref) (&J->cur.ir[(ref)]) |
@@ -60,7 +61,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o) | |||
60 | static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) | 61 | static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) |
61 | { | 62 | { |
62 | CTypeID id; | 63 | CTypeID id; |
63 | lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID); | 64 | lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID, |
65 | "expected CTypeID cdata"); | ||
64 | id = *(CTypeID *)cdataptr(cd); | 66 | id = *(CTypeID *)cdataptr(cd); |
65 | tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); | 67 | tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); |
66 | emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); | 68 | emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); |
@@ -211,7 +213,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, | |||
211 | ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); | 213 | ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); |
212 | ml[i].trofs = trofs; | 214 | ml[i].trofs = trofs; |
213 | i++; | 215 | i++; |
214 | rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; | 216 | rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; |
215 | if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ | 217 | if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ |
216 | rwin = 0; | 218 | rwin = 0; |
217 | for ( ; j < i; j++) { | 219 | for ( ; j < i; j++) { |
@@ -236,13 +238,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen, | |||
236 | if (len > CREC_COPY_MAXLEN) goto fallback; | 238 | if (len > CREC_COPY_MAXLEN) goto fallback; |
237 | if (ct) { | 239 | if (ct) { |
238 | CTState *cts = ctype_ctsG(J2G(J)); | 240 | CTState *cts = ctype_ctsG(J2G(J)); |
239 | lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info)); | 241 | lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info), |
242 | "copy of non-aggregate"); | ||
240 | if (ctype_isarray(ct->info)) { | 243 | if (ctype_isarray(ct->info)) { |
241 | CType *cct = ctype_rawchild(cts, ct); | 244 | CType *cct = ctype_rawchild(cts, ct); |
242 | tp = crec_ct2irt(cts, cct); | 245 | tp = crec_ct2irt(cts, cct); |
243 | if (tp == IRT_CDATA) goto rawcopy; | 246 | if (tp == IRT_CDATA) goto rawcopy; |
244 | step = lj_ir_type_size[tp]; | 247 | step = lj_ir_type_size[tp]; |
245 | lua_assert((len & (step-1)) == 0); | 248 | lj_assertJ((len & (step-1)) == 0, "copy of fractional size"); |
246 | } else if ((ct->info & CTF_UNION)) { | 249 | } else if ((ct->info & CTF_UNION)) { |
247 | step = (1u << ctype_align(ct->info)); | 250 | step = (1u << ctype_align(ct->info)); |
248 | goto rawcopy; | 251 | goto rawcopy; |
@@ -441,7 +444,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
441 | /* fallthrough */ | 444 | /* fallthrough */ |
442 | case CCX(I, F): | 445 | case CCX(I, F): |
443 | if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; | 446 | if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; |
444 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); | 447 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); |
445 | goto xstore; | 448 | goto xstore; |
446 | case CCX(I, P): | 449 | case CCX(I, P): |
447 | case CCX(I, A): | 450 | case CCX(I, A): |
@@ -521,7 +524,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
521 | if (st == IRT_CDATA) goto err_nyi; | 524 | if (st == IRT_CDATA) goto err_nyi; |
522 | /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ | 525 | /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ |
523 | sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, | 526 | sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, |
524 | st, IRCONV_TRUNC|IRCONV_ANY); | 527 | st, IRCONV_ANY); |
525 | goto xstore; | 528 | goto xstore; |
526 | 529 | ||
527 | /* Destination is an array. */ | 530 | /* Destination is an array. */ |
@@ -613,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) | |||
613 | sp = lj_ir_kptr(J, NULL); | 616 | sp = lj_ir_kptr(J, NULL); |
614 | } else if (tref_isudata(sp)) { | 617 | } else if (tref_isudata(sp)) { |
615 | GCudata *ud = udataV(sval); | 618 | GCudata *ud = udataV(sval); |
616 | if (ud->udtype == UDTYPE_IO_FILE) { | 619 | if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) { |
617 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); | 620 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); |
618 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); | 621 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype)); |
619 | sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE); | 622 | sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, |
623 | ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE : | ||
624 | IRFL_SBUF_R); | ||
620 | } else { | 625 | } else { |
621 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); | 626 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); |
622 | } | 627 | } |
@@ -628,7 +633,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) | |||
628 | /* Specialize to the name of the enum constant. */ | 633 | /* Specialize to the name of the enum constant. */ |
629 | emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); | 634 | emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); |
630 | if (cct && ctype_isconstval(cct->info)) { | 635 | if (cct && ctype_isconstval(cct->info)) { |
631 | lua_assert(ctype_child(cts, cct)->size == 4); | 636 | lj_assertJ(ctype_child(cts, cct)->size == 4, |
637 | "only 32 bit const supported"); /* NYI */ | ||
632 | svisnz = (void *)(intptr_t)(ofs != 0); | 638 | svisnz = (void *)(intptr_t)(ofs != 0); |
633 | sp = lj_ir_kint(J, (int32_t)ofs); | 639 | sp = lj_ir_kint(J, (int32_t)ofs); |
634 | sid = ctype_cid(cct->info); | 640 | sid = ctype_cid(cct->info); |
@@ -640,12 +646,22 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) | |||
640 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); | 646 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); |
641 | sid = CTID_A_CCHAR; | 647 | sid = CTID_A_CCHAR; |
642 | } | 648 | } |
643 | } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ | 649 | } else if (tref_islightud(sp)) { |
650 | #if LJ_64 | ||
651 | lj_trace_err(J, LJ_TRERR_NYICONV); | ||
652 | #endif | ||
653 | } else { /* NYI: tref_istab(sp). */ | ||
644 | IRType t; | 654 | IRType t; |
645 | sid = argv2cdata(J, sp, sval)->ctypeid; | 655 | sid = argv2cdata(J, sp, sval)->ctypeid; |
646 | s = ctype_raw(cts, sid); | 656 | s = ctype_raw(cts, sid); |
647 | svisnz = cdataptr(cdataV(sval)); | 657 | svisnz = cdataptr(cdataV(sval)); |
648 | t = crec_ct2irt(cts, s); | 658 | if (ctype_isfunc(s->info)) { |
659 | sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR); | ||
660 | s = ctype_get(cts, sid); | ||
661 | t = IRT_PTR; | ||
662 | } else { | ||
663 | t = crec_ct2irt(cts, s); | ||
664 | } | ||
649 | if (ctype_isptr(s->info)) { | 665 | if (ctype_isptr(s->info)) { |
650 | sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); | 666 | sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); |
651 | if (ctype_isref(s->info)) { | 667 | if (ctype_isref(s->info)) { |
@@ -700,6 +716,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) | |||
700 | return tr; | 716 | return tr; |
701 | } | 717 | } |
702 | 718 | ||
719 | /* Tailcall to function. */ | ||
720 | static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) | ||
721 | { | ||
722 | TRef kfunc = lj_ir_kfunc(J, funcV(tv)); | ||
723 | #if LJ_FR2 | ||
724 | J->base[-2] = kfunc; | ||
725 | J->base[-1] = TREF_FRAME; | ||
726 | #else | ||
727 | J->base[-1] = kfunc | TREF_FRAME; | ||
728 | #endif | ||
729 | rd->nres = -1; /* Pending tailcall. */ | ||
730 | } | ||
731 | |||
703 | /* Record ctype __index/__newindex metamethods. */ | 732 | /* Record ctype __index/__newindex metamethods. */ |
704 | static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | 733 | static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, |
705 | RecordFFData *rd) | 734 | RecordFFData *rd) |
@@ -709,8 +738,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | |||
709 | if (!tv) | 738 | if (!tv) |
710 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 739 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
711 | if (tvisfunc(tv)) { | 740 | if (tvisfunc(tv)) { |
712 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 741 | crec_tailcall(J, rd, tv); |
713 | rd->nres = -1; /* Pending tailcall. */ | ||
714 | } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { | 742 | } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { |
715 | /* Specialize to result of __index lookup. */ | 743 | /* Specialize to result of __index lookup. */ |
716 | cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); | 744 | cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); |
@@ -727,6 +755,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | |||
727 | } | 755 | } |
728 | } | 756 | } |
729 | 757 | ||
758 | /* Record bitfield load/store. */ | ||
759 | static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) | ||
760 | { | ||
761 | IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); | ||
762 | TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); | ||
763 | CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; | ||
764 | lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */ | ||
765 | if (rd->data == 0) { /* __index metamethod. */ | ||
766 | if ((info & CTF_BOOL)) { | ||
767 | tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); | ||
768 | /* Assume not equal to zero. Fixup and emit pending guard later. */ | ||
769 | lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); | ||
770 | J->postproc = LJ_POST_FIXGUARD; | ||
771 | tr = TREF_TRUE; | ||
772 | } else if (!(info & CTF_UNSIGNED)) { | ||
773 | tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); | ||
774 | tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); | ||
775 | } else { | ||
776 | lj_assertJ(bsz < 32, "unexpected full bitfield index"); | ||
777 | tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); | ||
778 | tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); | ||
779 | /* We can omit the U32 to NUM conversion, since bsz < 32. */ | ||
780 | } | ||
781 | J->base[0] = tr; | ||
782 | } else { /* __newindex metamethod. */ | ||
783 | CTState *cts = ctype_ctsG(J2G(J)); | ||
784 | CType *ct = ctype_get(cts, | ||
785 | (info & CTF_BOOL) ? CTID_BOOL : | ||
786 | (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32); | ||
787 | int32_t mask = (int32_t)(((1u << bsz)-1) << pos); | ||
788 | TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]); | ||
789 | sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos)); | ||
790 | /* Use of the target type avoids forwarding conversions. */ | ||
791 | sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask)); | ||
792 | tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask)); | ||
793 | tr = emitir(IRT(IR_BOR, t), tr, sp); | ||
794 | emitir(IRT(IR_XSTORE, t), ptr, tr); | ||
795 | rd->nres = 0; | ||
796 | J->needsnap = 1; | ||
797 | } | ||
798 | } | ||
799 | |||
730 | void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) | 800 | void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) |
731 | { | 801 | { |
732 | TRef idx, ptr = J->base[0]; | 802 | TRef idx, ptr = J->base[0]; |
@@ -801,6 +871,7 @@ again: | |||
801 | CType *fct; | 871 | CType *fct; |
802 | fct = lj_ctype_getfield(cts, ct, name, &fofs); | 872 | fct = lj_ctype_getfield(cts, ct, name, &fofs); |
803 | if (fct) { | 873 | if (fct) { |
874 | ofs += (ptrdiff_t)fofs; | ||
804 | /* Always specialize to the field name. */ | 875 | /* Always specialize to the field name. */ |
805 | emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); | 876 | emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); |
806 | if (ctype_isconstval(fct->info)) { | 877 | if (ctype_isconstval(fct->info)) { |
@@ -812,12 +883,14 @@ again: | |||
812 | J->base[0] = lj_ir_kint(J, (int32_t)fct->size); | 883 | J->base[0] = lj_ir_kint(J, (int32_t)fct->size); |
813 | return; /* Interpreter will throw for newindex. */ | 884 | return; /* Interpreter will throw for newindex. */ |
814 | } else if (ctype_isbitfield(fct->info)) { | 885 | } else if (ctype_isbitfield(fct->info)) { |
815 | lj_trace_err(J, LJ_TRERR_NYICONV); | 886 | if (ofs) |
887 | ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); | ||
888 | crec_index_bf(J, rd, ptr, fct->info); | ||
889 | return; | ||
816 | } else { | 890 | } else { |
817 | lua_assert(ctype_isfield(fct->info)); | 891 | lj_assertJ(ctype_isfield(fct->info), "field expected"); |
818 | sid = ctype_cid(fct->info); | 892 | sid = ctype_cid(fct->info); |
819 | } | 893 | } |
820 | ofs += (ptrdiff_t)fofs; | ||
821 | } | 894 | } |
822 | } else if (ctype_iscomplex(ct->info)) { | 895 | } else if (ctype_iscomplex(ct->info)) { |
823 | if (name->len == 2 && | 896 | if (name->len == 2 && |
@@ -867,21 +940,17 @@ again: | |||
867 | } | 940 | } |
868 | 941 | ||
869 | /* Record setting a finalizer. */ | 942 | /* Record setting a finalizer. */ |
870 | static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) | 943 | static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin) |
871 | { | 944 | { |
872 | TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); | 945 | if (tvisgcv(fin)) { |
873 | TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); | 946 | if (!trfin) trfin = lj_ir_kptr(J, gcval(fin)); |
874 | if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } | 947 | } else if (tvisnil(fin)) { |
875 | if (tvisfunc(fin)) { | 948 | trfin = lj_ir_kptr(J, NULL); |
876 | emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin))); | ||
877 | emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC)); | ||
878 | } else if (tviscdata(fin)) { | ||
879 | emitir(IRT(IR_XSTORE, IRT_P32), trlo, | ||
880 | lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA)); | ||
881 | emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA)); | ||
882 | } else { | 949 | } else { |
883 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 950 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
884 | } | 951 | } |
952 | lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd, | ||
953 | trfin, lj_ir_kint(J, (int32_t)itype(fin))); | ||
885 | J->needsnap = 1; | 954 | J->needsnap = 1; |
886 | } | 955 | } |
887 | 956 | ||
@@ -892,10 +961,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
892 | CTSize sz; | 961 | CTSize sz; |
893 | CTInfo info = lj_ctype_info(cts, id, &sz); | 962 | CTInfo info = lj_ctype_info(cts, id, &sz); |
894 | CType *d = ctype_raw(cts, id); | 963 | CType *d = ctype_raw(cts, id); |
895 | TRef trid; | 964 | TRef trcd, trid = lj_ir_kint(J, id); |
896 | if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) | 965 | cTValue *fin; |
897 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */ | ||
898 | trid = lj_ir_kint(J, id); | ||
899 | /* Use special instruction to box pointer or 32/64 bit integer. */ | 966 | /* Use special instruction to box pointer or 32/64 bit integer. */ |
900 | if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { | 967 | if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { |
901 | TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : | 968 | TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : |
@@ -903,11 +970,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
903 | sz == 4 ? lj_ir_kint(J, 0) : | 970 | sz == 4 ? lj_ir_kint(J, 0) : |
904 | (lj_needsplit(J), lj_ir_kint64(J, 0)); | 971 | (lj_needsplit(J), lj_ir_kint64(J, 0)); |
905 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); | 972 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); |
973 | return; | ||
906 | } else { | 974 | } else { |
907 | TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); | 975 | TRef trsz = TREF_NIL; |
908 | cTValue *fin; | 976 | if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */ |
909 | J->base[0] = trcd; | 977 | CTSize sz0, sz1; |
910 | if (J->base[1] && !J->base[2] && | 978 | if (!J->base[1] || J->base[2]) |
979 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */ | ||
980 | trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, | ||
981 | J->base[1], &rd->argv[1]); | ||
982 | sz0 = lj_ctype_vlsize(cts, d, 0); | ||
983 | sz1 = lj_ctype_vlsize(cts, d, 1); | ||
984 | trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0))); | ||
985 | trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0)); | ||
986 | J->base[1] = 0; /* Simplify logic below. */ | ||
987 | } else if (ctype_align(info) > CT_MEMALIGN) { | ||
988 | trsz = lj_ir_kint(J, sz); | ||
989 | } | ||
990 | trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz); | ||
991 | if (sz > 128 || (info & CTF_VLA)) { | ||
992 | TRef dp; | ||
993 | CTSize align; | ||
994 | special: /* Only handle bulk zero-fill for large/VLA/VLS types. */ | ||
995 | if (J->base[1]) | ||
996 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */ | ||
997 | dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata))); | ||
998 | if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz); | ||
999 | align = ctype_align(info); | ||
1000 | if (align < CT_MEMALIGN) align = CT_MEMALIGN; | ||
1001 | crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align)); | ||
1002 | } else if (J->base[1] && !J->base[2] && | ||
911 | !lj_cconv_multi_init(cts, d, &rd->argv[1])) { | 1003 | !lj_cconv_multi_init(cts, d, &rd->argv[1])) { |
912 | goto single_init; | 1004 | goto single_init; |
913 | } else if (ctype_isarray(d->info)) { | 1005 | } else if (ctype_isarray(d->info)) { |
@@ -918,8 +1010,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
918 | TValue *sval = &tv; | 1010 | TValue *sval = &tv; |
919 | MSize i; | 1011 | MSize i; |
920 | tv.u64 = 0; | 1012 | tv.u64 = 0; |
921 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) | 1013 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) || |
922 | lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ | 1014 | esize * CREC_FILL_MAXUNROLL < sz) |
1015 | goto special; | ||
923 | for (i = 1, ofs = 0; ofs < sz; ofs += esize) { | 1016 | for (i = 1, ofs = 0; ofs < sz; ofs += esize) { |
924 | TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, | 1017 | TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, |
925 | lj_ir_kintp(J, ofs + sizeof(GCcdata))); | 1018 | lj_ir_kintp(J, ofs + sizeof(GCcdata))); |
@@ -933,8 +1026,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
933 | crec_ct_tv(J, dc, dp, sp, sval); | 1026 | crec_ct_tv(J, dc, dp, sp, sval); |
934 | } | 1027 | } |
935 | } else if (ctype_isstruct(d->info)) { | 1028 | } else if (ctype_isstruct(d->info)) { |
936 | CTypeID fid = d->sib; | 1029 | CTypeID fid; |
937 | MSize i = 1; | 1030 | MSize i = 1; |
1031 | if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */ | ||
1032 | fid = d->sib; | ||
1033 | while (fid) { | ||
1034 | CType *df = ctype_get(cts, fid); | ||
1035 | fid = df->sib; | ||
1036 | if (ctype_isfield(df->info)) { | ||
1037 | CType *dc; | ||
1038 | if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ | ||
1039 | dc = ctype_rawchild(cts, df); /* Field type. */ | ||
1040 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) || | ||
1041 | ctype_isenum(dc->info))) | ||
1042 | goto special; | ||
1043 | } else if (!ctype_isconstval(df->info)) { | ||
1044 | goto special; | ||
1045 | } | ||
1046 | } | ||
1047 | } | ||
1048 | fid = d->sib; | ||
938 | while (fid) { | 1049 | while (fid) { |
939 | CType *df = ctype_get(cts, fid); | 1050 | CType *df = ctype_get(cts, fid); |
940 | fid = df->sib; | 1051 | fid = df->sib; |
@@ -981,11 +1092,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
981 | crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); | 1092 | crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); |
982 | } | 1093 | } |
983 | } | 1094 | } |
984 | /* Handle __gc metamethod. */ | ||
985 | fin = lj_ctype_meta(cts, id, MM_gc); | ||
986 | if (fin) | ||
987 | crec_finalizer(J, trcd, fin); | ||
988 | } | 1095 | } |
1096 | J->base[0] = trcd; | ||
1097 | /* Handle __gc metamethod. */ | ||
1098 | fin = lj_ctype_meta(cts, id, MM_gc); | ||
1099 | if (fin) | ||
1100 | crec_finalizer(J, trcd, 0, fin); | ||
989 | } | 1101 | } |
990 | 1102 | ||
991 | /* Record argument conversions. */ | 1103 | /* Record argument conversions. */ |
@@ -1026,7 +1138,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, | |||
1026 | if (fid) { /* Get argument type from field. */ | 1138 | if (fid) { /* Get argument type from field. */ |
1027 | CType *ctf = ctype_get(cts, fid); | 1139 | CType *ctf = ctype_get(cts, fid); |
1028 | fid = ctf->sib; | 1140 | fid = ctf->sib; |
1029 | lua_assert(ctype_isfield(ctf->info)); | 1141 | lj_assertJ(ctype_isfield(ctf->info), "field expected"); |
1030 | did = ctype_cid(ctf->info); | 1142 | did = ctype_cid(ctf->info); |
1031 | } else { | 1143 | } else { |
1032 | if (!(ct->info & CTF_VARARG)) | 1144 | if (!(ct->info & CTF_VARARG)) |
@@ -1045,7 +1157,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, | |||
1045 | else | 1157 | else |
1046 | tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); | 1158 | tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); |
1047 | } | 1159 | } |
1048 | } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { | 1160 | } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { |
1049 | lj_needsplit(J); | 1161 | lj_needsplit(J); |
1050 | } | 1162 | } |
1051 | #if LJ_TARGET_X86 | 1163 | #if LJ_TARGET_X86 |
@@ -1091,20 +1203,20 @@ static void crec_snap_caller(jit_State *J) | |||
1091 | lua_State *L = J->L; | 1203 | lua_State *L = J->L; |
1092 | TValue *base = L->base, *top = L->top; | 1204 | TValue *base = L->base, *top = L->top; |
1093 | const BCIns *pc = J->pc; | 1205 | const BCIns *pc = J->pc; |
1094 | TRef ftr = J->base[-1]; | 1206 | TRef ftr = J->base[-1-LJ_FR2]; |
1095 | ptrdiff_t delta; | 1207 | ptrdiff_t delta; |
1096 | if (!frame_islua(base-1) || J->framedepth <= 0) | 1208 | if (!frame_islua(base-1) || J->framedepth <= 0) |
1097 | lj_trace_err(J, LJ_TRERR_NYICALL); | 1209 | lj_trace_err(J, LJ_TRERR_NYICALL); |
1098 | J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]); | 1210 | J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); |
1099 | L->top = base; L->base = base - delta; | 1211 | L->top = base; L->base = base - delta; |
1100 | J->base[-1] = TREF_FALSE; | 1212 | J->base[-1-LJ_FR2] = TREF_FALSE; |
1101 | J->base -= delta; J->baseslot -= (BCReg)delta; | 1213 | J->base -= delta; J->baseslot -= (BCReg)delta; |
1102 | J->maxslot = (BCReg)delta; J->framedepth--; | 1214 | J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; |
1103 | lj_snap_add(J); | 1215 | lj_snap_add(J); |
1104 | L->base = base; L->top = top; | 1216 | L->base = base; L->top = top; |
1105 | J->framedepth++; J->maxslot = 1; | 1217 | J->framedepth++; J->maxslot = 1; |
1106 | J->base += delta; J->baseslot += (BCReg)delta; | 1218 | J->base += delta; J->baseslot += (BCReg)delta; |
1107 | J->base[-1] = ftr; J->pc = pc; | 1219 | J->base[-1-LJ_FR2] = ftr; J->pc = pc; |
1108 | } | 1220 | } |
1109 | 1221 | ||
1110 | /* Record function call. */ | 1222 | /* Record function call. */ |
@@ -1124,8 +1236,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) | |||
1124 | TRef tr; | 1236 | TRef tr; |
1125 | TValue tv; | 1237 | TValue tv; |
1126 | /* Check for blacklisted C functions that might call a callback. */ | 1238 | /* Check for blacklisted C functions that might call a callback. */ |
1127 | setlightudV(&tv, | 1239 | tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000); |
1128 | cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4)); | ||
1129 | if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) | 1240 | if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) |
1130 | lj_trace_err(J, LJ_TRERR_BLACKL); | 1241 | lj_trace_err(J, LJ_TRERR_BLACKL); |
1131 | if (ctype_isvoid(ctr->info)) { | 1242 | if (ctype_isvoid(ctr->info)) { |
@@ -1196,8 +1307,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) | |||
1196 | tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); | 1307 | tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); |
1197 | if (tv) { | 1308 | if (tv) { |
1198 | if (tvisfunc(tv)) { | 1309 | if (tvisfunc(tv)) { |
1199 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 1310 | crec_tailcall(J, rd, tv); |
1200 | rd->nres = -1; /* Pending tailcall. */ | ||
1201 | return; | 1311 | return; |
1202 | } | 1312 | } |
1203 | } else if (mm == MM_new) { | 1313 | } else if (mm == MM_new) { |
@@ -1238,7 +1348,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) | |||
1238 | for (i = 0; i < 2; i++) { | 1348 | for (i = 0; i < 2; i++) { |
1239 | IRType st = tref_type(sp[i]); | 1349 | IRType st = tref_type(sp[i]); |
1240 | if (st == IRT_NUM || st == IRT_FLOAT) | 1350 | if (st == IRT_NUM || st == IRT_FLOAT) |
1241 | sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); | 1351 | sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY); |
1242 | else if (!(st == IRT_I64 || st == IRT_U64)) | 1352 | else if (!(st == IRT_I64 || st == IRT_U64)) |
1243 | sp[i] = emitconv(sp[i], dt, IRT_INT, | 1353 | sp[i] = emitconv(sp[i], dt, IRT_INT, |
1244 | (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); | 1354 | (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); |
@@ -1307,15 +1417,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) | |||
1307 | CTypeID id; | 1417 | CTypeID id; |
1308 | #if LJ_64 | 1418 | #if LJ_64 |
1309 | if (t == IRT_NUM || t == IRT_FLOAT) | 1419 | if (t == IRT_NUM || t == IRT_FLOAT) |
1310 | tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); | 1420 | tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY); |
1311 | else if (!(t == IRT_I64 || t == IRT_U64)) | 1421 | else if (!(t == IRT_I64 || t == IRT_U64)) |
1312 | tr = emitconv(tr, IRT_INTP, IRT_INT, | 1422 | tr = emitconv(tr, IRT_INTP, IRT_INT, |
1313 | ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); | 1423 | ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); |
1314 | #else | 1424 | #else |
1315 | if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { | 1425 | if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { |
1316 | tr = emitconv(tr, IRT_INTP, t, | 1426 | tr = emitconv(tr, IRT_INTP, t, |
1317 | (t == IRT_NUM || t == IRT_FLOAT) ? | 1427 | (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0); |
1318 | IRCONV_TRUNC|IRCONV_ANY : 0); | ||
1319 | } | 1428 | } |
1320 | #endif | 1429 | #endif |
1321 | tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); | 1430 | tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); |
@@ -1347,8 +1456,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, | |||
1347 | } | 1456 | } |
1348 | if (tv) { | 1457 | if (tv) { |
1349 | if (tvisfunc(tv)) { | 1458 | if (tvisfunc(tv)) { |
1350 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 1459 | crec_tailcall(J, rd, tv); |
1351 | rd->nres = -1; /* Pending tailcall. */ | ||
1352 | return 0; | 1460 | return 0; |
1353 | } /* NYI: non-function metamethods. */ | 1461 | } /* NYI: non-function metamethods. */ |
1354 | } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ | 1462 | } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ |
@@ -1460,8 +1568,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) | |||
1460 | !irt_isguard(J->guardemit)) { | 1568 | !irt_isguard(J->guardemit)) { |
1461 | const BCIns *pc = frame_contpc(J->L->base-1) - 1; | 1569 | const BCIns *pc = frame_contpc(J->L->base-1) - 1; |
1462 | if (bc_op(*pc) <= BC_ISNEP) { | 1570 | if (bc_op(*pc) <= BC_ISNEP) { |
1463 | setframe_pc(&J2G(J)->tmptv, pc); | 1571 | J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc; |
1464 | J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1); | ||
1465 | J->postproc = LJ_POST_FIXCOMP; | 1572 | J->postproc = LJ_POST_FIXCOMP; |
1466 | } | 1573 | } |
1467 | } | 1574 | } |
@@ -1650,7 +1757,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd) | |||
1650 | void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) | 1757 | void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) |
1651 | { | 1758 | { |
1652 | argv2cdata(J, J->base[0], &rd->argv[0]); | 1759 | argv2cdata(J, J->base[0], &rd->argv[0]); |
1653 | crec_finalizer(J, J->base[0], &rd->argv[1]); | 1760 | if (!J->base[1]) |
1761 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1762 | crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]); | ||
1763 | } | ||
1764 | |||
1765 | /* -- 64 bit bit.* library functions -------------------------------------- */ | ||
1766 | |||
1767 | /* Determine bit operation type from argument type. */ | ||
1768 | static CTypeID crec_bit64_type(CTState *cts, cTValue *tv) | ||
1769 | { | ||
1770 | if (tviscdata(tv)) { | ||
1771 | CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid); | ||
1772 | if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); | ||
1773 | if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == | ||
1774 | CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8) | ||
1775 | return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ | ||
1776 | return CTID_INT64; /* Otherwise use int64_t. */ | ||
1777 | } | ||
1778 | return 0; /* Use regular 32 bit ops. */ | ||
1779 | } | ||
1780 | |||
1781 | void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd) | ||
1782 | { | ||
1783 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1784 | TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, | ||
1785 | J->base[0], &rd->argv[0]); | ||
1786 | if (!tref_isinteger(tr)) | ||
1787 | tr = emitconv(tr, IRT_INT, tref_type(tr), 0); | ||
1788 | J->base[0] = tr; | ||
1789 | } | ||
1790 | |||
1791 | int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd) | ||
1792 | { | ||
1793 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1794 | CTypeID id = crec_bit64_type(cts, &rd->argv[0]); | ||
1795 | if (id) { | ||
1796 | TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); | ||
1797 | tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0); | ||
1798 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); | ||
1799 | return 1; | ||
1800 | } | ||
1801 | return 0; | ||
1802 | } | ||
1803 | |||
1804 | int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd) | ||
1805 | { | ||
1806 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1807 | CTypeID id = 0; | ||
1808 | MSize i; | ||
1809 | for (i = 0; J->base[i] != 0; i++) { | ||
1810 | CTypeID aid = crec_bit64_type(cts, &rd->argv[i]); | ||
1811 | if (id < aid) id = aid; /* Determine highest type rank of all arguments. */ | ||
1812 | } | ||
1813 | if (id) { | ||
1814 | CType *ct = ctype_get(cts, id); | ||
1815 | uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64); | ||
1816 | TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]); | ||
1817 | for (i = 1; J->base[i] != 0; i++) { | ||
1818 | TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]); | ||
1819 | tr = emitir(ot, tr, tr2); | ||
1820 | } | ||
1821 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); | ||
1822 | return 1; | ||
1823 | } | ||
1824 | return 0; | ||
1825 | } | ||
1826 | |||
1827 | int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) | ||
1828 | { | ||
1829 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1830 | CTypeID id; | ||
1831 | TRef tsh = 0; | ||
1832 | if (J->base[0] && tref_iscdata(J->base[1])) { | ||
1833 | tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, | ||
1834 | J->base[1], &rd->argv[1]); | ||
1835 | if (!tref_isinteger(tsh)) | ||
1836 | tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); | ||
1837 | J->base[1] = tsh; | ||
1838 | } | ||
1839 | id = crec_bit64_type(cts, &rd->argv[0]); | ||
1840 | if (id) { | ||
1841 | TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); | ||
1842 | uint32_t op = rd->data; | ||
1843 | if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); | ||
1844 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | ||
1845 | !tref_isk(tsh)) | ||
1846 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); | ||
1847 | #ifdef LJ_TARGET_UNIFYROT | ||
1848 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { | ||
1849 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; | ||
1850 | tsh = emitir(IRTI(IR_NEG), tsh, tsh); | ||
1851 | } | ||
1852 | #endif | ||
1853 | tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); | ||
1854 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); | ||
1855 | return 1; | ||
1856 | } | ||
1857 | return 0; | ||
1858 | } | ||
1859 | |||
1860 | TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr) | ||
1861 | { | ||
1862 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1863 | CTypeID id = crec_bit64_type(cts, &rd->argv[0]); | ||
1864 | TRef tr, trsf = J->base[1]; | ||
1865 | SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); | ||
1866 | int32_t n; | ||
1867 | if (trsf) { | ||
1868 | CTypeID id2 = 0; | ||
1869 | n = (int32_t)lj_carith_check64(J->L, 2, &id2); | ||
1870 | if (id2) | ||
1871 | trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]); | ||
1872 | else | ||
1873 | trsf = lj_opt_narrow_tobit(J, trsf); | ||
1874 | emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */ | ||
1875 | } else { | ||
1876 | n = id ? 16 : 8; | ||
1877 | } | ||
1878 | if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } | ||
1879 | sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); | ||
1880 | if (id) { | ||
1881 | tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); | ||
1882 | if (n < 16) | ||
1883 | tr = emitir(IRT(IR_BAND, IRT_U64), tr, | ||
1884 | lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1)); | ||
1885 | } else { | ||
1886 | tr = lj_opt_narrow_tobit(J, J->base[0]); | ||
1887 | if (n < 8) | ||
1888 | tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1))); | ||
1889 | tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */ | ||
1890 | lj_needsplit(J); | ||
1891 | } | ||
1892 | return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr); | ||
1654 | } | 1893 | } |
1655 | 1894 | ||
1656 | /* -- Miscellaneous library functions ------------------------------------- */ | 1895 | /* -- Miscellaneous library functions ------------------------------------- */ |
@@ -1674,6 +1913,30 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) | |||
1674 | } | 1913 | } |
1675 | } | 1914 | } |
1676 | 1915 | ||
1916 | TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o) | ||
1917 | { | ||
1918 | CTypeID id = argv2cdata(J, tr, o)->ctypeid; | ||
1919 | if (!(id == CTID_INT64 || id == CTID_UINT64)) | ||
1920 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1921 | lj_needsplit(J); | ||
1922 | return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr, | ||
1923 | IRFL_CDATA_INT64); | ||
1924 | } | ||
1925 | |||
1926 | #if LJ_HASBUFFER | ||
1927 | TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o) | ||
1928 | { | ||
1929 | CTState *cts = ctype_ctsG(J2G(J)); | ||
1930 | if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1931 | return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o); | ||
1932 | } | ||
1933 | |||
1934 | TRef lj_crecord_topuint8(jit_State *J, TRef tr) | ||
1935 | { | ||
1936 | return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr); | ||
1937 | } | ||
1938 | #endif | ||
1939 | |||
1677 | #undef IR | 1940 | #undef IR |
1678 | #undef emitir | 1941 | #undef emitir |
1679 | #undef emitconv | 1942 | #undef emitconv |
diff --git a/src/lj_crecord.h b/src/lj_crecord.h index 513ded7b..2c8cf05c 100644 --- a/src/lj_crecord.h +++ b/src/lj_crecord.h | |||
@@ -25,7 +25,19 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd); | |||
25 | LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); | 25 | LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); |
26 | LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); | 26 | LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); |
27 | LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); | 27 | LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); |
28 | |||
29 | LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd); | ||
30 | LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd); | ||
31 | LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd); | ||
32 | LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); | ||
33 | LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr); | ||
34 | |||
28 | LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); | 35 | LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); |
36 | LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o); | ||
37 | #if LJ_HASBUFFER | ||
38 | LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o); | ||
39 | LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr); | ||
40 | #endif | ||
29 | #endif | 41 | #endif |
30 | 42 | ||
31 | #endif | 43 | #endif |
diff --git a/src/lj_ctype.c b/src/lj_ctype.c index 68edb287..4655eee7 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c | |||
@@ -11,8 +11,10 @@ | |||
11 | #include "lj_err.h" | 11 | #include "lj_err.h" |
12 | #include "lj_str.h" | 12 | #include "lj_str.h" |
13 | #include "lj_tab.h" | 13 | #include "lj_tab.h" |
14 | #include "lj_strfmt.h" | ||
14 | #include "lj_ctype.h" | 15 | #include "lj_ctype.h" |
15 | #include "lj_ccallback.h" | 16 | #include "lj_ccallback.h" |
17 | #include "lj_buf.h" | ||
16 | 18 | ||
17 | /* -- C type definitions -------------------------------------------------- */ | 19 | /* -- C type definitions -------------------------------------------------- */ |
18 | 20 | ||
@@ -37,6 +39,8 @@ | |||
37 | _("uint64_t", UINT64) \ | 39 | _("uint64_t", UINT64) \ |
38 | _("intptr_t", INT_PSZ) \ | 40 | _("intptr_t", INT_PSZ) \ |
39 | _("uintptr_t", UINT_PSZ) \ | 41 | _("uintptr_t", UINT_PSZ) \ |
42 | /* From POSIX. */ \ | ||
43 | _("ssize_t", INT_PSZ) \ | ||
40 | /* End of typedef list. */ | 44 | /* End of typedef list. */ |
41 | 45 | ||
42 | /* Keywords (only the ones we actually care for). */ | 46 | /* Keywords (only the ones we actually care for). */ |
@@ -149,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp) | |||
149 | { | 153 | { |
150 | CTypeID id = cts->top; | 154 | CTypeID id = cts->top; |
151 | CType *ct; | 155 | CType *ct; |
152 | lua_assert(cts->L); | 156 | lj_assertCTS(cts->L, "uninitialized cts->L"); |
153 | if (LJ_UNLIKELY(id >= cts->sizetab)) { | 157 | if (LJ_UNLIKELY(id >= cts->sizetab)) { |
154 | if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); | 158 | if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); |
155 | #ifdef LUAJIT_CTYPE_CHECK_ANCHOR | 159 | #ifdef LUAJIT_CTYPE_CHECK_ANCHOR |
@@ -178,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size) | |||
178 | { | 182 | { |
179 | uint32_t h = ct_hashtype(info, size); | 183 | uint32_t h = ct_hashtype(info, size); |
180 | CTypeID id = cts->hash[h]; | 184 | CTypeID id = cts->hash[h]; |
181 | lua_assert(cts->L); | 185 | lj_assertCTS(cts->L, "uninitialized cts->L"); |
182 | while (id) { | 186 | while (id) { |
183 | CType *ct = ctype_get(cts, id); | 187 | CType *ct = ctype_get(cts, id); |
184 | if (ct->info == info && ct->size == size) | 188 | if (ct->info == info && ct->size == size) |
@@ -294,9 +298,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem) | |||
294 | } | 298 | } |
295 | ct = ctype_raw(cts, arrid); | 299 | ct = ctype_raw(cts, arrid); |
296 | } | 300 | } |
297 | lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */ | 301 | lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected"); |
298 | ct = ctype_rawchild(cts, ct); /* Get array element. */ | 302 | ct = ctype_rawchild(cts, ct); /* Get array element. */ |
299 | lua_assert(ctype_hassize(ct->info)); | 303 | lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size"); |
300 | /* Calculate actual size of VLA and check for overflow. */ | 304 | /* Calculate actual size of VLA and check for overflow. */ |
301 | xsz += (uint64_t)ct->size * nelem; | 305 | xsz += (uint64_t)ct->size * nelem; |
302 | return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID; | 306 | return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID; |
@@ -319,7 +323,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp) | |||
319 | } else { | 323 | } else { |
320 | if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); | 324 | if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); |
321 | qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); | 325 | qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); |
322 | lua_assert(ctype_hassize(info) || ctype_isfunc(info)); | 326 | lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info), |
327 | "ctype without size"); | ||
323 | *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; | 328 | *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; |
324 | break; | 329 | break; |
325 | } | 330 | } |
@@ -524,7 +529,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) | |||
524 | ctype_appc(ctr, ')'); | 529 | ctype_appc(ctr, ')'); |
525 | break; | 530 | break; |
526 | default: | 531 | default: |
527 | lua_assert(0); | 532 | lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info); |
528 | break; | 533 | break; |
529 | } | 534 | } |
530 | ct = ctype_get(ctr->cts, ctype_cid(info)); | 535 | ct = ctype_get(ctr->cts, ctype_cid(info)); |
@@ -568,19 +573,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned) | |||
568 | /* Convert complex to string with 'i' or 'I' suffix. */ | 573 | /* Convert complex to string with 'i' or 'I' suffix. */ |
569 | GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) | 574 | GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) |
570 | { | 575 | { |
571 | char buf[2*LJ_STR_NUMBUF+2+1]; | 576 | SBuf *sb = lj_buf_tmp_(L); |
572 | TValue re, im; | 577 | TValue re, im; |
573 | size_t len; | ||
574 | if (size == 2*sizeof(double)) { | 578 | if (size == 2*sizeof(double)) { |
575 | re.n = *(double *)sp; im.n = ((double *)sp)[1]; | 579 | re.n = *(double *)sp; im.n = ((double *)sp)[1]; |
576 | } else { | 580 | } else { |
577 | re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; | 581 | re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; |
578 | } | 582 | } |
579 | len = lj_str_bufnum(buf, &re); | 583 | lj_strfmt_putfnum(sb, STRFMT_G14, re.n); |
580 | if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; | 584 | if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+'); |
581 | len += lj_str_bufnum(buf+len, &im); | 585 | lj_strfmt_putfnum(sb, STRFMT_G14, im.n); |
582 | buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; | 586 | lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i'); |
583 | return lj_str_new(L, buf, len+1); | 587 | return lj_buf_str(L, sb); |
584 | } | 588 | } |
585 | 589 | ||
586 | /* -- C type state -------------------------------------------------------- */ | 590 | /* -- C type state -------------------------------------------------------- */ |
diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 77551e76..2473b57e 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h | |||
@@ -260,10 +260,16 @@ typedef struct CTState { | |||
260 | 260 | ||
261 | #define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ | 261 | #define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ |
262 | 262 | ||
263 | #ifdef LUA_USE_ASSERT | ||
264 | #define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__)) | ||
265 | #else | ||
266 | #define lj_assertCTS(c, ...) ((void)cts) | ||
267 | #endif | ||
268 | |||
263 | /* -- Predefined types ---------------------------------------------------- */ | 269 | /* -- Predefined types ---------------------------------------------------- */ |
264 | 270 | ||
265 | /* Target-dependent types. */ | 271 | /* Target-dependent types. */ |
266 | #if LJ_TARGET_PPC || LJ_TARGET_PPCSPE | 272 | #if LJ_TARGET_PPC |
267 | #define CTTYDEFP(_) \ | 273 | #define CTTYDEFP(_) \ |
268 | _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) | 274 | _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) |
269 | #else | 275 | #else |
@@ -292,6 +298,7 @@ typedef struct CTState { | |||
292 | _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ | 298 | _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ |
293 | _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ | 299 | _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ |
294 | _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ | 300 | _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ |
301 | _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \ | ||
295 | _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ | 302 | _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ |
296 | _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ | 303 | _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ |
297 | CTTYDEFP(_) \ | 304 | CTTYDEFP(_) \ |
@@ -383,6 +390,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) | |||
383 | return cts; | 390 | return cts; |
384 | } | 391 | } |
385 | 392 | ||
393 | /* Load FFI library on-demand. */ | ||
394 | #define ctype_loadffi(L) \ | ||
395 | do { \ | ||
396 | if (!ctype_ctsG(G(L))) { \ | ||
397 | ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \ | ||
398 | luaopen_ffi(L); \ | ||
399 | L->top = (TValue *)(mref(L->stack, char) + oldtop); \ | ||
400 | } \ | ||
401 | } while (0) | ||
402 | |||
386 | /* Save and restore state of C type table. */ | 403 | /* Save and restore state of C type table. */ |
387 | #define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts) | 404 | #define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts) |
388 | #define LJ_CTYPE_RESTORE(cts) \ | 405 | #define LJ_CTYPE_RESTORE(cts) \ |
@@ -392,7 +409,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) | |||
392 | /* Check C type ID for validity when assertions are enabled. */ | 409 | /* Check C type ID for validity when assertions are enabled. */ |
393 | static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) | 410 | static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) |
394 | { | 411 | { |
395 | lua_assert(id > 0 && id < cts->top); UNUSED(cts); | 412 | UNUSED(cts); |
413 | lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id); | ||
396 | return id; | 414 | return id; |
397 | } | 415 | } |
398 | 416 | ||
@@ -408,8 +426,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id) | |||
408 | /* Get child C type. */ | 426 | /* Get child C type. */ |
409 | static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) | 427 | static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) |
410 | { | 428 | { |
411 | lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || | 429 | lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || |
412 | ctype_isbitfield(ct->info))); /* These don't have children. */ | 430 | ctype_isbitfield(ct->info)), |
431 | "ctype %08x has no children", ct->info); | ||
413 | return ctype_get(cts, ctype_cid(ct->info)); | 432 | return ctype_get(cts, ctype_cid(ct->info)); |
414 | } | 433 | } |
415 | 434 | ||
diff --git a/src/lj_debug.c b/src/lj_debug.c index 65ec26f0..112f5358 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c | |||
@@ -9,12 +9,12 @@ | |||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_err.h" | 10 | #include "lj_err.h" |
11 | #include "lj_debug.h" | 11 | #include "lj_debug.h" |
12 | #include "lj_str.h" | 12 | #include "lj_buf.h" |
13 | #include "lj_tab.h" | 13 | #include "lj_tab.h" |
14 | #include "lj_state.h" | 14 | #include "lj_state.h" |
15 | #include "lj_frame.h" | 15 | #include "lj_frame.h" |
16 | #include "lj_bc.h" | 16 | #include "lj_bc.h" |
17 | #include "lj_vm.h" | 17 | #include "lj_strfmt.h" |
18 | #if LJ_HASJIT | 18 | #if LJ_HASJIT |
19 | #include "lj_jit.h" | 19 | #include "lj_jit.h" |
20 | #endif | 20 | #endif |
@@ -24,11 +24,11 @@ | |||
24 | /* Get frame corresponding to a level. */ | 24 | /* Get frame corresponding to a level. */ |
25 | cTValue *lj_debug_frame(lua_State *L, int level, int *size) | 25 | cTValue *lj_debug_frame(lua_State *L, int level, int *size) |
26 | { | 26 | { |
27 | cTValue *frame, *nextframe, *bot = tvref(L->stack); | 27 | cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2; |
28 | /* Traverse frames backwards. */ | 28 | /* Traverse frames backwards. */ |
29 | for (nextframe = frame = L->base-1; frame > bot; ) { | 29 | for (nextframe = frame = L->base-1; frame > bot; ) { |
30 | if (frame_gc(frame) == obj2gco(L)) | 30 | if (frame_gc(frame) == obj2gco(L)) |
31 | level++; /* Skip dummy frames. See lj_meta_call(). */ | 31 | level++; /* Skip dummy frames. See lj_err_optype_call(). */ |
32 | if (level-- == 0) { | 32 | if (level-- == 0) { |
33 | *size = (int)(nextframe - frame); | 33 | *size = (int)(nextframe - frame); |
34 | return frame; /* Level found. */ | 34 | return frame; /* Level found. */ |
@@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) | |||
55 | const BCIns *ins; | 55 | const BCIns *ins; |
56 | GCproto *pt; | 56 | GCproto *pt; |
57 | BCPos pos; | 57 | BCPos pos; |
58 | lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); | 58 | lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD, |
59 | "function or frame expected"); | ||
59 | if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ | 60 | if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ |
60 | return NO_BCPOS; | 61 | return NO_BCPOS; |
61 | } else if (nextframe == NULL) { /* Lua function on top. */ | 62 | } else if (nextframe == NULL) { /* Lua function on top. */ |
@@ -87,8 +88,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) | |||
87 | if (frame_islua(f)) { | 88 | if (frame_islua(f)) { |
88 | f = frame_prevl(f); | 89 | f = frame_prevl(f); |
89 | } else { | 90 | } else { |
90 | if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) && | 91 | if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f))) |
91 | (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK)) | ||
92 | cf = cframe_raw(cframe_prev(cf)); | 92 | cf = cframe_raw(cframe_prev(cf)); |
93 | f = frame_prevd(f); | 93 | f = frame_prevd(f); |
94 | } | 94 | } |
@@ -102,7 +102,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) | |||
102 | #if LJ_HASJIT | 102 | #if LJ_HASJIT |
103 | if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ | 103 | if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ |
104 | GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); | 104 | GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); |
105 | lua_assert(bc_isret(bc_op(ins[-1]))); | 105 | lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected"); |
106 | pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); | 106 | pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); |
107 | } | 107 | } |
108 | #endif | 108 | #endif |
@@ -135,7 +135,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) | |||
135 | BCPos pc = debug_framepc(L, fn, nextframe); | 135 | BCPos pc = debug_framepc(L, fn, nextframe); |
136 | if (pc != NO_BCPOS) { | 136 | if (pc != NO_BCPOS) { |
137 | GCproto *pt = funcproto(fn); | 137 | GCproto *pt = funcproto(fn); |
138 | lua_assert(pc <= pt->sizebc); | 138 | lj_assertL(pc <= pt->sizebc, "PC out of range"); |
139 | return lj_debug_line(pt, pc); | 139 | return lj_debug_line(pt, pc); |
140 | } | 140 | } |
141 | return -1; | 141 | return -1; |
@@ -143,38 +143,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) | |||
143 | 143 | ||
144 | /* -- Variable names ------------------------------------------------------ */ | 144 | /* -- Variable names ------------------------------------------------------ */ |
145 | 145 | ||
146 | /* Read ULEB128 value. */ | ||
147 | static uint32_t debug_read_uleb128(const uint8_t **pp) | ||
148 | { | ||
149 | const uint8_t *p = *pp; | ||
150 | uint32_t v = *p++; | ||
151 | if (LJ_UNLIKELY(v >= 0x80)) { | ||
152 | int sh = 0; | ||
153 | v &= 0x7f; | ||
154 | do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80); | ||
155 | } | ||
156 | *pp = p; | ||
157 | return v; | ||
158 | } | ||
159 | |||
160 | /* Get name of a local variable from slot number and PC. */ | 146 | /* Get name of a local variable from slot number and PC. */ |
161 | static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) | 147 | static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) |
162 | { | 148 | { |
163 | const uint8_t *p = proto_varinfo(pt); | 149 | const char *p = (const char *)proto_varinfo(pt); |
164 | if (p) { | 150 | if (p) { |
165 | BCPos lastpc = 0; | 151 | BCPos lastpc = 0; |
166 | for (;;) { | 152 | for (;;) { |
167 | const char *name = (const char *)p; | 153 | const char *name = p; |
168 | uint32_t vn = *p++; | 154 | uint32_t vn = *(const uint8_t *)p; |
169 | BCPos startpc, endpc; | 155 | BCPos startpc, endpc; |
170 | if (vn < VARNAME__MAX) { | 156 | if (vn < VARNAME__MAX) { |
171 | if (vn == VARNAME_END) break; /* End of varinfo. */ | 157 | if (vn == VARNAME_END) break; /* End of varinfo. */ |
172 | } else { | 158 | } else { |
173 | while (*p++) ; /* Skip over variable name string. */ | 159 | do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */ |
174 | } | 160 | } |
175 | lastpc = startpc = lastpc + debug_read_uleb128(&p); | 161 | p++; |
162 | lastpc = startpc = lastpc + lj_buf_ruleb128(&p); | ||
176 | if (startpc > pc) break; | 163 | if (startpc > pc) break; |
177 | endpc = startpc + debug_read_uleb128(&p); | 164 | endpc = startpc + lj_buf_ruleb128(&p); |
178 | if (pc < endpc && slot-- == 0) { | 165 | if (pc < endpc && slot-- == 0) { |
179 | if (vn < VARNAME__MAX) { | 166 | if (vn < VARNAME__MAX) { |
180 | #define VARNAMESTR(name, str) str "\0" | 167 | #define VARNAMESTR(name, str) str "\0" |
@@ -199,7 +186,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, | |||
199 | TValue *nextframe = size ? frame + size : NULL; | 186 | TValue *nextframe = size ? frame + size : NULL; |
200 | GCfunc *fn = frame_func(frame); | 187 | GCfunc *fn = frame_func(frame); |
201 | BCPos pc = debug_framepc(L, fn, nextframe); | 188 | BCPos pc = debug_framepc(L, fn, nextframe); |
202 | if (!nextframe) nextframe = L->top; | 189 | if (!nextframe) nextframe = L->top+LJ_FR2; |
203 | if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ | 190 | if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ |
204 | if (pc != NO_BCPOS) { | 191 | if (pc != NO_BCPOS) { |
205 | GCproto *pt = funcproto(fn); | 192 | GCproto *pt = funcproto(fn); |
@@ -209,7 +196,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, | |||
209 | nextframe = frame; | 196 | nextframe = frame; |
210 | frame = frame_prevd(frame); | 197 | frame = frame_prevd(frame); |
211 | } | 198 | } |
212 | if (frame + slot1 < nextframe) { | 199 | if (frame + slot1+LJ_FR2 < nextframe) { |
213 | *name = "(*vararg)"; | 200 | *name = "(*vararg)"; |
214 | return frame+slot1; | 201 | return frame+slot1; |
215 | } | 202 | } |
@@ -220,7 +207,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, | |||
220 | if (pc != NO_BCPOS && | 207 | if (pc != NO_BCPOS && |
221 | (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) | 208 | (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) |
222 | ; | 209 | ; |
223 | else if (slot1 > 0 && frame + slot1 < nextframe) | 210 | else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe) |
224 | *name = "(*temporary)"; | 211 | *name = "(*temporary)"; |
225 | return frame+slot1; | 212 | return frame+slot1; |
226 | } | 213 | } |
@@ -229,7 +216,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, | |||
229 | const char *lj_debug_uvname(GCproto *pt, uint32_t idx) | 216 | const char *lj_debug_uvname(GCproto *pt, uint32_t idx) |
230 | { | 217 | { |
231 | const uint8_t *p = proto_uvinfo(pt); | 218 | const uint8_t *p = proto_uvinfo(pt); |
232 | lua_assert(idx < pt->sizeuv); | 219 | lj_assertX(idx < pt->sizeuv, "bad upvalue index"); |
233 | if (!p) return ""; | 220 | if (!p) return ""; |
234 | if (idx) while (*p++ || --idx) ; | 221 | if (idx) while (*p++ || --idx) ; |
235 | return (const char *)p; | 222 | return (const char *)p; |
@@ -286,7 +273,7 @@ restart: | |||
286 | *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); | 273 | *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); |
287 | if (ip > proto_bc(pt)) { | 274 | if (ip > proto_bc(pt)) { |
288 | BCIns insp = ip[-1]; | 275 | BCIns insp = ip[-1]; |
289 | if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && | 276 | if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 && |
290 | bc_d(insp) == bc_b(ins)) | 277 | bc_d(insp) == bc_b(ins)) |
291 | return "method"; | 278 | return "method"; |
292 | } | 279 | } |
@@ -303,12 +290,12 @@ restart: | |||
303 | } | 290 | } |
304 | 291 | ||
305 | /* Deduce function name from caller of a frame. */ | 292 | /* Deduce function name from caller of a frame. */ |
306 | const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) | 293 | const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name) |
307 | { | 294 | { |
308 | TValue *pframe; | 295 | cTValue *pframe; |
309 | GCfunc *fn; | 296 | GCfunc *fn; |
310 | BCPos pc; | 297 | BCPos pc; |
311 | if (frame <= tvref(L->stack)) | 298 | if (frame <= tvref(L->stack)+LJ_FR2) |
312 | return NULL; | 299 | return NULL; |
313 | if (frame_isvarg(frame)) | 300 | if (frame_isvarg(frame)) |
314 | frame = frame_prevd(frame); | 301 | frame = frame_prevd(frame); |
@@ -334,7 +321,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) | |||
334 | /* -- Source code locations ----------------------------------------------- */ | 321 | /* -- Source code locations ----------------------------------------------- */ |
335 | 322 | ||
336 | /* Generate shortened source name. */ | 323 | /* Generate shortened source name. */ |
337 | void lj_debug_shortname(char *out, GCstr *str) | 324 | void lj_debug_shortname(char *out, GCstr *str, BCLine line) |
338 | { | 325 | { |
339 | const char *src = strdata(str); | 326 | const char *src = strdata(str); |
340 | if (*src == '=') { | 327 | if (*src == '=') { |
@@ -348,11 +335,11 @@ void lj_debug_shortname(char *out, GCstr *str) | |||
348 | *out++ = '.'; *out++ = '.'; *out++ = '.'; | 335 | *out++ = '.'; *out++ = '.'; *out++ = '.'; |
349 | } | 336 | } |
350 | strcpy(out, src); | 337 | strcpy(out, src); |
351 | } else { /* Output [string "string"]. */ | 338 | } else { /* Output [string "string"] or [builtin:name]. */ |
352 | size_t len; /* Length, up to first control char. */ | 339 | size_t len; /* Length, up to first control char. */ |
353 | for (len = 0; len < LUA_IDSIZE-12; len++) | 340 | for (len = 0; len < LUA_IDSIZE-12; len++) |
354 | if (((const unsigned char *)src)[len] < ' ') break; | 341 | if (((const unsigned char *)src)[len] < ' ') break; |
355 | strcpy(out, "[string \""); out += 9; | 342 | strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9; |
356 | if (src[len] != '\0') { /* Must truncate? */ | 343 | if (src[len] != '\0') { /* Must truncate? */ |
357 | if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; | 344 | if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; |
358 | strncpy(out, src, len); out += len; | 345 | strncpy(out, src, len); out += len; |
@@ -360,7 +347,7 @@ void lj_debug_shortname(char *out, GCstr *str) | |||
360 | } else { | 347 | } else { |
361 | strcpy(out, src); out += len; | 348 | strcpy(out, src); out += len; |
362 | } | 349 | } |
363 | strcpy(out, "\"]"); | 350 | strcpy(out, line == ~(BCLine)0 ? "]" : "\"]"); |
364 | } | 351 | } |
365 | } | 352 | } |
366 | 353 | ||
@@ -373,14 +360,15 @@ void lj_debug_addloc(lua_State *L, const char *msg, | |||
373 | if (isluafunc(fn)) { | 360 | if (isluafunc(fn)) { |
374 | BCLine line = debug_frameline(L, fn, nextframe); | 361 | BCLine line = debug_frameline(L, fn, nextframe); |
375 | if (line >= 0) { | 362 | if (line >= 0) { |
363 | GCproto *pt = funcproto(fn); | ||
376 | char buf[LUA_IDSIZE]; | 364 | char buf[LUA_IDSIZE]; |
377 | lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); | 365 | lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline); |
378 | lj_str_pushf(L, "%s:%d: %s", buf, line, msg); | 366 | lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg); |
379 | return; | 367 | return; |
380 | } | 368 | } |
381 | } | 369 | } |
382 | } | 370 | } |
383 | lj_str_pushf(L, "%s", msg); | 371 | lj_strfmt_pushf(L, "%s", msg); |
384 | } | 372 | } |
385 | 373 | ||
386 | /* Push location string for a bytecode position to Lua stack. */ | 374 | /* Push location string for a bytecode position to Lua stack. */ |
@@ -390,20 +378,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc) | |||
390 | const char *s = strdata(name); | 378 | const char *s = strdata(name); |
391 | MSize i, len = name->len; | 379 | MSize i, len = name->len; |
392 | BCLine line = lj_debug_line(pt, pc); | 380 | BCLine line = lj_debug_line(pt, pc); |
393 | if (*s == '@') { | 381 | if (pt->firstline == ~(BCLine)0) { |
382 | lj_strfmt_pushf(L, "builtin:%s", s); | ||
383 | } else if (*s == '@') { | ||
394 | s++; len--; | 384 | s++; len--; |
395 | for (i = len; i > 0; i--) | 385 | for (i = len; i > 0; i--) |
396 | if (s[i] == '/' || s[i] == '\\') { | 386 | if (s[i] == '/' || s[i] == '\\') { |
397 | s += i+1; | 387 | s += i+1; |
398 | break; | 388 | break; |
399 | } | 389 | } |
400 | lj_str_pushf(L, "%s:%d", s, line); | 390 | lj_strfmt_pushf(L, "%s:%d", s, line); |
401 | } else if (len > 40) { | 391 | } else if (len > 40) { |
402 | lj_str_pushf(L, "%p:%d", pt, line); | 392 | lj_strfmt_pushf(L, "%p:%d", pt, line); |
403 | } else if (*s == '=') { | 393 | } else if (*s == '=') { |
404 | lj_str_pushf(L, "%s:%d", s+1, line); | 394 | lj_strfmt_pushf(L, "%s:%d", s+1, line); |
405 | } else { | 395 | } else { |
406 | lj_str_pushf(L, "\"%s\":%d", s, line); | 396 | lj_strfmt_pushf(L, "\"%s\":%d", s, line); |
407 | } | 397 | } |
408 | } | 398 | } |
409 | 399 | ||
@@ -451,13 +441,14 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) | |||
451 | } else { | 441 | } else { |
452 | uint32_t offset = (uint32_t)ar->i_ci & 0xffff; | 442 | uint32_t offset = (uint32_t)ar->i_ci & 0xffff; |
453 | uint32_t size = (uint32_t)ar->i_ci >> 16; | 443 | uint32_t size = (uint32_t)ar->i_ci >> 16; |
454 | lua_assert(offset != 0); | 444 | lj_assertL(offset != 0, "bad frame offset"); |
455 | frame = tvref(L->stack) + offset; | 445 | frame = tvref(L->stack) + offset; |
456 | if (size) nextframe = frame + size; | 446 | if (size) nextframe = frame + size; |
457 | lua_assert(frame <= tvref(L->maxstack) && | 447 | lj_assertL(frame <= tvref(L->maxstack) && |
458 | (!nextframe || nextframe <= tvref(L->maxstack))); | 448 | (!nextframe || nextframe <= tvref(L->maxstack)), |
449 | "broken frame chain"); | ||
459 | fn = frame_func(frame); | 450 | fn = frame_func(frame); |
460 | lua_assert(fn->c.gct == ~LJ_TFUNC); | 451 | lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function"); |
461 | } | 452 | } |
462 | for (; *what; what++) { | 453 | for (; *what; what++) { |
463 | if (*what == 'S') { | 454 | if (*what == 'S') { |
@@ -466,7 +457,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) | |||
466 | BCLine firstline = pt->firstline; | 457 | BCLine firstline = pt->firstline; |
467 | GCstr *name = proto_chunkname(pt); | 458 | GCstr *name = proto_chunkname(pt); |
468 | ar->source = strdata(name); | 459 | ar->source = strdata(name); |
469 | lj_debug_shortname(ar->short_src, name); | 460 | lj_debug_shortname(ar->short_src, name, pt->firstline); |
470 | ar->linedefined = (int)firstline; | 461 | ar->linedefined = (int)firstline; |
471 | ar->lastlinedefined = (int)(firstline + pt->numline); | 462 | ar->lastlinedefined = (int)(firstline + pt->numline); |
472 | ar->what = (firstline || !pt->numline) ? "Lua" : "main"; | 463 | ar->what = (firstline || !pt->numline) ? "Lua" : "main"; |
@@ -556,6 +547,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar) | |||
556 | } | 547 | } |
557 | } | 548 | } |
558 | 549 | ||
550 | #if LJ_HASPROFILE | ||
551 | /* Put the chunkname into a buffer. */ | ||
552 | static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip) | ||
553 | { | ||
554 | GCstr *name = proto_chunkname(pt); | ||
555 | const char *p = strdata(name); | ||
556 | if (pt->firstline == ~(BCLine)0) { | ||
557 | lj_buf_putmem(sb, "[builtin:", 9); | ||
558 | lj_buf_putstr(sb, name); | ||
559 | lj_buf_putb(sb, ']'); | ||
560 | return 0; | ||
561 | } | ||
562 | if (*p == '=' || *p == '@') { | ||
563 | MSize len = name->len-1; | ||
564 | p++; | ||
565 | if (pathstrip) { | ||
566 | int i; | ||
567 | for (i = len-1; i >= 0; i--) | ||
568 | if (p[i] == '/' || p[i] == '\\') { | ||
569 | len -= i+1; | ||
570 | p = p+i+1; | ||
571 | break; | ||
572 | } | ||
573 | } | ||
574 | lj_buf_putmem(sb, p, len); | ||
575 | } else { | ||
576 | lj_buf_putmem(sb, "[string]", 8); | ||
577 | } | ||
578 | return 1; | ||
579 | } | ||
580 | |||
581 | /* Put a compact stack dump into a buffer. */ | ||
582 | void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth) | ||
583 | { | ||
584 | int level = 0, dir = 1, pathstrip = 1; | ||
585 | MSize lastlen = 0; | ||
586 | if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */ | ||
587 | while (level != depth) { /* Loop through all frames. */ | ||
588 | int size; | ||
589 | cTValue *frame = lj_debug_frame(L, level, &size); | ||
590 | if (frame) { | ||
591 | cTValue *nextframe = size ? frame+size : NULL; | ||
592 | GCfunc *fn = frame_func(frame); | ||
593 | const uint8_t *p = (const uint8_t *)fmt; | ||
594 | int c; | ||
595 | while ((c = *p++)) { | ||
596 | switch (c) { | ||
597 | case 'p': /* Preserve full path. */ | ||
598 | pathstrip = 0; | ||
599 | break; | ||
600 | case 'F': case 'f': { /* Dump function name. */ | ||
601 | const char *name; | ||
602 | const char *what = lj_debug_funcname(L, frame, &name); | ||
603 | if (what) { | ||
604 | if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */ | ||
605 | GCproto *pt = funcproto(fn); | ||
606 | if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */ | ||
607 | debug_putchunkname(sb, pt, pathstrip); | ||
608 | lj_buf_putb(sb, ':'); | ||
609 | } | ||
610 | } | ||
611 | lj_buf_putmem(sb, name, (MSize)strlen(name)); | ||
612 | break; | ||
613 | } /* else: can't derive a name, dump module:line. */ | ||
614 | } | ||
615 | /* fallthrough */ | ||
616 | case 'l': /* Dump module:line. */ | ||
617 | if (isluafunc(fn)) { | ||
618 | GCproto *pt = funcproto(fn); | ||
619 | if (debug_putchunkname(sb, pt, pathstrip)) { | ||
620 | /* Regular Lua function. */ | ||
621 | BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) : | ||
622 | pt->firstline; | ||
623 | lj_buf_putb(sb, ':'); | ||
624 | lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline); | ||
625 | } | ||
626 | } else if (isffunc(fn)) { /* Dump numbered builtins. */ | ||
627 | lj_buf_putmem(sb, "[builtin#", 9); | ||
628 | lj_strfmt_putint(sb, fn->c.ffid); | ||
629 | lj_buf_putb(sb, ']'); | ||
630 | } else { /* Dump C function address. */ | ||
631 | lj_buf_putb(sb, '@'); | ||
632 | lj_strfmt_putptr(sb, fn->c.f); | ||
633 | } | ||
634 | break; | ||
635 | case 'Z': /* Zap trailing separator. */ | ||
636 | lastlen = sbuflen(sb); | ||
637 | break; | ||
638 | default: | ||
639 | lj_buf_putb(sb, c); | ||
640 | break; | ||
641 | } | ||
642 | } | ||
643 | } else if (dir == 1) { | ||
644 | break; | ||
645 | } else { | ||
646 | level -= size; /* Reverse frame order: quickly skip missing level. */ | ||
647 | } | ||
648 | level += dir; | ||
649 | } | ||
650 | if (lastlen) | ||
651 | sb->w = sb->b + lastlen; /* Zap trailing separator. */ | ||
652 | } | ||
653 | #endif | ||
654 | |||
559 | /* Number of frames for the leading and trailing part of a traceback. */ | 655 | /* Number of frames for the leading and trailing part of a traceback. */ |
560 | #define TRACEBACK_LEVELS1 12 | 656 | #define TRACEBACK_LEVELS1 12 |
561 | #define TRACEBACK_LEVELS2 10 | 657 | #define TRACEBACK_LEVELS2 10 |
diff --git a/src/lj_debug.h b/src/lj_debug.h index 15cdee3c..28127ae9 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h | |||
@@ -33,14 +33,18 @@ LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, | |||
33 | GCobj **op); | 33 | GCobj **op); |
34 | LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, | 34 | LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, |
35 | BCReg slot, const char **name); | 35 | BCReg slot, const char **name); |
36 | LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, | 36 | LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, |
37 | const char **name); | 37 | const char **name); |
38 | LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); | 38 | LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line); |
39 | LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, | 39 | LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, |
40 | cTValue *frame, cTValue *nextframe); | 40 | cTValue *frame, cTValue *nextframe); |
41 | LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); | 41 | LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); |
42 | LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, | 42 | LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, |
43 | int ext); | 43 | int ext); |
44 | #if LJ_HASPROFILE | ||
45 | LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, | ||
46 | int depth); | ||
47 | #endif | ||
44 | 48 | ||
45 | /* Fixed internal variable names. */ | 49 | /* Fixed internal variable names. */ |
46 | #define VARNAMEDEF(_) \ | 50 | #define VARNAMEDEF(_) \ |
diff --git a/src/lj_def.h b/src/lj_def.h index d09ebb10..b61297aa 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t; | |||
46 | #include <stdlib.h> | 46 | #include <stdlib.h> |
47 | 47 | ||
48 | /* Various VM limits. */ | 48 | /* Various VM limits. */ |
49 | #define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ | 49 | #define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */ |
50 | #define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */ | ||
51 | /* Max. total memory allocation. */ | ||
52 | #define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32) | ||
50 | #define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ | 53 | #define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ |
51 | #define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ | 54 | #define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */ |
52 | #define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ | 55 | #define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */ |
56 | #define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */ | ||
53 | 57 | ||
54 | #define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ | 58 | #define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ |
55 | #define LJ_MAX_HBITS 26 /* Max. hash bits. */ | 59 | #define LJ_MAX_HBITS 26 /* Max. hash bits. */ |
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t; | |||
57 | #define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ | 61 | #define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ |
58 | #define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ | 62 | #define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ |
59 | 63 | ||
60 | #define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ | 64 | #define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */ |
61 | #define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ | 65 | #define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ |
62 | #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ | 66 | #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ |
63 | #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ | 67 | #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ |
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t; | |||
65 | #define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ | 69 | #define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ |
66 | 70 | ||
67 | #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ | 71 | #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ |
68 | #define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ | 72 | #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ |
69 | 73 | ||
70 | #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ | 74 | #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ |
71 | 75 | ||
@@ -76,7 +80,6 @@ typedef unsigned int uintptr_t; | |||
76 | #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ | 80 | #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ |
77 | #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ | 81 | #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ |
78 | #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ | 82 | #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ |
79 | #define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ | ||
80 | 83 | ||
81 | /* JIT compiler limits. */ | 84 | /* JIT compiler limits. */ |
82 | #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ | 85 | #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ |
@@ -91,6 +94,9 @@ typedef unsigned int uintptr_t; | |||
91 | #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) | 94 | #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) |
92 | #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) | 95 | #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) |
93 | #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) | 96 | #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) |
97 | #define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) | ||
98 | #define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) | ||
99 | #define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p)) | ||
94 | 100 | ||
95 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) | 101 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) |
96 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) | 102 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) |
@@ -98,7 +104,10 @@ typedef unsigned int uintptr_t; | |||
98 | #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) | 104 | #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) |
99 | #define checki32(x) ((x) == (int32_t)(x)) | 105 | #define checki32(x) ((x) == (int32_t)(x)) |
100 | #define checku32(x) ((x) == (uint32_t)(x)) | 106 | #define checku32(x) ((x) == (uint32_t)(x)) |
107 | #define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0) | ||
101 | #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) | 108 | #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) |
109 | #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) | ||
110 | #define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1) | ||
102 | 111 | ||
103 | /* Every half-decent C compiler transforms this into a rotate instruction. */ | 112 | /* Every half-decent C compiler transforms this into a rotate instruction. */ |
104 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) | 113 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) |
@@ -111,7 +120,7 @@ typedef uintptr_t BloomFilter; | |||
111 | #define bloomset(b, x) ((b) |= bloombit((x))) | 120 | #define bloomset(b, x) ((b) |= bloombit((x))) |
112 | #define bloomtest(b, x) ((b) & bloombit((x))) | 121 | #define bloomtest(b, x) ((b) & bloombit((x))) |
113 | 122 | ||
114 | #if defined(__GNUC__) || defined(__psp2__) | 123 | #if defined(__GNUC__) || defined(__clang__) || defined(__psp2__) |
115 | 124 | ||
116 | #define LJ_NORET __attribute__((noreturn)) | 125 | #define LJ_NORET __attribute__((noreturn)) |
117 | #define LJ_ALIGN(n) __attribute__((aligned(n))) | 126 | #define LJ_ALIGN(n) __attribute__((aligned(n))) |
@@ -173,7 +182,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) | |||
173 | { | 182 | { |
174 | return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); | 183 | return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); |
175 | } | 184 | } |
176 | #elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) | 185 | #elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__ |
177 | static LJ_AINLINE uint32_t lj_bswap(uint32_t x) | 186 | static LJ_AINLINE uint32_t lj_bswap(uint32_t x) |
178 | { | 187 | { |
179 | return (uint32_t)__builtin_bswap32((int32_t)x); | 188 | return (uint32_t)__builtin_bswap32((int32_t)x); |
@@ -329,14 +338,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) | |||
329 | #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET | 338 | #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET |
330 | #define LJ_ASMF_NORET LJ_ASMF LJ_NORET | 339 | #define LJ_ASMF_NORET LJ_ASMF LJ_NORET |
331 | 340 | ||
332 | /* Runtime assertions. */ | 341 | /* Internal assertions. */ |
333 | #ifdef lua_assert | 342 | #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) |
334 | #define check_exp(c, e) (lua_assert(c), (e)) | 343 | #define lj_assert_check(g, c, ...) \ |
335 | #define api_check(l, e) lua_assert(e) | 344 | ((c) ? (void)0 : \ |
345 | (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0)) | ||
346 | #define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) | ||
336 | #else | 347 | #else |
337 | #define lua_assert(c) ((void)0) | 348 | #define lj_checkapi(c, ...) ((void)L) |
349 | #endif | ||
350 | |||
351 | #ifdef LUA_USE_ASSERT | ||
352 | #define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__) | ||
353 | #define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__) | ||
354 | #define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) | ||
355 | #define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__) | ||
356 | #define check_exp(c, e) (lj_assertX((c), #c), (e)) | ||
357 | #else | ||
358 | #define lj_assertG_(g, c, ...) ((void)0) | ||
359 | #define lj_assertG(c, ...) ((void)g) | ||
360 | #define lj_assertL(c, ...) ((void)L) | ||
361 | #define lj_assertX(c, ...) ((void)0) | ||
338 | #define check_exp(c, e) (e) | 362 | #define check_exp(c, e) (e) |
339 | #define api_check luai_apicheck | ||
340 | #endif | 363 | #endif |
341 | 364 | ||
342 | /* Static assertions. */ | 365 | /* Static assertions. */ |
@@ -350,4 +373,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) | |||
350 | extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) | 373 | extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) |
351 | #endif | 374 | #endif |
352 | 375 | ||
376 | /* PRNG state. Need this here, details in lj_prng.h. */ | ||
377 | typedef struct PRNGState { | ||
378 | uint64_t u[4]; | ||
379 | } PRNGState; | ||
380 | |||
353 | #endif | 381 | #endif |
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 54c86038..ded382aa 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_err.h" | 10 | #include "lj_err.h" |
11 | #include "lj_buf.h" | ||
11 | #include "lj_func.h" | 12 | #include "lj_func.h" |
12 | #include "lj_str.h" | 13 | #include "lj_str.h" |
13 | #include "lj_tab.h" | 14 | #include "lj_tab.h" |
@@ -17,6 +18,7 @@ | |||
17 | #include "lj_frame.h" | 18 | #include "lj_frame.h" |
18 | #include "lj_bc.h" | 19 | #include "lj_bc.h" |
19 | #include "lj_ff.h" | 20 | #include "lj_ff.h" |
21 | #include "lj_strfmt.h" | ||
20 | #if LJ_HASJIT | 22 | #if LJ_HASJIT |
21 | #include "lj_jit.h" | 23 | #include "lj_jit.h" |
22 | #endif | 24 | #endif |
@@ -25,6 +27,9 @@ | |||
25 | #endif | 27 | #endif |
26 | #include "lj_trace.h" | 28 | #include "lj_trace.h" |
27 | #include "lj_dispatch.h" | 29 | #include "lj_dispatch.h" |
30 | #if LJ_HASPROFILE | ||
31 | #include "lj_profile.h" | ||
32 | #endif | ||
28 | #include "lj_vm.h" | 33 | #include "lj_vm.h" |
29 | #include "luajit.h" | 34 | #include "luajit.h" |
30 | 35 | ||
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC); | |||
37 | #include <math.h> | 42 | #include <math.h> |
38 | LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, | 43 | LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, |
39 | lua_State *co); | 44 | lua_State *co); |
45 | #if !LJ_HASJIT | ||
46 | #define lj_dispatch_stitch lj_dispatch_ins | ||
47 | #endif | ||
48 | #if !LJ_HASPROFILE | ||
49 | #define lj_dispatch_profile lj_dispatch_ins | ||
50 | #endif | ||
40 | 51 | ||
41 | #define GOTFUNC(name) (ASMFunction)name, | 52 | #define GOTFUNC(name) (ASMFunction)name, |
42 | static const ASMFunction dispatch_got[] = { | 53 | static const ASMFunction dispatch_got[] = { |
@@ -57,6 +68,8 @@ void lj_dispatch_init(GG_State *GG) | |||
57 | /* The JIT engine is off by default. luaopen_jit() turns it on. */ | 68 | /* The JIT engine is off by default. luaopen_jit() turns it on. */ |
58 | disp[BC_FORL] = disp[BC_IFORL]; | 69 | disp[BC_FORL] = disp[BC_IFORL]; |
59 | disp[BC_ITERL] = disp[BC_IITERL]; | 70 | disp[BC_ITERL] = disp[BC_IITERL]; |
71 | /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */ | ||
72 | disp[BC_ITERN] = &lj_vm_IITERN; | ||
60 | disp[BC_LOOP] = disp[BC_ILOOP]; | 73 | disp[BC_LOOP] = disp[BC_ILOOP]; |
61 | disp[BC_FUNCF] = disp[BC_IFUNCF]; | 74 | disp[BC_FUNCF] = disp[BC_IFUNCF]; |
62 | disp[BC_FUNCV] = disp[BC_IFUNCV]; | 75 | disp[BC_FUNCV] = disp[BC_IFUNCV]; |
@@ -64,7 +77,7 @@ void lj_dispatch_init(GG_State *GG) | |||
64 | for (i = 0; i < GG_NUM_ASMFF; i++) | 77 | for (i = 0; i < GG_NUM_ASMFF; i++) |
65 | GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); | 78 | GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); |
66 | #if LJ_TARGET_MIPS | 79 | #if LJ_TARGET_MIPS |
67 | memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); | 80 | memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); |
68 | #endif | 81 | #endif |
69 | } | 82 | } |
70 | 83 | ||
@@ -82,11 +95,12 @@ void lj_dispatch_init_hotcount(global_State *g) | |||
82 | #endif | 95 | #endif |
83 | 96 | ||
84 | /* Internal dispatch mode bits. */ | 97 | /* Internal dispatch mode bits. */ |
85 | #define DISPMODE_JIT 0x01 /* JIT compiler on. */ | 98 | #define DISPMODE_CALL 0x01 /* Override call dispatch. */ |
86 | #define DISPMODE_REC 0x02 /* Recording active. */ | 99 | #define DISPMODE_RET 0x02 /* Override return dispatch. */ |
87 | #define DISPMODE_INS 0x04 /* Override instruction dispatch. */ | 100 | #define DISPMODE_INS 0x04 /* Override instruction dispatch. */ |
88 | #define DISPMODE_CALL 0x08 /* Override call dispatch. */ | 101 | #define DISPMODE_JIT 0x10 /* JIT compiler on. */ |
89 | #define DISPMODE_RET 0x10 /* Override return dispatch. */ | 102 | #define DISPMODE_REC 0x20 /* Recording active. */ |
103 | #define DISPMODE_PROF 0x40 /* Profiling active. */ | ||
90 | 104 | ||
91 | /* Update dispatch table depending on various flags. */ | 105 | /* Update dispatch table depending on various flags. */ |
92 | void lj_dispatch_update(global_State *g) | 106 | void lj_dispatch_update(global_State *g) |
@@ -98,24 +112,29 @@ void lj_dispatch_update(global_State *g) | |||
98 | mode |= G2J(g)->state != LJ_TRACE_IDLE ? | 112 | mode |= G2J(g)->state != LJ_TRACE_IDLE ? |
99 | (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; | 113 | (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; |
100 | #endif | 114 | #endif |
115 | #if LJ_HASPROFILE | ||
116 | mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0; | ||
117 | #endif | ||
101 | mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; | 118 | mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; |
102 | mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; | 119 | mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; |
103 | mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; | 120 | mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; |
104 | if (oldmode != mode) { /* Mode changed? */ | 121 | if (oldmode != mode) { /* Mode changed? */ |
105 | ASMFunction *disp = G2GG(g)->dispatch; | 122 | ASMFunction *disp = G2GG(g)->dispatch; |
106 | ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv; | 123 | ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv; |
107 | g->dispatchmode = mode; | 124 | g->dispatchmode = mode; |
108 | 125 | ||
109 | /* Hotcount if JIT is on, but not while recording. */ | 126 | /* Hotcount if JIT is on, but not while recording. */ |
110 | if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { | 127 | if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { |
111 | f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); | 128 | f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); |
112 | f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); | 129 | f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); |
130 | f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]); | ||
113 | f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); | 131 | f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); |
114 | f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); | 132 | f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); |
115 | f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); | 133 | f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); |
116 | } else { /* Otherwise use the non-hotcounting instructions. */ | 134 | } else { /* Otherwise use the non-hotcounting instructions. */ |
117 | f_forl = disp[GG_LEN_DDISP+BC_IFORL]; | 135 | f_forl = disp[GG_LEN_DDISP+BC_IFORL]; |
118 | f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; | 136 | f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; |
137 | f_itern = &lj_vm_IITERN; | ||
119 | f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; | 138 | f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; |
120 | f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); | 139 | f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); |
121 | f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); | 140 | f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); |
@@ -123,12 +142,13 @@ void lj_dispatch_update(global_State *g) | |||
123 | /* Init static counting instruction dispatch first (may be copied below). */ | 142 | /* Init static counting instruction dispatch first (may be copied below). */ |
124 | disp[GG_LEN_DDISP+BC_FORL] = f_forl; | 143 | disp[GG_LEN_DDISP+BC_FORL] = f_forl; |
125 | disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; | 144 | disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; |
145 | disp[GG_LEN_DDISP+BC_ITERN] = f_itern; | ||
126 | disp[GG_LEN_DDISP+BC_LOOP] = f_loop; | 146 | disp[GG_LEN_DDISP+BC_LOOP] = f_loop; |
127 | 147 | ||
128 | /* Set dynamic instruction dispatch. */ | 148 | /* Set dynamic instruction dispatch. */ |
129 | if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { | 149 | if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) { |
130 | /* Need to update the whole table. */ | 150 | /* Need to update the whole table. */ |
131 | if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ | 151 | if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */ |
132 | /* Copy static dispatch table to dynamic dispatch table. */ | 152 | /* Copy static dispatch table to dynamic dispatch table. */ |
133 | memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); | 153 | memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); |
134 | /* Overwrite with dynamic return dispatch. */ | 154 | /* Overwrite with dynamic return dispatch. */ |
@@ -140,15 +160,17 @@ void lj_dispatch_update(global_State *g) | |||
140 | } | 160 | } |
141 | } else { | 161 | } else { |
142 | /* The recording dispatch also checks for hooks. */ | 162 | /* The recording dispatch also checks for hooks. */ |
143 | ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; | 163 | ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook : |
164 | (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; | ||
144 | uint32_t i; | 165 | uint32_t i; |
145 | for (i = 0; i < GG_LEN_SDISP; i++) | 166 | for (i = 0; i < GG_LEN_SDISP; i++) |
146 | disp[i] = f; | 167 | disp[i] = f; |
147 | } | 168 | } |
148 | } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { | 169 | } else if (!(mode & DISPMODE_INS)) { |
149 | /* Otherwise set dynamic counting ins. */ | 170 | /* Otherwise set dynamic counting ins. */ |
150 | disp[BC_FORL] = f_forl; | 171 | disp[BC_FORL] = f_forl; |
151 | disp[BC_ITERL] = f_iterl; | 172 | disp[BC_ITERL] = f_iterl; |
173 | disp[BC_ITERN] = f_itern; | ||
152 | disp[BC_LOOP] = f_loop; | 174 | disp[BC_LOOP] = f_loop; |
153 | /* Set dynamic return dispatch. */ | 175 | /* Set dynamic return dispatch. */ |
154 | if ((mode & DISPMODE_RET)) { | 176 | if ((mode & DISPMODE_RET)) { |
@@ -236,22 +258,15 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) | |||
236 | } else { | 258 | } else { |
237 | if (!(mode & LUAJIT_MODE_ON)) | 259 | if (!(mode & LUAJIT_MODE_ON)) |
238 | G2J(g)->flags &= ~(uint32_t)JIT_F_ON; | 260 | G2J(g)->flags &= ~(uint32_t)JIT_F_ON; |
239 | #if LJ_TARGET_X86ORX64 | ||
240 | else if ((G2J(g)->flags & JIT_F_SSE2)) | ||
241 | G2J(g)->flags |= (uint32_t)JIT_F_ON; | ||
242 | else | ||
243 | return 0; /* Don't turn on JIT compiler without SSE2 support. */ | ||
244 | #else | ||
245 | else | 261 | else |
246 | G2J(g)->flags |= (uint32_t)JIT_F_ON; | 262 | G2J(g)->flags |= (uint32_t)JIT_F_ON; |
247 | #endif | ||
248 | lj_dispatch_update(g); | 263 | lj_dispatch_update(g); |
249 | } | 264 | } |
250 | break; | 265 | break; |
251 | case LUAJIT_MODE_FUNC: | 266 | case LUAJIT_MODE_FUNC: |
252 | case LUAJIT_MODE_ALLFUNC: | 267 | case LUAJIT_MODE_ALLFUNC: |
253 | case LUAJIT_MODE_ALLSUBFUNC: { | 268 | case LUAJIT_MODE_ALLSUBFUNC: { |
254 | cTValue *tv = idx == 0 ? frame_prev(L->base-1) : | 269 | cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 : |
255 | idx > 0 ? L->base + (idx-1) : L->top + idx; | 270 | idx > 0 ? L->base + (idx-1) : L->top + idx; |
256 | GCproto *pt; | 271 | GCproto *pt; |
257 | if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) | 272 | if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) |
@@ -286,7 +301,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) | |||
286 | if (idx != 0) { | 301 | if (idx != 0) { |
287 | cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx; | 302 | cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx; |
288 | if (tvislightud(tv)) | 303 | if (tvislightud(tv)) |
289 | g->wrapf = (lua_CFunction)lightudV(tv); | 304 | g->wrapf = (lua_CFunction)lightudV(g, tv); |
290 | else | 305 | else |
291 | return 0; /* Failed. */ | 306 | return 0; /* Failed. */ |
292 | } else { | 307 | } else { |
@@ -352,10 +367,19 @@ static void callhook(lua_State *L, int event, BCLine line) | |||
352 | /* Top frame, nextframe = NULL. */ | 367 | /* Top frame, nextframe = NULL. */ |
353 | ar.i_ci = (int)((L->base-1) - tvref(L->stack)); | 368 | ar.i_ci = (int)((L->base-1) - tvref(L->stack)); |
354 | lj_state_checkstack(L, 1+LUA_MINSTACK); | 369 | lj_state_checkstack(L, 1+LUA_MINSTACK); |
370 | #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF | ||
371 | lj_profile_hook_enter(g); | ||
372 | #else | ||
355 | hook_enter(g); | 373 | hook_enter(g); |
374 | #endif | ||
356 | hookf(L, &ar); | 375 | hookf(L, &ar); |
357 | lua_assert(hook_active(g)); | 376 | lj_assertG(hook_active(g), "active hook flag removed"); |
377 | setgcref(g->cur_L, obj2gco(L)); | ||
378 | #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF | ||
379 | lj_profile_hook_leave(g); | ||
380 | #else | ||
358 | hook_leave(g); | 381 | hook_leave(g); |
382 | #endif | ||
359 | } | 383 | } |
360 | } | 384 | } |
361 | 385 | ||
@@ -368,7 +392,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres) | |||
368 | if (bc_op(ins) == BC_UCLO) | 392 | if (bc_op(ins) == BC_UCLO) |
369 | ins = pc[bc_j(ins)]; | 393 | ins = pc[bc_j(ins)]; |
370 | switch (bc_op(ins)) { | 394 | switch (bc_op(ins)) { |
371 | case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; | 395 | case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2; |
372 | case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; | 396 | case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; |
373 | case BC_TSETM: return bc_a(ins) + nres-1; | 397 | case BC_TSETM: return bc_a(ins) + nres-1; |
374 | default: return pt->framesize; | 398 | default: return pt->framesize; |
@@ -397,7 +421,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) | |||
397 | #endif | 421 | #endif |
398 | J->L = L; | 422 | J->L = L; |
399 | lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ | 423 | lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ |
400 | lua_assert(L->top - L->base == delta); | 424 | lj_assertG(L->top - L->base == delta, |
425 | "unbalanced stack after tracing of instruction"); | ||
401 | } | 426 | } |
402 | } | 427 | } |
403 | #endif | 428 | #endif |
@@ -457,7 +482,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) | |||
457 | #endif | 482 | #endif |
458 | pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); | 483 | pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); |
459 | lj_trace_hot(J, pc); | 484 | lj_trace_hot(J, pc); |
460 | lua_assert(L->top - L->base == delta); | 485 | lj_assertG(L->top - L->base == delta, |
486 | "unbalanced stack after hot call"); | ||
461 | goto out; | 487 | goto out; |
462 | } else if (J->state != LJ_TRACE_IDLE && | 488 | } else if (J->state != LJ_TRACE_IDLE && |
463 | !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { | 489 | !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { |
@@ -466,7 +492,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) | |||
466 | #endif | 492 | #endif |
467 | /* Record the FUNC* bytecodes, too. */ | 493 | /* Record the FUNC* bytecodes, too. */ |
468 | lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ | 494 | lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ |
469 | lua_assert(L->top - L->base == delta); | 495 | lj_assertG(L->top - L->base == delta, |
496 | "unbalanced stack after hot instruction"); | ||
470 | } | 497 | } |
471 | #endif | 498 | #endif |
472 | if ((g->hookmask & LUA_MASKCALL)) { | 499 | if ((g->hookmask & LUA_MASKCALL)) { |
@@ -492,3 +519,41 @@ out: | |||
492 | return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ | 519 | return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ |
493 | } | 520 | } |
494 | 521 | ||
522 | #if LJ_HASJIT | ||
523 | /* Stitch a new trace. */ | ||
524 | void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) | ||
525 | { | ||
526 | ERRNO_SAVE | ||
527 | lua_State *L = J->L; | ||
528 | void *cf = cframe_raw(L->cframe); | ||
529 | const BCIns *oldpc = cframe_pc(cf); | ||
530 | setcframe_pc(cf, pc); | ||
531 | /* Before dispatch, have to bias PC by 1. */ | ||
532 | L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf)); | ||
533 | lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */ | ||
534 | setcframe_pc(cf, oldpc); | ||
535 | ERRNO_RESTORE | ||
536 | } | ||
537 | #endif | ||
538 | |||
539 | #if LJ_HASPROFILE | ||
540 | /* Profile dispatch. */ | ||
541 | void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc) | ||
542 | { | ||
543 | ERRNO_SAVE | ||
544 | GCfunc *fn = curr_func(L); | ||
545 | GCproto *pt = funcproto(fn); | ||
546 | void *cf = cframe_raw(L->cframe); | ||
547 | const BCIns *oldpc = cframe_pc(cf); | ||
548 | global_State *g; | ||
549 | setcframe_pc(cf, pc); | ||
550 | L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf)); | ||
551 | lj_profile_interpreter(L); | ||
552 | setcframe_pc(cf, oldpc); | ||
553 | g = G(L); | ||
554 | setgcref(g->cur_L, obj2gco(L)); | ||
555 | setvmstate(g, INTERP); | ||
556 | ERRNO_RESTORE | ||
557 | } | ||
558 | #endif | ||
559 | |||
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index cb4cbf8e..0594af51 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h | |||
@@ -14,8 +14,24 @@ | |||
14 | 14 | ||
15 | #if LJ_TARGET_MIPS | 15 | #if LJ_TARGET_MIPS |
16 | /* Need our own global offset table for the dreaded MIPS calling conventions. */ | 16 | /* Need our own global offset table for the dreaded MIPS calling conventions. */ |
17 | |||
18 | #ifndef _LJ_VM_H | ||
19 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b); | ||
20 | #endif | ||
21 | |||
22 | #if LJ_SOFTFP | ||
23 | #ifndef _LJ_IRCALL_H | ||
24 | extern double __adddf3(double a, double b); | ||
25 | extern double __subdf3(double a, double b); | ||
26 | extern double __muldf3(double a, double b); | ||
27 | extern double __divdf3(double a, double b); | ||
28 | #endif | ||
29 | #define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) | ||
30 | #else | ||
31 | #define SFGOTDEF(_) | ||
32 | #endif | ||
17 | #if LJ_HASJIT | 33 | #if LJ_HASJIT |
18 | #define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) | 34 | #define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) |
19 | #else | 35 | #else |
20 | #define JITGOTDEF(_) | 36 | #define JITGOTDEF(_) |
21 | #endif | 37 | #endif |
@@ -28,16 +44,19 @@ | |||
28 | #define GOTDEF(_) \ | 44 | #define GOTDEF(_) \ |
29 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ | 45 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ |
30 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ | 46 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ |
31 | _(pow) _(fmod) _(ldexp) \ | 47 | _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ |
32 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) _(lj_err_run) \ | 48 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ |
49 | _(lj_dispatch_profile) _(lj_err_throw) \ | ||
33 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ | 50 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ |
34 | _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ | 51 | _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ |
35 | _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ | 52 | _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ |
36 | _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ | 53 | _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \ |
37 | _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ | 54 | _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \ |
38 | _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ | 55 | _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ |
39 | _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ | 56 | _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ |
40 | JITGOTDEF(_) FFIGOTDEF(_) | 57 | _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ |
58 | _(lj_buf_putstr_upper) _(lj_buf_tostr) \ | ||
59 | JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_) | ||
41 | 60 | ||
42 | enum { | 61 | enum { |
43 | #define GOTENUM(name) LJ_GOT_##name, | 62 | #define GOTENUM(name) LJ_GOT_##name, |
@@ -60,7 +79,7 @@ typedef uint16_t HotCount; | |||
60 | #define HOTCOUNT_CALL 1 | 79 | #define HOTCOUNT_CALL 1 |
61 | 80 | ||
62 | /* This solves a circular dependency problem -- bump as needed. Sigh. */ | 81 | /* This solves a circular dependency problem -- bump as needed. Sigh. */ |
63 | #define GG_NUM_ASMFF 62 | 82 | #define GG_NUM_ASMFF 57 |
64 | 83 | ||
65 | #define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) | 84 | #define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) |
66 | #define GG_LEN_SDISP BC_FUNCF | 85 | #define GG_LEN_SDISP BC_FUNCF |
@@ -96,6 +115,7 @@ typedef struct GG_State { | |||
96 | #define J2G(J) (&J2GG(J)->g) | 115 | #define J2G(J) (&J2GG(J)->g) |
97 | #define G2J(gl) (&G2GG(gl)->J) | 116 | #define G2J(gl) (&G2GG(gl)->J) |
98 | #define L2J(L) (&L2GG(L)->J) | 117 | #define L2J(L) (&L2GG(L)->J) |
118 | #define GG_G2J (GG_OFS(J) - GG_OFS(g)) | ||
99 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) | 119 | #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) |
100 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) | 120 | #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) |
101 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) | 121 | #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) |
@@ -117,7 +137,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g); | |||
117 | /* Instruction dispatch callback for hooks or when recording. */ | 137 | /* Instruction dispatch callback for hooks or when recording. */ |
118 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); | 138 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); |
119 | LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); | 139 | LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); |
120 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); | 140 | #if LJ_HASJIT |
141 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc); | ||
142 | #endif | ||
143 | #if LJ_HASPROFILE | ||
144 | LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc); | ||
145 | #endif | ||
121 | 146 | ||
122 | #if LJ_HASFFI && !defined(_BUILDVM_H) | 147 | #if LJ_HASFFI && !defined(_BUILDVM_H) |
123 | /* Save/restore errno and GetLastError() around hooks, exits and recording. */ | 148 | /* Save/restore errno and GetLastError() around hooks, exits and recording. */ |
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 2db07ef6..cfb174f4 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h | |||
@@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm) | |||
81 | 81 | ||
82 | static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | 82 | static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) |
83 | { | 83 | { |
84 | lua_assert(ofs >= -255 && ofs <= 255); | 84 | lj_assertA(ofs >= -255 && ofs <= 255, |
85 | "load/store offset %d out of range", ofs); | ||
85 | if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; | 86 | if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; |
86 | *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | | 87 | *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | |
87 | ((ofs & 0xf0) << 4) | (ofs & 0x0f); | 88 | ((ofs & 0xf0) << 4) | (ofs & 0x0f); |
@@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | |||
89 | 90 | ||
90 | static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | 91 | static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) |
91 | { | 92 | { |
92 | lua_assert(ofs >= -4095 && ofs <= 4095); | 93 | lj_assertA(ofs >= -4095 && ofs <= 4095, |
94 | "load/store offset %d out of range", ofs); | ||
93 | /* Combine LDR/STR pairs to LDRD/STRD. */ | 95 | /* Combine LDR/STR pairs to LDRD/STRD. */ |
94 | if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && | 96 | if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && |
95 | (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && | 97 | (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && |
@@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | |||
106 | #if !LJ_SOFTFP | 108 | #if !LJ_SOFTFP |
107 | static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | 109 | static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) |
108 | { | 110 | { |
109 | lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); | 111 | lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0, |
112 | "load/store offset %d out of range", ofs); | ||
110 | if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; | 113 | if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; |
111 | *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); | 114 | *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); |
112 | } | 115 | } |
@@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) | |||
124 | while (work) { | 127 | while (work) { |
125 | Reg r = rset_picktop(work); | 128 | Reg r = rset_picktop(work); |
126 | IRRef ref = regcost_ref(as->cost[r]); | 129 | IRRef ref = regcost_ref(as->cost[r]); |
127 | lua_assert(r != d); | 130 | lj_assertA(r != d, "dest reg not free"); |
128 | if (emit_canremat(ref)) { | 131 | if (emit_canremat(ref)) { |
129 | int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); | 132 | int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); |
130 | uint32_t k = emit_isk12(ARMI_ADD, delta); | 133 | uint32_t k = emit_isk12(ARMI_ADD, delta); |
@@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) | |||
142 | } | 145 | } |
143 | 146 | ||
144 | /* Try to find a two step delta relative to another constant. */ | 147 | /* Try to find a two step delta relative to another constant. */ |
145 | static int emit_kdelta2(ASMState *as, Reg d, int32_t i) | 148 | static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) |
146 | { | 149 | { |
147 | RegSet work = ~as->freeset & RSET_GPR; | 150 | RegSet work = ~as->freeset & RSET_GPR; |
148 | while (work) { | 151 | while (work) { |
149 | Reg r = rset_picktop(work); | 152 | Reg r = rset_picktop(work); |
150 | IRRef ref = regcost_ref(as->cost[r]); | 153 | IRRef ref = regcost_ref(as->cost[r]); |
151 | lua_assert(r != d); | 154 | lj_assertA(r != rd, "dest reg %d not free", rd); |
152 | if (emit_canremat(ref)) { | 155 | if (emit_canremat(ref)) { |
153 | int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; | 156 | int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; |
154 | if (other) { | 157 | if (other) { |
@@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) | |||
159 | k2 = emit_isk12(0, delta & (255 << sh)); | 162 | k2 = emit_isk12(0, delta & (255 << sh)); |
160 | k = emit_isk12(0, delta & ~(255 << sh)); | 163 | k = emit_isk12(0, delta & ~(255 << sh)); |
161 | if (k) { | 164 | if (k) { |
162 | emit_dn(as, ARMI_ADD^k2^inv, d, d); | 165 | emit_dn(as, ARMI_ADD^k2^inv, rd, rd); |
163 | emit_dn(as, ARMI_ADD^k^inv, d, r); | 166 | emit_dn(as, ARMI_ADD^k^inv, rd, r); |
164 | return 1; | 167 | return 1; |
165 | } | 168 | } |
166 | } | 169 | } |
@@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) | |||
171 | } | 174 | } |
172 | 175 | ||
173 | /* Load a 32 bit constant into a GPR. */ | 176 | /* Load a 32 bit constant into a GPR. */ |
174 | static void emit_loadi(ASMState *as, Reg r, int32_t i) | 177 | static void emit_loadi(ASMState *as, Reg rd, int32_t i) |
175 | { | 178 | { |
176 | uint32_t k = emit_isk12(ARMI_MOV, i); | 179 | uint32_t k = emit_isk12(ARMI_MOV, i); |
177 | lua_assert(rset_test(as->freeset, r) || r == RID_TMP); | 180 | lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, |
181 | "dest reg %d not free", rd); | ||
178 | if (k) { | 182 | if (k) { |
179 | /* Standard K12 constant. */ | 183 | /* Standard K12 constant. */ |
180 | emit_d(as, ARMI_MOV^k, r); | 184 | emit_d(as, ARMI_MOV^k, rd); |
181 | } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { | 185 | } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { |
182 | /* 16 bit loword constant for ARMv6T2. */ | 186 | /* 16 bit loword constant for ARMv6T2. */ |
183 | emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); | 187 | emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); |
184 | } else if (emit_kdelta1(as, r, i)) { | 188 | } else if (emit_kdelta1(as, rd, i)) { |
185 | /* One step delta relative to another constant. */ | 189 | /* One step delta relative to another constant. */ |
186 | } else if ((as->flags & JIT_F_ARMV6T2)) { | 190 | } else if ((as->flags & JIT_F_ARMV6T2)) { |
187 | /* 32 bit hiword/loword constant for ARMv6T2. */ | 191 | /* 32 bit hiword/loword constant for ARMv6T2. */ |
188 | emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); | 192 | emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); |
189 | emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); | 193 | emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); |
190 | } else if (emit_kdelta2(as, r, i)) { | 194 | } else if (emit_kdelta2(as, rd, i)) { |
191 | /* Two step delta relative to another constant. */ | 195 | /* Two step delta relative to another constant. */ |
192 | } else { | 196 | } else { |
193 | /* Otherwise construct the constant with up to 4 instructions. */ | 197 | /* Otherwise construct the constant with up to 4 instructions. */ |
@@ -197,17 +201,17 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
197 | int32_t m = i & (255 << sh); | 201 | int32_t m = i & (255 << sh); |
198 | i &= ~(255 << sh); | 202 | i &= ~(255 << sh); |
199 | if (i == 0) { | 203 | if (i == 0) { |
200 | emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); | 204 | emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); |
201 | break; | 205 | break; |
202 | } | 206 | } |
203 | emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); | 207 | emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); |
204 | } | 208 | } |
205 | } | 209 | } |
206 | } | 210 | } |
207 | 211 | ||
208 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) | 212 | #define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr))) |
209 | 213 | ||
210 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); | 214 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); |
211 | 215 | ||
212 | /* Get/set from constant pointer. */ | 216 | /* Get/set from constant pointer. */ |
213 | static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) | 217 | static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) |
@@ -219,8 +223,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) | |||
219 | 223 | ||
220 | #if !LJ_SOFTFP | 224 | #if !LJ_SOFTFP |
221 | /* Load a number constant into an FPR. */ | 225 | /* Load a number constant into an FPR. */ |
222 | static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | 226 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) |
223 | { | 227 | { |
228 | cTValue *tv = ir_knum(ir); | ||
224 | int32_t i; | 229 | int32_t i; |
225 | if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { | 230 | if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { |
226 | uint32_t hi = tv->u32.hi; | 231 | uint32_t hi = tv->u32.hi; |
@@ -260,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target) | |||
260 | { | 265 | { |
261 | MCode *p = as->mcp; | 266 | MCode *p = as->mcp; |
262 | ptrdiff_t delta = (target - p) - 1; | 267 | ptrdiff_t delta = (target - p) - 1; |
263 | lua_assert(((delta + 0x00800000) >> 24) == 0); | 268 | lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range"); |
264 | *--p = ai | ((uint32_t)delta & 0x00ffffffu); | 269 | *--p = ai | ((uint32_t)delta & 0x00ffffffu); |
265 | as->mcp = p; | 270 | as->mcp = p; |
266 | } | 271 | } |
@@ -288,7 +293,7 @@ static void emit_call(ASMState *as, void *target) | |||
288 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | 293 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) |
289 | { | 294 | { |
290 | #if LJ_SOFTFP | 295 | #if LJ_SOFTFP |
291 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 296 | lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); |
292 | #else | 297 | #else |
293 | if (dst >= RID_MAX_GPR) { | 298 | if (dst >= RID_MAX_GPR) { |
294 | emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, | 299 | emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, |
@@ -308,30 +313,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
308 | emit_dm(as, ARMI_MOV, dst, src); | 313 | emit_dm(as, ARMI_MOV, dst, src); |
309 | } | 314 | } |
310 | 315 | ||
311 | /* Generic load of register from stack slot. */ | 316 | /* Generic load of register with base and (small) offset address. */ |
312 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 317 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
313 | { | 318 | { |
314 | #if LJ_SOFTFP | 319 | #if LJ_SOFTFP |
315 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 320 | lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); |
316 | #else | 321 | #else |
317 | if (r >= RID_MAX_GPR) | 322 | if (r >= RID_MAX_GPR) |
318 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); | 323 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); |
319 | else | 324 | else |
320 | #endif | 325 | #endif |
321 | emit_lso(as, ARMI_LDR, r, RID_SP, ofs); | 326 | emit_lso(as, ARMI_LDR, r, base, ofs); |
322 | } | 327 | } |
323 | 328 | ||
324 | /* Generic store of register to stack slot. */ | 329 | /* Generic store of register with base and (small) offset address. */ |
325 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 330 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
326 | { | 331 | { |
327 | #if LJ_SOFTFP | 332 | #if LJ_SOFTFP |
328 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 333 | lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); |
329 | #else | 334 | #else |
330 | if (r >= RID_MAX_GPR) | 335 | if (r >= RID_MAX_GPR) |
331 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); | 336 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); |
332 | else | 337 | else |
333 | #endif | 338 | #endif |
334 | emit_lso(as, ARMI_STR, r, RID_SP, ofs); | 339 | emit_lso(as, ARMI_STR, r, base, ofs); |
335 | } | 340 | } |
336 | 341 | ||
337 | /* Emit an arithmetic/logic operation with a constant operand. */ | 342 | /* Emit an arithmetic/logic operation with a constant operand. */ |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h new file mode 100644 index 00000000..c4b4c147 --- /dev/null +++ b/src/lj_emit_arm64.h | |||
@@ -0,0 +1,424 @@ | |||
1 | /* | ||
2 | ** ARM64 instruction emitter. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | ** Sponsored by Cisco Systems, Inc. | ||
7 | */ | ||
8 | |||
9 | /* -- Constant encoding --------------------------------------------------- */ | ||
10 | |||
11 | static uint64_t get_k64val(ASMState *as, IRRef ref) | ||
12 | { | ||
13 | IRIns *ir = IR(ref); | ||
14 | if (ir->o == IR_KINT64) { | ||
15 | return ir_kint64(ir)->u64; | ||
16 | } else if (ir->o == IR_KGC) { | ||
17 | return (uint64_t)ir_kgc(ir); | ||
18 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
19 | return (uint64_t)ir_kptr(ir); | ||
20 | } else { | ||
21 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, | ||
22 | "bad 64 bit const IR op %d", ir->o); | ||
23 | return ir->i; /* Sign-extended. */ | ||
24 | } | ||
25 | } | ||
26 | |||
27 | /* Encode constant in K12 format for data processing instructions. */ | ||
28 | static uint32_t emit_isk12(int64_t n) | ||
29 | { | ||
30 | uint64_t k = (n < 0) ? -n : n; | ||
31 | uint32_t m = (n < 0) ? 0x40000000 : 0; | ||
32 | if (k < 0x1000) { | ||
33 | return A64I_K12|m|A64F_U12(k); | ||
34 | } else if ((k & 0xfff000) == k) { | ||
35 | return A64I_K12|m|0x400000|A64F_U12(k>>12); | ||
36 | } | ||
37 | return 0; | ||
38 | } | ||
39 | |||
/* Count leading/trailing zero bits of a 64 bit value via the compiler
** builtins. NOTE: the builtins have an undefined result for n == 0, so
** callers must only pass non-zero values.
*/
40 | #define emit_clz64(n) __builtin_clzll(n) | |
41 | #define emit_ctz64(n) __builtin_ctzll(n) | |
42 | |||
/* Returns the K13 operand bits for n, or 0 if n cannot be encoded.
** With is64 == 0 only the low 32 bits of n are considered.
** The value is first normalized so that bit 0 is clear (inverting it if
** needed), then reduced to the smallest repeating element (64/32/16/8/4/2
** bits wide). The element must contain a single contiguous run of ones.
*/
43 | /* Encode constant in K13 format for logical data processing instructions. */ | |
44 | static uint32_t emit_isk13(uint64_t n, int is64) | |
45 | { | |
/* w starts at 128 (64 after inversion); it is only lowered in the loop. */
46 | int inv = 0, w = 128, lz, tz; | |
47 | if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ | |
48 | if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ | |
49 | do { /* Find the repeat width. */ | |
/* Each step stops when the two halves differ, else keeps only the low half. */
50 | if (is64 && (uint32_t)(n^(n>>32))) break; | |
51 | n = (uint32_t)n; | |
52 | if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ | |
53 | w = 32; if ((n^(n>>16)) & 0xffff) break; | |
54 | n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; | |
55 | n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; | |
56 | n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; | |
57 | n = n & 0x3; w = 2; | |
58 | } while (0); | |
/* n is non-zero here, so the clz/ctz builtins are well-defined. */
59 | lz = emit_clz64(n); | |
60 | tz = emit_ctz64(n); | |
/* Shift the run to the top, then arithmetic-shift it down: all ones iff contiguous. */
61 | if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ | |
/* NOTE(review): the fields at bit 16 and bit 10 presumably hold rotate and
** run length, and widths >= 64 spill into bit 22 -- confirm against the
** A64 logical immediate encoding.
*/
62 | if (inv) | |
63 | return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); | |
64 | else | |
65 | return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); | |
66 | } | |
67 | |||
/* Check whether the 64 bit pattern n fits an 8 bit FP constant.
** Requires the low 48 bits to be zero and bits 54..62 to be either 0x100 or
** 0x0ff. Returns bits 48..54 combined with bit 63 (as bit 7), or ~0u if the
** pattern does not qualify.
*/
static uint32_t emit_isfpk64(uint64_t n)
{
  uint64_t top9 = (n >> 54) & 0x1ff;
  if ((n << 16) != 0)
    return ~0u;  /* Low 48 bits must be clear. */
  if (top9 != 0x100 && top9 != 0x0ff)
    return ~0u;
  return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
}
76 | |||
77 | /* -- Emit basic instructions --------------------------------------------- */ | ||
78 | |||
79 | static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) | ||
80 | { | ||
81 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); | ||
82 | } | ||
83 | |||
84 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) | ||
85 | { | ||
86 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); | ||
87 | } | ||
88 | |||
89 | static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) | ||
90 | { | ||
91 | *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); | ||
92 | } | ||
93 | |||
94 | static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) | ||
95 | { | ||
96 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); | ||
97 | } | ||
98 | |||
99 | static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) | ||
100 | { | ||
101 | *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); | ||
102 | } | ||
103 | |||
104 | static void emit_d(ASMState *as, A64Ins ai, Reg rd) | ||
105 | { | ||
106 | *--as->mcp = ai | A64F_D(rd); | ||
107 | } | ||
108 | |||
109 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) | ||
110 | { | ||
111 | *--as->mcp = ai | A64F_N(rn); | ||
112 | } | ||
113 | |||
114 | static int emit_checkofs(A64Ins ai, int64_t ofs) | ||
115 | { | ||
116 | int scale = (ai >> 30) & 3; | ||
117 | if (ofs < 0 || (ofs & ((1<<scale)-1))) { | ||
118 | return (ofs >= -256 && ofs <= 255) ? -1 : 0; | ||
119 | } else { | ||
120 | return (ofs < (4096<<scale)) ? 1 : 0; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) | ||
125 | { | ||
126 | int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; | ||
127 | lj_assertA(ot, "load/store offset %d out of range", ofs); | ||
128 | /* Combine LDR/STR pairs to LDP/STP. */ | ||
129 | if ((sc == 2 || sc == 3) && | ||
130 | (!(ai & 0x400000) || rd != rn) && | ||
131 | as->mcp != as->mcloop) { | ||
132 | uint32_t prev = *as->mcp & ~A64F_D(31); | ||
133 | int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); | ||
134 | A64Ins aip; | ||
135 | if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || | ||
136 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { | ||
137 | aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); | ||
138 | } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || | ||
139 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { | ||
140 | aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); | ||
141 | ofsm = ofs; | ||
142 | } else { | ||
143 | goto nopair; | ||
144 | } | ||
145 | if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { | ||
146 | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | | ||
147 | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); | ||
148 | return; | ||
149 | } | ||
150 | } | ||
151 | nopair: | ||
152 | if (ot == 1) | ||
153 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); | ||
154 | else | ||
155 | *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); | ||
156 | } | ||
157 | |||
158 | /* -- Emit loads/stores --------------------------------------------------- */ | ||
159 | |||
160 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | ||
161 | #define emit_canremat(ref) ((ref) <= ASMREF_L) | ||
162 | |||
163 | /* Try to find an N-step delta relative to other consts with N < lim. */ | ||
164 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | ||
165 | { | ||
166 | RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); | ||
167 | if (lim <= 1) return 0; /* Can't beat that. */ | ||
168 | while (work) { | ||
169 | Reg r = rset_picktop(work); | ||
170 | IRRef ref = regcost_ref(as->cost[r]); | ||
171 | lj_assertA(r != rd, "dest reg %d not free", rd); | ||
172 | if (ref < REF_TRUE) { | ||
173 | uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : | ||
174 | get_k64val(as, ref); | ||
175 | int64_t delta = (int64_t)(k - kx); | ||
176 | if (delta == 0) { | ||
177 | emit_dm(as, A64I_MOVx, rd, r); | ||
178 | return 1; | ||
179 | } else { | ||
180 | uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); | ||
181 | if (k12) { | ||
182 | emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); | ||
183 | return 1; | ||
184 | } | ||
185 | /* Do other ops or multi-step deltas pay off? Probably not. | ||
186 | ** E.g. XOR rarely helps with pointer consts. | ||
187 | */ | ||
188 | } | ||
189 | } | ||
190 | rset_clear(work, r); | ||
191 | } | ||
192 | return 0; /* Failed. */ | ||
193 | } | ||
194 | |||
/* Load constant u64 into GPR rd.
** is64 is 0 for a 32 bit constant (which is sign-extended first) or A64I_X
** for a 64 bit constant, as passed by the emit_loadi/emit_loadu64 macros.
** Tries, in order: a single K13-encoded ORR with RID_ZERO (only when the
** move sequence would need more than one instruction), a delta from a
** constant already held in a register (emit_kdelta), and finally a
** MOVZ/MOVN followed by MOVKs for the remaining 16 bit fragments.
** Each emit call stores below the previous one, so the MOVZ/MOVN emitted
** last ends up in front of the MOVKs.
*/
195 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) | |
196 | { | |
197 | int i, zeros = 0, ones = 0, neg; | |
198 | if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ | |
199 | /* Count homogeneous 16 bit fragments. */ | |
200 | for (i = 0; i < 4; i++) { | |
201 | uint64_t frag = (u64 >> i*16) & 0xffff; | |
202 | zeros += (frag == 0); | |
203 | ones += (frag == 0xffff); | |
204 | } | |
205 | neg = ones > zeros; /* Use MOVN if it pays off. */ | |
206 | if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */ | |
207 | uint32_t k13 = emit_isk13(u64, is64); | |
208 | if (k13) { | |
209 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); | |
210 | return; | |
211 | } | |
212 | } | |
/* The limit passed is the number of instructions the move sequence needs. */
213 | if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { | |
214 | int shift = 0, lshift = 0; | |
215 | uint64_t n64 = neg ? ~u64 : u64; | |
/* The clz/ctz builtins must not see zero; shift/lshift stay 0 then. */
216 | if (n64 != 0) { | |
217 | /* Find first/last fragment to be filled. */ | |
218 | shift = (63-emit_clz64(n64)) & ~15; | |
219 | lshift = emit_ctz64(n64) & ~15; | |
220 | } | |
221 | /* MOVK requires the original value (u64). */ | |
222 | while (shift > lshift) { | |
223 | uint32_t u16 = (u64 >> shift) & 0xffff; | |
224 | /* Skip fragments that are correctly filled by MOVN/MOVZ. */ | |
225 | if (u16 != (neg ? 0xffff : 0)) | |
226 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); | |
227 | shift -= 16; | |
228 | } | |
229 | /* But MOVN needs an inverted value (n64). */ | |
230 | emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | | |
231 | A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | |
232 | } | |
233 | } | |
234 | |||
235 | /* Load a 32 bit constant into a GPR. */ | ||
236 | #define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) | ||
237 | |||
238 | /* Load a 64 bit constant into a GPR. */ | ||
239 | #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) | ||
240 | |||
241 | #define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) | ||
242 | |||
/* Byte offset of address k relative to the global state (&J2GG(as->J)->g). */
243 | #define glofs(as, k) \ | |
244 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) | |
/* Byte offset of address k relative to the slot below as->mcp, i.e. where
** the next emitted instruction will be stored.
*/
245 | #define mcpofs(as, k) \ | |
246 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) | |
/* Check the mcpofs word offset with A64F_S_OK(..., 19), presumably a signed
** 19 bit range test -- confirm against the A64F_S_OK definition.
*/
247 | #define checkmcpofs(as, k) \ | |
248 | (A64F_S_OK(mcpofs(as, k)>>2, 19)) | |
249 | |||
250 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); | ||
251 | |||
252 | /* Get/set from constant pointer. */ | ||
253 | static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) | ||
254 | { | ||
255 | /* First, check if ip + offset is in range. */ | ||
256 | if ((ai & 0x00400000) && checkmcpofs(as, p)) { | ||
257 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); | ||
258 | } else { | ||
259 | Reg base = RID_GL; /* Next, try GL + offset. */ | ||
260 | int64_t ofs = glofs(as, p); | ||
261 | if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ | ||
262 | int64_t i64 = i64ptr(p); | ||
263 | base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); | ||
264 | ofs = i64 & 0x7fffull; | ||
265 | } | ||
266 | emit_lso(as, ai, r, base, ofs); | ||
267 | } | ||
268 | } | ||
269 | |||
270 | /* Load 64 bit IR constant into register. */ | ||
271 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | ||
272 | { | ||
273 | const uint64_t *k = &ir_k64(ir)->u64; | ||
274 | int64_t ofs; | ||
275 | if (r >= RID_MAX_GPR) { | ||
276 | uint32_t fpk = emit_isfpk64(*k); | ||
277 | if (fpk != ~0u) { | ||
278 | emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); | ||
279 | return; | ||
280 | } | ||
281 | } | ||
282 | ofs = glofs(as, k); | ||
283 | if (emit_checkofs(A64I_LDRx, ofs)) { | ||
284 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, | ||
285 | (r & 31), RID_GL, ofs); | ||
286 | } else { | ||
287 | if (r >= RID_MAX_GPR) { | ||
288 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); | ||
289 | r = RID_TMP; | ||
290 | } | ||
291 | if (checkmcpofs(as, k)) | ||
292 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); | ||
293 | else | ||
294 | emit_loadu64(as, r, *k); | ||
295 | } | ||
296 | } | ||
297 | |||
298 | /* Get/set global_State fields. */ | ||
299 | #define emit_getgl(as, r, field) \ | ||
300 | emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) | ||
301 | #define emit_setgl(as, r, field) \ | ||
302 | emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) | ||
303 | |||
304 | /* Trace number is determined from pc of exit instruction. */ | ||
305 | #define emit_setvmstate(as, i) UNUSED(i) | ||
306 | |||
307 | /* -- Emit control-flow instructions -------------------------------------- */ | ||
308 | |||
309 | /* Label for internal jumps. */ | ||
310 | typedef MCode *MCLabel; | ||
311 | |||
312 | /* Return label pointing to current PC. */ | ||
313 | #define emit_label(as) ((as)->mcp) | ||
314 | |||
315 | static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) | ||
316 | { | ||
317 | MCode *p = --as->mcp; | ||
318 | ptrdiff_t delta = target - p; | ||
319 | lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); | ||
320 | *p = A64I_BCC | A64F_S19(delta) | cond; | ||
321 | } | ||
322 | |||
323 | static void emit_branch(ASMState *as, A64Ins ai, MCode *target) | ||
324 | { | ||
325 | MCode *p = --as->mcp; | ||
326 | ptrdiff_t delta = target - p; | ||
327 | lj_assertA(A64F_S_OK(delta, 26), "branch target out of range"); | ||
328 | *p = ai | A64F_S26(delta); | ||
329 | } | ||
330 | |||
331 | static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) | ||
332 | { | ||
333 | MCode *p = --as->mcp; | ||
334 | ptrdiff_t delta = target - p; | ||
335 | lj_assertA(bit < 63, "bit number out of range"); | ||
336 | lj_assertA(A64F_S_OK(delta, 14), "branch target out of range"); | ||
337 | if (bit > 31) ai |= A64I_X; | ||
338 | *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; | ||
339 | } | ||
340 | |||
341 | static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) | ||
342 | { | ||
343 | MCode *p = --as->mcp; | ||
344 | ptrdiff_t delta = target - p; | ||
345 | lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); | ||
346 | *p = ai | A64F_S19(delta) | r; | ||
347 | } | ||
348 | |||
349 | #define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) | ||
350 | |||
351 | static void emit_call(ASMState *as, void *target) | ||
352 | { | ||
353 | MCode *p = --as->mcp; | ||
354 | ptrdiff_t delta = (char *)target - (char *)p; | ||
355 | if (A64F_S_OK(delta>>2, 26)) { | ||
356 | *p = A64I_BL | A64F_S26(delta>>2); | ||
357 | } else { /* Target out of range: need indirect call. But don't use R0-R7. */ | ||
358 | Reg r = ra_allock(as, i64ptr(target), | ||
359 | RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); | ||
360 | *p = A64I_BLR | A64F_N(r); | ||
361 | } | ||
362 | } | ||
363 | |||
364 | /* -- Emit generic operations --------------------------------------------- */ | ||
365 | |||
/* Emits a move from src to dst.
** For dst at or above RID_MAX_GPR an FMOV is emitted (double or single,
** depending on whether the IR type is a number). Otherwise a 64 bit MOV is
** emitted. Before that, the most recently emitted instruction may be
** patched to use src instead of dst, see below.
*/
366 | /* Generic move between two regs. */ | |
367 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |
368 | { | |
369 | if (dst >= RID_MAX_GPR) { | |
370 | emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, | |
371 | (dst & 31), (src & 31)); | |
372 | return; | |
373 | } | |
/* *as->mcp is the instruction emitted just before this call. */
374 | if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ | |
/* XOR with swp turns a register field equal to dst into src. */
375 | MCode ins = *as->mcp, swp = (src^dst); | |
/* NOTE(review): the mask presumably matches the 32/64 bit unsigned-offset
** load/store encodings -- confirm against the A64 encoding tables.
*/
376 | if ((ins & 0xbf800000) == 0xb9000000) { | |
377 | if (!((ins ^ (dst << 5)) & 0x000003e0)) | |
378 | *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ | |
/* NOTE(review): this patch starts from the unmodified 'ins', so it replaces
** the N swap above if both conditions hold -- confirm that is intended.
*/
379 | if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) | |
380 | *as->mcp = ins ^ swp; /* Swap D in store. */ | |
381 | } | |
382 | } | |
383 | emit_dm(as, A64I_MOVx, dst, src); | |
384 | } | |
385 | |||
386 | /* Generic load of register with base and (small) offset address. */ | ||
387 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
388 | { | ||
389 | if (r >= RID_MAX_GPR) | ||
390 | emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); | ||
391 | else | ||
392 | emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); | ||
393 | } | ||
394 | |||
395 | /* Generic store of register with base and (small) offset address. */ | ||
396 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
397 | { | ||
398 | if (r >= RID_MAX_GPR) | ||
399 | emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); | ||
400 | else | ||
401 | emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); | ||
402 | } | ||
403 | |||
404 | /* Emit an arithmetic operation with a constant operand. */ | ||
405 | static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, | ||
406 | int32_t i, RegSet allow) | ||
407 | { | ||
408 | uint32_t k = emit_isk12(i); | ||
409 | if (k) | ||
410 | emit_dn(as, ai^k, dest, src); | ||
411 | else | ||
412 | emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); | ||
413 | } | ||
414 | |||
415 | /* Add offset to pointer. */ | ||
416 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
417 | { | ||
418 | if (ofs) | ||
419 | emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, | ||
420 | ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); | ||
421 | } | ||
422 | |||
423 | #define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) | ||
424 | |||
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index 366cf7ab..0cea5479 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h | |||
@@ -3,6 +3,32 @@ | |||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #if LJ_64 | ||
7 | static intptr_t get_k64val(ASMState *as, IRRef ref) | ||
8 | { | ||
9 | IRIns *ir = IR(ref); | ||
10 | if (ir->o == IR_KINT64) { | ||
11 | return (intptr_t)ir_kint64(ir)->u64; | ||
12 | } else if (ir->o == IR_KGC) { | ||
13 | return (intptr_t)ir_kgc(ir); | ||
14 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
15 | return (intptr_t)ir_kptr(ir); | ||
16 | } else if (LJ_SOFTFP && ir->o == IR_KNUM) { | ||
17 | return (intptr_t)ir_knum(ir)->u64; | ||
18 | } else { | ||
19 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, | ||
20 | "bad 64 bit const IR op %d", ir->o); | ||
21 | return ir->i; /* Sign-extended. */ | ||
22 | } | ||
23 | } | ||
24 | #endif | ||
25 | |||
26 | #if LJ_64 | ||
27 | #define get_kval(as, ref) get_k64val(as, ref) | ||
28 | #else | ||
29 | #define get_kval(as, ref) (IR((ref))->i) | ||
30 | #endif | ||
31 | |||
6 | /* -- Emit basic instructions --------------------------------------------- */ | 32 | /* -- Emit basic instructions --------------------------------------------- */ |
7 | 33 | ||
8 | static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) | 34 | static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) |
@@ -35,7 +61,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) | |||
35 | 61 | ||
36 | static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) | 62 | static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) |
37 | { | 63 | { |
38 | if ((as->flags & JIT_F_MIPS32R2)) { | 64 | if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { |
39 | emit_dta(as, MIPSI_ROTR, dest, src, shift); | 65 | emit_dta(as, MIPSI_ROTR, dest, src, shift); |
40 | } else { | 66 | } else { |
41 | emit_dst(as, MIPSI_OR, dest, dest, tmp); | 67 | emit_dst(as, MIPSI_OR, dest, dest, tmp); |
@@ -44,23 +70,32 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) | |||
44 | } | 70 | } |
45 | } | 71 | } |
46 | 72 | ||
73 | #if LJ_64 || LJ_HASBUFFER | ||
74 | static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, | ||
75 | uint32_t lsb) | ||
76 | { | ||
77 | *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); | ||
78 | } | ||
79 | #endif | ||
80 | |||
47 | /* -- Emit loads/stores --------------------------------------------------- */ | 81 | /* -- Emit loads/stores --------------------------------------------------- */ |
48 | 82 | ||
49 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | 83 | /* Prefer rematerialization of BASE/L from global_State over spills. */ |
50 | #define emit_canremat(ref) ((ref) <= REF_BASE) | 84 | #define emit_canremat(ref) ((ref) <= REF_BASE) |
51 | 85 | ||
52 | /* Try to find a one step delta relative to another constant. */ | 86 | /* Try to find a one step delta relative to another constant. */ |
53 | static int emit_kdelta1(ASMState *as, Reg t, int32_t i) | 87 | static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i) |
54 | { | 88 | { |
55 | RegSet work = ~as->freeset & RSET_GPR; | 89 | RegSet work = ~as->freeset & RSET_GPR; |
56 | while (work) { | 90 | while (work) { |
57 | Reg r = rset_picktop(work); | 91 | Reg r = rset_picktop(work); |
58 | IRRef ref = regcost_ref(as->cost[r]); | 92 | IRRef ref = regcost_ref(as->cost[r]); |
59 | lua_assert(r != t); | 93 | lj_assertA(r != rd, "dest reg %d not free", rd); |
60 | if (ref < ASMREF_L) { | 94 | if (ref < ASMREF_L) { |
61 | int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); | 95 | intptr_t delta = (intptr_t)((uintptr_t)i - |
96 | (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref))); | ||
62 | if (checki16(delta)) { | 97 | if (checki16(delta)) { |
63 | emit_tsi(as, MIPSI_ADDIU, t, r, delta); | 98 | emit_tsi(as, MIPSI_AADDIU, rd, r, delta); |
64 | return 1; | 99 | return 1; |
65 | } | 100 | } |
66 | } | 101 | } |
@@ -76,8 +111,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
76 | emit_ti(as, MIPSI_LI, r, i); | 111 | emit_ti(as, MIPSI_LI, r, i); |
77 | } else { | 112 | } else { |
78 | if ((i & 0xffff)) { | 113 | if ((i & 0xffff)) { |
79 | int32_t jgl = i32ptr(J2G(as->J)); | 114 | intptr_t jgl = (intptr_t)(void *)J2G(as->J); |
80 | if ((uint32_t)(i-jgl) < 65536) { | 115 | if ((uintptr_t)(i-jgl) < 65536) { |
81 | emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); | 116 | emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); |
82 | return; | 117 | return; |
83 | } else if (emit_kdelta1(as, r, i)) { | 118 | } else if (emit_kdelta1(as, r, i)) { |
@@ -92,16 +127,49 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
92 | } | 127 | } |
93 | } | 128 | } |
94 | 129 | ||
130 | #if LJ_64 | ||
/* Strategy, in order: values that fit a signed 32 bit integer go through
** emit_loadi(). Values within 64K of the global state address are formed
** with a single DADDIU off RID_JGL. Then a one-step delta from a constant
** already in a register is tried (emit_kdelta1). As a last resort the
** value is assembled from its upper 32 bits plus shifts and ORIs.
*/
131 | /* Load a 64 bit constant into a GPR. */ | |
132 | static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | |
133 | { | |
134 | if (checki32((int64_t)u64)) { | |
135 | emit_loadi(as, r, (int32_t)u64); | |
136 | } else { | |
137 | uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); | |
138 | if (delta < 65536) { | |
/* The immediate is biased by -32768, matching the RID_JGL-relative form used by emit_loadi(). */
139 | emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); | |
140 | } else if (emit_kdelta1(as, r, (intptr_t)u64)) { | |
141 | return; | |
142 | } else { | |
/* The calls below are in reverse order of the resulting instructions:
** the upper 32 bits are loaded first at run time, the low ORI comes last.
*/
143 | /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */ | |
144 | if ((u64 & 0xffff)) { | |
145 | emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); | |
146 | } | |
147 | if (((u64 >> 16) & 0xffff)) { | |
148 | emit_dta(as, MIPSI_DSLL, r, r, 16); | |
149 | emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); | |
150 | emit_dta(as, MIPSI_DSLL, r, r, 16); | |
151 | } else { | |
/* Bits 16..31 are zero: a single shift replaces the shift/ORI/shift steps. */
152 | emit_dta(as, MIPSI_DSLL32, r, r, 0); | |
153 | } | |
154 | emit_loadi(as, r, (int32_t)(u64 >> 32)); | |
155 | } | |
156 | /* TODO: There are probably more optimization opportunities. */ | |
157 | } | |
158 | } | |
159 | |||
160 | #define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) | ||
161 | #else | ||
95 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) | 162 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) |
163 | #endif | ||
96 | 164 | ||
97 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); | 165 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); |
98 | static void ra_allockreg(ASMState *as, int32_t k, Reg r); | 166 | static void ra_allockreg(ASMState *as, intptr_t k, Reg r); |
99 | 167 | ||
100 | /* Get/set from constant pointer. */ | 168 | /* Get/set from constant pointer. */ |
101 | static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) | 169 | static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) |
102 | { | 170 | { |
103 | int32_t jgl = i32ptr(J2G(as->J)); | 171 | intptr_t jgl = (intptr_t)(J2G(as->J)); |
104 | int32_t i = i32ptr(p); | 172 | intptr_t i = (intptr_t)(p); |
105 | Reg base; | 173 | Reg base; |
106 | if ((uint32_t)(i-jgl) < 65536) { | 174 | if ((uint32_t)(i-jgl) < 65536) { |
107 | i = i-jgl-32768; | 175 | i = i-jgl-32768; |
@@ -112,8 +180,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) | |||
112 | emit_tsi(as, mi, r, base, i); | 180 | emit_tsi(as, mi, r, base, i); |
113 | } | 181 | } |
114 | 182 | ||
115 | #define emit_loadn(as, r, tv) \ | 183 | #if LJ_64 |
116 | emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) | 184 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) |
185 | { | ||
186 | const uint64_t *k = &ir_k64(ir)->u64; | ||
187 | Reg r64 = r; | ||
188 | if (rset_test(RSET_FPR, r)) { | ||
189 | r64 = RID_TMP; | ||
190 | emit_tg(as, MIPSI_DMTC1, r64, r); | ||
191 | } | ||
192 | if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) | ||
193 | emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); | ||
194 | else | ||
195 | emit_loadu64(as, r64, *k); | ||
196 | } | ||
197 | #else | ||
198 | #define emit_loadk64(as, r, ir) \ | ||
199 | emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) | ||
200 | #endif | ||
117 | 201 | ||
118 | /* Get/set global_State fields. */ | 202 | /* Get/set global_State fields. */ |
119 | static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) | 203 | static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) |
@@ -122,9 +206,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) | |||
122 | } | 206 | } |
123 | 207 | ||
124 | #define emit_getgl(as, r, field) \ | 208 | #define emit_getgl(as, r, field) \ |
125 | emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) | 209 | emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) |
126 | #define emit_setgl(as, r, field) \ | 210 | #define emit_setgl(as, r, field) \ |
127 | emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) | 211 | emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) |
128 | 212 | ||
129 | /* Trace number is determined from per-trace exit stubs. */ | 213 | /* Trace number is determined from per-trace exit stubs. */ |
130 | #define emit_setvmstate(as, i) UNUSED(i) | 214 | #define emit_setvmstate(as, i) UNUSED(i) |
@@ -141,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) | |||
141 | { | 225 | { |
142 | MCode *p = as->mcp; | 226 | MCode *p = as->mcp; |
143 | ptrdiff_t delta = target - p; | 227 | ptrdiff_t delta = target - p; |
144 | lua_assert(((delta + 0x8000) >> 16) == 0); | 228 | lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); |
145 | *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); | 229 | *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); |
146 | as->mcp = p; | 230 | as->mcp = p; |
147 | } | 231 | } |
@@ -152,16 +236,31 @@ static void emit_jmp(ASMState *as, MCode *target) | |||
152 | emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); | 236 | emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); |
153 | } | 237 | } |
154 | 238 | ||
155 | static void emit_call(ASMState *as, void *target) | 239 | static void emit_call(ASMState *as, void *target, int needcfa) |
156 | { | 240 | { |
157 | MCode *p = as->mcp; | 241 | MCode *p = as->mcp; |
158 | *--p = MIPSI_NOP; | 242 | #if LJ_TARGET_MIPSR6 |
159 | if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) | 243 | ptrdiff_t delta = (char *)target - (char *)p; |
244 | if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */ | ||
245 | *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu); | ||
246 | as->mcp = p; | ||
247 | return; | ||
248 | } | ||
249 | #endif | ||
250 | *--p = MIPSI_NOP; /* Delay slot. */ | ||
251 | if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { | ||
252 | #if !LJ_TARGET_MIPSR6 | ||
253 | *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | | ||
254 | (((uintptr_t)target >>2) & 0x03ffffffu); | ||
255 | #else | ||
160 | *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); | 256 | *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); |
161 | else /* Target out of range: need indirect call. */ | 257 | #endif |
258 | } else { /* Target out of range: need indirect call. */ | ||
162 | *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); | 259 | *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); |
260 | needcfa = 1; | ||
261 | } | ||
163 | as->mcp = p; | 262 | as->mcp = p; |
164 | ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); | 263 | if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); |
165 | } | 264 | } |
166 | 265 | ||
167 | /* -- Emit generic operations --------------------------------------------- */ | 266 | /* -- Emit generic operations --------------------------------------------- */ |
@@ -178,32 +277,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
178 | emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); | 277 | emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); |
179 | } | 278 | } |
180 | 279 | ||
181 | /* Generic load of register from stack slot. */ | 280 | /* Generic load of register with base and (small) offset address. */ |
182 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 281 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
183 | { | 282 | { |
184 | if (r < RID_MAX_GPR) | 283 | if (r < RID_MAX_GPR) |
185 | emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); | 284 | emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); |
186 | else | 285 | else |
187 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, | 286 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, |
188 | (r & 31), RID_SP, ofs); | 287 | (r & 31), base, ofs); |
189 | } | 288 | } |
190 | 289 | ||
191 | /* Generic store of register to stack slot. */ | 290 | /* Generic store of register with base and (small) offset address. */ |
192 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 291 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
193 | { | 292 | { |
194 | if (r < RID_MAX_GPR) | 293 | if (r < RID_MAX_GPR) |
195 | emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); | 294 | emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); |
196 | else | 295 | else |
197 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, | 296 | emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, |
198 | (r&31), RID_SP, ofs); | 297 | (r&31), base, ofs); |
199 | } | 298 | } |
200 | 299 | ||
201 | /* Add offset to pointer. */ | 300 | /* Add offset to pointer. */ |
202 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | 301 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) |
203 | { | 302 | { |
204 | if (ofs) { | 303 | if (ofs) { |
205 | lua_assert(checki16(ofs)); | 304 | lj_assertA(checki16(ofs), "offset %d out of range", ofs); |
206 | emit_tsi(as, MIPSI_ADDIU, r, r, ofs); | 305 | emit_tsi(as, MIPSI_AADDIU, r, r, ofs); |
207 | } | 306 | } |
208 | } | 307 | } |
209 | 308 | ||
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index 6d0ea185..86760e78 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h | |||
@@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, | |||
41 | 41 | ||
42 | static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) | 42 | static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) |
43 | { | 43 | { |
44 | lua_assert(n >= 0 && n < 32); | 44 | lj_assertA(n >= 0 && n < 32, "shift out or range"); |
45 | emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); | 45 | emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); |
46 | } | 46 | } |
47 | 47 | ||
48 | static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) | 48 | static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) |
49 | { | 49 | { |
50 | lua_assert(n >= 0 && n < 32); | 50 | lj_assertA(n >= 0 && n < 32, "shift out or range"); |
51 | emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); | 51 | emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); |
52 | } | 52 | } |
53 | 53 | ||
@@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) | |||
57 | #define emit_canremat(ref) ((ref) <= REF_BASE) | 57 | #define emit_canremat(ref) ((ref) <= REF_BASE) |
58 | 58 | ||
59 | /* Try to find a one step delta relative to another constant. */ | 59 | /* Try to find a one step delta relative to another constant. */ |
60 | static int emit_kdelta1(ASMState *as, Reg t, int32_t i) | 60 | static int emit_kdelta1(ASMState *as, Reg rd, int32_t i) |
61 | { | 61 | { |
62 | RegSet work = ~as->freeset & RSET_GPR; | 62 | RegSet work = ~as->freeset & RSET_GPR; |
63 | while (work) { | 63 | while (work) { |
64 | Reg r = rset_picktop(work); | 64 | Reg r = rset_picktop(work); |
65 | IRRef ref = regcost_ref(as->cost[r]); | 65 | IRRef ref = regcost_ref(as->cost[r]); |
66 | lua_assert(r != t); | 66 | lj_assertA(r != rd, "dest reg %d not free", rd); |
67 | if (ref < ASMREF_L) { | 67 | if (ref < ASMREF_L) { |
68 | int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); | 68 | int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); |
69 | if (checki16(delta)) { | 69 | if (checki16(delta)) { |
70 | emit_tai(as, PPCI_ADDI, t, r, delta); | 70 | emit_tai(as, PPCI_ADDI, rd, r, delta); |
71 | return 1; | 71 | return 1; |
72 | } | 72 | } |
73 | } | 73 | } |
@@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
98 | 98 | ||
99 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) | 99 | #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) |
100 | 100 | ||
101 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); | 101 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); |
102 | 102 | ||
103 | /* Get/set from constant pointer. */ | 103 | /* Get/set from constant pointer. */ |
104 | static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) | 104 | static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) |
@@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) | |||
115 | emit_tai(as, pi, r, base, i); | 115 | emit_tai(as, pi, r, base, i); |
116 | } | 116 | } |
117 | 117 | ||
118 | #define emit_loadn(as, r, tv) \ | 118 | #define emit_loadk64(as, r, ir) \ |
119 | emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) | 119 | emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) |
120 | 120 | ||
121 | /* Get/set global_State fields. */ | 121 | /* Get/set global_State fields. */ |
122 | static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) | 122 | static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) |
@@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) | |||
144 | { | 144 | { |
145 | MCode *p = --as->mcp; | 145 | MCode *p = --as->mcp; |
146 | ptrdiff_t delta = (char *)target - (char *)p; | 146 | ptrdiff_t delta = (char *)target - (char *)p; |
147 | lua_assert(((delta + 0x8000) >> 16) == 0); | 147 | lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); |
148 | pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); | 148 | pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); |
149 | *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); | 149 | *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); |
150 | } | 150 | } |
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
186 | emit_fb(as, PPCI_FMR, dst, src); | 186 | emit_fb(as, PPCI_FMR, dst, src); |
187 | } | 187 | } |
188 | 188 | ||
189 | /* Generic load of register from stack slot. */ | 189 | /* Generic load of register with base and (small) offset address. */ |
190 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 190 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
191 | { | 191 | { |
192 | if (r < RID_MAX_GPR) | 192 | if (r < RID_MAX_GPR) |
193 | emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); | 193 | emit_tai(as, PPCI_LWZ, r, base, ofs); |
194 | else | 194 | else |
195 | emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); | 195 | emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs); |
196 | } | 196 | } |
197 | 197 | ||
198 | /* Generic store of register to stack slot. */ | 198 | /* Generic store of register with base and (small) offset address. */ |
199 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 199 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
200 | { | 200 | { |
201 | if (r < RID_MAX_GPR) | 201 | if (r < RID_MAX_GPR) |
202 | emit_tai(as, PPCI_STW, r, RID_SP, ofs); | 202 | emit_tai(as, PPCI_STW, r, base, ofs); |
203 | else | 203 | else |
204 | emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); | 204 | emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs); |
205 | } | 205 | } |
206 | 206 | ||
207 | /* Emit a compare (for equality) with a constant operand. */ | 207 | /* Emit a compare (for equality) with a constant operand. */ |
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index d8b4b8a0..3d3beda3 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h | |||
@@ -13,10 +13,17 @@ | |||
13 | if (rex != 0x40) *--(p) = rex; } | 13 | if (rex != 0x40) *--(p) = rex; } |
14 | #define FORCE_REX 0x200 | 14 | #define FORCE_REX 0x200 |
15 | #define REX_64 (FORCE_REX|0x080000) | 15 | #define REX_64 (FORCE_REX|0x080000) |
16 | #define VEX_64 0x800000 | ||
16 | #else | 17 | #else |
17 | #define REXRB(p, rr, rb) ((void)0) | 18 | #define REXRB(p, rr, rb) ((void)0) |
18 | #define FORCE_REX 0 | 19 | #define FORCE_REX 0 |
19 | #define REX_64 0 | 20 | #define REX_64 0 |
21 | #define VEX_64 0 | ||
22 | #endif | ||
23 | #if LJ_GC64 | ||
24 | #define REX_GC64 REX_64 | ||
25 | #else | ||
26 | #define REX_GC64 0 | ||
20 | #endif | 27 | #endif |
21 | 28 | ||
22 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) | 29 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) |
@@ -31,7 +38,14 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, | |||
31 | MCode *p, int delta) | 38 | MCode *p, int delta) |
32 | { | 39 | { |
33 | int n = (int8_t)xo; | 40 | int n = (int8_t)xo; |
34 | #if defined(__GNUC__) | 41 | if (n == -60) { /* VEX-encoded instruction */ |
42 | #if LJ_64 | ||
43 | xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13; | ||
44 | #endif | ||
45 | *(uint32_t *)(p+delta-5) = (uint32_t)xo; | ||
46 | return p+delta-5; | ||
47 | } | ||
48 | #if defined(__GNUC__) || defined(__clang__) | ||
35 | if (__builtin_constant_p(xo) && n == -2) | 49 | if (__builtin_constant_p(xo) && n == -2) |
36 | p[delta-2] = (MCode)(xo >> 24); | 50 | p[delta-2] = (MCode)(xo >> 24); |
37 | else if (__builtin_constant_p(xo) && n == -3) | 51 | else if (__builtin_constant_p(xo) && n == -3) |
@@ -78,33 +92,24 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) | |||
78 | /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */ | 92 | /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */ |
79 | static int32_t ptr2addr(const void *p) | 93 | static int32_t ptr2addr(const void *p) |
80 | { | 94 | { |
81 | lua_assert((uintptr_t)p < (uintptr_t)0x80000000); | 95 | lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range"); |
82 | return i32ptr(p); | 96 | return i32ptr(p); |
83 | } | 97 | } |
84 | #else | 98 | #else |
85 | #define ptr2addr(p) (i32ptr((p))) | 99 | #define ptr2addr(p) (i32ptr((p))) |
86 | #endif | 100 | #endif |
87 | 101 | ||
88 | /* op r, [addr] */ | ||
89 | static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) | ||
90 | { | ||
91 | MCode *p = as->mcp; | ||
92 | *(int32_t *)(p-4) = ptr2addr(addr); | ||
93 | #if LJ_64 | ||
94 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
95 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); | ||
96 | #else | ||
97 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); | ||
98 | #endif | ||
99 | } | ||
100 | |||
101 | /* op r, [base+ofs] */ | 102 | /* op r, [base+ofs] */ |
102 | static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) | 103 | static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) |
103 | { | 104 | { |
104 | MCode *p = as->mcp; | 105 | MCode *p = as->mcp; |
105 | x86Mode mode; | 106 | x86Mode mode; |
106 | if (ra_hasreg(rb)) { | 107 | if (ra_hasreg(rb)) { |
107 | if (ofs == 0 && (rb&7) != RID_EBP) { | 108 | if (LJ_GC64 && rb == RID_RIP) { |
109 | mode = XM_OFS0; | ||
110 | p -= 4; | ||
111 | *(int32_t *)p = ofs; | ||
112 | } else if (ofs == 0 && (rb&7) != RID_EBP) { | ||
108 | mode = XM_OFS0; | 113 | mode = XM_OFS0; |
109 | } else if (checki8(ofs)) { | 114 | } else if (checki8(ofs)) { |
110 | *--p = (MCode)ofs; | 115 | *--p = (MCode)ofs; |
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) | |||
202 | *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | 207 | *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); |
203 | rb = RID_ESP; | 208 | rb = RID_ESP; |
204 | #endif | 209 | #endif |
210 | } else if (LJ_GC64 && rb == RID_RIP) { | ||
211 | lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index"); | ||
212 | mode = XM_OFS0; | ||
213 | p -= 4; | ||
214 | *(int32_t *)p = as->mrm.ofs; | ||
205 | } else { | 215 | } else { |
206 | if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { | 216 | if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { |
207 | mode = XM_OFS0; | 217 | mode = XM_OFS0; |
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) | |||
241 | 251 | ||
242 | /* -- Emit loads/stores --------------------------------------------------- */ | 252 | /* -- Emit loads/stores --------------------------------------------------- */ |
243 | 253 | ||
244 | /* Instruction selection for XMM moves. */ | ||
245 | #define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS) | ||
246 | #define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD) | ||
247 | |||
248 | /* mov [base+ofs], i */ | 254 | /* mov [base+ofs], i */ |
249 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | 255 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) |
250 | { | 256 | { |
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | |||
259 | /* Get/set global_State fields. */ | 265 | /* Get/set global_State fields. */ |
260 | #define emit_opgl(as, xo, r, field) \ | 266 | #define emit_opgl(as, xo, r, field) \ |
261 | emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) | 267 | emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) |
262 | #define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) | 268 | #define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field) |
263 | #define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) | 269 | #define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field) |
264 | 270 | ||
265 | #define emit_setvmstate(as, i) \ | 271 | #define emit_setvmstate(as, i) \ |
266 | (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) | 272 | (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) |
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
285 | } | 291 | } |
286 | } | 292 | } |
287 | 293 | ||
294 | #if LJ_GC64 | ||
295 | #define dispofs(as, k) \ | ||
296 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) | ||
297 | #define mcpofs(as, k) \ | ||
298 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) | ||
299 | #define mctopofs(as, k) \ | ||
300 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop)) | ||
301 | /* mov r, addr */ | ||
302 | #define emit_loada(as, r, addr) \ | ||
303 | emit_loadu64(as, (r), (uintptr_t)(addr)) | ||
304 | #else | ||
288 | /* mov r, addr */ | 305 | /* mov r, addr */ |
289 | #define emit_loada(as, r, addr) \ | 306 | #define emit_loada(as, r, addr) \ |
290 | emit_loadi(as, (r), ptr2addr((addr))) | 307 | emit_loadi(as, (r), ptr2addr((addr))) |
308 | #endif | ||
291 | 309 | ||
292 | #if LJ_64 | 310 | #if LJ_64 |
293 | /* mov r, imm64 or shorter 32 bit extended load. */ | 311 | /* mov r, imm64 or shorter 32 bit extended load. */ |
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | |||
299 | MCode *p = as->mcp; | 317 | MCode *p = as->mcp; |
300 | *(int32_t *)(p-4) = (int32_t)u64; | 318 | *(int32_t *)(p-4) = (int32_t)u64; |
301 | as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); | 319 | as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); |
320 | #if LJ_GC64 | ||
321 | } else if (checki32(dispofs(as, u64))) { | ||
322 | emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); | ||
323 | } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { | ||
324 | /* Since as->realign assumes the code size doesn't change, check | ||
325 | ** RIP-relative addressing reachability for both as->mcp and as->mctop. | ||
326 | */ | ||
327 | emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); | ||
328 | #endif | ||
302 | } else { /* Full-size 64 bit load. */ | 329 | } else { /* Full-size 64 bit load. */ |
303 | MCode *p = as->mcp; | 330 | MCode *p = as->mcp; |
304 | *(uint64_t *)(p-8) = u64; | 331 | *(uint64_t *)(p-8) = u64; |
@@ -310,13 +337,90 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | |||
310 | } | 337 | } |
311 | #endif | 338 | #endif |
312 | 339 | ||
313 | /* movsd r, [&tv->n] / xorps r, r */ | 340 | /* op r, [addr] */ |
314 | static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | 341 | static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) |
315 | { | 342 | { |
316 | if (tvispzero(tv)) /* Use xor only for +0. */ | 343 | #if LJ_GC64 |
317 | emit_rr(as, XO_XORPS, r, r); | 344 | if (checki32(dispofs(as, addr))) { |
318 | else | 345 | emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); |
319 | emit_rma(as, XMM_MOVRM(as), r, &tv->n); | 346 | } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { |
347 | emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); | ||
348 | } else if (!checki32((intptr_t)addr)) { | ||
349 | Reg ra = (rr & 15); | ||
350 | if (xo != XO_MOV) { | ||
351 | /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ | ||
352 | uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; | ||
353 | uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; | ||
354 | ra = RID_DISPATCH; | ||
355 | if (checku32(dispaddr)) { | ||
356 | emit_loadi(as, ra, (int32_t)dispaddr); | ||
357 | } else { /* Full-size 64 bit load. */ | ||
358 | MCode *p = as->mcp; | ||
359 | *(uint64_t *)(p-8) = dispaddr; | ||
360 | p[-9] = (MCode)(XI_MOVri+(ra&7)); | ||
361 | p[-10] = 0x48 + ((ra>>3)&1); | ||
362 | p -= 10; | ||
363 | as->mcp = p; | ||
364 | } | ||
365 | if (xo == XO_GROUP3b) emit_i8(as, i8); | ||
366 | } | ||
367 | emit_rmro(as, xo, rr, ra, 0); | ||
368 | emit_loadu64(as, ra, (uintptr_t)addr); | ||
369 | } else | ||
370 | #endif | ||
371 | { | ||
372 | MCode *p = as->mcp; | ||
373 | *(int32_t *)(p-4) = ptr2addr(addr); | ||
374 | #if LJ_64 | ||
375 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
376 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); | ||
377 | #else | ||
378 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); | ||
379 | #endif | ||
380 | } | ||
381 | } | ||
382 | |||
383 | /* Load 64 bit IR constant into register. */ | ||
384 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | ||
385 | { | ||
386 | Reg r64; | ||
387 | x86Op xo; | ||
388 | const uint64_t *k = &ir_k64(ir)->u64; | ||
389 | if (rset_test(RSET_FPR, r)) { | ||
390 | r64 = r; | ||
391 | xo = XO_MOVSD; | ||
392 | } else { | ||
393 | r64 = r | REX_64; | ||
394 | xo = XO_MOV; | ||
395 | } | ||
396 | if (*k == 0) { | ||
397 | emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); | ||
398 | #if LJ_GC64 | ||
399 | } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || | ||
400 | (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { | ||
401 | emit_rma(as, xo, r64, k); | ||
402 | } else { | ||
403 | if (ir->i) { | ||
404 | lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), | ||
405 | "bad interned 64 bit constant"); | ||
406 | } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { | ||
407 | emit_loadu64(as, r, *k); | ||
408 | return; | ||
409 | } else { | ||
410 | /* If all else fails, add the FP constant at the MCode area bottom. */ | ||
411 | while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; | ||
412 | *(uint64_t *)as->mcbot = *k; | ||
413 | ir->i = (int32_t)(as->mctop - as->mcbot); | ||
414 | as->mcbot += 8; | ||
415 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
416 | lj_mcode_commitbot(as->J, as->mcbot); | ||
417 | } | ||
418 | emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); | ||
419 | #else | ||
420 | } else { | ||
421 | emit_rma(as, xo, r64, k); | ||
422 | #endif | ||
423 | } | ||
320 | } | 424 | } |
321 | 425 | ||
322 | /* -- Emit control-flow instructions -------------------------------------- */ | 426 | /* -- Emit control-flow instructions -------------------------------------- */ |
@@ -330,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target) | |||
330 | { | 434 | { |
331 | MCode *p = as->mcp; | 435 | MCode *p = as->mcp; |
332 | ptrdiff_t delta = target - p; | 436 | ptrdiff_t delta = target - p; |
333 | lua_assert(delta == (int8_t)delta); | 437 | lj_assertA(delta == (int8_t)delta, "short jump target out of range"); |
334 | p[-1] = (MCode)(int8_t)delta; | 438 | p[-1] = (MCode)(int8_t)delta; |
335 | p[-2] = XI_JMPs; | 439 | p[-2] = XI_JMPs; |
336 | as->mcp = p - 2; | 440 | as->mcp = p - 2; |
@@ -342,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target) | |||
342 | { | 446 | { |
343 | MCode *p = as->mcp; | 447 | MCode *p = as->mcp; |
344 | ptrdiff_t delta = target - p; | 448 | ptrdiff_t delta = target - p; |
345 | lua_assert(delta == (int8_t)delta); | 449 | lj_assertA(delta == (int8_t)delta, "short jump target out of range"); |
346 | p[-1] = (MCode)(int8_t)delta; | 450 | p[-1] = (MCode)(int8_t)delta; |
347 | p[-2] = (MCode)(XI_JCCs+(cc&15)); | 451 | p[-2] = (MCode)(XI_JCCs+(cc&15)); |
348 | as->mcp = p - 2; | 452 | as->mcp = p - 2; |
@@ -368,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source) | |||
368 | #define emit_label(as) ((as)->mcp) | 472 | #define emit_label(as) ((as)->mcp) |
369 | 473 | ||
370 | /* Compute relative 32 bit offset for jump and call instructions. */ | 474 | /* Compute relative 32 bit offset for jump and call instructions. */ |
371 | static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) | 475 | static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) |
372 | { | 476 | { |
373 | ptrdiff_t delta = target - p; | 477 | ptrdiff_t delta = target - p; |
374 | lua_assert(delta == (int32_t)delta); | 478 | UNUSED(J); |
479 | lj_assertJ(delta == (int32_t)delta, "jump target out of range"); | ||
375 | return (int32_t)delta; | 480 | return (int32_t)delta; |
376 | } | 481 | } |
377 | 482 | ||
@@ -379,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) | |||
379 | static void emit_jcc(ASMState *as, int cc, MCode *target) | 484 | static void emit_jcc(ASMState *as, int cc, MCode *target) |
380 | { | 485 | { |
381 | MCode *p = as->mcp; | 486 | MCode *p = as->mcp; |
382 | *(int32_t *)(p-4) = jmprel(p, target); | 487 | *(int32_t *)(p-4) = jmprel(as->J, p, target); |
383 | p[-5] = (MCode)(XI_JCCn+(cc&15)); | 488 | p[-5] = (MCode)(XI_JCCn+(cc&15)); |
384 | p[-6] = 0x0f; | 489 | p[-6] = 0x0f; |
385 | as->mcp = p - 6; | 490 | as->mcp = p - 6; |
@@ -389,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target) | |||
389 | static void emit_jmp(ASMState *as, MCode *target) | 494 | static void emit_jmp(ASMState *as, MCode *target) |
390 | { | 495 | { |
391 | MCode *p = as->mcp; | 496 | MCode *p = as->mcp; |
392 | *(int32_t *)(p-4) = jmprel(p, target); | 497 | *(int32_t *)(p-4) = jmprel(as->J, p, target); |
393 | p[-5] = XI_JMP; | 498 | p[-5] = XI_JMP; |
394 | as->mcp = p - 5; | 499 | as->mcp = p - 5; |
395 | } | 500 | } |
@@ -406,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target) | |||
406 | return; | 511 | return; |
407 | } | 512 | } |
408 | #endif | 513 | #endif |
409 | *(int32_t *)(p-4) = jmprel(p, target); | 514 | *(int32_t *)(p-4) = jmprel(as->J, p, target); |
410 | p[-5] = XI_CALL; | 515 | p[-5] = XI_CALL; |
411 | as->mcp = p - 5; | 516 | as->mcp = p - 5; |
412 | } | 517 | } |
@@ -418,8 +523,10 @@ static void emit_call_(ASMState *as, MCode *target) | |||
418 | /* Use 64 bit operations to handle 64 bit IR types. */ | 523 | /* Use 64 bit operations to handle 64 bit IR types. */ |
419 | #if LJ_64 | 524 | #if LJ_64 |
420 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) | 525 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) |
526 | #define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0)) | ||
421 | #else | 527 | #else |
422 | #define REX_64IR(ir, r) (r) | 528 | #define REX_64IR(ir, r) (r) |
529 | #define VEX_64IR(ir, r) (r) | ||
423 | #endif | 530 | #endif |
424 | 531 | ||
425 | /* Generic move between two regs. */ | 532 | /* Generic move between two regs. */ |
@@ -429,35 +536,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
429 | if (dst < RID_MAX_GPR) | 536 | if (dst < RID_MAX_GPR) |
430 | emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); | 537 | emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); |
431 | else | 538 | else |
432 | emit_rr(as, XMM_MOVRR(as), dst, src); | 539 | emit_rr(as, XO_MOVAPS, dst, src); |
433 | } | 540 | } |
434 | 541 | ||
435 | /* Generic load of register from stack slot. */ | 542 | /* Generic load of register with base and (small) offset address. */ |
436 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 543 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
437 | { | 544 | { |
438 | if (r < RID_MAX_GPR) | 545 | if (r < RID_MAX_GPR) |
439 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); | 546 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs); |
440 | else | 547 | else |
441 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); | 548 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs); |
442 | } | 549 | } |
443 | 550 | ||
444 | /* Generic store of register to stack slot. */ | 551 | /* Generic store of register with base and (small) offset address. */ |
445 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 552 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) |
446 | { | 553 | { |
447 | if (r < RID_MAX_GPR) | 554 | if (r < RID_MAX_GPR) |
448 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); | 555 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs); |
449 | else | 556 | else |
450 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); | 557 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs); |
451 | } | 558 | } |
452 | 559 | ||
453 | /* Add offset to pointer. */ | 560 | /* Add offset to pointer. */ |
454 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | 561 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) |
455 | { | 562 | { |
456 | if (ofs) { | 563 | if (ofs) { |
457 | if ((as->flags & JIT_F_LEA_AGU)) | 564 | emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); |
458 | emit_rmro(as, XO_LEA, r, r, ofs); | ||
459 | else | ||
460 | emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); | ||
461 | } | 565 | } |
462 | } | 566 | } |
463 | 567 | ||
diff --git a/src/lj_err.c b/src/lj_err.c index b677b655..563c7706 100644 --- a/src/lj_err.c +++ b/src/lj_err.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include "lj_ff.h" | 16 | #include "lj_ff.h" |
17 | #include "lj_trace.h" | 17 | #include "lj_trace.h" |
18 | #include "lj_vm.h" | 18 | #include "lj_vm.h" |
19 | #include "lj_strfmt.h" | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | ** LuaJIT can either use internal or external frame unwinding: | 22 | ** LuaJIT can either use internal or external frame unwinding: |
@@ -28,12 +29,18 @@ | |||
28 | ** Pros and Cons: | 29 | ** Pros and Cons: |
29 | ** | 30 | ** |
30 | ** - EXT requires unwind tables for *all* functions on the C stack between | 31 | ** - EXT requires unwind tables for *all* functions on the C stack between |
31 | ** the pcall/catch and the error/throw. This is the default on x64, | 32 | ** the pcall/catch and the error/throw. C modules used by Lua code can |
32 | ** but needs to be manually enabled on x86/PPC for non-C++ code. | 33 | ** throw errors, so these need to have unwind tables, too. Transitively |
34 | ** this applies to all system libraries used by C modules -- at least | ||
35 | ** when they have callbacks which may throw an error. | ||
33 | ** | 36 | ** |
34 | ** - INT is faster when actually throwing errors (but this happens rarely). | 37 | ** - INT is faster when actually throwing errors, but this happens rarely. |
35 | ** Setting up error handlers is zero-cost in any case. | 38 | ** Setting up error handlers is zero-cost in any case. |
36 | ** | 39 | ** |
40 | ** - INT needs to save *all* callee-saved registers when entering the | ||
41 | ** interpreter. EXT only needs to save those actually used inside the | ||
42 | ** interpreter. JIT-compiled code may need to save some more. | ||
43 | ** | ||
37 | ** - EXT provides full interoperability with C++ exceptions. You can throw | 44 | ** - EXT provides full interoperability with C++ exceptions. You can throw |
38 | ** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. | 45 | ** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. |
39 | ** C++ destructors are called as needed. C++ exceptions caught by pcall | 46 | ** C++ destructors are called as needed. C++ exceptions caught by pcall |
@@ -45,27 +52,38 @@ | |||
45 | ** the wrapper function feature. Lua errors thrown through C++ frames | 52 | ** the wrapper function feature. Lua errors thrown through C++ frames |
46 | ** cannot be caught by C++ code and C++ destructors are not run. | 53 | ** cannot be caught by C++ code and C++ destructors are not run. |
47 | ** | 54 | ** |
48 | ** EXT is the default on x64 systems, INT is the default on all other systems. | 55 | ** - EXT can handle errors from internal helper functions that are called |
56 | ** from JIT-compiled code (except for Windows/x86 and 32 bit ARM). | ||
57 | ** INT has no choice but to call the panic handler, if this happens. | ||
58 | ** Note: this is mainly relevant for out-of-memory errors. | ||
59 | ** | ||
60 | ** EXT is the default on all systems where the toolchain produces unwind | ||
61 | ** tables by default (*). This is hard-coded and/or detected in src/Makefile. | ||
62 | ** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL | ||
63 | ** | ||
64 | ** INT is the default on all other systems. | ||
65 | ** | ||
66 | ** EXT can be manually enabled for toolchains that are able to produce | ||
67 | ** conforming unwind tables: | ||
68 | ** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL" | ||
69 | ** As explained above, *all* C code used directly or indirectly by LuaJIT | ||
70 | ** must be compiled with -funwind-tables (or -fexceptions). C++ code must | ||
71 | ** *not* be compiled with -fno-exceptions. | ||
72 | ** | ||
73 | ** If you're unsure whether error handling inside the VM works correctly, | ||
74 | ** try running this and check whether it prints "OK": | ||
49 | ** | 75 | ** |
50 | ** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack | 76 | ** luajit -e "print(select(2, load('OK')):match('OK'))" |
51 | ** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled | ||
52 | ** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set | ||
53 | ** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules | ||
54 | ** and all C libraries that have callbacks which may be used to call back | ||
55 | ** into Lua. C++ code must *not* be compiled with -fno-exceptions. | ||
56 | ** | 77 | ** |
57 | ** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH. | 78 | ** (*) Originally, toolchains only generated unwind tables for C++ code. For |
58 | ** EXT is mandatory on WIN64 since the calling convention has an abundance | 79 | ** interoperability reasons, this can be manually enabled for plain C code, |
59 | ** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). | 80 | ** too (with -funwind-tables). With the introduction of the x64 architecture, |
60 | ** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). | 81 | ** the corresponding POSIX and Windows ABIs mandated unwind tables for all |
82 | ** code. Over the following years most desktop and server platforms have | ||
83 | ** enabled unwind tables by default on all architectures. OTOH mobile and | ||
84 | ** embedded platforms do not consistently mandate unwind tables. | ||
61 | */ | 85 | */ |
62 | 86 | ||
63 | #if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND | ||
64 | #define LJ_UNWIND_EXT 1 | ||
65 | #elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS | ||
66 | #define LJ_UNWIND_EXT 1 | ||
67 | #endif | ||
68 | |||
69 | /* -- Error messages ------------------------------------------------------ */ | 87 | /* -- Error messages ------------------------------------------------------ */ |
70 | 88 | ||
71 | /* Error message strings. */ | 89 | /* Error message strings. */ |
@@ -98,14 +116,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
98 | TValue *top = restorestack(L, -nres); | 116 | TValue *top = restorestack(L, -nres); |
99 | if (frame < top) { /* Frame reached? */ | 117 | if (frame < top) { /* Frame reached? */ |
100 | if (errcode) { | 118 | if (errcode) { |
101 | L->cframe = cframe_prev(cf); | ||
102 | L->base = frame+1; | 119 | L->base = frame+1; |
120 | L->cframe = cframe_prev(cf); | ||
103 | unwindstack(L, top); | 121 | unwindstack(L, top); |
104 | } | 122 | } |
105 | return cf; | 123 | return cf; |
106 | } | 124 | } |
107 | } | 125 | } |
108 | if (frame <= tvref(L->stack)) | 126 | if (frame <= tvref(L->stack)+LJ_FR2) |
109 | break; | 127 | break; |
110 | switch (frame_typep(frame)) { | 128 | switch (frame_typep(frame)) { |
111 | case FRAME_LUA: /* Lua frame. */ | 129 | case FRAME_LUA: /* Lua frame. */ |
@@ -113,14 +131,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
113 | frame = frame_prevl(frame); | 131 | frame = frame_prevl(frame); |
114 | break; | 132 | break; |
115 | case FRAME_C: /* C frame. */ | 133 | case FRAME_C: /* C frame. */ |
116 | #if LJ_HASFFI | ||
117 | unwind_c: | 134 | unwind_c: |
118 | #endif | ||
119 | #if LJ_UNWIND_EXT | 135 | #if LJ_UNWIND_EXT |
120 | if (errcode) { | 136 | if (errcode) { |
121 | L->cframe = cframe_prev(cf); | ||
122 | L->base = frame_prevd(frame) + 1; | 137 | L->base = frame_prevd(frame) + 1; |
123 | unwindstack(L, frame); | 138 | L->cframe = cframe_prev(cf); |
139 | unwindstack(L, frame - LJ_FR2); | ||
124 | } else if (cf != stopcf) { | 140 | } else if (cf != stopcf) { |
125 | cf = cframe_prev(cf); | 141 | cf = cframe_prev(cf); |
126 | frame = frame_prevd(frame); | 142 | frame = frame_prevd(frame); |
@@ -143,16 +159,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
143 | return cf; | 159 | return cf; |
144 | } | 160 | } |
145 | if (errcode) { | 161 | if (errcode) { |
146 | L->cframe = cframe_prev(cf); | ||
147 | L->base = frame_prevd(frame) + 1; | 162 | L->base = frame_prevd(frame) + 1; |
148 | unwindstack(L, frame); | 163 | L->cframe = cframe_prev(cf); |
164 | unwindstack(L, frame - LJ_FR2); | ||
149 | } | 165 | } |
150 | return cf; | 166 | return cf; |
151 | case FRAME_CONT: /* Continuation frame. */ | 167 | case FRAME_CONT: /* Continuation frame. */ |
152 | #if LJ_HASFFI | 168 | if (frame_iscont_fficb(frame)) |
153 | if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) | ||
154 | goto unwind_c; | 169 | goto unwind_c; |
155 | #endif | ||
156 | /* fallthrough */ | 170 | /* fallthrough */ |
157 | case FRAME_VARG: /* Vararg frame. */ | 171 | case FRAME_VARG: /* Vararg frame. */ |
158 | frame = frame_prevd(frame); | 172 | frame = frame_prevd(frame); |
@@ -166,8 +180,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
166 | } | 180 | } |
167 | if (frame_typep(frame) == FRAME_PCALL) | 181 | if (frame_typep(frame) == FRAME_PCALL) |
168 | hook_leave(G(L)); | 182 | hook_leave(G(L)); |
169 | L->cframe = cf; | ||
170 | L->base = frame_prevd(frame) + 1; | 183 | L->base = frame_prevd(frame) + 1; |
184 | L->cframe = cf; | ||
171 | unwindstack(L, L->base); | 185 | unwindstack(L, L->base); |
172 | } | 186 | } |
173 | return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); | 187 | return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); |
@@ -175,8 +189,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
175 | } | 189 | } |
176 | /* No C frame. */ | 190 | /* No C frame. */ |
177 | if (errcode) { | 191 | if (errcode) { |
192 | L->base = tvref(L->stack)+1+LJ_FR2; | ||
178 | L->cframe = NULL; | 193 | L->cframe = NULL; |
179 | L->base = tvref(L->stack)+1; | ||
180 | unwindstack(L, L->base); | 194 | unwindstack(L, L->base); |
181 | if (G(L)->panic) | 195 | if (G(L)->panic) |
182 | G(L)->panic(L); | 196 | G(L)->panic(L); |
@@ -187,33 +201,206 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) | |||
187 | 201 | ||
188 | /* -- External frame unwinding -------------------------------------------- */ | 202 | /* -- External frame unwinding -------------------------------------------- */ |
189 | 203 | ||
190 | #if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN | 204 | #if LJ_ABI_WIN |
191 | 205 | ||
192 | /* | 206 | /* |
193 | ** We have to use our own definitions instead of the mandatory (!) unwind.h, | 207 | ** Someone in Redmond owes me several days of my life. A lot of this is |
194 | ** since various OS, distros and compilers mess up the header installation. | 208 | ** undocumented or just plain wrong on MSDN. Some of it can be gathered |
209 | ** from 3rd party docs or must be found by trial-and-error. They really | ||
210 | ** don't want you to write your own language-specific exception handler | ||
211 | ** or to interact gracefully with MSVC. :-( | ||
212 | ** | ||
213 | ** Apparently MSVC doesn't call C++ destructors for foreign exceptions | ||
214 | ** unless you compile your C++ code with /EHa. Unfortunately this means | ||
215 | ** catch (...) also catches things like access violations. The use of | ||
216 | ** _set_se_translator doesn't really help, because it requires /EHa, too. | ||
195 | */ | 217 | */ |
196 | 218 | ||
197 | typedef struct _Unwind_Exception | 219 | #define WIN32_LEAN_AND_MEAN |
220 | #include <windows.h> | ||
221 | |||
222 | #if LJ_TARGET_X86 | ||
223 | typedef void *UndocumentedDispatcherContext; /* Unused on x86. */ | ||
224 | #else | ||
225 | /* Taken from: http://www.nynaeve.net/?p=99 */ | ||
226 | typedef struct UndocumentedDispatcherContext { | ||
227 | ULONG64 ControlPc; | ||
228 | ULONG64 ImageBase; | ||
229 | PRUNTIME_FUNCTION FunctionEntry; | ||
230 | ULONG64 EstablisherFrame; | ||
231 | ULONG64 TargetIp; | ||
232 | PCONTEXT ContextRecord; | ||
233 | void (*LanguageHandler)(void); | ||
234 | PVOID HandlerData; | ||
235 | PUNWIND_HISTORY_TABLE HistoryTable; | ||
236 | ULONG ScopeIndex; | ||
237 | ULONG Fill0; | ||
238 | } UndocumentedDispatcherContext; | ||
239 | #endif | ||
240 | |||
241 | /* Another wild guess. */ | ||
242 | extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); | ||
243 | |||
244 | #if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) | ||
245 | /* Workaround for broken MinGW64 declaration. */ | ||
246 | VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); | ||
247 | #define RtlUnwindEx RtlUnwindEx_FIXED | ||
248 | #endif | ||
249 | |||
250 | #define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) | ||
251 | #define LJ_GCC_EXCODE ((DWORD)0x20474343) | ||
252 | |||
253 | #define LJ_EXCODE ((DWORD)0xe24c4a00) | ||
254 | #define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) | ||
255 | #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) | ||
256 | #define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) | ||
257 | |||
258 | /* Windows exception handler for interpreter frame. */ | ||
259 | LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, | ||
260 | void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) | ||
198 | { | 261 | { |
199 | uint64_t exclass; | 262 | #if LJ_TARGET_X86 |
200 | void (*excleanup)(int, struct _Unwind_Exception *); | 263 | void *cf = (char *)f - CFRAME_OFS_SEH; |
201 | uintptr_t p1, p2; | 264 | #else |
202 | } __attribute__((__aligned__)) _Unwind_Exception; | 265 | void *cf = f; |
266 | #endif | ||
267 | lua_State *L = cframe_L(cf); | ||
268 | int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? | ||
269 | LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; | ||
270 | if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ | ||
271 | /* Unwind internal frames. */ | ||
272 | err_unwind(L, cf, errcode); | ||
273 | } else { | ||
274 | void *cf2 = err_unwind(L, cf, 0); | ||
275 | if (cf2) { /* We catch it, so start unwinding the upper frames. */ | ||
276 | if (rec->ExceptionCode == LJ_MSVC_EXCODE || | ||
277 | rec->ExceptionCode == LJ_GCC_EXCODE) { | ||
278 | #if !LJ_TARGET_CYGWIN | ||
279 | __DestructExceptionObject(rec, 1); | ||
280 | #endif | ||
281 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); | ||
282 | } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { | ||
283 | /* Don't catch access violations etc. */ | ||
284 | return 1; /* ExceptionContinueSearch */ | ||
285 | } | ||
286 | #if LJ_TARGET_X86 | ||
287 | UNUSED(ctx); | ||
288 | UNUSED(dispatch); | ||
289 | /* Call all handlers for all lower C frames (including ourselves) again | ||
290 | ** with EH_UNWINDING set. Then call the specified function, passing cf | ||
291 | ** and errcode. | ||
292 | */ | ||
293 | lj_vm_rtlunwind(cf, (void *)rec, | ||
294 | (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? | ||
295 | (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); | ||
296 | /* lj_vm_rtlunwind does not return. */ | ||
297 | #else | ||
298 | /* Unwind the stack and call all handlers for all lower C frames | ||
299 | ** (including ourselves) again with EH_UNWINDING set. Then set | ||
300 | ** stack pointer = cf, result = errcode and jump to the specified target. | ||
301 | */ | ||
302 | RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? | ||
303 | lj_vm_unwind_ff_eh : | ||
304 | lj_vm_unwind_c_eh), | ||
305 | rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); | ||
306 | /* RtlUnwindEx should never return. */ | ||
307 | #endif | ||
308 | } | ||
309 | } | ||
310 | return 1; /* ExceptionContinueSearch */ | ||
311 | } | ||
312 | |||
313 | #if LJ_UNWIND_JIT | ||
314 | |||
315 | #if LJ_TARGET_X64 | ||
316 | #define CONTEXT_REG_PC Rip | ||
317 | #elif LJ_TARGET_ARM64 | ||
318 | #define CONTEXT_REG_PC Pc | ||
319 | #else | ||
320 | #error "NYI: Windows arch-specific unwinder for JIT-compiled code" | ||
321 | #endif | ||
322 | |||
323 | /* Windows unwinder for JIT-compiled code. */ | ||
324 | static void err_unwind_win_jit(global_State *g, int errcode) | ||
325 | { | ||
326 | CONTEXT ctx; | ||
327 | UNWIND_HISTORY_TABLE hist; | ||
328 | |||
329 | memset(&hist, 0, sizeof(hist)); | ||
330 | RtlCaptureContext(&ctx); | ||
331 | while (1) { | ||
332 | uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC; | ||
333 | void *hdata; | ||
334 | PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist); | ||
335 | if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */ | ||
336 | ExitNo exitno; | ||
337 | uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno); | ||
338 | if (stub) { /* Jump to side exit to unwind the trace. */ | ||
339 | ctx.CONTEXT_REG_PC = stub; | ||
340 | G2J(g)->exitcode = errcode; | ||
341 | RtlRestoreContext(&ctx, NULL); /* Does not return. */ | ||
342 | } | ||
343 | break; | ||
344 | } | ||
345 | RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func, | ||
346 | &ctx, &hdata, &frame, NULL); | ||
347 | if (!addr) break; | ||
348 | } | ||
349 | /* Unwinding failed, if we end up here. */ | ||
350 | } | ||
351 | #endif | ||
352 | |||
353 | /* Raise Windows exception. */ | ||
354 | static void err_raise_ext(global_State *g, int errcode) | ||
355 | { | ||
356 | #if LJ_UNWIND_JIT | ||
357 | if (tvref(g->jit_base)) { | ||
358 | err_unwind_win_jit(g, errcode); | ||
359 | return; /* Unwinding failed. */ | ||
360 | } | ||
361 | #elif LJ_HASJIT | ||
362 | /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */ | ||
363 | setmref(g->jit_base, NULL); | ||
364 | #endif | ||
365 | UNUSED(g); | ||
366 | RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); | ||
367 | } | ||
368 | |||
369 | #elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__)) | ||
370 | |||
371 | /* | ||
372 | ** We have to use our own definitions instead of the mandatory (!) unwind.h, | ||
373 | ** since various OS, distros and compilers mess up the header installation. | ||
374 | */ | ||
203 | 375 | ||
204 | typedef struct _Unwind_Context _Unwind_Context; | 376 | typedef struct _Unwind_Context _Unwind_Context; |
205 | 377 | ||
206 | #define _URC_OK 0 | 378 | #define _URC_OK 0 |
379 | #define _URC_FATAL_PHASE2_ERROR 2 | ||
207 | #define _URC_FATAL_PHASE1_ERROR 3 | 380 | #define _URC_FATAL_PHASE1_ERROR 3 |
208 | #define _URC_HANDLER_FOUND 6 | 381 | #define _URC_HANDLER_FOUND 6 |
209 | #define _URC_INSTALL_CONTEXT 7 | 382 | #define _URC_INSTALL_CONTEXT 7 |
210 | #define _URC_CONTINUE_UNWIND 8 | 383 | #define _URC_CONTINUE_UNWIND 8 |
211 | #define _URC_FAILURE 9 | 384 | #define _URC_FAILURE 9 |
212 | 385 | ||
386 | #define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ | ||
387 | #define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) | ||
388 | #define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) | ||
389 | #define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) | ||
390 | |||
213 | #if !LJ_TARGET_ARM | 391 | #if !LJ_TARGET_ARM |
214 | 392 | ||
393 | typedef struct _Unwind_Exception | ||
394 | { | ||
395 | uint64_t exclass; | ||
396 | void (*excleanup)(int, struct _Unwind_Exception *); | ||
397 | uintptr_t p1, p2; | ||
398 | } __attribute__((__aligned__)) _Unwind_Exception; | ||
399 | #define UNWIND_EXCEPTION_TYPE _Unwind_Exception | ||
400 | |||
215 | extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); | 401 | extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); |
216 | extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); | 402 | extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); |
403 | extern uintptr_t _Unwind_GetIP(_Unwind_Context *); | ||
217 | extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); | 404 | extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); |
218 | extern void _Unwind_DeleteException(_Unwind_Exception *); | 405 | extern void _Unwind_DeleteException(_Unwind_Exception *); |
219 | extern int _Unwind_RaiseException(_Unwind_Exception *); | 406 | extern int _Unwind_RaiseException(_Unwind_Exception *); |
@@ -223,11 +410,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *); | |||
223 | #define _UA_HANDLER_FRAME 4 | 410 | #define _UA_HANDLER_FRAME 4 |
224 | #define _UA_FORCE_UNWIND 8 | 411 | #define _UA_FORCE_UNWIND 8 |
225 | 412 | ||
226 | #define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ | ||
227 | #define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) | ||
228 | #define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) | ||
229 | #define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) | ||
230 | |||
231 | /* DWARF2 personality handler referenced from interpreter .eh_frame. */ | 413 | /* DWARF2 personality handler referenced from interpreter .eh_frame. */ |
232 | LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, | 414 | LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, |
233 | uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) | 415 | uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) |
@@ -236,7 +418,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, | |||
236 | lua_State *L; | 418 | lua_State *L; |
237 | if (version != 1) | 419 | if (version != 1) |
238 | return _URC_FATAL_PHASE1_ERROR; | 420 | return _URC_FATAL_PHASE1_ERROR; |
239 | UNUSED(uexclass); | ||
240 | cf = (void *)_Unwind_GetCFA(ctx); | 421 | cf = (void *)_Unwind_GetCFA(ctx); |
241 | L = cframe_L(cf); | 422 | L = cframe_L(cf); |
242 | if ((actions & _UA_SEARCH_PHASE)) { | 423 | if ((actions & _UA_SEARCH_PHASE)) { |
@@ -284,27 +465,162 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, | |||
284 | ** it on non-x64 because the interpreter restores all callee-saved regs. | 465 | ** it on non-x64 because the interpreter restores all callee-saved regs. |
285 | */ | 466 | */ |
286 | lj_err_throw(L, errcode); | 467 | lj_err_throw(L, errcode); |
468 | #if LJ_TARGET_X64 | ||
469 | #error "Broken build system -- only use the provided Makefiles!" | ||
470 | #endif | ||
287 | #endif | 471 | #endif |
288 | } | 472 | } |
289 | return _URC_CONTINUE_UNWIND; | 473 | return _URC_CONTINUE_UNWIND; |
290 | } | 474 | } |
291 | 475 | ||
292 | #if LJ_UNWIND_EXT | 476 | #if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) |
293 | static __thread _Unwind_Exception static_uex; | 477 | struct dwarf_eh_bases { void *tbase, *dbase, *func; }; |
478 | extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases); | ||
294 | 479 | ||
295 | /* Raise DWARF2 exception. */ | 480 | /* Verify that external error handling actually has a chance to work. */ |
296 | static void err_raise_ext(int errcode) | 481 | void lj_err_verify(void) |
297 | { | 482 | { |
298 | static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); | 483 | #if !LJ_TARGET_OSX |
299 | static_uex.excleanup = NULL; | 484 | /* Check disabled on MacOS due to brilliant software engineering at Apple. */ |
300 | _Unwind_RaiseException(&static_uex); | 485 | struct dwarf_eh_bases ehb; |
486 | lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables"); | ||
487 | #endif | ||
488 | /* Check disabled, because of broken Fedora/ARM64. See #722. | ||
489 | lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables"); | ||
490 | */ | ||
301 | } | 491 | } |
302 | #endif | 492 | #endif |
303 | 493 | ||
494 | #if LJ_UNWIND_JIT | ||
495 | /* DWARF2 personality handler for JIT-compiled code. */ | ||
496 | static int err_unwind_jit(int version, int actions, | ||
497 | uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) | ||
498 | { | ||
499 | /* NYI: FFI C++ exception interoperability. */ | ||
500 | if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass)) | ||
501 | return _URC_FATAL_PHASE1_ERROR; | ||
502 | if ((actions & _UA_SEARCH_PHASE)) { | ||
503 | return _URC_HANDLER_FOUND; | ||
504 | } | ||
505 | if ((actions & _UA_CLEANUP_PHASE)) { | ||
506 | global_State *g = *(global_State **)(uex+1); | ||
507 | ExitNo exitno; | ||
508 | uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */ | ||
509 | uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno); | ||
510 | lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame"); | ||
511 | if (stub) { /* Jump to side exit to unwind the trace. */ | ||
512 | G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass); | ||
513 | #ifdef LJ_TARGET_MIPS | ||
514 | _Unwind_SetGR(ctx, 4, stub); | ||
515 | _Unwind_SetGR(ctx, 5, exitno); | ||
516 | _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub); | ||
517 | #else | ||
518 | _Unwind_SetIP(ctx, stub); | ||
519 | #endif | ||
520 | return _URC_INSTALL_CONTEXT; | ||
521 | } | ||
522 | return _URC_FATAL_PHASE2_ERROR; | ||
523 | } | ||
524 | return _URC_FATAL_PHASE1_ERROR; | ||
525 | } | ||
526 | |||
527 | /* DWARF2 template frame info for JIT-compiled code. | ||
528 | ** | ||
529 | ** After copying the template to the start of the mcode segment, | ||
530 | ** the frame handler function and the code size is patched. | ||
531 | ** The frame handler always installs a new context to jump to the exit, | ||
532 | ** so don't bother to add any unwind opcodes. | ||
533 | */ | ||
534 | static const uint8_t err_frame_jit_template[] = { | ||
535 | #if LJ_BE | ||
536 | 0,0,0, | ||
537 | #endif | ||
538 | LJ_64 ? 0x1c : 0x14, /* CIE length. */ | ||
539 | #if LJ_LE | ||
540 | 0,0,0, | ||
541 | #endif | ||
542 | 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */ | ||
543 | 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */ | ||
544 | #if LJ_64 | ||
545 | 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ | ||
546 | 0,0,0,0,0, /* Alignment. */ | ||
547 | #else | ||
548 | 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ | ||
549 | 0, /* Alignment. */ | ||
550 | #endif | ||
551 | #if LJ_BE | ||
552 | 0,0,0, | ||
553 | #endif | ||
554 | LJ_64 ? 0x14 : 0x10, /* FDE length. */ | ||
555 | 0,0,0, | ||
556 | LJ_64 ? 0x24 : 0x1c, /* CIE offset. */ | ||
557 | 0,0,0, | ||
558 | LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */ | ||
559 | #if LJ_LE | ||
560 | 0,0,0, | ||
561 | #endif | ||
562 | 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */ | ||
563 | #if LJ_64 | ||
564 | 0,0,0,0, /* Alignment. */ | ||
565 | #endif | ||
566 | 0,0,0,0 /* Final FDE. */ | ||
567 | }; | ||
568 | |||
569 | #define ERR_FRAME_JIT_OFS_HANDLER 0x12 | ||
570 | #define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18) | ||
571 | #define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 0x2c : 0x24) | ||
572 | #if LJ_TARGET_OSX | ||
573 | #define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE | ||
304 | #else | 574 | #else |
575 | #define ERR_FRAME_JIT_OFS_REGISTER 0 | ||
576 | #endif | ||
577 | |||
578 | extern void __register_frame(const void *); | ||
579 | extern void __deregister_frame(const void *); | ||
580 | |||
581 | uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info) | ||
582 | { | ||
583 | void **handler; | ||
584 | memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template)); | ||
585 | handler = (void *)err_unwind_jit; | ||
586 | memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler)); | ||
587 | *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) = | ||
588 | (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base)); | ||
589 | __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER); | ||
590 | #ifdef LUA_USE_ASSERT | ||
591 | { | ||
592 | struct dwarf_eh_bases ehb; | ||
593 | lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb), | ||
594 | "bad JIT unwind table registration"); | ||
595 | } | ||
596 | #endif | ||
597 | return info + sizeof(err_frame_jit_template); | ||
598 | } | ||
599 | |||
600 | void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info) | ||
601 | { | ||
602 | UNUSED(base); UNUSED(sz); | ||
603 | __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER); | ||
604 | } | ||
605 | #endif | ||
606 | |||
607 | #else /* LJ_TARGET_ARM */ | ||
608 | |||
609 | #define _US_VIRTUAL_UNWIND_FRAME 0 | ||
610 | #define _US_UNWIND_FRAME_STARTING 1 | ||
611 | #define _US_ACTION_MASK 3 | ||
612 | #define _US_FORCE_UNWIND 8 | ||
613 | |||
614 | typedef struct _Unwind_Control_Block _Unwind_Control_Block; | ||
615 | #define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block | ||
616 | |||
617 | struct _Unwind_Control_Block { | ||
618 | uint64_t exclass; | ||
619 | uint32_t misc[20]; | ||
620 | }; | ||
305 | 621 | ||
306 | extern void _Unwind_DeleteException(void *); | 622 | extern int _Unwind_RaiseException(_Unwind_Control_Block *); |
307 | extern int __gnu_unwind_frame (void *, _Unwind_Context *); | 623 | extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *); |
308 | extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); | 624 | extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); |
309 | extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); | 625 | extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); |
310 | 626 | ||
@@ -320,126 +636,98 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v) | |||
320 | _Unwind_VRS_Set(ctx, 0, r, 0, &v); | 636 | _Unwind_VRS_Set(ctx, 0, r, 0, &v); |
321 | } | 637 | } |
322 | 638 | ||
323 | #define _US_VIRTUAL_UNWIND_FRAME 0 | 639 | extern void lj_vm_unwind_ext(void); |
324 | #define _US_UNWIND_FRAME_STARTING 1 | ||
325 | #define _US_ACTION_MASK 3 | ||
326 | #define _US_FORCE_UNWIND 8 | ||
327 | 640 | ||
328 | /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ | 641 | /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ |
329 | LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) | 642 | LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, |
643 | _Unwind_Context *ctx) | ||
330 | { | 644 | { |
331 | void *cf = (void *)_Unwind_GetGR(ctx, 13); | 645 | void *cf = (void *)_Unwind_GetGR(ctx, 13); |
332 | lua_State *L = cframe_L(cf); | 646 | lua_State *L = cframe_L(cf); |
333 | if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { | 647 | int errcode; |
334 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); | 648 | |
649 | switch ((state & _US_ACTION_MASK)) { | ||
650 | case _US_VIRTUAL_UNWIND_FRAME: | ||
651 | if ((state & _US_FORCE_UNWIND)) break; | ||
335 | return _URC_HANDLER_FOUND; | 652 | return _URC_HANDLER_FOUND; |
336 | } | 653 | case _US_UNWIND_FRAME_STARTING: |
337 | if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { | 654 | if (LJ_UEXCLASS_CHECK(ucb->exclass)) { |
338 | _Unwind_DeleteException(ucb); | 655 | errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass); |
339 | _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); | 656 | } else { |
340 | _Unwind_SetGR(ctx, 0, (uint32_t)L); | 657 | errcode = LUA_ERRRUN; |
341 | _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); | 658 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); |
659 | } | ||
660 | cf = err_unwind(L, cf, errcode); | ||
661 | if ((state & _US_FORCE_UNWIND) || cf == NULL) break; | ||
662 | _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext); | ||
663 | _Unwind_SetGR(ctx, 0, (uint32_t)ucb); | ||
664 | _Unwind_SetGR(ctx, 1, (uint32_t)errcode); | ||
665 | _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ? | ||
666 | (uint32_t)lj_vm_unwind_ff_eh : | ||
667 | (uint32_t)lj_vm_unwind_c_eh); | ||
342 | return _URC_INSTALL_CONTEXT; | 668 | return _URC_INSTALL_CONTEXT; |
669 | default: | ||
670 | return _URC_FAILURE; | ||
343 | } | 671 | } |
344 | if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) | 672 | if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) |
345 | return _URC_FAILURE; | 673 | return _URC_FAILURE; |
674 | #ifdef LUA_USE_ASSERT | ||
675 | /* We should never get here unless this is a forced unwind aka backtrace. */ | ||
676 | if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) { | ||
677 | _Unwind_SetGR(ctx, 0, 0xff33aa88); | ||
678 | } | ||
679 | #endif | ||
346 | return _URC_CONTINUE_UNWIND; | 680 | return _URC_CONTINUE_UNWIND; |
347 | } | 681 | } |
348 | 682 | ||
349 | #endif | 683 | #if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) |
684 | typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *); | ||
685 | extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *); | ||
686 | |||
687 | static int err_verify_bt(_Unwind_Context *ctx, int *got) | ||
688 | { | ||
689 | if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; } | ||
690 | else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); } | ||
691 | return _URC_OK; | ||
692 | } | ||
350 | 693 | ||
351 | #elif LJ_TARGET_X64 && LJ_ABI_WIN | 694 | /* Verify that external error handling actually has a chance to work. */ |
695 | void lj_err_verify(void) | ||
696 | { | ||
697 | int got = 0; | ||
698 | _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got); | ||
699 | lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables"); | ||
700 | } | ||
701 | #endif | ||
352 | 702 | ||
353 | /* | 703 | /* |
354 | ** Someone in Redmond owes me several days of my life. A lot of this is | 704 | ** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM. |
355 | ** undocumented or just plain wrong on MSDN. Some of it can be gathered | ||
356 | ** from 3rd party docs or must be found by trial-and-error. They really | ||
357 | ** don't want you to write your own language-specific exception handler | ||
358 | ** or to interact gracefully with MSVC. :-( | ||
359 | ** | 705 | ** |
360 | ** Apparently MSVC doesn't call C++ destructors for foreign exceptions | 706 | ** The quirky ARM unwind API doesn't have __register_frame(). |
361 | ** unless you compile your C++ code with /EHa. Unfortunately this means | 707 | ** A potential workaround might involve _Unwind_Backtrace. |
362 | ** catch (...) also catches things like access violations. The use of | 708 | ** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway, |
363 | ** _set_se_translator doesn't really help, because it requires /EHa, too. | 709 | ** since they are built without unwind tables by default. |
364 | */ | 710 | */ |
365 | 711 | ||
366 | #define WIN32_LEAN_AND_MEAN | 712 | #endif /* LJ_TARGET_ARM */ |
367 | #include <windows.h> | ||
368 | 713 | ||
369 | /* Taken from: http://www.nynaeve.net/?p=99 */ | ||
370 | typedef struct UndocumentedDispatcherContext { | ||
371 | ULONG64 ControlPc; | ||
372 | ULONG64 ImageBase; | ||
373 | PRUNTIME_FUNCTION FunctionEntry; | ||
374 | ULONG64 EstablisherFrame; | ||
375 | ULONG64 TargetIp; | ||
376 | PCONTEXT ContextRecord; | ||
377 | void (*LanguageHandler)(void); | ||
378 | PVOID HandlerData; | ||
379 | PUNWIND_HISTORY_TABLE HistoryTable; | ||
380 | ULONG ScopeIndex; | ||
381 | ULONG Fill0; | ||
382 | } UndocumentedDispatcherContext; | ||
383 | |||
384 | /* Another wild guess. */ | ||
385 | extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); | ||
386 | |||
387 | #ifdef MINGW_SDK_INIT | ||
388 | /* Workaround for broken MinGW64 declaration. */ | ||
389 | VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); | ||
390 | #define RtlUnwindEx RtlUnwindEx_FIXED | ||
391 | #endif | ||
392 | |||
393 | #define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) | ||
394 | #define LJ_GCC_EXCODE ((DWORD)0x20474343) | ||
395 | 714 | ||
396 | #define LJ_EXCODE ((DWORD)0xe24c4a00) | 715 | #if LJ_UNWIND_EXT |
397 | #define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) | 716 | static __thread struct { |
398 | #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) | 717 | UNWIND_EXCEPTION_TYPE ex; |
399 | #define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) | 718 | global_State *g; |
719 | } static_uex; | ||
400 | 720 | ||
401 | /* Win64 exception handler for interpreter frame. */ | 721 | /* Raise external exception. */ |
402 | LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, | 722 | static void err_raise_ext(global_State *g, int errcode) |
403 | void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) | ||
404 | { | 723 | { |
405 | lua_State *L = cframe_L(cf); | 724 | memset(&static_uex, 0, sizeof(static_uex)); |
406 | int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? | 725 | static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode); |
407 | LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; | 726 | static_uex.g = g; |
408 | if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ | 727 | _Unwind_RaiseException(&static_uex.ex); |
409 | /* Unwind internal frames. */ | ||
410 | err_unwind(L, cf, errcode); | ||
411 | } else { | ||
412 | void *cf2 = err_unwind(L, cf, 0); | ||
413 | if (cf2) { /* We catch it, so start unwinding the upper frames. */ | ||
414 | if (rec->ExceptionCode == LJ_MSVC_EXCODE || | ||
415 | rec->ExceptionCode == LJ_GCC_EXCODE) { | ||
416 | #if LJ_TARGET_WINDOWS | ||
417 | __DestructExceptionObject(rec, 1); | ||
418 | #endif | ||
419 | setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); | ||
420 | } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { | ||
421 | /* Don't catch access violations etc. */ | ||
422 | return ExceptionContinueSearch; | ||
423 | } | ||
424 | /* Unwind the stack and call all handlers for all lower C frames | ||
425 | ** (including ourselves) again with EH_UNWINDING set. Then set | ||
426 | ** rsp = cf, rax = errcode and jump to the specified target. | ||
427 | */ | ||
428 | RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? | ||
429 | lj_vm_unwind_ff_eh : | ||
430 | lj_vm_unwind_c_eh), | ||
431 | rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); | ||
432 | /* RtlUnwindEx should never return. */ | ||
433 | } | ||
434 | } | ||
435 | return ExceptionContinueSearch; | ||
436 | } | 728 | } |
437 | 729 | ||
438 | /* Raise Windows exception. */ | 730 | #endif |
439 | static void err_raise_ext(int errcode) | ||
440 | { | ||
441 | RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); | ||
442 | } | ||
443 | 731 | ||
444 | #endif | 732 | #endif |
445 | 733 | ||
@@ -450,22 +738,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) | |||
450 | { | 738 | { |
451 | global_State *g = G(L); | 739 | global_State *g = G(L); |
452 | lj_trace_abort(g); | 740 | lj_trace_abort(g); |
453 | setgcrefnull(g->jit_L); | 741 | L->status = LUA_OK; |
454 | L->status = 0; | ||
455 | #if LJ_UNWIND_EXT | 742 | #if LJ_UNWIND_EXT |
456 | err_raise_ext(errcode); | 743 | err_raise_ext(g, errcode); |
457 | /* | 744 | /* |
458 | ** A return from this function signals a corrupt C stack that cannot be | 745 | ** A return from this function signals a corrupt C stack that cannot be |
459 | ** unwound. We have no choice but to call the panic function and exit. | 746 | ** unwound. We have no choice but to call the panic function and exit. |
460 | ** | 747 | ** |
461 | ** Usually this is caused by a C function without unwind information. | 748 | ** Usually this is caused by a C function without unwind information. |
462 | ** This should never happen on x64, but may happen if you've manually | 749 | ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL |
463 | ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every* | 750 | ** and forgot to recompile *every* non-C++ file with -funwind-tables. |
464 | ** non-C++ file with -funwind-tables. | ||
465 | */ | 751 | */ |
466 | if (G(L)->panic) | 752 | if (G(L)->panic) |
467 | G(L)->panic(L); | 753 | G(L)->panic(L); |
468 | #else | 754 | #else |
755 | #if LJ_HASJIT | ||
756 | setmref(g->jit_base, NULL); | ||
757 | #endif | ||
469 | { | 758 | { |
470 | void *cf = err_unwind(L, NULL, errcode); | 759 | void *cf = err_unwind(L, NULL, errcode); |
471 | if (cframe_unwind_ff(cf)) | 760 | if (cframe_unwind_ff(cf)) |
@@ -495,7 +784,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L) | |||
495 | /* Find error function for runtime errors. Requires an extra stack traversal. */ | 784 | /* Find error function for runtime errors. Requires an extra stack traversal. */ |
496 | static ptrdiff_t finderrfunc(lua_State *L) | 785 | static ptrdiff_t finderrfunc(lua_State *L) |
497 | { | 786 | { |
498 | cTValue *frame = L->base-1, *bot = tvref(L->stack); | 787 | cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2; |
499 | void *cf = L->cframe; | 788 | void *cf = L->cframe; |
500 | while (frame > bot && cf) { | 789 | while (frame > bot && cf) { |
501 | while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ | 790 | while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ |
@@ -519,10 +808,8 @@ static ptrdiff_t finderrfunc(lua_State *L) | |||
519 | frame = frame_prevd(frame); | 808 | frame = frame_prevd(frame); |
520 | break; | 809 | break; |
521 | case FRAME_CONT: | 810 | case FRAME_CONT: |
522 | #if LJ_HASFFI | 811 | if (frame_iscont_fficb(frame)) |
523 | if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) | ||
524 | cf = cframe_prev(cf); | 812 | cf = cframe_prev(cf); |
525 | #endif | ||
526 | frame = frame_prevd(frame); | 813 | frame = frame_prevd(frame); |
527 | break; | 814 | break; |
528 | case FRAME_CP: | 815 | case FRAME_CP: |
@@ -534,11 +821,11 @@ static ptrdiff_t finderrfunc(lua_State *L) | |||
534 | break; | 821 | break; |
535 | case FRAME_PCALL: | 822 | case FRAME_PCALL: |
536 | case FRAME_PCALLH: | 823 | case FRAME_PCALLH: |
537 | if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ | 824 | if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall) |
538 | return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ | 825 | return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */ |
539 | return 0; | 826 | return 0; |
540 | default: | 827 | default: |
541 | lua_assert(0); | 828 | lj_assertL(0, "bad frame type"); |
542 | return 0; | 829 | return 0; |
543 | } | 830 | } |
544 | } | 831 | } |
@@ -548,7 +835,7 @@ static ptrdiff_t finderrfunc(lua_State *L) | |||
548 | /* Runtime error. */ | 835 | /* Runtime error. */ |
549 | LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) | 836 | LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) |
550 | { | 837 | { |
551 | ptrdiff_t ef = finderrfunc(L); | 838 | ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L); |
552 | if (ef) { | 839 | if (ef) { |
553 | TValue *errfunc = restorestack(L, ef); | 840 | TValue *errfunc = restorestack(L, ef); |
554 | TValue *top = L->top; | 841 | TValue *top = L->top; |
@@ -558,14 +845,25 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) | |||
558 | lj_err_throw(L, LUA_ERRERR); | 845 | lj_err_throw(L, LUA_ERRERR); |
559 | } | 846 | } |
560 | L->status = LUA_ERRERR; | 847 | L->status = LUA_ERRERR; |
561 | copyTV(L, top, top-1); | 848 | copyTV(L, top+LJ_FR2, top-1); |
562 | copyTV(L, top-1, errfunc); | 849 | copyTV(L, top-1, errfunc); |
850 | if (LJ_FR2) setnilV(top++); | ||
563 | L->top = top+1; | 851 | L->top = top+1; |
564 | lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ | 852 | lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ |
565 | } | 853 | } |
566 | lj_err_throw(L, LUA_ERRRUN); | 854 | lj_err_throw(L, LUA_ERRRUN); |
567 | } | 855 | } |
568 | 856 | ||
857 | #if LJ_HASJIT | ||
858 | LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode) | ||
859 | { | ||
860 | if (errcode == LUA_ERRRUN) | ||
861 | lj_err_run(L); | ||
862 | else | ||
863 | lj_err_throw(L, errcode); | ||
864 | } | ||
865 | #endif | ||
866 | |||
569 | /* Formatted runtime error message. */ | 867 | /* Formatted runtime error message. */ |
570 | LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) | 868 | LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) |
571 | { | 869 | { |
@@ -573,7 +871,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) | |||
573 | va_list argp; | 871 | va_list argp; |
574 | va_start(argp, em); | 872 | va_start(argp, em); |
575 | if (curr_funcisL(L)) L->top = curr_topL(L); | 873 | if (curr_funcisL(L)) L->top = curr_topL(L); |
576 | msg = lj_str_pushvf(L, err2msg(em), argp); | 874 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
577 | va_end(argp); | 875 | va_end(argp); |
578 | lj_debug_addloc(L, msg, L->base-1, NULL); | 876 | lj_debug_addloc(L, msg, L->base-1, NULL); |
579 | lj_err_run(L); | 877 | lj_err_run(L); |
@@ -591,11 +889,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok, | |||
591 | { | 889 | { |
592 | char buff[LUA_IDSIZE]; | 890 | char buff[LUA_IDSIZE]; |
593 | const char *msg; | 891 | const char *msg; |
594 | lj_debug_shortname(buff, src); | 892 | lj_debug_shortname(buff, src, line); |
595 | msg = lj_str_pushvf(L, err2msg(em), argp); | 893 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
596 | msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); | 894 | msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg); |
597 | if (tok) | 895 | if (tok) |
598 | lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); | 896 | lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); |
599 | lj_err_throw(L, LUA_ERRSYNTAX); | 897 | lj_err_throw(L, LUA_ERRSYNTAX); |
600 | } | 898 | } |
601 | 899 | ||
@@ -634,8 +932,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) | |||
634 | const BCIns *pc = cframe_Lpc(L); | 932 | const BCIns *pc = cframe_Lpc(L); |
635 | if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { | 933 | if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { |
636 | const char *tname = lj_typename(o); | 934 | const char *tname = lj_typename(o); |
935 | setframe_gc(o, obj2gco(L), LJ_TTHREAD); | ||
936 | if (LJ_FR2) o++; | ||
637 | setframe_pc(o, pc); | 937 | setframe_pc(o, pc); |
638 | setframe_gc(o, obj2gco(L)); | ||
639 | L->top = L->base = o+1; | 938 | L->top = L->base = o+1; |
640 | err_msgv(L, LJ_ERR_BADCALL, tname); | 939 | err_msgv(L, LJ_ERR_BADCALL, tname); |
641 | } | 940 | } |
@@ -645,28 +944,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) | |||
645 | /* Error in context of caller. */ | 944 | /* Error in context of caller. */ |
646 | LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) | 945 | LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) |
647 | { | 946 | { |
648 | TValue *frame = L->base-1; | 947 | TValue *frame = NULL, *pframe = NULL; |
649 | TValue *pframe = NULL; | 948 | if (!(LJ_HASJIT && tvref(G(L)->jit_base))) { |
650 | if (frame_islua(frame)) { | 949 | frame = L->base-1; |
651 | pframe = frame_prevl(frame); | 950 | if (frame_islua(frame)) { |
652 | } else if (frame_iscont(frame)) { | 951 | pframe = frame_prevl(frame); |
952 | } else if (frame_iscont(frame)) { | ||
953 | if (frame_iscont_fficb(frame)) { | ||
954 | pframe = frame; | ||
955 | frame = NULL; | ||
956 | } else { | ||
957 | pframe = frame_prevd(frame); | ||
653 | #if LJ_HASFFI | 958 | #if LJ_HASFFI |
654 | if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) { | 959 | /* Remove frame for FFI metamethods. */ |
655 | pframe = frame; | 960 | if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && |
656 | frame = NULL; | 961 | frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { |
657 | } else | 962 | L->base = pframe+1; |
963 | L->top = frame; | ||
964 | setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); | ||
965 | } | ||
658 | #endif | 966 | #endif |
659 | { | ||
660 | pframe = frame_prevd(frame); | ||
661 | #if LJ_HASFFI | ||
662 | /* Remove frame for FFI metamethods. */ | ||
663 | if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && | ||
664 | frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { | ||
665 | L->base = pframe+1; | ||
666 | L->top = frame; | ||
667 | setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); | ||
668 | } | 967 | } |
669 | #endif | ||
670 | } | 968 | } |
671 | } | 969 | } |
672 | lj_debug_addloc(L, msg, pframe, frame); | 970 | lj_debug_addloc(L, msg, pframe, frame); |
@@ -679,7 +977,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...) | |||
679 | const char *msg; | 977 | const char *msg; |
680 | va_list argp; | 978 | va_list argp; |
681 | va_start(argp, em); | 979 | va_start(argp, em); |
682 | msg = lj_str_pushvf(L, err2msg(em), argp); | 980 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
683 | va_end(argp); | 981 | va_end(argp); |
684 | lj_err_callermsg(L, msg); | 982 | lj_err_callermsg(L, msg); |
685 | } | 983 | } |
@@ -699,9 +997,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg, | |||
699 | if (narg < 0 && narg > LUA_REGISTRYINDEX) | 997 | if (narg < 0 && narg > LUA_REGISTRYINDEX) |
700 | narg = (int)(L->top - L->base) + narg + 1; | 998 | narg = (int)(L->top - L->base) + narg + 1; |
701 | if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ | 999 | if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ |
702 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); | 1000 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); |
703 | else | 1001 | else |
704 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); | 1002 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); |
705 | lj_err_callermsg(L, msg); | 1003 | lj_err_callermsg(L, msg); |
706 | } | 1004 | } |
707 | 1005 | ||
@@ -711,7 +1009,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...) | |||
711 | const char *msg; | 1009 | const char *msg; |
712 | va_list argp; | 1010 | va_list argp; |
713 | va_start(argp, em); | 1011 | va_start(argp, em); |
714 | msg = lj_str_pushvf(L, err2msg(em), argp); | 1012 | msg = lj_strfmt_pushvf(L, err2msg(em), argp); |
715 | va_end(argp); | 1013 | va_end(argp); |
716 | err_argmsg(L, narg, msg); | 1014 | err_argmsg(L, narg, msg); |
717 | } | 1015 | } |
@@ -741,7 +1039,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname) | |||
741 | TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; | 1039 | TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; |
742 | tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; | 1040 | tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; |
743 | } | 1041 | } |
744 | msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); | 1042 | msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); |
745 | err_argmsg(L, narg, msg); | 1043 | err_argmsg(L, narg, msg); |
746 | } | 1044 | } |
747 | 1045 | ||
@@ -791,7 +1089,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...) | |||
791 | const char *msg; | 1089 | const char *msg; |
792 | va_list argp; | 1090 | va_list argp; |
793 | va_start(argp, fmt); | 1091 | va_start(argp, fmt); |
794 | msg = lj_str_pushvf(L, fmt, argp); | 1092 | msg = lj_strfmt_pushvf(L, fmt, argp); |
795 | va_end(argp); | 1093 | va_end(argp); |
796 | lj_err_callermsg(L, msg); | 1094 | lj_err_callermsg(L, msg); |
797 | return 0; /* unreachable */ | 1095 | return 0; /* unreachable */ |
diff --git a/src/lj_err.h b/src/lj_err.h index 59253b58..bd4de9ae 100644 --- a/src/lj_err.h +++ b/src/lj_err.h | |||
@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg; | |||
23 | LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); | 23 | LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); |
24 | LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); | 24 | LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); |
25 | LJ_FUNC_NORET void lj_err_mem(lua_State *L); | 25 | LJ_FUNC_NORET void lj_err_mem(lua_State *L); |
26 | LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L); | 26 | LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L); |
27 | #if LJ_HASJIT | ||
28 | LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode); | ||
29 | #endif | ||
27 | LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); | 30 | LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); |
28 | LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, | 31 | LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, |
29 | BCLine line, ErrMsg em, va_list argp); | 32 | BCLine line, ErrMsg em, va_list argp); |
@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...); | |||
38 | LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); | 41 | LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); |
39 | LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); | 42 | LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); |
40 | 43 | ||
44 | #if LJ_UNWIND_JIT && !LJ_ABI_WIN | ||
45 | LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info); | ||
46 | LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info); | ||
47 | #else | ||
48 | #define lj_err_register_mcode(base, sz, info) (info) | ||
49 | #define lj_err_deregister_mcode(base, sz, info) UNUSED(base) | ||
50 | #endif | ||
51 | |||
52 | #if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT) | ||
53 | LJ_FUNC void lj_err_verify(void); | ||
54 | #else | ||
55 | #define lj_err_verify() ((void)0) | ||
56 | #endif | ||
57 | |||
41 | #endif | 58 | #endif |
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 2c26a4f1..2e5c776a 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h | |||
@@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable") | |||
67 | ERRDEF(UNPACK, "too many results to unpack") | 67 | ERRDEF(UNPACK, "too many results to unpack") |
68 | ERRDEF(RDRSTR, "reader function must return a string") | 68 | ERRDEF(RDRSTR, "reader function must return a string") |
69 | ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) | 69 | ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) |
70 | ERRDEF(NUMRNG, "number out of range") | ||
70 | ERRDEF(IDXRNG, "index out of range") | 71 | ERRDEF(IDXRNG, "index out of range") |
71 | ERRDEF(BASERNG, "base out of range") | 72 | ERRDEF(BASERNG, "base out of range") |
72 | ERRDEF(LVLRNG, "level out of range") | 73 | ERRDEF(LVLRNG, "level out of range") |
@@ -96,18 +97,12 @@ ERRDEF(STRPATX, "pattern too complex") | |||
96 | ERRDEF(STRCAPI, "invalid capture index") | 97 | ERRDEF(STRCAPI, "invalid capture index") |
97 | ERRDEF(STRCAPN, "too many captures") | 98 | ERRDEF(STRCAPN, "too many captures") |
98 | ERRDEF(STRCAPU, "unfinished capture") | 99 | ERRDEF(STRCAPU, "unfinished capture") |
99 | ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) | 100 | ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format")) |
100 | ERRDEF(STRFMTR, "invalid format (repeated flags)") | ||
101 | ERRDEF(STRFMTW, "invalid format (width or precision too long)") | ||
102 | ERRDEF(STRGSRV, "invalid replacement value (a %s)") | 101 | ERRDEF(STRGSRV, "invalid replacement value (a %s)") |
103 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) | 102 | ERRDEF(BADMODN, "name conflict for module " LUA_QS) |
104 | #if LJ_HASJIT | 103 | #if LJ_HASJIT |
105 | ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") | 104 | ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") |
106 | #if LJ_TARGET_X86ORX64 | ||
107 | ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") | ||
108 | #else | ||
109 | ERRDEF(NOJIT, "JIT compiler disabled") | 105 | ERRDEF(NOJIT, "JIT compiler disabled") |
110 | #endif | ||
111 | #elif defined(LJ_ARCH_NOJIT) | 106 | #elif defined(LJ_ARCH_NOJIT) |
112 | ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") | 107 | ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") |
113 | #else | 108 | #else |
@@ -118,7 +113,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) | |||
118 | /* Lexer/parser errors. */ | 113 | /* Lexer/parser errors. */ |
119 | ERRDEF(XMODE, "attempt to load chunk with wrong mode") | 114 | ERRDEF(XMODE, "attempt to load chunk with wrong mode") |
120 | ERRDEF(XNEAR, "%s near " LUA_QS) | 115 | ERRDEF(XNEAR, "%s near " LUA_QS) |
121 | ERRDEF(XELEM, "lexical element too long") | ||
122 | ERRDEF(XLINES, "chunk has too many lines") | 116 | ERRDEF(XLINES, "chunk has too many lines") |
123 | ERRDEF(XLEVELS, "chunk has too many syntax levels") | 117 | ERRDEF(XLEVELS, "chunk has too many syntax levels") |
124 | ERRDEF(XNUMBER, "malformed number") | 118 | ERRDEF(XNUMBER, "malformed number") |
@@ -186,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") | |||
186 | ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") | 180 | ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") |
187 | #endif | 181 | #endif |
188 | 182 | ||
183 | #if LJ_HASBUFFER | ||
184 | /* String buffer errors. */ | ||
185 | ERRDEF(BUFFER_SELF, "cannot put buffer into itself") | ||
186 | ERRDEF(BUFFER_BADOPT, "bad options table") | ||
187 | ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS) | ||
188 | ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x") | ||
189 | ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d") | ||
190 | ERRDEF(BUFFER_DEPTH, "too deep to serialize") | ||
191 | ERRDEF(BUFFER_DUPKEY, "duplicate table key") | ||
192 | ERRDEF(BUFFER_EOB, "unexpected end of buffer") | ||
193 | ERRDEF(BUFFER_LEFTOV, "left-over data in buffer") | ||
194 | #endif | ||
195 | |||
189 | #undef ERRDEF | 196 | #undef ERRDEF |
190 | 197 | ||
191 | /* Detecting unused error messages: | 198 | /* Detecting unused error messages: |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index f833bc16..13f91333 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_buf.h" | ||
14 | #include "lj_str.h" | 15 | #include "lj_str.h" |
15 | #include "lj_tab.h" | 16 | #include "lj_tab.h" |
16 | #include "lj_frame.h" | 17 | #include "lj_frame.h" |
@@ -27,6 +28,8 @@ | |||
27 | #include "lj_dispatch.h" | 28 | #include "lj_dispatch.h" |
28 | #include "lj_vm.h" | 29 | #include "lj_vm.h" |
29 | #include "lj_strscan.h" | 30 | #include "lj_strscan.h" |
31 | #include "lj_strfmt.h" | ||
32 | #include "lj_serialize.h" | ||
30 | 33 | ||
31 | /* Some local macros to save typing. Undef'd at the end. */ | 34 | /* Some local macros to save typing. Undef'd at the end. */ |
32 | #define IR(ref) (&J->cur.ir[(ref)]) | 35 | #define IR(ref) (&J->cur.ir[(ref)]) |
@@ -79,10 +82,7 @@ static GCstr *argv2str(jit_State *J, TValue *o) | |||
79 | GCstr *s; | 82 | GCstr *s; |
80 | if (!tvisnumber(o)) | 83 | if (!tvisnumber(o)) |
81 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 84 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
82 | if (tvisint(o)) | 85 | s = lj_strfmt_number(J->L, o); |
83 | s = lj_str_fromint(J->L, intV(o)); | ||
84 | else | ||
85 | s = lj_str_fromnum(J->L, &o->n); | ||
86 | setstrV(J->L, o, s); | 86 | setstrV(J->L, o, s); |
87 | return s; | 87 | return s; |
88 | } | 88 | } |
@@ -98,27 +98,102 @@ static ptrdiff_t results_wanted(jit_State *J) | |||
98 | return -1; | 98 | return -1; |
99 | } | 99 | } |
100 | 100 | ||
101 | /* Throw error for unsupported variant of fast function. */ | 101 | /* Trace stitching: add continuation below frame to start a new trace. */ |
102 | LJ_NORET static void recff_nyiu(jit_State *J) | 102 | static void recff_stitch(jit_State *J) |
103 | { | 103 | { |
104 | setfuncV(J->L, &J->errinfo, J->fn); | 104 | ASMFunction cont = lj_cont_stitch; |
105 | lj_trace_err_info(J, LJ_TRERR_NYIFFU); | 105 | lua_State *L = J->L; |
106 | TValue *base = L->base; | ||
107 | BCReg nslot = J->maxslot + 1 + LJ_FR2; | ||
108 | TValue *nframe = base + 1 + LJ_FR2; | ||
109 | const BCIns *pc = frame_pc(base-1); | ||
110 | TValue *pframe = frame_prevl(base-1); | ||
111 | |||
112 | /* Check for this now. Throwing in lj_record_stop messes up the stack. */ | ||
113 | if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap]) | ||
114 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
115 | |||
116 | /* Move func + args up in Lua stack and insert continuation. */ | ||
117 | memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); | ||
118 | setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); | ||
119 | setcont(base-LJ_FR2, cont); | ||
120 | setframe_pc(base, pc); | ||
121 | setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */ | ||
122 | L->base += 2 + LJ_FR2; | ||
123 | L->top += 2 + LJ_FR2; | ||
124 | |||
125 | /* Ditto for the IR. */ | ||
126 | memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); | ||
127 | #if LJ_FR2 | ||
128 | J->base[2] = TREF_FRAME; | ||
129 | J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); | ||
130 | J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; | ||
131 | #else | ||
132 | J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; | ||
133 | #endif | ||
134 | J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); | ||
135 | J->base += 2 + LJ_FR2; | ||
136 | J->baseslot += 2 + LJ_FR2; | ||
137 | J->framedepth++; | ||
138 | |||
139 | lj_record_stop(J, LJ_TRLINK_STITCH, 0); | ||
140 | |||
141 | /* Undo Lua stack changes. */ | ||
142 | memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); | ||
143 | setframe_pc(base-1, pc); | ||
144 | L->base -= 2 + LJ_FR2; | ||
145 | L->top -= 2 + LJ_FR2; | ||
106 | } | 146 | } |
107 | 147 | ||
108 | /* Fallback handler for all fast functions that are not recorded (yet). */ | 148 | /* Fallback handler for fast functions that are not recorded (yet). */ |
109 | static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) | 149 | static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) |
110 | { | 150 | { |
111 | setfuncV(J->L, &J->errinfo, J->fn); | 151 | if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) { |
112 | lj_trace_err_info(J, LJ_TRERR_NYIFF); | 152 | lj_trace_err_info(J, LJ_TRERR_TRACEUV); |
113 | UNUSED(rd); | 153 | } else { |
154 | /* Can only stitch from Lua call. */ | ||
155 | if (J->framedepth && frame_islua(J->L->base-1)) { | ||
156 | BCOp op = bc_op(*frame_pc(J->L->base-1)); | ||
157 | /* Stitched trace cannot start with *M op with variable # of args. */ | ||
158 | if (!(op == BC_CALLM || op == BC_CALLMT || | ||
159 | op == BC_RETM || op == BC_TSETM)) { | ||
160 | switch (J->fn->c.ffid) { | ||
161 | case FF_error: | ||
162 | case FF_debug_sethook: | ||
163 | case FF_jit_flush: | ||
164 | break; /* Don't stitch across special builtins. */ | ||
165 | default: | ||
166 | recff_stitch(J); /* Use trace stitching. */ | ||
167 | rd->nres = -1; | ||
168 | return; | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | /* Otherwise stop trace and return to interpreter. */ | ||
173 | lj_record_stop(J, LJ_TRLINK_RETURN, 0); | ||
174 | rd->nres = -1; | ||
175 | } | ||
114 | } | 176 | } |
115 | 177 | ||
116 | /* C functions can have arbitrary side-effects and are not recorded (yet). */ | 178 | /* Fallback handler for unsupported variants of fast functions. */ |
117 | static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) | 179 | #define recff_nyiu recff_nyi |
180 | |||
181 | /* Must stop the trace for classic C functions with arbitrary side-effects. */ | ||
182 | #define recff_c recff_nyi | ||
183 | |||
184 | /* Emit BUFHDR for the global temporary buffer. */ | ||
185 | static TRef recff_bufhdr(jit_State *J) | ||
118 | { | 186 | { |
119 | setfuncV(J->L, &J->errinfo, J->fn); | 187 | return emitir(IRT(IR_BUFHDR, IRT_PGC), |
120 | lj_trace_err_info(J, LJ_TRERR_NYICF); | 188 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); |
121 | UNUSED(rd); | 189 | } |
190 | |||
191 | /* Emit TMPREF. */ | ||
192 | static TRef recff_tmpref(jit_State *J, TRef tr, int mode) | ||
193 | { | ||
194 | if (!LJ_DUALNUM && tref_isinteger(tr)) | ||
195 | tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); | ||
196 | return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode); | ||
122 | } | 197 | } |
123 | 198 | ||
124 | /* -- Base library fast functions ----------------------------------------- */ | 199 | /* -- Base library fast functions ----------------------------------------- */ |
@@ -135,7 +210,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd) | |||
135 | uint32_t t; | 210 | uint32_t t; |
136 | if (tvisnumber(&rd->argv[0])) | 211 | if (tvisnumber(&rd->argv[0])) |
137 | t = ~LJ_TNUMX; | 212 | t = ~LJ_TNUMX; |
138 | else if (LJ_64 && tvislightud(&rd->argv[0])) | 213 | else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0])) |
139 | t = ~LJ_TLIGHTUD; | 214 | t = ~LJ_TLIGHTUD; |
140 | else | 215 | else |
141 | t = ~itype(&rd->argv[0]); | 216 | t = ~itype(&rd->argv[0]); |
@@ -167,7 +242,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) | |||
167 | ix.tab = tr; | 242 | ix.tab = tr; |
168 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | 243 | copyTV(J->L, &ix.tabv, &rd->argv[0]); |
169 | lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ | 244 | lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ |
170 | fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); | 245 | fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META); |
171 | mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; | 246 | mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; |
172 | emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); | 247 | emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); |
173 | if (!tref_isnil(mt)) | 248 | if (!tref_isnil(mt)) |
@@ -220,7 +295,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd) | |||
220 | if (tref_isstr(tr)) | 295 | if (tref_isstr(tr)) |
221 | J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); | 296 | J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); |
222 | else if (tref_istab(tr)) | 297 | else if (tref_istab(tr)) |
223 | J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); | 298 | J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL); |
224 | /* else: Interpreter will throw. */ | 299 | /* else: Interpreter will throw. */ |
225 | UNUSED(rd); | 300 | UNUSED(rd); |
226 | } | 301 | } |
@@ -233,9 +308,9 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) | |||
233 | if (strV(tv)->len == 1) { | 308 | if (strV(tv)->len == 1) { |
234 | emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); | 309 | emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); |
235 | } else { | 310 | } else { |
236 | TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); | 311 | TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); |
237 | TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); | 312 | TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); |
238 | emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); | 313 | emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#')); |
239 | } | 314 | } |
240 | return 0; | 315 | return 0; |
241 | } else { /* select(n, ...) */ | 316 | } else { /* select(n, ...) */ |
@@ -263,7 +338,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) | |||
263 | J->base[i] = J->base[start+i]; | 338 | J->base[i] = J->base[start+i]; |
264 | } /* else: Interpreter will throw. */ | 339 | } /* else: Interpreter will throw. */ |
265 | } else { | 340 | } else { |
266 | recff_nyiu(J); | 341 | recff_nyiu(J, rd); |
342 | return; | ||
267 | } | 343 | } |
268 | } /* else: Interpreter will throw. */ | 344 | } /* else: Interpreter will throw. */ |
269 | } | 345 | } |
@@ -274,14 +350,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) | |||
274 | TRef base = J->base[1]; | 350 | TRef base = J->base[1]; |
275 | if (tr && !tref_isnil(base)) { | 351 | if (tr && !tref_isnil(base)) { |
276 | base = lj_opt_narrow_toint(J, base); | 352 | base = lj_opt_narrow_toint(J, base); |
277 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) | 353 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) { |
278 | recff_nyiu(J); | 354 | recff_nyiu(J, rd); |
355 | return; | ||
356 | } | ||
279 | } | 357 | } |
280 | if (tref_isnumber_str(tr)) { | 358 | if (tref_isnumber_str(tr)) { |
281 | if (tref_isstr(tr)) { | 359 | if (tref_isstr(tr)) { |
282 | TValue tmp; | 360 | TValue tmp; |
283 | if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) | 361 | if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) { |
284 | recff_nyiu(J); /* Would need an inverted STRTO for this case. */ | 362 | recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */ |
363 | return; | ||
364 | } | ||
285 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | 365 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); |
286 | } | 366 | } |
287 | #if LJ_HASFFI | 367 | #if LJ_HASFFI |
@@ -313,10 +393,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) | |||
313 | int errcode; | 393 | int errcode; |
314 | TValue argv0; | 394 | TValue argv0; |
315 | /* Temporarily insert metamethod below object. */ | 395 | /* Temporarily insert metamethod below object. */ |
316 | J->base[1] = J->base[0]; | 396 | J->base[1+LJ_FR2] = J->base[0]; |
317 | J->base[0] = ix.mobj; | 397 | J->base[0] = ix.mobj; |
318 | copyTV(J->L, &argv0, &rd->argv[0]); | 398 | copyTV(J->L, &argv0, &rd->argv[0]); |
319 | copyTV(J->L, &rd->argv[1], &rd->argv[0]); | 399 | copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); |
320 | copyTV(J->L, &rd->argv[0], &ix.mobjv); | 400 | copyTV(J->L, &rd->argv[0], &ix.mobjv); |
321 | /* Need to protect lj_record_tailcall because it may throw. */ | 401 | /* Need to protect lj_record_tailcall because it may throw. */ |
322 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); | 402 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); |
@@ -336,13 +416,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd) | |||
336 | if (tref_isstr(tr)) { | 416 | if (tref_isstr(tr)) { |
337 | /* Ignore __tostring in the string base metatable. */ | 417 | /* Ignore __tostring in the string base metatable. */ |
338 | /* Pass on result in J->base[0]. */ | 418 | /* Pass on result in J->base[0]. */ |
339 | } else if (!recff_metacall(J, rd, MM_tostring)) { | 419 | } else if (tr && !recff_metacall(J, rd, MM_tostring)) { |
340 | if (tref_isnumber(tr)) { | 420 | if (tref_isnumber(tr)) { |
341 | J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | 421 | J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, |
422 | tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT); | ||
342 | } else if (tref_ispri(tr)) { | 423 | } else if (tref_ispri(tr)) { |
343 | J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); | 424 | J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0])); |
344 | } else { | 425 | } else { |
345 | recff_nyiu(J); | 426 | recff_nyiu(J, rd); |
427 | return; | ||
346 | } | 428 | } |
347 | } | 429 | } |
348 | } | 430 | } |
@@ -364,15 +446,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) | |||
364 | } /* else: Interpreter will throw. */ | 446 | } /* else: Interpreter will throw. */ |
365 | } | 447 | } |
366 | 448 | ||
367 | static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) | 449 | static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) |
368 | { | 450 | { |
369 | TRef tr = J->base[0]; | 451 | TRef tr = J->base[0]; |
370 | if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && | 452 | if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && |
371 | recff_metacall(J, rd, MM_ipairs))) { | 453 | recff_metacall(J, rd, MM_pairs + rd->data))) { |
372 | if (tref_istab(tr)) { | 454 | if (tref_istab(tr)) { |
373 | J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); | 455 | J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); |
374 | J->base[1] = tr; | 456 | J->base[1] = tr; |
375 | J->base[2] = lj_ir_kint(J, 0); | 457 | J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL; |
376 | rd->nres = 3; | 458 | rd->nres = 3; |
377 | } /* else: Interpreter will throw. */ | 459 | } /* else: Interpreter will throw. */ |
378 | } | 460 | } |
@@ -381,8 +463,13 @@ static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) | |||
381 | static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) | 463 | static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) |
382 | { | 464 | { |
383 | if (J->maxslot >= 1) { | 465 | if (J->maxslot >= 1) { |
466 | #if LJ_FR2 | ||
467 | /* Shift function arguments up. */ | ||
468 | memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); | ||
469 | #endif | ||
384 | lj_record_call(J, 0, J->maxslot - 1); | 470 | lj_record_call(J, 0, J->maxslot - 1); |
385 | rd->nres = -1; /* Pending call. */ | 471 | rd->nres = -1; /* Pending call. */ |
472 | J->needsnap = 1; /* Start catching on-trace errors. */ | ||
386 | } /* else: Interpreter will throw. */ | 473 | } /* else: Interpreter will throw. */ |
387 | } | 474 | } |
388 | 475 | ||
@@ -406,6 +493,10 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) | |||
406 | copyTV(J->L, &argv1, &rd->argv[1]); | 493 | copyTV(J->L, &argv1, &rd->argv[1]); |
407 | copyTV(J->L, &rd->argv[0], &argv1); | 494 | copyTV(J->L, &rd->argv[0], &argv1); |
408 | copyTV(J->L, &rd->argv[1], &argv0); | 495 | copyTV(J->L, &rd->argv[1], &argv0); |
496 | #if LJ_FR2 | ||
497 | /* Shift function arguments up. */ | ||
498 | memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); | ||
499 | #endif | ||
409 | /* Need to protect lj_record_call because it may throw. */ | 500 | /* Need to protect lj_record_call because it may throw. */ |
410 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); | 501 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); |
411 | /* Always undo Lua stack swap to avoid confusing the interpreter. */ | 502 | /* Always undo Lua stack swap to avoid confusing the interpreter. */ |
@@ -414,7 +505,54 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) | |||
414 | if (errcode) | 505 | if (errcode) |
415 | lj_err_throw(J->L, errcode); /* Propagate errors. */ | 506 | lj_err_throw(J->L, errcode); /* Propagate errors. */ |
416 | rd->nres = -1; /* Pending call. */ | 507 | rd->nres = -1; /* Pending call. */ |
508 | J->needsnap = 1; /* Start catching on-trace errors. */ | ||
509 | } /* else: Interpreter will throw. */ | ||
510 | } | ||
511 | |||
512 | static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) | ||
513 | { | ||
514 | TRef tr = J->base[0]; | ||
515 | /* Only support getfenv(0) for now. */ | ||
516 | if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) { | ||
517 | TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0); | ||
518 | J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV); | ||
519 | return; | ||
520 | } | ||
521 | recff_nyiu(J, rd); | ||
522 | } | ||
523 | |||
524 | static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd) | ||
525 | { | ||
526 | #if LJ_BE | ||
527 | /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, | ||
528 | ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. | ||
529 | */ | ||
530 | recff_nyi(J, rd); | ||
531 | #else | ||
532 | TRef tab = J->base[0]; | ||
533 | if (tref_istab(tab)) { | ||
534 | RecordIndex ix; | ||
535 | cTValue *keyv; | ||
536 | ix.tab = tab; | ||
537 | if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */ | ||
538 | ix.key = lj_ir_kint(J, 0); | ||
539 | keyv = niltvg(J2G(J)); | ||
540 | } else { | ||
541 | TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); | ||
542 | ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp); | ||
543 | keyv = &rd->argv[1]; | ||
544 | } | ||
545 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | ||
546 | ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv); | ||
547 | /* Omit the value, if not used by the caller. */ | ||
548 | ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) && | ||
549 | bc_b(frame_pc(J->L->base-1)[-1])-1 < 2); | ||
550 | ix.mobj = 0; /* We don't need the next index. */ | ||
551 | rd->nres = lj_record_next(J, &ix); | ||
552 | J->base[0] = ix.key; | ||
553 | J->base[1] = ix.val; | ||
417 | } /* else: Interpreter will throw. */ | 554 | } /* else: Interpreter will throw. */ |
555 | #endif | ||
418 | } | 556 | } |
419 | 557 | ||
420 | /* -- Math library fast functions ----------------------------------------- */ | 558 | /* -- Math library fast functions ----------------------------------------- */ |
@@ -422,7 +560,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) | |||
422 | static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) | 560 | static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) |
423 | { | 561 | { |
424 | TRef tr = lj_ir_tonum(J, J->base[0]); | 562 | TRef tr = lj_ir_tonum(J, J->base[0]); |
425 | J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); | 563 | J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS)); |
426 | UNUSED(rd); | 564 | UNUSED(rd); |
427 | } | 565 | } |
428 | 566 | ||
@@ -475,7 +613,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) | |||
475 | { | 613 | { |
476 | TRef tr = lj_ir_tonum(J, J->base[0]); | 614 | TRef tr = lj_ir_tonum(J, J->base[0]); |
477 | TRef tr2 = lj_ir_tonum(J, J->base[1]); | 615 | TRef tr2 = lj_ir_tonum(J, J->base[1]); |
478 | J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2); | 616 | J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2); |
479 | UNUSED(rd); | 617 | UNUSED(rd); |
480 | } | 618 | } |
481 | 619 | ||
@@ -492,51 +630,12 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd) | |||
492 | UNUSED(rd); | 630 | UNUSED(rd); |
493 | } | 631 | } |
494 | 632 | ||
495 | /* Record math.asin, math.acos, math.atan. */ | 633 | static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd) |
496 | static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd) | ||
497 | { | ||
498 | TRef y = lj_ir_tonum(J, J->base[0]); | ||
499 | TRef x = lj_ir_knum_one(J); | ||
500 | uint32_t ffid = rd->data; | ||
501 | if (ffid != FF_math_atan) { | ||
502 | TRef tmp = emitir(IRTN(IR_MUL), y, y); | ||
503 | tmp = emitir(IRTN(IR_SUB), x, tmp); | ||
504 | tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); | ||
505 | if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; } | ||
506 | } | ||
507 | J->base[0] = emitir(IRTN(IR_ATAN2), y, x); | ||
508 | } | ||
509 | |||
510 | static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd) | ||
511 | { | 634 | { |
512 | TRef tr = lj_ir_tonum(J, J->base[0]); | 635 | TRef tr = lj_ir_tonum(J, J->base[0]); |
513 | J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); | 636 | J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); |
514 | } | 637 | } |
515 | 638 | ||
516 | static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) | ||
517 | { | ||
518 | TRef tr = J->base[0]; | ||
519 | if (tref_isinteger(tr)) { | ||
520 | J->base[0] = tr; | ||
521 | J->base[1] = lj_ir_kint(J, 0); | ||
522 | } else { | ||
523 | TRef trt; | ||
524 | tr = lj_ir_tonum(J, tr); | ||
525 | trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC); | ||
526 | J->base[0] = trt; | ||
527 | J->base[1] = emitir(IRTN(IR_SUB), tr, trt); | ||
528 | } | ||
529 | rd->nres = 2; | ||
530 | } | ||
531 | |||
532 | static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd) | ||
533 | { | ||
534 | TRef tr = lj_ir_tonum(J, J->base[0]); | ||
535 | TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0])); | ||
536 | J->base[0] = emitir(IRTN(IR_MUL), tr, trm); | ||
537 | UNUSED(rd); | ||
538 | } | ||
539 | |||
540 | static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) | 639 | static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) |
541 | { | 640 | { |
542 | J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], | 641 | J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], |
@@ -567,7 +666,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) | |||
567 | GCudata *ud = udataV(&J->fn->c.upvalue[0]); | 666 | GCudata *ud = udataV(&J->fn->c.upvalue[0]); |
568 | TRef tr, one; | 667 | TRef tr, one; |
569 | lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ | 668 | lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ |
570 | tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); | 669 | tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud))); |
571 | one = lj_ir_knum_one(J); | 670 | one = lj_ir_knum_one(J); |
572 | tr = emitir(IRTN(IR_SUB), tr, one); | 671 | tr = emitir(IRTN(IR_SUB), tr, one); |
573 | if (J->base[0]) { | 672 | if (J->base[0]) { |
@@ -591,48 +690,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) | |||
591 | 690 | ||
592 | /* -- Bit library fast functions ------------------------------------------ */ | 691 | /* -- Bit library fast functions ------------------------------------------ */ |
593 | 692 | ||
594 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ | 693 | /* Record bit.tobit. */ |
694 | static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd) | ||
695 | { | ||
696 | TRef tr = J->base[0]; | ||
697 | #if LJ_HASFFI | ||
698 | if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; } | ||
699 | #endif | ||
700 | J->base[0] = lj_opt_narrow_tobit(J, tr); | ||
701 | UNUSED(rd); | ||
702 | } | ||
703 | |||
704 | /* Record unary bit.bnot, bit.bswap. */ | ||
595 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) | 705 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) |
596 | { | 706 | { |
597 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); | 707 | #if LJ_HASFFI |
598 | J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); | 708 | if (recff_bit64_unary(J, rd)) |
709 | return; | ||
710 | #endif | ||
711 | J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0); | ||
599 | } | 712 | } |
600 | 713 | ||
601 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ | 714 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ |
602 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) | 715 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) |
603 | { | 716 | { |
604 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); | 717 | #if LJ_HASFFI |
605 | uint32_t op = rd->data; | 718 | if (recff_bit64_nary(J, rd)) |
606 | BCReg i; | 719 | return; |
607 | for (i = 1; J->base[i] != 0; i++) | 720 | #endif |
608 | tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); | 721 | { |
609 | J->base[0] = tr; | 722 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
723 | uint32_t ot = IRTI(rd->data); | ||
724 | BCReg i; | ||
725 | for (i = 1; J->base[i] != 0; i++) | ||
726 | tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i])); | ||
727 | J->base[0] = tr; | ||
728 | } | ||
610 | } | 729 | } |
611 | 730 | ||
612 | /* Record bit shifts. */ | 731 | /* Record bit shifts. */ |
613 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) | 732 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) |
614 | { | 733 | { |
615 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); | 734 | #if LJ_HASFFI |
616 | TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); | 735 | if (recff_bit64_shift(J, rd)) |
617 | IROp op = (IROp)rd->data; | 736 | return; |
618 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | 737 | #endif |
619 | !tref_isk(tsh)) | 738 | { |
620 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); | 739 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
740 | TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); | ||
741 | IROp op = (IROp)rd->data; | ||
742 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | ||
743 | !tref_isk(tsh)) | ||
744 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); | ||
621 | #ifdef LJ_TARGET_UNIFYROT | 745 | #ifdef LJ_TARGET_UNIFYROT |
622 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { | 746 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { |
623 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; | 747 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; |
624 | tsh = emitir(IRTI(IR_NEG), tsh, tsh); | 748 | tsh = emitir(IRTI(IR_NEG), tsh, tsh); |
749 | } | ||
750 | #endif | ||
751 | J->base[0] = emitir(IRTI(op), tr, tsh); | ||
625 | } | 752 | } |
753 | } | ||
754 | |||
755 | static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) | ||
756 | { | ||
757 | #if LJ_HASFFI | ||
758 | TRef hdr = recff_bufhdr(J); | ||
759 | TRef tr = recff_bit64_tohex(J, rd, hdr); | ||
760 | J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
761 | #else | ||
762 | recff_nyiu(J, rd); /* Don't bother working around this NYI. */ | ||
626 | #endif | 763 | #endif |
627 | J->base[0] = emitir(IRTI(op), tr, tsh); | ||
628 | } | 764 | } |
629 | 765 | ||
630 | /* -- String library fast functions --------------------------------------- */ | 766 | /* -- String library fast functions --------------------------------------- */ |
631 | 767 | ||
632 | static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) | 768 | /* Specialize to relative starting position for string. */ |
769 | static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr, | ||
770 | TRef trlen, TRef tr0) | ||
633 | { | 771 | { |
634 | J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); | 772 | int32_t start = *st; |
635 | UNUSED(rd); | 773 | if (start < 0) { |
774 | emitir(IRTGI(IR_LT), tr, tr0); | ||
775 | tr = emitir(IRTI(IR_ADD), trlen, tr); | ||
776 | start = start + (int32_t)s->len; | ||
777 | emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0); | ||
778 | if (start < 0) { | ||
779 | tr = tr0; | ||
780 | start = 0; | ||
781 | } | ||
782 | } else if (start == 0) { | ||
783 | emitir(IRTGI(IR_EQ), tr, tr0); | ||
784 | tr = tr0; | ||
785 | } else { | ||
786 | tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1)); | ||
787 | emitir(IRTGI(IR_GE), tr, tr0); | ||
788 | start--; | ||
789 | } | ||
790 | *st = start; | ||
791 | return tr; | ||
636 | } | 792 | } |
637 | 793 | ||
638 | /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ | 794 | /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ |
@@ -679,39 +835,21 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
679 | } else if ((MSize)end <= str->len) { | 835 | } else if ((MSize)end <= str->len) { |
680 | emitir(IRTGI(IR_ULE), trend, trlen); | 836 | emitir(IRTGI(IR_ULE), trend, trlen); |
681 | } else { | 837 | } else { |
682 | emitir(IRTGI(IR_GT), trend, trlen); | 838 | emitir(IRTGI(IR_UGT), trend, trlen); |
683 | end = (int32_t)str->len; | 839 | end = (int32_t)str->len; |
684 | trend = trlen; | 840 | trend = trlen; |
685 | } | 841 | } |
686 | if (start < 0) { | 842 | trstart = recff_string_start(J, str, &start, trstart, trlen, tr0); |
687 | emitir(IRTGI(IR_LT), trstart, tr0); | ||
688 | trstart = emitir(IRTI(IR_ADD), trlen, trstart); | ||
689 | start = start+(int32_t)str->len; | ||
690 | emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0); | ||
691 | if (start < 0) { | ||
692 | trstart = tr0; | ||
693 | start = 0; | ||
694 | } | ||
695 | } else { | ||
696 | if (start == 0) { | ||
697 | emitir(IRTGI(IR_EQ), trstart, tr0); | ||
698 | trstart = tr0; | ||
699 | } else { | ||
700 | trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); | ||
701 | emitir(IRTGI(IR_GE), trstart, tr0); | ||
702 | start--; | ||
703 | } | ||
704 | } | ||
705 | if (rd->data) { /* Return string.sub result. */ | 843 | if (rd->data) { /* Return string.sub result. */ |
706 | if (end - start >= 0) { | 844 | if (end - start >= 0) { |
707 | /* Also handle empty range here, to avoid extra traces. */ | 845 | /* Also handle empty range here, to avoid extra traces. */ |
708 | TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); | 846 | TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); |
709 | emitir(IRTGI(IR_GE), trslen, tr0); | 847 | emitir(IRTGI(IR_GE), trslen, tr0); |
710 | trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); | 848 | trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); |
711 | J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); | 849 | J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); |
712 | } else { /* Range underflow: return empty string. */ | 850 | } else { /* Range underflow: return empty string. */ |
713 | emitir(IRTGI(IR_LT), trend, trstart); | 851 | emitir(IRTGI(IR_LT), trend, trstart); |
714 | J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); | 852 | J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); |
715 | } | 853 | } |
716 | } else { /* Return string.byte result(s). */ | 854 | } else { /* Return string.byte result(s). */ |
717 | ptrdiff_t i, len = end - start; | 855 | ptrdiff_t i, len = end - start; |
@@ -723,7 +861,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
723 | rd->nres = len; | 861 | rd->nres = len; |
724 | for (i = 0; i < len; i++) { | 862 | for (i = 0; i < len; i++) { |
725 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); | 863 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); |
726 | tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); | 864 | tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp); |
727 | J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); | 865 | J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); |
728 | } | 866 | } |
729 | } else { /* Empty range or range underflow: return no results. */ | 867 | } else { /* Empty range or range underflow: return no results. */ |
@@ -733,48 +871,527 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
733 | } | 871 | } |
734 | } | 872 | } |
735 | 873 | ||
736 | /* -- Table library fast functions ---------------------------------------- */ | 874 | static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) |
737 | |||
738 | static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd) | ||
739 | { | 875 | { |
740 | if (tref_istab(J->base[0])) | 876 | TRef k255 = lj_ir_kint(J, 255); |
741 | J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); | 877 | BCReg i; |
742 | /* else: Interpreter will throw. */ | 878 | for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */ |
879 | TRef tr = lj_opt_narrow_toint(J, J->base[i]); | ||
880 | emitir(IRTGI(IR_ULE), tr, k255); | ||
881 | J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR); | ||
882 | } | ||
883 | if (i > 1) { /* Concatenate the strings, if there's more than one. */ | ||
884 | TRef hdr = recff_bufhdr(J), tr = hdr; | ||
885 | for (i = 0; J->base[i] != 0; i++) | ||
886 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]); | ||
887 | J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
888 | } else if (i == 0) { | ||
889 | J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); | ||
890 | } | ||
743 | UNUSED(rd); | 891 | UNUSED(rd); |
744 | } | 892 | } |
745 | 893 | ||
746 | static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) | 894 | static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) |
747 | { | 895 | { |
748 | TRef tab = J->base[0]; | 896 | TRef str = lj_ir_tostr(J, J->base[0]); |
749 | rd->nres = 0; | 897 | TRef rep = lj_opt_narrow_toint(J, J->base[1]); |
750 | if (tref_istab(tab)) { | 898 | TRef hdr, tr, str2 = 0; |
751 | if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ | 899 | if (!tref_isnil(J->base[2])) { |
752 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); | 900 | TRef sep = lj_ir_tostr(J, J->base[2]); |
753 | GCtab *t = tabV(&rd->argv[0]); | 901 | int32_t vrep = argv2int(J, &rd->argv[1]); |
754 | MSize len = lj_tab_len(t); | 902 | emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); |
755 | emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); | 903 | if (vrep > 1) { |
756 | if (len) { | 904 | TRef hdr2 = recff_bufhdr(J); |
757 | RecordIndex ix; | 905 | TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep); |
758 | ix.tab = tab; | 906 | tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str); |
759 | ix.key = trlen; | 907 | str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2); |
760 | settabV(J->L, &ix.tabv, t); | 908 | } |
761 | setintV(&ix.keyv, len); | 909 | } |
762 | ix.idxchain = 0; | 910 | tr = hdr = recff_bufhdr(J); |
763 | if (results_wanted(J) != 0) { /* Specialize load only if needed. */ | 911 | if (str2) { |
764 | ix.val = 0; | 912 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str); |
765 | J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ | 913 | str = str2; |
766 | rd->nres = 1; | 914 | rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); |
767 | /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ | 915 | } |
916 | tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep); | ||
917 | J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
918 | } | ||
919 | |||
920 | static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) | ||
921 | { | ||
922 | TRef str = lj_ir_tostr(J, J->base[0]); | ||
923 | TRef hdr = recff_bufhdr(J); | ||
924 | TRef tr = lj_ir_call(J, rd->data, hdr, str); | ||
925 | J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
926 | } | ||
927 | |||
928 | static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) | ||
929 | { | ||
930 | TRef trstr = lj_ir_tostr(J, J->base[0]); | ||
931 | TRef trpat = lj_ir_tostr(J, J->base[1]); | ||
932 | TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN); | ||
933 | TRef tr0 = lj_ir_kint(J, 0); | ||
934 | TRef trstart; | ||
935 | GCstr *str = argv2str(J, &rd->argv[0]); | ||
936 | GCstr *pat = argv2str(J, &rd->argv[1]); | ||
937 | int32_t start; | ||
938 | J->needsnap = 1; | ||
939 | if (tref_isnil(J->base[2])) { | ||
940 | trstart = lj_ir_kint(J, 1); | ||
941 | start = 1; | ||
942 | } else { | ||
943 | trstart = lj_opt_narrow_toint(J, J->base[2]); | ||
944 | start = argv2int(J, &rd->argv[2]); | ||
945 | } | ||
946 | trstart = recff_string_start(J, str, &start, trstart, trlen, tr0); | ||
947 | if ((MSize)start <= str->len) { | ||
948 | emitir(IRTGI(IR_ULE), trstart, trlen); | ||
949 | } else { | ||
950 | emitir(IRTGI(IR_UGT), trstart, trlen); | ||
951 | #if LJ_52 | ||
952 | J->base[0] = TREF_NIL; | ||
953 | return; | ||
954 | #else | ||
955 | trstart = trlen; | ||
956 | start = str->len; | ||
957 | #endif | ||
958 | } | ||
959 | /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */ | ||
960 | if ((J->base[2] && tref_istruecond(J->base[3])) || | ||
961 | (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)), | ||
962 | !lj_str_haspattern(pat))) { /* Search for fixed string. */ | ||
963 | TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); | ||
964 | TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0); | ||
965 | TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart); | ||
966 | TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN); | ||
967 | TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen); | ||
968 | TRef trp0 = lj_ir_kkptr(J, NULL); | ||
969 | if (lj_str_find(strdata(str)+(MSize)start, strdata(pat), | ||
970 | str->len-(MSize)start, pat->len)) { | ||
971 | TRef pos; | ||
972 | emitir(IRTG(IR_NE, IRT_PGC), tr, trp0); | ||
973 | /* Recompute offset. trsptr may not point into trstr after folding. */ | ||
974 | pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart); | ||
975 | J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); | ||
976 | J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); | ||
977 | rd->nres = 2; | ||
978 | } else { | ||
979 | emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0); | ||
980 | J->base[0] = TREF_NIL; | ||
981 | } | ||
982 | } else { /* Search for pattern. */ | ||
983 | recff_nyiu(J, rd); | ||
984 | return; | ||
985 | } | ||
986 | } | ||
987 | |||
988 | static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx) | ||
989 | { | ||
990 | ptrdiff_t arg = sbufx; | ||
991 | TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]); | ||
992 | GCstr *fmt = argv2str(J, &rd->argv[arg]); | ||
993 | FormatState fs; | ||
994 | SFormat sf; | ||
995 | /* Specialize to the format string. */ | ||
996 | emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt)); | ||
997 | lj_strfmt_init(&fs, strdata(fmt), fmt->len); | ||
998 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */ | ||
999 | TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg]; | ||
1000 | TRef trsf = lj_ir_kint(J, (int32_t)sf); | ||
1001 | IRCallID id; | ||
1002 | switch (STRFMT_TYPE(sf)) { | ||
1003 | case STRFMT_LIT: | ||
1004 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, | ||
1005 | lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); | ||
1006 | break; | ||
1007 | case STRFMT_INT: | ||
1008 | id = IRCALL_lj_strfmt_putfnum_int; | ||
1009 | handle_int: | ||
1010 | if (!tref_isinteger(tra)) { | ||
1011 | #if LJ_HASFFI | ||
1012 | if (tref_iscdata(tra)) { | ||
1013 | tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]); | ||
1014 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); | ||
1015 | break; | ||
768 | } | 1016 | } |
769 | ix.val = TREF_NIL; | 1017 | #endif |
770 | lj_record_idx(J, &ix); /* Remove value. */ | 1018 | goto handle_num; |
1019 | } | ||
1020 | if (sf == STRFMT_INT) { /* Shortcut for plain %d. */ | ||
1021 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, | ||
1022 | emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); | ||
1023 | } else { | ||
1024 | #if LJ_HASFFI | ||
1025 | tra = emitir(IRT(IR_CONV, IRT_U64), tra, | ||
1026 | (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT)); | ||
1027 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); | ||
1028 | lj_needsplit(J); | ||
1029 | #else | ||
1030 | recff_nyiu(J, rd); /* Don't bother working around this NYI. */ | ||
1031 | return; | ||
1032 | #endif | ||
1033 | } | ||
1034 | break; | ||
1035 | case STRFMT_UINT: | ||
1036 | id = IRCALL_lj_strfmt_putfnum_uint; | ||
1037 | goto handle_int; | ||
1038 | case STRFMT_NUM: | ||
1039 | id = IRCALL_lj_strfmt_putfnum; | ||
1040 | handle_num: | ||
1041 | tra = lj_ir_tonum(J, tra); | ||
1042 | tr = lj_ir_call(J, id, tr, trsf, tra); | ||
1043 | if (LJ_SOFTFP32) lj_needsplit(J); | ||
1044 | break; | ||
1045 | case STRFMT_STR: | ||
1046 | if (!tref_isstr(tra)) { | ||
1047 | recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */ | ||
1048 | /* NYI: also buffers. */ | ||
1049 | return; | ||
771 | } | 1050 | } |
772 | } else { /* Complex case: remove in the middle. */ | 1051 | if (sf == STRFMT_STR) /* Shortcut for plain %s. */ |
773 | recff_nyiu(J); | 1052 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra); |
1053 | else if ((sf & STRFMT_T_QUOTED)) | ||
1054 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); | ||
1055 | else | ||
1056 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra); | ||
1057 | break; | ||
1058 | case STRFMT_CHAR: | ||
1059 | tra = lj_opt_narrow_toint(J, tra); | ||
1060 | if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */ | ||
1061 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, | ||
1062 | emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); | ||
1063 | else | ||
1064 | tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); | ||
1065 | break; | ||
1066 | case STRFMT_PTR: /* NYI */ | ||
1067 | case STRFMT_ERR: | ||
1068 | default: | ||
1069 | recff_nyiu(J, rd); | ||
1070 | return; | ||
1071 | } | ||
1072 | } | ||
1073 | if (sbufx) { | ||
1074 | emitir(IRT(IR_USE, IRT_NIL), tr, 0); | ||
1075 | } else { | ||
1076 | J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
1077 | } | ||
1078 | } | ||
1079 | |||
1080 | static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) | ||
1081 | { | ||
1082 | recff_format(J, rd, recff_bufhdr(J), 0); | ||
1083 | } | ||
1084 | |||
1085 | /* -- Buffer library fast functions --------------------------------------- */ | ||
1086 | |||
1087 | #if LJ_HASBUFFER | ||
1088 | |||
1089 | static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud) | ||
1090 | { | ||
1091 | return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L); | ||
1092 | } | ||
1093 | |||
1094 | static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val) | ||
1095 | { | ||
1096 | TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L); | ||
1097 | emitir(IRT(IR_FSTORE, IRT_PGC), fref, val); | ||
1098 | } | ||
1099 | |||
1100 | static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl) | ||
1101 | { | ||
1102 | return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl); | ||
1103 | } | ||
1104 | |||
1105 | static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val) | ||
1106 | { | ||
1107 | TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl); | ||
1108 | emitir(IRT(IR_FSTORE, IRT_PTR), fref, val); | ||
1109 | } | ||
1110 | |||
1111 | static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw) | ||
1112 | { | ||
1113 | TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr); | ||
1114 | if (LJ_64) | ||
1115 | len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); | ||
1116 | return len; | ||
1117 | } | ||
1118 | |||
1119 | /* Emit typecheck for string buffer. */ | ||
1120 | static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, int arg) | ||
1121 | { | ||
1122 | TRef trtype, ud = J->base[arg]; | ||
1123 | if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1124 | trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); | ||
1125 | emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER)); | ||
1126 | J->needsnap = 1; | ||
1127 | return ud; | ||
1128 | } | ||
1129 | |||
1130 | /* Emit BUFHDR for write to extended string buffer. */ | ||
1131 | static TRef recff_sbufx_write(jit_State *J, TRef ud) | ||
1132 | { | ||
1133 | TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata))); | ||
1134 | return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE); | ||
1135 | } | ||
1136 | |||
1137 | /* Check for integer in range for the buffer API. */ | ||
1138 | static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, int arg) | ||
1139 | { | ||
1140 | TRef tr = J->base[arg]; | ||
1141 | TRef trlim = lj_ir_kint(J, LJ_MAX_BUF); | ||
1142 | if (tref_isinteger(tr)) { | ||
1143 | emitir(IRTGI(IR_ULE), tr, trlim); | ||
1144 | } else if (tref_isnum(tr)) { | ||
1145 | tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); | ||
1146 | emitir(IRTGI(IR_ULE), tr, trlim); | ||
1147 | #if LJ_HASFFI | ||
1148 | } else if (tref_iscdata(tr)) { | ||
1149 | tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]); | ||
1150 | emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF)); | ||
1151 | tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE); | ||
1152 | #else | ||
1153 | UNUSED(rd); | ||
1154 | #endif | ||
1155 | } else { | ||
1156 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1157 | } | ||
1158 | return tr; | ||
1159 | } | ||
1160 | |||
1161 | static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd) | ||
1162 | { | ||
1163 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1164 | SBufExt *sbx = bufV(&rd->argv[0]); | ||
1165 | int iscow = (int)sbufiscow(sbx); | ||
1166 | TRef trl = recff_sbufx_get_L(J, ud); | ||
1167 | TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW)); | ||
1168 | TRef zero = lj_ir_kint(J, 0); | ||
1169 | emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero); | ||
1170 | if (iscow) { | ||
1171 | trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, | ||
1172 | LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) : | ||
1173 | lj_ir_kint(J, SBUF_FLAG_COW)); | ||
1174 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero); | ||
1175 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero); | ||
1176 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero); | ||
1177 | recff_sbufx_set_L(J, ud, trl); | ||
1178 | emitir(IRT(IR_FSTORE, IRT_PGC), | ||
1179 | emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero); | ||
1180 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero); | ||
1181 | } else { | ||
1182 | TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B); | ||
1183 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb); | ||
1184 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb); | ||
1185 | } | ||
1186 | } | ||
1187 | |||
1188 | static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd) | ||
1189 | { | ||
1190 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1191 | TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); | ||
1192 | TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); | ||
1193 | TRef len = recff_sbufx_len(J, trr, trw); | ||
1194 | TRef trn = recff_sbufx_checkint(J, rd, 1); | ||
1195 | len = emitir(IRTI(IR_MIN), len, trn); | ||
1196 | trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len); | ||
1197 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); | ||
1198 | } | ||
1199 | |||
1200 | static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd) | ||
1201 | { | ||
1202 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1203 | TRef trbuf = recff_sbufx_write(J, ud); | ||
1204 | TRef tr = J->base[1]; | ||
1205 | if (tref_isstr(tr)) { | ||
1206 | TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); | ||
1207 | TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); | ||
1208 | lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); | ||
1209 | #if LJ_HASFFI | ||
1210 | } else if (tref_iscdata(tr)) { | ||
1211 | TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]); | ||
1212 | TRef len = recff_sbufx_checkint(J, rd, 2); | ||
1213 | lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); | ||
1214 | #endif | ||
1215 | } /* else: Interpreter will throw. */ | ||
1216 | } | ||
1217 | |||
1218 | static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd) | ||
1219 | { | ||
1220 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1221 | TRef trbuf = recff_sbufx_write(J, ud); | ||
1222 | TRef tr; | ||
1223 | ptrdiff_t arg; | ||
1224 | if (!J->base[1]) return; | ||
1225 | for (arg = 1; (tr = J->base[arg]); arg++) { | ||
1226 | if (tref_isstr(tr)) { | ||
1227 | trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr); | ||
1228 | } else if (tref_isnumber(tr)) { | ||
1229 | trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, | ||
1230 | emitir(IRT(IR_TOSTR, IRT_STR), tr, | ||
1231 | tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT)); | ||
1232 | } else if (tref_isudata(tr)) { | ||
1233 | TRef ud2 = recff_sbufx_check(J, rd, arg); | ||
1234 | TRef trr = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_R); | ||
1235 | TRef trw = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_W); | ||
1236 | TRef len = recff_sbufx_len(J, trr, trw); | ||
1237 | emitir(IRTG(IR_NE, IRT_PGC), ud, ud2); | ||
1238 | trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len); | ||
1239 | } else { | ||
1240 | recff_nyiu(J, rd); | ||
1241 | } | ||
1242 | } | ||
1243 | emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); | ||
1244 | } | ||
1245 | |||
1246 | static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd) | ||
1247 | { | ||
1248 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1249 | TRef trbuf = recff_sbufx_write(J, ud); | ||
1250 | recff_format(J, rd, trbuf, 1); | ||
1251 | } | ||
1252 | |||
1253 | static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd) | ||
1254 | { | ||
1255 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1256 | TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); | ||
1257 | TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); | ||
1258 | TRef tr; | ||
1259 | ptrdiff_t arg; | ||
1260 | if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; } | ||
1261 | for (arg = 0; (tr = J->base[arg+1]); arg++) { | ||
1262 | TRef len = recff_sbufx_len(J, trr, trw); | ||
1263 | if (tref_isnil(tr)) { | ||
1264 | J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); | ||
1265 | trr = trw; | ||
1266 | } else { | ||
1267 | TRef trn = recff_sbufx_checkint(J, rd, arg+1); | ||
1268 | TRef tru; | ||
1269 | len = emitir(IRTI(IR_MIN), len, trn); | ||
1270 | tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len); | ||
1271 | J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); | ||
1272 | trr = tru; /* Doing the ADD before the SNEW generates better code. */ | ||
774 | } | 1273 | } |
1274 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); | ||
1275 | } | ||
1276 | rd->nres = arg; | ||
1277 | } | ||
1278 | |||
1279 | static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd) | ||
1280 | { | ||
1281 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1282 | TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); | ||
1283 | TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); | ||
1284 | J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw)); | ||
1285 | } | ||
1286 | |||
1287 | static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd) | ||
1288 | { | ||
1289 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1290 | TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); | ||
1291 | TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); | ||
1292 | J->base[0] = recff_sbufx_len(J, trr, trw); | ||
1293 | } | ||
1294 | |||
1295 | #if LJ_HASFFI | ||
1296 | static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd) | ||
1297 | { | ||
1298 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1299 | TRef trbuf = recff_sbufx_write(J, ud); | ||
1300 | TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]); | ||
1301 | TRef len = recff_sbufx_checkint(J, rd, 2); | ||
1302 | trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len); | ||
1303 | emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); | ||
1304 | } | ||
1305 | |||
1306 | static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd) | ||
1307 | { | ||
1308 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1309 | TRef trbuf = recff_sbufx_write(J, ud); | ||
1310 | TRef trsz = recff_sbufx_checkint(J, rd, 1); | ||
1311 | J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz); | ||
1312 | J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W)); | ||
1313 | rd->nres = 2; | ||
1314 | } | ||
1315 | |||
1316 | static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd) | ||
1317 | { | ||
1318 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1319 | TRef len = recff_sbufx_checkint(J, rd, 1); | ||
1320 | TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); | ||
1321 | TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E); | ||
1322 | TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw); | ||
1323 | if (LJ_64) | ||
1324 | left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); | ||
1325 | emitir(IRTGI(IR_ULE), len, left); | ||
1326 | trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len); | ||
1327 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw); | ||
1328 | } | ||
1329 | |||
1330 | static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd) | ||
1331 | { | ||
1332 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1333 | TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); | ||
1334 | TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); | ||
1335 | J->base[0] = lj_crecord_topuint8(J, trr); | ||
1336 | J->base[1] = recff_sbufx_len(J, trr, trw); | ||
1337 | rd->nres = 2; | ||
1338 | } | ||
1339 | #endif | ||
1340 | |||
1341 | static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd) | ||
1342 | { | ||
1343 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1344 | TRef trbuf = recff_sbufx_write(J, ud); | ||
1345 | TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); | ||
1346 | lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp); | ||
1347 | /* No IR_USE needed, since the call is a store. */ | ||
1348 | } | ||
1349 | |||
1350 | static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd) | ||
1351 | { | ||
1352 | TRef ud = recff_sbufx_check(J, rd, 0); | ||
1353 | TRef trbuf = recff_sbufx_write(J, ud); | ||
1354 | TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); | ||
1355 | TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp); | ||
1356 | IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0])); | ||
1357 | /* No IR_USE needed, since the call is a store. */ | ||
1358 | J->base[0] = lj_record_vload(J, tmp, 0, t); | ||
1359 | /* The sbx->r store must be after the VLOAD type check, in case it fails. */ | ||
1360 | recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); | ||
1361 | } | ||
1362 | |||
1363 | static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd) | ||
1364 | { | ||
1365 | TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1); | ||
1366 | J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp); | ||
1367 | /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */ | ||
1368 | emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0); | ||
1369 | UNUSED(rd); | ||
1370 | } | ||
1371 | |||
1372 | static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd) | ||
1373 | { | ||
1374 | if (tvisstr(&rd->argv[0])) { | ||
1375 | GCstr *str = strV(&rd->argv[0]); | ||
1376 | SBufExt sbx; | ||
1377 | IRType t; | ||
1378 | TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); | ||
1379 | TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]); | ||
1380 | /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM. | ||
1381 | ** That's why IRCALL_lj_serialize_decode needs a fake INT result. | ||
1382 | */ | ||
1383 | emitir(IRT(IR_USE, IRT_NIL), tr, 0); | ||
1384 | memset(&sbx, 0, sizeof(SBufExt)); | ||
1385 | lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len); | ||
1386 | t = (IRType)lj_serialize_peektype(&sbx); | ||
1387 | J->base[0] = lj_record_vload(J, tmp, 0, t); | ||
775 | } /* else: Interpreter will throw. */ | 1388 | } /* else: Interpreter will throw. */ |
776 | } | 1389 | } |
777 | 1390 | ||
1391 | #endif | ||
1392 | |||
1393 | /* -- Table library fast functions ---------------------------------------- */ | ||
1394 | |||
778 | static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) | 1395 | static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) |
779 | { | 1396 | { |
780 | RecordIndex ix; | 1397 | RecordIndex ix; |
@@ -783,7 +1400,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) | |||
783 | rd->nres = 0; | 1400 | rd->nres = 0; |
784 | if (tref_istab(ix.tab) && ix.val) { | 1401 | if (tref_istab(ix.tab) && ix.val) { |
785 | if (!J->base[2]) { /* Simple push: t[#t+1] = v */ | 1402 | if (!J->base[2]) { /* Simple push: t[#t+1] = v */ |
786 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); | 1403 | TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL); |
787 | GCtab *t = tabV(&rd->argv[0]); | 1404 | GCtab *t = tabV(&rd->argv[0]); |
788 | ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); | 1405 | ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); |
789 | settabV(J->L, &ix.tabv, t); | 1406 | settabV(J->L, &ix.tabv, t); |
@@ -791,11 +1408,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) | |||
791 | ix.idxchain = 0; | 1408 | ix.idxchain = 0; |
792 | lj_record_idx(J, &ix); /* Set new value. */ | 1409 | lj_record_idx(J, &ix); /* Set new value. */ |
793 | } else { /* Complex case: insert in the middle. */ | 1410 | } else { /* Complex case: insert in the middle. */ |
794 | recff_nyiu(J); | 1411 | recff_nyiu(J, rd); |
1412 | return; | ||
795 | } | 1413 | } |
796 | } /* else: Interpreter will throw. */ | 1414 | } /* else: Interpreter will throw. */ |
797 | } | 1415 | } |
798 | 1416 | ||
1417 | static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) | ||
1418 | { | ||
1419 | TRef tab = J->base[0]; | ||
1420 | if (tref_istab(tab)) { | ||
1421 | TRef sep = !tref_isnil(J->base[1]) ? | ||
1422 | lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR); | ||
1423 | TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ? | ||
1424 | lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); | ||
1425 | TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? | ||
1426 | lj_opt_narrow_toint(J, J->base[3]) : | ||
1427 | emitir(IRTI(IR_ALEN), tab, TREF_NIL); | ||
1428 | TRef hdr = recff_bufhdr(J); | ||
1429 | TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); | ||
1430 | emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); | ||
1431 | J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
1432 | } /* else: Interpreter will throw. */ | ||
1433 | UNUSED(rd); | ||
1434 | } | ||
1435 | |||
1436 | static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd) | ||
1437 | { | ||
1438 | TRef tra = lj_opt_narrow_toint(J, J->base[0]); | ||
1439 | TRef trh = lj_opt_narrow_toint(J, J->base[1]); | ||
1440 | J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh); | ||
1441 | UNUSED(rd); | ||
1442 | } | ||
1443 | |||
1444 | static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd) | ||
1445 | { | ||
1446 | TRef tr = J->base[0]; | ||
1447 | if (tref_istab(tr)) { | ||
1448 | rd->nres = 0; | ||
1449 | lj_ir_call(J, IRCALL_lj_tab_clear, tr); | ||
1450 | J->needsnap = 1; | ||
1451 | } /* else: Interpreter will throw. */ | ||
1452 | } | ||
1453 | |||
799 | /* -- I/O library fast functions ------------------------------------------ */ | 1454 | /* -- I/O library fast functions ------------------------------------------ */ |
800 | 1455 | ||
801 | /* Get FILE* for I/O function. Any I/O error aborts recording, so there's | 1456 | /* Get FILE* for I/O function. Any I/O error aborts recording, so there's |
@@ -805,8 +1460,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) | |||
805 | { | 1460 | { |
806 | TRef tr, ud, fp; | 1461 | TRef tr, ud, fp; |
807 | if (id) { /* io.func() */ | 1462 | if (id) { /* io.func() */ |
808 | tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); | 1463 | ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); |
809 | ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); | ||
810 | } else { /* fp:method() */ | 1464 | } else { /* fp:method() */ |
811 | ud = J->base[0]; | 1465 | ud = J->base[0]; |
812 | if (!tref_isudata(ud)) | 1466 | if (!tref_isudata(ud)) |
@@ -828,10 +1482,13 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd) | |||
828 | ptrdiff_t i = rd->data == 0 ? 1 : 0; | 1482 | ptrdiff_t i = rd->data == 0 ? 1 : 0; |
829 | for (; J->base[i]; i++) { | 1483 | for (; J->base[i]; i++) { |
830 | TRef str = lj_ir_tostr(J, J->base[i]); | 1484 | TRef str = lj_ir_tostr(J, J->base[i]); |
831 | TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); | 1485 | TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero); |
832 | TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); | 1486 | TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); |
833 | if (tref_isk(len) && IR(tref_ref(len))->i == 1) { | 1487 | if (tref_isk(len) && IR(tref_ref(len))->i == 1) { |
834 | TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); | 1488 | IRIns *irs = IR(tref_ref(str)); |
1489 | TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ? | ||
1490 | irs->op1 : | ||
1491 | emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); | ||
835 | tr = lj_ir_call(J, IRCALL_fputc, tr, fp); | 1492 | tr = lj_ir_call(J, IRCALL_fputc, tr, fp); |
836 | if (results_wanted(J) != 0) /* Check result only if not ignored. */ | 1493 | if (results_wanted(J) != 0) /* Check result only if not ignored. */ |
837 | emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); | 1494 | emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); |
@@ -853,6 +1510,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd) | |||
853 | J->base[0] = TREF_TRUE; | 1510 | J->base[0] = TREF_TRUE; |
854 | } | 1511 | } |
855 | 1512 | ||
1513 | /* -- Debug library fast functions ---------------------------------------- */ | ||
1514 | |||
1515 | static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd) | ||
1516 | { | ||
1517 | GCtab *mt; | ||
1518 | TRef mtref; | ||
1519 | TRef tr = J->base[0]; | ||
1520 | if (tref_istab(tr)) { | ||
1521 | mt = tabref(tabV(&rd->argv[0])->metatable); | ||
1522 | mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META); | ||
1523 | } else if (tref_isudata(tr)) { | ||
1524 | mt = tabref(udataV(&rd->argv[0])->metatable); | ||
1525 | mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META); | ||
1526 | } else { | ||
1527 | mt = tabref(basemt_obj(J2G(J), &rd->argv[0])); | ||
1528 | J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL; | ||
1529 | return; | ||
1530 | } | ||
1531 | emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB)); | ||
1532 | J->base[0] = mt ? mtref : TREF_NIL; | ||
1533 | } | ||
1534 | |||
856 | /* -- Record calls to fast functions -------------------------------------- */ | 1535 | /* -- Record calls to fast functions -------------------------------------- */ |
857 | 1536 | ||
858 | #include "lj_recdef.h" | 1537 | #include "lj_recdef.h" |
diff --git a/src/lj_frame.h b/src/lj_frame.h index 33bd8e3e..aa1dc11a 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h | |||
@@ -11,7 +11,16 @@ | |||
11 | 11 | ||
12 | /* -- Lua stack frame ----------------------------------------------------- */ | 12 | /* -- Lua stack frame ----------------------------------------------------- */ |
13 | 13 | ||
14 | /* Frame type markers in callee function slot (callee base-1). */ | 14 | /* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned: |
15 | ** | ||
16 | ** PC 00 Lua frame | ||
17 | ** delta 001 C frame | ||
18 | ** delta 010 Continuation frame | ||
19 | ** delta 011 Lua vararg frame | ||
20 | ** delta 101 cpcall() frame | ||
21 | ** delta 110 ff pcall() frame | ||
22 | ** delta 111 ff pcall() frame with active hook | ||
23 | */ | ||
15 | enum { | 24 | enum { |
16 | FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, | 25 | FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, |
17 | FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH | 26 | FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH |
@@ -21,9 +30,47 @@ enum { | |||
21 | #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) | 30 | #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) |
22 | 31 | ||
23 | /* Macros to access and modify Lua frames. */ | 32 | /* Macros to access and modify Lua frames. */ |
33 | #if LJ_FR2 | ||
34 | /* Two-slot frame info, required for 64 bit PC/GCRef: | ||
35 | ** | ||
36 | ** base-2 base-1 | base base+1 ... | ||
37 | ** [func PC/delta/ft] | [slots ...] | ||
38 | ** ^-- frame | ^-- base ^-- top | ||
39 | ** | ||
40 | ** Continuation frames: | ||
41 | ** | ||
42 | ** base-4 base-3 base-2 base-1 | base base+1 ... | ||
43 | ** [cont PC ] [func PC/delta/ft] | [slots ...] | ||
44 | ** ^-- frame | ^-- base ^-- top | ||
45 | */ | ||
46 | #define frame_gc(f) (gcval((f)-1)) | ||
47 | #define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz) | ||
48 | #define frame_pc(f) ((const BCIns *)frame_ftsz(f)) | ||
49 | #define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp))) | ||
50 | #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) | ||
51 | #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) | ||
52 | #else | ||
53 | /* One-slot frame info, sufficient for 32 bit PC/GCRef: | ||
54 | ** | ||
55 | ** base-1 | base base+1 ... | ||
56 | ** lo hi | | ||
57 | ** [func | PC/delta/ft] | [slots ...] | ||
58 | ** ^-- frame | ^-- base ^-- top | ||
59 | ** | ||
60 | ** Continuation frames: | ||
61 | ** | ||
62 | ** base-2 base-1 | base base+1 ... | ||
63 | ** lo hi lo hi | | ||
64 | ** [cont | PC] [func | PC/delta/ft] | [slots ...] | ||
65 | ** ^-- frame | ^-- base ^-- top | ||
66 | */ | ||
24 | #define frame_gc(f) (gcref((f)->fr.func)) | 67 | #define frame_gc(f) (gcref((f)->fr.func)) |
25 | #define frame_func(f) (&frame_gc(f)->fn) | 68 | #define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz) |
26 | #define frame_ftsz(f) ((f)->fr.tp.ftsz) | 69 | #define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) |
70 | #define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp)) | ||
71 | #define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz)) | ||
72 | #define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) | ||
73 | #endif | ||
27 | 74 | ||
28 | #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) | 75 | #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) |
29 | #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) | 76 | #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) |
@@ -33,33 +80,53 @@ enum { | |||
33 | #define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) | 80 | #define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) |
34 | #define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) | 81 | #define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) |
35 | 82 | ||
36 | #define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) | 83 | #define frame_func(f) (&frame_gc(f)->fn) |
84 | #define frame_delta(f) (frame_ftsz(f) >> 3) | ||
85 | #define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) | ||
86 | |||
87 | enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ | ||
88 | |||
89 | #if LJ_FR2 | ||
90 | #define frame_contpc(f) (frame_pc((f)-2)) | ||
91 | #define frame_contv(f) (((f)-3)->u64) | ||
92 | #else | ||
37 | #define frame_contpc(f) (frame_pc((f)-1)) | 93 | #define frame_contpc(f) (frame_pc((f)-1)) |
38 | #if LJ_64 | 94 | #define frame_contv(f) (((f)-1)->u32.lo) |
95 | #endif | ||
96 | #if LJ_FR2 | ||
97 | #define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64) | ||
98 | #elif LJ_64 | ||
39 | #define frame_contf(f) \ | 99 | #define frame_contf(f) \ |
40 | ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ | 100 | ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ |
41 | (intptr_t)(int32_t)((f)-1)->u32.lo)) | 101 | (intptr_t)(int32_t)((f)-1)->u32.lo)) |
42 | #else | 102 | #else |
43 | #define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) | 103 | #define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) |
44 | #endif | 104 | #endif |
45 | #define frame_delta(f) (frame_ftsz(f) >> 3) | 105 | #define frame_iscont_fficb(f) \ |
46 | #define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) | 106 | (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK) |
47 | 107 | ||
48 | #define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) | 108 | #define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1]))) |
49 | #define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) | 109 | #define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) |
50 | #define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) | 110 | #define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) |
51 | /* Note: this macro does not skip over FRAME_VARG. */ | 111 | /* Note: this macro does not skip over FRAME_VARG. */ |
52 | 112 | ||
53 | #define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) | ||
54 | #define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz)) | ||
55 | #define setframe_gc(f, p) (setgcref((f)->fr.func, (p))) | ||
56 | |||
57 | /* -- C stack frame ------------------------------------------------------- */ | 113 | /* -- C stack frame ------------------------------------------------------- */ |
58 | 114 | ||
59 | /* Macros to access and modify the C stack frame chain. */ | 115 | /* Macros to access and modify the C stack frame chain. */ |
60 | 116 | ||
61 | /* These definitions must match with the arch-specific *.dasc files. */ | 117 | /* These definitions must match with the arch-specific *.dasc files. */ |
62 | #if LJ_TARGET_X86 | 118 | #if LJ_TARGET_X86 |
119 | #if LJ_ABI_WIN | ||
120 | #define CFRAME_OFS_ERRF (19*4) | ||
121 | #define CFRAME_OFS_NRES (18*4) | ||
122 | #define CFRAME_OFS_PREV (17*4) | ||
123 | #define CFRAME_OFS_L (16*4) | ||
124 | #define CFRAME_OFS_SEH (9*4) | ||
125 | #define CFRAME_OFS_PC (6*4) | ||
126 | #define CFRAME_OFS_MULTRES (5*4) | ||
127 | #define CFRAME_SIZE (16*4) | ||
128 | #define CFRAME_SHIFT_MULTRES 0 | ||
129 | #else | ||
63 | #define CFRAME_OFS_ERRF (15*4) | 130 | #define CFRAME_OFS_ERRF (15*4) |
64 | #define CFRAME_OFS_NRES (14*4) | 131 | #define CFRAME_OFS_NRES (14*4) |
65 | #define CFRAME_OFS_PREV (13*4) | 132 | #define CFRAME_OFS_PREV (13*4) |
@@ -68,24 +135,41 @@ enum { | |||
68 | #define CFRAME_OFS_MULTRES (5*4) | 135 | #define CFRAME_OFS_MULTRES (5*4) |
69 | #define CFRAME_SIZE (12*4) | 136 | #define CFRAME_SIZE (12*4) |
70 | #define CFRAME_SHIFT_MULTRES 0 | 137 | #define CFRAME_SHIFT_MULTRES 0 |
138 | #endif | ||
71 | #elif LJ_TARGET_X64 | 139 | #elif LJ_TARGET_X64 |
72 | #if LJ_ABI_WIN | 140 | #if LJ_ABI_WIN |
73 | #define CFRAME_OFS_PREV (13*8) | 141 | #define CFRAME_OFS_PREV (13*8) |
142 | #if LJ_GC64 | ||
143 | #define CFRAME_OFS_PC (12*8) | ||
144 | #define CFRAME_OFS_L (11*8) | ||
145 | #define CFRAME_OFS_ERRF (21*4) | ||
146 | #define CFRAME_OFS_NRES (20*4) | ||
147 | #define CFRAME_OFS_MULTRES (8*4) | ||
148 | #else | ||
74 | #define CFRAME_OFS_PC (25*4) | 149 | #define CFRAME_OFS_PC (25*4) |
75 | #define CFRAME_OFS_L (24*4) | 150 | #define CFRAME_OFS_L (24*4) |
76 | #define CFRAME_OFS_ERRF (23*4) | 151 | #define CFRAME_OFS_ERRF (23*4) |
77 | #define CFRAME_OFS_NRES (22*4) | 152 | #define CFRAME_OFS_NRES (22*4) |
78 | #define CFRAME_OFS_MULTRES (21*4) | 153 | #define CFRAME_OFS_MULTRES (21*4) |
154 | #endif | ||
79 | #define CFRAME_SIZE (10*8) | 155 | #define CFRAME_SIZE (10*8) |
80 | #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) | 156 | #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) |
81 | #define CFRAME_SHIFT_MULTRES 0 | 157 | #define CFRAME_SHIFT_MULTRES 0 |
82 | #else | 158 | #else |
83 | #define CFRAME_OFS_PREV (4*8) | 159 | #define CFRAME_OFS_PREV (4*8) |
160 | #if LJ_GC64 | ||
161 | #define CFRAME_OFS_PC (3*8) | ||
162 | #define CFRAME_OFS_L (2*8) | ||
163 | #define CFRAME_OFS_ERRF (3*4) | ||
164 | #define CFRAME_OFS_NRES (2*4) | ||
165 | #define CFRAME_OFS_MULTRES (0*4) | ||
166 | #else | ||
84 | #define CFRAME_OFS_PC (7*4) | 167 | #define CFRAME_OFS_PC (7*4) |
85 | #define CFRAME_OFS_L (6*4) | 168 | #define CFRAME_OFS_L (6*4) |
86 | #define CFRAME_OFS_ERRF (5*4) | 169 | #define CFRAME_OFS_ERRF (5*4) |
87 | #define CFRAME_OFS_NRES (4*4) | 170 | #define CFRAME_OFS_NRES (4*4) |
88 | #define CFRAME_OFS_MULTRES (1*4) | 171 | #define CFRAME_OFS_MULTRES (1*4) |
172 | #endif | ||
89 | #if LJ_NO_UNWIND | 173 | #if LJ_NO_UNWIND |
90 | #define CFRAME_SIZE (12*8) | 174 | #define CFRAME_SIZE (12*8) |
91 | #else | 175 | #else |
@@ -107,6 +191,15 @@ enum { | |||
107 | #define CFRAME_SIZE 64 | 191 | #define CFRAME_SIZE 64 |
108 | #endif | 192 | #endif |
109 | #define CFRAME_SHIFT_MULTRES 3 | 193 | #define CFRAME_SHIFT_MULTRES 3 |
194 | #elif LJ_TARGET_ARM64 | ||
195 | #define CFRAME_OFS_ERRF 36 | ||
196 | #define CFRAME_OFS_NRES 40 | ||
197 | #define CFRAME_OFS_PREV 0 | ||
198 | #define CFRAME_OFS_L 16 | ||
199 | #define CFRAME_OFS_PC 8 | ||
200 | #define CFRAME_OFS_MULTRES 32 | ||
201 | #define CFRAME_SIZE 208 | ||
202 | #define CFRAME_SHIFT_MULTRES 3 | ||
110 | #elif LJ_TARGET_PPC | 203 | #elif LJ_TARGET_PPC |
111 | #if LJ_TARGET_XBOX360 | 204 | #if LJ_TARGET_XBOX360 |
112 | #define CFRAME_OFS_ERRF 424 | 205 | #define CFRAME_OFS_ERRF 424 |
@@ -117,7 +210,7 @@ enum { | |||
117 | #define CFRAME_OFS_MULTRES 408 | 210 | #define CFRAME_OFS_MULTRES 408 |
118 | #define CFRAME_SIZE 384 | 211 | #define CFRAME_SIZE 384 |
119 | #define CFRAME_SHIFT_MULTRES 3 | 212 | #define CFRAME_SHIFT_MULTRES 3 |
120 | #elif LJ_ARCH_PPC64 | 213 | #elif LJ_ARCH_PPC32ON64 |
121 | #define CFRAME_OFS_ERRF 472 | 214 | #define CFRAME_OFS_ERRF 472 |
122 | #define CFRAME_OFS_NRES 468 | 215 | #define CFRAME_OFS_NRES 468 |
123 | #define CFRAME_OFS_PREV 448 | 216 | #define CFRAME_OFS_PREV 448 |
@@ -133,26 +226,43 @@ enum { | |||
133 | #define CFRAME_OFS_L 36 | 226 | #define CFRAME_OFS_L 36 |
134 | #define CFRAME_OFS_PC 32 | 227 | #define CFRAME_OFS_PC 32 |
135 | #define CFRAME_OFS_MULTRES 28 | 228 | #define CFRAME_OFS_MULTRES 28 |
136 | #define CFRAME_SIZE 272 | 229 | #define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128) |
137 | #define CFRAME_SHIFT_MULTRES 3 | 230 | #define CFRAME_SHIFT_MULTRES 3 |
138 | #endif | 231 | #endif |
139 | #elif LJ_TARGET_PPCSPE | 232 | #elif LJ_TARGET_MIPS32 |
140 | #define CFRAME_OFS_ERRF 28 | 233 | #if LJ_ARCH_HASFPU |
141 | #define CFRAME_OFS_NRES 24 | ||
142 | #define CFRAME_OFS_PREV 20 | ||
143 | #define CFRAME_OFS_L 16 | ||
144 | #define CFRAME_OFS_PC 12 | ||
145 | #define CFRAME_OFS_MULTRES 8 | ||
146 | #define CFRAME_SIZE 184 | ||
147 | #define CFRAME_SHIFT_MULTRES 3 | ||
148 | #elif LJ_TARGET_MIPS | ||
149 | #define CFRAME_OFS_ERRF 124 | 234 | #define CFRAME_OFS_ERRF 124 |
150 | #define CFRAME_OFS_NRES 120 | 235 | #define CFRAME_OFS_NRES 120 |
151 | #define CFRAME_OFS_PREV 116 | 236 | #define CFRAME_OFS_PREV 116 |
152 | #define CFRAME_OFS_L 112 | 237 | #define CFRAME_OFS_L 112 |
238 | #define CFRAME_SIZE 112 | ||
239 | #else | ||
240 | #define CFRAME_OFS_ERRF 76 | ||
241 | #define CFRAME_OFS_NRES 72 | ||
242 | #define CFRAME_OFS_PREV 68 | ||
243 | #define CFRAME_OFS_L 64 | ||
244 | #define CFRAME_SIZE 64 | ||
245 | #endif | ||
153 | #define CFRAME_OFS_PC 20 | 246 | #define CFRAME_OFS_PC 20 |
154 | #define CFRAME_OFS_MULTRES 16 | 247 | #define CFRAME_OFS_MULTRES 16 |
155 | #define CFRAME_SIZE 112 | 248 | #define CFRAME_SHIFT_MULTRES 3 |
249 | #elif LJ_TARGET_MIPS64 | ||
250 | #if LJ_ARCH_HASFPU | ||
251 | #define CFRAME_OFS_ERRF 188 | ||
252 | #define CFRAME_OFS_NRES 184 | ||
253 | #define CFRAME_OFS_PREV 176 | ||
254 | #define CFRAME_OFS_L 168 | ||
255 | #define CFRAME_OFS_PC 160 | ||
256 | #define CFRAME_SIZE 192 | ||
257 | #else | ||
258 | #define CFRAME_OFS_ERRF 124 | ||
259 | #define CFRAME_OFS_NRES 120 | ||
260 | #define CFRAME_OFS_PREV 112 | ||
261 | #define CFRAME_OFS_L 104 | ||
262 | #define CFRAME_OFS_PC 96 | ||
263 | #define CFRAME_SIZE 128 | ||
264 | #endif | ||
265 | #define CFRAME_OFS_MULTRES 0 | ||
156 | #define CFRAME_SHIFT_MULTRES 3 | 266 | #define CFRAME_SHIFT_MULTRES 3 |
157 | #else | 267 | #else |
158 | #error "Missing CFRAME_* definitions for this architecture" | 268 | #error "Missing CFRAME_* definitions for this architecture" |
diff --git a/src/lj_func.c b/src/lj_func.c index 5df652d8..9795a771 100644 --- a/src/lj_func.c +++ b/src/lj_func.c | |||
@@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) | |||
24 | 24 | ||
25 | /* -- Upvalues ------------------------------------------------------------ */ | 25 | /* -- Upvalues ------------------------------------------------------------ */ |
26 | 26 | ||
27 | static void unlinkuv(GCupval *uv) | 27 | static void unlinkuv(global_State *g, GCupval *uv) |
28 | { | 28 | { |
29 | lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); | 29 | UNUSED(g); |
30 | lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, | ||
31 | "broken upvalue chain"); | ||
30 | setgcrefr(uvnext(uv)->prev, uv->prev); | 32 | setgcrefr(uvnext(uv)->prev, uv->prev); |
31 | setgcrefr(uvprev(uv)->next, uv->next); | 33 | setgcrefr(uvprev(uv)->next, uv->next); |
32 | } | 34 | } |
@@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) | |||
40 | GCupval *uv; | 42 | GCupval *uv; |
41 | /* Search the sorted list of open upvalues. */ | 43 | /* Search the sorted list of open upvalues. */ |
42 | while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { | 44 | while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { |
43 | lua_assert(!p->closed && uvval(p) != &p->tv); | 45 | lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain"); |
44 | if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ | 46 | if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ |
45 | if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ | 47 | if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ |
46 | flipwhite(obj2gco(p)); | 48 | flipwhite(obj2gco(p)); |
@@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) | |||
61 | setgcrefr(uv->next, g->uvhead.next); | 63 | setgcrefr(uv->next, g->uvhead.next); |
62 | setgcref(uvnext(uv)->prev, obj2gco(uv)); | 64 | setgcref(uvnext(uv)->prev, obj2gco(uv)); |
63 | setgcref(g->uvhead.next, obj2gco(uv)); | 65 | setgcref(g->uvhead.next, obj2gco(uv)); |
64 | lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); | 66 | lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, |
67 | "broken upvalue chain"); | ||
65 | return uv; | 68 | return uv; |
66 | } | 69 | } |
67 | 70 | ||
@@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) | |||
84 | while (gcref(L->openupval) != NULL && | 87 | while (gcref(L->openupval) != NULL && |
85 | uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { | 88 | uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { |
86 | GCobj *o = obj2gco(uv); | 89 | GCobj *o = obj2gco(uv); |
87 | lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv); | 90 | lj_assertG(!isblack(o), "bad black upvalue"); |
91 | lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain"); | ||
88 | setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ | 92 | setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ |
89 | if (isdead(g, o)) { | 93 | if (isdead(g, o)) { |
90 | lj_func_freeuv(g, uv); | 94 | lj_func_freeuv(g, uv); |
91 | } else { | 95 | } else { |
92 | unlinkuv(uv); | 96 | unlinkuv(g, uv); |
93 | lj_gc_closeuv(g, uv); | 97 | lj_gc_closeuv(g, uv); |
94 | } | 98 | } |
95 | } | 99 | } |
@@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) | |||
98 | void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) | 102 | void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) |
99 | { | 103 | { |
100 | if (!uv->closed) | 104 | if (!uv->closed) |
101 | unlinkuv(uv); | 105 | unlinkuv(g, uv); |
102 | lj_mem_freet(g, uv); | 106 | lj_mem_freet(g, uv); |
103 | } | 107 | } |
104 | 108 | ||
diff --git a/src/lj_gc.c b/src/lj_gc.c index 899b4e02..b35a0d44 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #include "lj_tab.h" | 17 | #include "lj_tab.h" |
17 | #include "lj_func.h" | 18 | #include "lj_func.h" |
@@ -24,6 +25,7 @@ | |||
24 | #include "lj_cdata.h" | 25 | #include "lj_cdata.h" |
25 | #endif | 26 | #endif |
26 | #include "lj_trace.h" | 27 | #include "lj_trace.h" |
28 | #include "lj_dispatch.h" | ||
27 | #include "lj_vm.h" | 29 | #include "lj_vm.h" |
28 | 30 | ||
29 | #define GCSTEPSIZE 1024u | 31 | #define GCSTEPSIZE 1024u |
@@ -40,7 +42,8 @@ | |||
40 | 42 | ||
41 | /* Mark a TValue (if needed). */ | 43 | /* Mark a TValue (if needed). */ |
42 | #define gc_marktv(g, tv) \ | 44 | #define gc_marktv(g, tv) \ |
43 | { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ | 45 | { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \ |
46 | "TValue and GC type mismatch"); \ | ||
44 | if (tviswhite(tv)) gc_mark(g, gcV(tv)); } | 47 | if (tviswhite(tv)) gc_mark(g, gcV(tv)); } |
45 | 48 | ||
46 | /* Mark a GCobj (if needed). */ | 49 | /* Mark a GCobj (if needed). */ |
@@ -54,21 +57,32 @@ | |||
54 | static void gc_mark(global_State *g, GCobj *o) | 57 | static void gc_mark(global_State *g, GCobj *o) |
55 | { | 58 | { |
56 | int gct = o->gch.gct; | 59 | int gct = o->gch.gct; |
57 | lua_assert(iswhite(o) && !isdead(g, o)); | 60 | lj_assertG(iswhite(o), "mark of non-white object"); |
61 | lj_assertG(!isdead(g, o), "mark of dead object"); | ||
58 | white2gray(o); | 62 | white2gray(o); |
59 | if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { | 63 | if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { |
60 | GCtab *mt = tabref(gco2ud(o)->metatable); | 64 | GCtab *mt = tabref(gco2ud(o)->metatable); |
61 | gray2black(o); /* Userdata are never gray. */ | 65 | gray2black(o); /* Userdata are never gray. */ |
62 | if (mt) gc_markobj(g, mt); | 66 | if (mt) gc_markobj(g, mt); |
63 | gc_markobj(g, tabref(gco2ud(o)->env)); | 67 | gc_markobj(g, tabref(gco2ud(o)->env)); |
68 | if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) { | ||
69 | SBufExt *sbx = (SBufExt *)uddata(gco2ud(o)); | ||
70 | if (sbufiscow(sbx) && gcref(sbx->cowref)) | ||
71 | gc_markobj(g, gcref(sbx->cowref)); | ||
72 | if (gcref(sbx->dict_str)) | ||
73 | gc_markobj(g, gcref(sbx->dict_str)); | ||
74 | if (gcref(sbx->dict_mt)) | ||
75 | gc_markobj(g, gcref(sbx->dict_mt)); | ||
76 | } | ||
64 | } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { | 77 | } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { |
65 | GCupval *uv = gco2uv(o); | 78 | GCupval *uv = gco2uv(o); |
66 | gc_marktv(g, uvval(uv)); | 79 | gc_marktv(g, uvval(uv)); |
67 | if (uv->closed) | 80 | if (uv->closed) |
68 | gray2black(o); /* Closed upvalues are never gray. */ | 81 | gray2black(o); /* Closed upvalues are never gray. */ |
69 | } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { | 82 | } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { |
70 | lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || | 83 | lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || |
71 | gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO); | 84 | gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE, |
85 | "bad GC type %d", gct); | ||
72 | setgcrefr(o->gch.gclist, g->gc.gray); | 86 | setgcrefr(o->gch.gclist, g->gc.gray); |
73 | setgcref(g->gc.gray, o); | 87 | setgcref(g->gc.gray, o); |
74 | } | 88 | } |
@@ -101,7 +115,8 @@ static void gc_mark_uv(global_State *g) | |||
101 | { | 115 | { |
102 | GCupval *uv; | 116 | GCupval *uv; |
103 | for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { | 117 | for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { |
104 | lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); | 118 | lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, |
119 | "broken upvalue chain"); | ||
105 | if (isgray(obj2gco(uv))) | 120 | if (isgray(obj2gco(uv))) |
106 | gc_marktv(g, uvval(uv)); | 121 | gc_marktv(g, uvval(uv)); |
107 | } | 122 | } |
@@ -196,7 +211,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t) | |||
196 | for (i = 0; i <= hmask; i++) { | 211 | for (i = 0; i <= hmask; i++) { |
197 | Node *n = &node[i]; | 212 | Node *n = &node[i]; |
198 | if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ | 213 | if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ |
199 | lua_assert(!tvisnil(&n->key)); | 214 | lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot"); |
200 | if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); | 215 | if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); |
201 | if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); | 216 | if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); |
202 | } | 217 | } |
@@ -211,7 +226,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) | |||
211 | gc_markobj(g, tabref(fn->c.env)); | 226 | gc_markobj(g, tabref(fn->c.env)); |
212 | if (isluafunc(fn)) { | 227 | if (isluafunc(fn)) { |
213 | uint32_t i; | 228 | uint32_t i; |
214 | lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv); | 229 | lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv, |
230 | "function upvalues out of range"); | ||
215 | gc_markobj(g, funcproto(fn)); | 231 | gc_markobj(g, funcproto(fn)); |
216 | for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ | 232 | for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ |
217 | gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); | 233 | gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); |
@@ -227,7 +243,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) | |||
227 | static void gc_marktrace(global_State *g, TraceNo traceno) | 243 | static void gc_marktrace(global_State *g, TraceNo traceno) |
228 | { | 244 | { |
229 | GCobj *o = obj2gco(traceref(G2J(g), traceno)); | 245 | GCobj *o = obj2gco(traceref(G2J(g), traceno)); |
230 | lua_assert(traceno != G2J(g)->cur.traceno); | 246 | lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped"); |
231 | if (iswhite(o)) { | 247 | if (iswhite(o)) { |
232 | white2gray(o); | 248 | white2gray(o); |
233 | setgcrefr(o->gch.gclist, g->gc.gray); | 249 | setgcrefr(o->gch.gclist, g->gc.gray); |
@@ -244,6 +260,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) | |||
244 | IRIns *ir = &T->ir[ref]; | 260 | IRIns *ir = &T->ir[ref]; |
245 | if (ir->o == IR_KGC) | 261 | if (ir->o == IR_KGC) |
246 | gc_markobj(g, ir_kgc(ir)); | 262 | gc_markobj(g, ir_kgc(ir)); |
263 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
264 | ref++; | ||
247 | } | 265 | } |
248 | if (T->link) gc_marktrace(g, T->link); | 266 | if (T->link) gc_marktrace(g, T->link); |
249 | if (T->nextroot) gc_marktrace(g, T->nextroot); | 267 | if (T->nextroot) gc_marktrace(g, T->nextroot); |
@@ -274,12 +292,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th) | |||
274 | { | 292 | { |
275 | TValue *frame, *top = th->top-1, *bot = tvref(th->stack); | 293 | TValue *frame, *top = th->top-1, *bot = tvref(th->stack); |
276 | /* Note: extra vararg frame not skipped, marks function twice (harmless). */ | 294 | /* Note: extra vararg frame not skipped, marks function twice (harmless). */ |
277 | for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) { | 295 | for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) { |
278 | GCfunc *fn = frame_func(frame); | 296 | GCfunc *fn = frame_func(frame); |
279 | TValue *ftop = frame; | 297 | TValue *ftop = frame; |
280 | if (isluafunc(fn)) ftop += funcproto(fn)->framesize; | 298 | if (isluafunc(fn)) ftop += funcproto(fn)->framesize; |
281 | if (ftop > top) top = ftop; | 299 | if (ftop > top) top = ftop; |
282 | gc_markobj(g, fn); /* Need to mark hidden function (or L). */ | 300 | if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */ |
283 | } | 301 | } |
284 | top++; /* Correct bias of -1 (frame == base-1). */ | 302 | top++; /* Correct bias of -1 (frame == base-1). */ |
285 | if (top > tvref(th->maxstack)) top = tvref(th->maxstack); | 303 | if (top > tvref(th->maxstack)) top = tvref(th->maxstack); |
@@ -290,7 +308,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th) | |||
290 | static void gc_traverse_thread(global_State *g, lua_State *th) | 308 | static void gc_traverse_thread(global_State *g, lua_State *th) |
291 | { | 309 | { |
292 | TValue *o, *top = th->top; | 310 | TValue *o, *top = th->top; |
293 | for (o = tvref(th->stack)+1; o < top; o++) | 311 | for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++) |
294 | gc_marktv(g, o); | 312 | gc_marktv(g, o); |
295 | if (g->gc.state == GCSatomic) { | 313 | if (g->gc.state == GCSatomic) { |
296 | top = tvref(th->stack) + th->stacksize; | 314 | top = tvref(th->stack) + th->stacksize; |
@@ -306,7 +324,7 @@ static size_t propagatemark(global_State *g) | |||
306 | { | 324 | { |
307 | GCobj *o = gcref(g->gc.gray); | 325 | GCobj *o = gcref(g->gc.gray); |
308 | int gct = o->gch.gct; | 326 | int gct = o->gch.gct; |
309 | lua_assert(isgray(o)); | 327 | lj_assertG(isgray(o), "propagation of non-gray object"); |
310 | gray2black(o); | 328 | gray2black(o); |
311 | setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ | 329 | setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ |
312 | if (LJ_LIKELY(gct == ~LJ_TTAB)) { | 330 | if (LJ_LIKELY(gct == ~LJ_TTAB)) { |
@@ -338,7 +356,7 @@ static size_t propagatemark(global_State *g) | |||
338 | return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + | 356 | return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + |
339 | T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); | 357 | T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); |
340 | #else | 358 | #else |
341 | lua_assert(0); | 359 | lj_assertG(0, "bad GC type %d", gct); |
342 | return 0; | 360 | return 0; |
343 | #endif | 361 | #endif |
344 | } | 362 | } |
@@ -355,15 +373,6 @@ static size_t gc_propagate_gray(global_State *g) | |||
355 | 373 | ||
356 | /* -- Sweep phase --------------------------------------------------------- */ | 374 | /* -- Sweep phase --------------------------------------------------------- */ |
357 | 375 | ||
358 | /* Try to shrink some common data structures. */ | ||
359 | static void gc_shrink(global_State *g, lua_State *L) | ||
360 | { | ||
361 | if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) | ||
362 | lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ | ||
363 | if (g->tmpbuf.sz > LJ_MIN_SBUF*2) | ||
364 | lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */ | ||
365 | } | ||
366 | |||
367 | /* Type of GC free functions. */ | 376 | /* Type of GC free functions. */ |
368 | typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); | 377 | typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); |
369 | 378 | ||
@@ -389,7 +398,7 @@ static const GCFreeFunc gc_freefunc[] = { | |||
389 | }; | 398 | }; |
390 | 399 | ||
391 | /* Full sweep of a GC list. */ | 400 | /* Full sweep of a GC list. */ |
392 | #define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) | 401 | #define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0) |
393 | 402 | ||
394 | /* Partial sweep of a GC list. */ | 403 | /* Partial sweep of a GC list. */ |
395 | static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) | 404 | static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) |
@@ -401,11 +410,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) | |||
401 | if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ | 410 | if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ |
402 | gc_fullsweep(g, &gco2th(o)->openupval); | 411 | gc_fullsweep(g, &gco2th(o)->openupval); |
403 | if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ | 412 | if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ |
404 | lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); | 413 | lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), |
414 | "sweep of undead object"); | ||
405 | makewhite(g, o); /* Value is alive, change to the current white. */ | 415 | makewhite(g, o); /* Value is alive, change to the current white. */ |
406 | p = &o->gch.nextgc; | 416 | p = &o->gch.nextgc; |
407 | } else { /* Otherwise value is dead, free it. */ | 417 | } else { /* Otherwise value is dead, free it. */ |
408 | lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); | 418 | lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, |
419 | "sweep of unlive object"); | ||
409 | setgcrefr(*p, o->gch.nextgc); | 420 | setgcrefr(*p, o->gch.nextgc); |
410 | if (o == gcref(g->gc.root)) | 421 | if (o == gcref(g->gc.root)) |
411 | setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ | 422 | setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ |
@@ -415,6 +426,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) | |||
415 | return p; | 426 | return p; |
416 | } | 427 | } |
417 | 428 | ||
429 | /* Sweep one string interning table chain. Preserves hashalg bit. */ | ||
430 | static void gc_sweepstr(global_State *g, GCRef *chain) | ||
431 | { | ||
432 | /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ | ||
433 | int ow = otherwhite(g); | ||
434 | uintptr_t u = gcrefu(*chain); | ||
435 | GCRef q; | ||
436 | GCRef *p = &q; | ||
437 | GCobj *o; | ||
438 | setgcrefp(q, (u & ~(uintptr_t)1)); | ||
439 | while ((o = gcref(*p)) != NULL) { | ||
440 | if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ | ||
441 | lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), | ||
442 | "sweep of undead string"); | ||
443 | makewhite(g, o); /* String is alive, change to the current white. */ | ||
444 | p = &o->gch.nextgc; | ||
445 | } else { /* Otherwise string is dead, free it. */ | ||
446 | lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, | ||
447 | "sweep of unlive string"); | ||
448 | setgcrefr(*p, o->gch.nextgc); | ||
449 | lj_str_free(g, gco2str(o)); | ||
450 | } | ||
451 | } | ||
452 | setgcrefp(*chain, (gcrefu(q) | (u & 1))); | ||
453 | } | ||
454 | |||
418 | /* Check whether we can clear a key or a value slot from a table. */ | 455 | /* Check whether we can clear a key or a value slot from a table. */ |
419 | static int gc_mayclear(cTValue *o, int val) | 456 | static int gc_mayclear(cTValue *o, int val) |
420 | { | 457 | { |
@@ -432,11 +469,12 @@ static int gc_mayclear(cTValue *o, int val) | |||
432 | } | 469 | } |
433 | 470 | ||
434 | /* Clear collected entries from weak tables. */ | 471 | /* Clear collected entries from weak tables. */ |
435 | static void gc_clearweak(GCobj *o) | 472 | static void gc_clearweak(global_State *g, GCobj *o) |
436 | { | 473 | { |
474 | UNUSED(g); | ||
437 | while (o) { | 475 | while (o) { |
438 | GCtab *t = gco2tab(o); | 476 | GCtab *t = gco2tab(o); |
439 | lua_assert((t->marked & LJ_GC_WEAK)); | 477 | lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table"); |
440 | if ((t->marked & LJ_GC_WEAKVAL)) { | 478 | if ((t->marked & LJ_GC_WEAKVAL)) { |
441 | MSize i, asize = t->asize; | 479 | MSize i, asize = t->asize; |
442 | for (i = 0; i < asize; i++) { | 480 | for (i = 0; i < asize; i++) { |
@@ -467,18 +505,21 @@ static void gc_call_finalizer(global_State *g, lua_State *L, | |||
467 | { | 505 | { |
468 | /* Save and restore lots of state around the __gc callback. */ | 506 | /* Save and restore lots of state around the __gc callback. */ |
469 | uint8_t oldh = hook_save(g); | 507 | uint8_t oldh = hook_save(g); |
470 | MSize oldt = g->gc.threshold; | 508 | GCSize oldt = g->gc.threshold; |
471 | int errcode; | 509 | int errcode; |
472 | TValue *top; | 510 | TValue *top; |
473 | lj_trace_abort(g); | 511 | lj_trace_abort(g); |
474 | top = L->top; | ||
475 | L->top = top+2; | ||
476 | hook_entergc(g); /* Disable hooks and new traces during __gc. */ | 512 | hook_entergc(g); /* Disable hooks and new traces during __gc. */ |
513 | if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); | ||
477 | g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ | 514 | g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ |
478 | copyTV(L, top, mo); | 515 | top = L->top; |
479 | setgcV(L, top+1, o, ~o->gch.gct); | 516 | copyTV(L, top++, mo); |
480 | errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */ | 517 | if (LJ_FR2) setnilV(top++); |
518 | setgcV(L, top, o, ~o->gch.gct); | ||
519 | L->top = top+1; | ||
520 | errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ | ||
481 | hook_restore(g, oldh); | 521 | hook_restore(g, oldh); |
522 | if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); | ||
482 | g->gc.threshold = oldt; /* Restore GC threshold. */ | 523 | g->gc.threshold = oldt; /* Restore GC threshold. */ |
483 | if (errcode) | 524 | if (errcode) |
484 | lj_err_throw(L, errcode); /* Propagate errors. */ | 525 | lj_err_throw(L, errcode); /* Propagate errors. */ |
@@ -490,7 +531,7 @@ static void gc_finalize(lua_State *L) | |||
490 | global_State *g = G(L); | 531 | global_State *g = G(L); |
491 | GCobj *o = gcnext(gcref(g->gc.mmudata)); | 532 | GCobj *o = gcnext(gcref(g->gc.mmudata)); |
492 | cTValue *mo; | 533 | cTValue *mo; |
493 | lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ | 534 | lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace"); |
494 | /* Unchain from list of userdata to be finalized. */ | 535 | /* Unchain from list of userdata to be finalized. */ |
495 | if (o == gcref(g->gc.mmudata)) | 536 | if (o == gcref(g->gc.mmudata)) |
496 | setgcrefnull(g->gc.mmudata); | 537 | setgcrefnull(g->gc.mmudata); |
@@ -565,9 +606,9 @@ void lj_gc_freeall(global_State *g) | |||
565 | /* Free everything, except super-fixed objects (the main thread). */ | 606 | /* Free everything, except super-fixed objects (the main thread). */ |
566 | g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; | 607 | g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; |
567 | gc_fullsweep(g, &g->gc.root); | 608 | gc_fullsweep(g, &g->gc.root); |
568 | strmask = g->strmask; | 609 | strmask = g->str.mask; |
569 | for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ | 610 | for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ |
570 | gc_fullsweep(g, &g->strhash[i]); | 611 | gc_sweepstr(g, &g->str.tab[i]); |
571 | } | 612 | } |
572 | 613 | ||
573 | /* -- Collector ----------------------------------------------------------- */ | 614 | /* -- Collector ----------------------------------------------------------- */ |
@@ -582,7 +623,7 @@ static void atomic(global_State *g, lua_State *L) | |||
582 | 623 | ||
583 | setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ | 624 | setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ |
584 | setgcrefnull(g->gc.weak); | 625 | setgcrefnull(g->gc.weak); |
585 | lua_assert(!iswhite(obj2gco(mainthread(g)))); | 626 | lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white"); |
586 | gc_markobj(g, L); /* Mark running thread. */ | 627 | gc_markobj(g, L); /* Mark running thread. */ |
587 | gc_traverse_curtrace(g); /* Traverse current trace. */ | 628 | gc_traverse_curtrace(g); /* Traverse current trace. */ |
588 | gc_mark_gcroot(g); /* Mark GC roots (again). */ | 629 | gc_mark_gcroot(g); /* Mark GC roots (again). */ |
@@ -597,13 +638,15 @@ static void atomic(global_State *g, lua_State *L) | |||
597 | udsize += gc_propagate_gray(g); /* And propagate the marks. */ | 638 | udsize += gc_propagate_gray(g); /* And propagate the marks. */ |
598 | 639 | ||
599 | /* All marking done, clear weak tables. */ | 640 | /* All marking done, clear weak tables. */ |
600 | gc_clearweak(gcref(g->gc.weak)); | 641 | gc_clearweak(g, gcref(g->gc.weak)); |
642 | |||
643 | lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */ | ||
601 | 644 | ||
602 | /* Prepare for sweep phase. */ | 645 | /* Prepare for sweep phase. */ |
603 | g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ | 646 | g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ |
604 | g->strempty.marked = g->gc.currentwhite; | 647 | g->strempty.marked = g->gc.currentwhite; |
605 | setmref(g->gc.sweep, &g->gc.root); | 648 | setmref(g->gc.sweep, &g->gc.root); |
606 | g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ | 649 | g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */ |
607 | } | 650 | } |
608 | 651 | ||
609 | /* GC state machine. Returns a cost estimate for each step performed. */ | 652 | /* GC state machine. Returns a cost estimate for each step performed. */ |
@@ -620,28 +663,29 @@ static size_t gc_onestep(lua_State *L) | |||
620 | g->gc.state = GCSatomic; /* End of mark phase. */ | 663 | g->gc.state = GCSatomic; /* End of mark phase. */ |
621 | return 0; | 664 | return 0; |
622 | case GCSatomic: | 665 | case GCSatomic: |
623 | if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ | 666 | if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */ |
624 | return LJ_MAX_MEM; | 667 | return LJ_MAX_MEM; |
625 | atomic(g, L); | 668 | atomic(g, L); |
626 | g->gc.state = GCSsweepstring; /* Start of sweep phase. */ | 669 | g->gc.state = GCSsweepstring; /* Start of sweep phase. */ |
627 | g->gc.sweepstr = 0; | 670 | g->gc.sweepstr = 0; |
628 | return 0; | 671 | return 0; |
629 | case GCSsweepstring: { | 672 | case GCSsweepstring: { |
630 | MSize old = g->gc.total; | 673 | GCSize old = g->gc.total; |
631 | gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ | 674 | gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */ |
632 | if (g->gc.sweepstr > g->strmask) | 675 | if (g->gc.sweepstr > g->str.mask) |
633 | g->gc.state = GCSsweep; /* All string hash chains sweeped. */ | 676 | g->gc.state = GCSsweep; /* All string hash chains sweeped. */ |
634 | lua_assert(old >= g->gc.total); | 677 | lj_assertG(old >= g->gc.total, "sweep increased memory"); |
635 | g->gc.estimate -= old - g->gc.total; | 678 | g->gc.estimate -= old - g->gc.total; |
636 | return GCSWEEPCOST; | 679 | return GCSWEEPCOST; |
637 | } | 680 | } |
638 | case GCSsweep: { | 681 | case GCSsweep: { |
639 | MSize old = g->gc.total; | 682 | GCSize old = g->gc.total; |
640 | setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); | 683 | setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); |
641 | lua_assert(old >= g->gc.total); | 684 | lj_assertG(old >= g->gc.total, "sweep increased memory"); |
642 | g->gc.estimate -= old - g->gc.total; | 685 | g->gc.estimate -= old - g->gc.total; |
643 | if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { | 686 | if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { |
644 | gc_shrink(g, L); | 687 | if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1) |
688 | lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */ | ||
645 | if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ | 689 | if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ |
646 | g->gc.state = GCSfinalize; | 690 | g->gc.state = GCSfinalize; |
647 | #if LJ_HASFFI | 691 | #if LJ_HASFFI |
@@ -656,7 +700,7 @@ static size_t gc_onestep(lua_State *L) | |||
656 | } | 700 | } |
657 | case GCSfinalize: | 701 | case GCSfinalize: |
658 | if (gcref(g->gc.mmudata) != NULL) { | 702 | if (gcref(g->gc.mmudata) != NULL) { |
659 | if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ | 703 | if (tvref(g->jit_base)) /* Don't call finalizers on trace. */ |
660 | return LJ_MAX_MEM; | 704 | return LJ_MAX_MEM; |
661 | gc_finalize(L); /* Finalize one userdata object. */ | 705 | gc_finalize(L); /* Finalize one userdata object. */ |
662 | if (g->gc.estimate > GCFINALIZECOST) | 706 | if (g->gc.estimate > GCFINALIZECOST) |
@@ -670,7 +714,7 @@ static size_t gc_onestep(lua_State *L) | |||
670 | g->gc.debt = 0; | 714 | g->gc.debt = 0; |
671 | return 0; | 715 | return 0; |
672 | default: | 716 | default: |
673 | lua_assert(0); | 717 | lj_assertG(0, "bad GC state"); |
674 | return 0; | 718 | return 0; |
675 | } | 719 | } |
676 | } | 720 | } |
@@ -679,7 +723,7 @@ static size_t gc_onestep(lua_State *L) | |||
679 | int LJ_FASTCALL lj_gc_step(lua_State *L) | 723 | int LJ_FASTCALL lj_gc_step(lua_State *L) |
680 | { | 724 | { |
681 | global_State *g = G(L); | 725 | global_State *g = G(L); |
682 | MSize lim; | 726 | GCSize lim; |
683 | int32_t ostate = g->vmstate; | 727 | int32_t ostate = g->vmstate; |
684 | setvmstate(g, GC); | 728 | setvmstate(g, GC); |
685 | lim = (GCSTEPSIZE/100) * g->gc.stepmul; | 729 | lim = (GCSTEPSIZE/100) * g->gc.stepmul; |
@@ -688,13 +732,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L) | |||
688 | if (g->gc.total > g->gc.threshold) | 732 | if (g->gc.total > g->gc.threshold) |
689 | g->gc.debt += g->gc.total - g->gc.threshold; | 733 | g->gc.debt += g->gc.total - g->gc.threshold; |
690 | do { | 734 | do { |
691 | lim -= (MSize)gc_onestep(L); | 735 | lim -= (GCSize)gc_onestep(L); |
692 | if (g->gc.state == GCSpause) { | 736 | if (g->gc.state == GCSpause) { |
693 | g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; | 737 | g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; |
694 | g->vmstate = ostate; | 738 | g->vmstate = ostate; |
695 | return 1; /* Finished a GC cycle. */ | 739 | return 1; /* Finished a GC cycle. */ |
696 | } | 740 | } |
697 | } while ((int32_t)lim > 0); | 741 | } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0)); |
698 | if (g->gc.debt < GCSTEPSIZE) { | 742 | if (g->gc.debt < GCSTEPSIZE) { |
699 | g->gc.threshold = g->gc.total + GCSTEPSIZE; | 743 | g->gc.threshold = g->gc.total + GCSTEPSIZE; |
700 | g->vmstate = ostate; | 744 | g->vmstate = ostate; |
@@ -718,8 +762,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) | |||
718 | /* Perform multiple GC steps. Called from JIT-compiled code. */ | 762 | /* Perform multiple GC steps. Called from JIT-compiled code. */ |
719 | int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) | 763 | int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) |
720 | { | 764 | { |
721 | lua_State *L = gco2th(gcref(g->jit_L)); | 765 | lua_State *L = gco2th(gcref(g->cur_L)); |
722 | L->base = mref(G(L)->jit_base, TValue); | 766 | L->base = tvref(G(L)->jit_base); |
723 | L->top = curr_topL(L); | 767 | L->top = curr_topL(L); |
724 | while (steps-- > 0 && lj_gc_step(L) == 0) | 768 | while (steps-- > 0 && lj_gc_step(L) == 0) |
725 | ; | 769 | ; |
@@ -744,7 +788,8 @@ void lj_gc_fullgc(lua_State *L) | |||
744 | } | 788 | } |
745 | while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) | 789 | while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) |
746 | gc_onestep(L); /* Finish sweep. */ | 790 | gc_onestep(L); /* Finish sweep. */ |
747 | lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause); | 791 | lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause, |
792 | "bad GC state"); | ||
748 | /* Now perform a full GC. */ | 793 | /* Now perform a full GC. */ |
749 | g->gc.state = GCSpause; | 794 | g->gc.state = GCSpause; |
750 | do { gc_onestep(L); } while (g->gc.state != GCSpause); | 795 | do { gc_onestep(L); } while (g->gc.state != GCSpause); |
@@ -757,9 +802,11 @@ void lj_gc_fullgc(lua_State *L) | |||
757 | /* Move the GC propagation frontier forward. */ | 802 | /* Move the GC propagation frontier forward. */ |
758 | void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) | 803 | void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) |
759 | { | 804 | { |
760 | lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); | 805 | lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o), |
761 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | 806 | "bad object states for forward barrier"); |
762 | lua_assert(o->gch.gct != ~LJ_TTAB); | 807 | lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, |
808 | "bad GC state"); | ||
809 | lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table"); | ||
763 | /* Preserve invariant during propagation. Otherwise it doesn't matter. */ | 810 | /* Preserve invariant during propagation. Otherwise it doesn't matter. */ |
764 | if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) | 811 | if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) |
765 | gc_mark(g, v); /* Move frontier forward. */ | 812 | gc_mark(g, v); /* Move frontier forward. */ |
@@ -796,7 +843,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv) | |||
796 | lj_gc_barrierf(g, o, gcV(&uv->tv)); | 843 | lj_gc_barrierf(g, o, gcV(&uv->tv)); |
797 | } else { | 844 | } else { |
798 | makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ | 845 | makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ |
799 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | 846 | lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, |
847 | "bad GC state"); | ||
800 | } | 848 | } |
801 | } | 849 | } |
802 | } | 850 | } |
@@ -813,27 +861,29 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno) | |||
813 | /* -- Allocator ----------------------------------------------------------- */ | 861 | /* -- Allocator ----------------------------------------------------------- */ |
814 | 862 | ||
815 | /* Call pluggable memory allocator to allocate or resize a fragment. */ | 863 | /* Call pluggable memory allocator to allocate or resize a fragment. */ |
816 | void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) | 864 | void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz) |
817 | { | 865 | { |
818 | global_State *g = G(L); | 866 | global_State *g = G(L); |
819 | lua_assert((osz == 0) == (p == NULL)); | 867 | lj_assertG((osz == 0) == (p == NULL), "realloc API violation"); |
820 | p = g->allocf(g->allocd, p, osz, nsz); | 868 | p = g->allocf(g->allocd, p, osz, nsz); |
821 | if (p == NULL && nsz > 0) | 869 | if (p == NULL && nsz > 0) |
822 | lj_err_mem(L); | 870 | lj_err_mem(L); |
823 | lua_assert((nsz == 0) == (p == NULL)); | 871 | lj_assertG((nsz == 0) == (p == NULL), "allocf API violation"); |
824 | lua_assert(checkptr32(p)); | 872 | lj_assertG(checkptrGC(p), |
873 | "allocated memory address %p outside required range", p); | ||
825 | g->gc.total = (g->gc.total - osz) + nsz; | 874 | g->gc.total = (g->gc.total - osz) + nsz; |
826 | return p; | 875 | return p; |
827 | } | 876 | } |
828 | 877 | ||
829 | /* Allocate new GC object and link it to the root set. */ | 878 | /* Allocate new GC object and link it to the root set. */ |
830 | void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size) | 879 | void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size) |
831 | { | 880 | { |
832 | global_State *g = G(L); | 881 | global_State *g = G(L); |
833 | GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); | 882 | GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); |
834 | if (o == NULL) | 883 | if (o == NULL) |
835 | lj_err_mem(L); | 884 | lj_err_mem(L); |
836 | lua_assert(checkptr32(o)); | 885 | lj_assertG(checkptrGC(o), |
886 | "allocated memory address %p outside required range", o); | ||
837 | g->gc.total += size; | 887 | g->gc.total += size; |
838 | setgcrefr(o->gch.nextgc, g->gc.root); | 888 | setgcrefr(o->gch.nextgc, g->gc.root); |
839 | setgcref(g->gc.root, o); | 889 | setgcref(g->gc.root, o); |
diff --git a/src/lj_gc.h b/src/lj_gc.h index c211e072..0df7dee6 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h | |||
@@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno); | |||
81 | static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) | 81 | static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) |
82 | { | 82 | { |
83 | GCobj *o = obj2gco(t); | 83 | GCobj *o = obj2gco(t); |
84 | lua_assert(isblack(o) && !isdead(g, o)); | 84 | lj_assertG(isblack(o) && !isdead(g, o), |
85 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | 85 | "bad object states for backward barrier"); |
86 | lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, | ||
87 | "bad GC state"); | ||
86 | black2gray(o); | 88 | black2gray(o); |
87 | setgcrefr(t->gclist, g->gc.grayagain); | 89 | setgcrefr(t->gclist, g->gc.grayagain); |
88 | setgcref(g->gc.grayagain, o); | 90 | setgcref(g->gc.grayagain, o); |
@@ -107,8 +109,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) | |||
107 | lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } | 109 | lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } |
108 | 110 | ||
109 | /* Allocator. */ | 111 | /* Allocator. */ |
110 | LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); | 112 | LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz); |
111 | LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size); | 113 | LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size); |
112 | LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, | 114 | LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, |
113 | MSize *szp, MSize lim, MSize esz); | 115 | MSize *szp, MSize lim, MSize esz); |
114 | 116 | ||
@@ -116,13 +118,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, | |||
116 | 118 | ||
117 | static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) | 119 | static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) |
118 | { | 120 | { |
119 | g->gc.total -= (MSize)osize; | 121 | g->gc.total -= (GCSize)osize; |
120 | g->allocf(g->allocd, p, osize, 0); | 122 | g->allocf(g->allocd, p, osize, 0); |
121 | } | 123 | } |
122 | 124 | ||
123 | #define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) | 125 | #define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t)))) |
124 | #define lj_mem_reallocvec(L, p, on, n, t) \ | 126 | #define lj_mem_reallocvec(L, p, on, n, t) \ |
125 | ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) | 127 | ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t)))) |
126 | #define lj_mem_growvec(L, p, n, m, t) \ | 128 | #define lj_mem_growvec(L, p, n, m, t) \ |
127 | ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) | 129 | ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) |
128 | #define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) | 130 | #define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) |
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index e4b68375..c50d0d4c 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c | |||
@@ -14,6 +14,8 @@ | |||
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_debug.h" | 15 | #include "lj_debug.h" |
16 | #include "lj_frame.h" | 16 | #include "lj_frame.h" |
17 | #include "lj_buf.h" | ||
18 | #include "lj_strfmt.h" | ||
17 | #include "lj_jit.h" | 19 | #include "lj_jit.h" |
18 | #include "lj_dispatch.h" | 20 | #include "lj_dispatch.h" |
19 | 21 | ||
@@ -294,6 +296,9 @@ enum { | |||
294 | #elif LJ_TARGET_ARM | 296 | #elif LJ_TARGET_ARM |
295 | DW_REG_SP = 13, | 297 | DW_REG_SP = 13, |
296 | DW_REG_RA = 14, | 298 | DW_REG_RA = 14, |
299 | #elif LJ_TARGET_ARM64 | ||
300 | DW_REG_SP = 31, | ||
301 | DW_REG_RA = 30, | ||
297 | #elif LJ_TARGET_PPC | 302 | #elif LJ_TARGET_PPC |
298 | DW_REG_SP = 1, | 303 | DW_REG_SP = 1, |
299 | DW_REG_RA = 65, | 304 | DW_REG_RA = 65, |
@@ -358,7 +363,7 @@ static const ELFheader elfhdr_template = { | |||
358 | .eosabi = 12, | 363 | .eosabi = 12, |
359 | #elif defined(__DragonFly__) | 364 | #elif defined(__DragonFly__) |
360 | .eosabi = 0, | 365 | .eosabi = 0, |
361 | #elif (defined(__sun__) && defined(__svr4__)) | 366 | #elif LJ_TARGET_SOLARIS |
362 | .eosabi = 6, | 367 | .eosabi = 6, |
363 | #else | 368 | #else |
364 | .eosabi = 0, | 369 | .eosabi = 0, |
@@ -372,6 +377,8 @@ static const ELFheader elfhdr_template = { | |||
372 | .machine = 62, | 377 | .machine = 62, |
373 | #elif LJ_TARGET_ARM | 378 | #elif LJ_TARGET_ARM |
374 | .machine = 40, | 379 | .machine = 40, |
380 | #elif LJ_TARGET_ARM64 | ||
381 | .machine = 183, | ||
375 | #elif LJ_TARGET_PPC | 382 | #elif LJ_TARGET_PPC |
376 | .machine = 20, | 383 | .machine = 20, |
377 | #elif LJ_TARGET_MIPS | 384 | #elif LJ_TARGET_MIPS |
@@ -428,16 +435,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n) | |||
428 | *ctx->p++ = '0' + n; | 435 | *ctx->p++ = '0' + n; |
429 | } | 436 | } |
430 | 437 | ||
431 | /* Add a ULEB128 value. */ | ||
432 | static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v) | ||
433 | { | ||
434 | uint8_t *p = ctx->p; | ||
435 | for (; v >= 0x80; v >>= 7) | ||
436 | *p++ = (uint8_t)((v & 0x7f) | 0x80); | ||
437 | *p++ = (uint8_t)v; | ||
438 | ctx->p = p; | ||
439 | } | ||
440 | |||
441 | /* Add a SLEB128 value. */ | 438 | /* Add a SLEB128 value. */ |
442 | static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) | 439 | static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) |
443 | { | 440 | { |
@@ -454,7 +451,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) | |||
454 | #define DU16(x) (*(uint16_t *)p = (x), p += 2) | 451 | #define DU16(x) (*(uint16_t *)p = (x), p += 2) |
455 | #define DU32(x) (*(uint32_t *)p = (x), p += 4) | 452 | #define DU32(x) (*(uint32_t *)p = (x), p += 4) |
456 | #define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) | 453 | #define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) |
457 | #define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) | 454 | #define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x))) |
458 | #define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) | 455 | #define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) |
459 | #define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) | 456 | #define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) |
460 | #define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop | 457 | #define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop |
@@ -564,13 +561,20 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) | |||
564 | DB(DW_CFA_offset|DW_REG_15); DUV(4); | 561 | DB(DW_CFA_offset|DW_REG_15); DUV(4); |
565 | DB(DW_CFA_offset|DW_REG_14); DUV(5); | 562 | DB(DW_CFA_offset|DW_REG_14); DUV(5); |
566 | /* Extra registers saved for JIT-compiled code. */ | 563 | /* Extra registers saved for JIT-compiled code. */ |
567 | DB(DW_CFA_offset|DW_REG_13); DUV(9); | 564 | DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9); |
568 | DB(DW_CFA_offset|DW_REG_12); DUV(10); | 565 | DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10); |
569 | #elif LJ_TARGET_ARM | 566 | #elif LJ_TARGET_ARM |
570 | { | 567 | { |
571 | int i; | 568 | int i; |
572 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } | 569 | for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } |
573 | } | 570 | } |
571 | #elif LJ_TARGET_ARM64 | ||
572 | { | ||
573 | int i; | ||
574 | DB(DW_CFA_offset|31); DUV(2); | ||
575 | for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } | ||
576 | for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } | ||
577 | } | ||
574 | #elif LJ_TARGET_PPC | 578 | #elif LJ_TARGET_PPC |
575 | { | 579 | { |
576 | int i; | 580 | int i; |
@@ -720,13 +724,27 @@ static void gdbjit_buildobj(GDBJITctx *ctx) | |||
720 | SECTALIGN(ctx->p, sizeof(uintptr_t)); | 724 | SECTALIGN(ctx->p, sizeof(uintptr_t)); |
721 | gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); | 725 | gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); |
722 | ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); | 726 | ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); |
723 | lua_assert(ctx->objsize < sizeof(GDBJITobj)); | 727 | lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow"); |
724 | } | 728 | } |
725 | 729 | ||
726 | #undef SECTALIGN | 730 | #undef SECTALIGN |
727 | 731 | ||
728 | /* -- Interface to GDB JIT API -------------------------------------------- */ | 732 | /* -- Interface to GDB JIT API -------------------------------------------- */ |
729 | 733 | ||
734 | static int gdbjit_lock; | ||
735 | |||
736 | static void gdbjit_lock_acquire() | ||
737 | { | ||
738 | while (__sync_lock_test_and_set(&gdbjit_lock, 1)) { | ||
739 | /* Just spin; futexes or pthreads aren't worth the portability cost. */ | ||
740 | } | ||
741 | } | ||
742 | |||
743 | static void gdbjit_lock_release() | ||
744 | { | ||
745 | __sync_lock_release(&gdbjit_lock); | ||
746 | } | ||
747 | |||
730 | /* Add new entry to GDB JIT symbol chain. */ | 748 | /* Add new entry to GDB JIT symbol chain. */ |
731 | static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) | 749 | static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) |
732 | { | 750 | { |
@@ -738,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) | |||
738 | ctx->T->gdbjit_entry = (void *)eo; | 756 | ctx->T->gdbjit_entry = (void *)eo; |
739 | /* Link new entry to chain and register it. */ | 757 | /* Link new entry to chain and register it. */ |
740 | eo->entry.prev_entry = NULL; | 758 | eo->entry.prev_entry = NULL; |
759 | gdbjit_lock_acquire(); | ||
741 | eo->entry.next_entry = __jit_debug_descriptor.first_entry; | 760 | eo->entry.next_entry = __jit_debug_descriptor.first_entry; |
742 | if (eo->entry.next_entry) | 761 | if (eo->entry.next_entry) |
743 | eo->entry.next_entry->prev_entry = &eo->entry; | 762 | eo->entry.next_entry->prev_entry = &eo->entry; |
@@ -747,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) | |||
747 | __jit_debug_descriptor.relevant_entry = &eo->entry; | 766 | __jit_debug_descriptor.relevant_entry = &eo->entry; |
748 | __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; | 767 | __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; |
749 | __jit_debug_register_code(); | 768 | __jit_debug_register_code(); |
769 | gdbjit_lock_release(); | ||
750 | } | 770 | } |
751 | 771 | ||
752 | /* Add debug info for newly compiled trace and notify GDB. */ | 772 | /* Add debug info for newly compiled trace and notify GDB. */ |
@@ -762,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T) | |||
762 | ctx.spadjp = CFRAME_SIZE_JIT + | 782 | ctx.spadjp = CFRAME_SIZE_JIT + |
763 | (MSize)(parent ? traceref(J, parent)->spadjust : 0); | 783 | (MSize)(parent ? traceref(J, parent)->spadjust : 0); |
764 | ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; | 784 | ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; |
765 | lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); | 785 | lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, |
786 | "start PC out of range"); | ||
766 | ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); | 787 | ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); |
767 | ctx.filename = proto_chunknamestr(pt); | 788 | ctx.filename = proto_chunknamestr(pt); |
768 | if (*ctx.filename == '@' || *ctx.filename == '=') | 789 | if (*ctx.filename == '@' || *ctx.filename == '=') |
@@ -778,6 +799,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) | |||
778 | { | 799 | { |
779 | GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; | 800 | GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; |
780 | if (eo) { | 801 | if (eo) { |
802 | gdbjit_lock_acquire(); | ||
781 | if (eo->entry.prev_entry) | 803 | if (eo->entry.prev_entry) |
782 | eo->entry.prev_entry->next_entry = eo->entry.next_entry; | 804 | eo->entry.prev_entry->next_entry = eo->entry.next_entry; |
783 | else | 805 | else |
@@ -787,6 +809,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) | |||
787 | __jit_debug_descriptor.relevant_entry = &eo->entry; | 809 | __jit_debug_descriptor.relevant_entry = &eo->entry; |
788 | __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; | 810 | __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; |
789 | __jit_debug_register_code(); | 811 | __jit_debug_register_code(); |
812 | gdbjit_lock_release(); | ||
790 | lj_mem_free(J2G(J), eo, eo->sz); | 813 | lj_mem_free(J2G(J), eo, eo->sz); |
791 | } | 814 | } |
792 | } | 815 | } |
diff --git a/src/lj_ir.c b/src/lj_ir.c index b2846680..65901510 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
16 | 16 | ||
17 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
18 | #include "lj_buf.h" | ||
18 | #include "lj_str.h" | 19 | #include "lj_str.h" |
19 | #include "lj_tab.h" | 20 | #include "lj_tab.h" |
20 | #include "lj_ir.h" | 21 | #include "lj_ir.h" |
@@ -29,14 +30,16 @@ | |||
29 | #endif | 30 | #endif |
30 | #include "lj_vm.h" | 31 | #include "lj_vm.h" |
31 | #include "lj_strscan.h" | 32 | #include "lj_strscan.h" |
32 | #include "lj_lib.h" | 33 | #include "lj_serialize.h" |
34 | #include "lj_strfmt.h" | ||
35 | #include "lj_prng.h" | ||
33 | 36 | ||
34 | /* Some local macros to save typing. Undef'd at the end. */ | 37 | /* Some local macros to save typing. Undef'd at the end. */ |
35 | #define IR(ref) (&J->cur.ir[(ref)]) | 38 | #define IR(ref) (&J->cur.ir[(ref)]) |
36 | #define fins (&J->fold.ins) | 39 | #define fins (&J->fold.ins) |
37 | 40 | ||
38 | /* Pass IR on to next optimization in chain (FOLD). */ | 41 | /* Pass IR on to next optimization in chain (FOLD). */ |
39 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | 42 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) |
40 | 43 | ||
41 | /* -- IR tables ----------------------------------------------------------- */ | 44 | /* -- IR tables ----------------------------------------------------------- */ |
42 | 45 | ||
@@ -88,8 +91,9 @@ static void lj_ir_growbot(jit_State *J) | |||
88 | { | 91 | { |
89 | IRIns *baseir = J->irbuf + J->irbotlim; | 92 | IRIns *baseir = J->irbuf + J->irbotlim; |
90 | MSize szins = J->irtoplim - J->irbotlim; | 93 | MSize szins = J->irtoplim - J->irbotlim; |
91 | lua_assert(szins != 0); | 94 | lj_assertJ(szins != 0, "zero IR size"); |
92 | lua_assert(J->cur.nk == J->irbotlim); | 95 | lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim, |
96 | "unexpected IR growth"); | ||
93 | if (J->cur.nins + (szins >> 1) < J->irtoplim) { | 97 | if (J->cur.nins + (szins >> 1) < J->irtoplim) { |
94 | /* More than half of the buffer is free on top: shift up by a quarter. */ | 98 | /* More than half of the buffer is free on top: shift up by a quarter. */ |
95 | MSize ofs = szins >> 2; | 99 | MSize ofs = szins >> 2; |
@@ -143,6 +147,17 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) | |||
143 | return emitir(CCI_OPTYPE(ci), tr, id); | 147 | return emitir(CCI_OPTYPE(ci), tr, id); |
144 | } | 148 | } |
145 | 149 | ||
150 | /* Load field of type t from GG_State + offset. Must be 32 bit aligned. */ | ||
151 | TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) | ||
152 | { | ||
153 | lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset"); | ||
154 | ofs >>= 2; | ||
155 | lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff, | ||
156 | "GG_State field offset breaks 10 bit FOLD key limit"); | ||
157 | lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); | ||
158 | return lj_opt_fold(J); | ||
159 | } | ||
160 | |||
146 | /* -- Interning of constants ---------------------------------------------- */ | 161 | /* -- Interning of constants ---------------------------------------------- */ |
147 | 162 | ||
148 | /* | 163 | /* |
@@ -163,6 +178,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) | |||
163 | return ref; | 178 | return ref; |
164 | } | 179 | } |
165 | 180 | ||
181 | /* Get ref of next 64 bit IR constant and optionally grow IR. | ||
182 | ** Note: this may invalidate all IRIns *! | ||
183 | */ | ||
184 | static LJ_AINLINE IRRef ir_nextk64(jit_State *J) | ||
185 | { | ||
186 | IRRef ref = J->cur.nk - 2; | ||
187 | lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state"); | ||
188 | if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); | ||
189 | J->cur.nk = ref; | ||
190 | return ref; | ||
191 | } | ||
192 | |||
193 | #if LJ_GC64 | ||
194 | #define ir_nextkgc ir_nextk64 | ||
195 | #else | ||
196 | #define ir_nextkgc ir_nextk | ||
197 | #endif | ||
198 | |||
166 | /* Intern int32_t constant. */ | 199 | /* Intern int32_t constant. */ |
167 | TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) | 200 | TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) |
168 | { | 201 | { |
@@ -182,79 +215,21 @@ found: | |||
182 | return TREF(ref, IRT_INT); | 215 | return TREF(ref, IRT_INT); |
183 | } | 216 | } |
184 | 217 | ||
185 | /* The MRef inside the KNUM/KINT64 IR instructions holds the address of the | 218 | /* Intern 64 bit constant, given by its 64 bit pattern. */ |
186 | ** 64 bit constant. The constants themselves are stored in a chained array | 219 | TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64) |
187 | ** and shared across traces. | ||
188 | ** | ||
189 | ** Rationale for choosing this data structure: | ||
190 | ** - The address of the constants is embedded in the generated machine code | ||
191 | ** and must never move. A resizable array or hash table wouldn't work. | ||
192 | ** - Most apps need very few non-32 bit integer constants (less than a dozen). | ||
193 | ** - Linear search is hard to beat in terms of speed and low complexity. | ||
194 | */ | ||
195 | typedef struct K64Array { | ||
196 | MRef next; /* Pointer to next list. */ | ||
197 | MSize numk; /* Number of used elements in this array. */ | ||
198 | TValue k[LJ_MIN_K64SZ]; /* Array of constants. */ | ||
199 | } K64Array; | ||
200 | |||
201 | /* Free all chained arrays. */ | ||
202 | void lj_ir_k64_freeall(jit_State *J) | ||
203 | { | ||
204 | K64Array *k; | ||
205 | for (k = mref(J->k64, K64Array); k; ) { | ||
206 | K64Array *next = mref(k->next, K64Array); | ||
207 | lj_mem_free(J2G(J), k, sizeof(K64Array)); | ||
208 | k = next; | ||
209 | } | ||
210 | } | ||
211 | |||
212 | /* Find 64 bit constant in chained array or add it. */ | ||
213 | cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) | ||
214 | { | ||
215 | K64Array *k, *kp = NULL; | ||
216 | TValue *ntv; | ||
217 | MSize idx; | ||
218 | /* Search for the constant in the whole chain of arrays. */ | ||
219 | for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) { | ||
220 | kp = k; /* Remember previous element in list. */ | ||
221 | for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ | ||
222 | TValue *tv = &k->k[idx]; | ||
223 | if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */ | ||
224 | return tv; | ||
225 | } | ||
226 | } | ||
227 | /* Constant was not found, need to add it. */ | ||
228 | if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */ | ||
229 | K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array); | ||
230 | setmref(kn->next, NULL); | ||
231 | kn->numk = 0; | ||
232 | if (kp) | ||
233 | setmref(kp->next, kn); /* Chain to the end of the list. */ | ||
234 | else | ||
235 | setmref(J->k64, kn); /* Link first array. */ | ||
236 | kp = kn; | ||
237 | } | ||
238 | ntv = &kp->k[kp->numk++]; /* Add to current array. */ | ||
239 | ntv->u64 = u64; | ||
240 | return ntv; | ||
241 | } | ||
242 | |||
243 | /* Intern 64 bit constant, given by its address. */ | ||
244 | TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv) | ||
245 | { | 220 | { |
246 | IRIns *ir, *cir = J->cur.ir; | 221 | IRIns *ir, *cir = J->cur.ir; |
247 | IRRef ref; | 222 | IRRef ref; |
248 | IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; | 223 | IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; |
249 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) | 224 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) |
250 | if (ir_k64(&cir[ref]) == tv) | 225 | if (ir_k64(&cir[ref])->u64 == u64) |
251 | goto found; | 226 | goto found; |
252 | ref = ir_nextk(J); | 227 | ref = ir_nextk64(J); |
253 | ir = IR(ref); | 228 | ir = IR(ref); |
254 | lua_assert(checkptr32(tv)); | 229 | ir[1].tv.u64 = u64; |
255 | setmref(ir->ptr, tv); | ||
256 | ir->t.irt = t; | 230 | ir->t.irt = t; |
257 | ir->o = op; | 231 | ir->o = op; |
232 | ir->op12 = 0; | ||
258 | ir->prev = J->chain[op]; | 233 | ir->prev = J->chain[op]; |
259 | J->chain[op] = (IRRef1)ref; | 234 | J->chain[op] = (IRRef1)ref; |
260 | found: | 235 | found: |
@@ -264,13 +239,13 @@ found: | |||
264 | /* Intern FP constant, given by its 64 bit pattern. */ | 239 | /* Intern FP constant, given by its 64 bit pattern. */ |
265 | TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) | 240 | TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) |
266 | { | 241 | { |
267 | return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); | 242 | return lj_ir_k64(J, IR_KNUM, u64); |
268 | } | 243 | } |
269 | 244 | ||
270 | /* Intern 64 bit integer constant. */ | 245 | /* Intern 64 bit integer constant. */ |
271 | TRef lj_ir_kint64(jit_State *J, uint64_t u64) | 246 | TRef lj_ir_kint64(jit_State *J, uint64_t u64) |
272 | { | 247 | { |
273 | return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); | 248 | return lj_ir_k64(J, IR_KINT64, u64); |
274 | } | 249 | } |
275 | 250 | ||
276 | /* Check whether a number is int and return it. -0 is NOT considered an int. */ | 251 | /* Check whether a number is int and return it. -0 is NOT considered an int. */ |
@@ -305,14 +280,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) | |||
305 | { | 280 | { |
306 | IRIns *ir, *cir = J->cur.ir; | 281 | IRIns *ir, *cir = J->cur.ir; |
307 | IRRef ref; | 282 | IRRef ref; |
308 | lua_assert(!isdead(J2G(J), o)); | 283 | lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object"); |
309 | for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) | 284 | for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) |
310 | if (ir_kgc(&cir[ref]) == o) | 285 | if (ir_kgc(&cir[ref]) == o) |
311 | goto found; | 286 | goto found; |
312 | ref = ir_nextk(J); | 287 | ref = ir_nextkgc(J); |
313 | ir = IR(ref); | 288 | ir = IR(ref); |
314 | /* NOBARRIER: Current trace is a GC root. */ | 289 | /* NOBARRIER: Current trace is a GC root. */ |
315 | setgcref(ir->gcr, o); | 290 | ir->op12 = 0; |
291 | setgcref(ir[LJ_GC64].gcr, o); | ||
316 | ir->t.irt = (uint8_t)t; | 292 | ir->t.irt = (uint8_t)t; |
317 | ir->o = IR_KGC; | 293 | ir->o = IR_KGC; |
318 | ir->prev = J->chain[IR_KGC]; | 294 | ir->prev = J->chain[IR_KGC]; |
@@ -321,24 +297,44 @@ found: | |||
321 | return TREF(ref, t); | 297 | return TREF(ref, t); |
322 | } | 298 | } |
323 | 299 | ||
324 | /* Intern 32 bit pointer constant. */ | 300 | /* Allocate GCtrace constant placeholder (no interning). */ |
301 | TRef lj_ir_ktrace(jit_State *J) | ||
302 | { | ||
303 | IRRef ref = ir_nextkgc(J); | ||
304 | IRIns *ir = IR(ref); | ||
305 | lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping"); | ||
306 | ir->t.irt = IRT_P64; | ||
307 | ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */ | ||
308 | ir->op12 = 0; | ||
309 | ir->prev = 0; | ||
310 | return TREF(ref, IRT_P64); | ||
311 | } | ||
312 | |||
313 | /* Intern pointer constant. */ | ||
325 | TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) | 314 | TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) |
326 | { | 315 | { |
327 | IRIns *ir, *cir = J->cur.ir; | 316 | IRIns *ir, *cir = J->cur.ir; |
328 | IRRef ref; | 317 | IRRef ref; |
329 | lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); | 318 | #if LJ_64 && !LJ_GC64 |
319 | lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer"); | ||
320 | #endif | ||
330 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) | 321 | for (ref = J->chain[op]; ref; ref = cir[ref].prev) |
331 | if (mref(cir[ref].ptr, void) == ptr) | 322 | if (ir_kptr(&cir[ref]) == ptr) |
332 | goto found; | 323 | goto found; |
324 | #if LJ_GC64 | ||
325 | ref = ir_nextk64(J); | ||
326 | #else | ||
333 | ref = ir_nextk(J); | 327 | ref = ir_nextk(J); |
328 | #endif | ||
334 | ir = IR(ref); | 329 | ir = IR(ref); |
335 | setmref(ir->ptr, ptr); | 330 | ir->op12 = 0; |
336 | ir->t.irt = IRT_P32; | 331 | setmref(ir[LJ_GC64].ptr, ptr); |
332 | ir->t.irt = IRT_PGC; | ||
337 | ir->o = op; | 333 | ir->o = op; |
338 | ir->prev = J->chain[op]; | 334 | ir->prev = J->chain[op]; |
339 | J->chain[op] = (IRRef1)ref; | 335 | J->chain[op] = (IRRef1)ref; |
340 | found: | 336 | found: |
341 | return TREF(ref, IRT_P32); | 337 | return TREF(ref, IRT_PGC); |
342 | } | 338 | } |
343 | 339 | ||
344 | /* Intern typed NULL constant. */ | 340 | /* Intern typed NULL constant. */ |
@@ -367,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) | |||
367 | IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); | 363 | IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); |
368 | IRRef ref; | 364 | IRRef ref; |
369 | /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ | 365 | /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ |
370 | lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); | 366 | lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot, |
367 | "out-of-range key/slot"); | ||
371 | for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) | 368 | for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) |
372 | if (cir[ref].op12 == op12) | 369 | if (cir[ref].op12 == op12) |
373 | goto found; | 370 | goto found; |
@@ -388,14 +385,15 @@ found: | |||
388 | void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) | 385 | void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) |
389 | { | 386 | { |
390 | UNUSED(L); | 387 | UNUSED(L); |
391 | lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ | 388 | lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. */ |
392 | switch (ir->o) { | 389 | switch (ir->o) { |
393 | case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break; | 390 | case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; |
394 | case IR_KINT: setintV(tv, ir->i); break; | 391 | case IR_KINT: setintV(tv, ir->i); break; |
395 | case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; | 392 | case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; |
396 | case IR_KPTR: case IR_KKPTR: case IR_KNULL: | 393 | case IR_KPTR: case IR_KKPTR: |
397 | setlightudV(tv, mref(ir->ptr, void)); | 394 | setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir)); |
398 | break; | 395 | break; |
396 | case IR_KNULL: setintV(tv, 0); break; | ||
399 | case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; | 397 | case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; |
400 | #if LJ_HASFFI | 398 | #if LJ_HASFFI |
401 | case IR_KINT64: { | 399 | case IR_KINT64: { |
@@ -405,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) | |||
405 | break; | 403 | break; |
406 | } | 404 | } |
407 | #endif | 405 | #endif |
408 | default: lua_assert(0); break; | 406 | default: lj_assertL(0, "bad IR constant op %d", ir->o); break; |
409 | } | 407 | } |
410 | } | 408 | } |
411 | 409 | ||
@@ -443,7 +441,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) | |||
443 | if (!tref_isstr(tr)) { | 441 | if (!tref_isstr(tr)) { |
444 | if (!tref_isnumber(tr)) | 442 | if (!tref_isnumber(tr)) |
445 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 443 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
446 | tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | 444 | tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, |
445 | tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT); | ||
447 | } | 446 | } |
448 | return tr; | 447 | return tr; |
449 | } | 448 | } |
@@ -464,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op) | |||
464 | case IR_UGE: return !(a < b); | 463 | case IR_UGE: return !(a < b); |
465 | case IR_ULE: return !(a > b); | 464 | case IR_ULE: return !(a > b); |
466 | case IR_UGT: return !(a <= b); | 465 | case IR_UGT: return !(a <= b); |
467 | default: lua_assert(0); return 0; | 466 | default: lj_assertX(0, "bad IR op %d", op); return 0; |
468 | } | 467 | } |
469 | } | 468 | } |
470 | 469 | ||
@@ -477,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op) | |||
477 | case IR_GE: return (res >= 0); | 476 | case IR_GE: return (res >= 0); |
478 | case IR_LE: return (res <= 0); | 477 | case IR_LE: return (res <= 0); |
479 | case IR_GT: return (res > 0); | 478 | case IR_GT: return (res > 0); |
480 | default: lua_assert(0); return 0; | 479 | default: lj_assertX(0, "bad IR op %d", op); return 0; |
481 | } | 480 | } |
482 | } | 481 | } |
483 | 482 | ||
diff --git a/src/lj_ir.h b/src/lj_ir.h index da73a4b7..ed492e93 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -40,6 +40,7 @@ | |||
40 | _(USE, S , ref, ___) \ | 40 | _(USE, S , ref, ___) \ |
41 | _(PHI, S , ref, ref) \ | 41 | _(PHI, S , ref, ref) \ |
42 | _(RENAME, S , ref, lit) \ | 42 | _(RENAME, S , ref, lit) \ |
43 | _(PROF, S , ___, ___) \ | ||
43 | \ | 44 | \ |
44 | /* Constants. */ \ | 45 | /* Constants. */ \ |
45 | _(KPRI, N , ___, ___) \ | 46 | _(KPRI, N , ___, ___) \ |
@@ -74,7 +75,6 @@ | |||
74 | _(NEG, N , ref, ref) \ | 75 | _(NEG, N , ref, ref) \ |
75 | \ | 76 | \ |
76 | _(ABS, N , ref, ref) \ | 77 | _(ABS, N , ref, ref) \ |
77 | _(ATAN2, N , ref, ref) \ | ||
78 | _(LDEXP, N , ref, ref) \ | 78 | _(LDEXP, N , ref, ref) \ |
79 | _(MIN, C , ref, ref) \ | 79 | _(MIN, C , ref, ref) \ |
80 | _(MAX, C , ref, ref) \ | 80 | _(MAX, C , ref, ref) \ |
@@ -95,7 +95,9 @@ | |||
95 | _(UREFO, LW, ref, lit) \ | 95 | _(UREFO, LW, ref, lit) \ |
96 | _(UREFC, LW, ref, lit) \ | 96 | _(UREFC, LW, ref, lit) \ |
97 | _(FREF, R , ref, lit) \ | 97 | _(FREF, R , ref, lit) \ |
98 | _(TMPREF, S , ref, lit) \ | ||
98 | _(STRREF, N , ref, ref) \ | 99 | _(STRREF, N , ref, ref) \ |
100 | _(LREF, L , ___, ___) \ | ||
99 | \ | 101 | \ |
100 | /* Loads and Stores. These must be in the same order. */ \ | 102 | /* Loads and Stores. These must be in the same order. */ \ |
101 | _(ALOAD, L , ref, ___) \ | 103 | _(ALOAD, L , ref, ___) \ |
@@ -104,7 +106,8 @@ | |||
104 | _(FLOAD, L , ref, lit) \ | 106 | _(FLOAD, L , ref, lit) \ |
105 | _(XLOAD, L , ref, lit) \ | 107 | _(XLOAD, L , ref, lit) \ |
106 | _(SLOAD, L , lit, lit) \ | 108 | _(SLOAD, L , lit, lit) \ |
107 | _(VLOAD, L , ref, ___) \ | 109 | _(VLOAD, L , ref, lit) \ |
110 | _(ALEN, L , ref, ref) \ | ||
108 | \ | 111 | \ |
109 | _(ASTORE, S , ref, ref) \ | 112 | _(ASTORE, S , ref, ref) \ |
110 | _(HSTORE, S , ref, ref) \ | 113 | _(HSTORE, S , ref, ref) \ |
@@ -120,6 +123,11 @@ | |||
120 | _(CNEW, AW, ref, ref) \ | 123 | _(CNEW, AW, ref, ref) \ |
121 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ | 124 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ |
122 | \ | 125 | \ |
126 | /* Buffer operations. */ \ | ||
127 | _(BUFHDR, L , ref, lit) \ | ||
128 | _(BUFPUT, LW, ref, ref) \ | ||
129 | _(BUFSTR, AW, ref, ref) \ | ||
130 | \ | ||
123 | /* Barriers. */ \ | 131 | /* Barriers. */ \ |
124 | _(TBAR, S , ref, ___) \ | 132 | _(TBAR, S , ref, ___) \ |
125 | _(OBAR, S , ref, ref) \ | 133 | _(OBAR, S , ref, ref) \ |
@@ -128,12 +136,13 @@ | |||
128 | /* Type conversions. */ \ | 136 | /* Type conversions. */ \ |
129 | _(CONV, N , ref, lit) \ | 137 | _(CONV, N , ref, lit) \ |
130 | _(TOBIT, N , ref, ref) \ | 138 | _(TOBIT, N , ref, ref) \ |
131 | _(TOSTR, N , ref, ___) \ | 139 | _(TOSTR, N , ref, lit) \ |
132 | _(STRTO, N , ref, ___) \ | 140 | _(STRTO, N , ref, ___) \ |
133 | \ | 141 | \ |
134 | /* Calls. */ \ | 142 | /* Calls. */ \ |
135 | _(CALLN, N , ref, lit) \ | 143 | _(CALLN, NW, ref, lit) \ |
136 | _(CALLL, L , ref, lit) \ | 144 | _(CALLA, AW, ref, lit) \ |
145 | _(CALLL, LW, ref, lit) \ | ||
137 | _(CALLS, S , ref, lit) \ | 146 | _(CALLS, S , ref, lit) \ |
138 | _(CALLXS, S , ref, ref) \ | 147 | _(CALLXS, S , ref, ref) \ |
139 | _(CARG, N , ref, ref) \ | 148 | _(CARG, N , ref, ref) \ |
@@ -170,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE); | |||
170 | /* FPMATH sub-functions. ORDER FPM. */ | 179 | /* FPMATH sub-functions. ORDER FPM. */ |
171 | #define IRFPMDEF(_) \ | 180 | #define IRFPMDEF(_) \ |
172 | _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ | 181 | _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ |
173 | _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ | 182 | _(SQRT) _(LOG) _(LOG2) \ |
174 | _(SIN) _(COS) _(TAN) \ | ||
175 | _(OTHER) | 183 | _(OTHER) |
176 | 184 | ||
177 | typedef enum { | 185 | typedef enum { |
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM) | |||
186 | _(STR_LEN, offsetof(GCstr, len)) \ | 194 | _(STR_LEN, offsetof(GCstr, len)) \ |
187 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ | 195 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ |
188 | _(FUNC_PC, offsetof(GCfunc, l.pc)) \ | 196 | _(FUNC_PC, offsetof(GCfunc, l.pc)) \ |
197 | _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \ | ||
198 | _(THREAD_ENV, offsetof(lua_State, env)) \ | ||
189 | _(TAB_META, offsetof(GCtab, metatable)) \ | 199 | _(TAB_META, offsetof(GCtab, metatable)) \ |
190 | _(TAB_ARRAY, offsetof(GCtab, array)) \ | 200 | _(TAB_ARRAY, offsetof(GCtab, array)) \ |
191 | _(TAB_NODE, offsetof(GCtab, node)) \ | 201 | _(TAB_NODE, offsetof(GCtab, node)) \ |
@@ -195,9 +205,15 @@ IRFPMDEF(FPMENUM) | |||
195 | _(UDATA_META, offsetof(GCudata, metatable)) \ | 205 | _(UDATA_META, offsetof(GCudata, metatable)) \ |
196 | _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ | 206 | _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ |
197 | _(UDATA_FILE, sizeof(GCudata)) \ | 207 | _(UDATA_FILE, sizeof(GCudata)) \ |
208 | _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \ | ||
209 | _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \ | ||
210 | _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \ | ||
211 | _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \ | ||
212 | _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \ | ||
213 | _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \ | ||
198 | _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ | 214 | _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ |
199 | _(CDATA_PTR, sizeof(GCcdata)) \ | 215 | _(CDATA_PTR, sizeof(GCcdata)) \ |
200 | _(CDATA_INT, sizeof(GCcdata)) \ | 216 | _(CDATA_INT, sizeof(GCcdata)) \ |
201 | _(CDATA_INT64, sizeof(GCcdata)) \ | 217 | _(CDATA_INT64, sizeof(GCcdata)) \ |
202 | _(CDATA_INT64_4, sizeof(GCcdata) + 4) | 218 | _(CDATA_INT64_4, sizeof(GCcdata) + 4) |
203 | 219 | ||
@@ -208,18 +224,29 @@ IRFLDEF(FLENUM) | |||
208 | IRFL__MAX | 224 | IRFL__MAX |
209 | } IRFieldID; | 225 | } IRFieldID; |
210 | 226 | ||
227 | /* TMPREF mode bits, stored in op2. */ | ||
228 | #define IRTMPREF_IN1 0x01 /* First input value. */ | ||
229 | #define IRTMPREF_OUT1 0x02 /* First output value. */ | ||
230 | #define IRTMPREF_OUT2 0x04 /* Second output value. */ | ||
231 | |||
211 | /* SLOAD mode bits, stored in op2. */ | 232 | /* SLOAD mode bits, stored in op2. */ |
212 | #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ | 233 | #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ |
213 | #define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ | 234 | #define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ |
214 | #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ | 235 | #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ |
215 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ | 236 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ |
216 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ | 237 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ |
217 | #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ | 238 | #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ |
239 | #define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */ | ||
218 | 240 | ||
219 | /* XLOAD mode, stored in op2. */ | 241 | /* XLOAD mode bits, stored in op2. */ |
220 | #define IRXLOAD_READONLY 1 /* Load from read-only data. */ | 242 | #define IRXLOAD_READONLY 0x01 /* Load from read-only data. */ |
221 | #define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ | 243 | #define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */ |
222 | #define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ | 244 | #define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. */ |
245 | |||
246 | /* BUFHDR mode, stored in op2. */ | ||
247 | #define IRBUFHDR_RESET 0 /* Reset buffer. */ | ||
248 | #define IRBUFHDR_APPEND 1 /* Append to buffer. */ | ||
249 | #define IRBUFHDR_WRITE 2 /* Write to string buffer. */ | ||
223 | 250 | ||
224 | /* CONV mode, stored in op2. */ | 251 | /* CONV mode, stored in op2. */ |
225 | #define IRCONV_SRCMASK 0x001f /* Source IRType. */ | 252 | #define IRCONV_SRCMASK 0x001f /* Source IRType. */ |
@@ -227,7 +254,6 @@ IRFLDEF(FLENUM) | |||
227 | #define IRCONV_DSH 5 | 254 | #define IRCONV_DSH 5 |
228 | #define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) | 255 | #define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) |
229 | #define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) | 256 | #define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) |
230 | #define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */ | ||
231 | #define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ | 257 | #define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ |
232 | #define IRCONV_MODEMASK 0x0fff | 258 | #define IRCONV_MODEMASK 0x0fff |
233 | #define IRCONV_CONVMASK 0xf000 | 259 | #define IRCONV_CONVMASK 0xf000 |
@@ -237,6 +263,12 @@ IRFLDEF(FLENUM) | |||
237 | #define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ | 263 | #define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ |
238 | #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ | 264 | #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ |
239 | #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ | 265 | #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ |
266 | #define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */ | ||
267 | |||
268 | /* TOSTR mode, stored in op2. */ | ||
269 | #define IRTOSTR_INT 0 /* Convert integer to string. */ | ||
270 | #define IRTOSTR_NUM 1 /* Convert number to string. */ | ||
271 | #define IRTOSTR_CHAR 2 /* Convert char value to string. */ | ||
240 | 272 | ||
241 | /* -- IR operands --------------------------------------------------------- */ | 273 | /* -- IR operands --------------------------------------------------------- */ |
242 | 274 | ||
@@ -276,7 +308,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; | |||
276 | 308 | ||
277 | /* -- IR instruction types ------------------------------------------------ */ | 309 | /* -- IR instruction types ------------------------------------------------ */ |
278 | 310 | ||
279 | /* Map of itypes to non-negative numbers. ORDER LJ_T. | 311 | #define IRTSIZE_PGC (LJ_GC64 ? 8 : 4) |
312 | |||
313 | /* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T. | ||
280 | ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for | 314 | ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for |
281 | ** IRT_P32 and IRT_P64, which never escape the IR. | 315 | ** IRT_P32 and IRT_P64, which never escape the IR. |
282 | ** The various integers are only used in the IR and can only escape to | 316 | ** The various integers are only used in the IR and can only escape to |
@@ -284,12 +318,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; | |||
284 | ** contiguous and next to IRT_NUM (see the typerange macros below). | 318 | ** contiguous and next to IRT_NUM (see the typerange macros below). |
285 | */ | 319 | */ |
286 | #define IRTDEF(_) \ | 320 | #define IRTDEF(_) \ |
287 | _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ | 321 | _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \ |
288 | _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ | 322 | _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \ |
289 | _(TAB, 4) _(UDATA, 4) \ | 323 | _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \ |
324 | _(UDATA, IRTSIZE_PGC) \ | ||
290 | _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ | 325 | _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ |
291 | _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ | 326 | _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ |
292 | _(SOFTFP, 4) /* There is room for 9 more types. */ | 327 | _(SOFTFP, 4) /* There is room for 8 more types. */ |
293 | 328 | ||
294 | /* IR result type and flags (8 bit). */ | 329 | /* IR result type and flags (8 bit). */ |
295 | typedef enum { | 330 | typedef enum { |
@@ -300,6 +335,8 @@ IRTDEF(IRTENUM) | |||
300 | 335 | ||
301 | /* Native pointer type and the corresponding integer type. */ | 336 | /* Native pointer type and the corresponding integer type. */ |
302 | IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, | 337 | IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, |
338 | IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32, | ||
339 | IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT, | ||
303 | IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, | 340 | IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, |
304 | IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, | 341 | IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, |
305 | 342 | ||
@@ -353,7 +390,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
353 | #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) | 390 | #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) |
354 | #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) | 391 | #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) |
355 | 392 | ||
356 | #if LJ_64 | 393 | #if LJ_GC64 |
394 | /* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */ | ||
395 | #define IRT_IS64 \ | ||
396 | ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\ | ||
397 | (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\ | ||
398 | (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\ | ||
399 | (1u<<IRT_NIL)) | ||
400 | #elif LJ_64 | ||
357 | #define IRT_IS64 \ | 401 | #define IRT_IS64 \ |
358 | ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) | 402 | ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) |
359 | #else | 403 | #else |
@@ -374,7 +418,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) | |||
374 | return IRT_INT; | 418 | return IRT_INT; |
375 | else if (tvisnum(tv)) | 419 | else if (tvisnum(tv)) |
376 | return IRT_NUM; | 420 | return IRT_NUM; |
377 | #if LJ_64 | 421 | #if LJ_64 && !LJ_GC64 |
378 | else if (tvislightud(tv)) | 422 | else if (tvislightud(tv)) |
379 | return IRT_LIGHTUD; | 423 | return IRT_LIGHTUD; |
380 | #endif | 424 | #endif |
@@ -384,11 +428,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) | |||
384 | 428 | ||
385 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) | 429 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) |
386 | { | 430 | { |
387 | lua_assert(!LJ_64 || t != IRT_LIGHTUD); | 431 | lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD, |
432 | "no plain type tag for lightuserdata"); | ||
388 | if (LJ_DUALNUM && t > IRT_NUM) { | 433 | if (LJ_DUALNUM && t > IRT_NUM) { |
389 | return LJ_TISNUM; | 434 | return LJ_TISNUM; |
390 | } else { | 435 | } else { |
391 | lua_assert(t <= IRT_NUM); | 436 | lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t); |
392 | return ~(uint32_t)t; | 437 | return ~(uint32_t)t; |
393 | } | 438 | } |
394 | } | 439 | } |
@@ -451,6 +496,7 @@ typedef uint32_t TRef; | |||
451 | #define TREF_REFMASK 0x0000ffff | 496 | #define TREF_REFMASK 0x0000ffff |
452 | #define TREF_FRAME 0x00010000 | 497 | #define TREF_FRAME 0x00010000 |
453 | #define TREF_CONT 0x00020000 | 498 | #define TREF_CONT 0x00020000 |
499 | #define TREF_KEYINDEX 0x00100000 | ||
454 | 500 | ||
455 | #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) | 501 | #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) |
456 | 502 | ||
@@ -464,6 +510,7 @@ typedef uint32_t TRef; | |||
464 | #define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) | 510 | #define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) |
465 | #define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) | 511 | #define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) |
466 | #define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) | 512 | #define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) |
513 | #define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD)) | ||
467 | #define tref_isstr(tr) (tref_istype((tr), IRT_STR)) | 514 | #define tref_isstr(tr) (tref_istype((tr), IRT_STR)) |
468 | #define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) | 515 | #define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) |
469 | #define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) | 516 | #define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) |
@@ -496,7 +543,9 @@ typedef uint32_t TRef; | |||
496 | ** +-------+-------+---+---+---+---+ | 543 | ** +-------+-------+---+---+---+---+ |
497 | ** | op1 | op2 | t | o | r | s | | 544 | ** | op1 | op2 | t | o | r | s | |
498 | ** +-------+-------+---+---+---+---+ | 545 | ** +-------+-------+---+---+---+---+ |
499 | ** | op12/i/gco | ot | prev | (alternative fields in union) | 546 | ** | op12/i/gco32 | ot | prev | (alternative fields in union) |
547 | ** +-------+-------+---+---+---+---+ | ||
548 | ** | TValue/gco64 | (2nd IR slot for 64 bit constants) | ||
500 | ** +---------------+-------+-------+ | 549 | ** +---------------+-------+-------+ |
501 | ** 32 16 16 | 550 | ** 32 16 16 |
502 | ** | 551 | ** |
@@ -524,21 +573,27 @@ typedef union IRIns { | |||
524 | ) | 573 | ) |
525 | }; | 574 | }; |
526 | int32_t i; /* 32 bit signed integer literal (overlaps op12). */ | 575 | int32_t i; /* 32 bit signed integer literal (overlaps op12). */ |
527 | GCRef gcr; /* GCobj constant (overlaps op12). */ | 576 | GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */ |
528 | MRef ptr; /* Pointer constant (overlaps op12). */ | 577 | MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */ |
578 | TValue tv; /* TValue constant (overlaps entire slot). */ | ||
529 | } IRIns; | 579 | } IRIns; |
530 | 580 | ||
531 | #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) | 581 | #define ir_isk64(ir) \ |
582 | ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ | ||
583 | (LJ_GC64 && \ | ||
584 | ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR))) | ||
585 | |||
586 | #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) | ||
532 | #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) | 587 | #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) |
533 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) | 588 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) |
534 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) | 589 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) |
535 | #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) | 590 | #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) |
536 | #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) | 591 | #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) |
537 | #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) | 592 | #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) |
538 | #define ir_k64(ir) \ | 593 | #define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv) |
539 | check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) | ||
540 | #define ir_kptr(ir) \ | 594 | #define ir_kptr(ir) \ |
541 | check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) | 595 | check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ |
596 | mref((ir)[LJ_GC64].ptr, void)) | ||
542 | 597 | ||
543 | /* A store or any other op with a non-weak guard has a side-effect. */ | 598 | /* A store or any other op with a non-weak guard has a side-effect. */ |
544 | static LJ_AINLINE int ir_sideeff(IRIns *ir) | 599 | static LJ_AINLINE int ir_sideeff(IRIns *ir) |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 9ddfb156..7fc3d1fd 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -16,22 +16,26 @@ typedef struct CCallInfo { | |||
16 | uint32_t flags; /* Number of arguments and flags. */ | 16 | uint32_t flags; /* Number of arguments and flags. */ |
17 | } CCallInfo; | 17 | } CCallInfo; |
18 | 18 | ||
19 | #define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ | 19 | #define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */ |
20 | #define CCI_NARGS_MAX 32 /* Max. # of args. */ | 20 | #define CCI_NARGS_MAX 32 /* Max. # of args. */ |
21 | 21 | ||
22 | #define CCI_OTSHIFT 16 | 22 | #define CCI_OTSHIFT 16 |
23 | #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ | 23 | #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ |
24 | #define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE) | ||
24 | #define CCI_OPSHIFT 24 | 25 | #define CCI_OPSHIFT 24 |
25 | #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ | 26 | #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ |
26 | 27 | ||
27 | #define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) | 28 | #define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) |
29 | #define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT) | ||
28 | #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) | 30 | #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) |
29 | #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) | 31 | #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) |
30 | #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) | 32 | #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) |
33 | #define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL) | ||
31 | #define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL) | 34 | #define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL) |
32 | #define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL) | 35 | #define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL) |
33 | 36 | ||
34 | /* C call info flags. */ | 37 | /* C call info flags. */ |
38 | #define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */ | ||
35 | #define CCI_L 0x0100 /* Implicit L arg. */ | 39 | #define CCI_L 0x0100 /* Implicit L arg. */ |
36 | #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ | 40 | #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ |
37 | #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ | 41 | #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ |
@@ -45,6 +49,17 @@ typedef struct CCallInfo { | |||
45 | #define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ | 49 | #define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ |
46 | #define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ | 50 | #define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ |
47 | 51 | ||
52 | /* Extra args for SOFTFP, SPLIT 64 bit. */ | ||
53 | #define CCI_XARGS_SHIFT 14 | ||
54 | #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) | ||
55 | #define CCI_XA (1u << CCI_XARGS_SHIFT) | ||
56 | |||
57 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) | ||
58 | #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) | ||
59 | #else | ||
60 | #define CCI_XNARGS(ci) CCI_NARGS((ci)) | ||
61 | #endif | ||
62 | |||
48 | /* Helpers for conditional function definitions. */ | 63 | /* Helpers for conditional function definitions. */ |
49 | #define IRCALLCOND_ANY(x) x | 64 | #define IRCALLCOND_ANY(x) x |
50 | 65 | ||
@@ -66,6 +81,18 @@ typedef struct CCallInfo { | |||
66 | #define IRCALLCOND_SOFTFP_FFI(x) NULL | 81 | #define IRCALLCOND_SOFTFP_FFI(x) NULL |
67 | #endif | 82 | #endif |
68 | 83 | ||
84 | #if LJ_SOFTFP && LJ_TARGET_MIPS | ||
85 | #define IRCALLCOND_SOFTFP_MIPS(x) x | ||
86 | #else | ||
87 | #define IRCALLCOND_SOFTFP_MIPS(x) NULL | ||
88 | #endif | ||
89 | |||
90 | #if LJ_SOFTFP && LJ_TARGET_MIPS64 | ||
91 | #define IRCALLCOND_SOFTFP_MIPS64(x) x | ||
92 | #else | ||
93 | #define IRCALLCOND_SOFTFP_MIPS64(x) NULL | ||
94 | #endif | ||
95 | |||
69 | #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) | 96 | #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) |
70 | 97 | ||
71 | #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) | 98 | #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) |
@@ -86,93 +113,159 @@ typedef struct CCallInfo { | |||
86 | #define IRCALLCOND_FFI32(x) NULL | 113 | #define IRCALLCOND_FFI32(x) NULL |
87 | #endif | 114 | #endif |
88 | 115 | ||
116 | #if LJ_HASBUFFER | ||
117 | #define IRCALLCOND_BUFFER(x) x | ||
118 | #else | ||
119 | #define IRCALLCOND_BUFFER(x) NULL | ||
120 | #endif | ||
121 | |||
122 | #if LJ_HASBUFFER && LJ_HASFFI | ||
123 | #define IRCALLCOND_BUFFFI(x) x | ||
124 | #else | ||
125 | #define IRCALLCOND_BUFFFI(x) NULL | ||
126 | #endif | ||
127 | |||
89 | #if LJ_SOFTFP | 128 | #if LJ_SOFTFP |
90 | #define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ | 129 | #define XA_FP CCI_XA |
130 | #define XA2_FP (CCI_XA+CCI_XA) | ||
91 | #else | 131 | #else |
92 | #define ARG1_FP 1 | 132 | #define XA_FP 0 |
133 | #define XA2_FP 0 | ||
134 | #endif | ||
135 | |||
136 | #if LJ_SOFTFP32 | ||
137 | #define XA_FP32 CCI_XA | ||
138 | #define XA2_FP32 (CCI_XA+CCI_XA) | ||
139 | #else | ||
140 | #define XA_FP32 0 | ||
141 | #define XA2_FP32 0 | ||
93 | #endif | 142 | #endif |
94 | 143 | ||
95 | #if LJ_32 | 144 | #if LJ_32 |
96 | #define ARG2_64 4 /* Treat as 4 32 bit arguments. */ | 145 | #define XA_64 CCI_XA |
146 | #define XA2_64 (CCI_XA+CCI_XA) | ||
97 | #else | 147 | #else |
98 | #define ARG2_64 2 | 148 | #define XA_64 0 |
149 | #define XA2_64 0 | ||
99 | #endif | 150 | #endif |
100 | 151 | ||
101 | /* Function definitions for CALL* instructions. */ | 152 | /* Function definitions for CALL* instructions. */ |
102 | #define IRCALLDEF(_) \ | 153 | #define IRCALLDEF(_) \ |
103 | _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ | 154 | _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ |
104 | _(ANY, lj_str_new, 3, S, STR, CCI_L) \ | 155 | _(ANY, lj_str_find, 4, N, PGC, 0) \ |
156 | _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \ | ||
105 | _(ANY, lj_strscan_num, 2, FN, INT, 0) \ | 157 | _(ANY, lj_strscan_num, 2, FN, INT, 0) \ |
106 | _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ | 158 | _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \ |
107 | _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ | 159 | _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \ |
108 | _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ | 160 | _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \ |
109 | _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ | 161 | _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \ |
110 | _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ | 162 | _(ANY, lj_strfmt_putnum, 2, FL, PGC, CCI_T) \ |
163 | _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \ | ||
164 | _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \ | ||
165 | _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \ | ||
166 | _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \ | ||
167 | _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \ | ||
168 | _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \ | ||
169 | _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \ | ||
170 | _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \ | ||
171 | _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \ | ||
172 | _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \ | ||
173 | _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \ | ||
174 | _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \ | ||
175 | _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \ | ||
176 | _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \ | ||
177 | _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \ | ||
178 | _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \ | ||
179 | _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \ | ||
180 | _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \ | ||
181 | _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \ | ||
182 | _(BUFFER, lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \ | ||
183 | _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \ | ||
184 | _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \ | ||
185 | _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \ | ||
186 | _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \ | ||
187 | _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \ | ||
188 | _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ | ||
189 | _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \ | ||
190 | _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \ | ||
191 | _(ANY, lj_vm_next, 2, FL, PTR, 0) \ | ||
111 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ | 192 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ |
193 | _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ | ||
112 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ | 194 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ |
113 | _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ | 195 | _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ |
114 | _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ | 196 | _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \ |
115 | _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ | 197 | _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \ |
116 | _(ANY, lj_vm_modi, 2, FN, INT, 0) \ | 198 | _(ANY, lj_vm_modi, 2, FN, INT, 0) \ |
117 | _(ANY, sinh, ARG1_FP, N, NUM, 0) \ | 199 | _(ANY, log10, 1, N, NUM, XA_FP) \ |
118 | _(ANY, cosh, ARG1_FP, N, NUM, 0) \ | 200 | _(ANY, exp, 1, N, NUM, XA_FP) \ |
119 | _(ANY, tanh, ARG1_FP, N, NUM, 0) \ | 201 | _(ANY, sin, 1, N, NUM, XA_FP) \ |
120 | _(ANY, fputc, 2, S, INT, 0) \ | 202 | _(ANY, cos, 1, N, NUM, XA_FP) \ |
121 | _(ANY, fwrite, 4, S, INT, 0) \ | 203 | _(ANY, tan, 1, N, NUM, XA_FP) \ |
122 | _(ANY, fflush, 1, S, INT, 0) \ | 204 | _(ANY, asin, 1, N, NUM, XA_FP) \ |
205 | _(ANY, acos, 1, N, NUM, XA_FP) \ | ||
206 | _(ANY, atan, 1, N, NUM, XA_FP) \ | ||
207 | _(ANY, sinh, 1, N, NUM, XA_FP) \ | ||
208 | _(ANY, cosh, 1, N, NUM, XA_FP) \ | ||
209 | _(ANY, tanh, 1, N, NUM, XA_FP) \ | ||
210 | _(ANY, fputc, 2, S, INT, 0) \ | ||
211 | _(ANY, fwrite, 4, S, INT, 0) \ | ||
212 | _(ANY, fflush, 1, S, INT, 0) \ | ||
123 | /* ORDER FPM */ \ | 213 | /* ORDER FPM */ \ |
124 | _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ | 214 | _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \ |
125 | _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ | 215 | _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ |
126 | _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ | 216 | _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ |
127 | _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ | 217 | _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ |
128 | _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ | 218 | _(ANY, log, 1, N, NUM, XA_FP) \ |
129 | _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ | 219 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ |
130 | _(FPMATH, log, ARG1_FP, N, NUM, 0) \ | 220 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ |
131 | _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ | 221 | _(ANY, pow, 2, N, NUM, XA2_FP) \ |
132 | _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ | 222 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ |
133 | _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ | 223 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ |
134 | _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ | 224 | _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ |
135 | _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ | 225 | _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ |
136 | _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ | 226 | _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ |
137 | _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ | 227 | _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ |
138 | _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ | 228 | _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ |
139 | _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ | 229 | _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ |
140 | _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ | ||
141 | _(SOFTFP, softfp_add, 4, N, NUM, 0) \ | ||
142 | _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ | ||
143 | _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ | ||
144 | _(SOFTFP, softfp_div, 4, N, NUM, 0) \ | ||
145 | _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ | ||
146 | _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ | 230 | _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ |
147 | _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ | 231 | _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ |
232 | _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ | ||
233 | _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ | ||
234 | _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ | ||
148 | _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ | 235 | _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ |
149 | _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ | 236 | _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ |
150 | _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ | 237 | _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ |
151 | _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ | 238 | _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ |
152 | _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ | 239 | _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ |
153 | _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ | 240 | _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ |
154 | _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ | 241 | _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ |
155 | _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ | 242 | _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ |
156 | _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ | 243 | _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ |
157 | _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ | 244 | _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ |
158 | _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ | 245 | _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ |
159 | _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ | 246 | _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ |
160 | _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ | 247 | _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ |
161 | _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ | 248 | _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \ |
162 | _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ | 249 | _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ |
163 | _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ | 250 | _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ |
164 | _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ | 251 | _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
165 | _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ | 252 | _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ |
166 | _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ | 253 | _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
167 | _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ | 254 | _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ |
168 | _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ | 255 | _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
169 | _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ | 256 | _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ |
170 | _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ | 257 | _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \ |
171 | _(FFI, strlen, 1, L, INTP, 0) \ | 258 | _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \ |
172 | _(FFI, memcpy, 3, S, PTR, 0) \ | 259 | _(FFI, strlen, 1, L, INTP, 0) \ |
173 | _(FFI, memset, 3, S, PTR, 0) \ | 260 | _(FFI, memcpy, 3, S, PTR, 0) \ |
174 | _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ | 261 | _(FFI, memset, 3, S, PTR, 0) \ |
175 | _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) | 262 | _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ |
263 | _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ | ||
264 | _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
265 | _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
266 | _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
267 | _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
268 | _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ | ||
176 | \ | 269 | \ |
177 | /* End of list. */ | 270 | /* End of list. */ |
178 | 271 | ||
@@ -220,6 +313,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; | |||
220 | #define fp64_f2l __aeabi_f2lz | 313 | #define fp64_f2l __aeabi_f2lz |
221 | #define fp64_f2ul __aeabi_f2ulz | 314 | #define fp64_f2ul __aeabi_f2ulz |
222 | #endif | 315 | #endif |
316 | #elif LJ_TARGET_MIPS || LJ_TARGET_PPC | ||
317 | #define softfp_add __adddf3 | ||
318 | #define softfp_sub __subdf3 | ||
319 | #define softfp_mul __muldf3 | ||
320 | #define softfp_div __divdf3 | ||
321 | #define softfp_cmp __ledf2 | ||
322 | #define softfp_i2d __floatsidf | ||
323 | #define softfp_d2i __fixdfsi | ||
324 | #define softfp_ui2d __floatunsidf | ||
325 | #define softfp_f2d __extendsfdf2 | ||
326 | #define softfp_d2ui __fixunsdfsi | ||
327 | #define softfp_d2f __truncdfsf2 | ||
328 | #define softfp_i2f __floatsisf | ||
329 | #define softfp_ui2f __floatunsisf | ||
330 | #define softfp_f2i __fixsfsi | ||
331 | #define softfp_f2ui __fixunssfsi | ||
223 | #else | 332 | #else |
224 | #error "Missing soft-float definitions for target architecture" | 333 | #error "Missing soft-float definitions for target architecture" |
225 | #endif | 334 | #endif |
@@ -240,10 +349,14 @@ extern float softfp_ui2f(uint32_t a); | |||
240 | extern int32_t softfp_f2i(float a); | 349 | extern int32_t softfp_f2i(float a); |
241 | extern uint32_t softfp_f2ui(float a); | 350 | extern uint32_t softfp_f2ui(float a); |
242 | #endif | 351 | #endif |
352 | #if LJ_TARGET_MIPS | ||
353 | extern double lj_vm_sfmin(double a, double b); | ||
354 | extern double lj_vm_sfmax(double a, double b); | ||
355 | #endif | ||
243 | #endif | 356 | #endif |
244 | 357 | ||
245 | #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) | 358 | #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) |
246 | #ifdef __GNUC__ | 359 | #if defined(__GNUC__) || defined(__clang__) |
247 | #define fp64_l2d __floatdidf | 360 | #define fp64_l2d __floatdidf |
248 | #define fp64_ul2d __floatundidf | 361 | #define fp64_ul2d __floatundidf |
249 | #define fp64_l2f __floatdisf | 362 | #define fp64_l2f __floatdisf |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index e89d796f..7bce34c7 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
@@ -36,11 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) | |||
36 | return ref; | 36 | return ref; |
37 | } | 37 | } |
38 | 38 | ||
39 | LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs); | ||
40 | |||
39 | /* Interning of constants. */ | 41 | /* Interning of constants. */ |
40 | LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); | 42 | LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); |
41 | LJ_FUNC void lj_ir_k64_freeall(jit_State *J); | 43 | LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64); |
42 | LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv); | ||
43 | LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64); | ||
44 | LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); | 44 | LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); |
45 | LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); | 45 | LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); |
46 | LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); | 46 | LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); |
@@ -48,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t); | |||
48 | LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); | 48 | LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); |
49 | LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); | 49 | LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); |
50 | LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); | 50 | LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); |
51 | LJ_FUNC TRef lj_ir_ktrace(jit_State *J); | ||
51 | 52 | ||
52 | #if LJ_64 | 53 | #if LJ_64 |
53 | #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) | 54 | #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) |
@@ -74,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) | |||
74 | #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) | 75 | #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) |
75 | 76 | ||
76 | /* Special 128 bit SIMD constants. */ | 77 | /* Special 128 bit SIMD constants. */ |
77 | #define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) | 78 | #define lj_ir_ksimd(J, idx) \ |
78 | #define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) | 79 | lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J)) |
79 | 80 | ||
80 | /* Access to constants. */ | 81 | /* Access to constants. */ |
81 | LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); | 82 | LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); |
@@ -119,10 +120,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); | |||
119 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); | 120 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); |
120 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); | 121 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); |
121 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); | 122 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); |
122 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); | 123 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J); |
123 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); | 124 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); |
124 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); | 125 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); |
125 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); | 126 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); |
127 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim); | ||
126 | LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); | 128 | LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); |
127 | 129 | ||
128 | /* Dead-store elimination. */ | 130 | /* Dead-store elimination. */ |
@@ -149,7 +151,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); | |||
149 | /* Optimization passes. */ | 151 | /* Optimization passes. */ |
150 | LJ_FUNC void lj_opt_dce(jit_State *J); | 152 | LJ_FUNC void lj_opt_dce(jit_State *J); |
151 | LJ_FUNC int lj_opt_loop(jit_State *J); | 153 | LJ_FUNC int lj_opt_loop(jit_State *J); |
152 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 154 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
153 | LJ_FUNC void lj_opt_split(jit_State *J); | 155 | LJ_FUNC void lj_opt_split(jit_State *J); |
154 | #else | 156 | #else |
155 | #define lj_opt_split(J) UNUSED(J) | 157 | #define lj_opt_split(J) UNUSED(J) |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 02850935..0b5ad4d6 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -9,71 +9,85 @@ | |||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_ir.h" | 10 | #include "lj_ir.h" |
11 | 11 | ||
12 | /* JIT engine flags. */ | 12 | /* -- JIT engine flags ---------------------------------------------------- */ |
13 | |||
14 | /* General JIT engine flags. 4 bits. */ | ||
13 | #define JIT_F_ON 0x00000001 | 15 | #define JIT_F_ON 0x00000001 |
14 | 16 | ||
15 | /* CPU-specific JIT engine flags. */ | 17 | /* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ |
18 | #define JIT_F_CPU 0x00000010 | ||
19 | |||
16 | #if LJ_TARGET_X86ORX64 | 20 | #if LJ_TARGET_X86ORX64 |
17 | #define JIT_F_CMOV 0x00000010 | 21 | |
18 | #define JIT_F_SSE2 0x00000020 | 22 | #define JIT_F_SSE3 (JIT_F_CPU << 0) |
19 | #define JIT_F_SSE3 0x00000040 | 23 | #define JIT_F_SSE4_1 (JIT_F_CPU << 1) |
20 | #define JIT_F_SSE4_1 0x00000080 | 24 | #define JIT_F_BMI2 (JIT_F_CPU << 2) |
21 | #define JIT_F_P4 0x00000100 | 25 | |
22 | #define JIT_F_PREFER_IMUL 0x00000200 | 26 | |
23 | #define JIT_F_SPLIT_XMM 0x00000400 | 27 | #define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" |
24 | #define JIT_F_LEA_AGU 0x00000800 | 28 | |
25 | |||
26 | /* Names for the CPU-specific flags. Must match the order above. */ | ||
27 | #define JIT_F_CPU_FIRST JIT_F_CMOV | ||
28 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" | ||
29 | #elif LJ_TARGET_ARM | 29 | #elif LJ_TARGET_ARM |
30 | #define JIT_F_ARMV6_ 0x00000010 | 30 | |
31 | #define JIT_F_ARMV6T2_ 0x00000020 | 31 | #define JIT_F_ARMV6_ (JIT_F_CPU << 0) |
32 | #define JIT_F_ARMV7 0x00000040 | 32 | #define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) |
33 | #define JIT_F_VFPV2 0x00000080 | 33 | #define JIT_F_ARMV7 (JIT_F_CPU << 2) |
34 | #define JIT_F_VFPV3 0x00000100 | 34 | #define JIT_F_ARMV8 (JIT_F_CPU << 3) |
35 | 35 | #define JIT_F_VFPV2 (JIT_F_CPU << 4) | |
36 | #define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) | 36 | #define JIT_F_VFPV3 (JIT_F_CPU << 5) |
37 | #define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) | 37 | |
38 | #define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) | ||
39 | #define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) | ||
38 | #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) | 40 | #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) |
39 | 41 | ||
40 | /* Names for the CPU-specific flags. Must match the order above. */ | 42 | #define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" |
41 | #define JIT_F_CPU_FIRST JIT_F_ARMV6_ | 43 | |
42 | #define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" | ||
43 | #elif LJ_TARGET_PPC | 44 | #elif LJ_TARGET_PPC |
44 | #define JIT_F_SQRT 0x00000010 | ||
45 | #define JIT_F_ROUND 0x00000020 | ||
46 | 45 | ||
47 | /* Names for the CPU-specific flags. Must match the order above. */ | 46 | #define JIT_F_SQRT (JIT_F_CPU << 0) |
48 | #define JIT_F_CPU_FIRST JIT_F_SQRT | 47 | #define JIT_F_ROUND (JIT_F_CPU << 1) |
48 | |||
49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" | 49 | #define JIT_F_CPUSTRING "\4SQRT\5ROUND" |
50 | |||
50 | #elif LJ_TARGET_MIPS | 51 | #elif LJ_TARGET_MIPS |
51 | #define JIT_F_MIPS32R2 0x00000010 | ||
52 | 52 | ||
53 | /* Names for the CPU-specific flags. Must match the order above. */ | 53 | #define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) |
54 | #define JIT_F_CPU_FIRST JIT_F_MIPS32R2 | 54 | |
55 | #if LJ_TARGET_MIPS32 | ||
56 | #if LJ_TARGET_MIPSR6 | ||
57 | #define JIT_F_CPUSTRING "\010MIPS32R6" | ||
58 | #else | ||
55 | #define JIT_F_CPUSTRING "\010MIPS32R2" | 59 | #define JIT_F_CPUSTRING "\010MIPS32R2" |
60 | #endif | ||
61 | #else | ||
62 | #if LJ_TARGET_MIPSR6 | ||
63 | #define JIT_F_CPUSTRING "\010MIPS64R6" | ||
56 | #else | 64 | #else |
57 | #define JIT_F_CPU_FIRST 0 | 65 | #define JIT_F_CPUSTRING "\010MIPS64R2" |
66 | #endif | ||
67 | #endif | ||
68 | |||
69 | #else | ||
70 | |||
58 | #define JIT_F_CPUSTRING "" | 71 | #define JIT_F_CPUSTRING "" |
72 | |||
59 | #endif | 73 | #endif |
60 | 74 | ||
61 | /* Optimization flags. */ | 75 | /* Optimization flags. 12 bits. */ |
76 | #define JIT_F_OPT 0x00010000 | ||
62 | #define JIT_F_OPT_MASK 0x0fff0000 | 77 | #define JIT_F_OPT_MASK 0x0fff0000 |
63 | 78 | ||
64 | #define JIT_F_OPT_FOLD 0x00010000 | 79 | #define JIT_F_OPT_FOLD (JIT_F_OPT << 0) |
65 | #define JIT_F_OPT_CSE 0x00020000 | 80 | #define JIT_F_OPT_CSE (JIT_F_OPT << 1) |
66 | #define JIT_F_OPT_DCE 0x00040000 | 81 | #define JIT_F_OPT_DCE (JIT_F_OPT << 2) |
67 | #define JIT_F_OPT_FWD 0x00080000 | 82 | #define JIT_F_OPT_FWD (JIT_F_OPT << 3) |
68 | #define JIT_F_OPT_DSE 0x00100000 | 83 | #define JIT_F_OPT_DSE (JIT_F_OPT << 4) |
69 | #define JIT_F_OPT_NARROW 0x00200000 | 84 | #define JIT_F_OPT_NARROW (JIT_F_OPT << 5) |
70 | #define JIT_F_OPT_LOOP 0x00400000 | 85 | #define JIT_F_OPT_LOOP (JIT_F_OPT << 6) |
71 | #define JIT_F_OPT_ABC 0x00800000 | 86 | #define JIT_F_OPT_ABC (JIT_F_OPT << 7) |
72 | #define JIT_F_OPT_SINK 0x01000000 | 87 | #define JIT_F_OPT_SINK (JIT_F_OPT << 8) |
73 | #define JIT_F_OPT_FUSE 0x02000000 | 88 | #define JIT_F_OPT_FUSE (JIT_F_OPT << 9) |
74 | 89 | ||
75 | /* Optimization names for -O. Must match the order above. */ | 90 | /* Optimization names for -O. Must match the order above. */ |
76 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | ||
77 | #define JIT_F_OPTSTRING \ | 91 | #define JIT_F_OPTSTRING \ |
78 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" | 92 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" |
79 | 93 | ||
@@ -85,6 +99,8 @@ | |||
85 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) | 99 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) |
86 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | 100 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 |
87 | 101 | ||
102 | /* -- JIT engine parameters ----------------------------------------------- */ | ||
103 | |||
88 | #if LJ_TARGET_WINDOWS || LJ_64 | 104 | #if LJ_TARGET_WINDOWS || LJ_64 |
89 | /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ | 105 | /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ |
90 | #define JIT_P_sizemcode_DEFAULT 64 | 106 | #define JIT_P_sizemcode_DEFAULT 64 |
@@ -100,6 +116,7 @@ | |||
100 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ | 116 | _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ |
101 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ | 117 | _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ |
102 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ | 118 | _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ |
119 | _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ | ||
103 | \ | 120 | \ |
104 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ | 121 | _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ |
105 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ | 122 | _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ |
@@ -126,11 +143,14 @@ JIT_PARAMDEF(JIT_PARAMENUM) | |||
126 | #define JIT_PARAMSTR(len, name, value) #len #name | 143 | #define JIT_PARAMSTR(len, name, value) #len #name |
127 | #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) | 144 | #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) |
128 | 145 | ||
146 | /* -- JIT engine data structures ------------------------------------------ */ | ||
147 | |||
129 | /* Trace compiler state. */ | 148 | /* Trace compiler state. */ |
130 | typedef enum { | 149 | typedef enum { |
131 | LJ_TRACE_IDLE, /* Trace compiler idle. */ | 150 | LJ_TRACE_IDLE, /* Trace compiler idle. */ |
132 | LJ_TRACE_ACTIVE = 0x10, | 151 | LJ_TRACE_ACTIVE = 0x10, |
133 | LJ_TRACE_RECORD, /* Bytecode recording active. */ | 152 | LJ_TRACE_RECORD, /* Bytecode recording active. */ |
153 | LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */ | ||
134 | LJ_TRACE_START, /* New trace started. */ | 154 | LJ_TRACE_START, /* New trace started. */ |
135 | LJ_TRACE_END, /* End of trace. */ | 155 | LJ_TRACE_END, /* End of trace. */ |
136 | LJ_TRACE_ASM, /* Assemble trace. */ | 156 | LJ_TRACE_ASM, /* Assemble trace. */ |
@@ -165,6 +185,7 @@ typedef struct MCLink { | |||
165 | typedef struct SnapShot { | 185 | typedef struct SnapShot { |
166 | uint32_t mapofs; /* Offset into snapshot map. */ | 186 | uint32_t mapofs; /* Offset into snapshot map. */ |
167 | IRRef1 ref; /* First IR ref for this snapshot. */ | 187 | IRRef1 ref; /* First IR ref for this snapshot. */ |
188 | uint16_t mcofs; /* Offset into machine code in MCode units. */ | ||
168 | uint8_t nslots; /* Number of valid slots. */ | 189 | uint8_t nslots; /* Number of valid slots. */ |
169 | uint8_t topslot; /* Maximum frame extent. */ | 190 | uint8_t topslot; /* Maximum frame extent. */ |
170 | uint8_t nent; /* Number of compressed entries. */ | 191 | uint8_t nent; /* Number of compressed entries. */ |
@@ -180,20 +201,35 @@ typedef uint32_t SnapEntry; | |||
180 | #define SNAP_CONT 0x020000 /* Continuation slot. */ | 201 | #define SNAP_CONT 0x020000 /* Continuation slot. */ |
181 | #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ | 202 | #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ |
182 | #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ | 203 | #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ |
204 | #define SNAP_KEYINDEX 0x100000 /* Traversal key index. */ | ||
183 | LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); | 205 | LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); |
184 | LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); | 206 | LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); |
207 | LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX); | ||
185 | 208 | ||
186 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) | 209 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) |
187 | #define SNAP_TR(slot, tr) \ | 210 | #define SNAP_TR(slot, tr) \ |
188 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) | 211 | (((SnapEntry)(slot) << 24) + \ |
212 | ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK))) | ||
213 | #if !LJ_FR2 | ||
189 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | 214 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) |
215 | #endif | ||
190 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | 216 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) |
191 | #define snap_ref(sn) ((sn) & 0xffff) | 217 | #define snap_ref(sn) ((sn) & 0xffff) |
192 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | 218 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) |
193 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | 219 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) |
194 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
195 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) | 220 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) |
196 | 221 | ||
222 | static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) | ||
223 | { | ||
224 | #if LJ_FR2 | ||
225 | uint64_t pcbase; | ||
226 | memcpy(&pcbase, sn, sizeof(uint64_t)); | ||
227 | return (const BCIns *)(pcbase >> 8); | ||
228 | #else | ||
229 | return (const BCIns *)(uintptr_t)*sn; | ||
230 | #endif | ||
231 | } | ||
232 | |||
197 | /* Snapshot and exit numbers. */ | 233 | /* Snapshot and exit numbers. */ |
198 | typedef uint32_t SnapNo; | 234 | typedef uint32_t SnapNo; |
199 | typedef uint32_t ExitNo; | 235 | typedef uint32_t ExitNo; |
@@ -211,7 +247,8 @@ typedef enum { | |||
211 | LJ_TRLINK_UPREC, /* Up-recursion. */ | 247 | LJ_TRLINK_UPREC, /* Up-recursion. */ |
212 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ | 248 | LJ_TRLINK_DOWNREC, /* Down-recursion. */ |
213 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ | 249 | LJ_TRLINK_INTERP, /* Fallback to interpreter. */ |
214 | LJ_TRLINK_RETURN /* Return to interpreter. */ | 250 | LJ_TRLINK_RETURN, /* Return to interpreter. */ |
251 | LJ_TRLINK_STITCH /* Trace stitching. */ | ||
215 | } TraceLink; | 252 | } TraceLink; |
216 | 253 | ||
217 | /* Trace object. */ | 254 | /* Trace object. */ |
@@ -219,6 +256,9 @@ typedef struct GCtrace { | |||
219 | GCHeader; | 256 | GCHeader; |
220 | uint16_t nsnap; /* Number of snapshots. */ | 257 | uint16_t nsnap; /* Number of snapshots. */ |
221 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ | 258 | IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ |
259 | #if LJ_GC64 | ||
260 | uint32_t unused_gc64; | ||
261 | #endif | ||
222 | GCRef gclist; | 262 | GCRef gclist; |
223 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ | 263 | IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ |
224 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ | 264 | IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ |
@@ -294,6 +334,16 @@ typedef struct ScEvEntry { | |||
294 | uint8_t dir; /* Direction. 1: +, 0: -. */ | 334 | uint8_t dir; /* Direction. 1: +, 0: -. */ |
295 | } ScEvEntry; | 335 | } ScEvEntry; |
296 | 336 | ||
337 | /* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */ | ||
338 | typedef struct RBCHashEntry { | ||
339 | MRef pc; /* Bytecode PC. */ | ||
340 | GCRef pt; /* Prototype. */ | ||
341 | IRRef ref; /* IR reference. */ | ||
342 | } RBCHashEntry; | ||
343 | |||
344 | /* Number of slots in the reverse bytecode hash table. Must be a power of 2. */ | ||
345 | #define RBCHASH_SLOTS 8 | ||
346 | |||
297 | /* 128 bit SIMD constants. */ | 347 | /* 128 bit SIMD constants. */ |
298 | enum { | 348 | enum { |
299 | LJ_KSIMD_ABS, | 349 | LJ_KSIMD_ABS, |
@@ -301,12 +351,51 @@ enum { | |||
301 | LJ_KSIMD__MAX | 351 | LJ_KSIMD__MAX |
302 | }; | 352 | }; |
303 | 353 | ||
354 | enum { | ||
355 | #if LJ_TARGET_X86ORX64 | ||
356 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ | ||
357 | LJ_K64_2P64, /* 2^64 */ | ||
358 | LJ_K64_M2P64, /* -2^64 */ | ||
359 | #if LJ_32 | ||
360 | LJ_K64_M2P64_31, /* -2^64 or -2^31 */ | ||
361 | #else | ||
362 | LJ_K64_M2P64_31 = LJ_K64_M2P64, | ||
363 | #endif | ||
364 | #endif | ||
365 | #if LJ_TARGET_MIPS | ||
366 | LJ_K64_2P31, /* 2^31 */ | ||
367 | #if LJ_64 | ||
368 | LJ_K64_2P63, /* 2^63 */ | ||
369 | LJ_K64_M2P64, /* -2^64 */ | ||
370 | #endif | ||
371 | #endif | ||
372 | LJ_K64__MAX, | ||
373 | }; | ||
374 | |||
375 | enum { | ||
376 | #if LJ_TARGET_X86ORX64 | ||
377 | LJ_K32_M2P64_31, /* -2^64 or -2^31 */ | ||
378 | #endif | ||
379 | #if LJ_TARGET_PPC | ||
380 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ | ||
381 | LJ_K32_2P52, /* 2^52 */ | ||
382 | #endif | ||
383 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
384 | LJ_K32_2P31, /* 2^31 */ | ||
385 | #endif | ||
386 | #if LJ_TARGET_MIPS64 | ||
387 | LJ_K32_2P63, /* 2^63 */ | ||
388 | LJ_K32_M2P64, /* -2^64 */ | ||
389 | #endif | ||
390 | LJ_K32__MAX | ||
391 | }; | ||
392 | |||
304 | /* Get 16 byte aligned pointer to SIMD constant. */ | 393 | /* Get 16 byte aligned pointer to SIMD constant. */ |
305 | #define LJ_KSIMD(J, n) \ | 394 | #define LJ_KSIMD(J, n) \ |
306 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) | 395 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) |
307 | 396 | ||
308 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ | 397 | /* Set/reset flag to activate the SPLIT pass for the current trace. */ |
309 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 398 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
310 | #define lj_needsplit(J) (J->needsplit = 1) | 399 | #define lj_needsplit(J) (J->needsplit = 1) |
311 | #define lj_resetsplit(J) (J->needsplit = 0) | 400 | #define lj_resetsplit(J) (J->needsplit = 0) |
312 | #else | 401 | #else |
@@ -317,13 +406,14 @@ enum { | |||
317 | /* Fold state is used to fold instructions on-the-fly. */ | 406 | /* Fold state is used to fold instructions on-the-fly. */ |
318 | typedef struct FoldState { | 407 | typedef struct FoldState { |
319 | IRIns ins; /* Currently emitted instruction. */ | 408 | IRIns ins; /* Currently emitted instruction. */ |
320 | IRIns left; /* Instruction referenced by left operand. */ | 409 | IRIns left[2]; /* Instruction referenced by left operand. */ |
321 | IRIns right; /* Instruction referenced by right operand. */ | 410 | IRIns right[2]; /* Instruction referenced by right operand. */ |
322 | } FoldState; | 411 | } FoldState; |
323 | 412 | ||
324 | /* JIT compiler state. */ | 413 | /* JIT compiler state. */ |
325 | typedef struct jit_State { | 414 | typedef struct jit_State { |
326 | GCtrace cur; /* Current trace. */ | 415 | GCtrace cur; /* Current trace. */ |
416 | GCtrace *curfinal; /* Final address of current trace (set during asm). */ | ||
327 | 417 | ||
328 | lua_State *L; /* Current Lua state. */ | 418 | lua_State *L; /* Current Lua state. */ |
329 | const BCIns *pc; /* Current PC. */ | 419 | const BCIns *pc; /* Current PC. */ |
@@ -353,8 +443,9 @@ typedef struct jit_State { | |||
353 | int32_t framedepth; /* Current frame depth. */ | 443 | int32_t framedepth; /* Current frame depth. */ |
354 | int32_t retdepth; /* Return frame depth (count of RETF). */ | 444 | int32_t retdepth; /* Return frame depth (count of RETF). */ |
355 | 445 | ||
356 | MRef k64; /* Pointer to chained array of 64 bit constants. */ | 446 | uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */ |
357 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ | 447 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ |
448 | TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */ | ||
358 | 449 | ||
359 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | 450 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ |
360 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ | 451 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ |
@@ -367,13 +458,15 @@ typedef struct jit_State { | |||
367 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ | 458 | MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ |
368 | 459 | ||
369 | PostProc postproc; /* Required post-processing after execution. */ | 460 | PostProc postproc; /* Required post-processing after execution. */ |
370 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | 461 | #if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) |
371 | int needsplit; /* Need SPLIT pass. */ | 462 | uint8_t needsplit; /* Need SPLIT pass. */ |
372 | #endif | 463 | #endif |
464 | uint8_t retryrec; /* Retry recording. */ | ||
373 | 465 | ||
374 | GCRef *trace; /* Array of traces. */ | 466 | GCRef *trace; /* Array of traces. */ |
375 | TraceNo freetrace; /* Start of scan for next free trace. */ | 467 | TraceNo freetrace; /* Start of scan for next free trace. */ |
376 | MSize sizetrace; /* Size of trace array. */ | 468 | MSize sizetrace; /* Size of trace array. */ |
469 | IRRef1 ktrace; /* Reference to KGC with GCtrace. */ | ||
377 | 470 | ||
378 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | 471 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ |
379 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | 472 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ |
@@ -384,7 +477,10 @@ typedef struct jit_State { | |||
384 | 477 | ||
385 | HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ | 478 | HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ |
386 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ | 479 | uint32_t penaltyslot; /* Round-robin index into penalty slots. */ |
387 | uint32_t prngstate; /* PRNG state. */ | 480 | |
481 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
482 | RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ | ||
483 | #endif | ||
388 | 484 | ||
389 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ | 485 | BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ |
390 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ | 486 | uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ |
@@ -394,6 +490,7 @@ typedef struct jit_State { | |||
394 | const BCIns *startpc; /* Bytecode PC of starting instruction. */ | 490 | const BCIns *startpc; /* Bytecode PC of starting instruction. */ |
395 | TraceNo parent; /* Parent of current side trace (0 for root traces). */ | 491 | TraceNo parent; /* Parent of current side trace (0 for root traces). */ |
396 | ExitNo exitno; /* Exit number in parent of current side trace. */ | 492 | ExitNo exitno; /* Exit number in parent of current side trace. */ |
493 | int exitcode; /* Exit code from unwound trace. */ | ||
397 | 494 | ||
398 | BCIns *patchpc; /* PC for pending re-patch. */ | 495 | BCIns *patchpc; /* PC for pending re-patch. */ |
399 | BCIns patchins; /* Instruction for pending re-patch. */ | 496 | BCIns patchins; /* Instruction for pending re-patch. */ |
@@ -406,14 +503,18 @@ typedef struct jit_State { | |||
406 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ | 503 | size_t szallmcarea; /* Total size of all allocated mcode areas. */ |
407 | 504 | ||
408 | TValue errinfo; /* Additional info element for trace errors. */ | 505 | TValue errinfo; /* Additional info element for trace errors. */ |
506 | |||
507 | #if LJ_HASPROFILE | ||
508 | GCproto *prev_pt; /* Previous prototype. */ | ||
509 | BCLine prev_line; /* Previous line. */ | ||
510 | int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ | ||
511 | #endif | ||
409 | } jit_State; | 512 | } jit_State; |
410 | 513 | ||
411 | /* Trivial PRNG e.g. used for penalty randomization. */ | 514 | #ifdef LUA_USE_ASSERT |
412 | static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) | 515 | #define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__) |
413 | { | 516 | #else |
414 | /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ | 517 | #define lj_assertJ(c, ...) ((void)J) |
415 | J->prngstate = J->prngstate * 1103515245 + 12345; | 518 | #endif |
416 | return J->prngstate >> (32-bits); | ||
417 | } | ||
418 | 519 | ||
419 | #endif | 520 | #endif |
diff --git a/src/lj_lex.c b/src/lj_lex.c index a74b4d6d..463a87ce 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #if LJ_HASFFI | 17 | #if LJ_HASFFI |
17 | #include "lj_tab.h" | 18 | #include "lj_tab.h" |
@@ -24,6 +25,7 @@ | |||
24 | #include "lj_parse.h" | 25 | #include "lj_parse.h" |
25 | #include "lj_char.h" | 26 | #include "lj_char.h" |
26 | #include "lj_strscan.h" | 27 | #include "lj_strscan.h" |
28 | #include "lj_strfmt.h" | ||
27 | 29 | ||
28 | /* Lua lexer token names. */ | 30 | /* Lua lexer token names. */ |
29 | static const char *const tokennames[] = { | 31 | static const char *const tokennames[] = { |
@@ -37,54 +39,54 @@ TKDEF(TKSTR1, TKSTR2) | |||
37 | 39 | ||
38 | /* -- Buffer handling ----------------------------------------------------- */ | 40 | /* -- Buffer handling ----------------------------------------------------- */ |
39 | 41 | ||
40 | #define char2int(c) ((int)(uint8_t)(c)) | 42 | #define LEX_EOF (-1) |
41 | #define next(ls) \ | 43 | #define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r') |
42 | (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls)) | ||
43 | #define save_and_next(ls) (save(ls, ls->current), next(ls)) | ||
44 | #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') | ||
45 | #define END_OF_STREAM (-1) | ||
46 | 44 | ||
47 | static int fillbuf(LexState *ls) | 45 | /* Get more input from reader. */ |
46 | static LJ_NOINLINE LexChar lex_more(LexState *ls) | ||
48 | { | 47 | { |
49 | size_t sz; | 48 | size_t sz; |
50 | const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); | 49 | const char *p = ls->rfunc(ls->L, ls->rdata, &sz); |
51 | if (buf == NULL || sz == 0) return END_OF_STREAM; | 50 | if (p == NULL || sz == 0) return LEX_EOF; |
52 | if (sz >= LJ_MAX_MEM) { | 51 | if (sz >= LJ_MAX_BUF) { |
53 | if (sz != ~(size_t)0) lj_err_mem(ls->L); | 52 | if (sz != ~(size_t)0) lj_err_mem(ls->L); |
53 | sz = ~(uintptr_t)0 - (uintptr_t)p; | ||
54 | if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1; | ||
54 | ls->endmark = 1; | 55 | ls->endmark = 1; |
55 | } | 56 | } |
56 | ls->n = (MSize)sz - 1; | 57 | ls->pe = p + sz; |
57 | ls->p = buf; | 58 | ls->p = p + 1; |
58 | return char2int(*(ls->p++)); | 59 | return (LexChar)(uint8_t)p[0]; |
59 | } | 60 | } |
60 | 61 | ||
61 | static LJ_NOINLINE void save_grow(LexState *ls, int c) | 62 | /* Get next character. */ |
63 | static LJ_AINLINE LexChar lex_next(LexState *ls) | ||
62 | { | 64 | { |
63 | MSize newsize; | 65 | return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls)); |
64 | if (ls->sb.sz >= LJ_MAX_STR/2) | ||
65 | lj_lex_error(ls, 0, LJ_ERR_XELEM); | ||
66 | newsize = ls->sb.sz * 2; | ||
67 | lj_str_resizebuf(ls->L, &ls->sb, newsize); | ||
68 | ls->sb.buf[ls->sb.n++] = (char)c; | ||
69 | } | 66 | } |
70 | 67 | ||
71 | static LJ_AINLINE void save(LexState *ls, int c) | 68 | /* Save character. */ |
69 | static LJ_AINLINE void lex_save(LexState *ls, LexChar c) | ||
72 | { | 70 | { |
73 | if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) | 71 | lj_buf_putb(&ls->sb, c); |
74 | save_grow(ls, c); | 72 | } |
75 | else | 73 | |
76 | ls->sb.buf[ls->sb.n++] = (char)c; | 74 | /* Save previous character and get next character. */ |
75 | static LJ_AINLINE LexChar lex_savenext(LexState *ls) | ||
76 | { | ||
77 | lex_save(ls, ls->c); | ||
78 | return lex_next(ls); | ||
77 | } | 79 | } |
78 | 80 | ||
79 | static void inclinenumber(LexState *ls) | 81 | /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ |
82 | static void lex_newline(LexState *ls) | ||
80 | { | 83 | { |
81 | int old = ls->current; | 84 | LexChar old = ls->c; |
82 | lua_assert(currIsNewline(ls)); | 85 | lj_assertLS(lex_iseol(ls), "bad usage"); |
83 | next(ls); /* skip `\n' or `\r' */ | 86 | lex_next(ls); /* Skip "\n" or "\r". */ |
84 | if (currIsNewline(ls) && ls->current != old) | 87 | if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */ |
85 | next(ls); /* skip `\n\r' or `\r\n' */ | ||
86 | if (++ls->linenumber >= LJ_MAX_LINE) | 88 | if (++ls->linenumber >= LJ_MAX_LINE) |
87 | lj_lex_error(ls, ls->token, LJ_ERR_XLINES); | 89 | lj_lex_error(ls, ls->tok, LJ_ERR_XLINES); |
88 | } | 90 | } |
89 | 91 | ||
90 | /* -- Scanner for terminals ----------------------------------------------- */ | 92 | /* -- Scanner for terminals ----------------------------------------------- */ |
@@ -93,19 +95,17 @@ static void inclinenumber(LexState *ls) | |||
93 | static void lex_number(LexState *ls, TValue *tv) | 95 | static void lex_number(LexState *ls, TValue *tv) |
94 | { | 96 | { |
95 | StrScanFmt fmt; | 97 | StrScanFmt fmt; |
96 | int c, xp = 'e'; | 98 | LexChar c, xp = 'e'; |
97 | lua_assert(lj_char_isdigit(ls->current)); | 99 | lj_assertLS(lj_char_isdigit(ls->c), "bad usage"); |
98 | if ((c = ls->current) == '0') { | 100 | if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x') |
99 | save_and_next(ls); | 101 | xp = 'p'; |
100 | if ((ls->current | 0x20) == 'x') xp = 'p'; | 102 | while (lj_char_isident(ls->c) || ls->c == '.' || |
101 | } | 103 | ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) { |
102 | while (lj_char_isident(ls->current) || ls->current == '.' || | 104 | c = ls->c; |
103 | ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { | 105 | lex_savenext(ls); |
104 | c = ls->current; | ||
105 | save_and_next(ls); | ||
106 | } | 106 | } |
107 | save(ls, '\0'); | 107 | lex_save(ls, '\0'); |
108 | fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, | 108 | fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv, |
109 | (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | | 109 | (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | |
110 | (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); | 110 | (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); |
111 | if (LJ_DUALNUM && fmt == STRSCAN_INT) { | 111 | if (LJ_DUALNUM && fmt == STRSCAN_INT) { |
@@ -116,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv) | |||
116 | } else if (fmt != STRSCAN_ERROR) { | 116 | } else if (fmt != STRSCAN_ERROR) { |
117 | lua_State *L = ls->L; | 117 | lua_State *L = ls->L; |
118 | GCcdata *cd; | 118 | GCcdata *cd; |
119 | lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); | 119 | lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG, |
120 | if (!ctype_ctsG(G(L))) { | 120 | "unexpected number format %d", fmt); |
121 | ptrdiff_t oldtop = savestack(L, L->top); | 121 | ctype_loadffi(L); |
122 | luaopen_ffi(L); /* Load FFI library on-demand. */ | ||
123 | L->top = restorestack(L, oldtop); | ||
124 | } | ||
125 | if (fmt == STRSCAN_IMAG) { | 122 | if (fmt == STRSCAN_IMAG) { |
126 | cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); | 123 | cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); |
127 | ((double *)cdataptr(cd))[0] = 0; | 124 | ((double *)cdataptr(cd))[0] = 0; |
@@ -133,65 +130,66 @@ static void lex_number(LexState *ls, TValue *tv) | |||
133 | lj_parse_keepcdata(ls, tv, cd); | 130 | lj_parse_keepcdata(ls, tv, cd); |
134 | #endif | 131 | #endif |
135 | } else { | 132 | } else { |
136 | lua_assert(fmt == STRSCAN_ERROR); | 133 | lj_assertLS(fmt == STRSCAN_ERROR, |
134 | "unexpected number format %d", fmt); | ||
137 | lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); | 135 | lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); |
138 | } | 136 | } |
139 | } | 137 | } |
140 | 138 | ||
141 | static int skip_sep(LexState *ls) | 139 | /* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */ |
140 | static int lex_skipeq(LexState *ls) | ||
142 | { | 141 | { |
143 | int count = 0; | 142 | int count = 0; |
144 | int s = ls->current; | 143 | LexChar s = ls->c; |
145 | lua_assert(s == '[' || s == ']'); | 144 | lj_assertLS(s == '[' || s == ']', "bad usage"); |
146 | save_and_next(ls); | 145 | while (lex_savenext(ls) == '=' && count < 0x20000000) |
147 | while (ls->current == '=' && count < 0x20000000) { | ||
148 | save_and_next(ls); | ||
149 | count++; | 146 | count++; |
150 | } | 147 | return (ls->c == s) ? count : (-count) - 1; |
151 | return (ls->current == s) ? count : (-count) - 1; | ||
152 | } | 148 | } |
153 | 149 | ||
154 | static void read_long_string(LexState *ls, TValue *tv, int sep) | 150 | /* Parse a long string or long comment (tv set to NULL). */ |
151 | static void lex_longstring(LexState *ls, TValue *tv, int sep) | ||
155 | { | 152 | { |
156 | save_and_next(ls); /* skip 2nd `[' */ | 153 | lex_savenext(ls); /* Skip second '['. */ |
157 | if (currIsNewline(ls)) /* string starts with a newline? */ | 154 | if (lex_iseol(ls)) /* Skip initial newline. */ |
158 | inclinenumber(ls); /* skip it */ | 155 | lex_newline(ls); |
159 | for (;;) { | 156 | for (;;) { |
160 | switch (ls->current) { | 157 | switch (ls->c) { |
161 | case END_OF_STREAM: | 158 | case LEX_EOF: |
162 | lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); | 159 | lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); |
163 | break; | 160 | break; |
164 | case ']': | 161 | case ']': |
165 | if (skip_sep(ls) == sep) { | 162 | if (lex_skipeq(ls) == sep) { |
166 | save_and_next(ls); /* skip 2nd `]' */ | 163 | lex_savenext(ls); /* Skip second ']'. */ |
167 | goto endloop; | 164 | goto endloop; |
168 | } | 165 | } |
169 | break; | 166 | break; |
170 | case '\n': | 167 | case '\n': |
171 | case '\r': | 168 | case '\r': |
172 | save(ls, '\n'); | 169 | lex_save(ls, '\n'); |
173 | inclinenumber(ls); | 170 | lex_newline(ls); |
174 | if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ | 171 | if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */ |
175 | break; | 172 | break; |
176 | default: | 173 | default: |
177 | if (tv) save_and_next(ls); | 174 | lex_savenext(ls); |
178 | else next(ls); | ||
179 | break; | 175 | break; |
180 | } | 176 | } |
181 | } endloop: | 177 | } endloop: |
182 | if (tv) { | 178 | if (tv) { |
183 | GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), | 179 | GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep), |
184 | ls->sb.n - 2*(2 + (MSize)sep)); | 180 | sbuflen(&ls->sb) - 2*(2 + (MSize)sep)); |
185 | setstrV(ls->L, tv, str); | 181 | setstrV(ls->L, tv, str); |
186 | } | 182 | } |
187 | } | 183 | } |
188 | 184 | ||
189 | static void read_string(LexState *ls, int delim, TValue *tv) | 185 | /* Parse a string. */ |
186 | static void lex_string(LexState *ls, TValue *tv) | ||
190 | { | 187 | { |
191 | save_and_next(ls); | 188 | LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */ |
192 | while (ls->current != delim) { | 189 | lex_savenext(ls); |
193 | switch (ls->current) { | 190 | while (ls->c != delim) { |
194 | case END_OF_STREAM: | 191 | switch (ls->c) { |
192 | case LEX_EOF: | ||
195 | lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); | 193 | lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); |
196 | continue; | 194 | continue; |
197 | case '\n': | 195 | case '\n': |
@@ -199,7 +197,7 @@ static void read_string(LexState *ls, int delim, TValue *tv) | |||
199 | lj_lex_error(ls, TK_string, LJ_ERR_XSTR); | 197 | lj_lex_error(ls, TK_string, LJ_ERR_XSTR); |
200 | continue; | 198 | continue; |
201 | case '\\': { | 199 | case '\\': { |
202 | int c = next(ls); /* Skip the '\\'. */ | 200 | LexChar c = lex_next(ls); /* Skip the '\\'. */ |
203 | switch (c) { | 201 | switch (c) { |
204 | case 'a': c = '\a'; break; | 202 | case 'a': c = '\a'; break; |
205 | case 'b': c = '\b'; break; | 203 | case 'b': c = '\b'; break; |
@@ -209,111 +207,139 @@ static void read_string(LexState *ls, int delim, TValue *tv) | |||
209 | case 't': c = '\t'; break; | 207 | case 't': c = '\t'; break; |
210 | case 'v': c = '\v'; break; | 208 | case 'v': c = '\v'; break; |
211 | case 'x': /* Hexadecimal escape '\xXX'. */ | 209 | case 'x': /* Hexadecimal escape '\xXX'. */ |
212 | c = (next(ls) & 15u) << 4; | 210 | c = (lex_next(ls) & 15u) << 4; |
213 | if (!lj_char_isdigit(ls->current)) { | 211 | if (!lj_char_isdigit(ls->c)) { |
214 | if (!lj_char_isxdigit(ls->current)) goto err_xesc; | 212 | if (!lj_char_isxdigit(ls->c)) goto err_xesc; |
215 | c += 9 << 4; | 213 | c += 9 << 4; |
216 | } | 214 | } |
217 | c += (next(ls) & 15u); | 215 | c += (lex_next(ls) & 15u); |
218 | if (!lj_char_isdigit(ls->current)) { | 216 | if (!lj_char_isdigit(ls->c)) { |
219 | if (!lj_char_isxdigit(ls->current)) goto err_xesc; | 217 | if (!lj_char_isxdigit(ls->c)) goto err_xesc; |
220 | c += 9; | 218 | c += 9; |
221 | } | 219 | } |
222 | break; | 220 | break; |
221 | case 'u': /* Unicode escape '\u{XX...}'. */ | ||
222 | if (lex_next(ls) != '{') goto err_xesc; | ||
223 | lex_next(ls); | ||
224 | c = 0; | ||
225 | do { | ||
226 | c = (c << 4) | (ls->c & 15u); | ||
227 | if (!lj_char_isdigit(ls->c)) { | ||
228 | if (!lj_char_isxdigit(ls->c)) goto err_xesc; | ||
229 | c += 9; | ||
230 | } | ||
231 | if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ | ||
232 | } while (lex_next(ls) != '}'); | ||
233 | if (c < 0x800) { | ||
234 | if (c < 0x80) break; | ||
235 | lex_save(ls, 0xc0 | (c >> 6)); | ||
236 | } else { | ||
237 | if (c >= 0x10000) { | ||
238 | lex_save(ls, 0xf0 | (c >> 18)); | ||
239 | lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); | ||
240 | } else { | ||
241 | if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ | ||
242 | lex_save(ls, 0xe0 | (c >> 12)); | ||
243 | } | ||
244 | lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); | ||
245 | } | ||
246 | c = 0x80 | (c & 0x3f); | ||
247 | break; | ||
223 | case 'z': /* Skip whitespace. */ | 248 | case 'z': /* Skip whitespace. */ |
224 | next(ls); | 249 | lex_next(ls); |
225 | while (lj_char_isspace(ls->current)) | 250 | while (lj_char_isspace(ls->c)) |
226 | if (currIsNewline(ls)) inclinenumber(ls); else next(ls); | 251 | if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls); |
227 | continue; | 252 | continue; |
228 | case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; | 253 | case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue; |
229 | case '\\': case '\"': case '\'': break; | 254 | case '\\': case '\"': case '\'': break; |
230 | case END_OF_STREAM: continue; | 255 | case LEX_EOF: continue; |
231 | default: | 256 | default: |
232 | if (!lj_char_isdigit(c)) | 257 | if (!lj_char_isdigit(c)) |
233 | goto err_xesc; | 258 | goto err_xesc; |
234 | c -= '0'; /* Decimal escape '\ddd'. */ | 259 | c -= '0'; /* Decimal escape '\ddd'. */ |
235 | if (lj_char_isdigit(next(ls))) { | 260 | if (lj_char_isdigit(lex_next(ls))) { |
236 | c = c*10 + (ls->current - '0'); | 261 | c = c*10 + (ls->c - '0'); |
237 | if (lj_char_isdigit(next(ls))) { | 262 | if (lj_char_isdigit(lex_next(ls))) { |
238 | c = c*10 + (ls->current - '0'); | 263 | c = c*10 + (ls->c - '0'); |
239 | if (c > 255) { | 264 | if (c > 255) { |
240 | err_xesc: | 265 | err_xesc: |
241 | lj_lex_error(ls, TK_string, LJ_ERR_XESC); | 266 | lj_lex_error(ls, TK_string, LJ_ERR_XESC); |
242 | } | 267 | } |
243 | next(ls); | 268 | lex_next(ls); |
244 | } | 269 | } |
245 | } | 270 | } |
246 | save(ls, c); | 271 | lex_save(ls, c); |
247 | continue; | 272 | continue; |
248 | } | 273 | } |
249 | save(ls, c); | 274 | lex_save(ls, c); |
250 | next(ls); | 275 | lex_next(ls); |
251 | continue; | 276 | continue; |
252 | } | 277 | } |
253 | default: | 278 | default: |
254 | save_and_next(ls); | 279 | lex_savenext(ls); |
255 | break; | 280 | break; |
256 | } | 281 | } |
257 | } | 282 | } |
258 | save_and_next(ls); /* skip delimiter */ | 283 | lex_savenext(ls); /* Skip trailing delimiter. */ |
259 | setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); | 284 | setstrV(ls->L, tv, |
285 | lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2)); | ||
260 | } | 286 | } |
261 | 287 | ||
262 | /* -- Main lexical scanner ------------------------------------------------ */ | 288 | /* -- Main lexical scanner ------------------------------------------------ */ |
263 | 289 | ||
264 | static int llex(LexState *ls, TValue *tv) | 290 | /* Get next lexical token. */ |
291 | static LexToken lex_scan(LexState *ls, TValue *tv) | ||
265 | { | 292 | { |
266 | lj_str_resetbuf(&ls->sb); | 293 | lj_buf_reset(&ls->sb); |
267 | for (;;) { | 294 | for (;;) { |
268 | if (lj_char_isident(ls->current)) { | 295 | if (lj_char_isident(ls->c)) { |
269 | GCstr *s; | 296 | GCstr *s; |
270 | if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ | 297 | if (lj_char_isdigit(ls->c)) { /* Numeric literal. */ |
271 | lex_number(ls, tv); | 298 | lex_number(ls, tv); |
272 | return TK_number; | 299 | return TK_number; |
273 | } | 300 | } |
274 | /* Identifier or reserved word. */ | 301 | /* Identifier or reserved word. */ |
275 | do { | 302 | do { |
276 | save_and_next(ls); | 303 | lex_savenext(ls); |
277 | } while (lj_char_isident(ls->current)); | 304 | } while (lj_char_isident(ls->c)); |
278 | s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); | 305 | s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb)); |
279 | setstrV(ls->L, tv, s); | 306 | setstrV(ls->L, tv, s); |
280 | if (s->reserved > 0) /* Reserved word? */ | 307 | if (s->reserved > 0) /* Reserved word? */ |
281 | return TK_OFS + s->reserved; | 308 | return TK_OFS + s->reserved; |
282 | return TK_name; | 309 | return TK_name; |
283 | } | 310 | } |
284 | switch (ls->current) { | 311 | switch (ls->c) { |
285 | case '\n': | 312 | case '\n': |
286 | case '\r': | 313 | case '\r': |
287 | inclinenumber(ls); | 314 | lex_newline(ls); |
288 | continue; | 315 | continue; |
289 | case ' ': | 316 | case ' ': |
290 | case '\t': | 317 | case '\t': |
291 | case '\v': | 318 | case '\v': |
292 | case '\f': | 319 | case '\f': |
293 | next(ls); | 320 | lex_next(ls); |
294 | continue; | 321 | continue; |
295 | case '-': | 322 | case '-': |
296 | next(ls); | 323 | lex_next(ls); |
297 | if (ls->current != '-') return '-'; | 324 | if (ls->c != '-') return '-'; |
298 | /* else is a comment */ | 325 | lex_next(ls); |
299 | next(ls); | 326 | if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */ |
300 | if (ls->current == '[') { | 327 | int sep = lex_skipeq(ls); |
301 | int sep = skip_sep(ls); | 328 | lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */ |
302 | lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */ | ||
303 | if (sep >= 0) { | 329 | if (sep >= 0) { |
304 | read_long_string(ls, NULL, sep); /* long comment */ | 330 | lex_longstring(ls, NULL, sep); |
305 | lj_str_resetbuf(&ls->sb); | 331 | lj_buf_reset(&ls->sb); |
306 | continue; | 332 | continue; |
307 | } | 333 | } |
308 | } | 334 | } |
309 | /* else short comment */ | 335 | /* Short comment "--.*\n". */ |
310 | while (!currIsNewline(ls) && ls->current != END_OF_STREAM) | 336 | while (!lex_iseol(ls) && ls->c != LEX_EOF) |
311 | next(ls); | 337 | lex_next(ls); |
312 | continue; | 338 | continue; |
313 | case '[': { | 339 | case '[': { |
314 | int sep = skip_sep(ls); | 340 | int sep = lex_skipeq(ls); |
315 | if (sep >= 0) { | 341 | if (sep >= 0) { |
316 | read_long_string(ls, tv, sep); | 342 | lex_longstring(ls, tv, sep); |
317 | return TK_string; | 343 | return TK_string; |
318 | } else if (sep == -1) { | 344 | } else if (sep == -1) { |
319 | return '['; | 345 | return '['; |
@@ -323,44 +349,43 @@ static int llex(LexState *ls, TValue *tv) | |||
323 | } | 349 | } |
324 | } | 350 | } |
325 | case '=': | 351 | case '=': |
326 | next(ls); | 352 | lex_next(ls); |
327 | if (ls->current != '=') return '='; else { next(ls); return TK_eq; } | 353 | if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; } |
328 | case '<': | 354 | case '<': |
329 | next(ls); | 355 | lex_next(ls); |
330 | if (ls->current != '=') return '<'; else { next(ls); return TK_le; } | 356 | if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; } |
331 | case '>': | 357 | case '>': |
332 | next(ls); | 358 | lex_next(ls); |
333 | if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } | 359 | if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; } |
334 | case '~': | 360 | case '~': |
335 | next(ls); | 361 | lex_next(ls); |
336 | if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } | 362 | if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; } |
337 | case ':': | 363 | case ':': |
338 | next(ls); | 364 | lex_next(ls); |
339 | if (ls->current != ':') return ':'; else { next(ls); return TK_label; } | 365 | if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; } |
340 | case '"': | 366 | case '"': |
341 | case '\'': | 367 | case '\'': |
342 | read_string(ls, ls->current, tv); | 368 | lex_string(ls, tv); |
343 | return TK_string; | 369 | return TK_string; |
344 | case '.': | 370 | case '.': |
345 | save_and_next(ls); | 371 | if (lex_savenext(ls) == '.') { |
346 | if (ls->current == '.') { | 372 | lex_next(ls); |
347 | next(ls); | 373 | if (ls->c == '.') { |
348 | if (ls->current == '.') { | 374 | lex_next(ls); |
349 | next(ls); | ||
350 | return TK_dots; /* ... */ | 375 | return TK_dots; /* ... */ |
351 | } | 376 | } |
352 | return TK_concat; /* .. */ | 377 | return TK_concat; /* .. */ |
353 | } else if (!lj_char_isdigit(ls->current)) { | 378 | } else if (!lj_char_isdigit(ls->c)) { |
354 | return '.'; | 379 | return '.'; |
355 | } else { | 380 | } else { |
356 | lex_number(ls, tv); | 381 | lex_number(ls, tv); |
357 | return TK_number; | 382 | return TK_number; |
358 | } | 383 | } |
359 | case END_OF_STREAM: | 384 | case LEX_EOF: |
360 | return TK_eof; | 385 | return TK_eof; |
361 | default: { | 386 | default: { |
362 | int c = ls->current; | 387 | LexChar c = ls->c; |
363 | next(ls); | 388 | lex_next(ls); |
364 | return c; /* Single-char tokens (+ - / ...). */ | 389 | return c; /* Single-char tokens (+ - / ...). */ |
365 | } | 390 | } |
366 | } | 391 | } |
@@ -375,36 +400,33 @@ int lj_lex_setup(lua_State *L, LexState *ls) | |||
375 | int header = 0; | 400 | int header = 0; |
376 | ls->L = L; | 401 | ls->L = L; |
377 | ls->fs = NULL; | 402 | ls->fs = NULL; |
378 | ls->n = 0; | 403 | ls->pe = ls->p = NULL; |
379 | ls->p = NULL; | ||
380 | ls->vstack = NULL; | 404 | ls->vstack = NULL; |
381 | ls->sizevstack = 0; | 405 | ls->sizevstack = 0; |
382 | ls->vtop = 0; | 406 | ls->vtop = 0; |
383 | ls->bcstack = NULL; | 407 | ls->bcstack = NULL; |
384 | ls->sizebcstack = 0; | 408 | ls->sizebcstack = 0; |
385 | ls->token = 0; | 409 | ls->tok = 0; |
386 | ls->lookahead = TK_eof; /* No look-ahead token. */ | 410 | ls->lookahead = TK_eof; /* No look-ahead token. */ |
387 | ls->linenumber = 1; | 411 | ls->linenumber = 1; |
388 | ls->lastline = 1; | 412 | ls->lastline = 1; |
389 | ls->endmark = 0; | 413 | ls->endmark = 0; |
390 | lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); | 414 | lex_next(ls); /* Read-ahead first char. */ |
391 | next(ls); /* Read-ahead first char. */ | 415 | if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && |
392 | if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && | 416 | (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ |
393 | char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ | ||
394 | ls->n -= 2; | ||
395 | ls->p += 2; | 417 | ls->p += 2; |
396 | next(ls); | 418 | lex_next(ls); |
397 | header = 1; | 419 | header = 1; |
398 | } | 420 | } |
399 | if (ls->current == '#') { /* Skip POSIX #! header line. */ | 421 | if (ls->c == '#') { /* Skip POSIX #! header line. */ |
400 | do { | 422 | do { |
401 | next(ls); | 423 | lex_next(ls); |
402 | if (ls->current == END_OF_STREAM) return 0; | 424 | if (ls->c == LEX_EOF) return 0; |
403 | } while (!currIsNewline(ls)); | 425 | } while (!lex_iseol(ls)); |
404 | inclinenumber(ls); | 426 | lex_newline(ls); |
405 | header = 1; | 427 | header = 1; |
406 | } | 428 | } |
407 | if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ | 429 | if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */ |
408 | if (header) { | 430 | if (header) { |
409 | /* | 431 | /* |
410 | ** Loading bytecode with an extra header is disabled for security | 432 | ** Loading bytecode with an extra header is disabled for security |
@@ -426,55 +448,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls) | |||
426 | global_State *g = G(L); | 448 | global_State *g = G(L); |
427 | lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); | 449 | lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); |
428 | lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); | 450 | lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); |
429 | lj_str_freebuf(g, &ls->sb); | 451 | lj_buf_free(g, &ls->sb); |
430 | } | 452 | } |
431 | 453 | ||
454 | /* Return next lexical token. */ | ||
432 | void lj_lex_next(LexState *ls) | 455 | void lj_lex_next(LexState *ls) |
433 | { | 456 | { |
434 | ls->lastline = ls->linenumber; | 457 | ls->lastline = ls->linenumber; |
435 | if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ | 458 | if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ |
436 | ls->token = llex(ls, &ls->tokenval); /* Get next token. */ | 459 | ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */ |
437 | } else { /* Otherwise return lookahead token. */ | 460 | } else { /* Otherwise return lookahead token. */ |
438 | ls->token = ls->lookahead; | 461 | ls->tok = ls->lookahead; |
439 | ls->lookahead = TK_eof; | 462 | ls->lookahead = TK_eof; |
440 | ls->tokenval = ls->lookaheadval; | 463 | ls->tokval = ls->lookaheadval; |
441 | } | 464 | } |
442 | } | 465 | } |
443 | 466 | ||
467 | /* Look ahead for the next token. */ | ||
444 | LexToken lj_lex_lookahead(LexState *ls) | 468 | LexToken lj_lex_lookahead(LexState *ls) |
445 | { | 469 | { |
446 | lua_assert(ls->lookahead == TK_eof); | 470 | lj_assertLS(ls->lookahead == TK_eof, "double lookahead"); |
447 | ls->lookahead = llex(ls, &ls->lookaheadval); | 471 | ls->lookahead = lex_scan(ls, &ls->lookaheadval); |
448 | return ls->lookahead; | 472 | return ls->lookahead; |
449 | } | 473 | } |
450 | 474 | ||
451 | const char *lj_lex_token2str(LexState *ls, LexToken token) | 475 | /* Convert token to string. */ |
476 | const char *lj_lex_token2str(LexState *ls, LexToken tok) | ||
452 | { | 477 | { |
453 | if (token > TK_OFS) | 478 | if (tok > TK_OFS) |
454 | return tokennames[token-TK_OFS-1]; | 479 | return tokennames[tok-TK_OFS-1]; |
455 | else if (!lj_char_iscntrl(token)) | 480 | else if (!lj_char_iscntrl(tok)) |
456 | return lj_str_pushf(ls->L, "%c", token); | 481 | return lj_strfmt_pushf(ls->L, "%c", tok); |
457 | else | 482 | else |
458 | return lj_str_pushf(ls->L, "char(%d)", token); | 483 | return lj_strfmt_pushf(ls->L, "char(%d)", tok); |
459 | } | 484 | } |
460 | 485 | ||
461 | void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) | 486 | /* Lexer error. */ |
487 | void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...) | ||
462 | { | 488 | { |
463 | const char *tok; | 489 | const char *tokstr; |
464 | va_list argp; | 490 | va_list argp; |
465 | if (token == 0) { | 491 | if (tok == 0) { |
466 | tok = NULL; | 492 | tokstr = NULL; |
467 | } else if (token == TK_name || token == TK_string || token == TK_number) { | 493 | } else if (tok == TK_name || tok == TK_string || tok == TK_number) { |
468 | save(ls, '\0'); | 494 | lex_save(ls, '\0'); |
469 | tok = ls->sb.buf; | 495 | tokstr = ls->sb.b; |
470 | } else { | 496 | } else { |
471 | tok = lj_lex_token2str(ls, token); | 497 | tokstr = lj_lex_token2str(ls, tok); |
472 | } | 498 | } |
473 | va_start(argp, em); | 499 | va_start(argp, em); |
474 | lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); | 500 | lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp); |
475 | va_end(argp); | 501 | va_end(argp); |
476 | } | 502 | } |
477 | 503 | ||
504 | /* Initialize strings for reserved words. */ | ||
478 | void lj_lex_init(lua_State *L) | 505 | void lj_lex_init(lua_State *L) |
479 | { | 506 | { |
480 | uint32_t i; | 507 | uint32_t i; |
diff --git a/src/lj_lex.h b/src/lj_lex.h index a284af19..cb5b5769 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h | |||
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2) | |||
30 | TK_RESERVED = TK_while - TK_OFS | 30 | TK_RESERVED = TK_while - TK_OFS |
31 | }; | 31 | }; |
32 | 32 | ||
33 | typedef int LexToken; | 33 | typedef int LexChar; /* Lexical character. Unsigned ext. from char. */ |
34 | typedef int LexToken; /* Lexical token. */ | ||
34 | 35 | ||
35 | /* Combined bytecode ins/line. Only used during bytecode generation. */ | 36 | /* Combined bytecode ins/line. Only used during bytecode generation. */ |
36 | typedef struct BCInsLine { | 37 | typedef struct BCInsLine { |
@@ -51,13 +52,13 @@ typedef struct VarInfo { | |||
51 | typedef struct LexState { | 52 | typedef struct LexState { |
52 | struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ | 53 | struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ |
53 | struct lua_State *L; /* Lua state. */ | 54 | struct lua_State *L; /* Lua state. */ |
54 | TValue tokenval; /* Current token value. */ | 55 | TValue tokval; /* Current token value. */ |
55 | TValue lookaheadval; /* Lookahead token value. */ | 56 | TValue lookaheadval; /* Lookahead token value. */ |
56 | int current; /* Current character (charint). */ | ||
57 | LexToken token; /* Current token. */ | ||
58 | LexToken lookahead; /* Lookahead token. */ | ||
59 | MSize n; /* Bytes left in input buffer. */ | ||
60 | const char *p; /* Current position in input buffer. */ | 57 | const char *p; /* Current position in input buffer. */ |
58 | const char *pe; /* End of input buffer. */ | ||
59 | LexChar c; /* Current character. */ | ||
60 | LexToken tok; /* Current token. */ | ||
61 | LexToken lookahead; /* Lookahead token. */ | ||
61 | SBuf sb; /* String buffer for tokens. */ | 62 | SBuf sb; /* String buffer for tokens. */ |
62 | lua_Reader rfunc; /* Reader callback. */ | 63 | lua_Reader rfunc; /* Reader callback. */ |
63 | void *rdata; /* Reader callback data. */ | 64 | void *rdata; /* Reader callback data. */ |
@@ -79,8 +80,14 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); | |||
79 | LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); | 80 | LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); |
80 | LJ_FUNC void lj_lex_next(LexState *ls); | 81 | LJ_FUNC void lj_lex_next(LexState *ls); |
81 | LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); | 82 | LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); |
82 | LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); | 83 | LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok); |
83 | LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); | 84 | LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...); |
84 | LJ_FUNC void lj_lex_init(lua_State *L); | 85 | LJ_FUNC void lj_lex_init(lua_State *L); |
85 | 86 | ||
87 | #ifdef LUA_USE_ASSERT | ||
88 | #define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__)) | ||
89 | #else | ||
90 | #define lj_assertLS(c, ...) ((void)ls) | ||
91 | #endif | ||
92 | |||
86 | #endif | 93 | #endif |
diff --git a/src/lj_lib.c b/src/lj_lib.c index 5796766a..82a9e256 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c | |||
@@ -16,8 +16,14 @@ | |||
16 | #include "lj_func.h" | 16 | #include "lj_func.h" |
17 | #include "lj_bc.h" | 17 | #include "lj_bc.h" |
18 | #include "lj_dispatch.h" | 18 | #include "lj_dispatch.h" |
19 | #if LJ_HASFFI | ||
20 | #include "lj_ctype.h" | ||
21 | #endif | ||
19 | #include "lj_vm.h" | 22 | #include "lj_vm.h" |
20 | #include "lj_strscan.h" | 23 | #include "lj_strscan.h" |
24 | #include "lj_strfmt.h" | ||
25 | #include "lj_lex.h" | ||
26 | #include "lj_bcdump.h" | ||
21 | #include "lj_lib.h" | 27 | #include "lj_lib.h" |
22 | 28 | ||
23 | /* -- Library initialization ---------------------------------------------- */ | 29 | /* -- Library initialization ---------------------------------------------- */ |
@@ -43,6 +49,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize) | |||
43 | return tabV(L->top-1); | 49 | return tabV(L->top-1); |
44 | } | 50 | } |
45 | 51 | ||
52 | static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab) | ||
53 | { | ||
54 | int len = *p++; | ||
55 | GCstr *name = lj_str_new(L, (const char *)p, len); | ||
56 | LexState ls; | ||
57 | GCproto *pt; | ||
58 | GCfunc *fn; | ||
59 | memset(&ls, 0, sizeof(ls)); | ||
60 | ls.L = L; | ||
61 | ls.p = (const char *)(p+len); | ||
62 | ls.pe = (const char *)~(uintptr_t)0; | ||
63 | ls.c = -1; | ||
64 | ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE)); | ||
65 | ls.chunkname = name; | ||
66 | pt = lj_bcread_proto(&ls); | ||
67 | pt->firstline = ~(BCLine)0; | ||
68 | fn = lj_func_newL_empty(L, pt, tabref(L->env)); | ||
69 | /* NOBARRIER: See below for common barrier. */ | ||
70 | setfuncV(L, lj_tab_setstr(L, tab, name), fn); | ||
71 | return (const uint8_t *)ls.p; | ||
72 | } | ||
73 | |||
46 | void lj_lib_register(lua_State *L, const char *libname, | 74 | void lj_lib_register(lua_State *L, const char *libname, |
47 | const uint8_t *p, const lua_CFunction *cf) | 75 | const uint8_t *p, const lua_CFunction *cf) |
48 | { | 76 | { |
@@ -87,6 +115,9 @@ void lj_lib_register(lua_State *L, const char *libname, | |||
87 | ofn = fn; | 115 | ofn = fn; |
88 | } else { | 116 | } else { |
89 | switch (tag | len) { | 117 | switch (tag | len) { |
118 | case LIBINIT_LUA: | ||
119 | p = lib_read_lfunc(L, p, tab); | ||
120 | break; | ||
90 | case LIBINIT_SET: | 121 | case LIBINIT_SET: |
91 | L->top -= 2; | 122 | L->top -= 2; |
92 | if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) | 123 | if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) |
@@ -120,6 +151,37 @@ void lj_lib_register(lua_State *L, const char *libname, | |||
120 | } | 151 | } |
121 | } | 152 | } |
122 | 153 | ||
154 | /* Push internal function on the stack. */ | ||
155 | GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n) | ||
156 | { | ||
157 | GCfunc *fn; | ||
158 | lua_pushcclosure(L, f, n); | ||
159 | fn = funcV(L->top-1); | ||
160 | fn->c.ffid = (uint8_t)id; | ||
161 | setmref(fn->c.pc, &G(L)->bc_cfunc_int); | ||
162 | return fn; | ||
163 | } | ||
164 | |||
165 | void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env) | ||
166 | { | ||
167 | luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4); | ||
168 | lua_pushcfunction(L, f); | ||
169 | /* NOBARRIER: The function is new (marked white). */ | ||
170 | setgcref(funcV(L->top-1)->c.env, obj2gco(env)); | ||
171 | lua_setfield(L, -2, name); | ||
172 | L->top--; | ||
173 | } | ||
174 | |||
175 | int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name) | ||
176 | { | ||
177 | GCfunc *fn = lj_lib_pushcf(L, cf, id); | ||
178 | GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */ | ||
179 | setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn); | ||
180 | lj_gc_anybarriert(L, t); | ||
181 | setfuncV(L, L->top++, fn); | ||
182 | return 1; | ||
183 | } | ||
184 | |||
123 | /* -- Type checks --------------------------------------------------------- */ | 185 | /* -- Type checks --------------------------------------------------------- */ |
124 | 186 | ||
125 | TValue *lj_lib_checkany(lua_State *L, int narg) | 187 | TValue *lj_lib_checkany(lua_State *L, int narg) |
@@ -137,7 +199,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg) | |||
137 | if (LJ_LIKELY(tvisstr(o))) { | 199 | if (LJ_LIKELY(tvisstr(o))) { |
138 | return strV(o); | 200 | return strV(o); |
139 | } else if (tvisnumber(o)) { | 201 | } else if (tvisnumber(o)) { |
140 | GCstr *s = lj_str_fromnumber(L, o); | 202 | GCstr *s = lj_strfmt_number(L, o); |
141 | setstrV(L, o, s); | 203 | setstrV(L, o, s); |
142 | return s; | 204 | return s; |
143 | } | 205 | } |
@@ -196,20 +258,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def) | |||
196 | return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; | 258 | return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; |
197 | } | 259 | } |
198 | 260 | ||
199 | int32_t lj_lib_checkbit(lua_State *L, int narg) | ||
200 | { | ||
201 | TValue *o = L->base + narg-1; | ||
202 | if (!(o < L->top && lj_strscan_numberobj(o))) | ||
203 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
204 | if (LJ_LIKELY(tvisint(o))) { | ||
205 | return intV(o); | ||
206 | } else { | ||
207 | int32_t i = lj_num2bit(numV(o)); | ||
208 | if (LJ_DUALNUM) setintV(o, i); | ||
209 | return i; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | GCfunc *lj_lib_checkfunc(lua_State *L, int narg) | 261 | GCfunc *lj_lib_checkfunc(lua_State *L, int narg) |
214 | { | 262 | { |
215 | TValue *o = L->base + narg-1; | 263 | TValue *o = L->base + narg-1; |
@@ -256,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) | |||
256 | return def; | 304 | return def; |
257 | } | 305 | } |
258 | 306 | ||
307 | /* -- Strict type checks -------------------------------------------------- */ | ||
308 | |||
309 | /* The following type checks do not coerce between strings and numbers. | ||
310 | ** And they handle plain int64_t/uint64_t FFI numbers, too. | ||
311 | */ | ||
312 | |||
313 | #if LJ_HASBUFFER | ||
314 | GCstr *lj_lib_checkstrx(lua_State *L, int narg) | ||
315 | { | ||
316 | TValue *o = L->base + narg-1; | ||
317 | if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING); | ||
318 | return strV(o); | ||
319 | } | ||
320 | |||
321 | int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) | ||
322 | { | ||
323 | TValue *o = L->base + narg-1; | ||
324 | lj_assertL(b >= 0, "expected range must be non-negative"); | ||
325 | if (o < L->top) { | ||
326 | if (LJ_LIKELY(tvisint(o))) { | ||
327 | int32_t i = intV(o); | ||
328 | if (i >= a && i <= b) return i; | ||
329 | } else if (LJ_LIKELY(tvisnum(o))) { | ||
330 | /* For performance reasons, this doesn't check for integerness or | ||
331 | ** integer overflow. Overflow detection still works, since all FPUs | ||
332 | ** return either MININT or MAXINT, which is then out of range. | ||
333 | */ | ||
334 | int32_t i = (int32_t)numV(o); | ||
335 | if (i >= a && i <= b) return i; | ||
336 | #if LJ_HASFFI | ||
337 | } else if (tviscdata(o)) { | ||
338 | GCcdata *cd = cdataV(o); | ||
339 | if (cd->ctypeid == CTID_INT64) { | ||
340 | int64_t i = *(int64_t *)cdataptr(cd); | ||
341 | if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i; | ||
342 | } else if (cd->ctypeid == CTID_UINT64) { | ||
343 | uint64_t i = *(uint64_t *)cdataptr(cd); | ||
344 | if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i; | ||
345 | } else { | ||
346 | goto badtype; | ||
347 | } | ||
348 | #endif | ||
349 | } else { | ||
350 | goto badtype; | ||
351 | } | ||
352 | lj_err_arg(L, narg, LJ_ERR_NUMRNG); | ||
353 | } | ||
354 | badtype: | ||
355 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
356 | return 0; /* unreachable */ | ||
357 | } | ||
358 | #endif | ||
359 | |||
diff --git a/src/lj_lib.h b/src/lj_lib.h index 55529ad8..a18f52bf 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h | |||
@@ -41,15 +41,28 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg); | |||
41 | LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); | 41 | LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); |
42 | LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); | 42 | LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); |
43 | LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); | 43 | LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); |
44 | LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg); | ||
45 | LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); | 44 | LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); |
46 | LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); | 45 | LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); |
47 | LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); | 46 | LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); |
48 | LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); | 47 | LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); |
49 | 48 | ||
49 | #if LJ_HASBUFFER | ||
50 | LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg); | ||
51 | LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg, | ||
52 | int32_t a, int32_t b); | ||
53 | #endif | ||
54 | |||
50 | /* Avoid including lj_frame.h. */ | 55 | /* Avoid including lj_frame.h. */ |
56 | #if LJ_GC64 | ||
57 | #define lj_lib_upvalue(L, n) \ | ||
58 | (&gcval(L->base-2)->fn.c.upvalue[(n)-1]) | ||
59 | #elif LJ_FR2 | ||
60 | #define lj_lib_upvalue(L, n) \ | ||
61 | (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1]) | ||
62 | #else | ||
51 | #define lj_lib_upvalue(L, n) \ | 63 | #define lj_lib_upvalue(L, n) \ |
52 | (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) | 64 | (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) |
65 | #endif | ||
53 | 66 | ||
54 | #if LJ_TARGET_WINDOWS | 67 | #if LJ_TARGET_WINDOWS |
55 | #define lj_lib_checkfpu(L) \ | 68 | #define lj_lib_checkfpu(L) \ |
@@ -60,23 +73,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); | |||
60 | #define lj_lib_checkfpu(L) UNUSED(L) | 73 | #define lj_lib_checkfpu(L) UNUSED(L) |
61 | #endif | 74 | #endif |
62 | 75 | ||
63 | /* Push internal function on the stack. */ | 76 | LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n); |
64 | static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f, | ||
65 | int id, int n) | ||
66 | { | ||
67 | GCfunc *fn; | ||
68 | lua_pushcclosure(L, f, n); | ||
69 | fn = funcV(L->top-1); | ||
70 | fn->c.ffid = (uint8_t)id; | ||
71 | setmref(fn->c.pc, &G(L)->bc_cfunc_int); | ||
72 | } | ||
73 | |||
74 | #define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) | 77 | #define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) |
75 | 78 | ||
76 | /* Library function declarations. Scanned by buildvm. */ | 79 | /* Library function declarations. Scanned by buildvm. */ |
77 | #define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) | 80 | #define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) |
78 | #define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) | 81 | #define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) |
79 | #define LJLIB_ASM_(name) | 82 | #define LJLIB_ASM_(name) |
83 | #define LJLIB_LUA(name) | ||
80 | #define LJLIB_SET(name) | 84 | #define LJLIB_SET(name) |
81 | #define LJLIB_PUSH(arg) | 85 | #define LJLIB_PUSH(arg) |
82 | #define LJLIB_REC(handler) | 86 | #define LJLIB_REC(handler) |
@@ -88,6 +92,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f, | |||
88 | 92 | ||
89 | LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | 93 | LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, |
90 | const uint8_t *init, const lua_CFunction *cf); | 94 | const uint8_t *init, const lua_CFunction *cf); |
95 | LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, | ||
96 | GCtab *env); | ||
97 | LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, | ||
98 | const char *name); | ||
91 | 99 | ||
92 | /* Library init data tags. */ | 100 | /* Library init data tags. */ |
93 | #define LIBINIT_LENMASK 0x3f | 101 | #define LIBINIT_LENMASK 0x3f |
@@ -96,7 +104,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | |||
96 | #define LIBINIT_ASM 0x40 | 104 | #define LIBINIT_ASM 0x40 |
97 | #define LIBINIT_ASM_ 0x80 | 105 | #define LIBINIT_ASM_ 0x80 |
98 | #define LIBINIT_STRING 0xc0 | 106 | #define LIBINIT_STRING 0xc0 |
99 | #define LIBINIT_MAXSTR 0x39 | 107 | #define LIBINIT_MAXSTR 0x38 |
108 | #define LIBINIT_LUA 0xf9 | ||
100 | #define LIBINIT_SET 0xfa | 109 | #define LIBINIT_SET 0xfa |
101 | #define LIBINIT_NUMBER 0xfb | 110 | #define LIBINIT_NUMBER 0xfb |
102 | #define LIBINIT_COPY 0xfc | 111 | #define LIBINIT_COPY 0xfc |
@@ -104,9 +113,4 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | |||
104 | #define LIBINIT_FFID 0xfe | 113 | #define LIBINIT_FFID 0xfe |
105 | #define LIBINIT_END 0xff | 114 | #define LIBINIT_END 0xff |
106 | 115 | ||
107 | /* Exported library functions. */ | ||
108 | |||
109 | typedef struct RandomState RandomState; | ||
110 | LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); | ||
111 | |||
112 | #endif | 116 | #endif |
diff --git a/src/lj_load.c b/src/lj_load.c index dbd36ac7..0aab4884 100644 --- a/src/lj_load.c +++ b/src/lj_load.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include "lj_obj.h" | 15 | #include "lj_obj.h" |
16 | #include "lj_gc.h" | 16 | #include "lj_gc.h" |
17 | #include "lj_err.h" | 17 | #include "lj_err.h" |
18 | #include "lj_str.h" | 18 | #include "lj_buf.h" |
19 | #include "lj_func.h" | 19 | #include "lj_func.h" |
20 | #include "lj_frame.h" | 20 | #include "lj_frame.h" |
21 | #include "lj_vm.h" | 21 | #include "lj_vm.h" |
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data, | |||
54 | ls.rdata = data; | 54 | ls.rdata = data; |
55 | ls.chunkarg = chunkname ? chunkname : "?"; | 55 | ls.chunkarg = chunkname ? chunkname : "?"; |
56 | ls.mode = mode; | 56 | ls.mode = mode; |
57 | lj_str_initbuf(&ls.sb); | 57 | lj_buf_init(L, &ls.sb); |
58 | status = lj_vm_cpcall(L, NULL, &ls, cpparser); | 58 | status = lj_vm_cpcall(L, NULL, &ls, cpparser); |
59 | lj_lex_cleanup(L, &ls); | 59 | lj_lex_cleanup(L, &ls); |
60 | lj_gc_check(L); | 60 | lj_gc_check(L); |
@@ -159,7 +159,7 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s) | |||
159 | LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) | 159 | LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) |
160 | { | 160 | { |
161 | cTValue *o = L->top-1; | 161 | cTValue *o = L->top-1; |
162 | api_check(L, L->top > L->base); | 162 | lj_checkapi(L->top > L->base, "top slot empty"); |
163 | if (tvisfunc(o) && isluafunc(funcV(o))) | 163 | if (tvisfunc(o) && isluafunc(funcV(o))) |
164 | return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); | 164 | return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); |
165 | else | 165 | else |
diff --git a/src/lj_mcode.c b/src/lj_mcode.c index ac37c1a6..42aab0bc 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include "lj_mcode.h" | 14 | #include "lj_mcode.h" |
15 | #include "lj_trace.h" | 15 | #include "lj_trace.h" |
16 | #include "lj_dispatch.h" | 16 | #include "lj_dispatch.h" |
17 | #include "lj_prng.h" | ||
17 | #endif | 18 | #endif |
18 | #if LJ_HASJIT || LJ_HASFFI | 19 | #if LJ_HASJIT || LJ_HASFFI |
19 | #include "lj_vm.h" | 20 | #include "lj_vm.h" |
@@ -44,7 +45,7 @@ void lj_mcode_sync(void *start, void *end) | |||
44 | sys_icache_invalidate(start, (char *)end-(char *)start); | 45 | sys_icache_invalidate(start, (char *)end-(char *)start); |
45 | #elif LJ_TARGET_PPC | 46 | #elif LJ_TARGET_PPC |
46 | lj_vm_cachesync(start, end); | 47 | lj_vm_cachesync(start, end); |
47 | #elif defined(__GNUC__) | 48 | #elif defined(__GNUC__) || defined(__clang__) |
48 | __clear_cache(start, end); | 49 | __clear_cache(start, end); |
49 | #else | 50 | #else |
50 | #error "Missing builtin to flush instruction cache" | 51 | #error "Missing builtin to flush instruction cache" |
@@ -66,8 +67,8 @@ void lj_mcode_sync(void *start, void *end) | |||
66 | 67 | ||
67 | static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) | 68 | static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) |
68 | { | 69 | { |
69 | void *p = VirtualAlloc((void *)hint, sz, | 70 | void *p = LJ_WIN_VALLOC((void *)hint, sz, |
70 | MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); | 71 | MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); |
71 | if (!p && !hint) | 72 | if (!p && !hint) |
72 | lj_trace_err(J, LJ_TRERR_MCODEAL); | 73 | lj_trace_err(J, LJ_TRERR_MCODEAL); |
73 | return p; | 74 | return p; |
@@ -82,7 +83,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) | |||
82 | static int mcode_setprot(void *p, size_t sz, DWORD prot) | 83 | static int mcode_setprot(void *p, size_t sz, DWORD prot) |
83 | { | 84 | { |
84 | DWORD oprot; | 85 | DWORD oprot; |
85 | return !VirtualProtect(p, sz, prot, &oprot); | 86 | return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); |
86 | } | 87 | } |
87 | 88 | ||
88 | #elif LJ_TARGET_POSIX | 89 | #elif LJ_TARGET_POSIX |
@@ -96,10 +97,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) | |||
96 | #define MCPROT_RW (PROT_READ|PROT_WRITE) | 97 | #define MCPROT_RW (PROT_READ|PROT_WRITE) |
97 | #define MCPROT_RX (PROT_READ|PROT_EXEC) | 98 | #define MCPROT_RX (PROT_READ|PROT_EXEC) |
98 | #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) | 99 | #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) |
100 | #ifdef PROT_MPROTECT | ||
101 | #define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX)) | ||
102 | #else | ||
103 | #define MCPROT_CREATE 0 | ||
104 | #endif | ||
99 | 105 | ||
100 | static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) | 106 | static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) |
101 | { | 107 | { |
102 | void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); | 108 | void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
103 | if (p == MAP_FAILED) { | 109 | if (p == MAP_FAILED) { |
104 | if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); | 110 | if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); |
105 | p = NULL; | 111 | p = NULL; |
@@ -118,52 +124,34 @@ static int mcode_setprot(void *p, size_t sz, int prot) | |||
118 | return mprotect(p, sz, prot); | 124 | return mprotect(p, sz, prot); |
119 | } | 125 | } |
120 | 126 | ||
121 | #elif LJ_64 | ||
122 | |||
123 | #error "Missing OS support for explicit placement of executable memory" | ||
124 | |||
125 | #else | 127 | #else |
126 | 128 | ||
127 | /* Fallback allocator. This will fail if memory is not executable by default. */ | 129 | #error "Missing OS support for explicit placement of executable memory" |
128 | #define LUAJIT_UNPROTECT_MCODE | ||
129 | #define MCPROT_RW 0 | ||
130 | #define MCPROT_RX 0 | ||
131 | #define MCPROT_RWX 0 | ||
132 | |||
133 | static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) | ||
134 | { | ||
135 | UNUSED(hint); UNUSED(prot); | ||
136 | return lj_mem_new(J->L, sz); | ||
137 | } | ||
138 | |||
139 | static void mcode_free(jit_State *J, void *p, size_t sz) | ||
140 | { | ||
141 | lj_mem_free(J2G(J), p, sz); | ||
142 | } | ||
143 | 130 | ||
144 | #endif | 131 | #endif |
145 | 132 | ||
146 | /* -- MCode area protection ----------------------------------------------- */ | 133 | /* -- MCode area protection ----------------------------------------------- */ |
147 | 134 | ||
148 | /* Define this ONLY if page protection twiddling becomes a bottleneck. */ | 135 | #if LUAJIT_SECURITY_MCODE == 0 |
149 | #ifdef LUAJIT_UNPROTECT_MCODE | ||
150 | 136 | ||
151 | /* It's generally considered to be a potential security risk to have | 137 | /* Define this ONLY if page protection twiddling becomes a bottleneck. |
138 | ** | ||
139 | ** It's generally considered to be a potential security risk to have | ||
152 | ** pages with simultaneous write *and* execute access in a process. | 140 | ** pages with simultaneous write *and* execute access in a process. |
153 | ** | 141 | ** |
154 | ** Do not even think about using this mode for server processes or | 142 | ** Do not even think about using this mode for server processes or |
155 | ** apps handling untrusted external data (such as a browser). | 143 | ** apps handling untrusted external data. |
156 | ** | 144 | ** |
157 | ** The security risk is not in LuaJIT itself -- but if an adversary finds | 145 | ** The security risk is not in LuaJIT itself -- but if an adversary finds |
158 | ** any *other* flaw in your C application logic, then any RWX memory page | 146 | ** any *other* flaw in your C application logic, then any RWX memory pages |
159 | ** simplifies writing an exploit considerably. | 147 | ** simplify writing an exploit considerably. |
160 | */ | 148 | */ |
161 | #define MCPROT_GEN MCPROT_RWX | 149 | #define MCPROT_GEN MCPROT_RWX |
162 | #define MCPROT_RUN MCPROT_RWX | 150 | #define MCPROT_RUN MCPROT_RWX |
163 | 151 | ||
164 | static void mcode_protect(jit_State *J, int prot) | 152 | static void mcode_protect(jit_State *J, int prot) |
165 | { | 153 | { |
166 | UNUSED(J); UNUSED(prot); | 154 | UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); |
167 | } | 155 | } |
168 | 156 | ||
169 | #else | 157 | #else |
@@ -221,8 +209,8 @@ static void *mcode_alloc(jit_State *J, size_t sz) | |||
221 | */ | 209 | */ |
222 | #if LJ_TARGET_MIPS | 210 | #if LJ_TARGET_MIPS |
223 | /* Use the middle of the 256MB-aligned region. */ | 211 | /* Use the middle of the 256MB-aligned region. */ |
224 | uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + | 212 | uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & |
225 | 0x08000000u; | 213 | ~(uintptr_t)0x0fffffffu) + 0x08000000u; |
226 | #else | 214 | #else |
227 | uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; | 215 | uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; |
228 | #endif | 216 | #endif |
@@ -242,7 +230,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) | |||
242 | } | 230 | } |
243 | /* Next try probing 64K-aligned pseudo-random addresses. */ | 231 | /* Next try probing 64K-aligned pseudo-random addresses. */ |
244 | do { | 232 | do { |
245 | hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; | 233 | hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000); |
246 | } while (!(hint + sz < range+range)); | 234 | } while (!(hint + sz < range+range)); |
247 | hint = target + hint - range; | 235 | hint = target + hint - range; |
248 | } | 236 | } |
@@ -255,7 +243,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) | |||
255 | /* All memory addresses are reachable by relative jumps. */ | 243 | /* All memory addresses are reachable by relative jumps. */ |
256 | static void *mcode_alloc(jit_State *J, size_t sz) | 244 | static void *mcode_alloc(jit_State *J, size_t sz) |
257 | { | 245 | { |
258 | #ifdef __OpenBSD__ | 246 | #if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP |
259 | /* Allow better executable memory allocation for OpenBSD W^X mode. */ | 247 | /* Allow better executable memory allocation for OpenBSD W^X mode. */ |
260 | void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); | 248 | void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); |
261 | if (p && mcode_setprot(p, sz, MCPROT_GEN)) { | 249 | if (p && mcode_setprot(p, sz, MCPROT_GEN)) { |
@@ -286,6 +274,7 @@ static void mcode_allocarea(jit_State *J) | |||
286 | ((MCLink *)J->mcarea)->next = oldarea; | 274 | ((MCLink *)J->mcarea)->next = oldarea; |
287 | ((MCLink *)J->mcarea)->size = sz; | 275 | ((MCLink *)J->mcarea)->size = sz; |
288 | J->szallmcarea += sz; | 276 | J->szallmcarea += sz; |
277 | J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot); | ||
289 | } | 278 | } |
290 | 279 | ||
291 | /* Free all MCode areas. */ | 280 | /* Free all MCode areas. */ |
@@ -296,7 +285,9 @@ void lj_mcode_free(jit_State *J) | |||
296 | J->szallmcarea = 0; | 285 | J->szallmcarea = 0; |
297 | while (mc) { | 286 | while (mc) { |
298 | MCode *next = ((MCLink *)mc)->next; | 287 | MCode *next = ((MCLink *)mc)->next; |
299 | mcode_free(J, mc, ((MCLink *)mc)->size); | 288 | size_t sz = ((MCLink *)mc)->size; |
289 | lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); | ||
290 | mcode_free(J, mc, sz); | ||
300 | mc = next; | 291 | mc = next; |
301 | } | 292 | } |
302 | } | 293 | } |
@@ -331,35 +322,36 @@ void lj_mcode_abort(jit_State *J) | |||
331 | /* Set/reset protection to allow patching of MCode areas. */ | 322 | /* Set/reset protection to allow patching of MCode areas. */ |
332 | MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) | 323 | MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) |
333 | { | 324 | { |
334 | #ifdef LUAJIT_UNPROTECT_MCODE | ||
335 | UNUSED(J); UNUSED(ptr); UNUSED(finish); | ||
336 | return NULL; | ||
337 | #else | ||
338 | if (finish) { | 325 | if (finish) { |
326 | #if LUAJIT_SECURITY_MCODE | ||
339 | if (J->mcarea == ptr) | 327 | if (J->mcarea == ptr) |
340 | mcode_protect(J, MCPROT_RUN); | 328 | mcode_protect(J, MCPROT_RUN); |
341 | else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) | 329 | else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) |
342 | mcode_protfail(J); | 330 | mcode_protfail(J); |
331 | #endif | ||
343 | return NULL; | 332 | return NULL; |
344 | } else { | 333 | } else { |
345 | MCode *mc = J->mcarea; | 334 | MCode *mc = J->mcarea; |
346 | /* Try current area first to use the protection cache. */ | 335 | /* Try current area first to use the protection cache. */ |
347 | if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { | 336 | if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { |
337 | #if LUAJIT_SECURITY_MCODE | ||
348 | mcode_protect(J, MCPROT_GEN); | 338 | mcode_protect(J, MCPROT_GEN); |
339 | #endif | ||
349 | return mc; | 340 | return mc; |
350 | } | 341 | } |
351 | /* Otherwise search through the list of MCode areas. */ | 342 | /* Otherwise search through the list of MCode areas. */ |
352 | for (;;) { | 343 | for (;;) { |
353 | mc = ((MCLink *)mc)->next; | 344 | mc = ((MCLink *)mc)->next; |
354 | lua_assert(mc != NULL); | 345 | lj_assertJ(mc != NULL, "broken MCode area chain"); |
355 | if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { | 346 | if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { |
347 | #if LUAJIT_SECURITY_MCODE | ||
356 | if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) | 348 | if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) |
357 | mcode_protfail(J); | 349 | mcode_protfail(J); |
350 | #endif | ||
358 | return mc; | 351 | return mc; |
359 | } | 352 | } |
360 | } | 353 | } |
361 | } | 354 | } |
362 | #endif | ||
363 | } | 355 | } |
364 | 356 | ||
365 | /* Limit of MCode reservation reached. */ | 357 | /* Limit of MCode reservation reached. */ |
diff --git a/src/lj_meta.c b/src/lj_meta.c index 1d4d2234..5324c666 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #include "lj_tab.h" | 17 | #include "lj_tab.h" |
17 | #include "lj_meta.h" | 18 | #include "lj_meta.h" |
@@ -19,6 +20,8 @@ | |||
19 | #include "lj_bc.h" | 20 | #include "lj_bc.h" |
20 | #include "lj_vm.h" | 21 | #include "lj_vm.h" |
21 | #include "lj_strscan.h" | 22 | #include "lj_strscan.h" |
23 | #include "lj_strfmt.h" | ||
24 | #include "lj_lib.h" | ||
22 | 25 | ||
23 | /* -- Metamethod handling ------------------------------------------------- */ | 26 | /* -- Metamethod handling ------------------------------------------------- */ |
24 | 27 | ||
@@ -44,7 +47,7 @@ void lj_meta_init(lua_State *L) | |||
44 | cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) | 47 | cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) |
45 | { | 48 | { |
46 | cTValue *mo = lj_tab_getstr(mt, name); | 49 | cTValue *mo = lj_tab_getstr(mt, name); |
47 | lua_assert(mm <= MM_FAST); | 50 | lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm); |
48 | if (!mo || tvisnil(mo)) { /* No metamethod? */ | 51 | if (!mo || tvisnil(mo)) { /* No metamethod? */ |
49 | mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */ | 52 | mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */ |
50 | return NULL; | 53 | return NULL; |
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) | |||
77 | TValue *base = L->base; | 80 | TValue *base = L->base; |
78 | TValue *top = L->top; | 81 | TValue *top = L->top; |
79 | const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ | 82 | const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ |
80 | copyTV(L, base-1, tv); /* Replace frame with new object. */ | 83 | copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */ |
81 | top->u32.lo = LJ_CONT_TAILCALL; | 84 | if (LJ_FR2) |
82 | setframe_pc(top, pc); | 85 | (top++)->u64 = LJ_CONT_TAILCALL; |
83 | setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */ | 86 | else |
84 | setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT); | 87 | top->u32.lo = LJ_CONT_TAILCALL; |
85 | L->base = L->top = top+2; | 88 | setframe_pc(top++, pc); |
89 | setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */ | ||
90 | if (LJ_FR2) top++; | ||
91 | setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT); | ||
92 | L->base = L->top = top+1; | ||
86 | /* | 93 | /* |
87 | ** before: [old_mo|PC] [... ...] | 94 | ** before: [old_mo|PC] [... ...] |
88 | ** ^base ^top | 95 | ** ^base ^top |
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo, | |||
113 | */ | 120 | */ |
114 | TValue *top = L->top; | 121 | TValue *top = L->top; |
115 | if (curr_funcisL(L)) top = curr_topL(L); | 122 | if (curr_funcisL(L)) top = curr_topL(L); |
116 | setcont(top, cont); /* Assembler VM stores PC in upper word. */ | 123 | setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */ |
117 | copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ | 124 | if (LJ_FR2) setnilV(top++); |
118 | copyTV(L, top+2, a); | 125 | copyTV(L, top++, mo); /* Store metamethod and two arguments. */ |
119 | copyTV(L, top+3, b); | 126 | if (LJ_FR2) setnilV(top++); |
120 | return top+2; /* Return new base. */ | 127 | copyTV(L, top, a); |
128 | copyTV(L, top+1, b); | ||
129 | return top; /* Return new base. */ | ||
121 | } | 130 | } |
122 | 131 | ||
123 | /* -- C helpers for some instructions, called from assembler VM ----------- */ | 132 | /* -- C helpers for some instructions, called from assembler VM ----------- */ |
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc, | |||
225 | } | 234 | } |
226 | } | 235 | } |
227 | 236 | ||
228 | /* In-place coercion of a number to a string. */ | ||
229 | static LJ_AINLINE int tostring(lua_State *L, TValue *o) | ||
230 | { | ||
231 | if (tvisstr(o)) { | ||
232 | return 1; | ||
233 | } else if (tvisnumber(o)) { | ||
234 | setstrV(L, o, lj_str_fromnumber(L, o)); | ||
235 | return 1; | ||
236 | } else { | ||
237 | return 0; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | /* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ | 237 | /* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ |
242 | TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | 238 | TValue *lj_meta_cat(lua_State *L, TValue *top, int left) |
243 | { | 239 | { |
244 | int fromc = 0; | 240 | int fromc = 0; |
245 | if (left < 0) { left = -left; fromc = 1; } | 241 | if (left < 0) { left = -left; fromc = 1; } |
246 | do { | 242 | do { |
247 | int n = 1; | 243 | if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) || |
248 | if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { | 244 | !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) { |
249 | cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); | 245 | cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); |
250 | if (tvisnil(mo)) { | 246 | if (tvisnil(mo)) { |
251 | mo = lj_meta_lookup(L, top, MM_concat); | 247 | mo = lj_meta_lookup(L, top, MM_concat); |
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | |||
266 | ** after mm: [...][CAT stack ...] <--push-- [result] | 262 | ** after mm: [...][CAT stack ...] <--push-- [result] |
267 | ** next step: [...][CAT stack .............] | 263 | ** next step: [...][CAT stack .............] |
268 | */ | 264 | */ |
269 | copyTV(L, top+2, top); /* Careful with the order of stack copies! */ | 265 | copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */ |
270 | copyTV(L, top+1, top-1); | 266 | copyTV(L, top+2*LJ_FR2+1, top-1); |
271 | copyTV(L, top, mo); | 267 | copyTV(L, top+LJ_FR2, mo); |
272 | setcont(top-1, lj_cont_cat); | 268 | setcont(top-1, lj_cont_cat); |
269 | if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; } | ||
273 | return top+1; /* Trigger metamethod call. */ | 270 | return top+1; /* Trigger metamethod call. */ |
274 | } else if (strV(top)->len == 0) { /* Shortcut. */ | ||
275 | (void)tostring(L, top-1); | ||
276 | } else { | 271 | } else { |
277 | /* Pick as many strings as possible from the top and concatenate them: | 272 | /* Pick as many strings as possible from the top and concatenate them: |
278 | ** | 273 | ** |
@@ -281,27 +276,33 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | |||
281 | ** concat: [...][CAT stack ...] [result] | 276 | ** concat: [...][CAT stack ...] [result] |
282 | ** next step: [...][CAT stack ............] | 277 | ** next step: [...][CAT stack ............] |
283 | */ | 278 | */ |
284 | MSize tlen = strV(top)->len; | 279 | TValue *e, *o = top; |
285 | char *buffer; | 280 | uint64_t tlen = tvisstr(o) ? strV(o)->len : |
286 | int i; | 281 | tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; |
287 | for (n = 1; n <= left && tostring(L, top-n); n++) { | 282 | SBuf *sb; |
288 | MSize len = strV(top-n)->len; | 283 | do { |
289 | if (len >= LJ_MAX_STR - tlen) | 284 | o--; tlen += tvisstr(o) ? strV(o)->len : |
290 | lj_err_msg(L, LJ_ERR_STROV); | 285 | tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; |
291 | tlen += len; | 286 | } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); |
292 | } | 287 | if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); |
293 | buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); | 288 | sb = lj_buf_tmp_(L); |
294 | n--; | 289 | lj_buf_more(sb, (MSize)tlen); |
295 | tlen = 0; | 290 | for (e = top, top = o; o <= e; o++) { |
296 | for (i = n; i >= 0; i--) { | 291 | if (tvisstr(o)) { |
297 | MSize len = strV(top-i)->len; | 292 | GCstr *s = strV(o); |
298 | memcpy(buffer + tlen, strVdata(top-i), len); | 293 | MSize len = s->len; |
299 | tlen += len; | 294 | lj_buf_putmem(sb, strdata(s), len); |
295 | } else if (tvisbuf(o)) { | ||
296 | SBufExt *sbx = bufV(o); | ||
297 | lj_buf_putmem(sb, sbx->r, sbufxlen(sbx)); | ||
298 | } else if (tvisint(o)) { | ||
299 | lj_strfmt_putint(sb, intV(o)); | ||
300 | } else { | ||
301 | lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)); | ||
302 | } | ||
300 | } | 303 | } |
301 | setstrV(L, top-n, lj_str_new(L, buffer, tlen)); | 304 | setstrV(L, top, lj_buf_str(L, sb)); |
302 | } | 305 | } |
303 | left -= n; | ||
304 | top -= n; | ||
305 | } while (left >= 1); | 306 | } while (left >= 1); |
306 | if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { | 307 | if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { |
307 | if (!fromc) L->top = curr_topL(L); | 308 | if (!fromc) L->top = curr_topL(L); |
@@ -338,12 +339,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) | |||
338 | return (TValue *)(intptr_t)ne; | 339 | return (TValue *)(intptr_t)ne; |
339 | } | 340 | } |
340 | top = curr_top(L); | 341 | top = curr_top(L); |
341 | setcont(top, ne ? lj_cont_condf : lj_cont_condt); | 342 | setcont(top++, ne ? lj_cont_condf : lj_cont_condt); |
342 | copyTV(L, top+1, mo); | 343 | if (LJ_FR2) setnilV(top++); |
344 | copyTV(L, top++, mo); | ||
345 | if (LJ_FR2) setnilV(top++); | ||
343 | it = ~(uint32_t)o1->gch.gct; | 346 | it = ~(uint32_t)o1->gch.gct; |
344 | setgcV(L, top+2, o1, it); | 347 | setgcV(L, top, o1, it); |
345 | setgcV(L, top+3, o2, it); | 348 | setgcV(L, top+1, o2, it); |
346 | return top+2; /* Trigger metamethod call. */ | 349 | return top; /* Trigger metamethod call. */ |
347 | } | 350 | } |
348 | return (TValue *)(intptr_t)ne; | 351 | return (TValue *)(intptr_t)ne; |
349 | } | 352 | } |
@@ -365,8 +368,8 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) | |||
365 | } else if (op == BC_ISEQN) { | 368 | } else if (op == BC_ISEQN) { |
366 | o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; | 369 | o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; |
367 | } else { | 370 | } else { |
368 | lua_assert(op == BC_ISEQP); | 371 | lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op); |
369 | setitype(&tv, ~bc_d(ins)); | 372 | setpriV(&tv, ~bc_d(ins)); |
370 | o2 = &tv; | 373 | o2 = &tv; |
371 | } | 374 | } |
372 | mo = lj_meta_lookup(L, o1mm, MM_eq); | 375 | mo = lj_meta_lookup(L, o1mm, MM_eq); |
@@ -423,6 +426,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op) | |||
423 | } | 426 | } |
424 | } | 427 | } |
425 | 428 | ||
429 | /* Helper for ISTYPE and ISNUM. Implicit coercion or error. */ | ||
430 | void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp) | ||
431 | { | ||
432 | L->top = curr_topL(L); | ||
433 | ra++; tp--; | ||
434 | lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE"); | ||
435 | if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra); | ||
436 | else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); | ||
437 | else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra); | ||
438 | else lj_err_argtype(L, ra, lj_obj_itypename[tp]); | ||
439 | } | ||
440 | |||
426 | /* Helper for calls. __call metamethod. */ | 441 | /* Helper for calls. __call metamethod. */ |
427 | void lj_meta_call(lua_State *L, TValue *func, TValue *top) | 442 | void lj_meta_call(lua_State *L, TValue *func, TValue *top) |
428 | { | 443 | { |
@@ -430,7 +445,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top) | |||
430 | TValue *p; | 445 | TValue *p; |
431 | if (!tvisfunc(mo)) | 446 | if (!tvisfunc(mo)) |
432 | lj_err_optype_call(L, func); | 447 | lj_err_optype_call(L, func); |
433 | for (p = top; p > func; p--) copyTV(L, p, p-1); | 448 | for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1); |
449 | if (LJ_FR2) copyTV(L, func+2, func); | ||
434 | copyTV(L, func, mo); | 450 | copyTV(L, func, mo); |
435 | } | 451 | } |
436 | 452 | ||
diff --git a/src/lj_meta.h b/src/lj_meta.h index 9c36aea5..3a6eaac2 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h | |||
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o); | |||
31 | LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); | 31 | LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); |
32 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); | 32 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); |
33 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); | 33 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); |
34 | LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp); | ||
34 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); | 35 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); |
35 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); | 36 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); |
36 | 37 | ||
diff --git a/src/lj_obj.c b/src/lj_obj.c index 528b3a58..65cbe1a1 100644 --- a/src/lj_obj.c +++ b/src/lj_obj.c | |||
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */ | |||
20 | }; | 20 | }; |
21 | 21 | ||
22 | /* Compare two objects without calling metamethods. */ | 22 | /* Compare two objects without calling metamethods. */ |
23 | int lj_obj_equal(cTValue *o1, cTValue *o2) | 23 | int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2) |
24 | { | 24 | { |
25 | if (itype(o1) == itype(o2)) { | 25 | if (itype(o1) == itype(o2)) { |
26 | if (tvispri(o1)) | 26 | if (tvispri(o1)) |
@@ -33,3 +33,19 @@ int lj_obj_equal(cTValue *o1, cTValue *o2) | |||
33 | return numberVnum(o1) == numberVnum(o2); | 33 | return numberVnum(o1) == numberVnum(o2); |
34 | } | 34 | } |
35 | 35 | ||
36 | /* Return pointer to object or its object data. */ | ||
37 | const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o) | ||
38 | { | ||
39 | UNUSED(g); | ||
40 | if (tvisudata(o)) | ||
41 | return uddata(udataV(o)); | ||
42 | else if (tvislightud(o)) | ||
43 | return lightudV(g, o); | ||
44 | else if (LJ_HASFFI && tviscdata(o)) | ||
45 | return cdataptr(cdataV(o)); | ||
46 | else if (tvisgcv(o)) | ||
47 | return gcV(o); | ||
48 | else | ||
49 | return NULL; | ||
50 | } | ||
51 | |||
diff --git a/src/lj_obj.h b/src/lj_obj.h index 3e506bdb..0a63fddf 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
@@ -13,44 +13,81 @@ | |||
13 | #include "lj_def.h" | 13 | #include "lj_def.h" |
14 | #include "lj_arch.h" | 14 | #include "lj_arch.h" |
15 | 15 | ||
16 | /* -- Memory references (32 bit address space) ---------------------------- */ | 16 | /* -- Memory references --------------------------------------------------- */ |
17 | 17 | ||
18 | /* Memory size. */ | 18 | /* Memory and GC object sizes. */ |
19 | typedef uint32_t MSize; | 19 | typedef uint32_t MSize; |
20 | #if LJ_GC64 | ||
21 | typedef uint64_t GCSize; | ||
22 | #else | ||
23 | typedef uint32_t GCSize; | ||
24 | #endif | ||
20 | 25 | ||
21 | /* Memory reference */ | 26 | /* Memory reference */ |
22 | typedef struct MRef { | 27 | typedef struct MRef { |
28 | #if LJ_GC64 | ||
29 | uint64_t ptr64; /* True 64 bit pointer. */ | ||
30 | #else | ||
23 | uint32_t ptr32; /* Pseudo 32 bit pointer. */ | 31 | uint32_t ptr32; /* Pseudo 32 bit pointer. */ |
32 | #endif | ||
24 | } MRef; | 33 | } MRef; |
25 | 34 | ||
35 | #if LJ_GC64 | ||
36 | #define mref(r, t) ((t *)(void *)(r).ptr64) | ||
37 | #define mrefu(r) ((r).ptr64) | ||
38 | |||
39 | #define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) | ||
40 | #define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u)) | ||
41 | #define setmrefr(r, v) ((r).ptr64 = (v).ptr64) | ||
42 | #else | ||
26 | #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) | 43 | #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) |
44 | #define mrefu(r) ((r).ptr32) | ||
27 | 45 | ||
28 | #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) | 46 | #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) |
47 | #define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u)) | ||
29 | #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) | 48 | #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) |
49 | #endif | ||
30 | 50 | ||
31 | /* -- GC object references (32 bit address space) ------------------------- */ | 51 | /* -- GC object references ------------------------------------------------ */ |
32 | 52 | ||
33 | /* GCobj reference */ | 53 | /* GCobj reference */ |
34 | typedef struct GCRef { | 54 | typedef struct GCRef { |
55 | #if LJ_GC64 | ||
56 | uint64_t gcptr64; /* True 64 bit pointer. */ | ||
57 | #else | ||
35 | uint32_t gcptr32; /* Pseudo 32 bit pointer. */ | 58 | uint32_t gcptr32; /* Pseudo 32 bit pointer. */ |
59 | #endif | ||
36 | } GCRef; | 60 | } GCRef; |
37 | 61 | ||
38 | /* Common GC header for all collectable objects. */ | 62 | /* Common GC header for all collectable objects. */ |
39 | #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct | 63 | #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct |
40 | /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ | 64 | /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ |
41 | 65 | ||
66 | #if LJ_GC64 | ||
67 | #define gcref(r) ((GCobj *)(r).gcptr64) | ||
68 | #define gcrefp(r, t) ((t *)(void *)(r).gcptr64) | ||
69 | #define gcrefu(r) ((r).gcptr64) | ||
70 | #define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64) | ||
71 | |||
72 | #define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch) | ||
73 | #define setgcreft(r, gc, it) \ | ||
74 | (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47) | ||
75 | #define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p)) | ||
76 | #define setgcrefnull(r) ((r).gcptr64 = 0) | ||
77 | #define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64) | ||
78 | #else | ||
42 | #define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) | 79 | #define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) |
43 | #define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) | 80 | #define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) |
44 | #define gcrefu(r) ((r).gcptr32) | 81 | #define gcrefu(r) ((r).gcptr32) |
45 | #define gcrefi(r) ((int32_t)(r).gcptr32) | ||
46 | #define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) | 82 | #define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) |
47 | #define gcnext(gc) (gcref((gc)->gch.nextgc)) | ||
48 | 83 | ||
49 | #define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) | 84 | #define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) |
50 | #define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i)) | ||
51 | #define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) | 85 | #define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) |
52 | #define setgcrefnull(r) ((r).gcptr32 = 0) | 86 | #define setgcrefnull(r) ((r).gcptr32 = 0) |
53 | #define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) | 87 | #define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) |
88 | #endif | ||
89 | |||
90 | #define gcnext(gc) (gcref((gc)->gch.nextgc)) | ||
54 | 91 | ||
55 | /* IMPORTANT NOTE: | 92 | /* IMPORTANT NOTE: |
56 | ** | 93 | ** |
@@ -119,11 +156,10 @@ typedef int32_t BCLine; /* Bytecode line number. */ | |||
119 | /* Internal assembler functions. Never call these directly from C. */ | 156 | /* Internal assembler functions. Never call these directly from C. */ |
120 | typedef void (*ASMFunction)(void); | 157 | typedef void (*ASMFunction)(void); |
121 | 158 | ||
122 | /* Resizable string buffer. Need this here, details in lj_str.h. */ | 159 | /* Resizable string buffer. Need this here, details in lj_buf.h. */ |
160 | #define SBufHeader char *w, *e, *b; MRef L | ||
123 | typedef struct SBuf { | 161 | typedef struct SBuf { |
124 | char *buf; /* String buffer base. */ | 162 | SBufHeader; |
125 | MSize n; /* String buffer length. */ | ||
126 | MSize sz; /* String buffer size. */ | ||
127 | } SBuf; | 163 | } SBuf; |
128 | 164 | ||
129 | /* -- Tags and values ----------------------------------------------------- */ | 165 | /* -- Tags and values ----------------------------------------------------- */ |
@@ -131,13 +167,23 @@ typedef struct SBuf { | |||
131 | /* Frame link. */ | 167 | /* Frame link. */ |
132 | typedef union { | 168 | typedef union { |
133 | int32_t ftsz; /* Frame type and size of previous frame. */ | 169 | int32_t ftsz; /* Frame type and size of previous frame. */ |
134 | MRef pcr; /* Overlaps PC for Lua frames. */ | 170 | MRef pcr; /* Or PC for Lua frames. */ |
135 | } FrameLink; | 171 | } FrameLink; |
136 | 172 | ||
137 | /* Tagged value. */ | 173 | /* Tagged value. */ |
138 | typedef LJ_ALIGN(8) union TValue { | 174 | typedef LJ_ALIGN(8) union TValue { |
139 | uint64_t u64; /* 64 bit pattern overlaps number. */ | 175 | uint64_t u64; /* 64 bit pattern overlaps number. */ |
140 | lua_Number n; /* Number object overlaps split tag/value object. */ | 176 | lua_Number n; /* Number object overlaps split tag/value object. */ |
177 | #if LJ_GC64 | ||
178 | GCRef gcr; /* GCobj reference with tag. */ | ||
179 | int64_t it64; | ||
180 | struct { | ||
181 | LJ_ENDIAN_LOHI( | ||
182 | int32_t i; /* Integer value. */ | ||
183 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ | ||
184 | ) | ||
185 | }; | ||
186 | #else | ||
141 | struct { | 187 | struct { |
142 | LJ_ENDIAN_LOHI( | 188 | LJ_ENDIAN_LOHI( |
143 | union { | 189 | union { |
@@ -147,12 +193,17 @@ typedef LJ_ALIGN(8) union TValue { | |||
147 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ | 193 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ |
148 | ) | 194 | ) |
149 | }; | 195 | }; |
196 | #endif | ||
197 | #if LJ_FR2 | ||
198 | int64_t ftsz; /* Frame type and size of previous frame, or PC. */ | ||
199 | #else | ||
150 | struct { | 200 | struct { |
151 | LJ_ENDIAN_LOHI( | 201 | LJ_ENDIAN_LOHI( |
152 | GCRef func; /* Function for next frame (or dummy L). */ | 202 | GCRef func; /* Function for next frame (or dummy L). */ |
153 | , FrameLink tp; /* Link to previous frame. */ | 203 | , FrameLink tp; /* Link to previous frame. */ |
154 | ) | 204 | ) |
155 | } fr; | 205 | } fr; |
206 | #endif | ||
156 | struct { | 207 | struct { |
157 | LJ_ENDIAN_LOHI( | 208 | LJ_ENDIAN_LOHI( |
158 | uint32_t lo; /* Lower 32 bits of number. */ | 209 | uint32_t lo; /* Lower 32 bits of number. */ |
@@ -172,6 +223,8 @@ typedef const TValue cTValue; | |||
172 | 223 | ||
173 | /* Internal object tags. | 224 | /* Internal object tags. |
174 | ** | 225 | ** |
226 | ** Format for 32 bit GC references (!LJ_GC64): | ||
227 | ** | ||
175 | ** Internal tags overlap the MSW of a number object (must be a double). | 228 | ** Internal tags overlap the MSW of a number object (must be a double). |
176 | ** Interpreted as a double these are special NaNs. The FPU only generates | 229 | ** Interpreted as a double these are special NaNs. The FPU only generates |
177 | ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available | 230 | ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available |
@@ -181,11 +234,24 @@ typedef const TValue cTValue; | |||
181 | ** ---MSW---.---LSW--- | 234 | ** ---MSW---.---LSW--- |
182 | ** primitive types | itype | | | 235 | ** primitive types | itype | | |
183 | ** lightuserdata | itype | void * | (32 bit platforms) | 236 | ** lightuserdata | itype | void * | (32 bit platforms) |
184 | ** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers) | 237 | ** lightuserdata |ffff|seg| ofs | (64 bit platforms) |
185 | ** GC objects | itype | GCRef | | 238 | ** GC objects | itype | GCRef | |
186 | ** int (LJ_DUALNUM)| itype | int | | 239 | ** int (LJ_DUALNUM)| itype | int | |
187 | ** number -------double------ | 240 | ** number -------double------ |
188 | ** | 241 | ** |
242 | ** Format for 64 bit GC references (LJ_GC64): | ||
243 | ** | ||
244 | ** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next | ||
245 | ** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer, | ||
246 | ** a zero-extended 32 bit integer or all bits set to 1 for primitive types. | ||
247 | ** | ||
248 | ** ------MSW------.------LSW------ | ||
249 | ** primitive types |1..1|itype|1..................1| | ||
250 | ** GC objects |1..1|itype|-------GCRef--------| | ||
251 | ** lightuserdata |1..1|itype|seg|------ofs-------| | ||
252 | ** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------| | ||
253 | ** number ------------double------------- | ||
254 | ** | ||
189 | ** ORDER LJ_T | 255 | ** ORDER LJ_T |
190 | ** Primitive types nil/false/true must be first, lightuserdata next. | 256 | ** Primitive types nil/false/true must be first, lightuserdata next. |
191 | ** GC objects are at the end, table/userdata must be lowest. | 257 | ** GC objects are at the end, table/userdata must be lowest. |
@@ -208,7 +274,7 @@ typedef const TValue cTValue; | |||
208 | #define LJ_TNUMX (~13u) | 274 | #define LJ_TNUMX (~13u) |
209 | 275 | ||
210 | /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ | 276 | /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ |
211 | #if LJ_64 | 277 | #if LJ_64 && !LJ_GC64 |
212 | #define LJ_TISNUM 0xfffeffffu | 278 | #define LJ_TISNUM 0xfffeffffu |
213 | #else | 279 | #else |
214 | #define LJ_TISNUM LJ_TNUMX | 280 | #define LJ_TISNUM LJ_TNUMX |
@@ -218,14 +284,31 @@ typedef const TValue cTValue; | |||
218 | #define LJ_TISGCV (LJ_TSTR+1) | 284 | #define LJ_TISGCV (LJ_TSTR+1) |
219 | #define LJ_TISTABUD LJ_TTAB | 285 | #define LJ_TISTABUD LJ_TTAB |
220 | 286 | ||
287 | /* Type marker for slot holding a traversal index. Must be lightuserdata. */ | ||
288 | #define LJ_KEYINDEX 0xfffe7fffu | ||
289 | |||
290 | #if LJ_GC64 | ||
291 | #define LJ_GCVMASK (((uint64_t)1 << 47) - 1) | ||
292 | #endif | ||
293 | |||
294 | #if LJ_64 | ||
295 | /* To stay within 47 bits, lightuserdata is segmented. */ | ||
296 | #define LJ_LIGHTUD_BITS_SEG 8 | ||
297 | #define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG) | ||
298 | #endif | ||
299 | |||
221 | /* -- String object ------------------------------------------------------- */ | 300 | /* -- String object ------------------------------------------------------- */ |
222 | 301 | ||
302 | typedef uint32_t StrHash; /* String hash value. */ | ||
303 | typedef uint32_t StrID; /* String ID. */ | ||
304 | |||
223 | /* String object header. String payload follows. */ | 305 | /* String object header. String payload follows. */ |
224 | typedef struct GCstr { | 306 | typedef struct GCstr { |
225 | GCHeader; | 307 | GCHeader; |
226 | uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ | 308 | uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ |
227 | uint8_t unused; | 309 | uint8_t hashalg; /* Hash algorithm. */ |
228 | MSize hash; /* Hash of string. */ | 310 | StrID sid; /* Interned string ID. */ |
311 | StrHash hash; /* Hash of string. */ | ||
229 | MSize len; /* Size of string. */ | 312 | MSize len; /* Size of string. */ |
230 | } GCstr; | 313 | } GCstr; |
231 | 314 | ||
@@ -233,7 +316,6 @@ typedef struct GCstr { | |||
233 | #define strdata(s) ((const char *)((s)+1)) | 316 | #define strdata(s) ((const char *)((s)+1)) |
234 | #define strdatawr(s) ((char *)((s)+1)) | 317 | #define strdatawr(s) ((char *)((s)+1)) |
235 | #define strVdata(o) strdata(strV(o)) | 318 | #define strVdata(o) strdata(strV(o)) |
236 | #define sizestring(s) (sizeof(struct GCstr)+(s)->len+1) | ||
237 | 319 | ||
238 | /* -- Userdata object ----------------------------------------------------- */ | 320 | /* -- Userdata object ----------------------------------------------------- */ |
239 | 321 | ||
@@ -253,6 +335,7 @@ enum { | |||
253 | UDTYPE_USERDATA, /* Regular userdata. */ | 335 | UDTYPE_USERDATA, /* Regular userdata. */ |
254 | UDTYPE_IO_FILE, /* I/O library FILE. */ | 336 | UDTYPE_IO_FILE, /* I/O library FILE. */ |
255 | UDTYPE_FFI_CLIB, /* FFI C library namespace. */ | 337 | UDTYPE_FFI_CLIB, /* FFI C library namespace. */ |
338 | UDTYPE_BUFFER, /* String buffer. */ | ||
256 | UDTYPE__MAX | 339 | UDTYPE__MAX |
257 | }; | 340 | }; |
258 | 341 | ||
@@ -291,6 +374,9 @@ typedef struct GCproto { | |||
291 | uint8_t numparams; /* Number of parameters. */ | 374 | uint8_t numparams; /* Number of parameters. */ |
292 | uint8_t framesize; /* Fixed frame size. */ | 375 | uint8_t framesize; /* Fixed frame size. */ |
293 | MSize sizebc; /* Number of bytecode instructions. */ | 376 | MSize sizebc; /* Number of bytecode instructions. */ |
377 | #if LJ_GC64 | ||
378 | uint32_t unused_gc64; | ||
379 | #endif | ||
294 | GCRef gclist; | 380 | GCRef gclist; |
295 | MRef k; /* Split constant array (points to the middle). */ | 381 | MRef k; /* Split constant array (points to the middle). */ |
296 | MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ | 382 | MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ |
@@ -402,7 +488,9 @@ typedef struct Node { | |||
402 | TValue val; /* Value object. Must be first field. */ | 488 | TValue val; /* Value object. Must be first field. */ |
403 | TValue key; /* Key object. */ | 489 | TValue key; /* Key object. */ |
404 | MRef next; /* Hash chain. */ | 490 | MRef next; /* Hash chain. */ |
491 | #if !LJ_GC64 | ||
405 | MRef freetop; /* Top of free elements (stored in t->node[0]). */ | 492 | MRef freetop; /* Top of free elements (stored in t->node[0]). */ |
493 | #endif | ||
406 | } Node; | 494 | } Node; |
407 | 495 | ||
408 | LJ_STATIC_ASSERT(offsetof(Node, val) == 0); | 496 | LJ_STATIC_ASSERT(offsetof(Node, val) == 0); |
@@ -417,12 +505,22 @@ typedef struct GCtab { | |||
417 | MRef node; /* Hash part. */ | 505 | MRef node; /* Hash part. */ |
418 | uint32_t asize; /* Size of array part (keys [0, asize-1]). */ | 506 | uint32_t asize; /* Size of array part (keys [0, asize-1]). */ |
419 | uint32_t hmask; /* Hash part mask (size of hash part - 1). */ | 507 | uint32_t hmask; /* Hash part mask (size of hash part - 1). */ |
508 | #if LJ_GC64 | ||
509 | MRef freetop; /* Top of free elements. */ | ||
510 | #endif | ||
420 | } GCtab; | 511 | } GCtab; |
421 | 512 | ||
422 | #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) | 513 | #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) |
423 | #define tabref(r) (&gcref((r))->tab) | 514 | #define tabref(r) (&gcref((r))->tab) |
424 | #define noderef(r) (mref((r), Node)) | 515 | #define noderef(r) (mref((r), Node)) |
425 | #define nextnode(n) (mref((n)->next, Node)) | 516 | #define nextnode(n) (mref((n)->next, Node)) |
517 | #if LJ_GC64 | ||
518 | #define getfreetop(t, n) (noderef((t)->freetop)) | ||
519 | #define setfreetop(t, n, v) (setmref((t)->freetop, (v))) | ||
520 | #else | ||
521 | #define getfreetop(t, n) (noderef((n)->freetop)) | ||
522 | #define setfreetop(t, n, v) (setmref((n)->freetop, (v))) | ||
523 | #endif | ||
426 | 524 | ||
427 | /* -- State objects ------------------------------------------------------- */ | 525 | /* -- State objects ------------------------------------------------------- */ |
428 | 526 | ||
@@ -488,13 +586,18 @@ typedef enum { | |||
488 | #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) | 586 | #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) |
489 | #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) | 587 | #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) |
490 | 588 | ||
589 | /* Garbage collector state. */ | ||
491 | typedef struct GCState { | 590 | typedef struct GCState { |
492 | MSize total; /* Memory currently allocated. */ | 591 | GCSize total; /* Memory currently allocated. */ |
493 | MSize threshold; /* Memory threshold. */ | 592 | GCSize threshold; /* Memory threshold. */ |
494 | uint8_t currentwhite; /* Current white color. */ | 593 | uint8_t currentwhite; /* Current white color. */ |
495 | uint8_t state; /* GC state. */ | 594 | uint8_t state; /* GC state. */ |
496 | uint8_t nocdatafin; /* No cdata finalizer called. */ | 595 | uint8_t nocdatafin; /* No cdata finalizer called. */ |
497 | uint8_t unused2; | 596 | #if LJ_64 |
597 | uint8_t lightudnum; /* Number of lightuserdata segments - 1. */ | ||
598 | #else | ||
599 | uint8_t unused1; | ||
600 | #endif | ||
498 | MSize sweepstr; /* Sweep position in string table. */ | 601 | MSize sweepstr; /* Sweep position in string table. */ |
499 | GCRef root; /* List of all collectable objects. */ | 602 | GCRef root; /* List of all collectable objects. */ |
500 | MRef sweep; /* Sweep position in root list. */ | 603 | MRef sweep; /* Sweep position in root list. */ |
@@ -502,42 +605,57 @@ typedef struct GCState { | |||
502 | GCRef grayagain; /* List of objects for atomic traversal. */ | 605 | GCRef grayagain; /* List of objects for atomic traversal. */ |
503 | GCRef weak; /* List of weak tables (to be cleared). */ | 606 | GCRef weak; /* List of weak tables (to be cleared). */ |
504 | GCRef mmudata; /* List of userdata (to be finalized). */ | 607 | GCRef mmudata; /* List of userdata (to be finalized). */ |
608 | GCSize debt; /* Debt (how much GC is behind schedule). */ | ||
609 | GCSize estimate; /* Estimate of memory actually in use. */ | ||
505 | MSize stepmul; /* Incremental GC step granularity. */ | 610 | MSize stepmul; /* Incremental GC step granularity. */ |
506 | MSize debt; /* Debt (how much GC is behind schedule). */ | ||
507 | MSize estimate; /* Estimate of memory actually in use. */ | ||
508 | MSize pause; /* Pause between successive GC cycles. */ | 611 | MSize pause; /* Pause between successive GC cycles. */ |
612 | #if LJ_64 | ||
613 | MRef lightudseg; /* Upper bits of lightuserdata segments. */ | ||
614 | #endif | ||
509 | } GCState; | 615 | } GCState; |
510 | 616 | ||
617 | /* String interning state. */ | ||
618 | typedef struct StrInternState { | ||
619 | GCRef *tab; /* String hash table anchors. */ | ||
620 | MSize mask; /* String hash mask (size of hash table - 1). */ | ||
621 | MSize num; /* Number of strings in hash table. */ | ||
622 | StrID id; /* Next string ID. */ | ||
623 | uint8_t idreseed; /* String ID reseed counter. */ | ||
624 | uint8_t second; /* String interning table uses secondary hashing. */ | ||
625 | uint8_t unused1; | ||
626 | uint8_t unused2; | ||
627 | LJ_ALIGN(8) uint64_t seed; /* Random string seed. */ | ||
628 | } StrInternState; | ||
629 | |||
511 | /* Global state, shared by all threads of a Lua universe. */ | 630 | /* Global state, shared by all threads of a Lua universe. */ |
512 | typedef struct global_State { | 631 | typedef struct global_State { |
513 | GCRef *strhash; /* String hash table (hash chain anchors). */ | ||
514 | MSize strmask; /* String hash mask (size of hash table - 1). */ | ||
515 | MSize strnum; /* Number of strings in hash table. */ | ||
516 | lua_Alloc allocf; /* Memory allocator. */ | 632 | lua_Alloc allocf; /* Memory allocator. */ |
517 | void *allocd; /* Memory allocator data. */ | 633 | void *allocd; /* Memory allocator data. */ |
518 | GCState gc; /* Garbage collector. */ | 634 | GCState gc; /* Garbage collector. */ |
519 | SBuf tmpbuf; /* Temporary buffer for string concatenation. */ | ||
520 | Node nilnode; /* Fallback 1-element hash part (nil key and value). */ | ||
521 | GCstr strempty; /* Empty string. */ | 635 | GCstr strempty; /* Empty string. */ |
522 | uint8_t stremptyz; /* Zero terminator of empty string. */ | 636 | uint8_t stremptyz; /* Zero terminator of empty string. */ |
523 | uint8_t hookmask; /* Hook mask. */ | 637 | uint8_t hookmask; /* Hook mask. */ |
524 | uint8_t dispatchmode; /* Dispatch mode. */ | 638 | uint8_t dispatchmode; /* Dispatch mode. */ |
525 | uint8_t vmevmask; /* VM event mask. */ | 639 | uint8_t vmevmask; /* VM event mask. */ |
640 | StrInternState str; /* String interning. */ | ||
641 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ | ||
526 | GCRef mainthref; /* Link to main thread. */ | 642 | GCRef mainthref; /* Link to main thread. */ |
527 | TValue registrytv; /* Anchor for registry. */ | 643 | SBuf tmpbuf; /* Temporary string buffer. */ |
528 | TValue tmptv, tmptv2; /* Temporary TValues. */ | 644 | TValue tmptv, tmptv2; /* Temporary TValues. */ |
645 | Node nilnode; /* Fallback 1-element hash part (nil key and value). */ | ||
646 | TValue registrytv; /* Anchor for registry. */ | ||
529 | GCupval uvhead; /* Head of double-linked list of all open upvalues. */ | 647 | GCupval uvhead; /* Head of double-linked list of all open upvalues. */ |
530 | int32_t hookcount; /* Instruction hook countdown. */ | 648 | int32_t hookcount; /* Instruction hook countdown. */ |
531 | int32_t hookcstart; /* Start count for instruction hook counter. */ | 649 | int32_t hookcstart; /* Start count for instruction hook counter. */ |
532 | lua_Hook hookf; /* Hook function. */ | 650 | lua_Hook hookf; /* Hook function. */ |
533 | lua_CFunction wrapf; /* Wrapper for C function calls. */ | 651 | lua_CFunction wrapf; /* Wrapper for C function calls. */ |
534 | lua_CFunction panic; /* Called as a last resort for errors. */ | 652 | lua_CFunction panic; /* Called as a last resort for errors. */ |
535 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ | ||
536 | BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ | 653 | BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ |
537 | BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ | 654 | BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ |
538 | GCRef jit_L; /* Current JIT code lua_State or NULL. */ | 655 | GCRef cur_L; /* Currently executing lua_State. */ |
539 | MRef jit_base; /* Current JIT code L->base. */ | 656 | MRef jit_base; /* Current JIT code L->base or NULL. */ |
540 | MRef ctype_state; /* Pointer to C type state. */ | 657 | MRef ctype_state; /* Pointer to C type state. */ |
658 | PRNGState prng; /* Global PRNG state. */ | ||
541 | GCRef gcroot[GCROOT_MAX]; /* GC roots. */ | 659 | GCRef gcroot[GCROOT_MAX]; /* GC roots. */ |
542 | } global_State; | 660 | } global_State; |
543 | 661 | ||
@@ -553,9 +671,11 @@ typedef struct global_State { | |||
553 | #define HOOK_ACTIVE_SHIFT 4 | 671 | #define HOOK_ACTIVE_SHIFT 4 |
554 | #define HOOK_VMEVENT 0x20 | 672 | #define HOOK_VMEVENT 0x20 |
555 | #define HOOK_GC 0x40 | 673 | #define HOOK_GC 0x40 |
674 | #define HOOK_PROFILE 0x80 | ||
556 | #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) | 675 | #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) |
557 | #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) | 676 | #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) |
558 | #define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) | 677 | #define hook_entergc(g) \ |
678 | ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE) | ||
559 | #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) | 679 | #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) |
560 | #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) | 680 | #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) |
561 | #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) | 681 | #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) |
@@ -583,12 +703,23 @@ struct lua_State { | |||
583 | #define registry(L) (&G(L)->registrytv) | 703 | #define registry(L) (&G(L)->registrytv) |
584 | 704 | ||
585 | /* Macros to access the currently executing (Lua) function. */ | 705 | /* Macros to access the currently executing (Lua) function. */ |
706 | #if LJ_GC64 | ||
707 | #define curr_func(L) (&gcval(L->base-2)->fn) | ||
708 | #elif LJ_FR2 | ||
709 | #define curr_func(L) (&gcref((L->base-2)->gcr)->fn) | ||
710 | #else | ||
586 | #define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) | 711 | #define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) |
712 | #endif | ||
587 | #define curr_funcisL(L) (isluafunc(curr_func(L))) | 713 | #define curr_funcisL(L) (isluafunc(curr_func(L))) |
588 | #define curr_proto(L) (funcproto(curr_func(L))) | 714 | #define curr_proto(L) (funcproto(curr_func(L))) |
589 | #define curr_topL(L) (L->base + curr_proto(L)->framesize) | 715 | #define curr_topL(L) (L->base + curr_proto(L)->framesize) |
590 | #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) | 716 | #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) |
591 | 717 | ||
718 | #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) | ||
719 | LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line, | ||
720 | const char *func, const char *fmt, ...); | ||
721 | #endif | ||
722 | |||
592 | /* -- GC object definition and conversions -------------------------------- */ | 723 | /* -- GC object definition and conversions -------------------------------- */ |
593 | 724 | ||
594 | /* GC header for generic access to common fields of GC objects. */ | 725 | /* GC header for generic access to common fields of GC objects. */ |
@@ -642,17 +773,18 @@ typedef union GCobj { | |||
642 | 773 | ||
643 | /* -- TValue getters/setters ---------------------------------------------- */ | 774 | /* -- TValue getters/setters ---------------------------------------------- */ |
644 | 775 | ||
645 | #ifdef LUA_USE_ASSERT | ||
646 | #include "lj_gc.h" | ||
647 | #endif | ||
648 | |||
649 | /* Macros to test types. */ | 776 | /* Macros to test types. */ |
777 | #if LJ_GC64 | ||
778 | #define itype(o) ((uint32_t)((o)->it64 >> 47)) | ||
779 | #define tvisnil(o) ((o)->it64 == -1) | ||
780 | #else | ||
650 | #define itype(o) ((o)->it) | 781 | #define itype(o) ((o)->it) |
651 | #define tvisnil(o) (itype(o) == LJ_TNIL) | 782 | #define tvisnil(o) (itype(o) == LJ_TNIL) |
783 | #endif | ||
652 | #define tvisfalse(o) (itype(o) == LJ_TFALSE) | 784 | #define tvisfalse(o) (itype(o) == LJ_TFALSE) |
653 | #define tvistrue(o) (itype(o) == LJ_TTRUE) | 785 | #define tvistrue(o) (itype(o) == LJ_TTRUE) |
654 | #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) | 786 | #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) |
655 | #if LJ_64 | 787 | #if LJ_64 && !LJ_GC64 |
656 | #define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) | 788 | #define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) |
657 | #else | 789 | #else |
658 | #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) | 790 | #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) |
@@ -686,7 +818,7 @@ typedef union GCobj { | |||
686 | #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) | 818 | #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) |
687 | 819 | ||
688 | /* Macros to convert type ids. */ | 820 | /* Macros to convert type ids. */ |
689 | #if LJ_64 | 821 | #if LJ_64 && !LJ_GC64 |
690 | #define itypemap(o) \ | 822 | #define itypemap(o) \ |
691 | (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) | 823 | (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) |
692 | #else | 824 | #else |
@@ -694,13 +826,30 @@ typedef union GCobj { | |||
694 | #endif | 826 | #endif |
695 | 827 | ||
696 | /* Macros to get tagged values. */ | 828 | /* Macros to get tagged values. */ |
829 | #if LJ_GC64 | ||
830 | #define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK)) | ||
831 | #else | ||
697 | #define gcval(o) (gcref((o)->gcr)) | 832 | #define gcval(o) (gcref((o)->gcr)) |
698 | #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) | 833 | #endif |
834 | #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o))) | ||
699 | #if LJ_64 | 835 | #if LJ_64 |
700 | #define lightudV(o) \ | 836 | #define lightudseg(u) \ |
701 | check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) | 837 | (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1)) |
838 | #define lightudlo(u) \ | ||
839 | ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1)) | ||
840 | #define lightudup(p) \ | ||
841 | ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32))) | ||
842 | static LJ_AINLINE void *lightudV(global_State *g, cTValue *o) | ||
843 | { | ||
844 | uint64_t u = o->u64; | ||
845 | uint64_t seg = lightudseg(u); | ||
846 | uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); | ||
847 | lj_assertG(tvislightud(o), "lightuserdata expected"); | ||
848 | lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg); | ||
849 | return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u)); | ||
850 | } | ||
702 | #else | 851 | #else |
703 | #define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) | 852 | #define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) |
704 | #endif | 853 | #endif |
705 | #define gcV(o) check_exp(tvisgcv(o), gcval(o)) | 854 | #define gcV(o) check_exp(tvisgcv(o), gcval(o)) |
706 | #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) | 855 | #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) |
@@ -714,40 +863,70 @@ typedef union GCobj { | |||
714 | #define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) | 863 | #define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) |
715 | 864 | ||
716 | /* Macros to set tagged values. */ | 865 | /* Macros to set tagged values. */ |
866 | #if LJ_GC64 | ||
867 | #define setitype(o, i) ((o)->it = ((i) << 15)) | ||
868 | #define setnilV(o) ((o)->it64 = -1) | ||
869 | #define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47)) | ||
870 | #define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47)) | ||
871 | #else | ||
717 | #define setitype(o, i) ((o)->it = (i)) | 872 | #define setitype(o, i) ((o)->it = (i)) |
718 | #define setnilV(o) ((o)->it = LJ_TNIL) | 873 | #define setnilV(o) ((o)->it = LJ_TNIL) |
719 | #define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) | 874 | #define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) |
875 | #define setpriV(o, i) (setitype((o), (i))) | ||
876 | #endif | ||
720 | 877 | ||
721 | static LJ_AINLINE void setlightudV(TValue *o, void *p) | 878 | static LJ_AINLINE void setrawlightudV(TValue *o, void *p) |
722 | { | 879 | { |
723 | #if LJ_64 | 880 | #if LJ_GC64 |
881 | o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47); | ||
882 | #elif LJ_64 | ||
724 | o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); | 883 | o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); |
725 | #else | 884 | #else |
726 | setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); | 885 | setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); |
727 | #endif | 886 | #endif |
728 | } | 887 | } |
729 | 888 | ||
730 | #if LJ_64 | 889 | #if LJ_FR2 || LJ_32 |
731 | #define checklightudptr(L, p) \ | 890 | #define contptr(f) ((void *)(f)) |
732 | (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) | 891 | #define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) |
892 | #else | ||
893 | #define contptr(f) \ | ||
894 | ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) | ||
733 | #define setcont(o, f) \ | 895 | #define setcont(o, f) \ |
734 | ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) | 896 | ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) |
735 | #else | ||
736 | #define checklightudptr(L, p) (p) | ||
737 | #define setcont(o, f) setlightudV((o), (void *)(f)) | ||
738 | #endif | 897 | #endif |
739 | 898 | ||
740 | #define tvchecklive(L, o) \ | 899 | static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg) |
741 | UNUSED(L), lua_assert(!tvisgcv(o) || \ | 900 | { |
742 | ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) | 901 | UNUSED(L); UNUSED(o); UNUSED(msg); |
902 | #if LUA_USE_ASSERT | ||
903 | if (tvisgcv(o)) { | ||
904 | lj_assertL(~itype(o) == gcval(o)->gch.gct, | ||
905 | "mismatch of TValue type %d vs GC type %d", | ||
906 | ~itype(o), gcval(o)->gch.gct); | ||
907 | /* Copy of isdead check from lj_gc.h to avoid circular include. */ | ||
908 | lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg); | ||
909 | } | ||
910 | #endif | ||
911 | } | ||
912 | |||
913 | static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) | ||
914 | { | ||
915 | #if LJ_GC64 | ||
916 | setgcreft(o->gcr, v, itype); | ||
917 | #else | ||
918 | setgcref(o->gcr, v); setitype(o, itype); | ||
919 | #endif | ||
920 | } | ||
743 | 921 | ||
744 | static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype) | 922 | static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) |
745 | { | 923 | { |
746 | setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o); | 924 | setgcVraw(o, v, it); |
925 | checklivetv(L, o, "store to dead GC object"); | ||
747 | } | 926 | } |
748 | 927 | ||
749 | #define define_setV(name, type, tag) \ | 928 | #define define_setV(name, type, tag) \ |
750 | static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \ | 929 | static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \ |
751 | { \ | 930 | { \ |
752 | setgcV(L, o, obj2gco(v), tag); \ | 931 | setgcV(L, o, obj2gco(v), tag); \ |
753 | } | 932 | } |
@@ -790,13 +969,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i) | |||
790 | /* Copy tagged values. */ | 969 | /* Copy tagged values. */ |
791 | static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) | 970 | static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) |
792 | { | 971 | { |
793 | *o1 = *o2; tvchecklive(L, o1); | 972 | *o1 = *o2; |
973 | checklivetv(L, o1, "copy of dead GC object"); | ||
794 | } | 974 | } |
795 | 975 | ||
796 | /* -- Number to integer conversion ---------------------------------------- */ | 976 | /* -- Number to integer conversion ---------------------------------------- */ |
797 | 977 | ||
798 | #if LJ_SOFTFP | 978 | #if LJ_SOFTFP |
799 | LJ_ASMF int32_t lj_vm_tobit(double x); | 979 | LJ_ASMF int32_t lj_vm_tobit(double x); |
980 | #if LJ_TARGET_MIPS64 | ||
981 | LJ_ASMF int32_t lj_vm_tointg(double x); | ||
982 | #endif | ||
800 | #endif | 983 | #endif |
801 | 984 | ||
802 | static LJ_AINLINE int32_t lj_num2bit(lua_Number n) | 985 | static LJ_AINLINE int32_t lj_num2bit(lua_Number n) |
@@ -810,11 +993,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) | |||
810 | #endif | 993 | #endif |
811 | } | 994 | } |
812 | 995 | ||
813 | #if LJ_TARGET_X86 && !defined(__SSE2__) | ||
814 | #define lj_num2int(n) lj_num2bit((n)) | ||
815 | #else | ||
816 | #define lj_num2int(n) ((int32_t)(n)) | 996 | #define lj_num2int(n) ((int32_t)(n)) |
817 | #endif | ||
818 | 997 | ||
819 | /* | 998 | /* |
820 | ** This must match the JIT backend behavior. In particular for archs | 999 | ** This must match the JIT backend behavior. In particular for archs |
@@ -859,6 +1038,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; | |||
859 | #define lj_typename(o) (lj_obj_itypename[itypemap(o)]) | 1038 | #define lj_typename(o) (lj_obj_itypename[itypemap(o)]) |
860 | 1039 | ||
861 | /* Compare two objects without calling metamethods. */ | 1040 | /* Compare two objects without calling metamethods. */ |
862 | LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); | 1041 | LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2); |
1042 | LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o); | ||
863 | 1043 | ||
864 | #endif | 1044 | #endif |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 82ed2d32..e3fe8bbf 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -14,18 +14,21 @@ | |||
14 | 14 | ||
15 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
16 | 16 | ||
17 | #include "lj_buf.h" | ||
17 | #include "lj_str.h" | 18 | #include "lj_str.h" |
18 | #include "lj_tab.h" | 19 | #include "lj_tab.h" |
19 | #include "lj_ir.h" | 20 | #include "lj_ir.h" |
20 | #include "lj_jit.h" | 21 | #include "lj_jit.h" |
22 | #include "lj_ircall.h" | ||
21 | #include "lj_iropt.h" | 23 | #include "lj_iropt.h" |
22 | #include "lj_trace.h" | 24 | #include "lj_trace.h" |
23 | #if LJ_HASFFI | 25 | #if LJ_HASFFI |
24 | #include "lj_ctype.h" | 26 | #include "lj_ctype.h" |
25 | #endif | ||
26 | #include "lj_carith.h" | 27 | #include "lj_carith.h" |
28 | #endif | ||
27 | #include "lj_vm.h" | 29 | #include "lj_vm.h" |
28 | #include "lj_strscan.h" | 30 | #include "lj_strscan.h" |
31 | #include "lj_strfmt.h" | ||
29 | 32 | ||
30 | /* Here's a short description how the FOLD engine processes instructions: | 33 | /* Here's a short description how the FOLD engine processes instructions: |
31 | ** | 34 | ** |
@@ -133,8 +136,8 @@ | |||
133 | /* Some local macros to save typing. Undef'd at the end. */ | 136 | /* Some local macros to save typing. Undef'd at the end. */ |
134 | #define IR(ref) (&J->cur.ir[(ref)]) | 137 | #define IR(ref) (&J->cur.ir[(ref)]) |
135 | #define fins (&J->fold.ins) | 138 | #define fins (&J->fold.ins) |
136 | #define fleft (&J->fold.left) | 139 | #define fleft (J->fold.left) |
137 | #define fright (&J->fold.right) | 140 | #define fright (J->fold.right) |
138 | #define knumleft (ir_knum(fleft)->n) | 141 | #define knumleft (ir_knum(fleft)->n) |
139 | #define knumright (ir_knum(fright)->n) | 142 | #define knumright (ir_knum(fright)->n) |
140 | 143 | ||
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); | |||
155 | 158 | ||
156 | /* Barrier to prevent folding across a GC step. | 159 | /* Barrier to prevent folding across a GC step. |
157 | ** GC steps can only happen at the head of a trace and at LOOP. | 160 | ** GC steps can only happen at the head of a trace and at LOOP. |
158 | ** And the GC is only driven forward if there is at least one allocation. | 161 | ** And the GC is only driven forward if there's at least one allocation. |
159 | */ | 162 | */ |
160 | #define gcstep_barrier(J, ref) \ | 163 | #define gcstep_barrier(J, ref) \ |
161 | ((ref) < J->chain[IR_LOOP] && \ | 164 | ((ref) < J->chain[IR_LOOP] && \ |
162 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ | 165 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ |
163 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ | 166 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ |
164 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) | 167 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \ |
168 | J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA])) | ||
165 | 169 | ||
166 | /* -- Constant folding for FP numbers ------------------------------------- */ | 170 | /* -- Constant folding for FP numbers ------------------------------------- */ |
167 | 171 | ||
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM) | |||
169 | LJFOLD(SUB KNUM KNUM) | 173 | LJFOLD(SUB KNUM KNUM) |
170 | LJFOLD(MUL KNUM KNUM) | 174 | LJFOLD(MUL KNUM KNUM) |
171 | LJFOLD(DIV KNUM KNUM) | 175 | LJFOLD(DIV KNUM KNUM) |
172 | LJFOLD(NEG KNUM KNUM) | ||
173 | LJFOLD(ABS KNUM KNUM) | ||
174 | LJFOLD(ATAN2 KNUM KNUM) | ||
175 | LJFOLD(LDEXP KNUM KNUM) | 176 | LJFOLD(LDEXP KNUM KNUM) |
176 | LJFOLD(MIN KNUM KNUM) | 177 | LJFOLD(MIN KNUM KNUM) |
177 | LJFOLD(MAX KNUM KNUM) | 178 | LJFOLD(MAX KNUM KNUM) |
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith) | |||
183 | return lj_ir_knum(J, y); | 184 | return lj_ir_knum(J, y); |
184 | } | 185 | } |
185 | 186 | ||
187 | LJFOLD(NEG KNUM FLOAD) | ||
188 | LJFOLD(ABS KNUM FLOAD) | ||
189 | LJFOLDF(kfold_numabsneg) | ||
190 | { | ||
191 | lua_Number a = knumleft; | ||
192 | lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD); | ||
193 | return lj_ir_knum(J, y); | ||
194 | } | ||
195 | |||
186 | LJFOLD(LDEXP KNUM KINT) | 196 | LJFOLD(LDEXP KNUM KINT) |
187 | LJFOLDF(kfold_ldexp) | 197 | LJFOLDF(kfold_ldexp) |
188 | { | 198 | { |
@@ -202,11 +212,36 @@ LJFOLDF(kfold_fpmath) | |||
202 | return lj_ir_knum(J, y); | 212 | return lj_ir_knum(J, y); |
203 | } | 213 | } |
204 | 214 | ||
215 | LJFOLD(CALLN KNUM any) | ||
216 | LJFOLDF(kfold_fpcall1) | ||
217 | { | ||
218 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
219 | if (CCI_TYPE(ci) == IRT_NUM) { | ||
220 | double y = ((double (*)(double))ci->func)(knumleft); | ||
221 | return lj_ir_knum(J, y); | ||
222 | } | ||
223 | return NEXTFOLD; | ||
224 | } | ||
225 | |||
226 | LJFOLD(CALLN CARG IRCALL_atan2) | ||
227 | LJFOLDF(kfold_fpcall2) | ||
228 | { | ||
229 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | ||
230 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
231 | double a = ir_knum(IR(fleft->op1))->n; | ||
232 | double b = ir_knum(IR(fleft->op2))->n; | ||
233 | double y = ((double (*)(double, double))ci->func)(a, b); | ||
234 | return lj_ir_knum(J, y); | ||
235 | } | ||
236 | return NEXTFOLD; | ||
237 | } | ||
238 | |||
205 | LJFOLD(POW KNUM KINT) | 239 | LJFOLD(POW KNUM KINT) |
240 | LJFOLD(POW KNUM KNUM) | ||
206 | LJFOLDF(kfold_numpow) | 241 | LJFOLDF(kfold_numpow) |
207 | { | 242 | { |
208 | lua_Number a = knumleft; | 243 | lua_Number a = knumleft; |
209 | lua_Number b = (lua_Number)fright->i; | 244 | lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright; |
210 | lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); | 245 | lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); |
211 | return lj_ir_knum(J, y); | 246 | return lj_ir_knum(J, y); |
212 | } | 247 | } |
@@ -247,7 +282,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | |||
247 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; | 282 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; |
248 | case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; | 283 | case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; |
249 | case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; | 284 | case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; |
250 | default: lua_assert(0); break; | 285 | default: lj_assertX(0, "bad IR op %d", op); break; |
251 | } | 286 | } |
252 | return k1; | 287 | return k1; |
253 | } | 288 | } |
@@ -319,7 +354,7 @@ LJFOLDF(kfold_intcomp) | |||
319 | case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); | 354 | case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); |
320 | case IR_ABC: | 355 | case IR_ABC: |
321 | case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); | 356 | case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); |
322 | default: lua_assert(0); return FAILFOLD; | 357 | default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; |
323 | } | 358 | } |
324 | } | 359 | } |
325 | 360 | ||
@@ -333,21 +368,29 @@ LJFOLDF(kfold_intcomp0) | |||
333 | 368 | ||
334 | /* -- Constant folding for 64 bit integers -------------------------------- */ | 369 | /* -- Constant folding for 64 bit integers -------------------------------- */ |
335 | 370 | ||
336 | static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) | 371 | static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, |
372 | IROp op) | ||
337 | { | 373 | { |
374 | UNUSED(J); | ||
375 | #if LJ_HASFFI | ||
338 | switch (op) { | 376 | switch (op) { |
339 | #if LJ_64 || LJ_HASFFI | ||
340 | case IR_ADD: k1 += k2; break; | 377 | case IR_ADD: k1 += k2; break; |
341 | case IR_SUB: k1 -= k2; break; | 378 | case IR_SUB: k1 -= k2; break; |
342 | #endif | ||
343 | #if LJ_HASFFI | ||
344 | case IR_MUL: k1 *= k2; break; | 379 | case IR_MUL: k1 *= k2; break; |
345 | case IR_BAND: k1 &= k2; break; | 380 | case IR_BAND: k1 &= k2; break; |
346 | case IR_BOR: k1 |= k2; break; | 381 | case IR_BOR: k1 |= k2; break; |
347 | case IR_BXOR: k1 ^= k2; break; | 382 | case IR_BXOR: k1 ^= k2; break; |
348 | #endif | 383 | case IR_BSHL: k1 <<= (k2 & 63); break; |
349 | default: UNUSED(k2); lua_assert(0); break; | 384 | case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; |
385 | case IR_BSAR: k1 >>= (k2 & 63); break; | ||
386 | case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; | ||
387 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; | ||
388 | default: lj_assertJ(0, "bad IR op %d", op); break; | ||
350 | } | 389 | } |
390 | #else | ||
391 | UNUSED(k2); UNUSED(op); | ||
392 | lj_assertJ(0, "FFI IR op without FFI"); | ||
393 | #endif | ||
351 | return k1; | 394 | return k1; |
352 | } | 395 | } |
353 | 396 | ||
@@ -359,7 +402,7 @@ LJFOLD(BOR KINT64 KINT64) | |||
359 | LJFOLD(BXOR KINT64 KINT64) | 402 | LJFOLD(BXOR KINT64 KINT64) |
360 | LJFOLDF(kfold_int64arith) | 403 | LJFOLDF(kfold_int64arith) |
361 | { | 404 | { |
362 | return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, | 405 | return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64, |
363 | ir_k64(fright)->u64, (IROp)fins->o)); | 406 | ir_k64(fright)->u64, (IROp)fins->o)); |
364 | } | 407 | } |
365 | 408 | ||
@@ -381,7 +424,7 @@ LJFOLDF(kfold_int64arith2) | |||
381 | } | 424 | } |
382 | return INT64FOLD(k1); | 425 | return INT64FOLD(k1); |
383 | #else | 426 | #else |
384 | UNUSED(J); lua_assert(0); return FAILFOLD; | 427 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
385 | #endif | 428 | #endif |
386 | } | 429 | } |
387 | 430 | ||
@@ -392,22 +435,12 @@ LJFOLD(BROL KINT64 KINT) | |||
392 | LJFOLD(BROR KINT64 KINT) | 435 | LJFOLD(BROR KINT64 KINT) |
393 | LJFOLDF(kfold_int64shift) | 436 | LJFOLDF(kfold_int64shift) |
394 | { | 437 | { |
395 | #if LJ_HASFFI || LJ_64 | 438 | #if LJ_HASFFI |
396 | uint64_t k = ir_k64(fleft)->u64; | 439 | uint64_t k = ir_k64(fleft)->u64; |
397 | int32_t sh = (fright->i & 63); | 440 | int32_t sh = (fright->i & 63); |
398 | switch ((IROp)fins->o) { | 441 | return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); |
399 | case IR_BSHL: k <<= sh; break; | ||
400 | #if LJ_HASFFI | ||
401 | case IR_BSHR: k >>= sh; break; | ||
402 | case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break; | ||
403 | case IR_BROL: k = lj_rol(k, sh); break; | ||
404 | case IR_BROR: k = lj_ror(k, sh); break; | ||
405 | #endif | ||
406 | default: lua_assert(0); break; | ||
407 | } | ||
408 | return INT64FOLD(k); | ||
409 | #else | 442 | #else |
410 | UNUSED(J); lua_assert(0); return FAILFOLD; | 443 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
411 | #endif | 444 | #endif |
412 | } | 445 | } |
413 | 446 | ||
@@ -417,7 +450,7 @@ LJFOLDF(kfold_bnot64) | |||
417 | #if LJ_HASFFI | 450 | #if LJ_HASFFI |
418 | return INT64FOLD(~ir_k64(fleft)->u64); | 451 | return INT64FOLD(~ir_k64(fleft)->u64); |
419 | #else | 452 | #else |
420 | UNUSED(J); lua_assert(0); return FAILFOLD; | 453 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
421 | #endif | 454 | #endif |
422 | } | 455 | } |
423 | 456 | ||
@@ -427,7 +460,7 @@ LJFOLDF(kfold_bswap64) | |||
427 | #if LJ_HASFFI | 460 | #if LJ_HASFFI |
428 | return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); | 461 | return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); |
429 | #else | 462 | #else |
430 | UNUSED(J); lua_assert(0); return FAILFOLD; | 463 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
431 | #endif | 464 | #endif |
432 | } | 465 | } |
433 | 466 | ||
@@ -452,10 +485,10 @@ LJFOLDF(kfold_int64comp) | |||
452 | case IR_UGE: return CONDFOLD(a >= b); | 485 | case IR_UGE: return CONDFOLD(a >= b); |
453 | case IR_ULE: return CONDFOLD(a <= b); | 486 | case IR_ULE: return CONDFOLD(a <= b); |
454 | case IR_UGT: return CONDFOLD(a > b); | 487 | case IR_UGT: return CONDFOLD(a > b); |
455 | default: lua_assert(0); return FAILFOLD; | 488 | default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; |
456 | } | 489 | } |
457 | #else | 490 | #else |
458 | UNUSED(J); lua_assert(0); return FAILFOLD; | 491 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
459 | #endif | 492 | #endif |
460 | } | 493 | } |
461 | 494 | ||
@@ -467,7 +500,7 @@ LJFOLDF(kfold_int64comp0) | |||
467 | return DROPFOLD; | 500 | return DROPFOLD; |
468 | return NEXTFOLD; | 501 | return NEXTFOLD; |
469 | #else | 502 | #else |
470 | UNUSED(J); lua_assert(0); return FAILFOLD; | 503 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
471 | #endif | 504 | #endif |
472 | } | 505 | } |
473 | 506 | ||
@@ -481,6 +514,7 @@ LJFOLDF(kfold_snew_kptr) | |||
481 | } | 514 | } |
482 | 515 | ||
483 | LJFOLD(SNEW any KINT) | 516 | LJFOLD(SNEW any KINT) |
517 | LJFOLD(XSNEW any KINT) | ||
484 | LJFOLDF(kfold_snew_empty) | 518 | LJFOLDF(kfold_snew_empty) |
485 | { | 519 | { |
486 | if (fright->i == 0) | 520 | if (fright->i == 0) |
@@ -492,7 +526,7 @@ LJFOLD(STRREF KGC KINT) | |||
492 | LJFOLDF(kfold_strref) | 526 | LJFOLDF(kfold_strref) |
493 | { | 527 | { |
494 | GCstr *str = ir_kstr(fleft); | 528 | GCstr *str = ir_kstr(fleft); |
495 | lua_assert((MSize)fright->i <= str->len); | 529 | lj_assertJ((MSize)fright->i <= str->len, "bad string ref"); |
496 | return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); | 530 | return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); |
497 | } | 531 | } |
498 | 532 | ||
@@ -510,7 +544,7 @@ LJFOLDF(kfold_strref_snew) | |||
510 | PHIBARRIER(ir); | 544 | PHIBARRIER(ir); |
511 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ | 545 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ |
512 | fins->op1 = str; | 546 | fins->op1 = str; |
513 | fins->ot = IRT(IR_STRREF, IRT_P32); | 547 | fins->ot = IRT(IR_STRREF, IRT_PGC); |
514 | return RETRYFOLD; | 548 | return RETRYFOLD; |
515 | } | 549 | } |
516 | } | 550 | } |
@@ -528,6 +562,210 @@ LJFOLDF(kfold_strcmp) | |||
528 | return NEXTFOLD; | 562 | return NEXTFOLD; |
529 | } | 563 | } |
530 | 564 | ||
565 | /* -- Constant folding and forwarding for buffers ------------------------- */ | ||
566 | |||
567 | /* | ||
568 | ** Buffer ops perform stores, but their effect is limited to the buffer | ||
569 | ** itself. Also, buffer ops are chained: a use of an op implies a use of | ||
570 | ** all other ops up the chain. Conversely, if an op is unused, all ops | ||
571 | ** up the chain can go unused. This largely eliminates the need to treat | ||
572 | ** them as stores. | ||
573 | ** | ||
574 | ** Alas, treating them as normal (IRM_N) ops doesn't work, because they | ||
575 | ** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP | ||
576 | ** or if FOLD is disabled. | ||
577 | ** | ||
578 | ** The compromise is to declare them as loads, emit them like stores and | ||
579 | ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain | ||
580 | ** fragments left over from CSE are eliminated by DCE. | ||
581 | ** | ||
582 | ** The string buffer methods emit a USE instead of a BUFSTR to keep the | ||
583 | ** chain alive. | ||
584 | */ | ||
585 | |||
586 | LJFOLD(BUFHDR any any) | ||
587 | LJFOLDF(bufhdr_merge) | ||
588 | { | ||
589 | return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD; | ||
590 | } | ||
591 | |||
592 | LJFOLD(BUFPUT any BUFSTR) | ||
593 | LJFOLDF(bufput_bufstr) | ||
594 | { | ||
595 | if ((J->flags & JIT_F_OPT_FWD)) { | ||
596 | IRRef hdr = fright->op2; | ||
597 | /* New buffer, no other buffer op in between and same buffer? */ | ||
598 | if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET && | ||
599 | fleft->prev == hdr && | ||
600 | fleft->op1 == IR(hdr)->op1 && | ||
601 | !(irt_isphi(fright->t) && IR(hdr)->prev)) { | ||
602 | IRRef ref = fins->op1; | ||
603 | IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */ | ||
604 | IR(ref)->op1 = fright->op1; | ||
605 | return ref; | ||
606 | } | ||
607 | /* Replay puts to global temporary buffer. */ | ||
608 | if (IR(hdr)->op2 == IRBUFHDR_RESET) { | ||
609 | IRIns *ir = IR(fright->op1); | ||
610 | /* For now only handle single string.reverse .lower .upper .rep. */ | ||
611 | if (ir->o == IR_CALLL && | ||
612 | ir->op2 >= IRCALL_lj_buf_putstr_reverse && | ||
613 | ir->op2 <= IRCALL_lj_buf_putstr_rep) { | ||
614 | IRIns *carg1 = IR(ir->op1); | ||
615 | if (ir->op2 == IRCALL_lj_buf_putstr_rep) { | ||
616 | IRIns *carg2 = IR(carg1->op1); | ||
617 | if (carg2->op1 == hdr) { | ||
618 | return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2); | ||
619 | } | ||
620 | } else if (carg1->op1 == hdr) { | ||
621 | return lj_ir_call(J, ir->op2, fins->op1, carg1->op2); | ||
622 | } | ||
623 | } | ||
624 | } | ||
625 | } | ||
626 | return EMITFOLD; /* Always emit, CSE later. */ | ||
627 | } | ||
628 | |||
629 | LJFOLD(BUFPUT any any) | ||
630 | LJFOLDF(bufput_kgc) | ||
631 | { | ||
632 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) { | ||
633 | GCstr *s2 = ir_kstr(fright); | ||
634 | if (s2->len == 0) { /* Empty string? */ | ||
635 | return LEFTFOLD; | ||
636 | } else { | ||
637 | if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) && | ||
638 | !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */ | ||
639 | GCstr *s1 = ir_kstr(IR(fleft->op2)); | ||
640 | IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2)); | ||
641 | /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */ | ||
642 | IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */ | ||
643 | return fins->op1; | ||
644 | } | ||
645 | } | ||
646 | } | ||
647 | return EMITFOLD; /* Always emit, CSE later. */ | ||
648 | } | ||
649 | |||
650 | LJFOLD(BUFSTR any any) | ||
651 | LJFOLDF(bufstr_kfold_cse) | ||
652 | { | ||
653 | lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || | ||
654 | fleft->o == IR_CALLL, | ||
655 | "bad buffer constructor IR op %d", fleft->o); | ||
656 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | ||
657 | if (fleft->o == IR_BUFHDR) { /* No put operations? */ | ||
658 | if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */ | ||
659 | return lj_ir_kstr(J, &J2G(J)->strempty); | ||
660 | fins->op1 = fleft->op1; | ||
661 | fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */ | ||
662 | return CSEFOLD; | ||
663 | } else if (fleft->o == IR_BUFPUT) { | ||
664 | IRIns *irb = IR(fleft->op1); | ||
665 | if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET) | ||
666 | return fleft->op2; /* Shortcut for a single put operation. */ | ||
667 | } | ||
668 | } | ||
669 | /* Try to CSE the whole chain. */ | ||
670 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
671 | IRRef ref = J->chain[IR_BUFSTR]; | ||
672 | while (ref) { | ||
673 | IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); | ||
674 | while (ira->o == irb->o && ira->op2 == irb->op2) { | ||
675 | lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || | ||
676 | ira->o == IR_CALLL || ira->o == IR_CARG, | ||
677 | "bad buffer constructor IR op %d", ira->o); | ||
678 | if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET) | ||
679 | return ref; /* CSE succeeded. */ | ||
680 | if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) | ||
681 | break; | ||
682 | ira = IR(ira->op1); | ||
683 | irb = IR(irb->op1); | ||
684 | } | ||
685 | ref = irs->prev; | ||
686 | } | ||
687 | } | ||
688 | return EMITFOLD; /* No CSE possible. */ | ||
689 | } | ||
690 | |||
691 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) | ||
692 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) | ||
693 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) | ||
694 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted) | ||
695 | LJFOLDF(bufput_kfold_op) | ||
696 | { | ||
697 | if (irref_isk(fleft->op2)) { | ||
698 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
699 | SBuf *sb = lj_buf_tmp_(J->L); | ||
700 | sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb, | ||
701 | ir_kstr(IR(fleft->op2))); | ||
702 | fins->o = IR_BUFPUT; | ||
703 | fins->op1 = fleft->op1; | ||
704 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
705 | return RETRYFOLD; | ||
706 | } | ||
707 | return EMITFOLD; /* Always emit, CSE later. */ | ||
708 | } | ||
709 | |||
710 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep) | ||
711 | LJFOLDF(bufput_kfold_rep) | ||
712 | { | ||
713 | if (irref_isk(fleft->op2)) { | ||
714 | IRIns *irc = IR(fleft->op1); | ||
715 | if (irref_isk(irc->op2)) { | ||
716 | SBuf *sb = lj_buf_tmp_(J->L); | ||
717 | sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i); | ||
718 | fins->o = IR_BUFPUT; | ||
719 | fins->op1 = irc->op1; | ||
720 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
721 | return RETRYFOLD; | ||
722 | } | ||
723 | } | ||
724 | return EMITFOLD; /* Always emit, CSE later. */ | ||
725 | } | ||
726 | |||
727 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint) | ||
728 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int) | ||
729 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint) | ||
730 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum) | ||
731 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr) | ||
732 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) | ||
733 | LJFOLDF(bufput_kfold_fmt) | ||
734 | { | ||
735 | IRIns *irc = IR(fleft->op1); | ||
736 | lj_assertJ(irref_isk(irc->op2), "SFormat must be const"); | ||
737 | if (irref_isk(fleft->op2)) { | ||
738 | SFormat sf = (SFormat)IR(irc->op2)->i; | ||
739 | IRIns *ira = IR(fleft->op2); | ||
740 | SBuf *sb = lj_buf_tmp_(J->L); | ||
741 | switch (fins->op2) { | ||
742 | case IRCALL_lj_strfmt_putfxint: | ||
743 | sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64); | ||
744 | break; | ||
745 | case IRCALL_lj_strfmt_putfstr: | ||
746 | sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira)); | ||
747 | break; | ||
748 | case IRCALL_lj_strfmt_putfchar: | ||
749 | sb = lj_strfmt_putfchar(sb, sf, ira->i); | ||
750 | break; | ||
751 | case IRCALL_lj_strfmt_putfnum_int: | ||
752 | case IRCALL_lj_strfmt_putfnum_uint: | ||
753 | case IRCALL_lj_strfmt_putfnum: | ||
754 | default: { | ||
755 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
756 | sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf, | ||
757 | ir_knum(ira)->n); | ||
758 | break; | ||
759 | } | ||
760 | } | ||
761 | fins->o = IR_BUFPUT; | ||
762 | fins->op1 = irc->op1; | ||
763 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
764 | return RETRYFOLD; | ||
765 | } | ||
766 | return EMITFOLD; /* Always emit, CSE later. */ | ||
767 | } | ||
768 | |||
531 | /* -- Constant folding of pointer arithmetic ------------------------------ */ | 769 | /* -- Constant folding of pointer arithmetic ------------------------------ */ |
532 | 770 | ||
533 | LJFOLD(ADD KGC KINT) | 771 | LJFOLD(ADD KGC KINT) |
@@ -648,27 +886,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) | |||
648 | LJFOLDF(kfold_conv_knum_int_num) | 886 | LJFOLDF(kfold_conv_knum_int_num) |
649 | { | 887 | { |
650 | lua_Number n = knumleft; | 888 | lua_Number n = knumleft; |
651 | if (!(fins->op2 & IRCONV_TRUNC)) { | 889 | int32_t k = lj_num2int(n); |
652 | int32_t k = lj_num2int(n); | 890 | if (irt_isguard(fins->t) && n != (lua_Number)k) { |
653 | if (irt_isguard(fins->t) && n != (lua_Number)k) { | 891 | /* We're about to create a guard which always fails, like CONV +1.5. |
654 | /* We're about to create a guard which always fails, like CONV +1.5. | 892 | ** Some pathological loops cause this during LICM, e.g.: |
655 | ** Some pathological loops cause this during LICM, e.g.: | 893 | ** local x,k,t = 0,1.5,{1,[1.5]=2} |
656 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | 894 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end |
657 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | 895 | ** assert(x == 300) |
658 | ** assert(x == 300) | 896 | */ |
659 | */ | 897 | return FAILFOLD; |
660 | return FAILFOLD; | ||
661 | } | ||
662 | return INTFOLD(k); | ||
663 | } else { | ||
664 | return INTFOLD((int32_t)n); | ||
665 | } | 898 | } |
899 | return INTFOLD(k); | ||
666 | } | 900 | } |
667 | 901 | ||
668 | LJFOLD(CONV KNUM IRCONV_U32_NUM) | 902 | LJFOLD(CONV KNUM IRCONV_U32_NUM) |
669 | LJFOLDF(kfold_conv_knum_u32_num) | 903 | LJFOLDF(kfold_conv_knum_u32_num) |
670 | { | 904 | { |
671 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
672 | #ifdef _MSC_VER | 905 | #ifdef _MSC_VER |
673 | { /* Workaround for MSVC bug. */ | 906 | { /* Workaround for MSVC bug. */ |
674 | volatile uint32_t u = (uint32_t)knumleft; | 907 | volatile uint32_t u = (uint32_t)knumleft; |
@@ -682,27 +915,27 @@ LJFOLDF(kfold_conv_knum_u32_num) | |||
682 | LJFOLD(CONV KNUM IRCONV_I64_NUM) | 915 | LJFOLD(CONV KNUM IRCONV_I64_NUM) |
683 | LJFOLDF(kfold_conv_knum_i64_num) | 916 | LJFOLDF(kfold_conv_knum_i64_num) |
684 | { | 917 | { |
685 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
686 | return INT64FOLD((uint64_t)(int64_t)knumleft); | 918 | return INT64FOLD((uint64_t)(int64_t)knumleft); |
687 | } | 919 | } |
688 | 920 | ||
689 | LJFOLD(CONV KNUM IRCONV_U64_NUM) | 921 | LJFOLD(CONV KNUM IRCONV_U64_NUM) |
690 | LJFOLDF(kfold_conv_knum_u64_num) | 922 | LJFOLDF(kfold_conv_knum_u64_num) |
691 | { | 923 | { |
692 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
693 | return INT64FOLD(lj_num2u64(knumleft)); | 924 | return INT64FOLD(lj_num2u64(knumleft)); |
694 | } | 925 | } |
695 | 926 | ||
696 | LJFOLD(TOSTR KNUM) | 927 | LJFOLD(TOSTR KNUM any) |
697 | LJFOLDF(kfold_tostr_knum) | 928 | LJFOLDF(kfold_tostr_knum) |
698 | { | 929 | { |
699 | return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); | 930 | return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft))); |
700 | } | 931 | } |
701 | 932 | ||
702 | LJFOLD(TOSTR KINT) | 933 | LJFOLD(TOSTR KINT any) |
703 | LJFOLDF(kfold_tostr_kint) | 934 | LJFOLDF(kfold_tostr_kint) |
704 | { | 935 | { |
705 | return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); | 936 | return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ? |
937 | lj_strfmt_int(J->L, fleft->i) : | ||
938 | lj_strfmt_char(J->L, fleft->i)); | ||
706 | } | 939 | } |
707 | 940 | ||
708 | LJFOLD(STRTO KGC) | 941 | LJFOLD(STRTO KGC) |
@@ -750,13 +983,13 @@ LJFOLDF(shortcut_round) | |||
750 | return NEXTFOLD; | 983 | return NEXTFOLD; |
751 | } | 984 | } |
752 | 985 | ||
753 | LJFOLD(ABS ABS KNUM) | 986 | LJFOLD(ABS ABS FLOAD) |
754 | LJFOLDF(shortcut_left) | 987 | LJFOLDF(shortcut_left) |
755 | { | 988 | { |
756 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ | 989 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ |
757 | } | 990 | } |
758 | 991 | ||
759 | LJFOLD(ABS NEG KNUM) | 992 | LJFOLD(ABS NEG FLOAD) |
760 | LJFOLDF(shortcut_dropleft) | 993 | LJFOLDF(shortcut_dropleft) |
761 | { | 994 | { |
762 | PHIBARRIER(fleft); | 995 | PHIBARRIER(fleft); |
@@ -836,8 +1069,10 @@ LJFOLDF(simplify_nummuldiv_k) | |||
836 | if (n == 1.0) { /* x o 1 ==> x */ | 1069 | if (n == 1.0) { /* x o 1 ==> x */ |
837 | return LEFTFOLD; | 1070 | return LEFTFOLD; |
838 | } else if (n == -1.0) { /* x o -1 ==> -x */ | 1071 | } else if (n == -1.0) { /* x o -1 ==> -x */ |
1072 | IRRef op1 = fins->op1; | ||
1073 | fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */ | ||
1074 | fins->op1 = op1; | ||
839 | fins->o = IR_NEG; | 1075 | fins->o = IR_NEG; |
840 | fins->op2 = (IRRef1)lj_ir_knum_neg(J); | ||
841 | return RETRYFOLD; | 1076 | return RETRYFOLD; |
842 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ | 1077 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ |
843 | fins->o = IR_ADD; | 1078 | fins->o = IR_ADD; |
@@ -878,7 +1113,7 @@ LJFOLDF(simplify_nummuldiv_negneg) | |||
878 | } | 1113 | } |
879 | 1114 | ||
880 | LJFOLD(POW any KINT) | 1115 | LJFOLD(POW any KINT) |
881 | LJFOLDF(simplify_numpow_xk) | 1116 | LJFOLDF(simplify_numpow_xkint) |
882 | { | 1117 | { |
883 | int32_t k = fright->i; | 1118 | int32_t k = fright->i; |
884 | TRef ref = fins->op1; | 1119 | TRef ref = fins->op1; |
@@ -907,13 +1142,22 @@ LJFOLDF(simplify_numpow_xk) | |||
907 | return ref; | 1142 | return ref; |
908 | } | 1143 | } |
909 | 1144 | ||
1145 | LJFOLD(POW any KNUM) | ||
1146 | LJFOLDF(simplify_numpow_xknum) | ||
1147 | { | ||
1148 | if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */ | ||
1149 | return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT); | ||
1150 | return NEXTFOLD; | ||
1151 | } | ||
1152 | |||
910 | LJFOLD(POW KNUM any) | 1153 | LJFOLD(POW KNUM any) |
911 | LJFOLDF(simplify_numpow_kx) | 1154 | LJFOLDF(simplify_numpow_kx) |
912 | { | 1155 | { |
913 | lua_Number n = knumleft; | 1156 | lua_Number n = knumleft; |
914 | if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ | 1157 | if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */ |
915 | fins->o = IR_CONV; | ||
916 | #if LJ_TARGET_X86ORX64 | 1158 | #if LJ_TARGET_X86ORX64 |
1159 | /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */ | ||
1160 | fins->o = IR_CONV; | ||
917 | fins->op1 = fins->op2; | 1161 | fins->op1 = fins->op2; |
918 | fins->op2 = IRCONV_NUM_INT; | 1162 | fins->op2 = IRCONV_NUM_INT; |
919 | fins->op2 = (IRRef1)lj_opt_fold(J); | 1163 | fins->op2 = (IRRef1)lj_opt_fold(J); |
@@ -1007,10 +1251,10 @@ LJFOLDF(simplify_tobit_conv) | |||
1007 | { | 1251 | { |
1008 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | 1252 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ |
1009 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { | 1253 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { |
1010 | lua_assert(irt_isnum(fleft->t)); | 1254 | lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); |
1011 | return fleft->op1; | 1255 | return fleft->op1; |
1012 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | 1256 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { |
1013 | lua_assert(irt_isnum(fleft->t)); | 1257 | lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); |
1014 | fins->o = IR_CONV; | 1258 | fins->o = IR_CONV; |
1015 | fins->op1 = fleft->op1; | 1259 | fins->op1 = fleft->op1; |
1016 | fins->op2 = (IRT_INT<<5)|IRT_U32; | 1260 | fins->op2 = (IRT_INT<<5)|IRT_U32; |
@@ -1050,7 +1294,7 @@ LJFOLDF(simplify_conv_sext) | |||
1050 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ | 1294 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ |
1051 | if (ref == J->scev.idx) { | 1295 | if (ref == J->scev.idx) { |
1052 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; | 1296 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; |
1053 | lua_assert(irt_isint(J->scev.t)); | 1297 | lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported"); |
1054 | if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { | 1298 | if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { |
1055 | ok_reduce: | 1299 | ok_reduce: |
1056 | #if LJ_TARGET_X64 | 1300 | #if LJ_TARGET_X64 |
@@ -1081,6 +1325,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64) | |||
1081 | LJFOLD(CONV MUL IRCONV_U32_U64) | 1325 | LJFOLD(CONV MUL IRCONV_U32_U64) |
1082 | LJFOLDF(simplify_conv_narrow) | 1326 | LJFOLDF(simplify_conv_narrow) |
1083 | { | 1327 | { |
1328 | #if LJ_64 | ||
1329 | UNUSED(J); | ||
1330 | return NEXTFOLD; | ||
1331 | #else | ||
1084 | IROp op = (IROp)fleft->o; | 1332 | IROp op = (IROp)fleft->o; |
1085 | IRType t = irt_type(fins->t); | 1333 | IRType t = irt_type(fins->t); |
1086 | IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; | 1334 | IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; |
@@ -1091,6 +1339,7 @@ LJFOLDF(simplify_conv_narrow) | |||
1091 | fins->op1 = op1; | 1339 | fins->op1 = op1; |
1092 | fins->op2 = op2; | 1340 | fins->op2 = op2; |
1093 | return RETRYFOLD; | 1341 | return RETRYFOLD; |
1342 | #endif | ||
1094 | } | 1343 | } |
1095 | 1344 | ||
1096 | /* Special CSE rule for CONV. */ | 1345 | /* Special CSE rule for CONV. */ |
@@ -1126,7 +1375,8 @@ LJFOLDF(narrow_convert) | |||
1126 | /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ | 1375 | /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ |
1127 | if (J->chain[IR_LOOP]) | 1376 | if (J->chain[IR_LOOP]) |
1128 | return NEXTFOLD; | 1377 | return NEXTFOLD; |
1129 | lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); | 1378 | lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT, |
1379 | "unexpected CONV TOBIT"); | ||
1130 | return lj_opt_narrow_convert(J); | 1380 | return lj_opt_narrow_convert(J); |
1131 | } | 1381 | } |
1132 | 1382 | ||
@@ -1204,7 +1454,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1204 | ** But this is mainly intended for simple address arithmetic. | 1454 | ** But this is mainly intended for simple address arithmetic. |
1205 | ** Also it's easier for the backend to optimize the original multiplies. | 1455 | ** Also it's easier for the backend to optimize the original multiplies. |
1206 | */ | 1456 | */ |
1207 | if (k == 1) { /* i * 1 ==> i */ | 1457 | if (k == 0) { /* i * 0 ==> 0 */ |
1458 | return RIGHTFOLD; | ||
1459 | } else if (k == 1) { /* i * 1 ==> i */ | ||
1208 | return LEFTFOLD; | 1460 | return LEFTFOLD; |
1209 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ | 1461 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ |
1210 | fins->o = IR_BSHL; | 1462 | fins->o = IR_BSHL; |
@@ -1217,9 +1469,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1217 | LJFOLD(MUL any KINT) | 1469 | LJFOLD(MUL any KINT) |
1218 | LJFOLDF(simplify_intmul_k32) | 1470 | LJFOLDF(simplify_intmul_k32) |
1219 | { | 1471 | { |
1220 | if (fright->i == 0) /* i * 0 ==> 0 */ | 1472 | if (fright->i >= 0) |
1221 | return INTFOLD(0); | ||
1222 | else if (fright->i > 0) | ||
1223 | return simplify_intmul_k(J, fright->i); | 1473 | return simplify_intmul_k(J, fright->i); |
1224 | return NEXTFOLD; | 1474 | return NEXTFOLD; |
1225 | } | 1475 | } |
@@ -1227,21 +1477,20 @@ LJFOLDF(simplify_intmul_k32) | |||
1227 | LJFOLD(MUL any KINT64) | 1477 | LJFOLD(MUL any KINT64) |
1228 | LJFOLDF(simplify_intmul_k64) | 1478 | LJFOLDF(simplify_intmul_k64) |
1229 | { | 1479 | { |
1230 | if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ | 1480 | #if LJ_HASFFI |
1231 | return INT64FOLD(0); | 1481 | if (ir_kint64(fright)->u64 < 0x80000000u) |
1232 | #if LJ_64 | ||
1233 | /* NYI: SPLIT for BSHL and 32 bit backend support. */ | ||
1234 | else if (ir_kint64(fright)->u64 < 0x80000000u) | ||
1235 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); | 1482 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); |
1236 | #endif | ||
1237 | return NEXTFOLD; | 1483 | return NEXTFOLD; |
1484 | #else | ||
1485 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; | ||
1486 | #endif | ||
1238 | } | 1487 | } |
1239 | 1488 | ||
1240 | LJFOLD(MOD any KINT) | 1489 | LJFOLD(MOD any KINT) |
1241 | LJFOLDF(simplify_intmod_k) | 1490 | LJFOLDF(simplify_intmod_k) |
1242 | { | 1491 | { |
1243 | int32_t k = fright->i; | 1492 | int32_t k = fright->i; |
1244 | lua_assert(k != 0); | 1493 | lj_assertJ(k != 0, "integer mod 0"); |
1245 | if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ | 1494 | if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ |
1246 | fins->o = IR_BAND; | 1495 | fins->o = IR_BAND; |
1247 | fins->op2 = lj_ir_kint(J, k-1); | 1496 | fins->op2 = lj_ir_kint(J, k-1); |
@@ -1490,6 +1739,15 @@ LJFOLDF(simplify_shiftk_andk) | |||
1490 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 1739 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
1491 | fins->ot = IRTI(IR_BAND); | 1740 | fins->ot = IRTI(IR_BAND); |
1492 | return RETRYFOLD; | 1741 | return RETRYFOLD; |
1742 | } else if (irk->o == IR_KINT64) { | ||
1743 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i, | ||
1744 | (IROp)fins->o); | ||
1745 | IROpT ot = fleft->ot; | ||
1746 | fins->op1 = fleft->op1; | ||
1747 | fins->op1 = (IRRef1)lj_opt_fold(J); | ||
1748 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); | ||
1749 | fins->ot = ot; | ||
1750 | return RETRYFOLD; | ||
1493 | } | 1751 | } |
1494 | return NEXTFOLD; | 1752 | return NEXTFOLD; |
1495 | } | 1753 | } |
@@ -1505,6 +1763,47 @@ LJFOLDF(simplify_andk_shiftk) | |||
1505 | return NEXTFOLD; | 1763 | return NEXTFOLD; |
1506 | } | 1764 | } |
1507 | 1765 | ||
1766 | LJFOLD(BAND BOR KINT) | ||
1767 | LJFOLD(BOR BAND KINT) | ||
1768 | LJFOLDF(simplify_andor_k) | ||
1769 | { | ||
1770 | IRIns *irk = IR(fleft->op2); | ||
1771 | PHIBARRIER(fleft); | ||
1772 | if (irk->o == IR_KINT) { | ||
1773 | int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); | ||
1774 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1775 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1776 | if (k == (fins->o == IR_BAND ? 0 : -1)) { | ||
1777 | fins->op1 = fleft->op1; | ||
1778 | return RETRYFOLD; | ||
1779 | } | ||
1780 | } | ||
1781 | return NEXTFOLD; | ||
1782 | } | ||
1783 | |||
1784 | LJFOLD(BAND BOR KINT64) | ||
1785 | LJFOLD(BOR BAND KINT64) | ||
1786 | LJFOLDF(simplify_andor_k64) | ||
1787 | { | ||
1788 | #if LJ_HASFFI | ||
1789 | IRIns *irk = IR(fleft->op2); | ||
1790 | PHIBARRIER(fleft); | ||
1791 | if (irk->o == IR_KINT64) { | ||
1792 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, | ||
1793 | (IROp)fins->o); | ||
1794 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1795 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1796 | if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) { | ||
1797 | fins->op1 = fleft->op1; | ||
1798 | return RETRYFOLD; | ||
1799 | } | ||
1800 | } | ||
1801 | return NEXTFOLD; | ||
1802 | #else | ||
1803 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; | ||
1804 | #endif | ||
1805 | } | ||
1806 | |||
1508 | /* -- Reassociation ------------------------------------------------------- */ | 1807 | /* -- Reassociation ------------------------------------------------------- */ |
1509 | 1808 | ||
1510 | LJFOLD(ADD ADD KINT) | 1809 | LJFOLD(ADD ADD KINT) |
@@ -1534,11 +1833,11 @@ LJFOLD(BOR BOR KINT64) | |||
1534 | LJFOLD(BXOR BXOR KINT64) | 1833 | LJFOLD(BXOR BXOR KINT64) |
1535 | LJFOLDF(reassoc_intarith_k64) | 1834 | LJFOLDF(reassoc_intarith_k64) |
1536 | { | 1835 | { |
1537 | #if LJ_HASFFI || LJ_64 | 1836 | #if LJ_HASFFI |
1538 | IRIns *irk = IR(fleft->op2); | 1837 | IRIns *irk = IR(fleft->op2); |
1539 | if (irk->o == IR_KINT64) { | 1838 | if (irk->o == IR_KINT64) { |
1540 | uint64_t k = kfold_int64arith(ir_k64(irk)->u64, | 1839 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, |
1541 | ir_k64(fright)->u64, (IROp)fins->o); | 1840 | (IROp)fins->o); |
1542 | PHIBARRIER(fleft); | 1841 | PHIBARRIER(fleft); |
1543 | fins->op1 = fleft->op1; | 1842 | fins->op1 = fleft->op1; |
1544 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); | 1843 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); |
@@ -1546,12 +1845,10 @@ LJFOLDF(reassoc_intarith_k64) | |||
1546 | } | 1845 | } |
1547 | return NEXTFOLD; | 1846 | return NEXTFOLD; |
1548 | #else | 1847 | #else |
1549 | UNUSED(J); lua_assert(0); return FAILFOLD; | 1848 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
1550 | #endif | 1849 | #endif |
1551 | } | 1850 | } |
1552 | 1851 | ||
1553 | LJFOLD(MIN MIN any) | ||
1554 | LJFOLD(MAX MAX any) | ||
1555 | LJFOLD(BAND BAND any) | 1852 | LJFOLD(BAND BAND any) |
1556 | LJFOLD(BOR BOR any) | 1853 | LJFOLD(BOR BOR any) |
1557 | LJFOLDF(reassoc_dup) | 1854 | LJFOLDF(reassoc_dup) |
@@ -1561,6 +1858,15 @@ LJFOLDF(reassoc_dup) | |||
1561 | return NEXTFOLD; | 1858 | return NEXTFOLD; |
1562 | } | 1859 | } |
1563 | 1860 | ||
1861 | LJFOLD(MIN MIN any) | ||
1862 | LJFOLD(MAX MAX any) | ||
1863 | LJFOLDF(reassoc_dup_minmax) | ||
1864 | { | ||
1865 | if (fins->op2 == fleft->op2) | ||
1866 | return LEFTFOLD; /* (a o b) o b ==> a o b */ | ||
1867 | return NEXTFOLD; | ||
1868 | } | ||
1869 | |||
1564 | LJFOLD(BXOR BXOR any) | 1870 | LJFOLD(BXOR BXOR any) |
1565 | LJFOLDF(reassoc_bxor) | 1871 | LJFOLDF(reassoc_bxor) |
1566 | { | 1872 | { |
@@ -1599,23 +1905,12 @@ LJFOLDF(reassoc_shift) | |||
1599 | return NEXTFOLD; | 1905 | return NEXTFOLD; |
1600 | } | 1906 | } |
1601 | 1907 | ||
1602 | LJFOLD(MIN MIN KNUM) | ||
1603 | LJFOLD(MAX MAX KNUM) | ||
1604 | LJFOLD(MIN MIN KINT) | 1908 | LJFOLD(MIN MIN KINT) |
1605 | LJFOLD(MAX MAX KINT) | 1909 | LJFOLD(MAX MAX KINT) |
1606 | LJFOLDF(reassoc_minmax_k) | 1910 | LJFOLDF(reassoc_minmax_k) |
1607 | { | 1911 | { |
1608 | IRIns *irk = IR(fleft->op2); | 1912 | IRIns *irk = IR(fleft->op2); |
1609 | if (irk->o == IR_KNUM) { | 1913 | if (irk->o == IR_KINT) { |
1610 | lua_Number a = ir_knum(irk)->n; | ||
1611 | lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); | ||
1612 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ | ||
1613 | return LEFTFOLD; | ||
1614 | PHIBARRIER(fleft); | ||
1615 | fins->op1 = fleft->op1; | ||
1616 | fins->op2 = (IRRef1)lj_ir_knum(J, y); | ||
1617 | return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ | ||
1618 | } else if (irk->o == IR_KINT) { | ||
1619 | int32_t a = irk->i; | 1914 | int32_t a = irk->i; |
1620 | int32_t y = kfold_intop(a, fright->i, fins->o); | 1915 | int32_t y = kfold_intop(a, fright->i, fins->o); |
1621 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ | 1916 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ |
@@ -1628,24 +1923,6 @@ LJFOLDF(reassoc_minmax_k) | |||
1628 | return NEXTFOLD; | 1923 | return NEXTFOLD; |
1629 | } | 1924 | } |
1630 | 1925 | ||
1631 | LJFOLD(MIN MAX any) | ||
1632 | LJFOLD(MAX MIN any) | ||
1633 | LJFOLDF(reassoc_minmax_left) | ||
1634 | { | ||
1635 | if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) | ||
1636 | return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ | ||
1637 | return NEXTFOLD; | ||
1638 | } | ||
1639 | |||
1640 | LJFOLD(MIN any MAX) | ||
1641 | LJFOLD(MAX any MIN) | ||
1642 | LJFOLDF(reassoc_minmax_right) | ||
1643 | { | ||
1644 | if (fins->op1 == fright->op1 || fins->op1 == fright->op2) | ||
1645 | return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ | ||
1646 | return NEXTFOLD; | ||
1647 | } | ||
1648 | |||
1649 | /* -- Array bounds check elimination -------------------------------------- */ | 1926 | /* -- Array bounds check elimination -------------------------------------- */ |
1650 | 1927 | ||
1651 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. | 1928 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. |
@@ -1772,8 +2049,6 @@ LJFOLDF(comm_comp) | |||
1772 | 2049 | ||
1773 | LJFOLD(BAND any any) | 2050 | LJFOLD(BAND any any) |
1774 | LJFOLD(BOR any any) | 2051 | LJFOLD(BOR any any) |
1775 | LJFOLD(MIN any any) | ||
1776 | LJFOLD(MAX any any) | ||
1777 | LJFOLDF(comm_dup) | 2052 | LJFOLDF(comm_dup) |
1778 | { | 2053 | { |
1779 | if (fins->op1 == fins->op2) /* x o x ==> x */ | 2054 | if (fins->op1 == fins->op2) /* x o x ==> x */ |
@@ -1781,6 +2056,15 @@ LJFOLDF(comm_dup) | |||
1781 | return fold_comm_swap(J); | 2056 | return fold_comm_swap(J); |
1782 | } | 2057 | } |
1783 | 2058 | ||
2059 | LJFOLD(MIN any any) | ||
2060 | LJFOLD(MAX any any) | ||
2061 | LJFOLDF(comm_dup_minmax) | ||
2062 | { | ||
2063 | if (fins->op1 == fins->op2) /* x o x ==> x */ | ||
2064 | return LEFTFOLD; | ||
2065 | return NEXTFOLD; | ||
2066 | } | ||
2067 | |||
1784 | LJFOLD(BXOR any any) | 2068 | LJFOLD(BXOR any any) |
1785 | LJFOLDF(comm_bxor) | 2069 | LJFOLDF(comm_bxor) |
1786 | { | 2070 | { |
@@ -1817,7 +2101,7 @@ LJFOLDF(merge_eqne_snew_kgc) | |||
1817 | { | 2101 | { |
1818 | GCstr *kstr = ir_kstr(fright); | 2102 | GCstr *kstr = ir_kstr(fright); |
1819 | int32_t len = (int32_t)kstr->len; | 2103 | int32_t len = (int32_t)kstr->len; |
1820 | lua_assert(irt_isstr(fins->t)); | 2104 | lj_assertJ(irt_isstr(fins->t), "bad equality IR type"); |
1821 | 2105 | ||
1822 | #if LJ_TARGET_UNALIGNED | 2106 | #if LJ_TARGET_UNALIGNED |
1823 | #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ | 2107 | #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ |
@@ -1881,7 +2165,7 @@ LJFOLD(HLOAD KKPTR) | |||
1881 | LJFOLDF(kfold_hload_kkptr) | 2165 | LJFOLDF(kfold_hload_kkptr) |
1882 | { | 2166 | { |
1883 | UNUSED(J); | 2167 | UNUSED(J); |
1884 | lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); | 2168 | lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv"); |
1885 | return TREF_NIL; | 2169 | return TREF_NIL; |
1886 | } | 2170 | } |
1887 | 2171 | ||
@@ -1891,8 +2175,8 @@ LJFOLDX(lj_opt_fwd_hload) | |||
1891 | LJFOLD(ULOAD any) | 2175 | LJFOLD(ULOAD any) |
1892 | LJFOLDX(lj_opt_fwd_uload) | 2176 | LJFOLDX(lj_opt_fwd_uload) |
1893 | 2177 | ||
1894 | LJFOLD(CALLL any IRCALL_lj_tab_len) | 2178 | LJFOLD(ALEN any any) |
1895 | LJFOLDX(lj_opt_fwd_tab_len) | 2179 | LJFOLDX(lj_opt_fwd_alen) |
1896 | 2180 | ||
1897 | /* Upvalue refs are really loads, but there are no corresponding stores. | 2181 | /* Upvalue refs are really loads, but there are no corresponding stores. |
1898 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | 2182 | ** So CSE is ok for them, except for UREFO across a GC step (see below). |
@@ -1953,6 +2237,7 @@ LJFOLDF(fwd_href_tdup) | |||
1953 | ** an aliased table, as it may invalidate all of the pointers and fields. | 2237 | ** an aliased table, as it may invalidate all of the pointers and fields. |
1954 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on | 2238 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on |
1955 | ** FLOADs. And NEWREF itself is treated like a store (see below). | 2239 | ** FLOADs. And NEWREF itself is treated like a store (see below). |
2240 | ** LREF is constant (per trace) since coroutine switches are not inlined. | ||
1956 | */ | 2241 | */ |
1957 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) | 2242 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) |
1958 | LJFOLDF(fload_tab_tnew_asize) | 2243 | LJFOLDF(fload_tab_tnew_asize) |
@@ -2016,6 +2301,35 @@ LJFOLDF(fload_str_len_snew) | |||
2016 | return NEXTFOLD; | 2301 | return NEXTFOLD; |
2017 | } | 2302 | } |
2018 | 2303 | ||
2304 | LJFOLD(FLOAD TOSTR IRFL_STR_LEN) | ||
2305 | LJFOLDF(fload_str_len_tostr) | ||
2306 | { | ||
2307 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR) | ||
2308 | return INTFOLD(1); | ||
2309 | return NEXTFOLD; | ||
2310 | } | ||
2311 | |||
2312 | LJFOLD(FLOAD any IRFL_SBUF_W) | ||
2313 | LJFOLD(FLOAD any IRFL_SBUF_E) | ||
2314 | LJFOLD(FLOAD any IRFL_SBUF_B) | ||
2315 | LJFOLD(FLOAD any IRFL_SBUF_L) | ||
2316 | LJFOLD(FLOAD any IRFL_SBUF_REF) | ||
2317 | LJFOLD(FLOAD any IRFL_SBUF_R) | ||
2318 | LJFOLDF(fload_sbuf) | ||
2319 | { | ||
2320 | TRef tr = lj_opt_fwd_fload(J); | ||
2321 | return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; | ||
2322 | } | ||
2323 | |||
2324 | /* The fast function ID of function objects is immutable. */ | ||
2325 | LJFOLD(FLOAD KGC IRFL_FUNC_FFID) | ||
2326 | LJFOLDF(fload_func_ffid_kgc) | ||
2327 | { | ||
2328 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
2329 | return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); | ||
2330 | return NEXTFOLD; | ||
2331 | } | ||
2332 | |||
2019 | /* The C type ID of cdata objects is immutable. */ | 2333 | /* The C type ID of cdata objects is immutable. */ |
2020 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) | 2334 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) |
2021 | LJFOLDF(fload_cdata_typeid_kgc) | 2335 | LJFOLDF(fload_cdata_typeid_kgc) |
@@ -2062,6 +2376,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew) | |||
2062 | } | 2376 | } |
2063 | 2377 | ||
2064 | LJFOLD(FLOAD any IRFL_STR_LEN) | 2378 | LJFOLD(FLOAD any IRFL_STR_LEN) |
2379 | LJFOLD(FLOAD any IRFL_FUNC_ENV) | ||
2380 | LJFOLD(FLOAD any IRFL_THREAD_ENV) | ||
2065 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) | 2381 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) |
2066 | LJFOLD(FLOAD any IRFL_CDATA_PTR) | 2382 | LJFOLD(FLOAD any IRFL_CDATA_PTR) |
2067 | LJFOLD(FLOAD any IRFL_CDATA_INT) | 2383 | LJFOLD(FLOAD any IRFL_CDATA_INT) |
@@ -2081,7 +2397,7 @@ LJFOLDF(fwd_sload) | |||
2081 | TRef tr = lj_opt_cse(J); | 2397 | TRef tr = lj_opt_cse(J); |
2082 | return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; | 2398 | return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; |
2083 | } else { | 2399 | } else { |
2084 | lua_assert(J->slot[fins->op1] != 0); | 2400 | lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed"); |
2085 | return J->slot[fins->op1]; | 2401 | return J->slot[fins->op1]; |
2086 | } | 2402 | } |
2087 | } | 2403 | } |
@@ -2138,6 +2454,17 @@ LJFOLDF(barrier_tnew_tdup) | |||
2138 | return DROPFOLD; | 2454 | return DROPFOLD; |
2139 | } | 2455 | } |
2140 | 2456 | ||
2457 | /* -- Profiling ----------------------------------------------------------- */ | ||
2458 | |||
2459 | LJFOLD(PROF any any) | ||
2460 | LJFOLDF(prof) | ||
2461 | { | ||
2462 | IRRef ref = J->chain[IR_PROF]; | ||
2463 | if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */ | ||
2464 | return ref; | ||
2465 | return EMITFOLD; | ||
2466 | } | ||
2467 | |||
2141 | /* -- Stores and allocations ---------------------------------------------- */ | 2468 | /* -- Stores and allocations ---------------------------------------------- */ |
2142 | 2469 | ||
2143 | /* Stores and allocations cannot be folded or passed on to CSE in general. | 2470 | /* Stores and allocations cannot be folded or passed on to CSE in general. |
@@ -2160,8 +2487,10 @@ LJFOLD(XSTORE any any) | |||
2160 | LJFOLDX(lj_opt_dse_xstore) | 2487 | LJFOLDX(lj_opt_dse_xstore) |
2161 | 2488 | ||
2162 | LJFOLD(NEWREF any any) /* Treated like a store. */ | 2489 | LJFOLD(NEWREF any any) /* Treated like a store. */ |
2163 | LJFOLD(CALLS any any) | 2490 | LJFOLD(TMPREF any any) |
2491 | LJFOLD(CALLA any any) | ||
2164 | LJFOLD(CALLL any any) /* Safeguard fallback. */ | 2492 | LJFOLD(CALLL any any) /* Safeguard fallback. */ |
2493 | LJFOLD(CALLS any any) | ||
2165 | LJFOLD(CALLXS any any) | 2494 | LJFOLD(CALLXS any any) |
2166 | LJFOLD(XBAR) | 2495 | LJFOLD(XBAR) |
2167 | LJFOLD(RETF any any) /* Modifies BASE. */ | 2496 | LJFOLD(RETF any any) /* Modifies BASE. */ |
@@ -2194,8 +2523,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) | |||
2194 | IRRef ref; | 2523 | IRRef ref; |
2195 | 2524 | ||
2196 | if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { | 2525 | if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { |
2197 | lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | | 2526 | lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | |
2198 | JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); | 2527 | JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT, |
2528 | "bad JIT_F_OPT_DEFAULT"); | ||
2199 | /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ | 2529 | /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ |
2200 | if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) | 2530 | if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) |
2201 | return lj_opt_cse(J); | 2531 | return lj_opt_cse(J); |
@@ -2220,10 +2550,14 @@ retry: | |||
2220 | if (fins->op1 >= J->cur.nk) { | 2550 | if (fins->op1 >= J->cur.nk) { |
2221 | key += (uint32_t)IR(fins->op1)->o << 10; | 2551 | key += (uint32_t)IR(fins->op1)->o << 10; |
2222 | *fleft = *IR(fins->op1); | 2552 | *fleft = *IR(fins->op1); |
2553 | if (fins->op1 < REF_TRUE) | ||
2554 | fleft[1] = IR(fins->op1)[1]; | ||
2223 | } | 2555 | } |
2224 | if (fins->op2 >= J->cur.nk) { | 2556 | if (fins->op2 >= J->cur.nk) { |
2225 | key += (uint32_t)IR(fins->op2)->o; | 2557 | key += (uint32_t)IR(fins->op2)->o; |
2226 | *fright = *IR(fins->op2); | 2558 | *fright = *IR(fins->op2); |
2559 | if (fins->op2 < REF_TRUE) | ||
2560 | fright[1] = IR(fins->op2)[1]; | ||
2227 | } else { | 2561 | } else { |
2228 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ | 2562 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ |
2229 | } | 2563 | } |
@@ -2253,7 +2587,7 @@ retry: | |||
2253 | return lj_ir_kint(J, fins->i); | 2587 | return lj_ir_kint(J, fins->i); |
2254 | if (ref == FAILFOLD) | 2588 | if (ref == FAILFOLD) |
2255 | lj_trace_err(J, LJ_TRERR_GFAIL); | 2589 | lj_trace_err(J, LJ_TRERR_GFAIL); |
2256 | lua_assert(ref == DROPFOLD); | 2590 | lj_assertJ(ref == DROPFOLD, "bad fold result"); |
2257 | return REF_DROP; | 2591 | return REF_DROP; |
2258 | } | 2592 | } |
2259 | 2593 | ||
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 466f88de..ee3ee049 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_str.h" | 14 | #include "lj_buf.h" |
15 | #include "lj_ir.h" | 15 | #include "lj_ir.h" |
16 | #include "lj_jit.h" | 16 | #include "lj_jit.h" |
17 | #include "lj_iropt.h" | 17 | #include "lj_iropt.h" |
@@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, | |||
225 | /* Setup new snapshot. */ | 225 | /* Setup new snapshot. */ |
226 | snap->mapofs = (uint32_t)nmapofs; | 226 | snap->mapofs = (uint32_t)nmapofs; |
227 | snap->ref = (IRRef1)J->cur.nins; | 227 | snap->ref = (IRRef1)J->cur.nins; |
228 | snap->mcofs = 0; | ||
228 | snap->nslots = nslots; | 229 | snap->nslots = nslots; |
229 | snap->topslot = osnap->topslot; | 230 | snap->topslot = osnap->topslot; |
230 | snap->count = 0; | 231 | snap->count = 0; |
@@ -254,9 +255,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, | |||
254 | J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); | 255 | J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); |
255 | } | 256 | } |
256 | 257 | ||
258 | typedef struct LoopState { | ||
259 | jit_State *J; | ||
260 | IRRef1 *subst; | ||
261 | MSize sizesubst; | ||
262 | } LoopState; | ||
263 | |||
257 | /* Unroll loop. */ | 264 | /* Unroll loop. */ |
258 | static void loop_unroll(jit_State *J) | 265 | static void loop_unroll(LoopState *lps) |
259 | { | 266 | { |
267 | jit_State *J = lps->J; | ||
260 | IRRef1 phi[LJ_MAX_PHI]; | 268 | IRRef1 phi[LJ_MAX_PHI]; |
261 | uint32_t nphi = 0; | 269 | uint32_t nphi = 0; |
262 | IRRef1 *subst; | 270 | IRRef1 *subst; |
@@ -265,13 +273,13 @@ static void loop_unroll(jit_State *J) | |||
265 | SnapEntry *loopmap, *psentinel; | 273 | SnapEntry *loopmap, *psentinel; |
266 | IRRef ins, invar; | 274 | IRRef ins, invar; |
267 | 275 | ||
268 | /* Use temp buffer for substitution table. | 276 | /* Allocate substitution table. |
269 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. | 277 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. |
270 | ** Caveat: don't call into the VM or run the GC or the buffer may be gone. | ||
271 | */ | 278 | */ |
272 | invar = J->cur.nins; | 279 | invar = J->cur.nins; |
273 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, | 280 | lps->sizesubst = invar - REF_BIAS; |
274 | (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; | 281 | lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1); |
282 | subst = lps->subst - REF_BIAS; | ||
275 | subst[REF_BASE] = REF_BASE; | 283 | subst[REF_BASE] = REF_BASE; |
276 | 284 | ||
277 | /* LOOP separates the pre-roll from the loop body. */ | 285 | /* LOOP separates the pre-roll from the loop body. */ |
@@ -292,7 +300,8 @@ static void loop_unroll(jit_State *J) | |||
292 | loopmap = &J->cur.snapmap[loopsnap->mapofs]; | 300 | loopmap = &J->cur.snapmap[loopsnap->mapofs]; |
293 | /* The PC of snapshot #0 and the loop snapshot must match. */ | 301 | /* The PC of snapshot #0 and the loop snapshot must match. */ |
294 | psentinel = &loopmap[loopsnap->nent]; | 302 | psentinel = &loopmap[loopsnap->nent]; |
295 | lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); | 303 | lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent], |
304 | "mismatched PC for loop snapshot"); | ||
296 | *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ | 305 | *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ |
297 | 306 | ||
298 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ | 307 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ |
@@ -345,10 +354,12 @@ static void loop_unroll(jit_State *J) | |||
345 | irr = IR(ref); | 354 | irr = IR(ref); |
346 | goto phiconv; | 355 | goto phiconv; |
347 | } | 356 | } |
348 | } else if (ref != REF_DROP && irr->o == IR_CONV && | 357 | } else if (ref != REF_DROP && ref > invar && |
349 | ref > invar && irr->op1 < invar) { | 358 | ((irr->o == IR_CONV && irr->op1 < invar) || |
350 | /* May need an extra PHI for a CONV. */ | 359 | (irr->o == IR_ALEN && irr->op2 < invar && |
351 | ref = irr->op1; | 360 | irr->op2 != REF_NIL))) { |
361 | /* May need an extra PHI for a CONV or ALEN hint. */ | ||
362 | ref = irr->o == IR_CONV ? irr->op1 : irr->op2; | ||
352 | irr = IR(ref); | 363 | irr = IR(ref); |
353 | phiconv: | 364 | phiconv: |
354 | if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { | 365 | if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { |
@@ -363,7 +374,7 @@ static void loop_unroll(jit_State *J) | |||
363 | } | 374 | } |
364 | if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ | 375 | if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ |
365 | J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; | 376 | J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; |
366 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); | 377 | lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index"); |
367 | *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ | 378 | *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ |
368 | 379 | ||
369 | loop_emit_phi(J, subst, phi, nphi, onsnap); | 380 | loop_emit_phi(J, subst, phi, nphi, onsnap); |
@@ -396,7 +407,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap) | |||
396 | static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) | 407 | static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) |
397 | { | 408 | { |
398 | UNUSED(L); UNUSED(dummy); | 409 | UNUSED(L); UNUSED(dummy); |
399 | loop_unroll((jit_State *)ud); | 410 | loop_unroll((LoopState *)ud); |
400 | return NULL; | 411 | return NULL; |
401 | } | 412 | } |
402 | 413 | ||
@@ -406,7 +417,13 @@ int lj_opt_loop(jit_State *J) | |||
406 | IRRef nins = J->cur.nins; | 417 | IRRef nins = J->cur.nins; |
407 | SnapNo nsnap = J->cur.nsnap; | 418 | SnapNo nsnap = J->cur.nsnap; |
408 | MSize nsnapmap = J->cur.nsnapmap; | 419 | MSize nsnapmap = J->cur.nsnapmap; |
409 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); | 420 | LoopState lps; |
421 | int errcode; | ||
422 | lps.J = J; | ||
423 | lps.subst = NULL; | ||
424 | lps.sizesubst = 0; | ||
425 | errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt); | ||
426 | lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1); | ||
410 | if (LJ_UNLIKELY(errcode)) { | 427 | if (LJ_UNLIKELY(errcode)) { |
411 | lua_State *L = J->L; | 428 | lua_State *L = J->L; |
412 | if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ | 429 | if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ |
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index feec6bb7..cafa0523 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
@@ -17,12 +17,14 @@ | |||
17 | #include "lj_ir.h" | 17 | #include "lj_ir.h" |
18 | #include "lj_jit.h" | 18 | #include "lj_jit.h" |
19 | #include "lj_iropt.h" | 19 | #include "lj_iropt.h" |
20 | #include "lj_ircall.h" | ||
21 | #include "lj_dispatch.h" | ||
20 | 22 | ||
21 | /* Some local macros to save typing. Undef'd at the end. */ | 23 | /* Some local macros to save typing. Undef'd at the end. */ |
22 | #define IR(ref) (&J->cur.ir[(ref)]) | 24 | #define IR(ref) (&J->cur.ir[(ref)]) |
23 | #define fins (&J->fold.ins) | 25 | #define fins (&J->fold.ins) |
24 | #define fleft (&J->fold.left) | 26 | #define fleft (J->fold.left) |
25 | #define fright (&J->fold.right) | 27 | #define fright (J->fold.right) |
26 | 28 | ||
27 | /* | 29 | /* |
28 | ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). | 30 | ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). |
@@ -55,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) | |||
55 | { | 57 | { |
56 | IRIns *taba = IR(ta), *tabb = IR(tb); | 58 | IRIns *taba = IR(ta), *tabb = IR(tb); |
57 | int newa, newb; | 59 | int newa, newb; |
58 | lua_assert(ta != tb); | 60 | lj_assertJ(ta != tb, "bad usage"); |
59 | lua_assert(irt_istab(taba->t) && irt_istab(tabb->t)); | 61 | lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage"); |
60 | /* Disambiguate new allocations. */ | 62 | /* Disambiguate new allocations. */ |
61 | newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); | 63 | newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); |
62 | newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); | 64 | newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); |
@@ -70,6 +72,34 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) | |||
70 | return aa_escape(J, taba, tabb); | 72 | return aa_escape(J, taba, tabb); |
71 | } | 73 | } |
72 | 74 | ||
75 | /* Check whether there's no aliasing table.clear. */ | ||
76 | static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta) | ||
77 | { | ||
78 | IRRef ref = J->chain[IR_CALLS]; | ||
79 | while (ref > lim) { | ||
80 | IRIns *calls = IR(ref); | ||
81 | if (calls->op2 == IRCALL_lj_tab_clear && | ||
82 | (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO)) | ||
83 | return 0; /* Conflict. */ | ||
84 | ref = calls->prev; | ||
85 | } | ||
86 | return 1; /* No conflict. Can safely FOLD/CSE. */ | ||
87 | } | ||
88 | |||
89 | /* Check whether there's no aliasing NEWREF/table.clear for the left operand. */ | ||
90 | int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) | ||
91 | { | ||
92 | IRRef ta = fins->op1; | ||
93 | IRRef ref = J->chain[IR_NEWREF]; | ||
94 | while (ref > lim) { | ||
95 | IRIns *newref = IR(ref); | ||
96 | if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO) | ||
97 | return 0; /* Conflict. */ | ||
98 | ref = newref->prev; | ||
99 | } | ||
100 | return fwd_aa_tab_clear(J, lim, ta); | ||
101 | } | ||
102 | |||
73 | /* Alias analysis for array and hash access using key-based disambiguation. */ | 103 | /* Alias analysis for array and hash access using key-based disambiguation. */ |
74 | static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) | 104 | static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) |
75 | { | 105 | { |
@@ -98,7 +128,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) | |||
98 | /* Disambiguate array references based on index arithmetic. */ | 128 | /* Disambiguate array references based on index arithmetic. */ |
99 | int32_t ofsa = 0, ofsb = 0; | 129 | int32_t ofsa = 0, ofsb = 0; |
100 | IRRef basea = ka, baseb = kb; | 130 | IRRef basea = ka, baseb = kb; |
101 | lua_assert(refb->o == IR_AREF); | 131 | lj_assertJ(refb->o == IR_AREF, "expected AREF"); |
102 | /* Gather base and offset from t[base] or t[base+-ofs]. */ | 132 | /* Gather base and offset from t[base] or t[base+-ofs]. */ |
103 | if (keya->o == IR_ADD && irref_isk(keya->op2)) { | 133 | if (keya->o == IR_ADD && irref_isk(keya->op2)) { |
104 | basea = keya->op1; | 134 | basea = keya->op1; |
@@ -116,8 +146,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) | |||
116 | return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ | 146 | return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ |
117 | } else { | 147 | } else { |
118 | /* Disambiguate hash references based on the type of their keys. */ | 148 | /* Disambiguate hash references based on the type of their keys. */ |
119 | lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && | 149 | lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && |
120 | (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); | 150 | (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF), |
151 | "bad xREF IR op %d or %d", refa->o, refb->o); | ||
121 | if (!irt_sametype(keya->t, keyb->t)) | 152 | if (!irt_sametype(keya->t, keyb->t)) |
122 | return ALIAS_NO; /* Different key types. */ | 153 | return ALIAS_NO; /* Different key types. */ |
123 | } | 154 | } |
@@ -151,7 +182,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) | |||
151 | IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr; | 182 | IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr; |
152 | IRRef tab = ir->op1; | 183 | IRRef tab = ir->op1; |
153 | ir = IR(tab); | 184 | ir = IR(tab); |
154 | if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) { | 185 | if ((ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) && |
186 | fwd_aa_tab_clear(J, tab, tab)) { | ||
155 | /* A NEWREF with a number key may end up pointing to the array part. | 187 | /* A NEWREF with a number key may end up pointing to the array part. |
156 | ** But it's referenced from HSTORE and not found in the ASTORE chain. | 188 | ** But it's referenced from HSTORE and not found in the ASTORE chain. |
157 | ** For now simply consider this a conflict without forwarding anything. | 189 | ** For now simply consider this a conflict without forwarding anything. |
@@ -191,7 +223,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) | |||
191 | if (key->o == IR_KSLOT) key = IR(key->op1); | 223 | if (key->o == IR_KSLOT) key = IR(key->op1); |
192 | lj_ir_kvalue(J->L, &keyv, key); | 224 | lj_ir_kvalue(J->L, &keyv, key); |
193 | tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); | 225 | tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); |
194 | lua_assert(itype2irt(tv) == irt_type(fins->t)); | 226 | lj_assertJ(itype2irt(tv) == irt_type(fins->t), |
227 | "mismatched type in constant table"); | ||
195 | if (irt_isnum(fins->t)) | 228 | if (irt_isnum(fins->t)) |
196 | return lj_ir_knum_u64(J, tv->u64); | 229 | return lj_ir_knum_u64(J, tv->u64); |
197 | else if (LJ_DUALNUM && irt_isint(fins->t)) | 230 | else if (LJ_DUALNUM && irt_isint(fins->t)) |
@@ -265,7 +298,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) | |||
265 | while (ref > tab) { | 298 | while (ref > tab) { |
266 | IRIns *newref = IR(ref); | 299 | IRIns *newref = IR(ref); |
267 | if (tab == newref->op1) { | 300 | if (tab == newref->op1) { |
268 | if (fright->op1 == newref->op2) | 301 | if (fright->op1 == newref->op2 && fwd_aa_tab_clear(J, ref, tab)) |
269 | return ref; /* Forward from NEWREF. */ | 302 | return ref; /* Forward from NEWREF. */ |
270 | else | 303 | else |
271 | goto docse; | 304 | goto docse; |
@@ -275,7 +308,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) | |||
275 | ref = newref->prev; | 308 | ref = newref->prev; |
276 | } | 309 | } |
277 | /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */ | 310 | /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */ |
278 | if (IR(tab)->o == IR_TDUP) | 311 | if (IR(tab)->o == IR_TDUP && fwd_aa_tab_clear(J, tab, tab)) |
279 | fins->t.irt &= ~IRT_GUARD; /* Drop HREFK guard. */ | 312 | fins->t.irt &= ~IRT_GUARD; /* Drop HREFK guard. */ |
280 | docse: | 313 | docse: |
281 | return CSEFOLD; | 314 | return CSEFOLD; |
@@ -309,20 +342,6 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J) | |||
309 | return 1; /* No conflict. Can fold to niltv. */ | 342 | return 1; /* No conflict. Can fold to niltv. */ |
310 | } | 343 | } |
311 | 344 | ||
312 | /* Check whether there's no aliasing NEWREF for the left operand. */ | ||
313 | int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) | ||
314 | { | ||
315 | IRRef ta = fins->op1; | ||
316 | IRRef ref = J->chain[IR_NEWREF]; | ||
317 | while (ref > lim) { | ||
318 | IRIns *newref = IR(ref); | ||
319 | if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO) | ||
320 | return 0; /* Conflict. */ | ||
321 | ref = newref->prev; | ||
322 | } | ||
323 | return 1; /* No conflict. Can safely FOLD/CSE. */ | ||
324 | } | ||
325 | |||
326 | /* ASTORE/HSTORE elimination. */ | 345 | /* ASTORE/HSTORE elimination. */ |
327 | TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) | 346 | TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) |
328 | { | 347 | { |
@@ -346,9 +365,12 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) | |||
346 | /* Different value: try to eliminate the redundant store. */ | 365 | /* Different value: try to eliminate the redundant store. */ |
347 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ | 366 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ |
348 | IRIns *ir; | 367 | IRIns *ir; |
349 | /* Check for any intervening guards (includes conflicting loads). */ | 368 | /* Check for any intervening guards (includes conflicting loads). |
369 | ** Note that lj_tab_keyindex and lj_vm_next don't need guards, | ||
370 | ** since they are followed by at least one guarded VLOAD. | ||
371 | */ | ||
350 | for (ir = IR(J->cur.nins-1); ir > store; ir--) | 372 | for (ir = IR(J->cur.nins-1); ir > store; ir--) |
351 | if (irt_isguard(ir->t) || ir->o == IR_CALLL) | 373 | if (irt_isguard(ir->t) || ir->o == IR_ALEN) |
352 | goto doemit; /* No elimination possible. */ | 374 | goto doemit; /* No elimination possible. */ |
353 | /* Remove redundant store from chain and replace with NOP. */ | 375 | /* Remove redundant store from chain and replace with NOP. */ |
354 | *refp = store->prev; | 376 | *refp = store->prev; |
@@ -363,6 +385,67 @@ doemit: | |||
363 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | 385 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ |
364 | } | 386 | } |
365 | 387 | ||
388 | /* ALEN forwarding: reuse an earlier ALEN of the same table unless a numeric-key HSTORE, an aliasing table.clear or a possibly aliasing ASTORE intervenes. */ | ||
389 | TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J) | ||
390 | { | ||
391 | IRRef tab = fins->op1; /* Table reference. */ | ||
392 | IRRef lim = tab; /* Search limit. */ | ||
393 | IRRef ref; | ||
394 | |||
395 | /* Search for conflicting HSTORE with numeric key. */ | ||
396 | ref = J->chain[IR_HSTORE]; | ||
397 | while (ref > lim) { | ||
398 | IRIns *store = IR(ref); | ||
399 | IRIns *href = IR(store->op1); | ||
400 | IRIns *key = IR(href->op2); | ||
401 | if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { | ||
402 | lim = ref; /* Conflicting store found, limits search for ALEN. */ | ||
403 | break; | ||
404 | } | ||
405 | ref = store->prev; | ||
406 | } | ||
407 | |||
408 | /* Try to find a matching ALEN. */ | ||
409 | ref = J->chain[IR_ALEN]; | ||
410 | while (ref > lim) { | ||
411 | /* CSE for ALEN only depends on the table, not the hint. */ | ||
412 | if (IR(ref)->op1 == tab) { | ||
413 | IRRef sref; | ||
414 | |||
415 | /* Search for aliasing table.clear. */ | ||
416 | if (!fwd_aa_tab_clear(J, ref, tab)) | ||
417 | break; | ||
418 | |||
419 | /* Search for hint-forwarding or conflicting store. */ | ||
420 | sref = J->chain[IR_ASTORE]; | ||
421 | while (sref > ref) { | ||
422 | IRIns *store = IR(sref); | ||
423 | IRIns *aref = IR(store->op1); | ||
424 | IRIns *fref = IR(aref->op1); | ||
425 | if (tab == fref->op1) { /* ASTORE to the same table. */ | ||
426 | /* Detect t[#t+1] = x idiom for push. */ | ||
427 | IRIns *idx = IR(aref->op2); | ||
428 | if (!irt_isnil(store->t) && | ||
429 | idx->o == IR_ADD && idx->op1 == ref && | ||
430 | IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) { | ||
431 | /* Note: this requires an extra PHI check in loop unroll. */ | ||
432 | fins->op2 = aref->op2; /* Set ALEN hint. */ | ||
433 | } | ||
434 | goto doemit; /* Conflicting store, possibly giving a hint. */ | ||
435 | } else if (aa_table(J, tab, fref->op1) != ALIAS_NO) { | ||
436 | goto doemit; /* Conflicting store to a possibly aliasing table. */ | ||
437 | } | ||
438 | sref = store->prev; | ||
439 | } | ||
440 | |||
441 | return ref; /* Plain ALEN forwarding. */ | ||
442 | } | ||
443 | ref = IR(ref)->prev; | ||
444 | } | ||
445 | doemit: | ||
446 | return EMITFOLD; | ||
447 | } | ||
448 | |||
366 | /* -- ULOAD forwarding ---------------------------------------------------- */ | 449 | /* -- ULOAD forwarding ---------------------------------------------------- */ |
367 | 450 | ||
368 | /* The current alias analysis for upvalues is very simplistic. It only | 451 | /* The current alias analysis for upvalues is very simplistic. It only |
@@ -412,7 +495,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) | |||
412 | 495 | ||
413 | cselim: | 496 | cselim: |
414 | /* Try to find a matching load. Below the conflicting store, if any. */ | 497 | /* Try to find a matching load. Below the conflicting store, if any. */ |
415 | |||
416 | ref = J->chain[IR_ULOAD]; | 498 | ref = J->chain[IR_ULOAD]; |
417 | while (ref > lim) { | 499 | while (ref > lim) { |
418 | IRIns *ir = IR(ref); | 500 | IRIns *ir = IR(ref); |
@@ -542,8 +624,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) | |||
542 | goto doemit; | 624 | goto doemit; |
543 | break; /* Otherwise continue searching. */ | 625 | break; /* Otherwise continue searching. */ |
544 | case ALIAS_MUST: | 626 | case ALIAS_MUST: |
545 | if (store->op2 == val) /* Same value: drop the new store. */ | 627 | if (store->op2 == val && |
546 | return DROPFOLD; | 628 | !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R)) |
629 | return DROPFOLD; /* Same value: drop the new store. */ | ||
547 | /* Different value: try to eliminate the redundant store. */ | 630 | /* Different value: try to eliminate the redundant store. */ |
548 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ | 631 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ |
549 | IRIns *ir; | 632 | IRIns *ir; |
@@ -564,6 +647,29 @@ doemit: | |||
564 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | 647 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ |
565 | } | 648 | } |
566 | 649 | ||
650 | /* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */ | ||
651 | int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim) | ||
652 | { | ||
653 | IRRef ref; | ||
654 | if (J->chain[IR_BUFPUT] > lim) | ||
655 | return 0; /* Conflict. */ | ||
656 | ref = J->chain[IR_CALLS]; | ||
657 | while (ref > lim) { | ||
658 | IRIns *ir = IR(ref); | ||
659 | if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) | ||
660 | return 0; /* Conflict. */ | ||
661 | ref = ir->prev; | ||
662 | } | ||
663 | ref = J->chain[IR_CALLL]; | ||
664 | while (ref > lim) { | ||
665 | IRIns *ir = IR(ref); | ||
666 | if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) | ||
667 | return 0; /* Conflict. */ | ||
668 | ref = ir->prev; | ||
669 | } | ||
670 | return 1; /* No conflict. Can safely FOLD/CSE. */ | ||
671 | } | ||
672 | |||
567 | /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */ | 673 | /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */ |
568 | 674 | ||
569 | /* Find cdata allocation for a reference (if any). */ | 675 | /* Find cdata allocation for a reference (if any). */ |
@@ -815,35 +921,6 @@ doemit: | |||
815 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | 921 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ |
816 | } | 922 | } |
817 | 923 | ||
818 | /* -- Forwarding of lj_tab_len -------------------------------------------- */ | ||
819 | |||
820 | /* This is rather simplistic right now, but better than nothing. */ | ||
821 | TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) | ||
822 | { | ||
823 | IRRef tab = fins->op1; /* Table reference. */ | ||
824 | IRRef lim = tab; /* Search limit. */ | ||
825 | IRRef ref; | ||
826 | |||
827 | /* Any ASTORE is a conflict and limits the search. */ | ||
828 | if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; | ||
829 | |||
830 | /* Search for conflicting HSTORE with numeric key. */ | ||
831 | ref = J->chain[IR_HSTORE]; | ||
832 | while (ref > lim) { | ||
833 | IRIns *store = IR(ref); | ||
834 | IRIns *href = IR(store->op1); | ||
835 | IRIns *key = IR(href->op2); | ||
836 | if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { | ||
837 | lim = ref; /* Conflicting store found, limits search for TLEN. */ | ||
838 | break; | ||
839 | } | ||
840 | ref = store->prev; | ||
841 | } | ||
842 | |||
843 | /* Try to find a matching load. Below the conflicting store, if any. */ | ||
844 | return lj_opt_cselim(J, lim); | ||
845 | } | ||
846 | |||
847 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ | 924 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ |
848 | 925 | ||
849 | /* Check whether the previous value for a table store is non-nil. | 926 | /* Check whether the previous value for a table store is non-nil. |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 34fe6c39..57b19613 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
372 | } else if (op == NARROW_CONV) { | 372 | } else if (op == NARROW_CONV) { |
373 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ | 373 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ |
374 | } else if (op == NARROW_SEXT) { | 374 | } else if (op == NARROW_SEXT) { |
375 | lua_assert(sp >= nc->stack+1); | 375 | lj_assertJ(sp >= nc->stack+1, "stack underflow"); |
376 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], | 376 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], |
377 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | 377 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); |
378 | } else if (op == NARROW_INT) { | 378 | } else if (op == NARROW_INT) { |
379 | lua_assert(next < last); | 379 | lj_assertJ(next < last, "missing arg to NARROW_INT"); |
380 | *sp++ = nc->t == IRT_I64 ? | 380 | *sp++ = nc->t == IRT_I64 ? |
381 | lj_ir_kint64(J, (int64_t)(int32_t)*next++) : | 381 | lj_ir_kint64(J, (int64_t)(int32_t)*next++) : |
382 | lj_ir_kint(J, *next++); | 382 | lj_ir_kint(J, *next++); |
383 | } else { /* Regular IROpT. Pops two operands and pushes one result. */ | 383 | } else { /* Regular IROpT. Pops two operands and pushes one result. */ |
384 | IRRef mode = nc->mode; | 384 | IRRef mode = nc->mode; |
385 | lua_assert(sp >= nc->stack+2); | 385 | lj_assertJ(sp >= nc->stack+2, "stack underflow"); |
386 | sp--; | 386 | sp--; |
387 | /* Omit some overflow checks for array indexing. See comments above. */ | 387 | /* Omit some overflow checks for array indexing. See comments above. */ |
388 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { | 388 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { |
@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
398 | narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); | 398 | narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); |
399 | } | 399 | } |
400 | } | 400 | } |
401 | lua_assert(sp == nc->stack+1); | 401 | lj_assertJ(sp == nc->stack+1, "stack misalignment"); |
402 | return nc->stack[0]; | 402 | return nc->stack[0]; |
403 | } | 403 | } |
404 | 404 | ||
@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) | |||
452 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) | 452 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) |
453 | { | 453 | { |
454 | IRIns *ir; | 454 | IRIns *ir; |
455 | lua_assert(tref_isnumber(tr)); | 455 | lj_assertJ(tref_isnumber(tr), "expected number type"); |
456 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | 456 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ |
457 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); | 457 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); |
458 | /* Omit some overflow checks for array indexing. See comments above. */ | 458 | /* Omit some overflow checks for array indexing. See comments above. */ |
@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) | |||
499 | /* Narrow C array index (overflow undefined). */ | 499 | /* Narrow C array index (overflow undefined). */ |
500 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) | 500 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) |
501 | { | 501 | { |
502 | lua_assert(tref_isnumber(tr)); | 502 | lj_assertJ(tref_isnumber(tr), "expected number type"); |
503 | if (tref_isnum(tr)) | 503 | if (tref_isnum(tr)) |
504 | return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); | 504 | return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); |
505 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ | 505 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ |
@@ -551,11 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) | |||
551 | { | 551 | { |
552 | rc = conv_str_tonum(J, rc, vc); | 552 | rc = conv_str_tonum(J, rc, vc); |
553 | if (tref_isinteger(rc)) { | 553 | if (tref_isinteger(rc)) { |
554 | if ((uint32_t)numberVint(vc) != 0x80000000u) | 554 | uint32_t k = (uint32_t)numberVint(vc); |
555 | return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); | 555 | if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { |
556 | TRef zero = lj_ir_kint(J, 0); | ||
557 | if (!LJ_DUALNUM) | ||
558 | emitir(IRTGI(IR_NE), rc, zero); | ||
559 | return emitir(IRTGI(IR_SUBOV), zero, rc); | ||
560 | } | ||
556 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); | 561 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); |
557 | } | 562 | } |
558 | return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); | 563 | return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); |
559 | } | 564 | } |
560 | 565 | ||
561 | /* Narrowing of modulo operator. */ | 566 | /* Narrowing of modulo operator. */ |
@@ -588,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
588 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ | 593 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ |
589 | if (tvisint(vc) || numisint(numV(vc))) { | 594 | if (tvisint(vc) || numisint(numV(vc))) { |
590 | int checkrange = 0; | 595 | int checkrange = 0; |
591 | /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ | 596 | /* pow() is faster for bigger exponents. But do this only for (+k)^i. */ |
592 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { | 597 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { |
593 | int32_t k = numberVint(vc); | 598 | int32_t k = numberVint(vc); |
594 | if (!(k >= -65536 && k <= 65536)) goto split_pow; | 599 | if (!(k >= -65536 && k <= 65536)) goto force_pow_num; |
595 | checkrange = 1; | 600 | checkrange = 1; |
596 | } | 601 | } |
597 | if (!tref_isinteger(rc)) { | 602 | if (!tref_isinteger(rc)) { |
@@ -602,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
602 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); | 607 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); |
603 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); | 608 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); |
604 | } | 609 | } |
605 | return emitir(IRTN(IR_POW), rb, rc); | 610 | } else { |
611 | force_pow_num: | ||
612 | rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */ | ||
606 | } | 613 | } |
607 | split_pow: | 614 | return emitir(IRTN(IR_POW), rb, rc); |
608 | /* FOLD covers most cases, but some are easier to do here. */ | ||
609 | if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) | ||
610 | return rb; /* 1 ^ x ==> 1 */ | ||
611 | rc = lj_ir_tonum(J, rc); | ||
612 | if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) | ||
613 | return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ | ||
614 | /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ | ||
615 | rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); | ||
616 | rc = emitir(IRTN(IR_MUL), rb, rc); | ||
617 | return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); | ||
618 | } | 615 | } |
619 | 616 | ||
620 | /* -- Predictive narrowing of induction variables ------------------------- */ | 617 | /* -- Predictive narrowing of induction variables ------------------------- */ |
@@ -630,9 +627,10 @@ static int narrow_forl(jit_State *J, cTValue *o) | |||
630 | /* Narrow the FORL index type by looking at the runtime values. */ | 627 | /* Narrow the FORL index type by looking at the runtime values. */ |
631 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) | 628 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) |
632 | { | 629 | { |
633 | lua_assert(tvisnumber(&tv[FORL_IDX]) && | 630 | lj_assertJ(tvisnumber(&tv[FORL_IDX]) && |
634 | tvisnumber(&tv[FORL_STOP]) && | 631 | tvisnumber(&tv[FORL_STOP]) && |
635 | tvisnumber(&tv[FORL_STEP])); | 632 | tvisnumber(&tv[FORL_STEP]), |
633 | "expected number types"); | ||
636 | /* Narrow only if the runtime values of start/stop/step are all integers. */ | 634 | /* Narrow only if the runtime values of start/stop/step are all integers. */ |
637 | if (narrow_forl(J, &tv[FORL_IDX]) && | 635 | if (narrow_forl(J, &tv[FORL_IDX]) && |
638 | narrow_forl(J, &tv[FORL_STOP]) && | 636 | narrow_forl(J, &tv[FORL_STOP]) && |
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index 784d9a1a..4b9008be 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c | |||
@@ -86,8 +86,7 @@ static void sink_mark_ins(jit_State *J) | |||
86 | switch (ir->o) { | 86 | switch (ir->o) { |
87 | case IR_BASE: | 87 | case IR_BASE: |
88 | return; /* Finished. */ | 88 | return; /* Finished. */ |
89 | case IR_CALLL: /* IRCALL_lj_tab_len */ | 89 | case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN: |
90 | case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: | ||
91 | irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ | 90 | irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ |
92 | break; | 91 | break; |
93 | case IR_FLOAD: | 92 | case IR_FLOAD: |
@@ -173,8 +172,8 @@ static void sink_remark_phi(jit_State *J) | |||
173 | /* Sweep instructions and tag sunken allocations and stores. */ | 172 | /* Sweep instructions and tag sunken allocations and stores. */ |
174 | static void sink_sweep_ins(jit_State *J) | 173 | static void sink_sweep_ins(jit_State *J) |
175 | { | 174 | { |
176 | IRIns *ir, *irfirst = IR(J->cur.nk); | 175 | IRIns *ir, *irbase = IR(REF_BASE); |
177 | for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { | 176 | for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) { |
178 | switch (ir->o) { | 177 | switch (ir->o) { |
179 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { | 178 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { |
180 | IRIns *ira = sink_checkalloc(J, ir); | 179 | IRIns *ira = sink_checkalloc(J, ir); |
@@ -224,6 +223,13 @@ static void sink_sweep_ins(jit_State *J) | |||
224 | break; | 223 | break; |
225 | } | 224 | } |
226 | } | 225 | } |
226 | for (ir = IR(J->cur.nk); ir < irbase; ir++) { | ||
227 | irt_clearmark(ir->t); | ||
228 | ir->prev = REGSP_INIT; | ||
229 | /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ | ||
230 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
231 | ir++; | ||
232 | } | ||
227 | } | 233 | } |
228 | 234 | ||
229 | /* Allocation sinking and store sinking. | 235 | /* Allocation sinking and store sinking. |
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 190b6ba4..6d32712b 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c | |||
@@ -8,14 +8,15 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | 10 | ||
11 | #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) | 11 | #if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) |
12 | 12 | ||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_str.h" | 14 | #include "lj_buf.h" |
15 | #include "lj_ir.h" | 15 | #include "lj_ir.h" |
16 | #include "lj_jit.h" | 16 | #include "lj_jit.h" |
17 | #include "lj_ircall.h" | 17 | #include "lj_ircall.h" |
18 | #include "lj_iropt.h" | 18 | #include "lj_iropt.h" |
19 | #include "lj_dispatch.h" | ||
19 | #include "lj_vm.h" | 20 | #include "lj_vm.h" |
20 | 21 | ||
21 | /* SPLIT pass: | 22 | /* SPLIT pass: |
@@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, | |||
139 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); | 140 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
140 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); | 141 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
141 | } | 142 | } |
143 | #endif | ||
142 | 144 | ||
143 | /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ | 145 | /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ |
144 | static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, | 146 | static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
@@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, | |||
155 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); | 157 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
156 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); | 158 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
157 | } | 159 | } |
158 | #endif | ||
159 | 160 | ||
160 | /* Emit a CALLN with two split 64 bit arguments. */ | 161 | /* Emit a CALLN with two split 64 bit arguments. */ |
161 | static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, | 162 | static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
@@ -192,9 +193,121 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) | |||
192 | nref = ir->op1; | 193 | nref = ir->op1; |
193 | if (ofs == 0) return nref; | 194 | if (ofs == 0) return nref; |
194 | } | 195 | } |
195 | return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); | 196 | return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs)); |
196 | } | 197 | } |
197 | 198 | ||
199 | #if LJ_HASFFI | ||
200 | static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, | ||
201 | IRIns *oir, IRIns *nir, IRIns *ir) | ||
202 | { | ||
203 | IROp op = ir->o; | ||
204 | IRRef kref = nir->op2; | ||
205 | if (irref_isk(kref)) { /* Optimize constant shifts. */ | ||
206 | int32_t k = (IR(kref)->i & 63); | ||
207 | IRRef lo = nir->op1, hi = hisubst[ir->op1]; | ||
208 | if (op == IR_BROL || op == IR_BROR) { | ||
209 | if (op == IR_BROR) k = (-k & 63); | ||
210 | if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; } | ||
211 | if (k == 0) { | ||
212 | passthrough: | ||
213 | J->cur.nins--; | ||
214 | ir->prev = lo; | ||
215 | return hi; | ||
216 | } else { | ||
217 | TRef k1, k2; | ||
218 | IRRef t1, t2, t3, t4; | ||
219 | J->cur.nins--; | ||
220 | k1 = lj_ir_kint(J, k); | ||
221 | k2 = lj_ir_kint(J, (-k & 31)); | ||
222 | t1 = split_emit(J, IRTI(IR_BSHL), lo, k1); | ||
223 | t2 = split_emit(J, IRTI(IR_BSHL), hi, k1); | ||
224 | t3 = split_emit(J, IRTI(IR_BSHR), lo, k2); | ||
225 | t4 = split_emit(J, IRTI(IR_BSHR), hi, k2); | ||
226 | ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4); | ||
227 | return split_emit(J, IRTI(IR_BOR), t2, t3); | ||
228 | } | ||
229 | } else if (k == 0) { | ||
230 | goto passthrough; | ||
231 | } else if (k < 32) { | ||
232 | if (op == IR_BSHL) { | ||
233 | IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref); | ||
234 | IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31))); | ||
235 | return split_emit(J, IRTI(IR_BOR), t1, t2); | ||
236 | } else { | ||
237 | IRRef t1 = ir->prev, t2; | ||
238 | lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); | ||
239 | nir->o = IR_BSHR; | ||
240 | t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); | ||
241 | ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); | ||
242 | return split_emit(J, IRTI(op), hi, kref); | ||
243 | } | ||
244 | } else { | ||
245 | if (op == IR_BSHL) { | ||
246 | if (k == 32) | ||
247 | J->cur.nins--; | ||
248 | else | ||
249 | lo = ir->prev; | ||
250 | ir->prev = lj_ir_kint(J, 0); | ||
251 | return lo; | ||
252 | } else { | ||
253 | lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); | ||
254 | if (k == 32) { | ||
255 | J->cur.nins--; | ||
256 | ir->prev = hi; | ||
257 | } else { | ||
258 | nir->op1 = hi; | ||
259 | } | ||
260 | if (op == IR_BSHR) | ||
261 | return lj_ir_kint(J, 0); | ||
262 | else | ||
263 | return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31)); | ||
264 | } | ||
265 | } | ||
266 | } | ||
267 | return split_call_li(J, hisubst, oir, ir, | ||
268 | op - IR_BSHL + IRCALL_lj_carith_shl64); | ||
269 | } | ||
270 | |||
271 | static IRRef split_bitop(jit_State *J, IRRef1 *hisubst, | ||
272 | IRIns *nir, IRIns *ir) | ||
273 | { | ||
274 | IROp op = ir->o; | ||
275 | IRRef hi, kref = nir->op2; | ||
276 | if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */ | ||
277 | int32_t k = IR(kref)->i; | ||
278 | if (k == 0 || k == -1) { | ||
279 | if (op == IR_BAND) k = ~k; | ||
280 | if (k == 0) { | ||
281 | J->cur.nins--; | ||
282 | ir->prev = nir->op1; | ||
283 | } else if (op == IR_BXOR) { | ||
284 | nir->o = IR_BNOT; | ||
285 | nir->op2 = 0; | ||
286 | } else { | ||
287 | J->cur.nins--; | ||
288 | ir->prev = kref; | ||
289 | } | ||
290 | } | ||
291 | } | ||
292 | hi = hisubst[ir->op1]; | ||
293 | kref = hisubst[ir->op2]; | ||
294 | if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */ | ||
295 | int32_t k = IR(kref)->i; | ||
296 | if (k == 0 || k == -1) { | ||
297 | if (op == IR_BAND) k = ~k; | ||
298 | if (k == 0) { | ||
299 | return hi; | ||
300 | } else if (op == IR_BXOR) { | ||
301 | return split_emit(J, IRTI(IR_BNOT), hi, 0); | ||
302 | } else { | ||
303 | return kref; | ||
304 | } | ||
305 | } | ||
306 | } | ||
307 | return split_emit(J, IRTI(op), hi, kref); | ||
308 | } | ||
309 | #endif | ||
310 | |||
198 | /* Substitute references of a snapshot. */ | 311 | /* Substitute references of a snapshot. */ |
199 | static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) | 312 | static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) |
200 | { | 313 | { |
@@ -214,7 +327,7 @@ static void split_ir(jit_State *J) | |||
214 | IRRef nins = J->cur.nins, nk = J->cur.nk; | 327 | IRRef nins = J->cur.nins, nk = J->cur.nk; |
215 | MSize irlen = nins - nk; | 328 | MSize irlen = nins - nk; |
216 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); | 329 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); |
217 | IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); | 330 | IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need); |
218 | IRRef1 *hisubst; | 331 | IRRef1 *hisubst; |
219 | IRRef ref, snref; | 332 | IRRef ref, snref; |
220 | SnapShot *snap; | 333 | SnapShot *snap; |
@@ -241,6 +354,8 @@ static void split_ir(jit_State *J) | |||
241 | ir->prev = ref; /* Identity substitution for loword. */ | 354 | ir->prev = ref; /* Identity substitution for loword. */ |
242 | hisubst[ref] = 0; | 355 | hisubst[ref] = 0; |
243 | } | 356 | } |
357 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
358 | ref++; | ||
244 | } | 359 | } |
245 | 360 | ||
246 | /* Process old IR instructions. */ | 361 | /* Process old IR instructions. */ |
@@ -288,32 +403,8 @@ static void split_ir(jit_State *J) | |||
288 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); | 403 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); |
289 | break; | 404 | break; |
290 | case IR_FPMATH: | 405 | case IR_FPMATH: |
291 | /* Try to rejoin pow from EXP2, MUL and LOG2. */ | ||
292 | if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { | ||
293 | IRIns *irp = IR(nir->op1); | ||
294 | if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { | ||
295 | IRIns *irm4 = IR(irp->op1); | ||
296 | IRIns *irm3 = IR(irm4->op1); | ||
297 | IRIns *irm12 = IR(irm3->op1); | ||
298 | IRIns *irl1 = IR(irm12->op1); | ||
299 | if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && | ||
300 | irl1->op2 == IRCALL_lj_vm_log2) { | ||
301 | IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ | ||
302 | IRRef arg3 = irm3->op2, arg4 = irm4->op2; | ||
303 | J->cur.nins--; | ||
304 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); | ||
305 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); | ||
306 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); | ||
307 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); | ||
308 | break; | ||
309 | } | ||
310 | } | ||
311 | } | ||
312 | hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); | 406 | hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); |
313 | break; | 407 | break; |
314 | case IR_ATAN2: | ||
315 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); | ||
316 | break; | ||
317 | case IR_LDEXP: | 408 | case IR_LDEXP: |
318 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); | 409 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); |
319 | break; | 410 | break; |
@@ -321,7 +412,8 @@ static void split_ir(jit_State *J) | |||
321 | nir->o = IR_CONV; /* Pass through loword. */ | 412 | nir->o = IR_CONV; /* Pass through loword. */ |
322 | nir->op2 = (IRT_INT << 5) | IRT_INT; | 413 | nir->op2 = (IRT_INT << 5) | IRT_INT; |
323 | hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), | 414 | hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), |
324 | hisubst[ir->op1], hisubst[ir->op2]); | 415 | hisubst[ir->op1], |
416 | lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); | ||
325 | break; | 417 | break; |
326 | case IR_SLOAD: | 418 | case IR_SLOAD: |
327 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ | 419 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ |
@@ -336,15 +428,24 @@ static void split_ir(jit_State *J) | |||
336 | case IR_STRTO: | 428 | case IR_STRTO: |
337 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); | 429 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
338 | break; | 430 | break; |
431 | case IR_FLOAD: | ||
432 | lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State"); | ||
433 | hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); | ||
434 | nir->op2 += LJ_BE*4; | ||
435 | break; | ||
339 | case IR_XLOAD: { | 436 | case IR_XLOAD: { |
340 | IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ | 437 | IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ |
341 | J->cur.nins--; | 438 | J->cur.nins--; |
342 | hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ | 439 | hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ |
440 | #if LJ_BE | ||
441 | hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2); | ||
442 | inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD); | ||
443 | #endif | ||
343 | nref = lj_ir_nextins(J); | 444 | nref = lj_ir_nextins(J); |
344 | nir = IR(nref); | 445 | nir = IR(nref); |
345 | *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ | 446 | *nir = inslo; /* Re-emit lo XLOAD. */ |
346 | hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); | ||
347 | #if LJ_LE | 447 | #if LJ_LE |
448 | hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); | ||
348 | ir->prev = nref; | 449 | ir->prev = nref; |
349 | #else | 450 | #else |
350 | ir->prev = hi; hi = nref; | 451 | ir->prev = hi; hi = nref; |
@@ -364,8 +465,9 @@ static void split_ir(jit_State *J) | |||
364 | break; | 465 | break; |
365 | } | 466 | } |
366 | #endif | 467 | #endif |
367 | lua_assert(st == IRT_INT || | 468 | lj_assertJ(st == IRT_INT || |
368 | (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); | 469 | (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)), |
470 | "bad source type for CONV"); | ||
369 | nir->o = IR_CALLN; | 471 | nir->o = IR_CALLN; |
370 | #if LJ_32 && LJ_HASFFI | 472 | #if LJ_32 && LJ_HASFFI |
371 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : | 473 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : |
@@ -395,7 +497,8 @@ static void split_ir(jit_State *J) | |||
395 | hi = nir->op2; | 497 | hi = nir->op2; |
396 | break; | 498 | break; |
397 | default: | 499 | default: |
398 | lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); | 500 | lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX, |
501 | "bad IR op %d", ir->o); | ||
399 | hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), | 502 | hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), |
400 | hisubst[ir->op1], hisubst[ir->op2]); | 503 | hisubst[ir->op1], hisubst[ir->op2]); |
401 | break; | 504 | break; |
@@ -438,8 +541,21 @@ static void split_ir(jit_State *J) | |||
438 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | 541 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : |
439 | IRCALL_lj_carith_powu64); | 542 | IRCALL_lj_carith_powu64); |
440 | break; | 543 | break; |
544 | case IR_BNOT: | ||
545 | hi = split_emit(J, IRTI(IR_BNOT), hiref, 0); | ||
546 | break; | ||
547 | case IR_BSWAP: | ||
548 | ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0); | ||
549 | hi = nref; | ||
550 | break; | ||
551 | case IR_BAND: case IR_BOR: case IR_BXOR: | ||
552 | hi = split_bitop(J, hisubst, nir, ir); | ||
553 | break; | ||
554 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | ||
555 | hi = split_bitshift(J, hisubst, oir, nir, ir); | ||
556 | break; | ||
441 | case IR_FLOAD: | 557 | case IR_FLOAD: |
442 | lua_assert(ir->op2 == IRFL_CDATA_INT64); | 558 | lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported"); |
443 | hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); | 559 | hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); |
444 | #if LJ_BE | 560 | #if LJ_BE |
445 | ir->prev = hi; hi = nref; | 561 | ir->prev = hi; hi = nref; |
@@ -505,7 +621,7 @@ static void split_ir(jit_State *J) | |||
505 | hi = nir->op2; | 621 | hi = nir->op2; |
506 | break; | 622 | break; |
507 | default: | 623 | default: |
508 | lua_assert(ir->o <= IR_NE); /* Comparisons. */ | 624 | lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */ |
509 | split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); | 625 | split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); |
510 | break; | 626 | break; |
511 | } | 627 | } |
@@ -529,7 +645,7 @@ static void split_ir(jit_State *J) | |||
529 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); | 645 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
530 | #endif | 646 | #endif |
531 | ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); | 647 | ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); |
532 | } else if (ir->o == IR_TOSTR) { | 648 | } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) { |
533 | if (hisubst[ir->op1]) { | 649 | if (hisubst[ir->op1]) { |
534 | if (irref_isk(ir->op1)) | 650 | if (irref_isk(ir->op1)) |
535 | nir->op1 = ir->op1; | 651 | nir->op1 = ir->op1; |
@@ -583,7 +699,7 @@ static void split_ir(jit_State *J) | |||
583 | #if LJ_SOFTFP | 699 | #if LJ_SOFTFP |
584 | if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { | 700 | if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { |
585 | if (irt_isguard(ir->t)) { | 701 | if (irt_isguard(ir->t)) { |
586 | lua_assert(st == IRT_NUM && irt_isint(ir->t)); | 702 | lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types"); |
587 | J->cur.nins--; | 703 | J->cur.nins--; |
588 | ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); | 704 | ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); |
589 | } else { | 705 | } else { |
@@ -714,7 +830,7 @@ void lj_opt_split(jit_State *J) | |||
714 | if (!J->needsplit) | 830 | if (!J->needsplit) |
715 | J->needsplit = split_needsplit(J); | 831 | J->needsplit = split_needsplit(J); |
716 | #else | 832 | #else |
717 | lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ | 833 | lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state"); |
718 | #endif | 834 | #endif |
719 | if (J->needsplit) { | 835 | if (J->needsplit) { |
720 | int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); | 836 | int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); |
diff --git a/src/lj_parse.c b/src/lj_parse.c index 58353bab..78df8b5d 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_debug.h" | 15 | #include "lj_debug.h" |
16 | #include "lj_buf.h" | ||
16 | #include "lj_str.h" | 17 | #include "lj_str.h" |
17 | #include "lj_tab.h" | 18 | #include "lj_tab.h" |
18 | #include "lj_func.h" | 19 | #include "lj_func.h" |
@@ -21,6 +22,7 @@ | |||
21 | #if LJ_HASFFI | 22 | #if LJ_HASFFI |
22 | #include "lj_ctype.h" | 23 | #include "lj_ctype.h" |
23 | #endif | 24 | #endif |
25 | #include "lj_strfmt.h" | ||
24 | #include "lj_lex.h" | 26 | #include "lj_lex.h" |
25 | #include "lj_parse.h" | 27 | #include "lj_parse.h" |
26 | #include "lj_vm.h" | 28 | #include "lj_vm.h" |
@@ -161,16 +163,22 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD); | |||
161 | LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); | 163 | LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); |
162 | LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); | 164 | LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); |
163 | 165 | ||
166 | #ifdef LUA_USE_ASSERT | ||
167 | #define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__)) | ||
168 | #else | ||
169 | #define lj_assertFS(c, ...) ((void)fs) | ||
170 | #endif | ||
171 | |||
164 | /* -- Error handling ------------------------------------------------------ */ | 172 | /* -- Error handling ------------------------------------------------------ */ |
165 | 173 | ||
166 | LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) | 174 | LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) |
167 | { | 175 | { |
168 | lj_lex_error(ls, ls->token, em); | 176 | lj_lex_error(ls, ls->tok, em); |
169 | } | 177 | } |
170 | 178 | ||
171 | LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) | 179 | LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok) |
172 | { | 180 | { |
173 | lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); | 181 | lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok)); |
174 | } | 182 | } |
175 | 183 | ||
176 | LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) | 184 | LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) |
@@ -198,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e) | |||
198 | { | 206 | { |
199 | lua_State *L = fs->L; | 207 | lua_State *L = fs->L; |
200 | TValue *o; | 208 | TValue *o; |
201 | lua_assert(expr_isnumk(e)); | 209 | lj_assertFS(expr_isnumk(e), "bad usage"); |
202 | o = lj_tab_set(L, fs->kt, &e->u.nval); | 210 | o = lj_tab_set(L, fs->kt, &e->u.nval); |
203 | if (tvhaskslot(o)) | 211 | if (tvhaskslot(o)) |
204 | return tvkslot(o); | 212 | return tvkslot(o); |
@@ -223,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype) | |||
223 | /* Add a string constant. */ | 231 | /* Add a string constant. */ |
224 | static BCReg const_str(FuncState *fs, ExpDesc *e) | 232 | static BCReg const_str(FuncState *fs, ExpDesc *e) |
225 | { | 233 | { |
226 | lua_assert(expr_isstrk(e) || e->k == VGLOBAL); | 234 | lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage"); |
227 | return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); | 235 | return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); |
228 | } | 236 | } |
229 | 237 | ||
@@ -311,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest) | |||
311 | { | 319 | { |
312 | BCIns *jmp = &fs->bcbase[pc].ins; | 320 | BCIns *jmp = &fs->bcbase[pc].ins; |
313 | BCPos offset = dest-(pc+1)+BCBIAS_J; | 321 | BCPos offset = dest-(pc+1)+BCBIAS_J; |
314 | lua_assert(dest != NO_JMP); | 322 | lj_assertFS(dest != NO_JMP, "uninitialized jump target"); |
315 | if (offset > BCMAX_D) | 323 | if (offset > BCMAX_D) |
316 | err_syntax(fs->ls, LJ_ERR_XJUMP); | 324 | err_syntax(fs->ls, LJ_ERR_XJUMP); |
317 | setbc_d(jmp, offset); | 325 | setbc_d(jmp, offset); |
@@ -360,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target) | |||
360 | if (target == fs->pc) { | 368 | if (target == fs->pc) { |
361 | jmp_tohere(fs, list); | 369 | jmp_tohere(fs, list); |
362 | } else { | 370 | } else { |
363 | lua_assert(target < fs->pc); | 371 | lj_assertFS(target < fs->pc, "bad jump target"); |
364 | jmp_patchval(fs, list, target, NO_REG, target); | 372 | jmp_patchval(fs, list, target, NO_REG, target); |
365 | } | 373 | } |
366 | } | 374 | } |
@@ -390,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg) | |||
390 | { | 398 | { |
391 | if (reg >= fs->nactvar) { | 399 | if (reg >= fs->nactvar) { |
392 | fs->freereg--; | 400 | fs->freereg--; |
393 | lua_assert(reg == fs->freereg); | 401 | lj_assertFS(reg == fs->freereg, "bad regfree"); |
394 | } | 402 | } |
395 | } | 403 | } |
396 | 404 | ||
@@ -540,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) | |||
540 | } else if (e->k <= VKTRUE) { | 548 | } else if (e->k <= VKTRUE) { |
541 | ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); | 549 | ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); |
542 | } else { | 550 | } else { |
543 | lua_assert(e->k == VVOID || e->k == VJMP); | 551 | lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k); |
544 | return; | 552 | return; |
545 | } | 553 | } |
546 | bcemit_INS(fs, ins); | 554 | bcemit_INS(fs, ins); |
@@ -635,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) | |||
635 | ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); | 643 | ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); |
636 | } else { | 644 | } else { |
637 | BCReg ra, rc; | 645 | BCReg ra, rc; |
638 | lua_assert(var->k == VINDEXED); | 646 | lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k); |
639 | ra = expr_toanyreg(fs, e); | 647 | ra = expr_toanyreg(fs, e); |
640 | rc = var->u.s.aux; | 648 | rc = var->u.s.aux; |
641 | if ((int32_t)rc < 0) { | 649 | if ((int32_t)rc < 0) { |
@@ -643,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) | |||
643 | } else if (rc > BCMAX_C) { | 651 | } else if (rc > BCMAX_C) { |
644 | ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); | 652 | ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); |
645 | } else { | 653 | } else { |
654 | #ifdef LUA_USE_ASSERT | ||
646 | /* Free late alloced key reg to avoid assert on free of value reg. */ | 655 | /* Free late alloced key reg to avoid assert on free of value reg. */ |
647 | /* This can only happen when called from expr_table(). */ | 656 | /* This can only happen when called from expr_table(). */ |
648 | lua_assert(e->k != VNONRELOC || ra < fs->nactvar || | 657 | if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra) |
649 | rc < ra || (bcreg_free(fs, rc),1)); | 658 | bcreg_free(fs, rc); |
659 | #endif | ||
650 | ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); | 660 | ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); |
651 | } | 661 | } |
652 | } | 662 | } |
@@ -660,16 +670,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) | |||
660 | BCReg idx, func, obj = expr_toanyreg(fs, e); | 670 | BCReg idx, func, obj = expr_toanyreg(fs, e); |
661 | expr_free(fs, e); | 671 | expr_free(fs, e); |
662 | func = fs->freereg; | 672 | func = fs->freereg; |
663 | bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */ | 673 | bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */ |
664 | lua_assert(expr_isstrk(key)); | 674 | lj_assertFS(expr_isstrk(key), "bad usage"); |
665 | idx = const_str(fs, key); | 675 | idx = const_str(fs, key); |
666 | if (idx <= BCMAX_C) { | 676 | if (idx <= BCMAX_C) { |
667 | bcreg_reserve(fs, 2); | 677 | bcreg_reserve(fs, 2+LJ_FR2); |
668 | bcemit_ABC(fs, BC_TGETS, func, obj, idx); | 678 | bcemit_ABC(fs, BC_TGETS, func, obj, idx); |
669 | } else { | 679 | } else { |
670 | bcreg_reserve(fs, 3); | 680 | bcreg_reserve(fs, 3+LJ_FR2); |
671 | bcemit_AD(fs, BC_KSTR, func+2, idx); | 681 | bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx); |
672 | bcemit_ABC(fs, BC_TGETV, func, obj, func+2); | 682 | bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2); |
673 | fs->freereg--; | 683 | fs->freereg--; |
674 | } | 684 | } |
675 | e->u.s.info = func; | 685 | e->u.s.info = func; |
@@ -801,7 +811,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) | |||
801 | else | 811 | else |
802 | rc = expr_toanyreg(fs, e2); | 812 | rc = expr_toanyreg(fs, e2); |
803 | /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ | 813 | /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ |
804 | lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC); | 814 | lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC, |
815 | "bad expr type %d", e1->k); | ||
805 | expr_toval(fs, e1); | 816 | expr_toval(fs, e1); |
806 | /* Avoid two consts to satisfy bytecode constraints. */ | 817 | /* Avoid two consts to satisfy bytecode constraints. */ |
807 | if (expr_isnumk(e1) && !expr_isnumk(e2) && | 818 | if (expr_isnumk(e1) && !expr_isnumk(e2) && |
@@ -889,19 +900,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) | |||
889 | if (op <= OPR_POW) { | 900 | if (op <= OPR_POW) { |
890 | bcemit_arith(fs, op, e1, e2); | 901 | bcemit_arith(fs, op, e1, e2); |
891 | } else if (op == OPR_AND) { | 902 | } else if (op == OPR_AND) { |
892 | lua_assert(e1->t == NO_JMP); /* List must be closed. */ | 903 | lj_assertFS(e1->t == NO_JMP, "jump list not closed"); |
893 | expr_discharge(fs, e2); | 904 | expr_discharge(fs, e2); |
894 | jmp_append(fs, &e2->f, e1->f); | 905 | jmp_append(fs, &e2->f, e1->f); |
895 | *e1 = *e2; | 906 | *e1 = *e2; |
896 | } else if (op == OPR_OR) { | 907 | } else if (op == OPR_OR) { |
897 | lua_assert(e1->f == NO_JMP); /* List must be closed. */ | 908 | lj_assertFS(e1->f == NO_JMP, "jump list not closed"); |
898 | expr_discharge(fs, e2); | 909 | expr_discharge(fs, e2); |
899 | jmp_append(fs, &e2->t, e1->t); | 910 | jmp_append(fs, &e2->t, e1->t); |
900 | *e1 = *e2; | 911 | *e1 = *e2; |
901 | } else if (op == OPR_CONCAT) { | 912 | } else if (op == OPR_CONCAT) { |
902 | expr_toval(fs, e2); | 913 | expr_toval(fs, e2); |
903 | if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { | 914 | if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { |
904 | lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); | 915 | lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1, |
916 | "bad CAT stack layout"); | ||
905 | expr_free(fs, e1); | 917 | expr_free(fs, e1); |
906 | setbc_b(bcptr(fs, e2), e1->u.s.info); | 918 | setbc_b(bcptr(fs, e2), e1->u.s.info); |
907 | e1->u.s.info = e2->u.s.info; | 919 | e1->u.s.info = e2->u.s.info; |
@@ -913,8 +925,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) | |||
913 | } | 925 | } |
914 | e1->k = VRELOCABLE; | 926 | e1->k = VRELOCABLE; |
915 | } else { | 927 | } else { |
916 | lua_assert(op == OPR_NE || op == OPR_EQ || | 928 | lj_assertFS(op == OPR_NE || op == OPR_EQ || |
917 | op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT); | 929 | op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT, |
930 | "bad binop %d", op); | ||
918 | bcemit_comp(fs, op, e1, e2); | 931 | bcemit_comp(fs, op, e1, e2); |
919 | } | 932 | } |
920 | } | 933 | } |
@@ -943,10 +956,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) | |||
943 | e->u.s.info = fs->freereg-1; | 956 | e->u.s.info = fs->freereg-1; |
944 | e->k = VNONRELOC; | 957 | e->k = VNONRELOC; |
945 | } else { | 958 | } else { |
946 | lua_assert(e->k == VNONRELOC); | 959 | lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); |
947 | } | 960 | } |
948 | } else { | 961 | } else { |
949 | lua_assert(op == BC_UNM || op == BC_LEN); | 962 | lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op); |
950 | if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ | 963 | if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ |
951 | #if LJ_HASFFI | 964 | #if LJ_HASFFI |
952 | if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ | 965 | if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ |
@@ -986,7 +999,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) | |||
986 | /* Check and consume optional token. */ | 999 | /* Check and consume optional token. */ |
987 | static int lex_opt(LexState *ls, LexToken tok) | 1000 | static int lex_opt(LexState *ls, LexToken tok) |
988 | { | 1001 | { |
989 | if (ls->token == tok) { | 1002 | if (ls->tok == tok) { |
990 | lj_lex_next(ls); | 1003 | lj_lex_next(ls); |
991 | return 1; | 1004 | return 1; |
992 | } | 1005 | } |
@@ -996,7 +1009,7 @@ static int lex_opt(LexState *ls, LexToken tok) | |||
996 | /* Check and consume token. */ | 1009 | /* Check and consume token. */ |
997 | static void lex_check(LexState *ls, LexToken tok) | 1010 | static void lex_check(LexState *ls, LexToken tok) |
998 | { | 1011 | { |
999 | if (ls->token != tok) | 1012 | if (ls->tok != tok) |
1000 | err_token(ls, tok); | 1013 | err_token(ls, tok); |
1001 | lj_lex_next(ls); | 1014 | lj_lex_next(ls); |
1002 | } | 1015 | } |
@@ -1010,7 +1023,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line) | |||
1010 | } else { | 1023 | } else { |
1011 | const char *swhat = lj_lex_token2str(ls, what); | 1024 | const char *swhat = lj_lex_token2str(ls, what); |
1012 | const char *swho = lj_lex_token2str(ls, who); | 1025 | const char *swho = lj_lex_token2str(ls, who); |
1013 | lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); | 1026 | lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line); |
1014 | } | 1027 | } |
1015 | } | 1028 | } |
1016 | } | 1029 | } |
@@ -1019,9 +1032,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line) | |||
1019 | static GCstr *lex_str(LexState *ls) | 1032 | static GCstr *lex_str(LexState *ls) |
1020 | { | 1033 | { |
1021 | GCstr *s; | 1034 | GCstr *s; |
1022 | if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) | 1035 | if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto)) |
1023 | err_token(ls, TK_name); | 1036 | err_token(ls, TK_name); |
1024 | s = strV(&ls->tokenval); | 1037 | s = strV(&ls->tokval); |
1025 | lj_lex_next(ls); | 1038 | lj_lex_next(ls); |
1026 | return s; | 1039 | return s; |
1027 | } | 1040 | } |
@@ -1041,8 +1054,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name) | |||
1041 | lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); | 1054 | lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); |
1042 | lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); | 1055 | lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); |
1043 | } | 1056 | } |
1044 | lua_assert((uintptr_t)name < VARNAME__MAX || | 1057 | lj_assertFS((uintptr_t)name < VARNAME__MAX || |
1045 | lj_tab_getstr(fs->kt, name) != NULL); | 1058 | lj_tab_getstr(fs->kt, name) != NULL, |
1059 | "unanchored variable name"); | ||
1046 | /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ | 1060 | /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ |
1047 | setgcref(ls->vstack[vtop].name, obj2gco(name)); | 1061 | setgcref(ls->vstack[vtop].name, obj2gco(name)); |
1048 | fs->varmap[fs->nactvar+n] = (uint16_t)vtop; | 1062 | fs->varmap[fs->nactvar+n] = (uint16_t)vtop; |
@@ -1097,7 +1111,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e) | |||
1097 | return i; /* Already exists. */ | 1111 | return i; /* Already exists. */ |
1098 | /* Otherwise create a new one. */ | 1112 | /* Otherwise create a new one. */ |
1099 | checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); | 1113 | checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); |
1100 | lua_assert(e->k == VLOCAL || e->k == VUPVAL); | 1114 | lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k); |
1101 | fs->uvmap[n] = (uint16_t)vidx; | 1115 | fs->uvmap[n] = (uint16_t)vidx; |
1102 | fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info); | 1116 | fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info); |
1103 | fs->nuv = n+1; | 1117 | fs->nuv = n+1; |
@@ -1148,7 +1162,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc) | |||
1148 | lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); | 1162 | lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); |
1149 | lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); | 1163 | lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); |
1150 | } | 1164 | } |
1151 | lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL); | 1165 | lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL, |
1166 | "unanchored label name"); | ||
1152 | /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ | 1167 | /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ |
1153 | setgcref(ls->vstack[vtop].name, obj2gco(name)); | 1168 | setgcref(ls->vstack[vtop].name, obj2gco(name)); |
1154 | ls->vstack[vtop].startpc = pc; | 1169 | ls->vstack[vtop].startpc = pc; |
@@ -1178,8 +1193,9 @@ static void gola_close(LexState *ls, VarInfo *vg) | |||
1178 | FuncState *fs = ls->fs; | 1193 | FuncState *fs = ls->fs; |
1179 | BCPos pc = vg->startpc; | 1194 | BCPos pc = vg->startpc; |
1180 | BCIns *ip = &fs->bcbase[pc].ins; | 1195 | BCIns *ip = &fs->bcbase[pc].ins; |
1181 | lua_assert(gola_isgoto(vg)); | 1196 | lj_assertFS(gola_isgoto(vg), "expected goto"); |
1182 | lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO); | 1197 | lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO, |
1198 | "bad bytecode op %d", bc_op(*ip)); | ||
1183 | setbc_a(ip, vg->slot); | 1199 | setbc_a(ip, vg->slot); |
1184 | if (bc_op(*ip) == BC_JMP) { | 1200 | if (bc_op(*ip) == BC_JMP) { |
1185 | BCPos next = jmp_next(fs, pc); | 1201 | BCPos next = jmp_next(fs, pc); |
@@ -1198,9 +1214,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx) | |||
1198 | if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { | 1214 | if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { |
1199 | if (vg->slot < vl->slot) { | 1215 | if (vg->slot < vl->slot) { |
1200 | GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); | 1216 | GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); |
1201 | lua_assert((uintptr_t)name >= VARNAME__MAX); | 1217 | lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name"); |
1202 | ls->linenumber = ls->fs->bcbase[vg->startpc].line; | 1218 | ls->linenumber = ls->fs->bcbase[vg->startpc].line; |
1203 | lua_assert(strref(vg->name) != NAME_BREAK); | 1219 | lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break"); |
1204 | lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, | 1220 | lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, |
1205 | strdata(strref(vg->name)), strdata(name)); | 1221 | strdata(strref(vg->name)), strdata(name)); |
1206 | } | 1222 | } |
@@ -1264,7 +1280,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags) | |||
1264 | bl->vstart = fs->ls->vtop; | 1280 | bl->vstart = fs->ls->vtop; |
1265 | bl->prev = fs->bl; | 1281 | bl->prev = fs->bl; |
1266 | fs->bl = bl; | 1282 | fs->bl = bl; |
1267 | lua_assert(fs->freereg == fs->nactvar); | 1283 | lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc"); |
1268 | } | 1284 | } |
1269 | 1285 | ||
1270 | /* End a scope. */ | 1286 | /* End a scope. */ |
@@ -1275,7 +1291,7 @@ static void fscope_end(FuncState *fs) | |||
1275 | fs->bl = bl->prev; | 1291 | fs->bl = bl->prev; |
1276 | var_remove(ls, bl->nactvar); | 1292 | var_remove(ls, bl->nactvar); |
1277 | fs->freereg = fs->nactvar; | 1293 | fs->freereg = fs->nactvar; |
1278 | lua_assert(bl->nactvar == fs->nactvar); | 1294 | lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc"); |
1279 | if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) | 1295 | if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) |
1280 | bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); | 1296 | bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); |
1281 | if ((bl->flags & FSCOPE_BREAK)) { | 1297 | if ((bl->flags & FSCOPE_BREAK)) { |
@@ -1362,13 +1378,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) | |||
1362 | Node *n = &node[i]; | 1378 | Node *n = &node[i]; |
1363 | if (tvhaskslot(&n->val)) { | 1379 | if (tvhaskslot(&n->val)) { |
1364 | ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); | 1380 | ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); |
1365 | lua_assert(!tvisint(&n->key)); | 1381 | lj_assertFS(!tvisint(&n->key), "unexpected integer key"); |
1366 | if (tvisnum(&n->key)) { | 1382 | if (tvisnum(&n->key)) { |
1367 | TValue *tv = &((TValue *)kptr)[kidx]; | 1383 | TValue *tv = &((TValue *)kptr)[kidx]; |
1368 | if (LJ_DUALNUM) { | 1384 | if (LJ_DUALNUM) { |
1369 | lua_Number nn = numV(&n->key); | 1385 | lua_Number nn = numV(&n->key); |
1370 | int32_t k = lj_num2int(nn); | 1386 | int32_t k = lj_num2int(nn); |
1371 | lua_assert(!tvismzero(&n->key)); | 1387 | lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); |
1372 | if ((lua_Number)k == nn) | 1388 | if ((lua_Number)k == nn) |
1373 | setintV(tv, k); | 1389 | setintV(tv, k); |
1374 | else | 1390 | else |
@@ -1416,98 +1432,66 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt, | |||
1416 | uint8_t *li = (uint8_t *)lineinfo; | 1432 | uint8_t *li = (uint8_t *)lineinfo; |
1417 | do { | 1433 | do { |
1418 | BCLine delta = base[i].line - first; | 1434 | BCLine delta = base[i].line - first; |
1419 | lua_assert(delta >= 0 && delta < 256); | 1435 | lj_assertFS(delta >= 0 && delta < 256, "bad line delta"); |
1420 | li[i] = (uint8_t)delta; | 1436 | li[i] = (uint8_t)delta; |
1421 | } while (++i < n); | 1437 | } while (++i < n); |
1422 | } else if (LJ_LIKELY(numline < 65536)) { | 1438 | } else if (LJ_LIKELY(numline < 65536)) { |
1423 | uint16_t *li = (uint16_t *)lineinfo; | 1439 | uint16_t *li = (uint16_t *)lineinfo; |
1424 | do { | 1440 | do { |
1425 | BCLine delta = base[i].line - first; | 1441 | BCLine delta = base[i].line - first; |
1426 | lua_assert(delta >= 0 && delta < 65536); | 1442 | lj_assertFS(delta >= 0 && delta < 65536, "bad line delta"); |
1427 | li[i] = (uint16_t)delta; | 1443 | li[i] = (uint16_t)delta; |
1428 | } while (++i < n); | 1444 | } while (++i < n); |
1429 | } else { | 1445 | } else { |
1430 | uint32_t *li = (uint32_t *)lineinfo; | 1446 | uint32_t *li = (uint32_t *)lineinfo; |
1431 | do { | 1447 | do { |
1432 | BCLine delta = base[i].line - first; | 1448 | BCLine delta = base[i].line - first; |
1433 | lua_assert(delta >= 0); | 1449 | lj_assertFS(delta >= 0, "bad line delta"); |
1434 | li[i] = (uint32_t)delta; | 1450 | li[i] = (uint32_t)delta; |
1435 | } while (++i < n); | 1451 | } while (++i < n); |
1436 | } | 1452 | } |
1437 | } | 1453 | } |
1438 | 1454 | ||
1439 | /* Resize buffer if needed. */ | ||
1440 | static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len) | ||
1441 | { | ||
1442 | MSize sz = ls->sb.sz * 2; | ||
1443 | while (ls->sb.n + len > sz) sz = sz * 2; | ||
1444 | lj_str_resizebuf(ls->L, &ls->sb, sz); | ||
1445 | } | ||
1446 | |||
1447 | static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len) | ||
1448 | { | ||
1449 | if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz)) | ||
1450 | fs_buf_resize(ls, len); | ||
1451 | } | ||
1452 | |||
1453 | /* Add string to buffer. */ | ||
1454 | static void fs_buf_str(LexState *ls, const char *str, MSize len) | ||
1455 | { | ||
1456 | char *p = ls->sb.buf + ls->sb.n; | ||
1457 | MSize i; | ||
1458 | ls->sb.n += len; | ||
1459 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
1460 | } | ||
1461 | |||
1462 | /* Add ULEB128 value to buffer. */ | ||
1463 | static void fs_buf_uleb128(LexState *ls, uint32_t v) | ||
1464 | { | ||
1465 | MSize n = ls->sb.n; | ||
1466 | uint8_t *p = (uint8_t *)ls->sb.buf; | ||
1467 | for (; v >= 0x80; v >>= 7) | ||
1468 | p[n++] = (uint8_t)((v & 0x7f) | 0x80); | ||
1469 | p[n++] = (uint8_t)v; | ||
1470 | ls->sb.n = n; | ||
1471 | } | ||
1472 | |||
1473 | /* Prepare variable info for prototype. */ | 1455 | /* Prepare variable info for prototype. */ |
1474 | static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) | 1456 | static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) |
1475 | { | 1457 | { |
1476 | VarInfo *vs =ls->vstack, *ve; | 1458 | VarInfo *vs =ls->vstack, *ve; |
1477 | MSize i, n; | 1459 | MSize i, n; |
1478 | BCPos lastpc; | 1460 | BCPos lastpc; |
1479 | lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ | 1461 | lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */ |
1480 | /* Store upvalue names. */ | 1462 | /* Store upvalue names. */ |
1481 | for (i = 0, n = fs->nuv; i < n; i++) { | 1463 | for (i = 0, n = fs->nuv; i < n; i++) { |
1482 | GCstr *s = strref(vs[fs->uvmap[i]].name); | 1464 | GCstr *s = strref(vs[fs->uvmap[i]].name); |
1483 | MSize len = s->len+1; | 1465 | MSize len = s->len+1; |
1484 | fs_buf_need(ls, len); | 1466 | char *p = lj_buf_more(&ls->sb, len); |
1485 | fs_buf_str(ls, strdata(s), len); | 1467 | p = lj_buf_wmem(p, strdata(s), len); |
1468 | ls->sb.w = p; | ||
1486 | } | 1469 | } |
1487 | *ofsvar = ls->sb.n; | 1470 | *ofsvar = sbuflen(&ls->sb); |
1488 | lastpc = 0; | 1471 | lastpc = 0; |
1489 | /* Store local variable names and compressed ranges. */ | 1472 | /* Store local variable names and compressed ranges. */ |
1490 | for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { | 1473 | for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { |
1491 | if (!gola_isgotolabel(vs)) { | 1474 | if (!gola_isgotolabel(vs)) { |
1492 | GCstr *s = strref(vs->name); | 1475 | GCstr *s = strref(vs->name); |
1493 | BCPos startpc; | 1476 | BCPos startpc; |
1477 | char *p; | ||
1494 | if ((uintptr_t)s < VARNAME__MAX) { | 1478 | if ((uintptr_t)s < VARNAME__MAX) { |
1495 | fs_buf_need(ls, 1 + 2*5); | 1479 | p = lj_buf_more(&ls->sb, 1 + 2*5); |
1496 | ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; | 1480 | *p++ = (char)(uintptr_t)s; |
1497 | } else { | 1481 | } else { |
1498 | MSize len = s->len+1; | 1482 | MSize len = s->len+1; |
1499 | fs_buf_need(ls, len + 2*5); | 1483 | p = lj_buf_more(&ls->sb, len + 2*5); |
1500 | fs_buf_str(ls, strdata(s), len); | 1484 | p = lj_buf_wmem(p, strdata(s), len); |
1501 | } | 1485 | } |
1502 | startpc = vs->startpc; | 1486 | startpc = vs->startpc; |
1503 | fs_buf_uleb128(ls, startpc-lastpc); | 1487 | p = lj_strfmt_wuleb128(p, startpc-lastpc); |
1504 | fs_buf_uleb128(ls, vs->endpc-startpc); | 1488 | p = lj_strfmt_wuleb128(p, vs->endpc-startpc); |
1489 | ls->sb.w = p; | ||
1505 | lastpc = startpc; | 1490 | lastpc = startpc; |
1506 | } | 1491 | } |
1507 | } | 1492 | } |
1508 | fs_buf_need(ls, 1); | 1493 | lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */ |
1509 | ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ | 1494 | return sbuflen(&ls->sb); |
1510 | return ls->sb.n; | ||
1511 | } | 1495 | } |
1512 | 1496 | ||
1513 | /* Fixup variable info for prototype. */ | 1497 | /* Fixup variable info for prototype. */ |
@@ -1515,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) | |||
1515 | { | 1499 | { |
1516 | setmref(pt->uvinfo, p); | 1500 | setmref(pt->uvinfo, p); |
1517 | setmref(pt->varinfo, (char *)p + ofsvar); | 1501 | setmref(pt->varinfo, (char *)p + ofsvar); |
1518 | memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ | 1502 | memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */ |
1519 | } | 1503 | } |
1520 | #else | 1504 | #else |
1521 | 1505 | ||
@@ -1552,7 +1536,7 @@ static void fs_fixup_ret(FuncState *fs) | |||
1552 | } | 1536 | } |
1553 | fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ | 1537 | fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ |
1554 | fscope_end(fs); | 1538 | fscope_end(fs); |
1555 | lua_assert(fs->bl == NULL); | 1539 | lj_assertFS(fs->bl == NULL, "bad scope nesting"); |
1556 | /* May need to fixup returns encoded before first function was created. */ | 1540 | /* May need to fixup returns encoded before first function was created. */ |
1557 | if (fs->flags & PROTO_FIXUP_RETURN) { | 1541 | if (fs->flags & PROTO_FIXUP_RETURN) { |
1558 | BCPos pc; | 1542 | BCPos pc; |
@@ -1624,7 +1608,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) | |||
1624 | L->top--; /* Pop table of constants. */ | 1608 | L->top--; /* Pop table of constants. */ |
1625 | ls->vtop = fs->vbase; /* Reset variable stack. */ | 1609 | ls->vtop = fs->vbase; /* Reset variable stack. */ |
1626 | ls->fs = fs->prev; | 1610 | ls->fs = fs->prev; |
1627 | lua_assert(ls->fs != NULL || ls->token == TK_eof); | 1611 | lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state"); |
1628 | return pt; | 1612 | return pt; |
1629 | } | 1613 | } |
1630 | 1614 | ||
@@ -1718,15 +1702,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v) | |||
1718 | } | 1702 | } |
1719 | 1703 | ||
1720 | /* Get value of constant expression. */ | 1704 | /* Get value of constant expression. */ |
1721 | static void expr_kvalue(TValue *v, ExpDesc *e) | 1705 | static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e) |
1722 | { | 1706 | { |
1707 | UNUSED(fs); | ||
1723 | if (e->k <= VKTRUE) { | 1708 | if (e->k <= VKTRUE) { |
1724 | setitype(v, ~(uint32_t)e->k); | 1709 | setpriV(v, ~(uint32_t)e->k); |
1725 | } else if (e->k == VKSTR) { | 1710 | } else if (e->k == VKSTR) { |
1726 | setgcref(v->gcr, obj2gco(e->u.sval)); | 1711 | setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR); |
1727 | setitype(v, LJ_TSTR); | ||
1728 | } else { | 1712 | } else { |
1729 | lua_assert(tvisnumber(expr_numtv(e))); | 1713 | lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant"); |
1730 | *v = *expr_numtv(e); | 1714 | *v = *expr_numtv(e); |
1731 | } | 1715 | } |
1732 | } | 1716 | } |
@@ -1746,15 +1730,15 @@ static void expr_table(LexState *ls, ExpDesc *e) | |||
1746 | bcreg_reserve(fs, 1); | 1730 | bcreg_reserve(fs, 1); |
1747 | freg++; | 1731 | freg++; |
1748 | lex_check(ls, '{'); | 1732 | lex_check(ls, '{'); |
1749 | while (ls->token != '}') { | 1733 | while (ls->tok != '}') { |
1750 | ExpDesc key, val; | 1734 | ExpDesc key, val; |
1751 | vcall = 0; | 1735 | vcall = 0; |
1752 | if (ls->token == '[') { | 1736 | if (ls->tok == '[') { |
1753 | expr_bracket(ls, &key); /* Already calls expr_toval. */ | 1737 | expr_bracket(ls, &key); /* Already calls expr_toval. */ |
1754 | if (!expr_isk(&key)) expr_index(fs, e, &key); | 1738 | if (!expr_isk(&key)) expr_index(fs, e, &key); |
1755 | if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; | 1739 | if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; |
1756 | lex_check(ls, '='); | 1740 | lex_check(ls, '='); |
1757 | } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && | 1741 | } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) && |
1758 | lj_lex_lookahead(ls) == '=') { | 1742 | lj_lex_lookahead(ls) == '=') { |
1759 | expr_str(ls, &key); | 1743 | expr_str(ls, &key); |
1760 | lex_check(ls, '='); | 1744 | lex_check(ls, '='); |
@@ -1776,11 +1760,11 @@ static void expr_table(LexState *ls, ExpDesc *e) | |||
1776 | fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); | 1760 | fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); |
1777 | } | 1761 | } |
1778 | vcall = 0; | 1762 | vcall = 0; |
1779 | expr_kvalue(&k, &key); | 1763 | expr_kvalue(fs, &k, &key); |
1780 | v = lj_tab_set(fs->L, t, &k); | 1764 | v = lj_tab_set(fs->L, t, &k); |
1781 | lj_gc_anybarriert(fs->L, t); | 1765 | lj_gc_anybarriert(fs->L, t); |
1782 | if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ | 1766 | if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ |
1783 | expr_kvalue(v, &val); | 1767 | expr_kvalue(fs, v, &val); |
1784 | } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ | 1768 | } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ |
1785 | settabV(fs->L, v, t); /* Preserve key with table itself as value. */ | 1769 | settabV(fs->L, v, t); /* Preserve key with table itself as value. */ |
1786 | fixt = 1; /* Fix this later, after all resizes. */ | 1770 | fixt = 1; /* Fix this later, after all resizes. */ |
@@ -1799,8 +1783,9 @@ static void expr_table(LexState *ls, ExpDesc *e) | |||
1799 | if (vcall) { | 1783 | if (vcall) { |
1800 | BCInsLine *ilp = &fs->bcbase[fs->pc-1]; | 1784 | BCInsLine *ilp = &fs->bcbase[fs->pc-1]; |
1801 | ExpDesc en; | 1785 | ExpDesc en; |
1802 | lua_assert(bc_a(ilp->ins) == freg && | 1786 | lj_assertFS(bc_a(ilp->ins) == freg && |
1803 | bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB)); | 1787 | bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB), |
1788 | "bad CALL code generation"); | ||
1804 | expr_init(&en, VKNUM, 0); | 1789 | expr_init(&en, VKNUM, 0); |
1805 | en.u.nval.u32.lo = narr-1; | 1790 | en.u.nval.u32.lo = narr-1; |
1806 | en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */ | 1791 | en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */ |
@@ -1830,7 +1815,7 @@ static void expr_table(LexState *ls, ExpDesc *e) | |||
1830 | for (i = 0; i <= hmask; i++) { | 1815 | for (i = 0; i <= hmask; i++) { |
1831 | Node *n = &node[i]; | 1816 | Node *n = &node[i]; |
1832 | if (tvistab(&n->val)) { | 1817 | if (tvistab(&n->val)) { |
1833 | lua_assert(tabV(&n->val) == t); | 1818 | lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); |
1834 | setnilV(&n->val); /* Turn value into nil. */ | 1819 | setnilV(&n->val); /* Turn value into nil. */ |
1835 | } | 1820 | } |
1836 | } | 1821 | } |
@@ -1847,11 +1832,11 @@ static BCReg parse_params(LexState *ls, int needself) | |||
1847 | lex_check(ls, '('); | 1832 | lex_check(ls, '('); |
1848 | if (needself) | 1833 | if (needself) |
1849 | var_new_lit(ls, nparams++, "self"); | 1834 | var_new_lit(ls, nparams++, "self"); |
1850 | if (ls->token != ')') { | 1835 | if (ls->tok != ')') { |
1851 | do { | 1836 | do { |
1852 | if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { | 1837 | if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) { |
1853 | var_new(ls, nparams++, lex_str(ls)); | 1838 | var_new(ls, nparams++, lex_str(ls)); |
1854 | } else if (ls->token == TK_dots) { | 1839 | } else if (ls->tok == TK_dots) { |
1855 | lj_lex_next(ls); | 1840 | lj_lex_next(ls); |
1856 | fs->flags |= PROTO_VARARG; | 1841 | fs->flags |= PROTO_VARARG; |
1857 | break; | 1842 | break; |
@@ -1861,7 +1846,7 @@ static BCReg parse_params(LexState *ls, int needself) | |||
1861 | } while (lex_opt(ls, ',')); | 1846 | } while (lex_opt(ls, ',')); |
1862 | } | 1847 | } |
1863 | var_add(ls, nparams); | 1848 | var_add(ls, nparams); |
1864 | lua_assert(fs->nactvar == nparams); | 1849 | lj_assertFS(fs->nactvar == nparams, "bad regalloc"); |
1865 | bcreg_reserve(fs, nparams); | 1850 | bcreg_reserve(fs, nparams); |
1866 | lex_check(ls, ')'); | 1851 | lex_check(ls, ')'); |
1867 | return nparams; | 1852 | return nparams; |
@@ -1885,7 +1870,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line) | |||
1885 | fs.bclim = pfs->bclim - pfs->pc; | 1870 | fs.bclim = pfs->bclim - pfs->pc; |
1886 | bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ | 1871 | bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ |
1887 | parse_chunk(ls); | 1872 | parse_chunk(ls); |
1888 | if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); | 1873 | if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line); |
1889 | pt = fs_finish(ls, (ls->lastline = ls->linenumber)); | 1874 | pt = fs_finish(ls, (ls->lastline = ls->linenumber)); |
1890 | pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ | 1875 | pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ |
1891 | pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); | 1876 | pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); |
@@ -1924,13 +1909,13 @@ static void parse_args(LexState *ls, ExpDesc *e) | |||
1924 | BCIns ins; | 1909 | BCIns ins; |
1925 | BCReg base; | 1910 | BCReg base; |
1926 | BCLine line = ls->linenumber; | 1911 | BCLine line = ls->linenumber; |
1927 | if (ls->token == '(') { | 1912 | if (ls->tok == '(') { |
1928 | #if !LJ_52 | 1913 | #if !LJ_52 |
1929 | if (line != ls->lastline) | 1914 | if (line != ls->lastline) |
1930 | err_syntax(ls, LJ_ERR_XAMBIG); | 1915 | err_syntax(ls, LJ_ERR_XAMBIG); |
1931 | #endif | 1916 | #endif |
1932 | lj_lex_next(ls); | 1917 | lj_lex_next(ls); |
1933 | if (ls->token == ')') { /* f(). */ | 1918 | if (ls->tok == ')') { /* f(). */ |
1934 | args.k = VVOID; | 1919 | args.k = VVOID; |
1935 | } else { | 1920 | } else { |
1936 | expr_list(ls, &args); | 1921 | expr_list(ls, &args); |
@@ -1938,24 +1923,24 @@ static void parse_args(LexState *ls, ExpDesc *e) | |||
1938 | setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ | 1923 | setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ |
1939 | } | 1924 | } |
1940 | lex_match(ls, ')', '(', line); | 1925 | lex_match(ls, ')', '(', line); |
1941 | } else if (ls->token == '{') { | 1926 | } else if (ls->tok == '{') { |
1942 | expr_table(ls, &args); | 1927 | expr_table(ls, &args); |
1943 | } else if (ls->token == TK_string) { | 1928 | } else if (ls->tok == TK_string) { |
1944 | expr_init(&args, VKSTR, 0); | 1929 | expr_init(&args, VKSTR, 0); |
1945 | args.u.sval = strV(&ls->tokenval); | 1930 | args.u.sval = strV(&ls->tokval); |
1946 | lj_lex_next(ls); | 1931 | lj_lex_next(ls); |
1947 | } else { | 1932 | } else { |
1948 | err_syntax(ls, LJ_ERR_XFUNARG); | 1933 | err_syntax(ls, LJ_ERR_XFUNARG); |
1949 | return; /* Silence compiler. */ | 1934 | return; /* Silence compiler. */ |
1950 | } | 1935 | } |
1951 | lua_assert(e->k == VNONRELOC); | 1936 | lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); |
1952 | base = e->u.s.info; /* Base register for call. */ | 1937 | base = e->u.s.info; /* Base register for call. */ |
1953 | if (args.k == VCALL) { | 1938 | if (args.k == VCALL) { |
1954 | ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); | 1939 | ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2); |
1955 | } else { | 1940 | } else { |
1956 | if (args.k != VVOID) | 1941 | if (args.k != VVOID) |
1957 | expr_tonextreg(fs, &args); | 1942 | expr_tonextreg(fs, &args); |
1958 | ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); | 1943 | ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2); |
1959 | } | 1944 | } |
1960 | expr_init(e, VCALL, bcemit_INS(fs, ins)); | 1945 | expr_init(e, VCALL, bcemit_INS(fs, ins)); |
1961 | e->u.s.aux = base; | 1946 | e->u.s.aux = base; |
@@ -1968,33 +1953,34 @@ static void expr_primary(LexState *ls, ExpDesc *v) | |||
1968 | { | 1953 | { |
1969 | FuncState *fs = ls->fs; | 1954 | FuncState *fs = ls->fs; |
1970 | /* Parse prefix expression. */ | 1955 | /* Parse prefix expression. */ |
1971 | if (ls->token == '(') { | 1956 | if (ls->tok == '(') { |
1972 | BCLine line = ls->linenumber; | 1957 | BCLine line = ls->linenumber; |
1973 | lj_lex_next(ls); | 1958 | lj_lex_next(ls); |
1974 | expr(ls, v); | 1959 | expr(ls, v); |
1975 | lex_match(ls, ')', '(', line); | 1960 | lex_match(ls, ')', '(', line); |
1976 | expr_discharge(ls->fs, v); | 1961 | expr_discharge(ls->fs, v); |
1977 | } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { | 1962 | } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) { |
1978 | var_lookup(ls, v); | 1963 | var_lookup(ls, v); |
1979 | } else { | 1964 | } else { |
1980 | err_syntax(ls, LJ_ERR_XSYMBOL); | 1965 | err_syntax(ls, LJ_ERR_XSYMBOL); |
1981 | } | 1966 | } |
1982 | for (;;) { /* Parse multiple expression suffixes. */ | 1967 | for (;;) { /* Parse multiple expression suffixes. */ |
1983 | if (ls->token == '.') { | 1968 | if (ls->tok == '.') { |
1984 | expr_field(ls, v); | 1969 | expr_field(ls, v); |
1985 | } else if (ls->token == '[') { | 1970 | } else if (ls->tok == '[') { |
1986 | ExpDesc key; | 1971 | ExpDesc key; |
1987 | expr_toanyreg(fs, v); | 1972 | expr_toanyreg(fs, v); |
1988 | expr_bracket(ls, &key); | 1973 | expr_bracket(ls, &key); |
1989 | expr_index(fs, v, &key); | 1974 | expr_index(fs, v, &key); |
1990 | } else if (ls->token == ':') { | 1975 | } else if (ls->tok == ':') { |
1991 | ExpDesc key; | 1976 | ExpDesc key; |
1992 | lj_lex_next(ls); | 1977 | lj_lex_next(ls); |
1993 | expr_str(ls, &key); | 1978 | expr_str(ls, &key); |
1994 | bcemit_method(fs, v, &key); | 1979 | bcemit_method(fs, v, &key); |
1995 | parse_args(ls, v); | 1980 | parse_args(ls, v); |
1996 | } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { | 1981 | } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') { |
1997 | expr_tonextreg(fs, v); | 1982 | expr_tonextreg(fs, v); |
1983 | if (LJ_FR2) bcreg_reserve(fs, 1); | ||
1998 | parse_args(ls, v); | 1984 | parse_args(ls, v); |
1999 | } else { | 1985 | } else { |
2000 | break; | 1986 | break; |
@@ -2005,14 +1991,14 @@ static void expr_primary(LexState *ls, ExpDesc *v) | |||
2005 | /* Parse simple expression. */ | 1991 | /* Parse simple expression. */ |
2006 | static void expr_simple(LexState *ls, ExpDesc *v) | 1992 | static void expr_simple(LexState *ls, ExpDesc *v) |
2007 | { | 1993 | { |
2008 | switch (ls->token) { | 1994 | switch (ls->tok) { |
2009 | case TK_number: | 1995 | case TK_number: |
2010 | expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); | 1996 | expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0); |
2011 | copyTV(ls->L, &v->u.nval, &ls->tokenval); | 1997 | copyTV(ls->L, &v->u.nval, &ls->tokval); |
2012 | break; | 1998 | break; |
2013 | case TK_string: | 1999 | case TK_string: |
2014 | expr_init(v, VKSTR, 0); | 2000 | expr_init(v, VKSTR, 0); |
2015 | v->u.sval = strV(&ls->tokenval); | 2001 | v->u.sval = strV(&ls->tokval); |
2016 | break; | 2002 | break; |
2017 | case TK_nil: | 2003 | case TK_nil: |
2018 | expr_init(v, VKNIL, 0); | 2004 | expr_init(v, VKNIL, 0); |
@@ -2100,11 +2086,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit); | |||
2100 | static void expr_unop(LexState *ls, ExpDesc *v) | 2086 | static void expr_unop(LexState *ls, ExpDesc *v) |
2101 | { | 2087 | { |
2102 | BCOp op; | 2088 | BCOp op; |
2103 | if (ls->token == TK_not) { | 2089 | if (ls->tok == TK_not) { |
2104 | op = BC_NOT; | 2090 | op = BC_NOT; |
2105 | } else if (ls->token == '-') { | 2091 | } else if (ls->tok == '-') { |
2106 | op = BC_UNM; | 2092 | op = BC_UNM; |
2107 | } else if (ls->token == '#') { | 2093 | } else if (ls->tok == '#') { |
2108 | op = BC_LEN; | 2094 | op = BC_LEN; |
2109 | } else { | 2095 | } else { |
2110 | expr_simple(ls, v); | 2096 | expr_simple(ls, v); |
@@ -2121,7 +2107,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit) | |||
2121 | BinOpr op; | 2107 | BinOpr op; |
2122 | synlevel_begin(ls); | 2108 | synlevel_begin(ls); |
2123 | expr_unop(ls, v); | 2109 | expr_unop(ls, v); |
2124 | op = token2binop(ls->token); | 2110 | op = token2binop(ls->tok); |
2125 | while (op != OPR_NOBINOPR && priority[op].left > limit) { | 2111 | while (op != OPR_NOBINOPR && priority[op].left > limit) { |
2126 | ExpDesc v2; | 2112 | ExpDesc v2; |
2127 | BinOpr nextop; | 2113 | BinOpr nextop; |
@@ -2310,9 +2296,9 @@ static void parse_func(LexState *ls, BCLine line) | |||
2310 | lj_lex_next(ls); /* Skip 'function'. */ | 2296 | lj_lex_next(ls); /* Skip 'function'. */ |
2311 | /* Parse function name. */ | 2297 | /* Parse function name. */ |
2312 | var_lookup(ls, &v); | 2298 | var_lookup(ls, &v); |
2313 | while (ls->token == '.') /* Multiple dot-separated fields. */ | 2299 | while (ls->tok == '.') /* Multiple dot-separated fields. */ |
2314 | expr_field(ls, &v); | 2300 | expr_field(ls, &v); |
2315 | if (ls->token == ':') { /* Optional colon to signify method call. */ | 2301 | if (ls->tok == ':') { /* Optional colon to signify method call. */ |
2316 | needself = 1; | 2302 | needself = 1; |
2317 | expr_field(ls, &v); | 2303 | expr_field(ls, &v); |
2318 | } | 2304 | } |
@@ -2325,9 +2311,9 @@ static void parse_func(LexState *ls, BCLine line) | |||
2325 | /* -- Control transfer statements ----------------------------------------- */ | 2311 | /* -- Control transfer statements ----------------------------------------- */ |
2326 | 2312 | ||
2327 | /* Check for end of block. */ | 2313 | /* Check for end of block. */ |
2328 | static int endofblock(LexToken token) | 2314 | static int parse_isend(LexToken tok) |
2329 | { | 2315 | { |
2330 | switch (token) { | 2316 | switch (tok) { |
2331 | case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: | 2317 | case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: |
2332 | return 1; | 2318 | return 1; |
2333 | default: | 2319 | default: |
@@ -2342,7 +2328,7 @@ static void parse_return(LexState *ls) | |||
2342 | FuncState *fs = ls->fs; | 2328 | FuncState *fs = ls->fs; |
2343 | lj_lex_next(ls); /* Skip 'return'. */ | 2329 | lj_lex_next(ls); /* Skip 'return'. */ |
2344 | fs->flags |= PROTO_HAS_RETURN; | 2330 | fs->flags |= PROTO_HAS_RETURN; |
2345 | if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ | 2331 | if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */ |
2346 | ins = BCINS_AD(BC_RET0, 0, 1); | 2332 | ins = BCINS_AD(BC_RET0, 0, 1); |
2347 | } else { /* Return with one or more values. */ | 2333 | } else { /* Return with one or more values. */ |
2348 | ExpDesc e; /* Receives the _last_ expression in the list. */ | 2334 | ExpDesc e; /* Receives the _last_ expression in the list. */ |
@@ -2408,18 +2394,18 @@ static void parse_label(LexState *ls) | |||
2408 | lex_check(ls, TK_label); | 2394 | lex_check(ls, TK_label); |
2409 | /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ | 2395 | /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ |
2410 | for (;;) { | 2396 | for (;;) { |
2411 | if (ls->token == TK_label) { | 2397 | if (ls->tok == TK_label) { |
2412 | synlevel_begin(ls); | 2398 | synlevel_begin(ls); |
2413 | parse_label(ls); | 2399 | parse_label(ls); |
2414 | synlevel_end(ls); | 2400 | synlevel_end(ls); |
2415 | } else if (LJ_52 && ls->token == ';') { | 2401 | } else if (LJ_52 && ls->tok == ';') { |
2416 | lj_lex_next(ls); | 2402 | lj_lex_next(ls); |
2417 | } else { | 2403 | } else { |
2418 | break; | 2404 | break; |
2419 | } | 2405 | } |
2420 | } | 2406 | } |
2421 | /* Trailing label is considered to be outside of scope. */ | 2407 | /* Trailing label is considered to be outside of scope. */ |
2422 | if (endofblock(ls->token) && ls->token != TK_until) | 2408 | if (parse_isend(ls->tok) && ls->tok != TK_until) |
2423 | ls->vstack[idx].slot = fs->bl->nactvar; | 2409 | ls->vstack[idx].slot = fs->bl->nactvar; |
2424 | gola_resolve(ls, fs->bl, idx); | 2410 | gola_resolve(ls, fs->bl, idx); |
2425 | } | 2411 | } |
@@ -2575,7 +2561,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname) | |||
2575 | lex_check(ls, TK_in); | 2561 | lex_check(ls, TK_in); |
2576 | line = ls->linenumber; | 2562 | line = ls->linenumber; |
2577 | assign_adjust(ls, 3, expr_list(ls, &e), &e); | 2563 | assign_adjust(ls, 3, expr_list(ls, &e), &e); |
2578 | bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */ | 2564 | /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */ |
2565 | bcreg_bump(fs, 3+LJ_FR2); | ||
2579 | isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); | 2566 | isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); |
2580 | var_add(ls, 3); /* Hidden control variables. */ | 2567 | var_add(ls, 3); /* Hidden control variables. */ |
2581 | lex_check(ls, TK_do); | 2568 | lex_check(ls, TK_do); |
@@ -2603,9 +2590,9 @@ static void parse_for(LexState *ls, BCLine line) | |||
2603 | fscope_begin(fs, &bl, FSCOPE_LOOP); | 2590 | fscope_begin(fs, &bl, FSCOPE_LOOP); |
2604 | lj_lex_next(ls); /* Skip 'for'. */ | 2591 | lj_lex_next(ls); /* Skip 'for'. */ |
2605 | varname = lex_str(ls); /* Get first variable name. */ | 2592 | varname = lex_str(ls); /* Get first variable name. */ |
2606 | if (ls->token == '=') | 2593 | if (ls->tok == '=') |
2607 | parse_for_num(ls, varname, line); | 2594 | parse_for_num(ls, varname, line); |
2608 | else if (ls->token == ',' || ls->token == TK_in) | 2595 | else if (ls->tok == ',' || ls->tok == TK_in) |
2609 | parse_for_iter(ls, varname); | 2596 | parse_for_iter(ls, varname); |
2610 | else | 2597 | else |
2611 | err_syntax(ls, LJ_ERR_XFOR); | 2598 | err_syntax(ls, LJ_ERR_XFOR); |
@@ -2631,12 +2618,12 @@ static void parse_if(LexState *ls, BCLine line) | |||
2631 | BCPos flist; | 2618 | BCPos flist; |
2632 | BCPos escapelist = NO_JMP; | 2619 | BCPos escapelist = NO_JMP; |
2633 | flist = parse_then(ls); | 2620 | flist = parse_then(ls); |
2634 | while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ | 2621 | while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */ |
2635 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); | 2622 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); |
2636 | jmp_tohere(fs, flist); | 2623 | jmp_tohere(fs, flist); |
2637 | flist = parse_then(ls); | 2624 | flist = parse_then(ls); |
2638 | } | 2625 | } |
2639 | if (ls->token == TK_else) { /* Parse optional 'else' block. */ | 2626 | if (ls->tok == TK_else) { /* Parse optional 'else' block. */ |
2640 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); | 2627 | jmp_append(fs, &escapelist, bcemit_jmp(fs)); |
2641 | jmp_tohere(fs, flist); | 2628 | jmp_tohere(fs, flist); |
2642 | lj_lex_next(ls); /* Skip 'else'. */ | 2629 | lj_lex_next(ls); /* Skip 'else'. */ |
@@ -2654,7 +2641,7 @@ static void parse_if(LexState *ls, BCLine line) | |||
2654 | static int parse_stmt(LexState *ls) | 2641 | static int parse_stmt(LexState *ls) |
2655 | { | 2642 | { |
2656 | BCLine line = ls->linenumber; | 2643 | BCLine line = ls->linenumber; |
2657 | switch (ls->token) { | 2644 | switch (ls->tok) { |
2658 | case TK_if: | 2645 | case TK_if: |
2659 | parse_if(ls, line); | 2646 | parse_if(ls, line); |
2660 | break; | 2647 | break; |
@@ -2713,11 +2700,12 @@ static void parse_chunk(LexState *ls) | |||
2713 | { | 2700 | { |
2714 | int islast = 0; | 2701 | int islast = 0; |
2715 | synlevel_begin(ls); | 2702 | synlevel_begin(ls); |
2716 | while (!islast && !endofblock(ls->token)) { | 2703 | while (!islast && !parse_isend(ls->tok)) { |
2717 | islast = parse_stmt(ls); | 2704 | islast = parse_stmt(ls); |
2718 | lex_opt(ls, ';'); | 2705 | lex_opt(ls, ';'); |
2719 | lua_assert(ls->fs->framesize >= ls->fs->freereg && | 2706 | lj_assertLS(ls->fs->framesize >= ls->fs->freereg && |
2720 | ls->fs->freereg >= ls->fs->nactvar); | 2707 | ls->fs->freereg >= ls->fs->nactvar, |
2708 | "bad regalloc"); | ||
2721 | ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ | 2709 | ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ |
2722 | } | 2710 | } |
2723 | synlevel_end(ls); | 2711 | synlevel_end(ls); |
@@ -2748,13 +2736,12 @@ GCproto *lj_parse(LexState *ls) | |||
2748 | bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ | 2736 | bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ |
2749 | lj_lex_next(ls); /* Read-ahead first token. */ | 2737 | lj_lex_next(ls); /* Read-ahead first token. */ |
2750 | parse_chunk(ls); | 2738 | parse_chunk(ls); |
2751 | if (ls->token != TK_eof) | 2739 | if (ls->tok != TK_eof) |
2752 | err_token(ls, TK_eof); | 2740 | err_token(ls, TK_eof); |
2753 | pt = fs_finish(ls, ls->linenumber); | 2741 | pt = fs_finish(ls, ls->linenumber); |
2754 | L->top--; /* Drop chunkname. */ | 2742 | L->top--; /* Drop chunkname. */ |
2755 | lua_assert(fs.prev == NULL); | 2743 | lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting"); |
2756 | lua_assert(ls->fs == NULL); | 2744 | lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues"); |
2757 | lua_assert(pt->sizeuv == 0); | ||
2758 | return pt; | 2745 | return pt; |
2759 | } | 2746 | } |
2760 | 2747 | ||
diff --git a/src/lj_prng.c b/src/lj_prng.c new file mode 100644 index 00000000..a9bd350a --- /dev/null +++ b/src/lj_prng.c | |||
@@ -0,0 +1,250 @@ | |||
1 | /* | ||
2 | ** Pseudo-random number generation. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_prng_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | /* To get the syscall prototype. */ | ||
10 | #if defined(__linux__) && !defined(_GNU_SOURCE) | ||
11 | #define _GNU_SOURCE | ||
12 | #endif | ||
13 | |||
14 | #include "lj_def.h" | ||
15 | #include "lj_arch.h" | ||
16 | #include "lj_prng.h" | ||
17 | |||
18 | /* -- PRNG step function -------------------------------------------------- */ | ||
19 | |||
20 | /* This implements a Tausworthe PRNG with period 2^223. Based on: | ||
21 | ** Tables of maximally-equidistributed combined LFSR generators, | ||
22 | ** Pierre L'Ecuyer, 1991, table 3, 1st entry. | ||
23 | ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. | ||
24 | ** | ||
25 | ** Important note: This PRNG is NOT suitable for cryptographic use! | ||
26 | ** | ||
27 | ** But it works fine for math.random(), which has an API that's not | ||
28 | ** suitable for cryptography, anyway. | ||
29 | ** | ||
30 | ** When used as a securely seeded global PRNG, it substantially raises | ||
31 | ** the difficulty for various attacks on the VM. | ||
32 | */ | ||
33 | |||
/* Update generator i and compute a running xor of all states.
**
** rs: pointer to the PRNG state; z: uint64_t scratch lvalue;
** r: uint64_t lvalue that accumulates the xor of all updated states;
** i: index into rs->u[]; k, q, s: per-generator constants.
**
** Only the top k bits of u[i] feed the second term: the mask
** (all-ones << (64-k)) clears the low 64-k bits before the shift by s.
**
** Note: this expands to several statements and evaluates its arguments
** more than once. Only pass plain variables and literal constants.
*/
#define TW223_GEN(rs, z, r, i, k, q, s) \
  z = rs->u[i]; \
  z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
  r ^= z; rs->u[i] = z;

/* One full PRNG step: update all four generators in turn.
** The four k constants (63, 58, 55, 47) add up to 223.
*/
#define TW223_STEP(rs, z, r) \
  TW223_GEN(rs, z, r, 0, 63, 31, 18) \
  TW223_GEN(rs, z, r, 1, 58, 19, 28) \
  TW223_GEN(rs, z, r, 2, 55, 24, 7) \
  TW223_GEN(rs, z, r, 3, 47, 21, 8)
45 | |||
46 | /* PRNG step function with uint64_t result. */ | ||
47 | LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs) | ||
48 | { | ||
49 | uint64_t z, r = 0; | ||
50 | TW223_STEP(rs, z, r) | ||
51 | return r; | ||
52 | } | ||
53 | |||
54 | /* PRNG step function with double in uint64_t result. */ | ||
55 | LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs) | ||
56 | { | ||
57 | uint64_t z, r = 0; | ||
58 | TW223_STEP(rs, z, r) | ||
59 | /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */ | ||
60 | return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); | ||
61 | } | ||
62 | |||
63 | /* Condition seed: ensure k[i] MSB of u[i] are non-zero. */ | ||
64 | static LJ_AINLINE void lj_prng_condition(PRNGState *rs) | ||
65 | { | ||
66 | if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1); | ||
67 | if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6); | ||
68 | if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9); | ||
69 | if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17); | ||
70 | } | ||
71 | |||
72 | /* -- PRNG seeding from OS ------------------------------------------------ */ | ||
73 | |||
74 | #if LUAJIT_SECURITY_PRNG == 0 | ||
75 | |||
76 | /* Nothing to define. */ | ||
77 | |||
78 | #elif LJ_TARGET_XBOX360 | ||
79 | |||
80 | extern int XNetRandom(void *buf, unsigned int len); | ||
81 | |||
82 | #elif LJ_TARGET_PS3 | ||
83 | |||
84 | extern int sys_get_random_number(void *buf, uint64_t len); | ||
85 | |||
86 | #elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA | ||
87 | |||
88 | extern int sceRandomGetRandomNumber(void *buf, size_t len); | ||
89 | |||
90 | #elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE | ||
91 | |||
92 | #define WIN32_LEAN_AND_MEAN | ||
93 | #include <windows.h> | ||
94 | |||
95 | #if LJ_TARGET_UWP || LJ_TARGET_XBOXONE | ||
96 | /* Must use BCryptGenRandom. */ | ||
97 | #include <bcrypt.h> | ||
98 | #pragma comment(lib, "bcrypt.lib") | ||
99 | #else | ||
100 | /* If you wonder about this mess, then search online for RtlGenRandom. */ | ||
101 | typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len); | ||
102 | static PRGR libfunc_rgr; | ||
103 | #endif | ||
104 | |||
105 | #elif LJ_TARGET_POSIX | ||
106 | |||
107 | #if LJ_TARGET_LINUX | ||
108 | /* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */ | ||
109 | #include <sys/syscall.h> | ||
110 | #else | ||
111 | |||
112 | #if LJ_TARGET_OSX && !LJ_TARGET_IOS | ||
113 | /* | ||
114 | ** In their infinite wisdom Apple decided to disallow getentropy() in the | ||
115 | ** iOS App Store. Even though the call is common to all BSD-ish OS, it's | ||
116 | ** recommended by Apple in their own security-related docs, and, to top | ||
117 | ** off the foolery, /dev/urandom is handled by the same kernel code, | ||
118 | ** yet accessing it is actually permitted (but less efficient). | ||
119 | */ | ||
120 | #include <Availability.h> | ||
121 | #if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 | ||
122 | #define LJ_TARGET_HAS_GETENTROPY 1 | ||
123 | #endif | ||
124 | #elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN | ||
125 | #define LJ_TARGET_HAS_GETENTROPY 1 | ||
126 | #endif | ||
127 | |||
#if LJ_TARGET_HAS_GETENTROPY
/* Declare getentropy() ourselves. On ELF targets the symbol is declared
** weak, so its address can be tested before calling it.
**
** The declaration must not be terminated before the attribute: the weak
** attribute has to be part of this very declaration, hence the lone ';'
** after the conditional.
*/
extern int getentropy(void *buf, size_t len)
#ifdef __ELF__
  __attribute__((weak))
#endif
  ;
#endif
135 | |||
136 | #endif | ||
137 | |||
138 | /* For the /dev/urandom fallback. */ | ||
139 | #include <fcntl.h> | ||
140 | #include <unistd.h> | ||
141 | |||
142 | #endif | ||
143 | |||
144 | #if LUAJIT_SECURITY_PRNG == 0 | ||
145 | |||
146 | /* If you really don't care about security, then define | ||
147 | ** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed | ||
148 | ** and provides NO SECURITY against various attacks on the VM. | ||
149 | ** | ||
150 | ** BTW: This is NOT the way to get predictable table iteration, | ||
151 | ** predictable trace generation, predictable bytecode generation, etc. | ||
152 | */ | ||
153 | int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) | ||
154 | { | ||
155 | lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */ | ||
156 | return 1; | ||
157 | } | ||
158 | |||
159 | #else | ||
160 | |||
161 | /* Securely seed PRNG from system entropy. Returns 0 on failure. */ | ||
162 | int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) | ||
163 | { | ||
164 | #if LJ_TARGET_XBOX360 | ||
165 | |||
166 | if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0) | ||
167 | goto ok; | ||
168 | |||
169 | #elif LJ_TARGET_PS3 | ||
170 | |||
171 | if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0) | ||
172 | goto ok; | ||
173 | |||
174 | #elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA | ||
175 | |||
176 | if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0) | ||
177 | goto ok; | ||
178 | |||
179 | #elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE | ||
180 | |||
181 | if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u), | ||
182 | BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0) | ||
183 | goto ok; | ||
184 | |||
185 | #elif LJ_TARGET_WINDOWS | ||
186 | |||
187 | /* Keep the library loaded in case multiple VMs are started. */ | ||
188 | if (!libfunc_rgr) { | ||
189 | HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll"); | ||
190 | if (!lib) return 0; | ||
191 | libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036"); | ||
192 | if (!libfunc_rgr) return 0; | ||
193 | } | ||
194 | if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u))) | ||
195 | goto ok; | ||
196 | |||
197 | #elif LJ_TARGET_POSIX | ||
198 | |||
199 | #if LJ_TARGET_LINUX && defined(SYS_getrandom) | ||
200 | |||
201 | if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u)) | ||
202 | goto ok; | ||
203 | |||
204 | #elif LJ_TARGET_HAS_GETENTROPY | ||
205 | |||
206 | #ifdef __ELF__ | ||
207 | if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0) | ||
208 | goto ok; | ||
209 | #else | ||
210 | if (getentropy(rs->u, sizeof(rs->u)) == 0) | ||
211 | goto ok; | ||
212 | #endif | ||
213 | |||
214 | #endif | ||
215 | |||
216 | /* Fallback to /dev/urandom. This may fail if the device is not | ||
217 | ** existent or accessible in a chroot or container, or if the process | ||
218 | ** or the OS ran out of file descriptors. | ||
219 | */ | ||
220 | { | ||
221 | int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC); | ||
222 | if (fd != -1) { | ||
223 | ssize_t n = read(fd, rs->u, sizeof(rs->u)); | ||
224 | (void)close(fd); | ||
225 | if (n == (ssize_t)sizeof(rs->u)) | ||
226 | goto ok; | ||
227 | } | ||
228 | } | ||
229 | |||
230 | #else | ||
231 | |||
232 | /* Add an elif above for your OS with a secure PRNG seed. | ||
233 | ** Note that fiddling around with rand(), getpid(), time() or coercing | ||
234 | ** ASLR to yield a few bits of randomness is not helpful. | ||
235 | ** If you don't want any security, then don't pretend you have any | ||
236 | ** and simply define LUAJIT_SECURITY_PRNG=0 for the build. | ||
237 | */ | ||
238 | #error "Missing secure PRNG seed for this OS" | ||
239 | |||
240 | #endif | ||
241 | return 0; /* Fail. */ | ||
242 | |||
243 | ok: | ||
244 | lj_prng_condition(rs); | ||
245 | (void)lj_prng_u64(rs); | ||
246 | return 1; /* Success. */ | ||
247 | } | ||
248 | |||
249 | #endif | ||
250 | |||
diff --git a/src/lj_prng.h b/src/lj_prng.h new file mode 100644 index 00000000..bdc958ab --- /dev/null +++ b/src/lj_prng.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | ** Pseudo-random number generation. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_PRNG_H | ||
7 | #define _LJ_PRNG_H | ||
8 | |||
9 | #include "lj_def.h" | ||
10 | |||
11 | LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs); | ||
12 | LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs); | ||
13 | LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs); | ||
14 | |||
15 | /* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */ | ||
16 | static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs) | ||
17 | { | ||
18 | rs->u[0] = U64x(a0d27757,0a345b8c); | ||
19 | rs->u[1] = U64x(764a296c,5d4aa64f); | ||
20 | rs->u[2] = U64x(51220704,070adeaa); | ||
21 | rs->u[3] = U64x(2a2717b5,a7b7b927); | ||
22 | } | ||
23 | |||
24 | #endif | ||
diff --git a/src/lj_profile.c b/src/lj_profile.c new file mode 100644 index 00000000..f0af91cb --- /dev/null +++ b/src/lj_profile.c | |||
@@ -0,0 +1,367 @@ | |||
1 | /* | ||
2 | ** Low-overhead profiling. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_profile_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASPROFILE | ||
12 | |||
13 | #include "lj_buf.h" | ||
14 | #include "lj_frame.h" | ||
15 | #include "lj_debug.h" | ||
16 | #include "lj_dispatch.h" | ||
17 | #if LJ_HASJIT | ||
18 | #include "lj_jit.h" | ||
19 | #include "lj_trace.h" | ||
20 | #endif | ||
21 | #include "lj_profile.h" | ||
22 | |||
23 | #include "luajit.h" | ||
24 | |||
25 | #if LJ_PROFILE_SIGPROF | ||
26 | |||
27 | #include <sys/time.h> | ||
28 | #include <signal.h> | ||
29 | #define profile_lock(ps) UNUSED(ps) | ||
30 | #define profile_unlock(ps) UNUSED(ps) | ||
31 | |||
32 | #elif LJ_PROFILE_PTHREAD | ||
33 | |||
34 | #include <pthread.h> | ||
35 | #include <time.h> | ||
36 | #if LJ_TARGET_PS3 | ||
37 | #include <sys/timer.h> | ||
38 | #endif | ||
39 | #define profile_lock(ps) pthread_mutex_lock(&ps->lock) | ||
40 | #define profile_unlock(ps) pthread_mutex_unlock(&ps->lock) | ||
41 | |||
42 | #elif LJ_PROFILE_WTHREAD | ||
43 | |||
44 | #define WIN32_LEAN_AND_MEAN | ||
45 | #if LJ_TARGET_XBOX360 | ||
46 | #include <xtl.h> | ||
47 | #include <xbox.h> | ||
48 | #else | ||
49 | #include <windows.h> | ||
50 | #endif | ||
51 | typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int); | ||
52 | #define profile_lock(ps) EnterCriticalSection(&ps->lock) | ||
53 | #define profile_unlock(ps) LeaveCriticalSection(&ps->lock) | ||
54 | |||
55 | #endif | ||
56 | |||
/* Profiler state.
**
** The common fields are followed by the fields of exactly one timer
** backend, selected at compile time (SIGPROF, pthread or Windows thread).
*/
typedef struct ProfileState {
  global_State *g;		/* VM state that started the profiler. */
  luaJIT_profile_callback cb;	/* Profiler callback. */
  void *data;			/* Profiler callback data. */
  SBuf sb;			/* String buffer for stack dumps. */
  int interval;			/* Sample interval in milliseconds. */
  int samples;			/* Number of samples for next callback. */
  int vmstate;			/* VM state when profile timer triggered. */
#if LJ_PROFILE_SIGPROF
  struct sigaction oldsa;	/* Previous SIGPROF state. */
#elif LJ_PROFILE_PTHREAD
  pthread_mutex_t lock;		/* g->hookmask update lock. */
  pthread_t thread;		/* Timer thread. */
  int abort;			/* Abort timer thread. */
#elif LJ_PROFILE_WTHREAD
#if LJ_TARGET_WINDOWS
  HINSTANCE wmm;		/* WinMM library handle. */
  WMM_TPFUNC wmm_tbp;		/* WinMM timeBeginPeriod function. */
  WMM_TPFUNC wmm_tep;		/* WinMM timeEndPeriod function. */
#endif
  CRITICAL_SECTION lock;	/* g->hookmask update lock. */
  HANDLE thread;		/* Timer thread. */
  int abort;			/* Abort timer thread. */
#endif
} ProfileState;
83 | |||
84 | /* Sadly, we have to use a static profiler state. | ||
85 | ** | ||
86 | ** The SIGPROF variant needs a static pointer to the global state, anyway. | ||
87 | ** And it would be hard to extend for multiple threads. You can still use | ||
88 | ** multiple VMs in multiple threads, but only profile one at a time. | ||
89 | */ | ||
90 | static ProfileState profile_state; | ||
91 | |||
92 | /* Default sample interval in milliseconds. */ | ||
93 | #define LJ_PROFILE_INTERVAL_DEFAULT 10 | ||
94 | |||
95 | /* -- Profiler/hook interaction ------------------------------------------- */ | ||
96 | |||
97 | #if !LJ_PROFILE_SIGPROF | ||
98 | void LJ_FASTCALL lj_profile_hook_enter(global_State *g) | ||
99 | { | ||
100 | ProfileState *ps = &profile_state; | ||
101 | if (ps->g) { | ||
102 | profile_lock(ps); | ||
103 | hook_enter(g); | ||
104 | profile_unlock(ps); | ||
105 | } else { | ||
106 | hook_enter(g); | ||
107 | } | ||
108 | } | ||
109 | |||
110 | void LJ_FASTCALL lj_profile_hook_leave(global_State *g) | ||
111 | { | ||
112 | ProfileState *ps = &profile_state; | ||
113 | if (ps->g) { | ||
114 | profile_lock(ps); | ||
115 | hook_leave(g); | ||
116 | profile_unlock(ps); | ||
117 | } else { | ||
118 | hook_leave(g); | ||
119 | } | ||
120 | } | ||
121 | #endif | ||
122 | |||
123 | /* -- Profile callbacks --------------------------------------------------- */ | ||
124 | |||
125 | /* Callback from profile hook (HOOK_PROFILE already cleared). */ | ||
126 | void LJ_FASTCALL lj_profile_interpreter(lua_State *L) | ||
127 | { | ||
128 | ProfileState *ps = &profile_state; | ||
129 | global_State *g = G(L); | ||
130 | uint8_t mask; | ||
131 | profile_lock(ps); | ||
132 | mask = (g->hookmask & ~HOOK_PROFILE); | ||
133 | if (!(mask & HOOK_VMEVENT)) { | ||
134 | int samples = ps->samples; | ||
135 | ps->samples = 0; | ||
136 | g->hookmask = HOOK_VMEVENT; | ||
137 | lj_dispatch_update(g); | ||
138 | profile_unlock(ps); | ||
139 | ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */ | ||
140 | profile_lock(ps); | ||
141 | mask |= (g->hookmask & HOOK_PROFILE); | ||
142 | } | ||
143 | g->hookmask = mask; | ||
144 | lj_dispatch_update(g); | ||
145 | profile_unlock(ps); | ||
146 | } | ||
147 | |||
148 | /* Trigger profile hook. Asynchronous call from OS-specific profile timer. */ | ||
149 | static void profile_trigger(ProfileState *ps) | ||
150 | { | ||
151 | global_State *g = ps->g; | ||
152 | uint8_t mask; | ||
153 | profile_lock(ps); | ||
154 | ps->samples++; /* Always increment number of samples. */ | ||
155 | mask = g->hookmask; | ||
156 | if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */ | ||
157 | int st = g->vmstate; | ||
158 | ps->vmstate = st >= 0 ? 'N' : | ||
159 | st == ~LJ_VMST_INTERP ? 'I' : | ||
160 | st == ~LJ_VMST_C ? 'C' : | ||
161 | st == ~LJ_VMST_GC ? 'G' : 'J'; | ||
162 | g->hookmask = (mask | HOOK_PROFILE); | ||
163 | lj_dispatch_update(g); | ||
164 | } | ||
165 | profile_unlock(ps); | ||
166 | } | ||
167 | |||
168 | /* -- OS-specific profile timer handling ---------------------------------- */ | ||
169 | |||
170 | #if LJ_PROFILE_SIGPROF | ||
171 | |||
172 | /* SIGPROF handler. */ | ||
173 | static void profile_signal(int sig) | ||
174 | { | ||
175 | UNUSED(sig); | ||
176 | profile_trigger(&profile_state); | ||
177 | } | ||
178 | |||
179 | /* Start profiling timer. */ | ||
180 | static void profile_timer_start(ProfileState *ps) | ||
181 | { | ||
182 | int interval = ps->interval; | ||
183 | struct itimerval tm; | ||
184 | struct sigaction sa; | ||
185 | tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000; | ||
186 | tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000; | ||
187 | setitimer(ITIMER_PROF, &tm, NULL); | ||
188 | sa.sa_flags = SA_RESTART; | ||
189 | sa.sa_handler = profile_signal; | ||
190 | sigemptyset(&sa.sa_mask); | ||
191 | sigaction(SIGPROF, &sa, &ps->oldsa); | ||
192 | } | ||
193 | |||
194 | /* Stop profiling timer. */ | ||
195 | static void profile_timer_stop(ProfileState *ps) | ||
196 | { | ||
197 | struct itimerval tm; | ||
198 | tm.it_value.tv_sec = tm.it_interval.tv_sec = 0; | ||
199 | tm.it_value.tv_usec = tm.it_interval.tv_usec = 0; | ||
200 | setitimer(ITIMER_PROF, &tm, NULL); | ||
201 | sigaction(SIGPROF, &ps->oldsa, NULL); | ||
202 | } | ||
203 | |||
204 | #elif LJ_PROFILE_PTHREAD | ||
205 | |||
206 | /* POSIX timer thread. */ | ||
207 | static void *profile_thread(ProfileState *ps) | ||
208 | { | ||
209 | int interval = ps->interval; | ||
210 | #if !LJ_TARGET_PS3 | ||
211 | struct timespec ts; | ||
212 | ts.tv_sec = interval / 1000; | ||
213 | ts.tv_nsec = (interval % 1000) * 1000000; | ||
214 | #endif | ||
215 | while (1) { | ||
216 | #if LJ_TARGET_PS3 | ||
217 | sys_timer_usleep(interval * 1000); | ||
218 | #else | ||
219 | nanosleep(&ts, NULL); | ||
220 | #endif | ||
221 | if (ps->abort) break; | ||
222 | profile_trigger(ps); | ||
223 | } | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | /* Start profiling timer thread. */ | ||
228 | static void profile_timer_start(ProfileState *ps) | ||
229 | { | ||
230 | pthread_mutex_init(&ps->lock, 0); | ||
231 | ps->abort = 0; | ||
232 | pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps); | ||
233 | } | ||
234 | |||
/* Stop profiling timer thread.
** Sets the abort flag, waits for the timer thread to exit and only then
** destroys the lock.
*/
static void profile_timer_stop(ProfileState *ps)
{
  ps->abort = 1;  /* Checked by the timer thread after each sleep. */
  pthread_join(ps->thread, NULL);
  pthread_mutex_destroy(&ps->lock);  /* The timer thread has exited. */
}
242 | |||
243 | #elif LJ_PROFILE_WTHREAD | ||
244 | |||
245 | /* Windows timer thread. */ | ||
246 | static DWORD WINAPI profile_thread(void *psx) | ||
247 | { | ||
248 | ProfileState *ps = (ProfileState *)psx; | ||
249 | int interval = ps->interval; | ||
250 | #if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP | ||
251 | ps->wmm_tbp(interval); | ||
252 | #endif | ||
253 | while (1) { | ||
254 | Sleep(interval); | ||
255 | if (ps->abort) break; | ||
256 | profile_trigger(ps); | ||
257 | } | ||
258 | #if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP | ||
259 | ps->wmm_tep(interval); | ||
260 | #endif | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | /* Start profiling timer thread. */ | ||
265 | static void profile_timer_start(ProfileState *ps) | ||
266 | { | ||
267 | #if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP | ||
268 | if (!ps->wmm) { /* Load WinMM library on-demand. */ | ||
269 | ps->wmm = LJ_WIN_LOADLIBA("winmm.dll"); | ||
270 | if (ps->wmm) { | ||
271 | ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); | ||
272 | ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); | ||
273 | if (!ps->wmm_tbp || !ps->wmm_tep) { | ||
274 | ps->wmm = NULL; | ||
275 | return; | ||
276 | } | ||
277 | } | ||
278 | } | ||
279 | #endif | ||
280 | InitializeCriticalSection(&ps->lock); | ||
281 | ps->abort = 0; | ||
282 | ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL); | ||
283 | } | ||
284 | |||
285 | /* Stop profiling timer thread. */ | ||
286 | static void profile_timer_stop(ProfileState *ps) | ||
287 | { | ||
288 | ps->abort = 1; | ||
289 | WaitForSingleObject(ps->thread, INFINITE); | ||
290 | DeleteCriticalSection(&ps->lock); | ||
291 | } | ||
292 | |||
293 | #endif | ||
294 | |||
295 | /* -- Public profiling API ------------------------------------------------ */ | ||
296 | |||
297 | /* Start profiling. */ | ||
298 | LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, | ||
299 | luaJIT_profile_callback cb, void *data) | ||
300 | { | ||
301 | ProfileState *ps = &profile_state; | ||
302 | int interval = LJ_PROFILE_INTERVAL_DEFAULT; | ||
303 | while (*mode) { | ||
304 | int m = *mode++; | ||
305 | switch (m) { | ||
306 | case 'i': | ||
307 | interval = 0; | ||
308 | while (*mode >= '0' && *mode <= '9') | ||
309 | interval = interval * 10 + (*mode++ - '0'); | ||
310 | if (interval <= 0) interval = 1; | ||
311 | break; | ||
312 | #if LJ_HASJIT | ||
313 | case 'l': case 'f': | ||
314 | L2J(L)->prof_mode = m; | ||
315 | lj_trace_flushall(L); | ||
316 | break; | ||
317 | #endif | ||
318 | default: /* Ignore unknown mode chars. */ | ||
319 | break; | ||
320 | } | ||
321 | } | ||
322 | if (ps->g) { | ||
323 | luaJIT_profile_stop(L); | ||
324 | if (ps->g) return; /* Profiler in use by another VM. */ | ||
325 | } | ||
326 | ps->g = G(L); | ||
327 | ps->interval = interval; | ||
328 | ps->cb = cb; | ||
329 | ps->data = data; | ||
330 | ps->samples = 0; | ||
331 | lj_buf_init(L, &ps->sb); | ||
332 | profile_timer_start(ps); | ||
333 | } | ||
334 | |||
335 | /* Stop profiling. */ | ||
336 | LUA_API void luaJIT_profile_stop(lua_State *L) | ||
337 | { | ||
338 | ProfileState *ps = &profile_state; | ||
339 | global_State *g = ps->g; | ||
340 | if (G(L) == g) { /* Only stop profiler if started by this VM. */ | ||
341 | profile_timer_stop(ps); | ||
342 | g->hookmask &= ~HOOK_PROFILE; | ||
343 | lj_dispatch_update(g); | ||
344 | #if LJ_HASJIT | ||
345 | G2J(g)->prof_mode = 0; | ||
346 | lj_trace_flushall(L); | ||
347 | #endif | ||
348 | lj_buf_free(g, &ps->sb); | ||
349 | ps->sb.w = ps->sb.e = NULL; | ||
350 | ps->g = NULL; | ||
351 | } | ||
352 | } | ||
353 | |||
354 | /* Return a compact stack dump. */ | ||
355 | LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, | ||
356 | int depth, size_t *len) | ||
357 | { | ||
358 | ProfileState *ps = &profile_state; | ||
359 | SBuf *sb = &ps->sb; | ||
360 | setsbufL(sb, L); | ||
361 | lj_buf_reset(sb); | ||
362 | lj_debug_dumpstack(L, sb, fmt, depth); | ||
363 | *len = (size_t)sbuflen(sb); | ||
364 | return sb->b; | ||
365 | } | ||
366 | |||
367 | #endif | ||
diff --git a/src/lj_profile.h b/src/lj_profile.h new file mode 100644 index 00000000..3969f8e8 --- /dev/null +++ b/src/lj_profile.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | ** Low-overhead profiling. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_PROFILE_H | ||
7 | #define _LJ_PROFILE_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASPROFILE | ||
12 | |||
13 | LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L); | ||
14 | #if !LJ_PROFILE_SIGPROF | ||
15 | LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g); | ||
16 | LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g); | ||
17 | #endif | ||
18 | |||
19 | #endif | ||
20 | |||
21 | #endif | ||
diff --git a/src/lj_record.c b/src/lj_record.c index f7552db0..44163e5b 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -20,6 +20,9 @@ | |||
20 | #endif | 20 | #endif |
21 | #include "lj_bc.h" | 21 | #include "lj_bc.h" |
22 | #include "lj_ff.h" | 22 | #include "lj_ff.h" |
23 | #if LJ_HASPROFILE | ||
24 | #include "lj_debug.h" | ||
25 | #endif | ||
23 | #include "lj_ir.h" | 26 | #include "lj_ir.h" |
24 | #include "lj_jit.h" | 27 | #include "lj_jit.h" |
25 | #include "lj_ircall.h" | 28 | #include "lj_ircall.h" |
@@ -30,6 +33,7 @@ | |||
30 | #include "lj_snap.h" | 33 | #include "lj_snap.h" |
31 | #include "lj_dispatch.h" | 34 | #include "lj_dispatch.h" |
32 | #include "lj_vm.h" | 35 | #include "lj_vm.h" |
36 | #include "lj_prng.h" | ||
33 | 37 | ||
34 | /* Some local macros to save typing. Undef'd at the end. */ | 38 | /* Some local macros to save typing. Undef'd at the end. */ |
35 | #define IR(ref) (&J->cur.ir[(ref)]) | 39 | #define IR(ref) (&J->cur.ir[(ref)]) |
@@ -47,31 +51,52 @@ | |||
47 | static void rec_check_ir(jit_State *J) | 51 | static void rec_check_ir(jit_State *J) |
48 | { | 52 | { |
49 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; | 53 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; |
50 | lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); | 54 | lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536, |
51 | for (i = nins-1; i >= nk; i--) { | 55 | "inconsistent IR layout"); |
56 | for (i = nk; i < nins; i++) { | ||
52 | IRIns *ir = IR(i); | 57 | IRIns *ir = IR(i); |
53 | uint32_t mode = lj_ir_mode[ir->o]; | 58 | uint32_t mode = lj_ir_mode[ir->o]; |
54 | IRRef op1 = ir->op1; | 59 | IRRef op1 = ir->op1; |
55 | IRRef op2 = ir->op2; | 60 | IRRef op2 = ir->op2; |
61 | const char *err = NULL; | ||
56 | switch (irm_op1(mode)) { | 62 | switch (irm_op1(mode)) { |
57 | case IRMnone: lua_assert(op1 == 0); break; | 63 | case IRMnone: |
58 | case IRMref: lua_assert(op1 >= nk); | 64 | if (op1 != 0) err = "IRMnone op1 used"; |
59 | lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; | 65 | break; |
66 | case IRMref: | ||
67 | if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i)) | ||
68 | err = "IRMref op1 out of range"; | ||
69 | break; | ||
60 | case IRMlit: break; | 70 | case IRMlit: break; |
61 | case IRMcst: lua_assert(i < REF_BIAS); continue; | 71 | case IRMcst: |
72 | if (i >= REF_BIAS) { err = "constant in IR range"; break; } | ||
73 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
74 | i++; | ||
75 | continue; | ||
62 | } | 76 | } |
63 | switch (irm_op2(mode)) { | 77 | switch (irm_op2(mode)) { |
64 | case IRMnone: lua_assert(op2 == 0); break; | 78 | case IRMnone: |
65 | case IRMref: lua_assert(op2 >= nk); | 79 | if (op2) err = "IRMnone op2 used"; |
66 | lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; | 80 | break; |
81 | case IRMref: | ||
82 | if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i)) | ||
83 | err = "IRMref op2 out of range"; | ||
84 | break; | ||
67 | case IRMlit: break; | 85 | case IRMlit: break; |
68 | case IRMcst: lua_assert(0); break; | 86 | case IRMcst: err = "IRMcst op2"; break; |
69 | } | 87 | } |
70 | if (ir->prev) { | 88 | if (!err && ir->prev) { |
71 | lua_assert(ir->prev >= nk); | 89 | if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i)) |
72 | lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); | 90 | err = "chain out of range"; |
73 | lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); | 91 | else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o) |
92 | err = "chain to different op"; | ||
74 | } | 93 | } |
94 | lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s", | ||
95 | i-REF_BIAS, | ||
96 | ir->o, | ||
97 | irm_op1(mode) == IRMref ? op1-REF_BIAS : op1, | ||
98 | irm_op2(mode) == IRMref ? op2-REF_BIAS : op2, | ||
99 | err); | ||
75 | } | 100 | } |
76 | } | 101 | } |
77 | 102 | ||
@@ -81,48 +106,79 @@ static void rec_check_slots(jit_State *J) | |||
81 | BCReg s, nslots = J->baseslot + J->maxslot; | 106 | BCReg s, nslots = J->baseslot + J->maxslot; |
82 | int32_t depth = 0; | 107 | int32_t depth = 0; |
83 | cTValue *base = J->L->base - J->baseslot; | 108 | cTValue *base = J->L->base - J->baseslot; |
84 | lua_assert(J->baseslot >= 1); | 109 | lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot"); |
85 | lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); | 110 | lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME), |
86 | lua_assert(nslots <= LJ_MAX_JSLOTS); | 111 | "baseslot does not point to frame"); |
112 | lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow"); | ||
87 | for (s = 0; s < nslots; s++) { | 113 | for (s = 0; s < nslots; s++) { |
88 | TRef tr = J->slot[s]; | 114 | TRef tr = J->slot[s]; |
89 | if (tr) { | 115 | if (tr) { |
90 | cTValue *tv = &base[s]; | 116 | cTValue *tv = &base[s]; |
91 | IRRef ref = tref_ref(tr); | 117 | IRRef ref = tref_ref(tr); |
92 | IRIns *ir; | 118 | IRIns *ir = NULL; /* Silence compiler. */ |
93 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); | 119 | if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { |
94 | ir = IR(ref); | 120 | lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins, |
95 | lua_assert(irt_t(ir->t) == tref_t(tr)); | 121 | "slot %d ref %04d out of range", s, ref - REF_BIAS); |
122 | ir = IR(ref); | ||
123 | lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s); | ||
124 | } | ||
96 | if (s == 0) { | 125 | if (s == 0) { |
97 | lua_assert(tref_isfunc(tr)); | 126 | lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function"); |
127 | #if LJ_FR2 | ||
128 | } else if (s == 1) { | ||
129 | lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1"); | ||
130 | #endif | ||
98 | } else if ((tr & TREF_FRAME)) { | 131 | } else if ((tr & TREF_FRAME)) { |
99 | GCfunc *fn = gco2func(frame_gc(tv)); | 132 | GCfunc *fn = gco2func(frame_gc(tv)); |
100 | BCReg delta = (BCReg)(tv - frame_prev(tv)); | 133 | BCReg delta = (BCReg)(tv - frame_prev(tv)); |
101 | lua_assert(tref_isfunc(tr)); | 134 | #if LJ_FR2 |
102 | if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); | 135 | lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, |
103 | lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); | 136 | "frame slot %d PC mismatch", s); |
137 | tr = J->slot[s-1]; | ||
138 | ir = IR(tref_ref(tr)); | ||
139 | #endif | ||
140 | lj_assertJ(tref_isfunc(tr), | ||
141 | "frame slot %d is not a function", s-LJ_FR2); | ||
142 | lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir), | ||
143 | "frame slot %d function mismatch", s-LJ_FR2); | ||
144 | lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) | ||
145 | : (s == delta + LJ_FR2), | ||
146 | "frame slot %d broken chain", s-LJ_FR2); | ||
104 | depth++; | 147 | depth++; |
105 | } else if ((tr & TREF_CONT)) { | 148 | } else if ((tr & TREF_CONT)) { |
106 | lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); | 149 | #if LJ_FR2 |
107 | lua_assert((J->slot[s+1] & TREF_FRAME)); | 150 | lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, |
151 | "cont slot %d continuation mismatch", s); | ||
152 | #else | ||
153 | lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void), | ||
154 | "cont slot %d continuation mismatch", s); | ||
155 | #endif | ||
156 | lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), | ||
157 | "cont slot %d not followed by frame", s); | ||
108 | depth++; | 158 | depth++; |
159 | } else if ((tr & TREF_KEYINDEX)) { | ||
160 | lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d", | ||
161 | s, tref_type(tr)); | ||
109 | } else { | 162 | } else { |
110 | if (tvisnumber(tv)) | 163 | /* Number repr. may differ, but other types must be the same. */ |
111 | lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ | 164 | lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : |
112 | else | 165 | itype2irt(tv) == tref_type(tr), |
113 | lua_assert(itype2irt(tv) == tref_type(tr)); | 166 | "slot %d type mismatch: stack type %d vs IR type %d", |
167 | s, itypemap(tv), tref_type(tr)); | ||
114 | if (tref_isk(tr)) { /* Compare constants. */ | 168 | if (tref_isk(tr)) { /* Compare constants. */ |
115 | TValue tvk; | 169 | TValue tvk; |
116 | lj_ir_kvalue(J->L, &tvk, ir); | 170 | lj_ir_kvalue(J->L, &tvk, ir); |
117 | if (!(tvisnum(&tvk) && tvisnan(&tvk))) | 171 | lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ? |
118 | lua_assert(lj_obj_equal(tv, &tvk)); | 172 | (tvisnum(tv) && tvisnan(tv)) : |
119 | else | 173 | lj_obj_equal(tv, &tvk), |
120 | lua_assert(tvisnum(tv) && tvisnan(tv)); | 174 | "slot %d const mismatch: stack %016llx vs IR %016llx", |
175 | s, tv->u64, tvk.u64); | ||
121 | } | 176 | } |
122 | } | 177 | } |
123 | } | 178 | } |
124 | } | 179 | } |
125 | lua_assert(J->framedepth == depth); | 180 | lj_assertJ(J->framedepth == depth, |
181 | "frame depth mismatch %d vs %d", J->framedepth, depth); | ||
126 | } | 182 | } |
127 | #endif | 183 | #endif |
128 | 184 | ||
@@ -156,10 +212,11 @@ static TRef sload(jit_State *J, int32_t slot) | |||
156 | /* Get TRef for current function. */ | 212 | /* Get TRef for current function. */ |
157 | static TRef getcurrf(jit_State *J) | 213 | static TRef getcurrf(jit_State *J) |
158 | { | 214 | { |
159 | if (J->base[-1]) | 215 | if (J->base[-1-LJ_FR2]) |
160 | return J->base[-1]; | 216 | return J->base[-1-LJ_FR2]; |
161 | lua_assert(J->baseslot == 1); | 217 | /* Non-base frame functions ought to be loaded already. */ |
162 | return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); | 218 | lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot"); |
219 | return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); | ||
163 | } | 220 | } |
164 | 221 | ||
165 | /* Compare for raw object equality. | 222 | /* Compare for raw object equality. |
@@ -205,6 +262,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o) | |||
205 | return 0; /* Can't represent lightuserdata (pointless). */ | 262 | return 0; /* Can't represent lightuserdata (pointless). */ |
206 | } | 263 | } |
207 | 264 | ||
265 | /* Emit a VLOAD with the correct type. */ | ||
266 | TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t) | ||
267 | { | ||
268 | TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx); | ||
269 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | ||
270 | return tr; | ||
271 | } | ||
272 | |||
208 | /* -- Record loop ops ----------------------------------------------------- */ | 273 | /* -- Record loop ops ----------------------------------------------------- */ |
209 | 274 | ||
210 | /* Loop event. */ | 275 | /* Loop event. */ |
@@ -221,17 +286,21 @@ static void canonicalize_slots(jit_State *J) | |||
221 | if (LJ_DUALNUM) return; | 286 | if (LJ_DUALNUM) return; |
222 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { | 287 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { |
223 | TRef tr = J->slot[s]; | 288 | TRef tr = J->slot[s]; |
224 | if (tref_isinteger(tr)) { | 289 | if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) { |
225 | IRIns *ir = IR(tref_ref(tr)); | 290 | IRIns *ir = IR(tref_ref(tr)); |
226 | if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) | 291 | if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY)))) |
227 | J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); | 292 | J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); |
228 | } | 293 | } |
229 | } | 294 | } |
230 | } | 295 | } |
231 | 296 | ||
232 | /* Stop recording. */ | 297 | /* Stop recording. */ |
233 | static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) | 298 | void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk) |
234 | { | 299 | { |
300 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
301 | if (J->retryrec) | ||
302 | lj_trace_err(J, LJ_TRERR_RETRY); | ||
303 | #endif | ||
235 | lj_trace_end(J); | 304 | lj_trace_end(J); |
236 | J->cur.linktype = (uint8_t)linktype; | 305 | J->cur.linktype = (uint8_t)linktype; |
237 | J->cur.link = (uint16_t)lnk; | 306 | J->cur.link = (uint16_t)lnk; |
@@ -399,7 +468,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, | |||
399 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); | 468 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); |
400 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); | 469 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); |
401 | int tc, dir = rec_for_direction(&tv[FORL_STEP]); | 470 | int tc, dir = rec_for_direction(&tv[FORL_STEP]); |
402 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); | 471 | lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI, |
472 | "bad bytecode %d instead of FORI/JFORI", bc_op(*fori)); | ||
403 | scev->t.irt = t; | 473 | scev->t.irt = t; |
404 | scev->dir = dir; | 474 | scev->dir = dir; |
405 | scev->stop = tref_ref(stop); | 475 | scev->stop = tref_ref(stop); |
@@ -455,7 +525,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
455 | IRT_NUM; | 525 | IRT_NUM; |
456 | for (i = FORL_IDX; i <= FORL_STEP; i++) { | 526 | for (i = FORL_IDX; i <= FORL_STEP; i++) { |
457 | if (!tr[i]) sload(J, ra+i); | 527 | if (!tr[i]) sload(J, ra+i); |
458 | lua_assert(tref_isnumber_str(tr[i])); | 528 | lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type"); |
459 | if (tref_isstr(tr[i])) | 529 | if (tref_isstr(tr[i])) |
460 | tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); | 530 | tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); |
461 | if (t == IRT_INT) { | 531 | if (t == IRT_INT) { |
@@ -499,8 +569,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
499 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) | 569 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) |
500 | { | 570 | { |
501 | BCReg ra = bc_a(iterins); | 571 | BCReg ra = bc_a(iterins); |
502 | lua_assert(J->base[ra] != 0); | 572 | if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ |
503 | if (!tref_isnil(J->base[ra])) { /* Looping back? */ | ||
504 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ | 573 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ |
505 | J->maxslot = ra-1+bc_b(J->pc[-1]); | 574 | J->maxslot = ra-1+bc_b(J->pc[-1]); |
506 | J->pc += bc_j(iterins)+1; | 575 | J->pc += bc_j(iterins)+1; |
@@ -538,12 +607,13 @@ static int innerloopleft(jit_State *J, const BCIns *pc) | |||
538 | /* Handle the case when an interpreted loop op is hit. */ | 607 | /* Handle the case when an interpreted loop op is hit. */ |
539 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | 608 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) |
540 | { | 609 | { |
541 | if (J->parent == 0) { | 610 | if (J->parent == 0 && J->exitno == 0) { |
542 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { | 611 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { |
612 | if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */ | ||
543 | /* Same loop? */ | 613 | /* Same loop? */ |
544 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ | 614 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ |
545 | lj_trace_err(J, LJ_TRERR_LLEAVE); | 615 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
546 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ | 616 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ |
547 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ | 617 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ |
548 | /* It's usually better to abort here and wait until the inner loop | 618 | /* It's usually better to abort here and wait until the inner loop |
549 | ** is traced. But if the inner loop repeatedly didn't loop back, | 619 | ** is traced. But if the inner loop repeatedly didn't loop back, |
@@ -568,18 +638,129 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | |||
568 | /* Handle the case when an already compiled loop op is hit. */ | 638 | /* Handle the case when an already compiled loop op is hit. */ |
569 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) | 639 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) |
570 | { | 640 | { |
571 | if (J->parent == 0) { /* Root trace hit an inner loop. */ | 641 | if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */ |
572 | /* Better let the inner loop spawn a side trace back here. */ | 642 | /* Better let the inner loop spawn a side trace back here. */ |
573 | lj_trace_err(J, LJ_TRERR_LINNER); | 643 | lj_trace_err(J, LJ_TRERR_LINNER); |
574 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ | 644 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ |
575 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ | 645 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ |
576 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) | 646 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
577 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ | 647 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */ |
578 | else | 648 | else |
579 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ | 649 | lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ |
580 | } /* Side trace continues across a loop that's left or not entered. */ | 650 | } /* Side trace continues across a loop that's left or not entered. */ |
581 | } | 651 | } |
582 | 652 | ||
653 | /* Record ITERN. */ | ||
654 | static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) | ||
655 | { | ||
656 | #if LJ_BE | ||
657 | /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, | ||
658 | ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. | ||
659 | */ | ||
660 | UNUSED(ra); UNUSED(rb); | ||
661 | setintV(&J->errinfo, (int32_t)BC_ITERN); | ||
662 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | ||
663 | #else | ||
664 | RecordIndex ix; | ||
665 | /* Since ITERN is recorded at the start, we need our own loop detection. */ | ||
666 | if (J->pc == J->startpc && | ||
667 | (J->cur.nins > REF_FIRST+1 || | ||
668 | (J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) && | ||
669 | J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { | ||
670 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ | ||
671 | return LOOPEV_ENTER; | ||
672 | } | ||
673 | J->maxslot = ra; | ||
674 | lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ | ||
675 | ix.tab = getslot(J, ra-2); | ||
676 | ix.key = J->base[ra-1] ? J->base[ra-1] : | ||
677 | sloadt(J, (int32_t)(ra-1), IRT_INT, IRSLOAD_KEYINDEX); | ||
678 | copyTV(J->L, &ix.tabv, &J->L->base[ra-2]); | ||
679 | copyTV(J->L, &ix.keyv, &J->L->base[ra-1]); | ||
680 | ix.idxchain = (rb < 3); /* Omit value type check, if unused. */ | ||
681 | ix.mobj = 1; /* We need the next index, too. */ | ||
682 | J->maxslot = ra + lj_record_next(J, &ix); | ||
683 | J->needsnap = 1; | ||
684 | if (!tref_isnil(ix.key)) { /* Looping back? */ | ||
685 | J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */ | ||
686 | J->base[ra] = ix.key; | ||
687 | J->base[ra+1] = ix.val; | ||
688 | J->pc += bc_j(J->pc[1])+2; | ||
689 | return LOOPEV_ENTER; | ||
690 | } else { | ||
691 | J->maxslot = ra-3; | ||
692 | J->pc += 2; | ||
693 | return LOOPEV_LEAVE; | ||
694 | } | ||
695 | #endif | ||
696 | } | ||
697 | |||
698 | /* Record ISNEXT. */ | ||
699 | static void rec_isnext(jit_State *J, BCReg ra) | ||
700 | { | ||
701 | cTValue *b = &J->L->base[ra-3]; | ||
702 | if (tvisfunc(b) && funcV(b)->c.ffid == FF_next && | ||
703 | tvistab(b+1) && tvisnil(b+2)) { | ||
704 | /* These checks are folded away for a compiled pairs(). */ | ||
705 | TRef func = getslot(J, ra-3); | ||
706 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID); | ||
707 | emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next)); | ||
708 | (void)getslot(J, ra-2); /* Type check for table. */ | ||
709 | (void)getslot(J, ra-1); /* Type check for nil key. */ | ||
710 | J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX; | ||
711 | J->maxslot = ra; | ||
712 | } else { /* Abort trace. Interpreter will despecialize bytecode. */ | ||
713 | lj_trace_err(J, LJ_TRERR_RECERR); | ||
714 | } | ||
715 | } | ||
716 | |||
717 | /* -- Record profiler hook checks ----------------------------------------- */ | ||
718 | |||
719 | #if LJ_HASPROFILE | ||
720 | |||
721 | /* Need to insert profiler hook check? */ | ||
722 | static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) | ||
723 | { | ||
724 | GCproto *ppt; | ||
725 | lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l', | ||
726 | "bad profiler mode %c", J->prof_mode); | ||
727 | if (!pt) | ||
728 | return 0; | ||
729 | ppt = J->prev_pt; | ||
730 | J->prev_pt = pt; | ||
731 | if (pt != ppt && ppt) { | ||
732 | J->prev_line = -1; | ||
733 | return 1; | ||
734 | } | ||
735 | if (J->prof_mode == 'l') { | ||
736 | BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc)); | ||
737 | BCLine pline = J->prev_line; | ||
738 | J->prev_line = line; | ||
739 | if (pline != line) | ||
740 | return 1; | ||
741 | } | ||
742 | return 0; | ||
743 | } | ||
744 | |||
745 | static void rec_profile_ins(jit_State *J, const BCIns *pc) | ||
746 | { | ||
747 | if (J->prof_mode && rec_profile_need(J, J->pt, pc)) { | ||
748 | emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); | ||
749 | lj_snap_add(J); | ||
750 | } | ||
751 | } | ||
752 | |||
753 | static void rec_profile_ret(jit_State *J) | ||
754 | { | ||
755 | if (J->prof_mode == 'f') { | ||
756 | emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); | ||
757 | J->prev_pt = NULL; | ||
758 | lj_snap_add(J); | ||
759 | } | ||
760 | } | ||
761 | |||
762 | #endif | ||
763 | |||
583 | /* -- Record calls and returns -------------------------------------------- */ | 764 | /* -- Record calls and returns -------------------------------------------- */ |
584 | 765 | ||
585 | /* Specialize to the runtime value of the called function or its prototype. */ | 766 | /* Specialize to the runtime value of the called function or its prototype. */ |
@@ -590,11 +771,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) | |||
590 | GCproto *pt = funcproto(fn); | 771 | GCproto *pt = funcproto(fn); |
591 | /* Too many closures created? Probably not a monomorphic function. */ | 772 | /* Too many closures created? Probably not a monomorphic function. */ |
592 | if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ | 773 | if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ |
593 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); | 774 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC); |
594 | emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); | 775 | emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt))); |
595 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ | 776 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ |
596 | return tr; | 777 | return tr; |
597 | } | 778 | } |
779 | } else { | ||
780 | /* Don't specialize to non-monomorphic builtins. */ | ||
781 | switch (fn->c.ffid) { | ||
782 | case FF_coroutine_wrap_aux: | ||
783 | case FF_string_gmatch_aux: | ||
784 | /* NYI: io_file_iter doesn't have an ffid, yet. */ | ||
785 | { /* Specialize to the ffid. */ | ||
786 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); | ||
787 | emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid)); | ||
788 | } | ||
789 | return tr; | ||
790 | default: | ||
791 | /* NYI: don't specialize to non-monomorphic C functions. */ | ||
792 | break; | ||
793 | } | ||
598 | } | 794 | } |
599 | /* Otherwise specialize to the function (closure) value itself. */ | 795 | /* Otherwise specialize to the function (closure) value itself. */ |
600 | kfunc = lj_ir_kfunc(J, fn); | 796 | kfunc = lj_ir_kfunc(J, fn); |
@@ -607,21 +803,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
607 | { | 803 | { |
608 | RecordIndex ix; | 804 | RecordIndex ix; |
609 | TValue *functv = &J->L->base[func]; | 805 | TValue *functv = &J->L->base[func]; |
610 | TRef *fbase = &J->base[func]; | 806 | TRef kfunc, *fbase = &J->base[func]; |
611 | ptrdiff_t i; | 807 | ptrdiff_t i; |
612 | for (i = 0; i <= nargs; i++) | 808 | (void)getslot(J, func); /* Ensure func has a reference. */ |
613 | (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ | 809 | for (i = 1; i <= nargs; i++) |
810 | (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ | ||
614 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ | 811 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ |
615 | ix.tab = fbase[0]; | 812 | ix.tab = fbase[0]; |
616 | copyTV(J->L, &ix.tabv, functv); | 813 | copyTV(J->L, &ix.tabv, functv); |
617 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) | 814 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) |
618 | lj_trace_err(J, LJ_TRERR_NOMM); | 815 | lj_trace_err(J, LJ_TRERR_NOMM); |
619 | for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ | 816 | for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ |
620 | fbase[i] = fbase[i-1]; | 817 | fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; |
818 | #if LJ_FR2 | ||
819 | fbase[2] = fbase[0]; | ||
820 | #endif | ||
621 | fbase[0] = ix.mobj; /* Replace function. */ | 821 | fbase[0] = ix.mobj; /* Replace function. */ |
622 | functv = &ix.mobjv; | 822 | functv = &ix.mobjv; |
623 | } | 823 | } |
624 | fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); | 824 | kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); |
825 | #if LJ_FR2 | ||
826 | fbase[0] = kfunc; | ||
827 | fbase[1] = TREF_FRAME; | ||
828 | #else | ||
829 | fbase[0] = kfunc | TREF_FRAME; | ||
830 | #endif | ||
625 | J->maxslot = (BCReg)nargs; | 831 | J->maxslot = (BCReg)nargs; |
626 | } | 832 | } |
627 | 833 | ||
@@ -631,8 +837,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
631 | rec_call_setup(J, func, nargs); | 837 | rec_call_setup(J, func, nargs); |
632 | /* Bump frame. */ | 838 | /* Bump frame. */ |
633 | J->framedepth++; | 839 | J->framedepth++; |
634 | J->base += func+1; | 840 | J->base += func+1+LJ_FR2; |
635 | J->baseslot += func+1; | 841 | J->baseslot += func+1+LJ_FR2; |
636 | if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) | 842 | if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) |
637 | lj_trace_err(J, LJ_TRERR_STACKOV); | 843 | lj_trace_err(J, LJ_TRERR_STACKOV); |
638 | } | 844 | } |
@@ -650,7 +856,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
650 | func += cbase; | 856 | func += cbase; |
651 | } | 857 | } |
652 | /* Move func + args down. */ | 858 | /* Move func + args down. */ |
653 | memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); | 859 | if (LJ_FR2 && J->baseslot == 2) |
860 | J->base[func+1] = TREF_FRAME; | ||
861 | memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); | ||
654 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ | 862 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ |
655 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ | 863 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ |
656 | if (++J->tailcalled > J->loopunroll) | 864 | if (++J->tailcalled > J->loopunroll) |
@@ -680,6 +888,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt) | |||
680 | return 0; | 888 | return 0; |
681 | } | 889 | } |
682 | 890 | ||
891 | static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot); | ||
892 | |||
683 | /* Record return. */ | 893 | /* Record return. */ |
684 | void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | 894 | void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) |
685 | { | 895 | { |
@@ -691,30 +901,32 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
691 | BCReg cbase = (BCReg)frame_delta(frame); | 901 | BCReg cbase = (BCReg)frame_delta(frame); |
692 | if (--J->framedepth <= 0) | 902 | if (--J->framedepth <= 0) |
693 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 903 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
694 | lua_assert(J->baseslot > 1); | 904 | lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); |
695 | gotresults++; | 905 | gotresults++; |
696 | rbase += cbase; | 906 | rbase += cbase; |
697 | J->baseslot -= (BCReg)cbase; | 907 | J->baseslot -= (BCReg)cbase; |
698 | J->base -= cbase; | 908 | J->base -= cbase; |
699 | J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ | 909 | J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ |
700 | frame = frame_prevd(frame); | 910 | frame = frame_prevd(frame); |
911 | J->needsnap = 1; /* Stop catching on-trace errors. */ | ||
701 | } | 912 | } |
702 | /* Return to lower frame via interpreter for unhandled cases. */ | 913 | /* Return to lower frame via interpreter for unhandled cases. */ |
703 | if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && | 914 | if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && |
704 | (!frame_islua(frame) || | 915 | (!frame_islua(frame) || |
705 | (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { | 916 | (J->parent == 0 && J->exitno == 0 && |
917 | !bc_isret(bc_op(J->cur.startins))))) { | ||
706 | /* NYI: specialize to frame type and return directly, not via RET*. */ | 918 | /* NYI: specialize to frame type and return directly, not via RET*. */ |
707 | for (i = 0; i < (ptrdiff_t)rbase; i++) | 919 | for (i = 0; i < (ptrdiff_t)rbase; i++) |
708 | J->base[i] = 0; /* Purge dead slots. */ | 920 | J->base[i] = 0; /* Purge dead slots. */ |
709 | J->maxslot = rbase + (BCReg)gotresults; | 921 | J->maxslot = rbase + (BCReg)gotresults; |
710 | rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ | 922 | lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ |
711 | return; | 923 | return; |
712 | } | 924 | } |
713 | if (frame_isvarg(frame)) { | 925 | if (frame_isvarg(frame)) { |
714 | BCReg cbase = (BCReg)frame_delta(frame); | 926 | BCReg cbase = (BCReg)frame_delta(frame); |
715 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ | 927 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ |
716 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 928 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
717 | lua_assert(J->baseslot > 1); | 929 | lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); |
718 | rbase += cbase; | 930 | rbase += cbase; |
719 | J->baseslot -= (BCReg)cbase; | 931 | J->baseslot -= (BCReg)cbase; |
720 | J->base -= cbase; | 932 | J->base -= cbase; |
@@ -724,27 +936,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
724 | BCIns callins = *(frame_pc(frame)-1); | 936 | BCIns callins = *(frame_pc(frame)-1); |
725 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; | 937 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; |
726 | BCReg cbase = bc_a(callins); | 938 | BCReg cbase = bc_a(callins); |
727 | GCproto *pt = funcproto(frame_func(frame - (cbase+1))); | 939 | GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); |
728 | if ((pt->flags & PROTO_NOJIT)) | 940 | if ((pt->flags & PROTO_NOJIT)) |
729 | lj_trace_err(J, LJ_TRERR_CJITOFF); | 941 | lj_trace_err(J, LJ_TRERR_CJITOFF); |
730 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { | 942 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { |
731 | if (check_downrec_unroll(J, pt)) { | 943 | if (check_downrec_unroll(J, pt)) { |
732 | J->maxslot = (BCReg)(rbase + gotresults); | 944 | J->maxslot = (BCReg)(rbase + gotresults); |
733 | lj_snap_purge(J); | 945 | lj_snap_purge(J); |
734 | rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ | 946 | lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */ |
735 | return; | 947 | return; |
736 | } | 948 | } |
737 | lj_snap_add(J); | 949 | lj_snap_add(J); |
738 | } | 950 | } |
739 | for (i = 0; i < nresults; i++) /* Adjust results. */ | 951 | for (i = 0; i < nresults; i++) /* Adjust results. */ |
740 | J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; | 952 | J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; |
741 | J->maxslot = cbase+(BCReg)nresults; | 953 | J->maxslot = cbase+(BCReg)nresults; |
742 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ | 954 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ |
743 | J->framedepth--; | 955 | J->framedepth--; |
744 | lua_assert(J->baseslot > cbase+1); | 956 | lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return"); |
745 | J->baseslot -= cbase+1; | 957 | J->baseslot -= cbase+1+LJ_FR2; |
746 | J->base -= cbase+1; | 958 | J->base -= cbase+1+LJ_FR2; |
747 | } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { | 959 | } else if (J->parent == 0 && J->exitno == 0 && |
960 | !bc_isret(bc_op(J->cur.startins))) { | ||
748 | /* Return to lower frame would leave the loop in a root trace. */ | 961 | /* Return to lower frame would leave the loop in a root trace. */ |
749 | lj_trace_err(J, LJ_TRERR_LLEAVE); | 962 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
750 | } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ | 963 | } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ |
@@ -752,13 +965,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
752 | } else { /* Return to lower frame. Guard for the target we return to. */ | 965 | } else { /* Return to lower frame. Guard for the target we return to. */ |
753 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); | 966 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); |
754 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); | 967 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); |
755 | emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); | 968 | emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); |
756 | J->retdepth++; | 969 | J->retdepth++; |
757 | J->needsnap = 1; | 970 | J->needsnap = 1; |
758 | lua_assert(J->baseslot == 1); | 971 | lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return"); |
759 | /* Shift result slots up and clear the slots of the new frame below. */ | 972 | /* Shift result slots up and clear the slots of the new frame below. */ |
760 | memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); | 973 | memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); |
761 | memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); | 974 | memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); |
762 | } | 975 | } |
763 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ | 976 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ |
764 | ASMFunction cont = frame_contf(frame); | 977 | ASMFunction cont = frame_contf(frame); |
@@ -767,24 +980,52 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
767 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 980 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
768 | J->baseslot -= (BCReg)cbase; | 981 | J->baseslot -= (BCReg)cbase; |
769 | J->base -= cbase; | 982 | J->base -= cbase; |
770 | J->maxslot = cbase-2; | 983 | J->maxslot = cbase-(2<<LJ_FR2); |
771 | if (cont == lj_cont_ra) { | 984 | if (cont == lj_cont_ra) { |
772 | /* Copy result to destination slot. */ | 985 | /* Copy result to destination slot. */ |
773 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | 986 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); |
774 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; | 987 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; |
775 | if (dst >= J->maxslot) J->maxslot = dst+1; | 988 | if (dst >= J->maxslot) { |
989 | J->maxslot = dst+1; | ||
990 | } | ||
776 | } else if (cont == lj_cont_nop) { | 991 | } else if (cont == lj_cont_nop) { |
777 | /* Nothing to do here. */ | 992 | /* Nothing to do here. */ |
778 | } else if (cont == lj_cont_cat) { | 993 | } else if (cont == lj_cont_cat) { |
779 | lua_assert(0); | 994 | BCReg bslot = bc_b(*(frame_contpc(frame)-1)); |
995 | TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; | ||
996 | if (bslot != J->maxslot) { /* Concatenate the remainder. */ | ||
997 | TValue *b = J->L->base, save; /* Simulate lower frame and result. */ | ||
998 | /* Can't handle MM_concat + CALLT + fast func side-effects. */ | ||
999 | if (J->postproc != LJ_POST_NONE) | ||
1000 | lj_trace_err(J, LJ_TRERR_NYIRETL); | ||
1001 | J->base[J->maxslot] = tr; | ||
1002 | copyTV(J->L, &save, b-(2<<LJ_FR2)); | ||
1003 | if (gotresults) | ||
1004 | copyTV(J->L, b-(2<<LJ_FR2), b+rbase); | ||
1005 | else | ||
1006 | setnilV(b-(2<<LJ_FR2)); | ||
1007 | J->L->base = b - cbase; | ||
1008 | tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); | ||
1009 | b = J->L->base + cbase; /* Undo. */ | ||
1010 | J->L->base = b; | ||
1011 | copyTV(J->L, b-(2<<LJ_FR2), &save); | ||
1012 | } | ||
1013 | if (tr) { /* Store final result. */ | ||
1014 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | ||
1015 | J->base[dst] = tr; | ||
1016 | if (dst >= J->maxslot) { | ||
1017 | J->maxslot = dst+1; | ||
1018 | } | ||
1019 | } /* Otherwise continue with another __concat call. */ | ||
780 | } else { | 1020 | } else { |
781 | /* Result type already specialized. */ | 1021 | /* Result type already specialized. */ |
782 | lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); | 1022 | lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt, |
1023 | "bad continuation type"); | ||
783 | } | 1024 | } |
784 | } else { | 1025 | } else { |
785 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ | 1026 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ |
786 | } | 1027 | } |
787 | lua_assert(J->baseslot >= 1); | 1028 | lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return"); |
788 | } | 1029 | } |
789 | 1030 | ||
790 | /* -- Metamethod handling ------------------------------------------------- */ | 1031 | /* -- Metamethod handling ------------------------------------------------- */ |
@@ -792,19 +1033,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
792 | /* Prepare to record call to metamethod. */ | 1033 | /* Prepare to record call to metamethod. */ |
793 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) | 1034 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) |
794 | { | 1035 | { |
795 | BCReg s, top = curr_proto(J->L)->framesize; | 1036 | BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; |
796 | TRef trcont; | 1037 | #if LJ_FR2 |
797 | setcont(&J->L->base[top], cont); | 1038 | J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); |
798 | #if LJ_64 | 1039 | J->base[top+1] = TREF_CONT; |
799 | trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); | ||
800 | #else | 1040 | #else |
801 | trcont = lj_ir_kptr(J, (void *)cont); | 1041 | J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; |
802 | #endif | 1042 | #endif |
803 | J->base[top] = trcont | TREF_CONT; | ||
804 | J->framedepth++; | 1043 | J->framedepth++; |
805 | for (s = J->maxslot; s < top; s++) | 1044 | for (s = J->maxslot; s < top; s++) |
806 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ | 1045 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ |
807 | return top+1; | 1046 | return top+1+LJ_FR2; |
808 | } | 1047 | } |
809 | 1048 | ||
810 | /* Record metamethod lookup. */ | 1049 | /* Record metamethod lookup. */ |
@@ -823,7 +1062,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
823 | cTValue *mo; | 1062 | cTValue *mo; |
824 | if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { | 1063 | if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { |
825 | /* Specialize to the C library namespace object. */ | 1064 | /* Specialize to the C library namespace object. */ |
826 | emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); | 1065 | emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); |
827 | } else { | 1066 | } else { |
828 | /* Specialize to the type of userdata. */ | 1067 | /* Specialize to the type of userdata. */ |
829 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); | 1068 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); |
@@ -852,7 +1091,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
852 | } | 1091 | } |
853 | /* The cdata metatable is treated as immutable. */ | 1092 | /* The cdata metatable is treated as immutable. */ |
854 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; | 1093 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; |
855 | ix->mt = mix.tab = lj_ir_ktab(J, mt); | 1094 | ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, |
1095 | GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); | ||
856 | goto nocheck; | 1096 | goto nocheck; |
857 | } | 1097 | } |
858 | ix->mt = mt ? mix.tab : TREF_NIL; | 1098 | ix->mt = mt ? mix.tab : TREF_NIL; |
@@ -879,12 +1119,12 @@ nocheck: | |||
879 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | 1119 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) |
880 | { | 1120 | { |
881 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ | 1121 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ |
882 | BCReg func = rec_mm_prep(J, lj_cont_ra); | 1122 | BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); |
883 | TRef *base = J->base + func; | 1123 | TRef *base = J->base + func; |
884 | TValue *basev = J->L->base + func; | 1124 | TValue *basev = J->L->base + func; |
885 | base[1] = ix->tab; base[2] = ix->key; | 1125 | base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; |
886 | copyTV(J->L, basev+1, &ix->tabv); | 1126 | copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); |
887 | copyTV(J->L, basev+2, &ix->keyv); | 1127 | copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); |
888 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ | 1128 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ |
889 | if (mm != MM_unm) { | 1129 | if (mm != MM_unm) { |
890 | ix->tab = ix->key; | 1130 | ix->tab = ix->key; |
@@ -896,6 +1136,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | |||
896 | } | 1136 | } |
897 | ok: | 1137 | ok: |
898 | base[0] = ix->mobj; | 1138 | base[0] = ix->mobj; |
1139 | #if LJ_FR2 | ||
1140 | base[1] = 0; | ||
1141 | #endif | ||
899 | copyTV(J->L, basev+0, &ix->mobjv); | 1142 | copyTV(J->L, basev+0, &ix->mobjv); |
900 | lj_record_call(J, func, 2); | 1143 | lj_record_call(J, func, 2); |
901 | return 0; /* No result yet. */ | 1144 | return 0; /* No result yet. */ |
@@ -912,6 +1155,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
912 | TRef *base = J->base + func; | 1155 | TRef *base = J->base + func; |
913 | TValue *basev = J->L->base + func; | 1156 | TValue *basev = J->L->base + func; |
914 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); | 1157 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); |
1158 | base += LJ_FR2; | ||
1159 | basev += LJ_FR2; | ||
915 | base[1] = tr; copyTV(J->L, basev+1, tv); | 1160 | base[1] = tr; copyTV(J->L, basev+1, tv); |
916 | #if LJ_52 | 1161 | #if LJ_52 |
917 | base[2] = tr; copyTV(J->L, basev+2, tv); | 1162 | base[2] = tr; copyTV(J->L, basev+2, tv); |
@@ -921,7 +1166,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
921 | lj_record_call(J, func, 2); | 1166 | lj_record_call(J, func, 2); |
922 | } else { | 1167 | } else { |
923 | if (LJ_52 && tref_istab(tr)) | 1168 | if (LJ_52 && tref_istab(tr)) |
924 | return lj_ir_call(J, IRCALL_lj_tab_len, tr); | 1169 | return emitir(IRTI(IR_ALEN), tr, TREF_NIL); |
925 | lj_trace_err(J, LJ_TRERR_NOMM); | 1170 | lj_trace_err(J, LJ_TRERR_NOMM); |
926 | } | 1171 | } |
927 | return 0; /* No result yet. */ | 1172 | return 0; /* No result yet. */ |
@@ -931,10 +1176,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
931 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) | 1176 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) |
932 | { | 1177 | { |
933 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); | 1178 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); |
934 | TRef *base = J->base + func; | 1179 | TRef *base = J->base + func + LJ_FR2; |
935 | TValue *tv = J->L->base + func; | 1180 | TValue *tv = J->L->base + func + LJ_FR2; |
936 | base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; | 1181 | base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; |
937 | copyTV(J->L, tv+0, &ix->mobjv); | 1182 | copyTV(J->L, tv-LJ_FR2, &ix->mobjv); |
938 | copyTV(J->L, tv+1, &ix->valv); | 1183 | copyTV(J->L, tv+1, &ix->valv); |
939 | copyTV(J->L, tv+2, &ix->keyv); | 1184 | copyTV(J->L, tv+2, &ix->keyv); |
940 | lj_record_call(J, func, 2); | 1185 | lj_record_call(J, func, 2); |
@@ -1030,7 +1275,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) | |||
1030 | ix->tab = ix->val; | 1275 | ix->tab = ix->val; |
1031 | copyTV(J->L, &ix->tabv, &ix->valv); | 1276 | copyTV(J->L, &ix->tabv, &ix->valv); |
1032 | } else { | 1277 | } else { |
1033 | lua_assert(tref_iscdata(ix->key)); | 1278 | lj_assertJ(tref_iscdata(ix->key), "cdata expected"); |
1034 | ix->tab = ix->key; | 1279 | ix->tab = ix->key; |
1035 | copyTV(J->L, &ix->tabv, &ix->keyv); | 1280 | copyTV(J->L, &ix->tabv, &ix->keyv); |
1036 | } | 1281 | } |
@@ -1041,6 +1286,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) | |||
1041 | 1286 | ||
1042 | /* -- Indexed access ------------------------------------------------------ */ | 1287 | /* -- Indexed access ------------------------------------------------------ */ |
1043 | 1288 | ||
1289 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1290 | /* Bump table allocations in bytecode when they grow during recording. */ | ||
1291 | static void rec_idx_bump(jit_State *J, RecordIndex *ix) | ||
1292 | { | ||
1293 | RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))]; | ||
1294 | if (tref_ref(ix->tab) == rbc->ref) { | ||
1295 | const BCIns *pc = mref(rbc->pc, const BCIns); | ||
1296 | GCtab *tb = tabV(&ix->tabv); | ||
1297 | uint32_t nhbits; | ||
1298 | IRIns *ir; | ||
1299 | if (!tvisnil(&ix->keyv)) | ||
1300 | (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */ | ||
1301 | nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0; | ||
1302 | ir = IR(tref_ref(ix->tab)); | ||
1303 | if (ir->o == IR_TNEW) { | ||
1304 | uint32_t ah = bc_d(*pc); | ||
1305 | uint32_t asize = ah & 0x7ff, hbits = ah >> 11; | ||
1306 | if (nhbits > hbits) hbits = nhbits; | ||
1307 | if (tb->asize > asize) { | ||
1308 | asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff; | ||
1309 | } | ||
1310 | if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */ | ||
1311 | /* Patch bytecode, but continue recording (for more patching). */ | ||
1312 | setbc_d(pc, (asize | (hbits<<11))); | ||
1313 | /* Patching TNEW operands is only safe if the trace is aborted. */ | ||
1314 | ir->op1 = asize; ir->op2 = hbits; | ||
1315 | J->retryrec = 1; /* Abort the trace at the end of recording. */ | ||
1316 | } | ||
1317 | } else if (ir->o == IR_TDUP) { | ||
1318 | GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc))); | ||
1319 | /* Grow template table, but preserve keys with nil values. */ | ||
1320 | if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) || | ||
1321 | (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) { | ||
1322 | Node *node = noderef(tpl->node); | ||
1323 | uint32_t i, hmask = tpl->hmask, asize; | ||
1324 | TValue *array; | ||
1325 | for (i = 0; i <= hmask; i++) { | ||
1326 | if (!tvisnil(&node[i].key) && tvisnil(&node[i].val)) | ||
1327 | settabV(J->L, &node[i].val, tpl); | ||
1328 | } | ||
1329 | if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) { | ||
1330 | TValue *o = lj_tab_set(J->L, tpl, &ix->keyv); | ||
1331 | if (tvisnil(o)) settabV(J->L, o, tpl); | ||
1332 | } | ||
1333 | lj_tab_resize(J->L, tpl, tb->asize, nhbits); | ||
1334 | node = noderef(tpl->node); | ||
1335 | hmask = tpl->hmask; | ||
1336 | for (i = 0; i <= hmask; i++) { | ||
1337 | /* This is safe, since template tables only hold immutable values. */ | ||
1338 | if (tvistab(&node[i].val)) | ||
1339 | setnilV(&node[i].val); | ||
1340 | } | ||
1341 | /* The shape of the table may have changed. Clean up array part, too. */ | ||
1342 | asize = tpl->asize; | ||
1343 | array = tvref(tpl->array); | ||
1344 | for (i = 0; i < asize; i++) { | ||
1345 | if (tvistab(&array[i])) | ||
1346 | setnilV(&array[i]); | ||
1347 | } | ||
1348 | J->retryrec = 1; /* Abort the trace at the end of recording. */ | ||
1349 | } | ||
1350 | } | ||
1351 | } | ||
1352 | } | ||
1353 | #endif | ||
1354 | |||
1044 | /* Record bounds-check. */ | 1355 | /* Record bounds-check. */ |
1045 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | 1356 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) |
1046 | { | 1357 | { |
@@ -1061,7 +1372,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | |||
1061 | /* Got scalar evolution analysis results for this reference? */ | 1372 | /* Got scalar evolution analysis results for this reference? */ |
1062 | if (ref == J->scev.idx) { | 1373 | if (ref == J->scev.idx) { |
1063 | int32_t stop; | 1374 | int32_t stop; |
1064 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); | 1375 | lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD, |
1376 | "only int SCEV supported"); | ||
1065 | stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); | 1377 | stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); |
1066 | /* Runtime value for stop of loop is within bounds? */ | 1378 | /* Runtime value for stop of loop is within bounds? */ |
1067 | if ((uint64_t)stop + ofs < (uint64_t)asize) { | 1379 | if ((uint64_t)stop + ofs < (uint64_t)asize) { |
@@ -1080,11 +1392,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | |||
1080 | } | 1392 | } |
1081 | 1393 | ||
1082 | /* Record indexed key lookup. */ | 1394 | /* Record indexed key lookup. */ |
1083 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | 1395 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, |
1396 | IRType1 *rbguard) | ||
1084 | { | 1397 | { |
1085 | TRef key; | 1398 | TRef key; |
1086 | GCtab *t = tabV(&ix->tabv); | 1399 | GCtab *t = tabV(&ix->tabv); |
1087 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ | 1400 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ |
1401 | *rbref = 0; | ||
1402 | rbguard->irt = 0; | ||
1088 | 1403 | ||
1089 | /* Integer keys are looked up in the array part first. */ | 1404 | /* Integer keys are looked up in the array part first. */ |
1090 | key = ix->key; | 1405 | key = ix->key; |
@@ -1098,8 +1413,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
1098 | if ((MSize)k < t->asize) { /* Currently an array key? */ | 1413 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
1099 | TRef arrayref; | 1414 | TRef arrayref; |
1100 | rec_idx_abc(J, asizeref, ikey, t->asize); | 1415 | rec_idx_abc(J, asizeref, ikey, t->asize); |
1101 | arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); | 1416 | arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY); |
1102 | return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); | 1417 | return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey); |
1103 | } else { /* Currently not in array (may be an array extension)? */ | 1418 | } else { /* Currently not in array (may be an array extension)? */ |
1104 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ | 1419 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ |
1105 | if (k == 0 && tref_isk(key)) | 1420 | if (k == 0 && tref_isk(key)) |
@@ -1134,16 +1449,18 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
1134 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); | 1449 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); |
1135 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && | 1450 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && |
1136 | hslot <= 65535*(MSize)sizeof(Node)) { | 1451 | hslot <= 65535*(MSize)sizeof(Node)) { |
1137 | TRef node, kslot; | 1452 | TRef node, kslot, hm; |
1138 | TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); | 1453 | *rbref = J->cur.nins; /* Mark possible rollback point. */ |
1454 | *rbguard = J->guardemit; | ||
1455 | hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); | ||
1139 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); | 1456 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); |
1140 | node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); | 1457 | node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE); |
1141 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); | 1458 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); |
1142 | return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); | 1459 | return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot); |
1143 | } | 1460 | } |
1144 | } | 1461 | } |
1145 | /* Fall back to a regular hash lookup. */ | 1462 | /* Fall back to a regular hash lookup. */ |
1146 | return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); | 1463 | return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key); |
1147 | } | 1464 | } |
1148 | 1465 | ||
1149 | /* Determine whether a key is NOT one of the fast metamethod names. */ | 1466 | /* Determine whether a key is NOT one of the fast metamethod names. */ |
@@ -1168,20 +1485,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1168 | { | 1485 | { |
1169 | TRef xref; | 1486 | TRef xref; |
1170 | IROp xrefop, loadop; | 1487 | IROp xrefop, loadop; |
1488 | IRRef rbref; | ||
1489 | IRType1 rbguard; | ||
1171 | cTValue *oldv; | 1490 | cTValue *oldv; |
1172 | 1491 | ||
1173 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ | 1492 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ |
1174 | /* Never call raw lj_record_idx() on non-table. */ | 1493 | /* Never call raw lj_record_idx() on non-table. */ |
1175 | lua_assert(ix->idxchain != 0); | 1494 | lj_assertJ(ix->idxchain != 0, "bad usage"); |
1176 | if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) | 1495 | if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) |
1177 | lj_trace_err(J, LJ_TRERR_NOMM); | 1496 | lj_trace_err(J, LJ_TRERR_NOMM); |
1178 | handlemm: | 1497 | handlemm: |
1179 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ | 1498 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ |
1180 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); | 1499 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); |
1181 | TRef *base = J->base + func; | 1500 | TRef *base = J->base + func + LJ_FR2; |
1182 | TValue *tv = J->L->base + func; | 1501 | TValue *tv = J->L->base + func + LJ_FR2; |
1183 | base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; | 1502 | base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; |
1184 | setfuncV(J->L, tv+0, funcV(&ix->mobjv)); | 1503 | setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); |
1185 | copyTV(J->L, tv+1, &ix->tabv); | 1504 | copyTV(J->L, tv+1, &ix->tabv); |
1186 | copyTV(J->L, tv+2, &ix->keyv); | 1505 | copyTV(J->L, tv+2, &ix->keyv); |
1187 | if (ix->val) { | 1506 | if (ix->val) { |
@@ -1194,6 +1513,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1194 | return 0; /* No result yet. */ | 1513 | return 0; /* No result yet. */ |
1195 | } | 1514 | } |
1196 | } | 1515 | } |
1516 | #if LJ_HASBUFFER | ||
1517 | /* The index table of buffer objects is treated as immutable. */ | ||
1518 | if (ix->mt == TREF_NIL && !ix->val && | ||
1519 | tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER && | ||
1520 | tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) { | ||
1521 | cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv)); | ||
1522 | TRef tr = lj_record_constify(J, val); | ||
1523 | if (tr) return tr; /* Specialize to the value, i.e. a method. */ | ||
1524 | } | ||
1525 | #endif | ||
1197 | /* Otherwise retry lookup with metaobject. */ | 1526 | /* Otherwise retry lookup with metaobject. */ |
1198 | ix->tab = ix->mobj; | 1527 | ix->tab = ix->mobj; |
1199 | copyTV(J->L, &ix->tabv, &ix->mobjv); | 1528 | copyTV(J->L, &ix->tabv, &ix->mobjv); |
@@ -1213,7 +1542,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1213 | } | 1542 | } |
1214 | 1543 | ||
1215 | /* Record the key lookup. */ | 1544 | /* Record the key lookup. */ |
1216 | xref = rec_idx_key(J, ix); | 1545 | xref = rec_idx_key(J, ix, &rbref, &rbguard); |
1217 | xrefop = IR(tref_ref(xref))->o; | 1546 | xrefop = IR(tref_ref(xref))->o; |
1218 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; | 1547 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; |
1219 | /* The lj_meta_tset() inconsistency is gone, but better play safe. */ | 1548 | /* The lj_meta_tset() inconsistency is gone, but better play safe. */ |
@@ -1223,11 +1552,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1223 | IRType t = itype2irt(oldv); | 1552 | IRType t = itype2irt(oldv); |
1224 | TRef res; | 1553 | TRef res; |
1225 | if (oldv == niltvg(J2G(J))) { | 1554 | if (oldv == niltvg(J2G(J))) { |
1226 | emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1555 | emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1227 | res = TREF_NIL; | 1556 | res = TREF_NIL; |
1228 | } else { | 1557 | } else { |
1229 | res = emitir(IRTG(loadop, t), xref, 0); | 1558 | res = emitir(IRTG(loadop, t), xref, 0); |
1230 | } | 1559 | } |
1560 | if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */ | ||
1561 | lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ | ||
1562 | J->guardemit = rbguard; | ||
1563 | } | ||
1231 | if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) | 1564 | if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) |
1232 | goto handlemm; | 1565 | goto handlemm; |
1233 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ | 1566 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ |
@@ -1235,6 +1568,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1235 | } else { /* Indexed store. */ | 1568 | } else { /* Indexed store. */ |
1236 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); | 1569 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); |
1237 | int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); | 1570 | int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); |
1571 | if (tref_ref(xref) < rbref) { /* HREFK forwarded? */ | ||
1572 | lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ | ||
1573 | J->guardemit = rbguard; | ||
1574 | } | ||
1238 | if (tvisnil(oldv)) { /* Previous value was nil? */ | 1575 | if (tvisnil(oldv)) { /* Previous value was nil? */ |
1239 | /* Need to duplicate the hasmm check for the early guards. */ | 1576 | /* Need to duplicate the hasmm check for the early guards. */ |
1240 | int hasmm = 0; | 1577 | int hasmm = 0; |
@@ -1245,24 +1582,28 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1245 | if (hasmm) | 1582 | if (hasmm) |
1246 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ | 1583 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ |
1247 | else if (xrefop == IR_HREF) | 1584 | else if (xrefop == IR_HREF) |
1248 | emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), | 1585 | emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), |
1249 | xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1586 | xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1250 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { | 1587 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { |
1251 | lua_assert(hasmm); | 1588 | lj_assertJ(hasmm, "inconsistent metamethod handling"); |
1252 | goto handlemm; | 1589 | goto handlemm; |
1253 | } | 1590 | } |
1254 | lua_assert(!hasmm); | 1591 | lj_assertJ(!hasmm, "inconsistent metamethod handling"); |
1255 | if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ | 1592 | if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ |
1256 | TRef key = ix->key; | 1593 | TRef key = ix->key; |
1257 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ | 1594 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ |
1258 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); | 1595 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); |
1259 | xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); | 1596 | xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); |
1260 | keybarrier = 0; /* NEWREF already takes care of the key barrier. */ | 1597 | keybarrier = 0; /* NEWREF already takes care of the key barrier. */ |
1598 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1599 | if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */ | ||
1600 | rec_idx_bump(J, ix); | ||
1601 | #endif | ||
1261 | } | 1602 | } |
1262 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { | 1603 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { |
1263 | /* Cannot derive that the previous value was non-nil, must do checks. */ | 1604 | /* Cannot derive that the previous value was non-nil, must do checks. */ |
1264 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ | 1605 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ |
1265 | emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1606 | emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1266 | if (ix->idxchain) { /* Metamethod lookup required? */ | 1607 | if (ix->idxchain) { /* Metamethod lookup required? */ |
1267 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ | 1608 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ |
1268 | if (!mt) { | 1609 | if (!mt) { |
@@ -1284,7 +1625,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1284 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); | 1625 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); |
1285 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ | 1626 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ |
1286 | if (!nommstr(J, ix->key)) { | 1627 | if (!nommstr(J, ix->key)) { |
1287 | TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); | 1628 | TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM); |
1288 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); | 1629 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); |
1289 | } | 1630 | } |
1290 | J->needsnap = 1; | 1631 | J->needsnap = 1; |
@@ -1292,6 +1633,72 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1292 | } | 1633 | } |
1293 | } | 1634 | } |
1294 | 1635 | ||
1636 | /* Determine result type of table traversal. */ | ||
1637 | static IRType rec_next_types(GCtab *t, uint32_t idx) | ||
1638 | { | ||
1639 | for (; idx < t->asize; idx++) { | ||
1640 | cTValue *a = arrayslot(t, idx); | ||
1641 | if (LJ_LIKELY(!tvisnil(a))) | ||
1642 | return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8); | ||
1643 | } | ||
1644 | idx -= t->asize; | ||
1645 | for (; idx <= t->hmask; idx++) { | ||
1646 | Node *n = &noderef(t->node)[idx]; | ||
1647 | if (!tvisnil(&n->val)) | ||
1648 | return itype2irt(&n->key) + (itype2irt(&n->val) << 8); | ||
1649 | } | ||
1650 | return IRT_NIL + (IRT_NIL << 8); | ||
1651 | } | ||
1652 | |||
1653 | /* Record a table traversal step aka next(). */ | ||
1654 | int lj_record_next(jit_State *J, RecordIndex *ix) | ||
1655 | { | ||
1656 | IRType t, tkey, tval; | ||
1657 | TRef trvk; | ||
1658 | t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo); | ||
1659 | tkey = (t & 0xff); tval = (t >> 8); | ||
1660 | trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key); | ||
1661 | if (ix->mobj || tkey == IRT_NIL) { | ||
1662 | TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk); | ||
1663 | /* Always check for invalid key from next() for nil result. */ | ||
1664 | if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1)); | ||
1665 | ix->mobj = idx; | ||
1666 | } | ||
1667 | ix->key = lj_record_vload(J, trvk, 1, tkey); | ||
1668 | if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */ | ||
1669 | ix->val = TREF_NIL; | ||
1670 | return 1; | ||
1671 | } else { /* Need value. */ | ||
1672 | ix->val = lj_record_vload(J, trvk, 0, tval); | ||
1673 | return 2; | ||
1674 | } | ||
1675 | } | ||
1676 | |||
1677 | static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) | ||
1678 | { | ||
1679 | RecordIndex ix; | ||
1680 | cTValue *basev = J->L->base; | ||
1681 | GCtab *t = tabV(&basev[ra-1]); | ||
1682 | settabV(J->L, &ix.tabv, t); | ||
1683 | ix.tab = getslot(J, ra-1); | ||
1684 | ix.idxchain = 0; | ||
1685 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1686 | if ((J->flags & JIT_F_OPT_SINK)) { | ||
1687 | if (t->asize < i+rn-ra) | ||
1688 | lj_tab_reasize(J->L, t, i+rn-ra); | ||
1689 | setnilV(&ix.keyv); | ||
1690 | rec_idx_bump(J, &ix); | ||
1691 | } | ||
1692 | #endif | ||
1693 | for (; ra < rn; i++, ra++) { | ||
1694 | setintV(&ix.keyv, i); | ||
1695 | ix.key = lj_ir_kint(J, i); | ||
1696 | copyTV(J->L, &ix.valv, &basev[ra]); | ||
1697 | ix.val = getslot(J, ra); | ||
1698 | lj_record_idx(J, &ix); | ||
1699 | } | ||
1700 | } | ||
1701 | |||
1295 | /* -- Upvalue access ------------------------------------------------------ */ | 1702 | /* -- Upvalue access ------------------------------------------------------ */ |
1296 | 1703 | ||
1297 | /* Check whether upvalue is immutable and ok to constify. */ | 1704 | /* Check whether upvalue is immutable and ok to constify. */ |
@@ -1328,13 +1735,17 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) | |||
1328 | int needbarrier = 0; | 1735 | int needbarrier = 0; |
1329 | if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ | 1736 | if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ |
1330 | TRef tr, kfunc; | 1737 | TRef tr, kfunc; |
1331 | lua_assert(val == 0); | 1738 | lj_assertJ(val == 0, "bad usage"); |
1332 | if (!tref_isk(fn)) { /* Late specialization of current function. */ | 1739 | if (!tref_isk(fn)) { /* Late specialization of current function. */ |
1333 | if (J->pt->flags >= PROTO_CLC_POLY) | 1740 | if (J->pt->flags >= PROTO_CLC_POLY) |
1334 | goto noconstify; | 1741 | goto noconstify; |
1335 | kfunc = lj_ir_kfunc(J, J->fn); | 1742 | kfunc = lj_ir_kfunc(J, J->fn); |
1336 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); | 1743 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); |
1337 | J->base[-1] = TREF_FRAME | kfunc; | 1744 | #if LJ_FR2 |
1745 | J->base[-2] = kfunc; | ||
1746 | #else | ||
1747 | J->base[-1] = kfunc | TREF_FRAME; | ||
1748 | #endif | ||
1338 | fn = kfunc; | 1749 | fn = kfunc; |
1339 | } | 1750 | } |
1340 | tr = lj_record_constify(J, uvval(uvp)); | 1751 | tr = lj_record_constify(J, uvval(uvp)); |
@@ -1345,16 +1756,16 @@ noconstify: | |||
1345 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ | 1756 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ |
1346 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); | 1757 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); |
1347 | if (!uvp->closed) { | 1758 | if (!uvp->closed) { |
1348 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); | 1759 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); |
1349 | /* In current stack? */ | 1760 | /* In current stack? */ |
1350 | if (uvval(uvp) >= tvref(J->L->stack) && | 1761 | if (uvval(uvp) >= tvref(J->L->stack) && |
1351 | uvval(uvp) < tvref(J->L->maxstack)) { | 1762 | uvval(uvp) < tvref(J->L->maxstack)) { |
1352 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); | 1763 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); |
1353 | if (slot >= 0) { /* Aliases an SSA slot? */ | 1764 | if (slot >= 0) { /* Aliases an SSA slot? */ |
1354 | emitir(IRTG(IR_EQ, IRT_P32), | 1765 | emitir(IRTG(IR_EQ, IRT_PGC), |
1355 | REF_BASE, | 1766 | REF_BASE, |
1356 | emitir(IRT(IR_ADD, IRT_P32), uref, | 1767 | emitir(IRT(IR_ADD, IRT_PGC), uref, |
1357 | lj_ir_kint(J, (slot - 1) * -8))); | 1768 | lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); |
1358 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ | 1769 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ |
1359 | if (val == 0) { | 1770 | if (val == 0) { |
1360 | return getslot(J, slot); | 1771 | return getslot(J, slot); |
@@ -1365,12 +1776,12 @@ noconstify: | |||
1365 | } | 1776 | } |
1366 | } | 1777 | } |
1367 | } | 1778 | } |
1368 | emitir(IRTG(IR_UGT, IRT_P32), | 1779 | emitir(IRTG(IR_UGT, IRT_PGC), |
1369 | emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE), | 1780 | emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), |
1370 | lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); | 1781 | lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); |
1371 | } else { | 1782 | } else { |
1372 | needbarrier = 1; | 1783 | needbarrier = 1; |
1373 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); | 1784 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); |
1374 | } | 1785 | } |
1375 | if (val == 0) { /* Upvalue load */ | 1786 | if (val == 0) { /* Upvalue load */ |
1376 | IRType t = itype2irt(uvval(uvp)); | 1787 | IRType t = itype2irt(uvval(uvp)); |
@@ -1409,16 +1820,16 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) | |||
1409 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) { | 1820 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) { |
1410 | J->pc++; | 1821 | J->pc++; |
1411 | if (J->framedepth + J->retdepth == 0) | 1822 | if (J->framedepth + J->retdepth == 0) |
1412 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ | 1823 | lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */ |
1413 | else | 1824 | else |
1414 | rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ | 1825 | lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ |
1415 | } | 1826 | } |
1416 | } else { | 1827 | } else { |
1417 | if (count > J->param[JIT_P_callunroll]) { | 1828 | if (count > J->param[JIT_P_callunroll]) { |
1418 | if (lnk) { /* Possible tail- or up-recursion. */ | 1829 | if (lnk) { /* Possible tail- or up-recursion. */ |
1419 | lj_trace_flush(J, lnk); /* Flush trace that only returns. */ | 1830 | lj_trace_flush(J, lnk); /* Flush trace that only returns. */ |
1420 | /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ | 1831 | /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ |
1421 | hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); | 1832 | hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); |
1422 | } | 1833 | } |
1423 | lj_trace_err(J, LJ_TRERR_CUNROLL); | 1834 | lj_trace_err(J, LJ_TRERR_CUNROLL); |
1424 | } | 1835 | } |
@@ -1445,11 +1856,14 @@ static void rec_func_setup(jit_State *J) | |||
1445 | static void rec_func_vararg(jit_State *J) | 1856 | static void rec_func_vararg(jit_State *J) |
1446 | { | 1857 | { |
1447 | GCproto *pt = J->pt; | 1858 | GCproto *pt = J->pt; |
1448 | BCReg s, fixargs, vframe = J->maxslot+1; | 1859 | BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; |
1449 | lua_assert((pt->flags & PROTO_VARARG)); | 1860 | lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function"); |
1450 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) | 1861 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) |
1451 | lj_trace_err(J, LJ_TRERR_STACKOV); | 1862 | lj_trace_err(J, LJ_TRERR_STACKOV); |
1452 | J->base[vframe-1] = J->base[-1]; /* Copy function up. */ | 1863 | J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ |
1864 | #if LJ_FR2 | ||
1865 | J->base[vframe-1] = TREF_FRAME; | ||
1866 | #endif | ||
1453 | /* Copy fixarg slots up and set their original slots to nil. */ | 1867 | /* Copy fixarg slots up and set their original slots to nil. */ |
1454 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; | 1868 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; |
1455 | for (s = 0; s < fixargs; s++) { | 1869 | for (s = 0; s < fixargs; s++) { |
@@ -1485,9 +1899,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk) | |||
1485 | } | 1899 | } |
1486 | J->instunroll = 0; /* Cannot continue across a compiled function. */ | 1900 | J->instunroll = 0; /* Cannot continue across a compiled function. */ |
1487 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) | 1901 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
1488 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ | 1902 | lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */ |
1489 | else | 1903 | else |
1490 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ | 1904 | lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ |
1491 | } | 1905 | } |
1492 | 1906 | ||
1493 | /* -- Vararg handling ----------------------------------------------------- */ | 1907 | /* -- Vararg handling ----------------------------------------------------- */ |
@@ -1511,8 +1925,10 @@ static int select_detect(jit_State *J) | |||
1511 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | 1925 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) |
1512 | { | 1926 | { |
1513 | int32_t numparams = J->pt->numparams; | 1927 | int32_t numparams = J->pt->numparams; |
1514 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; | 1928 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; |
1515 | lua_assert(frame_isvarg(J->L->base-1)); | 1929 | lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame"); |
1930 | if (LJ_FR2 && dst > J->maxslot) | ||
1931 | J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ | ||
1516 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ | 1932 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ |
1517 | ptrdiff_t i; | 1933 | ptrdiff_t i; |
1518 | if (nvararg < 0) nvararg = 0; | 1934 | if (nvararg < 0) nvararg = 0; |
@@ -1523,10 +1939,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1523 | J->maxslot = dst + (BCReg)nresults; | 1939 | J->maxslot = dst + (BCReg)nresults; |
1524 | } | 1940 | } |
1525 | for (i = 0; i < nresults; i++) | 1941 | for (i = 0; i < nresults; i++) |
1526 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; | 1942 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; |
1527 | } else { /* Unknown number of varargs passed to trace. */ | 1943 | } else { /* Unknown number of varargs passed to trace. */ |
1528 | TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); | 1944 | TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); |
1529 | int32_t frofs = 8*(1+numparams)+FRAME_VARG; | 1945 | int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; |
1530 | if (nresults >= 0) { /* Known fixed number of results. */ | 1946 | if (nresults >= 0) { /* Known fixed number of results. */ |
1531 | ptrdiff_t i; | 1947 | ptrdiff_t i; |
1532 | if (nvararg > 0) { | 1948 | if (nvararg > 0) { |
@@ -1535,16 +1951,13 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1535 | if (nvararg >= nresults) | 1951 | if (nvararg >= nresults) |
1536 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); | 1952 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); |
1537 | else | 1953 | else |
1538 | emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); | 1954 | emitir(IRTGI(IR_EQ), fr, |
1539 | vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | 1955 | lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); |
1540 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | 1956 | vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1957 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); | ||
1541 | for (i = 0; i < nload; i++) { | 1958 | for (i = 0; i < nload; i++) { |
1542 | IRType t = itype2irt(&J->L->base[i-1-nvararg]); | 1959 | IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); |
1543 | TRef aref = emitir(IRT(IR_AREF, IRT_P32), | 1960 | J->base[dst+i] = lj_record_vload(J, vbase, i, t); |
1544 | vbase, lj_ir_kint(J, (int32_t)i)); | ||
1545 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | ||
1546 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | ||
1547 | J->base[dst+i] = tr; | ||
1548 | } | 1961 | } |
1549 | } else { | 1962 | } else { |
1550 | emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); | 1963 | emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); |
@@ -1586,15 +1999,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1586 | } | 1999 | } |
1587 | if (idx != 0 && idx <= nvararg) { | 2000 | if (idx != 0 && idx <= nvararg) { |
1588 | IRType t; | 2001 | IRType t; |
1589 | TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | 2002 | TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1590 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | 2003 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, |
1591 | t = itype2irt(&J->L->base[idx-2-nvararg]); | 2004 | lj_ir_kint(J, frofs-(8<<LJ_FR2))); |
1592 | aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); | 2005 | t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); |
1593 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | 2006 | aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); |
1594 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | 2007 | tr = lj_record_vload(J, aref, 0, t); |
1595 | } | 2008 | } |
1596 | J->base[dst-2] = tr; | 2009 | J->base[dst-2-LJ_FR2] = tr; |
1597 | J->maxslot = dst-1; | 2010 | J->maxslot = dst-1-LJ_FR2; |
1598 | J->bcskip = 2; /* Skip CALLM + select. */ | 2011 | J->bcskip = 2; /* Skip CALLM + select. */ |
1599 | } else { | 2012 | } else { |
1600 | nyivarg: | 2013 | nyivarg: |
@@ -1612,8 +2025,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah) | |||
1612 | { | 2025 | { |
1613 | uint32_t asize = ah & 0x7ff; | 2026 | uint32_t asize = ah & 0x7ff; |
1614 | uint32_t hbits = ah >> 11; | 2027 | uint32_t hbits = ah >> 11; |
2028 | TRef tr; | ||
1615 | if (asize == 0x7ff) asize = 0x801; | 2029 | if (asize == 0x7ff) asize = 0x801; |
1616 | return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); | 2030 | tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); |
2031 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2032 | J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr); | ||
2033 | setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc); | ||
2034 | setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); | ||
2035 | #endif | ||
2036 | return tr; | ||
2037 | } | ||
2038 | |||
2039 | /* -- Concatenation ------------------------------------------------------- */ | ||
2040 | |||
2041 | static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) | ||
2042 | { | ||
2043 | TRef *top = &J->base[topslot]; | ||
2044 | TValue savetv[5]; | ||
2045 | BCReg s; | ||
2046 | RecordIndex ix; | ||
2047 | lj_assertJ(baseslot < topslot, "bad CAT arg"); | ||
2048 | for (s = baseslot; s <= topslot; s++) | ||
2049 | (void)getslot(J, s); /* Ensure all arguments have a reference. */ | ||
2050 | if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) { | ||
2051 | TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot]; | ||
2052 | /* First convert numbers to strings. */ | ||
2053 | for (trp = top; trp >= base; trp--) { | ||
2054 | if (tref_isnumber(*trp)) | ||
2055 | *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp, | ||
2056 | tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT); | ||
2057 | else if (!tref_isstr(*trp)) | ||
2058 | break; | ||
2059 | } | ||
2060 | xbase = ++trp; | ||
2061 | tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), | ||
2062 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); | ||
2063 | do { | ||
2064 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++); | ||
2065 | } while (trp <= top); | ||
2066 | tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
2067 | J->maxslot = (BCReg)(xbase - J->base); | ||
2068 | if (xbase == base) return tr; /* Return simple concatenation result. */ | ||
2069 | /* Pass partial result. */ | ||
2070 | topslot = J->maxslot--; | ||
2071 | *xbase = tr; | ||
2072 | top = xbase; | ||
2073 | setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */ | ||
2074 | } else { | ||
2075 | J->maxslot = topslot-1; | ||
2076 | copyTV(J->L, &ix.keyv, &J->L->base[topslot]); | ||
2077 | } | ||
2078 | copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]); | ||
2079 | ix.tab = top[-1]; | ||
2080 | ix.key = top[0]; | ||
2081 | memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */ | ||
2082 | rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */ | ||
2083 | memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */ | ||
2084 | return 0; /* No result yet. */ | ||
1617 | } | 2085 | } |
1618 | 2086 | ||
1619 | /* -- Record bytecode ops ------------------------------------------------- */ | 2087 | /* -- Record bytecode ops ------------------------------------------------- */ |
@@ -1634,7 +2102,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) | |||
1634 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); | 2102 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); |
1635 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 2103 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
1636 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ | 2104 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ |
2105 | #if LJ_FR2 | ||
2106 | SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; | ||
2107 | uint64_t pcbase; | ||
2108 | memcpy(&pcbase, flink, sizeof(uint64_t)); | ||
2109 | pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); | ||
2110 | memcpy(flink, &pcbase, sizeof(uint64_t)); | ||
2111 | #else | ||
1637 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); | 2112 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
2113 | #endif | ||
1638 | J->needsnap = 1; | 2114 | J->needsnap = 1; |
1639 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); | 2115 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); |
1640 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ | 2116 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ |
@@ -1654,7 +2130,7 @@ void lj_record_ins(jit_State *J) | |||
1654 | if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { | 2130 | if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { |
1655 | switch (J->postproc) { | 2131 | switch (J->postproc) { |
1656 | case LJ_POST_FIXCOMP: /* Fixup comparison. */ | 2132 | case LJ_POST_FIXCOMP: /* Fixup comparison. */ |
1657 | pc = frame_pc(&J2G(J)->tmptv); | 2133 | pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64; |
1658 | rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); | 2134 | rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); |
1659 | /* fallthrough */ | 2135 | /* fallthrough */ |
1660 | case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ | 2136 | case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ |
@@ -1692,7 +2168,7 @@ void lj_record_ins(jit_State *J) | |||
1692 | if (bc_op(*J->pc) >= BC__MAX) | 2168 | if (bc_op(*J->pc) >= BC__MAX) |
1693 | return; | 2169 | return; |
1694 | break; | 2170 | break; |
1695 | default: lua_assert(0); break; | 2171 | default: lj_assertJ(0, "bad post-processing mode"); break; |
1696 | } | 2172 | } |
1697 | J->postproc = LJ_POST_NONE; | 2173 | J->postproc = LJ_POST_NONE; |
1698 | } | 2174 | } |
@@ -1700,7 +2176,7 @@ void lj_record_ins(jit_State *J) | |||
1700 | /* Need snapshot before recording next bytecode (e.g. after a store). */ | 2176 | /* Need snapshot before recording next bytecode (e.g. after a store). */ |
1701 | if (J->needsnap) { | 2177 | if (J->needsnap) { |
1702 | J->needsnap = 0; | 2178 | J->needsnap = 0; |
1703 | lj_snap_purge(J); | 2179 | if (J->pt) lj_snap_purge(J); |
1704 | lj_snap_add(J); | 2180 | lj_snap_add(J); |
1705 | J->mergesnap = 1; | 2181 | J->mergesnap = 1; |
1706 | } | 2182 | } |
@@ -1722,6 +2198,10 @@ void lj_record_ins(jit_State *J) | |||
1722 | rec_check_ir(J); | 2198 | rec_check_ir(J); |
1723 | #endif | 2199 | #endif |
1724 | 2200 | ||
2201 | #if LJ_HASPROFILE | ||
2202 | rec_profile_ins(J, pc); | ||
2203 | #endif | ||
2204 | |||
1725 | /* Keep a copy of the runtime values of var/num/str operands. */ | 2205 | /* Keep a copy of the runtime values of var/num/str operands. */ |
1726 | #define rav (&ix.valv) | 2206 | #define rav (&ix.valv) |
1727 | #define rbv (&ix.tabv) | 2207 | #define rbv (&ix.tabv) |
@@ -1748,7 +2228,7 @@ void lj_record_ins(jit_State *J) | |||
1748 | switch (bcmode_c(op)) { | 2228 | switch (bcmode_c(op)) { |
1749 | case BCMvar: | 2229 | case BCMvar: |
1750 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; | 2230 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; |
1751 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; | 2231 | case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; |
1752 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); | 2232 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); |
1753 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : | 2233 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : |
1754 | lj_ir_knumint(J, numV(tv)); } break; | 2234 | lj_ir_knumint(J, numV(tv)); } break; |
@@ -1843,6 +2323,18 @@ void lj_record_ins(jit_State *J) | |||
1843 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ | 2323 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ |
1844 | break; | 2324 | break; |
1845 | 2325 | ||
2326 | case BC_ISTYPE: case BC_ISNUM: | ||
2327 | /* These coercions need to correspond with lj_meta_istype(). */ | ||
2328 | if (LJ_DUALNUM && rc == ~LJ_TNUMX+1) | ||
2329 | ra = lj_opt_narrow_toint(J, ra); | ||
2330 | else if (rc == ~LJ_TNUMX+2) | ||
2331 | ra = lj_ir_tonum(J, ra); | ||
2332 | else if (rc == ~LJ_TSTR+1) | ||
2333 | ra = lj_ir_tostr(J, ra); | ||
2334 | /* else: type specialization suffices. */ | ||
2335 | J->base[bc_a(ins)] = ra; | ||
2336 | break; | ||
2337 | |||
1846 | /* -- Unary ops --------------------------------------------------------- */ | 2338 | /* -- Unary ops --------------------------------------------------------- */ |
1847 | 2339 | ||
1848 | case BC_NOT: | 2340 | case BC_NOT: |
@@ -1854,7 +2346,7 @@ void lj_record_ins(jit_State *J) | |||
1854 | if (tref_isstr(rc)) | 2346 | if (tref_isstr(rc)) |
1855 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); | 2347 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); |
1856 | else if (!LJ_52 && tref_istab(rc)) | 2348 | else if (!LJ_52 && tref_istab(rc)) |
1857 | rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); | 2349 | rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL); |
1858 | else | 2350 | else |
1859 | rc = rec_mm_len(J, rc, rcv); | 2351 | rc = rec_mm_len(J, rc, rcv); |
1860 | break; | 2352 | break; |
@@ -1906,11 +2398,23 @@ void lj_record_ins(jit_State *J) | |||
1906 | rc = rec_mm_arith(J, &ix, MM_pow); | 2398 | rc = rec_mm_arith(J, &ix, MM_pow); |
1907 | break; | 2399 | break; |
1908 | 2400 | ||
2401 | /* -- Miscellaneous ops ------------------------------------------------- */ | ||
2402 | |||
2403 | case BC_CAT: | ||
2404 | rc = rec_cat(J, rb, rc); | ||
2405 | break; | ||
2406 | |||
1909 | /* -- Constant and move ops --------------------------------------------- */ | 2407 | /* -- Constant and move ops --------------------------------------------- */ |
1910 | 2408 | ||
1911 | case BC_MOV: | 2409 | case BC_MOV: |
1912 | /* Clear gap of method call to avoid resurrecting previous refs. */ | 2410 | /* Clear gap of method call to avoid resurrecting previous refs. */ |
1913 | if (ra > J->maxslot) J->base[ra-1] = 0; | 2411 | if (ra > J->maxslot) { |
2412 | #if LJ_FR2 | ||
2413 | memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); | ||
2414 | #else | ||
2415 | J->base[ra-1] = 0; | ||
2416 | #endif | ||
2417 | } | ||
1914 | break; | 2418 | break; |
1915 | case BC_KSTR: case BC_KNUM: case BC_KPRI: | 2419 | case BC_KSTR: case BC_KNUM: case BC_KPRI: |
1916 | break; | 2420 | break; |
@@ -1918,6 +2422,8 @@ void lj_record_ins(jit_State *J) | |||
1918 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); | 2422 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); |
1919 | break; | 2423 | break; |
1920 | case BC_KNIL: | 2424 | case BC_KNIL: |
2425 | if (LJ_FR2 && ra > J->maxslot) | ||
2426 | J->base[ra-1] = 0; | ||
1921 | while (ra <= rc) | 2427 | while (ra <= rc) |
1922 | J->base[ra++] = TREF_NIL; | 2428 | J->base[ra++] = TREF_NIL; |
1923 | if (rc >= J->maxslot) J->maxslot = rc+1; | 2429 | if (rc >= J->maxslot) J->maxslot = rc+1; |
@@ -1954,6 +2460,14 @@ void lj_record_ins(jit_State *J) | |||
1954 | ix.idxchain = LJ_MAX_IDXCHAIN; | 2460 | ix.idxchain = LJ_MAX_IDXCHAIN; |
1955 | rc = lj_record_idx(J, &ix); | 2461 | rc = lj_record_idx(J, &ix); |
1956 | break; | 2462 | break; |
2463 | case BC_TGETR: case BC_TSETR: | ||
2464 | ix.idxchain = 0; | ||
2465 | rc = lj_record_idx(J, &ix); | ||
2466 | break; | ||
2467 | |||
2468 | case BC_TSETM: | ||
2469 | rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo); | ||
2470 | break; | ||
1957 | 2471 | ||
1958 | case BC_TNEW: | 2472 | case BC_TNEW: |
1959 | rc = rec_tnew(J, rc); | 2473 | rc = rec_tnew(J, rc); |
@@ -1961,33 +2475,38 @@ void lj_record_ins(jit_State *J) | |||
1961 | case BC_TDUP: | 2475 | case BC_TDUP: |
1962 | rc = emitir(IRTG(IR_TDUP, IRT_TAB), | 2476 | rc = emitir(IRTG(IR_TDUP, IRT_TAB), |
1963 | lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); | 2477 | lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); |
2478 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2479 | J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc); | ||
2480 | setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc); | ||
2481 | setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); | ||
2482 | #endif | ||
1964 | break; | 2483 | break; |
1965 | 2484 | ||
1966 | /* -- Calls and vararg handling ----------------------------------------- */ | 2485 | /* -- Calls and vararg handling ----------------------------------------- */ |
1967 | 2486 | ||
1968 | case BC_ITERC: | 2487 | case BC_ITERC: |
1969 | J->base[ra] = getslot(J, ra-3); | 2488 | J->base[ra] = getslot(J, ra-3); |
1970 | J->base[ra+1] = getslot(J, ra-2); | 2489 | J->base[ra+1+LJ_FR2] = getslot(J, ra-2); |
1971 | J->base[ra+2] = getslot(J, ra-1); | 2490 | J->base[ra+2+LJ_FR2] = getslot(J, ra-1); |
1972 | { /* Do the actual copy now because lj_record_call needs the values. */ | 2491 | { /* Do the actual copy now because lj_record_call needs the values. */ |
1973 | TValue *b = &J->L->base[ra]; | 2492 | TValue *b = &J->L->base[ra]; |
1974 | copyTV(J->L, b, b-3); | 2493 | copyTV(J->L, b, b-3); |
1975 | copyTV(J->L, b+1, b-2); | 2494 | copyTV(J->L, b+1+LJ_FR2, b-2); |
1976 | copyTV(J->L, b+2, b-1); | 2495 | copyTV(J->L, b+2+LJ_FR2, b-1); |
1977 | } | 2496 | } |
1978 | lj_record_call(J, ra, (ptrdiff_t)rc-1); | 2497 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1979 | break; | 2498 | break; |
1980 | 2499 | ||
1981 | /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ | 2500 | /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ |
1982 | case BC_CALLM: | 2501 | case BC_CALLM: |
1983 | rc = (BCReg)(J->L->top - J->L->base) - ra; | 2502 | rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; |
1984 | /* fallthrough */ | 2503 | /* fallthrough */ |
1985 | case BC_CALL: | 2504 | case BC_CALL: |
1986 | lj_record_call(J, ra, (ptrdiff_t)rc-1); | 2505 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1987 | break; | 2506 | break; |
1988 | 2507 | ||
1989 | case BC_CALLMT: | 2508 | case BC_CALLMT: |
1990 | rc = (BCReg)(J->L->top - J->L->base) - ra; | 2509 | rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; |
1991 | /* fallthrough */ | 2510 | /* fallthrough */ |
1992 | case BC_CALLT: | 2511 | case BC_CALLT: |
1993 | lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); | 2512 | lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); |
@@ -2004,6 +2523,9 @@ void lj_record_ins(jit_State *J) | |||
2004 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; | 2523 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; |
2005 | /* fallthrough */ | 2524 | /* fallthrough */ |
2006 | case BC_RET: case BC_RET0: case BC_RET1: | 2525 | case BC_RET: case BC_RET0: case BC_RET1: |
2526 | #if LJ_HASPROFILE | ||
2527 | rec_profile_ret(J); | ||
2528 | #endif | ||
2007 | lj_record_ret(J, ra, (ptrdiff_t)rc-1); | 2529 | lj_record_ret(J, ra, (ptrdiff_t)rc-1); |
2008 | break; | 2530 | break; |
2009 | 2531 | ||
@@ -2014,9 +2536,10 @@ void lj_record_ins(jit_State *J) | |||
2014 | J->loopref = J->cur.nins; | 2536 | J->loopref = J->cur.nins; |
2015 | break; | 2537 | break; |
2016 | case BC_JFORI: | 2538 | case BC_JFORI: |
2017 | lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); | 2539 | lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL, |
2540 | "JFORI does not point to JFORL"); | ||
2018 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ | 2541 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ |
2019 | rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); | 2542 | lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); |
2020 | /* Continue tracing if the loop is not entered. */ | 2543 | /* Continue tracing if the loop is not entered. */ |
2021 | break; | 2544 | break; |
2022 | 2545 | ||
@@ -2026,6 +2549,9 @@ void lj_record_ins(jit_State *J) | |||
2026 | case BC_ITERL: | 2549 | case BC_ITERL: |
2027 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); | 2550 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); |
2028 | break; | 2551 | break; |
2552 | case BC_ITERN: | ||
2553 | rec_loop_interp(J, pc, rec_itern(J, ra, rb)); | ||
2554 | break; | ||
2029 | case BC_LOOP: | 2555 | case BC_LOOP: |
2030 | rec_loop_interp(J, pc, rec_loop(J, ra, 1)); | 2556 | rec_loop_interp(J, pc, rec_loop(J, ra, 1)); |
2031 | break; | 2557 | break; |
@@ -2054,6 +2580,10 @@ void lj_record_ins(jit_State *J) | |||
2054 | J->maxslot = ra; /* Shrink used slots. */ | 2580 | J->maxslot = ra; /* Shrink used slots. */ |
2055 | break; | 2581 | break; |
2056 | 2582 | ||
2583 | case BC_ISNEXT: | ||
2584 | rec_isnext(J, ra); | ||
2585 | break; | ||
2586 | |||
2057 | /* -- Function headers -------------------------------------------------- */ | 2587 | /* -- Function headers -------------------------------------------------- */ |
2058 | 2588 | ||
2059 | case BC_FUNCF: | 2589 | case BC_FUNCF: |
@@ -2068,7 +2598,8 @@ void lj_record_ins(jit_State *J) | |||
2068 | rec_func_lua(J); | 2598 | rec_func_lua(J); |
2069 | break; | 2599 | break; |
2070 | case BC_JFUNCV: | 2600 | case BC_JFUNCV: |
2071 | lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */ | 2601 | /* Cannot happen. No hotcall counting for varag funcs. */ |
2602 | lj_assertJ(0, "unsupported vararg hotcall"); | ||
2072 | break; | 2603 | break; |
2073 | 2604 | ||
2074 | case BC_FUNCC: | 2605 | case BC_FUNCC: |
@@ -2082,12 +2613,8 @@ void lj_record_ins(jit_State *J) | |||
2082 | break; | 2613 | break; |
2083 | } | 2614 | } |
2084 | /* fallthrough */ | 2615 | /* fallthrough */ |
2085 | case BC_ITERN: | ||
2086 | case BC_ISNEXT: | ||
2087 | case BC_CAT: | ||
2088 | case BC_UCLO: | 2616 | case BC_UCLO: |
2089 | case BC_FNEW: | 2617 | case BC_FNEW: |
2090 | case BC_TSETM: | ||
2091 | setintV(&J->errinfo, (int32_t)op); | 2618 | setintV(&J->errinfo, (int32_t)op); |
2092 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | 2619 | lj_trace_err_info(J, LJ_TRERR_NYIBC); |
2093 | break; | 2620 | break; |
@@ -2096,15 +2623,21 @@ void lj_record_ins(jit_State *J) | |||
2096 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ | 2623 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ |
2097 | if (bcmode_a(op) == BCMdst && rc) { | 2624 | if (bcmode_a(op) == BCMdst && rc) { |
2098 | J->base[ra] = rc; | 2625 | J->base[ra] = rc; |
2099 | if (ra >= J->maxslot) J->maxslot = ra+1; | 2626 | if (ra >= J->maxslot) { |
2627 | #if LJ_FR2 | ||
2628 | if (ra > J->maxslot) J->base[ra-1] = 0; | ||
2629 | #endif | ||
2630 | J->maxslot = ra+1; | ||
2631 | } | ||
2100 | } | 2632 | } |
2101 | 2633 | ||
2102 | #undef rav | 2634 | #undef rav |
2103 | #undef rbv | 2635 | #undef rbv |
2104 | #undef rcv | 2636 | #undef rcv |
2105 | 2637 | ||
2106 | /* Limit the number of recorded IR instructions. */ | 2638 | /* Limit the number of recorded IR instructions and constants. */ |
2107 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) | 2639 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] || |
2640 | J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst]) | ||
2108 | lj_trace_err(J, LJ_TRERR_TRACEOV); | 2641 | lj_trace_err(J, LJ_TRERR_TRACEOV); |
2109 | } | 2642 | } |
2110 | 2643 | ||
@@ -2124,13 +2657,20 @@ static const BCIns *rec_setup_root(jit_State *J) | |||
2124 | J->bc_min = pc; | 2657 | J->bc_min = pc; |
2125 | break; | 2658 | break; |
2126 | case BC_ITERL: | 2659 | case BC_ITERL: |
2127 | lua_assert(bc_op(pc[-1]) == BC_ITERC); | 2660 | lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL"); |
2128 | J->maxslot = ra + bc_b(pc[-1]) - 1; | 2661 | J->maxslot = ra + bc_b(pc[-1]) - 1; |
2129 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | 2662 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); |
2130 | pc += 1+bc_j(ins); | 2663 | pc += 1+bc_j(ins); |
2131 | lua_assert(bc_op(pc[-1]) == BC_JMP); | 2664 | lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); |
2132 | J->bc_min = pc; | 2665 | J->bc_min = pc; |
2133 | break; | 2666 | break; |
2667 | case BC_ITERN: | ||
2668 | lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN"); | ||
2669 | J->maxslot = ra; | ||
2670 | J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns); | ||
2671 | J->bc_min = pc+2 + bc_j(pc[1]); | ||
2672 | J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */ | ||
2673 | break; | ||
2134 | case BC_LOOP: | 2674 | case BC_LOOP: |
2135 | /* Only check BC range for real loops, but not for "repeat until true". */ | 2675 | /* Only check BC range for real loops, but not for "repeat until true". */ |
2136 | pcj = pc + bc_j(ins); | 2676 | pcj = pc + bc_j(ins); |
@@ -2153,8 +2693,14 @@ static const BCIns *rec_setup_root(jit_State *J) | |||
2153 | J->maxslot = J->pt->numparams; | 2693 | J->maxslot = J->pt->numparams; |
2154 | pc++; | 2694 | pc++; |
2155 | break; | 2695 | break; |
2696 | case BC_CALLM: | ||
2697 | case BC_CALL: | ||
2698 | case BC_ITERC: | ||
2699 | /* No bytecode range check for stitched traces. */ | ||
2700 | pc++; | ||
2701 | break; | ||
2156 | default: | 2702 | default: |
2157 | lua_assert(0); | 2703 | lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins)); |
2158 | break; | 2704 | break; |
2159 | } | 2705 | } |
2160 | return pc; | 2706 | return pc; |
@@ -2168,11 +2714,14 @@ void lj_record_setup(jit_State *J) | |||
2168 | /* Initialize state related to current trace. */ | 2714 | /* Initialize state related to current trace. */ |
2169 | memset(J->slot, 0, sizeof(J->slot)); | 2715 | memset(J->slot, 0, sizeof(J->slot)); |
2170 | memset(J->chain, 0, sizeof(J->chain)); | 2716 | memset(J->chain, 0, sizeof(J->chain)); |
2717 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2718 | memset(J->rbchash, 0, sizeof(J->rbchash)); | ||
2719 | #endif | ||
2171 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); | 2720 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); |
2172 | J->scev.idx = REF_NIL; | 2721 | J->scev.idx = REF_NIL; |
2173 | setmref(J->scev.pc, NULL); | 2722 | setmref(J->scev.pc, NULL); |
2174 | 2723 | ||
2175 | J->baseslot = 1; /* Invoking function is at base[-1]. */ | 2724 | J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */ |
2176 | J->base = J->slot + J->baseslot; | 2725 | J->base = J->slot + J->baseslot; |
2177 | J->maxslot = 0; | 2726 | J->maxslot = 0; |
2178 | J->framedepth = 0; | 2727 | J->framedepth = 0; |
@@ -2187,7 +2736,7 @@ void lj_record_setup(jit_State *J) | |||
2187 | J->bc_extent = ~(MSize)0; | 2736 | J->bc_extent = ~(MSize)0; |
2188 | 2737 | ||
2189 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ | 2738 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ |
2190 | emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); | 2739 | emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno); |
2191 | for (i = 0; i <= 2; i++) { | 2740 | for (i = 0; i <= 2; i++) { |
2192 | IRIns *ir = IR(REF_NIL-i); | 2741 | IRIns *ir = IR(REF_NIL-i); |
2193 | ir->i = 0; | 2742 | ir->i = 0; |
@@ -2218,10 +2767,15 @@ void lj_record_setup(jit_State *J) | |||
2218 | } | 2767 | } |
2219 | lj_snap_replay(J, T); | 2768 | lj_snap_replay(J, T); |
2220 | sidecheck: | 2769 | sidecheck: |
2221 | if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || | 2770 | if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || |
2222 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + | 2771 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + |
2223 | J->param[JIT_P_tryside]) { | 2772 | J->param[JIT_P_tryside])) { |
2224 | rec_stop(J, LJ_TRLINK_INTERP, 0); | 2773 | if (bc_op(*J->pc) == BC_JLOOP) { |
2774 | BCIns startins = traceref(J, bc_d(*J->pc))->startins; | ||
2775 | if (bc_op(startins) == BC_ITERN) | ||
2776 | rec_itern(J, bc_a(startins), bc_b(startins)); | ||
2777 | } | ||
2778 | lj_record_stop(J, LJ_TRLINK_INTERP, 0); | ||
2225 | } | 2779 | } |
2226 | } else { /* Root trace. */ | 2780 | } else { /* Root trace. */ |
2227 | J->cur.root = 0; | 2781 | J->cur.root = 0; |
@@ -2229,13 +2783,20 @@ void lj_record_setup(jit_State *J) | |||
2229 | J->pc = rec_setup_root(J); | 2783 | J->pc = rec_setup_root(J); |
2230 | /* Note: the loop instruction itself is recorded at the end and not | 2784 | /* Note: the loop instruction itself is recorded at the end and not |
2231 | ** at the start! So snapshot #0 needs to point to the *next* instruction. | 2785 | ** at the start! So snapshot #0 needs to point to the *next* instruction. |
2786 | ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST. | ||
2232 | */ | 2787 | */ |
2233 | lj_snap_add(J); | 2788 | lj_snap_add(J); |
2234 | if (bc_op(J->cur.startins) == BC_FORL) | 2789 | if (bc_op(J->cur.startins) == BC_FORL) |
2235 | rec_for_loop(J, J->pc-1, &J->scev, 1); | 2790 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
2791 | else if (bc_op(J->cur.startins) == BC_ITERC) | ||
2792 | J->startpc = NULL; | ||
2236 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) | 2793 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) |
2237 | lj_trace_err(J, LJ_TRERR_STACKOV); | 2794 | lj_trace_err(J, LJ_TRERR_STACKOV); |
2238 | } | 2795 | } |
2796 | #if LJ_HASPROFILE | ||
2797 | J->prev_pt = NULL; | ||
2798 | J->prev_line = -1; | ||
2799 | #endif | ||
2239 | #ifdef LUAJIT_ENABLE_CHECKHOOK | 2800 | #ifdef LUAJIT_ENABLE_CHECKHOOK |
2240 | /* Regularly check for instruction/line hooks from compiled code and | 2801 | /* Regularly check for instruction/line hooks from compiled code and |
2241 | ** exit to the interpreter if the hooks are set. | 2802 | ** exit to the interpreter if the hooks are set. |
diff --git a/src/lj_record.h b/src/lj_record.h index 4b180fc2..ab2f4c8d 100644 --- a/src/lj_record.h +++ b/src/lj_record.h | |||
@@ -28,7 +28,9 @@ typedef struct RecordIndex { | |||
28 | 28 | ||
29 | LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, | 29 | LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, |
30 | cTValue *av, cTValue *bv); | 30 | cTValue *av, cTValue *bv); |
31 | LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk); | ||
31 | LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); | 32 | LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); |
33 | LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t); | ||
32 | 34 | ||
33 | LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); | 35 | LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); |
34 | LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs); | 36 | LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs); |
@@ -36,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults); | |||
36 | 38 | ||
37 | LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); | 39 | LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); |
38 | LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); | 40 | LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); |
41 | LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix); | ||
39 | 42 | ||
40 | LJ_FUNC void lj_record_ins(jit_State *J); | 43 | LJ_FUNC void lj_record_ins(jit_State *J); |
41 | LJ_FUNC void lj_record_setup(jit_State *J); | 44 | LJ_FUNC void lj_record_setup(jit_State *J); |
diff --git a/src/lj_serialize.c b/src/lj_serialize.c new file mode 100644 index 00000000..f7e51828 --- /dev/null +++ b/src/lj_serialize.c | |||
@@ -0,0 +1,539 @@ | |||
1 | /* | ||
2 | ** Object de/serialization. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_serialize_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASBUFFER | ||
12 | #include "lj_err.h" | ||
13 | #include "lj_buf.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_tab.h" | ||
16 | #include "lj_udata.h" | ||
17 | #if LJ_HASFFI | ||
18 | #include "lj_ctype.h" | ||
19 | #include "lj_cdata.h" | ||
20 | #endif | ||
21 | #if LJ_HASJIT | ||
22 | #include "lj_ir.h" | ||
23 | #endif | ||
24 | #include "lj_serialize.h" | ||
25 | |||
26 | /* Tags for internal serialization format. Every encoded object starts with one tag in U124 form. */ | ||
27 | enum { | ||
28 | SER_TAG_NIL, /* 0x00 */ | ||
29 | SER_TAG_FALSE, | ||
30 | SER_TAG_TRUE, | ||
31 | SER_TAG_NULL, /* Light userdata with a zero pointer, no payload. */ | ||
32 | SER_TAG_LIGHTUD32, /* Light userdata, 4 byte payload. */ | ||
33 | SER_TAG_LIGHTUD64, /* Light userdata, 8 byte payload (only handled if LJ_64). */ | ||
34 | SER_TAG_INT, /* 4 byte payload. */ | ||
35 | SER_TAG_NUM, /* 8 byte payload. */ | ||
36 | SER_TAG_TAB, /* 0x08: +1 = hash count follows, +2 or +4 = array count follows. */ | ||
37 | SER_TAG_DICT_MT = SER_TAG_TAB+6, /* Metatable dictionary index, then a table tag. */ | ||
38 | SER_TAG_DICT_STR, /* String dictionary index. */ | ||
39 | SER_TAG_INT64, /* 0x10 */ | ||
40 | SER_TAG_UINT64, | ||
41 | SER_TAG_COMPLEX, | ||
42 | SER_TAG_0x13, /* 0x13..0x1f: never written by the encoder, rejected by the decoder. */ | ||
43 | SER_TAG_0x14, | ||
44 | SER_TAG_0x15, | ||
45 | SER_TAG_0x16, | ||
46 | SER_TAG_0x17, | ||
47 | SER_TAG_0x18, /* 0x18 */ | ||
48 | SER_TAG_0x19, | ||
49 | SER_TAG_0x1a, | ||
50 | SER_TAG_0x1b, | ||
51 | SER_TAG_0x1c, | ||
52 | SER_TAG_0x1d, | ||
53 | SER_TAG_0x1e, | ||
54 | SER_TAG_0x1f, | ||
55 | SER_TAG_STR, /* 0x20 + str->len */ | ||
56 | }; | ||
57 | LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0); /* Low 3 bits of the table tag are free for the offsets above. */ | ||
58 | |||
59 | /* -- Helper functions ---------------------------------------------------- */ | ||
60 | |||
61 | static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz) /* Make room for sz bytes at write pointer w. Returns the pointer to continue writing at. */ | ||
62 | { | ||
63 | if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) { /* Fewer than sz bytes left before sbx->e. */ | ||
64 | sbx->w = w; /* Store the current write position before calling into lj_buf. */ | ||
65 | w = lj_buf_more2((SBuf *)sbx, sz); /* Continue writing at the returned pointer. */ | ||
66 | } | ||
67 | return w; | ||
68 | } | ||
69 | |||
70 | /* Write U124 to buffer. Out-of-line part for v >= 0xe0: 2 bytes if v < 0x1fe0, else 0xff + 4 bytes. */ | ||
71 | static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v) | ||
72 | { | ||
73 | if (v < 0x1fe0) { | ||
74 | v -= 0xe0; /* Bias, so the high part fits into the low 5 bits of the first byte. */ | ||
75 | *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v; | ||
76 | } else { | ||
77 | *w++ = (char)0xff; /* Marker byte for the 4 byte form. */ | ||
78 | #if LJ_BE | ||
79 | v = lj_bswap(v); | ||
80 | #endif | ||
81 | memcpy(w, &v, 4); w += 4; | ||
82 | } | ||
83 | return w; | ||
84 | } | ||
85 | |||
86 | static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v) /* Write v in U124 form at w. Returns the advanced write pointer. */ | ||
87 | { | ||
88 | if (LJ_LIKELY(v < 0xe0)) { /* Single byte form. */ | ||
89 | *w++ = (char)v; | ||
90 | return w; | ||
91 | } else { | ||
92 | return serialize_wu124_(w, v); /* 2 or 5 byte form. */ | ||
93 | } | ||
94 | } | ||
95 | |||
96 | static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv) /* Read the rest of a U124 whose first byte (>= 0xe0) is already in *pv. Returns NULL if the data ends early. */ | ||
97 | { | ||
98 | uint32_t v = *pv; | ||
99 | if (v != 0xff) { /* 2 byte form. */ | ||
100 | if (r >= w) return NULL; /* Second byte is missing. */ | ||
101 | v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++; | ||
102 | } else { /* 5 byte form. */ | ||
103 | if (r + 4 > w) return NULL; /* Fewer than 4 bytes left. */ | ||
104 | v = lj_getu32(r); r += 4; | ||
105 | #if LJ_BE | ||
106 | v = lj_bswap(v); | ||
107 | #endif | ||
108 | } | ||
109 | *pv = v; | ||
110 | return r; | ||
111 | } | ||
112 | |||
113 | static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv) /* Read a U124 from [r, w) into *pv. Returns the advanced read pointer or NULL if the data ends early. */ | ||
114 | { | ||
115 | if (LJ_LIKELY(r < w)) { | ||
116 | uint32_t v = *(uint8_t *)r; r++; | ||
117 | *pv = v; /* Also the input for the out-of-line part below. */ | ||
118 | if (LJ_UNLIKELY(v >= 0xe0)) { /* Multi-byte form. */ | ||
119 | r = serialize_ru124_(r, w, pv); | ||
120 | } | ||
121 | return r; | ||
122 | } | ||
123 | return NULL; /* No data left. */ | ||
124 | } | ||
125 | |||
126 | /* Prepare string dictionary for use (once). Adds a hash entry per string that maps it to its zero-based index. */ | ||
127 | void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict) | ||
128 | { | ||
129 | if (!dict->hmask) { /* No hash part means not prepared, yet. */ | ||
130 | MSize i, len = lj_tab_len(dict); | ||
131 | if (!len) return; | ||
132 | lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); /* Keep the array size, add a hash part sized from len. */ | ||
133 | for (i = 1; i <= len && i < dict->asize; i++) { | ||
134 | cTValue *o = arrayslot(dict, i); | ||
135 | if (tvisstr(o)) { | ||
136 | if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */ | ||
137 | lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); /* Raw index value, not a tagged TValue. */ | ||
138 | } | ||
139 | } else if (!tvisfalse(o)) { /* Only strings and false are accepted as entries. */ | ||
140 | lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); | ||
141 | } | ||
142 | } | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* Prepare metatable dictionary for use (once). Adds a hash entry per table that maps it to its zero-based index. */ | ||
147 | void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict) | ||
148 | { | ||
149 | if (!dict->hmask) { /* No hash part means not prepared, yet. */ | ||
150 | MSize i, len = lj_tab_len(dict); | ||
151 | if (!len) return; | ||
152 | lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); /* Keep the array size, add a hash part sized from len. */ | ||
153 | for (i = 1; i <= len && i < dict->asize; i++) { | ||
154 | cTValue *o = arrayslot(dict, i); | ||
155 | if (tvistab(o)) { | ||
156 | if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */ | ||
157 | lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); /* Raw index value, not a tagged TValue. */ | ||
158 | } | ||
159 | } else if (!tvisfalse(o)) { /* Only tables and false are accepted as entries. */ | ||
160 | lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); | ||
161 | } | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* -- Internal serializer ------------------------------------------------- */ | ||
167 | |||
168 | /* Put serialized object into buffer. Writes o at w, recursing for table contents. Returns the advanced write pointer. */ | ||
169 | static char *serialize_put(char *w, SBufExt *sbx, cTValue *o) | ||
170 | { | ||
171 | if (LJ_LIKELY(tvisstr(o))) { | ||
172 | const GCstr *str = strV(o); | ||
173 | MSize len = str->len; | ||
174 | w = serialize_more(w, sbx, 5+len); /* Up to 5 bytes for the U124 tag plus the string data. */ | ||
175 | w = serialize_wu124(w, SER_TAG_STR + len); | ||
176 | w = lj_buf_wmem(w, strdata(str), len); | ||
177 | } else if (tvisint(o)) { | ||
178 | uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o); | ||
179 | w = serialize_more(w, sbx, 1+4); | ||
180 | *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4; | ||
181 | } else if (tvisnum(o)) { | ||
182 | uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64; | ||
183 | w = serialize_more(w, sbx, 1+sizeof(lua_Number)); | ||
184 | *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8; | ||
185 | } else if (tvispri(o)) { | ||
186 | w = serialize_more(w, sbx, 1); | ||
187 | *w++ = (char)(SER_TAG_NIL + ~itype(o)); /* The decoder inverts this with setpriV(o, ~tp). */ | ||
188 | } else if (tvistab(o)) { | ||
189 | const GCtab *t = tabV(o); | ||
190 | uint32_t narray = 0, nhash = 0, one = 2; /* one: tag offset for the array part, 2 = starts at slot 0, 4 = starts at slot 1. */ | ||
191 | if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); /* Nesting limit reached. */ | ||
192 | sbx->depth--; | ||
193 | if (t->asize > 0) { /* Determine max. length of array part. */ | ||
194 | ptrdiff_t i; | ||
195 | TValue *array = tvref(t->array); | ||
196 | for (i = (ptrdiff_t)t->asize-1; i >= 0; i--) | ||
197 | if (!tvisnil(&array[i])) | ||
198 | break; | ||
199 | narray = (uint32_t)(i+1); | ||
200 | if (narray && tvisnil(&array[0])) one = 4; /* Slot 0 is nil: skip it when writing. */ | ||
201 | } | ||
202 | if (t->hmask > 0) { /* Count number of used hash slots. */ | ||
203 | uint32_t i, hmask = t->hmask; | ||
204 | Node *node = noderef(t->node); | ||
205 | for (i = 0; i <= hmask; i++) | ||
206 | nhash += !tvisnil(&node[i].val); | ||
207 | } | ||
208 | /* Write metatable index. */ | ||
209 | if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) { | ||
210 | TValue mto; | ||
211 | Node *n; | ||
212 | settabV(sbufL(sbx), &mto, tabref(t->metatable)); | ||
213 | n = hashgcref(tabref(sbx->dict_mt), mto.gcr); | ||
214 | do { | ||
215 | if (n->key.u64 == mto.u64) { | ||
216 | uint32_t idx = n->val.u32.lo; /* Raw index stored by lj_serialize_dict_prep_mt(). */ | ||
217 | w = serialize_more(w, sbx, 1+5); | ||
218 | *w++ = SER_TAG_DICT_MT; | ||
219 | w = serialize_wu124(w, idx); | ||
220 | break; | ||
221 | } | ||
222 | } while ((n = nextnode(n))); /* Nothing is written if the metatable is not in the dictionary. */ | ||
223 | } | ||
224 | /* Write number of array slots and hash slots. */ | ||
225 | w = serialize_more(w, sbx, 1+2*5); | ||
226 | *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0)); | ||
227 | if (narray) w = serialize_wu124(w, narray); | ||
228 | if (nhash) w = serialize_wu124(w, nhash); | ||
229 | if (narray) { /* Write array entries. */ | ||
230 | cTValue *oa = tvref(t->array) + (one >> 2); /* Start at slot 1 if one == 4. */ | ||
231 | cTValue *oe = tvref(t->array) + narray; | ||
232 | while (oa < oe) w = serialize_put(w, sbx, oa++); | ||
233 | } | ||
234 | if (nhash) { /* Write hash entries. */ | ||
235 | const Node *node = noderef(t->node) + t->hmask; /* Walk the hash part from the last node down. */ | ||
236 | GCtab *dict_str = tabref(sbx->dict_str); | ||
237 | if (LJ_UNLIKELY(dict_str)) { | ||
238 | for (;; node--) | ||
239 | if (!tvisnil(&node->val)) { | ||
240 | if (LJ_LIKELY(tvisstr(&node->key))) { | ||
241 | /* Inlined lj_tab_getstr is 30% faster. */ | ||
242 | const GCstr *str = strV(&node->key); | ||
243 | Node *n = hashstr(dict_str, str); | ||
244 | do { | ||
245 | if (tvisstr(&n->key) && strV(&n->key) == str) { | ||
246 | uint32_t idx = n->val.u32.lo; /* Raw index stored by lj_serialize_dict_prep_str(). */ | ||
247 | w = serialize_more(w, sbx, 1+5); | ||
248 | *w++ = SER_TAG_DICT_STR; | ||
249 | w = serialize_wu124(w, idx); | ||
250 | break; | ||
251 | } | ||
252 | n = nextnode(n); | ||
253 | if (!n) { /* Not in the dictionary: write the string itself. */ | ||
254 | MSize len = str->len; | ||
255 | w = serialize_more(w, sbx, 5+len); | ||
256 | w = serialize_wu124(w, SER_TAG_STR + len); | ||
257 | w = lj_buf_wmem(w, strdata(str), len); | ||
258 | break; | ||
259 | } | ||
260 | } while (1); | ||
261 | } else { | ||
262 | w = serialize_put(w, sbx, &node->key); | ||
263 | } | ||
264 | w = serialize_put(w, sbx, &node->val); | ||
265 | if (--nhash == 0) break; /* All used slots written. */ | ||
266 | } | ||
267 | } else { | ||
268 | for (;; node--) | ||
269 | if (!tvisnil(&node->val)) { | ||
270 | w = serialize_put(w, sbx, &node->key); | ||
271 | w = serialize_put(w, sbx, &node->val); | ||
272 | if (--nhash == 0) break; /* All used slots written. */ | ||
273 | } | ||
274 | } | ||
275 | } | ||
276 | sbx->depth++; | ||
277 | #if LJ_HASFFI | ||
278 | } else if (tviscdata(o)) { | ||
279 | CTState *cts = ctype_cts(sbufL(sbx)); | ||
280 | CType *s = ctype_raw(cts, cdataV(o)->ctypeid); | ||
281 | uint8_t *sp = cdataptr(cdataV(o)); | ||
282 | if (ctype_isinteger(s->info) && s->size == 8) { | ||
283 | w = serialize_more(w, sbx, 1+8); | ||
284 | *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64; | ||
285 | #if LJ_BE | ||
286 | { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); } | ||
287 | #else | ||
288 | memcpy(w, sp, 8); | ||
289 | #endif | ||
290 | w += 8; | ||
291 | } else if (ctype_iscomplex(s->info) && s->size == 16) { | ||
292 | w = serialize_more(w, sbx, 1+16); | ||
293 | *w++ = SER_TAG_COMPLEX; | ||
294 | #if LJ_BE | ||
295 | { /* Only swap the doubles. The re/im order stays the same. */ | ||
296 | uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8); | ||
297 | u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8); | ||
298 | } | ||
299 | #else | ||
300 | memcpy(w, sp, 16); | ||
301 | #endif | ||
302 | w += 16; | ||
303 | } else { | ||
304 | goto badenc; /* NYI other cdata */ | ||
305 | } | ||
306 | #endif | ||
307 | } else if (tvislightud(o)) { | ||
308 | uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o); | ||
309 | w = serialize_more(w, sbx, 1+sizeof(ud)); | ||
310 | if (ud == 0) { | ||
311 | *w++ = SER_TAG_NULL; /* Tag only, no payload. */ | ||
312 | } else if (LJ_32 || checku32(ud)) { | ||
313 | #if LJ_BE && LJ_64 | ||
314 | ud = lj_bswap64(ud); | ||
315 | #elif LJ_BE | ||
316 | ud = lj_bswap(ud); | ||
317 | #endif | ||
318 | *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4; | ||
319 | #if LJ_64 | ||
320 | } else { | ||
321 | #if LJ_BE | ||
322 | ud = lj_bswap64(ud); | ||
323 | #endif | ||
324 | *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8; | ||
325 | #endif | ||
326 | } | ||
327 | } else { | ||
328 | /* NYI userdata */ | ||
329 | #if LJ_HASFFI | ||
330 | badenc: | ||
331 | #endif | ||
332 | lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o)); /* The type name of o is the error argument. */ | ||
333 | } | ||
334 | return w; | ||
335 | } | ||
336 | |||
337 | /* Get serialized object from buffer. Reads one object at r into o, recursing for table contents. Returns the advanced read pointer. */ | ||
338 | static char *serialize_get(char *r, SBufExt *sbx, TValue *o) | ||
339 | { | ||
340 | char *w = sbx->w; /* End of the readable data. */ | ||
341 | uint32_t tp; | ||
342 | r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; | ||
343 | if (LJ_LIKELY(tp >= SER_TAG_STR)) { | ||
344 | uint32_t len = tp - SER_TAG_STR; /* The string length is folded into the tag. */ | ||
345 | if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob; | ||
346 | setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len)); | ||
347 | r += len; | ||
348 | } else if (tp == SER_TAG_INT) { | ||
349 | if (LJ_UNLIKELY(r + 4 > w)) goto eob; | ||
350 | setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r))); | ||
351 | r += 4; | ||
352 | } else if (tp == SER_TAG_NUM) { | ||
353 | if (LJ_UNLIKELY(r + 8 > w)) goto eob; | ||
354 | memcpy(o, r, 8); r += 8; | ||
355 | #if LJ_BE | ||
356 | o->u64 = lj_bswap64(o->u64); | ||
357 | #endif | ||
358 | if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. */ | ||
359 | } else if (tp <= SER_TAG_TRUE) { | ||
360 | setpriV(o, ~tp); /* Inverse of SER_TAG_NIL + ~itype(o) in the encoder. */ | ||
361 | } else if (tp == SER_TAG_DICT_STR) { | ||
362 | GCtab *dict_str; | ||
363 | uint32_t idx; | ||
364 | r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; | ||
365 | idx++; /* Encoded indexes are zero-based, the dictionary array starts at slot 1. */ | ||
366 | dict_str = tabref(sbx->dict_str); | ||
367 | if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx))) | ||
368 | copyTV(sbufL(sbx), o, arrayslot(dict_str, idx)); | ||
369 | else | ||
370 | lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); | ||
371 | } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) { | ||
372 | uint32_t narray = 0, nhash = 0; | ||
373 | GCtab *t, *mt = NULL; | ||
374 | if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); /* Nesting limit reached. */ | ||
375 | sbx->depth--; | ||
376 | if (tp == SER_TAG_DICT_MT) { | ||
377 | GCtab *dict_mt; | ||
378 | uint32_t idx; | ||
379 | r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; | ||
380 | idx++; /* Encoded indexes are zero-based, the dictionary array starts at slot 1. */ | ||
381 | dict_mt = tabref(sbx->dict_mt); | ||
382 | if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx))) | ||
383 | mt = tabV(arrayslot(dict_mt, idx)); | ||
384 | else | ||
385 | lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); | ||
386 | r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; | ||
387 | if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag; /* A plain table tag must follow the metatable index. */ | ||
388 | } | ||
389 | if (tp >= SER_TAG_TAB+2) { /* Array count present. */ | ||
390 | r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob; | ||
391 | } | ||
392 | if ((tp & 1)) { /* Hash count present. */ | ||
393 | r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob; | ||
394 | } | ||
395 | t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash)); | ||
396 | /* NOBARRIER: The table is new (marked white). */ | ||
397 | setgcref(t->metatable, obj2gco(mt)); | ||
398 | settabV(sbufL(sbx), o, t); | ||
399 | if (narray) { | ||
400 | TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4); /* Slot 0 was not written for tags >= SER_TAG_TAB+4. */ | ||
401 | TValue *oe = tvref(t->array) + narray; | ||
402 | while (oa < oe) r = serialize_get(r, sbx, oa++); | ||
403 | } | ||
404 | if (nhash) { | ||
405 | do { | ||
406 | TValue k, *v; | ||
407 | r = serialize_get(r, sbx, &k); | ||
408 | v = lj_tab_set(sbufL(sbx), t, &k); | ||
409 | if (LJ_UNLIKELY(!tvisnil(v))) /* The slot for this key already holds a value. */ | ||
410 | lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY); | ||
411 | r = serialize_get(r, sbx, v); | ||
412 | } while (--nhash); | ||
413 | } | ||
414 | sbx->depth++; | ||
415 | #if LJ_HASFFI | ||
416 | } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) { | ||
417 | uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8; | ||
418 | GCcdata *cd; | ||
419 | if (LJ_UNLIKELY(r + sz > w)) goto eob; | ||
420 | if (LJ_UNLIKELY(!ctype_ctsG(G(sbufL(sbx))))) goto badtag; /* NOTE(review): presumably rejects cdata tags while the C type state is not set up -- confirm. */ | ||
421 | cd = lj_cdata_new_(sbufL(sbx), | ||
422 | tp == SER_TAG_INT64 ? CTID_INT64 : | ||
423 | tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE, | ||
424 | sz); | ||
425 | memcpy(cdataptr(cd), r, sz); r += sz; | ||
426 | #if LJ_BE | ||
427 | *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd)); | ||
428 | if (sz == 16) | ||
429 | ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]); | ||
430 | #endif | ||
431 | if (sz == 16) { /* Fix non-canonical NaNs. */ | ||
432 | TValue *cdo = (TValue *)cdataptr(cd); | ||
433 | if (!tvisnum(&cdo[0])) setnanV(&cdo[0]); | ||
434 | if (!tvisnum(&cdo[1])) setnanV(&cdo[1]); | ||
435 | } | ||
436 | setcdataV(sbufL(sbx), o, cd); | ||
437 | #endif | ||
438 | } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) { | ||
439 | uintptr_t ud = 0; /* Stays zero for SER_TAG_NULL. */ | ||
440 | if (tp == SER_TAG_LIGHTUD32) { | ||
441 | if (LJ_UNLIKELY(r + 4 > w)) goto eob; | ||
442 | ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)); | ||
443 | r += 4; | ||
444 | } | ||
445 | #if LJ_64 | ||
446 | else if (tp == SER_TAG_LIGHTUD64) { | ||
447 | if (LJ_UNLIKELY(r + 8 > w)) goto eob; | ||
448 | memcpy(&ud, r, 8); r += 8; | ||
449 | #if LJ_BE | ||
450 | ud = lj_bswap64(ud); | ||
451 | #endif | ||
452 | } | ||
453 | setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud)); | ||
454 | #else | ||
455 | setrawlightudV(o, (void *)ud); | ||
456 | #endif | ||
457 | } else { | ||
458 | badtag: | ||
459 | lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp); /* The offending tag is the error argument. */ | ||
460 | } | ||
461 | return r; | ||
462 | eob: | ||
463 | lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB); /* Data ended before the object was complete. */ | ||
464 | return NULL; /* NOTE(review): presumably not reached, if lj_err_caller() does not return -- confirm. */ | ||
465 | } | ||
466 | |||
467 | /* -- External serialization API ------------------------------------------ */ | ||
468 | |||
469 | /* Encode to buffer. Writes the encoding of o at sbx->w and stores the advanced write pointer back. Returns sbx. */ | ||
470 | SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o) | ||
471 | { | ||
472 | sbx->depth = LJ_SERIALIZE_DEPTH; /* Reset the nesting limit for this object. */ | ||
473 | sbx->w = serialize_put(sbx->w, sbx, o); | ||
474 | return sbx; | ||
475 | } | ||
476 | |||
477 | /* Decode from buffer. Reads one object at sbx->r into o and returns the read pointer past it. sbx->r is not updated here. */ | ||
478 | char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o) | ||
479 | { | ||
480 | sbx->depth = LJ_SERIALIZE_DEPTH; /* Reset the nesting limit for this object. */ | ||
481 | return serialize_get(sbx->r, sbx, o); | ||
482 | } | ||
483 | |||
484 | /* Stand-alone encoding, borrowing from global temporary buffer. Returns a string created from the encoded bytes. */ | ||
485 | GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o) | ||
486 | { | ||
487 | SBufExt sbx; | ||
488 | char *w; | ||
489 | memset(&sbx, 0, sizeof(SBufExt)); /* Also clears the dictionary references. */ | ||
490 | lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf); | ||
491 | sbx.depth = LJ_SERIALIZE_DEPTH; | ||
492 | w = serialize_put(sbx.w, &sbx, o); | ||
493 | return lj_str_new(L, sbx.b, (size_t)(w - sbx.b)); /* Everything from sbx.b up to the final write pointer. */ | ||
494 | } | ||
495 | |||
496 | /* Stand-alone decoding, copy-on-write from string. Decodes exactly one object from str into o. */ | ||
497 | void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str) | ||
498 | { | ||
499 | SBufExt sbx; | ||
500 | char *r; | ||
501 | memset(&sbx, 0, sizeof(SBufExt)); /* Also clears the dictionary references. */ | ||
502 | lj_bufx_set_cow(L, &sbx, strdata(str), str->len); | ||
503 | /* No need to set sbx.cowref here. */ | ||
504 | sbx.depth = LJ_SERIALIZE_DEPTH; | ||
505 | r = serialize_get(sbx.r, &sbx, o); | ||
506 | if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV); /* Data remains after the decoded object. */ | ||
507 | } | ||
508 | |||
509 | #if LJ_HASJIT | ||
510 | /* Peek into buffer to find the result IRType for specialization purposes. Only reads the tag at sbx->r, the buffer is not modified. */ | ||
511 | LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx) | ||
512 | { | ||
513 | uint32_t tp; | ||
514 | if (serialize_ru124(sbx->r, sbx->w, &tp)) { | ||
515 | /* This must match the handling of all tags in the decoder above. */ | ||
516 | switch (tp) { | ||
517 | case SER_TAG_NIL: return IRT_NIL; | ||
518 | case SER_TAG_FALSE: return IRT_FALSE; | ||
519 | case SER_TAG_TRUE: return IRT_TRUE; | ||
520 | case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64: | ||
521 | return IRT_LIGHTUD; | ||
522 | case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM; | ||
523 | case SER_TAG_NUM: return IRT_NUM; | ||
524 | case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2: | ||
525 | case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5: | ||
526 | case SER_TAG_DICT_MT: | ||
527 | return IRT_TAB; | ||
528 | case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX: | ||
529 | return IRT_CDATA; | ||
530 | case SER_TAG_DICT_STR: | ||
531 | default: /* Tags >= SER_TAG_STR, plus the unassigned tags 0x13..0x1f. */ | ||
532 | return IRT_STR; | ||
533 | } | ||
534 | } | ||
535 | return IRT_NIL; /* Will fail on actual decode. */ | ||
536 | } | ||
537 | #endif | ||
538 | |||
539 | #endif | ||
diff --git a/src/lj_serialize.h b/src/lj_serialize.h new file mode 100644 index 00000000..d3f4275a --- /dev/null +++ b/src/lj_serialize.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | ** Object de/serialization. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_SERIALIZE_H | ||
7 | #define _LJ_SERIALIZE_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | #include "lj_buf.h" | ||
11 | |||
12 | #if LJ_HASBUFFER | ||
13 | |||
14 | #define LJ_SERIALIZE_DEPTH 100 /* Default depth limit for nested tables. */ | ||
15 | |||
16 | LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict); /* Prepare string dictionary. */ | ||
17 | LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict); /* Prepare metatable dictionary. */ | ||
18 | LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o); /* Encode o to buffer. */ | ||
19 | LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o); /* Decode one object from buffer into o. */ | ||
20 | LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o); /* Stand-alone encoding to a string. */ | ||
21 | LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str); /* Stand-alone decoding from a string. */ | ||
22 | #if LJ_HASJIT | ||
23 | LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx); /* IRType of the next object in the buffer. */ | ||
24 | #endif | ||
25 | |||
26 | #endif | ||
27 | |||
28 | #endif | ||
diff --git a/src/lj_snap.c b/src/lj_snap.c index 7a02c9a9..bcc9da38 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -68,20 +68,37 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
68 | for (s = 0; s < nslots; s++) { | 68 | for (s = 0; s < nslots; s++) { |
69 | TRef tr = J->slot[s]; | 69 | TRef tr = J->slot[s]; |
70 | IRRef ref = tref_ref(tr); | 70 | IRRef ref = tref_ref(tr); |
71 | #if LJ_FR2 | ||
72 | if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ | ||
73 | if ((tr & TREF_FRAME)) | ||
74 | map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); | ||
75 | continue; | ||
76 | } | ||
77 | if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { | ||
78 | cTValue *base = J->L->base - J->baseslot; | ||
79 | tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); | ||
80 | ref = tref_ref(tr); | ||
81 | } | ||
82 | #endif | ||
71 | if (ref) { | 83 | if (ref) { |
72 | SnapEntry sn = SNAP_TR(s, tr); | 84 | SnapEntry sn = SNAP_TR(s, tr); |
73 | IRIns *ir = &J->cur.ir[ref]; | 85 | IRIns *ir = &J->cur.ir[ref]; |
74 | if (!(sn & (SNAP_CONT|SNAP_FRAME)) && | 86 | if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && |
75 | ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { | 87 | ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { |
76 | /* No need to snapshot unmodified non-inherited slots. */ | 88 | /* |
77 | if (!(ir->op2 & IRSLOAD_INHERIT)) | 89 | ** No need to snapshot unmodified non-inherited slots. |
90 | ** But always snapshot the function below a frame in LJ_FR2 mode. | ||
91 | */ | ||
92 | if (!(ir->op2 & IRSLOAD_INHERIT) && | ||
93 | (!LJ_FR2 || s == 0 || s+1 == nslots || | ||
94 | !(J->slot[s+1] & (TREF_CONT|TREF_FRAME)))) | ||
78 | continue; | 95 | continue; |
79 | /* No need to restore readonly slots and unmodified non-parent slots. */ | 96 | /* No need to restore readonly slots and unmodified non-parent slots. */ |
80 | if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && | 97 | if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && |
81 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 98 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) |
82 | sn |= SNAP_NORESTORE; | 99 | sn |= SNAP_NORESTORE; |
83 | } | 100 | } |
84 | if (LJ_SOFTFP && irt_isnum(ir->t)) | 101 | if (LJ_SOFTFP32 && irt_isnum(ir->t)) |
85 | sn |= SNAP_SOFTFPNUM; | 102 | sn |= SNAP_SOFTFPNUM; |
86 | map[n++] = sn; | 103 | map[n++] = sn; |
87 | } | 104 | } |
@@ -90,35 +107,54 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
90 | } | 107 | } |
91 | 108 | ||
92 | /* Add frame links at the end of the snapshot. */ | 109 | /* Add frame links at the end of the snapshot. */ |
93 | static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) | 110 | static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) |
94 | { | 111 | { |
95 | cTValue *frame = J->L->base - 1; | 112 | cTValue *frame = J->L->base - 1; |
96 | cTValue *lim = J->L->base - J->baseslot; | 113 | cTValue *lim = J->L->base - J->baseslot + LJ_FR2; |
97 | cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; | 114 | GCfunc *fn = frame_func(frame); |
115 | cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; | ||
116 | #if LJ_FR2 | ||
117 | uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); | ||
118 | lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot"); | ||
119 | memcpy(map, &pcbase, sizeof(uint64_t)); | ||
120 | #else | ||
98 | MSize f = 0; | 121 | MSize f = 0; |
99 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ | 122 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ |
100 | lua_assert(!J->pt || | 123 | #endif |
124 | lj_assertJ(!J->pt || | ||
101 | (J->pc >= proto_bc(J->pt) && | 125 | (J->pc >= proto_bc(J->pt) && |
102 | J->pc < proto_bc(J->pt) + J->pt->sizebc)); | 126 | J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC"); |
103 | while (frame > lim) { /* Backwards traversal of all frames above base. */ | 127 | while (frame > lim) { /* Backwards traversal of all frames above base. */ |
104 | if (frame_islua(frame)) { | 128 | if (frame_islua(frame)) { |
129 | #if !LJ_FR2 | ||
105 | map[f++] = SNAP_MKPC(frame_pc(frame)); | 130 | map[f++] = SNAP_MKPC(frame_pc(frame)); |
131 | #endif | ||
106 | frame = frame_prevl(frame); | 132 | frame = frame_prevl(frame); |
107 | } else if (frame_iscont(frame)) { | 133 | } else if (frame_iscont(frame)) { |
134 | #if !LJ_FR2 | ||
108 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 135 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
109 | map[f++] = SNAP_MKPC(frame_contpc(frame)); | 136 | map[f++] = SNAP_MKPC(frame_contpc(frame)); |
137 | #endif | ||
110 | frame = frame_prevd(frame); | 138 | frame = frame_prevd(frame); |
111 | } else { | 139 | } else { |
112 | lua_assert(!frame_isc(frame)); | 140 | lj_assertJ(!frame_isc(frame), "broken frame chain"); |
141 | #if !LJ_FR2 | ||
113 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 142 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
143 | #endif | ||
114 | frame = frame_prevd(frame); | 144 | frame = frame_prevd(frame); |
115 | continue; | 145 | continue; |
116 | } | 146 | } |
117 | if (frame + funcproto(frame_func(frame))->framesize > ftop) | 147 | if (frame + funcproto(frame_func(frame))->framesize > ftop) |
118 | ftop = frame + funcproto(frame_func(frame))->framesize; | 148 | ftop = frame + funcproto(frame_func(frame))->framesize; |
119 | } | 149 | } |
120 | lua_assert(f == (MSize)(1 + J->framedepth)); | 150 | *topslot = (uint8_t)(ftop - lim); |
121 | return (BCReg)(ftop - lim); | 151 | #if LJ_FR2 |
152 | lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def"); | ||
153 | return 2; | ||
154 | #else | ||
155 | lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size"); | ||
156 | return f; | ||
157 | #endif | ||
122 | } | 158 | } |
123 | 159 | ||
124 | /* Take a snapshot of the current stack. */ | 160 | /* Take a snapshot of the current stack. */ |
@@ -128,16 +164,17 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | |||
128 | MSize nent; | 164 | MSize nent; |
129 | SnapEntry *p; | 165 | SnapEntry *p; |
130 | /* Conservative estimate. */ | 166 | /* Conservative estimate. */ |
131 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); | 167 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); |
132 | p = &J->cur.snapmap[nsnapmap]; | 168 | p = &J->cur.snapmap[nsnapmap]; |
133 | nent = snapshot_slots(J, p, nslots); | 169 | nent = snapshot_slots(J, p, nslots); |
134 | snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); | 170 | snap->nent = (uint8_t)nent; |
171 | nent += snapshot_framelinks(J, p + nent, &snap->topslot); | ||
135 | snap->mapofs = (uint32_t)nsnapmap; | 172 | snap->mapofs = (uint32_t)nsnapmap; |
136 | snap->ref = (IRRef1)J->cur.nins; | 173 | snap->ref = (IRRef1)J->cur.nins; |
137 | snap->nent = (uint8_t)nent; | 174 | snap->mcofs = 0; |
138 | snap->nslots = (uint8_t)nslots; | 175 | snap->nslots = (uint8_t)nslots; |
139 | snap->count = 0; | 176 | snap->count = 0; |
140 | J->cur.nsnapmap = (uint32_t)(nsnapmap + nent + 1 + J->framedepth); | 177 | J->cur.nsnapmap = (uint32_t)(nsnapmap + nent); |
141 | } | 178 | } |
142 | 179 | ||
143 | /* Add or merge a snapshot. */ | 180 | /* Add or merge a snapshot. */ |
@@ -146,8 +183,8 @@ void lj_snap_add(jit_State *J) | |||
146 | MSize nsnap = J->cur.nsnap; | 183 | MSize nsnap = J->cur.nsnap; |
147 | MSize nsnapmap = J->cur.nsnapmap; | 184 | MSize nsnapmap = J->cur.nsnapmap; |
148 | /* Merge if no ins. inbetween or if requested and no guard inbetween. */ | 185 | /* Merge if no ins. inbetween or if requested and no guard inbetween. */ |
149 | if (J->mergesnap ? !irt_isguard(J->guardemit) : | 186 | if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) || |
150 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { | 187 | (J->mergesnap && !irt_isguard(J->guardemit))) { |
151 | if (nsnap == 1) { /* But preserve snap #0 PC. */ | 188 | if (nsnap == 1) { /* But preserve snap #0 PC. */ |
152 | emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); | 189 | emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); |
153 | goto nomerge; | 190 | goto nomerge; |
@@ -194,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, | |||
194 | #define DEF_SLOT(s) udf[(s)] *= 3 | 231 | #define DEF_SLOT(s) udf[(s)] *= 3 |
195 | 232 | ||
196 | /* Scan through following bytecode and check for uses/defs. */ | 233 | /* Scan through following bytecode and check for uses/defs. */ |
197 | lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); | 234 | lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, |
235 | "snapshot PC out of range"); | ||
198 | for (;;) { | 236 | for (;;) { |
199 | BCIns ins = *pc++; | 237 | BCIns ins = *pc++; |
200 | BCOp op = bc_op(ins); | 238 | BCOp op = bc_op(ins); |
@@ -205,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, | |||
205 | switch (bcmode_c(op)) { | 243 | switch (bcmode_c(op)) { |
206 | case BCMvar: USE_SLOT(bc_c(ins)); break; | 244 | case BCMvar: USE_SLOT(bc_c(ins)); break; |
207 | case BCMrbase: | 245 | case BCMrbase: |
208 | lua_assert(op == BC_CAT); | 246 | lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op); |
209 | for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); | 247 | for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); |
210 | for (; s < maxslot; s++) DEF_SLOT(s); | 248 | for (; s < maxslot; s++) DEF_SLOT(s); |
211 | break; | 249 | break; |
@@ -245,7 +283,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, | |||
245 | case BCMbase: | 283 | case BCMbase: |
246 | if (op >= BC_CALLM && op <= BC_ITERN) { | 284 | if (op >= BC_CALLM && op <= BC_ITERN) { |
247 | BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? | 285 | BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? |
248 | maxslot : (bc_a(ins) + bc_c(ins)); | 286 | maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2); |
287 | if (LJ_FR2) DEF_SLOT(bc_a(ins)+1); | ||
249 | s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); | 288 | s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); |
250 | for (; s < top; s++) USE_SLOT(s); | 289 | for (; s < top; s++) USE_SLOT(s); |
251 | for (; s < maxslot; s++) DEF_SLOT(s); | 290 | for (; s < maxslot; s++) DEF_SLOT(s); |
@@ -263,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, | |||
263 | break; | 302 | break; |
264 | default: break; | 303 | default: break; |
265 | } | 304 | } |
266 | lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); | 305 | lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, |
306 | "use/def analysis PC out of range"); | ||
267 | } | 307 | } |
268 | 308 | ||
269 | #undef USE_SLOT | 309 | #undef USE_SLOT |
@@ -321,8 +361,8 @@ void lj_snap_shrink(jit_State *J) | |||
321 | MSize n, m, nlim, nent = snap->nent; | 361 | MSize n, m, nlim, nent = snap->nent; |
322 | uint8_t udf[SNAP_USEDEF_SLOTS]; | 362 | uint8_t udf[SNAP_USEDEF_SLOTS]; |
323 | BCReg maxslot = J->maxslot; | 363 | BCReg maxslot = J->maxslot; |
324 | BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); | ||
325 | BCReg baseslot = J->baseslot; | 364 | BCReg baseslot = J->baseslot; |
365 | BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); | ||
326 | if (minslot < maxslot) snap_useuv(J->pt, udf); | 366 | if (minslot < maxslot) snap_useuv(J->pt, udf); |
327 | maxslot += baseslot; | 367 | maxslot += baseslot; |
328 | minslot += baseslot; | 368 | minslot += baseslot; |
@@ -365,25 +405,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) | |||
365 | } | 405 | } |
366 | 406 | ||
367 | /* Copy RegSP from parent snapshot to the parent links of the IR. */ | 407 | /* Copy RegSP from parent snapshot to the parent links of the IR. */ |
368 | IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) | 408 | IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir) |
369 | { | 409 | { |
370 | SnapShot *snap = &T->snap[snapno]; | 410 | SnapShot *snap = &T->snap[snapno]; |
371 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 411 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
372 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 412 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
373 | MSize n = 0; | 413 | MSize n = 0; |
374 | IRRef ref = 0; | 414 | IRRef ref = 0; |
415 | UNUSED(J); | ||
375 | for ( ; ; ir++) { | 416 | for ( ; ; ir++) { |
376 | uint32_t rs; | 417 | uint32_t rs; |
377 | if (ir->o == IR_SLOAD) { | 418 | if (ir->o == IR_SLOAD) { |
378 | if (!(ir->op2 & IRSLOAD_PARENT)) break; | 419 | if (!(ir->op2 & IRSLOAD_PARENT)) break; |
379 | for ( ; ; n++) { | 420 | for ( ; ; n++) { |
380 | lua_assert(n < snap->nent); | 421 | lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1); |
381 | if (snap_slot(map[n]) == ir->op1) { | 422 | if (snap_slot(map[n]) == ir->op1) { |
382 | ref = snap_ref(map[n++]); | 423 | ref = snap_ref(map[n++]); |
383 | break; | 424 | break; |
384 | } | 425 | } |
385 | } | 426 | } |
386 | } else if (LJ_SOFTFP && ir->o == IR_HIOP) { | 427 | } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { |
387 | ref++; | 428 | ref++; |
388 | } else if (ir->o == IR_PVAL) { | 429 | } else if (ir->o == IR_PVAL) { |
389 | ref = ir->op1 + REF_BIAS; | 430 | ref = ir->op1 + REF_BIAS; |
@@ -394,7 +435,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) | |||
394 | if (bloomtest(rfilt, ref)) | 435 | if (bloomtest(rfilt, ref)) |
395 | rs = snap_renameref(T, snapno, ref, rs); | 436 | rs = snap_renameref(T, snapno, ref, rs); |
396 | ir->prev = (uint16_t)rs; | 437 | ir->prev = (uint16_t)rs; |
397 | lua_assert(regsp_used(rs)); | 438 | lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS); |
398 | } | 439 | } |
399 | return ir; | 440 | return ir; |
400 | } | 441 | } |
@@ -409,10 +450,10 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) | |||
409 | case IR_KPRI: return TREF_PRI(irt_type(ir->t)); | 450 | case IR_KPRI: return TREF_PRI(irt_type(ir->t)); |
410 | case IR_KINT: return lj_ir_kint(J, ir->i); | 451 | case IR_KINT: return lj_ir_kint(J, ir->i); |
411 | case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); | 452 | case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); |
412 | case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); | 453 | case IR_KNUM: case IR_KINT64: |
413 | case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); | 454 | return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); |
414 | case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ | 455 | case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ |
415 | default: lua_assert(0); return TREF_NIL; break; | 456 | default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL; |
416 | } | 457 | } |
417 | } | 458 | } |
418 | 459 | ||
@@ -422,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) | |||
422 | MSize j; | 463 | MSize j; |
423 | for (j = 0; j < nmax; j++) | 464 | for (j = 0; j < nmax; j++) |
424 | if (snap_ref(map[j]) == ref) | 465 | if (snap_ref(map[j]) == ref) |
425 | return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); | 466 | return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME); |
426 | return 0; | 467 | return 0; |
427 | } | 468 | } |
428 | 469 | ||
@@ -483,21 +524,27 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
483 | goto setslot; | 524 | goto setslot; |
484 | bloomset(seen, ref); | 525 | bloomset(seen, ref); |
485 | if (irref_isk(ref)) { | 526 | if (irref_isk(ref)) { |
486 | tr = snap_replay_const(J, ir); | 527 | /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */ |
528 | if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL))) | ||
529 | tr = 0; | ||
530 | else | ||
531 | tr = snap_replay_const(J, ir); | ||
487 | } else if (!regsp_used(ir->prev)) { | 532 | } else if (!regsp_used(ir->prev)) { |
488 | pass23 = 1; | 533 | pass23 = 1; |
489 | lua_assert(s != 0); | 534 | lj_assertJ(s != 0, "unused slot 0 in snapshot"); |
490 | tr = s; | 535 | tr = s; |
491 | } else { | 536 | } else { |
492 | IRType t = irt_type(ir->t); | 537 | IRType t = irt_type(ir->t); |
493 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; | 538 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; |
494 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; | 539 | if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; |
495 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); | 540 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); |
541 | if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX; | ||
496 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); | 542 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); |
497 | } | 543 | } |
498 | setslot: | 544 | setslot: |
499 | J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ | 545 | /* Same as TREF_* flags. */ |
500 | J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); | 546 | J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME)); |
547 | J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); | ||
501 | if ((sn & SNAP_FRAME)) | 548 | if ((sn & SNAP_FRAME)) |
502 | J->baseslot = s+1; | 549 | J->baseslot = s+1; |
503 | } | 550 | } |
@@ -512,8 +559,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
512 | if (regsp_reg(ir->r) == RID_SUNK) { | 559 | if (regsp_reg(ir->r) == RID_SUNK) { |
513 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; | 560 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; |
514 | pass23 = 1; | 561 | pass23 = 1; |
515 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | 562 | lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || |
516 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | 563 | ir->o == IR_CNEW || ir->o == IR_CNEWI, |
564 | "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); | ||
517 | if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); | 565 | if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); |
518 | if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); | 566 | if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); |
519 | if (LJ_HASFFI && ir->o == IR_CNEWI) { | 567 | if (LJ_HASFFI && ir->o == IR_CNEWI) { |
@@ -525,13 +573,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
525 | if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { | 573 | if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { |
526 | if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) | 574 | if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) |
527 | snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); | 575 | snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); |
528 | else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | 576 | else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && |
529 | irs+1 < irlast && (irs+1)->o == IR_HIOP) | 577 | irs+1 < irlast && (irs+1)->o == IR_HIOP) |
530 | snap_pref(J, T, map, nent, seen, (irs+1)->op2); | 578 | snap_pref(J, T, map, nent, seen, (irs+1)->op2); |
531 | } | 579 | } |
532 | } | 580 | } |
533 | } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { | 581 | } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { |
534 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | 582 | lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, |
583 | "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); | ||
535 | J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); | 584 | J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); |
536 | } | 585 | } |
537 | } | 586 | } |
@@ -581,20 +630,21 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
581 | val = snap_pref(J, T, map, nent, seen, irs->op2); | 630 | val = snap_pref(J, T, map, nent, seen, irs->op2); |
582 | if (val == 0) { | 631 | if (val == 0) { |
583 | IRIns *irc = &T->ir[irs->op2]; | 632 | IRIns *irc = &T->ir[irs->op2]; |
584 | lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); | 633 | lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT, |
634 | "sunk store for parent IR %04d with bad op %d", | ||
635 | refp - REF_BIAS, irc->o); | ||
585 | val = snap_pref(J, T, map, nent, seen, irc->op1); | 636 | val = snap_pref(J, T, map, nent, seen, irc->op1); |
586 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); | 637 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); |
587 | } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | 638 | } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && |
588 | irs+1 < irlast && (irs+1)->o == IR_HIOP) { | 639 | irs+1 < irlast && (irs+1)->o == IR_HIOP) { |
589 | IRType t = IRT_I64; | 640 | IRType t = IRT_I64; |
590 | if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) | 641 | if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) |
591 | t = IRT_NUM; | 642 | t = IRT_NUM; |
592 | lj_needsplit(J); | 643 | lj_needsplit(J); |
593 | if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { | 644 | if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { |
594 | uint64_t k = (uint32_t)T->ir[irs->op2].i + | 645 | uint64_t k = (uint32_t)T->ir[irs->op2].i + |
595 | ((uint64_t)T->ir[(irs+1)->op2].i << 32); | 646 | ((uint64_t)T->ir[(irs+1)->op2].i << 32); |
596 | val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, | 647 | val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k); |
597 | lj_ir_k64_find(J, k)); | ||
598 | } else { | 648 | } else { |
599 | val = emitir_raw(IRT(IR_HIOP, t), val, | 649 | val = emitir_raw(IRT(IR_HIOP, t), val, |
600 | snap_pref(J, T, map, nent, seen, (irs+1)->op2)); | 650 | snap_pref(J, T, map, nent, seen, (irs+1)->op2)); |
@@ -632,7 +682,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
632 | IRType1 t = ir->t; | 682 | IRType1 t = ir->t; |
633 | RegSP rs = ir->prev; | 683 | RegSP rs = ir->prev; |
634 | if (irref_isk(ref)) { /* Restore constant slot. */ | 684 | if (irref_isk(ref)) { /* Restore constant slot. */ |
635 | lj_ir_kvalue(J->L, o, ir); | 685 | if (ir->o == IR_KPTR) { |
686 | o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir); | ||
687 | } else { | ||
688 | lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL), | ||
689 | "restore of const from IR %04d with bad op %d", | ||
690 | ref - REF_BIAS, ir->o); | ||
691 | lj_ir_kvalue(J->L, o, ir); | ||
692 | } | ||
636 | return; | 693 | return; |
637 | } | 694 | } |
638 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) | 695 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) |
@@ -641,22 +698,24 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
641 | int32_t *sps = &ex->spill[regsp_spill(rs)]; | 698 | int32_t *sps = &ex->spill[regsp_spill(rs)]; |
642 | if (irt_isinteger(t)) { | 699 | if (irt_isinteger(t)) { |
643 | setintV(o, *sps); | 700 | setintV(o, *sps); |
644 | #if !LJ_SOFTFP | 701 | #if !LJ_SOFTFP32 |
645 | } else if (irt_isnum(t)) { | 702 | } else if (irt_isnum(t)) { |
646 | o->u64 = *(uint64_t *)sps; | 703 | o->u64 = *(uint64_t *)sps; |
647 | #endif | 704 | #endif |
648 | } else if (LJ_64 && irt_islightud(t)) { | 705 | #if LJ_64 && !LJ_GC64 |
706 | } else if (irt_islightud(t)) { | ||
649 | /* 64 bit lightuserdata which may escape already has the tag bits. */ | 707 | /* 64 bit lightuserdata which may escape already has the tag bits. */ |
650 | o->u64 = *(uint64_t *)sps; | 708 | o->u64 = *(uint64_t *)sps; |
709 | #endif | ||
651 | } else { | 710 | } else { |
652 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ | 711 | lj_assertJ(!irt_ispri(t), "PRI ref with spill slot"); |
653 | setgcrefi(o->gcr, *sps); | 712 | setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); |
654 | setitype(o, irt_toitype(t)); | ||
655 | } | 713 | } |
656 | } else { /* Restore from register. */ | 714 | } else { /* Restore from register. */ |
657 | Reg r = regsp_reg(rs); | 715 | Reg r = regsp_reg(rs); |
658 | if (ra_noreg(r)) { | 716 | if (ra_noreg(r)) { |
659 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | 717 | lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, |
718 | "restore from IR %04d has no reg", ref - REF_BIAS); | ||
660 | snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); | 719 | snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); |
661 | if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); | 720 | if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); |
662 | return; | 721 | return; |
@@ -665,21 +724,26 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
665 | #if !LJ_SOFTFP | 724 | #if !LJ_SOFTFP |
666 | } else if (irt_isnum(t)) { | 725 | } else if (irt_isnum(t)) { |
667 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); | 726 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); |
727 | #elif LJ_64 /* && LJ_SOFTFP */ | ||
728 | } else if (irt_isnum(t)) { | ||
729 | o->u64 = ex->gpr[r-RID_MIN_GPR]; | ||
668 | #endif | 730 | #endif |
669 | } else if (LJ_64 && irt_islightud(t)) { | 731 | #if LJ_64 && !LJ_GC64 |
670 | /* 64 bit lightuserdata which may escape already has the tag bits. */ | 732 | } else if (irt_is64(t)) { |
733 | /* 64 bit values that already have the tag bits. */ | ||
671 | o->u64 = ex->gpr[r-RID_MIN_GPR]; | 734 | o->u64 = ex->gpr[r-RID_MIN_GPR]; |
735 | #endif | ||
736 | } else if (irt_ispri(t)) { | ||
737 | setpriV(o, irt_toitype(t)); | ||
672 | } else { | 738 | } else { |
673 | if (!irt_ispri(t)) | 739 | setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t)); |
674 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); | ||
675 | setitype(o, irt_toitype(t)); | ||
676 | } | 740 | } |
677 | } | 741 | } |
678 | } | 742 | } |
679 | 743 | ||
680 | #if LJ_HASFFI | 744 | #if LJ_HASFFI |
681 | /* Restore raw data from the trace exit state. */ | 745 | /* Restore raw data from the trace exit state. */ |
682 | static void snap_restoredata(GCtrace *T, ExitState *ex, | 746 | static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex, |
683 | SnapNo snapno, BloomFilter rfilt, | 747 | SnapNo snapno, BloomFilter rfilt, |
684 | IRRef ref, void *dst, CTSize sz) | 748 | IRRef ref, void *dst, CTSize sz) |
685 | { | 749 | { |
@@ -687,9 +751,10 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, | |||
687 | RegSP rs = ir->prev; | 751 | RegSP rs = ir->prev; |
688 | int32_t *src; | 752 | int32_t *src; |
689 | uint64_t tmp; | 753 | uint64_t tmp; |
754 | UNUSED(J); | ||
690 | if (irref_isk(ref)) { | 755 | if (irref_isk(ref)) { |
691 | if (ir->o == IR_KNUM || ir->o == IR_KINT64) { | 756 | if (ir_isk64(ir)) { |
692 | src = mref(ir->ptr, int32_t); | 757 | src = (int32_t *)&ir[1]; |
693 | } else if (sz == 8) { | 758 | } else if (sz == 8) { |
694 | tmp = (uint64_t)(uint32_t)ir->i; | 759 | tmp = (uint64_t)(uint32_t)ir->i; |
695 | src = (int32_t *)&tmp; | 760 | src = (int32_t *)&tmp; |
@@ -709,8 +774,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, | |||
709 | Reg r = regsp_reg(rs); | 774 | Reg r = regsp_reg(rs); |
710 | if (ra_noreg(r)) { | 775 | if (ra_noreg(r)) { |
711 | /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ | 776 | /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ |
712 | lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | 777 | lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, |
713 | snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); | 778 | "restore from IR %04d has no reg", ref - REF_BIAS); |
779 | snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4); | ||
714 | *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; | 780 | *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; |
715 | return; | 781 | return; |
716 | } | 782 | } |
@@ -726,11 +792,13 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, | |||
726 | #else | 792 | #else |
727 | if (LJ_BE && sz == 4) src++; | 793 | if (LJ_BE && sz == 4) src++; |
728 | #endif | 794 | #endif |
729 | } | 795 | } else |
730 | #endif | 796 | #endif |
797 | if (LJ_64 && LJ_BE && sz == 4) src++; | ||
731 | } | 798 | } |
732 | } | 799 | } |
733 | lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); | 800 | lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8, |
801 | "restore from IR %04d with bad size %d", ref - REF_BIAS, sz); | ||
734 | if (sz == 4) *(int32_t *)dst = *src; | 802 | if (sz == 4) *(int32_t *)dst = *src; |
735 | else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; | 803 | else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; |
736 | else if (sz == 1) *(int8_t *)dst = (int8_t)*src; | 804 | else if (sz == 1) *(int8_t *)dst = (int8_t)*src; |
@@ -743,24 +811,27 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |||
743 | SnapNo snapno, BloomFilter rfilt, | 811 | SnapNo snapno, BloomFilter rfilt, |
744 | IRIns *ir, TValue *o) | 812 | IRIns *ir, TValue *o) |
745 | { | 813 | { |
746 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | 814 | lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || |
747 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | 815 | ir->o == IR_CNEW || ir->o == IR_CNEWI, |
816 | "sunk allocation with bad op %d", ir->o); | ||
748 | #if LJ_HASFFI | 817 | #if LJ_HASFFI |
749 | if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { | 818 | if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { |
750 | CTState *cts = ctype_cts(J->L); | 819 | CTState *cts = ctype_cts(J->L); |
751 | CTypeID id = (CTypeID)T->ir[ir->op1].i; | 820 | CTypeID id = (CTypeID)T->ir[ir->op1].i; |
752 | CTSize sz = lj_ctype_size(cts, id); | 821 | CTSize sz; |
753 | GCcdata *cd = lj_cdata_new(cts, id, sz); | 822 | CTInfo info = lj_ctype_info(cts, id, &sz); |
823 | GCcdata *cd = lj_cdata_newx(cts, id, sz, info); | ||
754 | setcdataV(J->L, o, cd); | 824 | setcdataV(J->L, o, cd); |
755 | if (ir->o == IR_CNEWI) { | 825 | if (ir->o == IR_CNEWI) { |
756 | uint8_t *p = (uint8_t *)cdataptr(cd); | 826 | uint8_t *p = (uint8_t *)cdataptr(cd); |
757 | lua_assert(sz == 4 || sz == 8); | 827 | lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz); |
758 | if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { | 828 | if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { |
759 | snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); | 829 | snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2, |
830 | LJ_LE ? p+4 : p, 4); | ||
760 | if (LJ_BE) p += 4; | 831 | if (LJ_BE) p += 4; |
761 | sz = 4; | 832 | sz = 4; |
762 | } | 833 | } |
763 | snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); | 834 | snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz); |
764 | } else { | 835 | } else { |
765 | IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; | 836 | IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; |
766 | for (irs = ir+1; irs < irlast; irs++) | 837 | for (irs = ir+1; irs < irlast; irs++) |
@@ -768,8 +839,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |||
768 | IRIns *iro = &T->ir[T->ir[irs->op1].op2]; | 839 | IRIns *iro = &T->ir[T->ir[irs->op1].op2]; |
769 | uint8_t *p = (uint8_t *)cd; | 840 | uint8_t *p = (uint8_t *)cd; |
770 | CTSize szs; | 841 | CTSize szs; |
771 | lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); | 842 | lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o); |
772 | lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); | 843 | lj_assertJ(T->ir[irs->op1].o == IR_ADD, |
844 | "sunk store with bad add op %d", T->ir[irs->op1].o); | ||
845 | lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64, | ||
846 | "sunk store with bad const offset op %d", iro->o); | ||
773 | if (irt_is64(irs->t)) szs = 8; | 847 | if (irt_is64(irs->t)) szs = 8; |
774 | else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; | 848 | else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; |
775 | else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; | 849 | else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; |
@@ -778,14 +852,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |||
778 | p += (int64_t)ir_k64(iro)->u64; | 852 | p += (int64_t)ir_k64(iro)->u64; |
779 | else | 853 | else |
780 | p += iro->i; | 854 | p += iro->i; |
781 | lua_assert(p >= (uint8_t *)cdataptr(cd) && | 855 | lj_assertJ(p >= (uint8_t *)cdataptr(cd) && |
782 | p + szs <= (uint8_t *)cdataptr(cd) + sz); | 856 | p + szs <= (uint8_t *)cdataptr(cd) + sz, |
857 | "sunk store with offset out of range"); | ||
783 | if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { | 858 | if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { |
784 | lua_assert(szs == 4); | 859 | lj_assertJ(szs == 4, "sunk store with bad size %d", szs); |
785 | snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); | 860 | snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2, |
861 | LJ_LE ? p+4 : p, 4); | ||
786 | if (LJ_BE) p += 4; | 862 | if (LJ_BE) p += 4; |
787 | } | 863 | } |
788 | snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); | 864 | snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs); |
789 | } | 865 | } |
790 | } | 866 | } |
791 | } else | 867 | } else |
@@ -800,10 +876,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |||
800 | if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { | 876 | if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { |
801 | IRIns *irk = &T->ir[irs->op1]; | 877 | IRIns *irk = &T->ir[irs->op1]; |
802 | TValue tmp, *val; | 878 | TValue tmp, *val; |
803 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | 879 | lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE || |
804 | irs->o == IR_FSTORE); | 880 | irs->o == IR_FSTORE, |
881 | "sunk store with bad op %d", irs->o); | ||
805 | if (irk->o == IR_FREF) { | 882 | if (irk->o == IR_FREF) { |
806 | lua_assert(irk->op2 == IRFL_TAB_META); | 883 | lj_assertJ(irk->op2 == IRFL_TAB_META, |
884 | "sunk store with bad field %d", irk->op2); | ||
807 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); | 885 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); |
808 | /* NOBARRIER: The table is new (marked white). */ | 886 | /* NOBARRIER: The table is new (marked white). */ |
809 | setgcref(t->metatable, obj2gco(tabV(&tmp))); | 887 | setgcref(t->metatable, obj2gco(tabV(&tmp))); |
@@ -814,7 +892,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |||
814 | val = lj_tab_set(J->L, t, &tmp); | 892 | val = lj_tab_set(J->L, t, &tmp); |
815 | /* NOBARRIER: The table is new (marked white). */ | 893 | /* NOBARRIER: The table is new (marked white). */ |
816 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); | 894 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); |
817 | if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { | 895 | if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { |
818 | snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); | 896 | snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); |
819 | val->u32.hi = tmp.u32.lo; | 897 | val->u32.hi = tmp.u32.lo; |
820 | } | 898 | } |
@@ -832,11 +910,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
832 | SnapShot *snap = &T->snap[snapno]; | 910 | SnapShot *snap = &T->snap[snapno]; |
833 | MSize n, nent = snap->nent; | 911 | MSize n, nent = snap->nent; |
834 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 912 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
835 | SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; | 913 | #if !LJ_FR2 || defined(LUA_USE_ASSERT) |
836 | int32_t ftsz0; | 914 | SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; |
915 | #endif | ||
916 | #if !LJ_FR2 | ||
917 | ptrdiff_t ftsz0; | ||
918 | #endif | ||
837 | TValue *frame; | 919 | TValue *frame; |
838 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 920 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
839 | const BCIns *pc = snap_pc(map[nent]); | 921 | const BCIns *pc = snap_pc(&map[nent]); |
840 | lua_State *L = J->L; | 922 | lua_State *L = J->L; |
841 | 923 | ||
842 | /* Set interpreter PC to the next PC to get correct error messages. */ | 924 | /* Set interpreter PC to the next PC to get correct error messages. */ |
@@ -849,8 +931,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
849 | } | 931 | } |
850 | 932 | ||
851 | /* Fill stack slots with data from the registers and spill slots. */ | 933 | /* Fill stack slots with data from the registers and spill slots. */ |
852 | frame = L->base-1; | 934 | frame = L->base-1-LJ_FR2; |
935 | #if !LJ_FR2 | ||
853 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ | 936 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ |
937 | #endif | ||
854 | for (n = 0; n < nent; n++) { | 938 | for (n = 0; n < nent; n++) { |
855 | SnapEntry sn = map[n]; | 939 | SnapEntry sn = map[n]; |
856 | if (!(sn & SNAP_NORESTORE)) { | 940 | if (!(sn & SNAP_NORESTORE)) { |
@@ -869,18 +953,27 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
869 | continue; | 953 | continue; |
870 | } | 954 | } |
871 | snap_restoreval(J, T, ex, snapno, rfilt, ref, o); | 955 | snap_restoreval(J, T, ex, snapno, rfilt, ref, o); |
872 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { | 956 | if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { |
873 | TValue tmp; | 957 | TValue tmp; |
874 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); | 958 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); |
875 | o->u32.hi = tmp.u32.lo; | 959 | o->u32.hi = tmp.u32.lo; |
960 | #if !LJ_FR2 | ||
876 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 961 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
877 | /* Overwrite tag with frame link. */ | 962 | /* Overwrite tag with frame link. */ |
878 | o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; | 963 | setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); |
879 | L->base = o+1; | 964 | L->base = o+1; |
965 | #endif | ||
966 | } else if ((sn & SNAP_KEYINDEX)) { | ||
967 | /* A IRT_INT key index slot is restored as a number. Undo this. */ | ||
968 | o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o))); | ||
969 | o->u32.hi = LJ_KEYINDEX; | ||
880 | } | 970 | } |
881 | } | 971 | } |
882 | } | 972 | } |
883 | lua_assert(map + nent == flinks); | 973 | #if LJ_FR2 |
974 | L->base += (map[nent+LJ_BE] & 0xff); | ||
975 | #endif | ||
976 | lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot"); | ||
884 | 977 | ||
885 | /* Compute current stack top. */ | 978 | /* Compute current stack top. */ |
886 | switch (bc_op(*pc)) { | 979 | switch (bc_op(*pc)) { |
diff --git a/src/lj_snap.h b/src/lj_snap.h index 03cf9038..b7dabed8 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h | |||
@@ -13,7 +13,8 @@ | |||
13 | LJ_FUNC void lj_snap_add(jit_State *J); | 13 | LJ_FUNC void lj_snap_add(jit_State *J); |
14 | LJ_FUNC void lj_snap_purge(jit_State *J); | 14 | LJ_FUNC void lj_snap_purge(jit_State *J); |
15 | LJ_FUNC void lj_snap_shrink(jit_State *J); | 15 | LJ_FUNC void lj_snap_shrink(jit_State *J); |
16 | LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); | 16 | LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, |
17 | IRIns *ir); | ||
17 | LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); | 18 | LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); |
18 | LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); | 19 | LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); |
19 | LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); | 20 | LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); |
diff --git a/src/lj_state.c b/src/lj_state.c index 1e2cfde9..0b9c46ba 100644 --- a/src/lj_state.c +++ b/src/lj_state.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "lj_obj.h" | 12 | #include "lj_obj.h" |
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_err.h" | 14 | #include "lj_err.h" |
15 | #include "lj_buf.h" | ||
15 | #include "lj_str.h" | 16 | #include "lj_str.h" |
16 | #include "lj_tab.h" | 17 | #include "lj_tab.h" |
17 | #include "lj_func.h" | 18 | #include "lj_func.h" |
@@ -24,8 +25,10 @@ | |||
24 | #include "lj_trace.h" | 25 | #include "lj_trace.h" |
25 | #include "lj_dispatch.h" | 26 | #include "lj_dispatch.h" |
26 | #include "lj_vm.h" | 27 | #include "lj_vm.h" |
28 | #include "lj_prng.h" | ||
27 | #include "lj_lex.h" | 29 | #include "lj_lex.h" |
28 | #include "lj_alloc.h" | 30 | #include "lj_alloc.h" |
31 | #include "luajit.h" | ||
29 | 32 | ||
30 | /* -- Stack handling ------------------------------------------------------ */ | 33 | /* -- Stack handling ------------------------------------------------------ */ |
31 | 34 | ||
@@ -47,6 +50,7 @@ | |||
47 | ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 | 50 | ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 |
48 | ** slots above top, but then mobj is always a function. So we can get by | 51 | ** slots above top, but then mobj is always a function. So we can get by |
49 | ** with 5 extra slots. | 52 | ** with 5 extra slots. |
53 | ** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC. | ||
50 | */ | 54 | */ |
51 | 55 | ||
52 | /* Resize stack slots and adjust pointers in state. */ | 56 | /* Resize stack slots and adjust pointers in state. */ |
@@ -57,9 +61,10 @@ static void resizestack(lua_State *L, MSize n) | |||
57 | MSize oldsize = L->stacksize; | 61 | MSize oldsize = L->stacksize; |
58 | MSize realsize = n + 1 + LJ_STACK_EXTRA; | 62 | MSize realsize = n + 1 + LJ_STACK_EXTRA; |
59 | GCobj *up; | 63 | GCobj *up; |
60 | lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); | 64 | lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1, |
65 | "inconsistent stack size"); | ||
61 | st = (TValue *)lj_mem_realloc(L, tvref(L->stack), | 66 | st = (TValue *)lj_mem_realloc(L, tvref(L->stack), |
62 | (MSize)(L->stacksize*sizeof(TValue)), | 67 | (MSize)(oldsize*sizeof(TValue)), |
63 | (MSize)(realsize*sizeof(TValue))); | 68 | (MSize)(realsize*sizeof(TValue))); |
64 | setmref(L->stack, st); | 69 | setmref(L->stack, st); |
65 | delta = (char *)st - (char *)oldst; | 70 | delta = (char *)st - (char *)oldst; |
@@ -67,12 +72,12 @@ static void resizestack(lua_State *L, MSize n) | |||
67 | while (oldsize < realsize) /* Clear new slots. */ | 72 | while (oldsize < realsize) /* Clear new slots. */ |
68 | setnilV(st + oldsize++); | 73 | setnilV(st + oldsize++); |
69 | L->stacksize = realsize; | 74 | L->stacksize = realsize; |
75 | if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize) | ||
76 | setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); | ||
70 | L->base = (TValue *)((char *)L->base + delta); | 77 | L->base = (TValue *)((char *)L->base + delta); |
71 | L->top = (TValue *)((char *)L->top + delta); | 78 | L->top = (TValue *)((char *)L->top + delta); |
72 | for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) | 79 | for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) |
73 | setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); | 80 | setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); |
74 | if (obj2gco(L) == gcref(G(L)->jit_L)) | ||
75 | setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); | ||
76 | } | 81 | } |
77 | 82 | ||
78 | /* Relimit stack after error, in case the limit was overdrawn. */ | 83 | /* Relimit stack after error, in case the limit was overdrawn. */ |
@@ -89,7 +94,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used) | |||
89 | return; /* Avoid stack shrinking while handling stack overflow. */ | 94 | return; /* Avoid stack shrinking while handling stack overflow. */ |
90 | if (4*used < L->stacksize && | 95 | if (4*used < L->stacksize && |
91 | 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && | 96 | 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && |
92 | obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ | 97 | /* Don't shrink stack of live trace. */ |
98 | (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L))) | ||
93 | resizestack(L, L->stacksize >> 1); | 99 | resizestack(L, L->stacksize >> 1); |
94 | } | 100 | } |
95 | 101 | ||
@@ -125,8 +131,9 @@ static void stack_init(lua_State *L1, lua_State *L) | |||
125 | L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; | 131 | L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; |
126 | stend = st + L1->stacksize; | 132 | stend = st + L1->stacksize; |
127 | setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); | 133 | setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); |
128 | L1->base = L1->top = st+1; | 134 | setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */ |
129 | setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */ | 135 | if (LJ_FR2) setnilV(st++); |
136 | L1->base = L1->top = st; | ||
130 | while (st < stend) /* Clear new slots. */ | 137 | while (st < stend) /* Clear new slots. */ |
131 | setnilV(st++); | 138 | setnilV(st++); |
132 | } | 139 | } |
@@ -143,12 +150,13 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) | |||
143 | /* NOBARRIER: State initialization, all objects are white. */ | 150 | /* NOBARRIER: State initialization, all objects are white. */ |
144 | setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); | 151 | setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); |
145 | settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); | 152 | settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); |
146 | lj_str_resize(L, LJ_MIN_STRTAB-1); | 153 | lj_str_init(L); |
147 | lj_meta_init(L); | 154 | lj_meta_init(L); |
148 | lj_lex_init(L); | 155 | lj_lex_init(L); |
149 | fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ | 156 | fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ |
150 | g->gc.threshold = 4*g->gc.total; | 157 | g->gc.threshold = 4*g->gc.total; |
151 | lj_trace_initstate(g); | 158 | lj_trace_initstate(g); |
159 | lj_err_verify(); | ||
152 | return NULL; | 160 | return NULL; |
153 | } | 161 | } |
154 | 162 | ||
@@ -157,16 +165,25 @@ static void close_state(lua_State *L) | |||
157 | global_State *g = G(L); | 165 | global_State *g = G(L); |
158 | lj_func_closeuv(L, tvref(L->stack)); | 166 | lj_func_closeuv(L, tvref(L->stack)); |
159 | lj_gc_freeall(g); | 167 | lj_gc_freeall(g); |
160 | lua_assert(gcref(g->gc.root) == obj2gco(L)); | 168 | lj_assertG(gcref(g->gc.root) == obj2gco(L), |
161 | lua_assert(g->strnum == 0); | 169 | "main thread is not first GC object"); |
170 | lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num); | ||
162 | lj_trace_freestate(g); | 171 | lj_trace_freestate(g); |
163 | #if LJ_HASFFI | 172 | #if LJ_HASFFI |
164 | lj_ctype_freestate(g); | 173 | lj_ctype_freestate(g); |
165 | #endif | 174 | #endif |
166 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); | 175 | lj_str_freetab(g); |
167 | lj_str_freebuf(g, &g->tmpbuf); | 176 | lj_buf_free(g, &g->tmpbuf); |
168 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); | 177 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); |
169 | lua_assert(g->gc.total == sizeof(GG_State)); | 178 | #if LJ_64 |
179 | if (mref(g->gc.lightudseg, uint32_t)) { | ||
180 | MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2; | ||
181 | lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t); | ||
182 | } | ||
183 | #endif | ||
184 | lj_assertG(g->gc.total == sizeof(GG_State), | ||
185 | "memory leak of %lld bytes", | ||
186 | (long long)(g->gc.total - sizeof(GG_State))); | ||
170 | #ifndef LUAJIT_USE_SYSMALLOC | 187 | #ifndef LUAJIT_USE_SYSMALLOC |
171 | if (g->allocf == lj_alloc_f) | 188 | if (g->allocf == lj_alloc_f) |
172 | lj_alloc_destroy(g->allocd); | 189 | lj_alloc_destroy(g->allocd); |
@@ -175,17 +192,34 @@ static void close_state(lua_State *L) | |||
175 | g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); | 192 | g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); |
176 | } | 193 | } |
177 | 194 | ||
178 | #if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) | 195 | #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) |
179 | lua_State *lj_state_newstate(lua_Alloc f, void *ud) | 196 | lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd) |
180 | #else | 197 | #else |
181 | LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | 198 | LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) |
182 | #endif | 199 | #endif |
183 | { | 200 | { |
184 | GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); | 201 | PRNGState prng; |
185 | lua_State *L = &GG->L; | 202 | GG_State *GG; |
186 | global_State *g = &GG->g; | 203 | lua_State *L; |
187 | if (GG == NULL || !checkptr32(GG)) return NULL; | 204 | global_State *g; |
205 | /* We need the PRNG for the memory allocator, so initialize this first. */ | ||
206 | if (!lj_prng_seed_secure(&prng)) { | ||
207 | lj_assertX(0, "secure PRNG seeding failed"); | ||
208 | /* Can only return NULL here, so this errors with "not enough memory". */ | ||
209 | return NULL; | ||
210 | } | ||
211 | #ifndef LUAJIT_USE_SYSMALLOC | ||
212 | if (allocf == LJ_ALLOCF_INTERNAL) { | ||
213 | allocd = lj_alloc_create(&prng); | ||
214 | if (!allocd) return NULL; | ||
215 | allocf = lj_alloc_f; | ||
216 | } | ||
217 | #endif | ||
218 | GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); | ||
219 | if (GG == NULL || !checkptrGC(GG)) return NULL; | ||
188 | memset(GG, 0, sizeof(GG_State)); | 220 | memset(GG, 0, sizeof(GG_State)); |
221 | L = &GG->L; | ||
222 | g = &GG->g; | ||
189 | L->gct = ~LJ_TTHREAD; | 223 | L->gct = ~LJ_TTHREAD; |
190 | L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ | 224 | L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ |
191 | L->dummy_ffid = FF_C; | 225 | L->dummy_ffid = FF_C; |
@@ -193,17 +227,25 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | |||
193 | g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; | 227 | g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; |
194 | g->strempty.marked = LJ_GC_WHITE0; | 228 | g->strempty.marked = LJ_GC_WHITE0; |
195 | g->strempty.gct = ~LJ_TSTR; | 229 | g->strempty.gct = ~LJ_TSTR; |
196 | g->allocf = f; | 230 | g->allocf = allocf; |
197 | g->allocd = ud; | 231 | g->allocd = allocd; |
232 | g->prng = prng; | ||
233 | #ifndef LUAJIT_USE_SYSMALLOC | ||
234 | if (allocf == lj_alloc_f) { | ||
235 | lj_alloc_setprng(allocd, &g->prng); | ||
236 | } | ||
237 | #endif | ||
198 | setgcref(g->mainthref, obj2gco(L)); | 238 | setgcref(g->mainthref, obj2gco(L)); |
199 | setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); | 239 | setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); |
200 | setgcref(g->uvhead.next, obj2gco(&g->uvhead)); | 240 | setgcref(g->uvhead.next, obj2gco(&g->uvhead)); |
201 | g->strmask = ~(MSize)0; | 241 | g->str.mask = ~(MSize)0; |
202 | setnilV(registry(L)); | 242 | setnilV(registry(L)); |
203 | setnilV(&g->nilnode.val); | 243 | setnilV(&g->nilnode.val); |
204 | setnilV(&g->nilnode.key); | 244 | setnilV(&g->nilnode.key); |
245 | #if !LJ_GC64 | ||
205 | setmref(g->nilnode.freetop, &g->nilnode); | 246 | setmref(g->nilnode.freetop, &g->nilnode); |
206 | lj_str_initbuf(&g->tmpbuf); | 247 | #endif |
248 | lj_buf_init(NULL, &g->tmpbuf); | ||
207 | g->gc.state = GCSpause; | 249 | g->gc.state = GCSpause; |
208 | setgcref(g->gc.root, obj2gco(L)); | 250 | setgcref(g->gc.root, obj2gco(L)); |
209 | setmref(g->gc.sweep, &g->gc.root); | 251 | setmref(g->gc.sweep, &g->gc.root); |
@@ -217,7 +259,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) | |||
217 | close_state(L); | 259 | close_state(L); |
218 | return NULL; | 260 | return NULL; |
219 | } | 261 | } |
220 | L->status = 0; | 262 | L->status = LUA_OK; |
221 | return L; | 263 | return L; |
222 | } | 264 | } |
223 | 265 | ||
@@ -236,6 +278,10 @@ LUA_API void lua_close(lua_State *L) | |||
236 | global_State *g = G(L); | 278 | global_State *g = G(L); |
237 | int i; | 279 | int i; |
238 | L = mainthread(g); /* Only the main thread can be closed. */ | 280 | L = mainthread(g); /* Only the main thread can be closed. */ |
281 | #if LJ_HASPROFILE | ||
282 | luaJIT_profile_stop(L); | ||
283 | #endif | ||
284 | setgcrefnull(g->cur_L); | ||
239 | lj_func_closeuv(L, tvref(L->stack)); | 285 | lj_func_closeuv(L, tvref(L->stack)); |
240 | lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ | 286 | lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ |
241 | #if LJ_HASJIT | 287 | #if LJ_HASJIT |
@@ -245,10 +291,10 @@ LUA_API void lua_close(lua_State *L) | |||
245 | #endif | 291 | #endif |
246 | for (i = 0;;) { | 292 | for (i = 0;;) { |
247 | hook_enter(g); | 293 | hook_enter(g); |
248 | L->status = 0; | 294 | L->status = LUA_OK; |
295 | L->base = L->top = tvref(L->stack) + 1 + LJ_FR2; | ||
249 | L->cframe = NULL; | 296 | L->cframe = NULL; |
250 | L->base = L->top = tvref(L->stack) + 1; | 297 | if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) { |
251 | if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { | ||
252 | if (++i >= 10) break; | 298 | if (++i >= 10) break; |
253 | lj_gc_separateudata(g, 1); /* Separate udata again. */ | 299 | lj_gc_separateudata(g, 1); /* Separate udata again. */ |
254 | if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ | 300 | if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ |
@@ -263,7 +309,7 @@ lua_State *lj_state_new(lua_State *L) | |||
263 | lua_State *L1 = lj_mem_newobj(L, lua_State); | 309 | lua_State *L1 = lj_mem_newobj(L, lua_State); |
264 | L1->gct = ~LJ_TTHREAD; | 310 | L1->gct = ~LJ_TTHREAD; |
265 | L1->dummy_ffid = FF_C; | 311 | L1->dummy_ffid = FF_C; |
266 | L1->status = 0; | 312 | L1->status = LUA_OK; |
267 | L1->stacksize = 0; | 313 | L1->stacksize = 0; |
268 | setmref(L1->stack, NULL); | 314 | setmref(L1->stack, NULL); |
269 | L1->cframe = NULL; | 315 | L1->cframe = NULL; |
@@ -272,15 +318,17 @@ lua_State *lj_state_new(lua_State *L) | |||
272 | setmrefr(L1->glref, L->glref); | 318 | setmrefr(L1->glref, L->glref); |
273 | setgcrefr(L1->env, L->env); | 319 | setgcrefr(L1->env, L->env); |
274 | stack_init(L1, L); /* init stack */ | 320 | stack_init(L1, L); /* init stack */ |
275 | lua_assert(iswhite(obj2gco(L1))); | 321 | lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white"); |
276 | return L1; | 322 | return L1; |
277 | } | 323 | } |
278 | 324 | ||
279 | void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) | 325 | void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) |
280 | { | 326 | { |
281 | lua_assert(L != mainthread(g)); | 327 | lj_assertG(L != mainthread(g), "free of main thread"); |
328 | if (obj2gco(L) == gcref(g->cur_L)) | ||
329 | setgcrefnull(g->cur_L); | ||
282 | lj_func_closeuv(L, tvref(L->stack)); | 330 | lj_func_closeuv(L, tvref(L->stack)); |
283 | lua_assert(gcref(L->openupval) == NULL); | 331 | lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); |
284 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); | 332 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); |
285 | lj_mem_freet(g, L); | 333 | lj_mem_freet(g, L); |
286 | } | 334 | } |
diff --git a/src/lj_state.h b/src/lj_state.h index 48c4d700..d22b7a6f 100644 --- a/src/lj_state.h +++ b/src/lj_state.h | |||
@@ -28,8 +28,10 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) | |||
28 | 28 | ||
29 | LJ_FUNC lua_State *lj_state_new(lua_State *L); | 29 | LJ_FUNC lua_State *lj_state_new(lua_State *L); |
30 | LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); | 30 | LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); |
31 | #if LJ_64 | 31 | #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) |
32 | LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); | 32 | LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | #define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4)) | ||
36 | |||
35 | #endif | 37 | #endif |
diff --git a/src/lj_str.c b/src/lj_str.c index 60912aed..a5282da6 100644 --- a/src/lj_str.c +++ b/src/lj_str.c | |||
@@ -1,13 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | ** String handling. | 2 | ** String handling. |
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h |
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | 4 | */ |
8 | 5 | ||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lj_str_c | 6 | #define lj_str_c |
12 | #define LUA_CORE | 7 | #define LUA_CORE |
13 | 8 | ||
@@ -15,10 +10,10 @@ | |||
15 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
16 | #include "lj_err.h" | 11 | #include "lj_err.h" |
17 | #include "lj_str.h" | 12 | #include "lj_str.h" |
18 | #include "lj_state.h" | ||
19 | #include "lj_char.h" | 13 | #include "lj_char.h" |
14 | #include "lj_prng.h" | ||
20 | 15 | ||
21 | /* -- String interning ---------------------------------------------------- */ | 16 | /* -- String helpers ------------------------------------------------------ */ |
22 | 17 | ||
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | 18 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | 19 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
@@ -43,297 +38,333 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | |||
43 | return (int32_t)(a->len - b->len); | 38 | return (int32_t)(a->len - b->len); |
44 | } | 39 | } |
45 | 40 | ||
46 | /* Fast string data comparison. Caveat: unaligned access to 1st string! */ | 41 | /* Find fixed string p inside string s. */ |
47 | static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) | 42 | const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) |
48 | { | 43 | { |
49 | MSize i = 0; | 44 | if (plen <= slen) { |
50 | lua_assert(len > 0); | 45 | if (plen == 0) { |
51 | lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); | 46 | return s; |
52 | do { /* Note: innocuous access up to end of string + 3. */ | 47 | } else { |
53 | uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); | 48 | int c = *(const uint8_t *)p++; |
54 | if (v) { | 49 | plen--; slen -= plen; |
55 | i -= len; | 50 | while (slen) { |
56 | #if LJ_LE | 51 | const char *q = (const char *)memchr(s, c, slen); |
57 | return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; | 52 | if (!q) break; |
58 | #else | 53 | if (memcmp(q+1, p, plen) == 0) return q; |
59 | return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; | 54 | q++; slen -= (MSize)(q-s); s = q; |
60 | #endif | 55 | } |
61 | } | 56 | } |
62 | i += 4; | 57 | } |
63 | } while (i < len); | 58 | return NULL; |
64 | return 0; | ||
65 | } | 59 | } |
66 | 60 | ||
67 | /* Resize the string hash table (grow and shrink). */ | 61 | /* Check whether a string has a pattern matching character. */ |
68 | void lj_str_resize(lua_State *L, MSize newmask) | 62 | int lj_str_haspattern(GCstr *s) |
69 | { | 63 | { |
70 | global_State *g = G(L); | 64 | const char *p = strdata(s), *q = p + s->len; |
71 | GCRef *newhash; | 65 | while (p < q) { |
72 | MSize i; | 66 | int c = *(const uint8_t *)p++; |
73 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) | 67 | if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) |
74 | return; /* No resizing during GC traversal or if already too big. */ | 68 | return 1; /* Found a pattern matching char. */ |
75 | newhash = lj_mem_newvec(L, newmask+1, GCRef); | ||
76 | memset(newhash, 0, (newmask+1)*sizeof(GCRef)); | ||
77 | for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ | ||
78 | GCobj *p = gcref(g->strhash[i]); | ||
79 | while (p) { /* Follow each hash chain and reinsert all strings. */ | ||
80 | MSize h = gco2str(p)->hash & newmask; | ||
81 | GCobj *next = gcnext(p); | ||
82 | /* NOBARRIER: The string table is a GC root. */ | ||
83 | setgcrefr(p->gch.nextgc, newhash[h]); | ||
84 | setgcref(newhash[h], p); | ||
85 | p = next; | ||
86 | } | ||
87 | } | 69 | } |
88 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); | 70 | return 0; /* No pattern matching chars found. */ |
89 | g->strmask = newmask; | ||
90 | g->strhash = newhash; | ||
91 | } | 71 | } |
92 | 72 | ||
93 | /* Intern a string and return string object. */ | 73 | /* -- String hashing ------------------------------------------------------ */ |
94 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) | 74 | |
75 | /* Keyed sparse ARX string hash. Constant time. */ | ||
76 | static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) | ||
95 | { | 77 | { |
96 | global_State *g; | 78 | /* Constants taken from lookup3 hash by Bob Jenkins. */ |
97 | GCstr *s; | 79 | StrHash a, b, h = len ^ (StrHash)seed; |
98 | GCobj *o; | ||
99 | MSize len = (MSize)lenx; | ||
100 | MSize a, b, h = len; | ||
101 | if (lenx >= LJ_MAX_STR) | ||
102 | lj_err_msg(L, LJ_ERR_STROV); | ||
103 | g = G(L); | ||
104 | /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ | ||
105 | if (len >= 4) { /* Caveat: unaligned access! */ | 80 | if (len >= 4) { /* Caveat: unaligned access! */ |
106 | a = lj_getu32(str); | 81 | a = lj_getu32(str); |
107 | h ^= lj_getu32(str+len-4); | 82 | h ^= lj_getu32(str+len-4); |
108 | b = lj_getu32(str+(len>>1)-2); | 83 | b = lj_getu32(str+(len>>1)-2); |
109 | h ^= b; h -= lj_rol(b, 14); | 84 | h ^= b; h -= lj_rol(b, 14); |
110 | b += lj_getu32(str+(len>>2)-1); | 85 | b += lj_getu32(str+(len>>2)-1); |
111 | } else if (len > 0) { | 86 | } else { |
112 | a = *(const uint8_t *)str; | 87 | a = *(const uint8_t *)str; |
113 | h ^= *(const uint8_t *)(str+len-1); | 88 | h ^= *(const uint8_t *)(str+len-1); |
114 | b = *(const uint8_t *)(str+(len>>1)); | 89 | b = *(const uint8_t *)(str+(len>>1)); |
115 | h ^= b; h -= lj_rol(b, 14); | 90 | h ^= b; h -= lj_rol(b, 14); |
116 | } else { | ||
117 | return &g->strempty; | ||
118 | } | 91 | } |
119 | a ^= h; a -= lj_rol(h, 11); | 92 | a ^= h; a -= lj_rol(h, 11); |
120 | b ^= a; b -= lj_rol(a, 25); | 93 | b ^= a; b -= lj_rol(a, 25); |
121 | h ^= b; h -= lj_rol(b, 16); | 94 | h ^= b; h -= lj_rol(b, 16); |
122 | /* Check if the string has already been interned. */ | 95 | return h; |
123 | o = gcref(g->strhash[h & g->strmask]); | ||
124 | if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { | ||
125 | while (o != NULL) { | ||
126 | GCstr *sx = gco2str(o); | ||
127 | if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { | ||
128 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ | ||
129 | if (isdead(g, o)) flipwhite(o); | ||
130 | return sx; /* Return existing string. */ | ||
131 | } | ||
132 | o = gcnext(o); | ||
133 | } | ||
134 | } else { /* Slow path: end of string is too close to a page boundary. */ | ||
135 | while (o != NULL) { | ||
136 | GCstr *sx = gco2str(o); | ||
137 | if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { | ||
138 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ | ||
139 | if (isdead(g, o)) flipwhite(o); | ||
140 | return sx; /* Return existing string. */ | ||
141 | } | ||
142 | o = gcnext(o); | ||
143 | } | ||
144 | } | ||
145 | /* Nope, create a new string. */ | ||
146 | s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); | ||
147 | newwhite(g, s); | ||
148 | s->gct = ~LJ_TSTR; | ||
149 | s->len = len; | ||
150 | s->hash = h; | ||
151 | s->reserved = 0; | ||
152 | memcpy(strdatawr(s), str, len); | ||
153 | strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ | ||
154 | /* Add it to string hash table. */ | ||
155 | h &= g->strmask; | ||
156 | s->nextgc = g->strhash[h]; | ||
157 | /* NOBARRIER: The string table is a GC root. */ | ||
158 | setgcref(g->strhash[h], obj2gco(s)); | ||
159 | if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ | ||
160 | lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ | ||
161 | return s; /* Return newly interned string. */ | ||
162 | } | 96 | } |
163 | 97 | ||
164 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | 98 | #if LUAJIT_SECURITY_STRHASH |
99 | /* Keyed dense ARX string hash. Linear time. */ | ||
100 | static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, | ||
101 | const char *str, MSize len) | ||
165 | { | 102 | { |
166 | g->strnum--; | 103 | StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); |
167 | lj_mem_free(g, s, sizestring(s)); | 104 | if (len > 12) { |
105 | StrHash a = (StrHash)seed; | ||
106 | const char *pe = str+len-12, *p = pe, *q = str; | ||
107 | do { | ||
108 | a += lj_getu32(p); | ||
109 | b += lj_getu32(p+4); | ||
110 | h += lj_getu32(p+8); | ||
111 | p = q; q += 12; | ||
112 | h ^= b; h -= lj_rol(b, 14); | ||
113 | a ^= h; a -= lj_rol(h, 11); | ||
114 | b ^= a; b -= lj_rol(a, 25); | ||
115 | } while (p < pe); | ||
116 | h ^= b; h -= lj_rol(b, 16); | ||
117 | a ^= h; a -= lj_rol(h, 4); | ||
118 | b ^= a; b -= lj_rol(a, 14); | ||
119 | } | ||
120 | return b; | ||
168 | } | 121 | } |
122 | #endif | ||
169 | 123 | ||
170 | /* -- Type conversions ---------------------------------------------------- */ | 124 | /* -- String interning ---------------------------------------------------- */ |
171 | 125 | ||
172 | /* Print number to buffer. Canonicalizes non-finite values. */ | 126 | #define LJ_STR_MAXCOLL 32 |
173 | size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) | ||
174 | { | ||
175 | if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ | ||
176 | lua_Number n = o->n; | ||
177 | #if __BIONIC__ | ||
178 | if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; } | ||
179 | #endif | ||
180 | return (size_t)lua_number2str(s, n); | ||
181 | } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { | ||
182 | s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; | ||
183 | } else if ((o->u32.hi & 0x80000000) == 0) { | ||
184 | s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3; | ||
185 | } else { | ||
186 | s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4; | ||
187 | } | ||
188 | } | ||
189 | 127 | ||
190 | /* Print integer to buffer. Returns pointer to start. */ | 128 | /* Resize the string interning hash table (grow and shrink). */ |
191 | char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) | 129 | void lj_str_resize(lua_State *L, MSize newmask) |
192 | { | 130 | { |
193 | uint32_t u = (uint32_t)(k < 0 ? -k : k); | 131 | global_State *g = G(L); |
194 | p += 1+10; | 132 | GCRef *newtab, *oldtab = g->str.tab; |
195 | do { *--p = (char)('0' + u % 10); } while (u /= 10); | 133 | MSize i; |
196 | if (k < 0) *--p = '-'; | ||
197 | return p; | ||
198 | } | ||
199 | 134 | ||
200 | /* Convert number to string. */ | 135 | /* No resizing during GC traversal or if already too big. */ |
201 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) | 136 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) |
202 | { | 137 | return; |
203 | char buf[LJ_STR_NUMBUF]; | ||
204 | size_t len = lj_str_bufnum(buf, (TValue *)np); | ||
205 | return lj_str_new(L, buf, len); | ||
206 | } | ||
207 | 138 | ||
208 | /* Convert integer to string. */ | 139 | newtab = lj_mem_newvec(L, newmask+1, GCRef); |
209 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) | 140 | memset(newtab, 0, (newmask+1)*sizeof(GCRef)); |
210 | { | ||
211 | char s[1+10]; | ||
212 | char *p = lj_str_bufint(s, k); | ||
213 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); | ||
214 | } | ||
215 | 141 | ||
216 | GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) | 142 | #if LUAJIT_SECURITY_STRHASH |
217 | { | 143 | /* Check which chains need secondary hashes. */ |
218 | return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); | 144 | if (g->str.second) { |
219 | } | 145 | int newsecond = 0; |
146 | /* Compute primary chain lengths. */ | ||
147 | for (i = g->str.mask; i != ~(MSize)0; i--) { | ||
148 | GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); | ||
149 | while (o) { | ||
150 | GCstr *s = gco2str(o); | ||
151 | MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) : | ||
152 | s->hash; | ||
153 | hash &= newmask; | ||
154 | setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); | ||
155 | o = gcnext(o); | ||
156 | } | ||
157 | } | ||
158 | /* Mark secondary chains. */ | ||
159 | for (i = newmask; i != ~(MSize)0; i--) { | ||
160 | int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL; | ||
161 | newsecond |= secondary; | ||
162 | setgcrefp(newtab[i], secondary); | ||
163 | } | ||
164 | g->str.second = newsecond; | ||
165 | } | ||
166 | #endif | ||
220 | 167 | ||
221 | /* -- String formatting --------------------------------------------------- */ | 168 | /* Reinsert all strings from the old table into the new table. */ |
169 | for (i = g->str.mask; i != ~(MSize)0; i--) { | ||
170 | GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); | ||
171 | while (o) { | ||
172 | GCobj *next = gcnext(o); | ||
173 | GCstr *s = gco2str(o); | ||
174 | MSize hash = s->hash; | ||
175 | #if LUAJIT_SECURITY_STRHASH | ||
176 | uintptr_t u; | ||
177 | if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */ | ||
178 | hash &= newmask; | ||
179 | u = gcrefu(newtab[hash]); | ||
180 | if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */ | ||
181 | s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len); | ||
182 | s->hashalg = 1; | ||
183 | hash &= newmask; | ||
184 | u = gcrefu(newtab[hash]); | ||
185 | } | ||
186 | } else { /* String hashed with secondary hash. */ | ||
187 | MSize shash = hash_sparse(g->str.seed, strdata(s), s->len); | ||
188 | u = gcrefu(newtab[shash & newmask]); | ||
189 | if (u & 1) { | ||
190 | hash &= newmask; | ||
191 | u = gcrefu(newtab[hash]); | ||
192 | } else { /* Revert string back to primary hash. */ | ||
193 | s->hash = shash; | ||
194 | s->hashalg = 0; | ||
195 | hash = (shash & newmask); | ||
196 | } | ||
197 | } | ||
198 | /* NOBARRIER: The string table is a GC root. */ | ||
199 | setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); | ||
200 | setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1))); | ||
201 | #else | ||
202 | hash &= newmask; | ||
203 | /* NOBARRIER: The string table is a GC root. */ | ||
204 | setgcrefr(o->gch.nextgc, newtab[hash]); | ||
205 | setgcref(newtab[hash], o); | ||
206 | #endif | ||
207 | o = next; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* Free old table and replace with new table. */ | ||
212 | lj_str_freetab(g); | ||
213 | g->str.tab = newtab; | ||
214 | g->str.mask = newmask; | ||
215 | } | ||
222 | 216 | ||
223 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) | 217 | #if LUAJIT_SECURITY_STRHASH |
218 | /* Rehash and rechain all strings in a chain. */ | ||
219 | static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc, | ||
220 | const char *str, MSize len) | ||
224 | { | 221 | { |
225 | char *p; | 222 | global_State *g = G(L); |
226 | MSize i; | 223 | int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */ |
227 | if (sb->n + len > sb->sz) { | 224 | GCRef *strtab = g->str.tab; |
228 | MSize sz = sb->sz * 2; | 225 | MSize strmask = g->str.mask; |
229 | while (sb->n + len > sz) sz = sz * 2; | 226 | GCobj *o = gcref(strtab[hashc & strmask]); |
230 | lj_str_resizebuf(L, sb, sz); | 227 | setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); |
228 | g->str.second = 1; | ||
229 | while (o) { | ||
230 | uintptr_t u; | ||
231 | GCobj *next = gcnext(o); | ||
232 | GCstr *s = gco2str(o); | ||
233 | StrHash hash; | ||
234 | if (ow) { /* Must sweep while rechaining. */ | ||
235 | if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */ | ||
236 | lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), | ||
237 | "sweep of undead string"); | ||
238 | makewhite(g, o); | ||
239 | } else { /* Free dead string. */ | ||
240 | lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, | ||
241 | "sweep of unlive string"); | ||
242 | lj_str_free(g, s); | ||
243 | o = next; | ||
244 | continue; | ||
245 | } | ||
246 | } | ||
247 | hash = s->hash; | ||
248 | if (!s->hashalg) { /* Rehash with secondary hash. */ | ||
249 | hash = hash_dense(g->str.seed, hash, strdata(s), s->len); | ||
250 | s->hash = hash; | ||
251 | s->hashalg = 1; | ||
252 | } | ||
253 | /* Rechain. */ | ||
254 | hash &= strmask; | ||
255 | u = gcrefu(strtab[hash]); | ||
256 | setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); | ||
257 | setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1))); | ||
258 | o = next; | ||
231 | } | 259 | } |
232 | p = sb->buf + sb->n; | 260 | /* Try to insert the pending string again. */ |
233 | sb->n += len; | 261 | return lj_str_new(L, str, len); |
234 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
235 | } | 262 | } |
263 | #endif | ||
264 | |||
265 | /* Reseed String ID from PRNG after random interval < 2^bits. */ | ||
266 | #if LUAJIT_SECURITY_STRID == 1 | ||
267 | #define STRID_RESEED_INTERVAL 8 | ||
268 | #elif LUAJIT_SECURITY_STRID == 2 | ||
269 | #define STRID_RESEED_INTERVAL 4 | ||
270 | #elif LUAJIT_SECURITY_STRID >= 3 | ||
271 | #define STRID_RESEED_INTERVAL 0 | ||
272 | #endif | ||
236 | 273 | ||
237 | static void addchar(lua_State *L, SBuf *sb, int c) | 274 | /* Allocate a new string and add to string interning table. */ |
275 | static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, | ||
276 | StrHash hash, int hashalg) | ||
238 | { | 277 | { |
239 | if (sb->n + 1 > sb->sz) { | 278 | GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr); |
240 | MSize sz = sb->sz * 2; | 279 | global_State *g = G(L); |
241 | lj_str_resizebuf(L, sb, sz); | 280 | uintptr_t u; |
281 | newwhite(g, s); | ||
282 | s->gct = ~LJ_TSTR; | ||
283 | s->len = len; | ||
284 | s->hash = hash; | ||
285 | #ifndef STRID_RESEED_INTERVAL | ||
286 | s->sid = g->str.id++; | ||
287 | #elif STRID_RESEED_INTERVAL | ||
288 | if (!g->str.idreseed--) { | ||
289 | uint64_t r = lj_prng_u64(&g->prng); | ||
290 | g->str.id = (StrID)r; | ||
291 | g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); | ||
242 | } | 292 | } |
243 | sb->buf[sb->n++] = (char)c; | 293 | s->sid = g->str.id++; |
294 | #else | ||
295 | s->sid = (StrID)lj_prng_u64(&g->prng); | ||
296 | #endif | ||
297 | s->reserved = 0; | ||
298 | s->hashalg = (uint8_t)hashalg; | ||
299 | /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */ | ||
300 | *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; | ||
301 | memcpy(strdatawr(s), str, len); | ||
302 | /* Add to string hash table. */ | ||
303 | hash &= g->str.mask; | ||
304 | u = gcrefu(g->str.tab[hash]); | ||
305 | setgcrefp(s->nextgc, (u & ~(uintptr_t)1)); | ||
306 | /* NOBARRIER: The string table is a GC root. */ | ||
307 | setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); | ||
308 | if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */ | ||
309 | lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */ | ||
310 | return s; /* Return newly interned string. */ | ||
244 | } | 311 | } |
245 | 312 | ||
246 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | 313 | /* Intern a string and return string object. */ |
247 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) | 314 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) |
248 | { | 315 | { |
249 | SBuf *sb = &G(L)->tmpbuf; | 316 | global_State *g = G(L); |
250 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); | 317 | if (lenx-1 < LJ_MAX_STR-1) { |
251 | lj_str_resetbuf(sb); | 318 | MSize len = (MSize)lenx; |
252 | for (;;) { | 319 | StrHash hash = hash_sparse(g->str.seed, str, len); |
253 | const char *e = strchr(fmt, '%'); | 320 | MSize coll = 0; |
254 | if (e == NULL) break; | 321 | int hashalg = 0; |
255 | addstr(L, sb, fmt, (MSize)(e-fmt)); | 322 | /* Check if the string has already been interned. */ |
256 | /* This function only handles %s, %c, %d, %f and %p formats. */ | 323 | GCobj *o = gcref(g->str.tab[hash & g->str.mask]); |
257 | switch (e[1]) { | 324 | #if LUAJIT_SECURITY_STRHASH |
258 | case 's': { | 325 | if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */ |
259 | const char *s = va_arg(argp, char *); | 326 | hashalg = 1; |
260 | if (s == NULL) s = "(null)"; | 327 | hash = hash_dense(g->str.seed, hash, str, len); |
261 | addstr(L, sb, s, (MSize)strlen(s)); | 328 | o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1); |
262 | break; | 329 | } |
263 | } | ||
264 | case 'c': | ||
265 | addchar(L, sb, va_arg(argp, int)); | ||
266 | break; | ||
267 | case 'd': { | ||
268 | char buf[LJ_STR_INTBUF]; | ||
269 | char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); | ||
270 | addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); | ||
271 | break; | ||
272 | } | ||
273 | case 'f': { | ||
274 | char buf[LJ_STR_NUMBUF]; | ||
275 | TValue tv; | ||
276 | MSize len; | ||
277 | tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); | ||
278 | len = (MSize)lj_str_bufnum(buf, &tv); | ||
279 | addstr(L, sb, buf, len); | ||
280 | break; | ||
281 | } | ||
282 | case 'p': { | ||
283 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) | ||
284 | char buf[2+FMTP_CHARS]; | ||
285 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); | ||
286 | ptrdiff_t i, lasti = 2+FMTP_CHARS; | ||
287 | if (p == 0) { | ||
288 | addstr(L, sb, "NULL", 4); | ||
289 | break; | ||
290 | } | ||
291 | #if LJ_64 | ||
292 | /* Shorten output for 64 bit pointers. */ | ||
293 | lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0); | ||
294 | #endif | 330 | #endif |
295 | buf[0] = '0'; | 331 | while (o != NULL) { |
296 | buf[1] = 'x'; | 332 | GCstr *sx = gco2str(o); |
297 | for (i = lasti-1; i >= 2; i--, p >>= 4) | 333 | if (sx->hash == hash && sx->len == len) { |
298 | buf[i] = "0123456789abcdef"[(p & 15)]; | 334 | if (memcmp(str, strdata(sx), len) == 0) { |
299 | addstr(L, sb, buf, (MSize)lasti); | 335 | if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ |
300 | break; | 336 | return sx; /* Return existing string. */ |
337 | } | ||
338 | coll++; | ||
301 | } | 339 | } |
302 | case '%': | 340 | coll++; |
303 | addchar(L, sb, '%'); | 341 | o = gcnext(o); |
304 | break; | 342 | } |
305 | default: | 343 | #if LUAJIT_SECURITY_STRHASH |
306 | addchar(L, sb, '%'); | 344 | /* Rehash chain if there are too many collisions. */ |
307 | addchar(L, sb, e[1]); | 345 | if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { |
308 | break; | 346 | return lj_str_rehash_chain(L, hash, str, len); |
309 | } | 347 | } |
310 | fmt = e+2; | 348 | #endif |
349 | /* Otherwise allocate a new string. */ | ||
350 | return lj_str_alloc(L, str, len, hash, hashalg); | ||
351 | } else { | ||
352 | if (lenx) | ||
353 | lj_err_msg(L, LJ_ERR_STROV); | ||
354 | return &g->strempty; | ||
311 | } | 355 | } |
312 | addstr(L, sb, fmt, (MSize)strlen(fmt)); | ||
313 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); | ||
314 | incr_top(L); | ||
315 | return strVdata(L->top - 1); | ||
316 | } | 356 | } |
317 | 357 | ||
318 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ | 358 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) |
319 | const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
320 | { | 359 | { |
321 | const char *msg; | 360 | g->str.num--; |
322 | va_list argp; | 361 | lj_mem_free(g, s, lj_str_size(s->len)); |
323 | va_start(argp, fmt); | ||
324 | msg = lj_str_pushvf(L, fmt, argp); | ||
325 | va_end(argp); | ||
326 | return msg; | ||
327 | } | 362 | } |
328 | 363 | ||
329 | /* -- Buffer handling ----------------------------------------------------- */ | 364 | void LJ_FASTCALL lj_str_init(lua_State *L) |
330 | |||
331 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) | ||
332 | { | 365 | { |
333 | if (sz > sb->sz) { | 366 | global_State *g = G(L); |
334 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; | 367 | g->str.seed = lj_prng_u64(&g->prng); |
335 | lj_str_resizebuf(L, sb, sz); | 368 | lj_str_resize(L, LJ_MIN_STRTAB-1); |
336 | } | ||
337 | return sb->buf; | ||
338 | } | 369 | } |
339 | 370 | ||
diff --git a/src/lj_str.h b/src/lj_str.h index e304f72f..28edb5a5 100644 --- a/src/lj_str.h +++ b/src/lj_str.h | |||
@@ -10,41 +10,22 @@ | |||
10 | 10 | ||
11 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
12 | 12 | ||
13 | /* String interning. */ | 13 | /* String helpers. */ |
14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); | 14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); |
15 | LJ_FUNC const char *lj_str_find(const char *s, const char *f, | ||
16 | MSize slen, MSize flen); | ||
17 | LJ_FUNC int lj_str_haspattern(GCstr *s); | ||
18 | |||
19 | /* String interning. */ | ||
15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); | 20 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); |
16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); | 21 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); |
17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | 22 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); |
23 | LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L); | ||
24 | #define lj_str_freetab(g) \ | ||
25 | (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef)) | ||
18 | 26 | ||
19 | #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) | 27 | #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) |
20 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) | 28 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) |
21 | 29 | #define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3)) | |
22 | /* Type conversions. */ | ||
23 | LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o); | ||
24 | LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k); | ||
25 | LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); | ||
26 | LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k); | ||
27 | LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o); | ||
28 | |||
29 | #define LJ_STR_INTBUF (1+10) | ||
30 | #define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR | ||
31 | |||
32 | /* String formatting. */ | ||
33 | LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); | ||
34 | LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
35 | #if defined(__GNUC__) | ||
36 | __attribute__ ((format (printf, 2, 3))) | ||
37 | #endif | ||
38 | ; | ||
39 | |||
40 | /* Resizable string buffers. Struct definition in lj_obj.h. */ | ||
41 | LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz); | ||
42 | |||
43 | #define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0) | ||
44 | #define lj_str_resetbuf(sb) ((sb)->n = 0) | ||
45 | #define lj_str_resizebuf(L, sb, size) \ | ||
46 | ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \ | ||
47 | (sb)->sz = (size)) | ||
48 | #define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz) | ||
49 | 30 | ||
50 | #endif | 31 | #endif |
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c new file mode 100644 index 00000000..5c808290 --- /dev/null +++ b/src/lj_strfmt.c | |||
@@ -0,0 +1,606 @@ | |||
1 | /* | ||
2 | ** String formatting. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #include <stdio.h> | ||
7 | |||
8 | #define lj_strfmt_c | ||
9 | #define LUA_CORE | ||
10 | |||
11 | #include "lj_obj.h" | ||
12 | #include "lj_err.h" | ||
13 | #include "lj_buf.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_meta.h" | ||
16 | #include "lj_state.h" | ||
17 | #include "lj_char.h" | ||
18 | #include "lj_strfmt.h" | ||
19 | #if LJ_HASFFI | ||
20 | #include "lj_ctype.h" | ||
21 | #endif | ||
22 | #include "lj_lib.h" | ||
23 | |||
24 | /* -- Format parser ------------------------------------------------------- */ | ||
25 | |||
26 | static const uint8_t strfmt_map[('x'-'A')+1] = { | ||
27 | STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, | ||
28 | 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, | ||
29 | 0,0,0,0,0,0, | ||
30 | STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, | ||
31 | 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X | ||
32 | }; | ||
33 | |||
34 | SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) | ||
35 | { | ||
36 | const uint8_t *p = fs->p, *e = fs->e; | ||
37 | fs->str = (const char *)p; | ||
38 | for (; p < e; p++) { | ||
39 | if (*p == '%') { /* Escape char? */ | ||
40 | if (p[1] == '%') { /* '%%'? */ | ||
41 | fs->p = ++p+1; | ||
42 | goto retlit; | ||
43 | } else { | ||
44 | SFormat sf = 0; | ||
45 | uint32_t c; | ||
46 | if (p != (const uint8_t *)fs->str) | ||
47 | break; | ||
48 | for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) { | ||
49 | /* Parse flags. */ | ||
50 | if (*p == '-') sf |= STRFMT_F_LEFT; | ||
51 | else if (*p == '+') sf |= STRFMT_F_PLUS; | ||
52 | else if (*p == '0') sf |= STRFMT_F_ZERO; | ||
53 | else if (*p == ' ') sf |= STRFMT_F_SPACE; | ||
54 | else if (*p == '#') sf |= STRFMT_F_ALT; | ||
55 | else break; | ||
56 | } | ||
57 | if ((uint32_t)*p - '0' < 10) { /* Parse width. */ | ||
58 | uint32_t width = (uint32_t)*p++ - '0'; | ||
59 | if ((uint32_t)*p - '0' < 10) | ||
60 | width = (uint32_t)*p++ - '0' + width*10; | ||
61 | sf |= (width << STRFMT_SH_WIDTH); | ||
62 | } | ||
63 | if (*p == '.') { /* Parse precision. */ | ||
64 | uint32_t prec = 0; | ||
65 | p++; | ||
66 | if ((uint32_t)*p - '0' < 10) { | ||
67 | prec = (uint32_t)*p++ - '0'; | ||
68 | if ((uint32_t)*p - '0' < 10) | ||
69 | prec = (uint32_t)*p++ - '0' + prec*10; | ||
70 | } | ||
71 | sf |= ((prec+1) << STRFMT_SH_PREC); | ||
72 | } | ||
73 | /* Parse conversion. */ | ||
74 | c = (uint32_t)*p - 'A'; | ||
75 | if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) { | ||
76 | uint32_t sx = strfmt_map[c]; | ||
77 | if (sx) { | ||
78 | fs->p = p+1; | ||
79 | return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER)); | ||
80 | } | ||
81 | } | ||
82 | /* Return error location. */ | ||
83 | if (*p >= 32) p++; | ||
84 | fs->len = (MSize)(p - (const uint8_t *)fs->str); | ||
85 | fs->p = fs->e; | ||
86 | return STRFMT_ERR; | ||
87 | } | ||
88 | } | ||
89 | } | ||
90 | fs->p = p; | ||
91 | retlit: | ||
92 | fs->len = (MSize)(p - (const uint8_t *)fs->str); | ||
93 | return fs->len ? STRFMT_LIT : STRFMT_EOF; | ||
94 | } | ||
95 | |||
96 | /* -- Raw conversions ----------------------------------------------------- */ | ||
97 | |||
98 | #define WINT_R(x, sh, sc) \ | ||
99 | { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } | ||
100 | |||
101 | /* Write integer to buffer. */ | ||
102 | char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) | ||
103 | { | ||
104 | uint32_t u = (uint32_t)k; | ||
105 | if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } | ||
106 | if (u < 10000) { | ||
107 | if (u < 10) goto dig1; | ||
108 | if (u < 100) goto dig2; | ||
109 | if (u < 1000) goto dig3; | ||
110 | } else { | ||
111 | uint32_t v = u / 10000; u -= v * 10000; | ||
112 | if (v < 10000) { | ||
113 | if (v < 10) goto dig5; | ||
114 | if (v < 100) goto dig6; | ||
115 | if (v < 1000) goto dig7; | ||
116 | } else { | ||
117 | uint32_t w = v / 10000; v -= w * 10000; | ||
118 | if (w >= 10) WINT_R(w, 10, 10) | ||
119 | *p++ = (char)('0'+w); | ||
120 | } | ||
121 | WINT_R(v, 23, 1000) | ||
122 | dig7: WINT_R(v, 12, 100) | ||
123 | dig6: WINT_R(v, 10, 10) | ||
124 | dig5: *p++ = (char)('0'+v); | ||
125 | } | ||
126 | WINT_R(u, 23, 1000) | ||
127 | dig3: WINT_R(u, 12, 100) | ||
128 | dig2: WINT_R(u, 10, 10) | ||
129 | dig1: *p++ = (char)('0'+u); | ||
130 | return p; | ||
131 | } | ||
132 | #undef WINT_R | ||
133 | |||
134 | /* Write pointer to buffer. */ | ||
135 | char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) | ||
136 | { | ||
137 | ptrdiff_t x = (ptrdiff_t)v; | ||
138 | MSize i, n = STRFMT_MAXBUF_PTR; | ||
139 | if (x == 0) { | ||
140 | *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; | ||
141 | return p; | ||
142 | } | ||
143 | #if LJ_64 | ||
144 | /* Shorten output for 64 bit pointers. */ | ||
145 | n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); | ||
146 | #endif | ||
147 | p[0] = '0'; | ||
148 | p[1] = 'x'; | ||
149 | for (i = n-1; i >= 2; i--, x >>= 4) | ||
150 | p[i] = "0123456789abcdef"[(x & 15)]; | ||
151 | return p+n; | ||
152 | } | ||
153 | |||
154 | /* Write ULEB128 to buffer. */ | ||
155 | char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) | ||
156 | { | ||
157 | for (; v >= 0x80; v >>= 7) | ||
158 | *p++ = (char)((v & 0x7f) | 0x80); | ||
159 | *p++ = (char)v; | ||
160 | return p; | ||
161 | } | ||
162 | |||
163 | /* Return string or write number to tmp buffer and return pointer to start. */ | ||
164 | const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) | ||
165 | { | ||
166 | SBuf *sb; | ||
167 | if (tvisstr(o)) { | ||
168 | *lenp = strV(o)->len; | ||
169 | return strVdata(o); | ||
170 | } else if (tvisbuf(o)) { | ||
171 | SBufExt *sbx = bufV(o); | ||
172 | *lenp = sbufxlen(sbx); | ||
173 | return sbx->r; | ||
174 | } else if (tvisint(o)) { | ||
175 | sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); | ||
176 | } else if (tvisnum(o)) { | ||
177 | sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); | ||
178 | } else { | ||
179 | return NULL; | ||
180 | } | ||
181 | *lenp = sbuflen(sb); | ||
182 | return sb->b; | ||
183 | } | ||
184 | |||
185 | /* -- Unformatted conversions to buffer ----------------------------------- */ | ||
186 | |||
187 | /* Add integer to buffer. */ | ||
188 | SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) | ||
189 | { | ||
190 | sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k); | ||
191 | return sb; | ||
192 | } | ||
193 | |||
194 | #if LJ_HASJIT | ||
195 | /* Add number to buffer. */ | ||
196 | SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) | ||
197 | { | ||
198 | return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); | ||
199 | } | ||
200 | #endif | ||
201 | |||
202 | SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) | ||
203 | { | ||
204 | sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v); | ||
205 | return sb; | ||
206 | } | ||
207 | |||
208 | /* Add quoted string to buffer. */ | ||
209 | static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len) | ||
210 | { | ||
211 | lj_buf_putb(sb, '"'); | ||
212 | while (len--) { | ||
213 | uint32_t c = (uint32_t)(uint8_t)*s++; | ||
214 | char *w = lj_buf_more(sb, 4); | ||
215 | if (c == '"' || c == '\\' || c == '\n') { | ||
216 | *w++ = '\\'; | ||
217 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ | ||
218 | uint32_t d; | ||
219 | *w++ = '\\'; | ||
220 | if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { | ||
221 | *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; | ||
222 | goto tens; | ||
223 | } else if (c >= 10) { | ||
224 | tens: | ||
225 | d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d); | ||
226 | } | ||
227 | c += '0'; | ||
228 | } | ||
229 | *w++ = (char)c; | ||
230 | sb->w = w; | ||
231 | } | ||
232 | lj_buf_putb(sb, '"'); | ||
233 | return sb; | ||
234 | } | ||
235 | |||
236 | #if LJ_HASJIT | ||
237 | SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) | ||
238 | { | ||
239 | return strfmt_putquotedlen(sb, strdata(str), str->len); | ||
240 | } | ||
241 | #endif | ||
242 | |||
243 | /* -- Formatted conversions to buffer ------------------------------------- */ | ||
244 | |||
245 | /* Add formatted char to buffer. */ | ||
246 | SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) | ||
247 | { | ||
248 | MSize width = STRFMT_WIDTH(sf); | ||
249 | char *w = lj_buf_more(sb, width > 1 ? width : 1); | ||
250 | if ((sf & STRFMT_F_LEFT)) *w++ = (char)c; | ||
251 | while (width-- > 1) *w++ = ' '; | ||
252 | if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c; | ||
253 | sb->w = w; | ||
254 | return sb; | ||
255 | } | ||
256 | |||
257 | /* Add formatted string to buffer. */ | ||
258 | static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len) | ||
259 | { | ||
260 | MSize width = STRFMT_WIDTH(sf); | ||
261 | char *w; | ||
262 | if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf); | ||
263 | w = lj_buf_more(sb, width > len ? width : len); | ||
264 | if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); | ||
265 | while (width-- > len) *w++ = ' '; | ||
266 | if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); | ||
267 | sb->w = w; | ||
268 | return sb; | ||
269 | } | ||
270 | |||
271 | #if LJ_HASJIT | ||
272 | SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) | ||
273 | { | ||
274 | return strfmt_putfstrlen(sb, sf, strdata(str), str->len); | ||
275 | } | ||
276 | #endif | ||
277 | |||
278 | /* Add formatted signed/unsigned integer to buffer. */ | ||
279 | SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) | ||
280 | { | ||
281 | char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w; | ||
282 | #ifdef LUA_USE_ASSERT | ||
283 | char *ws; | ||
284 | #endif | ||
285 | MSize prefix = 0, len, prec, pprec, width, need; | ||
286 | |||
287 | /* Figure out signed prefixes. */ | ||
288 | if (STRFMT_TYPE(sf) == STRFMT_INT) { | ||
289 | if ((int64_t)k < 0) { | ||
290 | k = (uint64_t)-(int64_t)k; | ||
291 | prefix = 256 + '-'; | ||
292 | } else if ((sf & STRFMT_F_PLUS)) { | ||
293 | prefix = 256 + '+'; | ||
294 | } else if ((sf & STRFMT_F_SPACE)) { | ||
295 | prefix = 256 + ' '; | ||
296 | } | ||
297 | } | ||
298 | |||
299 | /* Convert number and store to fixed-size buffer in reverse order. */ | ||
300 | prec = STRFMT_PREC(sf); | ||
301 | if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO; | ||
302 | if (k == 0) { /* Special-case zero argument. */ | ||
303 | if (prec != 0 || | ||
304 | (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT)) | ||
305 | *--q = '0'; | ||
306 | } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */ | ||
307 | uint32_t k2; | ||
308 | while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; } | ||
309 | k2 = (uint32_t)k; | ||
310 | do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2); | ||
311 | } else if ((sf & STRFMT_T_HEX)) { /* Hex. */ | ||
312 | const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" : | ||
313 | "0123456789abcdef"; | ||
314 | do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k); | ||
315 | if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x'); | ||
316 | } else { /* Octal. */ | ||
317 | do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k); | ||
318 | if ((sf & STRFMT_F_ALT)) *--q = '0'; | ||
319 | } | ||
320 | |||
321 | /* Calculate sizes. */ | ||
322 | len = (MSize)(buf + sizeof(buf) - q); | ||
323 | if ((int32_t)len >= (int32_t)prec) prec = len; | ||
324 | width = STRFMT_WIDTH(sf); | ||
325 | pprec = prec + (prefix >> 8); | ||
326 | need = width > pprec ? width : pprec; | ||
327 | w = lj_buf_more(sb, need); | ||
328 | #ifdef LUA_USE_ASSERT | ||
329 | ws = w; | ||
330 | #endif | ||
331 | |||
332 | /* Format number with leading/trailing whitespace and zeros. */ | ||
333 | if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) | ||
334 | while (width-- > pprec) *w++ = ' '; | ||
335 | if (prefix) { | ||
336 | if ((char)prefix >= 'X') *w++ = '0'; | ||
337 | *w++ = (char)prefix; | ||
338 | } | ||
339 | if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) | ||
340 | while (width-- > pprec) *w++ = '0'; | ||
341 | while (prec-- > len) *w++ = '0'; | ||
342 | while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */ | ||
343 | if ((sf & STRFMT_F_LEFT)) | ||
344 | while (width-- > pprec) *w++ = ' '; | ||
345 | |||
346 | lj_assertX(need == (MSize)(w - ws), "miscalculated format size"); | ||
347 | sb->w = w; | ||
348 | return sb; | ||
349 | } | ||
350 | |||
351 | /* Add number formatted as signed integer to buffer. */ | ||
352 | SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) | ||
353 | { | ||
354 | int64_t k = (int64_t)n; | ||
355 | if (checki32(k) && sf == STRFMT_INT) | ||
356 | return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ | ||
357 | else | ||
358 | return lj_strfmt_putfxint(sb, sf, (uint64_t)k); | ||
359 | } | ||
360 | |||
361 | /* Add number formatted as unsigned integer to buffer. */ | ||
362 | SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) | ||
363 | { | ||
364 | int64_t k; | ||
365 | if (n >= 9223372036854775808.0) | ||
366 | k = (int64_t)(n - 18446744073709551616.0); | ||
367 | else | ||
368 | k = (int64_t)n; | ||
369 | return lj_strfmt_putfxint(sb, sf, (uint64_t)k); | ||
370 | } | ||
371 | |||
372 | /* Format stack arguments to buffer. The format string is fetched from argument arg, the values to format follow it on the stack. Returns the (possibly updated) retry state. */ | ||
373 | int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry) | ||
374 | { | ||
375 | int narg = (int)(L->top - L->base); /* Number of stack slots in this frame. */ | ||
376 | GCstr *fmt = lj_lib_checkstr(L, arg); | ||
377 | FormatState fs; | ||
378 | SFormat sf; | ||
379 | lj_strfmt_init(&fs, strdata(fmt), fmt->len); | ||
380 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { | ||
381 | if (sf == STRFMT_LIT) { | ||
382 | lj_buf_putmem(sb, fs.str, fs.len); /* Literal text is copied verbatim. */ | ||
383 | } else if (sf == STRFMT_ERR) { | ||
384 | lj_err_callerv(L, LJ_ERR_STRFMT, | ||
385 | strdata(lj_str_new(L, fs.str, fs.len))); | ||
386 | } else { | ||
387 | TValue *o = &L->base[arg++]; /* Fetch the value for this conversion and advance arg. */ | ||
388 | if (arg > narg) /* No value left on the stack for this conversion. */ | ||
389 | lj_err_arg(L, arg, LJ_ERR_NOVAL); | ||
390 | switch (STRFMT_TYPE(sf)) { | ||
391 | case STRFMT_INT: | ||
392 | if (tvisint(o)) { | ||
393 | int32_t k = intV(o); | ||
394 | if (sf == STRFMT_INT) | ||
395 | lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */ | ||
396 | else | ||
397 | lj_strfmt_putfxint(sb, sf, k); | ||
398 | break; | ||
399 | } | ||
400 | #if LJ_HASFFI | ||
401 | if (tviscdata(o)) { | ||
402 | GCcdata *cd = cdataV(o); | ||
403 | if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { /* 64 bit integer cdata is formatted from its raw bits. */ | ||
404 | lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); | ||
405 | break; | ||
406 | } | ||
407 | } | ||
408 | #endif | ||
409 | lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); /* Everything else goes through lj_lib_checknum(). */ | ||
410 | break; | ||
411 | case STRFMT_UINT: | ||
412 | if (tvisint(o)) { | ||
413 | lj_strfmt_putfxint(sb, sf, intV(o)); | ||
414 | break; | ||
415 | } | ||
416 | #if LJ_HASFFI | ||
417 | if (tviscdata(o)) { | ||
418 | GCcdata *cd = cdataV(o); | ||
419 | if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { /* Same raw 64 bit path as for STRFMT_INT. */ | ||
420 | lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); | ||
421 | break; | ||
422 | } | ||
423 | } | ||
424 | #endif | ||
425 | lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); | ||
426 | break; | ||
427 | case STRFMT_NUM: | ||
428 | lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); | ||
429 | break; | ||
430 | case STRFMT_STR: { | ||
431 | MSize len; | ||
432 | const char *s; | ||
433 | cTValue *mo; | ||
434 | if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 && | ||
435 | !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | ||
436 | /* Call __tostring metamethod once. */ | ||
437 | copyTV(L, L->top++, mo); | ||
438 | copyTV(L, L->top++, o); | ||
439 | lua_call(L, 1, 1); | ||
440 | o = &L->base[arg-1]; /* Stack may have been reallocated. */ | ||
441 | copyTV(L, o, --L->top); /* Replace inline for retry. */ | ||
442 | if (retry < 2) { /* Global buffer may have been overwritten. */ | ||
443 | retry = 1; /* NOTE(review): presumably tells the caller to redo the formatting -- confirm against callers. */ | ||
444 | break; | ||
445 | } | ||
446 | } | ||
447 | if (LJ_LIKELY(tvisstr(o))) { | ||
448 | len = strV(o)->len; | ||
449 | s = strVdata(o); | ||
450 | #if LJ_HASBUFFER | ||
451 | } else if (tvisbuf(o)) { | ||
452 | SBufExt *sbx = bufV(o); | ||
453 | if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF); /* The value is the very buffer being written to: rejected. */ | ||
454 | len = sbufxlen(sbx); | ||
455 | s = sbx->r; | ||
456 | #endif | ||
457 | } else { | ||
458 | GCstr *str = lj_strfmt_obj(L, o); /* Any other type: raw conversion to a string. */ | ||
459 | len = str->len; | ||
460 | s = strdata(str); | ||
461 | } | ||
462 | if ((sf & STRFMT_T_QUOTED)) | ||
463 | strfmt_putquotedlen(sb, s, len); /* No formatting. */ | ||
464 | else | ||
465 | strfmt_putfstrlen(sb, sf, s, len); | ||
466 | break; | ||
467 | } | ||
468 | case STRFMT_CHAR: | ||
469 | lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); | ||
470 | break; | ||
471 | case STRFMT_PTR: /* No formatting. */ | ||
472 | lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o)); | ||
473 | break; | ||
474 | default: | ||
475 | lj_assertL(0, "bad string format type"); | ||
476 | break; | ||
477 | } | ||
478 | } | ||
479 | } | ||
480 | return retry; | ||
481 | } | ||
482 | |||
483 | /* -- Conversions to strings ---------------------------------------------- */ | ||
484 | |||
485 | /* Convert integer to string. */ | ||
486 | GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) | ||
487 | { | ||
488 | char buf[STRFMT_MAXBUF_INT]; | ||
489 | MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf); | ||
490 | return lj_str_new(L, buf, len); | ||
491 | } | ||
492 | |||
493 | /* Convert integer or number to string. */ | ||
494 | GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) | ||
495 | { | ||
496 | return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o); | ||
497 | } | ||
498 | |||
#if LJ_HASJIT
/* Convert char value to string. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = c;  /* Narrow to a single byte. */
  return lj_str_new(L, &ch, 1);
}
#endif
508 | |||
509 | /* Raw conversion of object to string. */ | ||
510 | GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) | ||
511 | { | ||
512 | if (tvisstr(o)) { | ||
513 | return strV(o); | ||
514 | } else if (tvisnumber(o)) { | ||
515 | return lj_strfmt_number(L, o); | ||
516 | } else if (tvisnil(o)) { | ||
517 | return lj_str_newlit(L, "nil"); | ||
518 | } else if (tvisfalse(o)) { | ||
519 | return lj_str_newlit(L, "false"); | ||
520 | } else if (tvistrue(o)) { | ||
521 | return lj_str_newlit(L, "true"); | ||
522 | } else { | ||
523 | char buf[8+2+2+16], *p = buf; | ||
524 | p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o))); | ||
525 | *p++ = ':'; *p++ = ' '; | ||
526 | if (tvisfunc(o) && isffunc(funcV(o))) { | ||
527 | p = lj_buf_wmem(p, "builtin#", 8); | ||
528 | p = lj_strfmt_wint(p, funcV(o)->c.ffid); | ||
529 | } else { | ||
530 | p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o)); | ||
531 | } | ||
532 | return lj_str_new(L, buf, (size_t)(p - buf)); | ||
533 | } | ||
534 | } | ||
535 | |||
536 | /* -- Internal string formatting ------------------------------------------ */ | ||
537 | |||
538 | /* | ||
539 | ** These functions are only used for lua_pushfstring(), lua_pushvfstring() | ||
540 | ** and for internal string formatting (e.g. error messages). Caveat: unlike | ||
541 | ** string.format(), only a limited subset of formats and flags are supported! | ||
542 | ** | ||
543 | ** LuaJIT has support for a couple more formats than Lua 5.1/5.2: | ||
544 | ** - %d %u %o %x with full formatting, 32 bit integers only. | ||
545 | ** - %f and other FP formats are really %.14g. | ||
546 | ** - %s %c %p without formatting. | ||
547 | */ | ||
548 | |||
549 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | ||
550 | const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) | ||
551 | { | ||
552 | SBuf *sb = lj_buf_tmp_(L); | ||
553 | FormatState fs; | ||
554 | SFormat sf; | ||
555 | GCstr *str; | ||
556 | lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt)); | ||
557 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { | ||
558 | switch (STRFMT_TYPE(sf)) { | ||
559 | case STRFMT_LIT: | ||
560 | lj_buf_putmem(sb, fs.str, fs.len); | ||
561 | break; | ||
562 | case STRFMT_INT: | ||
563 | lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t)); | ||
564 | break; | ||
565 | case STRFMT_UINT: | ||
566 | lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t)); | ||
567 | break; | ||
568 | case STRFMT_NUM: | ||
569 | lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number)); | ||
570 | break; | ||
571 | case STRFMT_STR: { | ||
572 | const char *s = va_arg(argp, char *); | ||
573 | if (s == NULL) s = "(null)"; | ||
574 | lj_buf_putmem(sb, s, (MSize)strlen(s)); | ||
575 | break; | ||
576 | } | ||
577 | case STRFMT_CHAR: | ||
578 | lj_buf_putb(sb, va_arg(argp, int)); | ||
579 | break; | ||
580 | case STRFMT_PTR: | ||
581 | lj_strfmt_putptr(sb, va_arg(argp, void *)); | ||
582 | break; | ||
583 | case STRFMT_ERR: | ||
584 | default: | ||
585 | lj_buf_putb(sb, '?'); | ||
586 | lj_assertL(0, "bad string format near offset %d", fs.len); | ||
587 | break; | ||
588 | } | ||
589 | } | ||
590 | str = lj_buf_str(L, sb); | ||
591 | setstrV(L, L->top, str); | ||
592 | incr_top(L); | ||
593 | return strdata(str); | ||
594 | } | ||
595 | |||
596 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ | ||
597 | const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) | ||
598 | { | ||
599 | const char *msg; | ||
600 | va_list argp; | ||
601 | va_start(argp, fmt); | ||
602 | msg = lj_strfmt_pushvf(L, fmt, argp); | ||
603 | va_end(argp); | ||
604 | return msg; | ||
605 | } | ||
606 | |||
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h new file mode 100644 index 00000000..a4529604 --- /dev/null +++ b/src/lj_strfmt.h | |||
@@ -0,0 +1,131 @@ | |||
1 | /* | ||
2 | ** String formatting. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_STRFMT_H | ||
7 | #define _LJ_STRFMT_H | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | typedef uint32_t SFormat; /* Format indicator. */ | ||
12 | |||
13 | /* Format parser state. */ | ||
14 | typedef struct FormatState { | ||
15 | const uint8_t *p; /* Current format string pointer. */ | ||
16 | const uint8_t *e; /* End of format string. */ | ||
17 | const char *str; /* Returned literal string. */ | ||
18 | MSize len; /* Size of literal string. */ | ||
19 | } FormatState; | ||
20 | |||
21 | /* Format types (max. 16). */ | ||
22 | typedef enum FormatType { | ||
23 | STRFMT_EOF, STRFMT_ERR, STRFMT_LIT, | ||
24 | STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR | ||
25 | } FormatType; | ||
26 | |||
27 | /* Format subtypes (bits are reused). They occupy bits 4-5, right above the 4 bit FormatType, and their meaning depends on that type. */ | ||
28 | #define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */ | ||
29 | #define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */ | ||
30 | #define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */ | ||
31 | #define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */ | ||
32 | #define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */ | ||
33 | #define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */ /* Equals STRFMT_T_FP_E|STRFMT_T_FP_F, so bit tests for E or F also match G. */ | ||
34 | #define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */ | ||
35 | |||
36 | /* Format flags. One bit each, in bits 8-13. */ | ||
37 | #define STRFMT_F_LEFT 0x0100 | ||
38 | #define STRFMT_F_PLUS 0x0200 | ||
39 | #define STRFMT_F_ZERO 0x0400 | ||
40 | #define STRFMT_F_SPACE 0x0800 | ||
41 | #define STRFMT_F_ALT 0x1000 | ||
42 | #define STRFMT_F_UPPER 0x2000 | ||
43 | |||
44 | /* Format indicator fields. Shift counts of the 8 bit width and precision fields inside an SFormat. */ | ||
45 | #define STRFMT_SH_WIDTH 16 | ||
46 | #define STRFMT_SH_PREC 24 | ||
47 | |||
48 | #define STRFMT_TYPE(sf) ((FormatType)((sf) & 15)) /* Low 4 bits: the FormatType. */ | ||
49 | #define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u) | ||
50 | #define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u) /* Field holds precision+1 (cf. STRFMT_G14); a zero field yields (unsigned)-1. */ | ||
51 | #define STRFMT_FP(sf) (((sf) >> 4) & 3) /* Subtype bits as a value 0..3. */ | ||
52 | |||
53 | /* Formats for conversion characters. */ | ||
54 | #define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A) | ||
55 | #define STRFMT_C (STRFMT_CHAR) | ||
56 | #define STRFMT_D (STRFMT_INT) | ||
57 | #define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E) | ||
58 | #define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F) | ||
59 | #define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G) | ||
60 | #define STRFMT_I STRFMT_D | ||
61 | #define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT) | ||
62 | #define STRFMT_P (STRFMT_PTR) | ||
63 | #define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED) | ||
64 | #define STRFMT_S (STRFMT_STR) | ||
65 | #define STRFMT_U (STRFMT_UINT) | ||
66 | #define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX) | ||
67 | #define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC)) | ||
68 | |||
69 | /* Maximum buffer sizes for conversions. */ | ||
70 | #define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */ | ||
71 | #define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */ | ||
72 | #define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */ | ||
73 | #define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */ | ||
74 | |||
75 | /* Format parser. */ | ||
76 | LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs); | ||
77 | |||
78 | static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) | ||
79 | { | ||
80 | fs->p = (const uint8_t *)p; | ||
81 | fs->e = (const uint8_t *)p + len; | ||
82 | /* Must be NUL-terminated. May have NULs inside, too. */ | ||
83 | lj_assertX(*fs->e == 0, "format not NUL-terminated"); | ||
84 | } | ||
85 | |||
86 | /* Raw conversions. */ | ||
87 | LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k); | ||
88 | LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v); | ||
89 | LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v); | ||
90 | LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp); | ||
91 | |||
92 | /* Unformatted conversions to buffer. */ | ||
93 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); | ||
94 | #if LJ_HASJIT | ||
95 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); | ||
96 | #endif | ||
97 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); | ||
98 | #if LJ_HASJIT | ||
99 | LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); | ||
100 | #endif | ||
101 | |||
102 | /* Formatted conversions to buffer. */ | ||
103 | LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k); | ||
104 | LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n); | ||
105 | LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n); | ||
106 | LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n); | ||
107 | LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c); | ||
108 | #if LJ_HASJIT | ||
109 | LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); | ||
110 | #endif | ||
111 | LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry); | ||
112 | |||
113 | /* Conversions to strings. */ | ||
114 | LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k); | ||
115 | LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o); | ||
116 | LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o); | ||
117 | #if LJ_HASJIT | ||
118 | LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c); | ||
119 | #endif | ||
120 | LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); | ||
121 | |||
122 | /* Internal string formatting. */ | ||
123 | LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, | ||
124 | va_list argp); | ||
125 | LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) | ||
126 | #if defined(__GNUC__) || defined(__clang__) | ||
127 | __attribute__ ((format (printf, 2, 3))) | ||
128 | #endif | ||
129 | ; | ||
130 | |||
131 | #endif | ||
diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c new file mode 100644 index 00000000..3c60695c --- /dev/null +++ b/src/lj_strfmt_num.c | |||
@@ -0,0 +1,592 @@ | |||
1 | /* | ||
2 | ** String formatting for floating-point numbers. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** Contributed by Peter Cawley. | ||
5 | */ | ||
6 | |||
7 | #include <stdio.h> | ||
8 | |||
9 | #define lj_strfmt_num_c | ||
10 | #define LUA_CORE | ||
11 | |||
12 | #include "lj_obj.h" | ||
13 | #include "lj_buf.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_strfmt.h" | ||
16 | |||
17 | /* -- Precomputed tables -------------------------------------------------- */ | ||
18 | |||
19 | /* Rescale factors to push the exponent of a number towards zero. The list has 32 entries; lj_strfmt_wfnum() indexes both tables with (biased binary exponent >> 6). P(x) stands for a factor of 10^+x, N(x) for 10^-x. */ | ||
20 | #define RESCALE_EXPONENTS(P, N) \ | ||
21 | P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \ | ||
22 | P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \ | ||
23 | N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \ | ||
24 | N(251), N(270), N(289) | ||
25 | |||
26 | #define ONE_E_P(X) 1e+0 ## X /* Pastes the exponent onto a literal, e.g. ONE_E_P(308) gives 1e+0308. */ | ||
27 | #define ONE_E_N(X) 1e-0 ## X /* Likewise for negative exponents, e.g. ONE_E_N(39) gives 1e-039. */ | ||
28 | static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) }; /* Decimal exponent bias: -x for P(x), +x for N(x); 0 means no rescaling. */ | ||
29 | static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) }; /* The matching multipliers as doubles. */ | ||
30 | #undef ONE_E_N | ||
31 | #undef ONE_E_P | ||
32 | |||
33 | /* | ||
34 | ** For p in range -70 through 57, this table encodes pairs (m, e) such that | ||
35 | ** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds. | ||
36 | */ | ||
37 | static const int8_t four_ulp_m_e[] = { | ||
38 | 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19, | ||
39 | -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16, | ||
40 | 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14, | ||
41 | 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3, | ||
42 | -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7, | ||
43 | 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103, | ||
44 | -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3, | ||
45 | 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2, | ||
46 | 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4, | ||
47 | 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34, | ||
48 | 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10, | ||
49 | 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13, | ||
50 | 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15, | ||
51 | 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17 | ||
52 | }; | ||
53 | |||
/*
** min(2^32-1, 10^e-1) for e in range 0 through 10.
** Entry e is the largest uint32_t value with at most e decimal digits
** (capped at 2^32-1 for e = 10), so for e < 10 the entry plus one is 10^e.
** The table is only ever read, hence it is declared const.
*/
static const uint32_t ndigits_dec_threshold[] = {
  0, 9U, 99U, 999U, 9999U, 99999U, 999999U,
  9999999U, 99999999U, 999999999U, 0xffffffffU
};
60 | /* -- Helper functions ---------------------------------------------------- */ | ||
61 | |||
62 | /* Compute the number of digits in the decimal representation of x. */ | ||
63 | static MSize ndigits_dec(uint32_t x) | ||
64 | { | ||
65 | MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */ | ||
66 | return t + (x > ndigits_dec_threshold[t]); | ||
67 | } | ||
68 | |||
/*
** Emit the leading decimal digit of x to *p++ and keep the remainder in x.
** The division by sc is carried out as a multiplication by ceil(2^sh/sc)
** followed by a right shift by sh.
*/
#define WINT_R(x, sh, sc) \
  do { \
    uint32_t dig = ((x) * (((1 << (sh)) + (sc) - 1) / (sc))) >> (sh); \
    (x) -= dig * (sc); \
    *p++ = (char)('0' + dig); \
  } while (0)

/* Write 9-digit unsigned integer to buffer. */
static char *lj_strfmt_wuint9(char *p, uint32_t u)
{
  /* Split the value into groups of 1 + 4 + 4 decimal digits. */
  uint32_t upper = u / 10000;
  uint32_t low4 = u - upper * 10000;
  uint32_t top = upper / 10000;
  uint32_t mid4 = upper - top * 10000;
  *p++ = (char)('0' + top);
  /* Middle group: thousands, hundreds, tens, then the remaining unit. */
  WINT_R(mid4, 23, 1000);
  WINT_R(mid4, 12, 100);
  WINT_R(mid4, 10, 10);
  *p++ = (char)('0' + mid4);
  /* Low group, same scheme. */
  WINT_R(low4, 23, 1000);
  WINT_R(low4, 12, 100);
  WINT_R(low4, 10, 10);
  *p++ = (char)('0' + low4);
  return p;
}
#undef WINT_R
91 | |||
92 | /* -- Extended precision arithmetic --------------------------------------- */ | ||
93 | |||
94 | /* | ||
95 | ** The "nd" format is a fixed-precision decimal representation for numbers. It | ||
96 | ** consists of up to 64 uint32_t values, with each uint32_t storing a value | ||
97 | ** in the range [0, 1e9). A number in "nd" format consists of three variables: | ||
98 | ** | ||
99 | ** uint32_t nd[64]; | ||
100 | ** uint32_t ndlo; | ||
101 | ** uint32_t ndhi; | ||
102 | ** | ||
103 | ** The integral part of the number is stored in nd[0 ... ndhi], the value of | ||
104 | ** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of | ||
105 | ** the number is zero, ndlo is zero. Otherwise, the fractional part is stored | ||
106 | ** in nd[ndlo ... 63], the value of which is taken to be | ||
107 | ** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}. | ||
108 | ** | ||
109 | ** If the array part had 128 elements rather than 64, then every double would | ||
110 | ** have an exact representation in "nd" format. With 64 elements, all integral | ||
111 | ** doubles have an exact representation, and all non-integral doubles have | ||
112 | ** enough digits to make both %.99e and %.99f do the right thing. | ||
113 | */ | ||
114 | |||
115 | #if LJ_64 | ||
116 | #define ND_MUL2K_MAX_SHIFT 29 /* Largest shift applied per pass over the words. */ | ||
117 | #define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000)) | ||
118 | #else | ||
119 | #define ND_MUL2K_MAX_SHIFT 11 /* Smaller shift so that (val >> 9) fits into 32 bits below. */ | ||
120 | #define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125) /* 1e9 == 2^9 * 1953125, so this is val / 1e9 with a 32 bit division. */ | ||
121 | #endif | ||
122 | |||
123 | /* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). Returns the new ndhi. sf is only consulted for the performance hack. */ | ||
124 | static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, | ||
125 | uint32_t carry_in, SFormat sf) | ||
126 | { | ||
127 | uint32_t i, ndlo = 0, start = 1; | ||
128 | /* Performance hacks. */ | ||
129 | if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) { /* Large shift and not a plain %f conversion. */ | ||
130 | start = ndhi - (STRFMT_PREC(sf) + 17) / 8; /* Unsigned arithmetic; may wrap below zero. */ | ||
131 | } | ||
132 | /* Real logic. */ | ||
133 | while (k >= ND_MUL2K_MAX_SHIFT) { | ||
134 | for (i = ndlo; i <= ndhi; i++) { | ||
135 | uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in; /* Low bits are free after the shift, so OR acts as addition, assuming carry_in < 2^shift. */ | ||
136 | carry_in = ND_MUL2K_DIV1E9(val); | ||
137 | nd[i] = (uint32_t)val - carry_in * 1000000000; /* val mod 1e9, computed with 32 bit wrap-around. */ | ||
138 | } | ||
139 | if (carry_in) { | ||
140 | nd[++ndhi] = carry_in; carry_in = 0; /* Carry out of the top word becomes a new top word. */ | ||
141 | if (start++ == ndlo) ++ndlo; /* start advances with every new top word; whenever it equals ndlo, the lowest word is excluded from further passes. */ | ||
142 | } | ||
143 | k -= ND_MUL2K_MAX_SHIFT; | ||
144 | } | ||
145 | if (k) { /* Remaining shift smaller than ND_MUL2K_MAX_SHIFT. */ | ||
146 | for (i = ndlo; i <= ndhi; i++) { | ||
147 | uint64_t val = ((uint64_t)nd[i] << k) | carry_in; | ||
148 | carry_in = ND_MUL2K_DIV1E9(val); | ||
149 | nd[i] = (uint32_t)val - carry_in * 1000000000; | ||
150 | } | ||
151 | if (carry_in) nd[++ndhi] = carry_in; | ||
152 | } | ||
153 | return ndhi; | ||
154 | } | ||
155 | |||
156 | /* Divide nd by 2^k (ndlo is assumed to be zero). Returns the new ndlo. Word indices wrap modulo 64 (& 0x3f), so fraction words are added downwards from nd[63]. ndhi is passed by value: changes to it stay local. */ | ||
157 | static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf) | ||
158 | { | ||
159 | uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0; /* ~0 never equals a word index, i.e. no cut-off by default. */ | ||
160 | /* Performance hacks. */ | ||
161 | if (!ndhi) { | ||
162 | if (!nd[0]) { | ||
163 | return 0; /* Zero stays zero. */ | ||
164 | } else { | ||
165 | uint32_t s = lj_ffs(nd[0]); /* Presumably the number of trailing zero bits -- confirm lj_ffs(). */ | ||
166 | if (s >= k) { nd[0] >>= k; return 0; } /* Division is exact: no fraction needed. */ | ||
167 | nd[0] >>= s; k -= s; | ||
168 | } | ||
169 | } | ||
170 | if (k > 18) { | ||
171 | if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) { | ||
172 | stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9; /* 9 decimal digits per fraction word. */ | ||
173 | } else { | ||
174 | int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k; | ||
175 | int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114); /* The constant is log10(2). */ | ||
176 | stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9; | ||
177 | stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8; | ||
178 | } | ||
179 | } | ||
180 | /* Real logic. */ | ||
181 | while (k >= 9) { /* Divide by 2^9 per pass. */ | ||
182 | uint32_t i = ndhi, carry = 0; | ||
183 | for (;;) { | ||
184 | uint32_t val = nd[i]; | ||
185 | nd[i] = (val >> 9) + carry; | ||
186 | carry = (val & 0x1ff) * 1953125; /* Remainder scaled by 1e9 / 2^9 for the next lower word. */ | ||
187 | if (i == ndlo) break; | ||
188 | i = (i - 1) & 0x3f; | ||
189 | } | ||
190 | if (ndlo != stop1 && ndlo != stop2) { | ||
191 | if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } /* Extend the fraction by one word. */ | ||
192 | if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; } | ||
193 | } else if (!nd[ndhi]) { /* At a cut-off word: the carry is dropped instead of extending the fraction. */ | ||
194 | if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; } | ||
195 | else return ndlo; /* The only remaining word is zero. */ | ||
196 | } | ||
197 | k -= 9; | ||
198 | } | ||
199 | if (k) { /* Remaining shift of 1..8 bits. */ | ||
200 | uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0; /* 1e9 is a multiple of 2^9, so mul is exactly 1e9 / 2^k. */ | ||
201 | for (;;) { | ||
202 | uint32_t val = nd[i]; | ||
203 | nd[i] = (val >> k) + carry; | ||
204 | carry = (val & mask) * mul; | ||
205 | if (i == ndlo) break; | ||
206 | i = (i - 1) & 0x3f; | ||
207 | } | ||
208 | if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } | ||
209 | } | ||
210 | return ndlo; | ||
211 | } | ||
212 | |||
213 | /* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */ | ||
214 | static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e) | ||
215 | { | ||
216 | uint32_t i, carry; | ||
217 | if (e >= 0) { | ||
218 | i = (uint32_t)e/9; | ||
219 | carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1); | ||
220 | } else { | ||
221 | int32_t f = (e-8)/9; | ||
222 | i = (uint32_t)(64 + f); | ||
223 | carry = m * (ndigits_dec_threshold[e - f*9] + 1); | ||
224 | } | ||
225 | for (;;) { | ||
226 | uint32_t val = nd[i] + carry; | ||
227 | if (LJ_UNLIKELY(val >= 1000000000)) { | ||
228 | val -= 1000000000; | ||
229 | nd[i] = val; | ||
230 | if (LJ_UNLIKELY(i == ndhi)) { | ||
231 | ndhi = (ndhi + 1) & 0x3f; | ||
232 | nd[ndhi] = 1; | ||
233 | break; | ||
234 | } | ||
235 | carry = 1; | ||
236 | i = (i + 1) & 0x3f; | ||
237 | } else { | ||
238 | nd[i] = val; | ||
239 | break; | ||
240 | } | ||
241 | } | ||
242 | return ndhi; | ||
243 | } | ||
244 | |||
245 | /* Test whether two "nd" values are equal in their most significant digits. */ | ||
246 | static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, | ||
247 | MSize prec) | ||
248 | { | ||
249 | char nd9[9], ref9[9]; | ||
250 | if (hilen <= prec) { | ||
251 | if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; | ||
252 | prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f; | ||
253 | if (prec >= 9) { | ||
254 | if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; | ||
255 | prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f; | ||
256 | } | ||
257 | } else { | ||
258 | prec -= hilen - 9; | ||
259 | } | ||
260 | lj_assertX(prec < 9, "bad precision %d", prec); | ||
261 | lj_strfmt_wuint9(nd9, nd[ndhi]); | ||
262 | lj_strfmt_wuint9(ref9, *ref); | ||
263 | return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); | ||
264 | } | ||
265 | |||
266 | /* -- Formatted conversions to buffer ------------------------------------- */ | ||
267 | |||
268 | /* Write formatted floating-point number to either sb or p. */ | ||
269 | static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) | ||
270 | { | ||
271 | MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len; | ||
272 | TValue t; | ||
273 | t.n = n; | ||
274 | if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) { | ||
275 | /* Handle non-finite values uniformly for %a, %e, %f, %g. */ | ||
276 | int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0; | ||
277 | if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) { | ||
278 | ch ^= ('n' << 16) | ('a' << 8) | 'n'; | ||
279 | if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
280 | } else { | ||
281 | ch ^= ('i' << 16) | ('n' << 8) | 'f'; | ||
282 | if ((t.u32.hi & 0x80000000)) prefix = '-'; | ||
283 | else if ((sf & STRFMT_F_PLUS)) prefix = '+'; | ||
284 | else if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
285 | } | ||
286 | len = 3 + (prefix != 0); | ||
287 | if (!p) p = lj_buf_more(sb, width > len ? width : len); | ||
288 | if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; | ||
289 | if (prefix) *p++ = prefix; | ||
290 | *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch; | ||
291 | } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) { | ||
292 | /* %a */ | ||
293 | const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX" | ||
294 | : "0123456789abcdefpx"; | ||
295 | int32_t e = (t.u32.hi >> 20) & 0x7ff; | ||
296 | char prefix = 0, eprefix = '+'; | ||
297 | if (t.u32.hi & 0x80000000) prefix = '-'; | ||
298 | else if ((sf & STRFMT_F_PLUS)) prefix = '+'; | ||
299 | else if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
300 | t.u32.hi &= 0xfffff; | ||
301 | if (e) { | ||
302 | t.u32.hi |= 0x100000; | ||
303 | e -= 1023; | ||
304 | } else if (t.u32.lo | t.u32.hi) { | ||
305 | /* Non-zero denormal - normalise it. */ | ||
306 | uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo); | ||
307 | e = -1022 - shift; | ||
308 | t.u64 <<= shift; | ||
309 | } | ||
310 | /* abs(n) == t.u64 * 2^(e - 52) */ | ||
311 | /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */ | ||
312 | if ((int32_t)prec < 0) { | ||
313 | /* Default precision: use smallest precision giving exact result. */ | ||
314 | prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4; | ||
315 | } else if (prec < 13) { | ||
316 | /* Precision is sufficiently low as to maybe require rounding. */ | ||
317 | t.u64 += (((uint64_t)1) << (51 - prec*4)); | ||
318 | } | ||
319 | if (e < 0) { | ||
320 | eprefix = '-'; | ||
321 | e = -e; | ||
322 | } | ||
323 | len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0) | ||
324 | + ((prec | (sf & STRFMT_F_ALT)) != 0); | ||
325 | if (!p) p = lj_buf_more(sb, width > len ? width : len); | ||
326 | if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { | ||
327 | while (width-- > len) *p++ = ' '; | ||
328 | } | ||
329 | if (prefix) *p++ = prefix; | ||
330 | *p++ = '0'; | ||
331 | *p++ = hexdig[17]; /* x or X */ | ||
332 | if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { | ||
333 | while (width-- > len) *p++ = '0'; | ||
334 | } | ||
335 | *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */ | ||
336 | if ((prec | (sf & STRFMT_F_ALT))) { | ||
337 | /* Emit fractional part. */ | ||
338 | char *q = p + 1 + prec; | ||
339 | *p = '.'; | ||
340 | if (prec < 13) t.u64 >>= (52 - prec*4); | ||
341 | else while (prec > 13) p[prec--] = '0'; | ||
342 | while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; } | ||
343 | p = q; | ||
344 | } | ||
345 | *p++ = hexdig[16]; /* p or P */ | ||
346 | *p++ = eprefix; /* + or - */ | ||
347 | p = lj_strfmt_wint(p, e); | ||
348 | } else { | ||
349 | /* %e or %f or %g - begin by converting n to "nd" format. */ | ||
350 | uint32_t nd[64]; | ||
351 | uint32_t ndhi = 0, ndlo, i; | ||
352 | int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0; | ||
353 | char prefix = 0, *q; | ||
354 | if (t.u32.hi & 0x80000000) prefix = '-'; | ||
355 | else if ((sf & STRFMT_F_PLUS)) prefix = '+'; | ||
356 | else if ((sf & STRFMT_F_SPACE)) prefix = ' '; | ||
357 | prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */ | ||
358 | if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) { | ||
359 | /* %g - decrement precision if non-zero (to make it like %e). */ | ||
360 | prec--; | ||
361 | prec ^= (uint32_t)((int32_t)prec >> 31); | ||
362 | } | ||
363 | if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) { | ||
364 | /* Precision is sufficiently low that rescaling will probably work. */ | ||
365 | if ((ndebias = rescale_e[e >> 6])) { | ||
366 | t.n = n * rescale_n[e >> 6]; | ||
367 | if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10; | ||
368 | t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */ | ||
369 | nd[0] = 0x100000 | (t.u32.hi & 0xfffff); | ||
370 | e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29); | ||
371 | goto load_t_lo; rescale_failed: | ||
372 | t.n = n; | ||
373 | e = (t.u32.hi >> 20) & 0x7ff; | ||
374 | ndebias = ndhi = 0; | ||
375 | } | ||
376 | } | ||
377 | nd[0] = t.u32.hi & 0xfffff; | ||
378 | if (e == 0) e++; else nd[0] |= 0x100000; | ||
379 | e -= 1043; | ||
380 | if (t.u32.lo) { | ||
381 | e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo: | ||
382 | #if ND_MUL2K_MAX_SHIFT >= 29 | ||
383 | nd[0] = (nd[0] << 3) | (t.u32.lo >> 29); | ||
384 | ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf); | ||
385 | #elif ND_MUL2K_MAX_SHIFT >= 11 | ||
386 | ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf); | ||
387 | ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf); | ||
388 | ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf); | ||
389 | #else | ||
390 | #error "ND_MUL2K_MAX_SHIFT too small" | ||
391 | #endif | ||
392 | } | ||
393 | if (e >= 0) { | ||
394 | ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf); | ||
395 | ndlo = 0; | ||
396 | } else { | ||
397 | ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf); | ||
398 | if (ndhi && !nd[ndhi]) ndhi--; | ||
399 | } | ||
400 | /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */ | ||
401 | if ((sf & STRFMT_T_FP_E)) { | ||
402 | /* %e or %g - assume %e and start by calculating nd's exponent (nde). */ | ||
403 | char eprefix = '+'; | ||
404 | int32_t nde = -1; | ||
405 | MSize hilen; | ||
406 | if (ndlo && !nd[ndhi]) { | ||
407 | ndhi = 64; do {} while (!nd[--ndhi]); | ||
408 | nde -= 64 * 9; | ||
409 | } | ||
410 | hilen = ndigits_dec(nd[ndhi]); | ||
411 | nde += ndhi * 9 + hilen; | ||
412 | if (ndebias) { | ||
413 | /* | ||
414 | ** Rescaling was performed, but this introduced some error, and might | ||
415 | ** have pushed us across a rounding boundary. We check whether this | ||
416 | ** error affected the result by introducing even more error (2ulp in | ||
417 | ** either direction), and seeing whether a rounding boundary was | ||
418 | ** crossed. Having already converted the -2ulp case, we save off its | ||
419 | ** most significant digits, convert the +2ulp case, and compare them. | ||
420 | */ | ||
421 | int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) | ||
422 | + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); | ||
423 | const int8_t *m_e = four_ulp_m_e + eidx * 2; | ||
424 | lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx); | ||
425 | nd[33] = nd[ndhi]; | ||
426 | nd[32] = nd[(ndhi - 1) & 0x3f]; | ||
427 | nd[31] = nd[(ndhi - 2) & 0x3f]; | ||
428 | nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]); | ||
429 | if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) { | ||
430 | goto rescale_failed; | ||
431 | } | ||
432 | } | ||
433 | if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) { | ||
434 | /* Precision is sufficiently low as to maybe require rounding. */ | ||
435 | ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1); | ||
436 | nde += (hilen != ndigits_dec(nd[ndhi])); | ||
437 | } | ||
438 | nde += ndebias; | ||
439 | if ((sf & STRFMT_T_FP_F)) { | ||
440 | /* %g */ | ||
441 | if ((int32_t)prec >= nde && nde >= -4) { | ||
442 | if (nde < 0) ndhi = 0; | ||
443 | prec -= nde; | ||
444 | goto g_format_like_f; | ||
445 | } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) { | ||
446 | /* Decrease precision in order to strip trailing zeroes. */ | ||
447 | char tail[9]; | ||
448 | uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9; | ||
449 | if (prec >= maxprec) prec = maxprec; | ||
450 | else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f; | ||
451 | i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10; | ||
452 | lj_strfmt_wuint9(tail, nd[ndlo]); | ||
453 | while (prec && tail[--i] == '0') { | ||
454 | prec--; | ||
455 | if (!i) { | ||
456 | if (ndlo == ndhi) { prec = 0; break; } | ||
457 | lj_strfmt_wuint9(tail, nd[++ndlo]); | ||
458 | i = 9; | ||
459 | } | ||
460 | } | ||
461 | } | ||
462 | } | ||
463 | if (nde < 0) { | ||
464 | /* Make nde non-negative. */ | ||
465 | eprefix = '-'; | ||
466 | nde = -nde; | ||
467 | } | ||
468 | len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10) | ||
469 | + ((prec | (sf & STRFMT_F_ALT)) != 0); | ||
470 | if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5); | ||
471 | if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { | ||
472 | while (width-- > len) *p++ = ' '; | ||
473 | } | ||
474 | if (prefix) *p++ = prefix; | ||
475 | if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { | ||
476 | while (width-- > len) *p++ = '0'; | ||
477 | } | ||
478 | q = lj_strfmt_wint(p + 1, nd[ndhi]); | ||
479 | p[0] = p[1]; /* Put leading digit in the correct place. */ | ||
480 | if ((prec | (sf & STRFMT_F_ALT))) { | ||
481 | /* Emit fractional part. */ | ||
482 | p[1] = '.'; p += 2; | ||
483 | prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */ | ||
484 | /* Then emit chunks of 9 digits (this may emit 8 digits too many). */ | ||
485 | for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) { | ||
486 | i = (i - 1) & 0x3f; | ||
487 | p = lj_strfmt_wuint9(p, nd[i]); | ||
488 | } | ||
489 | if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) { | ||
490 | /* %g (and not %#g) - strip trailing zeroes. */ | ||
491 | p += (int32_t)prec & ((int32_t)prec >> 31); | ||
492 | while (p[-1] == '0') p--; | ||
493 | if (p[-1] == '.') p--; | ||
494 | } else { | ||
495 | /* %e (or %#g) - emit trailing zeroes. */ | ||
496 | while ((int32_t)prec > 0) { *p++ = '0'; prec--; } | ||
497 | p += (int32_t)prec; | ||
498 | } | ||
499 | } else { | ||
500 | p++; | ||
501 | } | ||
502 | *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e'; | ||
503 | *p++ = eprefix; /* + or - */ | ||
504 | if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */ | ||
505 | p = lj_strfmt_wint(p, nde); | ||
506 | } else { | ||
507 | /* %f (or, shortly, %g in %f style) */ | ||
508 | if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) { | ||
509 | /* Precision is sufficiently low as to maybe require rounding. */ | ||
510 | ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1); | ||
511 | } | ||
512 | g_format_like_f: | ||
513 | if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) { | ||
514 | /* Decrease precision in order to strip trailing zeroes. */ | ||
515 | if (ndlo) { | ||
516 | /* nd has a fractional part; we need to look at its digits. */ | ||
517 | char tail[9]; | ||
518 | uint32_t maxprec = (64 - ndlo) * 9; | ||
519 | if (prec >= maxprec) prec = maxprec; | ||
520 | else ndlo = 64 - (prec + 8) / 9; | ||
521 | i = prec - ((63 - ndlo) * 9); | ||
522 | lj_strfmt_wuint9(tail, nd[ndlo]); | ||
523 | while (prec && tail[--i] == '0') { | ||
524 | prec--; | ||
525 | if (!i) { | ||
526 | if (ndlo == 63) { prec = 0; break; } | ||
527 | lj_strfmt_wuint9(tail, nd[++ndlo]); | ||
528 | i = 9; | ||
529 | } | ||
530 | } | ||
531 | } else { | ||
532 | /* nd has no fractional part, so precision goes straight to zero. */ | ||
533 | prec = 0; | ||
534 | } | ||
535 | } | ||
536 | len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0) | ||
537 | + ((prec | (sf & STRFMT_F_ALT)) != 0); | ||
538 | if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8); | ||
539 | if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { | ||
540 | while (width-- > len) *p++ = ' '; | ||
541 | } | ||
542 | if (prefix) *p++ = prefix; | ||
543 | if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { | ||
544 | while (width-- > len) *p++ = '0'; | ||
545 | } | ||
546 | /* Emit integer part. */ | ||
547 | p = lj_strfmt_wint(p, nd[ndhi]); | ||
548 | i = ndhi; | ||
549 | while (i) p = lj_strfmt_wuint9(p, nd[--i]); | ||
550 | if ((prec | (sf & STRFMT_F_ALT))) { | ||
551 | /* Emit fractional part. */ | ||
552 | *p++ = '.'; | ||
553 | /* Emit chunks of 9 digits (this may emit 8 digits too many). */ | ||
554 | while ((int32_t)prec > 0 && i != ndlo) { | ||
555 | i = (i - 1) & 0x3f; | ||
556 | p = lj_strfmt_wuint9(p, nd[i]); | ||
557 | prec -= 9; | ||
558 | } | ||
559 | if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) { | ||
560 | /* %g (and not %#g) - strip trailing zeroes. */ | ||
561 | p += (int32_t)prec & ((int32_t)prec >> 31); | ||
562 | while (p[-1] == '0') p--; | ||
563 | if (p[-1] == '.') p--; | ||
564 | } else { | ||
565 | /* %f (or %#g) - emit trailing zeroes. */ | ||
566 | while ((int32_t)prec > 0) { *p++ = '0'; prec--; } | ||
567 | p += (int32_t)prec; | ||
568 | } | ||
569 | } | ||
570 | } | ||
571 | } | ||
572 | if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; | ||
573 | return p; | ||
574 | } | ||
575 | |||
576 | /* Add formatted floating-point number to buffer. Formats n according to sf via lj_strfmt_wfnum(), stores the returned pointer in sb->w and returns sb. */ | ||
577 | SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) | ||
578 | { | ||
579 | sb->w = lj_strfmt_wfnum(sb, sf, n, NULL); /* NULL: no caller-supplied output pointer, so lj_strfmt_wfnum() obtains space from sb (lj_buf_more) and returns the end of the emitted text. */ | ||
580 | return sb; | ||
581 | } | ||
582 | |||
583 | /* -- Conversions to strings ---------------------------------------------- */ | ||
584 | |||
585 | /* Convert number to string. Formats o->n with STRFMT_G14 into a local buffer and passes the emitted bytes to lj_str_new(). */ | ||
586 | GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) | ||
587 | { | ||
588 | char buf[STRFMT_MAXBUF_NUM]; | ||
589 | MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf); /* Returned end pointer minus buf start == number of bytes emitted. */ | ||
590 | return lj_str_new(L, buf, len); | ||
591 | } | ||
592 | |||
diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 914cfb7a..1d1c1c74 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c | |||
@@ -80,7 +80,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) | |||
80 | /* Avoid double rounding for denormals. */ | 80 | /* Avoid double rounding for denormals. */ |
81 | if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { | 81 | if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { |
82 | /* NYI: all of this generates way too much code on 32 bit CPUs. */ | 82 | /* NYI: all of this generates way too much code on 32 bit CPUs. */ |
83 | #if defined(__GNUC__) && LJ_64 | 83 | #if (defined(__GNUC__) || defined(__clang__)) && LJ_64 |
84 | int32_t b = (int32_t)(__builtin_clzll(x)^63); | 84 | int32_t b = (int32_t)(__builtin_clzll(x)^63); |
85 | #else | 85 | #else |
86 | int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : | 86 | int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : |
@@ -94,7 +94,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) | |||
94 | } | 94 | } |
95 | 95 | ||
96 | /* Convert to double using a signed int64_t conversion, then rescale. */ | 96 | /* Convert to double using a signed int64_t conversion, then rescale. */ |
97 | lua_assert((int64_t)x >= 0); | 97 | lj_assertX((int64_t)x >= 0, "bad double conversion"); |
98 | n = (double)(int64_t)x; | 98 | n = (double)(int64_t)x; |
99 | if (neg) n = -n; | 99 | if (neg) n = -n; |
100 | if (ex2) n = ldexp(n, ex2); | 100 | if (ex2) n = ldexp(n, ex2); |
@@ -142,7 +142,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o, | |||
142 | break; | 142 | break; |
143 | } | 143 | } |
144 | 144 | ||
145 | /* Reduce range then convert to double. */ | 145 | /* Reduce range, then convert to double. */ |
146 | if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } | 146 | if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } |
147 | strscan_double(x, o, ex2, neg); | 147 | strscan_double(x, o, ex2, neg); |
148 | return fmt; | 148 | return fmt; |
@@ -264,7 +264,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, | |||
264 | uint32_t hi = 0, lo = (uint32_t)(xip-xi); | 264 | uint32_t hi = 0, lo = (uint32_t)(xip-xi); |
265 | int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); | 265 | int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); |
266 | 266 | ||
267 | lua_assert(lo > 0 && (ex10 & 1) == 0); | 267 | lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10); |
268 | 268 | ||
269 | /* Handle simple overflow/underflow. */ | 269 | /* Handle simple overflow/underflow. */ |
270 | if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } | 270 | if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } |
@@ -328,10 +328,55 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, | |||
328 | return fmt; | 328 | return fmt; |
329 | } | 329 | } |
330 | 330 | ||
331 | /* Parse binary number. Reads dig binary digits starting at p and stores the value in o as selected by fmt/opt. Returns the resulting format, or STRSCAN_ERROR on bad input. */ | ||
332 | static StrScanFmt strscan_bin(const uint8_t *p, TValue *o, | ||
333 | StrScanFmt fmt, uint32_t opt, | ||
334 | int32_t ex2, int32_t neg, uint32_t dig) | ||
335 | { | ||
336 | uint64_t x = 0; | ||
337 | uint32_t i; | ||
338 | |||
339 | if (ex2 || dig > 64) return STRSCAN_ERROR; /* Reject a non-zero exponent and more digits than fit in 64 bits. */ | ||
340 | |||
341 | /* Scan binary digits. */ | ||
342 | for (i = dig; i; i--, p++) { | ||
343 | if ((*p & ~1) != '0') return STRSCAN_ERROR; /* Only '0' and '1' pass this mask test. */ | ||
344 | x = (x << 1) | (*p & 1); | ||
345 | } | ||
346 | |||
347 | /* Format-specific handling. */ | ||
348 | switch (fmt) { | ||
349 | case STRSCAN_INT: | ||
350 | if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { | ||
351 | o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; /* Negate unsigned: -(int32_t)x is signed overflow for x == 0x80000000. */ | ||
352 | return STRSCAN_INT; /* Fast path for 32 bit integers. */ | ||
353 | } | ||
354 | if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; } | ||
355 | /* fallthrough */ | ||
356 | case STRSCAN_U32: | ||
357 | if (dig > 32) return STRSCAN_ERROR; | ||
358 | o->i = neg ? (int32_t)(~x+1u) : (int32_t)x; /* Negate unsigned, then convert; avoids signed overflow. */ | ||
359 | return STRSCAN_U32; | ||
360 | case STRSCAN_I64: | ||
361 | case STRSCAN_U64: | ||
362 | o->u64 = neg ? ~x+1u : x; /* Two's complement negate in uint64_t; -(int64_t)x overflows for x == 2^63. */ | ||
363 | return fmt; | ||
364 | default: | ||
365 | break; | ||
366 | } | ||
367 | |||
368 | /* Reduce range, then convert to double. */ | ||
369 | if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } | ||
370 | strscan_double(x, o, ex2, neg); | ||
371 | return fmt; | ||
372 | } | ||
373 | |||
331 | /* Scan string containing a number. Returns format. Returns value in o. */ | 374 | /* Scan string containing a number. Returns format. Returns value in o. */ |
332 | StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | 375 | StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, |
376 | uint32_t opt) | ||
333 | { | 377 | { |
334 | int32_t neg = 0; | 378 | int32_t neg = 0; |
379 | const uint8_t *pe = p + len; | ||
335 | 380 | ||
336 | /* Remove leading space, parse sign and non-numbers. */ | 381 | /* Remove leading space, parse sign and non-numbers. */ |
337 | if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { | 382 | if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { |
@@ -349,7 +394,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
349 | p += 3; | 394 | p += 3; |
350 | } | 395 | } |
351 | while (lj_char_isspace(*p)) p++; | 396 | while (lj_char_isspace(*p)) p++; |
352 | if (*p) return STRSCAN_ERROR; | 397 | if (*p || p < pe) return STRSCAN_ERROR; |
353 | o->u64 = tmp.u64; | 398 | o->u64 = tmp.u64; |
354 | return STRSCAN_NUM; | 399 | return STRSCAN_NUM; |
355 | } | 400 | } |
@@ -366,8 +411,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
366 | 411 | ||
367 | /* Determine base and skip leading zeros. */ | 412 | /* Determine base and skip leading zeros. */ |
368 | if (LJ_UNLIKELY(*p <= '0')) { | 413 | if (LJ_UNLIKELY(*p <= '0')) { |
369 | if (*p == '0' && casecmp(p[1], 'x')) | 414 | if (*p == '0') { |
370 | base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; | 415 | if (casecmp(p[1], 'x')) |
416 | base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; | ||
417 | else if (casecmp(p[1], 'b')) | ||
418 | base = 2, cmask = LJ_CHAR_DIGIT, p += 2; | ||
419 | } | ||
371 | for ( ; ; p++) { | 420 | for ( ; ; p++) { |
372 | if (*p == '0') { | 421 | if (*p == '0') { |
373 | hasdig = 1; | 422 | hasdig = 1; |
@@ -396,6 +445,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
396 | 445 | ||
397 | /* Handle decimal point. */ | 446 | /* Handle decimal point. */ |
398 | if (dp) { | 447 | if (dp) { |
448 | if (base == 2) return STRSCAN_ERROR; | ||
399 | fmt = STRSCAN_NUM; | 449 | fmt = STRSCAN_NUM; |
400 | if (dig) { | 450 | if (dig) { |
401 | ex = (int32_t)(dp-(p-1)); dp = p-1; | 451 | ex = (int32_t)(dp-(p-1)); dp = p-1; |
@@ -406,7 +456,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
406 | } | 456 | } |
407 | 457 | ||
408 | /* Parse exponent. */ | 458 | /* Parse exponent. */ |
409 | if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { | 459 | if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { |
410 | uint32_t xx; | 460 | uint32_t xx; |
411 | int negx = 0; | 461 | int negx = 0; |
412 | fmt = STRSCAN_NUM; p++; | 462 | fmt = STRSCAN_NUM; p++; |
@@ -445,6 +495,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
445 | while (lj_char_isspace(*p)) p++; | 495 | while (lj_char_isspace(*p)) p++; |
446 | if (*p) return STRSCAN_ERROR; | 496 | if (*p) return STRSCAN_ERROR; |
447 | } | 497 | } |
498 | if (p < pe) return STRSCAN_ERROR; | ||
448 | 499 | ||
449 | /* Fast path for decimal 32 bit integers. */ | 500 | /* Fast path for decimal 32 bit integers. */ |
450 | if (fmt == STRSCAN_INT && base == 10 && | 501 | if (fmt == STRSCAN_INT && base == 10 && |
@@ -466,6 +517,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
466 | return strscan_oct(sp, o, fmt, neg, dig); | 517 | return strscan_oct(sp, o, fmt, neg, dig); |
467 | if (base == 16) | 518 | if (base == 16) |
468 | fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); | 519 | fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); |
520 | else if (base == 2) | ||
521 | fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig); | ||
469 | else | 522 | else |
470 | fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); | 523 | fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); |
471 | 524 | ||
@@ -481,18 +534,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) | |||
481 | 534 | ||
482 | int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) | 535 | int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) |
483 | { | 536 | { |
484 | StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, | 537 | StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, |
485 | STRSCAN_OPT_TONUM); | 538 | STRSCAN_OPT_TONUM); |
486 | lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); | 539 | lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format"); |
487 | return (fmt != STRSCAN_ERROR); | 540 | return (fmt != STRSCAN_ERROR); |
488 | } | 541 | } |
489 | 542 | ||
490 | #if LJ_DUALNUM | 543 | #if LJ_DUALNUM |
491 | int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) | 544 | int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) |
492 | { | 545 | { |
493 | StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, | 546 | StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, |
494 | STRSCAN_OPT_TOINT); | 547 | STRSCAN_OPT_TOINT); |
495 | lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); | 548 | lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT, |
549 | "bad scan format"); | ||
496 | if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); | 550 | if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); |
497 | return (fmt != STRSCAN_ERROR); | 551 | return (fmt != STRSCAN_ERROR); |
498 | } | 552 | } |
diff --git a/src/lj_strscan.h b/src/lj_strscan.h index 61ddcb45..8ed31542 100644 --- a/src/lj_strscan.h +++ b/src/lj_strscan.h | |||
@@ -22,7 +22,8 @@ typedef enum { | |||
22 | STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, | 22 | STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, |
23 | } StrScanFmt; | 23 | } StrScanFmt; |
24 | 24 | ||
25 | LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); | 25 | LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, |
26 | uint32_t opt); | ||
26 | LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); | 27 | LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); |
27 | #if LJ_DUALNUM | 28 | #if LJ_DUALNUM |
28 | LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); | 29 | LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); |
diff --git a/src/lj_tab.c b/src/lj_tab.c index c5b6bcbf..c3609b38 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c | |||
@@ -16,25 +16,10 @@ | |||
16 | 16 | ||
17 | /* -- Object hashing ------------------------------------------------------ */ | 17 | /* -- Object hashing ------------------------------------------------------ */ |
18 | 18 | ||
19 | /* Hash values are masked with the table hash mask and used as an index. */ | ||
20 | static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) | ||
21 | { | ||
22 | Node *n = noderef(t->node); | ||
23 | return &n[hash & t->hmask]; | ||
24 | } | ||
25 | |||
26 | /* String hashes are precomputed when they are interned. */ | ||
27 | #define hashstr(t, s) hashmask(t, (s)->hash) | ||
28 | |||
29 | #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) | ||
30 | #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) | ||
31 | #define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS) | ||
32 | #define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) | ||
33 | |||
34 | /* Hash an arbitrary key and return its anchor position in the hash table. */ | 19 | /* Hash an arbitrary key and return its anchor position in the hash table. */ |
35 | static Node *hashkey(const GCtab *t, cTValue *key) | 20 | static Node *hashkey(const GCtab *t, cTValue *key) |
36 | { | 21 | { |
37 | lua_assert(!tvisint(key)); | 22 | lj_assertX(!tvisint(key), "attempt to hash integer"); |
38 | if (tvisstr(key)) | 23 | if (tvisstr(key)) |
39 | return hashstr(t, strV(key)); | 24 | return hashstr(t, strV(key)); |
40 | else if (tvisnum(key)) | 25 | else if (tvisnum(key)) |
@@ -53,13 +38,13 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) | |||
53 | { | 38 | { |
54 | uint32_t hsize; | 39 | uint32_t hsize; |
55 | Node *node; | 40 | Node *node; |
56 | lua_assert(hbits != 0); | 41 | lj_assertL(hbits != 0, "zero hash size"); |
57 | if (hbits > LJ_MAX_HBITS) | 42 | if (hbits > LJ_MAX_HBITS) |
58 | lj_err_msg(L, LJ_ERR_TABOV); | 43 | lj_err_msg(L, LJ_ERR_TABOV); |
59 | hsize = 1u << hbits; | 44 | hsize = 1u << hbits; |
60 | node = lj_mem_newvec(L, hsize, Node); | 45 | node = lj_mem_newvec(L, hsize, Node); |
61 | setmref(node->freetop, &node[hsize]); | ||
62 | setmref(t->node, node); | 46 | setmref(t->node, node); |
47 | setfreetop(t, node, &node[hsize]); | ||
63 | t->hmask = hsize-1; | 48 | t->hmask = hsize-1; |
64 | } | 49 | } |
65 | 50 | ||
@@ -74,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t) | |||
74 | { | 59 | { |
75 | uint32_t i, hmask = t->hmask; | 60 | uint32_t i, hmask = t->hmask; |
76 | Node *node = noderef(t->node); | 61 | Node *node = noderef(t->node); |
77 | lua_assert(t->hmask != 0); | 62 | lj_assertX(t->hmask != 0, "empty hash part"); |
78 | for (i = 0; i <= hmask; i++) { | 63 | for (i = 0; i <= hmask; i++) { |
79 | Node *n = &node[i]; | 64 | Node *n = &node[i]; |
80 | setmref(n->next, NULL); | 65 | setmref(n->next, NULL); |
@@ -98,7 +83,8 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) | |||
98 | GCtab *t; | 83 | GCtab *t; |
99 | /* First try to colocate the array part. */ | 84 | /* First try to colocate the array part. */ |
100 | if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { | 85 | if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { |
101 | lua_assert((sizeof(GCtab) & 7) == 0); | 86 | Node *nilnode; |
87 | lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size"); | ||
102 | t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); | 88 | t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); |
103 | t->gct = ~LJ_TTAB; | 89 | t->gct = ~LJ_TTAB; |
104 | t->nomm = (uint8_t)~0; | 90 | t->nomm = (uint8_t)~0; |
@@ -107,8 +93,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) | |||
107 | setgcrefnull(t->metatable); | 93 | setgcrefnull(t->metatable); |
108 | t->asize = asize; | 94 | t->asize = asize; |
109 | t->hmask = 0; | 95 | t->hmask = 0; |
110 | setmref(t->node, &G(L)->nilnode); | 96 | nilnode = &G(L)->nilnode; |
97 | setmref(t->node, nilnode); | ||
98 | #if LJ_GC64 | ||
99 | setmref(t->freetop, nilnode); | ||
100 | #endif | ||
111 | } else { /* Otherwise separately allocate the array part. */ | 101 | } else { /* Otherwise separately allocate the array part. */ |
102 | Node *nilnode; | ||
112 | t = lj_mem_newobj(L, GCtab); | 103 | t = lj_mem_newobj(L, GCtab); |
113 | t->gct = ~LJ_TTAB; | 104 | t->gct = ~LJ_TTAB; |
114 | t->nomm = (uint8_t)~0; | 105 | t->nomm = (uint8_t)~0; |
@@ -117,7 +108,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) | |||
117 | setgcrefnull(t->metatable); | 108 | setgcrefnull(t->metatable); |
118 | t->asize = 0; /* In case the array allocation fails. */ | 109 | t->asize = 0; /* In case the array allocation fails. */ |
119 | t->hmask = 0; | 110 | t->hmask = 0; |
120 | setmref(t->node, &G(L)->nilnode); | 111 | nilnode = &G(L)->nilnode; |
112 | setmref(t->node, nilnode); | ||
113 | #if LJ_GC64 | ||
114 | setmref(t->freetop, nilnode); | ||
115 | #endif | ||
121 | if (asize > 0) { | 116 | if (asize > 0) { |
122 | if (asize > LJ_MAX_ASIZE) | 117 | if (asize > LJ_MAX_ASIZE) |
123 | lj_err_msg(L, LJ_ERR_TABOV); | 118 | lj_err_msg(L, LJ_ERR_TABOV); |
@@ -149,6 +144,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) | |||
149 | return t; | 144 | return t; |
150 | } | 145 | } |
151 | 146 | ||
147 | /* Create a new table from array size hint a and hash size hint h. The API of this function conforms to lua_createtable(). */ | ||
148 | GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h) | ||
149 | { | ||
150 | return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h)); /* Positive a requests a+1 array slots (same +1 as lj_tab_reasize); a <= 0 requests none. h is converted with hsize2hbits(). */ | ||
151 | } | ||
152 | |||
152 | #if LJ_HASJIT | 153 | #if LJ_HASJIT |
153 | GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) | 154 | GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) |
154 | { | 155 | { |
@@ -165,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) | |||
165 | GCtab *t; | 166 | GCtab *t; |
166 | uint32_t asize, hmask; | 167 | uint32_t asize, hmask; |
167 | t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); | 168 | t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); |
168 | lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); | 169 | lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask, |
170 | "mismatched size of table and template"); | ||
169 | t->nomm = 0; /* Keys with metamethod names may be present. */ | 171 | t->nomm = 0; /* Keys with metamethod names may be present. */ |
170 | asize = kt->asize; | 172 | asize = kt->asize; |
171 | if (asize > 0) { | 173 | if (asize > 0) { |
@@ -185,7 +187,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) | |||
185 | Node *node = noderef(t->node); | 187 | Node *node = noderef(t->node); |
186 | Node *knode = noderef(kt->node); | 188 | Node *knode = noderef(kt->node); |
187 | ptrdiff_t d = (char *)node - (char *)knode; | 189 | ptrdiff_t d = (char *)node - (char *)knode; |
188 | setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d)); | 190 | setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d)); |
189 | for (i = 0; i <= hmask; i++) { | 191 | for (i = 0; i <= hmask; i++) { |
190 | Node *kn = &knode[i]; | 192 | Node *kn = &knode[i]; |
191 | Node *n = &node[i]; | 193 | Node *n = &node[i]; |
@@ -198,6 +200,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) | |||
198 | return t; | 200 | return t; |
199 | } | 201 | } |
200 | 202 | ||
203 | /* Clear a table: clears the array part and, if the table has a hash part, resets its free-node pointer and clears the hash part. */ | ||
204 | void LJ_FASTCALL lj_tab_clear(GCtab *t) | ||
205 | { | ||
206 | clearapart(t); | ||
207 | if (t->hmask > 0) { /* clearhpart() asserts a non-empty hash part; with hmask == 0 newtab points t->node at the shared nilnode. */ | ||
208 | Node *node = noderef(t->node); | ||
209 | setfreetop(t, node, &node[t->hmask+1]); /* Free pointer back to one past the last node, as newhpart() sets it. */ | ||
210 | clearhpart(t); | ||
211 | } | ||
212 | } | ||
213 | |||
201 | /* Free a table. */ | 214 | /* Free a table. */ |
202 | void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) | 215 | void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) |
203 | { | 216 | { |
@@ -214,7 +227,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) | |||
214 | /* -- Table resizing ------------------------------------------------------ */ | 227 | /* -- Table resizing ------------------------------------------------------ */ |
215 | 228 | ||
216 | /* Resize a table to fit the new array/hash part sizes. */ | 229 | /* Resize a table to fit the new array/hash part sizes. */ |
217 | static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) | 230 | void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) |
218 | { | 231 | { |
219 | Node *oldnode = noderef(t->node); | 232 | Node *oldnode = noderef(t->node); |
220 | uint32_t oldasize = t->asize; | 233 | uint32_t oldasize = t->asize; |
@@ -247,6 +260,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) | |||
247 | } else { | 260 | } else { |
248 | global_State *g = G(L); | 261 | global_State *g = G(L); |
249 | setmref(t->node, &g->nilnode); | 262 | setmref(t->node, &g->nilnode); |
263 | #if LJ_GC64 | ||
264 | setmref(t->freetop, &g->nilnode); | ||
265 | #endif | ||
250 | t->hmask = 0; | 266 | t->hmask = 0; |
251 | } | 267 | } |
252 | if (asize < oldasize) { /* Array part shrinks? */ | 268 | if (asize < oldasize) { /* Array part shrinks? */ |
@@ -276,7 +292,7 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) | |||
276 | 292 | ||
277 | static uint32_t countint(cTValue *key, uint32_t *bins) | 293 | static uint32_t countint(cTValue *key, uint32_t *bins) |
278 | { | 294 | { |
279 | lua_assert(!tvisint(key)); | 295 | lj_assertX(!tvisint(key), "bad integer key"); |
280 | if (tvisnum(key)) { | 296 | if (tvisnum(key)) { |
281 | lua_Number nk = numV(key); | 297 | lua_Number nk = numV(key); |
282 | int32_t k = lj_num2int(nk); | 298 | int32_t k = lj_num2int(nk); |
@@ -348,7 +364,7 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) | |||
348 | asize += countint(ek, bins); | 364 | asize += countint(ek, bins); |
349 | na = bestasize(bins, &asize); | 365 | na = bestasize(bins, &asize); |
350 | total -= na; | 366 | total -= na; |
351 | resizetab(L, t, asize, hsize2hbits(total)); | 367 | lj_tab_resize(L, t, asize, hsize2hbits(total)); |
352 | } | 368 | } |
353 | 369 | ||
354 | #if LJ_HASFFI | 370 | #if LJ_HASFFI |
@@ -360,7 +376,7 @@ void lj_tab_rehash(lua_State *L, GCtab *t) | |||
360 | 376 | ||
361 | void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) | 377 | void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) |
362 | { | 378 | { |
363 | resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); | 379 | lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); |
364 | } | 380 | } |
365 | 381 | ||
366 | /* -- Table getters ------------------------------------------------------- */ | 382 | /* -- Table getters ------------------------------------------------------- */ |
@@ -378,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key) | |||
378 | return NULL; | 394 | return NULL; |
379 | } | 395 | } |
380 | 396 | ||
381 | cTValue *lj_tab_getstr(GCtab *t, GCstr *key) | 397 | cTValue *lj_tab_getstr(GCtab *t, const GCstr *key) |
382 | { | 398 | { |
383 | Node *n = hashstr(t, key); | 399 | Node *n = hashstr(t, key); |
384 | do { | 400 | do { |
@@ -428,16 +444,17 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) | |||
428 | Node *n = hashkey(t, key); | 444 | Node *n = hashkey(t, key); |
429 | if (!tvisnil(&n->val) || t->hmask == 0) { | 445 | if (!tvisnil(&n->val) || t->hmask == 0) { |
430 | Node *nodebase = noderef(t->node); | 446 | Node *nodebase = noderef(t->node); |
431 | Node *collide, *freenode = noderef(nodebase->freetop); | 447 | Node *collide, *freenode = getfreetop(t, nodebase); |
432 | lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); | 448 | lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1, |
449 | "bad freenode"); | ||
433 | do { | 450 | do { |
434 | if (freenode == nodebase) { /* No free node found? */ | 451 | if (freenode == nodebase) { /* No free node found? */ |
435 | rehashtab(L, t, key); /* Rehash table. */ | 452 | rehashtab(L, t, key); /* Rehash table. */ |
436 | return lj_tab_set(L, t, key); /* Retry key insertion. */ | 453 | return lj_tab_set(L, t, key); /* Retry key insertion. */ |
437 | } | 454 | } |
438 | } while (!tvisnil(&(--freenode)->key)); | 455 | } while (!tvisnil(&(--freenode)->key)); |
439 | setmref(nodebase->freetop, freenode); | 456 | setfreetop(t, nodebase, freenode); |
440 | lua_assert(freenode != &G(L)->nilnode); | 457 | lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash"); |
441 | collide = hashkey(t, &n->key); | 458 | collide = hashkey(t, &n->key); |
442 | if (collide != n) { /* Colliding node not the main node? */ | 459 | if (collide != n) { /* Colliding node not the main node? */ |
443 | while (noderef(collide->next) != n) /* Find predecessor. */ | 460 | while (noderef(collide->next) != n) /* Find predecessor. */ |
@@ -493,7 +510,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) | |||
493 | if (LJ_UNLIKELY(tvismzero(&n->key))) | 510 | if (LJ_UNLIKELY(tvismzero(&n->key))) |
494 | n->key.u64 = 0; | 511 | n->key.u64 = 0; |
495 | lj_gc_anybarriert(L, t); | 512 | lj_gc_anybarriert(L, t); |
496 | lua_assert(tvisnil(&n->val)); | 513 | lj_assertL(tvisnil(&n->val), "new hash slot is not empty"); |
497 | return &n->val; | 514 | return &n->val; |
498 | } | 515 | } |
499 | 516 | ||
@@ -510,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key) | |||
510 | return lj_tab_newkey(L, t, &k); | 527 | return lj_tab_newkey(L, t, &k); |
511 | } | 528 | } |
512 | 529 | ||
513 | TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) | 530 | TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key) |
514 | { | 531 | { |
515 | TValue k; | 532 | TValue k; |
516 | Node *n = hashstr(t, key); | 533 | Node *n = hashstr(t, key); |
@@ -551,103 +568,126 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) | |||
551 | 568 | ||
552 | /* -- Table traversal ----------------------------------------------------- */ | 569 | /* -- Table traversal ----------------------------------------------------- */ |
553 | 570 | ||
554 | /* Get the traversal index of a key. */ | 571 | /* Table traversal indexes: |
555 | static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) | 572 | ** |
573 | ** Array key index: [0 .. t->asize-1] | ||
574 | ** Hash key index: [t->asize .. t->asize+t->hmask] | ||
575 | ** Invalid key: ~0 | ||
576 | */ | ||
577 | |||
578 | /* Get the successor traversal index of a key. */ | ||
579 | uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key) | ||
556 | { | 580 | { |
557 | TValue tmp; | 581 | TValue tmp; |
558 | if (tvisint(key)) { | 582 | if (tvisint(key)) { |
559 | int32_t k = intV(key); | 583 | int32_t k = intV(key); |
560 | if ((uint32_t)k < t->asize) | 584 | if ((uint32_t)k < t->asize) |
561 | return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ | 585 | return (uint32_t)k + 1; |
562 | setnumV(&tmp, (lua_Number)k); | 586 | setnumV(&tmp, (lua_Number)k); |
563 | key = &tmp; | 587 | key = &tmp; |
564 | } else if (tvisnum(key)) { | 588 | } else if (tvisnum(key)) { |
565 | lua_Number nk = numV(key); | 589 | lua_Number nk = numV(key); |
566 | int32_t k = lj_num2int(nk); | 590 | int32_t k = lj_num2int(nk); |
567 | if ((uint32_t)k < t->asize && nk == (lua_Number)k) | 591 | if ((uint32_t)k < t->asize && nk == (lua_Number)k) |
568 | return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ | 592 | return (uint32_t)k + 1; |
569 | } | 593 | } |
570 | if (!tvisnil(key)) { | 594 | if (!tvisnil(key)) { |
571 | Node *n = hashkey(t, key); | 595 | Node *n = hashkey(t, key); |
572 | do { | 596 | do { |
573 | if (lj_obj_equal(&n->key, key)) | 597 | if (lj_obj_equal(&n->key, key)) |
574 | return t->asize + (uint32_t)(n - noderef(t->node)); | 598 | return t->asize + (uint32_t)((n+1) - noderef(t->node)); |
575 | /* Hash key indexes: [t->asize..t->asize+t->nmask] */ | ||
576 | } while ((n = nextnode(n))); | 599 | } while ((n = nextnode(n))); |
577 | if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */ | 600 | if (key->u32.hi == LJ_KEYINDEX) /* Despecialized ITERN while running. */ |
578 | return key->u32.lo - 1; | 601 | return key->u32.lo; |
579 | lj_err_msg(L, LJ_ERR_NEXTIDX); | 602 | return ~0u; /* Invalid key to next. */ |
580 | return 0; /* unreachable */ | ||
581 | } | 603 | } |
582 | return ~0u; /* A nil key starts the traversal. */ | 604 | return 0; /* A nil key starts the traversal. */ |
583 | } | 605 | } |
584 | 606 | ||
585 | /* Advance to the next step in a table traversal. */ | 607 | /* Get the next key/value pair of a table traversal. */ |
586 | int lj_tab_next(lua_State *L, GCtab *t, TValue *key) | 608 | int lj_tab_next(GCtab *t, cTValue *key, TValue *o) |
587 | { | 609 | { |
588 | uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */ | 610 | uint32_t idx = lj_tab_keyindex(t, key); /* Find successor index of key. */ |
589 | for (i++; i < t->asize; i++) /* First traverse the array keys. */ | 611 | /* First traverse the array part. */ |
590 | if (!tvisnil(arrayslot(t, i))) { | 612 | for (; idx < t->asize; idx++) { |
591 | setintV(key, i); | 613 | cTValue *a = arrayslot(t, idx); |
592 | copyTV(L, key+1, arrayslot(t, i)); | 614 | if (LJ_LIKELY(!tvisnil(a))) { |
615 | setintV(o, idx); | ||
616 | o[1] = *a; | ||
593 | return 1; | 617 | return 1; |
594 | } | 618 | } |
595 | for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */ | 619 | } |
596 | Node *n = &noderef(t->node)[i]; | 620 | idx -= t->asize; |
621 | /* Then traverse the hash part. */ | ||
622 | for (; idx <= t->hmask; idx++) { | ||
623 | Node *n = &noderef(t->node)[idx]; | ||
597 | if (!tvisnil(&n->val)) { | 624 | if (!tvisnil(&n->val)) { |
598 | copyTV(L, key, &n->key); | 625 | o[0] = n->key; |
599 | copyTV(L, key+1, &n->val); | 626 | o[1] = n->val; |
600 | return 1; | 627 | return 1; |
601 | } | 628 | } |
602 | } | 629 | } |
603 | return 0; /* End of traversal. */ | 630 | return (int32_t)idx < 0 ? -1 : 0; /* Invalid key or end of traversal. */ |
604 | } | 631 | } |
605 | 632 | ||
606 | /* -- Table length calculation -------------------------------------------- */ | 633 | /* -- Table length calculation -------------------------------------------- */ |
607 | 634 | ||
608 | static MSize unbound_search(GCtab *t, MSize j) | 635 | /* Compute table length. Slow path with mixed array/hash lookups. */ |
636 | LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi) | ||
609 | { | 637 | { |
610 | cTValue *tv; | 638 | cTValue *tv; |
611 | MSize i = j; /* i is zero or a present index */ | 639 | size_t lo = hi; |
612 | j++; | 640 | hi++; |
613 | /* find `i' and `j' such that i is present and j is not */ | 641 | /* Widening search for an upper bound. */ |
614 | while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { | 642 | while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) { |
615 | i = j; | 643 | lo = hi; |
616 | j *= 2; | 644 | hi += hi; |
617 | if (j > (MSize)(INT_MAX-2)) { /* overflow? */ | 645 | if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */ |
618 | /* table was built with bad purposes: resort to linear search */ | 646 | lo = 1; |
619 | i = 1; | 647 | while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++; |
620 | while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; | 648 | return (MSize)(lo - 1); |
621 | return i - 1; | ||
622 | } | 649 | } |
623 | } | 650 | } |
624 | /* now do a binary search between them */ | 651 | /* Binary search to find a non-nil to nil transition. */ |
625 | while (j - i > 1) { | 652 | while (hi - lo > 1) { |
626 | MSize m = (i+j)/2; | 653 | size_t mid = (lo+hi) >> 1; |
627 | cTValue *tvb = lj_tab_getint(t, (int32_t)m); | 654 | cTValue *tvb = lj_tab_getint(t, (int32_t)mid); |
628 | if (tvb && !tvisnil(tvb)) i = m; else j = m; | 655 | if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid; |
629 | } | 656 | } |
630 | return i; | 657 | return (MSize)lo; |
631 | } | 658 | } |
632 | 659 | ||
633 | /* | 660 | /* Compute table length. Fast path. */ |
634 | ** Try to find a boundary in table `t'. A `boundary' is an integer index | ||
635 | ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). | ||
636 | */ | ||
637 | MSize LJ_FASTCALL lj_tab_len(GCtab *t) | 661 | MSize LJ_FASTCALL lj_tab_len(GCtab *t) |
638 | { | 662 | { |
639 | MSize j = (MSize)t->asize; | 663 | size_t hi = (size_t)t->asize; |
640 | if (j > 1 && tvisnil(arrayslot(t, j-1))) { | 664 | if (hi) hi--; |
641 | MSize i = 1; | 665 | /* In a growing array the last array element is very likely nil. */ |
642 | while (j - i > 1) { | 666 | if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) { |
643 | MSize m = (i+j)/2; | 667 | /* Binary search to find a non-nil to nil transition in the array. */ |
644 | if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; | 668 | size_t lo = 0; |
669 | while (hi - lo > 1) { | ||
670 | size_t mid = (lo+hi) >> 1; | ||
671 | if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid; | ||
645 | } | 672 | } |
646 | return i-1; | 673 | return (MSize)lo; |
647 | } | 674 | } |
648 | if (j) j--; | 675 | /* Without a hash part, there's an implicit nil after the last element. */ |
649 | if (t->hmask <= 0) | 676 | return t->hmask ? tab_len_slow(t, hi) : (MSize)hi; |
650 | return j; | ||
651 | return unbound_search(t, j); | ||
652 | } | 677 | } |
653 | 678 | ||
679 | #if LJ_HASJIT | ||
680 | /* Verify hinted table length or compute it. */ | ||
681 | MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint) | ||
682 | { | ||
683 | size_t asize = (size_t)t->asize; | ||
684 | cTValue *tv = arrayslot(t, hint); | ||
685 | if (LJ_LIKELY(hint+1 < asize)) { | ||
686 | if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint; | ||
687 | } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) { | ||
688 | return (MSize)hint; | ||
689 | } | ||
690 | return lj_tab_len(t); | ||
691 | } | ||
692 | #endif | ||
693 | |||
diff --git a/src/lj_tab.h b/src/lj_tab.h index 4a106873..2a3f76bf 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h | |||
@@ -31,30 +31,52 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) | |||
31 | return hi; | 31 | return hi; |
32 | } | 32 | } |
33 | 33 | ||
34 | /* Hash values are masked with the table hash mask and used as an index. */ | ||
35 | static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) | ||
36 | { | ||
37 | Node *n = noderef(t->node); | ||
38 | return &n[hash & t->hmask]; | ||
39 | } | ||
40 | |||
41 | /* String IDs are generated when a string is interned. */ | ||
42 | #define hashstr(t, s) hashmask(t, (s)->sid) | ||
43 | |||
44 | #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) | ||
45 | #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) | ||
46 | #if LJ_GC64 | ||
47 | #define hashgcref(t, r) \ | ||
48 | hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) | ||
49 | #else | ||
50 | #define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) | ||
51 | #endif | ||
52 | |||
34 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) | 53 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) |
35 | 54 | ||
36 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); | 55 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); |
56 | LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h); | ||
37 | #if LJ_HASJIT | 57 | #if LJ_HASJIT |
38 | LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); | 58 | LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); |
39 | #endif | 59 | #endif |
40 | LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); | 60 | LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); |
61 | LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t); | ||
41 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); | 62 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); |
42 | #if LJ_HASFFI | 63 | #if LJ_HASFFI |
43 | LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); | 64 | LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); |
44 | #endif | 65 | #endif |
66 | LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits); | ||
45 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); | 67 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); |
46 | 68 | ||
47 | /* Caveat: all getters except lj_tab_get() can return NULL! */ | 69 | /* Caveat: all getters except lj_tab_get() can return NULL! */ |
48 | 70 | ||
49 | LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); | 71 | LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); |
50 | LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); | 72 | LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key); |
51 | LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); | 73 | LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); |
52 | 74 | ||
53 | /* Caveat: all setters require a write barrier for the stored value. */ | 75 | /* Caveat: all setters require a write barrier for the stored value. */ |
54 | 76 | ||
55 | LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); | 77 | LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); |
56 | LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); | 78 | LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); |
57 | LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); | 79 | LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key); |
58 | LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | 80 | LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); |
59 | 81 | ||
60 | #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) | 82 | #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) |
@@ -64,7 +86,11 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | |||
64 | #define lj_tab_setint(L, t, key) \ | 86 | #define lj_tab_setint(L, t, key) \ |
65 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) | 87 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) |
66 | 88 | ||
67 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); | 89 | LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key); |
90 | LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o); | ||
68 | LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); | 91 | LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); |
92 | #if LJ_HASJIT | ||
93 | LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint); | ||
94 | #endif | ||
69 | 95 | ||
70 | #endif | 96 | #endif |
diff --git a/src/lj_target.h b/src/lj_target.h index 75eb965f..19716928 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
@@ -55,7 +55,7 @@ typedef uint32_t RegSP; | |||
55 | /* Bitset for registers. 32 registers suffice for most architectures. | 55 | /* Bitset for registers. 32 registers suffice for most architectures. |
56 | ** Note that one set holds bits for both GPRs and FPRs. | 56 | ** Note that one set holds bits for both GPRs and FPRs. |
57 | */ | 57 | */ |
58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 58 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
59 | typedef uint64_t RegSet; | 59 | typedef uint64_t RegSet; |
60 | #else | 60 | #else |
61 | typedef uint32_t RegSet; | 61 | typedef uint32_t RegSet; |
@@ -69,7 +69,7 @@ typedef uint32_t RegSet; | |||
69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) | 69 | #define rset_set(rs, r) (rs |= RID2RSET(r)) |
70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) | 70 | #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) |
71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) | 71 | #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) |
72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 72 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 |
73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) | 73 | #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) |
74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) | 74 | #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) |
75 | #else | 75 | #else |
@@ -138,6 +138,8 @@ typedef uint32_t RegCost; | |||
138 | #include "lj_target_x86.h" | 138 | #include "lj_target_x86.h" |
139 | #elif LJ_TARGET_ARM | 139 | #elif LJ_TARGET_ARM |
140 | #include "lj_target_arm.h" | 140 | #include "lj_target_arm.h" |
141 | #elif LJ_TARGET_ARM64 | ||
142 | #include "lj_target_arm64.h" | ||
141 | #elif LJ_TARGET_PPC | 143 | #elif LJ_TARGET_PPC |
142 | #include "lj_target_ppc.h" | 144 | #include "lj_target_ppc.h" |
143 | #elif LJ_TARGET_MIPS | 145 | #elif LJ_TARGET_MIPS |
@@ -150,7 +152,8 @@ typedef uint32_t RegCost; | |||
150 | /* Return the address of an exit stub. */ | 152 | /* Return the address of an exit stub. */ |
151 | static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) | 153 | static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) |
152 | { | 154 | { |
153 | lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); | 155 | lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL, |
156 | "exit stub group for exit %d uninitialized", exitno); | ||
154 | return (char *)group[exitno / EXITSTUBS_PER_GROUP] + | 157 | return (char *)group[exitno / EXITSTUBS_PER_GROUP] + |
155 | EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); | 158 | EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); |
156 | } | 159 | } |
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index 76a30710..48f487a5 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h | |||
@@ -211,6 +211,7 @@ typedef enum ARMIns { | |||
211 | /* ARMv6T2 */ | 211 | /* ARMv6T2 */ |
212 | ARMI_MOVW = 0xe3000000, | 212 | ARMI_MOVW = 0xe3000000, |
213 | ARMI_MOVT = 0xe3400000, | 213 | ARMI_MOVT = 0xe3400000, |
214 | ARMI_BFI = 0xe7c00010, | ||
214 | 215 | ||
215 | /* VFP */ | 216 | /* VFP */ |
216 | ARMI_VMOV_D = 0xeeb00b40, | 217 | ARMI_VMOV_D = 0xeeb00b40, |
@@ -243,10 +244,6 @@ typedef enum ARMIns { | |||
243 | ARMI_VCVT_S32_F64 = 0xeebd0bc0, | 244 | ARMI_VCVT_S32_F64 = 0xeebd0bc0, |
244 | ARMI_VCVT_U32_F32 = 0xeebc0ac0, | 245 | ARMI_VCVT_U32_F32 = 0xeebc0ac0, |
245 | ARMI_VCVT_U32_F64 = 0xeebc0bc0, | 246 | ARMI_VCVT_U32_F64 = 0xeebc0bc0, |
246 | ARMI_VCVTR_S32_F32 = 0xeebd0a40, | ||
247 | ARMI_VCVTR_S32_F64 = 0xeebd0b40, | ||
248 | ARMI_VCVTR_U32_F32 = 0xeebc0a40, | ||
249 | ARMI_VCVTR_U32_F64 = 0xeebc0b40, | ||
250 | ARMI_VCVT_F32_S32 = 0xeeb80ac0, | 247 | ARMI_VCVT_F32_S32 = 0xeeb80ac0, |
251 | ARMI_VCVT_F64_S32 = 0xeeb80bc0, | 248 | ARMI_VCVT_F64_S32 = 0xeeb80bc0, |
252 | ARMI_VCVT_F32_U32 = 0xeeb80a40, | 249 | ARMI_VCVT_F32_U32 = 0xeeb80a40, |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h new file mode 100644 index 00000000..d45af2e4 --- /dev/null +++ b/src/lj_target_arm64.h | |||
@@ -0,0 +1,336 @@ | |||
1 | /* | ||
2 | ** Definitions for ARM64 CPUs. | ||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #ifndef _LJ_TARGET_ARM64_H | ||
7 | #define _LJ_TARGET_ARM64_H | ||
8 | |||
9 | /* -- Registers IDs ------------------------------------------------------- */ | ||
10 | |||
11 | #define GPRDEF(_) \ | ||
12 | _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \ | ||
13 | _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ | ||
14 | _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ | ||
15 | _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) | ||
16 | #define FPRDEF(_) \ | ||
17 | _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ | ||
18 | _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \ | ||
19 | _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \ | ||
20 | _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) | ||
21 | #define VRIDDEF(_) | ||
22 | |||
23 | #define RIDENUM(name) RID_##name, | ||
24 | |||
25 | enum { | ||
26 | GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ | ||
27 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ | ||
28 | RID_MAX, | ||
29 | RID_TMP = RID_LR, | ||
30 | RID_ZERO = RID_SP, | ||
31 | |||
32 | /* Calling conventions. */ | ||
33 | RID_RET = RID_X0, | ||
34 | RID_RETLO = RID_X0, | ||
35 | RID_RETHI = RID_X1, | ||
36 | RID_FPRET = RID_D0, | ||
37 | |||
38 | /* These definitions must match with the *.dasc file(s): */ | ||
39 | RID_BASE = RID_X19, /* Interpreter BASE. */ | ||
40 | RID_LPC = RID_X21, /* Interpreter PC. */ | ||
41 | RID_GL = RID_X22, /* Interpreter GL. */ | ||
42 | RID_LREG = RID_X23, /* Interpreter L. */ | ||
43 | |||
44 | /* Register ranges [min, max) and number of registers. */ | ||
45 | RID_MIN_GPR = RID_X0, | ||
46 | RID_MAX_GPR = RID_SP+1, | ||
47 | RID_MIN_FPR = RID_MAX_GPR, | ||
48 | RID_MAX_FPR = RID_D31+1, | ||
49 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | ||
50 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR | ||
51 | }; | ||
52 | |||
53 | #define RID_NUM_KREF RID_NUM_GPR | ||
54 | #define RID_MIN_KREF RID_X0 | ||
55 | |||
56 | /* -- Register sets ------------------------------------------------------- */ | ||
57 | |||
58 | /* Make use of all registers, except for x18, fp, lr and sp. */ | ||
59 | #define RSET_FIXED \ | ||
60 | (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ | ||
61 | RID2RSET(RID_GL)) | ||
62 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) | ||
63 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) | ||
64 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
65 | #define RSET_INIT RSET_ALL | ||
66 | |||
67 | /* lr is an implicit scratch register. */ | ||
68 | #define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1)) | ||
69 | #define RSET_SCRATCH_FPR \ | ||
70 | (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1)) | ||
71 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) | ||
72 | #define REGARG_FIRSTGPR RID_X0 | ||
73 | #define REGARG_LASTGPR RID_X7 | ||
74 | #define REGARG_NUMGPR 8 | ||
75 | #define REGARG_FIRSTFPR RID_D0 | ||
76 | #define REGARG_LASTFPR RID_D7 | ||
77 | #define REGARG_NUMFPR 8 | ||
78 | |||
79 | /* -- Spill slots --------------------------------------------------------- */ | ||
80 | |||
81 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. | ||
82 | ** | ||
83 | ** SPS_FIXED: Available fixed spill slots in interpreter frame. | ||
84 | ** This definition must match with the vm_arm64.dasc file. | ||
85 | ** Pre-allocate some slots to avoid sp adjust in every root trace. | ||
86 | ** | ||
87 | ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. | ||
88 | */ | ||
89 | #define SPS_FIXED 4 | ||
90 | #define SPS_FIRST 2 | ||
91 | |||
92 | #define SPOFS_TMP 0 | ||
93 | |||
94 | #define sps_scale(slot) (4 * (int32_t)(slot)) | ||
95 | #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) | ||
96 | |||
97 | /* -- Exit state ---------------------------------------------------------- */ | ||
98 | |||
99 | /* This definition must match with the *.dasc file(s). */ | ||
100 | typedef struct { | ||
101 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | ||
102 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | ||
103 | int32_t spill[256]; /* Spill slots. */ | ||
104 | } ExitState; | ||
105 | |||
106 | /* Highest exit + 1 indicates stack check. */ | ||
107 | #define EXITSTATE_CHECKEXIT 1 | ||
108 | |||
109 | /* Return the address of a per-trace exit stub. */ | ||
110 | static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) | ||
111 | { | ||
112 | while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ | ||
113 | return p + 3 + exitno; | ||
114 | } | ||
115 | /* Avoid dependence on lj_jit.h if only including lj_target.h. */ | ||
116 | #define exitstub_trace_addr(T, exitno) \ | ||
117 | exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) | ||
118 | |||
119 | /* -- Instructions -------------------------------------------------------- */ | ||
120 | |||
121 | /* ARM64 instructions are always little-endian. Swap for ARM64BE. */ | ||
122 | #if LJ_BE | ||
123 | #define A64I_LE(x) (lj_bswap(x)) | ||
124 | #else | ||
125 | #define A64I_LE(x) (x) | ||
126 | #endif | ||
127 | |||
128 | /* Instruction fields. */ | ||
129 | #define A64F_D(r) (r) | ||
130 | #define A64F_N(r) ((r) << 5) | ||
131 | #define A64F_A(r) ((r) << 10) | ||
132 | #define A64F_M(r) ((r) << 16) | ||
133 | #define A64F_IMMS(x) ((x) << 10) | ||
134 | #define A64F_IMMR(x) ((x) << 16) | ||
135 | #define A64F_U16(x) ((x) << 5) | ||
136 | #define A64F_U12(x) ((x) << 10) | ||
137 | #define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu)) | ||
138 | #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) | ||
139 | #define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5) | ||
140 | #define A64F_S9(x) ((x) << 12) | ||
141 | #define A64F_BIT(x) ((x) << 19) | ||
142 | #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) | ||
143 | #define A64F_EX(ex) (A64I_EX | ((ex) << 13)) | ||
144 | #define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) | ||
145 | #define A64F_FP8(x) ((x) << 13) | ||
146 | #define A64F_CC(cc) ((cc) << 12) | ||
147 | #define A64F_LSL16(x) (((x) / 16) << 21) | ||
148 | #define A64F_BSH(sh) ((sh) << 10) | ||
149 | |||
150 | /* Check for valid field range. */ | ||
151 | #define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) | ||
152 | |||
153 | typedef enum A64Ins { | ||
154 | A64I_S = 0x20000000, | ||
155 | A64I_X = 0x80000000, | ||
156 | A64I_EX = 0x00200000, | ||
157 | A64I_ON = 0x00200000, | ||
158 | A64I_K12 = 0x1a000000, | ||
159 | A64I_K13 = 0x18000000, | ||
160 | A64I_LS_U = 0x01000000, | ||
161 | A64I_LS_S = 0x00800000, | ||
162 | A64I_LS_R = 0x01200800, | ||
163 | A64I_LS_SH = 0x00001000, | ||
164 | A64I_LS_UXTWx = 0x00004000, | ||
165 | A64I_LS_SXTWx = 0x0000c000, | ||
166 | A64I_LS_SXTXx = 0x0000e000, | ||
167 | A64I_LS_LSLx = 0x00006000, | ||
168 | |||
169 | A64I_ADDw = 0x0b000000, | ||
170 | A64I_ADDx = 0x8b000000, | ||
171 | A64I_ADDSw = 0x2b000000, | ||
172 | A64I_ADDSx = 0xab000000, | ||
173 | A64I_NEGw = 0x4b0003e0, | ||
174 | A64I_NEGx = 0xcb0003e0, | ||
175 | A64I_SUBw = 0x4b000000, | ||
176 | A64I_SUBx = 0xcb000000, | ||
177 | A64I_SUBSw = 0x6b000000, | ||
178 | A64I_SUBSx = 0xeb000000, | ||
179 | |||
180 | A64I_MULw = 0x1b007c00, | ||
181 | A64I_MULx = 0x9b007c00, | ||
182 | A64I_SMULL = 0x9b207c00, | ||
183 | |||
184 | A64I_ANDw = 0x0a000000, | ||
185 | A64I_ANDx = 0x8a000000, | ||
186 | A64I_ANDSw = 0x6a000000, | ||
187 | A64I_ANDSx = 0xea000000, | ||
188 | A64I_EORw = 0x4a000000, | ||
189 | A64I_EORx = 0xca000000, | ||
190 | A64I_ORRw = 0x2a000000, | ||
191 | A64I_ORRx = 0xaa000000, | ||
192 | A64I_TSTw = 0x6a00001f, | ||
193 | A64I_TSTx = 0xea00001f, | ||
194 | |||
195 | A64I_CMPw = 0x6b00001f, | ||
196 | A64I_CMPx = 0xeb00001f, | ||
197 | A64I_CMNw = 0x2b00001f, | ||
198 | A64I_CMNx = 0xab00001f, | ||
199 | A64I_CCMPw = 0x7a400000, | ||
200 | A64I_CCMPx = 0xfa400000, | ||
201 | A64I_CSELw = 0x1a800000, | ||
202 | A64I_CSELx = 0x9a800000, | ||
203 | |||
204 | A64I_ASRw = 0x13007c00, | ||
205 | A64I_ASRx = 0x9340fc00, | ||
206 | A64I_LSLx = 0xd3400000, | ||
207 | A64I_LSRx = 0xd340fc00, | ||
208 | A64I_SHRw = 0x1ac02000, | ||
209 | A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ | ||
210 | A64I_REVw = 0x5ac00800, | ||
211 | A64I_REVx = 0xdac00c00, | ||
212 | |||
213 | A64I_EXTRw = 0x13800000, | ||
214 | A64I_EXTRx = 0x93c00000, | ||
215 | A64I_BFMw = 0x33000000, | ||
216 | A64I_BFMx = 0xb3400000, | ||
217 | A64I_SBFMw = 0x13000000, | ||
218 | A64I_SBFMx = 0x93400000, | ||
219 | A64I_SXTBw = 0x13001c00, | ||
220 | A64I_SXTHw = 0x13003c00, | ||
221 | A64I_SXTW = 0x93407c00, | ||
222 | A64I_UBFMw = 0x53000000, | ||
223 | A64I_UBFMx = 0xd3400000, | ||
224 | A64I_UXTBw = 0x53001c00, | ||
225 | A64I_UXTHw = 0x53003c00, | ||
226 | |||
227 | A64I_MOVw = 0x2a0003e0, | ||
228 | A64I_MOVx = 0xaa0003e0, | ||
229 | A64I_MVNw = 0x2a2003e0, | ||
230 | A64I_MVNx = 0xaa2003e0, | ||
231 | A64I_MOVKw = 0x72800000, | ||
232 | A64I_MOVKx = 0xf2800000, | ||
233 | A64I_MOVZw = 0x52800000, | ||
234 | A64I_MOVZx = 0xd2800000, | ||
235 | A64I_MOVNw = 0x12800000, | ||
236 | A64I_MOVNx = 0x92800000, | ||
237 | |||
238 | A64I_LDRB = 0x39400000, | ||
239 | A64I_LDRH = 0x79400000, | ||
240 | A64I_LDRw = 0xb9400000, | ||
241 | A64I_LDRx = 0xf9400000, | ||
242 | A64I_LDRLw = 0x18000000, | ||
243 | A64I_LDRLx = 0x58000000, | ||
244 | A64I_STRB = 0x39000000, | ||
245 | A64I_STRH = 0x79000000, | ||
246 | A64I_STRw = 0xb9000000, | ||
247 | A64I_STRx = 0xf9000000, | ||
248 | A64I_STPw = 0x29000000, | ||
249 | A64I_STPx = 0xa9000000, | ||
250 | A64I_LDPw = 0x29400000, | ||
251 | A64I_LDPx = 0xa9400000, | ||
252 | |||
253 | A64I_B = 0x14000000, | ||
254 | A64I_BCC = 0x54000000, | ||
255 | A64I_BL = 0x94000000, | ||
256 | A64I_BR = 0xd61f0000, | ||
257 | A64I_BLR = 0xd63f0000, | ||
258 | A64I_TBZ = 0x36000000, | ||
259 | A64I_TBNZ = 0x37000000, | ||
260 | A64I_CBZ = 0x34000000, | ||
261 | A64I_CBNZ = 0x35000000, | ||
262 | |||
263 | A64I_NOP = 0xd503201f, | ||
264 | |||
265 | /* FP */ | ||
266 | A64I_FADDd = 0x1e602800, | ||
267 | A64I_FSUBd = 0x1e603800, | ||
268 | A64I_FMADDd = 0x1f400000, | ||
269 | A64I_FMSUBd = 0x1f408000, | ||
270 | A64I_FNMADDd = 0x1f600000, | ||
271 | A64I_FNMSUBd = 0x1f608000, | ||
272 | A64I_FMULd = 0x1e600800, | ||
273 | A64I_FDIVd = 0x1e601800, | ||
274 | A64I_FNEGd = 0x1e614000, | ||
275 | A64I_FABS = 0x1e60c000, | ||
276 | A64I_FSQRTd = 0x1e61c000, | ||
277 | A64I_LDRs = 0xbd400000, | ||
278 | A64I_LDRd = 0xfd400000, | ||
279 | A64I_STRs = 0xbd000000, | ||
280 | A64I_STRd = 0xfd000000, | ||
281 | A64I_LDPs = 0x2d400000, | ||
282 | A64I_LDPd = 0x6d400000, | ||
283 | A64I_STPs = 0x2d000000, | ||
284 | A64I_STPd = 0x6d000000, | ||
285 | A64I_FCMPd = 0x1e602000, | ||
286 | A64I_FCMPZd = 0x1e602008, | ||
287 | A64I_FCSELd = 0x1e600c00, | ||
288 | A64I_FRINTMd = 0x1e654000, | ||
289 | A64I_FRINTPd = 0x1e64c000, | ||
290 | A64I_FRINTZd = 0x1e65c000, | ||
291 | |||
292 | A64I_FCVT_F32_F64 = 0x1e624000, | ||
293 | A64I_FCVT_F64_F32 = 0x1e22c000, | ||
294 | A64I_FCVT_F32_S32 = 0x1e220000, | ||
295 | A64I_FCVT_F64_S32 = 0x1e620000, | ||
296 | A64I_FCVT_F32_U32 = 0x1e230000, | ||
297 | A64I_FCVT_F64_U32 = 0x1e630000, | ||
298 | A64I_FCVT_F32_S64 = 0x9e220000, | ||
299 | A64I_FCVT_F64_S64 = 0x9e620000, | ||
300 | A64I_FCVT_F32_U64 = 0x9e230000, | ||
301 | A64I_FCVT_F64_U64 = 0x9e630000, | ||
302 | A64I_FCVT_S32_F64 = 0x1e780000, | ||
303 | A64I_FCVT_S32_F32 = 0x1e380000, | ||
304 | A64I_FCVT_U32_F64 = 0x1e790000, | ||
305 | A64I_FCVT_U32_F32 = 0x1e390000, | ||
306 | A64I_FCVT_S64_F64 = 0x9e780000, | ||
307 | A64I_FCVT_S64_F32 = 0x9e380000, | ||
308 | A64I_FCVT_U64_F64 = 0x9e790000, | ||
309 | A64I_FCVT_U64_F32 = 0x9e390000, | ||
310 | |||
311 | A64I_FMOV_S = 0x1e204000, | ||
312 | A64I_FMOV_D = 0x1e604000, | ||
313 | A64I_FMOV_R_S = 0x1e260000, | ||
314 | A64I_FMOV_S_R = 0x1e270000, | ||
315 | A64I_FMOV_R_D = 0x9e660000, | ||
316 | A64I_FMOV_D_R = 0x9e670000, | ||
317 | A64I_FMOV_DI = 0x1e601000, | ||
318 | } A64Ins; | ||
319 | |||
320 | typedef enum A64Shift { | ||
321 | A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR | ||
322 | } A64Shift; | ||
323 | |||
324 | typedef enum A64Extend { | ||
325 | A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, | ||
326 | A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, | ||
327 | } A64Extend; | ||
328 | |||
329 | /* ARM condition codes. */ | ||
330 | typedef enum A64CC { | ||
331 | CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, | ||
332 | CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, | ||
333 | CC_HS = CC_CS, CC_LO = CC_CC | ||
334 | } A64CC; | ||
335 | |||
336 | #endif | ||
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index ec935494..da72d61a 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h | |||
@@ -13,11 +13,15 @@ | |||
13 | _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ | 13 | _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ |
14 | _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ | 14 | _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ |
15 | _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) | 15 | _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) |
16 | #if LJ_SOFTFP | ||
17 | #define FPRDEF(_) | ||
18 | #else | ||
16 | #define FPRDEF(_) \ | 19 | #define FPRDEF(_) \ |
17 | _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ | 20 | _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ |
18 | _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ | 21 | _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ |
19 | _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ | 22 | _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ |
20 | _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) | 23 | _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) |
24 | #endif | ||
21 | #define VRIDDEF(_) | 25 | #define VRIDDEF(_) |
22 | 26 | ||
23 | #define RIDENUM(name) RID_##name, | 27 | #define RIDENUM(name) RID_##name, |
@@ -39,7 +43,11 @@ enum { | |||
39 | RID_RETHI = RID_R2, | 43 | RID_RETHI = RID_R2, |
40 | RID_RETLO = RID_R3, | 44 | RID_RETLO = RID_R3, |
41 | #endif | 45 | #endif |
46 | #if LJ_SOFTFP | ||
47 | RID_FPRET = RID_R2, | ||
48 | #else | ||
42 | RID_FPRET = RID_F0, | 49 | RID_FPRET = RID_F0, |
50 | #endif | ||
43 | RID_CFUNCADDR = RID_R25, | 51 | RID_CFUNCADDR = RID_R25, |
44 | 52 | ||
45 | /* These definitions must match with the *.dasc file(s): */ | 53 | /* These definitions must match with the *.dasc file(s): */ |
@@ -52,8 +60,12 @@ enum { | |||
52 | /* Register ranges [min, max) and number of registers. */ | 60 | /* Register ranges [min, max) and number of registers. */ |
53 | RID_MIN_GPR = RID_R0, | 61 | RID_MIN_GPR = RID_R0, |
54 | RID_MAX_GPR = RID_RA+1, | 62 | RID_MAX_GPR = RID_RA+1, |
55 | RID_MIN_FPR = RID_F0, | 63 | RID_MIN_FPR = RID_MAX_GPR, |
64 | #if LJ_SOFTFP | ||
65 | RID_MAX_FPR = RID_MIN_FPR, | ||
66 | #else | ||
56 | RID_MAX_FPR = RID_F31+1, | 67 | RID_MAX_FPR = RID_F31+1, |
68 | #endif | ||
57 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | 69 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, |
58 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ | 70 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ |
59 | }; | 71 | }; |
@@ -68,28 +80,60 @@ enum { | |||
68 | (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ | 80 | (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ |
69 | RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) | 81 | RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) |
70 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) | 82 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) |
83 | #if LJ_SOFTFP | ||
84 | #define RSET_FPR 0 | ||
85 | #else | ||
86 | #if LJ_32 | ||
71 | #define RSET_FPR \ | 87 | #define RSET_FPR \ |
72 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ | 88 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ |
73 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ | 89 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ |
74 | RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ | 90 | RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ |
75 | RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) | 91 | RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) |
76 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 92 | #else |
77 | #define RSET_INIT RSET_ALL | 93 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) |
94 | #endif | ||
95 | #endif | ||
96 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
97 | #define RSET_INIT RSET_ALL | ||
78 | 98 | ||
79 | #define RSET_SCRATCH_GPR \ | 99 | #define RSET_SCRATCH_GPR \ |
80 | (RSET_RANGE(RID_R1, RID_R15+1)|\ | 100 | (RSET_RANGE(RID_R1, RID_R15+1)|\ |
81 | RID2RSET(RID_R24)|RID2RSET(RID_R25)) | 101 | RID2RSET(RID_R24)|RID2RSET(RID_R25)) |
102 | #if LJ_SOFTFP | ||
103 | #define RSET_SCRATCH_FPR 0 | ||
104 | #else | ||
105 | #if LJ_32 | ||
82 | #define RSET_SCRATCH_FPR \ | 106 | #define RSET_SCRATCH_FPR \ |
83 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ | 107 | (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ |
84 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ | 108 | RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ |
85 | RID2RSET(RID_F16)|RID2RSET(RID_F18)) | 109 | RID2RSET(RID_F16)|RID2RSET(RID_F18)) |
110 | #else | ||
111 | #define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) | ||
112 | #endif | ||
113 | #endif | ||
86 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) | 114 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) |
87 | #define REGARG_FIRSTGPR RID_R4 | 115 | #define REGARG_FIRSTGPR RID_R4 |
116 | #if LJ_32 | ||
88 | #define REGARG_LASTGPR RID_R7 | 117 | #define REGARG_LASTGPR RID_R7 |
89 | #define REGARG_NUMGPR 4 | 118 | #define REGARG_NUMGPR 4 |
119 | #else | ||
120 | #define REGARG_LASTGPR RID_R11 | ||
121 | #define REGARG_NUMGPR 8 | ||
122 | #endif | ||
123 | #if LJ_ABI_SOFTFP | ||
124 | #define REGARG_FIRSTFPR 0 | ||
125 | #define REGARG_LASTFPR 0 | ||
126 | #define REGARG_NUMFPR 0 | ||
127 | #else | ||
90 | #define REGARG_FIRSTFPR RID_F12 | 128 | #define REGARG_FIRSTFPR RID_F12 |
129 | #if LJ_32 | ||
91 | #define REGARG_LASTFPR RID_F14 | 130 | #define REGARG_LASTFPR RID_F14 |
92 | #define REGARG_NUMFPR 2 | 131 | #define REGARG_NUMFPR 2 |
132 | #else | ||
133 | #define REGARG_LASTFPR RID_F19 | ||
134 | #define REGARG_NUMFPR 8 | ||
135 | #endif | ||
136 | #endif | ||
93 | 137 | ||
94 | /* -- Spill slots --------------------------------------------------------- */ | 138 | /* -- Spill slots --------------------------------------------------------- */ |
95 | 139 | ||
@@ -100,7 +144,11 @@ enum { | |||
100 | ** | 144 | ** |
101 | ** SPS_FIRST: First spill slot for general use. | 145 | ** SPS_FIRST: First spill slot for general use. |
102 | */ | 146 | */ |
147 | #if LJ_32 | ||
103 | #define SPS_FIXED 5 | 148 | #define SPS_FIXED 5 |
149 | #else | ||
150 | #define SPS_FIXED 4 | ||
151 | #endif | ||
104 | #define SPS_FIRST 4 | 152 | #define SPS_FIRST 4 |
105 | 153 | ||
106 | #define SPOFS_TMP 0 | 154 | #define SPOFS_TMP 0 |
@@ -112,8 +160,10 @@ enum { | |||
112 | 160 | ||
113 | /* This definition must match with the *.dasc file(s). */ | 161 | /* This definition must match with the *.dasc file(s). */ |
114 | typedef struct { | 162 | typedef struct { |
163 | #if !LJ_SOFTFP | ||
115 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | 164 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ |
116 | int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | 165 | #endif |
166 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | ||
117 | int32_t spill[256]; /* Spill slots. */ | 167 | int32_t spill[256]; /* Spill slots. */ |
118 | } ExitState; | 168 | } ExitState; |
119 | 169 | ||
@@ -142,52 +192,87 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) | |||
142 | #define MIPSF_F(r) ((r) << 6) | 192 | #define MIPSF_F(r) ((r) << 6) |
143 | #define MIPSF_A(n) ((n) << 6) | 193 | #define MIPSF_A(n) ((n) << 6) |
144 | #define MIPSF_M(n) ((n) << 11) | 194 | #define MIPSF_M(n) ((n) << 11) |
195 | #define MIPSF_L(n) ((n) << 6) | ||
145 | 196 | ||
146 | typedef enum MIPSIns { | 197 | typedef enum MIPSIns { |
198 | MIPSI_D = 0x38, | ||
199 | MIPSI_DV = 0x10, | ||
200 | MIPSI_D32 = 0x3c, | ||
147 | /* Integer instructions. */ | 201 | /* Integer instructions. */ |
148 | MIPSI_MOVE = 0x00000021, | 202 | MIPSI_MOVE = 0x00000025, |
149 | MIPSI_NOP = 0x00000000, | 203 | MIPSI_NOP = 0x00000000, |
150 | 204 | ||
151 | MIPSI_LI = 0x24000000, | 205 | MIPSI_LI = 0x24000000, |
152 | MIPSI_LU = 0x34000000, | 206 | MIPSI_LU = 0x34000000, |
153 | MIPSI_LUI = 0x3c000000, | 207 | MIPSI_LUI = 0x3c000000, |
154 | 208 | ||
155 | MIPSI_ADDIU = 0x24000000, | 209 | MIPSI_AND = 0x00000024, |
156 | MIPSI_ANDI = 0x30000000, | 210 | MIPSI_ANDI = 0x30000000, |
211 | MIPSI_OR = 0x00000025, | ||
157 | MIPSI_ORI = 0x34000000, | 212 | MIPSI_ORI = 0x34000000, |
213 | MIPSI_XOR = 0x00000026, | ||
158 | MIPSI_XORI = 0x38000000, | 214 | MIPSI_XORI = 0x38000000, |
215 | MIPSI_NOR = 0x00000027, | ||
216 | |||
217 | MIPSI_SLT = 0x0000002a, | ||
218 | MIPSI_SLTU = 0x0000002b, | ||
159 | MIPSI_SLTI = 0x28000000, | 219 | MIPSI_SLTI = 0x28000000, |
160 | MIPSI_SLTIU = 0x2c000000, | 220 | MIPSI_SLTIU = 0x2c000000, |
161 | 221 | ||
162 | MIPSI_ADDU = 0x00000021, | 222 | MIPSI_ADDU = 0x00000021, |
223 | MIPSI_ADDIU = 0x24000000, | ||
224 | MIPSI_SUB = 0x00000022, | ||
163 | MIPSI_SUBU = 0x00000023, | 225 | MIPSI_SUBU = 0x00000023, |
226 | |||
227 | #if !LJ_TARGET_MIPSR6 | ||
164 | MIPSI_MUL = 0x70000002, | 228 | MIPSI_MUL = 0x70000002, |
165 | MIPSI_AND = 0x00000024, | 229 | MIPSI_DIV = 0x0000001a, |
166 | MIPSI_OR = 0x00000025, | 230 | MIPSI_DIVU = 0x0000001b, |
167 | MIPSI_XOR = 0x00000026, | 231 | |
168 | MIPSI_NOR = 0x00000027, | ||
169 | MIPSI_SLT = 0x0000002a, | ||
170 | MIPSI_SLTU = 0x0000002b, | ||
171 | MIPSI_MOVZ = 0x0000000a, | 232 | MIPSI_MOVZ = 0x0000000a, |
172 | MIPSI_MOVN = 0x0000000b, | 233 | MIPSI_MOVN = 0x0000000b, |
234 | MIPSI_MFHI = 0x00000010, | ||
235 | MIPSI_MFLO = 0x00000012, | ||
236 | MIPSI_MULT = 0x00000018, | ||
237 | #else | ||
238 | MIPSI_MUL = 0x00000098, | ||
239 | MIPSI_MUH = 0x000000d8, | ||
240 | MIPSI_DIV = 0x0000009a, | ||
241 | MIPSI_DIVU = 0x0000009b, | ||
242 | |||
243 | MIPSI_SELEQZ = 0x00000035, | ||
244 | MIPSI_SELNEZ = 0x00000037, | ||
245 | #endif | ||
173 | 246 | ||
174 | MIPSI_SLL = 0x00000000, | 247 | MIPSI_SLL = 0x00000000, |
175 | MIPSI_SRL = 0x00000002, | 248 | MIPSI_SRL = 0x00000002, |
176 | MIPSI_SRA = 0x00000003, | 249 | MIPSI_SRA = 0x00000003, |
177 | MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ | 250 | MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ |
251 | MIPSI_DROTR = 0x0020003a, | ||
252 | MIPSI_DROTR32 = 0x0020003e, | ||
178 | MIPSI_SLLV = 0x00000004, | 253 | MIPSI_SLLV = 0x00000004, |
179 | MIPSI_SRLV = 0x00000006, | 254 | MIPSI_SRLV = 0x00000006, |
180 | MIPSI_SRAV = 0x00000007, | 255 | MIPSI_SRAV = 0x00000007, |
181 | MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ | 256 | MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ |
257 | MIPSI_DROTRV = 0x00000056, | ||
258 | |||
259 | MIPSI_INS = 0x7c000004, /* MIPSXXR2 */ | ||
182 | 260 | ||
183 | MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ | 261 | MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ |
184 | MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ | 262 | MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ |
185 | MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ | 263 | MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ |
264 | MIPSI_DSBH = 0x7c0000a4, | ||
186 | 265 | ||
187 | MIPSI_B = 0x10000000, | 266 | MIPSI_B = 0x10000000, |
188 | MIPSI_J = 0x08000000, | 267 | MIPSI_J = 0x08000000, |
189 | MIPSI_JAL = 0x0c000000, | 268 | MIPSI_JAL = 0x0c000000, |
269 | #if !LJ_TARGET_MIPSR6 | ||
270 | MIPSI_JALX = 0x74000000, | ||
190 | MIPSI_JR = 0x00000008, | 271 | MIPSI_JR = 0x00000008, |
272 | #else | ||
273 | MIPSI_JR = 0x00000009, | ||
274 | MIPSI_BALC = 0xe8000000, | ||
275 | #endif | ||
191 | MIPSI_JALR = 0x0000f809, | 276 | MIPSI_JALR = 0x0000f809, |
192 | 277 | ||
193 | MIPSI_BEQ = 0x10000000, | 278 | MIPSI_BEQ = 0x10000000, |
@@ -199,7 +284,9 @@ typedef enum MIPSIns { | |||
199 | 284 | ||
200 | /* Load/store instructions. */ | 285 | /* Load/store instructions. */ |
201 | MIPSI_LW = 0x8c000000, | 286 | MIPSI_LW = 0x8c000000, |
287 | MIPSI_LD = 0xdc000000, | ||
202 | MIPSI_SW = 0xac000000, | 288 | MIPSI_SW = 0xac000000, |
289 | MIPSI_SD = 0xfc000000, | ||
203 | MIPSI_LB = 0x80000000, | 290 | MIPSI_LB = 0x80000000, |
204 | MIPSI_SB = 0xa0000000, | 291 | MIPSI_SB = 0xa0000000, |
205 | MIPSI_LH = 0x84000000, | 292 | MIPSI_LH = 0x84000000, |
@@ -211,11 +298,69 @@ typedef enum MIPSIns { | |||
211 | MIPSI_LDC1 = 0xd4000000, | 298 | MIPSI_LDC1 = 0xd4000000, |
212 | MIPSI_SDC1 = 0xf4000000, | 299 | MIPSI_SDC1 = 0xf4000000, |
213 | 300 | ||
301 | /* MIPS64 instructions. */ | ||
302 | MIPSI_DADD = 0x0000002c, | ||
303 | MIPSI_DADDU = 0x0000002d, | ||
304 | MIPSI_DADDIU = 0x64000000, | ||
305 | MIPSI_DSUB = 0x0000002e, | ||
306 | MIPSI_DSUBU = 0x0000002f, | ||
307 | #if !LJ_TARGET_MIPSR6 | ||
308 | MIPSI_DDIV = 0x0000001e, | ||
309 | MIPSI_DDIVU = 0x0000001f, | ||
310 | MIPSI_DMULT = 0x0000001c, | ||
311 | MIPSI_DMULTU = 0x0000001d, | ||
312 | #else | ||
313 | MIPSI_DDIV = 0x0000009e, | ||
314 | MIPSI_DMOD = 0x000000de, | ||
315 | MIPSI_DDIVU = 0x0000009f, | ||
316 | MIPSI_DMODU = 0x000000df, | ||
317 | MIPSI_DMUL = 0x0000009c, | ||
318 | MIPSI_DMUH = 0x000000dc, | ||
319 | #endif | ||
320 | |||
321 | MIPSI_DSLL = 0x00000038, | ||
322 | MIPSI_DSRL = 0x0000003a, | ||
323 | MIPSI_DSLLV = 0x00000014, | ||
324 | MIPSI_DSRLV = 0x00000016, | ||
325 | MIPSI_DSRA = 0x0000003b, | ||
326 | MIPSI_DSRAV = 0x00000017, | ||
327 | MIPSI_DSRA32 = 0x0000003f, | ||
328 | MIPSI_DSLL32 = 0x0000003c, | ||
329 | MIPSI_DSRL32 = 0x0000003e, | ||
330 | MIPSI_DSHD = 0x7c000164, | ||
331 | |||
332 | MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, | ||
333 | MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, | ||
334 | MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, | ||
335 | MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, | ||
336 | MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, | ||
337 | #if LJ_TARGET_MIPSR6 | ||
338 | MIPSI_LSA = 0x00000005, | ||
339 | MIPSI_DLSA = 0x00000015, | ||
340 | MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA, | ||
341 | #endif | ||
342 | |||
343 | /* Extract/insert instructions. */ | ||
344 | MIPSI_DEXTM = 0x7c000001, | ||
345 | MIPSI_DEXTU = 0x7c000002, | ||
346 | MIPSI_DEXT = 0x7c000003, | ||
347 | MIPSI_DINSM = 0x7c000005, | ||
348 | MIPSI_DINSU = 0x7c000006, | ||
349 | MIPSI_DINS = 0x7c000007, | ||
350 | |||
351 | MIPSI_FLOOR_D = 0x4620000b, | ||
352 | |||
214 | /* FP instructions. */ | 353 | /* FP instructions. */ |
215 | MIPSI_MOV_S = 0x46000006, | 354 | MIPSI_MOV_S = 0x46000006, |
216 | MIPSI_MOV_D = 0x46200006, | 355 | MIPSI_MOV_D = 0x46200006, |
356 | #if !LJ_TARGET_MIPSR6 | ||
217 | MIPSI_MOVT_D = 0x46210011, | 357 | MIPSI_MOVT_D = 0x46210011, |
218 | MIPSI_MOVF_D = 0x46200011, | 358 | MIPSI_MOVF_D = 0x46200011, |
359 | #else | ||
360 | MIPSI_MIN_D = 0x4620001C, | ||
361 | MIPSI_MAX_D = 0x4620001E, | ||
362 | MIPSI_SEL_D = 0x46200010, | ||
363 | #endif | ||
219 | 364 | ||
220 | MIPSI_ABS_D = 0x46200005, | 365 | MIPSI_ABS_D = 0x46200005, |
221 | MIPSI_NEG_D = 0x46200007, | 366 | MIPSI_NEG_D = 0x46200007, |
@@ -235,23 +380,37 @@ typedef enum MIPSIns { | |||
235 | MIPSI_CVT_W_D = 0x46200024, | 380 | MIPSI_CVT_W_D = 0x46200024, |
236 | MIPSI_CVT_S_W = 0x46800020, | 381 | MIPSI_CVT_S_W = 0x46800020, |
237 | MIPSI_CVT_D_W = 0x46800021, | 382 | MIPSI_CVT_D_W = 0x46800021, |
383 | MIPSI_CVT_S_L = 0x46a00020, | ||
384 | MIPSI_CVT_D_L = 0x46a00021, | ||
238 | 385 | ||
239 | MIPSI_TRUNC_W_S = 0x4600000d, | 386 | MIPSI_TRUNC_W_S = 0x4600000d, |
240 | MIPSI_TRUNC_W_D = 0x4620000d, | 387 | MIPSI_TRUNC_W_D = 0x4620000d, |
388 | MIPSI_TRUNC_L_S = 0x46000009, | ||
389 | MIPSI_TRUNC_L_D = 0x46200009, | ||
241 | MIPSI_FLOOR_W_S = 0x4600000f, | 390 | MIPSI_FLOOR_W_S = 0x4600000f, |
242 | MIPSI_FLOOR_W_D = 0x4620000f, | 391 | MIPSI_FLOOR_W_D = 0x4620000f, |
243 | 392 | ||
244 | MIPSI_MFC1 = 0x44000000, | 393 | MIPSI_MFC1 = 0x44000000, |
245 | MIPSI_MTC1 = 0x44800000, | 394 | MIPSI_MTC1 = 0x44800000, |
395 | MIPSI_DMTC1 = 0x44a00000, | ||
396 | MIPSI_DMFC1 = 0x44200000, | ||
246 | 397 | ||
398 | #if !LJ_TARGET_MIPSR6 | ||
247 | MIPSI_BC1F = 0x45000000, | 399 | MIPSI_BC1F = 0x45000000, |
248 | MIPSI_BC1T = 0x45010000, | 400 | MIPSI_BC1T = 0x45010000, |
249 | |||
250 | MIPSI_C_EQ_D = 0x46200032, | 401 | MIPSI_C_EQ_D = 0x46200032, |
402 | MIPSI_C_OLT_S = 0x46000034, | ||
251 | MIPSI_C_OLT_D = 0x46200034, | 403 | MIPSI_C_OLT_D = 0x46200034, |
252 | MIPSI_C_ULT_D = 0x46200035, | 404 | MIPSI_C_ULT_D = 0x46200035, |
253 | MIPSI_C_OLE_D = 0x46200036, | 405 | MIPSI_C_OLE_D = 0x46200036, |
254 | MIPSI_C_ULE_D = 0x46200037, | 406 | MIPSI_C_ULE_D = 0x46200037, |
407 | #else | ||
408 | MIPSI_BC1EQZ = 0x45200000, | ||
409 | MIPSI_BC1NEZ = 0x45a00000, | ||
410 | MIPSI_CMP_EQ_D = 0x46a00002, | ||
411 | MIPSI_CMP_LT_S = 0x46800004, | ||
412 | MIPSI_CMP_LT_D = 0x46a00004, | ||
413 | #endif | ||
255 | 414 | ||
256 | } MIPSIns; | 415 | } MIPSIns; |
257 | 416 | ||
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index b4f600eb..bc9802a4 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h | |||
@@ -104,7 +104,7 @@ enum { | |||
104 | /* This definition must match with the *.dasc file(s). */ | 104 | /* This definition must match with the *.dasc file(s). */ |
105 | typedef struct { | 105 | typedef struct { |
106 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | 106 | lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ |
107 | int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | 107 | intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ |
108 | int32_t spill[256]; /* Spill slots. */ | 108 | int32_t spill[256]; /* Spill slots. */ |
109 | } ExitState; | 109 | } ExitState; |
110 | 110 | ||
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 69aec37c..69cb8ca5 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
@@ -22,7 +22,7 @@ | |||
22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) | 22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) |
23 | #endif | 23 | #endif |
24 | #define VRIDDEF(_) \ | 24 | #define VRIDDEF(_) \ |
25 | _(MRM) | 25 | _(MRM) _(RIP) |
26 | 26 | ||
27 | #define RIDENUM(name) RID_##name, | 27 | #define RIDENUM(name) RID_##name, |
28 | 28 | ||
@@ -31,15 +31,16 @@ enum { | |||
31 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ | 31 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ |
32 | RID_MAX, | 32 | RID_MAX, |
33 | RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ | 33 | RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ |
34 | RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */ | ||
34 | 35 | ||
35 | /* Calling conventions. */ | 36 | /* Calling conventions. */ |
37 | RID_SP = RID_ESP, | ||
36 | RID_RET = RID_EAX, | 38 | RID_RET = RID_EAX, |
37 | #if LJ_64 | 39 | #if LJ_64 |
38 | RID_FPRET = RID_XMM0, | 40 | RID_FPRET = RID_XMM0, |
39 | #else | 41 | #endif |
40 | RID_RETLO = RID_EAX, | 42 | RID_RETLO = RID_EAX, |
41 | RID_RETHI = RID_EDX, | 43 | RID_RETHI = RID_EDX, |
42 | #endif | ||
43 | 44 | ||
44 | /* These definitions must match with the *.dasc file(s): */ | 45 | /* These definitions must match with the *.dasc file(s): */ |
45 | RID_BASE = RID_EDX, /* Interpreter BASE. */ | 46 | RID_BASE = RID_EDX, /* Interpreter BASE. */ |
@@ -62,8 +63,10 @@ enum { | |||
62 | 63 | ||
63 | /* -- Register sets ------------------------------------------------------- */ | 64 | /* -- Register sets ------------------------------------------------------- */ |
64 | 65 | ||
65 | /* Make use of all registers, except the stack pointer. */ | 66 | /* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ |
66 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) | 67 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ |
68 | - RID2RSET(RID_ESP) \ | ||
69 | - LJ_GC64*RID2RSET(RID_DISPATCH)) | ||
67 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) | 70 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) |
68 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 71 | #define RSET_ALL (RSET_GPR|RSET_FPR) |
69 | #define RSET_INIT RSET_ALL | 72 | #define RSET_INIT RSET_ALL |
@@ -131,7 +134,11 @@ enum { | |||
131 | #define SPS_FIXED (4*2) | 134 | #define SPS_FIXED (4*2) |
132 | #define SPS_FIRST (4*2) /* Don't use callee register save area. */ | 135 | #define SPS_FIRST (4*2) /* Don't use callee register save area. */ |
133 | #else | 136 | #else |
137 | #if LJ_GC64 | ||
138 | #define SPS_FIXED 2 | ||
139 | #else | ||
134 | #define SPS_FIXED 4 | 140 | #define SPS_FIXED 4 |
141 | #endif | ||
135 | #define SPS_FIRST 2 | 142 | #define SPS_FIRST 2 |
136 | #endif | 143 | #endif |
137 | #else | 144 | #else |
@@ -157,6 +164,8 @@ typedef struct { | |||
157 | #define EXITSTUB_SPACING (2+2) | 164 | #define EXITSTUB_SPACING (2+2) |
158 | #define EXITSTUBS_PER_GROUP 32 | 165 | #define EXITSTUBS_PER_GROUP 32 |
159 | 166 | ||
167 | #define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */ | ||
168 | |||
160 | /* -- x86 ModRM operand encoding ------------------------------------------ */ | 169 | /* -- x86 ModRM operand encoding ------------------------------------------ */ |
161 | 170 | ||
162 | typedef enum { | 171 | typedef enum { |
@@ -184,12 +193,18 @@ typedef struct { | |||
184 | #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) | 193 | #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) |
185 | #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) | 194 | #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) |
186 | 195 | ||
196 | #define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24))) | ||
197 | #define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24))) | ||
198 | #define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24))) | ||
199 | #define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24))) | ||
200 | |||
187 | /* This list of x86 opcodes is not intended to be complete. Opcodes are only | 201 | /* This list of x86 opcodes is not intended to be complete. Opcodes are only |
188 | ** included when needed. Take a look at DynASM or jit.dis_x86 to see the | 202 | ** included when needed. Take a look at DynASM or jit.dis_x86 to see the |
189 | ** whole mess. | 203 | ** whole mess. |
190 | */ | 204 | */ |
191 | typedef enum { | 205 | typedef enum { |
192 | /* Fixed length opcodes. XI_* prefix. */ | 206 | /* Fixed length opcodes. XI_* prefix. */ |
207 | XI_O16 = 0x66, | ||
193 | XI_NOP = 0x90, | 208 | XI_NOP = 0x90, |
194 | XI_XCHGa = 0x90, | 209 | XI_XCHGa = 0x90, |
195 | XI_CALL = 0xe8, | 210 | XI_CALL = 0xe8, |
@@ -207,26 +222,28 @@ typedef enum { | |||
207 | XI_PUSHi8 = 0x6a, | 222 | XI_PUSHi8 = 0x6a, |
208 | XI_TESTb = 0x84, | 223 | XI_TESTb = 0x84, |
209 | XI_TEST = 0x85, | 224 | XI_TEST = 0x85, |
225 | XI_INT3 = 0xcc, | ||
210 | XI_MOVmi = 0xc7, | 226 | XI_MOVmi = 0xc7, |
211 | XI_GROUP5 = 0xff, | 227 | XI_GROUP5 = 0xff, |
212 | 228 | ||
213 | /* Note: little-endian byte-order! */ | 229 | /* Note: little-endian byte-order! */ |
214 | XI_FLDZ = 0xeed9, | 230 | XI_FLDZ = 0xeed9, |
215 | XI_FLD1 = 0xe8d9, | 231 | XI_FLD1 = 0xe8d9, |
216 | XI_FLDLG2 = 0xecd9, | ||
217 | XI_FLDLN2 = 0xedd9, | ||
218 | XI_FDUP = 0xc0d9, /* Really fld st0. */ | 232 | XI_FDUP = 0xc0d9, /* Really fld st0. */ |
219 | XI_FPOP = 0xd8dd, /* Really fstp st0. */ | 233 | XI_FPOP = 0xd8dd, /* Really fstp st0. */ |
220 | XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ | 234 | XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ |
221 | XI_FRNDINT = 0xfcd9, | 235 | XI_FRNDINT = 0xfcd9, |
222 | XI_FSIN = 0xfed9, | ||
223 | XI_FCOS = 0xffd9, | ||
224 | XI_FPTAN = 0xf2d9, | ||
225 | XI_FPATAN = 0xf3d9, | ||
226 | XI_FSCALE = 0xfdd9, | 236 | XI_FSCALE = 0xfdd9, |
227 | XI_FYL2X = 0xf1d9, | 237 | XI_FYL2X = 0xf1d9, |
228 | 238 | ||
239 | /* VEX-encoded instructions. XV_* prefix. */ | ||
240 | XV_RORX = XV_f20f3a(f0), | ||
241 | XV_SARX = XV_f30f38(f7), | ||
242 | XV_SHLX = XV_660f38(f7), | ||
243 | XV_SHRX = XV_f20f38(f7), | ||
244 | |||
229 | /* Variable-length opcodes. XO_* prefix. */ | 245 | /* Variable-length opcodes. XO_* prefix. */ |
246 | XO_OR = XO_(0b), | ||
230 | XO_MOV = XO_(8b), | 247 | XO_MOV = XO_(8b), |
231 | XO_MOVto = XO_(89), | 248 | XO_MOVto = XO_(89), |
232 | XO_MOVtow = XO_66(89), | 249 | XO_MOVtow = XO_66(89), |
@@ -277,10 +294,8 @@ typedef enum { | |||
277 | XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ | 294 | XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ |
278 | XO_UCOMISD = XO_660f(2e), | 295 | XO_UCOMISD = XO_660f(2e), |
279 | XO_CVTSI2SD = XO_f20f(2a), | 296 | XO_CVTSI2SD = XO_f20f(2a), |
280 | XO_CVTSD2SI = XO_f20f(2d), | ||
281 | XO_CVTTSD2SI= XO_f20f(2c), | 297 | XO_CVTTSD2SI= XO_f20f(2c), |
282 | XO_CVTSI2SS = XO_f30f(2a), | 298 | XO_CVTSI2SS = XO_f30f(2a), |
283 | XO_CVTSS2SI = XO_f30f(2d), | ||
284 | XO_CVTTSS2SI= XO_f30f(2c), | 299 | XO_CVTTSS2SI= XO_f30f(2c), |
285 | XO_CVTSS2SD = XO_f30f(5a), | 300 | XO_CVTSS2SD = XO_f30f(5a), |
286 | XO_CVTSD2SS = XO_f20f(5a), | 301 | XO_CVTSD2SS = XO_f20f(5a), |
diff --git a/src/lj_trace.c b/src/lj_trace.c index 89c3c5ed..c2329394 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include "lj_vm.h" | 30 | #include "lj_vm.h" |
31 | #include "lj_vmevent.h" | 31 | #include "lj_vmevent.h" |
32 | #include "lj_target.h" | 32 | #include "lj_target.h" |
33 | #include "lj_prng.h" | ||
33 | 34 | ||
34 | /* -- Error handling ------------------------------------------------------ */ | 35 | /* -- Error handling ------------------------------------------------------ */ |
35 | 36 | ||
@@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T) | |||
104 | name++; | 105 | name++; |
105 | else | 106 | else |
106 | name = "(string)"; | 107 | name = "(string)"; |
107 | lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); | 108 | lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, |
109 | "trace PC out of range"); | ||
108 | lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); | 110 | lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); |
109 | if (!fp) { | 111 | if (!fp) { |
110 | char fname[40]; | 112 | char fname[40]; |
@@ -117,15 +119,26 @@ static void perftools_addtrace(GCtrace *T) | |||
117 | } | 119 | } |
118 | #endif | 120 | #endif |
119 | 121 | ||
120 | /* Allocate space for copy of trace. */ | 122 | /* Allocate space for copy of T. */ |
121 | static GCtrace *trace_save_alloc(jit_State *J) | 123 | GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) |
122 | { | 124 | { |
123 | size_t sztr = ((sizeof(GCtrace)+7)&~7); | 125 | size_t sztr = ((sizeof(GCtrace)+7)&~7); |
124 | size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); | 126 | size_t szins = (T->nins-T->nk)*sizeof(IRIns); |
125 | size_t sz = sztr + szins + | 127 | size_t sz = sztr + szins + |
126 | J->cur.nsnap*sizeof(SnapShot) + | 128 | T->nsnap*sizeof(SnapShot) + |
127 | J->cur.nsnapmap*sizeof(SnapEntry); | 129 | T->nsnapmap*sizeof(SnapEntry); |
128 | return lj_mem_newt(J->L, (MSize)sz, GCtrace); | 130 | GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace); |
131 | char *p = (char *)T2 + sztr; | ||
132 | T2->gct = ~LJ_TTRACE; | ||
133 | T2->marked = 0; | ||
134 | T2->traceno = 0; | ||
135 | T2->ir = (IRIns *)p - T->nk; | ||
136 | T2->nins = T->nins; | ||
137 | T2->nk = T->nk; | ||
138 | T2->nsnap = T->nsnap; | ||
139 | T2->nsnapmap = T->nsnapmap; | ||
140 | memcpy(p, T->ir + T->nk, szins); | ||
141 | return T2; | ||
129 | } | 142 | } |
130 | 143 | ||
131 | /* Save current trace by copying and compacting it. */ | 144 | /* Save current trace by copying and compacting it. */ |
@@ -139,12 +152,12 @@ static void trace_save(jit_State *J, GCtrace *T) | |||
139 | setgcrefp(J2G(J)->gc.root, T); | 152 | setgcrefp(J2G(J)->gc.root, T); |
140 | newwhite(J2G(J), T); | 153 | newwhite(J2G(J), T); |
141 | T->gct = ~LJ_TTRACE; | 154 | T->gct = ~LJ_TTRACE; |
142 | T->ir = (IRIns *)p - J->cur.nk; | 155 | T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */ |
143 | memcpy(p, J->cur.ir+J->cur.nk, szins); | ||
144 | p += szins; | 156 | p += szins; |
145 | TRACE_APPENDVEC(snap, nsnap, SnapShot) | 157 | TRACE_APPENDVEC(snap, nsnap, SnapShot) |
146 | TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) | 158 | TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) |
147 | J->cur.traceno = 0; | 159 | J->cur.traceno = 0; |
160 | J->curfinal = NULL; | ||
148 | setgcrefp(J->trace[T->traceno], T); | 161 | setgcrefp(J->trace[T->traceno], T); |
149 | lj_gc_barriertrace(J2G(J), T->traceno); | 162 | lj_gc_barriertrace(J2G(J), T->traceno); |
150 | lj_gdbjit_addtrace(J, T); | 163 | lj_gdbjit_addtrace(J, T); |
@@ -172,7 +185,7 @@ void lj_trace_reenableproto(GCproto *pt) | |||
172 | { | 185 | { |
173 | if ((pt->flags & PROTO_ILOOP)) { | 186 | if ((pt->flags & PROTO_ILOOP)) { |
174 | BCIns *bc = proto_bc(pt); | 187 | BCIns *bc = proto_bc(pt); |
175 | BCPos i, sizebc = pt->sizebc;; | 188 | BCPos i, sizebc = pt->sizebc; |
176 | pt->flags &= ~PROTO_ILOOP; | 189 | pt->flags &= ~PROTO_ILOOP; |
177 | if (bc_op(bc[0]) == BC_IFUNCF) | 190 | if (bc_op(bc[0]) == BC_IFUNCF) |
178 | setbc_op(&bc[0], BC_FUNCF); | 191 | setbc_op(&bc[0], BC_FUNCF); |
@@ -194,27 +207,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T) | |||
194 | return; /* No need to unpatch branches in parent traces (yet). */ | 207 | return; /* No need to unpatch branches in parent traces (yet). */ |
195 | switch (bc_op(*pc)) { | 208 | switch (bc_op(*pc)) { |
196 | case BC_JFORL: | 209 | case BC_JFORL: |
197 | lua_assert(traceref(J, bc_d(*pc)) == T); | 210 | lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace"); |
198 | *pc = T->startins; | 211 | *pc = T->startins; |
199 | pc += bc_j(T->startins); | 212 | pc += bc_j(T->startins); |
200 | lua_assert(bc_op(*pc) == BC_JFORI); | 213 | lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI"); |
201 | setbc_op(pc, BC_FORI); | 214 | setbc_op(pc, BC_FORI); |
202 | break; | 215 | break; |
203 | case BC_JITERL: | 216 | case BC_JITERL: |
204 | case BC_JLOOP: | 217 | case BC_JLOOP: |
205 | lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op)); | 218 | lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP || |
219 | bc_isret(op), "bad original bytecode %d", op); | ||
206 | *pc = T->startins; | 220 | *pc = T->startins; |
207 | break; | 221 | break; |
208 | case BC_JMP: | 222 | case BC_JMP: |
209 | lua_assert(op == BC_ITERL); | 223 | lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op); |
210 | pc += bc_j(*pc)+2; | 224 | pc += bc_j(*pc)+2; |
211 | if (bc_op(*pc) == BC_JITERL) { | 225 | if (bc_op(*pc) == BC_JITERL) { |
212 | lua_assert(traceref(J, bc_d(*pc)) == T); | 226 | lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace"); |
213 | *pc = T->startins; | 227 | *pc = T->startins; |
214 | } | 228 | } |
215 | break; | 229 | break; |
216 | case BC_JFUNCF: | 230 | case BC_JFUNCF: |
217 | lua_assert(op == BC_FUNCF); | 231 | lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op); |
218 | *pc = T->startins; | 232 | *pc = T->startins; |
219 | break; | 233 | break; |
220 | default: /* Already unpatched. */ | 234 | default: /* Already unpatched. */ |
@@ -226,7 +240,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T) | |||
226 | static void trace_flushroot(jit_State *J, GCtrace *T) | 240 | static void trace_flushroot(jit_State *J, GCtrace *T) |
227 | { | 241 | { |
228 | GCproto *pt = &gcref(T->startpt)->pt; | 242 | GCproto *pt = &gcref(T->startpt)->pt; |
229 | lua_assert(T->root == 0 && pt != NULL); | 243 | lj_assertJ(T->root == 0, "not a root trace"); |
244 | lj_assertJ(pt != NULL, "trace has no prototype"); | ||
230 | /* First unpatch any modified bytecode. */ | 245 | /* First unpatch any modified bytecode. */ |
231 | trace_unpatch(J, T); | 246 | trace_unpatch(J, T); |
232 | /* Unlink root trace from chain anchored in prototype. */ | 247 | /* Unlink root trace from chain anchored in prototype. */ |
@@ -274,7 +289,7 @@ int lj_trace_flushall(lua_State *L) | |||
274 | if (T->root == 0) | 289 | if (T->root == 0) |
275 | trace_flushroot(J, T); | 290 | trace_flushroot(J, T); |
276 | lj_gdbjit_deltrace(J, T); | 291 | lj_gdbjit_deltrace(J, T); |
277 | T->traceno = 0; | 292 | T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */ |
278 | setgcrefnull(J->trace[i]); | 293 | setgcrefnull(J->trace[i]); |
279 | } | 294 | } |
280 | } | 295 | } |
@@ -296,13 +311,42 @@ void lj_trace_initstate(global_State *g) | |||
296 | { | 311 | { |
297 | jit_State *J = G2J(g); | 312 | jit_State *J = G2J(g); |
298 | TValue *tv; | 313 | TValue *tv; |
299 | /* Initialize SIMD constants. */ | 314 | |
315 | /* Initialize aligned SIMD constants. */ | ||
300 | tv = LJ_KSIMD(J, LJ_KSIMD_ABS); | 316 | tv = LJ_KSIMD(J, LJ_KSIMD_ABS); |
301 | tv[0].u64 = U64x(7fffffff,ffffffff); | 317 | tv[0].u64 = U64x(7fffffff,ffffffff); |
302 | tv[1].u64 = U64x(7fffffff,ffffffff); | 318 | tv[1].u64 = U64x(7fffffff,ffffffff); |
303 | tv = LJ_KSIMD(J, LJ_KSIMD_NEG); | 319 | tv = LJ_KSIMD(J, LJ_KSIMD_NEG); |
304 | tv[0].u64 = U64x(80000000,00000000); | 320 | tv[0].u64 = U64x(80000000,00000000); |
305 | tv[1].u64 = U64x(80000000,00000000); | 321 | tv[1].u64 = U64x(80000000,00000000); |
322 | |||
323 | /* Initialize 32/64 bit constants. */ | ||
324 | #if LJ_TARGET_X86ORX64 | ||
325 | J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); | ||
326 | #if LJ_32 | ||
327 | J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); | ||
328 | #endif | ||
329 | J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); | ||
330 | J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; | ||
331 | #endif | ||
332 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 | ||
333 | J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); | ||
334 | #endif | ||
335 | #if LJ_TARGET_PPC | ||
336 | J->k32[LJ_K32_2P52_2P31] = 0x59800004; | ||
337 | J->k32[LJ_K32_2P52] = 0x59800000; | ||
338 | #endif | ||
339 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
340 | J->k32[LJ_K32_2P31] = 0x4f000000; | ||
341 | #endif | ||
342 | #if LJ_TARGET_MIPS | ||
343 | J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); | ||
344 | #if LJ_64 | ||
345 | J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); | ||
346 | J->k32[LJ_K32_2P63] = 0x5f000000; | ||
347 | J->k32[LJ_K32_M2P64] = 0xdf800000; | ||
348 | #endif | ||
349 | #endif | ||
306 | } | 350 | } |
307 | 351 | ||
308 | /* Free everything associated with the JIT compiler state. */ | 352 | /* Free everything associated with the JIT compiler state. */ |
@@ -313,11 +357,11 @@ void lj_trace_freestate(global_State *g) | |||
313 | { /* This assumes all traces have already been freed. */ | 357 | { /* This assumes all traces have already been freed. */ |
314 | ptrdiff_t i; | 358 | ptrdiff_t i; |
315 | for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) | 359 | for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) |
316 | lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); | 360 | lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL, |
361 | "trace still allocated"); | ||
317 | } | 362 | } |
318 | #endif | 363 | #endif |
319 | lj_mcode_free(J); | 364 | lj_mcode_free(J); |
320 | lj_ir_k64_freeall(J); | ||
321 | lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); | 365 | lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); |
322 | lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); | 366 | lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); |
323 | lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); | 367 | lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); |
@@ -329,8 +373,13 @@ void lj_trace_freestate(global_State *g) | |||
329 | /* Blacklist a bytecode instruction. */ | 373 | /* Blacklist a bytecode instruction. */ |
330 | static void blacklist_pc(GCproto *pt, BCIns *pc) | 374 | static void blacklist_pc(GCproto *pt, BCIns *pc) |
331 | { | 375 | { |
332 | setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); | 376 | if (bc_op(*pc) == BC_ITERN) { |
333 | pt->flags |= PROTO_ILOOP; | 377 | setbc_op(pc, BC_ITERC); |
378 | setbc_op(pc+1+bc_j(pc[1]), BC_JMP); | ||
379 | } else { | ||
380 | setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); | ||
381 | pt->flags |= PROTO_ILOOP; | ||
382 | } | ||
334 | } | 383 | } |
335 | 384 | ||
336 | /* Penalize a bytecode instruction. */ | 385 | /* Penalize a bytecode instruction. */ |
@@ -341,7 +390,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) | |||
341 | if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ | 390 | if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ |
342 | /* First try to bump its hotcount several times. */ | 391 | /* First try to bump its hotcount several times. */ |
343 | val = ((uint32_t)J->penalty[i].val << 1) + | 392 | val = ((uint32_t)J->penalty[i].val << 1) + |
344 | LJ_PRNG_BITS(J, PENALTY_RNDBITS); | 393 | (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1)); |
345 | if (val > PENALTY_MAX) { | 394 | if (val > PENALTY_MAX) { |
346 | blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ | 395 | blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ |
347 | return; | 396 | return; |
@@ -367,10 +416,11 @@ static void trace_start(jit_State *J) | |||
367 | TraceNo traceno; | 416 | TraceNo traceno; |
368 | 417 | ||
369 | if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ | 418 | if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ |
370 | if (J->parent == 0) { | 419 | if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) { |
371 | /* Lazy bytecode patching to disable hotcount events. */ | 420 | /* Lazy bytecode patching to disable hotcount events. */ |
372 | lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || | 421 | lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || |
373 | bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); | 422 | bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, |
423 | "bad hot bytecode %d", bc_op(*J->pc)); | ||
374 | setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); | 424 | setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); |
375 | J->pt->flags |= PROTO_ILOOP; | 425 | J->pt->flags |= PROTO_ILOOP; |
376 | } | 426 | } |
@@ -381,7 +431,8 @@ static void trace_start(jit_State *J) | |||
381 | /* Get a new trace number. */ | 431 | /* Get a new trace number. */ |
382 | traceno = trace_findfree(J); | 432 | traceno = trace_findfree(J); |
383 | if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ | 433 | if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ |
384 | lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); | 434 | lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0, |
435 | "recorder called from GC hook"); | ||
385 | lj_trace_flushall(J->L); | 436 | lj_trace_flushall(J->L); |
386 | J->state = LJ_TRACE_IDLE; /* Silently ignored. */ | 437 | J->state = LJ_TRACE_IDLE; /* Silently ignored. */ |
387 | return; | 438 | return; |
@@ -401,6 +452,8 @@ static void trace_start(jit_State *J) | |||
401 | J->guardemit.irt = 0; | 452 | J->guardemit.irt = 0; |
402 | J->postproc = LJ_POST_NONE; | 453 | J->postproc = LJ_POST_NONE; |
403 | lj_resetsplit(J); | 454 | lj_resetsplit(J); |
455 | J->retryrec = 0; | ||
456 | J->ktrace = 0; | ||
404 | setgcref(J->cur.startpt, obj2gco(J->pt)); | 457 | setgcref(J->cur.startpt, obj2gco(J->pt)); |
405 | 458 | ||
406 | L = J->L; | 459 | L = J->L; |
@@ -412,6 +465,12 @@ static void trace_start(jit_State *J) | |||
412 | if (J->parent) { | 465 | if (J->parent) { |
413 | setintV(L->top++, J->parent); | 466 | setintV(L->top++, J->parent); |
414 | setintV(L->top++, J->exitno); | 467 | setintV(L->top++, J->exitno); |
468 | } else { | ||
469 | BCOp op = bc_op(*J->pc); | ||
470 | if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { | ||
471 | setintV(L->top++, J->exitno); /* Parent of stitched trace. */ | ||
472 | setintV(L->top++, -1); | ||
473 | } | ||
415 | } | 474 | } |
416 | ); | 475 | ); |
417 | lj_record_setup(J); | 476 | lj_record_setup(J); |
@@ -424,7 +483,7 @@ static void trace_stop(jit_State *J) | |||
424 | BCOp op = bc_op(J->cur.startins); | 483 | BCOp op = bc_op(J->cur.startins); |
425 | GCproto *pt = &gcref(J->cur.startpt)->pt; | 484 | GCproto *pt = &gcref(J->cur.startpt)->pt; |
426 | TraceNo traceno = J->cur.traceno; | 485 | TraceNo traceno = J->cur.traceno; |
427 | GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ | 486 | GCtrace *T = J->curfinal; |
428 | lua_State *L; | 487 | lua_State *L; |
429 | 488 | ||
430 | switch (op) { | 489 | switch (op) { |
@@ -442,6 +501,7 @@ static void trace_stop(jit_State *J) | |||
442 | J->cur.nextroot = pt->trace; | 501 | J->cur.nextroot = pt->trace; |
443 | pt->trace = (TraceNo1)traceno; | 502 | pt->trace = (TraceNo1)traceno; |
444 | break; | 503 | break; |
504 | case BC_ITERN: | ||
445 | case BC_RET: | 505 | case BC_RET: |
446 | case BC_RET0: | 506 | case BC_RET0: |
447 | case BC_RET1: | 507 | case BC_RET1: |
@@ -449,7 +509,7 @@ static void trace_stop(jit_State *J) | |||
449 | goto addroot; | 509 | goto addroot; |
450 | case BC_JMP: | 510 | case BC_JMP: |
451 | /* Patch exit branch in parent to side trace entry. */ | 511 | /* Patch exit branch in parent to side trace entry. */ |
452 | lua_assert(J->parent != 0 && J->cur.root != 0); | 512 | lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace"); |
453 | lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); | 513 | lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); |
454 | /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ | 514 | /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ |
455 | { | 515 | { |
@@ -465,8 +525,14 @@ static void trace_stop(jit_State *J) | |||
465 | root->nextside = (TraceNo1)traceno; | 525 | root->nextside = (TraceNo1)traceno; |
466 | } | 526 | } |
467 | break; | 527 | break; |
528 | case BC_CALLM: | ||
529 | case BC_CALL: | ||
530 | case BC_ITERC: | ||
531 | /* Trace stitching: patch link of previous trace. */ | ||
532 | traceref(J, J->exitno)->link = traceno; | ||
533 | break; | ||
468 | default: | 534 | default: |
469 | lua_assert(0); | 535 | lj_assertJ(0, "bad stop bytecode %d", op); |
470 | break; | 536 | break; |
471 | } | 537 | } |
472 | 538 | ||
@@ -479,6 +545,7 @@ static void trace_stop(jit_State *J) | |||
479 | lj_vmevent_send(L, TRACE, | 545 | lj_vmevent_send(L, TRACE, |
480 | setstrV(L, L->top++, lj_str_newlit(L, "stop")); | 546 | setstrV(L, L->top++, lj_str_newlit(L, "stop")); |
481 | setintV(L->top++, traceno); | 547 | setintV(L->top++, traceno); |
548 | setfuncV(L, L->top++, J->fn); | ||
482 | ); | 549 | ); |
483 | } | 550 | } |
484 | 551 | ||
@@ -486,8 +553,8 @@ static void trace_stop(jit_State *J) | |||
486 | static int trace_downrec(jit_State *J) | 553 | static int trace_downrec(jit_State *J) |
487 | { | 554 | { |
488 | /* Restart recording at the return instruction. */ | 555 | /* Restart recording at the return instruction. */ |
489 | lua_assert(J->pt != NULL); | 556 | lj_assertJ(J->pt != NULL, "no active prototype"); |
490 | lua_assert(bc_isret(bc_op(*J->pc))); | 557 | lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode"); |
491 | if (bc_op(*J->pc) == BC_RETM) | 558 | if (bc_op(*J->pc) == BC_RETM) |
492 | return 0; /* NYI: down-recursion with RETM. */ | 559 | return 0; /* NYI: down-recursion with RETM. */ |
493 | J->parent = 0; | 560 | J->parent = 0; |
@@ -506,6 +573,10 @@ static int trace_abort(jit_State *J) | |||
506 | 573 | ||
507 | J->postproc = LJ_POST_NONE; | 574 | J->postproc = LJ_POST_NONE; |
508 | lj_mcode_abort(J); | 575 | lj_mcode_abort(J); |
576 | if (J->curfinal) { | ||
577 | lj_trace_free(J2G(J), J->curfinal); | ||
578 | J->curfinal = NULL; | ||
579 | } | ||
509 | if (tvisnumber(L->top-1)) | 580 | if (tvisnumber(L->top-1)) |
510 | e = (TraceError)numberVint(L->top-1); | 581 | e = (TraceError)numberVint(L->top-1); |
511 | if (e == LJ_TRERR_MCODELM) { | 582 | if (e == LJ_TRERR_MCODELM) { |
@@ -514,8 +585,17 @@ static int trace_abort(jit_State *J) | |||
514 | return 1; /* Retry ASM with new MCode area. */ | 585 | return 1; /* Retry ASM with new MCode area. */ |
515 | } | 586 | } |
516 | /* Penalize or blacklist starting bytecode instruction. */ | 587 | /* Penalize or blacklist starting bytecode instruction. */ |
517 | if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) | 588 | if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { |
518 | penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); | 589 | if (J->exitno == 0) { |
590 | BCIns *startpc = mref(J->cur.startpc, BCIns); | ||
591 | if (e == LJ_TRERR_RETRY) | ||
592 | hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */ | ||
593 | else | ||
594 | penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e); | ||
595 | } else { | ||
596 | traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */ | ||
597 | } | ||
598 | } | ||
519 | 599 | ||
520 | /* Is there anything to abort? */ | 600 | /* Is there anything to abort? */ |
521 | traceno = J->cur.traceno; | 601 | traceno = J->cur.traceno; |
@@ -581,8 +661,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) | |||
581 | J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ | 661 | J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ |
582 | trace_start(J); | 662 | trace_start(J); |
583 | lj_dispatch_update(J2G(J)); | 663 | lj_dispatch_update(J2G(J)); |
584 | break; | 664 | if (J->state != LJ_TRACE_RECORD_1ST) |
665 | break; | ||
666 | /* fallthrough */ | ||
585 | 667 | ||
668 | case LJ_TRACE_RECORD_1ST: | ||
669 | J->state = LJ_TRACE_RECORD; | ||
670 | /* fallthrough */ | ||
586 | case LJ_TRACE_RECORD: | 671 | case LJ_TRACE_RECORD: |
587 | trace_pendpatch(J, 0); | 672 | trace_pendpatch(J, 0); |
588 | setvmstate(J2G(J), RECORD); | 673 | setvmstate(J2G(J), RECORD); |
@@ -688,15 +773,30 @@ static void trace_hotside(jit_State *J, const BCIns *pc) | |||
688 | { | 773 | { |
689 | SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; | 774 | SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; |
690 | if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && | 775 | if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && |
776 | isluafunc(curr_func(J->L)) && | ||
691 | snap->count != SNAPCOUNT_DONE && | 777 | snap->count != SNAPCOUNT_DONE && |
692 | ++snap->count >= J->param[JIT_P_hotexit]) { | 778 | ++snap->count >= J->param[JIT_P_hotexit]) { |
693 | lua_assert(J->state == LJ_TRACE_IDLE); | 779 | lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording"); |
694 | /* J->parent is non-zero for a side trace. */ | 780 | /* J->parent is non-zero for a side trace. */ |
695 | J->state = LJ_TRACE_START; | 781 | J->state = LJ_TRACE_START; |
696 | lj_trace_ins(J, pc); | 782 | lj_trace_ins(J, pc); |
697 | } | 783 | } |
698 | } | 784 | } |
699 | 785 | ||
786 | /* Stitch a new trace to the previous trace. */ | ||
787 | void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc) | ||
788 | { | ||
789 | /* Only start a new trace if not recording or inside __gc call or vmevent. */ | ||
790 | if (J->state == LJ_TRACE_IDLE && | ||
791 | !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { | ||
792 | J->parent = 0; /* Have to treat it like a root trace. */ | ||
793 | /* J->exitno is set to the invoking trace. */ | ||
794 | J->state = LJ_TRACE_START; | ||
795 | lj_trace_ins(J, pc); | ||
796 | } | ||
797 | } | ||
798 | |||
799 | |||
700 | /* Tiny struct to pass data to protected call. */ | 800 | /* Tiny struct to pass data to protected call. */ |
701 | typedef struct ExitDataCP { | 801 | typedef struct ExitDataCP { |
702 | jit_State *J; | 802 | jit_State *J; |
@@ -740,7 +840,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex) | |||
740 | } | 840 | } |
741 | #endif | 841 | #endif |
742 | 842 | ||
743 | #ifdef EXITSTATE_PCREG | 843 | #if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE) |
744 | /* Determine trace number from pc of exit instruction. */ | 844 | /* Determine trace number from pc of exit instruction. */ |
745 | static TraceNo trace_exit_find(jit_State *J, MCode *pc) | 845 | static TraceNo trace_exit_find(jit_State *J, MCode *pc) |
746 | { | 846 | { |
@@ -750,7 +850,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc) | |||
750 | if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) | 850 | if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) |
751 | return traceno; | 851 | return traceno; |
752 | } | 852 | } |
753 | lua_assert(0); | 853 | lj_assertJ(0, "bad exit pc"); |
754 | return 0; | 854 | return 0; |
755 | } | 855 | } |
756 | #endif | 856 | #endif |
@@ -762,40 +862,55 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
762 | lua_State *L = J->L; | 862 | lua_State *L = J->L; |
763 | ExitState *ex = (ExitState *)exptr; | 863 | ExitState *ex = (ExitState *)exptr; |
764 | ExitDataCP exd; | 864 | ExitDataCP exd; |
765 | int errcode; | 865 | int errcode, exitcode = J->exitcode; |
866 | TValue exiterr; | ||
766 | const BCIns *pc; | 867 | const BCIns *pc; |
767 | void *cf; | 868 | void *cf; |
768 | GCtrace *T; | 869 | GCtrace *T; |
870 | |||
871 | setnilV(&exiterr); | ||
872 | if (exitcode) { /* Trace unwound with error code. */ | ||
873 | J->exitcode = 0; | ||
874 | copyTV(L, &exiterr, L->top-1); | ||
875 | } | ||
876 | |||
769 | #ifdef EXITSTATE_PCREG | 877 | #ifdef EXITSTATE_PCREG |
770 | J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); | 878 | J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); |
771 | #endif | 879 | #endif |
772 | T = traceref(J, J->parent); UNUSED(T); | 880 | T = traceref(J, J->parent); UNUSED(T); |
773 | #ifdef EXITSTATE_CHECKEXIT | 881 | #ifdef EXITSTATE_CHECKEXIT |
774 | if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ | 882 | if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ |
775 | lua_assert(T->root != 0); | 883 | lj_assertJ(T->root != 0, "stack check in root trace"); |
776 | J->exitno = T->ir[REF_BASE].op2; | 884 | J->exitno = T->ir[REF_BASE].op2; |
777 | J->parent = T->ir[REF_BASE].op1; | 885 | J->parent = T->ir[REF_BASE].op1; |
778 | T = traceref(J, J->parent); | 886 | T = traceref(J, J->parent); |
779 | } | 887 | } |
780 | #endif | 888 | #endif |
781 | lua_assert(T != NULL && J->exitno < T->nsnap); | 889 | lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number"); |
782 | exd.J = J; | 890 | exd.J = J; |
783 | exd.exptr = exptr; | 891 | exd.exptr = exptr; |
784 | errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); | 892 | errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); |
785 | if (errcode) | 893 | if (errcode) |
786 | return -errcode; /* Return negated error code. */ | 894 | return -errcode; /* Return negated error code. */ |
787 | 895 | ||
788 | lj_vmevent_send(L, TEXIT, | 896 | if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */ |
789 | lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); | 897 | |
790 | setintV(L->top++, J->parent); | 898 | if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) |
791 | setintV(L->top++, J->exitno); | 899 | lj_vmevent_send(L, TEXIT, |
792 | trace_exit_regs(L, ex); | 900 | lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); |
793 | ); | 901 | setintV(L->top++, J->parent); |
902 | setintV(L->top++, J->exitno); | ||
903 | trace_exit_regs(L, ex); | ||
904 | ); | ||
794 | 905 | ||
795 | pc = exd.pc; | 906 | pc = exd.pc; |
796 | cf = cframe_raw(L->cframe); | 907 | cf = cframe_raw(L->cframe); |
797 | setcframe_pc(cf, pc); | 908 | setcframe_pc(cf, pc); |
798 | if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { | 909 | if (exitcode) { |
910 | return -exitcode; | ||
911 | } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { | ||
912 | /* Just exit to interpreter. */ | ||
913 | } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { | ||
799 | if (!(G(L)->hookmask & HOOK_GC)) | 914 | if (!(G(L)->hookmask & HOOK_GC)) |
800 | lj_gc_step(L); /* Exited because of GC: drive GC forward. */ | 915 | lj_gc_step(L); /* Exited because of GC: drive GC forward. */ |
801 | } else { | 916 | } else { |
@@ -803,13 +918,14 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
803 | } | 918 | } |
804 | if (bc_op(*pc) == BC_JLOOP) { | 919 | if (bc_op(*pc) == BC_JLOOP) { |
805 | BCIns *retpc = &traceref(J, bc_d(*pc))->startins; | 920 | BCIns *retpc = &traceref(J, bc_d(*pc))->startins; |
806 | if (bc_isret(bc_op(*retpc))) { | 921 | int isret = bc_isret(bc_op(*retpc)); |
922 | if (isret || bc_op(*retpc) == BC_ITERN) { | ||
807 | if (J->state == LJ_TRACE_RECORD) { | 923 | if (J->state == LJ_TRACE_RECORD) { |
808 | J->patchins = *pc; | 924 | J->patchins = *pc; |
809 | J->patchpc = (BCIns *)pc; | 925 | J->patchpc = (BCIns *)pc; |
810 | *J->patchpc = *retpc; | 926 | *J->patchpc = *retpc; |
811 | J->bcskip = 1; | 927 | J->bcskip = 1; |
812 | } else { | 928 | } else if (isret) { |
813 | pc = retpc; | 929 | pc = retpc; |
814 | setcframe_pc(cf, pc); | 930 | setcframe_pc(cf, pc); |
815 | } | 931 | } |
@@ -819,7 +935,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
819 | ERRNO_RESTORE | 935 | ERRNO_RESTORE |
820 | switch (bc_op(*pc)) { | 936 | switch (bc_op(*pc)) { |
821 | case BC_CALLM: case BC_CALLMT: | 937 | case BC_CALLM: case BC_CALLMT: |
822 | return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc)); | 938 | return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2); |
823 | case BC_RETM: | 939 | case BC_RETM: |
824 | return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); | 940 | return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); |
825 | case BC_TSETM: | 941 | case BC_TSETM: |
@@ -831,4 +947,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
831 | } | 947 | } |
832 | } | 948 | } |
833 | 949 | ||
950 | #if LJ_UNWIND_JIT | ||
951 | /* Given an mcode address determine trace exit address for unwinding. */ | ||
952 | uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep) | ||
953 | { | ||
954 | #if EXITTRACE_VMSTATE | ||
955 | TraceNo traceno = J2G(J)->vmstate; | ||
956 | #else | ||
957 | TraceNo traceno = trace_exit_find(J, (MCode *)addr); | ||
958 | #endif | ||
959 | GCtrace *T = traceref(J, traceno); | ||
960 | if (T | ||
961 | #if EXITTRACE_VMSTATE | ||
962 | && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode | ||
963 | #endif | ||
964 | ) { | ||
965 | SnapShot *snap = T->snap; | ||
966 | SnapNo lo = 0, exitno = T->nsnap; | ||
967 | uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */ | ||
968 | /* Rightmost binary search for mcode offset to determine exit number. */ | ||
969 | do { | ||
970 | SnapNo mid = (lo+exitno) >> 1; | ||
971 | if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1; | ||
972 | } while (lo < exitno); | ||
973 | exitno--; | ||
974 | *ep = exitno; | ||
975 | #ifdef EXITSTUBS_PER_GROUP | ||
976 | return (uintptr_t)exitstub_addr(J, exitno); | ||
977 | #else | ||
978 | return (uintptr_t)exitstub_trace_addr(T, exitno); | ||
979 | #endif | ||
980 | } | ||
981 | /* Cannot correlate addr with trace/exit. This will be fatal. */ | ||
982 | lj_assertJ(0, "bad exit pc"); | ||
983 | return 0; | ||
984 | } | ||
985 | #endif | ||
986 | |||
834 | #endif | 987 | #endif |
diff --git a/src/lj_trace.h b/src/lj_trace.h index 0fc03672..3d7f76f0 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h | |||
@@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); | |||
23 | LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); | 23 | LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); |
24 | 24 | ||
25 | /* Trace management. */ | 25 | /* Trace management. */ |
26 | LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T); | ||
26 | LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); | 27 | LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); |
27 | LJ_FUNC void lj_trace_reenableproto(GCproto *pt); | 28 | LJ_FUNC void lj_trace_reenableproto(GCproto *pt); |
28 | LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); | 29 | LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); |
@@ -34,7 +35,11 @@ LJ_FUNC void lj_trace_freestate(global_State *g); | |||
34 | /* Event handling. */ | 35 | /* Event handling. */ |
35 | LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); | 36 | LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); |
36 | LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); | 37 | LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); |
38 | LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); | ||
37 | LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); | 39 | LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); |
40 | #if LJ_UNWIND_EXT | ||
41 | LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep); | ||
42 | #endif | ||
38 | 43 | ||
39 | /* Signal asynchronous abort of trace or end of trace. */ | 44 | /* Signal asynchronous abort of trace or end of trace. */ |
40 | #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) | 45 | #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) |
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h index a4e5ae64..8ed8ac82 100644 --- a/src/lj_traceerr.h +++ b/src/lj_traceerr.h | |||
@@ -7,10 +7,12 @@ | |||
7 | 7 | ||
8 | /* Recording. */ | 8 | /* Recording. */ |
9 | TREDEF(RECERR, "error thrown or hook called during recording") | 9 | TREDEF(RECERR, "error thrown or hook called during recording") |
10 | TREDEF(TRACEUV, "trace too short") | ||
10 | TREDEF(TRACEOV, "trace too long") | 11 | TREDEF(TRACEOV, "trace too long") |
11 | TREDEF(STACKOV, "trace too deep") | 12 | TREDEF(STACKOV, "trace too deep") |
12 | TREDEF(SNAPOV, "too many snapshots") | 13 | TREDEF(SNAPOV, "too many snapshots") |
13 | TREDEF(BLACKL, "blacklisted") | 14 | TREDEF(BLACKL, "blacklisted") |
15 | TREDEF(RETRY, "retry recording") | ||
14 | TREDEF(NYIBC, "NYI: bytecode %d") | 16 | TREDEF(NYIBC, "NYI: bytecode %d") |
15 | 17 | ||
16 | /* Recording loop ops. */ | 18 | /* Recording loop ops. */ |
@@ -23,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type") | |||
23 | TREDEF(CJITOFF, "JIT compilation disabled for function") | 25 | TREDEF(CJITOFF, "JIT compilation disabled for function") |
24 | TREDEF(CUNROLL, "call unroll limit reached") | 26 | TREDEF(CUNROLL, "call unroll limit reached") |
25 | TREDEF(DOWNREC, "down-recursion, restarting") | 27 | TREDEF(DOWNREC, "down-recursion, restarting") |
26 | TREDEF(NYICF, "NYI: C function %s") | ||
27 | TREDEF(NYIFF, "NYI: FastFunc %s") | ||
28 | TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") | 28 | TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") |
29 | TREDEF(NYIRETL, "NYI: return to lower frame") | 29 | TREDEF(NYIRETL, "NYI: return to lower frame") |
30 | 30 | ||
diff --git a/src/lj_udata.c b/src/lj_udata.c index 7dada848..ee4a145d 100644 --- a/src/lj_udata.c +++ b/src/lj_udata.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
11 | #include "lj_err.h" | ||
11 | #include "lj_udata.h" | 12 | #include "lj_udata.h" |
12 | 13 | ||
13 | GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) | 14 | GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) |
@@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud) | |||
32 | lj_mem_free(g, ud, sizeudata(ud)); | 33 | lj_mem_free(g, ud, sizeudata(ud)); |
33 | } | 34 | } |
34 | 35 | ||
36 | #if LJ_64 | ||
37 | void *lj_lightud_intern(lua_State *L, void *p) | ||
38 | { | ||
39 | global_State *g = G(L); | ||
40 | uint64_t u = (uint64_t)p; | ||
41 | uint32_t up = lightudup(u); | ||
42 | uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); | ||
43 | MSize segnum = g->gc.lightudnum; | ||
44 | if (segmap) { | ||
45 | MSize seg; | ||
46 | for (seg = 0; seg <= segnum; seg++) | ||
47 | if (segmap[seg] == up) /* Fast path. */ | ||
48 | return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); | ||
49 | segnum++; | ||
50 | /* Leave last segment unused to avoid clash with ITERN key. */ | ||
51 | if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU); | ||
52 | } | ||
53 | if (!((segnum-1) & segnum) && segnum != 1) { | ||
54 | lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t); | ||
55 | setmref(g->gc.lightudseg, segmap); | ||
56 | } | ||
57 | g->gc.lightudnum = segnum; | ||
58 | segmap[segnum] = up; | ||
59 | return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); | ||
60 | } | ||
61 | #endif | ||
62 | |||
diff --git a/src/lj_udata.h b/src/lj_udata.h index acd136a7..503c9e30 100644 --- a/src/lj_udata.h +++ b/src/lj_udata.h | |||
@@ -10,5 +10,8 @@ | |||
10 | 10 | ||
11 | LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); | 11 | LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); |
12 | LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); | 12 | LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); |
13 | #if LJ_64 | ||
14 | LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p); | ||
15 | #endif | ||
13 | 16 | ||
14 | #endif | 17 | #endif |
diff --git a/src/lj_vm.h b/src/lj_vm.h index b66f5b85..7713d16b 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
@@ -17,11 +17,18 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud, | |||
17 | LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); | 17 | LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); |
18 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); | 18 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); |
19 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); | 19 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); |
20 | #if LJ_ABI_WIN && LJ_TARGET_X86 | ||
21 | LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec, | ||
22 | void *unwinder, int errcode); | ||
23 | #endif | ||
20 | LJ_ASMF void lj_vm_unwind_c_eh(void); | 24 | LJ_ASMF void lj_vm_unwind_c_eh(void); |
21 | LJ_ASMF void lj_vm_unwind_ff_eh(void); | 25 | LJ_ASMF void lj_vm_unwind_ff_eh(void); |
22 | #if LJ_TARGET_X86ORX64 | 26 | #if LJ_TARGET_X86ORX64 |
23 | LJ_ASMF void lj_vm_unwind_rethrow(void); | 27 | LJ_ASMF void lj_vm_unwind_rethrow(void); |
24 | #endif | 28 | #endif |
29 | #if LJ_TARGET_MIPS | ||
30 | LJ_ASMF void lj_vm_unwind_stub(void); | ||
31 | #endif | ||
25 | 32 | ||
26 | /* Miscellaneous functions. */ | 33 | /* Miscellaneous functions. */ |
27 | #if LJ_TARGET_X86ORX64 | 34 | #if LJ_TARGET_X86ORX64 |
@@ -43,13 +50,15 @@ LJ_ASMF void lj_vm_record(void); | |||
43 | LJ_ASMF void lj_vm_inshook(void); | 50 | LJ_ASMF void lj_vm_inshook(void); |
44 | LJ_ASMF void lj_vm_rethook(void); | 51 | LJ_ASMF void lj_vm_rethook(void); |
45 | LJ_ASMF void lj_vm_callhook(void); | 52 | LJ_ASMF void lj_vm_callhook(void); |
53 | LJ_ASMF void lj_vm_profhook(void); | ||
54 | LJ_ASMF void lj_vm_IITERN(void); | ||
46 | 55 | ||
47 | /* Trace exit handling. */ | 56 | /* Trace exit handling. */ |
48 | LJ_ASMF void lj_vm_exit_handler(void); | 57 | LJ_ASMF void lj_vm_exit_handler(void); |
49 | LJ_ASMF void lj_vm_exit_interp(void); | 58 | LJ_ASMF void lj_vm_exit_interp(void); |
50 | 59 | ||
51 | /* Internal math helper functions. */ | 60 | /* Internal math helper functions. */ |
52 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC | 61 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) |
53 | #define lj_vm_floor floor | 62 | #define lj_vm_floor floor |
54 | #define lj_vm_ceil ceil | 63 | #define lj_vm_ceil ceil |
55 | #else | 64 | #else |
@@ -60,23 +69,26 @@ LJ_ASMF double lj_vm_floor_sf(double); | |||
60 | LJ_ASMF double lj_vm_ceil_sf(double); | 69 | LJ_ASMF double lj_vm_ceil_sf(double); |
61 | #endif | 70 | #endif |
62 | #endif | 71 | #endif |
63 | #if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 | 72 | #ifdef LUAJIT_NO_LOG2 |
64 | LJ_ASMF double lj_vm_log2(double); | 73 | LJ_ASMF double lj_vm_log2(double); |
65 | #else | 74 | #else |
66 | #define lj_vm_log2 log2 | 75 | #define lj_vm_log2 log2 |
67 | #endif | 76 | #endif |
77 | #if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS) | ||
78 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | ||
79 | #endif | ||
68 | 80 | ||
69 | #if LJ_HASJIT | 81 | #if LJ_HASJIT |
70 | #if LJ_TARGET_X86ORX64 | 82 | #if LJ_TARGET_X86ORX64 |
71 | LJ_ASMF void lj_vm_floor_sse(void); | 83 | LJ_ASMF void lj_vm_floor_sse(void); |
72 | LJ_ASMF void lj_vm_ceil_sse(void); | 84 | LJ_ASMF void lj_vm_ceil_sse(void); |
73 | LJ_ASMF void lj_vm_trunc_sse(void); | 85 | LJ_ASMF void lj_vm_trunc_sse(void); |
74 | LJ_ASMF void lj_vm_exp_x87(void); | ||
75 | LJ_ASMF void lj_vm_exp2_x87(void); | ||
76 | LJ_ASMF void lj_vm_pow_sse(void); | ||
77 | LJ_ASMF void lj_vm_powi_sse(void); | 86 | LJ_ASMF void lj_vm_powi_sse(void); |
87 | #define lj_vm_powi NULL | ||
78 | #else | 88 | #else |
79 | #if LJ_TARGET_PPC | 89 | LJ_ASMF double lj_vm_powi(double, int32_t); |
90 | #endif | ||
91 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 | ||
80 | #define lj_vm_trunc trunc | 92 | #define lj_vm_trunc trunc |
81 | #else | 93 | #else |
82 | LJ_ASMF double lj_vm_trunc(double); | 94 | LJ_ASMF double lj_vm_trunc(double); |
@@ -84,17 +96,10 @@ LJ_ASMF double lj_vm_trunc(double); | |||
84 | LJ_ASMF double lj_vm_trunc_sf(double); | 96 | LJ_ASMF double lj_vm_trunc_sf(double); |
85 | #endif | 97 | #endif |
86 | #endif | 98 | #endif |
87 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
88 | #ifdef LUAJIT_NO_EXP2 | ||
89 | LJ_ASMF double lj_vm_exp2(double); | ||
90 | #else | ||
91 | #define lj_vm_exp2 exp2 | ||
92 | #endif | ||
93 | #endif | ||
94 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | ||
95 | #if LJ_HASFFI | 99 | #if LJ_HASFFI |
96 | LJ_ASMF int lj_vm_errno(void); | 100 | LJ_ASMF int lj_vm_errno(void); |
97 | #endif | 101 | #endif |
102 | LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); | ||
98 | #endif | 103 | #endif |
99 | 104 | ||
100 | /* Continuations for metamethods. */ | 105 | /* Continuations for metamethods. */ |
@@ -104,8 +109,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */ | |||
104 | LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ | 109 | LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ |
105 | LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ | 110 | LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ |
106 | LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ | 111 | LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ |
107 | 112 | LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */ | |
108 | enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ | ||
109 | 113 | ||
110 | /* Start of the ASM code. */ | 114 | /* Start of the ASM code. */ |
111 | LJ_ASMF char lj_vm_asm_begin[]; | 115 | LJ_ASMF char lj_vm_asm_begin[]; |
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c index 704af254..c8491d82 100644 --- a/src/lj_vmevent.c +++ b/src/lj_vmevent.c | |||
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) | |||
27 | if (tv && tvisfunc(tv)) { | 27 | if (tv && tvisfunc(tv)) { |
28 | lj_state_checkstack(L, LUA_MINSTACK); | 28 | lj_state_checkstack(L, LUA_MINSTACK); |
29 | setfuncV(L, L->top++, funcV(tv)); | 29 | setfuncV(L, L->top++, funcV(tv)); |
30 | if (LJ_FR2) setnilV(L->top++); | ||
30 | return savestack(L, L->top); | 31 | return savestack(L, L->top); |
31 | } | 32 | } |
32 | } | 33 | } |
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index ff41ba28..536199d8 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
@@ -13,16 +13,29 @@ | |||
13 | #include "lj_ir.h" | 13 | #include "lj_ir.h" |
14 | #include "lj_vm.h" | 14 | #include "lj_vm.h" |
15 | 15 | ||
16 | /* -- Helper functions for generated machine code ------------------------- */ | 16 | /* -- Wrapper functions --------------------------------------------------- */ |
17 | 17 | ||
18 | #if LJ_TARGET_X86ORX64 | 18 | #if LJ_TARGET_X86 && __ELF__ && __PIC__ |
19 | /* Wrapper functions to avoid linker issues on OSX. */ | 19 | /* Wrapper functions to deal with the ELF/x86 PIC disaster. */ |
20 | LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } | 20 | LJ_FUNCA double lj_wrap_log(double x) { return log(x); } |
21 | LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } | 21 | LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } |
22 | LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } | 22 | LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } |
23 | LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } | ||
24 | LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } | ||
25 | LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } | ||
26 | LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } | ||
27 | LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } | ||
28 | LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } | ||
29 | LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | ||
30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | ||
31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | ||
32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | ||
33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | ||
23 | #endif | 35 | #endif |
24 | 36 | ||
25 | #if !LJ_TARGET_X86ORX64 | 37 | /* -- Helper functions for generated machine code ------------------------- */ |
38 | |||
26 | double lj_vm_foldarith(double x, double y, int op) | 39 | double lj_vm_foldarith(double x, double y, int op) |
27 | { | 40 | { |
28 | switch (op) { | 41 | switch (op) { |
@@ -35,37 +48,20 @@ double lj_vm_foldarith(double x, double y, int op) | |||
35 | case IR_NEG - IR_ADD: return -x; break; | 48 | case IR_NEG - IR_ADD: return -x; break; |
36 | case IR_ABS - IR_ADD: return fabs(x); break; | 49 | case IR_ABS - IR_ADD: return fabs(x); break; |
37 | #if LJ_HASJIT | 50 | #if LJ_HASJIT |
38 | case IR_ATAN2 - IR_ADD: return atan2(x, y); break; | ||
39 | case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; | 51 | case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; |
40 | case IR_MIN - IR_ADD: return x > y ? y : x; break; | 52 | case IR_MIN - IR_ADD: return x < y ? x : y; break; |
41 | case IR_MAX - IR_ADD: return x < y ? y : x; break; | 53 | case IR_MAX - IR_ADD: return x > y ? x : y; break; |
42 | #endif | 54 | #endif |
43 | default: return x; | 55 | default: return x; |
44 | } | 56 | } |
45 | } | 57 | } |
46 | #endif | ||
47 | |||
48 | #if LJ_HASJIT | ||
49 | 58 | ||
50 | #ifdef LUAJIT_NO_LOG2 | 59 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS |
51 | double lj_vm_log2(double a) | ||
52 | { | ||
53 | return log(a) * 1.4426950408889634074; | ||
54 | } | ||
55 | #endif | ||
56 | |||
57 | #ifdef LUAJIT_NO_EXP2 | ||
58 | double lj_vm_exp2(double a) | ||
59 | { | ||
60 | return exp(a * 0.6931471805599453); | ||
61 | } | ||
62 | #endif | ||
63 | |||
64 | #if !(LJ_TARGET_ARM || LJ_TARGET_PPC) | ||
65 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | 60 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) |
66 | { | 61 | { |
67 | uint32_t y, ua, ub; | 62 | uint32_t y, ua, ub; |
68 | lua_assert(b != 0); /* This must be checked before using this function. */ | 63 | /* This must be checked before using this function. */ |
64 | lj_assertX(b != 0, "modulo with zero divisor"); | ||
69 | ua = a < 0 ? (uint32_t)-a : (uint32_t)a; | 65 | ua = a < 0 ? (uint32_t)-a : (uint32_t)a; |
70 | ub = b < 0 ? (uint32_t)-b : (uint32_t)b; | 66 | ub = b < 0 ? (uint32_t)-b : (uint32_t)b; |
71 | y = ua % ub; | 67 | y = ua % ub; |
@@ -75,12 +71,21 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | |||
75 | } | 71 | } |
76 | #endif | 72 | #endif |
77 | 73 | ||
74 | #if LJ_HASJIT | ||
75 | |||
76 | #ifdef LUAJIT_NO_LOG2 | ||
77 | double lj_vm_log2(double a) | ||
78 | { | ||
79 | return log(a) * 1.4426950408889634074; | ||
80 | } | ||
81 | #endif | ||
82 | |||
78 | #if !LJ_TARGET_X86ORX64 | 83 | #if !LJ_TARGET_X86ORX64 |
79 | /* Unsigned x^k. */ | 84 | /* Unsigned x^k. */ |
80 | static double lj_vm_powui(double x, uint32_t k) | 85 | static double lj_vm_powui(double x, uint32_t k) |
81 | { | 86 | { |
82 | double y; | 87 | double y; |
83 | lua_assert(k != 0); | 88 | lj_assertX(k != 0, "pow with zero exponent"); |
84 | for (; (k & 1) == 0; k >>= 1) x *= x; | 89 | for (; (k & 1) == 0; k >>= 1) x *= x; |
85 | y = x; | 90 | y = x; |
86 | if ((k >>= 1) != 0) { | 91 | if ((k >>= 1) != 0) { |
@@ -107,6 +112,7 @@ double lj_vm_powi(double x, int32_t k) | |||
107 | else | 112 | else |
108 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | 113 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); |
109 | } | 114 | } |
115 | #endif | ||
110 | 116 | ||
111 | /* Computes fpm(x) for extended math functions. */ | 117 | /* Computes fpm(x) for extended math functions. */ |
112 | double lj_vm_foldfpm(double x, int fpm) | 118 | double lj_vm_foldfpm(double x, int fpm) |
@@ -116,19 +122,12 @@ double lj_vm_foldfpm(double x, int fpm) | |||
116 | case IRFPM_CEIL: return lj_vm_ceil(x); | 122 | case IRFPM_CEIL: return lj_vm_ceil(x); |
117 | case IRFPM_TRUNC: return lj_vm_trunc(x); | 123 | case IRFPM_TRUNC: return lj_vm_trunc(x); |
118 | case IRFPM_SQRT: return sqrt(x); | 124 | case IRFPM_SQRT: return sqrt(x); |
119 | case IRFPM_EXP: return exp(x); | ||
120 | case IRFPM_EXP2: return lj_vm_exp2(x); | ||
121 | case IRFPM_LOG: return log(x); | 125 | case IRFPM_LOG: return log(x); |
122 | case IRFPM_LOG2: return lj_vm_log2(x); | 126 | case IRFPM_LOG2: return lj_vm_log2(x); |
123 | case IRFPM_LOG10: return log10(x); | 127 | default: lj_assertX(0, "bad fpm %d", fpm); |
124 | case IRFPM_SIN: return sin(x); | ||
125 | case IRFPM_COS: return cos(x); | ||
126 | case IRFPM_TAN: return tan(x); | ||
127 | default: lua_assert(0); | ||
128 | } | 128 | } |
129 | return 0; | 129 | return 0; |
130 | } | 130 | } |
131 | #endif | ||
132 | 131 | ||
133 | #if LJ_HASFFI | 132 | #if LJ_HASFFI |
134 | int lj_vm_errno(void) | 133 | int lj_vm_errno(void) |
diff --git a/src/ljamalg.c b/src/ljamalg.c index 92f070da..cae8356c 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
@@ -3,16 +3,6 @@ | |||
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h |
4 | */ | 4 | */ |
5 | 5 | ||
6 | /* | ||
7 | +--------------------------------------------------------------------------+ | ||
8 | | WARNING: Compiling the amalgamation needs a lot of virtual memory | | ||
9 | | (around 300 MB with GCC 4.x)! If you don't have enough physical memory | | ||
10 | | your machine will start swapping to disk and the compile will not finish | | ||
11 | | within a reasonable amount of time. | | ||
12 | | So either compile on a bigger machine or use the non-amalgamated build. | | ||
13 | +--------------------------------------------------------------------------+ | ||
14 | */ | ||
15 | |||
16 | #define ljamalg_c | 6 | #define ljamalg_c |
17 | #define LUA_CORE | 7 | #define LUA_CORE |
18 | 8 | ||
@@ -28,23 +18,30 @@ | |||
28 | #include "lua.h" | 18 | #include "lua.h" |
29 | #include "lauxlib.h" | 19 | #include "lauxlib.h" |
30 | 20 | ||
21 | #include "lj_assert.c" | ||
31 | #include "lj_gc.c" | 22 | #include "lj_gc.c" |
32 | #include "lj_err.c" | 23 | #include "lj_err.c" |
33 | #include "lj_char.c" | 24 | #include "lj_char.c" |
34 | #include "lj_bc.c" | 25 | #include "lj_bc.c" |
35 | #include "lj_obj.c" | 26 | #include "lj_obj.c" |
27 | #include "lj_buf.c" | ||
36 | #include "lj_str.c" | 28 | #include "lj_str.c" |
37 | #include "lj_tab.c" | 29 | #include "lj_tab.c" |
38 | #include "lj_func.c" | 30 | #include "lj_func.c" |
39 | #include "lj_udata.c" | 31 | #include "lj_udata.c" |
40 | #include "lj_meta.c" | 32 | #include "lj_meta.c" |
41 | #include "lj_debug.c" | 33 | #include "lj_debug.c" |
34 | #include "lj_prng.c" | ||
42 | #include "lj_state.c" | 35 | #include "lj_state.c" |
43 | #include "lj_dispatch.c" | 36 | #include "lj_dispatch.c" |
44 | #include "lj_vmevent.c" | 37 | #include "lj_vmevent.c" |
45 | #include "lj_vmmath.c" | 38 | #include "lj_vmmath.c" |
46 | #include "lj_strscan.c" | 39 | #include "lj_strscan.c" |
40 | #include "lj_strfmt.c" | ||
41 | #include "lj_strfmt_num.c" | ||
42 | #include "lj_serialize.c" | ||
47 | #include "lj_api.c" | 43 | #include "lj_api.c" |
44 | #include "lj_profile.c" | ||
48 | #include "lj_lex.c" | 45 | #include "lj_lex.c" |
49 | #include "lj_parse.c" | 46 | #include "lj_parse.c" |
50 | #include "lj_bcread.c" | 47 | #include "lj_bcread.c" |
@@ -89,5 +86,6 @@ | |||
89 | #include "lib_bit.c" | 86 | #include "lib_bit.c" |
90 | #include "lib_jit.c" | 87 | #include "lib_jit.c" |
91 | #include "lib_ffi.c" | 88 | #include "lib_ffi.c" |
89 | #include "lib_buffer.c" | ||
92 | #include "lib_init.c" | 90 | #include "lib_init.c" |
93 | 91 | ||
@@ -39,7 +39,8 @@ | |||
39 | #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) | 39 | #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) |
40 | 40 | ||
41 | 41 | ||
42 | /* thread status; 0 is OK */ | 42 | /* thread status */ |
43 | #define LUA_OK 0 | ||
43 | #define LUA_YIELD 1 | 44 | #define LUA_YIELD 1 |
44 | #define LUA_ERRRUN 2 | 45 | #define LUA_ERRRUN 2 |
45 | #define LUA_ERRSYNTAX 3 | 46 | #define LUA_ERRSYNTAX 3 |
@@ -226,6 +227,7 @@ LUA_API int (lua_status) (lua_State *L); | |||
226 | #define LUA_GCSTEP 5 | 227 | #define LUA_GCSTEP 5 |
227 | #define LUA_GCSETPAUSE 6 | 228 | #define LUA_GCSETPAUSE 6 |
228 | #define LUA_GCSETSTEPMUL 7 | 229 | #define LUA_GCSETSTEPMUL 7 |
230 | #define LUA_GCISRUNNING 9 | ||
229 | 231 | ||
230 | LUA_API int (lua_gc) (lua_State *L, int what, int data); | 232 | LUA_API int (lua_gc) (lua_State *L, int what, int data); |
231 | 233 | ||
@@ -346,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n); | |||
346 | LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); | 348 | LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); |
347 | LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, | 349 | LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, |
348 | const char *chunkname, const char *mode); | 350 | const char *chunkname, const char *mode); |
351 | LUA_API const lua_Number *lua_version (lua_State *L); | ||
352 | LUA_API void lua_copy (lua_State *L, int fromidx, int toidx); | ||
353 | LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); | ||
354 | LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); | ||
355 | |||
356 | /* From Lua 5.3. */ | ||
357 | LUA_API int lua_isyieldable (lua_State *L); | ||
349 | 358 | ||
350 | 359 | ||
351 | struct lua_Debug { | 360 | struct lua_Debug { |
diff --git a/src/luaconf.h b/src/luaconf.h index 9d587e9d..e8790c1d 100644 --- a/src/luaconf.h +++ b/src/luaconf.h | |||
@@ -37,7 +37,7 @@ | |||
37 | #endif | 37 | #endif |
38 | #define LUA_LROOT "/usr/local" | 38 | #define LUA_LROOT "/usr/local" |
39 | #define LUA_LUADIR "/lua/5.1/" | 39 | #define LUA_LUADIR "/lua/5.1/" |
40 | #define LUA_LJDIR "/luajit-2.0.5/" | 40 | #define LUA_LJDIR "/luajit-2.1.0-beta3/" |
41 | 41 | ||
42 | #ifdef LUA_ROOT | 42 | #ifdef LUA_ROOT |
43 | #define LUA_JROOT LUA_ROOT | 43 | #define LUA_JROOT LUA_ROOT |
@@ -79,7 +79,7 @@ | |||
79 | #define LUA_IGMARK "-" | 79 | #define LUA_IGMARK "-" |
80 | #define LUA_PATH_CONFIG \ | 80 | #define LUA_PATH_CONFIG \ |
81 | LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ | 81 | LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ |
82 | LUA_EXECDIR "\n" LUA_IGMARK | 82 | LUA_EXECDIR "\n" LUA_IGMARK "\n" |
83 | 83 | ||
84 | /* Quoting in error messages. */ | 84 | /* Quoting in error messages. */ |
85 | #define LUA_QL(x) "'" x "'" | 85 | #define LUA_QL(x) "'" x "'" |
@@ -92,10 +92,6 @@ | |||
92 | #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ | 92 | #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ |
93 | #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ | 93 | #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ |
94 | 94 | ||
95 | /* Compatibility with older library function names. */ | ||
96 | #define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */ | ||
97 | #define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */ | ||
98 | |||
99 | /* Configuration for the frontend (the luajit executable). */ | 95 | /* Configuration for the frontend (the luajit executable). */ |
100 | #if defined(luajit_c) | 96 | #if defined(luajit_c) |
101 | #define LUA_PROGNAME "luajit" /* Fallback frontend name. */ | 97 | #define LUA_PROGNAME "luajit" /* Fallback frontend name. */ |
@@ -140,7 +136,7 @@ | |||
140 | 136 | ||
141 | #define LUALIB_API LUA_API | 137 | #define LUALIB_API LUA_API |
142 | 138 | ||
143 | /* Support for internal assertions. */ | 139 | /* Compatibility support for assertions. */ |
144 | #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) | 140 | #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) |
145 | #include <assert.h> | 141 | #include <assert.h> |
146 | #endif | 142 | #endif |
diff --git a/src/luajit.c b/src/luajit.c index e723e522..4e7a97d4 100644 --- a/src/luajit.c +++ b/src/luajit.c | |||
@@ -61,8 +61,9 @@ static void laction(int i) | |||
61 | 61 | ||
62 | static void print_usage(void) | 62 | static void print_usage(void) |
63 | { | 63 | { |
64 | fprintf(stderr, | 64 | fputs("usage: ", stderr); |
65 | "usage: %s [options]... [script [args]...].\n" | 65 | fputs(progname, stderr); |
66 | fputs(" [options]... [script [args]...].\n" | ||
66 | "Available options are:\n" | 67 | "Available options are:\n" |
67 | " -e chunk Execute string " LUA_QL("chunk") ".\n" | 68 | " -e chunk Execute string " LUA_QL("chunk") ".\n" |
68 | " -l name Require library " LUA_QL("name") ".\n" | 69 | " -l name Require library " LUA_QL("name") ".\n" |
@@ -73,16 +74,14 @@ static void print_usage(void) | |||
73 | " -v Show version information.\n" | 74 | " -v Show version information.\n" |
74 | " -E Ignore environment variables.\n" | 75 | " -E Ignore environment variables.\n" |
75 | " -- Stop handling options.\n" | 76 | " -- Stop handling options.\n" |
76 | " - Execute stdin and stop handling options.\n" | 77 | " - Execute stdin and stop handling options.\n", stderr); |
77 | , | ||
78 | progname); | ||
79 | fflush(stderr); | 78 | fflush(stderr); |
80 | } | 79 | } |
81 | 80 | ||
82 | static void l_message(const char *pname, const char *msg) | 81 | static void l_message(const char *pname, const char *msg) |
83 | { | 82 | { |
84 | if (pname) fprintf(stderr, "%s: ", pname); | 83 | if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); } |
85 | fprintf(stderr, "%s\n", msg); | 84 | fputs(msg, stderr); fputc('\n', stderr); |
86 | fflush(stderr); | 85 | fflush(stderr); |
87 | } | 86 | } |
88 | 87 | ||
@@ -125,7 +124,7 @@ static int docall(lua_State *L, int narg, int clear) | |||
125 | #endif | 124 | #endif |
126 | lua_remove(L, base); /* remove traceback function */ | 125 | lua_remove(L, base); /* remove traceback function */ |
127 | /* force a complete garbage collection in case of errors */ | 126 | /* force a complete garbage collection in case of errors */ |
128 | if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); | 127 | if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0); |
129 | return status; | 128 | return status; |
130 | } | 129 | } |
131 | 130 | ||
@@ -154,22 +153,15 @@ static void print_jit_status(lua_State *L) | |||
154 | lua_settop(L, 0); /* clear stack */ | 153 | lua_settop(L, 0); /* clear stack */ |
155 | } | 154 | } |
156 | 155 | ||
157 | static int getargs(lua_State *L, char **argv, int n) | 156 | static void createargtable(lua_State *L, char **argv, int argc, int argf) |
158 | { | 157 | { |
159 | int narg; | ||
160 | int i; | 158 | int i; |
161 | int argc = 0; | 159 | lua_createtable(L, argc - argf, argf); |
162 | while (argv[argc]) argc++; /* count total number of arguments */ | ||
163 | narg = argc - (n + 1); /* number of arguments to the script */ | ||
164 | luaL_checkstack(L, narg + 3, "too many arguments to script"); | ||
165 | for (i = n+1; i < argc; i++) | ||
166 | lua_pushstring(L, argv[i]); | ||
167 | lua_createtable(L, narg, n + 1); | ||
168 | for (i = 0; i < argc; i++) { | 160 | for (i = 0; i < argc; i++) { |
169 | lua_pushstring(L, argv[i]); | 161 | lua_pushstring(L, argv[i]); |
170 | lua_rawseti(L, -2, i - n); | 162 | lua_rawseti(L, -2, i - argf); |
171 | } | 163 | } |
172 | return narg; | 164 | lua_setglobal(L, "arg"); |
173 | } | 165 | } |
174 | 166 | ||
175 | static int dofile(lua_State *L, const char *name) | 167 | static int dofile(lua_State *L, const char *name) |
@@ -258,9 +250,9 @@ static void dotty(lua_State *L) | |||
258 | const char *oldprogname = progname; | 250 | const char *oldprogname = progname; |
259 | progname = NULL; | 251 | progname = NULL; |
260 | while ((status = loadline(L)) != -1) { | 252 | while ((status = loadline(L)) != -1) { |
261 | if (status == 0) status = docall(L, 0, 0); | 253 | if (status == LUA_OK) status = docall(L, 0, 0); |
262 | report(L, status); | 254 | report(L, status); |
263 | if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ | 255 | if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */ |
264 | lua_getglobal(L, "print"); | 256 | lua_getglobal(L, "print"); |
265 | lua_insert(L, 1); | 257 | lua_insert(L, 1); |
266 | if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) | 258 | if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) |
@@ -275,21 +267,30 @@ static void dotty(lua_State *L) | |||
275 | progname = oldprogname; | 267 | progname = oldprogname; |
276 | } | 268 | } |
277 | 269 | ||
278 | static int handle_script(lua_State *L, char **argv, int n) | 270 | static int handle_script(lua_State *L, char **argx) |
279 | { | 271 | { |
280 | int status; | 272 | int status; |
281 | const char *fname; | 273 | const char *fname = argx[0]; |
282 | int narg = getargs(L, argv, n); /* collect arguments */ | 274 | if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0) |
283 | lua_setglobal(L, "arg"); | ||
284 | fname = argv[n]; | ||
285 | if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0) | ||
286 | fname = NULL; /* stdin */ | 275 | fname = NULL; /* stdin */ |
287 | status = luaL_loadfile(L, fname); | 276 | status = luaL_loadfile(L, fname); |
288 | lua_insert(L, -(narg+1)); | 277 | if (status == LUA_OK) { |
289 | if (status == 0) | 278 | /* Fetch args from arg table. LUA_INIT or -e might have changed them. */ |
279 | int narg = 0; | ||
280 | lua_getglobal(L, "arg"); | ||
281 | if (lua_istable(L, -1)) { | ||
282 | do { | ||
283 | narg++; | ||
284 | lua_rawgeti(L, -narg, narg); | ||
285 | } while (!lua_isnil(L, -1)); | ||
286 | lua_pop(L, 1); | ||
287 | lua_remove(L, -narg); | ||
288 | narg--; | ||
289 | } else { | ||
290 | lua_pop(L, 1); | ||
291 | } | ||
290 | status = docall(L, narg, 0); | 292 | status = docall(L, narg, 0); |
291 | else | 293 | } |
292 | lua_pop(L, narg); | ||
293 | return report(L, status); | 294 | return report(L, status); |
294 | } | 295 | } |
295 | 296 | ||
@@ -386,7 +387,8 @@ static int dobytecode(lua_State *L, char **argv) | |||
386 | } | 387 | } |
387 | for (argv++; *argv != NULL; narg++, argv++) | 388 | for (argv++; *argv != NULL; narg++, argv++) |
388 | lua_pushstring(L, *argv); | 389 | lua_pushstring(L, *argv); |
389 | return report(L, lua_pcall(L, narg, 0, 0)); | 390 | report(L, lua_pcall(L, narg, 0, 0)); |
391 | return -1; | ||
390 | } | 392 | } |
391 | 393 | ||
392 | /* check that argument has no extra characters at the end */ | 394 | /* check that argument has no extra characters at the end */ |
@@ -407,7 +409,7 @@ static int collectargs(char **argv, int *flags) | |||
407 | switch (argv[i][1]) { /* Check option. */ | 409 | switch (argv[i][1]) { /* Check option. */ |
408 | case '-': | 410 | case '-': |
409 | notail(argv[i]); | 411 | notail(argv[i]); |
410 | return (argv[i+1] != NULL ? i+1 : 0); | 412 | return i+1; |
411 | case '\0': | 413 | case '\0': |
412 | return i; | 414 | return i; |
413 | case 'i': | 415 | case 'i': |
@@ -433,23 +435,23 @@ static int collectargs(char **argv, int *flags) | |||
433 | case 'b': /* LuaJIT extension */ | 435 | case 'b': /* LuaJIT extension */ |
434 | if (*flags) return -1; | 436 | if (*flags) return -1; |
435 | *flags |= FLAGS_EXEC; | 437 | *flags |= FLAGS_EXEC; |
436 | return 0; | 438 | return i+1; |
437 | case 'E': | 439 | case 'E': |
438 | *flags |= FLAGS_NOENV; | 440 | *flags |= FLAGS_NOENV; |
439 | break; | 441 | break; |
440 | default: return -1; /* invalid option */ | 442 | default: return -1; /* invalid option */ |
441 | } | 443 | } |
442 | } | 444 | } |
443 | return 0; | 445 | return i; |
444 | } | 446 | } |
445 | 447 | ||
446 | static int runargs(lua_State *L, char **argv, int n) | 448 | static int runargs(lua_State *L, char **argv, int argn) |
447 | { | 449 | { |
448 | int i; | 450 | int i; |
449 | for (i = 1; i < n; i++) { | 451 | for (i = 1; i < argn; i++) { |
450 | if (argv[i] == NULL) continue; | 452 | if (argv[i] == NULL) continue; |
451 | lua_assert(argv[i][0] == '-'); | 453 | lua_assert(argv[i][0] == '-'); |
452 | switch (argv[i][1]) { /* option */ | 454 | switch (argv[i][1]) { |
453 | case 'e': { | 455 | case 'e': { |
454 | const char *chunk = argv[i] + 2; | 456 | const char *chunk = argv[i] + 2; |
455 | if (*chunk == '\0') chunk = argv[++i]; | 457 | if (*chunk == '\0') chunk = argv[++i]; |
@@ -463,10 +465,10 @@ static int runargs(lua_State *L, char **argv, int n) | |||
463 | if (*filename == '\0') filename = argv[++i]; | 465 | if (*filename == '\0') filename = argv[++i]; |
464 | lua_assert(filename != NULL); | 466 | lua_assert(filename != NULL); |
465 | if (dolibrary(L, filename)) | 467 | if (dolibrary(L, filename)) |
466 | return 1; /* stop if file fails */ | 468 | return 1; |
467 | break; | 469 | break; |
468 | } | 470 | } |
469 | case 'j': { /* LuaJIT extension */ | 471 | case 'j': { /* LuaJIT extension. */ |
470 | const char *cmd = argv[i] + 2; | 472 | const char *cmd = argv[i] + 2; |
471 | if (*cmd == '\0') cmd = argv[++i]; | 473 | if (*cmd == '\0') cmd = argv[++i]; |
472 | lua_assert(cmd != NULL); | 474 | lua_assert(cmd != NULL); |
@@ -474,16 +476,16 @@ static int runargs(lua_State *L, char **argv, int n) | |||
474 | return 1; | 476 | return 1; |
475 | break; | 477 | break; |
476 | } | 478 | } |
477 | case 'O': /* LuaJIT extension */ | 479 | case 'O': /* LuaJIT extension. */ |
478 | if (dojitopt(L, argv[i] + 2)) | 480 | if (dojitopt(L, argv[i] + 2)) |
479 | return 1; | 481 | return 1; |
480 | break; | 482 | break; |
481 | case 'b': /* LuaJIT extension */ | 483 | case 'b': /* LuaJIT extension. */ |
482 | return dobytecode(L, argv+i); | 484 | return dobytecode(L, argv+i); |
483 | default: break; | 485 | default: break; |
484 | } | 486 | } |
485 | } | 487 | } |
486 | return 0; | 488 | return LUA_OK; |
487 | } | 489 | } |
488 | 490 | ||
489 | static int handle_luainit(lua_State *L) | 491 | static int handle_luainit(lua_State *L) |
@@ -494,7 +496,7 @@ static int handle_luainit(lua_State *L) | |||
494 | const char *init = getenv(LUA_INIT); | 496 | const char *init = getenv(LUA_INIT); |
495 | #endif | 497 | #endif |
496 | if (init == NULL) | 498 | if (init == NULL) |
497 | return 0; /* status OK */ | 499 | return LUA_OK; |
498 | else if (init[0] == '@') | 500 | else if (init[0] == '@') |
499 | return dofile(L, init+1); | 501 | return dofile(L, init+1); |
500 | else | 502 | else |
@@ -511,45 +513,57 @@ static int pmain(lua_State *L) | |||
511 | { | 513 | { |
512 | struct Smain *s = &smain; | 514 | struct Smain *s = &smain; |
513 | char **argv = s->argv; | 515 | char **argv = s->argv; |
514 | int script; | 516 | int argn; |
515 | int flags = 0; | 517 | int flags = 0; |
516 | globalL = L; | 518 | globalL = L; |
517 | if (argv[0] && argv[0][0]) progname = argv[0]; | 519 | if (argv[0] && argv[0][0]) progname = argv[0]; |
518 | LUAJIT_VERSION_SYM(); /* linker-enforced version check */ | 520 | |
519 | script = collectargs(argv, &flags); | 521 | LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ |
520 | if (script < 0) { /* invalid args? */ | 522 | |
523 | argn = collectargs(argv, &flags); | ||
524 | if (argn < 0) { /* Invalid args? */ | ||
521 | print_usage(); | 525 | print_usage(); |
522 | s->status = 1; | 526 | s->status = 1; |
523 | return 0; | 527 | return 0; |
524 | } | 528 | } |
529 | |||
525 | if ((flags & FLAGS_NOENV)) { | 530 | if ((flags & FLAGS_NOENV)) { |
526 | lua_pushboolean(L, 1); | 531 | lua_pushboolean(L, 1); |
527 | lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); | 532 | lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); |
528 | } | 533 | } |
529 | lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ | 534 | |
530 | luaL_openlibs(L); /* open libraries */ | 535 | /* Stop collector during library initialization. */ |
536 | lua_gc(L, LUA_GCSTOP, 0); | ||
537 | luaL_openlibs(L); | ||
531 | lua_gc(L, LUA_GCRESTART, -1); | 538 | lua_gc(L, LUA_GCRESTART, -1); |
539 | |||
540 | createargtable(L, argv, s->argc, argn); | ||
541 | |||
532 | if (!(flags & FLAGS_NOENV)) { | 542 | if (!(flags & FLAGS_NOENV)) { |
533 | s->status = handle_luainit(L); | 543 | s->status = handle_luainit(L); |
534 | if (s->status != 0) return 0; | 544 | if (s->status != LUA_OK) return 0; |
535 | } | 545 | } |
546 | |||
536 | if ((flags & FLAGS_VERSION)) print_version(); | 547 | if ((flags & FLAGS_VERSION)) print_version(); |
537 | s->status = runargs(L, argv, (script > 0) ? script : s->argc); | 548 | |
538 | if (s->status != 0) return 0; | 549 | s->status = runargs(L, argv, argn); |
539 | if (script) { | 550 | if (s->status != LUA_OK) return 0; |
540 | s->status = handle_script(L, argv, script); | 551 | |
541 | if (s->status != 0) return 0; | 552 | if (s->argc > argn) { |
553 | s->status = handle_script(L, argv + argn); | ||
554 | if (s->status != LUA_OK) return 0; | ||
542 | } | 555 | } |
556 | |||
543 | if ((flags & FLAGS_INTERACTIVE)) { | 557 | if ((flags & FLAGS_INTERACTIVE)) { |
544 | print_jit_status(L); | 558 | print_jit_status(L); |
545 | dotty(L); | 559 | dotty(L); |
546 | } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { | 560 | } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { |
547 | if (lua_stdin_is_tty()) { | 561 | if (lua_stdin_is_tty()) { |
548 | print_version(); | 562 | print_version(); |
549 | print_jit_status(L); | 563 | print_jit_status(L); |
550 | dotty(L); | 564 | dotty(L); |
551 | } else { | 565 | } else { |
552 | dofile(L, NULL); /* executes stdin as a file */ | 566 | dofile(L, NULL); /* Executes stdin as a file. */ |
553 | } | 567 | } |
554 | } | 568 | } |
555 | return 0; | 569 | return 0; |
@@ -558,7 +572,7 @@ static int pmain(lua_State *L) | |||
558 | int main(int argc, char **argv) | 572 | int main(int argc, char **argv) |
559 | { | 573 | { |
560 | int status; | 574 | int status; |
561 | lua_State *L = lua_open(); /* create state */ | 575 | lua_State *L = lua_open(); |
562 | if (L == NULL) { | 576 | if (L == NULL) { |
563 | l_message(argv[0], "cannot create state: not enough memory"); | 577 | l_message(argv[0], "cannot create state: not enough memory"); |
564 | return EXIT_FAILURE; | 578 | return EXIT_FAILURE; |
@@ -568,6 +582,6 @@ int main(int argc, char **argv) | |||
568 | status = lua_cpcall(L, pmain, NULL); | 582 | status = lua_cpcall(L, pmain, NULL); |
569 | report(L, status); | 583 | report(L, status); |
570 | lua_close(L); | 584 | lua_close(L); |
571 | return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; | 585 | return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS; |
572 | } | 586 | } |
573 | 587 | ||
diff --git a/src/luajit.h b/src/luajit.h index 8b666f63..31f1eb1f 100644 --- a/src/luajit.h +++ b/src/luajit.h | |||
@@ -30,9 +30,9 @@ | |||
30 | 30 | ||
31 | #include "lua.h" | 31 | #include "lua.h" |
32 | 32 | ||
33 | #define LUAJIT_VERSION "LuaJIT 2.0.5" | 33 | #define LUAJIT_VERSION "LuaJIT 2.1.0-beta3" |
34 | #define LUAJIT_VERSION_NUM 20005 /* Version 2.0.5 = 02.00.05. */ | 34 | #define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ |
35 | #define LUAJIT_VERSION_SYM luaJIT_version_2_0_5 | 35 | #define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3 |
36 | #define LUAJIT_COPYRIGHT "Copyright (C) 2005-2022 Mike Pall" | 36 | #define LUAJIT_COPYRIGHT "Copyright (C) 2005-2022 Mike Pall" |
37 | #define LUAJIT_URL "https://luajit.org/" | 37 | #define LUAJIT_URL "https://luajit.org/" |
38 | 38 | ||
@@ -64,6 +64,15 @@ enum { | |||
64 | /* Control the JIT engine. */ | 64 | /* Control the JIT engine. */ |
65 | LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); | 65 | LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); |
66 | 66 | ||
67 | /* Low-overhead profiling API. */ | ||
68 | typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, | ||
69 | int samples, int vmstate); | ||
70 | LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, | ||
71 | luaJIT_profile_callback cb, void *data); | ||
72 | LUA_API void luaJIT_profile_stop(lua_State *L); | ||
73 | LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, | ||
74 | int depth, size_t *len); | ||
75 | |||
67 | /* Enforce (dynamic) linker error for version mismatches. Call from main. */ | 76 | /* Enforce (dynamic) linker error for version mismatches. Call from main. */ |
68 | LUA_API void LUAJIT_VERSION_SYM(void); | 77 | LUA_API void LUAJIT_VERSION_SYM(void); |
69 | 78 | ||
diff --git a/src/lualib.h b/src/lualib.h index 4a2f8692..87748456 100644 --- a/src/lualib.h +++ b/src/lualib.h | |||
@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L); | |||
33 | LUALIB_API int luaopen_bit(lua_State *L); | 33 | LUALIB_API int luaopen_bit(lua_State *L); |
34 | LUALIB_API int luaopen_jit(lua_State *L); | 34 | LUALIB_API int luaopen_jit(lua_State *L); |
35 | LUALIB_API int luaopen_ffi(lua_State *L); | 35 | LUALIB_API int luaopen_ffi(lua_State *L); |
36 | LUALIB_API int luaopen_string_buffer(lua_State *L); | ||
36 | 37 | ||
37 | LUALIB_API void luaL_openlibs(lua_State *L); | 38 | LUALIB_API void luaL_openlibs(lua_State *L); |
38 | 39 | ||
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 045965f8..d323d8d4 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat | |||
@@ -5,6 +5,7 @@ | |||
5 | @rem Then cd to this directory and run this script. Use the following | 5 | @rem Then cd to this directory and run this script. Use the following |
6 | @rem options (in order), if needed. The default is a dynamic release build. | 6 | @rem options (in order), if needed. The default is a dynamic release build. |
7 | @rem | 7 | @rem |
8 | @rem nogc64 disable LJ_GC64 mode for x64 | ||
8 | @rem debug emit debug symbols | 9 | @rem debug emit debug symbols |
9 | @rem amalg amalgamated build | 10 | @rem amalg amalgamated build |
10 | @rem static static linkage | 11 | @rem static static linkage |
@@ -20,10 +21,11 @@ | |||
20 | @set LJLIB=lib /nologo /nodefaultlib | 21 | @set LJLIB=lib /nologo /nodefaultlib |
21 | @set DASMDIR=..\dynasm | 22 | @set DASMDIR=..\dynasm |
22 | @set DASM=%DASMDIR%\dynasm.lua | 23 | @set DASM=%DASMDIR%\dynasm.lua |
24 | @set DASC=vm_x64.dasc | ||
23 | @set LJDLLNAME=lua51.dll | 25 | @set LJDLLNAME=lua51.dll |
24 | @set LJLIBNAME=lua51.lib | 26 | @set LJLIBNAME=lua51.lib |
25 | @set BUILDTYPE=release | 27 | @set BUILDTYPE=release |
26 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c | 28 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c |
27 | 29 | ||
28 | %LJCOMPILE% host\minilua.c | 30 | %LJCOMPILE% host\minilua.c |
29 | @if errorlevel 1 goto :BAD | 31 | @if errorlevel 1 goto :BAD |
@@ -36,10 +38,17 @@ if exist minilua.exe.manifest^ | |||
36 | @set LJARCH=x64 | 38 | @set LJARCH=x64 |
37 | @minilua | 39 | @minilua |
38 | @if errorlevel 8 goto :X64 | 40 | @if errorlevel 8 goto :X64 |
41 | @set DASC=vm_x86.dasc | ||
39 | @set DASMFLAGS=-D WIN -D JIT -D FFI | 42 | @set DASMFLAGS=-D WIN -D JIT -D FFI |
40 | @set LJARCH=x86 | 43 | @set LJARCH=x86 |
44 | @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 | ||
41 | :X64 | 45 | :X64 |
42 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc | 46 | @if "%1" neq "nogc64" goto :GC64 |
47 | @shift | ||
48 | @set DASC=vm_x86.dasc | ||
49 | @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 | ||
50 | :GC64 | ||
51 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% | ||
43 | @if errorlevel 1 goto :BAD | 52 | @if errorlevel 1 goto :BAD |
44 | 53 | ||
45 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c | 54 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c |
@@ -68,6 +77,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c | |||
68 | @shift | 77 | @shift |
69 | @set BUILDTYPE=debug | 78 | @set BUILDTYPE=debug |
70 | @set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% | 79 | @set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% |
80 | @set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no | ||
71 | :NODEBUG | 81 | :NODEBUG |
72 | @set LJLINK=%LJLINK% /%BUILDTYPE% | 82 | @set LJLINK=%LJLINK% /%BUILDTYPE% |
73 | @if "%1"=="amalg" goto :AMALGDLL | 83 | @if "%1"=="amalg" goto :AMALGDLL |
diff --git a/src/ps4build.bat b/src/ps4build.bat index 337a44fa..fdd09d81 100644 --- a/src/ps4build.bat +++ b/src/ps4build.bat | |||
@@ -2,7 +2,19 @@ | |||
2 | @rem Donated to the public domain. | 2 | @rem Donated to the public domain. |
3 | @rem | 3 | @rem |
4 | @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) | 4 | @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) |
5 | @rem or "VS2015 x64 Native Tools Command Prompt". | ||
6 | @rem | ||
5 | @rem Then cd to this directory and run this script. | 7 | @rem Then cd to this directory and run this script. |
8 | @rem | ||
9 | @rem Recommended invocation: | ||
10 | @rem | ||
11 | @rem ps4build release build, amalgamated, 64-bit GC | ||
12 | @rem ps4build debug debug build, amalgamated, 64-bit GC | ||
13 | @rem | ||
14 | @rem Additional command-line options (not generally recommended): | ||
15 | @rem | ||
16 | @rem gc32 (before debug) 32-bit GC | ||
17 | @rem noamalg (after debug) non-amalgamated build | ||
6 | 18 | ||
7 | @if not defined INCLUDE goto :FAIL | 19 | @if not defined INCLUDE goto :FAIL |
8 | @if not defined SCE_ORBIS_SDK_DIR goto :FAIL | 20 | @if not defined SCE_ORBIS_SDK_DIR goto :FAIL |
@@ -14,7 +26,15 @@ | |||
14 | @set LJMT=mt /nologo | 26 | @set LJMT=mt /nologo |
15 | @set DASMDIR=..\dynasm | 27 | @set DASMDIR=..\dynasm |
16 | @set DASM=%DASMDIR%\dynasm.lua | 28 | @set DASM=%DASMDIR%\dynasm.lua |
17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c | 29 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c |
30 | @set GC64= | ||
31 | @set DASC=vm_x64.dasc | ||
32 | |||
33 | @if "%1" neq "gc32" goto :NOGC32 | ||
34 | @shift | ||
35 | @set GC64=-DLUAJIT_DISABLE_GC64 | ||
36 | @set DASC=vm_x86.dasc | ||
37 | :NOGC32 | ||
18 | 38 | ||
19 | %LJCOMPILE% host\minilua.c | 39 | %LJCOMPILE% host\minilua.c |
20 | @if errorlevel 1 goto :BAD | 40 | @if errorlevel 1 goto :BAD |
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^ | |||
28 | @if not errorlevel 8 goto :FAIL | 48 | @if not errorlevel 8 goto :FAIL |
29 | 49 | ||
30 | @set DASMFLAGS=-D P64 -D NO_UNWIND | 50 | @set DASMFLAGS=-D P64 -D NO_UNWIND |
31 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc | 51 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% |
32 | @if errorlevel 1 goto :BAD | 52 | @if errorlevel 1 goto :BAD |
33 | 53 | ||
34 | %LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c | 54 | %LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c |
35 | @if errorlevel 1 goto :BAD | 55 | @if errorlevel 1 goto :BAD |
36 | %LJLINK% /out:buildvm.exe buildvm*.obj | 56 | %LJLINK% /out:buildvm.exe buildvm*.obj |
37 | @if errorlevel 1 goto :BAD | 57 | @if errorlevel 1 goto :BAD |
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c | |||
54 | @if errorlevel 1 goto :BAD | 74 | @if errorlevel 1 goto :BAD |
55 | 75 | ||
56 | @rem ---- Cross compiler ---- | 76 | @rem ---- Cross compiler ---- |
57 | @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI | 77 | @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64% |
58 | @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus | 78 | @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus |
59 | @set INCLUDE="" | 79 | @set INCLUDE="" |
60 | 80 | ||
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s | |||
63 | @if "%1" neq "debug" goto :NODEBUG | 83 | @if "%1" neq "debug" goto :NODEBUG |
64 | @shift | 84 | @shift |
65 | @set LJCOMPILE=%LJCOMPILE% -g -O0 | 85 | @set LJCOMPILE=%LJCOMPILE% -g -O0 |
66 | @set TARGETLIB=libluajitD.a | 86 | @set TARGETLIB=libluajitD_ps4.a |
67 | goto :BUILD | 87 | goto :BUILD |
68 | :NODEBUG | 88 | :NODEBUG |
69 | @set LJCOMPILE=%LJCOMPILE% -O2 | 89 | @set LJCOMPILE=%LJCOMPILE% -O2 |
70 | @set TARGETLIB=libluajit.a | 90 | @set TARGETLIB=libluajit_ps4.a |
71 | :BUILD | 91 | :BUILD |
72 | del %TARGETLIB% | 92 | del %TARGETLIB% |
73 | @if "%1"=="amalg" goto :AMALG | 93 | @if "%1" neq "noamalg" goto :AMALG |
74 | for %%f in (lj_*.c lib_*.c) do ( | 94 | for %%f in (lj_*.c lib_*.c) do ( |
75 | %LJCOMPILE% %%f | 95 | %LJCOMPILE% %%f |
76 | @if errorlevel 1 goto :BAD | 96 | @if errorlevel 1 goto :BAD |
diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat index 3991dc65..2980e157 100644 --- a/src/psvitabuild.bat +++ b/src/psvitabuild.bat | |||
@@ -14,7 +14,7 @@ | |||
14 | @set LJMT=mt /nologo | 14 | @set LJMT=mt /nologo |
15 | @set DASMDIR=..\dynasm | 15 | @set DASMDIR=..\dynasm |
16 | @set DASM=%DASMDIR%\dynasm.lua | 16 | @set DASM=%DASMDIR%\dynasm.lua |
17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c | 17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c |
18 | 18 | ||
19 | %LJCOMPILE% host\minilua.c | 19 | %LJCOMPILE% host\minilua.c |
20 | @if errorlevel 1 goto :BAD | 20 | @if errorlevel 1 goto :BAD |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 4a13c68b..770c1602 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
@@ -99,6 +99,7 @@ | |||
99 | |.type NODE, Node | 99 | |.type NODE, Node |
100 | |.type NARGS8, int | 100 | |.type NARGS8, int |
101 | |.type TRACE, GCtrace | 101 | |.type TRACE, GCtrace |
102 | |.type SBUF, SBuf | ||
102 | | | 103 | | |
103 | |//----------------------------------------------------------------------- | 104 | |//----------------------------------------------------------------------- |
104 | | | 105 | | |
@@ -372,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
372 | | st_vmstate CARG2 | 373 | | st_vmstate CARG2 |
373 | | b ->vm_returnc | 374 | | b ->vm_returnc |
374 | | | 375 | | |
376 | |->vm_unwind_ext: // Complete external unwind. | ||
377 | #if !LJ_NO_UNWIND | ||
378 | | push {r0, r1, r2, lr} | ||
379 | | bl extern _Unwind_Complete | ||
380 | | ldr r0, [sp] | ||
381 | | bl extern _Unwind_DeleteException | ||
382 | | pop {r0, r1, r2, lr} | ||
383 | | mov r0, r1 | ||
384 | | bx r2 | ||
385 | #endif | ||
386 | | | ||
375 | |//----------------------------------------------------------------------- | 387 | |//----------------------------------------------------------------------- |
376 | |//-- Grow stack for calls ----------------------------------------------- | 388 | |//-- Grow stack for calls ----------------------------------------------- |
377 | |//----------------------------------------------------------------------- | 389 | |//----------------------------------------------------------------------- |
@@ -418,13 +430,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
418 | | add CARG2, sp, #CFRAME_RESUME | 430 | | add CARG2, sp, #CFRAME_RESUME |
419 | | ldrb CARG1, L->status | 431 | | ldrb CARG1, L->status |
420 | | str CARG3, SAVE_ERRF | 432 | | str CARG3, SAVE_ERRF |
421 | | str CARG2, L->cframe | 433 | | str L, SAVE_PC // Any value outside of bytecode is ok. |
422 | | str CARG3, SAVE_CFRAME | 434 | | str CARG3, SAVE_CFRAME |
423 | | cmp CARG1, #0 | 435 | | cmp CARG1, #0 |
424 | | str L, SAVE_PC // Any value outside of bytecode is ok. | 436 | | str CARG2, L->cframe |
425 | | beq >3 | 437 | | beq >3 |
426 | | | 438 | | |
427 | | // Resume after yield (like a return). | 439 | | // Resume after yield (like a return). |
440 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
428 | | mov RA, BASE | 441 | | mov RA, BASE |
429 | | ldr BASE, L->base | 442 | | ldr BASE, L->base |
430 | | ldr CARG1, L->top | 443 | | ldr CARG1, L->top |
@@ -458,14 +471,15 @@ static void build_subroutines(BuildCtx *ctx) | |||
458 | | str CARG3, SAVE_NRES | 471 | | str CARG3, SAVE_NRES |
459 | | mov L, CARG1 | 472 | | mov L, CARG1 |
460 | | str CARG1, SAVE_L | 473 | | str CARG1, SAVE_L |
461 | | mov BASE, CARG2 | ||
462 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
463 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | 474 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. |
475 | | mov BASE, CARG2 | ||
464 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | 476 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. |
465 | | str RC, SAVE_CFRAME | 477 | | str RC, SAVE_CFRAME |
466 | | add DISPATCH, DISPATCH, #GG_G2DISP | 478 | | add DISPATCH, DISPATCH, #GG_G2DISP |
479 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
467 | | | 480 | | |
468 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | 481 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). |
482 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
469 | | ldr RB, L->base // RB = old base (for vmeta_call). | 483 | | ldr RB, L->base // RB = old base (for vmeta_call). |
470 | | ldr CARG1, L->top | 484 | | ldr CARG1, L->top |
471 | | mov MASKR8, #255 | 485 | | mov MASKR8, #255 |
@@ -491,20 +505,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
491 | | mov L, CARG1 | 505 | | mov L, CARG1 |
492 | | ldr RA, L:CARG1->stack | 506 | | ldr RA, L:CARG1->stack |
493 | | str CARG1, SAVE_L | 507 | | str CARG1, SAVE_L |
508 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | ||
494 | | ldr RB, L->top | 509 | | ldr RB, L->top |
495 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | 510 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. |
496 | | ldr RC, L->cframe | 511 | | ldr RC, L->cframe |
512 | | add DISPATCH, DISPATCH, #GG_G2DISP | ||
497 | | sub RA, RA, RB // Compute -savestack(L, L->top). | 513 | | sub RA, RA, RB // Compute -savestack(L, L->top). |
498 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
499 | | mov RB, #0 | 514 | | mov RB, #0 |
500 | | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. | 515 | | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. |
501 | | str RB, SAVE_ERRF // No error function. | 516 | | str RB, SAVE_ERRF // No error function. |
502 | | str RC, SAVE_CFRAME | 517 | | str RC, SAVE_CFRAME |
518 | | str sp, L->cframe // Add our C frame to cframe chain. | ||
519 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
503 | | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 520 | | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
504 | | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | ||
505 | | movs BASE, CRET1 | 521 | | movs BASE, CRET1 |
506 | | mov PC, #FRAME_CP | 522 | | mov PC, #FRAME_CP |
507 | | add DISPATCH, DISPATCH, #GG_G2DISP | ||
508 | | bne <3 // Else continue with the call. | 523 | | bne <3 // Else continue with the call. |
509 | | b ->vm_leave_cp // No base? Just remove C frame. | 524 | | b ->vm_leave_cp // No base? Just remove C frame. |
510 | | | 525 | | |
@@ -614,6 +629,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
614 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | 629 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. |
615 | | b ->vm_call_dispatch_f | 630 | | b ->vm_call_dispatch_f |
616 | | | 631 | | |
632 | |->vmeta_tgetr: | ||
633 | | .IOS mov RC, BASE | ||
634 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
635 | | // Returns cTValue * or NULL. | ||
636 | | .IOS mov BASE, RC | ||
637 | | cmp CRET1, #0 | ||
638 | | ldrdne CARG12, [CRET1] | ||
639 | | mvneq CARG2, #~LJ_TNIL | ||
640 | | b ->BC_TGETR_Z | ||
641 | | | ||
617 | |//----------------------------------------------------------------------- | 642 | |//----------------------------------------------------------------------- |
618 | | | 643 | | |
619 | |->vmeta_tsets1: | 644 | |->vmeta_tsets1: |
@@ -671,6 +696,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
671 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | 696 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. |
672 | | b ->vm_call_dispatch_f | 697 | | b ->vm_call_dispatch_f |
673 | | | 698 | | |
699 | |->vmeta_tsetr: | ||
700 | | str BASE, L->base | ||
701 | | .IOS mov RC, BASE | ||
702 | | mov CARG1, L | ||
703 | | str PC, SAVE_PC | ||
704 | | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
705 | | // Returns TValue *. | ||
706 | | .IOS mov BASE, RC | ||
707 | | b ->BC_TSETR_Z | ||
708 | | | ||
674 | |//-- Comparison metamethods --------------------------------------------- | 709 | |//-- Comparison metamethods --------------------------------------------- |
675 | | | 710 | | |
676 | |->vmeta_comp: | 711 | |->vmeta_comp: |
@@ -735,6 +770,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
735 | | b <3 | 770 | | b <3 |
736 | |.endif | 771 | |.endif |
737 | | | 772 | | |
773 | |->vmeta_istype: | ||
774 | | sub PC, PC, #4 | ||
775 | | str BASE, L->base | ||
776 | | mov CARG1, L | ||
777 | | lsr CARG2, RA, #3 | ||
778 | | mov CARG3, RC | ||
779 | | str PC, SAVE_PC | ||
780 | | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
781 | | .IOS ldr BASE, L->base | ||
782 | | b ->cont_nop | ||
783 | | | ||
738 | |//-- Arithmetic metamethods --------------------------------------------- | 784 | |//-- Arithmetic metamethods --------------------------------------------- |
739 | | | 785 | | |
740 | |->vmeta_arith_vn: | 786 | |->vmeta_arith_vn: |
@@ -966,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
966 | | cmp TAB:RB, #0 | 1012 | | cmp TAB:RB, #0 |
967 | | beq ->fff_restv | 1013 | | beq ->fff_restv |
968 | | ldr CARG3, TAB:RB->hmask | 1014 | | ldr CARG3, TAB:RB->hmask |
969 | | ldr CARG4, STR:RC->hash | 1015 | | ldr CARG4, STR:RC->sid |
970 | | ldr NODE:INS, TAB:RB->node | 1016 | | ldr NODE:INS, TAB:RB->node |
971 | | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask | 1017 | | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask |
972 | | add CARG3, CARG3, CARG3, lsl #1 | 1018 | | add CARG3, CARG3, CARG3, lsl #1 |
973 | | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 | 1019 | | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |
974 | |3: // Rearranged logic, because we expect _not_ to find the key. | 1020 | |3: // Rearranged logic, because we expect _not_ to find the key. |
@@ -1052,7 +1098,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1052 | | ffgccheck | 1098 | | ffgccheck |
1053 | | mov CARG1, L | 1099 | | mov CARG1, L |
1054 | | mov CARG2, BASE | 1100 | | mov CARG2, BASE |
1055 | | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) | 1101 | | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) |
1056 | | // Returns GCstr *. | 1102 | | // Returns GCstr *. |
1057 | | ldr BASE, L->base | 1103 | | ldr BASE, L->base |
1058 | | mvn CARG2, #~LJ_TSTR | 1104 | | mvn CARG2, #~LJ_TSTR |
@@ -1065,24 +1111,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
1065 | | checktab CARG2, ->fff_fallback | 1111 | | checktab CARG2, ->fff_fallback |
1066 | | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. | 1112 | | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. |
1067 | | ldr PC, [BASE, FRAME_PC] | 1113 | | ldr PC, [BASE, FRAME_PC] |
1068 | | mov CARG2, CARG1 | 1114 | | add CARG2, BASE, #8 |
1069 | | str BASE, L->base // Add frame since C call can throw. | 1115 | | sub CARG3, BASE, #8 |
1070 | | mov CARG1, L | 1116 | | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) |
1071 | | str BASE, L->top // Dummy frame length is ok. | 1117 | | // Returns 1=found, 0=end, -1=error. |
1072 | | add CARG3, BASE, #8 | ||
1073 | | str PC, SAVE_PC | ||
1074 | | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | ||
1075 | | // Returns 0 at end of traversal. | ||
1076 | | .IOS ldr BASE, L->base | 1118 | | .IOS ldr BASE, L->base |
1077 | | cmp CRET1, #0 | 1119 | | cmp CRET1, #0 |
1078 | | mvneq CRET2, #~LJ_TNIL | 1120 | | mov RC, #(2+1)*8 |
1079 | | beq ->fff_restv // End of traversal: return nil. | 1121 | | bgt ->fff_res // Found key/value. |
1080 | | ldrd CARG12, [BASE, #8] // Copy key and value to results. | 1122 | | bmi ->fff_fallback // Invalid key. |
1081 | | ldrd CARG34, [BASE, #16] | 1123 | | // End of traversal: return nil. |
1082 | | mov RC, #(2+1)*8 | 1124 | | mvn CRET2, #~LJ_TNIL |
1083 | | strd CARG12, [BASE, #-8] | 1125 | | b ->fff_restv |
1084 | | strd CARG34, [BASE] | ||
1085 | | b ->fff_res | ||
1086 | | | 1126 | | |
1087 | |.ffunc_1 pairs | 1127 | |.ffunc_1 pairs |
1088 | | checktab CARG2, ->fff_fallback | 1128 | | checktab CARG2, ->fff_fallback |
@@ -1230,9 +1270,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1230 | | ldr CARG3, L:RA->base | 1270 | | ldr CARG3, L:RA->base |
1231 | | mv_vmstate CARG2, INTERP | 1271 | | mv_vmstate CARG2, INTERP |
1232 | | ldr CARG4, L:RA->top | 1272 | | ldr CARG4, L:RA->top |
1233 | | st_vmstate CARG2 | ||
1234 | | cmp CRET1, #LUA_YIELD | 1273 | | cmp CRET1, #LUA_YIELD |
1235 | | ldr BASE, L->base | 1274 | | ldr BASE, L->base |
1275 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
1276 | | st_vmstate CARG2 | ||
1236 | | bhi >8 | 1277 | | bhi >8 |
1237 | | subs RC, CARG4, CARG3 | 1278 | | subs RC, CARG4, CARG3 |
1238 | | ldr CARG1, L->maxstack | 1279 | | ldr CARG1, L->maxstack |
@@ -1500,19 +1541,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1500 | | math_extern2 atan2 | 1541 | | math_extern2 atan2 |
1501 | | math_extern2 fmod | 1542 | | math_extern2 fmod |
1502 | | | 1543 | | |
1503 | |->ff_math_deg: | ||
1504 | |.if FPU | ||
1505 | | .ffunc_d math_rad | ||
1506 | | vldr d1, CFUNC:CARG3->upvalue[0] | ||
1507 | | vmul.f64 d0, d0, d1 | ||
1508 | | b ->fff_resd | ||
1509 | |.else | ||
1510 | | .ffunc_n math_rad | ||
1511 | | ldrd CARG34, CFUNC:CARG3->upvalue[0] | ||
1512 | | bl extern __aeabi_dmul | ||
1513 | | b ->fff_restv | ||
1514 | |.endif | ||
1515 | | | ||
1516 | |.if HFABI | 1544 | |.if HFABI |
1517 | | .ffunc math_ldexp | 1545 | | .ffunc math_ldexp |
1518 | | ldr CARG4, [BASE, #4] | 1546 | | ldr CARG4, [BASE, #4] |
@@ -1682,17 +1710,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1682 | |.endif | 1710 | |.endif |
1683 | |.endmacro | 1711 | |.endmacro |
1684 | | | 1712 | | |
1685 | | math_minmax math_min, gt, hi | 1713 | | math_minmax math_min, gt, pl |
1686 | | math_minmax math_max, lt, lo | 1714 | | math_minmax math_max, lt, le |
1687 | | | 1715 | | |
1688 | |//-- String library ----------------------------------------------------- | 1716 | |//-- String library ----------------------------------------------------- |
1689 | | | 1717 | | |
1690 | |.ffunc_1 string_len | ||
1691 | | checkstr CARG2, ->fff_fallback | ||
1692 | | ldr CARG1, STR:CARG1->len | ||
1693 | | mvn CARG2, #~LJ_TISNUM | ||
1694 | | b ->fff_restv | ||
1695 | | | ||
1696 | |.ffunc string_byte // Only handle the 1-arg case here. | 1718 | |.ffunc string_byte // Only handle the 1-arg case here. |
1697 | | ldrd CARG12, [BASE] | 1719 | | ldrd CARG12, [BASE] |
1698 | | ldr PC, [BASE, FRAME_PC] | 1720 | | ldr PC, [BASE, FRAME_PC] |
@@ -1725,6 +1747,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1725 | | mov CARG1, L | 1747 | | mov CARG1, L |
1726 | | str PC, SAVE_PC | 1748 | | str PC, SAVE_PC |
1727 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) | 1749 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) |
1750 | |->fff_resstr: | ||
1728 | | // Returns GCstr *. | 1751 | | // Returns GCstr *. |
1729 | | ldr BASE, L->base | 1752 | | ldr BASE, L->base |
1730 | | mvn CARG2, #~LJ_TSTR | 1753 | | mvn CARG2, #~LJ_TSTR |
@@ -1768,91 +1791,28 @@ static void build_subroutines(BuildCtx *ctx) | |||
1768 | | mvn CARG2, #~LJ_TSTR | 1791 | | mvn CARG2, #~LJ_TSTR |
1769 | | b ->fff_restv | 1792 | | b ->fff_restv |
1770 | | | 1793 | | |
1771 | |.ffunc string_rep // Only handle the 1-char case inline. | 1794 | |.macro ffstring_op, name |
1772 | | ffgccheck | 1795 | | .ffunc string_ .. name |
1773 | | ldrd CARG12, [BASE] | ||
1774 | | ldrd CARG34, [BASE, #8] | ||
1775 | | cmp NARGS8:RC, #16 | ||
1776 | | bne ->fff_fallback // Exactly 2 arguments | ||
1777 | | checktp CARG2, LJ_TSTR | ||
1778 | | checktpeq CARG4, LJ_TISNUM | ||
1779 | | bne ->fff_fallback | ||
1780 | | subs CARG4, CARG3, #1 | ||
1781 | | ldr CARG2, STR:CARG1->len | ||
1782 | | blt ->fff_emptystr // Count <= 0? | ||
1783 | | cmp CARG2, #1 | ||
1784 | | blo ->fff_emptystr // Zero-length string? | ||
1785 | | bne ->fff_fallback // Fallback for > 1-char strings. | ||
1786 | | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] | ||
1787 | | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] | ||
1788 | | ldr CARG1, STR:CARG1[1] | ||
1789 | | cmp RB, CARG3 | ||
1790 | | blo ->fff_fallback | ||
1791 | |1: // Fill buffer with char. | ||
1792 | | strb CARG1, [CARG2, CARG4] | ||
1793 | | subs CARG4, CARG4, #1 | ||
1794 | | bge <1 | ||
1795 | | b ->fff_newstr | ||
1796 | | | ||
1797 | |.ffunc string_reverse | ||
1798 | | ffgccheck | ||
1799 | | ldrd CARG12, [BASE] | ||
1800 | | cmp NARGS8:RC, #8 | ||
1801 | | blo ->fff_fallback | ||
1802 | | checkstr CARG2, ->fff_fallback | ||
1803 | | ldr CARG3, STR:CARG1->len | ||
1804 | | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] | ||
1805 | | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] | ||
1806 | | mov CARG4, CARG3 | ||
1807 | | add CARG1, STR:CARG1, #sizeof(GCstr) | ||
1808 | | cmp RB, CARG3 | ||
1809 | | blo ->fff_fallback | ||
1810 | |1: // Reverse string copy. | ||
1811 | | ldrb RB, [CARG1], #1 | ||
1812 | | subs CARG4, CARG4, #1 | ||
1813 | | blt ->fff_newstr | ||
1814 | | strb RB, [CARG2, CARG4] | ||
1815 | | b <1 | ||
1816 | | | ||
1817 | |.macro ffstring_case, name, lo | ||
1818 | | .ffunc name | ||
1819 | | ffgccheck | 1796 | | ffgccheck |
1820 | | ldrd CARG12, [BASE] | 1797 | | ldr CARG3, [BASE, #4] |
1821 | | cmp NARGS8:RC, #8 | 1798 | | cmp NARGS8:RC, #8 |
1799 | | ldr STR:CARG2, [BASE] | ||
1822 | | blo ->fff_fallback | 1800 | | blo ->fff_fallback |
1823 | | checkstr CARG2, ->fff_fallback | 1801 | | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) |
1824 | | ldr CARG3, STR:CARG1->len | 1802 | | checkstr CARG3, ->fff_fallback |
1825 | | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] | 1803 | | ldr CARG4, SBUF:CARG1->b |
1826 | | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] | 1804 | | str BASE, L->base |
1827 | | mov CARG4, #0 | 1805 | | str PC, SAVE_PC |
1828 | | add CARG1, STR:CARG1, #sizeof(GCstr) | 1806 | | str L, SBUF:CARG1->L |
1829 | | cmp RB, CARG3 | 1807 | | str CARG4, SBUF:CARG1->w |
1830 | | blo ->fff_fallback | 1808 | | bl extern lj_buf_putstr_ .. name |
1831 | |1: // ASCII case conversion. | 1809 | | bl extern lj_buf_tostr |
1832 | | ldrb RB, [CARG1, CARG4] | 1810 | | b ->fff_resstr |
1833 | | cmp CARG4, CARG3 | ||
1834 | | bhs ->fff_newstr | ||
1835 | | sub RC, RB, #lo | ||
1836 | | cmp RC, #26 | ||
1837 | | eorlo RB, RB, #0x20 | ||
1838 | | strb RB, [CARG2, CARG4] | ||
1839 | | add CARG4, CARG4, #1 | ||
1840 | | b <1 | ||
1841 | |.endmacro | 1811 | |.endmacro |
1842 | | | 1812 | | |
1843 | |ffstring_case string_lower, 65 | 1813 | |ffstring_op reverse |
1844 | |ffstring_case string_upper, 97 | 1814 | |ffstring_op lower |
1845 | | | 1815 | |ffstring_op upper |
1846 | |//-- Table library ------------------------------------------------------ | ||
1847 | | | ||
1848 | |.ffunc_1 table_getn | ||
1849 | | checktab CARG2, ->fff_fallback | ||
1850 | | .IOS mov RA, BASE | ||
1851 | | bl extern lj_tab_len // (GCtab *t) | ||
1852 | | // Returns uint32_t (but less than 2^31). | ||
1853 | | .IOS mov BASE, RA | ||
1854 | | mvn CARG2, #~LJ_TISNUM | ||
1855 | | b ->fff_restv | ||
1856 | | | 1816 | | |
1857 | |//-- Bit library -------------------------------------------------------- | 1817 | |//-- Bit library -------------------------------------------------------- |
1858 | | | 1818 | | |
@@ -2127,6 +2087,66 @@ static void build_subroutines(BuildCtx *ctx) | |||
2127 | | ldr INS, [PC, #-4] | 2087 | | ldr INS, [PC, #-4] |
2128 | | bx CRET1 | 2088 | | bx CRET1 |
2129 | | | 2089 | | |
2090 | |->cont_stitch: // Trace stitching. | ||
2091 | |.if JIT | ||
2092 | | // RA = resultptr, CARG4 = meta base | ||
2093 | | ldr RB, SAVE_MULTRES | ||
2094 | | ldr INS, [PC, #-4] | ||
2095 | | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. | ||
2096 | | subs RB, RB, #8 | ||
2097 | | decode_RA8 RC, INS // Call base. | ||
2098 | | beq >2 | ||
2099 | |1: // Move results down. | ||
2100 | | ldrd CARG12, [RA] | ||
2101 | | add RA, RA, #8 | ||
2102 | | subs RB, RB, #8 | ||
2103 | | strd CARG12, [BASE, RC] | ||
2104 | | add RC, RC, #8 | ||
2105 | | bne <1 | ||
2106 | |2: | ||
2107 | | decode_RA8 RA, INS | ||
2108 | | decode_RB8 RB, INS | ||
2109 | | add RA, RA, RB | ||
2110 | |3: | ||
2111 | | cmp RA, RC | ||
2112 | | mvn CARG2, #~LJ_TNIL | ||
2113 | | bhi >9 // More results wanted? | ||
2114 | | | ||
2115 | | ldrh RA, TRACE:CARG3->traceno | ||
2116 | | ldrh RC, TRACE:CARG3->link | ||
2117 | | cmp RC, RA | ||
2118 | | beq ->cont_nop // Blacklisted. | ||
2119 | | cmp RC, #0 | ||
2120 | | bne =>BC_JLOOP // Jump to stitched trace. | ||
2121 | | | ||
2122 | | // Stitch a new trace to the previous trace. | ||
2123 | | str RA, [DISPATCH, #DISPATCH_J(exitno)] | ||
2124 | | str L, [DISPATCH, #DISPATCH_J(L)] | ||
2125 | | str BASE, L->base | ||
2126 | | sub CARG1, DISPATCH, #-GG_DISP2J | ||
2127 | | mov CARG2, PC | ||
2128 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2129 | | ldr BASE, L->base | ||
2130 | | b ->cont_nop | ||
2131 | | | ||
2132 | |9: // Fill up results with nil. | ||
2133 | | strd CARG12, [BASE, RC] | ||
2134 | | add RC, RC, #8 | ||
2135 | | b <3 | ||
2136 | |.endif | ||
2137 | | | ||
2138 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2139 | #if LJ_HASPROFILE | ||
2140 | | mov CARG1, L | ||
2141 | | str BASE, L->base | ||
2142 | | mov CARG2, PC | ||
2143 | | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2144 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2145 | | ldr BASE, L->base | ||
2146 | | sub PC, PC, #4 | ||
2147 | | b ->cont_nop | ||
2148 | #endif | ||
2149 | | | ||
2130 | |//----------------------------------------------------------------------- | 2150 | |//----------------------------------------------------------------------- |
2131 | |//-- Trace exit handler ------------------------------------------------- | 2151 | |//-- Trace exit handler ------------------------------------------------- |
2132 | |//----------------------------------------------------------------------- | 2152 | |//----------------------------------------------------------------------- |
@@ -2151,14 +2171,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2151 | | add CARG1, CARG1, CARG2, asr #6 | 2171 | | add CARG1, CARG1, CARG2, asr #6 |
2152 | | ldr CARG2, [lr, #4] // Load exit stub group offset. | 2172 | | ldr CARG2, [lr, #4] // Load exit stub group offset. |
2153 | | sub CARG1, CARG1, lr | 2173 | | sub CARG1, CARG1, lr |
2154 | | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] | 2174 | | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] |
2155 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. | 2175 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. |
2156 | | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] | 2176 | | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] |
2157 | | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] | 2177 | | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] |
2158 | | mov CARG4, #0 | 2178 | | mov CARG4, #0 |
2159 | | str L, [DISPATCH, #DISPATCH_J(L)] | ||
2160 | | str BASE, L->base | 2179 | | str BASE, L->base |
2161 | | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] | 2180 | | str L, [DISPATCH, #DISPATCH_J(L)] |
2181 | | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] | ||
2162 | | sub CARG1, DISPATCH, #-GG_DISP2J | 2182 | | sub CARG1, DISPATCH, #-GG_DISP2J |
2163 | | mov CARG2, sp | 2183 | | mov CARG2, sp |
2164 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | 2184 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) |
@@ -2177,13 +2197,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2177 | | ldr L, SAVE_L | 2197 | | ldr L, SAVE_L |
2178 | |1: | 2198 | |1: |
2179 | | cmp CARG1, #0 | 2199 | | cmp CARG1, #0 |
2180 | | blt >3 // Check for error from exit. | 2200 | | blt >9 // Check for error from exit. |
2181 | | lsl RC, CARG1, #3 | 2201 | | lsl RC, CARG1, #3 |
2182 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | 2202 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] |
2183 | | str RC, SAVE_MULTRES | 2203 | | str RC, SAVE_MULTRES |
2184 | | mov CARG3, #0 | 2204 | | mov CARG3, #0 |
2205 | | str BASE, L->base | ||
2185 | | ldr CARG2, LFUNC:CARG2->field_pc | 2206 | | ldr CARG2, LFUNC:CARG2->field_pc |
2186 | | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] | 2207 | | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] |
2187 | | mv_vmstate CARG4, INTERP | 2208 | | mv_vmstate CARG4, INTERP |
2188 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | 2209 | | ldr KBASE, [CARG2, #PC2PROTO(k)] |
2189 | | // Modified copy of ins_next which handles function header dispatch, too. | 2210 | | // Modified copy of ins_next which handles function header dispatch, too. |
@@ -2192,17 +2213,35 @@ static void build_subroutines(BuildCtx *ctx) | |||
2192 | | ldr INS, [PC], #4 | 2213 | | ldr INS, [PC], #4 |
2193 | | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. | 2214 | | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. |
2194 | | st_vmstate CARG4 | 2215 | | st_vmstate CARG4 |
2216 | | cmp OP, #BC_FUNCC+2 // Fast function? | ||
2217 | | bhs >4 | ||
2218 | |2: | ||
2195 | | cmp OP, #BC_FUNCF // Function header? | 2219 | | cmp OP, #BC_FUNCF // Function header? |
2196 | | ldr OP, [DISPATCH, OP, lsl #2] | 2220 | | ldr OP, [DISPATCH, OP, lsl #2] |
2197 | | decode_RA8 RA, INS | 2221 | | decode_RA8 RA, INS |
2198 | | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. | 2222 | | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. |
2199 | | subhs RC, RC, #8 | 2223 | | subhs RC, RC, #8 |
2200 | | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 | 2224 | | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 |
2225 | | ldrhs CARG3, [BASE, FRAME_FUNC] | ||
2201 | | bx OP | 2226 | | bx OP |
2202 | | | 2227 | | |
2203 | |3: // Rethrow error from the right C frame. | 2228 | |4: // Check frame below fast function. |
2229 | | ldr CARG1, [BASE, FRAME_PC] | ||
2230 | | ands CARG2, CARG1, #FRAME_TYPE | ||
2231 | | bne <2 // Trace stitching continuation? | ||
2232 | | // Otherwise set KBASE for Lua function below fast function. | ||
2233 | | ldr CARG3, [CARG1, #-4] | ||
2234 | | decode_RA8 CARG1, CARG3 | ||
2235 | | sub CARG2, BASE, CARG1 | ||
2236 | | ldr LFUNC:CARG3, [CARG2, #-16] | ||
2237 | | ldr CARG3, LFUNC:CARG3->field_pc | ||
2238 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
2239 | | b <2 | ||
2240 | | | ||
2241 | |9: // Rethrow error from the right C frame. | ||
2242 | | rsb CARG2, CARG1, #0 | ||
2204 | | mov CARG1, L | 2243 | | mov CARG1, L |
2205 | | bl extern lj_err_run // (lua_State *L) | 2244 | | bl extern lj_err_trace // (lua_State *L, int errcode) |
2206 | |.endif | 2245 | |.endif |
2207 | | | 2246 | | |
2208 | |//----------------------------------------------------------------------- | 2247 | |//----------------------------------------------------------------------- |
@@ -2385,6 +2424,64 @@ static void build_subroutines(BuildCtx *ctx) | |||
2385 | |//-- Miscellaneous functions -------------------------------------------- | 2424 | |//-- Miscellaneous functions -------------------------------------------- |
2386 | |//----------------------------------------------------------------------- | 2425 | |//----------------------------------------------------------------------- |
2387 | | | 2426 | | |
2427 | |.define NEXT_TAB, TAB:CARG1 | ||
2428 | |.define NEXT_RES, CARG1 | ||
2429 | |.define NEXT_IDX, CARG2 | ||
2430 | |.define NEXT_TMP0, CARG3 | ||
2431 | |.define NEXT_TMP1, CARG4 | ||
2432 | |.define NEXT_LIM, r12 | ||
2433 | |.define NEXT_RES_PTR, sp | ||
2434 | |.define NEXT_RES_VAL, [sp] | ||
2435 | |.define NEXT_RES_KEY_I, [sp, #8] | ||
2436 | |.define NEXT_RES_KEY_IT, [sp, #12] | ||
2437 | | | ||
2438 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2439 | |// Result pointer returned in NEXT_RES (CARG1). Next idx returned in CRET2 (NEXT_IDX is CARG2 here). | ||
2440 | |->vm_next: | ||
2441 | |.if JIT | ||
2442 | | ldr NEXT_TMP0, NEXT_TAB->array | ||
2443 | | ldr NEXT_LIM, NEXT_TAB->asize | ||
2444 | | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 | ||
2445 | |1: // Traverse array part: scan from idx up to asize, skipping nil slots. | ||
2446 | | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM | ||
2447 | | bhs >5 | ||
2448 | | ldr NEXT_TMP1, [NEXT_TMP0, #4] | ||
2449 | | str NEXT_IDX, NEXT_RES_KEY_I | ||
2450 | | add NEXT_TMP0, NEXT_TMP0, #8 | ||
2451 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2452 | | checktp NEXT_TMP1, LJ_TNIL | ||
2453 | | beq <1 // Skip holes in array part. | ||
2454 | | ldr NEXT_TMP0, [NEXT_TMP0, #-8] | ||
2455 | | mov NEXT_RES, NEXT_RES_PTR | ||
2456 | | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too. | ||
2457 | | mvn NEXT_TMP0, #~LJ_TISNUM | ||
2458 | | str NEXT_TMP0, NEXT_RES_KEY_IT | ||
2459 | | bx lr | ||
2460 | | | ||
2461 | |5: // Traverse hash part. NEXT_TMP1 = idx - asize selects the starting node. | ||
2462 | | ldr NEXT_TMP0, NEXT_TAB->hmask | ||
2463 | | ldr NODE:NEXT_RES, NEXT_TAB->node | ||
2464 | | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 | ||
2465 | | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 | ||
2466 | | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 | ||
2467 | |6: | ||
2468 | | cmp NEXT_IDX, NEXT_LIM | ||
2469 | | bhi >9 | ||
2470 | | ldr NEXT_TMP1, NODE:NEXT_RES->val.it | ||
2471 | | checktp NEXT_TMP1, LJ_TNIL | ||
2472 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2473 | | bxne lr | ||
2474 | | // Skip holes in hash part. | ||
2475 | | add NEXT_RES, NEXT_RES, #sizeof(Node) | ||
2476 | | b <6 | ||
2477 | | | ||
2478 | |9: // End of iteration. Set the key to nil (not the value) in the slot at sp and return that slot. | ||
2479 | | mvn NEXT_TMP0, #0 | ||
2480 | | mov NEXT_RES, NEXT_RES_PTR | ||
2481 | | str NEXT_TMP0, NEXT_RES_KEY_IT | ||
2482 | | bx lr | ||
2483 | |.endif | ||
2484 | | | ||
2388 | |//----------------------------------------------------------------------- | 2485 | |//----------------------------------------------------------------------- |
2389 | |//-- FFI helper functions ----------------------------------------------- | 2486 | |//-- FFI helper functions ----------------------------------------------- |
2390 | |//----------------------------------------------------------------------- | 2487 | |//----------------------------------------------------------------------- |
@@ -2832,6 +2929,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2832 | | ins_next | 2929 | | ins_next |
2833 | break; | 2930 | break; |
2834 | 2931 | ||
2932 | case BC_ISTYPE: | ||
2933 | | // RA = src*8, RC = -type | ||
2934 | | ldrd CARG12, [BASE, RA] | ||
2935 | | ins_next1 | ||
2936 | | cmn CARG2, RC | ||
2937 | | ins_next2 | ||
2938 | | bne ->vmeta_istype | ||
2939 | | ins_next3 | ||
2940 | break; | ||
2941 | case BC_ISNUM: | ||
2942 | | // RA = src*8, RC = -(TISNUM-1) | ||
2943 | | ldrd CARG12, [BASE, RA] | ||
2944 | | ins_next1 | ||
2945 | | checktp CARG2, LJ_TISNUM | ||
2946 | | ins_next2 | ||
2947 | | bhs ->vmeta_istype | ||
2948 | | ins_next3 | ||
2949 | break; | ||
2950 | |||
2835 | /* -- Unary ops --------------------------------------------------------- */ | 2951 | /* -- Unary ops --------------------------------------------------------- */ |
2836 | 2952 | ||
2837 | case BC_MOV: | 2953 | case BC_MOV: |
@@ -3436,10 +3552,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3436 | |->BC_TGETS_Z: | 3552 | |->BC_TGETS_Z: |
3437 | | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | 3553 | | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 |
3438 | | ldr CARG3, TAB:CARG1->hmask | 3554 | | ldr CARG3, TAB:CARG1->hmask |
3439 | | ldr CARG4, STR:RC->hash | 3555 | | ldr CARG4, STR:RC->sid |
3440 | | ldr NODE:INS, TAB:CARG1->node | 3556 | | ldr NODE:INS, TAB:CARG1->node |
3441 | | mov TAB:RB, TAB:CARG1 | 3557 | | mov TAB:RB, TAB:CARG1 |
3442 | | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask | 3558 | | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask |
3443 | | add CARG3, CARG3, CARG3, lsl #1 | 3559 | | add CARG3, CARG3, CARG3, lsl #1 |
3444 | | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 | 3560 | | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |
3445 | |1: | 3561 | |1: |
@@ -3502,6 +3618,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3502 | | bne <1 // 'no __index' flag set: done. | 3618 | | bne <1 // 'no __index' flag set: done. |
3503 | | b ->vmeta_tgetb | 3619 | | b ->vmeta_tgetb |
3504 | break; | 3620 | break; |
3621 | case BC_TGETR: | ||
3622 | | decode_RB8 RB, INS | ||
3623 | | decode_RC8 RC, INS | ||
3624 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
3625 | | ldr TAB:CARG1, [BASE, RB] | ||
3626 | | ldr CARG2, [BASE, RC] | ||
3627 | | ldr CARG4, TAB:CARG1->array | ||
3628 | | ldr CARG3, TAB:CARG1->asize | ||
3629 | | add CARG4, CARG4, CARG2, lsl #3 | ||
3630 | | cmp CARG2, CARG3 // In array part? | ||
3631 | | bhs ->vmeta_tgetr | ||
3632 | | ldrd CARG12, [CARG4] | ||
3633 | |->BC_TGETR_Z: | ||
3634 | | ins_next1 | ||
3635 | | ins_next2 | ||
3636 | | strd CARG12, [BASE, RA] | ||
3637 | | ins_next3 | ||
3638 | break; | ||
3505 | 3639 | ||
3506 | case BC_TSETV: | 3640 | case BC_TSETV: |
3507 | | decode_RB8 RB, INS | 3641 | | decode_RB8 RB, INS |
@@ -3565,10 +3699,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3565 | |->BC_TSETS_Z: | 3699 | |->BC_TSETS_Z: |
3566 | | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | 3700 | | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 |
3567 | | ldr CARG3, TAB:CARG1->hmask | 3701 | | ldr CARG3, TAB:CARG1->hmask |
3568 | | ldr CARG4, STR:RC->hash | 3702 | | ldr CARG4, STR:RC->sid |
3569 | | ldr NODE:INS, TAB:CARG1->node | 3703 | | ldr NODE:INS, TAB:CARG1->node |
3570 | | mov TAB:RB, TAB:CARG1 | 3704 | | mov TAB:RB, TAB:CARG1 |
3571 | | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask | 3705 | | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask |
3572 | | add CARG3, CARG3, CARG3, lsl #1 | 3706 | | add CARG3, CARG3, CARG3, lsl #1 |
3573 | | mov CARG4, #0 | 3707 | | mov CARG4, #0 |
3574 | | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 | 3708 | | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |
@@ -3672,6 +3806,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3672 | | barrierback TAB:CARG1, INS, CARG3 | 3806 | | barrierback TAB:CARG1, INS, CARG3 |
3673 | | b <2 | 3807 | | b <2 |
3674 | break; | 3808 | break; |
3809 | case BC_TSETR: | ||
3810 | | decode_RB8 RB, INS | ||
3811 | | decode_RC8 RC, INS | ||
3812 | | // RA = src*8, RB = table*8, RC = key*8 | ||
3813 | | ldr TAB:CARG2, [BASE, RB] | ||
3814 | | ldr CARG3, [BASE, RC] | ||
3815 | | ldrb INS, TAB:CARG2->marked | ||
3816 | | ldr CARG1, TAB:CARG2->array | ||
3817 | | ldr CARG4, TAB:CARG2->asize | ||
3818 | | tst INS, #LJ_GC_BLACK // isblack(table) | ||
3819 | | add CARG1, CARG1, CARG3, lsl #3 | ||
3820 | | bne >7 | ||
3821 | |2: | ||
3822 | | cmp CARG3, CARG4 // In array part? | ||
3823 | | bhs ->vmeta_tsetr | ||
3824 | |->BC_TSETR_Z: | ||
3825 | | ldrd CARG34, [BASE, RA] | ||
3826 | | ins_next1 | ||
3827 | | ins_next2 | ||
3828 | | strd CARG34, [CARG1] | ||
3829 | | ins_next3 | ||
3830 | | | ||
3831 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3832 | | barrierback TAB:CARG2, INS, RB | ||
3833 | | b <2 | ||
3834 | break; | ||
3675 | 3835 | ||
3676 | case BC_TSETM: | 3836 | case BC_TSETM: |
3677 | | // RA = base*8 (table at base-1), RC = num_const (start index) | 3837 | | // RA = base*8 (table at base-1), RC = num_const (start index) |
@@ -3812,10 +3972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3812 | break; | 3972 | break; |
3813 | 3973 | ||
3814 | case BC_ITERN: | 3974 | case BC_ITERN: |
3815 | | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3816 | |.if JIT | 3975 | |.if JIT |
3817 | | // NYI: add hotloop, record BC_ITERN. | 3976 | | hotloop |
3818 | |.endif | 3977 | |.endif |
3978 | |->vm_IITERN: | ||
3979 | | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3819 | | add RA, BASE, RA | 3980 | | add RA, BASE, RA |
3820 | | ldr TAB:RB, [RA, #-16] | 3981 | | ldr TAB:RB, [RA, #-16] |
3821 | | ldr CARG1, [RA, #-8] // Get index from control var. | 3982 | | ldr CARG1, [RA, #-8] // Get index from control var. |
@@ -3881,7 +4042,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3881 | | ins_next1 | 4042 | | ins_next1 |
3882 | | ins_next2 | 4043 | | ins_next2 |
3883 | | mov CARG1, #0 | 4044 | | mov CARG1, #0 |
3884 | | mvn CARG2, #0x00018000 | 4045 | | mvn CARG2, #~LJ_KEYINDEX |
3885 | | strd CARG1, [RA, #-8] // Initialize control var. | 4046 | | strd CARG1, [RA, #-8] // Initialize control var. |
3886 | |1: | 4047 | |1: |
3887 | | ins_next3 | 4048 | | ins_next3 |
@@ -3890,9 +4051,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3890 | | mov OP, #BC_ITERC | 4051 | | mov OP, #BC_ITERC |
3891 | | strb CARG1, [PC, #-4] | 4052 | | strb CARG1, [PC, #-4] |
3892 | | sub PC, RC, #0x20000 | 4053 | | sub PC, RC, #0x20000 |
4054 | |.if JIT | ||
4055 | | ldrb CARG1, [PC] | ||
4056 | | cmp CARG1, #BC_ITERN | ||
4057 | | bne >6 | ||
4058 | |.endif | ||
3893 | | strb OP, [PC] // Subsumes ins_next1. | 4059 | | strb OP, [PC] // Subsumes ins_next1. |
3894 | | ins_next2 | 4060 | | ins_next2 |
3895 | | b <1 | 4061 | | b <1 |
4062 | |.if JIT | ||
4063 | |6: // Unpatch JLOOP. | ||
4064 | | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] | ||
4065 | | ldrh CARG2, [PC, #2] | ||
4066 | | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] | ||
4067 | | // Subsumes ins_next1 and ins_next2. | ||
4068 | | ldr INS, TRACE:CARG1->startins | ||
4069 | | bfi INS, OP, #0, #8 | ||
4070 | | str INS, [PC], #4 | ||
4071 | | b <1 | ||
4072 | |.endif | ||
3896 | break; | 4073 | break; |
3897 | 4074 | ||
3898 | case BC_VARG: | 4075 | case BC_VARG: |
@@ -4269,7 +4446,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4269 | | st_vmstate CARG2 | 4446 | | st_vmstate CARG2 |
4270 | | ldr RA, TRACE:RC->mcode | 4447 | | ldr RA, TRACE:RC->mcode |
4271 | | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] | 4448 | | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] |
4272 | | str L, [DISPATCH, #DISPATCH_GL(jit_L)] | 4449 | | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)] |
4273 | | bx RA | 4450 | | bx RA |
4274 | |.endif | 4451 | |.endif |
4275 | break; | 4452 | break; |
@@ -4387,6 +4564,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4387 | | ldr BASE, L->base | 4564 | | ldr BASE, L->base |
4388 | | mv_vmstate CARG3, INTERP | 4565 | | mv_vmstate CARG3, INTERP |
4389 | | ldr CRET2, L->top | 4566 | | ldr CRET2, L->top |
4567 | | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ||
4390 | | lsl RC, CRET1, #3 | 4568 | | lsl RC, CRET1, #3 |
4391 | | st_vmstate CARG3 | 4569 | | st_vmstate CARG3 |
4392 | | ldr PC, [BASE, FRAME_PC] | 4570 | | ldr PC, [BASE, FRAME_PC] |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc new file mode 100644 index 00000000..f5f1b5f1 --- /dev/null +++ b/src/vm_arm64.dasc | |||
@@ -0,0 +1,4156 @@ | |||
1 | |// Low-level VM code for ARM64 CPUs. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | | | ||
5 | |.arch arm64 | ||
6 | |.section code_op, code_sub | ||
7 | | | ||
8 | |.actionlist build_actionlist | ||
9 | |.globals GLOB_ | ||
10 | |.globalnames globnames | ||
11 | |.externnames extnames | ||
12 | | | ||
13 | |// Note: The ragged indentation of the instructions is intentional. | ||
14 | |// The starting columns indicate data dependencies. | ||
15 | | | ||
16 | |//----------------------------------------------------------------------- | ||
17 | | | ||
18 | |// ARM64 registers and the AAPCS64 ABI 1.0 at a glance: | ||
19 | |// | ||
20 | |// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr | ||
21 | |// x18 is reserved on most platforms. Don't use it, save it or restore it. | ||
22 | |// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp, | ||
23 | |// depending on the instruction. | ||
24 | |// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp | ||
25 | |// | ||
26 | |// x0-x7/v0-v7 hold parameters and results. | ||
27 | | | ||
28 | |// Fixed register assignments for the interpreter. | ||
29 | | | ||
30 | |// The following must be C callee-save. | ||
31 | |.define BASE, x19 // Base of current Lua stack frame. | ||
32 | |.define KBASE, x20 // Constants of current Lua function. | ||
33 | |.define PC, x21 // Next PC. | ||
34 | |.define GLREG, x22 // Global state. | ||
35 | |.define LREG, x23 // Register holding lua_State (also in SAVE_L). | ||
36 | |.define TISNUM, x24 // Constant LJ_TISNUM << 47. | ||
37 | |.define TISNUMhi, x25 // Constant LJ_TISNUM << 15. | ||
38 | |.define TISNIL, x26 // Constant -1LL. | ||
39 | |.define fp, x29 // Yes, we have to maintain a frame pointer. | ||
40 | | | ||
41 | |.define ST_INTERP, w26 // Constant -1. | ||
42 | | | ||
43 | |// The following temporaries are not saved across C calls, except for RA/RC. | ||
44 | |.define RA, x27 | ||
45 | |.define RC, x28 | ||
46 | |.define RB, x17 | ||
47 | |.define RAw, w27 | ||
48 | |.define RCw, w28 | ||
49 | |.define RBw, w17 | ||
50 | |.define INS, x16 | ||
51 | |.define INSw, w16 | ||
52 | |.define ITYPE, x15 | ||
53 | |.define TMP0, x8 | ||
54 | |.define TMP1, x9 | ||
55 | |.define TMP2, x10 | ||
56 | |.define TMP3, x11 | ||
57 | |.define TMP0w, w8 | ||
58 | |.define TMP1w, w9 | ||
59 | |.define TMP2w, w10 | ||
60 | |.define TMP3w, w11 | ||
61 | | | ||
62 | |// Calling conventions. Also used as temporaries. | ||
63 | |.define CARG1, x0 | ||
64 | |.define CARG2, x1 | ||
65 | |.define CARG3, x2 | ||
66 | |.define CARG4, x3 | ||
67 | |.define CARG5, x4 | ||
68 | |.define CARG1w, w0 | ||
69 | |.define CARG2w, w1 | ||
70 | |.define CARG3w, w2 | ||
71 | |.define CARG4w, w3 | ||
72 | |.define CARG5w, w4 | ||
73 | | | ||
74 | |.define FARG1, d0 | ||
75 | |.define FARG2, d1 | ||
76 | | | ||
77 | |.define CRET1, x0 | ||
78 | |.define CRET1w, w0 | ||
79 | | | ||
80 | |// Stack layout while in interpreter. Must match with lj_frame.h. | ||
81 | | | ||
82 | |.define CFRAME_SPACE, 208 | ||
83 | |//----- 16 byte aligned, <-- sp entering interpreter | ||
84 | |.define SAVE_FP_LR_, 192 | ||
85 | |.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves | ||
86 | |.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves | ||
87 | |// Unused [sp, #44] // 32 bit values | ||
88 | |.define SAVE_NRES, [sp, #40] | ||
89 | |.define SAVE_ERRF, [sp, #36] | ||
90 | |.define SAVE_MULTRES, [sp, #32] | ||
91 | |.define TMPD, [sp, #24] // 64 bit values | ||
92 | |.define SAVE_L, [sp, #16] | ||
93 | |.define SAVE_PC, [sp, #8] | ||
94 | |.define SAVE_CFRAME, [sp, #0] | ||
95 | |//----- 16 byte aligned, <-- sp while in interpreter. | ||
96 | | | ||
97 | |.define TMPDofs, #24 | ||
98 | | | ||
99 | |.macro save_, gpr1, gpr2, fpr1, fpr2 | ||
100 | | stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] | ||
101 | | stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] | ||
102 | |.endmacro | ||
103 | |.macro rest_, gpr1, gpr2, fpr1, fpr2 | ||
104 | | ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] | ||
105 | | ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] | ||
106 | |.endmacro | ||
107 | | | ||
108 | |.macro saveregs | ||
109 | | sub sp, sp, # CFRAME_SPACE | ||
110 | | stp fp, lr, [sp, # SAVE_FP_LR_] | ||
111 | | add fp, sp, # SAVE_FP_LR_ | ||
112 | | stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] | ||
113 | | save_ 21, 22, 8, 9 | ||
114 | | save_ 23, 24, 10, 11 | ||
115 | | save_ 25, 26, 12, 13 | ||
116 | | save_ 27, 28, 14, 15 | ||
117 | |.endmacro | ||
118 | |.macro restoreregs | ||
119 | | ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] | ||
120 | | rest_ 21, 22, 8, 9 | ||
121 | | rest_ 23, 24, 10, 11 | ||
122 | | rest_ 25, 26, 12, 13 | ||
123 | | rest_ 27, 28, 14, 15 | ||
124 | | ldp fp, lr, [sp, # SAVE_FP_LR_] | ||
125 | | add sp, sp, # CFRAME_SPACE | ||
126 | |.endmacro | ||
127 | | | ||
128 | |// Type definitions. Some of these are only used for documentation. | ||
129 | |.type L, lua_State, LREG | ||
130 | |.type GL, global_State, GLREG | ||
131 | |.type TVALUE, TValue | ||
132 | |.type GCOBJ, GCobj | ||
133 | |.type STR, GCstr | ||
134 | |.type TAB, GCtab | ||
135 | |.type LFUNC, GCfuncL | ||
136 | |.type CFUNC, GCfuncC | ||
137 | |.type PROTO, GCproto | ||
138 | |.type UPVAL, GCupval | ||
139 | |.type NODE, Node | ||
140 | |.type NARGS8, int | ||
141 | |.type TRACE, GCtrace | ||
142 | |.type SBUF, SBuf | ||
143 | | | ||
144 | |//----------------------------------------------------------------------- | ||
145 | | | ||
146 | |// Trap for not-yet-implemented parts. | ||
147 | |.macro NYI; brk; .endmacro | ||
148 | | | ||
149 | |//----------------------------------------------------------------------- | ||
150 | | | ||
151 | |// Access to frame relative to BASE. | ||
152 | |.define FRAME_FUNC, #-16 | ||
153 | |.define FRAME_PC, #-8 | ||
154 | | | ||
155 | |// Endian-specific defines. | ||
156 | |.if ENDIAN_LE | ||
157 | |.define LO, 0 | ||
158 | |.define OFS_RD, 2 | ||
159 | |.define OFS_RB, 3 | ||
160 | |.define OFS_RA, 1 | ||
161 | |.define OFS_OP, 0 | ||
162 | |.else | ||
163 | |.define LO, 4 | ||
164 | |.define OFS_RD, 0 | ||
165 | |.define OFS_RB, 0 | ||
166 | |.define OFS_RA, 2 | ||
167 | |.define OFS_OP, 3 | ||
168 | |.endif | ||
169 | | | ||
170 | |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro | ||
171 | |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro | ||
172 | |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro | ||
173 | |.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro | ||
174 | |.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro | ||
175 | | | ||
176 | |// Instruction decode+dispatch. | ||
177 | |.macro ins_NEXT | ||
178 | | ldr INSw, [PC], #4 | ||
179 | | add TMP1, GL, INS, uxtb #3 | ||
180 | | decode_RA RA, INS | ||
181 | | ldr TMP0, [TMP1, #GG_G2DISP] | ||
182 | | decode_RD RC, INS | ||
183 | | br TMP0 | ||
184 | |.endmacro | ||
185 | | | ||
186 | |// Instruction footer. Selects how ins_next/ins_next_ reach the ins_NEXT dispatch sequence. | ||
187 | |.if 1 | ||
188 | | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use. | ||
189 | | .define ins_next, ins_NEXT | ||
190 | | .define ins_next_, ins_NEXT | ||
191 | |.else | ||
192 | | // Common dispatch (compiled out by the '.if 1' above). Lower I-Cache use, only one (very) unpredictable branch. | ||
193 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
194 | | .macro ins_next | ||
195 | | b ->ins_next | ||
196 | | .endmacro | ||
197 | | .macro ins_next_ | ||
198 | | ->ins_next: | ||
199 | | ins_NEXT | ||
200 | | .endmacro | ||
201 | |.endif | ||
202 | | | ||
203 | |// Call decode and dispatch. | ||
204 | |.macro ins_callt | ||
205 | | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
206 | | ldr PC, LFUNC:CARG3->pc | ||
207 | | ldr INSw, [PC], #4 | ||
208 | | add TMP1, GL, INS, uxtb #3 | ||
209 | | decode_RA RA, INS | ||
210 | | ldr TMP0, [TMP1, #GG_G2DISP] | ||
211 | | add RA, BASE, RA, lsl #3 | ||
212 | | br TMP0 | ||
213 | |.endmacro | ||
214 | | | ||
215 | |.macro ins_call | ||
216 | | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC | ||
217 | | str PC, [BASE, FRAME_PC] | ||
218 | | ins_callt | ||
219 | |.endmacro | ||
220 | | | ||
221 | |//----------------------------------------------------------------------- | ||
222 | | | ||
223 | |// Macros to check the TValue type and extract the GCobj. Branch on failure. | ||
224 | |.macro checktp, reg, tp, target | ||
225 | | asr ITYPE, reg, #47 | ||
226 | | cmn ITYPE, #-tp | ||
227 | | and reg, reg, #LJ_GCVMASK | ||
228 | | bne target | ||
229 | |.endmacro | ||
230 | |.macro checktp, dst, reg, tp, target | ||
231 | | asr ITYPE, reg, #47 | ||
232 | | cmn ITYPE, #-tp | ||
233 | | and dst, reg, #LJ_GCVMASK | ||
234 | | bne target | ||
235 | |.endmacro | ||
236 | |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro | ||
237 | |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro | ||
238 | |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro | ||
239 | |.macro checkint, reg, target | ||
240 | | cmp TISNUMhi, reg, lsr #32 | ||
241 | | bne target | ||
242 | |.endmacro | ||
243 | |.macro checknum, reg, target | ||
244 | | cmp TISNUMhi, reg, lsr #32 | ||
245 | | bls target | ||
246 | |.endmacro | ||
247 | |.macro checknumber, reg, target | ||
248 | | cmp TISNUMhi, reg, lsr #32 | ||
249 | | blo target | ||
250 | |.endmacro | ||
251 | | | ||
252 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro | ||
253 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | ||
254 | | | ||
255 | #define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) | ||
256 | | | ||
257 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | ||
258 | | | ||
259 | |.macro hotcheck, delta | ||
260 | | lsr CARG1, PC, #1 | ||
261 | | and CARG1, CARG1, #126 | ||
262 | | add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT | ||
263 | | ldrh CARG2w, [GL, CARG1] | ||
264 | | subs CARG2, CARG2, #delta | ||
265 | | strh CARG2w, [GL, CARG1] | ||
266 | |.endmacro | ||
267 | | | ||
268 | |.macro hotloop | ||
269 | | hotcheck HOTCOUNT_LOOP | ||
270 | | blo ->vm_hotloop | ||
271 | |.endmacro | ||
272 | | | ||
273 | |.macro hotcall | ||
274 | | hotcheck HOTCOUNT_CALL | ||
275 | | blo ->vm_hotcall | ||
276 | |.endmacro | ||
277 | | | ||
278 | |// Set current VM state. | ||
279 | |.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro | ||
280 | |.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro | ||
281 | | | ||
282 | |// Move table write barrier back. Overwrites mark and tmp. | ||
283 | |.macro barrierback, tab, mark, tmp | ||
284 | | ldr tmp, GL->gc.grayagain | ||
285 | | and mark, mark, #~LJ_GC_BLACK // black2gray(tab) | ||
286 | | str tab, GL->gc.grayagain | ||
287 | | strb mark, tab->marked | ||
288 | | str tmp, tab->gclist | ||
289 | |.endmacro | ||
290 | | | ||
291 | |//----------------------------------------------------------------------- | ||
292 | |||
293 | #if !LJ_DUALNUM | ||
294 | #error "Only dual-number mode supported for ARM64 target" | ||
295 | #endif | ||
296 | |||
297 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
298 | /* The .code_sub section should be last to help static branch prediction. */ | ||
299 | static void build_subroutines(BuildCtx *ctx) | ||
300 | { | ||
301 | |.code_sub | ||
302 | | | ||
303 | |//----------------------------------------------------------------------- | ||
304 | |//-- Return handling ---------------------------------------------------- | ||
305 | |//----------------------------------------------------------------------- | ||
306 | | | ||
307 | |->vm_returnp: | ||
308 | | // See vm_return. Also: RB = previous base. | ||
309 | | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0? | ||
310 | | | ||
311 | | // Return from pcall or xpcall fast func. | ||
312 | | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. | ||
313 | | mov_true TMP0 | ||
314 | | mov BASE, RB | ||
315 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
316 | | str TMP0, [RA, #-8]! // Prepend true to results. | ||
317 | | | ||
318 | |->vm_returnc: | ||
319 | | adds RC, RC, #8 // RC = (nresults+1)*8. | ||
320 | | mov CRET1, #LUA_YIELD | ||
321 | | beq ->vm_unwind_c_eh | ||
322 | | str RCw, SAVE_MULTRES | ||
323 | | ands CARG1, PC, #FRAME_TYPE | ||
324 | | beq ->BC_RET_Z // Handle regular return to Lua. | ||
325 | | | ||
326 | |->vm_return: | ||
327 | | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return | ||
328 | | // CARG1 = PC & FRAME_TYPE | ||
329 | | and RB, PC, #~FRAME_TYPEP | ||
330 | | cmp CARG1, #FRAME_C | ||
331 | | sub RB, BASE, RB // RB = previous base. | ||
332 | | bne ->vm_returnp | ||
333 | | | ||
334 | | str RB, L->base | ||
335 | | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1. | ||
336 | | mv_vmstate TMP0w, C | ||
337 | | sub BASE, BASE, #16 | ||
338 | | subs TMP2, RC, #8 | ||
339 | | st_vmstate TMP0w | ||
340 | | beq >2 | ||
341 | |1: | ||
342 | | subs TMP2, TMP2, #8 | ||
343 | | ldr TMP0, [RA], #8 | ||
344 | | str TMP0, [BASE], #8 | ||
345 | | bne <1 | ||
346 | |2: | ||
347 | | cmp RC, CARG2, lsl #3 // More/less results wanted? | ||
348 | | bne >6 | ||
349 | |3: | ||
350 | | str BASE, L->top // Store new top. | ||
351 | | | ||
352 | |->vm_leave_cp: | ||
353 | | ldr RC, SAVE_CFRAME // Restore previous C frame. | ||
354 | | mov CRET1, #0 // Ok return status for vm_pcall. | ||
355 | | str RC, L->cframe | ||
356 | | | ||
357 | |->vm_leave_unw: | ||
358 | | restoreregs | ||
359 | | ret | ||
360 | | | ||
361 | |6: | ||
362 | | bgt >7 // Less results wanted? | ||
363 | | // More results wanted. Check stack size and fill up results with nil. | ||
364 | | ldr CARG3, L->maxstack | ||
365 | | cmp BASE, CARG3 | ||
366 | | bhs >8 | ||
367 | | str TISNIL, [BASE], #8 | ||
368 | | add RC, RC, #8 | ||
369 | | b <2 | ||
370 | | | ||
371 | |7: // Less results wanted. | ||
372 | | cbz CARG2, <3 // LUA_MULTRET+1 case? | ||
373 | | sub CARG1, RC, CARG2, lsl #3 | ||
374 | | sub BASE, BASE, CARG1 // Shrink top. | ||
375 | | b <3 | ||
376 | | | ||
377 | |8: // Corner case: need to grow stack for filling up results. | ||
378 | | // This can happen if: | ||
379 | | // - A C function grows the stack (a lot). | ||
380 | | // - The GC shrinks the stack in between. | ||
381 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
382 | | str BASE, L->top // Save current top held in BASE (yes). | ||
383 | | mov CARG1, L | ||
384 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
385 | | ldr BASE, L->top // Need the (realloced) L->top in BASE. | ||
386 | | ldrsw CARG2, SAVE_NRES | ||
387 | | b <2 | ||
388 | | | ||
389 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
390 | | // (void *cframe, int errcode) | ||
391 | | mov sp, CARG1 | ||
392 | | mov CRET1, CARG2 | ||
393 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | ||
394 | | ldr L, SAVE_L | ||
395 | | mv_vmstate TMP0w, C | ||
396 | | ldr GL, L->glref | ||
397 | | st_vmstate TMP0w | ||
398 | | b ->vm_leave_unw | ||
399 | | | ||
400 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
401 | | // (void *cframe) | ||
402 | | and sp, CARG1, #CFRAME_RAWMASK | ||
403 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | ||
404 | | ldr L, SAVE_L | ||
405 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
406 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
407 | | movn TISNIL, #0 | ||
408 | | mov RC, #16 // 2 results: false + error message. | ||
409 | | ldr BASE, L->base | ||
410 | | ldr GL, L->glref // Setup pointer to global state. | ||
411 | | mov_false TMP0 | ||
412 | | sub RA, BASE, #8 // Results start at BASE-8. | ||
413 | | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. | ||
414 | | str TMP0, [BASE, #-8] // Prepend false to error message. | ||
415 | | st_vmstate ST_INTERP | ||
416 | | b ->vm_returnc | ||
417 | | | ||
418 | |//----------------------------------------------------------------------- | ||
419 | |//-- Grow stack for calls ----------------------------------------------- | ||
420 | |//----------------------------------------------------------------------- | ||
421 | | | ||
422 | |->vm_growstack_c: // Grow stack for C function. | ||
423 | | // CARG1 = L | ||
424 | | mov CARG2, #LUA_MINSTACK | ||
425 | | b >2 | ||
426 | | | ||
427 | |->vm_growstack_l: // Grow stack for Lua function. | ||
428 | | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC | ||
429 | | add RC, BASE, RC | ||
430 | | sub RA, RA, BASE | ||
431 | | mov CARG1, L | ||
432 | | stp BASE, RC, L->base | ||
433 | | add PC, PC, #4 // Must point after first instruction. | ||
434 | | lsr CARG2, RA, #3 | ||
435 | |2: | ||
436 | | // L->base = new base, L->top = top | ||
437 | | str PC, SAVE_PC | ||
438 | | bl extern lj_state_growstack // (lua_State *L, int n): CARG1 = L, CARG2 = n | ||
439 | | ldp BASE, RC, L->base | ||
440 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
441 | | sub NARGS8:RC, RC, BASE | ||
442 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
443 | | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
444 | | ins_callt // Just retry the call. | ||
445 | | | ||
446 | |//----------------------------------------------------------------------- | ||
447 | |//-- Entry points into the assembler VM --------------------------------- | ||
448 | |//----------------------------------------------------------------------- | ||
449 | | | ||
450 | |->vm_resume: // Setup C frame and resume thread. | ||
451 | | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0). A zero L->status takes the call path at label 3; otherwise the resume-after-yield path below runs. | ||
452 | | saveregs | ||
453 | | mov L, CARG1 | ||
454 | | ldr GL, L->glref // Setup pointer to global state. | ||
455 | | mov BASE, CARG2 | ||
456 | | str L, SAVE_L | ||
457 | | mov PC, #FRAME_CP | ||
458 | | str wzr, SAVE_NRES | ||
459 | | add TMP0, sp, #CFRAME_RESUME | ||
460 | | ldrb TMP1w, L->status | ||
461 | | str wzr, SAVE_ERRF | ||
462 | | str L, SAVE_PC // Any value outside of bytecode is ok. | ||
463 | | str xzr, SAVE_CFRAME | ||
464 | | str TMP0, L->cframe | ||
465 | | cbz TMP1w, >3 | ||
466 | | | ||
467 | | // Resume after yield (like a return). Clears L->status, then returns via BC_RET_Z or vm_return depending on the FRAME_TYPE bits of PC. | ||
468 | | str L, GL->cur_L | ||
469 | | mov RA, BASE | ||
470 | | ldp BASE, CARG1, L->base | ||
471 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
472 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
473 | | ldr PC, [BASE, FRAME_PC] | ||
474 | | strb wzr, L->status | ||
475 | | movn TISNIL, #0 | ||
476 | | sub RC, CARG1, BASE | ||
477 | | ands CARG1, PC, #FRAME_TYPE | ||
478 | | add RC, RC, #8 | ||
479 | | st_vmstate ST_INTERP | ||
480 | | str RCw, SAVE_MULTRES | ||
481 | | beq ->BC_RET_Z | ||
482 | | b ->vm_return | ||
483 | | | ||
484 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
485 | | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef). Stores ef to SAVE_ERRF and uses frame type FRAME_CP. | ||
486 | | saveregs | ||
487 | | mov PC, #FRAME_CP | ||
488 | | str CARG4w, SAVE_ERRF | ||
489 | | b >1 | ||
490 | | | ||
491 | |->vm_call: // Setup C frame and enter VM. | ||
492 | | // (lua_State *L, TValue *base, int nres1). Uses frame type FRAME_C and falls through to label 1. | ||
493 | | saveregs | ||
494 | | mov PC, #FRAME_C | ||
495 | | | ||
496 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
497 | | ldr RC, L:CARG1->cframe | ||
498 | | str CARG3w, SAVE_NRES | ||
499 | | mov L, CARG1 | ||
500 | | str CARG1, SAVE_L | ||
501 | | ldr GL, L->glref // Setup pointer to global state. | ||
502 | | mov BASE, CARG2 | ||
503 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
504 | | add TMP0, sp, #0 | ||
505 | | str RC, SAVE_CFRAME | ||
506 | | str TMP0, L->cframe // Add our C frame to cframe chain. | ||
507 | | | ||
508 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | ||
509 | | str L, GL->cur_L | ||
510 | | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). CARG1 = L->top. | ||
511 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
512 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
513 | | add PC, PC, BASE | ||
514 | | movn TISNIL, #0 | ||
515 | | sub PC, PC, RB // PC = frame delta + frame type | ||
516 | | sub NARGS8:RC, CARG1, BASE | ||
517 | | st_vmstate ST_INTERP | ||
518 | | | ||
519 | |->vm_call_dispatch: | ||
520 | | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC. Non-function callees go to vmeta_call. | ||
521 | | ldr CARG3, [BASE, FRAME_FUNC] | ||
522 | | checkfunc CARG3, ->vmeta_call | ||
523 | | | ||
524 | |->vm_call_dispatch_f: | ||
525 | | ins_call | ||
526 | | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC | ||
527 | | | ||
528 | |->vm_cpcall: // Setup protected C frame, call C. | ||
529 | | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp). Calls cp; a non-zero result becomes BASE for a call via label 3. | ||
530 | | saveregs | ||
531 | | mov L, CARG1 | ||
532 | | ldr RA, L:CARG1->stack | ||
533 | | str CARG1, SAVE_L | ||
534 | | ldr GL, L->glref // Setup pointer to global state. | ||
535 | | ldr RB, L->top | ||
536 | | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
537 | | ldr RC, L->cframe | ||
538 | | sub RA, RA, RB // Compute -savestack(L, L->top). | ||
539 | | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. | ||
540 | | str wzr, SAVE_ERRF // No error function. | ||
541 | | add TMP0, sp, #0 | ||
542 | | str RC, SAVE_CFRAME | ||
543 | | str TMP0, L->cframe // Add our C frame to cframe chain. | ||
544 | | str L, GL->cur_L | ||
545 | | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) | ||
546 | | mov BASE, CRET1 | ||
547 | | mov PC, #FRAME_CP | ||
548 | | cbnz BASE, <3 // Else continue with the call. | ||
549 | | b ->vm_leave_cp // No base? Just remove C frame. | ||
550 | | | ||
551 | |//----------------------------------------------------------------------- | ||
552 | |//-- Metamethod handling ------------------------------------------------ | ||
553 | |//----------------------------------------------------------------------- | ||
554 | | | ||
555 | |//-- Continuation dispatch ---------------------------------------------- | ||
556 | | | ||
557 | |->cont_dispatch: | ||
558 | | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8. RB is copied into BASE as the caller base. | ||
559 | | ldr LFUNC:CARG3, [RB, FRAME_FUNC] | ||
560 | | ldr CARG1, [BASE, #-32] // Get continuation. | ||
561 | | mov CARG4, BASE | ||
562 | | mov BASE, RB // Restore caller BASE. | ||
563 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
564 | |.if FFI | ||
565 | | cmp CARG1, #1 | ||
566 | |.endif | ||
567 | | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. | ||
568 | | add TMP0, RA, RC | ||
569 | | str TISNIL, [TMP0, #-8] // Ensure one valid arg. | ||
570 | |.if FFI | ||
571 | | bls >1 | ||
572 | |.endif | ||
573 | | ldr CARG3, LFUNC:CARG3->pc | ||
574 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
575 | | // BASE = base, RA = resultptr, CARG4 = meta base. Jumps to the continuation address in CARG1. | ||
576 | | br CARG1 | ||
577 | | | ||
578 | |.if FFI | ||
579 | |1: | ||
580 | | beq ->cont_ffi_callback // cont = 1: return from FFI callback. | ||
581 | | // cont = 0: tailcall from C function. | ||
582 | | sub CARG4, CARG4, #32 | ||
583 | | sub RC, CARG4, BASE | ||
584 | | b ->vm_call_tail | ||
585 | |.endif | ||
586 | | | ||
587 | |->cont_cat: // RA = resultptr, CARG4 = meta base. If CARG4-32 equals BASE+RB*8 the result goes to slot RA; otherwise it is stored at CARG4-32 and BC_CAT_Z continues. | ||
588 | | ldr INSw, [PC, #-4] | ||
589 | | sub CARG2, CARG4, #32 | ||
590 | | ldr TMP0, [RA] | ||
591 | | str BASE, L->base | ||
592 | | decode_RB RB, INS | ||
593 | | decode_RA RA, INS | ||
594 | | add TMP1, BASE, RB, lsl #3 | ||
595 | | subs TMP1, CARG2, TMP1 | ||
596 | | beq >1 | ||
597 | | str TMP0, [CARG2] | ||
598 | | lsr CARG3, TMP1, #3 | ||
599 | | b ->BC_CAT_Z | ||
600 | | | ||
601 | |1: | ||
602 | | str TMP0, [BASE, RA, lsl #3] | ||
603 | | b ->cont_nop | ||
604 | | | ||
605 | |//-- Table indexing metamethods ----------------------------------------- | ||
606 | | | ||
607 | |->vmeta_tgets1: | ||
608 | | movn CARG4, #~LJ_TSTR | ||
609 | | add CARG2, BASE, RB, lsl #3 | ||
610 | | add CARG4, STR:RC, CARG4, lsl #47 | ||
611 | | b >2 | ||
612 | | | ||
613 | |->vmeta_tgets: | ||
614 | | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 | ||
615 | | str CARG2, GL->tmptv | ||
616 | | add CARG2, GL, #offsetof(global_State, tmptv) | ||
617 | |2: | ||
618 | | add CARG3, sp, TMPDofs | ||
619 | | str CARG4, TMPD | ||
620 | | b >1 | ||
621 | | | ||
622 | |->vmeta_tgetb: // RB = table, RC = index. TISNUM is added to the index and the result is passed through TMPD. | ||
623 | | add RC, RC, TISNUM | ||
624 | | add CARG2, BASE, RB, lsl #3 | ||
625 | | add CARG3, sp, TMPDofs | ||
626 | | str RC, TMPD | ||
627 | | b >1 | ||
628 | | | ||
629 | |->vmeta_tgetv: // RB = table, RC = key. Both are passed as addresses of their BASE-relative slots. | ||
630 | | add CARG2, BASE, RB, lsl #3 | ||
631 | | add CARG3, BASE, RC, lsl #3 | ||
632 | |1: | ||
633 | | str BASE, L->base | ||
634 | | mov CARG1, L | ||
635 | | str PC, SAVE_PC | ||
636 | | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | ||
637 | | // Returns TValue * (finished) or NULL (metamethod). A finished value is copied to slot RA. | ||
638 | | cbz CRET1, >3 | ||
639 | | ldr TMP0, [CRET1] | ||
640 | | str TMP0, [BASE, RA, lsl #3] | ||
641 | | ins_next | ||
642 | | | ||
643 | |3: // Call __index metamethod. | ||
644 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
645 | | sub TMP1, BASE, #FRAME_CONT | ||
646 | | ldr BASE, L->top | ||
647 | | mov NARGS8:RC, #16 // 2 args for func(t, k). | ||
648 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | ||
649 | | str PC, [BASE, #-24] // [cont|PC] | ||
650 | | sub PC, BASE, TMP1 | ||
651 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
652 | | b ->vm_call_dispatch_f | ||
653 | | | ||
654 | |->vmeta_tgetr: | ||
655 | | sxtw CARG2, TMP1w | ||
656 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
657 | | // Returns cTValue * or NULL. On NULL, TMP0 keeps TISNIL for BC_TGETR_Z. | ||
658 | | mov TMP0, TISNIL | ||
659 | | cbz CRET1, ->BC_TGETR_Z | ||
660 | | ldr TMP0, [CRET1] | ||
661 | | b ->BC_TGETR_Z | ||
662 | | | ||
663 | |//----------------------------------------------------------------------- | ||
664 | | | ||
665 | |->vmeta_tsets1: | ||
666 | | movn CARG4, #~LJ_TSTR | ||
667 | | add CARG2, BASE, RB, lsl #3 | ||
668 | | add CARG4, STR:RC, CARG4, lsl #47 | ||
669 | | b >2 | ||
670 | | | ||
671 | |->vmeta_tsets: | ||
672 | | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 | ||
673 | | str CARG2, GL->tmptv | ||
674 | | add CARG2, GL, #offsetof(global_State, tmptv) | ||
675 | |2: | ||
676 | | add CARG3, sp, TMPDofs | ||
677 | | str CARG4, TMPD | ||
678 | | b >1 | ||
679 | | | ||
680 | |->vmeta_tsetb: // RB = table, RC = index. TISNUM is added to the index and the result is passed through TMPD. | ||
681 | | add RC, RC, TISNUM | ||
682 | | add CARG2, BASE, RB, lsl #3 | ||
683 | | add CARG3, sp, TMPDofs | ||
684 | | str RC, TMPD | ||
685 | | b >1 | ||
686 | | | ||
687 | |->vmeta_tsetv: | ||
688 | | add CARG2, BASE, RB, lsl #3 | ||
689 | | add CARG3, BASE, RC, lsl #3 | ||
690 | |1: | ||
691 | | str BASE, L->base | ||
692 | | mov CARG1, L | ||
693 | | str PC, SAVE_PC | ||
694 | | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | ||
695 | | // Returns TValue * (finished) or NULL (metamethod). The value in slot RA is loaded into TMP0 for both outcomes. | ||
696 | | ldr TMP0, [BASE, RA, lsl #3] | ||
697 | | cbz CRET1, >3 | ||
698 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
699 | | str TMP0, [CRET1] | ||
700 | | ins_next | ||
701 | | | ||
702 | |3: // Call __newindex metamethod. | ||
703 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
704 | | sub TMP1, BASE, #FRAME_CONT | ||
705 | | ldr BASE, L->top | ||
706 | | mov NARGS8:RC, #24 // 3 args for func(t, k, v). | ||
707 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | ||
708 | | str TMP0, [BASE, #16] // Copy value to third argument. | ||
709 | | str PC, [BASE, #-24] // [cont|PC] | ||
710 | | sub PC, BASE, TMP1 | ||
711 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
712 | | b ->vm_call_dispatch_f | ||
713 | | | ||
714 | |->vmeta_tsetr: | ||
715 | | sxtw CARG3, TMP1w | ||
716 | | str BASE, L->base | ||
717 | | mov CARG1, L | ||
718 | | str PC, SAVE_PC | ||
719 | | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
720 | | // Returns TValue *. Control continues at BC_TSETR_Z. | ||
721 | | b ->BC_TSETR_Z | ||
722 | | | ||
723 | |//-- Comparison metamethods --------------------------------------------- | ||
724 | | | ||
725 | |->vmeta_comp: | ||
726 | | add CARG2, BASE, RA, lsl #3 | ||
727 | | sub PC, PC, #4 | ||
728 | | add CARG3, BASE, RC, lsl #3 | ||
729 | | str BASE, L->base | ||
730 | | mov CARG1, L | ||
731 | | str PC, SAVE_PC | ||
732 | | uxtb CARG4w, INSw | ||
733 | | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | ||
734 | | // Returns 0/1 or TValue * (metamethod). Results above 1 (unsigned) go to vmeta_binop. | ||
735 | |3: | ||
736 | | cmp CRET1, #1 | ||
737 | | bhi ->vmeta_binop | ||
738 | |4: | ||
739 | | ldrh RBw, [PC, # OFS_RD] | ||
740 | | add PC, PC, #4 | ||
741 | | add RB, PC, RB, lsl #2 | ||
742 | | sub RB, RB, #0x20000 | ||
743 | | csel PC, PC, RB, lo | ||
744 | |->cont_nop: | ||
745 | | ins_next | ||
746 | | | ||
747 | |->cont_ra: // RA = resultptr. Copies the result to the slot named by operand A of the instruction at PC-4. | ||
748 | | ldr INSw, [PC, #-4] | ||
749 | | ldr TMP0, [RA] | ||
750 | | decode_RA TMP1, INS | ||
751 | | str TMP0, [BASE, TMP1, lsl #3] | ||
752 | | b ->cont_nop | ||
753 | | | ||
754 | |->cont_condt: // RA = resultptr | ||
755 | | ldr TMP0, [RA] | ||
756 | | mov_true TMP1 | ||
757 | | cmp TMP1, TMP0 // Branch if result is true. | ||
758 | | b <4 | ||
759 | | | ||
760 | |->cont_condf: // RA = resultptr | ||
761 | | ldr TMP0, [RA] | ||
762 | | mov_false TMP1 | ||
763 | | cmp TMP0, TMP1 // Branch if result is false. | ||
764 | | b <4 | ||
765 | | | ||
766 | |->vmeta_equal: | ||
767 | | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. | ||
768 | | and TAB:CARG3, CARG3, #LJ_GCVMASK | ||
769 | | sub PC, PC, #4 | ||
770 | | str BASE, L->base | ||
771 | | mov CARG1, L | ||
772 | | str PC, SAVE_PC | ||
773 | | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
774 | | // Returns 0/1 or TValue * (metamethod). The result is handled at the preceding label 3. | ||
775 | | b <3 | ||
776 | | | ||
777 | |->vmeta_equal_cd: | ||
778 | |.if FFI | ||
779 | | sub PC, PC, #4 | ||
780 | | str BASE, L->base | ||
781 | | mov CARG1, L | ||
782 | | mov CARG2, INS | ||
783 | | str PC, SAVE_PC | ||
784 | | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) | ||
785 | | // Returns 0/1 or TValue * (metamethod). The result is handled at the preceding label 3. | ||
786 | | b <3 | ||
787 | |.endif | ||
788 | | | ||
789 | |->vmeta_istype: | ||
790 | | sub PC, PC, #4 | ||
791 | | str BASE, L->base | ||
792 | | mov CARG1, L | ||
793 | | mov CARG2, RA | ||
794 | | mov CARG3, RC | ||
795 | | str PC, SAVE_PC | ||
796 | | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp). PC is rewound by 4 and saved before the call. | ||
797 | | b ->cont_nop | ||
798 | | | ||
799 | |//-- Arithmetic metamethods --------------------------------------------- | ||
800 | | | ||
801 | |->vmeta_arith_vn: | ||
802 | | add CARG3, BASE, RB, lsl #3 | ||
803 | | add CARG4, KBASE, RC, lsl #3 | ||
804 | | b >1 | ||
805 | | | ||
806 | |->vmeta_arith_nv: | ||
807 | | add CARG4, BASE, RB, lsl #3 | ||
808 | | add CARG3, KBASE, RC, lsl #3 | ||
809 | | b >1 | ||
810 | | | ||
811 | |->vmeta_unm: | ||
812 | | add CARG3, BASE, RC, lsl #3 | ||
813 | | mov CARG4, CARG3 | ||
814 | | b >1 | ||
815 | | | ||
816 | |->vmeta_arith_vv: | ||
817 | | add CARG3, BASE, RB, lsl #3 | ||
818 | | add CARG4, BASE, RC, lsl #3 | ||
819 | |1: | ||
820 | | uxtb CARG5w, INSw | ||
821 | | add CARG2, BASE, RA, lsl #3 | ||
822 | | str BASE, L->base | ||
823 | | mov CARG1, L | ||
824 | | str PC, SAVE_PC | ||
825 | | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op). op is the low byte of INS. | ||
826 | | // Returns NULL (finished) or TValue * (metamethod). | ||
827 | | cbz CRET1, ->cont_nop | ||
828 | | | ||
829 | | // Call metamethod for binary op. | ||
830 | |->vmeta_binop: | ||
831 | | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 | ||
832 | | sub TMP1, CRET1, BASE | ||
833 | | str PC, [CRET1, #-24] // [cont|PC] | ||
834 | | add PC, TMP1, #FRAME_CONT | ||
835 | | mov BASE, CRET1 | ||
836 | | mov NARGS8:RC, #16 // 2 args for func(o1, o2). | ||
837 | | b ->vm_call_dispatch | ||
838 | | | ||
839 | |->vmeta_len: | ||
840 | | add CARG2, BASE, RC, lsl #3 | ||
841 | #if LJ_52 | ||
842 | | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types). | ||
843 | #endif | ||
844 | | str BASE, L->base | ||
845 | | mov CARG1, L | ||
846 | | str PC, SAVE_PC | ||
847 | | bl extern lj_meta_len // (lua_State *L, TValue *o) | ||
848 | | // Returns NULL (retry) or TValue * (metamethod base). Under LJ_52, NULL restores the saved table into CARG1 and retries at BC_LEN_Z. | ||
849 | #if LJ_52 | ||
850 | | cbnz CRET1, ->vmeta_binop // Binop call for compatibility. | ||
851 | | mov TAB:CARG1, TAB:RC | ||
852 | | b ->BC_LEN_Z | ||
853 | #else | ||
854 | | b ->vmeta_binop // Binop call for compatibility. | ||
855 | #endif | ||
856 | | | ||
857 | |//-- Call metamethod ---------------------------------------------------- | ||
858 | | | ||
859 | |->vmeta_call: // Resolve and call __call metamethod. | ||
860 | | // RB = old base, BASE = new base, RC = nargs*8 | ||
861 | | mov CARG1, L | ||
862 | | str RB, L->base // This is the callers base! | ||
863 | | sub CARG2, BASE, #16 | ||
864 | | str PC, SAVE_PC | ||
865 | | add CARG3, BASE, NARGS8:RC | ||
866 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
867 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | ||
868 | | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. | ||
869 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
870 | | ins_call | ||
871 | | | ||
872 | |->vmeta_callt: // Resolve __call for BC_CALLT. | ||
873 | | // BASE = old base, RA = new base, RC = nargs*8. Continues at BC_CALLT2_Z once the callee is resolved. | ||
874 | | mov CARG1, L | ||
875 | | str BASE, L->base | ||
876 | | sub CARG2, RA, #16 | ||
877 | | str PC, SAVE_PC | ||
878 | | add CARG3, RA, NARGS8:RC | ||
879 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
880 | | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here. | ||
881 | | ldr PC, [BASE, FRAME_PC] | ||
882 | | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. | ||
883 | | and LFUNC:CARG3, TMP1, #LJ_GCVMASK | ||
884 | | b ->BC_CALLT2_Z | ||
885 | | | ||
886 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
887 | | | ||
888 | |->vmeta_for: | ||
889 | | mov CARG1, L | ||
890 | | str BASE, L->base | ||
891 | | mov CARG2, RA | ||
892 | | str PC, SAVE_PC | ||
893 | | bl extern lj_meta_for // (lua_State *L, TValue *base). Afterwards the instruction at PC-4 is re-decoded and control goes to BC_FORI (or BC_JFORI when JIT is enabled and the opcode matches). | ||
894 | | ldr INSw, [PC, #-4] | ||
895 | |.if JIT | ||
896 | | uxtb TMP0w, INSw | ||
897 | |.endif | ||
898 | | decode_RA RA, INS | ||
899 | | decode_RD RC, INS | ||
900 | |.if JIT | ||
901 | | cmp TMP0, #BC_JFORI | ||
902 | | beq =>BC_JFORI | ||
903 | |.endif | ||
904 | | b =>BC_FORI | ||
905 | | | ||
906 | |//----------------------------------------------------------------------- | ||
907 | |//-- Fast functions ----------------------------------------------------- | ||
908 | |//----------------------------------------------------------------------- | ||
909 | | | ||
910 | |.macro .ffunc, name | ||
911 | |->ff_ .. name: | ||
912 | |.endmacro | ||
913 | | | ||
914 | |.macro .ffunc_1, name | ||
915 | |->ff_ .. name: | ||
916 | | ldr CARG1, [BASE] | ||
917 | | cmp NARGS8:RC, #8 | ||
918 | | blo ->fff_fallback | ||
919 | |.endmacro | ||
920 | | | ||
921 | |.macro .ffunc_2, name | ||
922 | |->ff_ .. name: | ||
923 | | ldp CARG1, CARG2, [BASE] | ||
924 | | cmp NARGS8:RC, #16 | ||
925 | | blo ->fff_fallback | ||
926 | |.endmacro | ||
927 | | | ||
928 | |.macro .ffunc_n, name | ||
929 | | .ffunc name | ||
930 | | ldr CARG1, [BASE] | ||
931 | | cmp NARGS8:RC, #8 | ||
932 | | ldr FARG1, [BASE] | ||
933 | | blo ->fff_fallback | ||
934 | | checknum CARG1, ->fff_fallback | ||
935 | |.endmacro | ||
936 | | | ||
937 | |.macro .ffunc_nn, name | ||
938 | | .ffunc name | ||
939 | | ldp CARG1, CARG2, [BASE] | ||
940 | | cmp NARGS8:RC, #16 | ||
941 | | ldp FARG1, FARG2, [BASE] | ||
942 | | blo ->fff_fallback | ||
943 | | checknum CARG1, ->fff_fallback | ||
944 | | checknum CARG2, ->fff_fallback | ||
945 | |.endmacro | ||
946 | | | ||
947 | |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. Calls fff_gcstep unless gc.total is (signed) less than the following field. | ||
948 | |.macro ffgccheck | ||
949 | | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total. | ||
950 | | cmp CARG1, CARG2 | ||
951 | | blt >1 | ||
952 | | bl ->fff_gcstep | ||
953 | |1: | ||
954 | |.endmacro | ||
955 | | | ||
956 | |//-- Base library: checks ----------------------------------------------- | ||
957 | | | ||
958 | |.ffunc_1 assert | ||
959 | | ldr PC, [BASE, FRAME_PC] | ||
960 | | mov_false TMP1 | ||
961 | | cmp CARG1, TMP1 | ||
962 | | bhs ->fff_fallback | ||
963 | | str CARG1, [BASE, #-16] | ||
964 | | sub RB, BASE, #8 | ||
965 | | subs RA, NARGS8:RC, #8 | ||
966 | | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. | ||
967 | | cbz RA, ->fff_res // Done if exactly 1 argument. Otherwise the loop below moves each remaining argument down by 16 bytes. | ||
968 | |1: | ||
969 | | ldr CARG1, [RB, #16] | ||
970 | | sub RA, RA, #8 | ||
971 | | str CARG1, [RB], #8 | ||
972 | | cbnz RA, <1 | ||
973 | | b ->fff_res | ||
974 | | | ||
975 | |.ffunc_1 type | ||
976 | | mov TMP0, #~LJ_TISNUM | ||
977 | | asr ITYPE, CARG1, #47 | ||
978 | | cmn ITYPE, #~LJ_TISNUM | ||
979 | | csinv TMP1, TMP0, ITYPE, lo | ||
980 | | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 | ||
981 | | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] | ||
982 | | b ->fff_restv | ||
983 | | | ||
984 | |//-- Base library: getters and setters --------------------------------- | ||
985 | | | ||
986 | |.ffunc_1 getmetatable | ||
987 | | asr ITYPE, CARG1, #47 | ||
988 | | cmn ITYPE, #-LJ_TTAB | ||
989 | | ccmn ITYPE, #-LJ_TUDATA, #4, ne | ||
990 | | and TAB:CARG1, CARG1, #LJ_GCVMASK | ||
991 | | bne >6 | ||
992 | |1: // Field metatable must be at same offset for GCtab and GCudata! | ||
993 | | ldr TAB:RB, TAB:CARG1->metatable | ||
994 | |2: | ||
995 | | mov CARG1, TISNIL | ||
996 | | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] | ||
997 | | cbz TAB:RB, ->fff_restv | ||
998 | | ldr TMP1w, TAB:RB->hmask | ||
999 | | ldr TMP2w, STR:RC->sid | ||
1000 | | ldr NODE:CARG3, TAB:RB->node | ||
1001 | | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | ||
1002 | | add TMP1, TMP1, TMP1, lsl #1 | ||
1003 | | movn CARG4, #~LJ_TSTR | ||
1004 | | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 | ||
1005 | | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. | ||
1006 | |3: // Rearranged logic, because we expect _not_ to find the key. CARG1 = node value, TMP0 = node key. | ||
1007 | | ldp CARG1, TMP0, NODE:CARG3->val | ||
1008 | | ldr NODE:CARG3, NODE:CARG3->next | ||
1009 | | cmp TMP0, CARG4 | ||
1010 | | beq >5 | ||
1011 | | cbnz NODE:CARG3, <3 | ||
1012 | |4: | ||
1013 | | mov CARG1, RB // Use metatable as default result. | ||
1014 | | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 | ||
1015 | | b ->fff_restv | ||
1016 | |5: | ||
1017 | | cmp CARG1, TISNIL // Key found: test the value (CARG1), not the key (TMP0 == CARG4 here). | ||
1018 | | bne ->fff_restv | ||
1019 | | b <4 | ||
1020 | | | ||
1021 | |6: | ||
1022 | | movn TMP0, #~LJ_TISNUM | ||
1023 | | cmp ITYPE, TMP0 | ||
1024 | | csel ITYPE, ITYPE, TMP0, hs | ||
1025 | | sub TMP1, GL, ITYPE, lsl #3 | ||
1026 | | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] | ||
1027 | | b <2 | ||
1028 | | | ||
1029 | |.ffunc_2 setmetatable | ||
1030 | | // Fast path: no mt for table yet and not clearing the mt. Falls back unless both arguments are tables and the current metatable field is zero. | ||
1031 | | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback | ||
1032 | | ldr TAB:TMP0, TAB:TMP1->metatable | ||
1033 | | asr ITYPE, CARG2, #47 | ||
1034 | | ldrb TMP2w, TAB:TMP1->marked | ||
1035 | | cmn ITYPE, #-LJ_TTAB | ||
1036 | | and TAB:CARG2, CARG2, #LJ_GCVMASK | ||
1037 | | ccmp TAB:TMP0, #0, #0, eq | ||
1038 | | bne ->fff_fallback | ||
1039 | | str TAB:CARG2, TAB:TMP1->metatable | ||
1040 | | tbz TMP2w, #2, ->fff_restv // isblack(table) | ||
1041 | | barrierback TAB:TMP1, TMP2w, TMP0 | ||
1042 | | b ->fff_restv | ||
1043 | | | ||
1044 | |.ffunc rawget | ||
1045 | | ldr CARG2, [BASE] | ||
1046 | | cmp NARGS8:RC, #16 | ||
1047 | | blo ->fff_fallback | ||
1048 | | checktab CARG2, ->fff_fallback | ||
1049 | | mov CARG1, L | ||
1050 | | add CARG3, BASE, #8 | ||
1051 | | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | ||
1052 | | // Returns cTValue *. The pointed-to value is loaded and returned via fff_restv. | ||
1053 | | ldr CARG1, [CRET1] | ||
1054 | | b ->fff_restv | ||
1055 | | | ||
1056 | |//-- Base library: conversions ------------------------------------------ | ||
1057 | | | ||
1058 | |.ffunc tonumber | ||
1059 | | // Only handles the number case inline (without a base argument). Any NARGS8 other than 8 falls back. | ||
1060 | | ldr CARG1, [BASE] | ||
1061 | | cmp NARGS8:RC, #8 | ||
1062 | | bne ->fff_fallback | ||
1063 | | checknumber CARG1, ->fff_fallback | ||
1064 | | b ->fff_restv | ||
1065 | | | ||
1066 | |.ffunc_1 tostring | ||
1067 | | // Only handles the string or number case inline. | ||
1068 | | asr ITYPE, CARG1, #47 | ||
1069 | | cmn ITYPE, #-LJ_TSTR | ||
1070 | | // A __tostring method in the string base metatable is ignored. | ||
1071 | | beq ->fff_restv | ||
1072 | | // Handle numbers inline, unless a number base metatable is present. | ||
1073 | | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM] | ||
1074 | | str BASE, L->base | ||
1075 | | cmn ITYPE, #-LJ_TISNUM | ||
1076 | | ccmp TMP1, #0, #0, ls | ||
1077 | | str PC, SAVE_PC // Redundant (but a defined value). | ||
1078 | | bne ->fff_fallback | ||
1079 | | ffgccheck | ||
1080 | | mov CARG1, L | ||
1081 | | mov CARG2, BASE | ||
1082 | | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) | ||
1083 | | // Returns GCstr *. BASE is reloaded from L->base and the LJ_TSTR tag is added before returning. | ||
1084 | | movn TMP1, #~LJ_TSTR | ||
1085 | | ldr BASE, L->base | ||
1086 | | add CARG1, CARG1, TMP1, lsl #47 | ||
1087 | | b ->fff_restv | ||
1088 | | | ||
1089 | |//-- Base library: iterators ------------------------------------------- | ||
1090 | | | ||
1091 | |.ffunc_1 next | ||
1092 | | checktp CARG1, LJ_TTAB, ->fff_fallback | ||
1093 | | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. | ||
1094 | | ldr PC, [BASE, FRAME_PC] | ||
1095 | | add CARG2, BASE, #8 | ||
1096 | | sub CARG3, BASE, #16 | ||
1097 | | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) | ||
1098 | | // Returns 1=found, 0=end, -1=error. The output pointer passed in CARG3 is BASE-16. | ||
1099 | | mov RC, #(2+1)*8 | ||
1100 | | tbnz CRET1w, #31, ->fff_fallback // Invalid key. | ||
1101 | | cbnz CRET1, ->fff_res // Found key/value. | ||
1102 | | // End of traversal: return nil. | ||
1103 | | str TISNIL, [BASE, #-16] | ||
1104 | | b ->fff_res1 | ||
1105 | | | ||
1106 | |.ffunc_1 pairs | ||
1107 | | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback | ||
1108 | #if LJ_52 | ||
1109 | | ldr TAB:CARG2, TAB:TMP1->metatable | ||
1110 | #endif | ||
1111 | | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] | ||
1112 | | ldr PC, [BASE, FRAME_PC] | ||
1113 | #if LJ_52 | ||
1114 | | cbnz TAB:CARG2, ->fff_fallback | ||
1115 | #endif | ||
1116 | | mov RC, #(3+1)*8 | ||
1117 | | stp CARG1, TISNIL, [BASE, #-8] | ||
1118 | | str CFUNC:CARG4, [BASE, #-16] | ||
1119 | | b ->fff_res | ||
1120 | | | ||
1121 | |.ffunc_2 ipairs_aux | ||
1122 | | checktab CARG1, ->fff_fallback | ||
1123 | | checkint CARG2, ->fff_fallback | ||
1124 | | ldr TMP1w, TAB:CARG1->asize | ||
1125 | | ldr CARG3, TAB:CARG1->array | ||
1126 | | ldr TMP0w, TAB:CARG1->hmask | ||
1127 | | add CARG2w, CARG2w, #1 | ||
1128 | | cmp CARG2w, TMP1w | ||
1129 | | ldr PC, [BASE, FRAME_PC] | ||
1130 | | add TMP2, CARG2, TISNUM | ||
1131 | | mov RC, #(0+1)*8 | ||
1132 | | str TMP2, [BASE, #-16] | ||
1133 | | bhs >2 // Not in array part? | ||
1134 | | ldr TMP0, [CARG3, CARG2, lsl #3] | ||
1135 | |1: | ||
1136 | | mov TMP1, #(2+1)*8 | ||
1137 | | cmp TMP0, TISNIL | ||
1138 | | str TMP0, [BASE, #-8] | ||
1139 | | csel RC, RC, TMP1, eq | ||
1140 | | b ->fff_res | ||
1141 | |2: // Check for empty hash part first. Otherwise call C function. RC is still (0+1)*8 on the early exits to fff_res. | ||
1142 | | cbz TMP0w, ->fff_res | ||
1143 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
1144 | | // Returns cTValue * or NULL. | ||
1145 | | cbz CRET1, ->fff_res | ||
1146 | | ldr TMP0, [CRET1] | ||
1147 | | b <1 | ||
1148 | | | ||
1149 | |.ffunc_1 ipairs | ||
1150 | | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback | ||
1151 | #if LJ_52 | ||
1152 | | ldr TAB:CARG2, TAB:TMP1->metatable | ||
1153 | #endif | ||
1154 | | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] | ||
1155 | | ldr PC, [BASE, FRAME_PC] | ||
1156 | #if LJ_52 | ||
1157 | | cbnz TAB:CARG2, ->fff_fallback | ||
1158 | #endif | ||
1159 | | mov RC, #(3+1)*8 | ||
1160 | | stp CARG1, TISNUM, [BASE, #-8] | ||
1161 | | str CFUNC:CARG4, [BASE, #-16] | ||
1162 | | b ->fff_res | ||
1163 | | | ||
1164 | |//-- Base library: catch errors ---------------------------------------- | ||
1165 | | | ||
1166 | |.ffunc pcall | ||
1167 | | cmp NARGS8:RC, #8 | ||
1168 | | ldrb TMP0w, GL->hookmask | ||
1169 | | blo ->fff_fallback | ||
1170 | | sub NARGS8:RC, NARGS8:RC, #8 | ||
1171 | | mov RB, BASE | ||
1172 | | add BASE, BASE, #16 | ||
1173 | | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 | ||
1174 | | add PC, TMP0, #16+FRAME_PCALL | ||
1175 | | beq ->vm_call_dispatch | ||
1176 | |1: | ||
1177 | | add TMP2, BASE, NARGS8:RC | ||
1178 | |2: | ||
1179 | | ldr TMP0, [TMP2, #-16] | ||
1180 | | str TMP0, [TMP2, #-8]! | ||
1181 | | cmp TMP2, BASE | ||
1182 | | bne <2 | ||
1183 | | b ->vm_call_dispatch | ||
1184 | | | ||
1185 | |.ffunc xpcall | ||
1186 | | ldp CARG1, CARG2, [BASE] | ||
1187 | | ldrb TMP0w, GL->hookmask | ||
1188 | | subs NARGS8:TMP1, NARGS8:RC, #16 | ||
1189 | | blo ->fff_fallback | ||
1190 | | mov RB, BASE | ||
1191 | | asr ITYPE, CARG2, #47 | ||
1192 | | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 | ||
1193 | | cmn ITYPE, #-LJ_TFUNC | ||
1194 | | add PC, TMP0, #24+FRAME_PCALL | ||
1195 | | bne ->fff_fallback // Traceback must be a function. | ||
1196 | | mov NARGS8:RC, NARGS8:TMP1 | ||
1197 | | add BASE, BASE, #24 | ||
1198 | | stp CARG2, CARG1, [RB] // Swap function and traceback. Any remaining args are then moved up one slot by the loop at label 1 in pcall. | ||
1199 | | cbz NARGS8:RC, ->vm_call_dispatch | ||
1200 | | b <1 | ||
1201 | | | ||
1202 | |//-- Coroutine library -------------------------------------------------- | ||
1203 | | | ||
1204 | |.macro coroutine_resume_wrap, resume | ||
1205 | |.if resume | ||
1206 | |.ffunc_1 coroutine_resume | ||
1207 | | checktp CARG1, LJ_TTHREAD, ->fff_fallback | ||
1208 | |.else | ||
1209 | |.ffunc coroutine_wrap_aux | ||
1210 | | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr | ||
1211 | | and L:CARG1, CARG1, #LJ_GCVMASK | ||
1212 | |.endif | ||
1213 | | ldr PC, [BASE, FRAME_PC] | ||
1214 | | str BASE, L->base | ||
1215 | | ldp RB, CARG2, L:CARG1->base | ||
1216 | | ldrb TMP1w, L:CARG1->status | ||
1217 | | add TMP0, CARG2, TMP1 | ||
1218 | | str PC, SAVE_PC | ||
1219 | | cmp TMP0, RB | ||
1220 | | beq ->fff_fallback | ||
1221 | | cmp TMP1, #LUA_YIELD | ||
1222 | | add TMP0, CARG2, #8 | ||
1223 | | csel CARG2, CARG2, TMP0, hs | ||
1224 | | ldr CARG4, L:CARG1->maxstack | ||
1225 | | add CARG3, CARG2, NARGS8:RC | ||
1226 | | ldr RB, L:CARG1->cframe | ||
1227 | | ccmp CARG3, CARG4, #2, ls | ||
1228 | | ccmp RB, #0, #2, ls | ||
1229 | | bhi ->fff_fallback | ||
1230 | |.if resume | ||
1231 | | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. | ||
1232 | | add BASE, BASE, #8 | ||
1233 | | sub NARGS8:RC, NARGS8:RC, #8 | ||
1234 | |.endif | ||
1235 | | str CARG3, L:CARG1->top | ||
1236 | | str BASE, L->top | ||
1237 | | cbz NARGS8:RC, >3 | ||
1238 | |2: // Move args to coroutine. RB (the coroutine's cframe, zero on this path) doubles as the byte offset. | ||
1239 | | ldr TMP0, [BASE, RB] | ||
1240 | | cmp RB, NARGS8:RC | ||
1241 | | str TMP0, [CARG2, RB] | ||
1242 | | add RB, RB, #8 | ||
1243 | | bne <2 | ||
1244 | |3: | ||
1245 | | mov CARG3, #0 | ||
1246 | | mov L:RA, L:CARG1 | ||
1247 | | mov CARG4, #0 | ||
1248 | | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) | ||
1249 | | // Returns thread status. A status above LUA_YIELD (unsigned) is handled as an error at label 8. | ||
1250 | |4: | ||
1251 | | ldp CARG3, CARG4, L:RA->base | ||
1252 | | cmp CRET1, #LUA_YIELD | ||
1253 | | ldr BASE, L->base | ||
1254 | | str L, GL->cur_L | ||
1255 | | st_vmstate ST_INTERP | ||
1256 | | bhi >8 | ||
1257 | | sub RC, CARG4, CARG3 | ||
1258 | | ldr CARG1, L->maxstack | ||
1259 | | add CARG2, BASE, RC | ||
1260 | | cbz RC, >6 // No results? | ||
1261 | | cmp CARG2, CARG1 | ||
1262 | | mov RB, #0 | ||
1263 | | bhi >9 // Need to grow stack? | ||
1264 | | | ||
1265 | | sub CARG4, RC, #8 | ||
1266 | | str CARG3, L:RA->top // Clear coroutine stack. | ||
1267 | |5: // Move results from coroutine. | ||
1268 | | ldr TMP0, [CARG3, RB] | ||
1269 | | cmp RB, CARG4 | ||
1270 | | str TMP0, [BASE, RB] | ||
1271 | | add RB, RB, #8 | ||
1272 | | bne <5 | ||
1273 | |6: | ||
1274 | |.if resume | ||
1275 | | mov_true TMP1 | ||
1276 | | add RC, RC, #16 | ||
1277 | |7: | ||
1278 | | str TMP1, [BASE, #-8] // Prepend true/false to results. | ||
1279 | | sub RA, BASE, #8 | ||
1280 | |.else | ||
1281 | | mov RA, BASE | ||
1282 | | add RC, RC, #8 | ||
1283 | |.endif | ||
1284 | | ands CARG1, PC, #FRAME_TYPE | ||
1285 | | str PC, SAVE_PC | ||
1286 | | str RCw, SAVE_MULTRES | ||
1287 | | beq ->BC_RET_Z | ||
1288 | | b ->vm_return | ||
1289 | | | ||
1290 | |8: // Coroutine returned with error (at co->top-1). | ||
1291 | |.if resume | ||
1292 | | ldr TMP0, [CARG4, #-8]! | ||
1293 | | mov_false TMP1 | ||
1294 | | mov RC, #(2+1)*8 | ||
1295 | | str CARG4, L:RA->top // Remove error from coroutine stack. | ||
1296 | | str TMP0, [BASE] // Copy error message. | ||
1297 | | b <7 | ||
1298 | |.else | ||
1299 | | mov CARG1, L | ||
1300 | | mov CARG2, L:RA | ||
1301 | | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | ||
1302 | | // Never returns. | ||
1303 | |.endif | ||
1304 | | | ||
1305 | |9: // Handle stack expansion on return from yield. CRET1 is zeroed afterwards so the retry at label 4 takes the non-error path. | ||
1306 | | mov CARG1, L | ||
1307 | | lsr CARG2, RC, #3 | ||
1308 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
1309 | | mov CRET1, #0 | ||
1310 | | b <4 | ||
1311 | |.endmacro | ||
1312 | | | ||
1313 | | coroutine_resume_wrap 1 // coroutine.resume | ||
1314 | | coroutine_resume_wrap 0 // coroutine.wrap | ||
1315 | | | ||
1316 | |.ffunc coroutine_yield | ||
1317 | | ldr TMP0, L->cframe | ||
1318 | | add TMP1, BASE, NARGS8:RC | ||
1319 | | mov CRET1, #LUA_YIELD | ||
1320 | | stp BASE, TMP1, L->base | ||
1321 | | tbz TMP0, #0, ->fff_fallback | ||
1322 | | str xzr, L->cframe | ||
1323 | | strb CRET1w, L->status | ||
1324 | | b ->vm_leave_unw | ||
1325 | | | ||
1326 | |//-- Math library ------------------------------------------------------- | ||
1327 | | | ||
1328 | |.macro math_round, func, round | ||
1329 | | .ffunc math_ .. func | ||
1330 | | ldr CARG1, [BASE] | ||
1331 | | cmp NARGS8:RC, #8 | ||
1332 | | ldr d0, [BASE] | ||
1333 | | blo ->fff_fallback | ||
1334 | | cmp TISNUMhi, CARG1, lsr #32 | ||
1335 | | beq ->fff_restv | ||
1336 | | blo ->fff_fallback | ||
1337 | | round d0, d0 | ||
1338 | | b ->fff_resn | ||
1339 | |.endmacro | ||
1340 | | | ||
1341 | | math_round floor, frintm | ||
1342 | | math_round ceil, frintp | ||
1343 | | | ||
1344 | |.ffunc_1 math_abs | ||
1345 | | checknumber CARG1, ->fff_fallback | ||
1346 | | and CARG1, CARG1, #U64x(7fffffff,ffffffff) | ||
1347 | | bne ->fff_restv | ||
1348 | | eor CARG2w, CARG1w, CARG1w, asr #31 | ||
1349 | | movz CARG3, #0x41e0, lsl #48 // 2^31 as an IEEE double (0x41e0 in the top 16 bits). The csel below returns it instead of the integer when the subs result is negative (presumably only for abs(INT_MIN) - confirm). | ||
1350 | | subs CARG1w, CARG2w, CARG1w, asr #31 | ||
1351 | | add CARG1, CARG1, TISNUM | ||
1352 | | csel CARG1, CARG1, CARG3, pl | ||
1353 | | // Fallthrough to ->fff_restv with the result TValue in CARG1. | ||
1354 | | | ||
1355 | |->fff_restv: | ||
1356 | | // CARG1 = TValue result. | ||
1357 | | ldr PC, [BASE, FRAME_PC] | ||
1358 | | str CARG1, [BASE, #-16] | ||
1359 | |->fff_res1: | ||
1360 | | // PC = return. | ||
1361 | | mov RC, #(1+1)*8 | ||
1362 | |->fff_res: | ||
1363 | | // RC = (nresults+1)*8, PC = return. | ||
1364 | | ands CARG1, PC, #FRAME_TYPE | ||
1365 | | str RCw, SAVE_MULTRES | ||
1366 | | sub RA, BASE, #16 | ||
1367 | | bne ->vm_return | ||
1368 | | ldr INSw, [PC, #-4] | ||
1369 | | decode_RB RB, INS | ||
1370 | |5: | ||
1371 | | cmp RC, RB, lsl #3 // More results expected? | ||
1372 | | blo >6 | ||
1373 | | decode_RA TMP1, INS | ||
1374 | | // Adjust BASE. KBASE is assumed to be set for the calling frame. | ||
1375 | | sub BASE, RA, TMP1, lsl #3 | ||
1376 | | ins_next | ||
1377 | | | ||
1378 | |6: // Fill up results with nil. | ||
1379 | | add TMP1, RA, RC | ||
1380 | | add RC, RC, #8 | ||
1381 | | str TISNIL, [TMP1, #-8] | ||
1382 | | b <5 | ||
1383 | | | ||
1384 | |.macro math_extern, func | ||
1385 | | .ffunc_n math_ .. func | ||
1386 | | bl extern func | ||
1387 | | b ->fff_resn | ||
1388 | |.endmacro | ||
1389 | | | ||
1390 | |.macro math_extern2, func | ||
1391 | | .ffunc_nn math_ .. func | ||
1392 | | bl extern func | ||
1393 | | b ->fff_resn | ||
1394 | |.endmacro | ||
1395 | | | ||
1396 | |.ffunc_n math_sqrt | ||
1397 | | fsqrt d0, d0 | ||
1398 | |->fff_resn: | ||
1399 | | ldr PC, [BASE, FRAME_PC] | ||
1400 | | str d0, [BASE, #-16] | ||
1401 | | b ->fff_res1 | ||
1402 | | | ||
1403 | |.ffunc math_log | ||
1404 | | ldr CARG1, [BASE] | ||
1405 | | cmp NARGS8:RC, #8 | ||
1406 | | ldr FARG1, [BASE] | ||
1407 | | bne ->fff_fallback // Need exactly 1 argument. | ||
1408 | | checknum CARG1, ->fff_fallback | ||
1409 | | bl extern log | ||
1410 | | b ->fff_resn | ||
1411 | | | ||
1412 | | math_extern log10 | ||
1413 | | math_extern exp | ||
1414 | | math_extern sin | ||
1415 | | math_extern cos | ||
1416 | | math_extern tan | ||
1417 | | math_extern asin | ||
1418 | | math_extern acos | ||
1419 | | math_extern atan | ||
1420 | | math_extern sinh | ||
1421 | | math_extern cosh | ||
1422 | | math_extern tanh | ||
1423 | | math_extern2 pow | ||
1424 | | math_extern2 atan2 | ||
1425 | | math_extern2 fmod | ||
1426 | | | ||
1427 | |.ffunc_2 math_ldexp | ||
1428 | | ldr FARG1, [BASE] | ||
1429 | | checknum CARG1, ->fff_fallback | ||
1430 | | checkint CARG2, ->fff_fallback | ||
1431 | | sxtw CARG1, CARG2w | ||
1432 | | bl extern ldexp // (double x, int exp) | ||
1433 | | b ->fff_resn | ||
1434 | | | ||
1435 | |.ffunc_n math_frexp | ||
1436 | | add CARG1, sp, TMPDofs | ||
1437 | | bl extern frexp | ||
1438 | | ldr CARG2w, TMPD | ||
1439 | | ldr PC, [BASE, FRAME_PC] | ||
1440 | | str d0, [BASE, #-16] | ||
1441 | | mov RC, #(2+1)*8 | ||
1442 | | add CARG2, CARG2, TISNUM | ||
1443 | | str CARG2, [BASE, #-8] | ||
1444 | | b ->fff_res | ||
1445 | | | ||
1446 | |.ffunc_n math_modf | ||
1447 | | sub CARG1, BASE, #16 | ||
1448 | | ldr PC, [BASE, FRAME_PC] | ||
1449 | | bl extern modf | ||
1450 | | mov RC, #(2+1)*8 | ||
1451 | | str d0, [BASE, #-8] | ||
1452 | | b ->fff_res | ||
1453 | | | ||
1454 | |.macro math_minmax, name, cond, fcond | ||
1455 | | .ffunc_1 name | ||
1456 | | add RB, BASE, RC | ||
1457 | | add RA, BASE, #8 | ||
1458 | | checkint CARG1, >4 | ||
1459 | |1: // Handle integers. | ||
1460 | | ldr CARG2, [RA] | ||
1461 | | cmp RA, RB | ||
1462 | | bhs ->fff_restv | ||
1463 | | checkint CARG2, >3 | ||
1464 | | cmp CARG1w, CARG2w | ||
1465 | | add RA, RA, #8 | ||
1466 | | csel CARG1, CARG2, CARG1, cond | ||
1467 | | b <1 | ||
1468 | |3: // Convert intermediate result to number and continue below. | ||
1469 | | scvtf d0, CARG1w | ||
1470 | | blo ->fff_fallback | ||
1471 | | ldr d1, [RA] | ||
1472 | | b >6 | ||
1473 | | | ||
1474 | |4: | ||
1475 | | ldr d0, [BASE] | ||
1476 | | blo ->fff_fallback | ||
1477 | |5: // Handle numbers. | ||
1478 | | ldr CARG2, [RA] | ||
1479 | | ldr d1, [RA] | ||
1480 | | cmp RA, RB | ||
1481 | | bhs ->fff_resn | ||
1482 | | checknum CARG2, >7 | ||
1483 | |6: | ||
1484 | | fcmp d0, d1 | ||
1485 | | add RA, RA, #8 | ||
1486 | | fcsel d0, d1, d0, fcond | ||
1487 | | b <5 | ||
1488 | |7: // Convert integer to number and continue above. | ||
1489 | | scvtf d1, CARG2w | ||
1490 | | blo ->fff_fallback | ||
1491 | | b <6 | ||
1492 | |.endmacro | ||
1493 | | | ||
1494 | | math_minmax math_min, gt, pl | ||
1495 | | math_minmax math_max, lt, le | ||
1496 | | | ||
1497 | |//-- String library ----------------------------------------------------- | ||
1498 | | | ||
1499 | |.ffunc string_byte // Only handle the 1-arg case here. | ||
1500 | | ldp PC, CARG1, [BASE, FRAME_PC] | ||
1501 | | cmp NARGS8:RC, #8 | ||
1502 | | asr ITYPE, CARG1, #47 | ||
1503 | | ccmn ITYPE, #-LJ_TSTR, #0, eq | ||
1504 | | and STR:CARG1, CARG1, #LJ_GCVMASK | ||
1505 | | bne ->fff_fallback | ||
1506 | | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). | ||
1507 | | ldr CARG3w, STR:CARG1->len | ||
1508 | | add TMP0, TMP0, TISNUM | ||
1509 | | str TMP0, [BASE, #-16] | ||
1510 | | mov RC, #(0+1)*8 | ||
1511 | | cbz CARG3, ->fff_res | ||
1512 | | b ->fff_res1 | ||
1513 | | | ||
1514 | |.ffunc string_char // Only handle the 1-arg case here. | ||
1515 | | ffgccheck | ||
1516 | | ldp PC, CARG1, [BASE, FRAME_PC] | ||
1517 | | cmp CARG1w, #255 | ||
1518 | | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument. | ||
1519 | | bne ->fff_fallback | ||
1520 | | checkint CARG1, ->fff_fallback | ||
1521 | | mov CARG3, #1 | ||
1522 | | // Point to the char inside the integer in the stack slot. | ||
1523 | |.if ENDIAN_LE | ||
1524 | | mov CARG2, BASE | ||
1525 | |.else | ||
1526 | | add CARG2, BASE, #7 | ||
1527 | |.endif | ||
1528 | |->fff_newstr: | ||
1529 | | // CARG2 = str, CARG3 = len. | ||
1530 | | str BASE, L->base | ||
1531 | | mov CARG1, L | ||
1532 | | str PC, SAVE_PC | ||
1533 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1534 | |->fff_resstr: | ||
1535 | | // Returns GCstr *. | ||
1536 | | ldr BASE, L->base | ||
1537 | | movn TMP1, #~LJ_TSTR | ||
1538 | | add CARG1, CARG1, TMP1, lsl #47 | ||
1539 | | b ->fff_restv | ||
1540 | | | ||
1541 | |.ffunc string_sub | ||
1542 | | ffgccheck | ||
1543 | | ldr CARG1, [BASE] | ||
1544 | | ldr CARG3, [BASE, #16] | ||
1545 | | cmp NARGS8:RC, #16 | ||
1546 | | movn RB, #0 | ||
1547 | | beq >1 | ||
1548 | | blo ->fff_fallback | ||
1549 | | checkint CARG3, ->fff_fallback | ||
1550 | | sxtw RB, CARG3w | ||
1551 | |1: | ||
1552 | | ldr CARG2, [BASE, #8] | ||
1553 | | checkstr CARG1, ->fff_fallback | ||
1554 | | ldr TMP1w, STR:CARG1->len | ||
1555 | | checkint CARG2, ->fff_fallback | ||
1556 | | sxtw CARG2, CARG2w | ||
1557 | | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end | ||
1558 | | add TMP2, RB, TMP1 | ||
1559 | | cmp RB, #0 | ||
1560 | | add TMP0, CARG2, TMP1 | ||
1561 | | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1 | ||
1562 | | cmp CARG2, #0 | ||
1563 | | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1 | ||
1564 | | cmp RB, #0 | ||
1565 | | csel RB, RB, xzr, ge // if (end < 0) end = 0 | ||
1566 | | cmp CARG2, #1 | ||
1567 | | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1 | ||
1568 | | cmp RB, TMP1 | ||
1569 | | csel RB, RB, TMP1, le // if (end > len) end = len | ||
1570 | | add CARG1, STR:CARG1, #sizeof(GCstr)-1 | ||
1571 | | subs CARG3, RB, CARG2 // len = end - start | ||
1572 | | add CARG2, CARG1, CARG2 | ||
1573 | | add CARG3, CARG3, #1 // len += 1 | ||
1574 | | bge ->fff_newstr | ||
1575 | | add STR:CARG1, GL, #offsetof(global_State, strempty) | ||
1576 | | movn TMP1, #~LJ_TSTR | ||
1577 | | add CARG1, CARG1, TMP1, lsl #47 | ||
1578 | | b ->fff_restv | ||
1579 | | | ||
1580 | |.macro ffstring_op, name | ||
1581 | | .ffunc string_ .. name | ||
1582 | | ffgccheck | ||
1583 | | ldr CARG2, [BASE] | ||
1584 | | cmp NARGS8:RC, #8 | ||
1585 | | asr ITYPE, CARG2, #47 | ||
1586 | | ccmn ITYPE, #-LJ_TSTR, #0, hs | ||
1587 | | and STR:CARG2, CARG2, #LJ_GCVMASK | ||
1588 | | bne ->fff_fallback | ||
1589 | | ldr TMP0, GL->tmpbuf.b | ||
1590 | | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) | ||
1591 | | str BASE, L->base | ||
1592 | | str PC, SAVE_PC | ||
1593 | | str L, GL->tmpbuf.L | ||
1594 | | str TMP0, GL->tmpbuf.w | ||
1595 | | bl extern lj_buf_putstr_ .. name | ||
1596 | | bl extern lj_buf_tostr | ||
1597 | | b ->fff_resstr | ||
1598 | |.endmacro | ||
1599 | | | ||
1600 | |ffstring_op reverse | ||
1601 | |ffstring_op lower | ||
1602 | |ffstring_op upper | ||
1603 | | | ||
1604 | |//-- Bit library -------------------------------------------------------- | ||
1605 | | | ||
1606 | |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3. In: CARG1 = TValue bits, lr = return address (set with adr by the callers), flags as left by the caller's checkint (bls branches to ->fff_fallback). Out: CARG1w = mantissa with implicit one, shifted right by 1076 minus the biased exponent and negated if CARG1 is negative, or 0 if that shift count is above 53 (unsigned compare). Returns with br lr. | ||
1607 | |->vm_tobit_fb: | ||
1608 | | bls ->fff_fallback | ||
1609 | | add CARG2, CARG1, CARG1 | ||
1610 | | mov CARG3, #1076 | ||
1611 | | sub CARG3, CARG3, CARG2, lsr #53 | ||
1612 | | cmp CARG3, #53 | ||
1613 | | bhi >1 | ||
1614 | | and CARG2, CARG2, #U64x(001fffff,ffffffff) | ||
1615 | | orr CARG2, CARG2, #U64x(00200000,00000000) | ||
1616 | | cmp CARG1, #0 | ||
1617 | | lsr CARG2, CARG2, CARG3 | ||
1618 | | cneg CARG1w, CARG2w, mi | ||
1619 | | br lr | ||
1620 | |1: | ||
1621 | | mov CARG1w, #0 | ||
1622 | | br lr | ||
1623 | | | ||
1624 | |.macro .ffunc_bit, name | ||
1625 | | .ffunc_1 bit_..name | ||
1626 | | adr lr, >1 | ||
1627 | | checkint CARG1, ->vm_tobit_fb | ||
1628 | |1: | ||
1629 | |.endmacro | ||
1630 | | | ||
1631 | |.macro .ffunc_bit_op, name, ins | ||
1632 | | .ffunc_bit name | ||
1633 | | mov RA, #8 | ||
1634 | | mov TMP0w, CARG1w | ||
1635 | | adr lr, >2 | ||
1636 | |1: | ||
1637 | | ldr CARG1, [BASE, RA] | ||
1638 | | cmp RA, NARGS8:RC | ||
1639 | | add RA, RA, #8 | ||
1640 | | bge >9 | ||
1641 | | checkint CARG1, ->vm_tobit_fb | ||
1642 | |2: | ||
1643 | | ins TMP0w, TMP0w, CARG1w | ||
1644 | | b <1 | ||
1645 | |.endmacro | ||
1646 | | | ||
1647 | |.ffunc_bit_op band, and | ||
1648 | |.ffunc_bit_op bor, orr | ||
1649 | |.ffunc_bit_op bxor, eor | ||
1650 | | | ||
1651 | |.ffunc_bit tobit | ||
1652 | | mov TMP0w, CARG1w | ||
1653 | |9: // Label reused by .ffunc_bit_op users. | ||
1654 | | add CARG1, TMP0, TISNUM | ||
1655 | | b ->fff_restv | ||
1656 | | | ||
1657 | |.ffunc_bit bswap | ||
1658 | | rev TMP0w, CARG1w | ||
1659 | | add CARG1, TMP0, TISNUM | ||
1660 | | b ->fff_restv | ||
1661 | | | ||
1662 | |.ffunc_bit bnot | ||
1663 | | mvn TMP0w, CARG1w | ||
1664 | | add CARG1, TMP0, TISNUM | ||
1665 | | b ->fff_restv | ||
1666 | | | ||
1667 | |.macro .ffunc_bit_sh, name, ins, shmod | ||
1668 | | .ffunc bit_..name | ||
1669 | | ldp TMP0, CARG1, [BASE] | ||
1670 | | cmp NARGS8:RC, #16 | ||
1671 | | blo ->fff_fallback | ||
1672 | | adr lr, >1 | ||
1673 | | checkint CARG1, ->vm_tobit_fb | ||
1674 | |1: | ||
1675 | |.if shmod == 0 | ||
1676 | | mov TMP1, CARG1 | ||
1677 | |.else | ||
1678 | | neg TMP1, CARG1 | ||
1679 | |.endif | ||
1680 | | mov CARG1, TMP0 | ||
1681 | | adr lr, >2 | ||
1682 | | checkint CARG1, ->vm_tobit_fb | ||
1683 | |2: | ||
1684 | | ins TMP0w, CARG1w, TMP1w | ||
1685 | | add CARG1, TMP0, TISNUM | ||
1686 | | b ->fff_restv | ||
1687 | |.endmacro | ||
1688 | | | ||
1689 | |.ffunc_bit_sh lshift, lsl, 0 | ||
1690 | |.ffunc_bit_sh rshift, lsr, 0 | ||
1691 | |.ffunc_bit_sh arshift, asr, 0 | ||
1692 | |.ffunc_bit_sh rol, ror, 1 | ||
1693 | |.ffunc_bit_sh ror, ror, 0 | ||
1694 | | | ||
1695 | |//----------------------------------------------------------------------- | ||
1696 | | | ||
1697 | |->fff_fallback: // Call fast function fallback handler. | ||
1698 | | // BASE = new base, RC = nargs*8 | ||
1699 | | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC. | ||
1700 | | ldr TMP2, L->maxstack | ||
1701 | | add TMP1, BASE, NARGS8:RC | ||
1702 | | stp BASE, TMP1, L->base | ||
1703 | | and CFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1704 | | add TMP1, TMP1, #8*LUA_MINSTACK | ||
1705 | | ldr CARG3, CFUNC:CARG3->f | ||
1706 | | str PC, SAVE_PC // Redundant (but a defined value). | ||
1707 | | cmp TMP1, TMP2 | ||
1708 | | mov CARG1, L | ||
1709 | | bhi >5 // Need to grow stack. | ||
1710 | | blr CARG3 // (lua_State *L) | ||
1711 | | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ||
1712 | | ldr BASE, L->base | ||
1713 | | cmp CRET1w, #0 | ||
1714 | | lsl RC, CRET1, #3 | ||
1715 | | sub RA, BASE, #16 | ||
1716 | | bgt ->fff_res // Returned nresults+1? | ||
1717 | |1: // Returned 0 or -1: retry fast path. | ||
1718 | | ldr CARG1, L->top | ||
1719 | | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] | ||
1720 | | sub NARGS8:RC, CARG1, BASE | ||
1721 | | bne ->vm_call_tail // Returned -1? | ||
1722 | | and CFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1723 | | ins_callt // Returned 0: retry fast path. | ||
1724 | | | ||
1725 | |// Reconstruct previous base for vmeta_call during tailcall. | ||
1726 | |->vm_call_tail: | ||
1727 | | ands TMP0, PC, #FRAME_TYPE | ||
1728 | | and TMP1, PC, #~FRAME_TYPEP | ||
1729 | | bne >3 | ||
1730 | | ldrb RAw, [PC, #-4+OFS_RA] | ||
1731 | | lsl RA, RA, #3 | ||
1732 | | add TMP1, RA, #16 | ||
1733 | |3: | ||
1734 | | sub RB, BASE, TMP1 | ||
1735 | | b ->vm_call_dispatch // Resolve again for tailcall. | ||
1736 | | | ||
1737 | |5: // Grow stack for fallback handler. | ||
1738 | | mov CARG2, #LUA_MINSTACK | ||
1739 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
1740 | | ldr BASE, L->base | ||
1741 | | cmp CARG1, CARG1 // Set zero-flag to force retry. | ||
1742 | | b <1 | ||
1743 | | | ||
1744 | |->fff_gcstep: // Call GC step function, then return to the address that was in lr on entry. | ||
1745 | | // In: BASE = new base, RC = nargs*8, lr = return address. Out: BASE, NARGS8:RC and CFUNC:CARG3 are reloaded after the call. | ||
1746 | | add CARG2, BASE, NARGS8:RC // Calculate L->top. | ||
1747 | | mov RA, lr | ||
1748 | | stp BASE, CARG2, L->base | ||
1749 | | str PC, SAVE_PC // Redundant (but a defined value). | ||
1750 | | mov CARG1, L | ||
1751 | | bl extern lj_gc_step // (lua_State *L) | ||
1752 | | ldp BASE, CARG2, L->base | ||
1753 | | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] | ||
1754 | | mov lr, RA // Restore the lr value kept in RA across the call. Help return address predictor. | ||
1755 | | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8 from the reloaded pair. | ||
1756 | | and CFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1757 | | ret | ||
1758 | | | ||
1759 | |//----------------------------------------------------------------------- | ||
1760 | |//-- Special dispatch targets ------------------------------------------- | ||
1761 | |//----------------------------------------------------------------------- | ||
1762 | | | ||
1763 | |->vm_record: // Dispatch target for recording phase. | ||
1764 | |.if JIT | ||
1765 | | ldrb CARG1w, GL->hookmask | ||
1766 | | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | ||
1767 | | bne >5 | ||
1768 | | // Decrement the hookcount for consistency, but always do the call. | ||
1769 | | ldr CARG2w, GL->hookcount | ||
1770 | | tst CARG1, #HOOK_ACTIVE | ||
1771 | | bne >1 | ||
1772 | | sub CARG2w, CARG2w, #1 | ||
1773 | | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT | ||
1774 | | beq >1 | ||
1775 | | str CARG2w, GL->hookcount | ||
1776 | | b >1 | ||
1777 | |.endif | ||
1778 | | | ||
1779 | |->vm_rethook: // Dispatch target for return hooks. | ||
1780 | | ldrb TMP2w, GL->hookmask | ||
1781 | | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? | ||
1782 | |5: // Re-dispatch to static ins. | ||
1783 | | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] | ||
1784 | | br TMP0 | ||
1785 | | | ||
1786 | |->vm_inshook: // Dispatch target for instr/line hooks. | ||
1787 | | ldrb TMP2w, GL->hookmask | ||
1788 | | ldr TMP3w, GL->hookcount | ||
1789 | | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? | ||
1790 | | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT | ||
1791 | | beq <5 | ||
1792 | | sub TMP3w, TMP3w, #1 | ||
1793 | | str TMP3w, GL->hookcount | ||
1794 | | cbz TMP3w, >1 | ||
1795 | | tbz TMP2w, #LUA_HOOKLINE, <5 | ||
1796 | |1: | ||
1797 | | mov CARG1, L | ||
1798 | | str BASE, L->base | ||
1799 | | mov CARG2, PC | ||
1800 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
1801 | | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) | ||
1802 | |3: | ||
1803 | | ldr BASE, L->base | ||
1804 | |4: // Re-dispatch to static ins. | ||
1805 | | ldr INSw, [PC, #-4] | ||
1806 | | add TMP1, GL, INS, uxtb #3 | ||
1807 | | decode_RA RA, INS | ||
1808 | | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] | ||
1809 | | decode_RD RC, INS | ||
1810 | | br TMP0 | ||
1811 | | | ||
1812 | |->cont_hook: // Continue from hook yield. | ||
1813 | | ldr CARG1, [CARG4, #-40] | ||
1814 | | add PC, PC, #4 | ||
1815 | | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. | ||
1816 | | b <4 | ||
1817 | | | ||
1818 | |->vm_hotloop: // Hot loop counter underflow. | ||
1819 | |.if JIT | ||
1820 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). | ||
1821 | | add CARG1, GL, #GG_G2DISP+GG_DISP2J | ||
1822 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1823 | | str PC, SAVE_PC | ||
1824 | | ldr CARG3, LFUNC:CARG3->pc | ||
1825 | | mov CARG2, PC | ||
1826 | | str L, [GL, #GL_J(L)] | ||
1827 | | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] | ||
1828 | | str BASE, L->base | ||
1829 | | add CARG3, BASE, CARG3, lsl #3 | ||
1830 | | str CARG3, L->top | ||
1831 | | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
1832 | | b <3 | ||
1833 | |.endif | ||
1834 | | | ||
1835 | |->vm_callhook: // Dispatch target for call hooks. | ||
1836 | | mov CARG2, PC | ||
1837 | |.if JIT | ||
1838 | | b >1 | ||
1839 | |.endif | ||
1840 | | | ||
1841 | |->vm_hotcall: // Hot call counter underflow. | ||
1842 | |.if JIT | ||
1843 | | orr CARG2, PC, #1 | ||
1844 | |1: | ||
1845 | |.endif | ||
1846 | | add TMP1, BASE, NARGS8:RC | ||
1847 | | str PC, SAVE_PC | ||
1848 | | mov CARG1, L | ||
1849 | | sub RA, RA, BASE | ||
1850 | | stp BASE, TMP1, L->base | ||
1851 | | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) | ||
1852 | | // Returns ASMFunction. | ||
1853 | | ldp BASE, TMP1, L->base | ||
1854 | | str xzr, SAVE_PC // Invalidate for subsequent line hook. | ||
1855 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
1856 | | add RA, BASE, RA | ||
1857 | | sub NARGS8:RC, TMP1, BASE | ||
1858 | | ldr INSw, [PC, #-4] | ||
1859 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1860 | | br CRET1 | ||
1861 | | | ||
1862 | |->cont_stitch: // Trace stitching. | ||
1863 | |.if JIT | ||
1864 | | // RA = resultptr, CARG4 = meta base | ||
1865 | | ldr RBw, SAVE_MULTRES | ||
1866 | | ldr INSw, [PC, #-4] | ||
1867 | | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | ||
1868 | | subs RB, RB, #8 | ||
1869 | | decode_RA RC, INS // Call base. | ||
1870 | | and CARG3, CARG3, #LJ_GCVMASK | ||
1871 | | beq >2 | ||
1872 | |1: // Move results down. | ||
1873 | | ldr CARG1, [RA] | ||
1874 | | add RA, RA, #8 | ||
1875 | | subs RB, RB, #8 | ||
1876 | | str CARG1, [BASE, RC, lsl #3] | ||
1877 | | add RC, RC, #1 | ||
1878 | | bne <1 | ||
1879 | |2: | ||
1880 | | decode_RA RA, INS | ||
1881 | | decode_RB RB, INS | ||
1882 | | add RA, RA, RB | ||
1883 | |3: | ||
1884 | | cmp RA, RC | ||
1885 | | bhi >9 // More results wanted? | ||
1886 | | | ||
1887 | | ldrh RAw, TRACE:CARG3->traceno | ||
1888 | | ldrh RCw, TRACE:CARG3->link | ||
1889 | | cmp RCw, RAw | ||
1890 | | beq ->cont_nop // Blacklisted. | ||
1891 | | cmp RCw, #0 | ||
1892 | | bne =>BC_JLOOP // Jump to stitched trace. | ||
1893 | | | ||
1894 | | // Stitch a new trace to the previous trace. | ||
1895 | | mov CARG1, #GL_J(exitno) | ||
1896 | | str RAw, [GL, CARG1] | ||
1897 | | mov CARG1, #GL_J(L) | ||
1898 | | str L, [GL, CARG1] | ||
1899 | | str BASE, L->base | ||
1900 | | add CARG1, GL, #GG_G2J | ||
1901 | | mov CARG2, PC | ||
1902 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
1903 | | ldr BASE, L->base | ||
1904 | | b ->cont_nop | ||
1905 | | | ||
1906 | |9: // Fill up results with nil. | ||
1907 | | str TISNIL, [BASE, RC, lsl #3] | ||
1908 | | add RC, RC, #1 | ||
1909 | | b <3 | ||
1910 | |.endif | ||
1911 | | | ||
1912 | |->vm_profhook: // Dispatch target for profiler hook. | ||
1913 | #if LJ_HASPROFILE | ||
1914 | | mov CARG1, L | ||
1915 | | str BASE, L->base | ||
1916 | | mov CARG2, PC | ||
1917 | | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
1918 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
1919 | | ldr BASE, L->base | ||
1920 | | sub PC, PC, #4 | ||
1921 | | b ->cont_nop | ||
1922 | #endif | ||
1923 | | | ||
1924 | |//----------------------------------------------------------------------- | ||
1925 | |//-- Trace exit handler ------------------------------------------------- | ||
1926 | |//----------------------------------------------------------------------- | ||
1927 | | | ||
1928 | |.macro savex_, a, b | ||
1929 | | stp d..a, d..b, [sp, #a*8] | ||
1930 | | stp x..a, x..b, [sp, #32*8+a*8] | ||
1931 | |.endmacro | ||
1932 | | | ||
1933 | |->vm_exit_handler: | ||
1934 | |.if JIT | ||
1935 | | sub sp, sp, #(64*8) | ||
1936 | | savex_, 0, 1 | ||
1937 | | savex_, 2, 3 | ||
1938 | | savex_, 4, 5 | ||
1939 | | savex_, 6, 7 | ||
1940 | | savex_, 8, 9 | ||
1941 | | savex_, 10, 11 | ||
1942 | | savex_, 12, 13 | ||
1943 | | savex_, 14, 15 | ||
1944 | | savex_, 16, 17 | ||
1945 | | savex_, 18, 19 | ||
1946 | | savex_, 20, 21 | ||
1947 | | savex_, 22, 23 | ||
1948 | | savex_, 24, 25 | ||
1949 | | savex_, 26, 27 | ||
1950 | | savex_, 28, 29 | ||
1951 | | stp d30, d31, [sp, #30*8] | ||
1952 | | ldr CARG1, [sp, #64*8] // Load original value of lr. | ||
1953 | | add CARG3, sp, #64*8 // Recompute original value of sp. | ||
1954 | | mv_vmstate CARG4w, EXIT | ||
1955 | | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. | ||
1956 | | sub CARG1, CARG1, lr | ||
1957 | | ldr L, GL->cur_L | ||
1958 | | lsr CARG1, CARG1, #2 | ||
1959 | | ldr BASE, GL->jit_base | ||
1960 | | sub CARG1, CARG1, #2 | ||
1961 | | ldr CARG2w, [lr] // Load trace number. | ||
1962 | | st_vmstate CARG4w | ||
1963 | |.if ENDIAN_BE | ||
1964 | | rev32 CARG2, CARG2 | ||
1965 | |.endif | ||
1966 | | str BASE, L->base | ||
1967 | | ubfx CARG2w, CARG2w, #5, #16 | ||
1968 | | str CARG1w, [GL, #GL_J(exitno)] | ||
1969 | | str CARG2w, [GL, #GL_J(parent)] | ||
1970 | | str L, [GL, #GL_J(L)] | ||
1971 | | str xzr, GL->jit_base | ||
1972 | | add CARG1, GL, #GG_G2J | ||
1973 | | mov CARG2, sp | ||
1974 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
1975 | | // Returns MULTRES (unscaled) or negated error code. | ||
1976 | | ldr CARG2, L->cframe | ||
1977 | | ldr BASE, L->base | ||
1978 | | and sp, CARG2, #CFRAME_RAWMASK | ||
1979 | | ldr PC, SAVE_PC // Get SAVE_PC. | ||
1980 | | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). | ||
1981 | | b >1 | ||
1982 | |.endif | ||
1983 | | | ||
1984 | |->vm_exit_interp: | ||
1985 | | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. | ||
1986 | |.if JIT | ||
1987 | | ldr L, SAVE_L | ||
1988 | |1: | ||
1989 | | cmp CARG1w, #0 | ||
1990 | | blt >9 // Check for error from exit. | ||
1991 | | lsl RC, CARG1, #3 | ||
1992 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
1993 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
1994 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
1995 | | movn TISNIL, #0 | ||
1996 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
1997 | | str RCw, SAVE_MULTRES | ||
1998 | | str BASE, L->base | ||
1999 | | ldr CARG2, LFUNC:CARG2->pc | ||
2000 | | str xzr, GL->jit_base | ||
2001 | | mv_vmstate CARG4w, INTERP | ||
2002 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
2003 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
2004 | | ldrb RBw, [PC, # OFS_OP] | ||
2005 | | ldr INSw, [PC], #4 | ||
2006 | | st_vmstate CARG4w | ||
2007 | | cmp RBw, #BC_FUNCC+2 // Fast function? | ||
2008 | | add TMP1, GL, INS, uxtb #3 | ||
2009 | | bhs >4 | ||
2010 | |2: | ||
2011 | | cmp RBw, #BC_FUNCF // Function header? | ||
2012 | | add TMP0, GL, RB, uxtb #3 | ||
2013 | | ldr RB, [TMP0, #GG_G2DISP] | ||
2014 | | decode_RA RA, INS | ||
2015 | | lsr TMP0, INS, #16 | ||
2016 | | csel RC, TMP0, RC, lo | ||
2017 | | blo >5 | ||
2018 | | ldr CARG3, [BASE, FRAME_FUNC] | ||
2019 | | sub RC, RC, #8 | ||
2020 | | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 | ||
2021 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2022 | |5: | ||
2023 | | br RB | ||
2024 | | | ||
2025 | |4: // Check frame below fast function. | ||
2026 | | ldr CARG1, [BASE, FRAME_PC] | ||
2027 | | ands CARG2, CARG1, #FRAME_TYPE | ||
2028 | | bne <2 // Trace stitching continuation? | ||
2029 | | // Otherwise set KBASE for Lua function below fast function. | ||
2030 | | ldr CARG3w, [CARG1, #-4] | ||
2031 | | decode_RA CARG1, CARG3 | ||
2032 | | sub CARG2, BASE, CARG1, lsl #3 | ||
2033 | | ldr LFUNC:CARG3, [CARG2, #-32] | ||
2034 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2035 | | ldr CARG3, LFUNC:CARG3->pc | ||
2036 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
2037 | | b <2 | ||
2038 | | | ||
2039 | |9: // Rethrow error from the right C frame. | ||
2040 | | neg CARG2w, CARG1w | ||
2041 | | mov CARG1, L | ||
2042 | | bl extern lj_err_trace // (lua_State *L, int errcode) | ||
2043 | |.endif | ||
2044 | | | ||
2045 | |//----------------------------------------------------------------------- | ||
2046 | |//-- Math helper functions ---------------------------------------------- | ||
2047 | |//----------------------------------------------------------------------- | ||
2048 | | | ||
2049 | | // int lj_vm_modi(int dividend, int divisor); Computes abs(dividend) % abs(divisor) with udiv/msub. If the operand signs differ and that remainder is non-zero, abs(divisor) is subtracted from it. Finally the result is negated if its sign differs from the divisor's, so a non-zero result has the sign of the divisor. | ||
2050 | |->vm_modi: | ||
2051 | | eor CARG4w, CARG1w, CARG2w | ||
2052 | | cmp CARG4w, #0 | ||
2053 | | eor CARG3w, CARG1w, CARG1w, asr #31 | ||
2054 | | eor CARG4w, CARG2w, CARG2w, asr #31 | ||
2055 | | sub CARG3w, CARG3w, CARG1w, asr #31 | ||
2056 | | sub CARG4w, CARG4w, CARG2w, asr #31 | ||
2057 | | udiv CARG1w, CARG3w, CARG4w | ||
2058 | | msub CARG1w, CARG1w, CARG4w, CARG3w | ||
2059 | | ccmp CARG1w, #0, #4, mi | ||
2060 | | sub CARG3w, CARG1w, CARG4w | ||
2061 | | csel CARG1w, CARG1w, CARG3w, eq | ||
2062 | | eor CARG3w, CARG1w, CARG2w | ||
2063 | | cmp CARG3w, #0 | ||
2064 | | cneg CARG1w, CARG1w, mi | ||
2065 | | ret | ||
2066 | | | ||
2067 | |//----------------------------------------------------------------------- | ||
2068 | |//-- Miscellaneous functions -------------------------------------------- | ||
2069 | |//----------------------------------------------------------------------- | ||
2070 | | | ||
2071 | |.define NEXT_TAB, TAB:CARG1 | ||
2072 | |.define NEXT_RES, CARG1 | ||
2073 | |.define NEXT_IDX, CARG2w | ||
2074 | |.define NEXT_LIM, CARG3w | ||
2075 | |.define NEXT_TMP0, TMP0 | ||
2076 | |.define NEXT_TMP0w, TMP0w | ||
2077 | |.define NEXT_TMP1, TMP1 | ||
2078 | |.define NEXT_TMP1w, TMP1w | ||
2079 | |.define NEXT_RES_PTR, sp | ||
2080 | |.define NEXT_RES_VAL, [sp] | ||
2081 | |.define NEXT_RES_KEY, [sp, #8] | ||
2082 | | | ||
2083 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2084 | |// Next idx returned in CRET2w. The returned pointer is NEXT_RES_PTR (sp) for an array-part hit and at the end of iteration, and the address of the hash node itself for a hash-part hit. Only assembled when JIT is enabled. | ||
2085 | |->vm_next: | ||
2086 | |.if JIT | ||
2087 | | ldr NEXT_LIM, NEXT_TAB->asize | ||
2088 | | ldr NEXT_TMP1, NEXT_TAB->array | ||
2089 | |1: // Traverse array part. On a non-nil slot the value is copied to NEXT_RES_VAL and an integer key (idx plus number tag word) is built in NEXT_RES_KEY. | ||
2090 | | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM | ||
2091 | | bhs >5 // Index points after array part? | ||
2092 | | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3] | ||
2093 | | cmn NEXT_TMP0, #-LJ_TNIL | ||
2094 | | cinc NEXT_IDX, NEXT_IDX, eq | ||
2095 | | beq <1 // Skip holes in array part. | ||
2096 | | str NEXT_TMP0, NEXT_RES_VAL | ||
2097 | | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
2098 | | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY | ||
2099 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2100 | | mov NEXT_RES, NEXT_RES_PTR | ||
2101 | |4: | ||
2102 | | ret | ||
2103 | | | ||
2104 | |5: // Traverse hash part. NEXT_TMP0w still holds idx - asize from the subs above; it is multiplied by 3 and then by 8 to form the byte offset of the starting node. The limit becomes asize + hmask. | ||
2105 | | ldr NEXT_TMP1w, NEXT_TAB->hmask | ||
2106 | | ldr NODE:NEXT_RES, NEXT_TAB->node | ||
2107 | | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1 | ||
2108 | | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w | ||
2109 | | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3 | ||
2110 | |6: | ||
2111 | | cmp NEXT_IDX, NEXT_LIM | ||
2112 | | bhi >9 | ||
2113 | | ldr NEXT_TMP0, NODE:NEXT_RES->val | ||
2114 | | cmn NEXT_TMP0, #-LJ_TNIL | ||
2115 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2116 | | bne <4 | ||
2117 | | // Skip holes in hash part: advance to the next node and retry. | ||
2118 | | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node) | ||
2119 | | b <6 | ||
2120 | | | ||
2121 | |9: // End of iteration. Set the key to nil (not the value). | ||
2122 | | movn NEXT_TMP0, #0 | ||
2123 | | str NEXT_TMP0, NEXT_RES_KEY | ||
2124 | | mov NEXT_RES, NEXT_RES_PTR | ||
2125 | | ret | ||
2126 | |.endif | ||
2127 | | | ||
2128 | |//----------------------------------------------------------------------- | ||
2129 | |//-- FFI helper functions ----------------------------------------------- | ||
2130 | |//----------------------------------------------------------------------- | ||
2131 | | | ||
2132 | |// Handler for callback functions. Stores x0-x7, d0-d7 and the address sp+CFRAME_SPACE into CTSTATE->cb, calls lj_ccallback_enter and then dispatches with ins_callt. | ||
2133 | |// Registers are saved here via saveregs. On entry the callback slot number is in w9 and g is in x10. | ||
2134 | |->vm_ffi_callback: | ||
2135 | |.if FFI | ||
2136 | |.type CTSTATE, CTState, PC | ||
2137 | | saveregs | ||
2138 | | ldr CTSTATE, GL:x10->ctype_state | ||
2139 | | mov GL, x10 | ||
2140 | | add x10, sp, # CFRAME_SPACE | ||
2141 | | str w9, CTSTATE->cb.slot | ||
2142 | | stp x0, x1, CTSTATE->cb.gpr[0] | ||
2143 | | stp d0, d1, CTSTATE->cb.fpr[0] | ||
2144 | | stp x2, x3, CTSTATE->cb.gpr[2] | ||
2145 | | stp d2, d3, CTSTATE->cb.fpr[2] | ||
2146 | | stp x4, x5, CTSTATE->cb.gpr[4] | ||
2147 | | stp d4, d5, CTSTATE->cb.fpr[4] | ||
2148 | | stp x6, x7, CTSTATE->cb.gpr[6] | ||
2149 | | stp d6, d7, CTSTATE->cb.fpr[6] | ||
2150 | | str x10, CTSTATE->cb.stack | ||
2151 | | mov CARG1, CTSTATE | ||
2152 | | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. | ||
2153 | | mov CARG2, sp | ||
2154 | | bl extern lj_ccallback_enter // (CTState *cts, void *cf) | ||
2155 | | // Returns lua_State *. BASE and RC are loaded from it below and RC is then reduced by BASE. | ||
2156 | | ldp BASE, RC, L:CRET1->base | ||
2157 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
2158 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
2159 | | movn TISNIL, #0 | ||
2160 | | mov L, CRET1 | ||
2161 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
2162 | | sub RC, RC, BASE | ||
2163 | | st_vmstate ST_INTERP | ||
2164 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2165 | | ins_callt | ||
2166 | |.endif | ||
2167 | | | ||
2168 | |->cont_ffi_callback: // Return from FFI callback. | ||
2169 | |.if FFI | ||
2170 | | ldr CTSTATE, GL->ctype_state | ||
2171 | | stp BASE, CARG4, L->base | ||
2172 | | str L, CTSTATE->L | ||
2173 | | mov CARG1, CTSTATE | ||
2174 | | mov CARG2, RA | ||
2175 | | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | ||
2176 | | ldp x0, x1, CTSTATE->cb.gpr[0] | ||
2177 | | ldp d0, d1, CTSTATE->cb.fpr[0] | ||
2178 | | b ->vm_leave_unw | ||
2179 | |.endif | ||
2180 | | | ||
2181 | |->vm_ffi_call: // Call C function via FFI. | ||
2182 | | // Caveat: needs special frame unwinding, see below. | ||
2183 | |.if FFI | ||
2184 | | .type CCSTATE, CCallState, x19 | ||
2185 | | stp x20, CCSTATE, [sp, #-32]! | ||
2186 | | stp fp, lr, [sp, #16] | ||
2187 | | add fp, sp, #16 | ||
2188 | | mov CCSTATE, x0 | ||
2189 | | ldr TMP0w, CCSTATE:x0->spadj | ||
2190 | | ldrb TMP1w, CCSTATE->nsp | ||
2191 | | add TMP2, CCSTATE, #offsetof(CCallState, stack) | ||
2192 | | subs TMP1, TMP1, #1 | ||
2193 | | ldr TMP3, CCSTATE->func | ||
2194 | | sub sp, sp, TMP0 | ||
2195 | | bmi >2 | ||
2196 | |1: // Copy stack slots | ||
2197 | | ldr TMP0, [TMP2, TMP1, lsl #3] | ||
2198 | | str TMP0, [sp, TMP1, lsl #3] | ||
2199 | | subs TMP1, TMP1, #1 | ||
2200 | | bpl <1 | ||
2201 | |2: | ||
2202 | | ldp x0, x1, CCSTATE->gpr[0] | ||
2203 | | ldp d0, d1, CCSTATE->fpr[0] | ||
2204 | | ldp x2, x3, CCSTATE->gpr[2] | ||
2205 | | ldp d2, d3, CCSTATE->fpr[2] | ||
2206 | | ldp x4, x5, CCSTATE->gpr[4] | ||
2207 | | ldp d4, d5, CCSTATE->fpr[4] | ||
2208 | | ldp x6, x7, CCSTATE->gpr[6] | ||
2209 | | ldp d6, d7, CCSTATE->fpr[6] | ||
2210 | | ldr x8, CCSTATE->retp | ||
2211 | | blr TMP3 | ||
2212 | | sub sp, fp, #16 | ||
2213 | | stp x0, x1, CCSTATE->gpr[0] | ||
2214 | | stp d0, d1, CCSTATE->fpr[0] | ||
2215 | | stp d2, d3, CCSTATE->fpr[2] | ||
2216 | | ldp fp, lr, [sp, #16] | ||
2217 | | ldp x20, CCSTATE, [sp], #32 | ||
2218 | | ret | ||
2219 | |.endif | ||
2220 | |// Note: vm_ffi_call must be the last function in this object file! | ||
2221 | | | ||
2222 | |//----------------------------------------------------------------------- | ||
2223 | } | ||
2224 | |||
2225 | /* Generate the code for a single instruction. */ | ||
2226 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ||
2227 | { | ||
2228 | int vk = 0; | ||
2229 | |=>defop: | ||
2230 | |||
2231 | switch (op) { | ||
2232 | |||
2233 | /* -- Comparison ops ---------------------------------------------------- */ | ||
2234 | |||
2235 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
2236 | |||
2237 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
2238 | | // RA = src1, RC = src2, JMP with RC = target | ||
2239 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2240 | | ldrh RBw, [PC, # OFS_RD] | ||
2241 | | ldr CARG2, [BASE, RC, lsl #3] | ||
2242 | | add PC, PC, #4 | ||
2243 | | add RB, PC, RB, lsl #2 | ||
2244 | | sub RB, RB, #0x20000 | ||
2245 | | checkint CARG1, >3 | ||
2246 | | checkint CARG2, >4 | ||
2247 | | cmp CARG1w, CARG2w | ||
2248 | if (op == BC_ISLT) { | ||
2249 | | csel PC, RB, PC, lt | ||
2250 | } else if (op == BC_ISGE) { | ||
2251 | | csel PC, RB, PC, ge | ||
2252 | } else if (op == BC_ISLE) { | ||
2253 | | csel PC, RB, PC, le | ||
2254 | } else { | ||
2255 | | csel PC, RB, PC, gt | ||
2256 | } | ||
2257 | |1: | ||
2258 | | ins_next | ||
2259 | | | ||
2260 | |3: // RA not int. | ||
2261 | | ldr FARG1, [BASE, RA, lsl #3] | ||
2262 | | blo ->vmeta_comp | ||
2263 | | ldr FARG2, [BASE, RC, lsl #3] | ||
2264 | | cmp TISNUMhi, CARG2, lsr #32 | ||
2265 | | bhi >5 | ||
2266 | | bne ->vmeta_comp | ||
2267 | | // RA number, RC int. | ||
2268 | | scvtf FARG2, CARG2w | ||
2269 | | b >5 | ||
2270 | | | ||
2271 | |4: // RA int, RC not int | ||
2272 | | ldr FARG2, [BASE, RC, lsl #3] | ||
2273 | | blo ->vmeta_comp | ||
2274 | | // RA int, RC number. | ||
2275 | | scvtf FARG1, CARG1w | ||
2276 | | | ||
2277 | |5: // RA number, RC number | ||
2278 | | fcmp FARG1, FARG2 | ||
2279 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | ||
2280 | if (op == BC_ISLT) { | ||
2281 | | csel PC, RB, PC, lo | ||
2282 | } else if (op == BC_ISGE) { | ||
2283 | | csel PC, RB, PC, hs | ||
2284 | } else if (op == BC_ISLE) { | ||
2285 | | csel PC, RB, PC, ls | ||
2286 | } else { | ||
2287 | | csel PC, RB, PC, hi | ||
2288 | } | ||
2289 | | b <1 | ||
2290 | break; | ||
2291 | |||
2292 | case BC_ISEQV: case BC_ISNEV: | ||
2293 | vk = op == BC_ISEQV; | ||
2294 | | // RA = src1, RC = src2, JMP with RC = target | ||
2295 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2296 | | add RC, BASE, RC, lsl #3 | ||
2297 | | ldrh RBw, [PC, # OFS_RD] | ||
2298 | | ldr CARG3, [RC] | ||
2299 | | add PC, PC, #4 | ||
2300 | | add RB, PC, RB, lsl #2 | ||
2301 | | sub RB, RB, #0x20000 | ||
2302 | | asr ITYPE, CARG3, #47 | ||
2303 | | cmn ITYPE, #-LJ_TISNUM | ||
2304 | if (vk) { | ||
2305 | | bls ->BC_ISEQN_Z | ||
2306 | } else { | ||
2307 | | bls ->BC_ISNEN_Z | ||
2308 | } | ||
2309 | | // RC is not a number. | ||
2310 | | asr TMP0, CARG1, #47 | ||
2311 | |.if FFI | ||
2312 | | // Check if RC or RA is a cdata. | ||
2313 | | cmn ITYPE, #-LJ_TCDATA | ||
2314 | | ccmn TMP0, #-LJ_TCDATA, #4, ne | ||
2315 | | beq ->vmeta_equal_cd | ||
2316 | |.endif | ||
2317 | | cmp CARG1, CARG3 | ||
2318 | | bne >2 | ||
2319 | | // Tag and value are equal. | ||
2320 | if (vk) { | ||
2321 | |->BC_ISEQV_Z: | ||
2322 | | mov PC, RB // Perform branch. | ||
2323 | } | ||
2324 | |1: | ||
2325 | | ins_next | ||
2326 | | | ||
2327 | |2: // Check if the tags are the same and it's a table or userdata. | ||
2328 | | cmp ITYPE, TMP0 | ||
2329 | | ccmn ITYPE, #-LJ_TISTABUD, #2, eq | ||
2330 | if (vk) { | ||
2331 | | bhi <1 | ||
2332 | } else { | ||
2333 | | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction. | ||
2334 | } | ||
2335 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
2336 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
2337 | | and TAB:CARG2, CARG1, #LJ_GCVMASK | ||
2338 | | ldr TAB:TMP2, TAB:CARG2->metatable | ||
2339 | if (vk) { | ||
2340 | | cbz TAB:TMP2, <1 // No metatable? | ||
2341 | | ldrb TMP1w, TAB:TMP2->nomm | ||
2342 | | mov CARG4, #0 // ne = 0 | ||
2343 | | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done. | ||
2344 | } else { | ||
2345 | | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable? | ||
2346 | | ldrb TMP1w, TAB:TMP2->nomm | ||
2347 | | mov CARG4, #1 // ne = 1. | ||
2348 | | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done. | ||
2349 | } | ||
2350 | | b ->vmeta_equal | ||
2351 | break; | ||
2352 | |||
2353 | case BC_ISEQS: case BC_ISNES: | ||
2354 | vk = op == BC_ISEQS; | ||
2355 | | // RA = src, RC = str_const (~), JMP with RC = target | ||
2356 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2357 | | mvn RC, RC | ||
2358 | | ldrh RBw, [PC, # OFS_RD] | ||
2359 | | ldr CARG2, [KBASE, RC, lsl #3] | ||
2360 | | add PC, PC, #4 | ||
2361 | | movn TMP0, #~LJ_TSTR | ||
2362 | |.if FFI | ||
2363 | | asr ITYPE, CARG1, #47 | ||
2364 | |.endif | ||
2365 | | add RB, PC, RB, lsl #2 | ||
2366 | | add CARG2, CARG2, TMP0, lsl #47 | ||
2367 | | sub RB, RB, #0x20000 | ||
2368 | |.if FFI | ||
2369 | | cmn ITYPE, #-LJ_TCDATA | ||
2370 | | beq ->vmeta_equal_cd | ||
2371 | |.endif | ||
2372 | | cmp CARG1, CARG2 | ||
2373 | if (vk) { | ||
2374 | | csel PC, RB, PC, eq | ||
2375 | } else { | ||
2376 | | csel PC, RB, PC, ne | ||
2377 | } | ||
2378 | | ins_next | ||
2379 | break; | ||
2380 | |||
2381 | case BC_ISEQN: case BC_ISNEN: | ||
2382 | vk = op == BC_ISEQN; | ||
2383 | | // RA = src, RC = num_const (~), JMP with RC = target | ||
2384 | | ldr CARG1, [BASE, RA, lsl #3] | ||
2385 | | add RC, KBASE, RC, lsl #3 | ||
2386 | | ldrh RBw, [PC, # OFS_RD] | ||
2387 | | ldr CARG3, [RC] | ||
2388 | | add PC, PC, #4 | ||
2389 | | add RB, PC, RB, lsl #2 | ||
2390 | | sub RB, RB, #0x20000 | ||
2391 | if (vk) { | ||
2392 | |->BC_ISEQN_Z: | ||
2393 | } else { | ||
2394 | |->BC_ISNEN_Z: | ||
2395 | } | ||
2396 | | checkint CARG1, >4 | ||
2397 | | checkint CARG3, >6 | ||
2398 | | cmp CARG1w, CARG3w | ||
2399 | |1: | ||
2400 | if (vk) { | ||
2401 | | csel PC, RB, PC, eq | ||
2402 | |2: | ||
2403 | } else { | ||
2404 | |2: | ||
2405 | | csel PC, RB, PC, ne | ||
2406 | } | ||
2407 | |3: | ||
2408 | | ins_next | ||
2409 | | | ||
2410 | |4: // RA not int. | ||
2411 | |.if FFI | ||
2412 | | blo >7 | ||
2413 | |.else | ||
2414 | | blo <2 | ||
2415 | |.endif | ||
2416 | | ldr FARG1, [BASE, RA, lsl #3] | ||
2417 | | ldr FARG2, [RC] | ||
2418 | | cmp TISNUMhi, CARG3, lsr #32 | ||
2419 | | bne >5 | ||
2420 | | // RA number, RC int. | ||
2421 | | scvtf FARG2, CARG3w | ||
2422 | |5: | ||
2423 | | // RA number, RC number. | ||
2424 | | fcmp FARG1, FARG2 | ||
2425 | | b <1 | ||
2426 | | | ||
2427 | |6: // RA int, RC number | ||
2428 | | ldr FARG2, [RC] | ||
2429 | | scvtf FARG1, CARG1w | ||
2430 | | fcmp FARG1, FARG2 | ||
2431 | | b <1 | ||
2432 | | | ||
2433 | |.if FFI | ||
2434 | |7: | ||
2435 | | asr ITYPE, CARG1, #47 | ||
2436 | | cmn ITYPE, #-LJ_TCDATA | ||
2437 | | bne <2 | ||
2438 | | b ->vmeta_equal_cd | ||
2439 | |.endif | ||
2440 | break; | ||
2441 | |||
2442 | case BC_ISEQP: case BC_ISNEP: | ||
2443 | vk = op == BC_ISEQP; | ||
2444 | | // RA = src, RC = primitive_type (~), JMP with RC = target | ||
2445 | | ldr TMP0, [BASE, RA, lsl #3] | ||
2446 | | ldrh RBw, [PC, # OFS_RD] | ||
2447 | | add PC, PC, #4 | ||
2448 | | add RC, RC, #1 | ||
2449 | | add RB, PC, RB, lsl #2 | ||
2450 | |.if FFI | ||
2451 | | asr ITYPE, TMP0, #47 | ||
2452 | | cmn ITYPE, #-LJ_TCDATA | ||
2453 | | beq ->vmeta_equal_cd | ||
2454 | | cmn RC, ITYPE | ||
2455 | |.else | ||
2456 | | cmn RC, TMP0, asr #47 | ||
2457 | |.endif | ||
2458 | | sub RB, RB, #0x20000 | ||
2459 | if (vk) { | ||
2460 | | csel PC, RB, PC, eq | ||
2461 | } else { | ||
2462 | | csel PC, RB, PC, ne | ||
2463 | } | ||
2464 | | ins_next | ||
2465 | break; | ||
2466 | |||
2467 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
2468 | |||
2469 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
2470 | | // RA = dst or unused, RC = src, JMP with RC = target | ||
2471 | | ldrh RBw, [PC, # OFS_RD] | ||
2472 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2473 | | add PC, PC, #4 | ||
2474 | | mov_false TMP1 | ||
2475 | | add RB, PC, RB, lsl #2 | ||
2476 | | cmp TMP0, TMP1 | ||
2477 | | sub RB, RB, #0x20000 | ||
2478 | if (op == BC_ISTC || op == BC_IST) { | ||
2479 | if (op == BC_ISTC) { | ||
2480 | | csel RA, RA, RC, lo | ||
2481 | } | ||
2482 | | csel PC, RB, PC, lo | ||
2483 | } else { | ||
2484 | if (op == BC_ISFC) { | ||
2485 | | csel RA, RA, RC, hs | ||
2486 | } | ||
2487 | | csel PC, RB, PC, hs | ||
2488 | } | ||
2489 | if (op == BC_ISTC || op == BC_ISFC) { | ||
2490 | | str TMP0, [BASE, RA, lsl #3] | ||
2491 | } | ||
2492 | | ins_next | ||
2493 | break; | ||
2494 | |||
2495 | case BC_ISTYPE: | ||
2496 | | // RA = src, RC = -type | ||
2497 | | ldr TMP0, [BASE, RA, lsl #3] | ||
2498 | | cmn RC, TMP0, asr #47 | ||
2499 | | bne ->vmeta_istype | ||
2500 | | ins_next | ||
2501 | break; | ||
2502 | case BC_ISNUM: | ||
2503 | | // RA = src, RC = -(TISNUM-1) | ||
2504 | | ldr TMP0, [BASE, RA] | ||
2505 | | checknum TMP0, ->vmeta_istype | ||
2506 | | ins_next | ||
2507 | break; | ||
2508 | |||
2509 | /* -- Unary ops --------------------------------------------------------- */ | ||
2510 | |||
2511 | case BC_MOV: | ||
2512 | | // RA = dst, RC = src | ||
2513 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2514 | | str TMP0, [BASE, RA, lsl #3] | ||
2515 | | ins_next | ||
2516 | break; | ||
2517 | case BC_NOT: | ||
2518 | | // RA = dst, RC = src | ||
2519 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2520 | | mov_false TMP1 | ||
2521 | | mov_true TMP2 | ||
2522 | | cmp TMP0, TMP1 | ||
2523 | | csel TMP0, TMP1, TMP2, lo | ||
2524 | | str TMP0, [BASE, RA, lsl #3] | ||
2525 | | ins_next | ||
2526 | break; | ||
2527 | case BC_UNM: | ||
2528 | | // RA = dst, RC = src | ||
2529 | | ldr TMP0, [BASE, RC, lsl #3] | ||
2530 | | asr ITYPE, TMP0, #47 | ||
2531 | | cmn ITYPE, #-LJ_TISNUM | ||
2532 | | bhi ->vmeta_unm | ||
2533 | | eor TMP0, TMP0, #U64x(80000000,00000000) | ||
2534 | | bne >5 | ||
2535 | | negs TMP0w, TMP0w | ||
2536 | | movz CARG3, #0x41e0, lsl #48 // 2^31. | ||
2537 | | add TMP0, TMP0, TISNUM | ||
2538 | | csel TMP0, TMP0, CARG3, vc | ||
2539 | |5: | ||
2540 | | str TMP0, [BASE, RA, lsl #3] | ||
2541 | | ins_next | ||
2542 | break; | ||
2543 | case BC_LEN: | ||
2544 | | // RA = dst, RC = src | ||
2545 | | ldr CARG1, [BASE, RC, lsl #3] | ||
2546 | | asr ITYPE, CARG1, #47 | ||
2547 | | cmn ITYPE, #-LJ_TSTR | ||
2548 | | and CARG1, CARG1, #LJ_GCVMASK | ||
2549 | | bne >2 | ||
2550 | | ldr CARG1w, STR:CARG1->len | ||
2551 | |1: | ||
2552 | | add CARG1, CARG1, TISNUM | ||
2553 | | str CARG1, [BASE, RA, lsl #3] | ||
2554 | | ins_next | ||
2555 | | | ||
2556 | |2: | ||
2557 | | cmn ITYPE, #-LJ_TTAB | ||
2558 | | bne ->vmeta_len | ||
2559 | #if LJ_52 | ||
2560 | | ldr TAB:CARG2, TAB:CARG1->metatable | ||
2561 | | cbnz TAB:CARG2, >9 | ||
2562 | |3: | ||
2563 | #endif | ||
2564 | |->BC_LEN_Z: | ||
2565 | | bl extern lj_tab_len // (GCtab *t) | ||
2566 | | // Returns uint32_t (but less than 2^31). | ||
2567 | | b <1 | ||
2568 | | | ||
2569 | #if LJ_52 | ||
2570 | |9: | ||
2571 | | ldrb TMP1w, TAB:CARG2->nomm | ||
2572 | | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done. | ||
2573 | | b ->vmeta_len | ||
2574 | #endif | ||
2575 | break; | ||
2576 | |||
2577 | /* -- Binary ops -------------------------------------------------------- */ | ||
2578 | |||
2579 | |.macro ins_arithcheck_int, target | ||
2580 | | checkint CARG1, target | ||
2581 | | checkint CARG2, target | ||
2582 | |.endmacro | ||
2583 | | | ||
2584 | |.macro ins_arithcheck_num, target | ||
2585 | | checknum CARG1, target | ||
2586 | | checknum CARG2, target | ||
2587 | |.endmacro | ||
2588 | | | ||
2589 | |.macro ins_arithcheck_nzdiv, target | ||
2590 | | cbz CARG2w, target | ||
2591 | |.endmacro | ||
2592 | | | ||
2593 | |.macro ins_arithhead | ||
2594 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
2595 | ||if (vk == 1) { | ||
2596 | | and RC, RC, #255 | ||
2597 | | decode_RB RB, INS | ||
2598 | ||} else { | ||
2599 | | decode_RB RB, INS | ||
2600 | | and RC, RC, #255 | ||
2601 | ||} | ||
2602 | |.endmacro | ||
2603 | | | ||
2604 | |.macro ins_arithload, reg1, reg2 | ||
2605 | | // RA = dst, RB = src1, RC = src2 | num_const | ||
2606 | ||switch (vk) { | ||
2607 | ||case 0: | ||
2608 | | ldr reg1, [BASE, RB, lsl #3] | ||
2609 | | ldr reg2, [KBASE, RC, lsl #3] | ||
2610 | || break; | ||
2611 | ||case 1: | ||
2612 | | ldr reg1, [KBASE, RC, lsl #3] | ||
2613 | | ldr reg2, [BASE, RB, lsl #3] | ||
2614 | || break; | ||
2615 | ||default: | ||
2616 | | ldr reg1, [BASE, RB, lsl #3] | ||
2617 | | ldr reg2, [BASE, RC, lsl #3] | ||
2618 | || break; | ||
2619 | ||} | ||
2620 | |.endmacro | ||
2621 | | | ||
2622 | |.macro ins_arithfallback, ins | ||
2623 | ||switch (vk) { | ||
2624 | ||case 0: | ||
2625 | | ins ->vmeta_arith_vn | ||
2626 | || break; | ||
2627 | ||case 1: | ||
2628 | | ins ->vmeta_arith_nv | ||
2629 | || break; | ||
2630 | ||default: | ||
2631 | | ins ->vmeta_arith_vv | ||
2632 | || break; | ||
2633 | ||} | ||
2634 | |.endmacro | ||
2635 | | | ||
2636 | |.macro ins_arithmod, res, reg1, reg2 | ||
2637 | | fdiv d2, reg1, reg2 | ||
2638 | | frintm d2, d2 | ||
2639 | | fmsub res, d2, reg2, reg1 | ||
2640 | |.endmacro | ||
2641 | | | ||
2642 | |.macro ins_arithdn, intins, fpins | ||
2643 | | ins_arithhead | ||
2644 | | ins_arithload CARG1, CARG2 | ||
2645 | | ins_arithcheck_int >5 | ||
2646 | |.if "intins" == "smull" | ||
2647 | | smull CARG1, CARG1w, CARG2w | ||
2648 | | cmp CARG1, CARG1, sxtw | ||
2649 | | mov CARG1w, CARG1w | ||
2650 | | ins_arithfallback bne | ||
2651 | |.elif "intins" == "ins_arithmodi" | ||
2652 | | ins_arithfallback ins_arithcheck_nzdiv | ||
2653 | | bl ->vm_modi | ||
2654 | |.else | ||
2655 | | intins CARG1w, CARG1w, CARG2w | ||
2656 | | ins_arithfallback bvs | ||
2657 | |.endif | ||
2658 | | add CARG1, CARG1, TISNUM | ||
2659 | | str CARG1, [BASE, RA, lsl #3] | ||
2660 | |4: | ||
2661 | | ins_next | ||
2662 | | | ||
2663 | |5: // FP variant. | ||
2664 | | ins_arithload FARG1, FARG2 | ||
2665 | | ins_arithfallback ins_arithcheck_num | ||
2666 | | fpins FARG1, FARG1, FARG2 | ||
2667 | | str FARG1, [BASE, RA, lsl #3] | ||
2668 | | b <4 | ||
2669 | |.endmacro | ||
2670 | | | ||
2671 | |.macro ins_arithfp, fpins | ||
2672 | | ins_arithhead | ||
2673 | | ins_arithload CARG1, CARG2 | ||
2674 | | ins_arithload FARG1, FARG2 | ||
2675 | | ins_arithfallback ins_arithcheck_num | ||
2676 | |.if "fpins" == "fpow" | ||
2677 | | bl extern pow | ||
2678 | |.else | ||
2679 | | fpins FARG1, FARG1, FARG2 | ||
2680 | |.endif | ||
2681 | | str FARG1, [BASE, RA, lsl #3] | ||
2682 | | ins_next | ||
2683 | |.endmacro | ||
2684 | |||
2685 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | ||
2686 | | ins_arithdn adds, fadd | ||
2687 | break; | ||
2688 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | ||
2689 | | ins_arithdn subs, fsub | ||
2690 | break; | ||
2691 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | ||
2692 | | ins_arithdn smull, fmul | ||
2693 | break; | ||
2694 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | ||
2695 | | ins_arithfp fdiv | ||
2696 | break; | ||
2697 | case BC_MODVN: case BC_MODNV: case BC_MODVV: | ||
2698 | | ins_arithdn ins_arithmodi, ins_arithmod | ||
2699 | break; | ||
2700 | case BC_POW: | ||
2701 | | // NYI: (partial) integer arithmetic. | ||
2702 | | ins_arithfp fpow | ||
2703 | break; | ||
2704 | |||
2705 | case BC_CAT: | ||
2706 | | decode_RB RB, INS | ||
2707 | | and RC, RC, #255 | ||
2708 | | // RA = dst, RB = src_start, RC = src_end | ||
2709 | | str BASE, L->base | ||
2710 | | sub CARG3, RC, RB | ||
2711 | | add CARG2, BASE, RC, lsl #3 | ||
2712 | |->BC_CAT_Z: | ||
2713 | | // RA = dst, CARG2 = top-1, CARG3 = left | ||
2714 | | mov CARG1, L | ||
2715 | | str PC, SAVE_PC | ||
2716 | | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
2717 | | // Returns NULL (finished) or TValue * (metamethod). | ||
2718 | | ldrb RBw, [PC, #-4+OFS_RB] | ||
2719 | | ldr BASE, L->base | ||
2720 | | cbnz CRET1, ->vmeta_binop | ||
2721 | | ldr TMP0, [BASE, RB, lsl #3] | ||
2722 | | str TMP0, [BASE, RA, lsl #3] // Copy result to RA. | ||
2723 | | ins_next | ||
2724 | break; | ||
2725 | |||
2726 | /* -- Constant ops ------------------------------------------------------ */ | ||
2727 | |||
2728 | case BC_KSTR: | ||
2729 | | // RA = dst, RC = str_const (~) | ||
2730 | | mvn RC, RC | ||
2731 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2732 | | movn TMP1, #~LJ_TSTR | ||
2733 | | add TMP0, TMP0, TMP1, lsl #47 | ||
2734 | | str TMP0, [BASE, RA, lsl #3] | ||
2735 | | ins_next | ||
2736 | break; | ||
2737 | case BC_KCDATA: | ||
2738 | |.if FFI | ||
2739 | | // RA = dst, RC = cdata_const (~) | ||
2740 | | mvn RC, RC | ||
2741 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2742 | | movn TMP1, #~LJ_TCDATA | ||
2743 | | add TMP0, TMP0, TMP1, lsl #47 | ||
2744 | | str TMP0, [BASE, RA, lsl #3] | ||
2745 | | ins_next | ||
2746 | |.endif | ||
2747 | break; | ||
2748 | case BC_KSHORT: | ||
2749 | | // RA = dst, RC = int16_literal | ||
2750 | | sxth RCw, RCw | ||
2751 | | add TMP0, RC, TISNUM | ||
2752 | | str TMP0, [BASE, RA, lsl #3] | ||
2753 | | ins_next | ||
2754 | break; | ||
2755 | case BC_KNUM: | ||
2756 | | // RA = dst, RC = num_const | ||
2757 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2758 | | str TMP0, [BASE, RA, lsl #3] | ||
2759 | | ins_next | ||
2760 | break; | ||
2761 | case BC_KPRI: | ||
2762 | | // RA = dst, RC = primitive_type (~) | ||
2763 | | mvn TMP0, RC, lsl #47 | ||
2764 | | str TMP0, [BASE, RA, lsl #3] | ||
2765 | | ins_next | ||
2766 | break; | ||
2767 | case BC_KNIL: | ||
2768 | | // RA = base, RC = end | ||
2769 | | add RA, BASE, RA, lsl #3 | ||
2770 | | add RC, BASE, RC, lsl #3 | ||
2771 | | str TISNIL, [RA], #8 | ||
2772 | |1: | ||
2773 | | cmp RA, RC | ||
2774 | | str TISNIL, [RA], #8 | ||
2775 | | blt <1 | ||
2776 | | ins_next_ | ||
2777 | break; | ||
2778 | |||
2779 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
2780 | |||
2781 | case BC_UGET: | ||
2782 | | // RA = dst, RC = uvnum | ||
2783 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2784 | | add RC, RC, #offsetof(GCfuncL, uvptr)/8 | ||
2785 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2786 | | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3] | ||
2787 | | ldr CARG2, UPVAL:CARG2->v | ||
2788 | | ldr TMP0, [CARG2] | ||
2789 | | str TMP0, [BASE, RA, lsl #3] | ||
2790 | | ins_next | ||
2791 | break; | ||
2792 | case BC_USETV: | ||
2793 | | // RA = uvnum, RC = src | ||
2794 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2795 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2796 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2797 | | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] | ||
2798 | | ldr CARG3, [BASE, RC, lsl #3] | ||
2799 | | ldr CARG2, UPVAL:CARG1->v | ||
2800 | | ldrb TMP2w, UPVAL:CARG1->marked | ||
2801 | | ldrb TMP0w, UPVAL:CARG1->closed | ||
2802 | | asr ITYPE, CARG3, #47 | ||
2803 | | str CARG3, [CARG2] // Store the new value through uv->v. | ||
2804 | | add ITYPE, ITYPE, #-LJ_TISGCV | ||
2805 | | tst TMP2w, #LJ_GC_BLACK // isblack(uv) | ||
2806 | | ccmp TMP0w, #0, #4, ne // && uv->closed | ||
2807 | | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v) | ||
2808 | | bhi >2 | ||
2809 | |1: | ||
2810 | | ins_next | ||
2811 | | | ||
2812 | |2: // Check if new value is white. | ||
2813 | | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK | ||
2814 | | ldrb TMP1w, GCOBJ:CARG3->gch.marked | ||
2815 | | tst TMP1w, #LJ_GC_WHITES // iswhite(v) | ||
2816 | | beq <1 | ||
2817 | | // Crossed a write barrier. Move the barrier forward. | ||
2818 | | mov CARG1, GL | ||
2819 | | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
2820 | | b <1 | ||
2821 | break; | ||
2822 | case BC_USETS: | ||
2823 | | // RA = uvnum, RC = str_const (~) | ||
2824 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2825 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2826 | | mvn RC, RC | ||
2827 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2828 | | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] | ||
2829 | | ldr STR:CARG3, [KBASE, RC, lsl #3] | ||
2830 | | movn TMP0, #~LJ_TSTR | ||
2831 | | ldr CARG2, UPVAL:CARG1->v | ||
2832 | | ldrb TMP2w, UPVAL:CARG1->marked | ||
2833 | | add TMP0, STR:CARG3, TMP0, lsl #47 | ||
2834 | | ldrb TMP1w, STR:CARG3->marked | ||
2835 | | str TMP0, [CARG2] | ||
2836 | | tbnz TMP2w, #2, >2 // isblack(uv) | ||
2837 | |1: | ||
2838 | | ins_next | ||
2839 | | | ||
2840 | |2: // Check if string is white and ensure upvalue is closed. | ||
2841 | | ldrb TMP0w, UPVAL:CARG1->closed | ||
2842 | | tst TMP1w, #LJ_GC_WHITES // iswhite(str) | ||
2843 | | ccmp TMP0w, #0, #4, ne | ||
2844 | | beq <1 | ||
2845 | | // Crossed a write barrier. Move the barrier forward. | ||
2846 | | mov CARG1, GL | ||
2847 | | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
2848 | | b <1 | ||
2849 | break; | ||
2850 | case BC_USETN: | ||
2851 | | // RA = uvnum, RC = num_const | ||
2852 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2853 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2854 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2855 | | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] | ||
2856 | | ldr TMP0, [KBASE, RC, lsl #3] | ||
2857 | | ldr CARG2, UPVAL:CARG2->v | ||
2858 | | str TMP0, [CARG2] | ||
2859 | | ins_next | ||
2860 | break; | ||
2861 | case BC_USETP: | ||
2862 | | // RA = uvnum, RC = primitive_type (~) | ||
2863 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
2864 | | add RA, RA, #offsetof(GCfuncL, uvptr)/8 | ||
2865 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
2866 | | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] | ||
2867 | | mvn TMP0, RC, lsl #47 | ||
2868 | | ldr CARG2, UPVAL:CARG2->v | ||
2869 | | str TMP0, [CARG2] | ||
2870 | | ins_next | ||
2871 | break; | ||
2872 | |||
2873 | case BC_UCLO: | ||
2874 | | // RA = level, RC = target | ||
2875 | | ldr CARG3, L->openupval | ||
2876 | | add RC, PC, RC, lsl #2 | ||
2877 | | str BASE, L->base | ||
2878 | | sub PC, RC, #0x20000 | ||
2879 | | cbz CARG3, >1 | ||
2880 | | mov CARG1, L | ||
2881 | | add CARG2, BASE, RA, lsl #3 | ||
2882 | | bl extern lj_func_closeuv // (lua_State *L, TValue *level) | ||
2883 | | ldr BASE, L->base | ||
2884 | |1: | ||
2885 | | ins_next | ||
2886 | break; | ||
2887 | |||
2888 | case BC_FNEW: | ||
2889 | | // RA = dst, RC = proto_const (~) (holding function prototype) | ||
2890 | | mvn RC, RC | ||
2891 | | str BASE, L->base | ||
2892 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
2893 | | str PC, SAVE_PC | ||
2894 | | ldr CARG2, [KBASE, RC, lsl #3] | ||
2895 | | mov CARG1, L | ||
2896 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2897 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
2898 | | bl extern lj_func_newL_gc | ||
2899 | | // Returns GCfuncL *. | ||
2900 | | ldr BASE, L->base | ||
2901 | | movn TMP0, #~LJ_TFUNC | ||
2902 | | add CRET1, CRET1, TMP0, lsl #47 | ||
2903 | | str CRET1, [BASE, RA, lsl #3] | ||
2904 | | ins_next | ||
2905 | break; | ||
2906 | |||
2907 | /* -- Table ops --------------------------------------------------------- */ | ||
2908 | |||
2909 | case BC_TNEW: | ||
2910 | case BC_TDUP: | ||
2911 | | // RA = dst, RC = (hbits|asize) | tab_const (~) | ||
2912 | | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total. | ||
2913 | | str BASE, L->base | ||
2914 | | str PC, SAVE_PC | ||
2915 | | mov CARG1, L | ||
2916 | | cmp CARG3, CARG4 | ||
2917 | | bhs >5 | ||
2918 | |1: | ||
2919 | if (op == BC_TNEW) { | ||
2920 | | and CARG2, RC, #0x7ff | ||
2921 | | lsr CARG3, RC, #11 | ||
2922 | | cmp CARG2, #0x7ff | ||
2923 | | mov TMP0, #0x801 | ||
2924 | | csel CARG2, CARG2, TMP0, ne | ||
2925 | | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) | ||
2926 | | // Returns GCtab *. | ||
2927 | } else { | ||
2928 | | mvn RC, RC | ||
2929 | | ldr CARG2, [KBASE, RC, lsl #3] | ||
2930 | | bl extern lj_tab_dup // (lua_State *L, Table *kt) | ||
2931 | | // Returns GCtab *. | ||
2932 | } | ||
2933 | | ldr BASE, L->base | ||
2934 | | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48 | ||
2935 | | str CRET1, [BASE, RA, lsl #3] | ||
2936 | | ins_next | ||
2937 | | | ||
2938 | |5: | ||
2939 | | bl extern lj_gc_step_fixtop // (lua_State *L) | ||
2940 | | mov CARG1, L | ||
2941 | | b <1 | ||
2942 | break; | ||
2943 | |||
2944 | case BC_GGET: | ||
2945 | | // RA = dst, RC = str_const (~) | ||
2946 | case BC_GSET: | ||
2947 | | // RA = src, RC = str_const (~) | ||
2948 | | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] | ||
2949 | | mvn RC, RC | ||
2950 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
2951 | | ldr TAB:CARG2, LFUNC:CARG1->env | ||
2952 | | ldr STR:RC, [KBASE, RC, lsl #3] | ||
2953 | if (op == BC_GGET) { | ||
2954 | | b ->BC_TGETS_Z | ||
2955 | } else { | ||
2956 | | b ->BC_TSETS_Z | ||
2957 | } | ||
2958 | break; | ||
2959 | |||
2960 | case BC_TGETV: | ||
2961 | | decode_RB RB, INS | ||
2962 | | and RC, RC, #255 | ||
2963 | | // RA = dst, RB = table, RC = key | ||
2964 | | ldr CARG2, [BASE, RB, lsl #3] | ||
2965 | | ldr TMP1, [BASE, RC, lsl #3] | ||
2966 | | checktab CARG2, ->vmeta_tgetv | ||
2967 | | checkint TMP1, >9 // Integer key? | ||
2968 | | ldr CARG3, TAB:CARG2->array | ||
2969 | | ldr CARG1w, TAB:CARG2->asize | ||
2970 | | add CARG3, CARG3, TMP1, uxtw #3 | ||
2971 | | cmp TMP1w, CARG1w // In array part? | ||
2972 | | bhs ->vmeta_tgetv | ||
2973 | | ldr TMP0, [CARG3] | ||
2974 | | cmp TMP0, TISNIL | ||
2975 | | beq >5 | ||
2976 | |1: | ||
2977 | | str TMP0, [BASE, RA, lsl #3] | ||
2978 | | ins_next | ||
2979 | | | ||
2980 | |5: // Check for __index if table value is nil. | ||
2981 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
2982 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
2983 | | ldrb TMP1w, TAB:CARG1->nomm | ||
2984 | | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. | ||
2985 | | b ->vmeta_tgetv | ||
2986 | | | ||
2987 | |9: | ||
2988 | | asr ITYPE, TMP1, #47 | ||
2989 | | cmn ITYPE, #-LJ_TSTR // String key? | ||
2990 | | bne ->vmeta_tgetv | ||
2991 | | and STR:RC, TMP1, #LJ_GCVMASK | ||
2992 | | b ->BC_TGETS_Z | ||
2993 | break; | ||
2994 | case BC_TGETS: | ||
2995 | | decode_RB RB, INS | ||
2996 | | and RC, RC, #255 | ||
2997 | | // RA = dst, RB = table, RC = str_const (~) | ||
2998 | | ldr CARG2, [BASE, RB, lsl #3] | ||
2999 | | mvn RC, RC | ||
3000 | | ldr STR:RC, [KBASE, RC, lsl #3] | ||
3001 | | checktab CARG2, ->vmeta_tgets1 | ||
3002 | |->BC_TGETS_Z: | ||
3003 | | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst | ||
3004 | | ldr TMP1w, TAB:CARG2->hmask | ||
3005 | | ldr TMP2w, STR:RC->sid | ||
3006 | | ldr NODE:CARG3, TAB:CARG2->node | ||
3007 | | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | ||
3008 | | add TMP1, TMP1, TMP1, lsl #1 | ||
3009 | | movn CARG4, #~LJ_TSTR | ||
3010 | | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 | ||
3011 | | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. | ||
3012 | |1: | ||
3013 | | ldp TMP0, CARG1, NODE:CARG3->val | ||
3014 | | ldr NODE:CARG3, NODE:CARG3->next | ||
3015 | | cmp CARG1, CARG4 | ||
3016 | | bne >4 | ||
3017 | | cmp TMP0, TISNIL | ||
3018 | | beq >5 | ||
3019 | |3: | ||
3020 | | str TMP0, [BASE, RA, lsl #3] | ||
3021 | | ins_next | ||
3022 | | | ||
3023 | |4: // Follow hash chain. | ||
3024 | | cbnz NODE:CARG3, <1 | ||
3025 | | // End of hash chain: key not found, nil result. | ||
3026 | | mov TMP0, TISNIL | ||
3027 | | | ||
3028 | |5: // Check for __index if table value is nil. | ||
3029 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3030 | | cbz TAB:CARG1, <3 // No metatable: done. | ||
3031 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3032 | | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done. | ||
3033 | | b ->vmeta_tgets | ||
3034 | break; | ||
3035 | case BC_TGETB: | ||
3036 | | decode_RB RB, INS | ||
3037 | | and RC, RC, #255 | ||
3038 | | // RA = dst, RB = table, RC = index | ||
3039 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3040 | | checktab CARG2, ->vmeta_tgetb | ||
3041 | | ldr CARG3, TAB:CARG2->array | ||
3042 | | ldr CARG1w, TAB:CARG2->asize | ||
3043 | | add CARG3, CARG3, RC, lsl #3 | ||
3044 | | cmp RCw, CARG1w // In array part? | ||
3045 | | bhs ->vmeta_tgetb | ||
3046 | | ldr TMP0, [CARG3] | ||
3047 | | cmp TMP0, TISNIL | ||
3048 | | beq >5 | ||
3049 | |1: | ||
3050 | | str TMP0, [BASE, RA, lsl #3] | ||
3051 | | ins_next | ||
3052 | | | ||
3053 | |5: // Check for __index if table value is nil. | ||
3054 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3055 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
3056 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3057 | | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. | ||
3058 | | b ->vmeta_tgetb | ||
3059 | break; | ||
3060 | case BC_TGETR: | ||
3061 | | decode_RB RB, INS | ||
3062 | | and RC, RC, #255 | ||
3063 | | // RA = dst, RB = table, RC = key | ||
3064 | | ldr CARG1, [BASE, RB, lsl #3] | ||
3065 | | ldr TMP1, [BASE, RC, lsl #3] | ||
3066 | | and TAB:CARG1, CARG1, #LJ_GCVMASK | ||
3067 | | ldr CARG3, TAB:CARG1->array | ||
3068 | | ldr TMP2w, TAB:CARG1->asize | ||
3069 | | add CARG3, CARG3, TMP1w, uxtw #3 | ||
3070 | | cmp TMP1w, TMP2w // In array part? | ||
3071 | | bhs ->vmeta_tgetr | ||
3072 | | ldr TMP0, [CARG3] | ||
3073 | |->BC_TGETR_Z: | ||
3074 | | str TMP0, [BASE, RA, lsl #3] | ||
3075 | | ins_next | ||
3076 | break; | ||
3077 | |||
3078 | case BC_TSETV: | ||
3079 | | decode_RB RB, INS | ||
3080 | | and RC, RC, #255 | ||
3081 | | // RA = src, RB = table, RC = key | ||
3082 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3083 | | ldr TMP1, [BASE, RC, lsl #3] | ||
3084 | | checktab CARG2, ->vmeta_tsetv | ||
3085 | | checkint TMP1, >9 // Integer key? | ||
3086 | | ldr CARG3, TAB:CARG2->array | ||
3087 | | ldr CARG1w, TAB:CARG2->asize | ||
3088 | | add CARG3, CARG3, TMP1, uxtw #3 | ||
3089 | | cmp TMP1w, CARG1w // In array part? | ||
3090 | | bhs ->vmeta_tsetv | ||
3091 | | ldr TMP1, [CARG3] | ||
3092 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3093 | | ldrb TMP2w, TAB:CARG2->marked | ||
3094 | | cmp TMP1, TISNIL // Previous value is nil? | ||
3095 | | beq >5 | ||
3096 | |1: | ||
3097 | | str TMP0, [CARG3] | ||
3098 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3099 | |2: | ||
3100 | | ins_next | ||
3101 | | | ||
3102 | |5: // Check for __newindex if previous value is nil. | ||
3103 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3104 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
3105 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3106 | | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. | ||
3107 | | b ->vmeta_tsetv | ||
3108 | | | ||
3109 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3110 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3111 | | b <2 | ||
3112 | | | ||
3113 | |9: | ||
3114 | | asr ITYPE, TMP1, #47 | ||
3115 | | cmn ITYPE, #-LJ_TSTR // String key? | ||
3116 | | bne ->vmeta_tsetv | ||
3117 | | and STR:RC, TMP1, #LJ_GCVMASK | ||
3118 | | b ->BC_TSETS_Z | ||
3119 | break; | ||
3120 | case BC_TSETS: | ||
3121 | | decode_RB RB, INS | ||
3122 | | and RC, RC, #255 | ||
3123 | | // RA = dst, RB = table, RC = str_const (~) | ||
3124 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3125 | | mvn RC, RC | ||
3126 | | ldr STR:RC, [KBASE, RC, lsl #3] | ||
3127 | | checktab CARG2, ->vmeta_tsets1 | ||
3128 | |->BC_TSETS_Z: | ||
3129 | | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src | ||
3130 | | ldr TMP1w, TAB:CARG2->hmask | ||
3131 | | ldr TMP2w, STR:RC->sid | ||
3132 | | ldr NODE:CARG3, TAB:CARG2->node | ||
3133 | | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | ||
3134 | | add TMP1, TMP1, TMP1, lsl #1 | ||
3135 | | movn CARG4, #~LJ_TSTR | ||
3136 | | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 | ||
3137 | | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. | ||
3138 | | strb wzr, TAB:CARG2->nomm // Clear metamethod cache. | ||
3139 | |1: | ||
3140 | | ldp TMP1, CARG1, NODE:CARG3->val | ||
3141 | | ldr NODE:TMP3, NODE:CARG3->next | ||
3142 | | ldrb TMP2w, TAB:CARG2->marked | ||
3143 | | cmp CARG1, CARG4 | ||
3144 | | bne >5 | ||
3145 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3146 | | cmp TMP1, TISNIL // Previous value is nil? | ||
3147 | | beq >4 | ||
3148 | |2: | ||
3149 | | str TMP0, NODE:CARG3->val | ||
3150 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3151 | |3: | ||
3152 | | ins_next | ||
3153 | | | ||
3154 | |4: // Check for __newindex if previous value is nil. | ||
3155 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3156 | | cbz TAB:CARG1, <2 // No metatable: done. | ||
3157 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3158 | | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done. | ||
3159 | | b ->vmeta_tsets | ||
3160 | | | ||
3161 | |5: // Follow hash chain. | ||
3162 | | mov NODE:CARG3, NODE:TMP3 | ||
3163 | | cbnz NODE:TMP3, <1 | ||
3164 | | // End of hash chain: key not found, add a new one. | ||
3165 | | | ||
3166 | | // But check for __newindex first. | ||
3167 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3168 | | cbz TAB:CARG1, >6 // No metatable: continue. | ||
3169 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3170 | | // 'no __newindex' flag NOT set: check. | ||
3171 | | tbz TMP1w, #MM_newindex, ->vmeta_tsets | ||
3172 | |6: | ||
3173 | | movn TMP1, #~LJ_TSTR | ||
3174 | | str PC, SAVE_PC | ||
3175 | | add TMP0, STR:RC, TMP1, lsl #47 | ||
3176 | | str BASE, L->base | ||
3177 | | mov CARG1, L | ||
3178 | | str TMP0, TMPD | ||
3179 | | add CARG3, sp, TMPDofs | ||
3180 | | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
3181 | | // Returns TValue *. | ||
3182 | | ldr BASE, L->base | ||
3183 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3184 | | str TMP0, [CRET1] | ||
3185 | | b <3 // No 2nd write barrier needed. | ||
3186 | | | ||
3187 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3188 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3189 | | b <3 | ||
3190 | break; | ||
3191 | case BC_TSETB: | ||
3192 | | decode_RB RB, INS | ||
3193 | | and RC, RC, #255 | ||
3194 | | // RA = src, RB = table, RC = index | ||
3195 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3196 | | checktab CARG2, ->vmeta_tsetb | ||
3197 | | ldr CARG3, TAB:CARG2->array | ||
3198 | | ldr CARG1w, TAB:CARG2->asize | ||
3199 | | add CARG3, CARG3, RC, lsl #3 | ||
3200 | | cmp RCw, CARG1w // In array part? | ||
3201 | | bhs ->vmeta_tsetb | ||
3202 | | ldr TMP1, [CARG3] | ||
3203 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3204 | | ldrb TMP2w, TAB:CARG2->marked | ||
3205 | | cmp TMP1, TISNIL // Previous value is nil? | ||
3206 | | beq >5 | ||
3207 | |1: | ||
3208 | | str TMP0, [CARG3] | ||
3209 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3210 | |2: | ||
3211 | | ins_next | ||
3212 | | | ||
3213 | |5: // Check for __newindex if previous value is nil. | ||
3214 | | ldr TAB:CARG1, TAB:CARG2->metatable | ||
3215 | | cbz TAB:CARG1, <1 // No metatable: done. | ||
3216 | | ldrb TMP1w, TAB:CARG1->nomm | ||
3217 | | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. | ||
3218 | | b ->vmeta_tsetb | ||
3219 | | | ||
3220 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3221 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3222 | | b <2 | ||
3223 | break; | ||
3224 | case BC_TSETR: | ||
3225 | | decode_RB RB, INS | ||
3226 | | and RC, RC, #255 | ||
3227 | | // RA = src, RB = table, RC = key | ||
3228 | | ldr CARG2, [BASE, RB, lsl #3] | ||
3229 | | ldr TMP1, [BASE, RC, lsl #3] | ||
3230 | | and TAB:CARG2, CARG2, #LJ_GCVMASK | ||
3231 | | ldr CARG1, TAB:CARG2->array | ||
3232 | | ldrb TMP2w, TAB:CARG2->marked | ||
3233 | | ldr CARG4w, TAB:CARG2->asize | ||
3234 | | add CARG1, CARG1, TMP1, uxtw #3 | ||
3235 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3236 | |2: | ||
3237 | | cmp TMP1w, CARG4w // In array part? | ||
3238 | | bhs ->vmeta_tsetr | ||
3239 | |->BC_TSETR_Z: | ||
3240 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3241 | | str TMP0, [CARG1] | ||
3242 | | ins_next | ||
3243 | | | ||
3244 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3245 | | barrierback TAB:CARG2, TMP2w, TMP0 | ||
3246 | | b <2 | ||
3247 | break; | ||
3248 | |||
3249 | case BC_TSETM: | ||
3250 | | // RA = base (table at base-1), RC = num_const (start index) | ||
3251 | | add RA, BASE, RA, lsl #3 | ||
3252 | |1: | ||
3253 | | ldr RBw, SAVE_MULTRES | ||
3254 | | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. | ||
3255 | | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. | ||
3256 | | sub RB, RB, #8 | ||
3257 | | cbz RB, >4 // Nothing to copy? | ||
3258 | | and TAB:CARG2, CARG2, #LJ_GCVMASK | ||
3259 | | ldr CARG1w, TAB:CARG2->asize | ||
3260 | | add CARG3w, TMP1w, RBw, lsr #3 | ||
3261 | | ldr CARG4, TAB:CARG2->array | ||
3262 | | cmp CARG3, CARG1 | ||
3263 | | add RB, RA, RB | ||
3264 | | bhi >5 | ||
3265 | | add TMP1, CARG4, TMP1w, uxtw #3 | ||
3266 | | ldrb TMP2w, TAB:CARG2->marked | ||
3267 | |3: // Copy result slots to table. | ||
3268 | | ldr TMP0, [RA], #8 | ||
3269 | | str TMP0, [TMP1], #8 | ||
3270 | | cmp RA, RB | ||
3271 | | blo <3 | ||
3272 | | tbnz TMP2w, #2, >7 // isblack(table) | ||
3273 | |4: | ||
3274 | | ins_next | ||
3275 | | | ||
3276 | |5: // Need to resize array part. | ||
3277 | | str BASE, L->base | ||
3278 | | mov CARG1, L | ||
3279 | | str PC, SAVE_PC | ||
3280 | | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
3281 | | // Must not reallocate the stack. | ||
3282 | | b <1 | ||
3283 | | | ||
3284 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
3285 | | barrierback TAB:CARG2, TMP2w, TMP1 | ||
3286 | | b <4 | ||
3287 | break; | ||
3288 | |||
3289 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
3290 | |||
3291 | case BC_CALLM: | ||
3292 | | // RA = base, (RB = nresults+1,) RC = extra_nargs | ||
3293 | | ldr TMP0w, SAVE_MULTRES | ||
3294 | | decode_RC8RD NARGS8:RC, RC | ||
3295 | | add NARGS8:RC, NARGS8:RC, TMP0 | ||
3296 | | b ->BC_CALL_Z | ||
3297 | break; | ||
3298 | case BC_CALL: | ||
3299 | | decode_RC8RD NARGS8:RC, RC | ||
3300 | | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8 | ||
3301 | |->BC_CALL_Z: | ||
3302 | | mov RB, BASE // Save old BASE for vmeta_call. | ||
3303 | | add BASE, BASE, RA, lsl #3 | ||
3304 | | ldr CARG3, [BASE] | ||
3305 | | sub NARGS8:RC, NARGS8:RC, #8 | ||
3306 | | add BASE, BASE, #16 | ||
3307 | | checkfunc CARG3, ->vmeta_call | ||
3308 | | ins_call | ||
3309 | break; | ||
3310 | |||
3311 | case BC_CALLMT: | ||
3312 | | // RA = base, (RB = 0,) RC = extra_nargs | ||
3313 | | ldr TMP0w, SAVE_MULTRES | ||
3314 | | add NARGS8:RC, TMP0, RC, lsl #3 | ||
3315 | | b ->BC_CALLT1_Z | ||
3316 | break; | ||
3317 | case BC_CALLT: | ||
3318 | | lsl NARGS8:RC, RC, #3 | ||
3319 | | // RA = base, (RB = 0,) RC = (nargs+1)*8 | ||
3320 | |->BC_CALLT1_Z: | ||
3321 | | add RA, BASE, RA, lsl #3 | ||
3322 | | ldr TMP1, [RA] | ||
3323 | | sub NARGS8:RC, NARGS8:RC, #8 | ||
3324 | | add RA, RA, #16 | ||
3325 | | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt | ||
3326 | | ldr PC, [BASE, FRAME_PC] | ||
3327 | |->BC_CALLT2_Z: | ||
3328 | | mov RB, #0 | ||
3329 | | ldrb TMP2w, LFUNC:CARG3->ffid | ||
3330 | | tst PC, #FRAME_TYPE | ||
3331 | | bne >7 | ||
3332 | |1: | ||
3333 | | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC. | ||
3334 | | cbz NARGS8:RC, >3 | ||
3335 | |2: | ||
3336 | | ldr TMP0, [RA, RB] | ||
3337 | | add TMP1, RB, #8 | ||
3338 | | cmp TMP1, NARGS8:RC | ||
3339 | | str TMP0, [BASE, RB] | ||
3340 | | mov RB, TMP1 | ||
3341 | | bne <2 | ||
3342 | |3: | ||
3343 | | cmp TMP2, #1 // (> FF_C) Calling a fast function? | ||
3344 | | bhi >5 | ||
3345 | |4: | ||
3346 | | ins_callt | ||
3347 | | | ||
3348 | |5: // Tailcall to a fast function with a Lua frame below. | ||
3349 | | ldrb RAw, [PC, #-4+OFS_RA] | ||
3350 | | sub CARG1, BASE, RA, lsl #3 | ||
3351 | | ldr LFUNC:CARG1, [CARG1, #-32] | ||
3352 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
3353 | | ldr CARG1, LFUNC:CARG1->pc | ||
3354 | | ldr KBASE, [CARG1, #PC2PROTO(k)] | ||
3355 | | b <4 | ||
3356 | | | ||
3357 | |7: // Tailcall from a vararg function. | ||
3358 | | eor PC, PC, #FRAME_VARG | ||
3359 | | tst PC, #FRAME_TYPEP // Vararg frame below? | ||
3360 | | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. | ||
3361 | | bne <1 | ||
3362 | | sub BASE, BASE, PC | ||
3363 | | ldr PC, [BASE, FRAME_PC] | ||
3364 | | tst PC, #FRAME_TYPE | ||
3365 | | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. | ||
3366 | | b <1 | ||
3367 | break; | ||
3368 | |||
3369 | case BC_ITERC: | ||
3370 | | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3371 | | add RA, BASE, RA, lsl #3 | ||
3372 | | ldr CARG3, [RA, #-24] | ||
3373 | | mov RB, BASE // Save old BASE for vmeta_call. | ||
3374 | | ldp CARG1, CARG2, [RA, #-16] | ||
3375 | | add BASE, RA, #16 | ||
3376 | | mov NARGS8:RC, #16 // Iterators get 2 arguments. | ||
3377 | | str CARG3, [RA] // Copy callable. | ||
3378 | | stp CARG1, CARG2, [RA, #16] // Copy state and control var. | ||
3379 | | checkfunc CARG3, ->vmeta_call | ||
3380 | | ins_call | ||
3381 | break; | ||
3382 | |||
3383 | case BC_ITERN: | ||
3384 | |.if JIT | ||
3385 | | hotloop | ||
3386 | |.endif | ||
3387 | |->vm_IITERN: | ||
3388 | | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3389 | | add RA, BASE, RA, lsl #3 | ||
3390 | | ldr TAB:RB, [RA, #-16] | ||
3391 | | ldrh TMP3w, [PC, # OFS_RD] | ||
3392 | | ldr CARG1w, [RA, #-8+LO] // Get index from control var. | ||
3393 | | add PC, PC, #4 | ||
3394 | | add TMP3, PC, TMP3, lsl #2 | ||
3395 | | and TAB:RB, RB, #LJ_GCVMASK | ||
3396 | | sub TMP3, TMP3, #0x20000 | ||
3397 | | ldr TMP1w, TAB:RB->asize | ||
3398 | | ldr CARG2, TAB:RB->array | ||
3399 | |1: // Traverse array part. | ||
3400 | | subs RC, CARG1, TMP1 | ||
3401 | | add CARG3, CARG2, CARG1, lsl #3 | ||
3402 | | bhs >5 // Index points after array part? | ||
3403 | | ldr TMP0, [CARG3] | ||
3404 | | cmp TMP0, TISNIL | ||
3405 | | cinc CARG1, CARG1, eq // Skip holes in array part. | ||
3406 | | beq <1 | ||
3407 | | add CARG1, CARG1, TISNUM | ||
3408 | | stp CARG1, TMP0, [RA] | ||
3409 | | add CARG1, CARG1, #1 | ||
3410 | |3: | ||
3411 | | str CARG1w, [RA, #-8+LO] // Update control var. | ||
3412 | | mov PC, TMP3 | ||
3413 | |4: | ||
3414 | | ins_next | ||
3415 | | | ||
3416 | |5: // Traverse hash part. | ||
3417 | | ldr TMP2w, TAB:RB->hmask | ||
3418 | | ldr NODE:RB, TAB:RB->node | ||
3419 | |6: | ||
3420 | | add CARG1, RC, RC, lsl #1 | ||
3421 | | cmp RC, TMP2 // End of iteration? Branch to ITERN+1. | ||
3422 | | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 | ||
3423 | | bhi <4 | ||
3424 | | ldp TMP0, CARG1, NODE:CARG3->val | ||
3425 | | cmp TMP0, TISNIL | ||
3426 | | add RC, RC, #1 | ||
3427 | | beq <6 // Skip holes in hash part. | ||
3428 | | stp CARG1, TMP0, [RA] | ||
3429 | | add CARG1, RC, TMP1 | ||
3430 | | b <3 | ||
3431 | break; | ||
3432 | |||
3433 | case BC_ISNEXT: | ||
3434 | | // RA = base, RC = target (points to ITERN) | ||
3435 | | add RA, BASE, RA, lsl #3 | ||
3436 | | ldr CFUNC:CARG1, [RA, #-24] | ||
3437 | | add RC, PC, RC, lsl #2 | ||
3438 | | ldp TAB:CARG3, CARG4, [RA, #-16] | ||
3439 | | sub RC, RC, #0x20000 | ||
3440 | | checkfunc CFUNC:CARG1, >5 | ||
3441 | | asr TMP0, TAB:CARG3, #47 | ||
3442 | | ldrb TMP1w, CFUNC:CARG1->ffid | ||
3443 | | cmn TMP0, #-LJ_TTAB | ||
3444 | | ccmp CARG4, TISNIL, #0, eq | ||
3445 | | ccmp TMP1w, #FF_next_N, #0, eq | ||
3446 | | bne >5 | ||
3447 | | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX | ||
3448 | | lsl TMP0, TMP0, #32 | ||
3449 | | str TMP0, [RA, #-8] // Initialize control var. | ||
3450 | |1: | ||
3451 | | mov PC, RC | ||
3452 | | ins_next | ||
3453 | | | ||
3454 | |5: // Despecialize bytecode if any of the checks fail. | ||
3455 | |.if JIT | ||
3456 | | ldrb TMP2w, [RC, # OFS_OP] | ||
3457 | |.endif | ||
3458 | | mov TMP0, #BC_JMP | ||
3459 | | mov TMP1, #BC_ITERC | ||
3460 | | strb TMP0w, [PC, #-4+OFS_OP] | ||
3461 | |.if JIT | ||
3462 | | cmp TMP2w, #BC_ITERN | ||
3463 | | bne >6 | ||
3464 | |.endif | ||
3465 | | strb TMP1w, [RC, # OFS_OP] | ||
3466 | | b <1 | ||
3467 | |.if JIT | ||
3468 | |6: // Unpatch JLOOP. | ||
3469 | | ldr RA, [GL, #GL_J(trace)] | ||
3470 | | ldrh TMP2w, [RC, # OFS_RD] | ||
3471 | | ldr TRACE:RA, [RA, TMP2, lsl #3] | ||
3472 | | ldr TMP2w, TRACE:RA->startins | ||
3473 | | bfxil TMP2w, TMP1w, #0, #8 | ||
3474 | | str TMP2w, [RC] | ||
3475 | | b <1 | ||
3476 | |.endif | ||
3477 | break; | ||
3478 | |||
3479 | case BC_VARG: | ||
3480 | | decode_RB RB, INS | ||
3481 | | and RC, RC, #255 | ||
3482 | | // RA = base, RB = (nresults+1), RC = numparams | ||
3483 | | ldr TMP1, [BASE, FRAME_PC] | ||
3484 | | add RC, BASE, RC, lsl #3 | ||
3485 | | add RA, BASE, RA, lsl #3 | ||
3486 | | add RC, RC, #FRAME_VARG | ||
3487 | | add TMP2, RA, RB, lsl #3 | ||
3488 | | sub RC, RC, TMP1 // RC = vbase | ||
3489 | | // Note: RC may now be even _above_ BASE if nargs was < numparams. | ||
3490 | | sub TMP3, BASE, #16 // TMP3 = vtop | ||
3491 | | cbz RB, >5 | ||
3492 | | sub TMP2, TMP2, #16 | ||
3493 | |1: // Copy vararg slots to destination slots. | ||
3494 | | cmp RC, TMP3 | ||
3495 | | ldr TMP0, [RC], #8 | ||
3496 | | csel TMP0, TMP0, TISNIL, lo | ||
3497 | | cmp RA, TMP2 | ||
3498 | | str TMP0, [RA], #8 | ||
3499 | | blo <1 | ||
3500 | |2: | ||
3501 | | ins_next | ||
3502 | | | ||
3503 | |5: // Copy all varargs. | ||
3504 | | ldr TMP0, L->maxstack | ||
3505 | | subs TMP2, TMP3, RC | ||
3506 | | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 | ||
3507 | | add RB, RB, #8 | ||
3508 | | add TMP1, RA, TMP2 | ||
3509 | | str RBw, SAVE_MULTRES | ||
3510 | | ble <2 // Nothing to copy. | ||
3511 | | cmp TMP1, TMP0 | ||
3512 | | bhi >7 | ||
3513 | |6: | ||
3514 | | ldr TMP0, [RC], #8 | ||
3515 | | str TMP0, [RA], #8 | ||
3516 | | cmp RC, TMP3 | ||
3517 | | blo <6 | ||
3518 | | b <2 | ||
3519 | | | ||
3520 | |7: // Grow stack for varargs. | ||
3521 | | lsr CARG2, TMP2, #3 | ||
3522 | | stp BASE, RA, L->base | ||
3523 | | mov CARG1, L | ||
3524 | | sub RC, RC, BASE // Need delta, because BASE may change. | ||
3525 | | str PC, SAVE_PC | ||
3526 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
3527 | | ldp BASE, RA, L->base | ||
3528 | | add RC, BASE, RC | ||
3529 | | sub TMP3, BASE, #16 | ||
3530 | | b <6 | ||
3531 | break; | ||
3532 | |||
3533 | /* -- Returns ----------------------------------------------------------- */ | ||
3534 | |||
3535 | case BC_RETM: | ||
3536 | | // RA = results, RC = extra results | ||
3537 | | ldr TMP0w, SAVE_MULTRES | ||
3538 | | ldr PC, [BASE, FRAME_PC] | ||
3539 | | add RA, BASE, RA, lsl #3 | ||
3540 | | add RC, TMP0, RC, lsl #3 | ||
3541 | | b ->BC_RETM_Z | ||
3542 | break; | ||
3543 | |||
3544 | case BC_RET: | ||
3545 | | // RA = results, RC = nresults+1 | ||
3546 | | ldr PC, [BASE, FRAME_PC] | ||
3547 | | lsl RC, RC, #3 | ||
3548 | | add RA, BASE, RA, lsl #3 | ||
3549 | |->BC_RETM_Z: | ||
3550 | | str RCw, SAVE_MULTRES | ||
3551 | |1: | ||
3552 | | ands CARG1, PC, #FRAME_TYPE | ||
3553 | | eor CARG2, PC, #FRAME_VARG | ||
3554 | | bne ->BC_RETV2_Z | ||
3555 | | | ||
3556 | |->BC_RET_Z: | ||
3557 | | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return | ||
3558 | | ldr INSw, [PC, #-4] | ||
3559 | | subs TMP1, RC, #8 | ||
3560 | | sub CARG3, BASE, #16 | ||
3561 | | beq >3 | ||
3562 | |2: | ||
3563 | | ldr TMP0, [RA], #8 | ||
3564 | | add BASE, BASE, #8 | ||
3565 | | sub TMP1, TMP1, #8 | ||
3566 | | str TMP0, [BASE, #-24] | ||
3567 | | cbnz TMP1, <2 | ||
3568 | |3: | ||
3569 | | decode_RA RA, INS | ||
3570 | | sub CARG4, CARG3, RA, lsl #3 | ||
3571 | | decode_RB RB, INS | ||
3572 | | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] | ||
3573 | |5: | ||
3574 | | cmp RC, RB, lsl #3 // More results expected? | ||
3575 | | blo >6 | ||
3576 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
3577 | | mov BASE, CARG4 | ||
3578 | | ldr CARG2, LFUNC:CARG1->pc | ||
3579 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
3580 | | ins_next | ||
3581 | | | ||
3582 | |6: // Fill up results with nil. | ||
3583 | | add BASE, BASE, #8 | ||
3584 | | add RC, RC, #8 | ||
3585 | | str TISNIL, [BASE, #-24] | ||
3586 | | b <5 | ||
3587 | | | ||
3588 | |->BC_RETV1_Z: // Non-standard return case. | ||
3589 | | add RA, BASE, RA, lsl #3 | ||
3590 | |->BC_RETV2_Z: | ||
3591 | | tst CARG2, #FRAME_TYPEP | ||
3592 | | bne ->vm_return | ||
3593 | | // Return from vararg function: relocate BASE down. | ||
3594 | | sub BASE, BASE, CARG2 | ||
3595 | | ldr PC, [BASE, FRAME_PC] | ||
3596 | | b <1 | ||
3597 | break; | ||
3598 | |||
3599 | case BC_RET0: case BC_RET1: | ||
3600 | | // RA = results, RC = nresults+1 | ||
3601 | | ldr PC, [BASE, FRAME_PC] | ||
3602 | | lsl RC, RC, #3 | ||
3603 | | str RCw, SAVE_MULTRES | ||
3604 | | ands CARG1, PC, #FRAME_TYPE | ||
3605 | | eor CARG2, PC, #FRAME_VARG | ||
3606 | | bne ->BC_RETV1_Z | ||
3607 | | ldr INSw, [PC, #-4] | ||
3608 | if (op == BC_RET1) { | ||
3609 | | ldr TMP0, [BASE, RA, lsl #3] | ||
3610 | } | ||
3611 | | sub CARG4, BASE, #16 | ||
3612 | | decode_RA RA, INS | ||
3613 | | sub BASE, CARG4, RA, lsl #3 | ||
3614 | if (op == BC_RET1) { | ||
3615 | | str TMP0, [CARG4], #8 | ||
3616 | } | ||
3617 | | decode_RB RB, INS | ||
3618 | | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] | ||
3619 | |5: | ||
3620 | | cmp RC, RB, lsl #3 | ||
3621 | | blo >6 | ||
3622 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | ||
3623 | | ldr CARG2, LFUNC:CARG1->pc | ||
3624 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
3625 | | ins_next | ||
3626 | | | ||
3627 | |6: // Fill up results with nil. | ||
3628 | | add RC, RC, #8 | ||
3629 | | str TISNIL, [CARG4], #8 | ||
3630 | | b <5 | ||
3631 | break; | ||
3632 | |||
3633 | /* -- Loops and branches ------------------------------------------------ */ | ||
3634 | |||
3635 | |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] | ||
3636 | |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] | ||
3637 | |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] | ||
3638 | |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] | ||
3639 | |||
3640 | case BC_FORL: | ||
3641 | |.if JIT | ||
3642 | | hotloop | ||
3643 | |.endif | ||
3644 | | // Fall through. Assumes BC_IFORL follows. | ||
3645 | break; | ||
3646 | |||
3647 | case BC_JFORI: | ||
3648 | case BC_JFORL: | ||
3649 | #if !LJ_HASJIT | ||
3650 | break; | ||
3651 | #endif | ||
3652 | case BC_FORI: | ||
3653 | case BC_IFORL: | ||
3654 | | // RA = base, RC = target (after end of loop or start of loop) | ||
3655 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
3656 | | add RA, BASE, RA, lsl #3 | ||
3657 | | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP | ||
3658 | | ldr CARG3, FOR_STEP // CARG3 = STEP | ||
3659 | if (op != BC_JFORL) { | ||
3660 | | add RC, PC, RC, lsl #2 | ||
3661 | | sub RC, RC, #0x20000 | ||
3662 | } | ||
3663 | | checkint CARG1, >5 | ||
3664 | if (!vk) { | ||
3665 | | checkint CARG2, ->vmeta_for | ||
3666 | | checkint CARG3, ->vmeta_for | ||
3667 | | tbnz CARG3w, #31, >4 | ||
3668 | | cmp CARG1w, CARG2w | ||
3669 | } else { | ||
3670 | | adds CARG1w, CARG1w, CARG3w | ||
3671 | | bvs >2 | ||
3672 | | add TMP0, CARG1, TISNUM | ||
3673 | | tbnz CARG3w, #31, >4 | ||
3674 | | cmp CARG1w, CARG2w | ||
3675 | } | ||
3676 | |1: | ||
3677 | if (op == BC_FORI) { | ||
3678 | | csel PC, RC, PC, gt | ||
3679 | } else if (op == BC_JFORI) { | ||
3680 | | mov PC, RC | ||
3681 | | ldrh RCw, [RC, #-4+OFS_RD] | ||
3682 | } else if (op == BC_IFORL) { | ||
3683 | | csel PC, RC, PC, le | ||
3684 | } | ||
3685 | if (vk) { | ||
3686 | | str TMP0, FOR_IDX | ||
3687 | | str TMP0, FOR_EXT | ||
3688 | } else { | ||
3689 | | str CARG1, FOR_EXT | ||
3690 | } | ||
3691 | if (op == BC_JFORI || op == BC_JFORL) { | ||
3692 | | ble =>BC_JLOOP | ||
3693 | } | ||
3694 | |2: | ||
3695 | | ins_next | ||
3696 | | | ||
3697 | |4: // Invert check for negative step. | ||
3698 | | cmp CARG2w, CARG1w | ||
3699 | | b <1 | ||
3700 | | | ||
3701 | |5: // FP loop. | ||
3702 | | ldp d0, d1, FOR_IDX | ||
3703 | | blo ->vmeta_for | ||
3704 | if (!vk) { | ||
3705 | | checknum CARG2, ->vmeta_for | ||
3706 | | checknum CARG3, ->vmeta_for | ||
3707 | | str d0, FOR_EXT | ||
3708 | } else { | ||
3709 | | ldr d2, FOR_STEP | ||
3710 | | fadd d0, d0, d2 | ||
3711 | } | ||
3712 | | tbnz CARG3, #63, >7 | ||
3713 | | fcmp d0, d1 | ||
3714 | |6: | ||
3715 | if (vk) { | ||
3716 | | str d0, FOR_IDX | ||
3717 | | str d0, FOR_EXT | ||
3718 | } | ||
3719 | if (op == BC_FORI) { | ||
3720 | | csel PC, RC, PC, hi | ||
3721 | } else if (op == BC_JFORI) { | ||
3722 | | ldrh RCw, [RC, #-4+OFS_RD] | ||
3723 | | bls =>BC_JLOOP | ||
3724 | } else if (op == BC_IFORL) { | ||
3725 | | csel PC, RC, PC, ls | ||
3726 | } else { | ||
3727 | | bls =>BC_JLOOP | ||
3728 | } | ||
3729 | | b <2 | ||
3730 | | | ||
3731 | |7: // Invert check for negative step. | ||
3732 | | fcmp d1, d0 | ||
3733 | | b <6 | ||
3734 | break; | ||
3735 | |||
3736 | case BC_ITERL: | ||
3737 | |.if JIT | ||
3738 | | hotloop | ||
3739 | |.endif | ||
3740 | | // Fall through. Assumes BC_IITERL follows. | ||
3741 | break; | ||
3742 | |||
3743 | case BC_JITERL: | ||
3744 | #if !LJ_HASJIT | ||
3745 | break; | ||
3746 | #endif | ||
3747 | case BC_IITERL: | ||
3748 | | // RA = base, RC = target | ||
3749 | | ldr CARG1, [BASE, RA, lsl #3] | ||
3750 | | add TMP1, BASE, RA, lsl #3 | ||
3751 | | cmp CARG1, TISNIL | ||
3752 | | beq >1 // Stop if iterator returned nil. | ||
3753 | if (op == BC_JITERL) { | ||
3754 | | str CARG1, [TMP1, #-8] | ||
3755 | | b =>BC_JLOOP | ||
3756 | } else { | ||
3757 | | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch. | ||
3758 | | sub PC, TMP0, #0x20000 | ||
3759 | | str CARG1, [TMP1, #-8] | ||
3760 | } | ||
3761 | |1: | ||
3762 | | ins_next | ||
3763 | break; | ||
3764 | |||
3765 | case BC_LOOP: | ||
3766 | | // RA = base, RC = target (loop extent) | ||
3767 | | // Note: RA/RC is only used by trace recorder to determine scope/extent | ||
3768 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
3769 | |.if JIT | ||
3770 | | hotloop | ||
3771 | |.endif | ||
3772 | | // Fall through. Assumes BC_ILOOP follows. | ||
3773 | break; | ||
3774 | |||
3775 | case BC_ILOOP: | ||
3776 | | // RA = base, RC = target (loop extent) | ||
3777 | | ins_next | ||
3778 | break; | ||
3779 | |||
3780 | case BC_JLOOP: | ||
3781 | |.if JIT | ||
3782 | | // RA = base (ignored), RC = traceno | ||
3783 | | ldr CARG1, [GL, #GL_J(trace)] | ||
3784 | | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. | ||
3785 | | ldr TRACE:RC, [CARG1, RC, lsl #3] | ||
3786 | | st_vmstate CARG2w | ||
3787 | | ldr RA, TRACE:RC->mcode | ||
3788 | | str BASE, GL->jit_base | ||
3789 | | str L, GL->tmpbuf.L | ||
3790 | | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. | ||
3791 | | br RA | ||
3792 | |.endif | ||
3793 | break; | ||
3794 | |||
3795 | case BC_JMP: | ||
3796 | | // RA = base (only used by trace recorder), RC = target | ||
3797 | | add RC, PC, RC, lsl #2 | ||
3798 | | sub PC, RC, #0x20000 | ||
3799 | | ins_next | ||
3800 | break; | ||
3801 | |||
3802 | /* -- Function headers -------------------------------------------------- */ | ||
3803 | |||
3804 | case BC_FUNCF: | ||
3805 | |.if JIT | ||
3806 | | hotcall | ||
3807 | |.endif | ||
3808 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | ||
3809 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. | ||
3810 | break; | ||
3811 | |||
3812 | case BC_JFUNCF: | ||
3813 | #if !LJ_HASJIT | ||
3814 | break; | ||
3815 | #endif | ||
3816 | case BC_IFUNCF: | ||
3817 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | ||
3818 | | ldr CARG1, L->maxstack | ||
3819 | | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] | ||
3820 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] | ||
3821 | | cmp RA, CARG1 | ||
3822 | | bhi ->vm_growstack_l | ||
3823 | |2: | ||
3824 | | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters. | ||
3825 | | blo >3 | ||
3826 | if (op == BC_JFUNCF) { | ||
3827 | | decode_RD RC, INS | ||
3828 | | b =>BC_JLOOP | ||
3829 | } else { | ||
3830 | | ins_next | ||
3831 | } | ||
3832 | | | ||
3833 | |3: // Clear missing parameters. | ||
3834 | | str TISNIL, [BASE, NARGS8:RC] | ||
3835 | | add NARGS8:RC, NARGS8:RC, #8 | ||
3836 | | b <2 | ||
3837 | break; | ||
3838 | |||
3839 | case BC_JFUNCV: | ||
3840 | #if !LJ_HASJIT | ||
3841 | break; | ||
3842 | #endif | ||
3843 | | NYI // NYI: compiled vararg functions | ||
3844 | break; /* NYI: compiled vararg functions. */ | ||
3845 | |||
3846 | case BC_IFUNCV: | ||
3847 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | ||
3848 | | ldr CARG1, L->maxstack | ||
3849 | | movn TMP0, #~LJ_TFUNC | ||
3850 | | add TMP2, BASE, RC | ||
3851 | | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | ||
3852 | | add RA, RA, RC | ||
3853 | | add TMP0, RC, #16+FRAME_VARG | ||
3854 | | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. | ||
3855 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] | ||
3856 | | cmp RA, CARG1 | ||
3857 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. | ||
3858 | | bhs ->vm_growstack_l | ||
3859 | | sub RC, TMP2, #16 | ||
3860 | | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] | ||
3861 | | mov RA, BASE | ||
3862 | | mov BASE, TMP2 | ||
3863 | | cbz TMP1, >2 | ||
3864 | |1: | ||
3865 | | cmp RA, RC // Less args than parameters? | ||
3866 | | bhs >3 | ||
3867 | | ldr TMP0, [RA] | ||
3868 | | sub TMP1, TMP1, #1 | ||
3869 | | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC). | ||
3870 | | str TMP0, [TMP2], #8 | ||
3871 | | cbnz TMP1, <1 | ||
3872 | |2: | ||
3873 | | ins_next | ||
3874 | | | ||
3875 | |3: | ||
3876 | | sub TMP1, TMP1, #1 | ||
3877 | | str TISNIL, [TMP2], #8 | ||
3878 | | cbz TMP1, <2 | ||
3879 | | b <3 | ||
3880 | break; | ||
3881 | |||
3882 | case BC_FUNCC: | ||
3883 | case BC_FUNCCW: | ||
3884 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 | ||
3885 | if (op == BC_FUNCC) { | ||
3886 | | ldr CARG4, CFUNC:CARG3->f | ||
3887 | } else { | ||
3888 | | ldr CARG4, GL->wrapf | ||
3889 | } | ||
3890 | | add CARG2, RA, NARGS8:RC | ||
3891 | | ldr CARG1, L->maxstack | ||
3892 | | add RC, BASE, NARGS8:RC | ||
3893 | | cmp CARG2, CARG1 | ||
3894 | | stp BASE, RC, L->base | ||
3895 | if (op == BC_FUNCCW) { | ||
3896 | | ldr CARG2, CFUNC:CARG3->f | ||
3897 | } | ||
3898 | | mv_vmstate TMP0w, C | ||
3899 | | mov CARG1, L | ||
3900 | | bhi ->vm_growstack_c // Need to grow stack. | ||
3901 | | st_vmstate TMP0w | ||
3902 | | blr CARG4 // (lua_State *L [, lua_CFunction f]) | ||
3903 | | // Returns nresults. | ||
3904 | | ldp BASE, TMP1, L->base | ||
3905 | | str L, GL->cur_L | ||
3906 | | sbfiz RC, CRET1, #3, #32 | ||
3907 | | st_vmstate ST_INTERP | ||
3908 | | ldr PC, [BASE, FRAME_PC] | ||
3909 | | sub RA, TMP1, RC // RA = L->top - nresults*8 | ||
3910 | | b ->vm_returnc | ||
3911 | break; | ||
3912 | |||
3913 | /* ---------------------------------------------------------------------- */ | ||
3914 | |||
3915 | default: | ||
3916 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
3917 | exit(2); | ||
3918 | break; | ||
3919 | } | ||
3920 | } | ||
3921 | |||
/*
** Emit the whole backend: the shared subroutines first, then one handler per
** bytecode opcode by calling build_ins() for every op in 0 .. BC__MAX-1.
** Returns BC__MAX.
** NOTE(review): the caller presumably treats the return value as the number
** of per-opcode entries that were emitted -- confirm against the build driver.
*/
3922 | static int build_backend(BuildCtx *ctx) | ||
3923 | { | ||
3924 | int op; | ||
3925 | |||
/* NOTE(review): presumably reserves BC__MAX dynamic (=>) labels, one per
** opcode, so handlers can branch to e.g. =>BC_JLOOP -- confirm against the
** DynASM documentation for dasm_growpc. */
3926 | dasm_growpc(Dst, BC__MAX); | ||
3927 | |||
/* Subroutines are built before any per-opcode handler. */
3928 | build_subroutines(ctx); | ||
3929 | |||
/* NOTE(review): DynASM directive; presumably switches output to the section
** that holds the opcode handlers. It must stay ahead of the loop below. */
3930 | |.code_op | ||
/* The opcode value is passed twice: once as the BCOp and once as a plain int. */
3931 | for (op = 0; op < BC__MAX; op++) | ||
3932 | build_ins(ctx, (BCOp)op, op); | ||
3933 | |||
3934 | return BC__MAX; | ||
3935 | } | ||
3936 | |||
3937 | /* Emit pseudo frame-info for all assembler functions. | ||
     | ** | ||
     | ** Writes DWARF call-frame records (one CIE plus FDEs) as assembler | ||
     | ** directives to ctx->fp. What is written depends on ctx->mode: | ||
     | **   BUILD_elfasm:  a .debug_frame section followed by an .eh_frame section. | ||
     | **   BUILD_machasm: an __eh_frame section with one FDE per non-empty symbol | ||
     | **                  (this case is only compiled if LJ_NO_UNWIND is not set). | ||
     | **   other modes:   nothing is written. | ||
     | ** The general FDEs record lr, fp, x19-x28 and d8-d15 as saved relative to | ||
     | ** the CFA (fp+16). If LJ_HASFFI is set, lj_vm_ffi_call gets a separate FDE | ||
     | ** that records only lr, fp, x19 and x20; in the .eh_frame/__eh_frame output | ||
     | ** that FDE refers to its own CIE with augmentation "zR", i.e. without the | ||
     | ** lj_err_unwind_dwarf personality entry used by the "zPR" CIE. | ||
     | */ | ||
3938 | static void emit_asm_debug(BuildCtx *ctx) | ||
3939 | { | ||
3940 | int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of vm_ffi_call from the start of the code. */ | ||
3941 | int i; | ||
3942 | switch (ctx->mode) { | ||
3943 | case BUILD_elfasm: /* ELF: emit both .debug_frame and .eh_frame. */ | ||
3944 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); | ||
3945 | fprintf(ctx->fp, | ||
3946 | ".Lframe0:\n" | ||
3947 | "\t.long .LECIE0-.LSCIE0\n" | ||
3948 | ".LSCIE0:\n" | ||
3949 | "\t.long 0xffffffff\n" | ||
3950 | "\t.byte 0x1\n" | ||
3951 | "\t.string \"\"\n" | ||
3952 | "\t.uleb128 0x1\n" | ||
3953 | "\t.sleb128 -8\n" | ||
3954 | "\t.byte 30\n" /* Return address is in lr. */ | ||
3955 | "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ | ||
3956 | "\t.align 3\n" | ||
3957 | ".LECIE0:\n\n"); | ||
3958 | fprintf(ctx->fp, | ||
3959 | ".LSFDE0:\n" | ||
3960 | "\t.long .LEFDE0-.LASFDE0\n" | ||
3961 | ".LASFDE0:\n" | ||
3962 | "\t.long .Lframe0\n" | ||
3963 | "\t.quad .Lbegin\n" | ||
3964 | "\t.quad %d\n" | ||
3965 | "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ | ||
3966 | "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ | ||
3967 | fcofs); /* FDE range: fcofs bytes starting at .Lbegin. */ | ||
3968 | for (i = 19; i <= 28; i++) /* offset x19-x28: factored offsets 3..12 */ | ||
3969 | fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); | ||
3970 | for (i = 8; i <= 15; i++) /* offset d8-d15: registers 72..79, factored offsets 13..20 */ | ||
3971 | fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", | ||
3972 | 64+i, i+(3+(28-19+1)-8)); | ||
3973 | fprintf(ctx->fp, | ||
3974 | "\t.align 3\n" | ||
3975 | ".LEFDE0:\n\n"); | ||
3976 | #if LJ_HASFFI | ||
3977 | fprintf(ctx->fp, | ||
3978 | ".LSFDE1:\n" | ||
3979 | "\t.long .LEFDE1-.LASFDE1\n" | ||
3980 | ".LASFDE1:\n" | ||
3981 | "\t.long .Lframe0\n" | ||
3982 | "\t.quad lj_vm_ffi_call\n" | ||
3983 | "\t.quad %d\n" | ||
3984 | "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ | ||
3985 | "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ | ||
3986 | "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ | ||
3987 | "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ | ||
3988 | "\t.align 3\n" | ||
3989 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); /* FDE range: the code from fcofs up to codesz. */ | ||
3990 | #endif | ||
3991 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); | ||
3992 | fprintf(ctx->fp, | ||
3993 | ".Lframe1:\n" | ||
3994 | "\t.long .LECIE1-.LSCIE1\n" | ||
3995 | ".LSCIE1:\n" | ||
3996 | "\t.long 0\n" | ||
3997 | "\t.byte 0x1\n" | ||
3998 | "\t.string \"zPR\"\n" | ||
3999 | "\t.uleb128 0x1\n" | ||
4000 | "\t.sleb128 -8\n" | ||
4001 | "\t.byte 30\n" /* Return address is in lr. */ | ||
4002 | "\t.uleb128 6\n" /* augmentation length */ | ||
4003 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4004 | "\t.long lj_err_unwind_dwarf-.\n" | ||
4005 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4006 | "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ | ||
4007 | "\t.align 3\n" | ||
4008 | ".LECIE1:\n\n"); | ||
4009 | fprintf(ctx->fp, | ||
4010 | ".LSFDE2:\n" | ||
4011 | "\t.long .LEFDE2-.LASFDE2\n" | ||
4012 | ".LASFDE2:\n" | ||
4013 | "\t.long .LASFDE2-.Lframe1\n" | ||
4014 | "\t.long .Lbegin-.\n" | ||
4015 | "\t.long %d\n" | ||
4016 | "\t.uleb128 0\n" /* augmentation length */ | ||
4017 | "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ | ||
4018 | "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ | ||
4019 | fcofs); /* FDE range: fcofs bytes starting at .Lbegin. */ | ||
4020 | for (i = 19; i <= 28; i++) /* offset x19-x28: factored offsets 3..12 */ | ||
4021 | fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); | ||
4022 | for (i = 8; i <= 15; i++) /* offset d8-d15: registers 72..79, factored offsets 13..20 */ | ||
4023 | fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", | ||
4024 | 64+i, i+(3+(28-19+1)-8)); | ||
4025 | fprintf(ctx->fp, | ||
4026 | "\t.align 3\n" | ||
4027 | ".LEFDE2:\n\n"); | ||
4028 | #if LJ_HASFFI | ||
4029 | fprintf(ctx->fp, | ||
4030 | ".Lframe2:\n" | ||
4031 | "\t.long .LECIE2-.LSCIE2\n" | ||
4032 | ".LSCIE2:\n" | ||
4033 | "\t.long 0\n" | ||
4034 | "\t.byte 0x1\n" | ||
4035 | "\t.string \"zR\"\n" | ||
4036 | "\t.uleb128 0x1\n" | ||
4037 | "\t.sleb128 -8\n" | ||
4038 | "\t.byte 30\n" /* Return address is in lr. */ | ||
4039 | "\t.uleb128 1\n" /* augmentation length */ | ||
4040 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4041 | "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ | ||
4042 | "\t.align 3\n" | ||
4043 | ".LECIE2:\n\n"); | ||
4044 | fprintf(ctx->fp, | ||
4045 | ".LSFDE3:\n" | ||
4046 | "\t.long .LEFDE3-.LASFDE3\n" | ||
4047 | ".LASFDE3:\n" | ||
4048 | "\t.long .LASFDE3-.Lframe2\n" | ||
4049 | "\t.long lj_vm_ffi_call-.\n" | ||
4050 | "\t.long %d\n" | ||
4051 | "\t.uleb128 0\n" /* augmentation length */ | ||
4052 | "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ | ||
4053 | "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ | ||
4054 | "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ | ||
4055 | "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ | ||
4056 | "\t.align 3\n" | ||
4057 | ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); /* FDE range: the code from fcofs up to codesz. */ | ||
4058 | #endif | ||
4059 | break; | ||
4060 | #if !LJ_NO_UNWIND | ||
4061 | case BUILD_machasm: { /* Mach-O: one shared CIE, then one FDE per symbol. */ | ||
4062 | #if LJ_HASFFI | ||
4063 | int fcsize = 0; /* Size of _lj_vm_ffi_call; stays 0 if that symbol is not found. */ | ||
4064 | #endif | ||
4065 | int j; | ||
4066 | fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); | ||
4067 | fprintf(ctx->fp, | ||
4068 | "EH_frame1:\n" | ||
4069 | "\t.set L$set$x,LECIEX-LSCIEX\n" | ||
4070 | "\t.long L$set$x\n" | ||
4071 | "LSCIEX:\n" | ||
4072 | "\t.long 0\n" | ||
4073 | "\t.byte 0x1\n" | ||
4074 | "\t.ascii \"zPR\\0\"\n" | ||
4075 | "\t.uleb128 0x1\n" | ||
4076 | "\t.sleb128 -8\n" | ||
4077 | "\t.byte 30\n" /* Return address is in lr. */ | ||
4078 | "\t.uleb128 6\n" /* augmentation length */ | ||
4079 | "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ | ||
4080 | "\t.long _lj_err_unwind_dwarf@GOT-.\n" | ||
4081 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4082 | "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ | ||
4083 | "\t.align 3\n" | ||
4084 | "LECIEX:\n\n"); | ||
4085 | for (j = 0; j < ctx->nsym; j++) { | ||
4086 | const char *name = ctx->sym[j].name; | ||
4087 | int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs; /* Distance to the next symbol. NOTE(review): reads sym[nsym] on the last iteration -- presumably a sentinel entry, verify in buildvm. */ | ||
4088 | if (size == 0) continue; /* No FDE for empty symbols. */ | ||
4089 | #if LJ_HASFFI | ||
4090 | if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } /* Its FDE is emitted after the loop. */ | ||
4091 | #endif | ||
4092 | fprintf(ctx->fp, | ||
4093 | "LSFDE%d:\n" | ||
4094 | "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" | ||
4095 | "\t.long L$set$%d\n" | ||
4096 | "LASFDE%d:\n" | ||
4097 | "\t.long LASFDE%d-EH_frame1\n" | ||
4098 | "\t.long %s-.\n" | ||
4099 | "\t.long %d\n" | ||
4100 | "\t.uleb128 0\n" /* augmentation length */ | ||
4101 | "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ | ||
4102 | "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ | ||
4103 | j, j, j, j, j, j, j, name, size); /* The symbol index j makes the labels unique. */ | ||
4104 | for (i = 19; i <= 28; i++) /* offset x19-x28: factored offsets 3..12 */ | ||
4105 | fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); | ||
4106 | for (i = 8; i <= 15; i++) /* offset d8-d15: registers 72..79, factored offsets 13..20 */ | ||
4107 | fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", | ||
4108 | 64+i, i+(3+(28-19+1)-8)); | ||
4109 | fprintf(ctx->fp, | ||
4110 | "\t.align 3\n" | ||
4111 | "LEFDE%d:\n\n", j); | ||
4112 | } | ||
4113 | #if LJ_HASFFI | ||
4114 | if (fcsize) { /* Only if _lj_vm_ffi_call was seen with a non-zero size. */ | ||
4115 | fprintf(ctx->fp, | ||
4116 | "EH_frame2:\n" | ||
4117 | "\t.set L$set$y,LECIEY-LSCIEY\n" | ||
4118 | "\t.long L$set$y\n" | ||
4119 | "LSCIEY:\n" | ||
4120 | "\t.long 0\n" | ||
4121 | "\t.byte 0x1\n" | ||
4122 | "\t.ascii \"zR\\0\"\n" | ||
4123 | "\t.uleb128 0x1\n" | ||
4124 | "\t.sleb128 -8\n" | ||
4125 | "\t.byte 30\n" /* Return address is in lr. */ | ||
4126 | "\t.uleb128 1\n" /* augmentation length */ | ||
4127 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4128 | "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ | ||
4129 | "\t.align 3\n" | ||
4130 | "LECIEY:\n\n"); | ||
4131 | fprintf(ctx->fp, | ||
4132 | "LSFDEY:\n" | ||
4133 | "\t.set L$set$yy,LEFDEY-LASFDEY\n" | ||
4134 | "\t.long L$set$yy\n" | ||
4135 | "LASFDEY:\n" | ||
4136 | "\t.long LASFDEY-EH_frame2\n" | ||
4137 | "\t.long _lj_vm_ffi_call-.\n" | ||
4138 | "\t.long %d\n" | ||
4139 | "\t.uleb128 0\n" /* augmentation length */ | ||
4140 | "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ | ||
4141 | "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ | ||
4142 | "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ | ||
4143 | "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ | ||
4144 | "\t.align 3\n" | ||
4145 | "LEFDEY:\n\n", fcsize); | ||
4146 | } | ||
4147 | #endif | ||
4148 | fprintf(ctx->fp, ".subsections_via_symbols\n"); | ||
4149 | } | ||
4150 | break; | ||
4151 | #endif | ||
4152 | default: /* All other build modes: no frame info is written. */ | ||
4153 | break; | ||
4154 | } | ||
4155 | } | ||
4156 | |||
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 866b8e3d..34645bf1 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc | |||
@@ -1,6 +1,9 @@ | |||
1 | |// Low-level VM code for MIPS CPUs. | 1 | |// Low-level VM code for MIPS CPUs. |
2 | |// Bytecode interpreter, fast functions and helper functions. | 2 | |// Bytecode interpreter, fast functions and helper functions. |
3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | 3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h |
4 | |// | ||
5 | |// MIPS soft-float support contributed by Djordje Kovacevic and | ||
6 | |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. | ||
4 | | | 7 | | |
5 | |.arch mips | 8 | |.arch mips |
6 | |.section code_op, code_sub | 9 | |.section code_op, code_sub |
@@ -18,6 +21,12 @@ | |||
18 | |// Fixed register assignments for the interpreter. | 21 | |// Fixed register assignments for the interpreter. |
19 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra | 22 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra |
20 | | | 23 | | |
24 | |.macro .FPU, a, b | ||
25 | |.if FPU | ||
26 | | a, b | ||
27 | |.endif | ||
28 | |.endmacro | ||
29 | | | ||
21 | |// The following must be C callee-save (but BASE is often refetched). | 30 | |// The following must be C callee-save (but BASE is often refetched). |
22 | |.define BASE, r16 // Base of current Lua stack frame. | 31 | |.define BASE, r16 // Base of current Lua stack frame. |
23 | |.define KBASE, r17 // Constants of current Lua function. | 32 | |.define KBASE, r17 // Constants of current Lua function. |
@@ -25,13 +34,15 @@ | |||
25 | |.define DISPATCH, r19 // Opcode dispatch table. | 34 | |.define DISPATCH, r19 // Opcode dispatch table. |
26 | |.define LREG, r20 // Register holding lua_State (also in SAVE_L). | 35 | |.define LREG, r20 // Register holding lua_State (also in SAVE_L). |
27 | |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. | 36 | |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. |
28 | |// NYI: r22 currently unused. | ||
29 | | | 37 | | |
30 | |.define JGL, r30 // On-trace: global_State + 32768. | 38 | |.define JGL, r30 // On-trace: global_State + 32768. |
31 | | | 39 | | |
32 | |// Constants for type-comparisons, stores and conversions. C callee-save. | 40 | |// Constants for type-comparisons, stores and conversions. C callee-save. |
41 | |.define TISNUM, r22 | ||
33 | |.define TISNIL, r30 | 42 | |.define TISNIL, r30 |
43 | |.if FPU | ||
34 | |.define TOBIT, f30 // 2^52 + 2^51. | 44 | |.define TOBIT, f30 // 2^52 + 2^51. |
45 | |.endif | ||
35 | | | 46 | | |
36 | |// The following temporaries are not saved across C calls, except for RA. | 47 | |// The following temporaries are not saved across C calls, except for RA. |
37 | |.define RA, r23 // Callee-save. | 48 | |.define RA, r23 // Callee-save. |
@@ -46,7 +57,7 @@ | |||
46 | |.define TMP2, r14 | 57 | |.define TMP2, r14 |
47 | |.define TMP3, r15 | 58 | |.define TMP3, r15 |
48 | | | 59 | | |
49 | |// Calling conventions. | 60 | |// MIPS o32 calling convention. |
50 | |.define CFUNCADDR, r25 | 61 | |.define CFUNCADDR, r25 |
51 | |.define CARG1, r4 | 62 | |.define CARG1, r4 |
52 | |.define CARG2, r5 | 63 | |.define CARG2, r5 |
@@ -56,13 +67,33 @@ | |||
56 | |.define CRET1, r2 | 67 | |.define CRET1, r2 |
57 | |.define CRET2, r3 | 68 | |.define CRET2, r3 |
58 | | | 69 | | |
70 | |.if ENDIAN_LE | ||
71 | |.define SFRETLO, CRET1 | ||
72 | |.define SFRETHI, CRET2 | ||
73 | |.define SFARG1LO, CARG1 | ||
74 | |.define SFARG1HI, CARG2 | ||
75 | |.define SFARG2LO, CARG3 | ||
76 | |.define SFARG2HI, CARG4 | ||
77 | |.else | ||
78 | |.define SFRETLO, CRET2 | ||
79 | |.define SFRETHI, CRET1 | ||
80 | |.define SFARG1LO, CARG2 | ||
81 | |.define SFARG1HI, CARG1 | ||
82 | |.define SFARG2LO, CARG4 | ||
83 | |.define SFARG2HI, CARG3 | ||
84 | |.endif | ||
85 | | | ||
86 | |.if FPU | ||
59 | |.define FARG1, f12 | 87 | |.define FARG1, f12 |
60 | |.define FARG2, f14 | 88 | |.define FARG2, f14 |
61 | | | 89 | | |
62 | |.define FRET1, f0 | 90 | |.define FRET1, f0 |
63 | |.define FRET2, f2 | 91 | |.define FRET2, f2 |
92 | |.endif | ||
64 | | | 93 | | |
65 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 94 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
95 | |.if FPU // MIPS32 hard-float. | ||
96 | | | ||
66 | |.define CFRAME_SPACE, 112 // Delta for sp. | 97 | |.define CFRAME_SPACE, 112 // Delta for sp. |
67 | | | 98 | | |
68 | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. | 99 | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. |
@@ -72,6 +103,20 @@ | |||
72 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. | 103 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. |
73 | |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. | 104 | |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. |
74 | |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. | 105 | |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. |
106 | | | ||
107 | |.else // MIPS32 soft-float | ||
108 | | | ||
109 | |.define CFRAME_SPACE, 64 // Delta for sp. | ||
110 | | | ||
111 | |.define SAVE_ERRF, 76(sp) // 32 bit C frame info. | ||
112 | |.define SAVE_NRES, 72(sp) | ||
113 | |.define SAVE_CFRAME, 68(sp) | ||
114 | |.define SAVE_L, 64(sp) | ||
115 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. | ||
116 | |.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves. | ||
117 | | | ||
118 | |.endif | ||
119 | | | ||
75 | |.define SAVE_PC, 20(sp) | 120 | |.define SAVE_PC, 20(sp) |
76 | |.define ARG5, 16(sp) | 121 | |.define ARG5, 16(sp) |
77 | |.define CSAVE_4, 12(sp) | 122 | |.define CSAVE_4, 12(sp) |
@@ -83,43 +128,45 @@ | |||
83 | |.define ARG5_OFS, 16 | 128 | |.define ARG5_OFS, 16 |
84 | |.define SAVE_MULTRES, ARG5 | 129 | |.define SAVE_MULTRES, ARG5 |
85 | | | 130 | | |
131 | |//----------------------------------------------------------------------- | ||
132 | | | ||
86 | |.macro saveregs | 133 | |.macro saveregs |
87 | | addiu sp, sp, -CFRAME_SPACE | 134 | | addiu sp, sp, -CFRAME_SPACE |
88 | | sw ra, SAVE_GPR_+9*4(sp) | 135 | | sw ra, SAVE_GPR_+9*4(sp) |
89 | | sw r30, SAVE_GPR_+8*4(sp) | 136 | | sw r30, SAVE_GPR_+8*4(sp) |
90 | | sdc1 f30, SAVE_FPR_+5*8(sp) | 137 | | .FPU sdc1 f30, SAVE_FPR_+5*8(sp) |
91 | | sw r23, SAVE_GPR_+7*4(sp) | 138 | | sw r23, SAVE_GPR_+7*4(sp) |
92 | | sw r22, SAVE_GPR_+6*4(sp) | 139 | | sw r22, SAVE_GPR_+6*4(sp) |
93 | | sdc1 f28, SAVE_FPR_+4*8(sp) | 140 | | .FPU sdc1 f28, SAVE_FPR_+4*8(sp) |
94 | | sw r21, SAVE_GPR_+5*4(sp) | 141 | | sw r21, SAVE_GPR_+5*4(sp) |
95 | | sw r20, SAVE_GPR_+4*4(sp) | 142 | | sw r20, SAVE_GPR_+4*4(sp) |
96 | | sdc1 f26, SAVE_FPR_+3*8(sp) | 143 | | .FPU sdc1 f26, SAVE_FPR_+3*8(sp) |
97 | | sw r19, SAVE_GPR_+3*4(sp) | 144 | | sw r19, SAVE_GPR_+3*4(sp) |
98 | | sw r18, SAVE_GPR_+2*4(sp) | 145 | | sw r18, SAVE_GPR_+2*4(sp) |
99 | | sdc1 f24, SAVE_FPR_+2*8(sp) | 146 | | .FPU sdc1 f24, SAVE_FPR_+2*8(sp) |
100 | | sw r17, SAVE_GPR_+1*4(sp) | 147 | | sw r17, SAVE_GPR_+1*4(sp) |
101 | | sw r16, SAVE_GPR_+0*4(sp) | 148 | | sw r16, SAVE_GPR_+0*4(sp) |
102 | | sdc1 f22, SAVE_FPR_+1*8(sp) | 149 | | .FPU sdc1 f22, SAVE_FPR_+1*8(sp) |
103 | | sdc1 f20, SAVE_FPR_+0*8(sp) | 150 | | .FPU sdc1 f20, SAVE_FPR_+0*8(sp) |
104 | |.endmacro | 151 | |.endmacro |
105 | | | 152 | | |
106 | |.macro restoreregs_ret | 153 | |.macro restoreregs_ret |
107 | | lw ra, SAVE_GPR_+9*4(sp) | 154 | | lw ra, SAVE_GPR_+9*4(sp) |
108 | | lw r30, SAVE_GPR_+8*4(sp) | 155 | | lw r30, SAVE_GPR_+8*4(sp) |
109 | | ldc1 f30, SAVE_FPR_+5*8(sp) | 156 | | .FPU ldc1 f30, SAVE_FPR_+5*8(sp) |
110 | | lw r23, SAVE_GPR_+7*4(sp) | 157 | | lw r23, SAVE_GPR_+7*4(sp) |
111 | | lw r22, SAVE_GPR_+6*4(sp) | 158 | | lw r22, SAVE_GPR_+6*4(sp) |
112 | | ldc1 f28, SAVE_FPR_+4*8(sp) | 159 | | .FPU ldc1 f28, SAVE_FPR_+4*8(sp) |
113 | | lw r21, SAVE_GPR_+5*4(sp) | 160 | | lw r21, SAVE_GPR_+5*4(sp) |
114 | | lw r20, SAVE_GPR_+4*4(sp) | 161 | | lw r20, SAVE_GPR_+4*4(sp) |
115 | | ldc1 f26, SAVE_FPR_+3*8(sp) | 162 | | .FPU ldc1 f26, SAVE_FPR_+3*8(sp) |
116 | | lw r19, SAVE_GPR_+3*4(sp) | 163 | | lw r19, SAVE_GPR_+3*4(sp) |
117 | | lw r18, SAVE_GPR_+2*4(sp) | 164 | | lw r18, SAVE_GPR_+2*4(sp) |
118 | | ldc1 f24, SAVE_FPR_+2*8(sp) | 165 | | .FPU ldc1 f24, SAVE_FPR_+2*8(sp) |
119 | | lw r17, SAVE_GPR_+1*4(sp) | 166 | | lw r17, SAVE_GPR_+1*4(sp) |
120 | | lw r16, SAVE_GPR_+0*4(sp) | 167 | | lw r16, SAVE_GPR_+0*4(sp) |
121 | | ldc1 f22, SAVE_FPR_+1*8(sp) | 168 | | .FPU ldc1 f22, SAVE_FPR_+1*8(sp) |
122 | | ldc1 f20, SAVE_FPR_+0*8(sp) | 169 | | .FPU ldc1 f20, SAVE_FPR_+0*8(sp) |
123 | | jr ra | 170 | | jr ra |
124 | | addiu sp, sp, CFRAME_SPACE | 171 | | addiu sp, sp, CFRAME_SPACE |
125 | |.endmacro | 172 | |.endmacro |
@@ -138,11 +185,12 @@ | |||
138 | |.type NODE, Node | 185 | |.type NODE, Node |
139 | |.type NARGS8, int | 186 | |.type NARGS8, int |
140 | |.type TRACE, GCtrace | 187 | |.type TRACE, GCtrace |
188 | |.type SBUF, SBuf | ||
141 | | | 189 | | |
142 | |//----------------------------------------------------------------------- | 190 | |//----------------------------------------------------------------------- |
143 | | | 191 | | |
144 | |// Trap for not-yet-implemented parts. | 192 | |// Trap for not-yet-implemented parts. |
145 | |.macro NYI; .long 0xf0f0f0f0; .endmacro | 193 | |.macro NYI; .long 0xec1cf0f0; .endmacro |
146 | | | 194 | | |
147 | |// Macros to mark delay slots. | 195 | |// Macros to mark delay slots. |
148 | |.macro ., a; a; .endmacro | 196 | |.macro ., a; a; .endmacro |
@@ -152,13 +200,23 @@ | |||
152 | |//----------------------------------------------------------------------- | 200 | |//----------------------------------------------------------------------- |
153 | | | 201 | | |
154 | |// Endian-specific defines. | 202 | |// Endian-specific defines. |
155 | |.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) | 203 | |.if ENDIAN_LE |
156 | |.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) | 204 | |.define FRAME_PC, -4 |
157 | |.define HI, LJ_ENDIAN_SELECT(4,0) | 205 | |.define FRAME_FUNC, -8 |
158 | |.define LO, LJ_ENDIAN_SELECT(0,4) | 206 | |.define HI, 4 |
159 | |.define OFS_RD, LJ_ENDIAN_SELECT(2,0) | 207 | |.define LO, 0 |
160 | |.define OFS_RA, LJ_ENDIAN_SELECT(1,2) | 208 | |.define OFS_RD, 2 |
161 | |.define OFS_OP, LJ_ENDIAN_SELECT(0,3) | 209 | |.define OFS_RA, 1 |
210 | |.define OFS_OP, 0 | ||
211 | |.else | ||
212 | |.define FRAME_PC, -8 | ||
213 | |.define FRAME_FUNC, -4 | ||
214 | |.define HI, 0 | ||
215 | |.define LO, 4 | ||
216 | |.define OFS_RD, 0 | ||
217 | |.define OFS_RA, 2 | ||
218 | |.define OFS_OP, 3 | ||
219 | |.endif | ||
162 | | | 220 | | |
163 | |// Instruction decode. | 221 | |// Instruction decode. |
164 | |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro | 222 | |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro |
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
353 | |. sll TMP2, TMP2, 3 | 411 | |. sll TMP2, TMP2, 3 |
354 | |1: | 412 | |1: |
355 | | addiu TMP1, TMP1, -8 | 413 | | addiu TMP1, TMP1, -8 |
356 | | ldc1 f0, 0(RA) | 414 | | lw SFRETHI, HI(RA) |
415 | | lw SFRETLO, LO(RA) | ||
357 | | addiu RA, RA, 8 | 416 | | addiu RA, RA, 8 |
358 | | sdc1 f0, 0(BASE) | 417 | | sw SFRETHI, HI(BASE) |
418 | | sw SFRETLO, LO(BASE) | ||
359 | | bnez TMP1, <1 | 419 | | bnez TMP1, <1 |
360 | |. addiu BASE, BASE, 8 | 420 | |. addiu BASE, BASE, 8 |
361 | | | 421 | | |
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
424 | | and sp, CARG1, AT | 484 | | and sp, CARG1, AT |
425 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | 485 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. |
426 | | lw L, SAVE_L | 486 | | lw L, SAVE_L |
427 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 487 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
488 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
428 | | li TISNIL, LJ_TNIL | 489 | | li TISNIL, LJ_TNIL |
429 | | lw BASE, L->base | 490 | | lw BASE, L->base |
430 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | 491 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. |
431 | | mtc1 TMP3, TOBIT | 492 | | .FPU mtc1 TMP3, TOBIT |
432 | | li TMP1, LJ_TFALSE | 493 | | li TMP1, LJ_TFALSE |
433 | | li_vmstate INTERP | 494 | | li_vmstate INTERP |
434 | | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. | 495 | | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. |
435 | | cvt.d.s TOBIT, TOBIT | 496 | | .FPU cvt.d.s TOBIT, TOBIT |
436 | | addiu RA, BASE, -8 // Results start at BASE-8. | 497 | | addiu RA, BASE, -8 // Results start at BASE-8. |
437 | | addiu DISPATCH, DISPATCH, GG_G2DISP | 498 | | addiu DISPATCH, DISPATCH, GG_G2DISP |
438 | | sw TMP1, HI(RA) // Prepend false to error message. | 499 | | sw TMP1, HI(RA) // Prepend false to error message. |
@@ -440,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
440 | | b ->vm_returnc | 501 | | b ->vm_returnc |
441 | |. li RD, 16 // 2 results: false + error message. | 502 | |. li RD, 16 // 2 results: false + error message. |
442 | | | 503 | | |
504 | |->vm_unwind_stub: // Jump to exit stub from unwinder. | ||
505 | | jr CARG1 | ||
506 | |. move ra, CARG2 | ||
507 | | | ||
443 | |//----------------------------------------------------------------------- | 508 | |//----------------------------------------------------------------------- |
444 | |//-- Grow stack for calls ----------------------------------------------- | 509 | |//-- Grow stack for calls ----------------------------------------------- |
445 | |//----------------------------------------------------------------------- | 510 | |//----------------------------------------------------------------------- |
@@ -486,21 +551,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
486 | | addiu DISPATCH, DISPATCH, GG_G2DISP | 551 | | addiu DISPATCH, DISPATCH, GG_G2DISP |
487 | | sw r0, SAVE_NRES | 552 | | sw r0, SAVE_NRES |
488 | | sw r0, SAVE_ERRF | 553 | | sw r0, SAVE_ERRF |
489 | | sw TMP0, L->cframe | 554 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
490 | | sw r0, SAVE_CFRAME | 555 | | sw r0, SAVE_CFRAME |
491 | | beqz TMP1, >3 | 556 | | beqz TMP1, >3 |
492 | |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 557 | |. sw TMP0, L->cframe |
493 | | | 558 | | |
494 | | // Resume after yield (like a return). | 559 | | // Resume after yield (like a return). |
560 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
495 | | move RA, BASE | 561 | | move RA, BASE |
496 | | lw BASE, L->base | 562 | | lw BASE, L->base |
563 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
497 | | lw TMP1, L->top | 564 | | lw TMP1, L->top |
498 | | lw PC, FRAME_PC(BASE) | 565 | | lw PC, FRAME_PC(BASE) |
499 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 566 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
500 | | subu RD, TMP1, BASE | 567 | | subu RD, TMP1, BASE |
501 | | mtc1 TMP3, TOBIT | 568 | | .FPU mtc1 TMP3, TOBIT |
502 | | sb r0, L->status | 569 | | sb r0, L->status |
503 | | cvt.d.s TOBIT, TOBIT | 570 | | .FPU cvt.d.s TOBIT, TOBIT |
504 | | li_vmstate INTERP | 571 | | li_vmstate INTERP |
505 | | addiu RD, RD, 8 | 572 | | addiu RD, RD, 8 |
506 | | st_vmstate | 573 | | st_vmstate |
@@ -525,25 +592,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
525 | | | 592 | | |
526 | |1: // Entry point for vm_pcall above (PC = ftype). | 593 | |1: // Entry point for vm_pcall above (PC = ftype). |
527 | | lw TMP1, L:CARG1->cframe | 594 | | lw TMP1, L:CARG1->cframe |
528 | | sw CARG3, SAVE_NRES | ||
529 | | move L, CARG1 | 595 | | move L, CARG1 |
530 | | sw CARG1, SAVE_L | 596 | | sw CARG3, SAVE_NRES |
531 | | move BASE, CARG2 | ||
532 | | sw sp, L->cframe // Add our C frame to cframe chain. | ||
533 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | 597 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. |
598 | | sw CARG1, SAVE_L | ||
599 | | move BASE, CARG2 | ||
600 | | addiu DISPATCH, DISPATCH, GG_G2DISP | ||
534 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 601 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
535 | | sw TMP1, SAVE_CFRAME | 602 | | sw TMP1, SAVE_CFRAME |
536 | | addiu DISPATCH, DISPATCH, GG_G2DISP | 603 | | sw sp, L->cframe // Add our C frame to cframe chain. |
537 | | | 604 | | |
538 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | 605 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). |
606 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
539 | | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). | 607 | | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). |
540 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 608 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
609 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
541 | | lw TMP1, L->top | 610 | | lw TMP1, L->top |
542 | | mtc1 TMP3, TOBIT | 611 | | .FPU mtc1 TMP3, TOBIT |
543 | | addu PC, PC, BASE | 612 | | addu PC, PC, BASE |
544 | | subu NARGS8:RC, TMP1, BASE | 613 | | subu NARGS8:RC, TMP1, BASE |
545 | | subu PC, PC, TMP2 // PC = frame delta + frame type | 614 | | subu PC, PC, TMP2 // PC = frame delta + frame type |
546 | | cvt.d.s TOBIT, TOBIT | 615 | | .FPU cvt.d.s TOBIT, TOBIT |
547 | | li_vmstate INTERP | 616 | | li_vmstate INTERP |
548 | | li TISNIL, LJ_TNIL | 617 | | li TISNIL, LJ_TNIL |
549 | | st_vmstate | 618 | | st_vmstate |
@@ -566,20 +635,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
566 | | lw TMP0, L:CARG1->stack | 635 | | lw TMP0, L:CARG1->stack |
567 | | sw CARG1, SAVE_L | 636 | | sw CARG1, SAVE_L |
568 | | lw TMP1, L->top | 637 | | lw TMP1, L->top |
638 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | ||
569 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 639 | | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
570 | | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | 640 | | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). |
571 | | lw TMP1, L->cframe | 641 | | lw TMP1, L->cframe |
572 | | sw sp, L->cframe // Add our C frame to cframe chain. | 642 | | addiu DISPATCH, DISPATCH, GG_G2DISP |
573 | | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | 643 | | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. |
574 | | sw r0, SAVE_ERRF // No error function. | 644 | | sw r0, SAVE_ERRF // No error function. |
575 | | move CFUNCADDR, CARG4 | 645 | | sw TMP1, SAVE_CFRAME |
646 | | sw sp, L->cframe // Add our C frame to cframe chain. | ||
647 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
576 | | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 648 | | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
577 | |. sw TMP1, SAVE_CFRAME | 649 | |. move CFUNCADDR, CARG4 |
578 | | move BASE, CRET1 | 650 | | move BASE, CRET1 |
579 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | ||
580 | | li PC, FRAME_CP | ||
581 | | bnez CRET1, <3 // Else continue with the call. | 651 | | bnez CRET1, <3 // Else continue with the call. |
582 | |. addiu DISPATCH, DISPATCH, GG_G2DISP | 652 | |. li PC, FRAME_CP |
583 | | b ->vm_leave_cp // No base? Just remove C frame. | 653 | | b ->vm_leave_cp // No base? Just remove C frame. |
584 | |. nop | 654 | |. nop |
585 | | | 655 | | |
@@ -624,7 +694,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
624 | |->cont_cat: // RA = resultptr, RB = meta base | 694 | |->cont_cat: // RA = resultptr, RB = meta base |
625 | | lw INS, -4(PC) | 695 | | lw INS, -4(PC) |
626 | | addiu CARG2, RB, -16 | 696 | | addiu CARG2, RB, -16 |
627 | | ldc1 f0, 0(RA) | 697 | | lw SFRETHI, HI(RA) |
698 | | lw SFRETLO, LO(RA) | ||
628 | | decode_RB8a MULTRES, INS | 699 | | decode_RB8a MULTRES, INS |
629 | | decode_RA8a RA, INS | 700 | | decode_RA8a RA, INS |
630 | | decode_RB8b MULTRES | 701 | | decode_RB8b MULTRES |
@@ -632,11 +703,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
632 | | addu TMP1, BASE, MULTRES | 703 | | addu TMP1, BASE, MULTRES |
633 | | sw BASE, L->base | 704 | | sw BASE, L->base |
634 | | subu CARG3, CARG2, TMP1 | 705 | | subu CARG3, CARG2, TMP1 |
706 | | sw SFRETHI, HI(CARG2) | ||
635 | | bne TMP1, CARG2, ->BC_CAT_Z | 707 | | bne TMP1, CARG2, ->BC_CAT_Z |
636 | |. sdc1 f0, 0(CARG2) | 708 | |. sw SFRETLO, LO(CARG2) |
637 | | addu RA, BASE, RA | 709 | | addu RA, BASE, RA |
710 | | sw SFRETHI, HI(RA) | ||
638 | | b ->cont_nop | 711 | | b ->cont_nop |
639 | |. sdc1 f0, 0(RA) | 712 | |. sw SFRETLO, LO(RA) |
640 | | | 713 | | |
641 | |//-- Table indexing metamethods ----------------------------------------- | 714 | |//-- Table indexing metamethods ----------------------------------------- |
642 | | | 715 | | |
@@ -659,10 +732,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
659 | |. sw TMP1, HI(CARG3) | 732 | |. sw TMP1, HI(CARG3) |
660 | | | 733 | | |
661 | |->vmeta_tgetb: // TMP0 = index | 734 | |->vmeta_tgetb: // TMP0 = index |
662 | | mtc1 TMP0, f0 | ||
663 | | cvt.d.w f0, f0 | ||
664 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 735 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
665 | | sdc1 f0, 0(CARG3) | 736 | | sw TMP0, LO(CARG3) |
737 | | sw TISNUM, HI(CARG3) | ||
666 | | | 738 | | |
667 | |->vmeta_tgetv: | 739 | |->vmeta_tgetv: |
668 | |1: | 740 | |1: |
@@ -674,9 +746,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
674 | | // Returns TValue * (finished) or NULL (metamethod). | 746 | | // Returns TValue * (finished) or NULL (metamethod). |
675 | | beqz CRET1, >3 | 747 | | beqz CRET1, >3 |
676 | |. addiu TMP1, BASE, -FRAME_CONT | 748 | |. addiu TMP1, BASE, -FRAME_CONT |
677 | | ldc1 f0, 0(CRET1) | 749 | | lw SFARG1HI, HI(CRET1) |
750 | | lw SFARG2HI, LO(CRET1) | ||
678 | | ins_next1 | 751 | | ins_next1 |
679 | | sdc1 f0, 0(RA) | 752 | | sw SFARG1HI, HI(RA) |
753 | | sw SFARG2HI, LO(RA) | ||
680 | | ins_next2 | 754 | | ins_next2 |
681 | | | 755 | | |
682 | |3: // Call __index metamethod. | 756 | |3: // Call __index metamethod. |
@@ -688,6 +762,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
688 | | b ->vm_call_dispatch_f | 762 | | b ->vm_call_dispatch_f |
689 | |. li NARGS8:RC, 16 // 2 args for func(t, k). | 763 | |. li NARGS8:RC, 16 // 2 args for func(t, k). |
690 | | | 764 | | |
765 | |->vmeta_tgetr: | ||
766 | | load_got lj_tab_getinth | ||
767 | | call_intern lj_tab_getinth // (GCtab *t, int32_t key) | ||
768 | |. nop | ||
769 | | // Returns cTValue * or NULL. | ||
770 | | beqz CRET1, ->BC_TGETR_Z | ||
771 | |. move SFARG2HI, TISNIL | ||
772 | | lw SFARG2HI, HI(CRET1) | ||
773 | | b ->BC_TGETR_Z | ||
774 | |. lw SFARG2LO, LO(CRET1) | ||
775 | | | ||
691 | |//----------------------------------------------------------------------- | 776 | |//----------------------------------------------------------------------- |
692 | | | 777 | | |
693 | |->vmeta_tsets1: | 778 | |->vmeta_tsets1: |
@@ -709,10 +794,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
709 | |. sw TMP1, HI(CARG3) | 794 | |. sw TMP1, HI(CARG3) |
710 | | | 795 | | |
711 | |->vmeta_tsetb: // TMP0 = index | 796 | |->vmeta_tsetb: // TMP0 = index |
712 | | mtc1 TMP0, f0 | ||
713 | | cvt.d.w f0, f0 | ||
714 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 797 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
715 | | sdc1 f0, 0(CARG3) | 798 | | sw TMP0, LO(CARG3) |
799 | | sw TISNUM, HI(CARG3) | ||
716 | | | 800 | | |
717 | |->vmeta_tsetv: | 801 | |->vmeta_tsetv: |
718 | |1: | 802 | |1: |
@@ -722,11 +806,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
722 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | 806 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) |
723 | |. move CARG1, L | 807 | |. move CARG1, L |
724 | | // Returns TValue * (finished) or NULL (metamethod). | 808 | | // Returns TValue * (finished) or NULL (metamethod). |
809 | | lw SFARG1HI, HI(RA) | ||
725 | | beqz CRET1, >3 | 810 | | beqz CRET1, >3 |
726 | |. ldc1 f0, 0(RA) | 811 | |. lw SFARG1LO, LO(RA) |
727 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | 812 | | // NOBARRIER: lj_meta_tset ensures the table is not black. |
728 | | ins_next1 | 813 | | ins_next1 |
729 | | sdc1 f0, 0(CRET1) | 814 | | sw SFARG1HI, HI(CRET1) |
815 | | sw SFARG1LO, LO(CRET1) | ||
730 | | ins_next2 | 816 | | ins_next2 |
731 | | | 817 | | |
732 | |3: // Call __newindex metamethod. | 818 | |3: // Call __newindex metamethod. |
@@ -736,14 +822,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
736 | | sw PC, -16+HI(BASE) // [cont|PC] | 822 | | sw PC, -16+HI(BASE) // [cont|PC] |
737 | | subu PC, BASE, TMP1 | 823 | | subu PC, BASE, TMP1 |
738 | | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | 824 | | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. |
739 | | sdc1 f0, 16(BASE) // Copy value to third argument. | 825 | | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument. |
826 | | sw SFARG1LO, 16+LO(BASE) | ||
740 | | b ->vm_call_dispatch_f | 827 | | b ->vm_call_dispatch_f |
741 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) | 828 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) |
742 | | | 829 | | |
830 | |->vmeta_tsetr: | ||
831 | | load_got lj_tab_setinth | ||
832 | | sw BASE, L->base | ||
833 | | sw PC, SAVE_PC | ||
834 | | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
835 | |. move CARG1, L | ||
836 | | // Returns TValue *. | ||
837 | | b ->BC_TSETR_Z | ||
838 | |. nop | ||
839 | | | ||
743 | |//-- Comparison metamethods --------------------------------------------- | 840 | |//-- Comparison metamethods --------------------------------------------- |
744 | | | 841 | | |
745 | |->vmeta_comp: | 842 | |->vmeta_comp: |
746 | | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. | 843 | | // RA/RD point to o1/o2. |
844 | | move CARG2, RA | ||
845 | | move CARG3, RD | ||
747 | | load_got lj_meta_comp | 846 | | load_got lj_meta_comp |
748 | | addiu PC, PC, -4 | 847 | | addiu PC, PC, -4 |
749 | | sw BASE, L->base | 848 | | sw BASE, L->base |
@@ -769,11 +868,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
769 | | | 868 | | |
770 | |->cont_ra: // RA = resultptr | 869 | |->cont_ra: // RA = resultptr |
771 | | lbu TMP1, -4+OFS_RA(PC) | 870 | | lbu TMP1, -4+OFS_RA(PC) |
772 | | ldc1 f0, 0(RA) | 871 | | lw SFRETHI, HI(RA) |
872 | | lw SFRETLO, LO(RA) | ||
773 | | sll TMP1, TMP1, 3 | 873 | | sll TMP1, TMP1, 3 |
774 | | addu TMP1, BASE, TMP1 | 874 | | addu TMP1, BASE, TMP1 |
875 | | sw SFRETHI, HI(TMP1) | ||
775 | | b ->cont_nop | 876 | | b ->cont_nop |
776 | |. sdc1 f0, 0(TMP1) | 877 | |. sw SFRETLO, LO(TMP1) |
777 | | | 878 | | |
778 | |->cont_condt: // RA = resultptr | 879 | |->cont_condt: // RA = resultptr |
779 | | lw TMP0, HI(RA) | 880 | | lw TMP0, HI(RA) |
@@ -788,8 +889,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
788 | |. addiu TMP2, AT, -1 // Branch if result is false. | 889 | |. addiu TMP2, AT, -1 // Branch if result is false. |
789 | | | 890 | | |
790 | |->vmeta_equal: | 891 | |->vmeta_equal: |
791 | | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. | 892 | | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1. |
792 | | load_got lj_meta_equal | 893 | | load_got lj_meta_equal |
894 | | move CARG2, SFARG1LO | ||
895 | | move CARG3, SFARG2LO | ||
896 | | move CARG4, TMP0 | ||
793 | | addiu PC, PC, -4 | 897 | | addiu PC, PC, -4 |
794 | | sw BASE, L->base | 898 | | sw BASE, L->base |
795 | | sw PC, SAVE_PC | 899 | | sw PC, SAVE_PC |
@@ -813,17 +917,31 @@ static void build_subroutines(BuildCtx *ctx) | |||
813 | |. nop | 917 | |. nop |
814 | |.endif | 918 | |.endif |
815 | | | 919 | | |
920 | |->vmeta_istype: | ||
921 | | load_got lj_meta_istype | ||
922 | | addiu PC, PC, -4 | ||
923 | | sw BASE, L->base | ||
924 | | srl CARG2, RA, 3 | ||
925 | | srl CARG3, RD, 3 | ||
926 | | sw PC, SAVE_PC | ||
927 | | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
928 | |. move CARG1, L | ||
929 | | b ->cont_nop | ||
930 | |. nop | ||
931 | | | ||
816 | |//-- Arithmetic metamethods --------------------------------------------- | 932 | |//-- Arithmetic metamethods --------------------------------------------- |
817 | | | 933 | | |
818 | |->vmeta_unm: | 934 | |->vmeta_unm: |
819 | | move CARG4, CARG3 | 935 | | move RC, RB |
820 | | | 936 | | |
821 | |->vmeta_arith: | 937 | |->vmeta_arith: |
822 | | load_got lj_meta_arith | 938 | | load_got lj_meta_arith |
823 | | decode_OP1 TMP0, INS | 939 | | decode_OP1 TMP0, INS |
824 | | sw BASE, L->base | 940 | | sw BASE, L->base |
825 | | sw PC, SAVE_PC | ||
826 | | move CARG2, RA | 941 | | move CARG2, RA |
942 | | sw PC, SAVE_PC | ||
943 | | move CARG3, RB | ||
944 | | move CARG4, RC | ||
827 | | sw TMP0, ARG5 | 945 | | sw TMP0, ARG5 |
828 | | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | 946 | | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) |
829 | |. move CARG1, L | 947 | |. move CARG1, L |
@@ -931,40 +1049,52 @@ static void build_subroutines(BuildCtx *ctx) | |||
931 | | | 1049 | | |
932 | |.macro .ffunc_1, name | 1050 | |.macro .ffunc_1, name |
933 | |->ff_ .. name: | 1051 | |->ff_ .. name: |
1052 | | lw SFARG1HI, HI(BASE) | ||
934 | | beqz NARGS8:RC, ->fff_fallback | 1053 | | beqz NARGS8:RC, ->fff_fallback |
935 | |. lw CARG3, HI(BASE) | 1054 | |. lw SFARG1LO, LO(BASE) |
936 | | lw CARG1, LO(BASE) | ||
937 | |.endmacro | 1055 | |.endmacro |
938 | | | 1056 | | |
939 | |.macro .ffunc_2, name | 1057 | |.macro .ffunc_2, name |
940 | |->ff_ .. name: | 1058 | |->ff_ .. name: |
941 | | sltiu AT, NARGS8:RC, 16 | 1059 | | sltiu AT, NARGS8:RC, 16 |
942 | | lw CARG3, HI(BASE) | 1060 | | lw SFARG1HI, HI(BASE) |
943 | | bnez AT, ->fff_fallback | 1061 | | bnez AT, ->fff_fallback |
944 | |. lw CARG4, 8+HI(BASE) | 1062 | |. lw SFARG2HI, 8+HI(BASE) |
945 | | lw CARG1, LO(BASE) | 1063 | | lw SFARG1LO, LO(BASE) |
946 | | lw CARG2, 8+LO(BASE) | 1064 | | lw SFARG2LO, 8+LO(BASE) |
947 | |.endmacro | 1065 | |.endmacro |
948 | | | 1066 | | |
949 | |.macro .ffunc_n, name // Caveat: has delay slot! | 1067 | |.macro .ffunc_n, name // Caveat: has delay slot! |
950 | |->ff_ .. name: | 1068 | |->ff_ .. name: |
951 | | lw CARG3, HI(BASE) | 1069 | | lw SFARG1HI, HI(BASE) |
1070 | |.if FPU | ||
1071 | | ldc1 FARG1, 0(BASE) | ||
1072 | |.else | ||
1073 | | lw SFARG1LO, LO(BASE) | ||
1074 | |.endif | ||
952 | | beqz NARGS8:RC, ->fff_fallback | 1075 | | beqz NARGS8:RC, ->fff_fallback |
953 | |. ldc1 FARG1, 0(BASE) | 1076 | |. sltiu AT, SFARG1HI, LJ_TISNUM |
954 | | sltiu AT, CARG3, LJ_TISNUM | ||
955 | | beqz AT, ->fff_fallback | 1077 | | beqz AT, ->fff_fallback |
956 | |.endmacro | 1078 | |.endmacro |
957 | | | 1079 | | |
958 | |.macro .ffunc_nn, name // Caveat: has delay slot! | 1080 | |.macro .ffunc_nn, name // Caveat: has delay slot! |
959 | |->ff_ .. name: | 1081 | |->ff_ .. name: |
960 | | sltiu AT, NARGS8:RC, 16 | 1082 | | sltiu AT, NARGS8:RC, 16 |
961 | | lw CARG3, HI(BASE) | 1083 | | lw SFARG1HI, HI(BASE) |
962 | | bnez AT, ->fff_fallback | 1084 | | bnez AT, ->fff_fallback |
963 | |. lw CARG4, 8+HI(BASE) | 1085 | |. lw SFARG2HI, 8+HI(BASE) |
964 | | ldc1 FARG1, 0(BASE) | 1086 | | sltiu TMP0, SFARG1HI, LJ_TISNUM |
965 | | ldc1 FARG2, 8(BASE) | 1087 | |.if FPU |
966 | | sltiu TMP0, CARG3, LJ_TISNUM | 1088 | | ldc1 FARG1, 0(BASE) |
967 | | sltiu TMP1, CARG4, LJ_TISNUM | 1089 | |.else |
1090 | | lw SFARG1LO, LO(BASE) | ||
1091 | |.endif | ||
1092 | | sltiu TMP1, SFARG2HI, LJ_TISNUM | ||
1093 | |.if FPU | ||
1094 | | ldc1 FARG2, 8(BASE) | ||
1095 | |.else | ||
1096 | | lw SFARG2LO, 8+LO(BASE) | ||
1097 | |.endif | ||
968 | | and TMP0, TMP0, TMP1 | 1098 | | and TMP0, TMP0, TMP1 |
969 | | beqz TMP0, ->fff_fallback | 1099 | | beqz TMP0, ->fff_fallback |
970 | |.endmacro | 1100 | |.endmacro |
@@ -980,53 +1110,55 @@ static void build_subroutines(BuildCtx *ctx) | |||
980 | |//-- Base library: checks ----------------------------------------------- | 1110 | |//-- Base library: checks ----------------------------------------------- |
981 | | | 1111 | | |
982 | |.ffunc_1 assert | 1112 | |.ffunc_1 assert |
983 | | sltiu AT, CARG3, LJ_TISTRUECOND | 1113 | | sltiu AT, SFARG1HI, LJ_TISTRUECOND |
984 | | beqz AT, ->fff_fallback | 1114 | | beqz AT, ->fff_fallback |
985 | |. addiu RA, BASE, -8 | 1115 | |. addiu RA, BASE, -8 |
986 | | lw PC, FRAME_PC(BASE) | 1116 | | lw PC, FRAME_PC(BASE) |
987 | | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | 1117 | | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. |
988 | | addu TMP2, RA, NARGS8:RC | 1118 | | addu TMP2, RA, NARGS8:RC |
989 | | sw CARG3, HI(RA) | 1119 | | sw SFARG1HI, HI(RA) |
990 | | addiu TMP1, BASE, 8 | 1120 | | addiu TMP1, BASE, 8 |
991 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. | 1121 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. |
992 | |. sw CARG1, LO(RA) | 1122 | |. sw SFARG1LO, LO(RA) |
993 | |1: | 1123 | |1: |
994 | | ldc1 f0, 0(TMP1) | 1124 | | lw SFRETHI, HI(TMP1) |
995 | | sdc1 f0, -8(TMP1) | 1125 | | lw SFRETLO, LO(TMP1) |
1126 | | sw SFRETHI, -8+HI(TMP1) | ||
1127 | | sw SFRETLO, -8+LO(TMP1) | ||
996 | | bne TMP1, TMP2, <1 | 1128 | | bne TMP1, TMP2, <1 |
997 | |. addiu TMP1, TMP1, 8 | 1129 | |. addiu TMP1, TMP1, 8 |
998 | | b ->fff_res | 1130 | | b ->fff_res |
999 | |. nop | 1131 | |. nop |
1000 | | | 1132 | | |
1001 | |.ffunc type | 1133 | |.ffunc type |
1002 | | lw CARG3, HI(BASE) | 1134 | | lw SFARG1HI, HI(BASE) |
1003 | | li TMP1, LJ_TISNUM | ||
1004 | | beqz NARGS8:RC, ->fff_fallback | 1135 | | beqz NARGS8:RC, ->fff_fallback |
1005 | |. sltiu TMP0, CARG3, LJ_TISNUM | 1136 | |. sltiu TMP0, SFARG1HI, LJ_TISNUM |
1006 | | movz TMP1, CARG3, TMP0 | 1137 | | movn SFARG1HI, TISNUM, TMP0 |
1007 | | not TMP1, TMP1 | 1138 | | not TMP1, SFARG1HI |
1008 | | sll TMP1, TMP1, 3 | 1139 | | sll TMP1, TMP1, 3 |
1009 | | addu TMP1, CFUNC:RB, TMP1 | 1140 | | addu TMP1, CFUNC:RB, TMP1 |
1010 | | b ->fff_resn | 1141 | | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi |
1011 | |. ldc1 FRET1, CFUNC:TMP1->upvalue | 1142 | | b ->fff_restv |
1143 | |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo | ||
1012 | | | 1144 | | |
1013 | |//-- Base library: getters and setters --------------------------------- | 1145 | |//-- Base library: getters and setters --------------------------------- |
1014 | | | 1146 | | |
1015 | |.ffunc_1 getmetatable | 1147 | |.ffunc_1 getmetatable |
1016 | | li AT, LJ_TTAB | 1148 | | li AT, LJ_TTAB |
1017 | | bne CARG3, AT, >6 | 1149 | | bne SFARG1HI, AT, >6 |
1018 | |. li AT, LJ_TUDATA | 1150 | |. li AT, LJ_TUDATA |
1019 | |1: // Field metatable must be at same offset for GCtab and GCudata! | 1151 | |1: // Field metatable must be at same offset for GCtab and GCudata! |
1020 | | lw TAB:CARG1, TAB:CARG1->metatable | 1152 | | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable |
1021 | |2: | 1153 | |2: |
1022 | | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) | 1154 | | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) |
1023 | | beqz TAB:CARG1, ->fff_restv | 1155 | | beqz TAB:SFARG1LO, ->fff_restv |
1024 | |. li CARG3, LJ_TNIL | 1156 | |. li SFARG1HI, LJ_TNIL |
1025 | | lw TMP0, TAB:CARG1->hmask | 1157 | | lw TMP0, TAB:SFARG1LO->hmask |
1026 | | li CARG3, LJ_TTAB // Use metatable as default result. | 1158 | | li SFARG1HI, LJ_TTAB // Use metatable as default result. |
1027 | | lw TMP1, STR:RC->hash | 1159 | | lw TMP1, STR:RC->sid |
1028 | | lw NODE:TMP2, TAB:CARG1->node | 1160 | | lw NODE:TMP2, TAB:SFARG1LO->node |
1029 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 1161 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask |
1030 | | sll TMP0, TMP1, 5 | 1162 | | sll TMP0, TMP1, 5 |
1031 | | sll TMP1, TMP1, 3 | 1163 | | sll TMP1, TMP1, 3 |
1032 | | subu TMP1, TMP0, TMP1 | 1164 | | subu TMP1, TMP0, TMP1 |
@@ -1037,7 +1169,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1037 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | 1169 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) |
1038 | | lw NODE:TMP3, NODE:TMP2->next | 1170 | | lw NODE:TMP3, NODE:TMP2->next |
1039 | | bne CARG4, AT, >4 | 1171 | | bne CARG4, AT, >4 |
1040 | |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) | 1172 | |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2) |
1041 | | beq TMP0, STR:RC, >5 | 1173 | | beq TMP0, STR:RC, >5 |
1042 | |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) | 1174 | |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) |
1043 | |4: | 1175 | |4: |
@@ -1046,36 +1178,35 @@ static void build_subroutines(BuildCtx *ctx) | |||
1046 | | b <3 | 1178 | | b <3 |
1047 | |. nop | 1179 | |. nop |
1048 | |5: | 1180 | |5: |
1049 | | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. | 1181 | | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value. |
1050 | |. nop | 1182 | |. nop |
1051 | | move CARG3, CARG2 // Return value of mt.__metatable. | 1183 | | move SFARG1HI, CARG3 // Return value of mt.__metatable. |
1052 | | b ->fff_restv | 1184 | | b ->fff_restv |
1053 | |. move CARG1, TMP1 | 1185 | |. move SFARG1LO, TMP1 |
1054 | | | 1186 | | |
1055 | |6: | 1187 | |6: |
1056 | | beq CARG3, AT, <1 | 1188 | | beq SFARG1HI, AT, <1 |
1057 | |. sltiu TMP0, CARG3, LJ_TISNUM | 1189 | |. sltu AT, TISNUM, SFARG1HI |
1058 | | li TMP1, LJ_TISNUM | 1190 | | movz SFARG1HI, TISNUM, AT |
1059 | | movz TMP1, CARG3, TMP0 | 1191 | | not TMP1, SFARG1HI |
1060 | | not TMP1, TMP1 | ||
1061 | | sll TMP1, TMP1, 2 | 1192 | | sll TMP1, TMP1, 2 |
1062 | | addu TMP1, DISPATCH, TMP1 | 1193 | | addu TMP1, DISPATCH, TMP1 |
1063 | | b <2 | 1194 | | b <2 |
1064 | |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) | 1195 | |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) |
1065 | | | 1196 | | |
1066 | |.ffunc_2 setmetatable | 1197 | |.ffunc_2 setmetatable |
1067 | | // Fast path: no mt for table yet and not clearing the mt. | 1198 | | // Fast path: no mt for table yet and not clearing the mt. |
1068 | | li AT, LJ_TTAB | 1199 | | li AT, LJ_TTAB |
1069 | | bne CARG3, AT, ->fff_fallback | 1200 | | bne SFARG1HI, AT, ->fff_fallback |
1070 | |. addiu CARG4, CARG4, -LJ_TTAB | 1201 | |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB |
1071 | | lw TAB:TMP1, TAB:CARG1->metatable | 1202 | | lw TAB:TMP1, TAB:SFARG1LO->metatable |
1072 | | lbu TMP3, TAB:CARG1->marked | 1203 | | lbu TMP3, TAB:SFARG1LO->marked |
1073 | | or AT, CARG4, TAB:TMP1 | 1204 | | or AT, SFARG2HI, TAB:TMP1 |
1074 | | bnez AT, ->fff_fallback | 1205 | | bnez AT, ->fff_fallback |
1075 | |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 1206 | |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
1076 | | beqz AT, ->fff_restv | 1207 | | beqz AT, ->fff_restv |
1077 | |. sw TAB:CARG2, TAB:CARG1->metatable | 1208 | |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable |
1078 | | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv | 1209 | | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv |
1079 | | | 1210 | | |
1080 | |.ffunc rawget | 1211 | |.ffunc rawget |
1081 | | lw CARG4, HI(BASE) | 1212 | | lw CARG4, HI(BASE) |
@@ -1089,90 +1220,89 @@ static void build_subroutines(BuildCtx *ctx) | |||
1089 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | 1220 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |
1090 | |. move CARG1, L | 1221 | |. move CARG1, L |
1091 | | // Returns cTValue *. | 1222 | | // Returns cTValue *. |
1092 | | b ->fff_resn | 1223 | | lw SFARG1HI, HI(CRET1) |
1093 | |. ldc1 FRET1, 0(CRET1) | 1224 | | b ->fff_restv |
1225 | |. lw SFARG1LO, LO(CRET1) | ||
1094 | | | 1226 | | |
1095 | |//-- Base library: conversions ------------------------------------------ | 1227 | |//-- Base library: conversions ------------------------------------------ |
1096 | | | 1228 | | |
1097 | |.ffunc tonumber | 1229 | |.ffunc tonumber |
1098 | | // Only handles the number case inline (without a base argument). | 1230 | | // Only handles the number case inline (without a base argument). |
1099 | | lw CARG1, HI(BASE) | 1231 | | lw CARG1, HI(BASE) |
1100 | | xori AT, NARGS8:RC, 8 | 1232 | | xori AT, NARGS8:RC, 8 // Exactly one number argument. |
1101 | | sltiu CARG1, CARG1, LJ_TISNUM | 1233 | | sltu TMP0, TISNUM, CARG1 |
1102 | | movn CARG1, r0, AT | 1234 | | or AT, AT, TMP0 |
1103 | | beqz CARG1, ->fff_fallback // Exactly one number argument. | 1235 | | bnez AT, ->fff_fallback |
1104 | |. ldc1 FRET1, 0(BASE) | 1236 | |. lw SFARG1HI, HI(BASE) |
1105 | | b ->fff_resn | 1237 | | b ->fff_restv |
1106 | |. nop | 1238 | |. lw SFARG1LO, LO(BASE) |
1107 | | | 1239 | | |
1108 | |.ffunc_1 tostring | 1240 | |.ffunc_1 tostring |
1109 | | // Only handles the string or number case inline. | 1241 | | // Only handles the string or number case inline. |
1110 | | li AT, LJ_TSTR | 1242 | | li AT, LJ_TSTR |
1111 | | // A __tostring method in the string base metatable is ignored. | 1243 | | // A __tostring method in the string base metatable is ignored. |
1112 | | beq CARG3, AT, ->fff_restv // String key? | 1244 | | beq SFARG1HI, AT, ->fff_restv // String key? |
1113 | | // Handle numbers inline, unless a number base metatable is present. | 1245 | | // Handle numbers inline, unless a number base metatable is present. |
1114 | |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) | 1246 | |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) |
1115 | | sltiu TMP0, CARG3, LJ_TISNUM | 1247 | | sltu TMP0, TISNUM, SFARG1HI |
1116 | | sltiu TMP1, TMP1, 1 | 1248 | | or TMP0, TMP0, TMP1 |
1117 | | and TMP0, TMP0, TMP1 | 1249 | | bnez TMP0, ->fff_fallback |
1118 | | beqz TMP0, ->fff_fallback | ||
1119 | |. sw BASE, L->base // Add frame since C call can throw. | 1250 | |. sw BASE, L->base // Add frame since C call can throw. |
1120 | | ffgccheck | 1251 | | ffgccheck |
1121 | |. sw PC, SAVE_PC // Redundant (but a defined value). | 1252 | |. sw PC, SAVE_PC // Redundant (but a defined value). |
1122 | | load_got lj_str_fromnum | 1253 | | load_got lj_strfmt_number |
1123 | | move CARG1, L | 1254 | | move CARG1, L |
1124 | | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) | 1255 | | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) |
1125 | |. move CARG2, BASE | 1256 | |. move CARG2, BASE |
1126 | | // Returns GCstr *. | 1257 | | // Returns GCstr *. |
1127 | | li CARG3, LJ_TSTR | 1258 | | li SFARG1HI, LJ_TSTR |
1128 | | b ->fff_restv | 1259 | | b ->fff_restv |
1129 | |. move CARG1, CRET1 | 1260 | |. move SFARG1LO, CRET1 |
1130 | | | 1261 | | |
1131 | |//-- Base library: iterators ------------------------------------------- | 1262 | |//-- Base library: iterators ------------------------------------------- |
1132 | | | 1263 | | |
1133 | |.ffunc next | 1264 | |.ffunc next |
1134 | | lw CARG1, HI(BASE) | 1265 | | lw CARG2, HI(BASE) |
1135 | | lw TAB:CARG2, LO(BASE) | 1266 | | lw TAB:CARG1, LO(BASE) |
1136 | | beqz NARGS8:RC, ->fff_fallback | 1267 | | beqz NARGS8:RC, ->fff_fallback |
1137 | |. addu TMP2, BASE, NARGS8:RC | 1268 | |. addu TMP2, BASE, NARGS8:RC |
1138 | | li AT, LJ_TTAB | 1269 | | li AT, LJ_TTAB |
1139 | | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. | 1270 | | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. |
1140 | | bne CARG1, AT, ->fff_fallback | 1271 | | bne CARG2, AT, ->fff_fallback |
1141 | |. lw PC, FRAME_PC(BASE) | 1272 | |. lw PC, FRAME_PC(BASE) |
1142 | | load_got lj_tab_next | 1273 | | load_got lj_tab_next |
1143 | | sw BASE, L->base // Add frame since C call can throw. | 1274 | | addiu CARG2, BASE, 8 |
1144 | | sw BASE, L->top // Dummy frame length is ok. | 1275 | | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) |
1145 | | addiu CARG3, BASE, 8 | 1276 | |. addiu CARG3, BASE, -8 |
1146 | | sw PC, SAVE_PC | 1277 | | // Returns 1=found, 0=end, -1=error. |
1147 | | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | 1278 | | addiu RA, BASE, -8 |
1148 | |. move CARG1, L | 1279 | | bgtz CRET1, ->fff_res // Found key/value. |
1149 | | // Returns 0 at end of traversal. | 1280 | |. li RD, (2+1)*8 |
1150 | | beqz CRET1, ->fff_restv // End of traversal: return nil. | 1281 | | beqz CRET1, ->fff_restv // End of traversal: return nil. |
1151 | |. li CARG3, LJ_TNIL | 1282 | |. li SFARG1HI, LJ_TNIL |
1152 | | ldc1 f0, 8(BASE) // Copy key and value to results. | 1283 | | lw CFUNC:RB, FRAME_FUNC(BASE) |
1153 | | addiu RA, BASE, -8 | 1284 | | b ->fff_fallback // Invalid key. |
1154 | | ldc1 f2, 16(BASE) | 1285 | |. li RC, 2*8 |
1155 | | li RD, (2+1)*8 | ||
1156 | | sdc1 f0, 0(RA) | ||
1157 | | b ->fff_res | ||
1158 | |. sdc1 f2, 8(RA) | ||
1159 | | | 1286 | | |
1160 | |.ffunc_1 pairs | 1287 | |.ffunc_1 pairs |
1161 | | li AT, LJ_TTAB | 1288 | | li AT, LJ_TTAB |
1162 | | bne CARG3, AT, ->fff_fallback | 1289 | | bne SFARG1HI, AT, ->fff_fallback |
1163 | |. lw PC, FRAME_PC(BASE) | 1290 | |. lw PC, FRAME_PC(BASE) |
1164 | #if LJ_52 | 1291 | #if LJ_52 |
1165 | | lw TAB:TMP2, TAB:CARG1->metatable | 1292 | | lw TAB:TMP2, TAB:SFARG1LO->metatable |
1166 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1293 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1294 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1167 | | bnez TAB:TMP2, ->fff_fallback | 1295 | | bnez TAB:TMP2, ->fff_fallback |
1168 | #else | 1296 | #else |
1169 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1297 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1298 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1170 | #endif | 1299 | #endif |
1171 | |. addiu RA, BASE, -8 | 1300 | |. addiu RA, BASE, -8 |
1172 | | sw TISNIL, 8+HI(BASE) | 1301 | | sw TISNIL, 8+HI(BASE) |
1173 | | li RD, (3+1)*8 | 1302 | | sw TMP0, HI(RA) |
1303 | | sw TMP1, LO(RA) | ||
1174 | | b ->fff_res | 1304 | | b ->fff_res |
1175 | |. sdc1 f0, 0(RA) | 1305 | |. li RD, (3+1)*8 |
1176 | | | 1306 | | |
1177 | |.ffunc ipairs_aux | 1307 | |.ffunc ipairs_aux |
1178 | | sltiu AT, NARGS8:RC, 16 | 1308 | | sltiu AT, NARGS8:RC, 16 |
@@ -1180,35 +1310,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1180 | | lw TAB:CARG1, LO(BASE) | 1310 | | lw TAB:CARG1, LO(BASE) |
1181 | | lw CARG4, 8+HI(BASE) | 1311 | | lw CARG4, 8+HI(BASE) |
1182 | | bnez AT, ->fff_fallback | 1312 | | bnez AT, ->fff_fallback |
1183 | |. ldc1 FARG2, 8(BASE) | 1313 | |. addiu CARG3, CARG3, -LJ_TTAB |
1184 | | addiu CARG3, CARG3, -LJ_TTAB | 1314 | | xor CARG4, CARG4, TISNUM |
1185 | | sltiu AT, CARG4, LJ_TISNUM | 1315 | | and AT, CARG3, CARG4 |
1186 | | li TMP0, 1 | 1316 | | bnez AT, ->fff_fallback |
1187 | | movn AT, r0, CARG3 | ||
1188 | | mtc1 TMP0, FARG1 | ||
1189 | | beqz AT, ->fff_fallback | ||
1190 | |. lw PC, FRAME_PC(BASE) | 1317 | |. lw PC, FRAME_PC(BASE) |
1191 | | cvt.w.d FRET1, FARG2 | 1318 | | lw TMP2, 8+LO(BASE) |
1192 | | cvt.d.w FARG1, FARG1 | ||
1193 | | lw TMP0, TAB:CARG1->asize | 1319 | | lw TMP0, TAB:CARG1->asize |
1194 | | lw TMP1, TAB:CARG1->array | 1320 | | lw TMP1, TAB:CARG1->array |
1195 | | mfc1 TMP2, FRET1 | ||
1196 | | addiu RA, BASE, -8 | ||
1197 | | add.d FARG2, FARG2, FARG1 | ||
1198 | | addiu TMP2, TMP2, 1 | 1321 | | addiu TMP2, TMP2, 1 |
1322 | | sw TISNUM, -8+HI(BASE) | ||
1199 | | sltu AT, TMP2, TMP0 | 1323 | | sltu AT, TMP2, TMP0 |
1324 | | sw TMP2, -8+LO(BASE) | ||
1325 | | beqz AT, >2 // Not in array part? | ||
1326 | |. addiu RA, BASE, -8 | ||
1200 | | sll TMP3, TMP2, 3 | 1327 | | sll TMP3, TMP2, 3 |
1201 | | addu TMP3, TMP1, TMP3 | 1328 | | addu TMP3, TMP1, TMP3 |
1202 | | beqz AT, >2 // Not in array part? | 1329 | | lw TMP1, HI(TMP3) |
1203 | |. sdc1 FARG2, 0(RA) | 1330 | | lw TMP2, LO(TMP3) |
1204 | | lw TMP2, HI(TMP3) | ||
1205 | | ldc1 f0, 0(TMP3) | ||
1206 | |1: | 1331 | |1: |
1207 | | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. | 1332 | | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. |
1208 | |. li RD, (0+1)*8 | 1333 | |. li RD, (0+1)*8 |
1209 | | li RD, (2+1)*8 | 1334 | | sw TMP1, 8+HI(RA) |
1335 | | sw TMP2, 8+LO(RA) | ||
1210 | | b ->fff_res | 1336 | | b ->fff_res |
1211 | |. sdc1 f0, 8(RA) | 1337 | |. li RD, (2+1)*8 |
1338 | | | ||
1212 | |2: // Check for empty hash part first. Otherwise call C function. | 1339 | |2: // Check for empty hash part first. Otherwise call C function. |
1213 | | lw TMP0, TAB:CARG1->hmask | 1340 | | lw TMP0, TAB:CARG1->hmask |
1214 | | load_got lj_tab_getinth | 1341 | | load_got lj_tab_getinth |
@@ -1219,27 +1346,30 @@ static void build_subroutines(BuildCtx *ctx) | |||
1219 | | // Returns cTValue * or NULL. | 1346 | | // Returns cTValue * or NULL. |
1220 | | beqz CRET1, ->fff_res | 1347 | | beqz CRET1, ->fff_res |
1221 | |. li RD, (0+1)*8 | 1348 | |. li RD, (0+1)*8 |
1222 | | lw TMP2, HI(CRET1) | 1349 | | lw TMP1, HI(CRET1) |
1223 | | b <1 | 1350 | | b <1 |
1224 | |. ldc1 f0, 0(CRET1) | 1351 | |. lw TMP2, LO(CRET1) |
1225 | | | 1352 | | |
1226 | |.ffunc_1 ipairs | 1353 | |.ffunc_1 ipairs |
1227 | | li AT, LJ_TTAB | 1354 | | li AT, LJ_TTAB |
1228 | | bne CARG3, AT, ->fff_fallback | 1355 | | bne SFARG1HI, AT, ->fff_fallback |
1229 | |. lw PC, FRAME_PC(BASE) | 1356 | |. lw PC, FRAME_PC(BASE) |
1230 | #if LJ_52 | 1357 | #if LJ_52 |
1231 | | lw TAB:TMP2, TAB:CARG1->metatable | 1358 | | lw TAB:TMP2, TAB:SFARG1LO->metatable |
1232 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1359 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1360 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1233 | | bnez TAB:TMP2, ->fff_fallback | 1361 | | bnez TAB:TMP2, ->fff_fallback |
1234 | #else | 1362 | #else |
1235 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1363 | | lw TMP0, CFUNC:RB->upvalue[0].u32.hi |
1364 | | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1236 | #endif | 1365 | #endif |
1237 | |. addiu RA, BASE, -8 | 1366 | |. addiu RA, BASE, -8 |
1238 | | sw r0, 8+HI(BASE) | 1367 | | sw TISNUM, 8+HI(BASE) |
1239 | | sw r0, 8+LO(BASE) | 1368 | | sw r0, 8+LO(BASE) |
1240 | | li RD, (3+1)*8 | 1369 | | sw TMP0, HI(RA) |
1370 | | sw TMP1, LO(RA) | ||
1241 | | b ->fff_res | 1371 | | b ->fff_res |
1242 | |. sdc1 f0, 0(RA) | 1372 | |. li RD, (3+1)*8 |
1243 | | | 1373 | | |
1244 | |//-- Base library: catch errors ---------------------------------------- | 1374 | |//-- Base library: catch errors ---------------------------------------- |
1245 | | | 1375 | | |
@@ -1259,8 +1389,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1259 | | sltiu AT, NARGS8:RC, 16 | 1389 | | sltiu AT, NARGS8:RC, 16 |
1260 | | lw CARG4, 8+HI(BASE) | 1390 | | lw CARG4, 8+HI(BASE) |
1261 | | bnez AT, ->fff_fallback | 1391 | | bnez AT, ->fff_fallback |
1262 | |. ldc1 FARG2, 8(BASE) | 1392 | |. lw CARG3, 8+LO(BASE) |
1263 | | ldc1 FARG1, 0(BASE) | 1393 | | lw CARG1, LO(BASE) |
1394 | | lw CARG2, HI(BASE) | ||
1264 | | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | 1395 | | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) |
1265 | | li AT, LJ_TFUNC | 1396 | | li AT, LJ_TFUNC |
1266 | | move TMP2, BASE | 1397 | | move TMP2, BASE |
@@ -1268,9 +1399,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1268 | | addiu BASE, BASE, 16 | 1399 | | addiu BASE, BASE, 16 |
1269 | | // Remember active hook before pcall. | 1400 | | // Remember active hook before pcall. |
1270 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT | 1401 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT |
1271 | | sdc1 FARG2, 0(TMP2) // Swap function and traceback. | 1402 | | sw CARG3, LO(TMP2) // Swap function and traceback. |
1403 | | sw CARG4, HI(TMP2) | ||
1272 | | andi TMP3, TMP3, 1 | 1404 | | andi TMP3, TMP3, 1 |
1273 | | sdc1 FARG1, 8(TMP2) | 1405 | | sw CARG1, 8+LO(TMP2) |
1406 | | sw CARG2, 8+HI(TMP2) | ||
1274 | | addiu PC, TMP3, 16+FRAME_PCALL | 1407 | | addiu PC, TMP3, 16+FRAME_PCALL |
1275 | | b ->vm_call_dispatch | 1408 | | b ->vm_call_dispatch |
1276 | |. addiu NARGS8:RC, NARGS8:RC, -16 | 1409 | |. addiu NARGS8:RC, NARGS8:RC, -16 |
@@ -1279,7 +1412,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1279 | | | 1412 | | |
1280 | |.macro coroutine_resume_wrap, resume | 1413 | |.macro coroutine_resume_wrap, resume |
1281 | |.if resume | 1414 | |.if resume |
1282 | |.ffunc_1 coroutine_resume | 1415 | |.ffunc coroutine_resume |
1416 | | lw CARG3, HI(BASE) | ||
1417 | | beqz NARGS8:RC, ->fff_fallback | ||
1418 | |. lw CARG1, LO(BASE) | ||
1283 | | li AT, LJ_TTHREAD | 1419 | | li AT, LJ_TTHREAD |
1284 | | bne CARG3, AT, ->fff_fallback | 1420 | | bne CARG3, AT, ->fff_fallback |
1285 | |.else | 1421 | |.else |
@@ -1314,11 +1450,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
1314 | | move CARG3, CARG2 | 1450 | | move CARG3, CARG2 |
1315 | | sw BASE, L->top | 1451 | | sw BASE, L->top |
1316 | |2: // Move args to coroutine. | 1452 | |2: // Move args to coroutine. |
1317 | | ldc1 f0, 0(BASE) | 1453 | | lw SFRETHI, HI(BASE) |
1454 | | lw SFRETLO, LO(BASE) | ||
1318 | | sltu AT, BASE, TMP1 | 1455 | | sltu AT, BASE, TMP1 |
1319 | | beqz AT, >3 | 1456 | | beqz AT, >3 |
1320 | |. addiu BASE, BASE, 8 | 1457 | |. addiu BASE, BASE, 8 |
1321 | | sdc1 f0, 0(CARG3) | 1458 | | sw SFRETHI, HI(CARG3) |
1459 | | sw SFRETLO, LO(CARG3) | ||
1322 | | b <2 | 1460 | | b <2 |
1323 | |. addiu CARG3, CARG3, 8 | 1461 | |. addiu CARG3, CARG3, 8 |
1324 | |3: | 1462 | |3: |
@@ -1331,6 +1469,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1331 | | lw TMP3, L:RA->top | 1469 | | lw TMP3, L:RA->top |
1332 | | li_vmstate INTERP | 1470 | | li_vmstate INTERP |
1333 | | lw BASE, L->base | 1471 | | lw BASE, L->base |
1472 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
1334 | | st_vmstate | 1473 | | st_vmstate |
1335 | | beqz AT, >8 | 1474 | | beqz AT, >8 |
1336 | |. subu RD, TMP3, TMP2 | 1475 | |. subu RD, TMP3, TMP2 |
@@ -1343,10 +1482,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1343 | | sw TMP2, L:RA->top // Clear coroutine stack. | 1482 | | sw TMP2, L:RA->top // Clear coroutine stack. |
1344 | | move TMP1, BASE | 1483 | | move TMP1, BASE |
1345 | |5: // Move results from coroutine. | 1484 | |5: // Move results from coroutine. |
1346 | | ldc1 f0, 0(TMP2) | 1485 | | lw SFRETHI, HI(TMP2) |
1486 | | lw SFRETLO, LO(TMP2) | ||
1347 | | addiu TMP2, TMP2, 8 | 1487 | | addiu TMP2, TMP2, 8 |
1348 | | sltu AT, TMP2, TMP3 | 1488 | | sltu AT, TMP2, TMP3 |
1349 | | sdc1 f0, 0(TMP1) | 1489 | | sw SFRETHI, HI(TMP1) |
1490 | | sw SFRETLO, LO(TMP1) | ||
1350 | | bnez AT, <5 | 1491 | | bnez AT, <5 |
1351 | |. addiu TMP1, TMP1, 8 | 1492 | |. addiu TMP1, TMP1, 8 |
1352 | |6: | 1493 | |6: |
@@ -1371,12 +1512,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1371 | |.if resume | 1512 | |.if resume |
1372 | | addiu TMP3, TMP3, -8 | 1513 | | addiu TMP3, TMP3, -8 |
1373 | | li TMP1, LJ_TFALSE | 1514 | | li TMP1, LJ_TFALSE |
1374 | | ldc1 f0, 0(TMP3) | 1515 | | lw SFRETHI, HI(TMP3) |
1516 | | lw SFRETLO, LO(TMP3) | ||
1375 | | sw TMP3, L:RA->top // Remove error from coroutine stack. | 1517 | | sw TMP3, L:RA->top // Remove error from coroutine stack. |
1376 | | li RD, (2+1)*8 | 1518 | | li RD, (2+1)*8 |
1377 | | sw TMP1, -8+HI(BASE) // Prepend false to results. | 1519 | | sw TMP1, -8+HI(BASE) // Prepend false to results. |
1378 | | addiu RA, BASE, -8 | 1520 | | addiu RA, BASE, -8 |
1379 | | sdc1 f0, 0(BASE) // Copy error message. | 1521 | | sw SFRETHI, HI(BASE) // Copy error message. |
1522 | | sw SFRETLO, LO(BASE) | ||
1380 | | b <7 | 1523 | | b <7 |
1381 | |. andi TMP0, PC, FRAME_TYPE | 1524 | |. andi TMP0, PC, FRAME_TYPE |
1382 | |.else | 1525 | |.else |
@@ -1412,20 +1555,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
1412 | | | 1555 | | |
1413 | |//-- Math library ------------------------------------------------------- | 1556 | |//-- Math library ------------------------------------------------------- |
1414 | | | 1557 | | |
1415 | |.ffunc_n math_abs | 1558 | |.ffunc_1 math_abs |
1416 | |. abs.d FRET1, FARG1 | 1559 | | bne SFARG1HI, TISNUM, >1 |
1417 | |->fff_resn: | 1560 | |. sra TMP0, SFARG1LO, 31 |
1418 | | lw PC, FRAME_PC(BASE) | 1561 | | xor TMP1, SFARG1LO, TMP0 |
1419 | | addiu RA, BASE, -8 | 1562 | | subu SFARG1LO, TMP1, TMP0 |
1420 | | b ->fff_res1 | 1563 | | bgez SFARG1LO, ->fff_restv |
1421 | |. sdc1 FRET1, -8(BASE) | 1564 | |. nop |
1565 | | lui SFARG1HI, 0x41e0 // 2^31 as a double. | ||
1566 | | b ->fff_restv | ||
1567 | |. li SFARG1LO, 0 | ||
1568 | |1: | ||
1569 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
1570 | | beqz AT, ->fff_fallback | ||
1571 | |. sll SFARG1HI, SFARG1HI, 1 | ||
1572 | | srl SFARG1HI, SFARG1HI, 1 | ||
1573 | |// fallthrough | ||
1422 | | | 1574 | | |
1423 | |->fff_restv: | 1575 | |->fff_restv: |
1424 | | // CARG3/CARG1 = TValue result. | 1576 | | // SFARG1LO/SFARG1HI = TValue result. |
1425 | | lw PC, FRAME_PC(BASE) | 1577 | | lw PC, FRAME_PC(BASE) |
1426 | | sw CARG3, -8+HI(BASE) | 1578 | | sw SFARG1HI, -8+HI(BASE) |
1427 | | addiu RA, BASE, -8 | 1579 | | addiu RA, BASE, -8 |
1428 | | sw CARG1, -8+LO(BASE) | 1580 | | sw SFARG1LO, -8+LO(BASE) |
1429 | |->fff_res1: | 1581 | |->fff_res1: |
1430 | | // RA = results, PC = return. | 1582 | | // RA = results, PC = return. |
1431 | | li RD, (1+1)*8 | 1583 | | li RD, (1+1)*8 |
@@ -1454,15 +1606,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1454 | |. sw TISNIL, -8+HI(TMP1) | 1606 | |. sw TISNIL, -8+HI(TMP1) |
1455 | | | 1607 | | |
1456 | |.macro math_extern, func | 1608 | |.macro math_extern, func |
1457 | |->ff_math_ .. func: | 1609 | | .ffunc math_ .. func |
1458 | | lw CARG3, HI(BASE) | 1610 | | lw SFARG1HI, HI(BASE) |
1459 | | beqz NARGS8:RC, ->fff_fallback | 1611 | | beqz NARGS8:RC, ->fff_fallback |
1460 | |. load_got func | 1612 | |. load_got func |
1461 | | sltiu AT, CARG3, LJ_TISNUM | 1613 | | sltiu AT, SFARG1HI, LJ_TISNUM |
1462 | | beqz AT, ->fff_fallback | 1614 | | beqz AT, ->fff_fallback |
1463 | |. nop | 1615 | |.if FPU |
1464 | | call_extern | ||
1465 | |. ldc1 FARG1, 0(BASE) | 1616 | |. ldc1 FARG1, 0(BASE) |
1617 | |.else | ||
1618 | |. lw SFARG1LO, LO(BASE) | ||
1619 | |.endif | ||
1620 | | call_extern | ||
1621 | |. nop | ||
1466 | | b ->fff_resn | 1622 | | b ->fff_resn |
1467 | |. nop | 1623 | |. nop |
1468 | |.endmacro | 1624 | |.endmacro |
@@ -1476,10 +1632,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1476 | |. nop | 1632 | |. nop |
1477 | |.endmacro | 1633 | |.endmacro |
1478 | | | 1634 | | |
1635 | |// TODO: Return integer type if result is integer (own sf implementation). | ||
1479 | |.macro math_round, func | 1636 | |.macro math_round, func |
1480 | | .ffunc_n math_ .. func | 1637 | |->ff_math_ .. func: |
1481 | |. nop | 1638 | | lw SFARG1HI, HI(BASE) |
1639 | | beqz NARGS8:RC, ->fff_fallback | ||
1640 | |. lw SFARG1LO, LO(BASE) | ||
1641 | | beq SFARG1HI, TISNUM, ->fff_restv | ||
1642 | |. sltu AT, SFARG1HI, TISNUM | ||
1643 | | beqz AT, ->fff_fallback | ||
1644 | |.if FPU | ||
1645 | |. ldc1 FARG1, 0(BASE) | ||
1482 | | bal ->vm_ .. func | 1646 | | bal ->vm_ .. func |
1647 | |.else | ||
1648 | |. load_got func | ||
1649 | | call_extern | ||
1650 | |.endif | ||
1483 | |. nop | 1651 | |. nop |
1484 | | b ->fff_resn | 1652 | | b ->fff_resn |
1485 | |. nop | 1653 | |. nop |
@@ -1489,15 +1657,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1489 | | math_round ceil | 1657 | | math_round ceil |
1490 | | | 1658 | | |
1491 | |.ffunc math_log | 1659 | |.ffunc math_log |
1492 | | lw CARG3, HI(BASE) | ||
1493 | | li AT, 8 | 1660 | | li AT, 8 |
1494 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | 1661 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. |
1495 | |. load_got log | 1662 | |. lw SFARG1HI, HI(BASE) |
1496 | | sltiu AT, CARG3, LJ_TISNUM | 1663 | | sltiu AT, SFARG1HI, LJ_TISNUM |
1497 | | beqz AT, ->fff_fallback | 1664 | | beqz AT, ->fff_fallback |
1498 | |. nop | 1665 | |. load_got log |
1666 | |.if FPU | ||
1499 | | call_extern | 1667 | | call_extern |
1500 | |. ldc1 FARG1, 0(BASE) | 1668 | |. ldc1 FARG1, 0(BASE) |
1669 | |.else | ||
1670 | | call_extern | ||
1671 | |. lw SFARG1LO, LO(BASE) | ||
1672 | |.endif | ||
1501 | | b ->fff_resn | 1673 | | b ->fff_resn |
1502 | |. nop | 1674 | |. nop |
1503 | | | 1675 | | |
@@ -1516,23 +1688,43 @@ static void build_subroutines(BuildCtx *ctx) | |||
1516 | | math_extern2 atan2 | 1688 | | math_extern2 atan2 |
1517 | | math_extern2 fmod | 1689 | | math_extern2 fmod |
1518 | | | 1690 | | |
1691 | |.if FPU | ||
1519 | |.ffunc_n math_sqrt | 1692 | |.ffunc_n math_sqrt |
1520 | |. sqrt.d FRET1, FARG1 | 1693 | |. sqrt.d FRET1, FARG1 |
1521 | | b ->fff_resn | 1694 | |// fallthrough to ->fff_resn |
1522 | |. nop | 1695 | |.else |
1696 | | math_extern sqrt | ||
1697 | |.endif | ||
1698 | | | ||
1699 | |->fff_resn: | ||
1700 | | lw PC, FRAME_PC(BASE) | ||
1701 | | addiu RA, BASE, -8 | ||
1702 | |.if FPU | ||
1703 | | b ->fff_res1 | ||
1704 | |. sdc1 FRET1, -8(BASE) | ||
1705 | |.else | ||
1706 | | sw SFRETHI, -8+HI(BASE) | ||
1707 | | b ->fff_res1 | ||
1708 | |. sw SFRETLO, -8+LO(BASE) | ||
1709 | |.endif | ||
1523 | | | 1710 | | |
1524 | |->ff_math_deg: | ||
1525 | |.ffunc_n math_rad | ||
1526 | |. ldc1 FARG2, CFUNC:RB->upvalue[0] | ||
1527 | | b ->fff_resn | ||
1528 | |. mul.d FRET1, FARG1, FARG2 | ||
1529 | | | 1711 | | |
1530 | |.ffunc_nn math_ldexp | 1712 | |.ffunc math_ldexp |
1531 | | cvt.w.d FARG2, FARG2 | 1713 | | sltiu AT, NARGS8:RC, 16 |
1714 | | lw SFARG1HI, HI(BASE) | ||
1715 | | bnez AT, ->fff_fallback | ||
1716 | |. lw CARG4, 8+HI(BASE) | ||
1717 | | bne CARG4, TISNUM, ->fff_fallback | ||
1532 | | load_got ldexp | 1718 | | load_got ldexp |
1533 | | mfc1 CARG3, FARG2 | 1719 | |. sltu AT, SFARG1HI, TISNUM |
1720 | | beqz AT, ->fff_fallback | ||
1721 | |.if FPU | ||
1722 | |. ldc1 FARG1, 0(BASE) | ||
1723 | |.else | ||
1724 | |. lw SFARG1LO, LO(BASE) | ||
1725 | |.endif | ||
1534 | | call_extern | 1726 | | call_extern |
1535 | |. nop | 1727 | |. lw CARG3, 8+LO(BASE) |
1536 | | b ->fff_resn | 1728 | | b ->fff_resn |
1537 | |. nop | 1729 | |. nop |
1538 | | | 1730 | | |
@@ -1543,10 +1735,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1543 | |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 1735 | |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
1544 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) | 1736 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) |
1545 | | addiu RA, BASE, -8 | 1737 | | addiu RA, BASE, -8 |
1738 | |.if FPU | ||
1546 | | mtc1 TMP1, FARG2 | 1739 | | mtc1 TMP1, FARG2 |
1547 | | sdc1 FRET1, 0(RA) | 1740 | | sdc1 FRET1, 0(RA) |
1548 | | cvt.d.w FARG2, FARG2 | 1741 | | cvt.d.w FARG2, FARG2 |
1549 | | sdc1 FARG2, 8(RA) | 1742 | | sdc1 FARG2, 8(RA) |
1743 | |.else | ||
1744 | | sw SFRETLO, LO(RA) | ||
1745 | | sw SFRETHI, HI(RA) | ||
1746 | | sw TMP1, 8+LO(RA) | ||
1747 | | sw TISNUM, 8+HI(RA) | ||
1748 | |.endif | ||
1550 | | b ->fff_res | 1749 | | b ->fff_res |
1551 | |. li RD, (2+1)*8 | 1750 | |. li RD, (2+1)*8 |
1552 | | | 1751 | | |
@@ -1556,49 +1755,109 @@ static void build_subroutines(BuildCtx *ctx) | |||
1556 | | call_extern | 1755 | | call_extern |
1557 | |. addiu CARG3, BASE, -8 | 1756 | |. addiu CARG3, BASE, -8 |
1558 | | addiu RA, BASE, -8 | 1757 | | addiu RA, BASE, -8 |
1758 | |.if FPU | ||
1559 | | sdc1 FRET1, 0(BASE) | 1759 | | sdc1 FRET1, 0(BASE) |
1760 | |.else | ||
1761 | | sw SFRETLO, LO(BASE) | ||
1762 | | sw SFRETHI, HI(BASE) | ||
1763 | |.endif | ||
1560 | | b ->fff_res | 1764 | | b ->fff_res |
1561 | |. li RD, (2+1)*8 | 1765 | |. li RD, (2+1)*8 |
1562 | | | 1766 | | |
1563 | |.macro math_minmax, name, ismax | 1767 | |.macro math_minmax, name, intins, ismax |
1564 | |->ff_ .. name: | 1768 | | .ffunc_1 name |
1565 | | lw CARG3, HI(BASE) | 1769 | | addu TMP3, BASE, NARGS8:RC |
1566 | | beqz NARGS8:RC, ->fff_fallback | 1770 | | bne SFARG1HI, TISNUM, >5 |
1567 | |. ldc1 FRET1, 0(BASE) | 1771 | |. addiu TMP2, BASE, 8 |
1568 | | sltiu AT, CARG3, LJ_TISNUM | 1772 | |1: // Handle integers. |
1773 | |. lw SFARG2HI, HI(TMP2) | ||
1774 | | beq TMP2, TMP3, ->fff_restv | ||
1775 | |. lw SFARG2LO, LO(TMP2) | ||
1776 | | bne SFARG2HI, TISNUM, >3 | ||
1777 | |. slt AT, SFARG1LO, SFARG2LO | ||
1778 | | intins SFARG1LO, SFARG2LO, AT | ||
1779 | | b <1 | ||
1780 | |. addiu TMP2, TMP2, 8 | ||
1781 | | | ||
1782 | |3: // Convert intermediate result to number and continue with number loop. | ||
1783 | | sltiu AT, SFARG2HI, LJ_TISNUM | ||
1569 | | beqz AT, ->fff_fallback | 1784 | | beqz AT, ->fff_fallback |
1570 | |. addu TMP2, BASE, NARGS8:RC | 1785 | |.if FPU |
1571 | | addiu TMP1, BASE, 8 | 1786 | |. mtc1 SFARG1LO, FRET1 |
1572 | | beq TMP1, TMP2, ->fff_resn | 1787 | | cvt.d.w FRET1, FRET1 |
1573 | |1: | 1788 | | b >7 |
1574 | |. lw CARG3, HI(TMP1) | 1789 | |. ldc1 FARG1, 0(TMP2) |
1575 | | ldc1 FARG1, 0(TMP1) | 1790 | |.else |
1576 | | addiu TMP1, TMP1, 8 | 1791 | |. nop |
1577 | | sltiu AT, CARG3, LJ_TISNUM | 1792 | | bal ->vm_sfi2d_1 |
1793 | |. nop | ||
1794 | | b >7 | ||
1795 | |. nop | ||
1796 | |.endif | ||
1797 | | | ||
1798 | |5: | ||
1799 | |. sltiu AT, SFARG1HI, LJ_TISNUM | ||
1578 | | beqz AT, ->fff_fallback | 1800 | | beqz AT, ->fff_fallback |
1801 | |.if FPU | ||
1802 | |. ldc1 FRET1, 0(BASE) | ||
1803 | |.endif | ||
1804 | | | ||
1805 | |6: // Handle numbers. | ||
1806 | |. lw SFARG2HI, HI(TMP2) | ||
1807 | |.if FPU | ||
1808 | | beq TMP2, TMP3, ->fff_resn | ||
1809 | |.else | ||
1810 | | beq TMP2, TMP3, ->fff_restv | ||
1811 | |.endif | ||
1812 | |. sltiu AT, SFARG2HI, LJ_TISNUM | ||
1813 | | beqz AT, >8 | ||
1814 | |.if FPU | ||
1815 | |. ldc1 FARG1, 0(TMP2) | ||
1816 | |.else | ||
1817 | |. lw SFARG2LO, LO(TMP2) | ||
1818 | |.endif | ||
1819 | |7: | ||
1820 | |.if FPU | ||
1579 | |.if ismax | 1821 | |.if ismax |
1580 | |. c.olt.d FARG1, FRET1 | 1822 | | c.olt.d FARG1, FRET1 |
1581 | |.else | 1823 | |.else |
1582 | |. c.olt.d FRET1, FARG1 | 1824 | | c.olt.d FRET1, FARG1 |
1825 | |.endif | ||
1826 | | movf.d FRET1, FARG1 | ||
1827 | |.else | ||
1828 | |.if ismax | ||
1829 | | bal ->vm_sfcmpogt | ||
1830 | |.else | ||
1831 | | bal ->vm_sfcmpolt | ||
1583 | |.endif | 1832 | |.endif |
1584 | | bne TMP1, TMP2, <1 | ||
1585 | |. movf.d FRET1, FARG1 | ||
1586 | | b ->fff_resn | ||
1587 | |. nop | 1833 | |. nop |
1834 | | movz SFARG1LO, SFARG2LO, CRET1 | ||
1835 | | movz SFARG1HI, SFARG2HI, CRET1 | ||
1836 | |.endif | ||
1837 | | b <6 | ||
1838 | |. addiu TMP2, TMP2, 8 | ||
1839 | | | ||
1840 | |8: // Convert integer to number and continue with number loop. | ||
1841 | | bne SFARG2HI, TISNUM, ->fff_fallback | ||
1842 | |.if FPU | ||
1843 | |. lwc1 FARG1, LO(TMP2) | ||
1844 | | b <7 | ||
1845 | |. cvt.d.w FARG1, FARG1 | ||
1846 | |.else | ||
1847 | |. nop | ||
1848 | | bal ->vm_sfi2d_2 | ||
1849 | |. nop | ||
1850 | | b <7 | ||
1851 | |. nop | ||
1852 | |.endif | ||
1853 | | | ||
1588 | |.endmacro | 1854 | |.endmacro |
1589 | | | 1855 | | |
1590 | | math_minmax math_min, 0 | 1856 | | math_minmax math_min, movz, 0 |
1591 | | math_minmax math_max, 1 | 1857 | | math_minmax math_max, movn, 1 |
1592 | | | 1858 | | |
1593 | |//-- String library ----------------------------------------------------- | 1859 | |//-- String library ----------------------------------------------------- |
1594 | | | 1860 | | |
1595 | |.ffunc_1 string_len | ||
1596 | | li AT, LJ_TSTR | ||
1597 | | bne CARG3, AT, ->fff_fallback | ||
1598 | |. nop | ||
1599 | | b ->fff_resi | ||
1600 | |. lw CRET1, STR:CARG1->len | ||
1601 | | | ||
1602 | |.ffunc string_byte // Only handle the 1-arg case here. | 1861 | |.ffunc string_byte // Only handle the 1-arg case here. |
1603 | | lw CARG3, HI(BASE) | 1862 | | lw CARG3, HI(BASE) |
1604 | | lw STR:CARG1, LO(BASE) | 1863 | | lw STR:CARG1, LO(BASE) |
@@ -1608,33 +1867,31 @@ static void build_subroutines(BuildCtx *ctx) | |||
1608 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. | 1867 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. |
1609 | |. nop | 1868 | |. nop |
1610 | | lw TMP0, STR:CARG1->len | 1869 | | lw TMP0, STR:CARG1->len |
1611 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | ||
1612 | | addiu RA, BASE, -8 | 1870 | | addiu RA, BASE, -8 |
1871 | | lw PC, FRAME_PC(BASE) | ||
1613 | | sltu RD, r0, TMP0 | 1872 | | sltu RD, r0, TMP0 |
1614 | | mtc1 TMP1, f0 | 1873 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). |
1615 | | addiu RD, RD, 1 | 1874 | | addiu RD, RD, 1 |
1616 | | cvt.d.w f0, f0 | ||
1617 | | lw PC, FRAME_PC(BASE) | ||
1618 | | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 | 1875 | | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 |
1876 | | sw TISNUM, HI(RA) | ||
1619 | | b ->fff_res | 1877 | | b ->fff_res |
1620 | |. sdc1 f0, 0(RA) | 1878 | |. sw TMP1, LO(RA) |
1621 | | | 1879 | | |
1622 | |.ffunc string_char // Only handle the 1-arg case here. | 1880 | |.ffunc string_char // Only handle the 1-arg case here. |
1623 | | ffgccheck | 1881 | | ffgccheck |
1624 | |. nop | 1882 | |. nop |
1625 | | lw CARG3, HI(BASE) | 1883 | | lw CARG3, HI(BASE) |
1626 | | ldc1 FARG1, 0(BASE) | 1884 | | lw CARG1, LO(BASE) |
1627 | | li AT, 8 | 1885 | | li TMP1, 255 |
1628 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | 1886 | | xori AT, NARGS8:RC, 8 // Exactly 1 argument. |
1629 | |. sltiu AT, CARG3, LJ_TISNUM | 1887 | | xor TMP0, CARG3, TISNUM // Integer. |
1630 | | beqz AT, ->fff_fallback | 1888 | | sltu TMP1, TMP1, CARG1 // !(255 < n). |
1889 | | or AT, AT, TMP0 | ||
1890 | | or AT, AT, TMP1 | ||
1891 | | bnez AT, ->fff_fallback | ||
1631 | |. li CARG3, 1 | 1892 | |. li CARG3, 1 |
1632 | | cvt.w.d FARG1, FARG1 | ||
1633 | | addiu CARG2, sp, ARG5_OFS | 1893 | | addiu CARG2, sp, ARG5_OFS |
1634 | | sltiu AT, TMP0, 256 | 1894 | | sb CARG1, ARG5 |
1635 | | mfc1 TMP0, FARG1 | ||
1636 | | beqz AT, ->fff_fallback | ||
1637 | |. sw TMP0, ARG5 | ||
1638 | |->fff_newstr: | 1895 | |->fff_newstr: |
1639 | | load_got lj_str_new | 1896 | | load_got lj_str_new |
1640 | | sw BASE, L->base | 1897 | | sw BASE, L->base |
@@ -1643,35 +1900,30 @@ static void build_subroutines(BuildCtx *ctx) | |||
1643 | |. move CARG1, L | 1900 | |. move CARG1, L |
1644 | | // Returns GCstr *. | 1901 | | // Returns GCstr *. |
1645 | | lw BASE, L->base | 1902 | | lw BASE, L->base |
1646 | | move CARG1, CRET1 | 1903 | |->fff_resstr: |
1904 | | move SFARG1LO, CRET1 | ||
1647 | | b ->fff_restv | 1905 | | b ->fff_restv |
1648 | |. li CARG3, LJ_TSTR | 1906 | |. li SFARG1HI, LJ_TSTR |
1649 | | | 1907 | | |
1650 | |.ffunc string_sub | 1908 | |.ffunc string_sub |
1651 | | ffgccheck | 1909 | | ffgccheck |
1652 | |. nop | 1910 | |. nop |
1653 | | addiu AT, NARGS8:RC, -16 | 1911 | | addiu AT, NARGS8:RC, -16 |
1654 | | lw CARG3, 16+HI(BASE) | 1912 | | lw CARG3, 16+HI(BASE) |
1655 | | ldc1 f0, 16(BASE) | ||
1656 | | lw TMP0, HI(BASE) | 1913 | | lw TMP0, HI(BASE) |
1657 | | lw STR:CARG1, LO(BASE) | 1914 | | lw STR:CARG1, LO(BASE) |
1658 | | bltz AT, ->fff_fallback | 1915 | | bltz AT, ->fff_fallback |
1659 | | lw CARG2, 8+HI(BASE) | 1916 | |. lw CARG2, 8+HI(BASE) |
1660 | | ldc1 f2, 8(BASE) | ||
1661 | | beqz AT, >1 | 1917 | | beqz AT, >1 |
1662 | |. li CARG4, -1 | 1918 | |. li CARG4, -1 |
1663 | | cvt.w.d f0, f0 | 1919 | | bne CARG3, TISNUM, ->fff_fallback |
1664 | | sltiu AT, CARG3, LJ_TISNUM | 1920 | |. lw CARG4, 16+LO(BASE) |
1665 | | beqz AT, ->fff_fallback | ||
1666 | |. mfc1 CARG4, f0 | ||
1667 | |1: | 1921 | |1: |
1668 | | sltiu AT, CARG2, LJ_TISNUM | 1922 | | bne CARG2, TISNUM, ->fff_fallback |
1669 | | beqz AT, ->fff_fallback | ||
1670 | |. li AT, LJ_TSTR | 1923 | |. li AT, LJ_TSTR |
1671 | | cvt.w.d f2, f2 | ||
1672 | | bne TMP0, AT, ->fff_fallback | 1924 | | bne TMP0, AT, ->fff_fallback |
1673 | |. lw CARG2, STR:CARG1->len | 1925 | |. lw CARG3, 8+LO(BASE) |
1674 | | mfc1 CARG3, f2 | 1926 | | lw CARG2, STR:CARG1->len |
1675 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | 1927 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end |
1676 | | slt AT, CARG4, r0 | 1928 | | slt AT, CARG4, r0 |
1677 | | addiu TMP0, CARG2, 1 | 1929 | | addiu TMP0, CARG2, 1 |
@@ -1693,139 +1945,130 @@ static void build_subroutines(BuildCtx *ctx) | |||
1693 | | bgez CARG3, ->fff_newstr | 1945 | | bgez CARG3, ->fff_newstr |
1694 | |. addiu CARG3, CARG3, 1 // len++ | 1946 | |. addiu CARG3, CARG3, 1 // len++ |
1695 | |->fff_emptystr: // Return empty string. | 1947 | |->fff_emptystr: // Return empty string. |
1696 | | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) | 1948 | | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) |
1697 | | b ->fff_restv | 1949 | | b ->fff_restv |
1698 | |. li CARG3, LJ_TSTR | 1950 | |. li SFARG1HI, LJ_TSTR |
1699 | | | 1951 | | |
1700 | |.ffunc string_rep // Only handle the 1-char case inline. | 1952 | |.macro ffstring_op, name |
1701 | | ffgccheck | 1953 | | .ffunc string_ .. name |
1702 | |. nop | ||
1703 | | lw TMP0, HI(BASE) | ||
1704 | | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments. | ||
1705 | | lw CARG4, 8+HI(BASE) | ||
1706 | | lw STR:CARG1, LO(BASE) | ||
1707 | | addiu TMP0, TMP0, -LJ_TSTR | ||
1708 | | ldc1 f0, 8(BASE) | ||
1709 | | or AT, AT, TMP0 | ||
1710 | | bnez AT, ->fff_fallback | ||
1711 | |. sltiu AT, CARG4, LJ_TISNUM | ||
1712 | | cvt.w.d f0, f0 | ||
1713 | | beqz AT, ->fff_fallback | ||
1714 | |. lw TMP0, STR:CARG1->len | ||
1715 | | mfc1 CARG3, f0 | ||
1716 | | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
1717 | | li AT, 1 | ||
1718 | | blez CARG3, ->fff_emptystr // Count <= 0? | ||
1719 | |. sltu AT, AT, TMP0 | ||
1720 | | beqz TMP0, ->fff_emptystr // Zero length string? | ||
1721 | |. sltu TMP0, TMP1, CARG3 | ||
1722 | | or AT, AT, TMP0 | ||
1723 | | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
1724 | | bnez AT, ->fff_fallback // Fallback for > 1-char strings. | ||
1725 | |. lbu TMP0, STR:CARG1[1] | ||
1726 | | addu TMP2, CARG2, CARG3 | ||
1727 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
1728 | | addiu TMP2, TMP2, -1 | ||
1729 | | sltu AT, CARG2, TMP2 | ||
1730 | | bnez AT, <1 | ||
1731 | |. sb TMP0, 0(TMP2) | ||
1732 | | b ->fff_newstr | ||
1733 | |. nop | ||
1734 | | | ||
1735 | |.ffunc string_reverse | ||
1736 | | ffgccheck | 1954 | | ffgccheck |
1737 | |. nop | 1955 | |. nop |
1738 | | lw CARG3, HI(BASE) | 1956 | | lw CARG3, HI(BASE) |
1739 | | lw STR:CARG1, LO(BASE) | 1957 | | lw STR:CARG2, LO(BASE) |
1740 | | beqz NARGS8:RC, ->fff_fallback | 1958 | | beqz NARGS8:RC, ->fff_fallback |
1741 | |. li AT, LJ_TSTR | 1959 | |. li AT, LJ_TSTR |
1742 | | bne CARG3, AT, ->fff_fallback | 1960 | | bne CARG3, AT, ->fff_fallback |
1743 | |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | 1961 | |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) |
1744 | | lw CARG3, STR:CARG1->len | 1962 | | load_got lj_buf_putstr_ .. name |
1745 | | addiu CARG1, STR:CARG1, #STR | 1963 | | lw TMP0, SBUF:CARG1->b |
1746 | | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | 1964 | | sw L, SBUF:CARG1->L |
1747 | | sltu AT, TMP1, CARG3 | 1965 | | sw BASE, L->base |
1748 | | bnez AT, ->fff_fallback | 1966 | | sw TMP0, SBUF:CARG1->w |
1749 | |. addu TMP3, CARG1, CARG3 | 1967 | | call_intern extern lj_buf_putstr_ .. name |
1750 | | addu CARG4, CARG2, CARG3 | 1968 | |. sw PC, SAVE_PC |
1751 | |1: // Reverse string copy. | 1969 | | load_got lj_buf_tostr |
1752 | | lbu TMP1, 0(CARG1) | 1970 | | call_intern lj_buf_tostr |
1753 | | sltu AT, CARG1, TMP3 | 1971 | |. move SBUF:CARG1, SBUF:CRET1 |
1754 | | beqz AT, ->fff_newstr | 1972 | | b ->fff_resstr |
1755 | |. addiu CARG1, CARG1, 1 | 1973 | |. lw BASE, L->base |
1756 | | addiu CARG4, CARG4, -1 | ||
1757 | | b <1 | ||
1758 | | sb TMP1, 0(CARG4) | ||
1759 | | | ||
1760 | |.macro ffstring_case, name, lo | ||
1761 | | .ffunc name | ||
1762 | | ffgccheck | ||
1763 | |. nop | ||
1764 | | lw CARG3, HI(BASE) | ||
1765 | | lw STR:CARG1, LO(BASE) | ||
1766 | | beqz NARGS8:RC, ->fff_fallback | ||
1767 | |. li AT, LJ_TSTR | ||
1768 | | bne CARG3, AT, ->fff_fallback | ||
1769 | |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
1770 | | lw CARG3, STR:CARG1->len | ||
1771 | | addiu CARG1, STR:CARG1, #STR | ||
1772 | | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
1773 | | sltu AT, TMP1, CARG3 | ||
1774 | | bnez AT, ->fff_fallback | ||
1775 | |. addu TMP3, CARG1, CARG3 | ||
1776 | | move CARG4, CARG2 | ||
1777 | |1: // ASCII case conversion. | ||
1778 | | lbu TMP1, 0(CARG1) | ||
1779 | | sltu AT, CARG1, TMP3 | ||
1780 | | beqz AT, ->fff_newstr | ||
1781 | |. addiu TMP0, TMP1, -lo | ||
1782 | | xori TMP2, TMP1, 0x20 | ||
1783 | | sltiu AT, TMP0, 26 | ||
1784 | | movn TMP1, TMP2, AT | ||
1785 | | addiu CARG1, CARG1, 1 | ||
1786 | | sb TMP1, 0(CARG4) | ||
1787 | | b <1 | ||
1788 | |. addiu CARG4, CARG4, 1 | ||
1789 | |.endmacro | 1974 | |.endmacro |
1790 | | | 1975 | | |
1791 | |ffstring_case string_lower, 65 | 1976 | |ffstring_op reverse |
1792 | |ffstring_case string_upper, 97 | 1977 | |ffstring_op lower |
1978 | |ffstring_op upper | ||
1793 | | | 1979 | | |
1794 | |//-- Table library ------------------------------------------------------ | 1980 | |//-- Bit library -------------------------------------------------------- |
1795 | | | 1981 | | |
1796 | |.ffunc_1 table_getn | 1982 | |->vm_tobit_fb: |
1797 | | li AT, LJ_TTAB | 1983 | | beqz TMP1, ->fff_fallback |
1798 | | bne CARG3, AT, ->fff_fallback | 1984 | |.if FPU |
1799 | |. load_got lj_tab_len | 1985 | |. ldc1 FARG1, 0(BASE) |
1800 | | call_intern lj_tab_len // (GCtab *t) | 1986 | | add.d FARG1, FARG1, TOBIT |
1801 | |. nop | 1987 | | jr ra |
1802 | | // Returns uint32_t (but less than 2^31). | 1988 | |. mfc1 CRET1, FARG1 |
1803 | | b ->fff_resi | 1989 | |.else |
1990 | |// FP number to bit conversion for soft-float. | ||
1991 | |->vm_tobit: | ||
1992 | | sll TMP0, SFARG1HI, 1 | ||
1993 | | lui AT, 0x0020 | ||
1994 | | addu TMP0, TMP0, AT | ||
1995 | | slt AT, TMP0, r0 | ||
1996 | | movz SFARG1LO, r0, AT | ||
1997 | | beqz AT, >2 | ||
1998 | |. li TMP1, 0x3e0 | ||
1999 | | not TMP1, TMP1 | ||
2000 | | sra TMP0, TMP0, 21 | ||
2001 | | subu TMP0, TMP1, TMP0 | ||
2002 | | slt AT, TMP0, r0 | ||
2003 | | bnez AT, >1 | ||
2004 | |. sll TMP1, SFARG1HI, 11 | ||
2005 | | lui AT, 0x8000 | ||
2006 | | or TMP1, TMP1, AT | ||
2007 | | srl AT, SFARG1LO, 21 | ||
2008 | | or TMP1, TMP1, AT | ||
2009 | | slt AT, SFARG1HI, r0 | ||
2010 | | beqz AT, >2 | ||
2011 | |. srlv SFARG1LO, TMP1, TMP0 | ||
2012 | | subu SFARG1LO, r0, SFARG1LO | ||
2013 | |2: | ||
2014 | | jr ra | ||
2015 | |. move CRET1, SFARG1LO | ||
2016 | |1: | ||
2017 | | addiu TMP0, TMP0, 21 | ||
2018 | | srlv TMP1, SFARG1LO, TMP0 | ||
2019 | | li AT, 20 | ||
2020 | | subu TMP0, AT, TMP0 | ||
2021 | | sll SFARG1LO, SFARG1HI, 12 | ||
2022 | | sllv AT, SFARG1LO, TMP0 | ||
2023 | | or SFARG1LO, TMP1, AT | ||
2024 | | slt AT, SFARG1HI, r0 | ||
2025 | | beqz AT, <2 | ||
1804 | |. nop | 2026 | |. nop |
1805 | | | 2027 | | jr ra |
1806 | |//-- Bit library -------------------------------------------------------- | 2028 | |. subu CRET1, r0, SFARG1LO |
2029 | |.endif | ||
1807 | | | 2030 | | |
1808 | |.macro .ffunc_bit, name | 2031 | |.macro .ffunc_bit, name |
1809 | | .ffunc_n bit_..name | 2032 | | .ffunc_1 bit_..name |
1810 | |. add.d FARG1, FARG1, TOBIT | 2033 | | beq SFARG1HI, TISNUM, >6 |
1811 | | mfc1 CRET1, FARG1 | 2034 | |. move CRET1, SFARG1LO |
2035 | | bal ->vm_tobit_fb | ||
2036 | |. sltu TMP1, SFARG1HI, TISNUM | ||
2037 | |6: | ||
1812 | |.endmacro | 2038 | |.endmacro |
1813 | | | 2039 | | |
1814 | |.macro .ffunc_bit_op, name, ins | 2040 | |.macro .ffunc_bit_op, name, ins |
1815 | | .ffunc_bit name | 2041 | | .ffunc_bit name |
1816 | | addiu TMP1, BASE, 8 | 2042 | | addiu TMP2, BASE, 8 |
1817 | | addu TMP2, BASE, NARGS8:RC | 2043 | | addu TMP3, BASE, NARGS8:RC |
1818 | |1: | 2044 | |1: |
1819 | | lw CARG4, HI(TMP1) | 2045 | | lw SFARG1HI, HI(TMP2) |
1820 | | beq TMP1, TMP2, ->fff_resi | 2046 | | beq TMP2, TMP3, ->fff_resi |
1821 | |. ldc1 FARG1, 0(TMP1) | 2047 | |. lw SFARG1LO, LO(TMP2) |
1822 | | sltiu AT, CARG4, LJ_TISNUM | 2048 | |.if FPU |
1823 | | beqz AT, ->fff_fallback | 2049 | | bne SFARG1HI, TISNUM, >2 |
1824 | | add.d FARG1, FARG1, TOBIT | 2050 | |. addiu TMP2, TMP2, 8 |
1825 | | mfc1 CARG2, FARG1 | ||
1826 | | ins CRET1, CRET1, CARG2 | ||
1827 | | b <1 | 2051 | | b <1 |
1828 | |. addiu TMP1, TMP1, 8 | 2052 | |. ins CRET1, CRET1, SFARG1LO |
2053 | |2: | ||
2054 | | ldc1 FARG1, -8(TMP2) | ||
2055 | | sltu TMP1, SFARG1HI, TISNUM | ||
2056 | | beqz TMP1, ->fff_fallback | ||
2057 | |. add.d FARG1, FARG1, TOBIT | ||
2058 | | mfc1 SFARG1LO, FARG1 | ||
2059 | | b <1 | ||
2060 | |. ins CRET1, CRET1, SFARG1LO | ||
2061 | |.else | ||
2062 | | beq SFARG1HI, TISNUM, >2 | ||
2063 | |. move CRET2, CRET1 | ||
2064 | | bal ->vm_tobit_fb | ||
2065 | |. sltu TMP1, SFARG1HI, TISNUM | ||
2066 | | move SFARG1LO, CRET2 | ||
2067 | |2: | ||
2068 | | ins CRET1, CRET1, SFARG1LO | ||
2069 | | b <1 | ||
2070 | |. addiu TMP2, TMP2, 8 | ||
2071 | |.endif | ||
1829 | |.endmacro | 2072 | |.endmacro |
1830 | | | 2073 | | |
1831 | |.ffunc_bit_op band, and | 2074 | |.ffunc_bit_op band, and |
@@ -1849,24 +2092,28 @@ static void build_subroutines(BuildCtx *ctx) | |||
1849 | |. not CRET1, CRET1 | 2092 | |. not CRET1, CRET1 |
1850 | | | 2093 | | |
1851 | |.macro .ffunc_bit_sh, name, ins, shmod | 2094 | |.macro .ffunc_bit_sh, name, ins, shmod |
1852 | | .ffunc_nn bit_..name | 2095 | | .ffunc_2 bit_..name |
1853 | |. add.d FARG1, FARG1, TOBIT | 2096 | | beq SFARG1HI, TISNUM, >1 |
1854 | | add.d FARG2, FARG2, TOBIT | 2097 | |. nop |
1855 | | mfc1 CARG1, FARG1 | 2098 | | bal ->vm_tobit_fb |
1856 | | mfc1 CARG2, FARG2 | 2099 | |. sltu TMP1, SFARG1HI, TISNUM |
2100 | | move SFARG1LO, CRET1 | ||
2101 | |1: | ||
2102 | | bne SFARG2HI, TISNUM, ->fff_fallback | ||
2103 | |. nop | ||
1857 | |.if shmod == 1 | 2104 | |.if shmod == 1 |
1858 | | li AT, 32 | 2105 | | li AT, 32 |
1859 | | subu TMP0, AT, CARG2 | 2106 | | subu TMP0, AT, SFARG2LO |
1860 | | sllv CARG2, CARG1, CARG2 | 2107 | | sllv SFARG2LO, SFARG1LO, SFARG2LO |
1861 | | srlv CARG1, CARG1, TMP0 | 2108 | | srlv SFARG1LO, SFARG1LO, TMP0 |
1862 | |.elif shmod == 2 | 2109 | |.elif shmod == 2 |
1863 | | li AT, 32 | 2110 | | li AT, 32 |
1864 | | subu TMP0, AT, CARG2 | 2111 | | subu TMP0, AT, SFARG2LO |
1865 | | srlv CARG2, CARG1, CARG2 | 2112 | | srlv SFARG2LO, SFARG1LO, SFARG2LO |
1866 | | sllv CARG1, CARG1, TMP0 | 2113 | | sllv SFARG1LO, SFARG1LO, TMP0 |
1867 | |.endif | 2114 | |.endif |
1868 | | b ->fff_resi | 2115 | | b ->fff_resi |
1869 | |. ins CRET1, CARG1, CARG2 | 2116 | |. ins CRET1, SFARG1LO, SFARG2LO |
1870 | |.endmacro | 2117 | |.endmacro |
1871 | | | 2118 | | |
1872 | |.ffunc_bit_sh lshift, sllv, 0 | 2119 | |.ffunc_bit_sh lshift, sllv, 0 |
@@ -1878,9 +2125,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1878 | | | 2125 | | |
1879 | |.ffunc_bit tobit | 2126 | |.ffunc_bit tobit |
1880 | |->fff_resi: | 2127 | |->fff_resi: |
1881 | | mtc1 CRET1, FRET1 | 2128 | | lw PC, FRAME_PC(BASE) |
1882 | | b ->fff_resn | 2129 | | addiu RA, BASE, -8 |
1883 | |. cvt.d.w FRET1, FRET1 | 2130 | | sw TISNUM, -8+HI(BASE) |
2131 | | b ->fff_res1 | ||
2132 | |. sw CRET1, -8+LO(BASE) | ||
1884 | | | 2133 | | |
1885 | |//----------------------------------------------------------------------- | 2134 | |//----------------------------------------------------------------------- |
1886 | | | 2135 | | |
@@ -2067,19 +2316,96 @@ static void build_subroutines(BuildCtx *ctx) | |||
2067 | | jr CRET1 | 2316 | | jr CRET1 |
2068 | |. lw INS, -4(PC) | 2317 | |. lw INS, -4(PC) |
2069 | | | 2318 | | |
2319 | |->cont_stitch: // Trace stitching. | ||
2320 | |.if JIT | ||
2321 | | // RA = resultptr, RB = meta base | ||
2322 | | lw INS, -4(PC) | ||
2323 | | lw TMP2, -24+LO(RB) // Save previous trace. | ||
2324 | | decode_RA8a RC, INS | ||
2325 | | addiu AT, MULTRES, -8 | ||
2326 | | decode_RA8b RC | ||
2327 | | beqz AT, >2 | ||
2328 | |. addu RC, BASE, RC // Call base. | ||
2329 | |1: // Move results down. | ||
2330 | | lw SFRETHI, HI(RA) | ||
2331 | | lw SFRETLO, LO(RA) | ||
2332 | | addiu AT, AT, -8 | ||
2333 | | addiu RA, RA, 8 | ||
2334 | | sw SFRETHI, HI(RC) | ||
2335 | | sw SFRETLO, LO(RC) | ||
2336 | | bnez AT, <1 | ||
2337 | |. addiu RC, RC, 8 | ||
2338 | |2: | ||
2339 | | decode_RA8a RA, INS | ||
2340 | | decode_RB8a RB, INS | ||
2341 | | decode_RA8b RA | ||
2342 | | decode_RB8b RB | ||
2343 | | addu RA, RA, RB | ||
2344 | | addu RA, BASE, RA | ||
2345 | |3: | ||
2346 | | sltu AT, RC, RA | ||
2347 | | bnez AT, >9 // More results wanted? | ||
2348 | |. nop | ||
2349 | | | ||
2350 | | lhu TMP3, TRACE:TMP2->traceno | ||
2351 | | lhu RD, TRACE:TMP2->link | ||
2352 | | beq RD, TMP3, ->cont_nop // Blacklisted. | ||
2353 | |. load_got lj_dispatch_stitch | ||
2354 | | bnez RD, =>BC_JLOOP // Jump to stitched trace. | ||
2355 | |. sll RD, RD, 3 | ||
2356 | | | ||
2357 | | // Stitch a new trace to the previous trace. | ||
2358 | | sw TMP3, DISPATCH_J(exitno)(DISPATCH) | ||
2359 | | sw L, DISPATCH_J(L)(DISPATCH) | ||
2360 | | sw BASE, L->base | ||
2361 | | addiu CARG1, DISPATCH, GG_DISP2J | ||
2362 | | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2363 | |. move CARG2, PC | ||
2364 | | b ->cont_nop | ||
2365 | |. lw BASE, L->base | ||
2366 | | | ||
2367 | |9: | ||
2368 | | sw TISNIL, HI(RC) | ||
2369 | | b <3 | ||
2370 | |. addiu RC, RC, 8 | ||
2371 | |.endif | ||
2372 | | | ||
2373 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2374 | #if LJ_HASPROFILE | ||
2375 | | load_got lj_dispatch_profile | ||
2376 | | sw MULTRES, SAVE_MULTRES | ||
2377 | | move CARG2, PC | ||
2378 | | sw BASE, L->base | ||
2379 | | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2380 | |. move CARG1, L | ||
2381 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2382 | | addiu PC, PC, -4 | ||
2383 | | b ->cont_nop | ||
2384 | |. lw BASE, L->base | ||
2385 | #endif | ||
2386 | | | ||
2070 | |//----------------------------------------------------------------------- | 2387 | |//----------------------------------------------------------------------- |
2071 | |//-- Trace exit handler ------------------------------------------------- | 2388 | |//-- Trace exit handler ------------------------------------------------- |
2072 | |//----------------------------------------------------------------------- | 2389 | |//----------------------------------------------------------------------- |
2073 | | | 2390 | | |
2074 | |.macro savex_, a, b | 2391 | |.macro savex_, a, b |
2392 | |.if FPU | ||
2075 | | sdc1 f..a, 16+a*8(sp) | 2393 | | sdc1 f..a, 16+a*8(sp) |
2076 | | sw r..a, 16+32*8+a*4(sp) | 2394 | | sw r..a, 16+32*8+a*4(sp) |
2077 | | sw r..b, 16+32*8+b*4(sp) | 2395 | | sw r..b, 16+32*8+b*4(sp) |
2396 | |.else | ||
2397 | | sw r..a, 16+a*4(sp) | ||
2398 | | sw r..b, 16+b*4(sp) | ||
2399 | |.endif | ||
2078 | |.endmacro | 2400 | |.endmacro |
2079 | | | 2401 | | |
2080 | |->vm_exit_handler: | 2402 | |->vm_exit_handler: |
2081 | |.if JIT | 2403 | |.if JIT |
2404 | |.if FPU | ||
2082 | | addiu sp, sp, -(16+32*8+32*4) | 2405 | | addiu sp, sp, -(16+32*8+32*4) |
2406 | |.else | ||
2407 | | addiu sp, sp, -(16+32*4) | ||
2408 | |.endif | ||
2083 | | savex_ 0, 1 | 2409 | | savex_ 0, 1 |
2084 | | savex_ 2, 3 | 2410 | | savex_ 2, 3 |
2085 | | savex_ 4, 5 | 2411 | | savex_ 4, 5 |
@@ -2094,25 +2420,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2094 | | savex_ 22, 23 | 2420 | | savex_ 22, 23 |
2095 | | savex_ 24, 25 | 2421 | | savex_ 24, 25 |
2096 | | savex_ 26, 27 | 2422 | | savex_ 26, 27 |
2423 | |.if FPU | ||
2097 | | sdc1 f28, 16+28*8(sp) | 2424 | | sdc1 f28, 16+28*8(sp) |
2098 | | sw r28, 16+32*8+28*4(sp) | ||
2099 | | sdc1 f30, 16+30*8(sp) | 2425 | | sdc1 f30, 16+30*8(sp) |
2426 | | sw r28, 16+32*8+28*4(sp) | ||
2100 | | sw r30, 16+32*8+30*4(sp) | 2427 | | sw r30, 16+32*8+30*4(sp) |
2101 | | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. | 2428 | | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. |
2429 | | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | ||
2430 | | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP | ||
2431 | |.else | ||
2432 | | sw r28, 16+28*4(sp) | ||
2433 | | sw r30, 16+30*4(sp) | ||
2434 | | sw r0, 16+31*4(sp) // Clear RID_TMP. | ||
2435 | | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. | ||
2436 | | sw TMP2, 16+29*4(sp) // Store sp in RID_SP | ||
2437 | |.endif | ||
2102 | | li_vmstate EXIT | 2438 | | li_vmstate EXIT |
2103 | | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | ||
2104 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 | 2439 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 |
2105 | | lw TMP1, 0(TMP2) // Load exit number. | 2440 | | lw TMP1, 0(TMP2) // Load exit number. |
2106 | | st_vmstate | 2441 | | st_vmstate |
2107 | | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. | 2442 | | lw L, DISPATCH_GL(cur_L)(DISPATCH) |
2108 | | lw L, DISPATCH_GL(jit_L)(DISPATCH) | 2443 | | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) |
2109 | | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) | ||
2110 | | load_got lj_trace_exit | 2444 | | load_got lj_trace_exit |
2111 | | sw L, DISPATCH_J(L)(DISPATCH) | 2445 | | sw L, DISPATCH_J(L)(DISPATCH) |
2112 | | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. | 2446 | | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. |
2447 | | sw BASE, L->base | ||
2113 | | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. | 2448 | | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. |
2114 | | addiu CARG1, DISPATCH, GG_DISP2J | 2449 | | addiu CARG1, DISPATCH, GG_DISP2J |
2115 | | sw BASE, L->base | 2450 | | sw r0, DISPATCH_GL(jit_base)(DISPATCH) |
2116 | | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) | 2451 | | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) |
2117 | |. addiu CARG2, sp, 16 | 2452 | |. addiu CARG2, sp, 16 |
2118 | | // Returns MULTRES (unscaled) or negated error code. | 2453 | | // Returns MULTRES (unscaled) or negated error code. |
@@ -2128,19 +2463,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
2128 | |.if JIT | 2463 | |.if JIT |
2129 | | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. | 2464 | | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. |
2130 | | lw L, SAVE_L | 2465 | | lw L, SAVE_L |
2131 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 | 2466 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 |
2467 | | sw BASE, L->base | ||
2132 | |1: | 2468 | |1: |
2133 | | bltz CRET1, >3 // Check for error from exit. | 2469 | | bltz CRET1, >9 // Check for error from exit. |
2134 | |. lw LFUNC:TMP1, FRAME_FUNC(BASE) | 2470 | |. lw LFUNC:RB, FRAME_FUNC(BASE) |
2135 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 2471 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2136 | | sll MULTRES, CRET1, 3 | 2472 | | sll MULTRES, CRET1, 3 |
2137 | | li TISNIL, LJ_TNIL | 2473 | | li TISNIL, LJ_TNIL |
2474 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
2138 | | sw MULTRES, SAVE_MULTRES | 2475 | | sw MULTRES, SAVE_MULTRES |
2139 | | mtc1 TMP3, TOBIT | 2476 | | .FPU mtc1 TMP3, TOBIT |
2140 | | lw TMP1, LFUNC:TMP1->pc | 2477 | | lw TMP1, LFUNC:RB->pc |
2141 | | sw r0, DISPATCH_GL(jit_L)(DISPATCH) | 2478 | | sw r0, DISPATCH_GL(jit_base)(DISPATCH) |
2142 | | lw KBASE, PC2PROTO(k)(TMP1) | 2479 | | lw KBASE, PC2PROTO(k)(TMP1) |
2143 | | cvt.d.s TOBIT, TOBIT | 2480 | | .FPU cvt.d.s TOBIT, TOBIT |
2144 | | // Modified copy of ins_next which handles function header dispatch, too. | 2481 | | // Modified copy of ins_next which handles function header dispatch, too. |
2145 | | lw INS, 0(PC) | 2482 | | lw INS, 0(PC) |
2146 | | addiu PC, PC, 4 | 2483 | | addiu PC, PC, 4 |
@@ -2148,7 +2485,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2148 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) | 2485 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) |
2149 | | decode_OP4a TMP1, INS | 2486 | | decode_OP4a TMP1, INS |
2150 | | decode_OP4b TMP1 | 2487 | | decode_OP4b TMP1 |
2151 | | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? | 2488 | | sltiu TMP2, TMP1, BC_FUNCF*4 |
2152 | | addu TMP0, DISPATCH, TMP1 | 2489 | | addu TMP0, DISPATCH, TMP1 |
2153 | | decode_RD8a RD, INS | 2490 | | decode_RD8a RD, INS |
2154 | | lw AT, 0(TMP0) | 2491 | | lw AT, 0(TMP0) |
@@ -2158,13 +2495,30 @@ static void build_subroutines(BuildCtx *ctx) | |||
2158 | | jr AT | 2495 | | jr AT |
2159 | |. decode_RD8b RD | 2496 | |. decode_RD8b RD |
2160 | |2: | 2497 | |2: |
2498 | | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function? | ||
2499 | | bnez TMP2, >3 | ||
2500 | |. lw TMP1, FRAME_PC(BASE) | ||
2501 | | // Check frame below fast function. | ||
2502 | | andi TMP0, TMP1, FRAME_TYPE | ||
2503 | | bnez TMP0, >3 // Trace stitching continuation? | ||
2504 | |. nop | ||
2505 | | // Otherwise set KBASE for Lua function below fast function. | ||
2506 | | lw TMP2, -4(TMP1) | ||
2507 | | decode_RA8a TMP0, TMP2 | ||
2508 | | decode_RA8b TMP0 | ||
2509 | | subu TMP1, BASE, TMP0 | ||
2510 | | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1) | ||
2511 | | lw TMP1, LFUNC:TMP2->pc | ||
2512 | | lw KBASE, PC2PROTO(k)(TMP1) | ||
2513 | |3: | ||
2161 | | addiu RC, MULTRES, -8 | 2514 | | addiu RC, MULTRES, -8 |
2162 | | jr AT | 2515 | | jr AT |
2163 | |. addu RA, RA, BASE | 2516 | |. addu RA, RA, BASE |
2164 | | | 2517 | | |
2165 | |3: // Rethrow error from the right C frame. | 2518 | |9: // Rethrow error from the right C frame. |
2166 | | load_got lj_err_run | 2519 | | load_got lj_err_trace |
2167 | | call_intern lj_err_run // (lua_State *L) | 2520 | | sub CARG2, r0, CRET1 |
2521 | | call_intern lj_err_trace // (lua_State *L, int errcode) | ||
2168 | |. move CARG1, L | 2522 | |. move CARG1, L |
2169 | |.endif | 2523 | |.endif |
2170 | | | 2524 | | |
@@ -2172,8 +2526,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2172 | |//-- Math helper functions ---------------------------------------------- | 2526 | |//-- Math helper functions ---------------------------------------------- |
2173 | |//----------------------------------------------------------------------- | 2527 | |//----------------------------------------------------------------------- |
2174 | | | 2528 | | |
2529 | |// Hard-float round to integer. | ||
2175 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. | 2530 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. |
2176 | |.macro vm_round, func | 2531 | |.macro vm_round_hf, func |
2177 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). | 2532 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). |
2178 | | mtc1 r0, f4 | 2533 | | mtc1 r0, f4 |
2179 | | mtc1 TMP0, f5 | 2534 | | mtc1 TMP0, f5 |
@@ -2215,6 +2570,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2215 | |. mov.d FRET1, FARG1 | 2570 | |. mov.d FRET1, FARG1 |
2216 | |.endmacro | 2571 | |.endmacro |
2217 | | | 2572 | | |
2573 | |.macro vm_round, func | ||
2574 | |.if FPU | ||
2575 | | vm_round_hf, func | ||
2576 | |.endif | ||
2577 | |.endmacro | ||
2578 | | | ||
2218 | |->vm_floor: | 2579 | |->vm_floor: |
2219 | | vm_round floor | 2580 | | vm_round floor |
2220 | |->vm_ceil: | 2581 | |->vm_ceil: |
@@ -2224,10 +2585,286 @@ static void build_subroutines(BuildCtx *ctx) | |||
2224 | | vm_round trunc | 2585 | | vm_round trunc |
2225 | |.endif | 2586 | |.endif |
2226 | | | 2587 | | |
2588 | |// Soft-float integer to number conversion. | ||
2589 | |.macro sfi2d, AHI, ALO | ||
2590 | |.if not FPU | ||
2591 | | beqz ALO, >9 // Handle zero first. | ||
2592 | |. sra TMP0, ALO, 31 | ||
2593 | | xor TMP1, ALO, TMP0 | ||
2594 | | subu TMP1, TMP1, TMP0 // Absolute value in TMP1. | ||
2595 | | clz AHI, TMP1 | ||
2596 | | andi TMP0, TMP0, 0x800 // Mask sign bit. | ||
2597 | | li AT, 0x3ff+31-1 | ||
2598 | | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1. | ||
2599 | | subu AHI, AT, AHI // Exponent - 1 in AHI. | ||
2600 | | sll ALO, TMP1, 21 | ||
2601 | | or AHI, AHI, TMP0 // Sign | Exponent. | ||
2602 | | srl TMP1, TMP1, 11 | ||
2603 | | sll AHI, AHI, 20 // Align left. | ||
2604 | | jr ra | ||
2605 | |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent. | ||
2606 | |9: | ||
2607 | | jr ra | ||
2608 | |. li AHI, 0 | ||
2609 | |.endif | ||
2610 | |.endmacro | ||
2611 | | | ||
2612 | |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1. | ||
2613 | |->vm_sfi2d_1: | ||
2614 | | sfi2d SFARG1HI, SFARG1LO | ||
2615 | | | ||
2616 | |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1. | ||
2617 | |->vm_sfi2d_2: | ||
2618 | | sfi2d SFARG2HI, SFARG2LO | ||
2619 | | | ||
2620 | |// Soft-float comparison. Equivalent to c.eq.d. | ||
2621 | |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2622 | |->vm_sfcmpeq: | ||
2623 | |.if not FPU | ||
2624 | | sll AT, SFARG1HI, 1 | ||
2625 | | sll TMP0, SFARG2HI, 1 | ||
2626 | | or CRET1, SFARG1LO, SFARG2LO | ||
2627 | | or TMP1, AT, TMP0 | ||
2628 | | or TMP1, TMP1, CRET1 | ||
2629 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2630 | |. sltu CRET1, r0, SFARG1LO | ||
2631 | | lui TMP1, 0xffe0 | ||
2632 | | addu AT, AT, CRET1 | ||
2633 | | sltu CRET1, r0, SFARG2LO | ||
2634 | | sltu AT, TMP1, AT | ||
2635 | | addu TMP0, TMP0, CRET1 | ||
2636 | | sltu TMP0, TMP1, TMP0 | ||
2637 | | or TMP1, AT, TMP0 | ||
2638 | | bnez TMP1, >9 // Either arg is NaN: return 0; | ||
2639 | |. xor TMP0, SFARG1HI, SFARG2HI | ||
2640 | | xor TMP1, SFARG1LO, SFARG2LO | ||
2641 | | or AT, TMP0, TMP1 | ||
2642 | | jr ra | ||
2643 | |. sltiu CRET1, AT, 1 // Same values: return 1. | ||
2644 | |8: | ||
2645 | | jr ra | ||
2646 | |. li CRET1, 1 | ||
2647 | |9: | ||
2648 | | jr ra | ||
2649 | |. li CRET1, 0 | ||
2650 | |.endif | ||
2651 | | | ||
2652 | |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. | ||
2653 | |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. | ||
2654 | |->vm_sfcmpult: | ||
2655 | |.if not FPU | ||
2656 | | b >1 | ||
2657 | |. li CRET2, 1 | ||
2658 | |.endif | ||
2659 | | | ||
2660 | |->vm_sfcmpolt: | ||
2661 | |.if not FPU | ||
2662 | | li CRET2, 0 | ||
2663 | |1: | ||
2664 | | sll AT, SFARG1HI, 1 | ||
2665 | | sll TMP0, SFARG2HI, 1 | ||
2666 | | or CRET1, SFARG1LO, SFARG2LO | ||
2667 | | or TMP1, AT, TMP0 | ||
2668 | | or TMP1, TMP1, CRET1 | ||
2669 | | beqz TMP1, >8 // Both args +-0: return 0. | ||
2670 | |. sltu CRET1, r0, SFARG1LO | ||
2671 | | lui TMP1, 0xffe0 | ||
2672 | | addu AT, AT, CRET1 | ||
2673 | | sltu CRET1, r0, SFARG2LO | ||
2674 | | sltu AT, TMP1, AT | ||
2675 | | addu TMP0, TMP0, CRET1 | ||
2676 | | sltu TMP0, TMP1, TMP0 | ||
2677 | | or TMP1, AT, TMP0 | ||
2678 | | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; | ||
2679 | |. and AT, SFARG1HI, SFARG2HI | ||
2680 | | bltz AT, >5 // Both args negative? | ||
2681 | |. nop | ||
2682 | | beq SFARG1HI, SFARG2HI, >8 | ||
2683 | |. sltu CRET1, SFARG1LO, SFARG2LO | ||
2684 | | jr ra | ||
2685 | |. slt CRET1, SFARG1HI, SFARG2HI | ||
2686 | |5: // Swap conditions if both operands are negative. | ||
2687 | | beq SFARG1HI, SFARG2HI, >8 | ||
2688 | |. sltu CRET1, SFARG2LO, SFARG1LO | ||
2689 | | jr ra | ||
2690 | |. slt CRET1, SFARG2HI, SFARG1HI | ||
2691 | |8: | ||
2692 | | jr ra | ||
2693 | |. nop | ||
2694 | |9: | ||
2695 | | jr ra | ||
2696 | |. move CRET1, CRET2 | ||
2697 | |.endif | ||
2698 | | | ||
2699 | |->vm_sfcmpogt: | ||
2700 | |.if not FPU | ||
2701 | | sll AT, SFARG2HI, 1 | ||
2702 | | sll TMP0, SFARG1HI, 1 | ||
2703 | | or CRET1, SFARG2LO, SFARG1LO | ||
2704 | | or TMP1, AT, TMP0 | ||
2705 | | or TMP1, TMP1, CRET1 | ||
2706 | | beqz TMP1, >8 // Both args +-0: return 0. | ||
2707 | |. sltu CRET1, r0, SFARG2LO | ||
2708 | | lui TMP1, 0xffe0 | ||
2709 | | addu AT, AT, CRET1 | ||
2710 | | sltu CRET1, r0, SFARG1LO | ||
2711 | | sltu AT, TMP1, AT | ||
2712 | | addu TMP0, TMP0, CRET1 | ||
2713 | | sltu TMP0, TMP1, TMP0 | ||
2714 | | or TMP1, AT, TMP0 | ||
2715 | | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; | ||
2716 | |. and AT, SFARG2HI, SFARG1HI | ||
2717 | | bltz AT, >5 // Both args negative? | ||
2718 | |. nop | ||
2719 | | beq SFARG2HI, SFARG1HI, >8 | ||
2720 | |. sltu CRET1, SFARG2LO, SFARG1LO | ||
2721 | | jr ra | ||
2722 | |. slt CRET1, SFARG2HI, SFARG1HI | ||
2723 | |5: // Swap conditions if both operands are negative. | ||
2724 | | beq SFARG2HI, SFARG1HI, >8 | ||
2725 | |. sltu CRET1, SFARG1LO, SFARG2LO | ||
2726 | | jr ra | ||
2727 | |. slt CRET1, SFARG1HI, SFARG2HI | ||
2728 | |8: | ||
2729 | | jr ra | ||
2730 | |. nop | ||
2731 | |9: | ||
2732 | | jr ra | ||
2733 | |. li CRET1, 0 | ||
2734 | |.endif | ||
2735 | | | ||
2736 | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. | ||
2737 | |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2738 | |->vm_sfcmpolex: | ||
2739 | |.if not FPU | ||
2740 | | sll AT, SFARG1HI, 1 | ||
2741 | | sll TMP0, SFARG2HI, 1 | ||
2742 | | or CRET1, SFARG1LO, SFARG2LO | ||
2743 | | or TMP1, AT, TMP0 | ||
2744 | | or TMP1, TMP1, CRET1 | ||
2745 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2746 | |. sltu CRET1, r0, SFARG1LO | ||
2747 | | lui TMP1, 0xffe0 | ||
2748 | | addu AT, AT, CRET1 | ||
2749 | | sltu CRET1, r0, SFARG2LO | ||
2750 | | sltu AT, TMP1, AT | ||
2751 | | addu TMP0, TMP0, CRET1 | ||
2752 | | sltu TMP0, TMP1, TMP0 | ||
2753 | | or TMP1, AT, TMP0 | ||
2754 | | bnez TMP1, >9 // Either arg is NaN: return 0; | ||
2755 | |. and AT, SFARG1HI, SFARG2HI | ||
2756 | | xor AT, AT, TMP3 | ||
2757 | | bltz AT, >5 // Both args negative? | ||
2758 | |. nop | ||
2759 | | beq SFARG1HI, SFARG2HI, >6 | ||
2760 | |. sltu CRET1, SFARG2LO, SFARG1LO | ||
2761 | | jr ra | ||
2762 | |. slt CRET1, SFARG2HI, SFARG1HI | ||
2763 | |5: // Swap conditions if both operands are negative. | ||
2764 | | beq SFARG1HI, SFARG2HI, >6 | ||
2765 | |. sltu CRET1, SFARG1LO, SFARG2LO | ||
2766 | | slt CRET1, SFARG1HI, SFARG2HI | ||
2767 | |6: | ||
2768 | | jr ra | ||
2769 | |. nop | ||
2770 | |8: | ||
2771 | | jr ra | ||
2772 | |. li CRET1, 1 | ||
2773 | |9: | ||
2774 | | jr ra | ||
2775 | |. li CRET1, 0 | ||
2776 | |.endif | ||
2777 | | | ||
2778 | |.macro sfmin_max, name, fpcall | ||
2779 | |->vm_sf .. name: | ||
2780 | |.if JIT and not FPU | ||
2781 | | move TMP2, ra | ||
2782 | | bal ->fpcall | ||
2783 | |. nop | ||
2784 | | move TMP0, CRET1 | ||
2785 | | move SFRETHI, SFARG1HI | ||
2786 | | move SFRETLO, SFARG1LO | ||
2787 | | move ra, TMP2 | ||
2788 | | movz SFRETHI, SFARG2HI, TMP0 | ||
2789 | | jr ra | ||
2790 | |. movz SFRETLO, SFARG2LO, TMP0 | ||
2791 | |.endif | ||
2792 | |.endmacro | ||
2793 | | | ||
2794 | | sfmin_max min, vm_sfcmpolt | ||
2795 | | sfmin_max max, vm_sfcmpogt | ||
2796 | | | ||
2227 | |//----------------------------------------------------------------------- | 2797 | |//----------------------------------------------------------------------- |
2228 | |//-- Miscellaneous functions -------------------------------------------- | 2798 | |//-- Miscellaneous functions -------------------------------------------- |
2229 | |//----------------------------------------------------------------------- | 2799 | |//----------------------------------------------------------------------- |
2230 | | | 2800 | | |
2801 | |.define NEXT_TAB, TAB:CARG1 | ||
2802 | |.define NEXT_IDX, CARG2 | ||
2803 | |.define NEXT_ASIZE, CARG3 | ||
2804 | |.define NEXT_NIL, CARG4 | ||
2805 | |.define NEXT_TMP0, r12 | ||
2806 | |.define NEXT_TMP1, r13 | ||
2807 | |.define NEXT_TMP2, r14 | ||
2808 | |.define NEXT_RES_VK, CRET1 | ||
2809 | |.define NEXT_RES_IDX, CRET2 | ||
2810 | |.define NEXT_RES_PTR, sp | ||
2811 | |.define NEXT_RES_VAL_I, 0(sp) | ||
2812 | |.define NEXT_RES_VAL_IT, 4(sp) | ||
2813 | |.define NEXT_RES_KEY_I, 8(sp) | ||
2814 | |.define NEXT_RES_KEY_IT, 12(sp) | ||
2815 | | | ||
2816 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2817 | |// Next idx returned in CRET2. | ||
2818 | |->vm_next: | ||
2819 | |.if JIT and ENDIAN_LE | ||
2820 | | lw NEXT_ASIZE, NEXT_TAB->asize | ||
2821 | | lw NEXT_TMP0, NEXT_TAB->array | ||
2822 | | li NEXT_NIL, LJ_TNIL | ||
2823 | |1: // Traverse array part. | ||
2824 | | sltu AT, NEXT_IDX, NEXT_ASIZE | ||
2825 | | sll NEXT_TMP1, NEXT_IDX, 3 | ||
2826 | | beqz AT, >5 | ||
2827 | |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 | ||
2828 | | lw NEXT_TMP2, 4(NEXT_TMP1) | ||
2829 | | sw NEXT_IDX, NEXT_RES_KEY_I | ||
2830 | | beq NEXT_TMP2, NEXT_NIL, <1 | ||
2831 | |. addiu NEXT_IDX, NEXT_IDX, 1 | ||
2832 | | lw NEXT_TMP0, 0(NEXT_TMP1) | ||
2833 | | li AT, LJ_TISNUM | ||
2834 | | sw NEXT_TMP2, NEXT_RES_VAL_IT | ||
2835 | | sw AT, NEXT_RES_KEY_IT | ||
2836 | | sw NEXT_TMP0, NEXT_RES_VAL_I | ||
2837 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2838 | | jr ra | ||
2839 | |. move NEXT_RES_IDX, NEXT_IDX | ||
2840 | | | ||
2841 | |5: // Traverse hash part. | ||
2842 | | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE | ||
2843 | | lw NODE:NEXT_RES_VK, NEXT_TAB->node | ||
2844 | | sll NEXT_TMP2, NEXT_RES_IDX, 5 | ||
2845 | | lw NEXT_TMP0, NEXT_TAB->hmask | ||
2846 | | sll AT, NEXT_RES_IDX, 3 | ||
2847 | | subu AT, NEXT_TMP2, AT | ||
2848 | | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT | ||
2849 | |6: | ||
2850 | | sltu AT, NEXT_TMP0, NEXT_RES_IDX | ||
2851 | | bnez AT, >8 | ||
2852 | |. nop | ||
2853 | | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it | ||
2854 | | bne NEXT_TMP2, NEXT_NIL, >9 | ||
2855 | |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 | ||
2856 | | // Skip holes in hash part. | ||
2857 | | b <6 | ||
2858 | |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) | ||
2859 | | | ||
2860 | |8: // End of iteration. Set the key to nil (not the value). | ||
2861 | | sw NEXT_NIL, NEXT_RES_KEY_IT | ||
2862 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2863 | |9: | ||
2864 | | jr ra | ||
2865 | |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE | ||
2866 | |.endif | ||
2867 | | | ||
2231 | |//----------------------------------------------------------------------- | 2868 | |//----------------------------------------------------------------------- |
2232 | |//-- FFI helper functions ----------------------------------------------- | 2869 | |//-- FFI helper functions ----------------------------------------------- |
2233 | |//----------------------------------------------------------------------- | 2870 | |//----------------------------------------------------------------------- |
@@ -2243,10 +2880,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2243 | | sw r1, CTSTATE->cb.slot | 2880 | | sw r1, CTSTATE->cb.slot |
2244 | | sw CARG1, CTSTATE->cb.gpr[0] | 2881 | | sw CARG1, CTSTATE->cb.gpr[0] |
2245 | | sw CARG2, CTSTATE->cb.gpr[1] | 2882 | | sw CARG2, CTSTATE->cb.gpr[1] |
2246 | | sdc1 FARG1, CTSTATE->cb.fpr[0] | 2883 | | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] |
2247 | | sw CARG3, CTSTATE->cb.gpr[2] | 2884 | | sw CARG3, CTSTATE->cb.gpr[2] |
2248 | | sw CARG4, CTSTATE->cb.gpr[3] | 2885 | | sw CARG4, CTSTATE->cb.gpr[3] |
2249 | | sdc1 FARG2, CTSTATE->cb.fpr[1] | 2886 | | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] |
2250 | | addiu TMP0, sp, CFRAME_SPACE+16 | 2887 | | addiu TMP0, sp, CFRAME_SPACE+16 |
2251 | | sw TMP0, CTSTATE->cb.stack | 2888 | | sw TMP0, CTSTATE->cb.stack |
2252 | | sw r0, SAVE_PC // Any value outside of bytecode is ok. | 2889 | | sw r0, SAVE_PC // Any value outside of bytecode is ok. |
@@ -2256,15 +2893,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
2256 | | // Returns lua_State *. | 2893 | | // Returns lua_State *. |
2257 | | lw BASE, L:CRET1->base | 2894 | | lw BASE, L:CRET1->base |
2258 | | lw RC, L:CRET1->top | 2895 | | lw RC, L:CRET1->top |
2896 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
2259 | | move L, CRET1 | 2897 | | move L, CRET1 |
2260 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 2898 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2261 | | lw LFUNC:RB, FRAME_FUNC(BASE) | 2899 | | lw LFUNC:RB, FRAME_FUNC(BASE) |
2262 | | mtc1 TMP3, TOBIT | 2900 | | .FPU mtc1 TMP3, TOBIT |
2263 | | li_vmstate INTERP | 2901 | | li_vmstate INTERP |
2264 | | li TISNIL, LJ_TNIL | 2902 | | li TISNIL, LJ_TNIL |
2265 | | subu RC, RC, BASE | 2903 | | subu RC, RC, BASE |
2266 | | st_vmstate | 2904 | | st_vmstate |
2267 | | cvt.d.s TOBIT, TOBIT | 2905 | | .FPU cvt.d.s TOBIT, TOBIT |
2268 | | ins_callt | 2906 | | ins_callt |
2269 | |.endif | 2907 | |.endif |
2270 | | | 2908 | | |
@@ -2278,11 +2916,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2278 | | move CARG2, RA | 2916 | | move CARG2, RA |
2279 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) | 2917 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) |
2280 | |. move CARG1, CTSTATE | 2918 | |. move CARG1, CTSTATE |
2919 | | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] | ||
2281 | | lw CRET1, CTSTATE->cb.gpr[0] | 2920 | | lw CRET1, CTSTATE->cb.gpr[0] |
2282 | | ldc1 FRET1, CTSTATE->cb.fpr[0] | 2921 | | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] |
2283 | | lw CRET2, CTSTATE->cb.gpr[1] | ||
2284 | | b ->vm_leave_unw | 2922 | | b ->vm_leave_unw |
2285 | |. ldc1 FRET2, CTSTATE->cb.fpr[1] | 2923 | |. lw CRET2, CTSTATE->cb.gpr[1] |
2286 | |.endif | 2924 | |.endif |
2287 | | | 2925 | | |
2288 | |->vm_ffi_call: // Call C function via FFI. | 2926 | |->vm_ffi_call: // Call C function via FFI. |
@@ -2314,8 +2952,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2314 | | lw CARG2, CCSTATE->gpr[1] | 2952 | | lw CARG2, CCSTATE->gpr[1] |
2315 | | lw CARG3, CCSTATE->gpr[2] | 2953 | | lw CARG3, CCSTATE->gpr[2] |
2316 | | lw CARG4, CCSTATE->gpr[3] | 2954 | | lw CARG4, CCSTATE->gpr[3] |
2317 | | ldc1 FARG1, CCSTATE->fpr[0] | 2955 | | .FPU ldc1 FARG1, CCSTATE->fpr[0] |
2318 | | ldc1 FARG2, CCSTATE->fpr[1] | 2956 | | .FPU ldc1 FARG2, CCSTATE->fpr[1] |
2319 | | jalr CFUNCADDR | 2957 | | jalr CFUNCADDR |
2320 | |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. | 2958 | |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. |
2321 | | lw CCSTATE:TMP1, -12(r16) | 2959 | | lw CCSTATE:TMP1, -12(r16) |
@@ -2323,8 +2961,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2323 | | lw ra, -4(r16) | 2961 | | lw ra, -4(r16) |
2324 | | sw CRET1, CCSTATE:TMP1->gpr[0] | 2962 | | sw CRET1, CCSTATE:TMP1->gpr[0] |
2325 | | sw CRET2, CCSTATE:TMP1->gpr[1] | 2963 | | sw CRET2, CCSTATE:TMP1->gpr[1] |
2964 | |.if FPU | ||
2326 | | sdc1 FRET1, CCSTATE:TMP1->fpr[0] | 2965 | | sdc1 FRET1, CCSTATE:TMP1->fpr[0] |
2327 | | sdc1 FRET2, CCSTATE:TMP1->fpr[1] | 2966 | | sdc1 FRET2, CCSTATE:TMP1->fpr[1] |
2967 | |.else | ||
2968 | | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part. | ||
2969 | | sw CARG2, CCSTATE:TMP1->gpr[3] | ||
2970 | |.endif | ||
2328 | | move sp, r16 | 2971 | | move sp, r16 |
2329 | | jr ra | 2972 | | jr ra |
2330 | |. move r16, TMP2 | 2973 | |. move r16, TMP2 |
@@ -2348,82 +2991,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2348 | 2991 | ||
2349 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 2992 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
2350 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 2993 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2351 | | addu CARG2, BASE, RA | 2994 | |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp |
2352 | | addu CARG3, BASE, RD | 2995 | | addu RA, BASE, RA |
2353 | | lw TMP0, HI(CARG2) | 2996 | | addu RD, BASE, RD |
2354 | | lw TMP1, HI(CARG3) | 2997 | | lw RAHI, HI(RA) |
2355 | | ldc1 f0, 0(CARG2) | 2998 | | lw RDHI, HI(RD) |
2356 | | ldc1 f2, 0(CARG3) | ||
2357 | | sltiu TMP0, TMP0, LJ_TISNUM | ||
2358 | | sltiu TMP1, TMP1, LJ_TISNUM | ||
2359 | | lhu TMP2, OFS_RD(PC) | 2999 | | lhu TMP2, OFS_RD(PC) |
2360 | | and TMP0, TMP0, TMP1 | ||
2361 | | addiu PC, PC, 4 | 3000 | | addiu PC, PC, 4 |
2362 | | beqz TMP0, ->vmeta_comp | 3001 | | bne RAHI, TISNUM, >2 |
2363 | |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) | 3002 | |. lw RALO, LO(RA) |
2364 | | decode_RD4b TMP2 | 3003 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2365 | | addu TMP2, TMP2, TMP1 | 3004 | | lw RDLO, LO(RD) |
2366 | if (op == BC_ISLT || op == BC_ISGE) { | 3005 | | bne RDHI, TISNUM, >5 |
2367 | | c.olt.d f0, f2 | 3006 | |. decode_RD4b TMP2 |
2368 | } else { | 3007 | | slt AT, SFARG1LO, SFARG2LO |
2369 | | c.ole.d f0, f2 | 3008 | | addu TMP2, TMP2, TMP3 |
2370 | } | 3009 | | movop TMP2, r0, AT |
2371 | if (op == BC_ISLT || op == BC_ISLE) { | ||
2372 | | movf TMP2, r0 | ||
2373 | } else { | ||
2374 | | movt TMP2, r0 | ||
2375 | } | ||
2376 | | addu PC, PC, TMP2 | ||
2377 | |1: | 3010 | |1: |
3011 | | addu PC, PC, TMP2 | ||
2378 | | ins_next | 3012 | | ins_next |
3013 | | | ||
3014 | |2: // RA is not an integer. | ||
3015 | | sltiu AT, RAHI, LJ_TISNUM | ||
3016 | | beqz AT, ->vmeta_comp | ||
3017 | |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3018 | | sltiu AT, RDHI, LJ_TISNUM | ||
3019 | |.if FPU | ||
3020 | | ldc1 FRA, 0(RA) | ||
3021 | | ldc1 FRD, 0(RD) | ||
3022 | |.else | ||
3023 | | lw RDLO, LO(RD) | ||
3024 | |.endif | ||
3025 | | beqz AT, >4 | ||
3026 | |. decode_RD4b TMP2 | ||
3027 | |3: // RA and RD are both numbers. | ||
3028 | |.if FPU | ||
3029 | | fcomp f20, f22 | ||
3030 | | addu TMP2, TMP2, TMP3 | ||
3031 | | b <1 | ||
3032 | |. fmovop TMP2, r0 | ||
3033 | |.else | ||
3034 | | bal sfcomp | ||
3035 | |. addu TMP2, TMP2, TMP3 | ||
3036 | | b <1 | ||
3037 | |. movop TMP2, r0, CRET1 | ||
3038 | |.endif | ||
3039 | | | ||
3040 | |4: // RA is a number, RD is not a number. | ||
3041 | | bne RDHI, TISNUM, ->vmeta_comp | ||
3042 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
3043 | |.if FPU | ||
3044 | |. lwc1 FRD, LO(RD) | ||
3045 | | b <3 | ||
3046 | |. cvt.d.w FRD, FRD | ||
3047 | |.else | ||
3048 | |. nop | ||
3049 | |.if "RDHI" == "SFARG1HI" | ||
3050 | | bal ->vm_sfi2d_1 | ||
3051 | |.else | ||
3052 | | bal ->vm_sfi2d_2 | ||
3053 | |.endif | ||
3054 | |. nop | ||
3055 | | b <3 | ||
3056 | |. nop | ||
3057 | |.endif | ||
3058 | | | ||
3059 | |5: // RA is an integer, RD is not an integer | ||
3060 | | sltiu AT, RDHI, LJ_TISNUM | ||
3061 | | beqz AT, ->vmeta_comp | ||
3062 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
3063 | |.if FPU | ||
3064 | |. mtc1 RALO, FRA | ||
3065 | | ldc1 FRD, 0(RD) | ||
3066 | | b <3 | ||
3067 | | cvt.d.w FRA, FRA | ||
3068 | |.else | ||
3069 | |. nop | ||
3070 | |.if "RAHI" == "SFARG1HI" | ||
3071 | | bal ->vm_sfi2d_1 | ||
3072 | |.else | ||
3073 | | bal ->vm_sfi2d_2 | ||
3074 | |.endif | ||
3075 | |. nop | ||
3076 | | b <3 | ||
3077 | |. nop | ||
3078 | |.endif | ||
3079 | |.endmacro | ||
3080 | | | ||
3081 | if (op == BC_ISLT) { | ||
3082 | | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt | ||
3083 | } else if (op == BC_ISGE) { | ||
3084 | | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt | ||
3085 | } else if (op == BC_ISLE) { | ||
3086 | | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult | ||
3087 | } else { | ||
3088 | | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult | ||
3089 | } | ||
2379 | break; | 3090 | break; |
2380 | 3091 | ||
2381 | case BC_ISEQV: case BC_ISNEV: | 3092 | case BC_ISEQV: case BC_ISNEV: |
2382 | vk = op == BC_ISEQV; | 3093 | vk = op == BC_ISEQV; |
2383 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 3094 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2384 | | addu RA, BASE, RA | 3095 | | addu RA, BASE, RA |
2385 | | addiu PC, PC, 4 | 3096 | | addiu PC, PC, 4 |
2386 | | lw TMP0, HI(RA) | ||
2387 | | ldc1 f0, 0(RA) | ||
2388 | | addu RD, BASE, RD | 3097 | | addu RD, BASE, RD |
3098 | | lw SFARG1HI, HI(RA) | ||
2389 | | lhu TMP2, -4+OFS_RD(PC) | 3099 | | lhu TMP2, -4+OFS_RD(PC) |
2390 | | lw TMP1, HI(RD) | 3100 | | lw SFARG2HI, HI(RD) |
2391 | | ldc1 f2, 0(RD) | ||
2392 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 3101 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2393 | | sltiu AT, TMP0, LJ_TISNUM | 3102 | | sltu AT, TISNUM, SFARG1HI |
2394 | | sltiu CARG1, TMP1, LJ_TISNUM | 3103 | | sltu TMP0, TISNUM, SFARG2HI |
2395 | | decode_RD4b TMP2 | 3104 | | or AT, AT, TMP0 |
2396 | | and AT, AT, CARG1 | ||
2397 | | beqz AT, >5 | ||
2398 | |. addu TMP2, TMP2, TMP3 | ||
2399 | | c.eq.d f0, f2 | ||
2400 | if (vk) { | 3105 | if (vk) { |
2401 | | movf TMP2, r0 | 3106 | | beqz AT, ->BC_ISEQN_Z |
2402 | } else { | 3107 | } else { |
2403 | | movt TMP2, r0 | 3108 | | beqz AT, ->BC_ISNEN_Z |
2404 | } | 3109 | } |
2405 | |1: | 3110 | |. decode_RD4b TMP2 |
2406 | | addu PC, PC, TMP2 | 3111 | | // Either or both types are not numbers. |
2407 | | ins_next | 3112 | | lw SFARG1LO, LO(RA) |
2408 | |5: // Either or both types are not numbers. | 3113 | | lw SFARG2LO, LO(RD) |
2409 | | lw CARG2, LO(RA) | 3114 | | addu TMP2, TMP2, TMP3 |
2410 | | lw CARG3, LO(RD) | ||
2411 | |.if FFI | 3115 | |.if FFI |
2412 | | li TMP3, LJ_TCDATA | 3116 | | li TMP3, LJ_TCDATA |
2413 | | beq TMP0, TMP3, ->vmeta_equal_cd | 3117 | | beq SFARG1HI, TMP3, ->vmeta_equal_cd |
2414 | |.endif | 3118 | |.endif |
2415 | |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? | 3119 | |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive? |
2416 | |.if FFI | 3120 | |.if FFI |
2417 | | beq TMP1, TMP3, ->vmeta_equal_cd | 3121 | | beq SFARG2HI, TMP3, ->vmeta_equal_cd |
2418 | |.endif | 3122 | |.endif |
2419 | |. xor TMP3, CARG2, CARG3 // Same tv? | 3123 | |. xor TMP3, SFARG1LO, SFARG2LO // Same tv? |
2420 | | xor TMP1, TMP1, TMP0 // Same type? | 3124 | | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type? |
2421 | | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? | 3125 | | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata? |
2422 | | movz TMP3, r0, AT // Ignore tv if primitive. | 3126 | | movz TMP3, r0, AT // Ignore tv if primitive. |
2423 | | movn CARG1, r0, TMP1 // Tab/ud and same type? | 3127 | | movn TMP0, r0, SFARG2HI // Tab/ud and same type? |
2424 | | or AT, TMP1, TMP3 // Same type && (pri||same tv). | 3128 | | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv). |
2425 | | movz CARG1, r0, AT | 3129 | | movz TMP0, r0, AT |
2426 | | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. | 3130 | | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv. |
2427 | if (vk) { | 3131 | if (vk) { |
2428 | |. movn TMP2, r0, AT | 3132 | |. movn TMP2, r0, AT |
2429 | } else { | 3133 | } else { |
@@ -2431,15 +3135,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2431 | } | 3135 | } |
2432 | | // Different tables or userdatas. Need to check __eq metamethod. | 3136 | | // Different tables or userdatas. Need to check __eq metamethod. |
2433 | | // Field metatable must be at same offset for GCtab and GCudata! | 3137 | | // Field metatable must be at same offset for GCtab and GCudata! |
2434 | | lw TAB:TMP1, TAB:CARG2->metatable | 3138 | | lw TAB:TMP1, TAB:SFARG1LO->metatable |
2435 | | beqz TAB:TMP1, <1 // No metatable? | 3139 | | beqz TAB:TMP1, >1 // No metatable? |
2436 | |. nop | 3140 | |. nop |
2437 | | lbu TMP1, TAB:TMP1->nomm | 3141 | | lbu TMP1, TAB:TMP1->nomm |
2438 | | andi TMP1, TMP1, 1<<MM_eq | 3142 | | andi TMP1, TMP1, 1<<MM_eq |
2439 | | bnez TMP1, <1 // Or 'no __eq' flag set? | 3143 | | bnez TMP1, >1 // Or 'no __eq' flag set? |
2440 | |. nop | 3144 | |. nop |
2441 | | b ->vmeta_equal // Handle __eq metamethod. | 3145 | | b ->vmeta_equal // Handle __eq metamethod. |
2442 | |. li CARG4, 1-vk // ne = 0 or 1. | 3146 | |. li TMP0, 1-vk // ne = 0 or 1. |
3147 | |1: | ||
3148 | | addu PC, PC, TMP2 | ||
3149 | | ins_next | ||
2443 | break; | 3150 | break; |
2444 | 3151 | ||
2445 | case BC_ISEQS: case BC_ISNES: | 3152 | case BC_ISEQS: case BC_ISNES: |
@@ -2476,38 +3183,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2476 | vk = op == BC_ISEQN; | 3183 | vk = op == BC_ISEQN; |
2477 | | // RA = src*8, RD = num_const*8, JMP with RD = target | 3184 | | // RA = src*8, RD = num_const*8, JMP with RD = target |
2478 | | addu RA, BASE, RA | 3185 | | addu RA, BASE, RA |
2479 | | addiu PC, PC, 4 | 3186 | | addu RD, KBASE, RD |
2480 | | lw TMP0, HI(RA) | 3187 | | lw SFARG1HI, HI(RA) |
2481 | | ldc1 f0, 0(RA) | 3188 | | lw SFARG2HI, HI(RD) |
2482 | | addu RD, KBASE, RD | 3189 | | lhu TMP2, OFS_RD(PC) |
2483 | | lhu TMP2, -4+OFS_RD(PC) | 3190 | | addiu PC, PC, 4 |
2484 | | ldc1 f2, 0(RD) | ||
2485 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 3191 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2486 | | sltiu AT, TMP0, LJ_TISNUM | ||
2487 | | decode_RD4b TMP2 | 3192 | | decode_RD4b TMP2 |
2488 | |.if FFI | ||
2489 | | beqz AT, >5 | ||
2490 | |.else | ||
2491 | | beqz AT, >1 | ||
2492 | |.endif | ||
2493 | |. addu TMP2, TMP2, TMP3 | ||
2494 | | c.eq.d f0, f2 | ||
2495 | if (vk) { | 3193 | if (vk) { |
2496 | | movf TMP2, r0 | 3194 | |->BC_ISEQN_Z: |
2497 | | addu PC, PC, TMP2 | 3195 | } else { |
3196 | |->BC_ISNEN_Z: | ||
3197 | } | ||
3198 | | bne SFARG1HI, TISNUM, >3 | ||
3199 | |. lw SFARG1LO, LO(RA) | ||
3200 | | lw SFARG2LO, LO(RD) | ||
3201 | | addu TMP2, TMP2, TMP3 | ||
3202 | | bne SFARG2HI, TISNUM, >6 | ||
3203 | |. xor AT, SFARG1LO, SFARG2LO | ||
3204 | if (vk) { | ||
3205 | | movn TMP2, r0, AT | ||
2498 | |1: | 3206 | |1: |
3207 | | addu PC, PC, TMP2 | ||
3208 | |2: | ||
2499 | } else { | 3209 | } else { |
2500 | | movt TMP2, r0 | 3210 | | movz TMP2, r0, AT |
2501 | |1: | 3211 | |1: |
3212 | |2: | ||
2502 | | addu PC, PC, TMP2 | 3213 | | addu PC, PC, TMP2 |
2503 | } | 3214 | } |
2504 | | ins_next | 3215 | | ins_next |
3216 | | | ||
3217 | |3: // RA is not an integer. | ||
3218 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
2505 | |.if FFI | 3219 | |.if FFI |
2506 | |5: | 3220 | | beqz AT, >8 |
2507 | | li AT, LJ_TCDATA | 3221 | |.else |
2508 | | beq TMP0, AT, ->vmeta_equal_cd | 3222 | | beqz AT, <2 |
3223 | |.endif | ||
3224 | |. addu TMP2, TMP2, TMP3 | ||
3225 | | sltiu AT, SFARG2HI, LJ_TISNUM | ||
3226 | |.if FPU | ||
3227 | | ldc1 f20, 0(RA) | ||
3228 | | ldc1 f22, 0(RD) | ||
3229 | |.endif | ||
3230 | | beqz AT, >5 | ||
3231 | |. lw SFARG2LO, LO(RD) | ||
3232 | |4: // RA and RD are both numbers. | ||
3233 | |.if FPU | ||
3234 | | c.eq.d f20, f22 | ||
3235 | | b <1 | ||
3236 | if (vk) { | ||
3237 | |. movf TMP2, r0 | ||
3238 | } else { | ||
3239 | |. movt TMP2, r0 | ||
3240 | } | ||
3241 | |.else | ||
3242 | | bal ->vm_sfcmpeq | ||
2509 | |. nop | 3243 | |. nop |
2510 | | b <1 | 3244 | | b <1 |
3245 | if (vk) { | ||
3246 | |. movz TMP2, r0, CRET1 | ||
3247 | } else { | ||
3248 | |. movn TMP2, r0, CRET1 | ||
3249 | } | ||
3250 | |.endif | ||
3251 | | | ||
3252 | |5: // RA is a number, RD is not a number. | ||
3253 | |.if FFI | ||
3254 | | bne SFARG2HI, TISNUM, >9 | ||
3255 | |.else | ||
3256 | | bne SFARG2HI, TISNUM, <2 | ||
3257 | |.endif | ||
3258 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
3259 | |.if FPU | ||
3260 | |. lwc1 f22, LO(RD) | ||
3261 | | b <4 | ||
3262 | |. cvt.d.w f22, f22 | ||
3263 | |.else | ||
3264 | |. nop | ||
3265 | | bal ->vm_sfi2d_2 | ||
3266 | |. nop | ||
3267 | | b <4 | ||
3268 | |. nop | ||
3269 | |.endif | ||
3270 | | | ||
3271 | |6: // RA is an integer, RD is not an integer | ||
3272 | | sltiu AT, SFARG2HI, LJ_TISNUM | ||
3273 | |.if FFI | ||
3274 | | beqz AT, >9 | ||
3275 | |.else | ||
3276 | | beqz AT, <2 | ||
3277 | |.endif | ||
3278 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
3279 | |.if FPU | ||
3280 | |. mtc1 SFARG1LO, f20 | ||
3281 | | ldc1 f22, 0(RD) | ||
3282 | | b <4 | ||
3283 | | cvt.d.w f20, f20 | ||
3284 | |.else | ||
3285 | |. nop | ||
3286 | | bal ->vm_sfi2d_1 | ||
3287 | |. nop | ||
3288 | | b <4 | ||
3289 | |. nop | ||
3290 | |.endif | ||
3291 | | | ||
3292 | |.if FFI | ||
3293 | |8: | ||
3294 | | li AT, LJ_TCDATA | ||
3295 | | bne SFARG1HI, AT, <2 | ||
3296 | |. nop | ||
3297 | | b ->vmeta_equal_cd | ||
3298 | |. nop | ||
3299 | |9: | ||
3300 | | li AT, LJ_TCDATA | ||
3301 | | bne SFARG2HI, AT, <2 | ||
3302 | |. nop | ||
3303 | | b ->vmeta_equal_cd | ||
2511 | |. nop | 3304 | |. nop |
2512 | |.endif | 3305 | |.endif |
2513 | break; | 3306 | break; |
@@ -2559,7 +3352,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2559 | | addu PC, PC, TMP2 | 3352 | | addu PC, PC, TMP2 |
2560 | } else { | 3353 | } else { |
2561 | | sltiu TMP0, TMP0, LJ_TISTRUECOND | 3354 | | sltiu TMP0, TMP0, LJ_TISTRUECOND |
2562 | | ldc1 f0, 0(RD) | 3355 | | lw SFRETHI, HI(RD) |
3356 | | lw SFRETLO, LO(RD) | ||
2563 | if (op == BC_ISTC) { | 3357 | if (op == BC_ISTC) { |
2564 | | beqz TMP0, >1 | 3358 | | beqz TMP0, >1 |
2565 | } else { | 3359 | } else { |
@@ -2569,22 +3363,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2569 | | decode_RD4b TMP2 | 3363 | | decode_RD4b TMP2 |
2570 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 3364 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2571 | | addu TMP2, TMP2, TMP3 | 3365 | | addu TMP2, TMP2, TMP3 |
2572 | | sdc1 f0, 0(RA) | 3366 | | sw SFRETHI, HI(RA) |
3367 | | sw SFRETLO, LO(RA) | ||
2573 | | addu PC, PC, TMP2 | 3368 | | addu PC, PC, TMP2 |
2574 | |1: | 3369 | |1: |
2575 | } | 3370 | } |
2576 | | ins_next | 3371 | | ins_next |
2577 | break; | 3372 | break; |
2578 | 3373 | ||
3374 | case BC_ISTYPE: | ||
3375 | | // RA = src*8, RD = -type*8 | ||
3376 | | addu TMP2, BASE, RA | ||
3377 | | srl TMP1, RD, 3 | ||
3378 | | lw TMP0, HI(TMP2) | ||
3379 | | ins_next1 | ||
3380 | | addu AT, TMP0, TMP1 | ||
3381 | | bnez AT, ->vmeta_istype | ||
3382 | |. ins_next2 | ||
3383 | break; | ||
3384 | case BC_ISNUM: | ||
3385 | | // RA = src*8, RD = -(TISNUM-1)*8 | ||
3386 | | addu TMP2, BASE, RA | ||
3387 | | lw TMP0, HI(TMP2) | ||
3388 | | ins_next1 | ||
3389 | | sltiu AT, TMP0, LJ_TISNUM | ||
3390 | | beqz AT, ->vmeta_istype | ||
3391 | |. ins_next2 | ||
3392 | break; | ||
3393 | |||
2579 | /* -- Unary ops --------------------------------------------------------- */ | 3394 | /* -- Unary ops --------------------------------------------------------- */ |
2580 | 3395 | ||
2581 | case BC_MOV: | 3396 | case BC_MOV: |
2582 | | // RA = dst*8, RD = src*8 | 3397 | | // RA = dst*8, RD = src*8 |
2583 | | addu RD, BASE, RD | 3398 | | addu RD, BASE, RD |
2584 | | addu RA, BASE, RA | 3399 | | addu RA, BASE, RA |
2585 | | ldc1 f0, 0(RD) | 3400 | | lw SFRETHI, HI(RD) |
3401 | | lw SFRETLO, LO(RD) | ||
2586 | | ins_next1 | 3402 | | ins_next1 |
2587 | | sdc1 f0, 0(RA) | 3403 | | sw SFRETHI, HI(RA) |
3404 | | sw SFRETLO, LO(RA) | ||
2588 | | ins_next2 | 3405 | | ins_next2 |
2589 | break; | 3406 | break; |
2590 | case BC_NOT: | 3407 | case BC_NOT: |
@@ -2601,16 +3418,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2601 | break; | 3418 | break; |
2602 | case BC_UNM: | 3419 | case BC_UNM: |
2603 | | // RA = dst*8, RD = src*8 | 3420 | | // RA = dst*8, RD = src*8 |
2604 | | addu CARG3, BASE, RD | 3421 | | addu RB, BASE, RD |
3422 | | lw SFARG1HI, HI(RB) | ||
2605 | | addu RA, BASE, RA | 3423 | | addu RA, BASE, RA |
2606 | | lw TMP0, HI(CARG3) | 3424 | | bne SFARG1HI, TISNUM, >2 |
2607 | | ldc1 f0, 0(CARG3) | 3425 | |. lw SFARG1LO, LO(RB) |
2608 | | sltiu AT, TMP0, LJ_TISNUM | 3426 | | lui TMP1, 0x8000 |
2609 | | beqz AT, ->vmeta_unm | 3427 | | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31. |
2610 | |. neg.d f0, f0 | 3428 | |. negu SFARG1LO, SFARG1LO |
3429 | |1: | ||
2611 | | ins_next1 | 3430 | | ins_next1 |
2612 | | sdc1 f0, 0(RA) | 3431 | | sw SFARG1HI, HI(RA) |
3432 | | sw SFARG1LO, LO(RA) | ||
2613 | | ins_next2 | 3433 | | ins_next2 |
3434 | |2: | ||
3435 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
3436 | | beqz AT, ->vmeta_unm | ||
3437 | |. lui TMP1, 0x8000 | ||
3438 | | b <1 | ||
3439 | |. xor SFARG1HI, SFARG1HI, TMP1 | ||
2614 | break; | 3440 | break; |
2615 | case BC_LEN: | 3441 | case BC_LEN: |
2616 | | // RA = dst*8, RD = src*8 | 3442 | | // RA = dst*8, RD = src*8 |
@@ -2621,12 +3447,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2621 | | li AT, LJ_TSTR | 3447 | | li AT, LJ_TSTR |
2622 | | bne TMP0, AT, >2 | 3448 | | bne TMP0, AT, >2 |
2623 | |. li AT, LJ_TTAB | 3449 | |. li AT, LJ_TTAB |
2624 | | lw CRET1, STR:CARG1->len | 3450 | | lw CRET1, STR:CARG1->len |
2625 | |1: | 3451 | |1: |
2626 | | mtc1 CRET1, f0 | ||
2627 | | cvt.d.w f0, f0 | ||
2628 | | ins_next1 | 3452 | | ins_next1 |
2629 | | sdc1 f0, 0(RA) | 3453 | | sw TISNUM, HI(RA) |
3454 | | sw CRET1, LO(RA) | ||
2630 | | ins_next2 | 3455 | | ins_next2 |
2631 | |2: | 3456 | |2: |
2632 | | bne TMP0, AT, ->vmeta_len | 3457 | | bne TMP0, AT, ->vmeta_len |
@@ -2657,104 +3482,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2657 | 3482 | ||
2658 | /* -- Binary ops -------------------------------------------------------- */ | 3483 | /* -- Binary ops -------------------------------------------------------- */ |
2659 | 3484 | ||
2660 | |.macro ins_arithpre | 3485 | |.macro fpmod, a, b, c |
3486 | | bal ->vm_floor // floor(b/c) | ||
3487 | |. div.d FARG1, b, c | ||
3488 | | mul.d a, FRET1, c | ||
3489 | | sub.d a, b, a // b - floor(b/c)*c | ||
3490 | |.endmacro | ||
3491 | |||
3492 | |.macro sfpmod | ||
3493 | | addiu sp, sp, -16 | ||
3494 | | | ||
3495 | | load_got __divdf3 | ||
3496 | | sw SFARG1HI, HI(sp) | ||
3497 | | sw SFARG1LO, LO(sp) | ||
3498 | | sw SFARG2HI, 8+HI(sp) | ||
3499 | | call_extern | ||
3500 | |. sw SFARG2LO, 8+LO(sp) | ||
3501 | | | ||
3502 | | load_got floor | ||
3503 | | move SFARG1HI, SFRETHI | ||
3504 | | call_extern | ||
3505 | |. move SFARG1LO, SFRETLO | ||
3506 | | | ||
3507 | | load_got __muldf3 | ||
3508 | | move SFARG1HI, SFRETHI | ||
3509 | | move SFARG1LO, SFRETLO | ||
3510 | | lw SFARG2HI, 8+HI(sp) | ||
3511 | | call_extern | ||
3512 | |. lw SFARG2LO, 8+LO(sp) | ||
3513 | | | ||
3514 | | load_got __subdf3 | ||
3515 | | lw SFARG1HI, HI(sp) | ||
3516 | | lw SFARG1LO, LO(sp) | ||
3517 | | move SFARG2HI, SFRETHI | ||
3518 | | call_extern | ||
3519 | |. move SFARG2LO, SFRETLO | ||
3520 | | | ||
3521 | | addiu sp, sp, 16 | ||
3522 | |.endmacro | ||
3523 | |||
3524 | |.macro ins_arithpre, label | ||
2661 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3525 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
2662 | | decode_RB8a RB, INS | ||
2663 | | decode_RB8b RB | ||
2664 | | decode_RDtoRC8 RC, RD | ||
2665 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | 3526 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 |
2666 | ||switch (vk) { | 3527 | ||switch (vk) { |
2667 | ||case 0: | 3528 | ||case 0: |
2668 | | addu CARG3, BASE, RB | 3529 | | decode_RB8a RB, INS |
2669 | | addu CARG4, KBASE, RC | 3530 | | decode_RB8b RB |
2670 | | lw TMP1, HI(CARG3) | 3531 | | decode_RDtoRC8 RC, RD |
2671 | | ldc1 f20, 0(CARG3) | 3532 | | // RA = dst*8, RB = src1*8, RC = num_const*8 |
2672 | | ldc1 f22, 0(CARG4) | 3533 | | addu RB, BASE, RB |
2673 | | sltiu AT, TMP1, LJ_TISNUM | 3534 | |.if "label" ~= "none" |
3535 | | b label | ||
3536 | |.endif | ||
3537 | |. addu RC, KBASE, RC | ||
2674 | || break; | 3538 | || break; |
2675 | ||case 1: | 3539 | ||case 1: |
2676 | | addu CARG4, BASE, RB | 3540 | | decode_RB8a RC, INS |
2677 | | addu CARG3, KBASE, RC | 3541 | | decode_RB8b RC |
2678 | | lw TMP1, HI(CARG4) | 3542 | | decode_RDtoRC8 RB, RD |
2679 | | ldc1 f22, 0(CARG4) | 3543 | | // RA = dst*8, RB = num_const*8, RC = src1*8 |
2680 | | ldc1 f20, 0(CARG3) | 3544 | | addu RC, BASE, RC |
2681 | | sltiu AT, TMP1, LJ_TISNUM | 3545 | |.if "label" ~= "none" |
3546 | | b label | ||
3547 | |.endif | ||
3548 | |. addu RB, KBASE, RB | ||
2682 | || break; | 3549 | || break; |
2683 | ||default: | 3550 | ||default: |
2684 | | addu CARG3, BASE, RB | 3551 | | decode_RB8a RB, INS |
2685 | | addu CARG4, BASE, RC | 3552 | | decode_RB8b RB |
2686 | | lw TMP1, HI(CARG3) | 3553 | | decode_RDtoRC8 RC, RD |
2687 | | lw TMP2, HI(CARG4) | 3554 | | // RA = dst*8, RB = src1*8, RC = src2*8 |
2688 | | ldc1 f20, 0(CARG3) | 3555 | | addu RB, BASE, RB |
2689 | | ldc1 f22, 0(CARG4) | 3556 | |.if "label" ~= "none" |
2690 | | sltiu AT, TMP1, LJ_TISNUM | 3557 | | b label |
2691 | | sltiu TMP0, TMP2, LJ_TISNUM | 3558 | |.endif |
2692 | | and AT, AT, TMP0 | 3559 | |. addu RC, BASE, RC |
2693 | || break; | 3560 | || break; |
2694 | ||} | 3561 | ||} |
2695 | | beqz AT, ->vmeta_arith | ||
2696 | |. addu RA, BASE, RA | ||
2697 | |.endmacro | 3562 | |.endmacro |
2698 | | | 3563 | | |
2699 | |.macro fpmod, a, b, c | 3564 | |.macro ins_arith, intins, fpins, fpcall, label |
2700 | |->BC_MODVN_Z: | 3565 | | ins_arithpre none |
2701 | | bal ->vm_floor // floor(b/c) | ||
2702 | |. div.d FARG1, b, c | ||
2703 | | mul.d a, FRET1, c | ||
2704 | | sub.d a, b, a // b - floor(b/c)*c | ||
2705 | |.endmacro | ||
2706 | | | 3566 | | |
2707 | |.macro ins_arith, ins | 3567 | |.if "label" ~= "none" |
2708 | | ins_arithpre | 3568 | |label: |
2709 | |.if "ins" == "fpmod_" | 3569 | |.endif |
2710 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3570 | | |
2711 | |. nop | 3571 | | lw SFARG1HI, HI(RB) |
3572 | | lw SFARG2HI, HI(RC) | ||
3573 | | | ||
3574 | |.if "intins" ~= "div" | ||
3575 | | | ||
3576 | | // Check for two integers. | ||
3577 | | lw SFARG1LO, LO(RB) | ||
3578 | | bne SFARG1HI, TISNUM, >5 | ||
3579 | |. lw SFARG2LO, LO(RC) | ||
3580 | | bne SFARG2HI, TISNUM, >5 | ||
3581 | | | ||
3582 | |.if "intins" == "addu" | ||
3583 | |. intins CRET1, SFARG1LO, SFARG2LO | ||
3584 | | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow. | ||
3585 | | xor TMP2, CRET1, SFARG2LO | ||
3586 | | and TMP1, TMP1, TMP2 | ||
3587 | | bltz TMP1, ->vmeta_arith | ||
3588 | |. addu RA, BASE, RA | ||
3589 | |.elif "intins" == "subu" | ||
3590 | |. intins CRET1, SFARG1LO, SFARG2LO | ||
3591 | | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow. | ||
3592 | | xor TMP2, SFARG1LO, SFARG2LO | ||
3593 | | and TMP1, TMP1, TMP2 | ||
3594 | | bltz TMP1, ->vmeta_arith | ||
3595 | |. addu RA, BASE, RA | ||
3596 | |.elif "intins" == "mult" | ||
3597 | |. intins SFARG1LO, SFARG2LO | ||
3598 | | mflo CRET1 | ||
3599 | | mfhi TMP2 | ||
3600 | | sra TMP1, CRET1, 31 | ||
3601 | | bne TMP1, TMP2, ->vmeta_arith | ||
3602 | |. addu RA, BASE, RA | ||
2712 | |.else | 3603 | |.else |
2713 | | ins f0, f20, f22 | 3604 | |. load_got lj_vm_modi |
3605 | | beqz SFARG2LO, ->vmeta_arith | ||
3606 | |. addu RA, BASE, RA | ||
3607 | |.if ENDIAN_BE | ||
3608 | | move CARG1, SFARG1LO | ||
3609 | |.endif | ||
3610 | | call_extern | ||
3611 | |. move CARG2, SFARG2LO | ||
3612 | |.endif | ||
3613 | | | ||
3614 | | ins_next1 | ||
3615 | | sw TISNUM, HI(RA) | ||
3616 | | sw CRET1, LO(RA) | ||
3617 | |3: | ||
3618 | | ins_next2 | ||
3619 | | | ||
3620 | |.elif not FPU | ||
3621 | | | ||
3622 | | lw SFARG1LO, LO(RB) | ||
3623 | | lw SFARG2LO, LO(RC) | ||
3624 | | | ||
3625 | |.endif | ||
3626 | | | ||
3627 | |5: // Check for two numbers. | ||
3628 | | .FPU ldc1 f20, 0(RB) | ||
3629 | | sltiu AT, SFARG1HI, LJ_TISNUM | ||
3630 | | sltiu TMP0, SFARG2HI, LJ_TISNUM | ||
3631 | | .FPU ldc1 f22, 0(RC) | ||
3632 | | and AT, AT, TMP0 | ||
3633 | | beqz AT, ->vmeta_arith | ||
3634 | |. addu RA, BASE, RA | ||
3635 | | | ||
3636 | |.if FPU | ||
3637 | | fpins FRET1, f20, f22 | ||
3638 | |.elif "fpcall" == "sfpmod" | ||
3639 | | sfpmod | ||
3640 | |.else | ||
3641 | | load_got fpcall | ||
3642 | | call_extern | ||
3643 | |. nop | ||
3644 | |.endif | ||
3645 | | | ||
2714 | | ins_next1 | 3646 | | ins_next1 |
2715 | | sdc1 f0, 0(RA) | 3647 | |.if not FPU |
3648 | | sw SFRETHI, HI(RA) | ||
3649 | |.endif | ||
3650 | |.if "intins" ~= "div" | ||
3651 | | b <3 | ||
3652 | |.endif | ||
3653 | |.if FPU | ||
3654 | |. sdc1 FRET1, 0(RA) | ||
3655 | |.else | ||
3656 | |. sw SFRETLO, LO(RA) | ||
3657 | |.endif | ||
3658 | |.if "intins" == "div" | ||
2716 | | ins_next2 | 3659 | | ins_next2 |
2717 | |.endif | 3660 | |.endif |
3661 | | | ||
2718 | |.endmacro | 3662 | |.endmacro |
2719 | 3663 | ||
2720 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3664 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
2721 | | ins_arith add.d | 3665 | | ins_arith addu, add.d, __adddf3, none |
2722 | break; | 3666 | break; |
2723 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3667 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
2724 | | ins_arith sub.d | 3668 | | ins_arith subu, sub.d, __subdf3, none |
2725 | break; | 3669 | break; |
2726 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3670 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
2727 | | ins_arith mul.d | 3671 | | ins_arith mult, mul.d, __muldf3, none |
3672 | break; | ||
3673 | case BC_DIVVN: | ||
3674 | | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z | ||
2728 | break; | 3675 | break; |
2729 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3676 | case BC_DIVNV: case BC_DIVVV: |
2730 | | ins_arith div.d | 3677 | | ins_arithpre ->BC_DIVVN_Z |
2731 | break; | 3678 | break; |
2732 | case BC_MODVN: | 3679 | case BC_MODVN: |
2733 | | ins_arith fpmod | 3680 | | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z |
2734 | break; | 3681 | break; |
2735 | case BC_MODNV: case BC_MODVV: | 3682 | case BC_MODNV: case BC_MODVV: |
2736 | | ins_arith fpmod_ | 3683 | | ins_arithpre ->BC_MODVN_Z |
2737 | break; | 3684 | break; |
2738 | case BC_POW: | 3685 | case BC_POW: |
2739 | | decode_RB8a RB, INS | 3686 | | ins_arithpre none |
2740 | | decode_RB8b RB | 3687 | | lw SFARG1HI, HI(RB) |
2741 | | decode_RDtoRC8 RC, RD | 3688 | | lw SFARG2HI, HI(RC) |
2742 | | addu CARG3, BASE, RB | 3689 | | sltiu AT, SFARG1HI, LJ_TISNUM |
2743 | | addu CARG4, BASE, RC | 3690 | | sltiu TMP0, SFARG2HI, LJ_TISNUM |
2744 | | lw TMP1, HI(CARG3) | ||
2745 | | lw TMP2, HI(CARG4) | ||
2746 | | ldc1 FARG1, 0(CARG3) | ||
2747 | | ldc1 FARG2, 0(CARG4) | ||
2748 | | sltiu AT, TMP1, LJ_TISNUM | ||
2749 | | sltiu TMP0, TMP2, LJ_TISNUM | ||
2750 | | and AT, AT, TMP0 | 3691 | | and AT, AT, TMP0 |
2751 | | load_got pow | 3692 | | load_got pow |
2752 | | beqz AT, ->vmeta_arith | 3693 | | beqz AT, ->vmeta_arith |
2753 | |. addu RA, BASE, RA | 3694 | |. addu RA, BASE, RA |
3695 | |.if FPU | ||
3696 | | ldc1 FARG1, 0(RB) | ||
3697 | | ldc1 FARG2, 0(RC) | ||
3698 | |.else | ||
3699 | | lw SFARG1LO, LO(RB) | ||
3700 | | lw SFARG2LO, LO(RC) | ||
3701 | |.endif | ||
2754 | | call_extern | 3702 | | call_extern |
2755 | |. nop | 3703 | |. nop |
2756 | | ins_next1 | 3704 | | ins_next1 |
3705 | |.if FPU | ||
2757 | | sdc1 FRET1, 0(RA) | 3706 | | sdc1 FRET1, 0(RA) |
3707 | |.else | ||
3708 | | sw SFRETHI, HI(RA) | ||
3709 | | sw SFRETLO, LO(RA) | ||
3710 | |.endif | ||
2758 | | ins_next2 | 3711 | | ins_next2 |
2759 | break; | 3712 | break; |
2760 | 3713 | ||
@@ -2777,10 +3730,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2777 | | bnez CRET1, ->vmeta_binop | 3730 | | bnez CRET1, ->vmeta_binop |
2778 | |. lw BASE, L->base | 3731 | |. lw BASE, L->base |
2779 | | addu RB, BASE, MULTRES | 3732 | | addu RB, BASE, MULTRES |
2780 | | ldc1 f0, 0(RB) | 3733 | | lw SFRETHI, HI(RB) |
3734 | | lw SFRETLO, LO(RB) | ||
2781 | | addu RA, BASE, RA | 3735 | | addu RA, BASE, RA |
2782 | | ins_next1 | 3736 | | ins_next1 |
2783 | | sdc1 f0, 0(RA) // Copy result from RB to RA. | 3737 | | sw SFRETHI, HI(RA) |
3738 | | sw SFRETLO, LO(RA) | ||
2784 | | ins_next2 | 3739 | | ins_next2 |
2785 | break; | 3740 | break; |
2786 | 3741 | ||
@@ -2815,20 +3770,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2815 | case BC_KSHORT: | 3770 | case BC_KSHORT: |
2816 | | // RA = dst*8, RD = int16_literal*8 | 3771 | | // RA = dst*8, RD = int16_literal*8 |
2817 | | sra RD, INS, 16 | 3772 | | sra RD, INS, 16 |
2818 | | mtc1 RD, f0 | ||
2819 | | addu RA, BASE, RA | 3773 | | addu RA, BASE, RA |
2820 | | cvt.d.w f0, f0 | ||
2821 | | ins_next1 | 3774 | | ins_next1 |
2822 | | sdc1 f0, 0(RA) | 3775 | | sw TISNUM, HI(RA) |
3776 | | sw RD, LO(RA) | ||
2823 | | ins_next2 | 3777 | | ins_next2 |
2824 | break; | 3778 | break; |
2825 | case BC_KNUM: | 3779 | case BC_KNUM: |
2826 | | // RA = dst*8, RD = num_const*8 | 3780 | | // RA = dst*8, RD = num_const*8 |
2827 | | addu RD, KBASE, RD | 3781 | | addu RD, KBASE, RD |
2828 | | addu RA, BASE, RA | 3782 | | addu RA, BASE, RA |
2829 | | ldc1 f0, 0(RD) | 3783 | | lw SFRETHI, HI(RD) |
3784 | | lw SFRETLO, LO(RD) | ||
2830 | | ins_next1 | 3785 | | ins_next1 |
2831 | | sdc1 f0, 0(RA) | 3786 | | sw SFRETHI, HI(RA) |
3787 | | sw SFRETLO, LO(RA) | ||
2832 | | ins_next2 | 3788 | | ins_next2 |
2833 | break; | 3789 | break; |
2834 | case BC_KPRI: | 3790 | case BC_KPRI: |
@@ -2864,9 +3820,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2864 | | lw UPVAL:RB, LFUNC:RD->uvptr | 3820 | | lw UPVAL:RB, LFUNC:RD->uvptr |
2865 | | ins_next1 | 3821 | | ins_next1 |
2866 | | lw TMP1, UPVAL:RB->v | 3822 | | lw TMP1, UPVAL:RB->v |
2867 | | ldc1 f0, 0(TMP1) | 3823 | | lw SFRETHI, HI(TMP1) |
3824 | | lw SFRETLO, LO(TMP1) | ||
2868 | | addu RA, BASE, RA | 3825 | | addu RA, BASE, RA |
2869 | | sdc1 f0, 0(RA) | 3826 | | sw SFRETHI, HI(RA) |
3827 | | sw SFRETLO, LO(RA) | ||
2870 | | ins_next2 | 3828 | | ins_next2 |
2871 | break; | 3829 | break; |
2872 | case BC_USETV: | 3830 | case BC_USETV: |
@@ -2875,26 +3833,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2875 | | srl RA, RA, 1 | 3833 | | srl RA, RA, 1 |
2876 | | addu RD, BASE, RD | 3834 | | addu RD, BASE, RD |
2877 | | addu RA, RA, LFUNC:RB | 3835 | | addu RA, RA, LFUNC:RB |
2878 | | ldc1 f0, 0(RD) | ||
2879 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3836 | | lw UPVAL:RB, LFUNC:RA->uvptr |
3837 | | lw SFRETHI, HI(RD) | ||
3838 | | lw SFRETLO, LO(RD) | ||
2880 | | lbu TMP3, UPVAL:RB->marked | 3839 | | lbu TMP3, UPVAL:RB->marked |
2881 | | lw CARG2, UPVAL:RB->v | 3840 | | lw CARG2, UPVAL:RB->v |
2882 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | 3841 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) |
2883 | | lbu TMP0, UPVAL:RB->closed | 3842 | | lbu TMP0, UPVAL:RB->closed |
2884 | | lw TMP2, HI(RD) | 3843 | | sw SFRETHI, HI(CARG2) |
2885 | | sdc1 f0, 0(CARG2) | 3844 | | sw SFRETLO, LO(CARG2) |
2886 | | li AT, LJ_GC_BLACK|1 | 3845 | | li AT, LJ_GC_BLACK|1 |
2887 | | or TMP3, TMP3, TMP0 | 3846 | | or TMP3, TMP3, TMP0 |
2888 | | beq TMP3, AT, >2 // Upvalue is closed and black? | 3847 | | beq TMP3, AT, >2 // Upvalue is closed and black? |
2889 | |. addiu TMP2, TMP2, -(LJ_TNUMX+1) | 3848 | |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1) |
2890 | |1: | 3849 | |1: |
2891 | | ins_next | 3850 | | ins_next |
2892 | | | 3851 | | |
2893 | |2: // Check if new value is collectable. | 3852 | |2: // Check if new value is collectable. |
2894 | | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) | 3853 | | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) |
2895 | | beqz AT, <1 // tvisgcv(v) | 3854 | | beqz AT, <1 // tvisgcv(v) |
2896 | |. lw TMP1, LO(RD) | 3855 | |. nop |
2897 | | lbu TMP3, GCOBJ:TMP1->gch.marked | 3856 | | lbu TMP3, GCOBJ:SFRETLO->gch.marked |
2898 | | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | 3857 | | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) |
2899 | | beqz TMP3, <1 | 3858 | | beqz TMP3, <1 |
2900 | |. load_got lj_gc_barrieruv | 3859 | |. load_got lj_gc_barrieruv |
@@ -2942,11 +3901,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2942 | | srl RA, RA, 1 | 3901 | | srl RA, RA, 1 |
2943 | | addu RD, KBASE, RD | 3902 | | addu RD, KBASE, RD |
2944 | | addu RA, RA, LFUNC:RB | 3903 | | addu RA, RA, LFUNC:RB |
2945 | | ldc1 f0, 0(RD) | 3904 | | lw UPVAL:RB, LFUNC:RA->uvptr |
2946 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3905 | | lw SFRETHI, HI(RD) |
3906 | | lw SFRETLO, LO(RD) | ||
3907 | | lw TMP1, UPVAL:RB->v | ||
2947 | | ins_next1 | 3908 | | ins_next1 |
2948 | | lw TMP1, UPVAL:RB->v | 3909 | | sw SFRETHI, HI(TMP1) |
2949 | | sdc1 f0, 0(TMP1) | 3910 | | sw SFRETLO, LO(TMP1) |
2950 | | ins_next2 | 3911 | | ins_next2 |
2951 | break; | 3912 | break; |
2952 | case BC_USETP: | 3913 | case BC_USETP: |
@@ -2956,10 +3917,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2956 | | srl TMP0, RD, 3 | 3917 | | srl TMP0, RD, 3 |
2957 | | addu RA, RA, LFUNC:RB | 3918 | | addu RA, RA, LFUNC:RB |
2958 | | not TMP0, TMP0 | 3919 | | not TMP0, TMP0 |
2959 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3920 | | lw UPVAL:RB, LFUNC:RA->uvptr |
2960 | | ins_next1 | 3921 | | ins_next1 |
2961 | | lw TMP1, UPVAL:RB->v | 3922 | | lw TMP1, UPVAL:RB->v |
2962 | | sw TMP0, HI(TMP1) | 3923 | | sw TMP0, HI(TMP1) |
2963 | | ins_next2 | 3924 | | ins_next2 |
2964 | break; | 3925 | break; |
2965 | 3926 | ||
@@ -2995,8 +3956,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2995 | | li TMP0, LJ_TFUNC | 3956 | | li TMP0, LJ_TFUNC |
2996 | | ins_next1 | 3957 | | ins_next1 |
2997 | | addu RA, BASE, RA | 3958 | | addu RA, BASE, RA |
2998 | | sw TMP0, HI(RA) | ||
2999 | | sw LFUNC:CRET1, LO(RA) | 3959 | | sw LFUNC:CRET1, LO(RA) |
3960 | | sw TMP0, HI(RA) | ||
3000 | | ins_next2 | 3961 | | ins_next2 |
3001 | break; | 3962 | break; |
3002 | 3963 | ||
@@ -3077,31 +4038,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3077 | | lw TMP2, HI(CARG3) | 4038 | | lw TMP2, HI(CARG3) |
3078 | | lw TAB:RB, LO(CARG2) | 4039 | | lw TAB:RB, LO(CARG2) |
3079 | | li AT, LJ_TTAB | 4040 | | li AT, LJ_TTAB |
3080 | | ldc1 f0, 0(CARG3) | ||
3081 | | bne TMP1, AT, ->vmeta_tgetv | 4041 | | bne TMP1, AT, ->vmeta_tgetv |
3082 | |. addu RA, BASE, RA | 4042 | |. addu RA, BASE, RA |
3083 | | sltiu AT, TMP2, LJ_TISNUM | 4043 | | bne TMP2, TISNUM, >5 |
3084 | | beqz AT, >5 | 4044 | |. lw RC, LO(CARG3) |
3085 | |. li AT, LJ_TSTR | 4045 | | lw TMP0, TAB:RB->asize |
3086 | | | ||
3087 | | // Convert number key to integer, check for integerness and range. | ||
3088 | | cvt.w.d f2, f0 | ||
3089 | | lw TMP0, TAB:RB->asize | ||
3090 | | mfc1 TMP2, f2 | ||
3091 | | cvt.d.w f4, f2 | ||
3092 | | lw TMP1, TAB:RB->array | 4046 | | lw TMP1, TAB:RB->array |
3093 | | c.eq.d f0, f4 | 4047 | | sltu AT, RC, TMP0 |
3094 | | sltu AT, TMP2, TMP0 | 4048 | | sll TMP2, RC, 3 |
3095 | | movf AT, r0 | ||
3096 | | sll TMP2, TMP2, 3 | ||
3097 | | beqz AT, ->vmeta_tgetv // Integer key and in array part? | 4049 | | beqz AT, ->vmeta_tgetv // Integer key and in array part? |
3098 | |. addu TMP2, TMP1, TMP2 | 4050 | |. addu TMP2, TMP1, TMP2 |
3099 | | lw TMP0, HI(TMP2) | 4051 | | lw SFRETHI, HI(TMP2) |
3100 | | beq TMP0, TISNIL, >2 | 4052 | | beq SFRETHI, TISNIL, >2 |
3101 | |. ldc1 f0, 0(TMP2) | 4053 | |. lw SFRETLO, LO(TMP2) |
3102 | |1: | 4054 | |1: |
3103 | | ins_next1 | 4055 | | ins_next1 |
3104 | | sdc1 f0, 0(RA) | 4056 | | sw SFRETHI, HI(RA) |
4057 | | sw SFRETLO, LO(RA) | ||
3105 | | ins_next2 | 4058 | | ins_next2 |
3106 | | | 4059 | | |
3107 | |2: // Check for __index if table value is nil. | 4060 | |2: // Check for __index if table value is nil. |
@@ -3116,8 +4069,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3116 | |. nop | 4069 | |. nop |
3117 | | | 4070 | | |
3118 | |5: | 4071 | |5: |
4072 | | li AT, LJ_TSTR | ||
3119 | | bne TMP2, AT, ->vmeta_tgetv | 4073 | | bne TMP2, AT, ->vmeta_tgetv |
3120 | |. lw STR:RC, LO(CARG3) | 4074 | |. nop |
3121 | | b ->BC_TGETS_Z // String key? | 4075 | | b ->BC_TGETS_Z // String key? |
3122 | |. nop | 4076 | |. nop |
3123 | break; | 4077 | break; |
@@ -3138,9 +4092,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3138 | |->BC_TGETS_Z: | 4092 | |->BC_TGETS_Z: |
3139 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | 4093 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 |
3140 | | lw TMP0, TAB:RB->hmask | 4094 | | lw TMP0, TAB:RB->hmask |
3141 | | lw TMP1, STR:RC->hash | 4095 | | lw TMP1, STR:RC->sid |
3142 | | lw NODE:TMP2, TAB:RB->node | 4096 | | lw NODE:TMP2, TAB:RB->node |
3143 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 4097 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask |
3144 | | sll TMP0, TMP1, 5 | 4098 | | sll TMP0, TMP1, 5 |
3145 | | sll TMP1, TMP1, 3 | 4099 | | sll TMP1, TMP1, 3 |
3146 | | subu TMP1, TMP0, TMP1 | 4100 | | subu TMP1, TMP0, TMP1 |
@@ -3149,18 +4103,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3149 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | 4103 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) |
3150 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | 4104 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) |
3151 | | lw NODE:TMP1, NODE:TMP2->next | 4105 | | lw NODE:TMP1, NODE:TMP2->next |
3152 | | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) | 4106 | | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2) |
3153 | | addiu CARG1, CARG1, -LJ_TSTR | 4107 | | addiu CARG1, CARG1, -LJ_TSTR |
3154 | | xor TMP0, TMP0, STR:RC | 4108 | | xor TMP0, TMP0, STR:RC |
3155 | | or AT, CARG1, TMP0 | 4109 | | or AT, CARG1, TMP0 |
3156 | | bnez AT, >4 | 4110 | | bnez AT, >4 |
3157 | |. lw TAB:TMP3, TAB:RB->metatable | 4111 | |. lw TAB:TMP3, TAB:RB->metatable |
3158 | | beq CARG2, TISNIL, >5 // Key found, but nil value? | 4112 | | beq SFRETHI, TISNIL, >5 // Key found, but nil value? |
3159 | |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) | 4113 | |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2) |
3160 | |3: | 4114 | |3: |
3161 | | ins_next1 | 4115 | | ins_next1 |
3162 | | sw CARG2, HI(RA) | 4116 | | sw SFRETHI, HI(RA) |
3163 | | sw CARG1, LO(RA) | 4117 | | sw SFRETLO, LO(RA) |
3164 | | ins_next2 | 4118 | | ins_next2 |
3165 | | | 4119 | | |
3166 | |4: // Follow hash chain. | 4120 | |4: // Follow hash chain. |
@@ -3170,7 +4124,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3170 | | | 4124 | | |
3171 | |5: // Check for __index if table value is nil. | 4125 | |5: // Check for __index if table value is nil. |
3172 | | beqz TAB:TMP3, <3 // No metatable: done. | 4126 | | beqz TAB:TMP3, <3 // No metatable: done. |
3173 | |. li CARG2, LJ_TNIL | 4127 | |. li SFRETHI, LJ_TNIL |
3174 | | lbu TMP0, TAB:TMP3->nomm | 4128 | | lbu TMP0, TAB:TMP3->nomm |
3175 | | andi TMP0, TMP0, 1<<MM_index | 4129 | | andi TMP0, TMP0, 1<<MM_index |
3176 | | bnez TMP0, <3 // 'no __index' flag set: done. | 4130 | | bnez TMP0, <3 // 'no __index' flag set: done. |
@@ -3195,12 +4149,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3195 | | sltu AT, TMP0, TMP1 | 4149 | | sltu AT, TMP0, TMP1 |
3196 | | beqz AT, ->vmeta_tgetb | 4150 | | beqz AT, ->vmeta_tgetb |
3197 | |. addu RC, TMP2, RC | 4151 | |. addu RC, TMP2, RC |
3198 | | lw TMP1, HI(RC) | 4152 | | lw SFRETHI, HI(RC) |
3199 | | beq TMP1, TISNIL, >5 | 4153 | | beq SFRETHI, TISNIL, >5 |
3200 | |. ldc1 f0, 0(RC) | 4154 | |. lw SFRETLO, LO(RC) |
3201 | |1: | 4155 | |1: |
3202 | | ins_next1 | 4156 | | ins_next1 |
3203 | | sdc1 f0, 0(RA) | 4157 | | sw SFRETHI, HI(RA) |
4158 | | sw SFRETLO, LO(RA) | ||
3204 | | ins_next2 | 4159 | | ins_next2 |
3205 | | | 4160 | | |
3206 | |5: // Check for __index if table value is nil. | 4161 | |5: // Check for __index if table value is nil. |
@@ -3211,9 +4166,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3211 | | andi TMP1, TMP1, 1<<MM_index | 4166 | | andi TMP1, TMP1, 1<<MM_index |
3212 | | bnez TMP1, <1 // 'no __index' flag set: done. | 4167 | | bnez TMP1, <1 // 'no __index' flag set: done. |
3213 | |. nop | 4168 | |. nop |
3214 | | b ->vmeta_tgetb // Caveat: preserve TMP0! | 4169 | | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! |
3215 | |. nop | 4170 | |. nop |
3216 | break; | 4171 | break; |
4172 | case BC_TGETR: | ||
4173 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4174 | | decode_RB8a RB, INS | ||
4175 | | decode_RB8b RB | ||
4176 | | decode_RDtoRC8 RC, RD | ||
4177 | | addu RB, BASE, RB | ||
4178 | | addu RC, BASE, RC | ||
4179 | | lw TAB:CARG1, LO(RB) | ||
4180 | | lw CARG2, LO(RC) | ||
4181 | | addu RA, BASE, RA | ||
4182 | | lw TMP0, TAB:CARG1->asize | ||
4183 | | lw TMP1, TAB:CARG1->array | ||
4184 | | sltu AT, CARG2, TMP0 | ||
4185 | | sll TMP2, CARG2, 3 | ||
4186 | | beqz AT, ->vmeta_tgetr // In array part? | ||
4187 | |. addu CRET1, TMP1, TMP2 | ||
4188 | | lw SFARG2HI, HI(CRET1) | ||
4189 | | lw SFARG2LO, LO(CRET1) | ||
4190 | |->BC_TGETR_Z: | ||
4191 | | ins_next1 | ||
4192 | | sw SFARG2HI, HI(RA) | ||
4193 | | sw SFARG2LO, LO(RA) | ||
4194 | | ins_next2 | ||
4195 | break; | ||
3217 | 4196 | ||
3218 | case BC_TSETV: | 4197 | case BC_TSETV: |
3219 | | // RA = src*8, RB = table*8, RC = key*8 | 4198 | | // RA = src*8, RB = table*8, RC = key*8 |
@@ -3226,33 +4205,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3226 | | lw TMP2, HI(CARG3) | 4205 | | lw TMP2, HI(CARG3) |
3227 | | lw TAB:RB, LO(CARG2) | 4206 | | lw TAB:RB, LO(CARG2) |
3228 | | li AT, LJ_TTAB | 4207 | | li AT, LJ_TTAB |
3229 | | ldc1 f0, 0(CARG3) | ||
3230 | | bne TMP1, AT, ->vmeta_tsetv | 4208 | | bne TMP1, AT, ->vmeta_tsetv |
3231 | |. addu RA, BASE, RA | 4209 | |. addu RA, BASE, RA |
3232 | | sltiu AT, TMP2, LJ_TISNUM | 4210 | | bne TMP2, TISNUM, >5 |
3233 | | beqz AT, >5 | 4211 | |. lw RC, LO(CARG3) |
3234 | |. li AT, LJ_TSTR | 4212 | | lw TMP0, TAB:RB->asize |
3235 | | | ||
3236 | | // Convert number key to integer, check for integerness and range. | ||
3237 | | cvt.w.d f2, f0 | ||
3238 | | lw TMP0, TAB:RB->asize | ||
3239 | | mfc1 TMP2, f2 | ||
3240 | | cvt.d.w f4, f2 | ||
3241 | | lw TMP1, TAB:RB->array | 4213 | | lw TMP1, TAB:RB->array |
3242 | | c.eq.d f0, f4 | 4214 | | sltu AT, RC, TMP0 |
3243 | | sltu AT, TMP2, TMP0 | 4215 | | sll TMP2, RC, 3 |
3244 | | movf AT, r0 | ||
3245 | | sll TMP2, TMP2, 3 | ||
3246 | | beqz AT, ->vmeta_tsetv // Integer key and in array part? | 4216 | | beqz AT, ->vmeta_tsetv // Integer key and in array part? |
3247 | |. addu TMP1, TMP1, TMP2 | 4217 | |. addu TMP1, TMP1, TMP2 |
3248 | | lbu TMP3, TAB:RB->marked | ||
3249 | | lw TMP0, HI(TMP1) | 4218 | | lw TMP0, HI(TMP1) |
4219 | | lbu TMP3, TAB:RB->marked | ||
4220 | | lw SFRETHI, HI(RA) | ||
3250 | | beq TMP0, TISNIL, >3 | 4221 | | beq TMP0, TISNIL, >3 |
3251 | |. ldc1 f0, 0(RA) | 4222 | |. lw SFRETLO, LO(RA) |
3252 | |1: | 4223 | |1: |
3253 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4224 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
3254 | | bnez AT, >7 | 4225 | | sw SFRETHI, HI(TMP1) |
3255 | |. sdc1 f0, 0(TMP1) | 4226 | | bnez AT, >7 |
4227 | |. sw SFRETLO, LO(TMP1) | ||
3256 | |2: | 4228 | |2: |
3257 | | ins_next | 4229 | | ins_next |
3258 | | | 4230 | | |
@@ -3268,8 +4240,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3268 | |. nop | 4240 | |. nop |
3269 | | | 4241 | | |
3270 | |5: | 4242 | |5: |
4243 | | li AT, LJ_TSTR | ||
3271 | | bne TMP2, AT, ->vmeta_tsetv | 4244 | | bne TMP2, AT, ->vmeta_tsetv |
3272 | |. lw STR:RC, LO(CARG3) | 4245 | |. nop |
3273 | | b ->BC_TSETS_Z // String key? | 4246 | | b ->BC_TSETS_Z // String key? |
3274 | |. nop | 4247 | |. nop |
3275 | | | 4248 | | |
@@ -3293,15 +4266,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3293 | |->BC_TSETS_Z: | 4266 | |->BC_TSETS_Z: |
3294 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | 4267 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 |
3295 | | lw TMP0, TAB:RB->hmask | 4268 | | lw TMP0, TAB:RB->hmask |
3296 | | lw TMP1, STR:RC->hash | 4269 | | lw TMP1, STR:RC->sid |
3297 | | lw NODE:TMP2, TAB:RB->node | 4270 | | lw NODE:TMP2, TAB:RB->node |
3298 | | sb r0, TAB:RB->nomm // Clear metamethod cache. | 4271 | | sb r0, TAB:RB->nomm // Clear metamethod cache. |
3299 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 4272 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask |
3300 | | sll TMP0, TMP1, 5 | 4273 | | sll TMP0, TMP1, 5 |
3301 | | sll TMP1, TMP1, 3 | 4274 | | sll TMP1, TMP1, 3 |
3302 | | subu TMP1, TMP0, TMP1 | 4275 | | subu TMP1, TMP0, TMP1 |
3303 | | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | 4276 | | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |
4277 | |.if FPU | ||
3304 | | ldc1 f20, 0(RA) | 4278 | | ldc1 f20, 0(RA) |
4279 | |.else | ||
4280 | | lw SFRETHI, HI(RA) | ||
4281 | | lw SFRETLO, LO(RA) | ||
4282 | |.endif | ||
3305 | |1: | 4283 | |1: |
3306 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | 4284 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) |
3307 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | 4285 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) |
@@ -3315,8 +4293,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3315 | |. lw TAB:TMP0, TAB:RB->metatable | 4293 | |. lw TAB:TMP0, TAB:RB->metatable |
3316 | |2: | 4294 | |2: |
3317 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4295 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
4296 | |.if FPU | ||
3318 | | bnez AT, >7 | 4297 | | bnez AT, >7 |
3319 | |. sdc1 f20, NODE:TMP2->val | 4298 | |. sdc1 f20, NODE:TMP2->val |
4299 | |.else | ||
4300 | | sw SFRETHI, NODE:TMP2->val.u32.hi | ||
4301 | | bnez AT, >7 | ||
4302 | |. sw SFRETLO, NODE:TMP2->val.u32.lo | ||
4303 | |.endif | ||
3320 | |3: | 4304 | |3: |
3321 | | ins_next | 4305 | | ins_next |
3322 | | | 4306 | | |
@@ -3354,8 +4338,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3354 | |. move CARG1, L | 4338 | |. move CARG1, L |
3355 | | // Returns TValue *. | 4339 | | // Returns TValue *. |
3356 | | lw BASE, L->base | 4340 | | lw BASE, L->base |
4341 | |.if FPU | ||
3357 | | b <3 // No 2nd write barrier needed. | 4342 | | b <3 // No 2nd write barrier needed. |
3358 | |. sdc1 f20, 0(CRET1) | 4343 | |. sdc1 f20, 0(CRET1) |
4344 | |.else | ||
4345 | | lw SFARG1HI, HI(RA) | ||
4346 | | lw SFARG1LO, LO(RA) | ||
4347 | | sw SFARG1HI, HI(CRET1) | ||
4348 | | b <3 // No 2nd write barrier needed. | ||
4349 | |. sw SFARG1LO, LO(CRET1) | ||
4350 | |.endif | ||
3359 | | | 4351 | | |
3360 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4352 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
3361 | | barrierback TAB:RB, TMP3, TMP0, <3 | 4353 | | barrierback TAB:RB, TMP3, TMP0, <3 |
@@ -3380,11 +4372,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3380 | | lw TMP1, HI(RC) | 4372 | | lw TMP1, HI(RC) |
3381 | | lbu TMP3, TAB:RB->marked | 4373 | | lbu TMP3, TAB:RB->marked |
3382 | | beq TMP1, TISNIL, >5 | 4374 | | beq TMP1, TISNIL, >5 |
3383 | |. ldc1 f0, 0(RA) | ||
3384 | |1: | 4375 | |1: |
4376 | |. lw SFRETHI, HI(RA) | ||
4377 | | lw SFRETLO, LO(RA) | ||
3385 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4378 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
4379 | | sw SFRETHI, HI(RC) | ||
3386 | | bnez AT, >7 | 4380 | | bnez AT, >7 |
3387 | |. sdc1 f0, 0(RC) | 4381 | |. sw SFRETLO, LO(RC) |
3388 | |2: | 4382 | |2: |
3389 | | ins_next | 4383 | | ins_next |
3390 | | | 4384 | | |
@@ -3396,12 +4390,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3396 | | andi TMP1, TMP1, 1<<MM_newindex | 4390 | | andi TMP1, TMP1, 1<<MM_newindex |
3397 | | bnez TMP1, <1 // 'no __newindex' flag set: done. | 4391 | | bnez TMP1, <1 // 'no __newindex' flag set: done. |
3398 | |. nop | 4392 | |. nop |
3399 | | b ->vmeta_tsetb // Caveat: preserve TMP0! | 4393 | | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! |
3400 | |. nop | 4394 | |. nop |
3401 | | | 4395 | | |
3402 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4396 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
3403 | | barrierback TAB:RB, TMP3, TMP0, <2 | 4397 | | barrierback TAB:RB, TMP3, TMP0, <2 |
3404 | break; | 4398 | break; |
4399 | case BC_TSETR: | ||
4400 | | // RA = src*8, RB = table*8, RC = key*8 | ||
4401 | | decode_RB8a RB, INS | ||
4402 | | decode_RB8b RB | ||
4403 | | decode_RDtoRC8 RC, RD | ||
4404 | | addu CARG1, BASE, RB | ||
4405 | | addu CARG3, BASE, RC | ||
4406 | | lw TAB:CARG2, LO(CARG1) | ||
4407 | | lw CARG3, LO(CARG3) | ||
4408 | | lbu TMP3, TAB:CARG2->marked | ||
4409 | | lw TMP0, TAB:CARG2->asize | ||
4410 | | lw TMP1, TAB:CARG2->array | ||
4411 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4412 | | bnez AT, >7 | ||
4413 | |. addu RA, BASE, RA | ||
4414 | |2: | ||
4415 | | sltu AT, CARG3, TMP0 | ||
4416 | | sll TMP2, CARG3, 3 | ||
4417 | | beqz AT, ->vmeta_tsetr // In array part? | ||
4418 | |. addu CRET1, TMP1, TMP2 | ||
4419 | |->BC_TSETR_Z: | ||
4420 | | lw SFARG1HI, HI(RA) | ||
4421 | | lw SFARG1LO, LO(RA) | ||
4422 | | ins_next1 | ||
4423 | | sw SFARG1HI, HI(CRET1) | ||
4424 | | sw SFARG1LO, LO(CRET1) | ||
4425 | | ins_next2 | ||
4426 | | | ||
4427 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4428 | | barrierback TAB:CARG2, TMP3, CRET1, <2 | ||
4429 | break; | ||
3405 | 4430 | ||
3406 | case BC_TSETM: | 4431 | case BC_TSETM: |
3407 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | 4432 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) |
@@ -3424,10 +4449,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3424 | | addu TMP1, TMP1, CARG1 | 4449 | | addu TMP1, TMP1, CARG1 |
3425 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4450 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
3426 | |3: // Copy result slots to table. | 4451 | |3: // Copy result slots to table. |
3427 | | ldc1 f0, 0(RA) | 4452 | | lw SFRETHI, HI(RA) |
4453 | | lw SFRETLO, LO(RA) | ||
3428 | | addiu RA, RA, 8 | 4454 | | addiu RA, RA, 8 |
3429 | | sltu AT, RA, TMP2 | 4455 | | sltu AT, RA, TMP2 |
3430 | | sdc1 f0, 0(TMP1) | 4456 | | sw SFRETHI, HI(TMP1) |
4457 | | sw SFRETLO, LO(TMP1) | ||
3431 | | bnez AT, <3 | 4458 | | bnez AT, <3 |
3432 | |. addiu TMP1, TMP1, 8 | 4459 | |. addiu TMP1, TMP1, 8 |
3433 | | bnez TMP0, >7 | 4460 | | bnez TMP0, >7 |
@@ -3502,10 +4529,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3502 | | beqz NARGS8:RC, >3 | 4529 | | beqz NARGS8:RC, >3 |
3503 | |. move TMP3, NARGS8:RC | 4530 | |. move TMP3, NARGS8:RC |
3504 | |2: | 4531 | |2: |
3505 | | ldc1 f0, 0(RA) | 4532 | | lw SFRETHI, HI(RA) |
4533 | | lw SFRETLO, LO(RA) | ||
3506 | | addiu RA, RA, 8 | 4534 | | addiu RA, RA, 8 |
3507 | | addiu TMP3, TMP3, -8 | 4535 | | addiu TMP3, TMP3, -8 |
3508 | | sdc1 f0, 0(TMP2) | 4536 | | sw SFRETHI, HI(TMP2) |
4537 | | sw SFRETLO, LO(TMP2) | ||
3509 | | bnez TMP3, <2 | 4538 | | bnez TMP3, <2 |
3510 | |. addiu TMP2, TMP2, 8 | 4539 | |. addiu TMP2, TMP2, 8 |
3511 | |3: | 4540 | |3: |
@@ -3542,12 +4571,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3542 | | li AT, LJ_TFUNC | 4571 | | li AT, LJ_TFUNC |
3543 | | lw TMP1, -24+HI(BASE) | 4572 | | lw TMP1, -24+HI(BASE) |
3544 | | lw LFUNC:RB, -24+LO(BASE) | 4573 | | lw LFUNC:RB, -24+LO(BASE) |
3545 | | ldc1 f2, -8(BASE) | 4574 | | lw SFARG1HI, -16+HI(BASE) |
3546 | | ldc1 f0, -16(BASE) | 4575 | | lw SFARG1LO, -16+LO(BASE) |
4576 | | lw SFARG2HI, -8+HI(BASE) | ||
4577 | | lw SFARG2LO, -8+LO(BASE) | ||
3547 | | sw TMP1, HI(BASE) // Copy callable. | 4578 | | sw TMP1, HI(BASE) // Copy callable. |
3548 | | sw LFUNC:RB, LO(BASE) | 4579 | | sw LFUNC:RB, LO(BASE) |
3549 | | sdc1 f2, 16(BASE) // Copy control var. | 4580 | | sw SFARG1HI, 8+HI(BASE) // Copy state. |
3550 | | sdc1 f0, 8(BASE) // Copy state. | 4581 | | sw SFARG1LO, 8+LO(BASE) |
4582 | | sw SFARG2HI, 16+HI(BASE) // Copy control var. | ||
4583 | | sw SFARG2LO, 16+LO(BASE) | ||
3551 | | addiu BASE, BASE, 8 | 4584 | | addiu BASE, BASE, 8 |
3552 | | bne TMP1, AT, ->vmeta_call | 4585 | | bne TMP1, AT, ->vmeta_call |
3553 | |. li NARGS8:RC, 16 // Iterators get 2 arguments. | 4586 | |. li NARGS8:RC, 16 // Iterators get 2 arguments. |
@@ -3555,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3555 | break; | 4588 | break; |
3556 | 4589 | ||
3557 | case BC_ITERN: | 4590 | case BC_ITERN: |
3558 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | 4591 | |.if JIT and ENDIAN_LE |
3559 | |.if JIT | 4592 | | hotloop |
3560 | | // NYI: add hotloop, record BC_ITERN. | ||
3561 | |.endif | 4593 | |.endif |
4594 | |->vm_IITERN: | ||
4595 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | ||
3562 | | addu RA, BASE, RA | 4596 | | addu RA, BASE, RA |
3563 | | lw TAB:RB, -16+LO(RA) | 4597 | | lw TAB:RB, -16+LO(RA) |
3564 | | lw RC, -8+LO(RA) // Get index from control var. | 4598 | | lw RC, -8+LO(RA) // Get index from control var. |
@@ -3570,20 +4604,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3570 | | beqz AT, >5 // Index points after array part? | 4604 | | beqz AT, >5 // Index points after array part? |
3571 | |. sll TMP3, RC, 3 | 4605 | |. sll TMP3, RC, 3 |
3572 | | addu TMP3, TMP1, TMP3 | 4606 | | addu TMP3, TMP1, TMP3 |
3573 | | lw TMP2, HI(TMP3) | 4607 | | lw SFARG1HI, HI(TMP3) |
3574 | | ldc1 f0, 0(TMP3) | 4608 | | lw SFARG1LO, LO(TMP3) |
3575 | | mtc1 RC, f2 | ||
3576 | | lhu RD, -4+OFS_RD(PC) | 4609 | | lhu RD, -4+OFS_RD(PC) |
3577 | | beq TMP2, TISNIL, <1 // Skip holes in array part. | 4610 | | sw TISNUM, HI(RA) |
4611 | | sw RC, LO(RA) | ||
4612 | | beq SFARG1HI, TISNIL, <1 // Skip holes in array part. | ||
3578 | |. addiu RC, RC, 1 | 4613 | |. addiu RC, RC, 1 |
3579 | | cvt.d.w f2, f2 | 4614 | | sw SFARG1HI, 8+HI(RA) |
4615 | | sw SFARG1LO, 8+LO(RA) | ||
3580 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 4616 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
3581 | | sdc1 f0, 8(RA) | ||
3582 | | decode_RD4b RD | 4617 | | decode_RD4b RD |
3583 | | addu RD, RD, TMP3 | 4618 | | addu RD, RD, TMP3 |
3584 | | sw RC, -8+LO(RA) // Update control var. | 4619 | | sw RC, -8+LO(RA) // Update control var. |
3585 | | addu PC, PC, RD | 4620 | | addu PC, PC, RD |
3586 | | sdc1 f2, 0(RA) | ||
3587 | |3: | 4621 | |3: |
3588 | | ins_next | 4622 | | ins_next |
3589 | | | 4623 | | |
@@ -3598,18 +4632,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3598 | | sll RB, RC, 3 | 4632 | | sll RB, RC, 3 |
3599 | | subu TMP3, TMP3, RB | 4633 | | subu TMP3, TMP3, RB |
3600 | | addu NODE:TMP3, TMP3, TMP2 | 4634 | | addu NODE:TMP3, TMP3, TMP2 |
3601 | | lw RB, HI(NODE:TMP3) | 4635 | | lw SFARG1HI, NODE:TMP3->val.u32.hi |
3602 | | ldc1 f0, 0(NODE:TMP3) | 4636 | | lw SFARG1LO, NODE:TMP3->val.u32.lo |
3603 | | lhu RD, -4+OFS_RD(PC) | 4637 | | lhu RD, -4+OFS_RD(PC) |
3604 | | beq RB, TISNIL, <6 // Skip holes in hash part. | 4638 | | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part. |
3605 | |. addiu RC, RC, 1 | 4639 | |. addiu RC, RC, 1 |
3606 | | ldc1 f2, NODE:TMP3->key | 4640 | | lw SFARG2HI, NODE:TMP3->key.u32.hi |
4641 | | lw SFARG2LO, NODE:TMP3->key.u32.lo | ||
3607 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 4642 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
3608 | | sdc1 f0, 8(RA) | 4643 | | sw SFARG1HI, 8+HI(RA) |
4644 | | sw SFARG1LO, 8+LO(RA) | ||
3609 | | addu RC, RC, TMP0 | 4645 | | addu RC, RC, TMP0 |
3610 | | decode_RD4b RD | 4646 | | decode_RD4b RD |
3611 | | addu RD, RD, TMP3 | 4647 | | addu RD, RD, TMP3 |
3612 | | sdc1 f2, 0(RA) | 4648 | | sw SFARG2HI, HI(RA) |
4649 | | sw SFARG2LO, LO(RA) | ||
3613 | | addu PC, PC, RD | 4650 | | addu PC, PC, RD |
3614 | | b <3 | 4651 | | b <3 |
3615 | |. sw RC, -8+LO(RA) // Update control var. | 4652 | |. sw RC, -8+LO(RA) // Update control var. |
@@ -3634,9 +4671,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3634 | | addiu CARG2, CARG2, -FF_next_N | 4671 | | addiu CARG2, CARG2, -FF_next_N |
3635 | | or CARG2, CARG2, CARG3 | 4672 | | or CARG2, CARG2, CARG3 |
3636 | | bnez CARG2, >5 | 4673 | | bnez CARG2, >5 |
3637 | |. lui TMP1, 0xfffe | 4674 | |. lui TMP1, (LJ_KEYINDEX >> 16) |
3638 | | addu PC, TMP0, TMP2 | 4675 | | addu PC, TMP0, TMP2 |
3639 | | ori TMP1, TMP1, 0x7fff | 4676 | | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) |
3640 | | sw r0, -8+LO(RA) // Initialize control var. | 4677 | | sw r0, -8+LO(RA) // Initialize control var. |
3641 | | sw TMP1, -8+HI(RA) | 4678 | | sw TMP1, -8+HI(RA) |
3642 | |1: | 4679 | |1: |
@@ -3645,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3645 | | li TMP3, BC_JMP | 4682 | | li TMP3, BC_JMP |
3646 | | li TMP1, BC_ITERC | 4683 | | li TMP1, BC_ITERC |
3647 | | sb TMP3, -4+OFS_OP(PC) | 4684 | | sb TMP3, -4+OFS_OP(PC) |
3648 | | addu PC, TMP0, TMP2 | 4685 | | addu PC, TMP0, TMP2 |
4686 | |.if JIT | ||
4687 | | lb TMP0, OFS_OP(PC) | ||
4688 | | li AT, BC_ITERN | ||
4689 | | bne TMP0, AT, >6 | ||
4690 | |. lhu TMP2, OFS_RD(PC) | ||
4691 | |.endif | ||
3649 | | b <1 | 4692 | | b <1 |
3650 | |. sb TMP1, OFS_OP(PC) | 4693 | |. sb TMP1, OFS_OP(PC) |
4694 | |.if JIT | ||
4695 | |6: // Unpatch JLOOP. | ||
4696 | | lw TMP0, DISPATCH_J(trace)(DISPATCH) | ||
4697 | | sll TMP2, TMP2, 2 | ||
4698 | | addu TMP0, TMP0, TMP2 | ||
4699 | | lw TRACE:TMP2, 0(TMP0) | ||
4700 | | lw TMP0, TRACE:TMP2->startins | ||
4701 | | li AT, -256 | ||
4702 | | and TMP0, TMP0, AT | ||
4703 | | or TMP0, TMP0, TMP1 | ||
4704 | | b <1 | ||
4705 | |. sw TMP0, 0(PC) | ||
4706 | |.endif | ||
3651 | break; | 4707 | break; |
3652 | 4708 | ||
3653 | case BC_VARG: | 4709 | case BC_VARG: |
@@ -3689,9 +4745,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3689 | | bnez AT, >7 | 4745 | | bnez AT, >7 |
3690 | |. addiu MULTRES, TMP1, 8 | 4746 | |. addiu MULTRES, TMP1, 8 |
3691 | |6: | 4747 | |6: |
3692 | | ldc1 f0, 0(RC) | 4748 | | lw SFRETHI, HI(RC) |
4749 | | lw SFRETLO, LO(RC) | ||
3693 | | addiu RC, RC, 8 | 4750 | | addiu RC, RC, 8 |
3694 | | sdc1 f0, 0(RA) | 4751 | | sw SFRETHI, HI(RA) |
4752 | | sw SFRETLO, LO(RA) | ||
3695 | | sltu AT, RC, TMP3 | 4753 | | sltu AT, RC, TMP3 |
3696 | | bnez AT, <6 // More vararg slots? | 4754 | | bnez AT, <6 // More vararg slots? |
3697 | |. addiu RA, RA, 8 | 4755 | |. addiu RA, RA, 8 |
@@ -3747,10 +4805,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3747 | | beqz RC, >3 | 4805 | | beqz RC, >3 |
3748 | |. subu BASE, TMP2, TMP0 | 4806 | |. subu BASE, TMP2, TMP0 |
3749 | |2: | 4807 | |2: |
3750 | | ldc1 f0, 0(RA) | 4808 | | lw SFRETHI, HI(RA) |
4809 | | lw SFRETLO, LO(RA) | ||
3751 | | addiu RA, RA, 8 | 4810 | | addiu RA, RA, 8 |
3752 | | addiu RC, RC, -8 | 4811 | | addiu RC, RC, -8 |
3753 | | sdc1 f0, 0(TMP2) | 4812 | | sw SFRETHI, HI(TMP2) |
4813 | | sw SFRETLO, LO(TMP2) | ||
3754 | | bnez RC, <2 | 4814 | | bnez RC, <2 |
3755 | |. addiu TMP2, TMP2, 8 | 4815 | |. addiu TMP2, TMP2, 8 |
3756 | |3: | 4816 | |3: |
@@ -3791,14 +4851,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3791 | | lw INS, -4(PC) | 4851 | | lw INS, -4(PC) |
3792 | | addiu TMP2, BASE, -8 | 4852 | | addiu TMP2, BASE, -8 |
3793 | if (op == BC_RET1) { | 4853 | if (op == BC_RET1) { |
3794 | | ldc1 f0, 0(RA) | 4854 | | lw SFRETHI, HI(RA) |
4855 | | lw SFRETLO, LO(RA) | ||
3795 | } | 4856 | } |
3796 | | decode_RB8a RB, INS | 4857 | | decode_RB8a RB, INS |
3797 | | decode_RA8a RA, INS | 4858 | | decode_RA8a RA, INS |
3798 | | decode_RB8b RB | 4859 | | decode_RB8b RB |
3799 | | decode_RA8b RA | 4860 | | decode_RA8b RA |
3800 | if (op == BC_RET1) { | 4861 | if (op == BC_RET1) { |
3801 | | sdc1 f0, 0(TMP2) | 4862 | | sw SFRETHI, HI(TMP2) |
4863 | | sw SFRETLO, LO(TMP2) | ||
3802 | } | 4864 | } |
3803 | | subu BASE, TMP2, RA | 4865 | | subu BASE, TMP2, RA |
3804 | |5: | 4866 | |5: |
@@ -3840,69 +4902,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3840 | | // RA = base*8, RD = target (after end of loop or start of loop) | 4902 | | // RA = base*8, RD = target (after end of loop or start of loop) |
3841 | vk = (op == BC_IFORL || op == BC_JFORL); | 4903 | vk = (op == BC_IFORL || op == BC_JFORL); |
3842 | | addu RA, BASE, RA | 4904 | | addu RA, BASE, RA |
3843 | if (vk) { | 4905 | | lw SFARG1HI, FORL_IDX*8+HI(RA) |
3844 | | ldc1 f0, FORL_IDX*8(RA) | 4906 | | lw SFARG1LO, FORL_IDX*8+LO(RA) |
3845 | | ldc1 f4, FORL_STEP*8(RA) | ||
3846 | | ldc1 f2, FORL_STOP*8(RA) | ||
3847 | | lw TMP3, FORL_STEP*8+HI(RA) | ||
3848 | | add.d f0, f0, f4 | ||
3849 | | sdc1 f0, FORL_IDX*8(RA) | ||
3850 | } else { | ||
3851 | | lw TMP1, FORL_IDX*8+HI(RA) | ||
3852 | | lw TMP3, FORL_STEP*8+HI(RA) | ||
3853 | | lw TMP2, FORL_STOP*8+HI(RA) | ||
3854 | | sltiu TMP1, TMP1, LJ_TISNUM | ||
3855 | | sltiu TMP0, TMP3, LJ_TISNUM | ||
3856 | | sltiu TMP2, TMP2, LJ_TISNUM | ||
3857 | | and TMP1, TMP1, TMP0 | ||
3858 | | and TMP1, TMP1, TMP2 | ||
3859 | | ldc1 f0, FORL_IDX*8(RA) | ||
3860 | | beqz TMP1, ->vmeta_for | ||
3861 | |. ldc1 f2, FORL_STOP*8(RA) | ||
3862 | } | ||
3863 | if (op != BC_JFORL) { | 4907 | if (op != BC_JFORL) { |
3864 | | srl RD, RD, 1 | 4908 | | srl RD, RD, 1 |
3865 | | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) | 4909 | | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) |
4910 | | addu TMP2, RD, TMP2 | ||
4911 | } | ||
4912 | if (!vk) { | ||
4913 | | lw SFARG2HI, FORL_STOP*8+HI(RA) | ||
4914 | | lw SFARG2LO, FORL_STOP*8+LO(RA) | ||
4915 | | bne SFARG1HI, TISNUM, >5 | ||
4916 | |. lw SFRETHI, FORL_STEP*8+HI(RA) | ||
4917 | | xor AT, SFARG2HI, TISNUM | ||
4918 | | lw SFRETLO, FORL_STEP*8+LO(RA) | ||
4919 | | xor TMP0, SFRETHI, TISNUM | ||
4920 | | or AT, AT, TMP0 | ||
4921 | | bnez AT, ->vmeta_for | ||
4922 | |. slt AT, SFRETLO, r0 | ||
4923 | | slt CRET1, SFARG2LO, SFARG1LO | ||
4924 | | slt TMP1, SFARG1LO, SFARG2LO | ||
4925 | | movn CRET1, TMP1, AT | ||
4926 | } else { | ||
4927 | | bne SFARG1HI, TISNUM, >5 | ||
4928 | |. lw SFARG2LO, FORL_STEP*8+LO(RA) | ||
4929 | | lw SFRETLO, FORL_STOP*8+LO(RA) | ||
4930 | | move TMP3, SFARG1LO | ||
4931 | | addu SFARG1LO, SFARG1LO, SFARG2LO | ||
4932 | | xor TMP0, SFARG1LO, TMP3 | ||
4933 | | xor TMP1, SFARG1LO, SFARG2LO | ||
4934 | | and TMP0, TMP0, TMP1 | ||
4935 | | slt TMP1, SFARG1LO, SFRETLO | ||
4936 | | slt CRET1, SFRETLO, SFARG1LO | ||
4937 | | slt AT, SFARG2LO, r0 | ||
4938 | | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. | ||
4939 | | movn CRET1, TMP1, AT | ||
4940 | | or CRET1, CRET1, TMP0 | ||
4941 | } | ||
4942 | |1: | ||
4943 | if (op == BC_FORI) { | ||
4944 | | movz TMP2, r0, CRET1 | ||
4945 | | addu PC, PC, TMP2 | ||
4946 | } else if (op == BC_JFORI) { | ||
4947 | | addu PC, PC, TMP2 | ||
4948 | | lhu RD, -4+OFS_RD(PC) | ||
4949 | } else if (op == BC_IFORL) { | ||
4950 | | movn TMP2, r0, CRET1 | ||
4951 | | addu PC, PC, TMP2 | ||
4952 | } | ||
4953 | if (vk) { | ||
4954 | | sw SFARG1HI, FORL_IDX*8+HI(RA) | ||
4955 | | sw SFARG1LO, FORL_IDX*8+LO(RA) | ||
3866 | } | 4956 | } |
3867 | | c.le.d 0, f0, f2 | 4957 | | ins_next1 |
3868 | | c.le.d 1, f2, f0 | 4958 | | sw SFARG1HI, FORL_EXT*8+HI(RA) |
3869 | | sdc1 f0, FORL_EXT*8(RA) | 4959 | | sw SFARG1LO, FORL_EXT*8+LO(RA) |
4960 | |2: | ||
3870 | if (op == BC_JFORI) { | 4961 | if (op == BC_JFORI) { |
3871 | | li TMP1, 1 | 4962 | | beqz CRET1, =>BC_JLOOP |
3872 | | li TMP2, 1 | ||
3873 | | addu TMP0, RD, TMP0 | ||
3874 | | slt TMP3, TMP3, r0 | ||
3875 | | movf TMP1, r0, 0 | ||
3876 | | addu PC, PC, TMP0 | ||
3877 | | movf TMP2, r0, 1 | ||
3878 | | lhu RD, -4+OFS_RD(PC) | ||
3879 | | movn TMP1, TMP2, TMP3 | ||
3880 | | bnez TMP1, =>BC_JLOOP | ||
3881 | |. decode_RD8b RD | 4963 | |. decode_RD8b RD |
3882 | } else if (op == BC_JFORL) { | 4964 | } else if (op == BC_JFORL) { |
3883 | | li TMP1, 1 | 4965 | | beqz CRET1, =>BC_JLOOP |
3884 | | li TMP2, 1 | 4966 | } |
3885 | | slt TMP3, TMP3, r0 | 4967 | | ins_next2 |
3886 | | movf TMP1, r0, 0 | 4968 | | |
3887 | | movf TMP2, r0, 1 | 4969 | |5: // FP loop. |
3888 | | movn TMP1, TMP2, TMP3 | 4970 | |.if FPU |
3889 | | bnez TMP1, =>BC_JLOOP | 4971 | if (!vk) { |
4972 | | ldc1 f0, FORL_IDX*8(RA) | ||
4973 | | ldc1 f2, FORL_STOP*8(RA) | ||
4974 | | sltiu TMP0, SFARG1HI, LJ_TISNUM | ||
4975 | | sltiu TMP1, SFARG2HI, LJ_TISNUM | ||
4976 | | sltiu AT, SFRETHI, LJ_TISNUM | ||
4977 | | and TMP0, TMP0, TMP1 | ||
4978 | | and AT, AT, TMP0 | ||
4979 | | beqz AT, ->vmeta_for | ||
4980 | |. slt TMP3, SFRETHI, r0 | ||
4981 | | c.ole.d 0, f0, f2 | ||
4982 | | c.ole.d 1, f2, f0 | ||
4983 | | li CRET1, 1 | ||
4984 | | movt CRET1, r0, 0 | ||
4985 | | movt AT, r0, 1 | ||
4986 | | b <1 | ||
4987 | |. movn CRET1, AT, TMP3 | ||
4988 | } else { | ||
4989 | | ldc1 f0, FORL_IDX*8(RA) | ||
4990 | | ldc1 f4, FORL_STEP*8(RA) | ||
4991 | | ldc1 f2, FORL_STOP*8(RA) | ||
4992 | | lw SFARG2HI, FORL_STEP*8+HI(RA) | ||
4993 | | add.d f0, f0, f4 | ||
4994 | | c.ole.d 0, f0, f2 | ||
4995 | | c.ole.d 1, f2, f0 | ||
4996 | | slt TMP3, SFARG2HI, r0 | ||
4997 | | li CRET1, 1 | ||
4998 | | li AT, 1 | ||
4999 | | movt CRET1, r0, 0 | ||
5000 | | movt AT, r0, 1 | ||
5001 | | movn CRET1, AT, TMP3 | ||
5002 | if (op == BC_IFORL) { | ||
5003 | | movn TMP2, r0, CRET1 | ||
5004 | | addu PC, PC, TMP2 | ||
5005 | } | ||
5006 | | sdc1 f0, FORL_IDX*8(RA) | ||
5007 | | ins_next1 | ||
5008 | | b <2 | ||
5009 | |. sdc1 f0, FORL_EXT*8(RA) | ||
5010 | } | ||
5011 | |.else | ||
5012 | if (!vk) { | ||
5013 | | sltiu TMP0, SFARG1HI, LJ_TISNUM | ||
5014 | | sltiu TMP1, SFARG2HI, LJ_TISNUM | ||
5015 | | sltiu AT, SFRETHI, LJ_TISNUM | ||
5016 | | and TMP0, TMP0, TMP1 | ||
5017 | | and AT, AT, TMP0 | ||
5018 | | beqz AT, ->vmeta_for | ||
5019 | |. nop | ||
5020 | | bal ->vm_sfcmpolex | ||
5021 | |. move TMP3, SFRETHI | ||
5022 | | b <1 | ||
3890 | |. nop | 5023 | |. nop |
3891 | } else { | 5024 | } else { |
3892 | | addu TMP1, RD, TMP0 | 5025 | | lw SFARG2HI, FORL_STEP*8+HI(RA) |
3893 | | slt TMP3, TMP3, r0 | 5026 | | load_got __adddf3 |
3894 | | move TMP2, TMP1 | 5027 | | call_extern |
3895 | if (op == BC_FORI) { | 5028 | |. sw TMP2, ARG5 |
3896 | | movt TMP1, r0, 0 | 5029 | | lw SFARG2HI, FORL_STOP*8+HI(RA) |
3897 | | movt TMP2, r0, 1 | 5030 | | lw SFARG2LO, FORL_STOP*8+LO(RA) |
5031 | | move SFARG1HI, SFRETHI | ||
5032 | | move SFARG1LO, SFRETLO | ||
5033 | | bal ->vm_sfcmpolex | ||
5034 | |. lw TMP3, FORL_STEP*8+HI(RA) | ||
5035 | if ( op == BC_JFORL ) { | ||
5036 | | lhu RD, -4+OFS_RD(PC) | ||
5037 | | lw TMP2, ARG5 | ||
5038 | | b <1 | ||
5039 | |. decode_RD8b RD | ||
3898 | } else { | 5040 | } else { |
3899 | | movf TMP1, r0, 0 | 5041 | | b <1 |
3900 | | movf TMP2, r0, 1 | 5042 | |. lw TMP2, ARG5 |
3901 | } | 5043 | } |
3902 | | movn TMP1, TMP2, TMP3 | ||
3903 | | addu PC, PC, TMP1 | ||
3904 | } | 5044 | } |
3905 | | ins_next | 5045 | |.endif |
3906 | break; | 5046 | break; |
3907 | 5047 | ||
3908 | case BC_ITERL: | 5048 | case BC_ITERL: |
@@ -3961,8 +5101,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3961 | | sw AT, DISPATCH_GL(vmstate)(DISPATCH) | 5101 | | sw AT, DISPATCH_GL(vmstate)(DISPATCH) |
3962 | | lw TRACE:TMP2, 0(TMP1) | 5102 | | lw TRACE:TMP2, 0(TMP1) |
3963 | | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) | 5103 | | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) |
3964 | | sw L, DISPATCH_GL(jit_L)(DISPATCH) | ||
3965 | | lw TMP2, TRACE:TMP2->mcode | 5104 | | lw TMP2, TRACE:TMP2->mcode |
5105 | | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | ||
3966 | | jr TMP2 | 5106 | | jr TMP2 |
3967 | |. addiu JGL, DISPATCH, GG_DISP2G+32768 | 5107 | |. addiu JGL, DISPATCH, GG_DISP2G+32768 |
3968 | |.endif | 5108 | |.endif |
@@ -4088,6 +5228,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4088 | | li_vmstate INTERP | 5228 | | li_vmstate INTERP |
4089 | | lw PC, FRAME_PC(BASE) // Fetch PC of caller. | 5229 | | lw PC, FRAME_PC(BASE) // Fetch PC of caller. |
4090 | | subu RA, TMP1, RD // RA = L->top - nresults*8 | 5230 | | subu RA, TMP1, RD // RA = L->top - nresults*8 |
5231 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
4091 | | b ->vm_returnc | 5232 | | b ->vm_returnc |
4092 | |. st_vmstate | 5233 | |. st_vmstate |
4093 | break; | 5234 | break; |
@@ -4150,8 +5291,10 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4150 | fcofs, CFRAME_SIZE); | 5291 | fcofs, CFRAME_SIZE); |
4151 | for (i = 23; i >= 16; i--) | 5292 | for (i = 23; i >= 16; i--) |
4152 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); | 5293 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); |
5294 | #if !LJ_SOFTFP | ||
4153 | for (i = 30; i >= 20; i -= 2) | 5295 | for (i = 30; i >= 20; i -= 2) |
4154 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); | 5296 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); |
5297 | #endif | ||
4155 | fprintf(ctx->fp, | 5298 | fprintf(ctx->fp, |
4156 | "\t.align 2\n" | 5299 | "\t.align 2\n" |
4157 | ".LEFDE0:\n\n"); | 5300 | ".LEFDE0:\n\n"); |
@@ -4203,8 +5346,10 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4203 | fcofs, CFRAME_SIZE); | 5346 | fcofs, CFRAME_SIZE); |
4204 | for (i = 23; i >= 16; i--) | 5347 | for (i = 23; i >= 16; i--) |
4205 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); | 5348 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); |
5349 | #if !LJ_SOFTFP | ||
4206 | for (i = 30; i >= 20; i -= 2) | 5350 | for (i = 30; i >= 20; i -= 2) |
4207 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); | 5351 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); |
5352 | #endif | ||
4208 | fprintf(ctx->fp, | 5353 | fprintf(ctx->fp, |
4209 | "\t.align 2\n" | 5354 | "\t.align 2\n" |
4210 | ".LEFDE2:\n\n"); | 5355 | ".LEFDE2:\n\n"); |
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc new file mode 100644 index 00000000..651bc42e --- /dev/null +++ b/src/vm_mips64.dasc | |||
@@ -0,0 +1,5538 @@ | |||
1 | |// Low-level VM code for MIPS64 CPUs. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | |// | ||
5 | |// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | |// Sponsored by Cisco Systems, Inc. | ||
7 | | | ||
8 | |.arch mips64 | ||
9 | |.section code_op, code_sub | ||
10 | | | ||
11 | |.actionlist build_actionlist | ||
12 | |.globals GLOB_ | ||
13 | |.globalnames globnames | ||
14 | |.externnames extnames | ||
15 | | | ||
16 | |// Note: The ragged indentation of the instructions is intentional. | ||
17 | |// The starting columns indicate data dependencies. | ||
18 | | | ||
19 | |//----------------------------------------------------------------------- | ||
20 | | | ||
21 | |// Fixed register assignments for the interpreter. | ||
22 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra | ||
23 | | | ||
24 | |.macro .FPU, a, b | ||
25 | |.if FPU | ||
26 | | a, b | ||
27 | |.endif | ||
28 | |.endmacro | ||
29 | | | ||
30 | |// The following must be C callee-save (but BASE is often refetched). | ||
31 | |.define BASE, r16 // Base of current Lua stack frame. | ||
32 | |.define KBASE, r17 // Constants of current Lua function. | ||
33 | |.define PC, r18 // Next PC. | ||
34 | |.define DISPATCH, r19 // Opcode dispatch table. | ||
35 | |.define LREG, r20 // Register holding lua_State (also in SAVE_L). | ||
36 | |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. | ||
37 | | | ||
38 | |.define JGL, r30 // On-trace: global_State + 32768. | ||
39 | | | ||
40 | |// Constants for type-comparisons, stores and conversions. C callee-save. | ||
41 | |.define TISNIL, r30 | ||
42 | |.define TISNUM, r22 | ||
43 | |.if FPU | ||
44 | |.define TOBIT, f30 // 2^52 + 2^51. | ||
45 | |.endif | ||
46 | | | ||
47 | |// The following temporaries are not saved across C calls, except for RA. | ||
48 | |.define RA, r23 // Callee-save. | ||
49 | |.define RB, r8 | ||
50 | |.define RC, r9 | ||
51 | |.define RD, r10 | ||
52 | |.define INS, r11 | ||
53 | | | ||
54 | |.define AT, r1 // Assembler temporary. | ||
55 | |.define TMP0, r12 | ||
56 | |.define TMP1, r13 | ||
57 | |.define TMP2, r14 | ||
58 | |.define TMP3, r15 | ||
59 | | | ||
60 | |// MIPS n64 calling convention. | ||
61 | |.define CFUNCADDR, r25 | ||
62 | |.define CARG1, r4 | ||
63 | |.define CARG2, r5 | ||
64 | |.define CARG3, r6 | ||
65 | |.define CARG4, r7 | ||
66 | |.define CARG5, r8 | ||
67 | |.define CARG6, r9 | ||
68 | |.define CARG7, r10 | ||
69 | |.define CARG8, r11 | ||
70 | | | ||
71 | |.define CRET1, r2 | ||
72 | |.define CRET2, r3 | ||
73 | | | ||
74 | |.if FPU | ||
75 | |.define FARG1, f12 | ||
76 | |.define FARG2, f13 | ||
77 | |.define FARG3, f14 | ||
78 | |.define FARG4, f15 | ||
79 | |.define FARG5, f16 | ||
80 | |.define FARG6, f17 | ||
81 | |.define FARG7, f18 | ||
82 | |.define FARG8, f19 | ||
83 | | | ||
84 | |.define FRET1, f0 | ||
85 | |.define FRET2, f2 | ||
86 | | | ||
87 | |.define FTMP0, f20 | ||
88 | |.define FTMP1, f21 | ||
89 | |.define FTMP2, f22 | ||
90 | |.endif | ||
91 | | | ||
92 | |// Stack layout while in interpreter. Must match with lj_frame.h. | ||
93 | |.if FPU // MIPS64 hard-float. | ||
94 | | | ||
95 | |.define CFRAME_SPACE, 192 // Delta for sp. | ||
96 | | | ||
97 | |//----- 16 byte aligned, <-- sp entering interpreter | ||
98 | |.define SAVE_ERRF, 188(sp) // 32 bit values. | ||
99 | |.define SAVE_NRES, 184(sp) | ||
100 | |.define SAVE_CFRAME, 176(sp) // 64 bit values. | ||
101 | |.define SAVE_L, 168(sp) | ||
102 | |.define SAVE_PC, 160(sp) | ||
103 | |//----- 16 byte aligned | ||
104 | |.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. | ||
105 | |.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. | ||
106 | | | ||
107 | |.else // MIPS64 soft-float | ||
108 | | | ||
109 | |.define CFRAME_SPACE, 128 // Delta for sp. | ||
110 | | | ||
111 | |//----- 16 byte aligned, <-- sp entering interpreter | ||
112 | |.define SAVE_ERRF, 124(sp) // 32 bit values. | ||
113 | |.define SAVE_NRES, 120(sp) | ||
114 | |.define SAVE_CFRAME, 112(sp) // 64 bit values. | ||
115 | |.define SAVE_L, 104(sp) | ||
116 | |.define SAVE_PC, 96(sp) | ||
117 | |//----- 16 byte aligned | ||
118 | |.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. | ||
119 | | | ||
120 | |.endif | ||
121 | | | ||
122 | |.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. | ||
123 | |.define TMPD, 0(sp) | ||
124 | |//----- 16 byte aligned | ||
125 | | | ||
126 | |.define TMPD_OFS, 0 | ||
127 | | | ||
128 | |.define SAVE_MULTRES, TMPD | ||
129 | | | ||
130 | |//----------------------------------------------------------------------- | ||
131 | | | ||
132 | |.macro saveregs | ||
133 | | daddiu sp, sp, -CFRAME_SPACE | ||
134 | | sd ra, SAVE_GPR_+9*8(sp) | ||
135 | | sd r30, SAVE_GPR_+8*8(sp) | ||
136 | | .FPU sdc1 f31, SAVE_FPR_+7*8(sp) | ||
137 | | sd r23, SAVE_GPR_+7*8(sp) | ||
138 | | .FPU sdc1 f30, SAVE_FPR_+6*8(sp) | ||
139 | | sd r22, SAVE_GPR_+6*8(sp) | ||
140 | | .FPU sdc1 f29, SAVE_FPR_+5*8(sp) | ||
141 | | sd r21, SAVE_GPR_+5*8(sp) | ||
142 | | .FPU sdc1 f28, SAVE_FPR_+4*8(sp) | ||
143 | | sd r20, SAVE_GPR_+4*8(sp) | ||
144 | | .FPU sdc1 f27, SAVE_FPR_+3*8(sp) | ||
145 | | sd r19, SAVE_GPR_+3*8(sp) | ||
146 | | .FPU sdc1 f26, SAVE_FPR_+2*8(sp) | ||
147 | | sd r18, SAVE_GPR_+2*8(sp) | ||
148 | | .FPU sdc1 f25, SAVE_FPR_+1*8(sp) | ||
149 | | sd r17, SAVE_GPR_+1*8(sp) | ||
150 | | .FPU sdc1 f24, SAVE_FPR_+0*8(sp) | ||
151 | | sd r16, SAVE_GPR_+0*8(sp) | ||
152 | |.endmacro | ||
153 | | | ||
154 | |.macro restoreregs_ret | ||
155 | | ld ra, SAVE_GPR_+9*8(sp) | ||
156 | | ld r30, SAVE_GPR_+8*8(sp) | ||
157 | | ld r23, SAVE_GPR_+7*8(sp) | ||
158 | | .FPU ldc1 f31, SAVE_FPR_+7*8(sp) | ||
159 | | ld r22, SAVE_GPR_+6*8(sp) | ||
160 | | .FPU ldc1 f30, SAVE_FPR_+6*8(sp) | ||
161 | | ld r21, SAVE_GPR_+5*8(sp) | ||
162 | | .FPU ldc1 f29, SAVE_FPR_+5*8(sp) | ||
163 | | ld r20, SAVE_GPR_+4*8(sp) | ||
164 | | .FPU ldc1 f28, SAVE_FPR_+4*8(sp) | ||
165 | | ld r19, SAVE_GPR_+3*8(sp) | ||
166 | | .FPU ldc1 f27, SAVE_FPR_+3*8(sp) | ||
167 | | ld r18, SAVE_GPR_+2*8(sp) | ||
168 | | .FPU ldc1 f26, SAVE_FPR_+2*8(sp) | ||
169 | | ld r17, SAVE_GPR_+1*8(sp) | ||
170 | | .FPU ldc1 f25, SAVE_FPR_+1*8(sp) | ||
171 | | ld r16, SAVE_GPR_+0*8(sp) | ||
172 | | .FPU ldc1 f24, SAVE_FPR_+0*8(sp) | ||
173 | | jr ra | ||
174 | | daddiu sp, sp, CFRAME_SPACE | ||
175 | |.endmacro | ||
176 | | | ||
177 | |// Type definitions. Some of these are only used for documentation. | ||
178 | |.type L, lua_State, LREG | ||
179 | |.type GL, global_State | ||
180 | |.type TVALUE, TValue | ||
181 | |.type GCOBJ, GCobj | ||
182 | |.type STR, GCstr | ||
183 | |.type TAB, GCtab | ||
184 | |.type LFUNC, GCfuncL | ||
185 | |.type CFUNC, GCfuncC | ||
186 | |.type PROTO, GCproto | ||
187 | |.type UPVAL, GCupval | ||
188 | |.type NODE, Node | ||
189 | |.type NARGS8, int | ||
190 | |.type TRACE, GCtrace | ||
191 | |.type SBUF, SBuf | ||
192 | | | ||
193 | |//----------------------------------------------------------------------- | ||
194 | | | ||
195 | |// Trap for not-yet-implemented parts. | ||
196 | |.macro NYI; .long 0xec1cf0f0; .endmacro | ||
197 | | | ||
198 | |// Macros to mark delay slots. | ||
199 | |.macro ., a; a; .endmacro | ||
200 | |.macro ., a,b; a,b; .endmacro | ||
201 | |.macro ., a,b,c; a,b,c; .endmacro | ||
202 | |.macro ., a,b,c,d; a,b,c,d; .endmacro | ||
203 | | | ||
204 | |.define FRAME_PC, -8 | ||
205 | |.define FRAME_FUNC, -16 | ||
206 | | | ||
207 | |//----------------------------------------------------------------------- | ||
208 | | | ||
209 | |// Endian-specific defines. | ||
210 | |.if ENDIAN_LE | ||
211 | |.define HI, 4 | ||
212 | |.define LO, 0 | ||
213 | |.define OFS_RD, 2 | ||
214 | |.define OFS_RA, 1 | ||
215 | |.define OFS_OP, 0 | ||
216 | |.else | ||
217 | |.define HI, 0 | ||
218 | |.define LO, 4 | ||
219 | |.define OFS_RD, 0 | ||
220 | |.define OFS_RA, 2 | ||
221 | |.define OFS_OP, 3 | ||
222 | |.endif | ||
223 | | | ||
224 | |// Instruction decode. | ||
225 | |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro | ||
226 | |.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro | ||
227 | |.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro | ||
228 | |.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro | ||
229 | |.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro | ||
230 | |.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro | ||
231 | |.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro | ||
232 | |.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro | ||
233 | |.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro | ||
234 | |.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro | ||
235 | |.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro | ||
236 | |.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro | ||
237 | |.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro | ||
238 | | | ||
239 | |// Instruction fetch. | ||
240 | |.macro ins_NEXT1 | ||
241 | | lw INS, 0(PC) | ||
242 | | daddiu PC, PC, 4 | ||
243 | |.endmacro | ||
244 | |// Instruction decode+dispatch. | ||
245 | |.macro ins_NEXT2 | ||
246 | | decode_OP8a TMP1, INS | ||
247 | | decode_OP8b TMP1 | ||
248 | | daddu TMP0, DISPATCH, TMP1 | ||
249 | | decode_RD8a RD, INS | ||
250 | | ld AT, 0(TMP0) | ||
251 | | decode_RA8a RA, INS | ||
252 | | decode_RD8b RD | ||
253 | | jr AT | ||
254 | | decode_RA8b RA | ||
255 | |.endmacro | ||
256 | |.macro ins_NEXT | ||
257 | | ins_NEXT1 | ||
258 | | ins_NEXT2 | ||
259 | |.endmacro | ||
260 | | | ||
261 | |// Instruction footer. | ||
262 | |.if 1 | ||
263 | | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use. | ||
264 | | .define ins_next, ins_NEXT | ||
265 | | .define ins_next_, ins_NEXT | ||
266 | | .define ins_next1, ins_NEXT1 | ||
267 | | .define ins_next2, ins_NEXT2 | ||
268 | |.else | ||
269 | | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. | ||
270 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
271 | | .macro ins_next | ||
272 | | b ->ins_next | ||
273 | | .endmacro | ||
274 | | .macro ins_next1 | ||
275 | | .endmacro | ||
276 | | .macro ins_next2 | ||
277 | | b ->ins_next | ||
278 | | .endmacro | ||
279 | | .macro ins_next_ | ||
280 | | ->ins_next: | ||
281 | | ins_NEXT | ||
282 | | .endmacro | ||
283 | |.endif | ||
284 | | | ||
285 | |// Call decode and dispatch. | ||
286 | |.macro ins_callt | ||
287 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
288 | | ld PC, LFUNC:RB->pc | ||
289 | | lw INS, 0(PC) | ||
290 | | daddiu PC, PC, 4 | ||
291 | | decode_OP8a TMP1, INS | ||
292 | | decode_RA8a RA, INS | ||
293 | | decode_OP8b TMP1 | ||
294 | | decode_RA8b RA | ||
295 | | daddu TMP0, DISPATCH, TMP1 | ||
296 | | ld TMP0, 0(TMP0) | ||
297 | | jr TMP0 | ||
298 | | daddu RA, RA, BASE | ||
299 | |.endmacro | ||
300 | | | ||
301 | |.macro ins_call | ||
302 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC | ||
303 | | sd PC, FRAME_PC(BASE) | ||
304 | | ins_callt | ||
305 | |.endmacro | ||
306 | | | ||
307 | |//----------------------------------------------------------------------- | ||
308 | | | ||
309 | |.macro branch_RD | ||
310 | | srl TMP0, RD, 1 | ||
311 | | lui AT, (-(BCBIAS_J*4 >> 16) & 65535) | ||
312 | | addu TMP0, TMP0, AT | ||
313 | | daddu PC, PC, TMP0 | ||
314 | |.endmacro | ||
315 | | | ||
316 | |// Assumes DISPATCH is relative to GL. | ||
317 | #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) | ||
318 | #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) | ||
319 | #define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) | ||
320 | #define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) | ||
321 | | | ||
322 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | ||
323 | | | ||
324 | |.macro load_got, func | ||
325 | | ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) | ||
326 | |.endmacro | ||
327 | |// Much faster. Sadly, there's no easy way to force the required code layout. | ||
328 | |// .macro call_intern, func; bal extern func; .endmacro | ||
329 | |.macro call_intern, func; jalr CFUNCADDR; .endmacro | ||
330 | |.macro call_extern; jalr CFUNCADDR; .endmacro | ||
331 | |.macro jmp_extern; jr CFUNCADDR; .endmacro | ||
332 | | | ||
333 | |.macro hotcheck, delta, target | ||
334 | | dsrl TMP1, PC, 1 | ||
335 | | andi TMP1, TMP1, 126 | ||
336 | | daddu TMP1, TMP1, DISPATCH | ||
337 | | lhu TMP2, GG_DISP2HOT(TMP1) | ||
338 | | addiu TMP2, TMP2, -delta | ||
339 | | bltz TMP2, target | ||
340 | |. sh TMP2, GG_DISP2HOT(TMP1) | ||
341 | |.endmacro | ||
342 | | | ||
343 | |.macro hotloop | ||
344 | | hotcheck HOTCOUNT_LOOP, ->vm_hotloop | ||
345 | |.endmacro | ||
346 | | | ||
347 | |.macro hotcall | ||
348 | | hotcheck HOTCOUNT_CALL, ->vm_hotcall | ||
349 | |.endmacro | ||
350 | | | ||
351 | |// Set current VM state. Uses TMP0. | ||
352 | |.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro | ||
353 | |.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro | ||
354 | | | ||
355 | |// Move table write barrier back. Overwrites mark and tmp. | ||
356 | |.macro barrierback, tab, mark, tmp, target | ||
357 | | ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) | ||
358 | | andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) | ||
359 | | sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) | ||
360 | | sb mark, tab->marked | ||
361 | | b target | ||
362 | |. sd tmp, tab->gclist | ||
363 | |.endmacro | ||
364 | | | ||
365 | |// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. | ||
366 | |.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro | ||
367 | |.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro | ||
368 | | | ||
369 | |// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. | ||
370 | |.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro | ||
371 | | | ||
372 | |// Extract (negative) type tag. | ||
373 | |.macro gettp, dst, src; dsra dst, src, 47; .endmacro | ||
374 | | | ||
375 | |// Macros to check the TValue type and extract the GCobj. Branch on failure. | ||
376 | |.macro checktp, reg, tp, target | ||
377 | | gettp AT, reg | ||
378 | | daddiu AT, AT, tp | ||
379 | | bnez AT, target | ||
380 | |. cleartp reg | ||
381 | |.endmacro | ||
382 | |.macro checktp, dst, reg, tp, target | ||
383 | | gettp AT, reg | ||
384 | | daddiu AT, AT, tp | ||
385 | | bnez AT, target | ||
386 | |. cleartp dst, reg | ||
387 | |.endmacro | ||
388 | |.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro | ||
389 | |.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro | ||
390 | |.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro | ||
391 | |.macro checkint, reg, target // Caveat: has delay slot! | ||
392 | | gettp AT, reg | ||
393 | | bne AT, TISNUM, target | ||
394 | |.endmacro | ||
395 | |.macro checknum, reg, target // Caveat: has delay slot! | ||
396 | | gettp AT, reg | ||
397 | | sltiu AT, AT, LJ_TISNUM | ||
398 | | beqz AT, target | ||
399 | |.endmacro | ||
400 | | | ||
401 | |.macro mov_false, reg | ||
402 | | lu reg, 0x8000 | ||
403 | | dsll reg, reg, 32 | ||
404 | | not reg, reg | ||
405 | |.endmacro | ||
406 | |.macro mov_true, reg | ||
407 | | li reg, 0x0001 | ||
408 | | dsll reg, reg, 48 | ||
409 | | not reg, reg | ||
410 | |.endmacro | ||
411 | | | ||
412 | |//----------------------------------------------------------------------- | ||
413 | |||
414 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
415 | /* The .code_sub section should be last to help static branch prediction. */ | ||
416 | static void build_subroutines(BuildCtx *ctx) | ||
417 | { | ||
418 | |.code_sub | ||
419 | | | ||
420 | |//----------------------------------------------------------------------- | ||
421 | |//-- Return handling ---------------------------------------------------- | ||
422 | |//----------------------------------------------------------------------- | ||
423 | | | ||
424 | |->vm_returnp: | ||
425 | | // See vm_return. Also: TMP2 = previous base. | ||
426 | | andi AT, PC, FRAME_P | ||
427 | | beqz AT, ->cont_dispatch | ||
428 | | | ||
429 | | // Return from pcall or xpcall fast func. | ||
430 | |. mov_true TMP1 | ||
431 | | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. | ||
432 | | move BASE, TMP2 // Restore caller base. | ||
433 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
434 | | sd TMP1, -8(RA) // Prepend true to results. | ||
435 | | daddiu RA, RA, -8 | ||
436 | | | ||
437 | |->vm_returnc: | ||
438 | | addiu RD, RD, 8 // RD = (nresults+1)*8. | ||
439 | | andi TMP0, PC, FRAME_TYPE | ||
440 | | beqz RD, ->vm_unwind_c_eh | ||
441 | |. li CRET1, LUA_YIELD | ||
442 | | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. | ||
443 | |. move MULTRES, RD | ||
444 | | | ||
445 | |->vm_return: | ||
446 | | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return | ||
447 | | // TMP0 = PC & FRAME_TYPE | ||
448 | | li TMP2, -8 | ||
449 | | xori AT, TMP0, FRAME_C | ||
450 | | and TMP2, PC, TMP2 | ||
451 | | bnez AT, ->vm_returnp | ||
452 | | dsubu TMP2, BASE, TMP2 // TMP2 = previous base. | ||
453 | | | ||
454 | | addiu TMP1, RD, -8 | ||
455 | | sd TMP2, L->base | ||
456 | | li_vmstate C | ||
457 | | lw TMP2, SAVE_NRES | ||
458 | | daddiu BASE, BASE, -16 | ||
459 | | st_vmstate | ||
460 | | beqz TMP1, >2 | ||
461 | |. sll TMP2, TMP2, 3 | ||
462 | |1: | ||
463 | | addiu TMP1, TMP1, -8 | ||
464 | | ld CRET1, 0(RA) | ||
465 | | daddiu RA, RA, 8 | ||
466 | | sd CRET1, 0(BASE) | ||
467 | | bnez TMP1, <1 | ||
468 | |. daddiu BASE, BASE, 8 | ||
469 | | | ||
470 | |2: | ||
471 | | bne TMP2, RD, >6 | ||
472 | |3: | ||
473 | |. sd BASE, L->top // Store new top. | ||
474 | | | ||
475 | |->vm_leave_cp: | ||
476 | | ld TMP0, SAVE_CFRAME // Restore previous C frame. | ||
477 | | move CRET1, r0 // Ok return status for vm_pcall. | ||
478 | | sd TMP0, L->cframe | ||
479 | | | ||
480 | |->vm_leave_unw: | ||
481 | | restoreregs_ret | ||
482 | | | ||
483 | |6: | ||
484 | | ld TMP1, L->maxstack | ||
485 | | slt AT, TMP2, RD | ||
486 | | bnez AT, >7 // Fewer results wanted? | ||
487 | | // More results wanted. Check stack size and fill up results with nil. | ||
488 | |. slt AT, BASE, TMP1 | ||
489 | | beqz AT, >8 | ||
490 | |. nop | ||
491 | | sd TISNIL, 0(BASE) | ||
492 | | addiu RD, RD, 8 | ||
493 | | b <2 | ||
494 | |. daddiu BASE, BASE, 8 | ||
495 | | | ||
496 | |7: // Fewer results wanted. | ||
497 | | subu TMP0, RD, TMP2 | ||
498 | | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. | ||
499 | |.if MIPSR6 | ||
500 | | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? | ||
501 | | seleqz BASE, BASE, TMP2 | ||
502 | | b <3 | ||
503 | |. or BASE, BASE, TMP0 | ||
504 | |.else | ||
505 | | b <3 | ||
506 | |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? | ||
507 | |.endif | ||
508 | | | ||
509 | |8: // Corner case: need to grow stack for filling up results. | ||
510 | | // This can happen if: | ||
511 | | // - A C function grows the stack (a lot). | ||
512 | | // - The GC shrinks the stack in between. | ||
513 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
514 | | load_got lj_state_growstack | ||
515 | | move MULTRES, RD | ||
516 | | srl CARG2, TMP2, 3 | ||
517 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
518 | |. move CARG1, L | ||
519 | | lw TMP2, SAVE_NRES | ||
520 | | ld BASE, L->top // Need the (realloced) L->top in BASE. | ||
521 | | move RD, MULTRES | ||
522 | | b <2 | ||
523 | |. sll TMP2, TMP2, 3 | ||
524 | | | ||
525 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
526 | | // (void *cframe, int errcode) | ||
527 | | move sp, CARG1 | ||
528 | | move CRET1, CARG2 | ||
529 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | ||
530 | | ld L, SAVE_L | ||
531 | | li TMP0, ~LJ_VMST_C | ||
532 | | ld GL:TMP1, L->glref | ||
533 | | b ->vm_leave_unw | ||
534 | |. sw TMP0, GL:TMP1->vmstate | ||
535 | | | ||
536 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
537 | | // (void *cframe) | ||
538 | | li AT, -4 | ||
539 | | and sp, CARG1, AT | ||
540 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | ||
541 | | ld L, SAVE_L | ||
542 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
543 | | li TISNIL, LJ_TNIL | ||
544 | | li TISNUM, LJ_TISNUM | ||
545 | | ld BASE, L->base | ||
546 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
547 | | .FPU mtc1 TMP3, TOBIT | ||
548 | | mov_false TMP1 | ||
549 | | li_vmstate INTERP | ||
550 | | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. | ||
551 | | .FPU cvt.d.s TOBIT, TOBIT | ||
552 | | daddiu RA, BASE, -8 // Results start at BASE-8. | ||
553 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
554 | | sd TMP1, 0(RA) // Prepend false to error message. | ||
555 | | st_vmstate | ||
556 | | b ->vm_returnc | ||
557 | |. li RD, 16 // 2 results: false + error message. | ||
558 | | | ||
559 | |->vm_unwind_stub: // Jump to exit stub from unwinder. | ||
560 | | jr CARG1 | ||
561 | |. move ra, CARG2 | ||
562 | | | ||
563 | |//----------------------------------------------------------------------- | ||
564 | |//-- Grow stack for calls ----------------------------------------------- | ||
565 | |//----------------------------------------------------------------------- | ||
566 | | | ||
567 | |->vm_growstack_c: // Grow stack for C function. | ||
568 | | b >2 | ||
569 | |. li CARG2, LUA_MINSTACK | ||
570 | | | ||
571 | |->vm_growstack_l: // Grow stack for Lua function. | ||
572 | | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC | ||
573 | | daddu RC, BASE, RC | ||
574 | | dsubu RA, RA, BASE | ||
575 | | sd BASE, L->base | ||
576 | | daddiu PC, PC, 4 // Must point after first instruction. | ||
577 | | sd RC, L->top | ||
578 | | srl CARG2, RA, 3 | ||
579 | |2: | ||
580 | | // L->base = new base, L->top = top | ||
581 | | load_got lj_state_growstack | ||
582 | | sd PC, SAVE_PC | ||
583 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
584 | |. move CARG1, L | ||
585 | | ld BASE, L->base | ||
586 | | ld RC, L->top | ||
587 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
588 | | dsubu RC, RC, BASE | ||
589 | | cleartp LFUNC:RB | ||
590 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
591 | | ins_callt // Just retry the call. | ||
592 | | | ||
593 | |//----------------------------------------------------------------------- | ||
594 | |//-- Entry points into the assembler VM --------------------------------- | ||
595 | |//----------------------------------------------------------------------- | ||
596 | | | ||
597 | |->vm_resume: // Setup C frame and resume thread. | ||
598 | | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) | ||
599 | | saveregs | ||
600 | | move L, CARG1 | ||
601 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
602 | | move BASE, CARG2 | ||
603 | | lbu TMP1, L->status | ||
604 | | sd L, SAVE_L | ||
605 | | li PC, FRAME_CP | ||
606 | | daddiu TMP0, sp, CFRAME_RESUME | ||
607 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
608 | | sw r0, SAVE_NRES | ||
609 | | sw r0, SAVE_ERRF | ||
610 | | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
611 | | sd r0, SAVE_CFRAME | ||
612 | | beqz TMP1, >3 | ||
613 | |. sd TMP0, L->cframe | ||
614 | | | ||
615 | | // Resume after yield (like a return). | ||
616 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
617 | | move RA, BASE | ||
618 | | ld BASE, L->base | ||
619 | | ld TMP1, L->top | ||
620 | | ld PC, FRAME_PC(BASE) | ||
621 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
622 | | dsubu RD, TMP1, BASE | ||
623 | | .FPU mtc1 TMP3, TOBIT | ||
624 | | sb r0, L->status | ||
625 | | .FPU cvt.d.s TOBIT, TOBIT | ||
626 | | li_vmstate INTERP | ||
627 | | daddiu RD, RD, 8 | ||
628 | | st_vmstate | ||
629 | | move MULTRES, RD | ||
630 | | andi TMP0, PC, FRAME_TYPE | ||
631 | | li TISNIL, LJ_TNIL | ||
632 | | beqz TMP0, ->BC_RET_Z | ||
633 | |. li TISNUM, LJ_TISNUM | ||
634 | | b ->vm_return | ||
635 | |. nop | ||
636 | | | ||
637 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
638 | | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) | ||
639 | | saveregs | ||
640 | | sw CARG4, SAVE_ERRF | ||
641 | | b >1 | ||
642 | |. li PC, FRAME_CP | ||
643 | | | ||
644 | |->vm_call: // Setup C frame and enter VM. | ||
645 | | // (lua_State *L, TValue *base, int nres1) | ||
646 | | saveregs | ||
647 | | li PC, FRAME_C | ||
648 | | | ||
649 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
650 | | ld TMP1, L:CARG1->cframe | ||
651 | | move L, CARG1 | ||
652 | | sw CARG3, SAVE_NRES | ||
653 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
654 | | sd CARG1, SAVE_L | ||
655 | | move BASE, CARG2 | ||
656 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
657 | | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
658 | | sd TMP1, SAVE_CFRAME | ||
659 | | sd sp, L->cframe // Add our C frame to cframe chain. | ||
660 | | | ||
661 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | ||
662 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
663 | | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). | ||
664 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
665 | | ld TMP1, L->top | ||
666 | | .FPU mtc1 TMP3, TOBIT | ||
667 | | daddu PC, PC, BASE | ||
668 | | dsubu NARGS8:RC, TMP1, BASE | ||
669 | | li TISNUM, LJ_TISNUM | ||
670 | | dsubu PC, PC, TMP2 // PC = frame delta + frame type | ||
671 | | .FPU cvt.d.s TOBIT, TOBIT | ||
672 | | li_vmstate INTERP | ||
673 | | li TISNIL, LJ_TNIL | ||
674 | | st_vmstate | ||
675 | | | ||
676 | |->vm_call_dispatch: | ||
677 | | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC | ||
678 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
679 | | checkfunc LFUNC:RB, ->vmeta_call | ||
680 | | | ||
681 | |->vm_call_dispatch_f: | ||
682 | | ins_call | ||
683 | | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC | ||
684 | | | ||
685 | |->vm_cpcall: // Setup protected C frame, call C. | ||
686 | | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) | ||
687 | | saveregs | ||
688 | | move L, CARG1 | ||
689 | | ld TMP0, L:CARG1->stack | ||
690 | | sd CARG1, SAVE_L | ||
691 | | ld TMP1, L->top | ||
692 | | ld DISPATCH, L->glref // Setup pointer to dispatch table. | ||
693 | | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
694 | | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | ||
695 | | ld TMP1, L->cframe | ||
696 | | daddiu DISPATCH, DISPATCH, GG_G2DISP | ||
697 | | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | ||
698 | | sw r0, SAVE_ERRF // No error function. | ||
699 | | sd TMP1, SAVE_CFRAME | ||
700 | | sd sp, L->cframe // Add our C frame to cframe chain. | ||
701 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
702 | | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) | ||
703 | |. move CFUNCADDR, CARG4 | ||
704 | | move BASE, CRET1 | ||
705 | | bnez CRET1, <3 // Else continue with the call. | ||
706 | |. li PC, FRAME_CP | ||
707 | | b ->vm_leave_cp // No base? Just remove C frame. | ||
708 | |. nop | ||
709 | | | ||
710 | |//----------------------------------------------------------------------- | ||
711 | |//-- Metamethod handling ------------------------------------------------ | ||
712 | |//----------------------------------------------------------------------- | ||
713 | | | ||
714 | |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the | ||
715 | |// stack, so BASE doesn't need to be reloaded across these calls. | ||
716 | | | ||
717 | |//-- Continuation dispatch ---------------------------------------------- | ||
718 | | | ||
719 | |->cont_dispatch: | ||
720 | | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 | ||
721 | | ld TMP0, -32(BASE) // Continuation. | ||
722 | | move RB, BASE | ||
723 | | move BASE, TMP2 // Restore caller BASE. | ||
724 | | ld LFUNC:TMP1, FRAME_FUNC(TMP2) | ||
725 | |.if FFI | ||
726 | | sltiu AT, TMP0, 2 | ||
727 | |.endif | ||
728 | | ld PC, -24(RB) // Restore PC from [cont|PC]. | ||
729 | | cleartp LFUNC:TMP1 | ||
730 | | daddu TMP2, RA, RD | ||
731 | |.if FFI | ||
732 | | bnez AT, >1 | ||
733 | |.endif | ||
734 | |. sd TISNIL, -8(TMP2) // Ensure one valid arg. | ||
735 | | ld TMP1, LFUNC:TMP1->pc | ||
736 | | // BASE = base, RA = resultptr, RB = meta base | ||
737 | | jr TMP0 // Jump to continuation. | ||
738 | |. ld KBASE, PC2PROTO(k)(TMP1) | ||
739 | | | ||
740 | |.if FFI | ||
741 | |1: | ||
742 | | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. | ||
743 | | // cont = 0: tailcall from C function. | ||
744 | |. daddiu TMP1, RB, -32 | ||
745 | | b ->vm_call_tail | ||
746 | |. dsubu RC, TMP1, BASE | ||
747 | |.endif | ||
748 | | | ||
749 | |->cont_cat: // RA = resultptr, RB = meta base | ||
750 | | lw INS, -4(PC) | ||
751 | | daddiu CARG2, RB, -32 | ||
752 | | ld CRET1, 0(RA) | ||
753 | | decode_RB8a MULTRES, INS | ||
754 | | decode_RA8a RA, INS | ||
755 | | decode_RB8b MULTRES | ||
756 | | decode_RA8b RA | ||
757 | | daddu TMP1, BASE, MULTRES | ||
758 | | sd BASE, L->base | ||
759 | | dsubu CARG3, CARG2, TMP1 | ||
760 | | bne TMP1, CARG2, ->BC_CAT_Z | ||
761 | |. sd CRET1, 0(CARG2) | ||
762 | | daddu RA, BASE, RA | ||
763 | | b ->cont_nop | ||
764 | |. sd CRET1, 0(RA) | ||
765 | | | ||
766 | |//-- Table indexing metamethods ----------------------------------------- | ||
767 | | | ||
768 | |->vmeta_tgets1: | ||
769 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
770 | | li TMP0, LJ_TSTR | ||
771 | | settp STR:RC, TMP0 | ||
772 | | b >1 | ||
773 | |. sd STR:RC, 0(CARG3) | ||
774 | | | ||
775 | |->vmeta_tgets: | ||
776 | | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) | ||
777 | | li TMP0, LJ_TTAB | ||
778 | | li TMP1, LJ_TSTR | ||
779 | | settp TAB:RB, TMP0 | ||
780 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) | ||
781 | | sd TAB:RB, 0(CARG2) | ||
782 | | settp STR:RC, TMP1 | ||
783 | | b >1 | ||
784 | |. sd STR:RC, 0(CARG3) | ||
785 | | | ||
786 | |->vmeta_tgetb: // TMP0 = index | ||
787 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
788 | | settp TMP0, TISNUM | ||
789 | | sd TMP0, 0(CARG3) | ||
790 | | | ||
791 | |->vmeta_tgetv: | ||
792 | |1: | ||
793 | | load_got lj_meta_tget | ||
794 | | sd BASE, L->base | ||
795 | | sd PC, SAVE_PC | ||
796 | | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | ||
797 | |. move CARG1, L | ||
798 | | // Returns TValue * (finished) or NULL (metamethod). | ||
799 | | beqz CRET1, >3 | ||
800 | |. daddiu TMP1, BASE, -FRAME_CONT | ||
801 | | ld CARG1, 0(CRET1) | ||
802 | | ins_next1 | ||
803 | | sd CARG1, 0(RA) | ||
804 | | ins_next2 | ||
805 | | | ||
806 | |3: // Call __index metamethod. | ||
807 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
808 | | ld BASE, L->top | ||
809 | | sd PC, -24(BASE) // [cont|PC] | ||
810 | | dsubu PC, BASE, TMP1 | ||
811 | | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
812 | | cleartp LFUNC:RB | ||
813 | | b ->vm_call_dispatch_f | ||
814 | |. li NARGS8:RC, 16 // 2 args for func(t, k). | ||
815 | | | ||
816 | |->vmeta_tgetr: | ||
817 | | load_got lj_tab_getinth | ||
818 | | call_intern lj_tab_getinth // (GCtab *t, int32_t key) | ||
819 | |. nop | ||
820 | | // Returns cTValue * or NULL. | ||
821 | | beqz CRET1, ->BC_TGETR_Z | ||
822 | |. move CARG2, TISNIL | ||
823 | | b ->BC_TGETR_Z | ||
824 | |. ld CARG2, 0(CRET1) | ||
825 | | | ||
826 | |//----------------------------------------------------------------------- | ||
827 | | | ||
// vmeta_tsets1: tags the string in RC with LJ_TSTR, stores it as the key in
// the tmptv slot (address in CARG3) and branches forward to the next local
// label 1 (the lj_meta_tset call under vmeta_tsetv). CARG2 is not set here.
828 | |->vmeta_tsets1: | ||
829 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
830 | | li TMP0, LJ_TSTR | ||
831 | | settp STR:RC, TMP0 | ||
832 | | b >1 | ||
833 | |. sd STR:RC, 0(CARG3) | ||
834 | | | ||
// vmeta_tsets: builds both operands in scratch slots. The table in RB is
// tagged LJ_TTAB and stored to tmptv (address in CARG2); the string in RC is
// tagged LJ_TSTR and stored to tmptv2 (address in CARG3). Then branches
// forward to the next local label 1 (the lj_meta_tset call).
835 | |->vmeta_tsets: | ||
836 | | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) | ||
837 | | li TMP0, LJ_TTAB | ||
838 | | li TMP1, LJ_TSTR | ||
839 | | settp TAB:RB, TMP0 | ||
840 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) | ||
841 | | sd TAB:RB, 0(CARG2) | ||
842 | | settp STR:RC, TMP1 | ||
843 | | b >1 | ||
844 | |. sd STR:RC, 0(CARG3) | ||
845 | | | ||
// vmeta_tsetb: tags the index in TMP0 with TISNUM, stores it to the tmptv
// slot (address in CARG3) and falls through into vmeta_tsetv.
// vmeta_tsetv (also local label 1): saves BASE and PC and calls
// lj_meta_tset(L, o, k). The value to store is loaded from 0(RA) right after
// the beqz, so it is available on both paths. A non-NULL result is the slot
// the value is written to. A NULL result continues at 3:, which switches
// BASE to L->top, records PC in the continuation slot, copies the value to
// the third argument slot and calls the __newindex metamethod with three
// arguments.
846 | |->vmeta_tsetb: // TMP0 = index | ||
847 | | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
848 | | settp TMP0, TISNUM | ||
849 | | sd TMP0, 0(CARG3) | ||
850 | | | ||
851 | |->vmeta_tsetv: | ||
852 | |1: | ||
853 | | load_got lj_meta_tset | ||
854 | | sd BASE, L->base | ||
855 | | sd PC, SAVE_PC | ||
856 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | ||
857 | |. move CARG1, L | ||
858 | | // Returns TValue * (finished) or NULL (metamethod). | ||
859 | | beqz CRET1, >3 | ||
860 | |. ld CARG1, 0(RA) | ||
861 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
862 | | ins_next1 | ||
863 | | sd CARG1, 0(CRET1) | ||
864 | | ins_next2 | ||
865 | | | ||
866 | |3: // Call __newindex metamethod. | ||
867 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
868 | | daddiu TMP1, BASE, -FRAME_CONT | ||
869 | | ld BASE, L->top | ||
870 | | sd PC, -24(BASE) // [cont|PC] | ||
871 | | dsubu PC, BASE, TMP1 | ||
872 | | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
873 | | cleartp LFUNC:RB | ||
874 | | sd CARG1, 16(BASE) // Copy value to third argument. | ||
875 | | b ->vm_call_dispatch_f | ||
876 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) | ||
877 | | | ||
// vmeta_tsetr: saves BASE and PC, calls lj_tab_setinth(L, t, key) with
// CARG2/CARG3 as left by the caller, and continues at BC_TSETR_Z with the
// returned TValue pointer in CRET1.
878 | |->vmeta_tsetr: | ||
879 | | load_got lj_tab_setinth | ||
880 | | sd BASE, L->base | ||
881 | | sd PC, SAVE_PC | ||
882 | | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
883 | |. move CARG1, L | ||
884 | | // Returns TValue *. | ||
885 | | b ->BC_TSETR_Z | ||
886 | |. nop | ||
887 | | | ||
888 | |//-- Comparison metamethods --------------------------------------------- | ||
889 | | | ||
// vmeta_comp: rewinds PC by one instruction, saves BASE/PC and calls
// lj_meta_comp(L, o1, o2, op) with the opcode decoded from INS.
// 3: shared result handling, also reached via `b <3` from vmeta_equal and
//    vmeta_equal_cd. Results >= 2 are treated as a metamethod base and go to
//    vmeta_binop; for 0/1, TMP2 = -result is an all-zeros/all-ones mask.
// 4: shared conditional jump, also reached from cont_condt/cont_condf. Loads
//    the RD field at PC, advances PC by one instruction, rebiases the scaled
//    jump offset and ANDs it with TMP2, so PC only moves when TMP2 is all
//    ones.
// cont_nop: dispatches the next instruction.
890 | |->vmeta_comp: | ||
891 | | // RA/RD point to o1/o2. | ||
892 | | move CARG2, RA | ||
893 | | move CARG3, RD | ||
894 | | load_got lj_meta_comp | ||
895 | | daddiu PC, PC, -4 | ||
896 | | sd BASE, L->base | ||
897 | | sd PC, SAVE_PC | ||
898 | | decode_OP1 CARG4, INS | ||
899 | | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | ||
900 | |. move CARG1, L | ||
901 | | // Returns 0/1 or TValue * (metamethod). | ||
902 | |3: | ||
903 | | sltiu AT, CRET1, 2 | ||
904 | | beqz AT, ->vmeta_binop | ||
905 | | negu TMP2, CRET1 | ||
906 | |4: | ||
907 | | lhu RD, OFS_RD(PC) | ||
908 | | daddiu PC, PC, 4 | ||
909 | | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) | ||
910 | | sll RD, RD, 2 | ||
911 | | addu RD, RD, TMP1 | ||
912 | | and RD, RD, TMP2 | ||
913 | | daddu PC, PC, RD | ||
914 | |->cont_nop: | ||
915 | | ins_next | ||
916 | | | ||
// cont_ra: copies the result at 0(RA) into the stack slot BASE + 8*ra, where
// ra is the byte read from the RA field of the instruction at PC-4, then
// continues at cont_nop.
917 | |->cont_ra: // RA = resultptr | ||
918 | | lbu TMP1, -4+OFS_RA(PC) | ||
919 | | ld CRET1, 0(RA) | ||
920 | | sll TMP1, TMP1, 3 | ||
921 | | daddu TMP1, BASE, TMP1 | ||
922 | | b ->cont_nop | ||
923 | |. sd CRET1, 0(TMP1) | ||
924 | | | ||
// cont_condt: extracts the type tag of the result at 0(RA), sets AT to 1 if
// the tag is (unsigned) below LJ_TISTRUECOND and jumps back to label 4 with
// TMP2 = -AT, so the conditional jump at 4: is taken when AT is 1.
925 | |->cont_condt: // RA = resultptr | ||
926 | | ld TMP0, 0(RA) | ||
927 | | gettp TMP0, TMP0 | ||
928 | | sltiu AT, TMP0, LJ_TISTRUECOND | ||
929 | | b <4 | ||
930 | |. negu TMP2, AT // Branch if result is true. | ||
931 | | | ||
// cont_condf: same test as cont_condt but with the mask inverted:
// TMP2 = AT - 1, so the conditional jump at 4: is taken when AT is 0.
932 | |->cont_condf: // RA = resultptr | ||
933 | | ld TMP0, 0(RA) | ||
934 | | gettp TMP0, TMP0 | ||
935 | | sltiu AT, TMP0, LJ_TISTRUECOND | ||
936 | | b <4 | ||
937 | |. addiu TMP2, AT, -1 // Branch if result is false. | ||
938 | | | ||
// vmeta_equal: strips the type tags from the operands while shifting them up
// one argument register (CARG2 -> CARG3 first, then CARG1 -> CARG2, so
// neither is clobbered before use), passes TMP0 as the `ne` flag in CARG4,
// rewinds PC by one instruction, saves BASE/PC and calls
// lj_meta_equal(L, o1, o2, ne). The result is handled at the preceding
// local label 3.
939 | |->vmeta_equal: | ||
940 | | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. | ||
941 | | load_got lj_meta_equal | ||
942 | | cleartp LFUNC:CARG3, CARG2 | ||
943 | | cleartp LFUNC:CARG2, CARG1 | ||
944 | | move CARG4, TMP0 | ||
945 | | daddiu PC, PC, -4 | ||
946 | | sd BASE, L->base | ||
947 | | sd PC, SAVE_PC | ||
948 | | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
949 | |. move CARG1, L | ||
950 | | // Returns 0/1 or TValue * (metamethod). | ||
951 | | b <3 | ||
952 | |. nop | ||
953 | | | ||
// vmeta_equal_cd: the label always exists, but code is only emitted when FFI
// is enabled. Passes the current instruction word (INS) as the second
// argument, rewinds PC by one instruction, saves BASE/PC and calls
// lj_meta_equal_cd(L, op). The result is handled at the preceding local
// label 3.
954 | |->vmeta_equal_cd: | ||
955 | |.if FFI | ||
956 | | load_got lj_meta_equal_cd | ||
957 | | move CARG2, INS | ||
958 | | daddiu PC, PC, -4 | ||
959 | | sd BASE, L->base | ||
960 | | sd PC, SAVE_PC | ||
961 | | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) | ||
962 | |. move CARG1, L | ||
963 | | // Returns 0/1 or TValue * (metamethod). | ||
964 | | b <3 | ||
965 | |. nop | ||
966 | |.endif | ||
967 | | | ||
// vmeta_istype: rewinds PC by one instruction, saves BASE/PC and calls
// lj_meta_istype(L, ra, tp) with RA and RD shifted right by 3 (undoing the
// *8 scaling), then continues at cont_nop.
968 | |->vmeta_istype: | ||
969 | | load_got lj_meta_istype | ||
970 | | daddiu PC, PC, -4 | ||
971 | | sd BASE, L->base | ||
972 | | srl CARG2, RA, 3 | ||
973 | | srl CARG3, RD, 3 | ||
974 | | sd PC, SAVE_PC | ||
975 | | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
976 | |. move CARG1, L | ||
977 | | b ->cont_nop | ||
978 | |. nop | ||
979 | | | ||
980 | |//-- Arithmetic metamethods --------------------------------------------- | ||
981 | | | ||
// vmeta_unm: copies RB to RC (single operand used for both sides) and falls
// through into vmeta_arith.
// vmeta_arith: saves BASE/PC and calls lj_meta_arith(L, ra, rb, rc, op) with
// the opcode decoded from INS. A NULL result continues at cont_nop; any other
// result falls through into vmeta_binop.
// vmeta_binop: CRET1 is the new base. Stores PC in the continuation slot at
// -24(new base), sets PC to (new base - old base) + FRAME_CONT, keeps the old
// base in TMP2, switches BASE to the new base and calls the metamethod with
// two arguments via vm_call_dispatch.
982 | |->vmeta_unm: | ||
983 | | move RC, RB | ||
984 | | | ||
985 | |->vmeta_arith: | ||
986 | | load_got lj_meta_arith | ||
987 | | sd BASE, L->base | ||
988 | | move CARG2, RA | ||
989 | | sd PC, SAVE_PC | ||
990 | | move CARG3, RB | ||
991 | | move CARG4, RC | ||
992 | | decode_OP1 CARG5, INS // CARG5 == RB. | ||
993 | | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | ||
994 | |. move CARG1, L | ||
995 | | // Returns NULL (finished) or TValue * (metamethod). | ||
996 | | beqz CRET1, ->cont_nop | ||
997 | |. nop | ||
998 | | | ||
999 | | // Call metamethod for binary op. | ||
1000 | |->vmeta_binop: | ||
1001 | | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 | ||
1002 | | dsubu TMP1, CRET1, BASE | ||
1003 | | sd PC, -24(CRET1) // [cont|PC] | ||
1004 | | move TMP2, BASE | ||
1005 | | daddiu PC, TMP1, FRAME_CONT | ||
1006 | | move BASE, CRET1 | ||
1007 | | b ->vm_call_dispatch | ||
1008 | |. li NARGS8:RC, 16 // 2 args for func(o1, o2). | ||
1009 | | | ||
// vmeta_len: saves BASE/PC and calls lj_meta_len(L, o).
// With LJ_52, CARG1 is preserved in MULTRES across the call: a non-NULL
// result goes to vmeta_binop, while NULL restores CARG1 and retries at
// BC_LEN_Z. Without LJ_52 the result is always passed to vmeta_binop.
1010 | |->vmeta_len: | ||
1011 | | // CARG2 already set by BC_LEN. | ||
1012 | #if LJ_52 | ||
1013 | | move MULTRES, CARG1 | ||
1014 | #endif | ||
1015 | | load_got lj_meta_len | ||
1016 | | sd BASE, L->base | ||
1017 | | sd PC, SAVE_PC | ||
1018 | | call_intern lj_meta_len // (lua_State *L, TValue *o) | ||
1019 | |. move CARG1, L | ||
1020 | | // Returns NULL (retry) or TValue * (metamethod base). | ||
1021 | #if LJ_52 | ||
1022 | | bnez CRET1, ->vmeta_binop // Binop call for compatibility. | ||
1023 | |. nop | ||
1024 | | b ->BC_LEN_Z | ||
1025 | |. move CARG1, MULTRES | ||
1026 | #else | ||
1027 | | b ->vmeta_binop // Binop call for compatibility. | ||
1028 | |. nop | ||
1029 | #endif | ||
1030 | | | ||
1031 | |//-- Call metamethod ---------------------------------------------------- | ||
1032 | | | ||
// vmeta_call: stores the caller's base (TMP2) in L->base, saves PC, and calls
// lj_meta_call(L, func, top) with func = BASE-16 and top = BASE+RC. The
// argument count is kept in MULTRES across the call. Afterwards the function
// is reloaded from the frame, the argument count is bumped by one slot and
// the call is dispatched with ins_call.
1033 | |->vmeta_call: // Resolve and call __call metamethod. | ||
1034 | | // TMP2 = old base, BASE = new base, RC = nargs*8 | ||
1035 | | load_got lj_meta_call | ||
1036 | | sd TMP2, L->base // This is the callers base! | ||
1037 | | daddiu CARG2, BASE, -16 | ||
1038 | | sd PC, SAVE_PC | ||
1039 | | daddu CARG3, BASE, RC | ||
1040 | | move MULTRES, NARGS8:RC | ||
1041 | | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
1042 | |. move CARG1, L | ||
1043 | | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
1044 | | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. | ||
1045 | | cleartp LFUNC:RB | ||
1046 | | ins_call | ||
1047 | | | ||
// vmeta_callt: tail-call variant of vmeta_call. Saves BASE/PC and calls
// lj_meta_call(L, func, top) with func = RA-16 and top = RA+RC. Afterwards
// it reloads the function from FRAME_FUNC(RA) and the frame PC from
// FRAME_PC(BASE) into TMP1, bumps the argument count by one slot and
// continues at BC_CALLT_Z with the untagged function in CARG3.
1048 | |->vmeta_callt: // Resolve __call for BC_CALLT. | ||
1049 | | // BASE = old base, RA = new base, RC = nargs*8 | ||
1050 | | load_got lj_meta_call | ||
1051 | | sd BASE, L->base | ||
1052 | | daddiu CARG2, RA, -16 | ||
1053 | | sd PC, SAVE_PC | ||
1054 | | daddu CARG3, RA, RC | ||
1055 | | move MULTRES, NARGS8:RC | ||
1056 | | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
1057 | |. move CARG1, L | ||
1058 | | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. | ||
1059 | | ld TMP1, FRAME_PC(BASE) | ||
1060 | | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. | ||
1061 | | b ->BC_CALLT_Z | ||
1062 | |. cleartp LFUNC:CARG3, RB | ||
1063 | | | ||
1064 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
1065 | | | ||
// vmeta_for: saves BASE/PC and calls lj_meta_for(L, base) with base = RA.
// The instruction word is kept in MULTRES across the call and RA/RD are
// re-decoded from it afterwards. Execution then re-enters BC_FORI, or
// BC_JFORI when JIT is enabled and the opcode equals BC_JFORI.
1066 | |->vmeta_for: | ||
1067 | | load_got lj_meta_for | ||
1068 | | sd BASE, L->base | ||
1069 | | move CARG2, RA | ||
1070 | | sd PC, SAVE_PC | ||
1071 | | move MULTRES, INS | ||
1072 | | call_intern lj_meta_for // (lua_State *L, TValue *base) | ||
1073 | |. move CARG1, L | ||
1074 | |.if JIT | ||
1075 | | decode_OP1 TMP0, MULTRES | ||
1076 | | li AT, BC_JFORI | ||
1077 | |.endif | ||
1078 | | decode_RA8a RA, MULTRES | ||
1079 | | decode_RD8a RD, MULTRES | ||
1080 | | decode_RA8b RA | ||
1081 | |.if JIT | ||
1082 | | beq TMP0, AT, =>BC_JFORI | ||
1083 | |. decode_RD8b RD | ||
1084 | | b =>BC_FORI | ||
1085 | |. nop | ||
1086 | |.else | ||
1087 | | b =>BC_FORI | ||
1088 | |. decode_RD8b RD | ||
1089 | |.endif | ||
1090 | | | ||
1091 | |//----------------------------------------------------------------------- | ||
1092 | |//-- Fast functions ----------------------------------------------------- | ||
1093 | |//----------------------------------------------------------------------- | ||
1094 | | | ||
// .ffunc name: emits only the global label ff_<name>; no argument checks.
1095 | |.macro .ffunc, name | ||
1096 | |->ff_ .. name: | ||
1097 | |.endmacro | ||
1098 | | | ||
// .ffunc_1 name: emits ff_<name>, branches to fff_fallback when no argument
// was passed (NARGS8:RC == 0) and loads the first argument into CARG1.
1099 | |.macro .ffunc_1, name | ||
1100 | |->ff_ .. name: | ||
1101 | | beqz NARGS8:RC, ->fff_fallback | ||
1102 | |. ld CARG1, 0(BASE) | ||
1103 | |.endmacro | ||
1104 | | | ||
// .ffunc_2 name: emits ff_<name>, branches to fff_fallback when fewer than
// two arguments were passed (NARGS8:RC < 16) and loads the first two
// arguments into CARG1 and CARG2.
1105 | |.macro .ffunc_2, name | ||
1106 | |->ff_ .. name: | ||
1107 | | sltiu AT, NARGS8:RC, 16 | ||
1108 | | ld CARG1, 0(BASE) | ||
1109 | | bnez AT, ->fff_fallback | ||
1110 | |. ld CARG2, 8(BASE) | ||
1111 | |.endmacro | ||
1112 | | | ||
// .ffunc_n name: emits ff_<name> for a function taking one number. Loads the
// first argument into CARG1 (and into FARG1 when an FPU is available),
// branches to fff_fallback when no argument was passed, and applies checknum
// with fff_fallback as its failure target. The macro ends on a branch, so the
// first instruction written after the macro invocation lands in its slot.
1113 | |.macro .ffunc_n, name // Caveat: has delay slot! | ||
1114 | |->ff_ .. name: | ||
1115 | | ld CARG1, 0(BASE) | ||
1116 | | beqz NARGS8:RC, ->fff_fallback | ||
1117 | | // Either ldc1 or the 1st instruction of checknum is in the delay slot. | ||
1118 | | .FPU ldc1 FARG1, 0(BASE) | ||
1119 | | checknum CARG1, ->fff_fallback | ||
1120 | |.endmacro | ||
1121 | | | ||
// .ffunc_nn name: emits ff_<name> for a function taking two numbers. Loads
// both arguments into CARG1/CARG2 (and FARG1/FARG2 with an FPU) and branches
// to fff_fallback when fewer than two arguments were passed or when either
// type tag is not (unsigned) below LJ_TISNUM. The macro ends on the final
// beqz, so the first instruction written after the invocation lands in its
// slot.
1122 | |.macro .ffunc_nn, name // Caveat: has delay slot! | ||
1123 | |->ff_ .. name: | ||
1124 | | ld CARG1, 0(BASE) | ||
1125 | | sltiu AT, NARGS8:RC, 16 | ||
1126 | | ld CARG2, 8(BASE) | ||
1127 | | bnez AT, ->fff_fallback | ||
1128 | |. gettp TMP0, CARG1 | ||
1129 | | gettp TMP1, CARG2 | ||
1130 | | sltiu TMP0, TMP0, LJ_TISNUM | ||
1131 | | sltiu TMP1, TMP1, LJ_TISNUM | ||
1132 | | .FPU ldc1 FARG1, 0(BASE) | ||
1133 | | and TMP0, TMP0, TMP1 | ||
1134 | | .FPU ldc1 FARG2, 8(BASE) | ||
1135 | | beqz TMP0, ->fff_fallback | ||
1136 | |.endmacro | ||
1137 | | | ||
// ffgccheck: computes gc.total - gc.threshold into AT and branch-and-links to
// fff_gcstep when the difference is >= 0. The instruction following the macro
// invocation occupies the branch's delay slot (forbidden slot on MIPSR6).
1138 | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! | ||
1139 | |// MIPSR6: no delay slot, but a forbidden slot. | ||
1140 | |.macro ffgccheck | ||
1141 | | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) | ||
1142 | | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | ||
1143 | | dsubu AT, TMP0, TMP1 | ||
1144 | |.if MIPSR6 | ||
1145 | | bgezalc AT, ->fff_gcstep | ||
1146 | |.else | ||
1147 | | bgezal AT, ->fff_gcstep | ||
1148 | |.endif | ||
1149 | |.endmacro | ||
1150 | | | ||
1151 | |//-- Base library: checks ----------------------------------------------- | ||
1152 | |.ffunc_1 assert | ||
1153 | | gettp AT, CARG1 | ||
1154 | | sltiu AT, AT, LJ_TISTRUECOND | ||
1155 | | beqz AT, ->fff_fallback | ||
1156 | |. daddiu RA, BASE, -16 | ||
1157 | | ld PC, FRAME_PC(BASE) | ||
1158 | | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | ||
1159 | | daddu TMP2, RA, RD | ||
1160 | | daddiu TMP1, BASE, 8 | ||
1161 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. | ||
1162 | |. sd CARG1, 0(RA) | ||
1163 | |1: | ||
1164 | | ld CRET1, 0(TMP1) | ||
1165 | | sd CRET1, -16(TMP1) | ||
1166 | | bne TMP1, TMP2, <1 | ||
1167 | |. daddiu TMP1, TMP1, 8 | ||
1168 | | b ->fff_res | ||
1169 | |. nop | ||
1170 | | | ||
// type fast function: selects an upvalue of the C function in RB by type tag
// and returns it via fff_restv. The index is ~tag when TISNUM < tag
// (unsigned), and ~LJ_TISNUM otherwise, so every tag at or below TISNUM
// shares one upvalue. MIPSR6 uses selnez/seleqz/or in place of movz.
1171 | |.ffunc_1 type | ||
1172 | | gettp TMP0, CARG1 | ||
1173 | | sltu TMP1, TISNUM, TMP0 | ||
1174 | | not TMP2, TMP0 | ||
1175 | | li TMP3, ~LJ_TISNUM | ||
1176 | |.if MIPSR6 | ||
1177 | | selnez TMP2, TMP2, TMP1 | ||
1178 | | seleqz TMP3, TMP3, TMP1 | ||
1179 | | or TMP2, TMP2, TMP3 | ||
1180 | |.else | ||
1181 | | movz TMP2, TMP3, TMP1 | ||
1182 | |.endif | ||
1183 | | dsll TMP2, TMP2, 3 | ||
1184 | | daddu TMP2, CFUNC:RB, TMP2 | ||
1185 | | b ->fff_restv | ||
1186 | |. ld CARG1, CFUNC:TMP2->upvalue | ||
1187 | | | ||
1188 | |//-- Base library: getters and setters --------------------------------- | ||
1189 | | | ||
// getmetatable fast function.
// Entry: TMP0 ends up zero only when the tag equals LJ_TTAB or LJ_TUDATA;
//        every other type goes to 6:.
// 1: loads the object's own metatable into RB.
// 2: returns nil when there is no metatable. Otherwise looks up the
//    MM_metatable name string in the metatable's hash part, starting at
//    node[sid & hmask] and following the next links (loop 3).
// 4: key not found, or found with a nil value: returns the metatable itself,
//    tagged LJ_TTAB.
// 5: key found: returns its value unless it is nil.
// 6: non-table/userdata: tags below LJ_TISNUM are replaced by TISNUM, and the
//    metatable is loaded from the gcroot[GCROOT_BASEMT] array, indexed by the
//    (negative) tag, before rejoining at 2:.
1190 | |.ffunc_1 getmetatable | ||
1191 | | gettp TMP2, CARG1 | ||
1192 | | daddiu TMP0, TMP2, -LJ_TTAB | ||
1193 | | daddiu TMP1, TMP2, -LJ_TUDATA | ||
1194 | |.if MIPSR6 | ||
1195 | | selnez TMP0, TMP1, TMP0 | ||
1196 | |.else | ||
1197 | | movn TMP0, TMP1, TMP0 | ||
1198 | |.endif | ||
1199 | | bnez TMP0, >6 | ||
1200 | |. cleartp TAB:CARG1 | ||
1201 | |1: // Field metatable must be at same offset for GCtab and GCudata! | ||
1202 | | ld TAB:RB, TAB:CARG1->metatable | ||
1203 | |2: | ||
1204 | | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) | ||
1205 | | beqz TAB:RB, ->fff_restv | ||
1206 | |. li CARG1, LJ_TNIL | ||
1207 | | lw TMP0, TAB:RB->hmask | ||
1208 | | lw TMP1, STR:RC->sid | ||
1209 | | ld NODE:TMP2, TAB:RB->node | ||
1210 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | ||
1211 | | dsll TMP0, TMP1, 5 | ||
1212 | | dsll TMP1, TMP1, 3 | ||
1213 | | dsubu TMP1, TMP0, TMP1 | ||
1214 | | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | ||
1215 | | li CARG4, LJ_TSTR | ||
1216 | | settp STR:RC, CARG4 // Tagged key to look for. | ||
1217 | |3: // Rearranged logic, because we expect _not_ to find the key. | ||
1218 | | ld TMP0, NODE:TMP2->key | ||
1219 | | ld CARG1, NODE:TMP2->val | ||
1220 | | ld NODE:TMP2, NODE:TMP2->next | ||
1221 | | beq RC, TMP0, >5 | ||
1222 | |. li AT, LJ_TTAB | ||
1223 | | bnez NODE:TMP2, <3 | ||
1224 | |. nop | ||
1225 | |4: | ||
1226 | | move CARG1, RB | ||
1227 | | b ->fff_restv // Not found, keep default result. | ||
1228 | |. settp CARG1, AT | ||
1229 | |5: | ||
1230 | | bne CARG1, TISNIL, ->fff_restv | ||
1231 | |. nop | ||
1232 | | b <4 // Ditto for nil value. | ||
1233 | |. nop | ||
1234 | | | ||
1235 | |6: | ||
1236 | | sltiu AT, TMP2, LJ_TISNUM | ||
1237 | |.if MIPSR6 | ||
1238 | | selnez TMP0, TISNUM, AT | ||
1239 | | seleqz AT, TMP2, AT | ||
1240 | | or TMP2, TMP0, AT | ||
1241 | |.else | ||
1242 | | movn TMP2, TISNUM, AT | ||
1243 | |.endif | ||
1244 | | dsll TMP2, TMP2, 3 | ||
1245 | | dsubu TMP0, DISPATCH, TMP2 | ||
1246 | | b <2 | ||
1247 | |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) | ||
1248 | | | ||
// setmetatable fast function: handles only the case where the first argument
// is a table without a metatable and the second argument is a table.
// Anything else goes to fff_fallback. The new metatable is stored in the
// slot after `beqz AT`, so the store happens on both paths; if the table is
// marked black, barrierback runs before returning through fff_restv.
1249 | |.ffunc_2 setmetatable | ||
1250 | | // Fast path: no mt for table yet and not clearing the mt. | ||
1251 | | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback | ||
1252 | | gettp TMP3, CARG2 | ||
1253 | | ld TAB:TMP0, TAB:TMP1->metatable | ||
1254 | | lbu TMP2, TAB:TMP1->marked | ||
1255 | | daddiu AT, TMP3, -LJ_TTAB | ||
1256 | | cleartp TAB:CARG2 | ||
1257 | | or AT, AT, TAB:TMP0 | ||
1258 | | bnez AT, ->fff_fallback | ||
1259 | |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) | ||
1260 | | beqz AT, ->fff_restv | ||
1261 | |. sd TAB:CARG2, TAB:TMP1->metatable | ||
1262 | | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv | ||
1263 | | | ||
// rawget fast function: falls back unless at least two arguments were passed
// and the first is a table. Calls lj_tab_get(L, t, key) with key pointing at
// the second argument slot (BASE+8) and returns the TValue loaded from the
// resulting pointer.
1264 | |.ffunc rawget | ||
1265 | | ld CARG2, 0(BASE) | ||
1266 | | sltiu AT, NARGS8:RC, 16 | ||
1267 | | load_got lj_tab_get | ||
1268 | | gettp TMP0, CARG2 | ||
1269 | | cleartp CARG2 | ||
1270 | | daddiu TMP0, TMP0, -LJ_TTAB | ||
1271 | | or AT, AT, TMP0 | ||
1272 | | bnez AT, ->fff_fallback | ||
1273 | |. daddiu CARG3, BASE, 8 | ||
1274 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | ||
1275 | |. move CARG1, L | ||
1276 | | b ->fff_restv | ||
1277 | |. ld CARG1, 0(CRET1) | ||
1278 | | | ||
1279 | |//-- Base library: conversions ------------------------------------------ | ||
1280 | | | ||
// tonumber fast function: returns the argument unchanged when exactly one
// argument was passed (NARGS8:RC == 8) and its type tag is not (unsigned)
// above TISNUM; every other case goes to fff_fallback.
1281 | |.ffunc tonumber | ||
1282 | | // Only handles the number case inline (without a base argument). | ||
1283 | | ld CARG1, 0(BASE) | ||
1284 | | xori AT, NARGS8:RC, 8 // Exactly one number argument. | ||
1285 | | gettp TMP1, CARG1 | ||
1286 | | sltu TMP0, TISNUM, TMP1 | ||
1287 | | or AT, AT, TMP0 | ||
1288 | | bnez AT, ->fff_fallback | ||
1289 | |. nop | ||
1290 | | b ->fff_restv | ||
1291 | |. nop | ||
1292 | | | ||
// tostring fast function: a string argument is returned as is. A number is
// converted inline, but only while no number base metatable is installed
// (gcroot[GCROOT_BASEMT_NUM] == 0). L->base and SAVE_PC are stored first,
// then the GC threshold check runs, then lj_strfmt_number(L, BASE) is called
// and its result is tagged LJ_TSTR. Everything else goes to fff_fallback.
1293 | |.ffunc_1 tostring | ||
1294 | | // Only handles the string or number case inline. | ||
1295 | | gettp TMP0, CARG1 | ||
1296 | | daddiu AT, TMP0, -LJ_TSTR | ||
1297 | | // A __tostring method in the string base metatable is ignored. | ||
1298 | | beqz AT, ->fff_restv // String key? | ||
1299 | | // Handle numbers inline, unless a number base metatable is present. | ||
1300 | |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) | ||
1301 | | sltu TMP0, TISNUM, TMP0 | ||
1302 | | or TMP0, TMP0, TMP1 | ||
1303 | | bnez TMP0, ->fff_fallback | ||
1304 | |. sd BASE, L->base // Add frame since C call can throw. | ||
1305 | |.if MIPSR6 | ||
1306 | | sd PC, SAVE_PC // Redundant (but a defined value). | ||
1307 | | ffgccheck | ||
1308 | |.else | ||
1309 | | ffgccheck | ||
1310 | |. sd PC, SAVE_PC // Redundant (but a defined value). | ||
1311 | |.endif | ||
1312 | | load_got lj_strfmt_number | ||
1313 | | move CARG1, L | ||
1314 | | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) | ||
1315 | |. move CARG2, BASE | ||
1316 | | // Returns GCstr *. | ||
1317 | | li AT, LJ_TSTR | ||
1318 | | settp CRET1, AT | ||
1319 | | b ->fff_restv | ||
1320 | |. move CARG1, CRET1 | ||
1321 | | | ||
1322 | |//-- Base library: iterators ------------------------------------------- | ||
1323 | | | ||
// next fast function: requires a table as first argument. A nil is written
// just past the last passed argument so a missing key reads as nil. Calls
// lj_tab_next(t, key, o) with key = BASE+8 and the output pair written at
// BASE-16. Result > 0 returns the two output slots; result 0 returns nil;
// any other result reloads the function into RB and goes to fff_fallback
// with two arguments.
1324 | |.ffunc_1 next | ||
1325 | | checktp CARG1, -LJ_TTAB, ->fff_fallback | ||
1326 | | daddu TMP2, BASE, NARGS8:RC | ||
1327 | | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. | ||
1328 | | load_got lj_tab_next | ||
1329 | | ld PC, FRAME_PC(BASE) | ||
1330 | | daddiu CARG2, BASE, 8 | ||
1331 | | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) | ||
1332 | |. daddiu CARG3, BASE, -16 | ||
1333 | | // Returns 1=found, 0=end, -1=error. | ||
1334 | | daddiu RA, BASE, -16 | ||
1335 | | bgtz CRET1, ->fff_res // Found key/value. | ||
1336 | |. li RD, (2+1)*8 | ||
1337 | | beqz CRET1, ->fff_restv // End of traversal: return nil. | ||
1338 | |. move CARG1, TISNIL | ||
1339 | | ld CFUNC:RB, FRAME_FUNC(BASE) | ||
1340 | | cleartp CFUNC:RB | ||
1341 | | b ->fff_fallback // Invalid key. | ||
1342 | |. li RC, 2*8 | ||
1343 | | | ||
// pairs fast function: requires a table as first argument; with LJ_52 a
// table that has a metatable goes to fff_fallback instead. Returns three
// results starting at RA = BASE-16: upvalue[0] of the C function in RB, the
// table, and nil.
1344 | |.ffunc_1 pairs | ||
1345 | | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback | ||
1346 | | ld PC, FRAME_PC(BASE) | ||
1347 | #if LJ_52 | ||
1348 | | ld TAB:TMP2, TAB:TMP1->metatable | ||
1349 | | ld TMP0, CFUNC:RB->upvalue[0] | ||
1350 | | bnez TAB:TMP2, ->fff_fallback | ||
1351 | #else | ||
1352 | | ld TMP0, CFUNC:RB->upvalue[0] | ||
1353 | #endif | ||
1354 | |. daddiu RA, BASE, -16 | ||
1355 | | sd TISNIL, 0(BASE) | ||
1356 | | sd CARG1, -8(BASE) | ||
1357 | | sd TMP0, 0(RA) | ||
1358 | | b ->fff_res | ||
1359 | |. li RD, (3+1)*8 | ||
1360 | | | ||
// ipairs_aux fast function (the ipairs iterator step): requires a table and
// an integer index. Increments the index, stores it tagged with TISNUM as
// the first result, then fetches the element:
//   - index (unsigned) below asize: read directly from the array part;
//   - otherwise (2:): no results if the hash part is empty (hmask == 0),
//     else look the key up with lj_tab_getinth(t, key).
// 1: a nil element (or NULL lookup result) ends the iteration with zero
//    results; otherwise the element becomes the second result.
1361 | |.ffunc_2 ipairs_aux | ||
1362 | | checktab CARG1, ->fff_fallback | ||
1363 | | checkint CARG2, ->fff_fallback | ||
1364 | |. lw TMP0, TAB:CARG1->asize | ||
1365 | | ld TMP1, TAB:CARG1->array | ||
1366 | | ld PC, FRAME_PC(BASE) | ||
1367 | | sextw TMP2, CARG2 | ||
1368 | | addiu TMP2, TMP2, 1 | ||
1369 | | sltu AT, TMP2, TMP0 | ||
1370 | | daddiu RA, BASE, -16 | ||
1371 | | zextw TMP0, TMP2 | ||
1372 | | settp TMP0, TISNUM | ||
1373 | | beqz AT, >2 // Not in array part? | ||
1374 | |. sd TMP0, 0(RA) | ||
1375 | | dsll TMP3, TMP2, 3 | ||
1376 | | daddu TMP3, TMP1, TMP3 | ||
1377 | | ld TMP1, 0(TMP3) | ||
1378 | |1: | ||
1379 | | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. | ||
1380 | |. li RD, (0+1)*8 | ||
1381 | | sd TMP1, -8(BASE) | ||
1382 | | b ->fff_res | ||
1383 | |. li RD, (2+1)*8 | ||
1384 | |2: // Check for empty hash part first. Otherwise call C function. | ||
1385 | | lw TMP0, TAB:CARG1->hmask | ||
1386 | | load_got lj_tab_getinth | ||
1387 | | beqz TMP0, ->fff_res | ||
1388 | |. li RD, (0+1)*8 | ||
1389 | | call_intern lj_tab_getinth // (GCtab *t, int32_t key) | ||
1390 | |. move CARG2, TMP2 | ||
1391 | | // Returns cTValue * or NULL. | ||
1392 | | beqz CRET1, ->fff_res | ||
1393 | |. li RD, (0+1)*8 | ||
1394 | | b <1 | ||
1395 | |. ld TMP1, 0(CRET1) | ||
1396 | | | ||
// ipairs fast function: requires a table as first argument; with LJ_52 a
// table that has a metatable goes to fff_fallback instead. Returns three
// results starting at RA = BASE-16: upvalue[0] of the C function in RB, the
// table, and TISNUM << 47 (an integer-tagged value with a zero payload) as
// the initial index.
1397 | |.ffunc_1 ipairs | ||
1398 | | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback | ||
1399 | | ld PC, FRAME_PC(BASE) | ||
1400 | #if LJ_52 | ||
1401 | | ld TAB:TMP2, TAB:TMP1->metatable | ||
1402 | | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] | ||
1403 | | bnez TAB:TMP2, ->fff_fallback | ||
1404 | #else | ||
1405 | | ld TMP0, CFUNC:RB->upvalue[0] | ||
1406 | #endif | ||
1407 | | daddiu RA, BASE, -16 | ||
1408 | | dsll AT, TISNUM, 47 | ||
1409 | | sd CARG1, -8(BASE) | ||
1410 | | sd AT, 0(BASE) | ||
1411 | | sd CFUNC:TMP0, 0(RA) | ||
1412 | | b ->fff_res | ||
1413 | |. li RD, (3+1)*8 | ||
1414 | | | ||
1415 | |//-- Base library: catch errors ---------------------------------------- | ||
1416 | | | ||
// pcall fast function: falls back when called without arguments. Otherwise
// the called function's frame starts 16 bytes above the current BASE (old
// base kept in TMP2) and PC is set to 16+FRAME_PCALL plus the hook-active
// bit extracted from hookmask (loaded into TMP3). With no remaining
// arguments the call is dispatched directly.
// 1: (sits in the slot after the beqz and is also the target of xpcall's
//    `b <1`) computes the end of the argument area.
// 2: moves the arguments up by one slot, from the top down, then dispatches
//    through vm_call_dispatch.
1417 | |.ffunc pcall | ||
1418 | | daddiu NARGS8:RC, NARGS8:RC, -8 | ||
1419 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
1420 | | bltz NARGS8:RC, ->fff_fallback | ||
1421 | |. move TMP2, BASE | ||
1422 | | daddiu BASE, BASE, 16 | ||
1423 | | // Remember active hook before pcall. | ||
1424 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT | ||
1425 | | andi TMP3, TMP3, 1 | ||
1426 | | daddiu PC, TMP3, 16+FRAME_PCALL | ||
1427 | | beqz NARGS8:RC, ->vm_call_dispatch | ||
1428 | |1: | ||
1429 | |. daddu TMP0, BASE, NARGS8:RC | ||
1430 | |2: | ||
1431 | | ld TMP1, -16(TMP0) | ||
1432 | | sd TMP1, -8(TMP0) | ||
1433 | | daddiu TMP0, TMP0, -8 | ||
1434 | | bne TMP0, BASE, <2 | ||
1435 | |. nop | ||
1436 | | b ->vm_call_dispatch | ||
1437 | |. nop | ||
1438 | | | ||
// xpcall fast function: falls back when fewer than two arguments were passed
// or when the second argument (the traceback handler) is not a function.
// NARGS8:RC is only updated after both checks, so the fallback handler still
// sees the original argument count. The function and the handler are swapped
// in the first two slots, the called frame starts 24 bytes above the old
// base (kept in TMP2), and PC is set to 24+FRAME_PCALL plus the hook-active
// bit. Remaining arguments are shifted by pcall's copy loop (`b <1`).
// The hook mask must be loaded into TMP3, because the srl/andi below extract
// the hook-active bit from TMP3 (exactly as pcall does). Loading it into
// TMP1 left TMP3 stale, so the bit folded into PC was whatever TMP3 held on
// entry. TMP1 is not read before the copy loop overwrites it.
1439 | |.ffunc xpcall | ||
1440 | | daddiu NARGS8:TMP0, NARGS8:RC, -16 | ||
1441 | | ld CARG1, 0(BASE) | ||
1442 | | ld CARG2, 8(BASE) | ||
1443 | | bltz NARGS8:TMP0, ->fff_fallback | ||
1444 | |. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
1445 | | gettp AT, CARG2 | ||
1446 | | daddiu AT, AT, -LJ_TFUNC | ||
1447 | | bnez AT, ->fff_fallback // Traceback must be a function. | ||
1448 | |. move TMP2, BASE | ||
1449 | | move NARGS8:RC, NARGS8:TMP0 | ||
1450 | | daddiu BASE, BASE, 24 | ||
1451 | | // Remember active hook before pcall. | ||
1452 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT | ||
1453 | | sd CARG2, 0(TMP2) // Swap function and traceback. | ||
1454 | | andi TMP3, TMP3, 1 | ||
1455 | | sd CARG1, 8(TMP2) | ||
1456 | | beqz NARGS8:RC, ->vm_call_dispatch | ||
1457 | |. daddiu PC, TMP3, 24+FRAME_PCALL | ||
1458 | | b <1 | ||
1459 | |. nop | ||
1460 | | | ||
1461 | |//-- Coroutine library -------------------------------------------------- | ||
1462 | | | ||
// coroutine_resume_wrap: macro that generates the fast function for
// coroutine.resume (resume=1, thread taken from the first argument) or for
// the function created by coroutine.wrap (resume=0, thread taken from
// upvalue[0]). It is instantiated once for each at the bottom.
// Goes to fff_fallback when the status is above LUA_YIELD, cframe is
// non-zero, there is nothing to run (base == top && st == 0), or the
// coroutine stack cannot hold the arguments.
// 1:/2: for resume the thread argument is skipped; the remaining arguments
//       are moved to the coroutine stack.
// 3:    calls vm_resume with the coroutine state kept in RA.
// 4:    restores the VM state; a status above LUA_YIELD goes to 8:.
// 5:    moves results back to the caller's stack (after growing it at 9: if
//       needed) and clears the coroutine stack.
// 6:/7: resume prepends true; the return goes through BC_RET_Z or vm_return
//       depending on the frame type bits in PC.
// 8:    error: resume returns false plus the error value; wrap calls
//       lj_ffh_coroutine_wrap_err(L, co).
// 9:    calls lj_state_growstack(L, n) and retries from 4: with status 0.
1463 | |.macro coroutine_resume_wrap, resume | ||
1464 | |.if resume | ||
1465 | |.ffunc_1 coroutine_resume | ||
1466 | | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback | ||
1467 | |.else | ||
1468 | |.ffunc coroutine_wrap_aux | ||
1469 | | ld L:CARG1, CFUNC:RB->upvalue[0].gcr | ||
1470 | | cleartp L:CARG1 | ||
1471 | |.endif | ||
1472 | | lbu TMP0, L:CARG1->status | ||
1473 | | ld TMP1, L:CARG1->cframe | ||
1474 | | ld CARG2, L:CARG1->top | ||
1475 | | ld TMP2, L:CARG1->base | ||
1476 | | addiu AT, TMP0, -LUA_YIELD | ||
1477 | | daddu CARG3, CARG2, TMP0 | ||
1478 | | daddiu TMP3, CARG2, 8 | ||
1479 | |.if MIPSR6 | ||
1480 | | seleqz CARG2, CARG2, AT | ||
1481 | | selnez TMP3, TMP3, AT | ||
1482 | | bgtz AT, ->fff_fallback // st > LUA_YIELD? | ||
1483 | |. or CARG2, TMP3, CARG2 | ||
1484 | |.else | ||
1485 | | bgtz AT, ->fff_fallback // st > LUA_YIELD? | ||
1486 | |. movn CARG2, TMP3, AT | ||
1487 | |.endif | ||
1488 | | xor TMP2, TMP2, CARG3 | ||
1489 | | bnez TMP1, ->fff_fallback // cframe != 0? | ||
1490 | |. or AT, TMP2, TMP0 | ||
1491 | | ld TMP0, L:CARG1->maxstack | ||
1492 | | beqz AT, ->fff_fallback // base == top && st == 0? | ||
1493 | |. ld PC, FRAME_PC(BASE) | ||
1494 | | daddu TMP2, CARG2, NARGS8:RC | ||
1495 | | sltu AT, TMP0, TMP2 | ||
1496 | | bnez AT, ->fff_fallback // Stack overflow? | ||
1497 | |. sd PC, SAVE_PC | ||
1498 | | sd BASE, L->base | ||
1499 | |1: | ||
1500 | |.if resume | ||
1501 | | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC. | ||
1502 | | daddiu NARGS8:RC, NARGS8:RC, -8 | ||
1503 | | daddiu TMP2, TMP2, -8 | ||
1504 | |.endif | ||
1505 | | sd TMP2, L:CARG1->top | ||
1506 | | daddu TMP1, BASE, NARGS8:RC | ||
1507 | | move CARG3, CARG2 | ||
1508 | | sd BASE, L->top | ||
1509 | |2: // Move args to coroutine. | ||
1510 | | ld CRET1, 0(BASE) | ||
1511 | | sltu AT, BASE, TMP1 | ||
1512 | | beqz AT, >3 | ||
1513 | |. daddiu BASE, BASE, 8 | ||
1514 | | sd CRET1, 0(CARG3) | ||
1515 | | b <2 | ||
1516 | |. daddiu CARG3, CARG3, 8 | ||
1517 | |3: | ||
1518 | | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) | ||
1519 | |. move L:RA, L:CARG1 | ||
1520 | | // Returns thread status. | ||
1521 | |4: | ||
1522 | | ld TMP2, L:RA->base | ||
1523 | | sltiu AT, CRET1, LUA_YIELD+1 | ||
1524 | | ld TMP3, L:RA->top | ||
1525 | | li_vmstate INTERP | ||
1526 | | ld BASE, L->base | ||
1527 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
1528 | | st_vmstate | ||
1529 | | beqz AT, >8 | ||
1530 | |. dsubu RD, TMP3, TMP2 | ||
1531 | | ld TMP0, L->maxstack | ||
1532 | | beqz RD, >6 // No results? | ||
1533 | |. daddu TMP1, BASE, RD | ||
1534 | | sltu AT, TMP0, TMP1 | ||
1535 | | bnez AT, >9 // Need to grow stack? | ||
1536 | |. daddu TMP3, TMP2, RD | ||
1537 | | sd TMP2, L:RA->top // Clear coroutine stack. | ||
1538 | | move TMP1, BASE | ||
1539 | |5: // Move results from coroutine. | ||
1540 | | ld CRET1, 0(TMP2) | ||
1541 | | daddiu TMP2, TMP2, 8 | ||
1542 | | sltu AT, TMP2, TMP3 | ||
1543 | | sd CRET1, 0(TMP1) | ||
1544 | | bnez AT, <5 | ||
1545 | |. daddiu TMP1, TMP1, 8 | ||
1546 | |6: | ||
1547 | | andi TMP0, PC, FRAME_TYPE | ||
1548 | |.if resume | ||
1549 | | mov_true TMP1 | ||
1550 | | daddiu RA, BASE, -8 | ||
1551 | | sd TMP1, -8(BASE) // Prepend true to results. | ||
1552 | | daddiu RD, RD, 16 | ||
1553 | |.else | ||
1554 | | move RA, BASE | ||
1555 | | daddiu RD, RD, 8 | ||
1556 | |.endif | ||
1557 | |7: | ||
1558 | | sd PC, SAVE_PC | ||
1559 | | beqz TMP0, ->BC_RET_Z | ||
1560 | |. move MULTRES, RD | ||
1561 | | b ->vm_return | ||
1562 | |. nop | ||
1563 | | | ||
1564 | |8: // Coroutine returned with error (at co->top-1). | ||
1565 | |.if resume | ||
1566 | | daddiu TMP3, TMP3, -8 | ||
1567 | | mov_false TMP1 | ||
1568 | | ld CRET1, 0(TMP3) | ||
1569 | | sd TMP3, L:RA->top // Remove error from coroutine stack. | ||
1570 | | li RD, (2+1)*8 | ||
1571 | | sd TMP1, -8(BASE) // Prepend false to results. | ||
1572 | | daddiu RA, BASE, -8 | ||
1573 | | sd CRET1, 0(BASE) // Copy error message. | ||
1574 | | b <7 | ||
1575 | |. andi TMP0, PC, FRAME_TYPE | ||
1576 | |.else | ||
1577 | | load_got lj_ffh_coroutine_wrap_err | ||
1578 | | move CARG2, L:RA | ||
1579 | | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | ||
1580 | |. move CARG1, L | ||
1581 | |.endif | ||
1582 | | | ||
1583 | |9: // Handle stack expansion on return from yield. | ||
1584 | | load_got lj_state_growstack | ||
1585 | | srl CARG2, RD, 3 | ||
1586 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
1587 | |. move CARG1, L | ||
1588 | | b <4 | ||
1589 | |. li CRET1, 0 | ||
1590 | |.endmacro | ||
1591 | | | ||
1592 | | coroutine_resume_wrap 1 // coroutine.resume | ||
1593 | | coroutine_resume_wrap 0 // coroutine.wrap | ||
1594 | | | ||
// coroutine_yield fast function: stores BASE and the top of the arguments
// (BASE + nargs*8) into L. If the CFRAME_RESUME bit is not set in L->cframe
// it goes to fff_fallback; otherwise it clears L->cframe, sets L->status to
// LUA_YIELD and leaves through vm_leave_unw with LUA_YIELD in CRET1.
1595 | |.ffunc coroutine_yield | ||
1596 | | ld TMP0, L->cframe | ||
1597 | | daddu TMP1, BASE, NARGS8:RC | ||
1598 | | sd BASE, L->base | ||
1599 | | andi TMP0, TMP0, CFRAME_RESUME | ||
1600 | | sd TMP1, L->top | ||
1601 | | beqz TMP0, ->fff_fallback | ||
1602 | |. li CRET1, LUA_YIELD | ||
1603 | | sd r0, L->cframe | ||
1604 | | b ->vm_leave_unw | ||
1605 | |. sb CRET1, L->status | ||
1606 | | | ||
1607 | |//-- Math library ------------------------------------------------------- | ||
1608 | | | ||
// math_abs fast function.
// Integer (tag == LJ_TISNUM): computes |x| with the sign-mask xor/subtract
// idiom. If bit 31 of the result is still set (tested by shifting it into
// the sign position), the result is replaced by the double 2^31
// (0x41e0 << 48).
// 1: other tags: values with a tag (unsigned) below LJ_TISNUM have bit 63
//    cleared by extracting the low 63 bits; anything else goes to
//    fff_fallback.
// Falls through into the shared fast-function return paths:
// fff_restv: stores CARG1 as the single result at BASE-16.
// fff_res1:  sets RD for exactly one result.
// fff_res:   non-zero frame type bits in PC return through vm_return.
//            Otherwise the caller's instruction at PC-4 is decoded: results
//            are padded with nil (6:) until RD reaches the expected count
//            in RB, BASE is rewound to RA - ra*8 and the next instruction is
//            dispatched.
1609 | |.ffunc_1 math_abs | ||
1610 | | gettp CARG2, CARG1 | ||
1611 | | daddiu AT, CARG2, -LJ_TISNUM | ||
1612 | | bnez AT, >1 | ||
1613 | |. sextw TMP1, CARG1 | ||
1614 | | sra TMP0, TMP1, 31 // Extract sign. | ||
1615 | | xor TMP1, TMP1, TMP0 | ||
1616 | | dsubu CARG1, TMP1, TMP0 | ||
1617 | | dsll TMP3, CARG1, 32 | ||
1618 | | bgez TMP3, ->fff_restv | ||
1619 | |. settp CARG1, TISNUM | ||
1620 | | li CARG1, 0x41e0 // 2^31 as a double. | ||
1621 | | b ->fff_restv | ||
1622 | |. dsll CARG1, CARG1, 48 | ||
1623 | |1: | ||
1624 | | sltiu AT, CARG2, LJ_TISNUM | ||
1625 | | beqz AT, ->fff_fallback | ||
1626 | |. dextm CARG1, CARG1, 0, 30 | ||
1627 | |// fallthrough | ||
1628 | | | ||
1629 | |->fff_restv: | ||
1630 | | // CARG1 = TValue result. | ||
1631 | | ld PC, FRAME_PC(BASE) | ||
1632 | | daddiu RA, BASE, -16 | ||
1633 | | sd CARG1, -16(BASE) | ||
1634 | |->fff_res1: | ||
1635 | | // RA = results, PC = return. | ||
1636 | | li RD, (1+1)*8 | ||
1637 | |->fff_res: | ||
1638 | | // RA = results, RD = (nresults+1)*8, PC = return. | ||
1639 | | andi TMP0, PC, FRAME_TYPE | ||
1640 | | bnez TMP0, ->vm_return | ||
1641 | |. move MULTRES, RD | ||
1642 | | lw INS, -4(PC) | ||
1643 | | decode_RB8a RB, INS | ||
1644 | | decode_RB8b RB | ||
1645 | |5: | ||
1646 | | sltu AT, RD, RB | ||
1647 | | bnez AT, >6 // More results expected? | ||
1648 | |. decode_RA8a TMP0, INS | ||
1649 | | decode_RA8b TMP0 | ||
1650 | | ins_next1 | ||
1651 | | // Adjust BASE. KBASE is assumed to be set for the calling frame. | ||
1652 | | dsubu BASE, RA, TMP0 | ||
1653 | | ins_next2 | ||
1654 | | | ||
1655 | |6: // Fill up results with nil. | ||
1656 | | daddu TMP1, RA, RD | ||
1657 | | daddiu RD, RD, 8 | ||
1658 | | b <5 | ||
1659 | |. sd TISNIL, -8(TMP1) | ||
1660 | | | ||
// math_extern func: emits ff_math_<func> for a one-number function. The
// argument is checked and loaded by .ffunc_n, the external function `func`
// is called via call_extern and the result is returned through fff_resn.
// The load_got line follows the branch that ends .ffunc_n.
1661 | |.macro math_extern, func | ||
1662 | | .ffunc_n math_ .. func | ||
1663 | | load_got func | ||
1664 | | call_extern | ||
1665 | |. nop | ||
1666 | | b ->fff_resn | ||
1667 | |. nop | ||
1668 | |.endmacro | ||
1669 | | | ||
// math_extern2 func: two-number variant of math_extern. Arguments are
// checked and loaded by .ffunc_nn, the external function `func` is called
// via call_extern and the result is returned through fff_resn.
1670 | |.macro math_extern2, func | ||
1671 | | .ffunc_nn math_ .. func | ||
1672 | |. load_got func | ||
1673 | | call_extern | ||
1674 | |. nop | ||
1675 | | b ->fff_resn | ||
1676 | |. nop | ||
1677 | |.endmacro | ||
1678 | | | ||
1679 | |// TODO: Return integer type if result is integer (own sf implementation). | ||
// math_round func: emits ff_math_<func>; instantiated below for floor and
// ceil. Falls back when no argument was passed or the argument is not a
// number. An integer argument (tag == TISNUM) is returned unchanged through
// fff_restv. Other numbers are rounded by vm_<func> when an FPU is
// available, otherwise by the external function `func`; the result is
// returned through fff_resn.
1679 | |// TODO: Return integer type if result is integer (own sf implementation). | ||
1680 | |.macro math_round, func | ||
1681 | |->ff_math_ .. func: | ||
1682 | | ld CARG1, 0(BASE) | ||
1683 | | beqz NARGS8:RC, ->fff_fallback | ||
1684 | |. gettp TMP0, CARG1 | ||
1685 | | beq TMP0, TISNUM, ->fff_restv | ||
1686 | |. sltu AT, TMP0, TISNUM | ||
1687 | | beqz AT, ->fff_fallback | ||
1688 | |.if FPU | ||
1689 | |. ldc1 FARG1, 0(BASE) | ||
1690 | | bal ->vm_ .. func | ||
1691 | |. nop | ||
1692 | |.else | ||
1693 | |. load_got func | ||
1694 | | call_extern | ||
1695 | |. nop | ||
1696 | |.endif | ||
1697 | | b ->fff_resn | ||
1698 | |. nop | ||
1699 | |.endmacro | ||
1700 | | | ||
1701 | | math_round floor | ||
1702 | | math_round ceil | ||
1703 | | | ||
// math_log fast function: handles only the call with exactly one argument
// (NARGS8:RC == 8) that passes checknum. Calls the external log function
// (argument in FARG1 when an FPU is available) and returns through fff_resn.
// Any other call shape goes to fff_fallback.
1704 | |.ffunc math_log | ||
1705 | | li AT, 8 | ||
1706 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | ||
1707 | |. ld CARG1, 0(BASE) | ||
1708 | | checknum CARG1, ->fff_fallback | ||
1709 | |. load_got log | ||
1710 | |.if FPU | ||
1711 | | call_extern | ||
1712 | |. ldc1 FARG1, 0(BASE) | ||
1713 | |.else | ||
1714 | | call_extern | ||
1715 | |. nop | ||
1716 | |.endif | ||
1717 | | b ->fff_resn | ||
1718 | |. nop | ||
1719 | | | ||
// Instantiations of the wrapper macros: one fast function per external math
// routine, one-argument (math_extern) or two-argument (math_extern2).
1720 | | math_extern log10 | ||
1721 | | math_extern exp | ||
1722 | | math_extern sin | ||
1723 | | math_extern cos | ||
1724 | | math_extern tan | ||
1725 | | math_extern asin | ||
1726 | | math_extern acos | ||
1727 | | math_extern atan | ||
1728 | | math_extern sinh | ||
1729 | | math_extern cosh | ||
1730 | | math_extern tanh | ||
1731 | | math_extern2 pow | ||
1732 | | math_extern2 atan2 | ||
1733 | | math_extern2 fmod | ||
1734 | | | ||
// math.sqrt: FPU builds compute the result inline with sqrt.d and fall through
// into ->fff_resn; non-FPU builds use the generic math_extern wrapper instead.
1735 | |.if FPU | ||
1736 | |.ffunc_n math_sqrt | ||
1737 | |. sqrt.d FRET1, FARG1 | ||
1738 | |// fallthrough to ->fff_resn | ||
1739 | |.else | ||
1740 | | math_extern sqrt | ||
1741 | |.endif | ||
1742 | | | ||
// fff_resn: common exit for fast functions returning one number.
// Reloads PC from the frame, sets RA = BASE-16 and stores the result there
// (FRET1 on FPU builds, CRET1 otherwise) in the delay slot of the branch to
// ->fff_res1.
1743 | |->fff_resn: | ||
1744 | | ld PC, FRAME_PC(BASE) | ||
1745 | | daddiu RA, BASE, -16 | ||
1746 | | b ->fff_res1 | ||
1747 | |.if FPU | ||
1748 | |. sdc1 FRET1, 0(RA) | ||
1749 | |.else | ||
1750 | |. sd CRET1, 0(RA) | ||
1751 | |.endif | ||
1752 | | | ||
1753 | | | ||
// math.ldexp fast path: first argument must pass checknum, second must pass
// checkint; otherwise fallback. Calls the external ldexp routine with the
// number (FARG1 on FPU builds) and the low 32-bit word of the second argument
// in CARG2 (loaded in the call's delay slot). Result goes to ->fff_resn.
1754 | |.ffunc_2 math_ldexp | ||
1755 | | checknum CARG1, ->fff_fallback | ||
1756 | | checkint CARG2, ->fff_fallback | ||
1757 | |. load_got ldexp | ||
1758 | | .FPU ldc1 FARG1, 0(BASE) | ||
1759 | | call_extern | ||
1760 | |. lw CARG2, 8+LO(BASE) | ||
1761 | | b ->fff_resn | ||
1762 | |. nop | ||
1763 | | | ||
// math.frexp fast path: calls the external frexp routine with CARG2 pointing
// at the tmptv slot (DISPATCH + DISPATCH_GL(tmptv)), which receives the int
// exponent. Afterwards two results are stored at RA = BASE-16:
//   0(RA): the value returned by frexp (FRET1 / CRET1)
//   8(RA): the 32-bit exponent read back from tmptv -- converted to a double on
//          FPU builds, tagged with TISNUM on non-FPU builds.
// RD = (2+1)*8 announces two results to ->fff_res.
1764 | |.ffunc_n math_frexp | ||
1765 | | load_got frexp | ||
1766 | | ld PC, FRAME_PC(BASE) | ||
1767 | | call_extern | ||
1768 | |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) | ||
1769 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) | ||
1770 | | daddiu RA, BASE, -16 | ||
1771 | |.if FPU | ||
1772 | | mtc1 TMP1, FARG2 | ||
1773 | | sdc1 FRET1, 0(RA) | ||
1774 | | cvt.d.w FARG2, FARG2 | ||
1775 | | sdc1 FARG2, 8(RA) | ||
1776 | |.else | ||
1777 | | sd CRET1, 0(RA) | ||
1778 | | zextw TMP1, TMP1 | ||
1779 | | settp TMP1, TISNUM | ||
1780 | | sd TMP1, 8(RA) | ||
1781 | |.endif | ||
1782 | | b ->fff_res | ||
1783 | |. li RD, (2+1)*8 | ||
1784 | | | ||
// math.modf fast path: calls the external modf routine with CARG2 = BASE-16, so
// the value modf writes through its pointer argument lands directly in the
// first result slot. The value returned by modf is stored in the second result
// slot at -8(BASE). RA = BASE-16 and RD = (2+1)*8 announce two results.
1785 | |.ffunc_n math_modf | ||
1786 | | load_got modf | ||
1787 | | ld PC, FRAME_PC(BASE) | ||
1788 | | call_extern | ||
1789 | |. daddiu CARG2, BASE, -16 | ||
1790 | | daddiu RA, BASE, -16 | ||
1791 | |.if FPU | ||
1792 | | sdc1 FRET1, -8(BASE) | ||
1793 | |.else | ||
1794 | | sd CRET1, -8(BASE) | ||
1795 | |.endif | ||
1796 | | b ->fff_res | ||
1797 | |. li RD, (2+1)*8 | ||
1798 | | | ||
// math_minmax: shared implementation of math.min / math.max.
// Parameters:
//   name    - fast function name (math_min or math_max).
//   intins  - integer select instruction (R6: seleqz/selnez, else movz/movn).
//   intinsc - complementary select, only used on MIPSR6 (passed as `_` otherwise).
//   fpins   - on MIPSR6 the FP instruction (min.d/max.d); otherwise a 0/1 flag
//             tested with `.if fpins` (1 = max).
// Registers: TMP2 walks the argument slots, TMP3 = BASE + NARGS8:RC marks the end.
// Local labels:
//   1: integer loop, running result in CARG1 (re-tagged with TISNUM each round).
//   3: an argument failed checkint while the running result is an integer:
//      convert the running result to a number and join the number loop at 7.
//   5: first argument failed checkint: enter the number loop.
//   6: number loop, running result in FRET1 (FPU) or CARG1 (soft-float).
//   7: compare and select.
//   8: an argument failed checknum inside the number loop: it must pass
//      checkint, is converted to a number, and control returns to 7.
1799 | |.macro math_minmax, name, intins, intinsc, fpins | ||
1800 | | .ffunc_1 name | ||
1801 | | daddu TMP3, BASE, NARGS8:RC | ||
1802 | | checkint CARG1, >5 | ||
1803 | |. daddiu TMP2, BASE, 8 | ||
1804 | |1: // Handle integers. | ||
1805 | | beq TMP2, TMP3, ->fff_restv | ||
1806 | |. ld CARG2, 0(TMP2) | ||
1807 | | checkint CARG2, >3 | ||
1808 | |. sextw CARG1, CARG1 | ||
1809 | | lw CARG2, LO(TMP2) | ||
1810 | |. slt AT, CARG1, CARG2 | ||
1811 | |.if MIPSR6 | ||
1812 | | intins TMP1, CARG2, AT | ||
1813 | | intinsc CARG1, CARG1, AT | ||
1814 | | or CARG1, CARG1, TMP1 | ||
1815 | |.else | ||
1816 | | intins CARG1, CARG2, AT | ||
1817 | |.endif | ||
1818 | | daddiu TMP2, TMP2, 8 | ||
1819 | | zextw CARG1, CARG1 | ||
1820 | | b <1 | ||
1821 | |. settp CARG1, TISNUM | ||
1822 | | | ||
1823 | |3: // Convert intermediate result to number and continue with number loop. | ||
1824 | | checknum CARG2, ->fff_fallback | ||
1825 | |.if FPU | ||
1826 | |. mtc1 CARG1, FRET1 | ||
1827 | | cvt.d.w FRET1, FRET1 | ||
1828 | | b >7 | ||
1829 | |. ldc1 FARG1, 0(TMP2) | ||
1830 | |.else | ||
1831 | |. nop | ||
1832 | | bal ->vm_sfi2d_1 | ||
1833 | |. nop | ||
1834 | | b >7 | ||
1835 | |. nop | ||
1836 | |.endif | ||
1837 | | | ||
1838 | |5: | ||
1839 | | .FPU ldc1 FRET1, 0(BASE) | ||
1840 | | checknum CARG1, ->fff_fallback | ||
1841 | |6: // Handle numbers. | ||
1842 | |. ld CARG2, 0(TMP2) | ||
1843 | | beq TMP2, TMP3, ->fff_resn | ||
1844 | |.if FPU | ||
1845 | | ldc1 FARG1, 0(TMP2) | ||
1846 | |.else | ||
1847 | | move CRET1, CARG1 | ||
1848 | |.endif | ||
1849 | | checknum CARG2, >8 | ||
1850 | |. nop | ||
1851 | |7: | ||
1852 | |.if FPU | ||
1853 | |.if MIPSR6 | ||
1854 | | fpins FRET1, FRET1, FARG1 | ||
1855 | |.else | ||
1856 | |.if fpins // ismax | ||
1857 | | c.olt.d FARG1, FRET1 | ||
1858 | |.else | ||
1859 | | c.olt.d FRET1, FARG1 | ||
1860 | |.endif | ||
1861 | | movf.d FRET1, FARG1 | ||
1862 | |.endif | ||
1863 | |.else | ||
1864 | |.if fpins // ismax | ||
1865 | | bal ->vm_sfcmpogt | ||
1866 | |.else | ||
1867 | | bal ->vm_sfcmpolt | ||
1868 | |.endif | ||
1869 | |. nop | ||
1870 | |.if MIPSR6 | ||
1871 | | seleqz AT, CARG2, CRET1 | ||
1872 | | selnez CARG1, CARG1, CRET1 | ||
1873 | | or CARG1, CARG1, AT | ||
1874 | |.else | ||
1875 | | movz CARG1, CARG2, CRET1 | ||
1876 | |.endif | ||
1877 | |.endif | ||
1878 | | b <6 | ||
1879 | |. daddiu TMP2, TMP2, 8 | ||
1880 | | | ||
1881 | |8: // Convert integer to number and continue with number loop. | ||
1882 | | checkint CARG2, ->fff_fallback | ||
1883 | |.if FPU | ||
1884 | |. lwc1 FARG1, LO(TMP2) | ||
1885 | | b <7 | ||
1886 | |. cvt.d.w FARG1, FARG1 | ||
1887 | |.else | ||
1888 | |. lw CARG2, LO(TMP2) | ||
1889 | | bal ->vm_sfi2d_2 | ||
1890 | |. nop | ||
1891 | | b <7 | ||
1892 | |. nop | ||
1893 | |.endif | ||
1894 | | | ||
1895 | |.endmacro | ||
1896 | | | ||
// Instantiate math.min and math.max. On MIPSR6 the select pair and the FP
// min.d/max.d instruction are passed in; otherwise movz/movn plus the 0/1
// "ismax" flag.
1897 | |.if MIPSR6 | ||
1898 | | math_minmax math_min, seleqz, selnez, min.d | ||
1899 | | math_minmax math_max, selnez, seleqz, max.d | ||
1900 | |.else | ||
1901 | | math_minmax math_min, movz, _, 0 | ||
1902 | | math_minmax math_max, movn, _, 1 | ||
1903 | |.endif | ||
1904 | | | ||
1905 | |//-- String library ----------------------------------------------------- | ||
1906 | | | ||
// string.byte fast path for exactly one argument whose type tag is LJ_TSTR.
// The argument-count test (NARGS8:RC xor 8) and the tag test (tag - LJ_TSTR)
// are OR-ed so that a single branch covers both. The byte at STR:CARG1[1] is
// tagged as an integer and stored at RA = BASE-16; RD is (1+1)*8 for a
// non-empty string and (0+1)*8 (no results) for an empty one.
1907 | |.ffunc string_byte // Only handle the 1-arg case here. | ||
1908 | | ld CARG1, 0(BASE) | ||
1909 | | gettp TMP0, CARG1 | ||
1910 | | xori AT, NARGS8:RC, 8 | ||
1911 | | daddiu TMP0, TMP0, -LJ_TSTR | ||
1912 | | or AT, AT, TMP0 | ||
1913 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. | ||
1914 | |. cleartp STR:CARG1 | ||
1915 | | lw TMP0, STR:CARG1->len | ||
1916 | | daddiu RA, BASE, -16 | ||
1917 | | ld PC, FRAME_PC(BASE) | ||
1918 | | sltu RD, r0, TMP0 | ||
1919 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | ||
1920 | | addiu RD, RD, 1 | ||
1921 | | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 | ||
1922 | | settp TMP1, TISNUM | ||
1923 | | b ->fff_res | ||
1924 | |. sd TMP1, 0(RA) | ||
1925 | | | ||
// string.char fast path for exactly one integer argument n with 0 <= n <= 255.
// Three conditions are OR-ed into AT and tested by one branch: argument count,
// type tag (tag - LJ_TISNUM), and the unsigned compare 255 < n, which after
// sextw also rejects negative values. The byte is written to TMPD and passed
// on as a one-byte buffer (CARG2 = sp+TMPD_OFS, CARG3 = 1).
// ffgccheck is defined outside this chunk (presumably a GC step check --
// confirm at its definition).
1926 | |.ffunc string_char // Only handle the 1-arg case here. | ||
1927 | | ffgccheck | ||
1928 | |.if not MIPSR6 | ||
1929 | |. nop | ||
1930 | |.endif | ||
1931 | | ld CARG1, 0(BASE) | ||
1932 | | gettp TMP0, CARG1 | ||
1933 | | xori AT, NARGS8:RC, 8 // Exactly 1 argument. | ||
1934 | | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. | ||
1935 | | li TMP1, 255 | ||
1936 | | sextw CARG1, CARG1 | ||
1937 | | or AT, AT, TMP0 | ||
1938 | | sltu TMP1, TMP1, CARG1 // !(255 < n). | ||
1939 | | or AT, AT, TMP1 | ||
1940 | | bnez AT, ->fff_fallback | ||
1941 | |. li CARG3, 1 | ||
1942 | | daddiu CARG2, sp, TMPD_OFS | ||
1943 | | sb CARG1, TMPD | ||
// fff_newstr: shared tail that creates a string from CARG2 (char *) and
// CARG3 (length). Saves BASE and PC, calls lj_str_new with CARG1 = L (set in
// the delay slot), then reloads BASE.
1944 | |->fff_newstr: | ||
1945 | | load_got lj_str_new | ||
1946 | | sd BASE, L->base | ||
1947 | | sd PC, SAVE_PC | ||
1948 | | call_intern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1949 | |. move CARG1, L | ||
1950 | | // Returns GCstr *. | ||
1951 | | ld BASE, L->base | ||
// fff_resstr: tags the GCstr pointer in CRET1 with LJ_TSTR and returns it as a
// single tagged value (in CARG1) via ->fff_restv.
1952 | |->fff_resstr: | ||
1953 | | li AT, LJ_TSTR | ||
1954 | | settp CRET1, AT | ||
1955 | | b ->fff_restv | ||
1956 | |. move CARG1, CRET1 | ||
1957 | | | ||
// string.sub fast path. Needs at least two arguments (NARGS8:RC - 16 >= 0).
// With exactly two arguments `end` defaults to -1; otherwise the third
// argument must pass checkint. The second argument must pass checkint and the
// first must carry the LJ_TSTR tag; anything else goes to the fallback.
// After normalising negative indices and clamping (see inline comments), a
// non-negative end-start jumps to ->fff_newstr with
//   CARG2 = str + start + sizeof(GCstr) - 1   and   CARG3 = end - start + 1;
// otherwise control falls into ->fff_emptystr.
// The MIPSR6 variants express the same conditional moves with
// seleqz/selnez/or because movn/movz are not used in that configuration.
1958 | |.ffunc string_sub | ||
1959 | | ffgccheck | ||
1960 | |.if not MIPSR6 | ||
1961 | |. nop | ||
1962 | |.endif | ||
1963 | | addiu AT, NARGS8:RC, -16 | ||
1964 | | ld TMP0, 0(BASE) | ||
1965 | | bltz AT, ->fff_fallback | ||
1966 | |. gettp TMP3, TMP0 | ||
1967 | | cleartp STR:CARG1, TMP0 | ||
1968 | | ld CARG2, 8(BASE) | ||
1969 | | beqz AT, >1 | ||
1970 | |. li CARG4, -1 | ||
1971 | | ld CARG3, 16(BASE) | ||
1972 | | checkint CARG3, ->fff_fallback | ||
1973 | |. sextw CARG4, CARG3 | ||
1974 | |1: | ||
1975 | | checkint CARG2, ->fff_fallback | ||
1976 | |. li AT, LJ_TSTR | ||
1977 | | bne TMP3, AT, ->fff_fallback | ||
1978 | |. sextw CARG3, CARG2 | ||
1979 | | lw CARG2, STR:CARG1->len | ||
1980 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | ||
1981 | | slt AT, CARG4, r0 | ||
1982 | | addiu TMP0, CARG2, 1 | ||
1983 | | addu TMP1, CARG4, TMP0 | ||
1984 | | slt TMP3, CARG3, r0 | ||
1985 | |.if MIPSR6 | ||
1986 | | seleqz CARG4, CARG4, AT | ||
1987 | | selnez TMP1, TMP1, AT | ||
1988 | | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 | ||
1989 | |.else | ||
1990 | | movn CARG4, TMP1, AT // if (end < 0) end += len+1 | ||
1991 | |.endif | ||
1992 | | addu TMP1, CARG3, TMP0 | ||
1993 | |.if MIPSR6 | ||
1994 | | selnez TMP1, TMP1, TMP3 | ||
1995 | | seleqz CARG3, CARG3, TMP3 | ||
1996 | | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 | ||
1997 | | li TMP2, 1 | ||
1998 | | slt AT, CARG4, r0 | ||
1999 | | slt TMP3, r0, CARG3 | ||
2000 | | seleqz CARG4, CARG4, AT // if (end < 0) end = 0 | ||
2001 | | selnez CARG3, CARG3, TMP3 | ||
2002 | | seleqz TMP2, TMP2, TMP3 | ||
2003 | | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 | ||
2004 | | slt AT, CARG2, CARG4 | ||
2005 | | seleqz CARG4, CARG4, AT | ||
2006 | | selnez CARG2, CARG2, AT | ||
2007 | | or CARG4, CARG2, CARG4 // if (end > len) end = len | ||
2008 | |.else | ||
2009 | | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 | ||
2010 | | li TMP2, 1 | ||
2011 | | slt AT, CARG4, r0 | ||
2012 | | slt TMP3, r0, CARG3 | ||
2013 | | movn CARG4, r0, AT // if (end < 0) end = 0 | ||
2014 | | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 | ||
2015 | | slt AT, CARG2, CARG4 | ||
2016 | | movn CARG4, CARG2, AT // if (end > len) end = len | ||
2017 | |.endif | ||
2018 | | daddu CARG2, STR:CARG1, CARG3 | ||
2019 | | subu CARG3, CARG4, CARG3 // len = end - start | ||
2020 | | daddiu CARG2, CARG2, sizeof(GCstr)-1 | ||
2021 | | bgez CARG3, ->fff_newstr | ||
2022 | |. addiu CARG3, CARG3, 1 // len++ | ||
// fff_emptystr: returns the address DISPATCH + DISPATCH_GL(strempty), tagged
// with LJ_TSTR, via ->fff_restv.
2023 | |->fff_emptystr: // Return empty string. | ||
2024 | | li AT, LJ_TSTR | ||
2025 | | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) | ||
2026 | | b ->fff_restv | ||
2027 | |. settp CARG1, AT | ||
2028 | | | ||
// ffstring_op: defines the fast function string_<name> on top of the buffer
// helper lj_buf_putstr_<name>.
//  - Falls back when there is no argument or the first argument fails checkstr.
//  - Prepares the temporary buffer at DISPATCH + DISPATCH_GL(tmpbuf):
//    stores L into ->L and resets the write pointer ->w to the buffer start ->b.
//  - Calls lj_buf_putstr_<name> (CARG1 = buffer, CARG2 = string), then passes the
//    returned buffer to lj_buf_tostr and returns the string via ->fff_resstr.
//  - SAVE_PC and L->base are set before the first call; BASE is reloaded in the
//    delay slot of the final branch.
2029 | |.macro ffstring_op, name | ||
2030 | | .ffunc string_ .. name | ||
2031 | | ffgccheck | ||
2032 | |. nop | ||
2033 | | beqz NARGS8:RC, ->fff_fallback | ||
2034 | |. ld CARG2, 0(BASE) | ||
2035 | | checkstr STR:CARG2, ->fff_fallback | ||
2036 | | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) | ||
2037 | | load_got lj_buf_putstr_ .. name | ||
2038 | | ld TMP0, SBUF:CARG1->b | ||
2039 | | sd L, SBUF:CARG1->L | ||
2040 | | sd BASE, L->base | ||
2041 | | sd TMP0, SBUF:CARG1->w | ||
2042 | | call_intern extern lj_buf_putstr_ .. name | ||
2043 | |. sd PC, SAVE_PC | ||
2044 | | load_got lj_buf_tostr | ||
2045 | | call_intern lj_buf_tostr | ||
2046 | |. move SBUF:CARG1, SBUF:CRET1 | ||
2047 | | b ->fff_resstr | ||
2048 | |. ld BASE, L->base | ||
2049 | |.endmacro | ||
2050 | | | ||
2051 | |ffstring_op reverse | ||
2052 | |ffstring_op lower | ||
2053 | |ffstring_op upper | ||
2054 | | | ||
2055 | |//-- Bit library -------------------------------------------------------- | ||
2056 | | | ||
// vm_tobit_fb: number-to-bit conversion helper reached with `bal` from the bit
// library macros. The callers in this chunk set TMP1 = (type tag < LJ_TISNUM)
// in the delay slot of the call; TMP1 == 0 therefore means "not a number" and
// leads to the fallback.
//  - FPU builds: load the double at 0(BASE), add TOBIT and return the low 32
//    bits of the sum, zero-extended, in CRET1.
//  - Non-FPU builds: execution continues straight into ->vm_tobit (its first
//    instruction sits in the delay slot of the beqz), which converts the raw
//    double bits in CARG1 using integer instructions only.
2057 | |->vm_tobit_fb: | ||
2058 | | beqz TMP1, ->fff_fallback | ||
2059 | |.if FPU | ||
2060 | |. ldc1 FARG1, 0(BASE) | ||
2061 | | add.d FARG1, FARG1, TOBIT | ||
2062 | | mfc1 CRET1, FARG1 | ||
2063 | | jr ra | ||
2064 | |. zextw CRET1, CRET1 | ||
2065 | |.else | ||
2066 | |// FP number to bit conversion for soft-float. | ||
// Outline: drop the sign bit (dsll by 1), take the biased exponent from the top
// 11 bits, and compute the right-shift count 1076 - exponent. Shift counts of
// 54 or more (unsigned) return 0 at label 1. Otherwise the mantissa field is
// extracted, AT (which is 1 on this path) is inserted above it, the value is
// shifted right, negated when CARG1 is negative, and zero-extended to 32 bits.
// NOTE(review): exact dextm/dinsu bit positions follow DynASM operand
// conventions -- confirm against the DynASM MIPS encoder.
2067 | |->vm_tobit: | ||
2068 | | dsll TMP0, CARG1, 1 | ||
2069 | | li CARG3, 1076 | ||
2070 | | dsrl AT, TMP0, 53 | ||
2071 | | dsubu CARG3, CARG3, AT | ||
2072 | | sltiu AT, CARG3, 54 | ||
2073 | | beqz AT, >1 | ||
2074 | |. dextm TMP0, TMP0, 0, 20 | ||
2075 | | dinsu TMP0, AT, 21, 21 | ||
2076 | | slt AT, CARG1, r0 | ||
2077 | | dsrlv CRET1, TMP0, CARG3 | ||
2078 | | dsubu TMP0, r0, CRET1 | ||
2079 | |.if MIPSR6 | ||
2080 | | selnez TMP0, TMP0, AT | ||
2081 | | seleqz CRET1, CRET1, AT | ||
2082 | | or CRET1, CRET1, TMP0 | ||
2083 | |.else | ||
2084 | | movn CRET1, TMP0, AT | ||
2085 | |.endif | ||
2086 | | jr ra | ||
2087 | |. zextw CRET1, CRET1 | ||
2088 | |1: | ||
2089 | | jr ra | ||
2090 | |. move CRET1, r0 | ||
2091 | | | ||
2092 | |// FP number to int conversion with a check for soft-float. | ||
2093 | |// Modifies CARG1, CRET1, CRET2, TMP0, AT. | ||
// Only emitted for soft-float builds with the JIT enabled (the label itself is
// always defined inside the non-FPU branch).
// Result: CRET1 = converted integer. CRET2 is set to 1 at label 1 when the
// shift count is out of range, and otherwise combines the "integer check"
// (bits shifted out) with the "range check" (CRET1 xor its 32-bit sign
// extension). Label 2 (all bits below the sign are zero) sets CRET1 = 0 and
// returns with CRET2 holding that zero.
// NOTE(review): the reading "CRET2 == 0 means exact conversion" follows from
// the code above; confirm against the callers, which are outside this chunk.
2094 | |->vm_tointg: | ||
2095 | |.if JIT | ||
2096 | | dsll CRET2, CARG1, 1 | ||
2097 | | beqz CRET2, >2 | ||
2098 | |. li TMP0, 1076 | ||
2099 | | dsrl AT, CRET2, 53 | ||
2100 | | dsubu TMP0, TMP0, AT | ||
2101 | | sltiu AT, TMP0, 54 | ||
2102 | | beqz AT, >1 | ||
2103 | |. dextm CRET2, CRET2, 0, 20 | ||
2104 | | dinsu CRET2, AT, 21, 21 | ||
2105 | | slt AT, CARG1, r0 | ||
2106 | | dsrlv CRET1, CRET2, TMP0 | ||
2107 | | dsubu CARG1, r0, CRET1 | ||
2108 | |.if MIPSR6 | ||
2109 | | seleqz CRET1, CRET1, AT | ||
2110 | | selnez CARG1, CARG1, AT | ||
2111 | | or CRET1, CRET1, CARG1 | ||
2112 | |.else | ||
2113 | | movn CRET1, CARG1, AT | ||
2114 | |.endif | ||
2115 | | li CARG1, 64 | ||
2116 | | subu TMP0, CARG1, TMP0 | ||
2117 | | dsllv CRET2, CRET2, TMP0 // Integer check. | ||
2118 | | sextw AT, CRET1 | ||
2119 | | xor AT, CRET1, AT // Range check. | ||
2120 | |.if MIPSR6 | ||
2121 | | seleqz AT, AT, CRET2 | ||
2122 | | selnez CRET2, CRET2, CRET2 | ||
2123 | | jr ra | ||
2124 | |. or CRET2, AT, CRET2 | ||
2125 | |.else | ||
2126 | | jr ra | ||
2127 | |. movz CRET2, AT, CRET2 | ||
2128 | |.endif | ||
2129 | |1: | ||
2130 | | jr ra | ||
2131 | |. li CRET2, 1 | ||
2132 | |2: | ||
2133 | | jr ra | ||
2134 | |. move CRET1, r0 | ||
2135 | |.endif | ||
2136 | |.endif | ||
2137 | | | ||
// .ffunc_bit: common prologue of the bit library fast functions. Defines
// bit_<name> via .ffunc_1 and leaves the first argument as an unsigned 32-bit
// value in CRET1.
//  - Tag == TISNUM (integer): CRET1 = zero-extended CARG1 (computed in the
//    delay slot, so it also executes on the other path) and skip to label 6.
//  - Otherwise: call ->vm_tobit_fb with TMP1 = (tag < LJ_TISNUM), set in the
//    delay slot of the call.
2138 | |.macro .ffunc_bit, name | ||
2139 | | .ffunc_1 bit_..name | ||
2140 | | gettp TMP0, CARG1 | ||
2141 | | beq TMP0, TISNUM, >6 | ||
2142 | |. zextw CRET1, CARG1 | ||
2143 | | bal ->vm_tobit_fb | ||
2144 | |. sltiu TMP1, TMP0, LJ_TISNUM | ||
2145 | |6: | ||
2146 | |.endmacro | ||
2147 | | | ||
// .ffunc_bit_op: variadic bitwise fast function (instantiated for and/or/xor).
// The first argument is converted by .ffunc_bit into CRET1; the loop at label 1
// then folds every further argument into CRET1 with `bins`. TMP2 walks the
// argument slots, TMP3 = BASE + NARGS8:RC is the end; when they meet the
// result is returned via ->fff_resi.
//  - FPU builds: integer arguments are zero-extended and combined directly;
//    at label 2 other values are converted inline by adding TOBIT, unless their
//    tag is not below LJ_TISNUM, which goes to the fallback.
//  - Non-FPU builds: non-integer arguments go through ->vm_tobit_fb. The
//    accumulator is parked in CRET2 across the call, so afterwards CRET1 holds
//    the converted argument and CARG1 the old accumulator; the operand order
//    is swapped, which is harmless for the commutative ops used here.
2148 | |.macro .ffunc_bit_op, name, bins | ||
2149 | | .ffunc_bit name | ||
2150 | | daddiu TMP2, BASE, 8 | ||
2151 | | daddu TMP3, BASE, NARGS8:RC | ||
2152 | |1: | ||
2153 | | beq TMP2, TMP3, ->fff_resi | ||
2154 | |. ld CARG1, 0(TMP2) | ||
2155 | | gettp TMP0, CARG1 | ||
2156 | |.if FPU | ||
2157 | | bne TMP0, TISNUM, >2 | ||
2158 | |. daddiu TMP2, TMP2, 8 | ||
2159 | | zextw CARG1, CARG1 | ||
2160 | | b <1 | ||
2161 | |. bins CRET1, CRET1, CARG1 | ||
2162 | |2: | ||
2163 | | ldc1 FARG1, -8(TMP2) | ||
2164 | | sltiu AT, TMP0, LJ_TISNUM | ||
2165 | | beqz AT, ->fff_fallback | ||
2166 | |. add.d FARG1, FARG1, TOBIT | ||
2167 | | mfc1 CARG1, FARG1 | ||
2168 | | zextw CARG1, CARG1 | ||
2169 | | b <1 | ||
2170 | |. bins CRET1, CRET1, CARG1 | ||
2171 | |.else | ||
2172 | | beq TMP0, TISNUM, >2 | ||
2173 | |. move CRET2, CRET1 | ||
2174 | | bal ->vm_tobit_fb | ||
2175 | |. sltiu TMP1, TMP0, LJ_TISNUM | ||
2176 | | move CARG1, CRET2 | ||
2177 | |2: | ||
2178 | | zextw CARG1, CARG1 | ||
2179 | | bins CRET1, CRET1, CARG1 | ||
2180 | | b <1 | ||
2181 | |. daddiu TMP2, TMP2, 8 | ||
2182 | |.endif | ||
2183 | |.endmacro | ||
2184 | | | ||
2185 | |.ffunc_bit_op band, and | ||
2186 | |.ffunc_bit_op bor, or | ||
2187 | |.ffunc_bit_op bxor, xor | ||
2188 | | | ||
// bit.bswap: reverses the byte order of the 32-bit value in CRET1.
// TMP1 collects the outer bytes (value >> 24, plus the low byte inserted at
// the top), TMP2 the two middle bytes; the final `or` in the delay slot merges
// them before ->fff_resi tags and returns the result.
// NOTE(review): the dins operands are read as (lsb, msb) bit positions per
// DynASM's MIPS syntax -- confirm against the DynASM encoder.
2189 | |.ffunc_bit bswap | ||
2190 | | dsrl TMP0, CRET1, 8 | ||
2191 | | dsrl TMP1, CRET1, 24 | ||
2192 | | andi TMP2, TMP0, 0xff00 | ||
2193 | | dins TMP1, CRET1, 24, 31 | ||
2194 | | dins TMP2, TMP0, 16, 23 | ||
2195 | | b ->fff_resi | ||
2196 | |. or CRET1, TMP1, TMP2 | ||
2197 | | | ||
// bit.bnot: complements CRET1 and zero-extends the result back to 32 bits (in
// the delay slot) before returning through ->fff_resi.
2198 | |.ffunc_bit bnot | ||
2199 | | not CRET1, CRET1 | ||
2200 | | b ->fff_resi | ||
2201 | |. zextw CRET1, CRET1 | ||
2202 | | | ||
// .ffunc_bit_sh: two-argument shift/rotate fast function bit_<name>.
// Parameters:
//   shins - the variable shift/rotate instruction to apply.
//   shmod - 1 negates the shift count first (used below to build rol from
//           rotrv); 0 uses the count as is.
// The first argument may be an integer or a number (converted through
// ->vm_tobit_fb); the second must carry the TISNUM tag or the fallback is
// taken. The value is sign-extended before the shift and the result is
// zero-extended to 32 bits in the delay slot of the branch to ->fff_resi.
2203 | |.macro .ffunc_bit_sh, name, shins, shmod | ||
2204 | | .ffunc_2 bit_..name | ||
2205 | | gettp TMP0, CARG1 | ||
2206 | | beq TMP0, TISNUM, >1 | ||
2207 | |. nop | ||
2208 | | bal ->vm_tobit_fb | ||
2209 | |. sltiu TMP1, TMP0, LJ_TISNUM | ||
2210 | | move CARG1, CRET1 | ||
2211 | |1: | ||
2212 | | gettp TMP0, CARG2 | ||
2213 | | bne TMP0, TISNUM, ->fff_fallback | ||
2214 | |. zextw CARG2, CARG2 | ||
2215 | | sextw CARG1, CARG1 | ||
2216 | |.if shmod == 1 | ||
2217 | | negu CARG2, CARG2 | ||
2218 | |.endif | ||
2219 | | shins CRET1, CARG1, CARG2 | ||
2220 | | b ->fff_resi | ||
2221 | |. zextw CRET1, CRET1 | ||
2222 | |.endmacro | ||
2223 | | | ||
2224 | |.ffunc_bit_sh lshift, sllv, 0 | ||
2225 | |.ffunc_bit_sh rshift, srlv, 0 | ||
2226 | |.ffunc_bit_sh arshift, srav, 0 | ||
2227 | |.ffunc_bit_sh rol, rotrv, 1 | ||
2228 | |.ffunc_bit_sh ror, rotrv, 0 | ||
2229 | | | ||
// bit.tobit needs nothing beyond the .ffunc_bit prologue and falls straight
// into ->fff_resi.
// fff_resi: common exit for fast functions returning one integer in CRET1.
// Tags CRET1 with TISNUM, stores it at -16(BASE) (the slot RA points to) in the
// delay slot, reloads PC from the frame and continues at ->fff_res1.
2230 | |.ffunc_bit tobit | ||
2231 | |->fff_resi: | ||
2232 | | ld PC, FRAME_PC(BASE) | ||
2233 | | daddiu RA, BASE, -16 | ||
2234 | | settp CRET1, TISNUM | ||
2235 | | b ->fff_res1 | ||
2236 | |. sd CRET1, -16(BASE) | ||
2237 | | | ||
2238 | |//----------------------------------------------------------------------- | ||
// fff_fallback: invoked when a fast function cannot handle its arguments.
// Publishes L->base and L->top (= BASE + NARGS8:RC), then calls the C function
// stored in CFUNC:RB->f with CARG1 = L. If L->maxstack is below
// top + 8*LUA_MINSTACK the stack is grown first (label 5) and the call is
// retried by forcing a return value of 0.
// Return value handling (CRET1):
//   > 0 : nresults+1 -> RD = CRET1*8, RA = BASE-16, continue at ->fff_res.
//   == 0: retry the call through ins_callt.
//   < 0 : tail call -> ->vm_call_tail.
2239 | |->fff_fallback: // Call fast function fallback handler. | ||
2240 | | // BASE = new base, RB = CFUNC, RC = nargs*8 | ||
2241 | | ld TMP3, CFUNC:RB->f | ||
2242 | | daddu TMP1, BASE, NARGS8:RC | ||
2243 | | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. | ||
2244 | | daddiu TMP0, TMP1, 8*LUA_MINSTACK | ||
2245 | | ld TMP2, L->maxstack | ||
2246 | | sd PC, SAVE_PC // Redundant (but a defined value). | ||
2247 | | sltu AT, TMP2, TMP0 | ||
2248 | | sd BASE, L->base | ||
2249 | | sd TMP1, L->top | ||
2250 | | bnez AT, >5 // Need to grow stack. | ||
2251 | |. move CFUNCADDR, TMP3 | ||
2252 | | jalr TMP3 // (lua_State *L) | ||
2253 | |. move CARG1, L | ||
2254 | | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ||
2255 | | ld BASE, L->base | ||
2256 | | sll RD, CRET1, 3 | ||
2257 | | bgtz CRET1, ->fff_res // Returned nresults+1? | ||
2258 | |. daddiu RA, BASE, -16 | ||
2259 | |1: // Returned 0 or -1: retry fast path. | ||
2260 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
2261 | | ld TMP0, L->top | ||
2262 | | cleartp LFUNC:RB | ||
2263 | | bnez CRET1, ->vm_call_tail // Returned -1? | ||
2264 | |. dsubu NARGS8:RC, TMP0, BASE | ||
2265 | | ins_callt // Returned 0: retry fast path. | ||
2266 | | | ||
2267 | |// Reconstruct previous base for vmeta_call during tailcall. | ||
// If PC has no frame-type bits set, the frame delta is taken from the RA
// operand of the instruction PC refers to (OFS_RA(PC)), scaled by 8 and
// offset by 16. Otherwise the delta is PC with its low two bits masked off
// (computed in the delay slot; label 3 then skips the recomputation).
// TMP2 = BASE - delta is handed to ->vm_call_dispatch.
2268 | |->vm_call_tail: | ||
2269 | | andi TMP0, PC, FRAME_TYPE | ||
2270 | | li AT, -4 | ||
2271 | | bnez TMP0, >3 | ||
2272 | |. and TMP1, PC, AT | ||
2273 | | lbu TMP1, OFS_RA(PC) | ||
2274 | | sll TMP1, TMP1, 3 | ||
2275 | | addiu TMP1, TMP1, 16 | ||
2276 | |3: | ||
2277 | | b ->vm_call_dispatch // Resolve again for tailcall. | ||
2278 | |. dsubu TMP2, BASE, TMP1 | ||
2279 | | | ||
2280 | |5: // Grow stack for fallback handler. | ||
2281 | | load_got lj_state_growstack | ||
2282 | | li CARG2, LUA_MINSTACK | ||
2283 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
2284 | |. move CARG1, L | ||
2285 | | ld BASE, L->base | ||
2286 | | b <1 | ||
2287 | |. li CRET1, 0 // Force retry. | ||
2288 | | | ||
// fff_gcstep: runs one garbage collector step on behalf of a fast function and
// returns to the caller with `jr ra`.
// The return address is parked in MULTRES across the C call. L->base, SAVE_PC
// and L->top (= BASE + NARGS8:RC) are published first; afterwards BASE, the
// untagged function pointer in RB and NARGS8:RC (= L->top - BASE, computed in
// the delay slot) are re-derived from the Lua state.
2289 | |->fff_gcstep: // Call GC step function. | ||
2290 | | // BASE = new base, RC = nargs*8 | ||
2291 | | move MULTRES, ra | ||
2292 | | load_got lj_gc_step | ||
2293 | | sd BASE, L->base | ||
2294 | | daddu TMP0, BASE, NARGS8:RC | ||
2295 | | sd PC, SAVE_PC // Redundant (but a defined value). | ||
2296 | | sd TMP0, L->top | ||
2297 | | call_intern lj_gc_step // (lua_State *L) | ||
2298 | |. move CARG1, L | ||
2299 | | ld BASE, L->base | ||
2300 | | move ra, MULTRES | ||
2301 | | ld TMP0, L->top | ||
2302 | | ld CFUNC:RB, FRAME_FUNC(BASE) | ||
2303 | | cleartp CFUNC:RB | ||
2304 | | jr ra | ||
2305 | |. dsubu NARGS8:RC, TMP0, BASE | ||
2306 | | | ||
2307 | |//----------------------------------------------------------------------- | ||
2308 | |//-- Special dispatch targets ------------------------------------------- | ||
2309 | |//----------------------------------------------------------------------- | ||
2310 | | | ||
// vm_record / vm_rethook / vm_inshook share local labels:
//   1: call lj_dispatch_ins(L, pc).
//   3: reload BASE after a C call.
//   4: decode the instruction at PC-4 again and jump to its static handler.
//   5: jump to the static handler using the TMP0 set up by the dispatching code.
// vm_record (JIT builds only): skips the call while HOOK_VMEVENT is set;
// otherwise always reaches label 1, writing back the decremented hook count
// only when no hook is active and a line/count hook is registered.
2311 | |->vm_record: // Dispatch target for recording phase. | ||
2312 | |.if JIT | ||
2313 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
2314 | | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. | ||
2315 | | bnez AT, >5 | ||
2316 | | // Decrement the hookcount for consistency, but always do the call. | ||
2317 | |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
2318 | | andi AT, TMP3, HOOK_ACTIVE | ||
2319 | | bnez AT, >1 | ||
2320 | |. addiu TMP2, TMP2, -1 | ||
2321 | | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT | ||
2322 | | beqz AT, >1 | ||
2323 | |. nop | ||
2324 | | b >1 | ||
2325 | |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
2326 | |.endif | ||
2327 | | | ||
// vm_rethook: goes to label 1 when no hook is active, otherwise falls into the
// static re-dispatch at label 5. The load at label 5 sits in the delay slot of
// the beqz, so it executes on both paths.
2328 | |->vm_rethook: // Dispatch target for return hooks. | ||
2329 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
2330 | | andi AT, TMP3, HOOK_ACTIVE // Hook already active? | ||
2331 | | beqz AT, >1 | ||
2332 | |5: // Re-dispatch to static ins. | ||
2333 | |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*8 (8-byte dispatch slots). | ||
2334 | | jr AT | ||
2335 | |. nop | ||
2336 | | | ||
// vm_inshook: re-dispatches statically (label 5) when a hook is already active
// or no line/count hook is set. Otherwise the hook count is decremented and
// stored; the hook is called when the count reaches zero or LUA_MASKLINE is set.
2337 | |->vm_inshook: // Dispatch target for instr/line hooks. | ||
2338 | | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
2339 | | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
2340 | | andi AT, TMP3, HOOK_ACTIVE // Hook already active? | ||
2341 | | bnez AT, <5 | ||
2342 | |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT | ||
2343 | | beqz AT, <5 | ||
2344 | |. addiu TMP2, TMP2, -1 | ||
2345 | | beqz TMP2, >1 | ||
2346 | |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
2347 | | andi AT, TMP3, LUA_MASKLINE | ||
2348 | | beqz AT, <5 | ||
2349 | |1: | ||
2350 | |. load_got lj_dispatch_ins | ||
2351 | | sw MULTRES, SAVE_MULTRES | ||
2352 | | move CARG2, PC | ||
2353 | | sd BASE, L->base | ||
2354 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
2355 | | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) | ||
2356 | |. move CARG1, L | ||
2357 | |3: | ||
2358 | | ld BASE, L->base | ||
2359 | |4: // Re-dispatch to static ins. | ||
2360 | | lw INS, -4(PC) | ||
2361 | | decode_OP8a TMP1, INS | ||
2362 | | decode_OP8b TMP1 | ||
2363 | | daddu TMP0, DISPATCH, TMP1 | ||
2364 | | decode_RD8a RD, INS | ||
2365 | | ld AT, GG_DISP2STATIC(TMP0) | ||
2366 | | decode_RA8a RA, INS | ||
2367 | | decode_RD8b RD | ||
2368 | | jr AT | ||
2369 | | decode_RA8b RA | ||
2370 | | | ||
// cont_hook: advances PC by one instruction (4 bytes) and jumps back to the
// static re-dispatch at label 4 of vm_inshook; MULTRES is reloaded from the
// slot at -24+LO(RB) in the delay slot.
2371 | |->cont_hook: // Continue from hook yield. | ||
2372 | | daddiu PC, PC, 4 | ||
2373 | | b <4 | ||
2374 | |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. | ||
2375 | | | ||
// vm_hotloop (body only in JIT builds): calls lj_trace_hot with
// CARG1 = DISPATCH + GG_DISP2J (the jit_State) and CARG2 = PC.
// Before the call it saves PC, records L in the jit_State, publishes L->base
// and sets L->top = BASE + framesize*8, where framesize is read from the
// prototype reached through the current function's pc field. Afterwards it
// resumes at label 3 of vm_inshook (reload BASE, static re-dispatch).
2376 | |->vm_hotloop: // Hot loop counter underflow. | ||
2377 | |.if JIT | ||
2378 | | ld LFUNC:TMP1, FRAME_FUNC(BASE) | ||
2379 | | daddiu CARG1, DISPATCH, GG_DISP2J | ||
2380 | | cleartp LFUNC:TMP1 | ||
2381 | | sd PC, SAVE_PC | ||
2382 | | ld TMP1, LFUNC:TMP1->pc | ||
2383 | | move CARG2, PC | ||
2384 | | sd L, DISPATCH_J(L)(DISPATCH) | ||
2385 | | lbu TMP1, PC2PROTO(framesize)(TMP1) | ||
2386 | | load_got lj_trace_hot | ||
2387 | | sd BASE, L->base | ||
2388 | | dsll TMP1, TMP1, 3 | ||
2389 | | daddu TMP1, BASE, TMP1 | ||
2390 | | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
2391 | |. sd TMP1, L->top | ||
2392 | | b <3 | ||
2393 | |. nop | ||
2394 | |.endif | ||
2395 | | | ||
2396 | | | ||
// vm_callhook and vm_hotcall share one body that calls lj_dispatch_call(L, pc).
// They differ only in CARG2: the call hook passes PC, the hot-call path (JIT
// builds) passes PC with bit 0 set. In JIT builds `move CARG2, PC` is the delay
// slot of the branch over the `ori`; in non-JIT builds it is an ordinary
// instruction and both labels fall through into the same code.
2397 | |->vm_callhook: // Dispatch target for call hooks. | ||
2398 | |.if JIT | ||
2399 | | b >1 | ||
2400 | |.endif | ||
2401 | |. move CARG2, PC | ||
2402 | | | ||
2403 | |->vm_hotcall: // Hot call counter underflow. | ||
2404 | |.if JIT | ||
2405 | | ori CARG2, PC, 1 | ||
2406 | |1: | ||
2407 | |.endif | ||
// RA is converted to an offset from BASE across the call and rebased
// afterwards, since BASE is reloaded from L->base once the call returns.
// The returned ASMFunction (CRET1) is jumped to with INS reloaded from PC-4.
2408 | | load_got lj_dispatch_call | ||
2409 | | daddu TMP0, BASE, RC | ||
2410 | | sd PC, SAVE_PC | ||
2411 | | sd BASE, L->base | ||
2412 | | dsubu RA, RA, BASE | ||
2413 | | sd TMP0, L->top | ||
2414 | | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) | ||
2415 | |. move CARG1, L | ||
2416 | | // Returns ASMFunction. | ||
2417 | | ld BASE, L->base | ||
2418 | | ld TMP0, L->top | ||
2419 | | sd r0, SAVE_PC // Invalidate for subsequent line hook. | ||
2420 | | dsubu NARGS8:RC, TMP0, BASE | ||
2421 | | daddu RA, BASE, RA | ||
2422 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
2423 | | cleartp LFUNC:RB | ||
2424 | | jr CRET1 | ||
2425 | |. lw INS, -4(PC) | ||
2426 | | | ||
// cont_stitch (body only in JIT builds): continuation used for trace stitching.
// Steps:
//   1. Load the previous trace pointer from -40(RB) and clear its tag.
//   2. Label 1: copy the results from RA down to the call base
//      (BASE + RA operand of the instruction at PC-4); skipped when
//      MULTRES-8 is zero.
//   3. Labels 3/9: fill the remaining wanted slots with nil up to
//      BASE + (RA operand + RB operand) of that instruction.
//   4. Dispatch on the previous trace's link field: equal to its own traceno
//      -> ->cont_nop (blacklisted); non-zero -> jump to BC_JLOOP with
//      RD = link*8; zero -> call lj_dispatch_stitch(J, pc), then ->cont_nop.
2427 | |->cont_stitch: // Trace stitching. | ||
2428 | |.if JIT | ||
2429 | | // RA = resultptr, RB = meta base | ||
2430 | | lw INS, -4(PC) | ||
2431 | | ld TRACE:TMP2, -40(RB) // Save previous trace. | ||
2432 | | decode_RA8a RC, INS | ||
2433 | | daddiu AT, MULTRES, -8 | ||
2434 | | cleartp TRACE:TMP2 | ||
2435 | | decode_RA8b RC | ||
2436 | | beqz AT, >2 | ||
2437 | |. daddu RC, BASE, RC // Call base. | ||
2438 | |1: // Move results down. | ||
2439 | | ld CARG1, 0(RA) | ||
2440 | | daddiu AT, AT, -8 | ||
2441 | | daddiu RA, RA, 8 | ||
2442 | | sd CARG1, 0(RC) | ||
2443 | | bnez AT, <1 | ||
2444 | |. daddiu RC, RC, 8 | ||
2445 | |2: | ||
2446 | | decode_RA8a RA, INS | ||
2447 | | decode_RB8a RB, INS | ||
2448 | | decode_RA8b RA | ||
2449 | | decode_RB8b RB | ||
2450 | | daddu RA, RA, RB | ||
2451 | | daddu RA, BASE, RA | ||
2452 | |3: | ||
2453 | | sltu AT, RC, RA | ||
2454 | | bnez AT, >9 // More results wanted? | ||
2455 | |. nop | ||
2456 | | | ||
2457 | | lhu TMP3, TRACE:TMP2->traceno | ||
2458 | | lhu RD, TRACE:TMP2->link | ||
2459 | | beq RD, TMP3, ->cont_nop // Blacklisted. | ||
2460 | |. load_got lj_dispatch_stitch | ||
2461 | | bnez RD, =>BC_JLOOP // Jump to stitched trace. | ||
2462 | |. sll RD, RD, 3 | ||
2463 | | | ||
2464 | | // Stitch a new trace to the previous trace. | ||
2465 | | sw TMP3, DISPATCH_J(exitno)(DISPATCH) | ||
2466 | | sd L, DISPATCH_J(L)(DISPATCH) | ||
2467 | | sd BASE, L->base | ||
2468 | | daddiu CARG1, DISPATCH, GG_DISP2J | ||
2469 | | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2470 | |. move CARG2, PC | ||
2471 | | b ->cont_nop | ||
2472 | |. ld BASE, L->base | ||
2473 | | | ||
2474 | |9: | ||
2475 | | sd TISNIL, 0(RC) | ||
2476 | | b <3 | ||
2477 | |. daddiu RC, RC, 8 | ||
2478 | |.endif | ||
2479 | | | ||
// vm_profhook: the label always exists; the body is only compiled when
// LJ_HASPROFILE is set (C preprocessor guard in the host file).
// Saves MULTRES and L->base, calls lj_dispatch_profile(L, pc), then steps PC
// back by one instruction and continues at ->cont_nop with BASE reloaded in
// the delay slot.
2480 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2481 | #if LJ_HASPROFILE | ||
2482 | | load_got lj_dispatch_profile | ||
2483 | | sw MULTRES, SAVE_MULTRES | ||
2484 | | move CARG2, PC | ||
2485 | | sd BASE, L->base | ||
2486 | | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2487 | |. move CARG1, L | ||
2488 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2489 | | daddiu PC, PC, -4 | ||
2490 | | b ->cont_nop | ||
2491 | |. ld BASE, L->base | ||
2492 | #endif | ||
2493 | | | ||
2494 | |//----------------------------------------------------------------------- | ||
2495 | |//-- Trace exit handler ------------------------------------------------- | ||
2496 | |//----------------------------------------------------------------------- | ||
2497 | | | ||
// savex_: saves the register pair numbered a and b into the exit-state area
// at sp.
//   FPU builds:     f<a>/f<b> at index*8(sp), r<a>/r<b> at 32*8 + index*8(sp).
//   non-FPU builds: r<a>/r<b> at index*8(sp).
2498 | |.macro savex_, a, b | ||
2499 | |.if FPU | ||
2500 | | sdc1 f..a, a*8(sp) | ||
2501 | | sdc1 f..b, b*8(sp) | ||
2502 | | sd r..a, 32*8+a*8(sp) | ||
2503 | | sd r..b, 32*8+b*8(sp) | ||
2504 | |.else | ||
2505 | | sd r..a, a*8(sp) | ||
2506 | | sd r..b, b*8(sp) | ||
2507 | |.endif | ||
2508 | |.endmacro | ||
2509 | | | ||
// vm_exit_handler (body only in JIT builds): entered when a trace exits.
//  1. Reserves room on the stack for the register dump (32 FPRs + 32 GPRs on
//     FPU builds, 32 GPRs otherwise) and saves registers with savex_.
//     Registers 29 and 31 are handled separately: slot 31 is cleared, slot 29
//     receives the stack pointer value from before the reservation, and on FPU
//     builds f29/f31 are stored explicitly.
//  2. Switches vmstate to EXIT, recomputes DISPATCH from JGL, reads the exit
//     number from the word at the original sp and the trace number from ra.
//  3. Calls lj_trace_exit(J, ex) with ex = sp (the register dump).
//  4. Re-establishes sp from L->cframe (low two bits masked off), reloads BASE
//     and PC, and joins vm_exit_interp at label 1.
2510 | |->vm_exit_handler: | ||
2511 | |.if JIT | ||
2512 | |.if FPU | ||
2513 | | daddiu sp, sp, -(32*8+32*8) | ||
2514 | |.else | ||
2515 | | daddiu sp, sp, -(32*8) | ||
2516 | |.endif | ||
2517 | | savex_ 0, 1 | ||
2518 | | savex_ 2, 3 | ||
2519 | | savex_ 4, 5 | ||
2520 | | savex_ 6, 7 | ||
2521 | | savex_ 8, 9 | ||
2522 | | savex_ 10, 11 | ||
2523 | | savex_ 12, 13 | ||
2524 | | savex_ 14, 15 | ||
2525 | | savex_ 16, 17 | ||
2526 | | savex_ 18, 19 | ||
2527 | | savex_ 20, 21 | ||
2528 | | savex_ 22, 23 | ||
2529 | | savex_ 24, 25 | ||
2530 | | savex_ 26, 27 | ||
2531 | | savex_ 28, 30 | ||
2532 | |.if FPU | ||
2533 | | sdc1 f29, 29*8(sp) | ||
2534 | | sdc1 f31, 31*8(sp) | ||
2535 | | sd r0, 32*8+31*8(sp) // Clear RID_TMP. | ||
2536 | | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. | ||
2537 | | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP | ||
2538 | |.else | ||
2539 | | sd r0, 31*8(sp) // Clear RID_TMP. | ||
2540 | | daddiu TMP2, sp, 32*8 // Recompute original value of sp. | ||
2541 | | sd TMP2, 29*8(sp) // Store sp in RID_SP | ||
2542 | |.endif | ||
2543 | | li_vmstate EXIT | ||
2544 | | daddiu DISPATCH, JGL, -GG_DISP2G-32768 | ||
2545 | | lw TMP1, 0(TMP2) // Load exit number. | ||
2546 | | st_vmstate | ||
2547 | | ld L, DISPATCH_GL(cur_L)(DISPATCH) | ||
2548 | | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) | ||
2549 | | load_got lj_trace_exit | ||
2550 | | sd L, DISPATCH_J(L)(DISPATCH) | ||
2551 | | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. | ||
2552 | | sd BASE, L->base | ||
2553 | | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. | ||
2554 | | daddiu CARG1, DISPATCH, GG_DISP2J | ||
2555 | | sd r0, DISPATCH_GL(jit_base)(DISPATCH) | ||
2556 | | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
2557 | |. move CARG2, sp | ||
2558 | | // Returns MULTRES (unscaled) or negated error code. | ||
2559 | | ld TMP1, L->cframe | ||
2560 | | li AT, -4 | ||
2561 | | ld BASE, L->base | ||
2562 | | and sp, TMP1, AT | ||
2563 | | ld PC, SAVE_PC // Get SAVE_PC. | ||
2564 | | b >1 | ||
2565 | |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). | ||
2566 | |.endif | ||
// vm_exit_interp (body only in JIT builds): resumes the interpreter after a
// trace exit.
//  - A negative CRET1 is an error code and is rethrown at label 9 through
//    lj_err_trace(L, -CRET1).
//  - Otherwise MULTRES = CRET1*8 is saved and the interpreter's constant
//    registers are rebuilt (TISNIL, TISNUM, TOBIT on FPU builds, KBASE from
//    the current function's prototype); jit_base is cleared and vmstate is
//    set through TISNIL.
//  - The instruction at PC is then dispatched. Opcodes below BC_FUNCF use
//    the ordinary dispatch. For function headers (label 2), opcodes at or
//    above BC_FUNCC+2 (the "Fast function?" test fails) have KBASE reloaded
//    from the Lua function below, unless the frame's PC has frame-type bits
//    set; all function headers then dispatch at label 3 with RC = MULTRES-8
//    and RA rebased onto BASE.
2567 | |->vm_exit_interp: | ||
2568 | |.if JIT | ||
2569 | | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. | ||
2570 | | ld L, SAVE_L | ||
2571 | | daddiu DISPATCH, JGL, -GG_DISP2G-32768 | ||
2572 | | sd BASE, L->base | ||
2573 | |1: | ||
2574 | | bltz CRET1, >9 // Check for error from exit. | ||
2575 | |. ld LFUNC:RB, FRAME_FUNC(BASE) | ||
2576 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
2577 | | dsll MULTRES, CRET1, 3 | ||
2578 | | cleartp LFUNC:RB | ||
2579 | | sw MULTRES, SAVE_MULTRES | ||
2580 | | li TISNIL, LJ_TNIL | ||
2581 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | ||
2582 | | .FPU mtc1 TMP3, TOBIT | ||
2583 | | ld TMP1, LFUNC:RB->pc | ||
2584 | | sd r0, DISPATCH_GL(jit_base)(DISPATCH) | ||
2585 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
2586 | | .FPU cvt.d.s TOBIT, TOBIT | ||
2587 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
2588 | | lw INS, 0(PC) | ||
2589 | | daddiu PC, PC, 4 | ||
2590 | | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 | ||
2591 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) | ||
2592 | | decode_OP8a TMP1, INS | ||
2593 | | decode_OP8b TMP1 | ||
2594 | | sltiu TMP2, TMP1, BC_FUNCF*8 | ||
2595 | | daddu TMP0, DISPATCH, TMP1 | ||
2596 | | decode_RD8a RD, INS | ||
2597 | | ld AT, 0(TMP0) | ||
2598 | | decode_RA8a RA, INS | ||
2599 | | beqz TMP2, >2 | ||
2600 | |. decode_RA8b RA | ||
2601 | | jr AT | ||
2602 | |. decode_RD8b RD | ||
2603 | |2: | ||
2604 | | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? | ||
2605 | | bnez TMP2, >3 | ||
2606 | |. ld TMP1, FRAME_PC(BASE) | ||
2607 | | // Check frame below fast function. | ||
2608 | | andi TMP0, TMP1, FRAME_TYPE | ||
2609 | | bnez TMP0, >3 // Trace stitching continuation? | ||
2610 | |. nop | ||
2611 | | // Otherwise set KBASE for Lua function below fast function. | ||
2612 | | lw TMP2, -4(TMP1) | ||
2613 | | decode_RA8a TMP0, TMP2 | ||
2614 | | decode_RA8b TMP0 | ||
2615 | | dsubu TMP1, BASE, TMP0 | ||
2616 | | ld LFUNC:TMP2, -32(TMP1) | ||
2617 | | cleartp LFUNC:TMP2 | ||
2618 | | ld TMP1, LFUNC:TMP2->pc | ||
2619 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
2620 | |3: | ||
2621 | | daddiu RC, MULTRES, -8 | ||
2622 | | jr AT | ||
2623 | |. daddu RA, RA, BASE | ||
2624 | | | ||
2625 | |9: // Rethrow error from the right C frame. | ||
2626 | | load_got lj_err_trace | ||
2627 | | sub CARG2, r0, CRET1 | ||
2628 | | call_intern lj_err_trace // (lua_State *L, int errcode) | ||
2629 | |. move CARG1, L | ||
2630 | |.endif | ||
2631 | | | ||
2632 | |//----------------------------------------------------------------------- | ||
2633 | |//-- Math helper functions ---------------------------------------------- | ||
2634 | |//----------------------------------------------------------------------- | ||
2635 | | | ||
2636 | |// Hard-float round to integer ("func" is floor, ceil or trunc). In: FARG1. Out: FRET1. | ||
2637 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. | ||
2638 | |// MIPSR6: Modifies FTMP1, too. | ||
2639 | |.macro vm_round_hf, func | ||
2640 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). | ||
2641 | | dsll TMP0, TMP0, 32 | ||
2642 | | dmtc1 TMP0, f4 | ||
2643 | | abs.d FRET2, FARG1 // |x| | ||
2644 | | dmfc1 AT, FARG1 // Raw bits of x, used for the sign test below. | ||
2645 | |.if MIPSR6 | ||
2646 | | cmp.lt.d FTMP1, FRET2, f4 | ||
2647 | | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | ||
2648 | | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52. | ||
2649 | |.else | ||
2650 | | c.olt.d 0, FRET2, f4 | ||
2651 | | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | ||
2652 | | bc1f 0, >1 // Truncate only if |x| < 2^52. | ||
2653 | |.endif | ||
2654 | |. sub.d FRET1, FRET1, f4 | ||
2655 | | slt AT, AT, r0 // AT = 1 if the sign bit of x is set, else 0. | ||
2656 | |.if "func" == "ceil" | ||
2657 | | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. | ||
2658 | |.else | ||
2659 | | lui TMP0, 0x3ff0 // Hiword of +1 (double). | ||
2660 | |.endif | ||
2661 | |.if "func" == "trunc" | ||
2662 | | dsll TMP0, TMP0, 32 | ||
2663 | | dmtc1 TMP0, f4 | ||
2664 | |.if MIPSR6 | ||
2665 | | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result? | ||
2666 | | sub.d FRET2, FRET1, f4 | ||
2667 | | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. | ||
2668 | | dmtc1 AT, FRET1 | ||
2669 | | neg.d FRET2, FTMP1 | ||
2670 | | jr ra | ||
2671 | |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in. | ||
2672 | |.else | ||
2673 | | c.olt.d 0, FRET2, FRET1 // |x| < result? | ||
2674 | | sub.d FRET2, FRET1, f4 | ||
2675 | | movt.d FRET1, FRET2, 0 // If yes, subtract +1. | ||
2676 | | neg.d FRET2, FRET1 | ||
2677 | | jr ra | ||
2678 | |. movn.d FRET1, FRET2, AT // Merge sign bit back in. | ||
2679 | |.endif | ||
2680 | |.else | ||
2681 | | neg.d FRET2, FRET1 | ||
2682 | | dsll TMP0, TMP0, 32 | ||
2683 | | dmtc1 TMP0, f4 | ||
2684 | |.if MIPSR6 | ||
2685 | | dmtc1 AT, FTMP1 | ||
2686 | | sel.d FTMP1, FRET1, FRET2 | ||
2687 | |.if "func" == "ceil" | ||
2688 | | cmp.lt.d FRET1, FTMP1, FARG1 // x > result? | ||
2689 | |.else | ||
2690 | | cmp.lt.d FRET1, FARG1, FTMP1 // x < result? | ||
2691 | |.endif | ||
2692 | | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1. | ||
2693 | | jr ra | ||
2694 | |. sel.d FRET1, FTMP1, FRET2 | ||
2695 | |.else | ||
2696 | | movn.d FRET1, FRET2, AT // Merge sign bit back in. | ||
2697 | |.if "func" == "ceil" | ||
2698 | | c.olt.d 0, FRET1, FARG1 // x > result? | ||
2699 | |.else | ||
2700 | | c.olt.d 0, FARG1, FRET1 // x < result? | ||
2701 | |.endif | ||
2702 | | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. | ||
2703 | | jr ra | ||
2704 | |. movt.d FRET1, FRET2, 0 | ||
2705 | |.endif | ||
2706 | |.endif | ||
2707 | |1: // Taken when the "|x| < 2^52" test fails: return x unchanged. | ||
2708 | | jr ra | ||
2709 | |. mov.d FRET1, FARG1 | ||
2710 | |.endmacro | ||
2711 | | | ||
2712 | |.macro vm_round, func | ||
2713 | |.if FPU | ||
2714 | | vm_round_hf, func | ||
2715 | |.endif | ||
2716 | |.endmacro | ||
2717 | | | ||
2718 | |->vm_floor: | ||
2719 | | vm_round floor | ||
2720 | |->vm_ceil: | ||
2721 | | vm_round ceil | ||
2722 | |->vm_trunc: // Label always exists; the body is only emitted for JIT builds. | ||
2723 | |.if JIT | ||
2724 | | vm_round trunc | ||
2725 | |.endif | ||
2726 | | | ||
2727 | |// Soft-float integer to number conversion. Converts ARG in place and returns via jr ra. | ||
2728 | |.macro sfi2d, ARG | ||
2729 | |.if not FPU | ||
2730 | | beqz ARG, >9 // Handle zero first. | ||
2731 | |. sra TMP0, ARG, 31 // Sign mask of the 32 bit input: 0 or -1. | ||
2732 | | xor TMP1, ARG, TMP0 | ||
2733 | | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. | ||
2734 | | dclz ARG, TMP1 | ||
2735 | | addiu ARG, ARG, -11 | ||
2736 | | li AT, 0x3ff+63-11-1 | ||
2737 | | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. | ||
2738 | | subu ARG, AT, ARG // Exponent - 1. | ||
2739 | | ins ARG, TMP0, 11, 11 // Sign | Exponent. | ||
2740 | | dsll ARG, ARG, 52 // Align left. | ||
2741 | | jr ra | ||
2742 | |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. | ||
2743 | |9: // Zero input: ARG is returned unchanged (all bits zero). | ||
2744 | | jr ra | ||
2745 | |. nop | ||
2746 | |.endif | ||
2747 | |.endmacro | ||
2748 | | | ||
2749 | |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. | ||
2750 | |->vm_sfi2d_1: | ||
2751 | | sfi2d CARG1 | ||
2752 | | | ||
2753 | |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. | ||
2754 | |->vm_sfi2d_2: | ||
2755 | | sfi2d CARG2 | ||
2756 | | | ||
2757 | |// Soft-float comparison. Equivalent to c.eq.d. | ||
2758 | |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2759 | |->vm_sfcmpeq: | ||
2760 | |.if not FPU | ||
2761 | | dsll AT, CARG1, 1 // Shift out the sign bits of both operands. | ||
2762 | | dsll TMP0, CARG2, 1 | ||
2763 | | or TMP1, AT, TMP0 | ||
2764 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2765 | |. lui TMP1, 0xffe0 | ||
2766 | | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0000000000000, i.e. the bits of Inf shifted left by 1. | ||
2767 | | sltu AT, TMP1, AT | ||
2768 | | sltu TMP0, TMP1, TMP0 | ||
2769 | | or TMP1, AT, TMP0 | ||
2770 | | bnez TMP1, >9 // Either arg is NaN: return 0. | ||
2771 | |. xor AT, CARG1, CARG2 | ||
2772 | | jr ra | ||
2773 | |. sltiu CRET1, AT, 1 // Same values: return 1. | ||
2774 | |8: | ||
2775 | | jr ra | ||
2776 | |. li CRET1, 1 | ||
2777 | |9: | ||
2778 | | jr ra | ||
2779 | |. li CRET1, 0 | ||
2780 | |.endif | ||
2781 | | | ||
2782 | |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. | ||
2783 | |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. | ||
2784 | |->vm_sfcmpult: | ||
2785 | |.if not FPU | ||
2786 | | b >1 | ||
2787 | |. li CRET2, 1 // Unordered variant: result for NaN operands is 1. | ||
2788 | |.endif | ||
2789 | | | ||
2790 | |->vm_sfcmpolt: | ||
2791 | |.if not FPU | ||
2792 | | li CRET2, 0 // Ordered variant: result for NaN operands is 0. | ||
2793 | |1: | ||
2794 | | dsll AT, CARG1, 1 | ||
2795 | | dsll TMP0, CARG2, 1 | ||
2796 | | or TMP1, AT, TMP0 | ||
2797 | | beqz TMP1, >8 // Both args +-0: return 0. | ||
2798 | |. lui TMP1, 0xffe0 | ||
2799 | | dsll TMP1, TMP1, 32 | ||
2800 | | sltu AT, TMP1, AT | ||
2801 | | sltu TMP0, TMP1, TMP0 | ||
2802 | | or TMP1, AT, TMP0 | ||
2803 | | bnez TMP1, >9 // Either arg is NaN: return CRET2 (0 for olt, 1 for ult). | ||
2804 | |. and AT, CARG1, CARG2 | ||
2805 | | bltz AT, >5 // Both args negative? | ||
2806 | |. nop | ||
2807 | | jr ra | ||
2808 | |. slt CRET1, CARG1, CARG2 | ||
2809 | |5: // Swap conditions if both operands are negative. | ||
2810 | | jr ra | ||
2811 | |. slt CRET1, CARG2, CARG1 | ||
2812 | |8: | ||
2813 | | jr ra | ||
2814 | |. li CRET1, 0 | ||
2815 | |9: | ||
2816 | | jr ra | ||
2817 | |. move CRET1, CRET2 | ||
2818 | |.endif | ||
2819 | | | ||
2820 | |->vm_sfcmpogt: // Soft-float ordered greater-than. Input: CARG1, CARG2. Output: CRET1. | ||
2821 | |.if not FPU | ||
2822 | | dsll AT, CARG2, 1 | ||
2823 | | dsll TMP0, CARG1, 1 | ||
2824 | | or TMP1, AT, TMP0 | ||
2825 | | beqz TMP1, >8 // Both args +-0: return 0. | ||
2826 | |. lui TMP1, 0xffe0 | ||
2827 | | dsll TMP1, TMP1, 32 | ||
2828 | | sltu AT, TMP1, AT | ||
2829 | | sltu TMP0, TMP1, TMP0 | ||
2830 | | or TMP1, AT, TMP0 | ||
2831 | | bnez TMP1, >9 // Either arg is NaN: return 0. | ||
2832 | |. and AT, CARG2, CARG1 | ||
2833 | | bltz AT, >5 // Both args negative? | ||
2834 | |. nop | ||
2835 | | jr ra | ||
2836 | |. slt CRET1, CARG2, CARG1 | ||
2837 | |5: // Swap conditions if both operands are negative. | ||
2838 | | jr ra | ||
2839 | |. slt CRET1, CARG1, CARG2 | ||
2840 | |8: | ||
2841 | | jr ra | ||
2842 | |. li CRET1, 0 | ||
2843 | |9: | ||
2844 | | jr ra | ||
2845 | |. li CRET1, 0 | ||
2846 | |.endif | ||
2847 | | | ||
2848 | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. | ||
2849 | |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. | ||
2850 | |->vm_sfcmpolex: | ||
2851 | |.if not FPU | ||
2852 | | dsll AT, CARG1, 1 | ||
2853 | | dsll TMP0, CARG2, 1 | ||
2854 | | or TMP1, AT, TMP0 | ||
2855 | | beqz TMP1, >8 // Both args +-0: return 1. | ||
2856 | |. lui TMP1, 0xffe0 | ||
2857 | | dsll TMP1, TMP1, 32 | ||
2858 | | sltu AT, TMP1, AT | ||
2859 | | sltu TMP0, TMP1, TMP0 | ||
2860 | | or TMP1, AT, TMP0 | ||
2861 | | bnez TMP1, >9 // Either arg is NaN: return 0. | ||
2862 | |. and AT, CARG1, CARG2 | ||
2863 | | xor AT, AT, TMP3 // The sign bit of TMP3 inverts the branch decision below. | ||
2864 | | bltz AT, >5 // Both args negative (before the TMP3 flip)? | ||
2865 | |. nop | ||
2866 | | jr ra | ||
2867 | |. slt CRET1, CARG2, CARG1 | ||
2868 | |5: // Swap conditions if both operands are negative. | ||
2869 | | jr ra | ||
2870 | |. slt CRET1, CARG1, CARG2 | ||
2871 | |8: | ||
2872 | | jr ra | ||
2873 | |. li CRET1, 1 | ||
2874 | |9: | ||
2875 | | jr ra | ||
2876 | |. li CRET1, 0 | ||
2877 | |.endif | ||
2878 | | | ||
2879 | |.macro sfmin_max, name, fpcall | ||
2880 | |->vm_sf .. name: // Defines ->vm_sf<name>. Returns CARG1 or CARG2 in CRET1, chosen by fpcall. | ||
2881 | |.if JIT and not FPU | ||
2882 | | move TMP2, ra // Keep the return address across the nested call. | ||
2883 | | bal ->fpcall | ||
2884 | |. nop | ||
2885 | | move ra, TMP2 | ||
2886 | | move TMP0, CRET1 // TMP0 = comparison result from fpcall. | ||
2887 | | move CRET1, CARG1 | ||
2888 | |.if MIPSR6 | ||
2889 | | selnez CRET1, CRET1, TMP0 | ||
2890 | | seleqz TMP0, CARG2, TMP0 | ||
2891 | | jr ra | ||
2892 | |. or CRET1, CRET1, TMP0 | ||
2893 | |.else | ||
2894 | | jr ra | ||
2895 | |. movz CRET1, CARG2, TMP0 // Comparison false: result is CARG2 instead of CARG1. | ||
2896 | |.endif | ||
2897 | |.endif | ||
2898 | |.endmacro | ||
2899 | | | ||
2900 | | sfmin_max min, vm_sfcmpolt | ||
2901 | | sfmin_max max, vm_sfcmpogt | ||
2902 | | | ||
2903 | |//----------------------------------------------------------------------- | ||
2904 | |//-- Miscellaneous functions -------------------------------------------- | ||
2905 | |//----------------------------------------------------------------------- | ||
2906 | | | ||
2907 | |.define NEXT_TAB, TAB:CARG1 | ||
2908 | |.define NEXT_IDX, CARG2 | ||
2909 | |.define NEXT_ASIZE, CARG3 | ||
2910 | |.define NEXT_NIL, CARG4 | ||
2911 | |.define NEXT_TMP0, r12 | ||
2912 | |.define NEXT_TMP1, r13 | ||
2913 | |.define NEXT_TMP2, r14 | ||
2914 | |.define NEXT_RES_VK, CRET1 | ||
2915 | |.define NEXT_RES_IDX, CRET2 | ||
2916 | |.define NEXT_RES_PTR, sp | ||
2917 | |.define NEXT_RES_VAL, 0(sp) | ||
2918 | |.define NEXT_RES_KEY, 8(sp) | ||
2919 | | | ||
2920 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2921 | |// Next idx returned in CRET2. Array-part hits and end of iteration use 0(sp)/8(sp) as the result slots. | ||
2922 | |->vm_next: | ||
2923 | |.if JIT and ENDIAN_LE | ||
2924 | | lw NEXT_ASIZE, NEXT_TAB->asize | ||
2925 | | ld NEXT_TMP0, NEXT_TAB->array | ||
2926 | | li NEXT_NIL, LJ_TNIL | ||
2927 | |1: // Traverse array part. | ||
2928 | | sltu AT, NEXT_IDX, NEXT_ASIZE | ||
2929 | | sll NEXT_TMP1, NEXT_IDX, 3 | ||
2930 | | beqz AT, >5 | ||
2931 | |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 | ||
2932 | | li AT, LJ_TISNUM | ||
2933 | | ld NEXT_TMP2, 0(NEXT_TMP1) | ||
2934 | | dsll AT, AT, 47 | ||
2935 | | or NEXT_TMP1, NEXT_IDX, AT // Key = current index combined with LJ_TISNUM << 47. | ||
2936 | | beq NEXT_TMP2, NEXT_NIL, <1 | ||
2937 | |. addiu NEXT_IDX, NEXT_IDX, 1 | ||
2938 | | sd NEXT_TMP2, NEXT_RES_VAL | ||
2939 | | sd NEXT_TMP1, NEXT_RES_KEY | ||
2940 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2941 | | jr ra | ||
2942 | |. move NEXT_RES_IDX, NEXT_IDX | ||
2943 | | | ||
2944 | |5: // Traverse hash part. | ||
2945 | | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE | ||
2946 | | ld NODE:NEXT_RES_VK, NEXT_TAB->node | ||
2947 | | sll NEXT_TMP2, NEXT_RES_IDX, 5 | ||
2948 | | lw NEXT_TMP0, NEXT_TAB->hmask | ||
2949 | | sll AT, NEXT_RES_IDX, 3 | ||
2950 | | subu AT, NEXT_TMP2, AT // (idx << 5) - (idx << 3) = idx * 24. | ||
2951 | | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT | ||
2952 | |6: | ||
2953 | | sltu AT, NEXT_TMP0, NEXT_RES_IDX // hmask < idx: no nodes left. | ||
2954 | | bnez AT, >8 | ||
2955 | |. nop | ||
2956 | | ld NEXT_TMP2, NODE:NEXT_RES_VK->val | ||
2957 | | bne NEXT_TMP2, NEXT_NIL, >9 | ||
2958 | |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 | ||
2959 | | // Skip holes in hash part. | ||
2960 | | b <6 | ||
2961 | |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) | ||
2962 | | | ||
2963 | |8: // End of iteration. Set the key to nil (not the value). | ||
2964 | | sd NEXT_NIL, NEXT_RES_KEY | ||
2965 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2966 | |9: | ||
2967 | | jr ra | ||
2968 | |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE | ||
2969 | |.endif | ||
2970 | | | ||
2971 | |//----------------------------------------------------------------------- | ||
2972 | |//-- FFI helper functions ----------------------------------------------- | ||
2973 | |//----------------------------------------------------------------------- | ||
2974 | | | ||
2975 | |// Handler for callback functions. Callback slot number in r1, g in r2. Saves the argument registers into CTSTATE->cb, calls lj_ccallback_enter and dispatches via ins_callt. | ||
2976 | |->vm_ffi_callback: | ||
2977 | |.if FFI | ||
2978 | |.type CTSTATE, CTState, PC | ||
2979 | | saveregs | ||
2980 | | ld CTSTATE, GL:r2->ctype_state | ||
2981 | | daddiu DISPATCH, r2, GG_G2DISP | ||
2982 | | load_got lj_ccallback_enter | ||
2983 | | sw r1, CTSTATE->cb.slot | ||
2984 | | sd CARG1, CTSTATE->cb.gpr[0] | ||
2985 | | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] | ||
2986 | | sd CARG2, CTSTATE->cb.gpr[1] | ||
2987 | | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] | ||
2988 | | sd CARG3, CTSTATE->cb.gpr[2] | ||
2989 | | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] | ||
2990 | | sd CARG4, CTSTATE->cb.gpr[3] | ||
2991 | | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] | ||
2992 | | sd CARG5, CTSTATE->cb.gpr[4] | ||
2993 | | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] | ||
2994 | | sd CARG6, CTSTATE->cb.gpr[5] | ||
2995 | | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] | ||
2996 | | sd CARG7, CTSTATE->cb.gpr[6] | ||
2997 | | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] | ||
2998 | | sd CARG8, CTSTATE->cb.gpr[7] | ||
2999 | | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] | ||
3000 | | daddiu TMP0, sp, CFRAME_SPACE | ||
3001 | | sd TMP0, CTSTATE->cb.stack | ||
3002 | | sd r0, SAVE_PC // Any value outside of bytecode is ok. | ||
3003 | | move CARG2, sp | ||
3004 | | call_intern lj_ccallback_enter // (CTState *cts, void *cf) | ||
3005 | |. move CARG1, CTSTATE | ||
3006 | | // Returns lua_State *. | ||
3007 | | ld BASE, L:CRET1->base | ||
3008 | | ld RC, L:CRET1->top | ||
3009 | | move L, CRET1 | ||
3010 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | ||
3011 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
3012 | | .FPU mtc1 TMP3, TOBIT | ||
3013 | | li TISNIL, LJ_TNIL | ||
3014 | | li TISNUM, LJ_TISNUM | ||
3015 | | li_vmstate INTERP | ||
3016 | | subu RC, RC, BASE | ||
3017 | | cleartp LFUNC:RB | ||
3018 | | st_vmstate | ||
3019 | | .FPU cvt.d.s TOBIT, TOBIT | ||
3020 | | ins_callt | ||
3021 | |.endif | ||
3022 | | | ||
3023 | |->cont_ffi_callback: // Return from FFI callback: calls lj_ccallback_leave, reloads the return registers from CTSTATE->cb and exits via vm_leave_unw. | ||
3024 | |.if FFI | ||
3025 | | load_got lj_ccallback_leave | ||
3026 | | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) | ||
3027 | | sd BASE, L->base | ||
3028 | | sd RB, L->top | ||
3029 | | sd L, CTSTATE->L | ||
3030 | | move CARG2, RA | ||
3031 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) | ||
3032 | |. move CARG1, CTSTATE | ||
3033 | | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] | ||
3034 | | ld CRET1, CTSTATE->cb.gpr[0] | ||
3035 | | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] | ||
3036 | | b ->vm_leave_unw | ||
3037 | |. ld CRET2, CTSTATE->cb.gpr[1] | ||
3038 | |.endif | ||
3039 | | | ||
3040 | |->vm_ffi_call: // Call C function via FFI. | ||
3041 | | // Caveat: needs special frame unwinding, see below. | ||
3042 | |.if FFI | ||
3043 | | .type CCSTATE, CCallState, CARG1 | ||
3044 | | lw TMP1, CCSTATE->spadj | ||
3045 | | lbu CARG2, CCSTATE->nsp | ||
3046 | | move TMP2, sp | ||
3047 | | dsubu sp, sp, TMP1 | ||
3048 | | sd ra, -8(TMP2) | ||
3049 | | sll CARG2, CARG2, 3 | ||
3050 | | sd r16, -16(TMP2) | ||
3051 | | sd CCSTATE, -24(TMP2) | ||
3052 | | move r16, TMP2 | ||
3053 | | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) | ||
3054 | | move TMP2, sp | ||
3055 | | beqz CARG2, >2 | ||
3056 | |. daddu TMP3, TMP1, CARG2 | ||
3057 | |1: // Copy nsp*8 bytes from CCSTATE->stack to the new stack area, 8 bytes per iteration. | ||
3058 | | ld TMP0, 0(TMP1) | ||
3059 | | daddiu TMP1, TMP1, 8 | ||
3060 | | sltu AT, TMP1, TMP3 | ||
3061 | | sd TMP0, 0(TMP2) | ||
3062 | | bnez AT, <1 | ||
3063 | |. daddiu TMP2, TMP2, 8 | ||
3064 | |2: // Load the argument registers. FP argument registers are loaded from the same gpr[] slots. | ||
3065 | | ld CFUNCADDR, CCSTATE->func | ||
3066 | | .FPU ldc1 FARG1, CCSTATE->gpr[0] | ||
3067 | | ld CARG2, CCSTATE->gpr[1] | ||
3068 | | .FPU ldc1 FARG2, CCSTATE->gpr[1] | ||
3069 | | ld CARG3, CCSTATE->gpr[2] | ||
3070 | | .FPU ldc1 FARG3, CCSTATE->gpr[2] | ||
3071 | | ld CARG4, CCSTATE->gpr[3] | ||
3072 | | .FPU ldc1 FARG4, CCSTATE->gpr[3] | ||
3073 | | ld CARG5, CCSTATE->gpr[4] | ||
3074 | | .FPU ldc1 FARG5, CCSTATE->gpr[4] | ||
3075 | | ld CARG6, CCSTATE->gpr[5] | ||
3076 | | .FPU ldc1 FARG6, CCSTATE->gpr[5] | ||
3077 | | ld CARG7, CCSTATE->gpr[6] | ||
3078 | | .FPU ldc1 FARG7, CCSTATE->gpr[6] | ||
3079 | | ld CARG8, CCSTATE->gpr[7] | ||
3080 | | .FPU ldc1 FARG8, CCSTATE->gpr[7] | ||
3081 | | jalr CFUNCADDR | ||
3082 | |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. | ||
3083 | | ld CCSTATE:TMP1, -24(r16) | ||
3084 | | ld TMP2, -16(r16) | ||
3085 | | ld ra, -8(r16) | ||
3086 | | sd CRET1, CCSTATE:TMP1->gpr[0] | ||
3087 | | sd CRET2, CCSTATE:TMP1->gpr[1] | ||
3088 | |.if FPU | ||
3089 | | sdc1 FRET1, CCSTATE:TMP1->fpr[0] | ||
3090 | | sdc1 FRET2, CCSTATE:TMP1->fpr[1] | ||
3091 | |.else | ||
3092 | | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. | ||
3093 | |.endif | ||
3094 | | move sp, r16 | ||
3095 | | jr ra | ||
3096 | |. move r16, TMP2 | ||
3097 | |.endif | ||
3098 | |// Note: vm_ffi_call must be the last function in this object file! | ||
3099 | | | ||
3100 | |//----------------------------------------------------------------------- | ||
3101 | } | ||
3102 | |||
3103 | /* Generate the code for a single instruction. */ | ||
3104 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ||
3105 | { | ||
3106 | int vk = 0; | ||
3107 | |=>defop: | ||
3108 | |||
3109 | switch (op) { | ||
3110 | |||
3111 | /* -- Comparison ops ---------------------------------------------------- */ | ||
3112 | |||
3113 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
3114 | |||
3115 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
3116 | | // RA = src1*8, RD = src2*8, JMP with RD = target | ||
3117 | |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp | ||
3118 | | daddu RA, BASE, RA | ||
3119 | | daddu RD, BASE, RD | ||
3120 | | ld ARGRA, 0(RA) | ||
3121 | | ld ARGRD, 0(RD) | ||
3122 | | lhu TMP2, OFS_RD(PC) | ||
3123 | | gettp CARG3, ARGRA | ||
3124 | | gettp CARG4, ARGRD | ||
3125 | | bne CARG3, TISNUM, >2 | ||
3126 | |. daddiu PC, PC, 4 | ||
3127 | | bne CARG4, TISNUM, >5 | ||
3128 | |. decode_RD4b TMP2 | ||
3129 | | sextw ARGRA, ARGRA | ||
3130 | | sextw ARGRD, ARGRD | ||
3131 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3132 | | slt AT, CARG1, CARG2 | ||
3133 | | addu TMP2, TMP2, TMP3 | ||
3134 | |.if MIPSR6 | ||
3135 | | movop TMP2, TMP2, AT | ||
3136 | |.else | ||
3137 | | movop TMP2, r0, AT | ||
3138 | |.endif | ||
3139 | |1: // TMP2 holds the biased jump offset, or 0 if the jump is not taken. | ||
3140 | | daddu PC, PC, TMP2 | ||
3141 | | ins_next | ||
3142 | | | ||
3143 | |2: // RA is not an integer. | ||
3144 | | sltiu AT, CARG3, LJ_TISNUM | ||
3145 | | beqz AT, ->vmeta_comp | ||
3146 | |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3147 | | sltiu AT, CARG4, LJ_TISNUM | ||
3148 | | beqz AT, >4 | ||
3149 | |. decode_RD4b TMP2 | ||
3150 | |.if FPU | ||
3151 | | ldc1 FRA, 0(RA) | ||
3152 | | ldc1 FRD, 0(RD) | ||
3153 | |.endif | ||
3154 | |3: // RA and RD are both numbers. | ||
3155 | |.if FPU | ||
3156 | |.if MIPSR6 | ||
3157 | | fcomp FTMP0, FTMP0, FTMP2 | ||
3158 | | addu TMP2, TMP2, TMP3 | ||
3159 | | mfc1 TMP3, FTMP0 | ||
3160 | | b <1 | ||
3161 | |. fmovop TMP2, TMP2, TMP3 | ||
3162 | |.else | ||
3163 | | fcomp FTMP0, FTMP2 | ||
3164 | | addu TMP2, TMP2, TMP3 | ||
3165 | | b <1 | ||
3166 | |. fmovop TMP2, r0 | ||
3167 | |.endif | ||
3168 | |.else | ||
3169 | | bal sfcomp | ||
3170 | |. addu TMP2, TMP2, TMP3 | ||
3171 | | b <1 | ||
3172 | |.if MIPSR6 | ||
3173 | |. movop TMP2, TMP2, CRET1 | ||
3174 | |.else | ||
3175 | |. movop TMP2, r0, CRET1 | ||
3176 | |.endif | ||
3177 | |.endif | ||
3178 | | | ||
3179 | |4: // RA is a number, RD is not a number. | ||
3180 | | bne CARG4, TISNUM, ->vmeta_comp | ||
3181 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
3182 | |.if FPU | ||
3183 | |. lwc1 FRD, LO(RD) | ||
3184 | | ldc1 FRA, 0(RA) | ||
3185 | | b <3 | ||
3186 | |. cvt.d.w FRD, FRD | ||
3187 | |.else | ||
3188 | |.if "ARGRD" == "CARG1" | ||
3189 | |. sextw CARG1, CARG1 | ||
3190 | | bal ->vm_sfi2d_1 | ||
3191 | |. nop | ||
3192 | |.else | ||
3193 | |. sextw CARG2, CARG2 | ||
3194 | | bal ->vm_sfi2d_2 | ||
3195 | |. nop | ||
3196 | |.endif | ||
3197 | | b <3 | ||
3198 | |. nop | ||
3199 | |.endif | ||
3200 | | | ||
3201 | |5: // RA is an integer, RD is not an integer | ||
3202 | | sltiu AT, CARG4, LJ_TISNUM | ||
3203 | | beqz AT, ->vmeta_comp | ||
3204 | |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3205 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
3206 | |.if FPU | ||
3207 | | lwc1 FRA, LO(RA) | ||
3208 | | ldc1 FRD, 0(RD) | ||
3209 | | b <3 | ||
3210 | |. cvt.d.w FRA, FRA // Delay slot of the branch above (marker added to match label 4). | ||
3211 | |.else | ||
3212 | |.if "ARGRA" == "CARG1" | ||
3213 | | bal ->vm_sfi2d_1 | ||
3214 | |. sextw CARG1, CARG1 | ||
3215 | |.else | ||
3216 | | bal ->vm_sfi2d_2 | ||
3217 | |. sextw CARG2, CARG2 | ||
3218 | |.endif | ||
3219 | | b <3 | ||
3220 | |. nop | ||
3221 | |.endif | ||
3222 | |.endmacro | ||
3223 | | | ||
3224 | |.if MIPSR6 | ||
3225 | if (op == BC_ISLT) { | ||
3226 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt | ||
3227 | } else if (op == BC_ISGE) { | ||
3228 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt | ||
3229 | } else if (op == BC_ISLE) { | ||
3230 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult | ||
3231 | } else { | ||
3232 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult | ||
3233 | } | ||
3234 | |.else | ||
3235 | if (op == BC_ISLT) { | ||
3236 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt | ||
3237 | } else if (op == BC_ISGE) { | ||
3238 | | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt | ||
3239 | } else if (op == BC_ISLE) { | ||
3240 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult | ||
3241 | } else { | ||
3242 | | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult | ||
3243 | } | ||
3244 | |.endif | ||
3245 | break; | ||
3246 | |||
3247 | case BC_ISEQV: case BC_ISNEV: | ||
3248 | vk = op == BC_ISEQV; | ||
3249 | | // RA = src1*8, RD = src2*8, JMP with RD = target | ||
3250 | | daddu RA, BASE, RA | ||
3251 | | daddiu PC, PC, 4 | ||
3252 | | daddu RD, BASE, RD | ||
3253 | | ld CARG1, 0(RA) | ||
3254 | | lhu TMP2, -4+OFS_RD(PC) | ||
3255 | | ld CARG2, 0(RD) | ||
3256 | | gettp CARG3, CARG1 | ||
3257 | | gettp CARG4, CARG2 | ||
3258 | | sltu AT, TISNUM, CARG3 | ||
3259 | | sltu TMP1, TISNUM, CARG4 | ||
3260 | | or AT, AT, TMP1 | ||
3261 | if (vk) { | ||
3262 | | beqz AT, ->BC_ISEQN_Z | ||
3263 | } else { | ||
3264 | | beqz AT, ->BC_ISNEN_Z | ||
3265 | } | ||
3266 | | // Either or both types are not numbers. | ||
3267 | |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) // Delay slot of the beqz above; also executed when branching to BC_IS*N_Z. | ||
3268 | |.if FFI | ||
3269 | | li AT, LJ_TCDATA | ||
3270 | | beq CARG3, AT, ->vmeta_equal_cd | ||
3271 | |.endif | ||
3272 | | decode_RD4b TMP2 | ||
3273 | |.if FFI | ||
3274 | | beq CARG4, AT, ->vmeta_equal_cd | ||
3275 | |. nop | ||
3276 | |.endif | ||
3277 | | bne CARG1, CARG2, >2 | ||
3278 | |. addu TMP2, TMP2, TMP3 | ||
3279 | | // Tag and value are equal. | ||
3280 | if (vk) { | ||
3281 | |->BC_ISEQV_Z: | ||
3282 | | daddu PC, PC, TMP2 | ||
3283 | } | ||
3284 | |1: | ||
3285 | | ins_next | ||
3286 | | | ||
3287 | |2: // Check if the tags are the same and it's a table or userdata. | ||
3288 | | xor AT, CARG3, CARG4 // Same type? | ||
3289 | | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? | ||
3290 | |.if MIPSR6 | ||
3291 | | seleqz TMP0, TMP0, AT | ||
3292 | |.else | ||
3293 | | movn TMP0, r0, AT | ||
3294 | |.endif | ||
3295 | if (vk) { | ||
3296 | | beqz TMP0, <1 | ||
3297 | } else { | ||
3298 | | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. | ||
3299 | } | ||
3300 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
3301 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
3302 | |. cleartp TAB:TMP1, CARG1 | ||
3303 | | ld TAB:TMP3, TAB:TMP1->metatable | ||
3304 | if (vk) { | ||
3305 | | beqz TAB:TMP3, <1 // No metatable? | ||
3306 | |. nop | ||
3307 | | lbu TMP3, TAB:TMP3->nomm | ||
3308 | | andi TMP3, TMP3, 1<<MM_eq | ||
3309 | | bnez TMP3, >1 // Or 'no __eq' flag set? NOTE(review): forward ref, binds to a later 1: label - confirm intended. | ||
3310 | } else { | ||
3311 | | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? | ||
3312 | |. nop | ||
3313 | | lbu TMP3, TAB:TMP3->nomm | ||
3314 | | andi TMP3, TMP3, 1<<MM_eq | ||
3315 | | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set? | ||
3316 | } | ||
3317 | |. nop | ||
3318 | | b ->vmeta_equal // Handle __eq metamethod. | ||
3319 | |. li TMP0, 1-vk // ne = 0 or 1. | ||
3320 | break; | ||
3321 | |||
3322 | case BC_ISEQS: case BC_ISNES: | ||
3323 | vk = op == BC_ISEQS; | ||
3324 | | // RA = src*8, RD = str_const*8 (~), JMP with RD = target | ||
3325 | | daddu RA, BASE, RA | ||
3326 | | daddiu PC, PC, 4 | ||
3327 | | ld CARG1, 0(RA) | ||
3328 | | dsubu RD, KBASE, RD | ||
3329 | | lhu TMP2, -4+OFS_RD(PC) | ||
3330 | | ld CARG2, -8(RD) // KBASE-8-str_const*8 | ||
3331 | |.if FFI | ||
3332 | | gettp TMP0, CARG1 | ||
3333 | | li AT, LJ_TCDATA | ||
3334 | |.endif | ||
3335 | | li TMP1, LJ_TSTR | ||
3336 | | decode_RD4b TMP2 | ||
3337 | |.if FFI | ||
3338 | | beq TMP0, AT, ->vmeta_equal_cd | ||
3339 | |.endif | ||
3340 | |. settp CARG2, TMP1 | ||
3341 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3342 | | xor TMP1, CARG1, CARG2 // Zero iff the slot equals the LJ_TSTR-tagged constant. | ||
3343 | | addu TMP2, TMP2, TMP3 | ||
3344 | |.if MIPSR6 | ||
3345 | if (vk) { | ||
3346 | | seleqz TMP2, TMP2, TMP1 | ||
3347 | } else { | ||
3348 | | selnez TMP2, TMP2, TMP1 | ||
3349 | } | ||
3350 | |.else | ||
3351 | if (vk) { | ||
3352 | | movn TMP2, r0, TMP1 | ||
3353 | } else { | ||
3354 | | movz TMP2, r0, TMP1 | ||
3355 | } | ||
3356 | |.endif | ||
3357 | | daddu PC, PC, TMP2 | ||
3358 | | ins_next | ||
3359 | break; | ||
3360 | |||
3361 | case BC_ISEQN: case BC_ISNEN: | ||
3362 | vk = op == BC_ISEQN; | ||
3363 | | // RA = src*8, RD = num_const*8, JMP with RD = target | ||
3364 | | daddu RA, BASE, RA | ||
3365 | | daddu RD, KBASE, RD | ||
3366 | | ld CARG1, 0(RA) | ||
3367 | | ld CARG2, 0(RD) | ||
3368 | | lhu TMP2, OFS_RD(PC) | ||
3369 | | gettp CARG3, CARG1 | ||
3370 | | gettp CARG4, CARG2 | ||
3371 | | daddiu PC, PC, 4 | ||
3372 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3373 | if (vk) { | ||
3374 | |->BC_ISEQN_Z: | ||
3375 | } else { | ||
3376 | |->BC_ISNEN_Z: | ||
3377 | } | ||
3378 | | bne CARG3, TISNUM, >3 | ||
3379 | |. decode_RD4b TMP2 | ||
3380 | | bne CARG4, TISNUM, >6 | ||
3381 | |. addu TMP2, TMP2, TMP3 | ||
3382 | | xor AT, CARG1, CARG2 | ||
3383 | |.if MIPSR6 | ||
3384 | if (vk) { | ||
3385 | | seleqz TMP2, TMP2, AT | ||
3386 | |1: | ||
3387 | | daddu PC, PC, TMP2 | ||
3388 | |2: | ||
3389 | } else { | ||
3390 | | selnez TMP2, TMP2, AT | ||
3391 | |1: | ||
3392 | |2: | ||
3393 | | daddu PC, PC, TMP2 | ||
3394 | } | ||
3395 | |.else | ||
3396 | if (vk) { | ||
3397 | | movn TMP2, r0, AT | ||
3398 | |1: | ||
3399 | | daddu PC, PC, TMP2 | ||
3400 | |2: | ||
3401 | } else { | ||
3402 | | movz TMP2, r0, AT | ||
3403 | |1: | ||
3404 | |2: | ||
3405 | | daddu PC, PC, TMP2 | ||
3406 | } | ||
3407 | |.endif | ||
3408 | | ins_next | ||
3409 | | | ||
3410 | |3: // RA is not an integer. | ||
3411 | | sltu AT, CARG3, TISNUM | ||
3412 | |.if FFI | ||
3413 | | beqz AT, >8 | ||
3414 | |.else | ||
3415 | | beqz AT, <2 | ||
3416 | |.endif | ||
3417 | |. addu TMP2, TMP2, TMP3 | ||
3418 | | sltu AT, CARG4, TISNUM | ||
3419 | |.if FPU | ||
3420 | | ldc1 FTMP0, 0(RA) | ||
3421 | | ldc1 FTMP2, 0(RD) | ||
3422 | |.endif | ||
3423 | | beqz AT, >5 | ||
3424 | |. nop | ||
3425 | |4: // RA and RD are both numbers. | ||
3426 | |.if FPU | ||
3427 | |.if MIPSR6 | ||
3428 | | cmp.eq.d FTMP0, FTMP0, FTMP2 | ||
3429 | | dmfc1 TMP1, FTMP0 | ||
3430 | | b <1 | ||
3431 | if (vk) { | ||
3432 | |. selnez TMP2, TMP2, TMP1 | ||
3433 | } else { | ||
3434 | |. seleqz TMP2, TMP2, TMP1 | ||
3435 | } | ||
3436 | |.else | ||
3437 | | c.eq.d FTMP0, FTMP2 | ||
3438 | | b <1 | ||
3439 | if (vk) { | ||
3440 | |. movf TMP2, r0 | ||
3441 | } else { | ||
3442 | |. movt TMP2, r0 | ||
3443 | } | ||
3444 | |.endif | ||
3445 | |.else | ||
3446 | | bal ->vm_sfcmpeq | ||
3447 | |. nop | ||
3448 | | b <1 | ||
3449 | |.if MIPSR6 | ||
3450 | if (vk) { | ||
3451 | |. selnez TMP2, TMP2, CRET1 | ||
3452 | } else { | ||
3453 | |. seleqz TMP2, TMP2, CRET1 | ||
3454 | } | ||
3455 | |.else | ||
3456 | if (vk) { | ||
3457 | |. movz TMP2, r0, CRET1 | ||
3458 | } else { | ||
3459 | |. movn TMP2, r0, CRET1 | ||
3460 | } | ||
3461 | |.endif | ||
3462 | |.endif | ||
3463 | | | ||
3464 | |5: // RA is a number, RD is not a number. | ||
3465 | |.if FFI | ||
3466 | | bne CARG4, TISNUM, >9 | ||
3467 | |.else | ||
3468 | | bne CARG4, TISNUM, <2 | ||
3469 | |.endif | ||
3470 | | // RA is a number, RD is an integer. Convert RD to a number. | ||
3471 | |.if FPU | ||
3472 | |. lwc1 FTMP2, LO(RD) | ||
3473 | | b <4 | ||
3474 | |. cvt.d.w FTMP2, FTMP2 | ||
3475 | |.else | ||
3476 | |. sextw CARG2, CARG2 | ||
3477 | | bal ->vm_sfi2d_2 | ||
3478 | |. nop | ||
3479 | | b <4 | ||
3480 | |. nop | ||
3481 | |.endif | ||
3482 | | | ||
3483 | |6: // RA is an integer, RD is not an integer | ||
3484 | | sltu AT, CARG4, TISNUM | ||
3485 | |.if FFI | ||
3486 | | beqz AT, >9 | ||
3487 | |.else | ||
3488 | | beqz AT, <2 | ||
3489 | |.endif | ||
3490 | | // RA is an integer, RD is a number. Convert RA to a number. | ||
3491 | |.if FPU | ||
3492 | |. lwc1 FTMP0, LO(RA) | ||
3493 | | ldc1 FTMP2, 0(RD) | ||
3494 | | b <4 | ||
3495 | |. cvt.d.w FTMP0, FTMP0 // Delay slot of the branch above (marker added to match label 5). | ||
3496 | |.else | ||
3497 | |. sextw CARG1, CARG1 | ||
3498 | | bal ->vm_sfi2d_1 | ||
3499 | |. nop | ||
3500 | | b <4 | ||
3501 | |. nop | ||
3502 | |.endif | ||
3503 | | | ||
3504 | |.if FFI | ||
3505 | |8: | ||
3506 | | li AT, LJ_TCDATA | ||
3507 | | bne CARG3, AT, <2 | ||
3508 | |. nop | ||
3509 | | b ->vmeta_equal_cd | ||
3510 | |. nop | ||
3511 | |9: | ||
3512 | | li AT, LJ_TCDATA | ||
3513 | | bne CARG4, AT, <2 | ||
3514 | |. nop | ||
3515 | | b ->vmeta_equal_cd | ||
3516 | |. nop | ||
3517 | |.endif | ||
3518 | break; | ||
3519 | |||
3520 | case BC_ISEQP: case BC_ISNEP: | ||
3521 | vk = op == BC_ISEQP; | ||
3522 | | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target | ||
3523 | | daddu RA, BASE, RA | ||
3524 | | srl TMP1, RD, 3 | ||
3525 | | ld TMP0, 0(RA) | ||
3526 | | lhu TMP2, OFS_RD(PC) | ||
3527 | | not TMP1, TMP1 // TMP1 = ~(RD >> 3), the complemented primitive type operand. | ||
3528 | | gettp TMP0, TMP0 | ||
3529 | | daddiu PC, PC, 4 | ||
3530 | |.if FFI | ||
3531 | | li AT, LJ_TCDATA | ||
3532 | | beq TMP0, AT, ->vmeta_equal_cd | ||
3533 | |.endif | ||
3534 | |. xor TMP0, TMP0, TMP1 | ||
3535 | | decode_RD4b TMP2 | ||
3536 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3537 | | addu TMP2, TMP2, TMP3 | ||
3538 | |.if MIPSR6 | ||
3539 | if (vk) { | ||
3540 | | seleqz TMP2, TMP2, TMP0 | ||
3541 | } else { | ||
3542 | | selnez TMP2, TMP2, TMP0 | ||
3543 | } | ||
3544 | |.else | ||
3545 | if (vk) { | ||
3546 | | movn TMP2, r0, TMP0 | ||
3547 | } else { | ||
3548 | | movz TMP2, r0, TMP0 | ||
3549 | } | ||
3550 | |.endif | ||
3551 | | daddu PC, PC, TMP2 | ||
3552 | | ins_next | ||
3553 | break; | ||
3554 | |||
3555 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
3556 | |||
3557 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
3558 | | // RA = dst*8 or unused, RD = src*8, JMP with RD = target | ||
3559 | | daddu RD, BASE, RD | ||
3560 | | lhu TMP2, OFS_RD(PC) | ||
3561 | | ld TMP0, 0(RD) | ||
3562 | | daddiu PC, PC, 4 | ||
3563 | | gettp TMP0, TMP0 | ||
3564 | | sltiu TMP0, TMP0, LJ_TISTRUECOND | ||
3565 | if (op == BC_IST || op == BC_ISF) { | ||
3566 | | decode_RD4b TMP2 | ||
3567 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3568 | | addu TMP2, TMP2, TMP3 | ||
3569 | |.if MIPSR6 | ||
3570 | if (op == BC_IST) { | ||
3571 | | selnez TMP2, TMP2, TMP0 | ||
3572 | } else { | ||
3573 | | seleqz TMP2, TMP2, TMP0 | ||
3574 | } | ||
3575 | |.else | ||
3576 | if (op == BC_IST) { | ||
3577 | | movz TMP2, r0, TMP0 | ||
3578 | } else { | ||
3579 | | movn TMP2, r0, TMP0 | ||
3580 | } | ||
3581 | |.endif | ||
3582 | | daddu PC, PC, TMP2 | ||
3583 | } else { | ||
3584 | | ld CRET1, 0(RD) | ||
3585 | if (op == BC_ISTC) { | ||
3586 | | beqz TMP0, >1 | ||
3587 | } else { | ||
3588 | | bnez TMP0, >1 | ||
3589 | } | ||
3590 | |. daddu RA, BASE, RA | ||
3591 | | decode_RD4b TMP2 | ||
3592 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
3593 | | addu TMP2, TMP2, TMP3 | ||
3594 | | sd CRET1, 0(RA) | ||
3595 | | daddu PC, PC, TMP2 | ||
3596 | |1: | ||
3597 | } | ||
3598 | | ins_next | ||
3599 | break; | ||
3600 | |||
3601 | case BC_ISTYPE: | ||
3602 | | // RA = src*8, RD = -type*8 | ||
3603 | | daddu TMP2, BASE, RA | ||
3604 | | srl TMP1, RD, 3 | ||
3605 | | ld TMP0, 0(TMP2) | ||
3606 | | ins_next1 | ||
3607 | | gettp TMP0, TMP0 | ||
3608 | | daddu AT, TMP0, TMP1 | ||
3609 | | bnez AT, ->vmeta_istype | ||
3610 | |. ins_next2 | ||
3611 | break; | ||
3612 | case BC_ISNUM: | ||
3613 | | // RA = src*8, RD = -(TISNUM-1)*8 | ||
3614 | | daddu TMP2, BASE, RA | ||
3615 | | ld TMP0, 0(TMP2) | ||
3616 | | ins_next1 | ||
3617 | | checknum TMP0, ->vmeta_istype | ||
3618 | |. ins_next2 | ||
3619 | break; | ||
3620 | |||
3621 | /* -- Unary ops --------------------------------------------------------- */ | ||
3622 | |||
3623 | case BC_MOV: | ||
3624 | | // RA = dst*8, RD = src*8 | ||
3625 | | daddu RD, BASE, RD | ||
3626 | | daddu RA, BASE, RA | ||
3627 | | ld CRET1, 0(RD) | ||
3628 | | ins_next1 | ||
3629 | | sd CRET1, 0(RA) | ||
3630 | | ins_next2 | ||
3631 | break; | ||
3632 | case BC_NOT: | ||
3633 | | // RA = dst*8, RD = src*8 | ||
3634 | | daddu RD, BASE, RD | ||
3635 | | daddu RA, BASE, RA | ||
3636 | | ld TMP0, 0(RD) | ||
3637 | | li AT, LJ_TTRUE | ||
3638 | | gettp TMP0, TMP0 | ||
3639 | | sltu TMP0, AT, TMP0 | ||
3640 | | addiu TMP0, TMP0, 1 | ||
3641 | | dsll TMP0, TMP0, 47 | ||
3642 | | not TMP0, TMP0 | ||
3643 | | ins_next1 | ||
3644 | | sd TMP0, 0(RA) | ||
3645 | | ins_next2 | ||
3646 | break; | ||
3647 | case BC_UNM: | ||
3648 | | // RA = dst*8, RD = src*8 | ||
3649 | | daddu RB, BASE, RD | ||
3650 | | ld CARG1, 0(RB) | ||
3651 | | daddu RA, BASE, RA | ||
3652 | | gettp CARG3, CARG1 | ||
3653 | | bne CARG3, TISNUM, >2 | ||
3654 | |. lui TMP1, 0x8000 | ||
3655 | | sextw CARG1, CARG1 | ||
3656 | | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. | ||
3657 | |. negu CARG1, CARG1 | ||
3658 | | zextw CARG1, CARG1 | ||
3659 | | settp CARG1, TISNUM | ||
3660 | |1: | ||
3661 | | ins_next1 | ||
3662 | | sd CARG1, 0(RA) | ||
3663 | | ins_next2 | ||
3664 | |2: | ||
3665 | | sltiu AT, CARG3, LJ_TISNUM | ||
3666 | | beqz AT, ->vmeta_unm | ||
3667 | |. dsll TMP1, TMP1, 32 | ||
3668 | | b <1 | ||
3669 | |. xor CARG1, CARG1, TMP1 | ||
3670 | break; | ||
3671 | case BC_LEN: | ||
3672 | | // RA = dst*8, RD = src*8 | ||
3673 | | daddu CARG2, BASE, RD | ||
3674 | | daddu RA, BASE, RA | ||
3675 | | ld TMP0, 0(CARG2) | ||
3676 | | gettp TMP1, TMP0 | ||
3677 | | daddiu AT, TMP1, -LJ_TSTR | ||
3678 | | bnez AT, >2 | ||
3679 | |. cleartp STR:CARG1, TMP0 | ||
3680 | | lw CRET1, STR:CARG1->len | ||
3681 | |1: | ||
3682 | | settp CRET1, TISNUM | ||
3683 | | ins_next1 | ||
3684 | | sd CRET1, 0(RA) | ||
3685 | | ins_next2 | ||
3686 | |2: | ||
3687 | | daddiu AT, TMP1, -LJ_TTAB | ||
3688 | | bnez AT, ->vmeta_len | ||
3689 | |. nop | ||
3690 | #if LJ_52 | ||
3691 | | ld TAB:TMP2, TAB:CARG1->metatable | ||
3692 | | bnez TAB:TMP2, >9 | ||
3693 | |. nop | ||
3694 | |3: | ||
3695 | #endif | ||
3696 | |->BC_LEN_Z: | ||
3697 | | load_got lj_tab_len | ||
3698 | | call_intern lj_tab_len // (GCtab *t) | ||
3699 | |. nop | ||
3700 | | // Returns uint32_t (but less than 2^31). | ||
3701 | | b <1 | ||
3702 | |. nop | ||
3703 | #if LJ_52 | ||
3704 | |9: | ||
3705 | | lbu TMP0, TAB:TMP2->nomm | ||
3706 | | andi TMP0, TMP0, 1<<MM_len | ||
3707 | | bnez TMP0, <3 // 'no __len' flag set: done. | ||
3708 | |. nop | ||
3709 | | b ->vmeta_len | ||
3710 | |. nop | ||
3711 | #endif | ||
3712 | break; | ||
3713 | |||
3714 | /* -- Binary ops -------------------------------------------------------- */ | ||
3715 | |||
3716 | |.macro fpmod, a, b, c | ||
3717 | | bal ->vm_floor // floor(b/c) | ||
3718 | |. div.d FARG1, b, c | ||
3719 | | mul.d a, FRET1, c | ||
3720 | | sub.d a, b, a // b - floor(b/c)*c | ||
3721 | |.endmacro | ||
3722 | |||
3723 | |.macro sfpmod | ||
3724 | | daddiu sp, sp, -16 | ||
3725 | | | ||
3726 | | load_got __divdf3 | ||
3727 | | sd CARG1, 0(sp) | ||
3728 | | call_extern | ||
3729 | |. sd CARG2, 8(sp) | ||
3730 | | | ||
3731 | | load_got floor | ||
3732 | | call_extern | ||
3733 | |. move CARG1, CRET1 | ||
3734 | | | ||
3735 | | load_got __muldf3 | ||
3736 | | move CARG1, CRET1 | ||
3737 | | call_extern | ||
3738 | |. ld CARG2, 8(sp) | ||
3739 | | | ||
3740 | | load_got __subdf3 | ||
3741 | | ld CARG1, 0(sp) | ||
3742 | | call_extern | ||
3743 | |. move CARG2, CRET1 | ||
3744 | | | ||
3745 | | daddiu sp, sp, 16 | ||
3746 | |.endmacro | ||
3747 | |||
3748 | |.macro ins_arithpre, label | ||
3749 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
3750 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | ||
3751 | ||switch (vk) { | ||
3752 | ||case 0: | ||
3753 | | decode_RB8a RB, INS | ||
3754 | | decode_RB8b RB | ||
3755 | | decode_RDtoRC8 RC, RD | ||
3756 | | // RA = dst*8, RB = src1*8, RC = num_const*8 | ||
3757 | | daddu RB, BASE, RB | ||
3758 | |.if "label" ~= "none" | ||
3759 | | b label | ||
3760 | |.endif | ||
3761 | |. daddu RC, KBASE, RC | ||
3762 | || break; | ||
3763 | ||case 1: | ||
3764 | | decode_RB8a RC, INS | ||
3765 | | decode_RB8b RC | ||
3766 | | decode_RDtoRC8 RB, RD | ||
3767 | | // RA = dst*8, RB = num_const*8, RC = src1*8 | ||
3768 | | daddu RC, BASE, RC | ||
3769 | |.if "label" ~= "none" | ||
3770 | | b label | ||
3771 | |.endif | ||
3772 | |. daddu RB, KBASE, RB | ||
3773 | || break; | ||
3774 | ||default: | ||
3775 | | decode_RB8a RB, INS | ||
3776 | | decode_RB8b RB | ||
3777 | | decode_RDtoRC8 RC, RD | ||
3778 | | // RA = dst*8, RB = src1*8, RC = src2*8 | ||
3779 | | daddu RB, BASE, RB | ||
3780 | |.if "label" ~= "none" | ||
3781 | | b label | ||
3782 | |.endif | ||
3783 | |. daddu RC, BASE, RC | ||
3784 | || break; | ||
3785 | ||} | ||
3786 | |.endmacro | ||
3787 | | | ||
3788 | |.macro ins_arith, intins, fpins, fpcall, label | ||
3789 | | ins_arithpre none | ||
3790 | | | ||
3791 | |.if "label" ~= "none" | ||
3792 | |label: | ||
3793 | |.endif | ||
3794 | | | ||
3795 | |// Used in 5. | ||
3796 | | ld CARG1, 0(RB) | ||
3797 | | ld CARG2, 0(RC) | ||
3798 | | gettp TMP0, CARG1 | ||
3799 | | gettp TMP1, CARG2 | ||
3800 | | | ||
3801 | |.if "intins" ~= "div" | ||
3802 | | | ||
3803 | | // Check for two integers. | ||
3804 | | sextw CARG3, CARG1 | ||
3805 | | bne TMP0, TISNUM, >5 | ||
3806 | |. sextw CARG4, CARG2 | ||
3807 | | bne TMP1, TISNUM, >5 | ||
3808 | | | ||
3809 | |.if "intins" == "addu" | ||
3810 | |. intins CRET1, CARG3, CARG4 | ||
3811 | | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. | ||
3812 | | xor TMP2, CRET1, CARG4 | ||
3813 | | and TMP1, TMP1, TMP2 | ||
3814 | | bltz TMP1, ->vmeta_arith | ||
3815 | |. daddu RA, BASE, RA | ||
3816 | |.elif "intins" == "subu" | ||
3817 | |. intins CRET1, CARG3, CARG4 | ||
3818 | | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. | ||
3819 | | xor TMP2, CARG3, CARG4 | ||
3820 | | and TMP1, TMP1, TMP2 | ||
3821 | | bltz TMP1, ->vmeta_arith | ||
3822 | |. daddu RA, BASE, RA | ||
3823 | |.elif "intins" == "mult" | ||
3824 | |.if MIPSR6 | ||
3825 | |. nop | ||
3826 | | mul CRET1, CARG3, CARG4 | ||
3827 | | muh TMP2, CARG3, CARG4 | ||
3828 | |.else | ||
3829 | |. intins CARG3, CARG4 | ||
3830 | | mflo CRET1 | ||
3831 | | mfhi TMP2 | ||
3832 | |.endif | ||
3833 | | sra TMP1, CRET1, 31 | ||
3834 | | bne TMP1, TMP2, ->vmeta_arith | ||
3835 | |. daddu RA, BASE, RA | ||
3836 | |.else | ||
3837 | |. load_got lj_vm_modi | ||
3838 | | beqz CARG4, ->vmeta_arith | ||
3839 | |. daddu RA, BASE, RA | ||
3840 | | move CARG1, CARG3 | ||
3841 | | call_extern | ||
3842 | |. move CARG2, CARG4 | ||
3843 | |.endif | ||
3844 | | | ||
3845 | | zextw CRET1, CRET1 | ||
3846 | | settp CRET1, TISNUM | ||
3847 | | ins_next1 | ||
3848 | | sd CRET1, 0(RA) | ||
3849 | |3: | ||
3850 | | ins_next2 | ||
3851 | | | ||
3852 | |.endif | ||
3853 | | | ||
3854 | |5: // Check for two numbers. | ||
3855 | | .FPU ldc1 FTMP0, 0(RB) | ||
3856 | | sltu AT, TMP0, TISNUM | ||
3857 | | sltu TMP0, TMP1, TISNUM | ||
3858 | | .FPU ldc1 FTMP2, 0(RC) | ||
3859 | | and AT, AT, TMP0 | ||
3860 | | beqz AT, ->vmeta_arith | ||
3861 | |. daddu RA, BASE, RA | ||
3862 | | | ||
3863 | |.if FPU | ||
3864 | | fpins FRET1, FTMP0, FTMP2 | ||
3865 | |.elif "fpcall" == "sfpmod" | ||
3866 | | sfpmod | ||
3867 | |.else | ||
3868 | | load_got fpcall | ||
3869 | | call_extern | ||
3870 | |. nop | ||
3871 | |.endif | ||
3872 | | | ||
3873 | | ins_next1 | ||
3874 | |.if "intins" ~= "div" | ||
3875 | | b <3 | ||
3876 | |.endif | ||
3877 | |.if FPU | ||
3878 | |. sdc1 FRET1, 0(RA) | ||
3879 | |.else | ||
3880 | |. sd CRET1, 0(RA) | ||
3881 | |.endif | ||
3882 | |.if "intins" == "div" | ||
3883 | | ins_next2 | ||
3884 | |.endif | ||
3885 | | | ||
3886 | |.endmacro | ||
3887 | |||
3888 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | ||
3889 | | ins_arith addu, add.d, __adddf3, none | ||
3890 | break; | ||
3891 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | ||
3892 | | ins_arith subu, sub.d, __subdf3, none | ||
3893 | break; | ||
3894 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | ||
3895 | | ins_arith mult, mul.d, __muldf3, none | ||
3896 | break; | ||
3897 | case BC_DIVVN: | ||
3898 | | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z | ||
3899 | break; | ||
3900 | case BC_DIVNV: case BC_DIVVV: | ||
3901 | | ins_arithpre ->BC_DIVVN_Z | ||
3902 | break; | ||
3903 | case BC_MODVN: | ||
3904 | | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z | ||
3905 | break; | ||
3906 | case BC_MODNV: case BC_MODVV: | ||
3907 | | ins_arithpre ->BC_MODVN_Z | ||
3908 | break; | ||
3909 | case BC_POW: | ||
3910 | | ins_arithpre none | ||
3911 | | ld CARG1, 0(RB) | ||
3912 | | ld CARG2, 0(RC) | ||
3913 | | gettp TMP0, CARG1 | ||
3914 | | gettp TMP1, CARG2 | ||
3915 | | sltiu TMP0, TMP0, LJ_TISNUM | ||
3916 | | sltiu TMP1, TMP1, LJ_TISNUM | ||
3917 | | and AT, TMP0, TMP1 | ||
3918 | | load_got pow | ||
3919 | | beqz AT, ->vmeta_arith | ||
3920 | |. daddu RA, BASE, RA | ||
3921 | |.if FPU | ||
3922 | | ldc1 FARG1, 0(RB) | ||
3923 | | ldc1 FARG2, 0(RC) | ||
3924 | |.endif | ||
3925 | | call_extern | ||
3926 | |. nop | ||
3927 | | ins_next1 | ||
3928 | |.if FPU | ||
3929 | | sdc1 FRET1, 0(RA) | ||
3930 | |.else | ||
3931 | | sd CRET1, 0(RA) | ||
3932 | |.endif | ||
3933 | | ins_next2 | ||
3934 | break; | ||
3935 | |||
3936 | case BC_CAT: | ||
3937 | | // RA = dst*8, RB = src_start*8, RC = src_end*8 | ||
3938 | | decode_RB8a RB, INS | ||
3939 | | decode_RB8b RB | ||
3940 | | decode_RDtoRC8 RC, RD | ||
3941 | | dsubu CARG3, RC, RB | ||
3942 | | sd BASE, L->base | ||
3943 | | daddu CARG2, BASE, RC | ||
3944 | | move MULTRES, RB | ||
3945 | |->BC_CAT_Z: | ||
3946 | | load_got lj_meta_cat | ||
3947 | | srl CARG3, CARG3, 3 | ||
3948 | | sd PC, SAVE_PC | ||
3949 | | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
3950 | |. move CARG1, L | ||
3951 | | // Returns NULL (finished) or TValue * (metamethod). | ||
3952 | | bnez CRET1, ->vmeta_binop | ||
3953 | |. ld BASE, L->base | ||
3954 | | daddu RB, BASE, MULTRES | ||
3955 | | ld CRET1, 0(RB) | ||
3956 | | daddu RA, BASE, RA | ||
3957 | | ins_next1 | ||
3958 | | sd CRET1, 0(RA) | ||
3959 | | ins_next2 | ||
3960 | break; | ||
3961 | |||
3962 | /* -- Constant ops ------------------------------------------------------ */ | ||
3963 | |||
3964 | case BC_KSTR: | ||
3965 | | // RA = dst*8, RD = str_const*8 (~) | ||
3966 | | dsubu TMP1, KBASE, RD | ||
3967 | | ins_next1 | ||
3968 | | li TMP2, LJ_TSTR | ||
3969 | | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 | ||
3970 | | daddu RA, BASE, RA | ||
3971 | | settp TMP0, TMP2 | ||
3972 | | sd TMP0, 0(RA) | ||
3973 | | ins_next2 | ||
3974 | break; | ||
3975 | case BC_KCDATA: | ||
3976 | |.if FFI | ||
3977 | | // RA = dst*8, RD = cdata_const*8 (~) | ||
3978 | | dsubu TMP1, KBASE, RD | ||
3979 | | ins_next1 | ||
3980 | | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 | ||
3981 | | li TMP2, LJ_TCDATA | ||
3982 | | daddu RA, BASE, RA | ||
3983 | | settp TMP0, TMP2 | ||
3984 | | sd TMP0, 0(RA) | ||
3985 | | ins_next2 | ||
3986 | |.endif | ||
3987 | break; | ||
3988 | case BC_KSHORT: | ||
3989 | | // RA = dst*8, RD = int16_literal*8 | ||
3990 | | sra RD, INS, 16 | ||
3991 | | daddu RA, BASE, RA | ||
3992 | | zextw RD, RD | ||
3993 | | ins_next1 | ||
3994 | | settp RD, TISNUM | ||
3995 | | sd RD, 0(RA) | ||
3996 | | ins_next2 | ||
3997 | break; | ||
3998 | case BC_KNUM: | ||
3999 | | // RA = dst*8, RD = num_const*8 | ||
4000 | | daddu RD, KBASE, RD | ||
4001 | | daddu RA, BASE, RA | ||
4002 | | ld CRET1, 0(RD) | ||
4003 | | ins_next1 | ||
4004 | | sd CRET1, 0(RA) | ||
4005 | | ins_next2 | ||
4006 | break; | ||
4007 | case BC_KPRI: | ||
4008 | | // RA = dst*8, RD = primitive_type*8 (~) | ||
4009 | | daddu RA, BASE, RA | ||
4010 | | dsll TMP0, RD, 44 | ||
4011 | | not TMP0, TMP0 | ||
4012 | | ins_next1 | ||
4013 | | sd TMP0, 0(RA) | ||
4014 | | ins_next2 | ||
4015 | break; | ||
4016 | case BC_KNIL: | ||
4017 | | // RA = base*8, RD = end*8 | ||
4018 | | daddu RA, BASE, RA | ||
4019 | | sd TISNIL, 0(RA) | ||
4020 | | daddiu RA, RA, 8 | ||
4021 | | daddu RD, BASE, RD | ||
4022 | |1: | ||
4023 | | sd TISNIL, 0(RA) | ||
4024 | | slt AT, RA, RD | ||
4025 | | bnez AT, <1 | ||
4026 | |. daddiu RA, RA, 8 | ||
4027 | | ins_next_ | ||
4028 | break; | ||
4029 | |||
4030 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
4031 | |||
4032 | case BC_UGET: | ||
4033 | | // RA = dst*8, RD = uvnum*8 | ||
4034 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
4035 | | daddu RA, BASE, RA | ||
4036 | | cleartp LFUNC:RB | ||
4037 | | daddu RD, RD, LFUNC:RB | ||
4038 | | ld UPVAL:RB, LFUNC:RD->uvptr | ||
4039 | | ins_next1 | ||
4040 | | ld TMP1, UPVAL:RB->v | ||
4041 | | ld CRET1, 0(TMP1) | ||
4042 | | sd CRET1, 0(RA) | ||
4043 | | ins_next2 | ||
4044 | break; | ||
4045 | case BC_USETV: | ||
4046 | | // RA = uvnum*8, RD = src*8 | ||
4047 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
4048 | | daddu RD, BASE, RD | ||
4049 | | cleartp LFUNC:RB | ||
4050 | | daddu RA, RA, LFUNC:RB | ||
4051 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
4052 | | ld CRET1, 0(RD) | ||
4053 | | lbu TMP3, UPVAL:RB->marked | ||
4054 | | ld CARG2, UPVAL:RB->v | ||
4055 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | ||
4056 | | lbu TMP0, UPVAL:RB->closed | ||
4057 | | gettp TMP2, CRET1 | ||
4058 | | sd CRET1, 0(CARG2) | ||
4059 | | li AT, LJ_GC_BLACK|1 | ||
4060 | | or TMP3, TMP3, TMP0 | ||
4061 | | beq TMP3, AT, >2 // Upvalue is closed and black? | ||
4062 | |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) | ||
4063 | |1: | ||
4064 | | ins_next | ||
4065 | | | ||
4066 | |2: // Check if new value is collectable. | ||
4067 | | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) | ||
4068 | | beqz AT, <1 // tvisgcv(v) | ||
4069 | |. cleartp GCOBJ:CRET1, CRET1 | ||
4070 | | lbu TMP3, GCOBJ:CRET1->gch.marked | ||
4071 | | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | ||
4072 | | beqz TMP3, <1 | ||
4073 | |. load_got lj_gc_barrieruv | ||
4074 | | // Crossed a write barrier. Move the barrier forward. | ||
4075 | | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
4076 | |. daddiu CARG1, DISPATCH, GG_DISP2G | ||
4077 | | b <1 | ||
4078 | |. nop | ||
4079 | break; | ||
4080 | case BC_USETS: | ||
4081 | | // RA = uvnum*8, RD = str_const*8 (~) | ||
4082 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
4083 | | dsubu TMP1, KBASE, RD | ||
4084 | | cleartp LFUNC:RB | ||
4085 | | daddu RA, RA, LFUNC:RB | ||
4086 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
4087 | | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 | ||
4088 | | lbu TMP2, UPVAL:RB->marked | ||
4089 | | ld CARG2, UPVAL:RB->v | ||
4090 | | lbu TMP3, STR:TMP1->marked | ||
4091 | | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) | ||
4092 | | lbu TMP2, UPVAL:RB->closed | ||
4093 | | li TMP0, LJ_TSTR | ||
4094 | | settp TMP1, TMP0 | ||
4095 | | bnez AT, >2 | ||
4096 | |. sd TMP1, 0(CARG2) | ||
4097 | |1: | ||
4098 | | ins_next | ||
4099 | | | ||
4100 | |2: // Check if string is white and ensure upvalue is closed. | ||
4101 | | beqz TMP2, <1 | ||
4102 | |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) | ||
4103 | | beqz AT, <1 | ||
4104 | |. load_got lj_gc_barrieruv | ||
4105 | | // Crossed a write barrier. Move the barrier forward. | ||
4106 | | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
4107 | |. daddiu CARG1, DISPATCH, GG_DISP2G | ||
4108 | | b <1 | ||
4109 | |. nop | ||
4110 | break; | ||
4111 | case BC_USETN: | ||
4112 | | // RA = uvnum*8, RD = num_const*8 | ||
4113 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
4114 | | daddu RD, KBASE, RD | ||
4115 | | cleartp LFUNC:RB | ||
4116 | | daddu RA, RA, LFUNC:RB | ||
4117 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
4118 | | ld CRET1, 0(RD) | ||
4119 | | ld TMP1, UPVAL:RB->v | ||
4120 | | ins_next1 | ||
4121 | | sd CRET1, 0(TMP1) | ||
4122 | | ins_next2 | ||
4123 | break; | ||
4124 | case BC_USETP: | ||
4125 | | // RA = uvnum*8, RD = primitive_type*8 (~) | ||
4126 | | ld LFUNC:RB, FRAME_FUNC(BASE) | ||
4127 | | dsll TMP0, RD, 44 | ||
4128 | | cleartp LFUNC:RB | ||
4129 | | daddu RA, RA, LFUNC:RB | ||
4130 | | not TMP0, TMP0 | ||
4131 | | ld UPVAL:RB, LFUNC:RA->uvptr | ||
4132 | | ins_next1 | ||
4133 | | ld TMP1, UPVAL:RB->v | ||
4134 | | sd TMP0, 0(TMP1) | ||
4135 | | ins_next2 | ||
4136 | break; | ||
4137 | |||
4138 | case BC_UCLO: | ||
4139 | | // RA = level*8, RD = target | ||
4140 | | ld TMP2, L->openupval | ||
4141 | | branch_RD // Do this first since RD is not saved. | ||
4142 | | load_got lj_func_closeuv | ||
4143 | | sd BASE, L->base | ||
4144 | | beqz TMP2, >1 | ||
4145 | |. move CARG1, L | ||
4146 | | call_intern lj_func_closeuv // (lua_State *L, TValue *level) | ||
4147 | |. daddu CARG2, BASE, RA | ||
4148 | | ld BASE, L->base | ||
4149 | |1: | ||
4150 | | ins_next | ||
4151 | break; | ||
4152 | |||
4153 | case BC_FNEW: | ||
4154 | | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) | ||
4155 | | load_got lj_func_newL_gc | ||
4156 | | dsubu TMP1, KBASE, RD | ||
4157 | | ld CARG3, FRAME_FUNC(BASE) | ||
4158 | | ld CARG2, -8(TMP1) // KBASE-8-proto_const*8 | ||
4159 | | sd BASE, L->base | ||
4160 | | sd PC, SAVE_PC | ||
4161 | | cleartp CARG3 | ||
4162 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
4163 | | call_intern lj_func_newL_gc | ||
4164 | |. move CARG1, L | ||
4165 | | // Returns GCfuncL *. | ||
4166 | | li TMP0, LJ_TFUNC | ||
4167 | | ld BASE, L->base | ||
4168 | | ins_next1 | ||
4169 | | settp CRET1, TMP0 | ||
4170 | | daddu RA, BASE, RA | ||
4171 | | sd CRET1, 0(RA) | ||
4172 | | ins_next2 | ||
4173 | break; | ||
4174 | |||
4175 | /* -- Table ops --------------------------------------------------------- */ | ||
4176 | |||
4177 | case BC_TNEW: | ||
4178 | case BC_TDUP: | ||
4179 | | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) | ||
4180 | | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) | ||
4181 | | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | ||
4182 | | sd BASE, L->base | ||
4183 | | sd PC, SAVE_PC | ||
4184 | | sltu AT, TMP0, TMP1 | ||
4185 | | beqz AT, >5 | ||
4186 | |1: | ||
4187 | if (op == BC_TNEW) { | ||
4188 | | load_got lj_tab_new | ||
4189 | | srl CARG2, RD, 3 | ||
4190 | | andi CARG2, CARG2, 0x7ff | ||
4191 | | li TMP0, 0x801 | ||
4192 | | addiu AT, CARG2, -0x7ff | ||
4193 | | srl CARG3, RD, 14 | ||
4194 | |.if MIPSR6 | ||
4195 | | seleqz TMP0, TMP0, AT | ||
4196 | | selnez CARG2, CARG2, AT | ||
4197 | | or CARG2, CARG2, TMP0 | ||
4198 | |.else | ||
4199 | | movz CARG2, TMP0, AT | ||
4200 | |.endif | ||
4201 | | // (lua_State *L, int32_t asize, uint32_t hbits) | ||
4202 | | call_intern lj_tab_new | ||
4203 | |. move CARG1, L | ||
4204 | | // Returns Table *. | ||
4205 | } else { | ||
4206 | | load_got lj_tab_dup | ||
4207 | | dsubu TMP1, KBASE, RD | ||
4208 | | move CARG1, L | ||
4209 | | call_intern lj_tab_dup // (lua_State *L, Table *kt) | ||
4210 | |. ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 | ||
4211 | | // Returns Table *. | ||
4212 | } | ||
4213 | | li TMP0, LJ_TTAB | ||
4214 | | ld BASE, L->base | ||
4215 | | ins_next1 | ||
4216 | | daddu RA, BASE, RA | ||
4217 | | settp CRET1, TMP0 | ||
4218 | | sd CRET1, 0(RA) | ||
4219 | | ins_next2 | ||
4220 | |5: | ||
4221 | | load_got lj_gc_step_fixtop | ||
4222 | | move MULTRES, RD | ||
4223 | | call_intern lj_gc_step_fixtop // (lua_State *L) | ||
4224 | |. move CARG1, L | ||
4225 | | b <1 | ||
4226 | |. move RD, MULTRES | ||
4227 | break; | ||
4228 | |||
4229 | case BC_GGET: | ||
4230 | | // RA = dst*8, RD = str_const*8 (~) | ||
4231 | case BC_GSET: | ||
4232 | | // RA = src*8, RD = str_const*8 (~) | ||
4233 | | ld LFUNC:TMP2, FRAME_FUNC(BASE) | ||
4234 | | dsubu TMP1, KBASE, RD | ||
4235 | | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 | ||
4236 | | cleartp LFUNC:TMP2 | ||
4237 | | ld TAB:RB, LFUNC:TMP2->env | ||
4238 | if (op == BC_GGET) { | ||
4239 | | b ->BC_TGETS_Z | ||
4240 | } else { | ||
4241 | | b ->BC_TSETS_Z | ||
4242 | } | ||
4243 | |. daddu RA, BASE, RA | ||
4244 | break; | ||
4245 | |||
4246 | case BC_TGETV: | ||
4247 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4248 | | decode_RB8a RB, INS | ||
4249 | | decode_RB8b RB | ||
4250 | | decode_RDtoRC8 RC, RD | ||
4251 | | daddu CARG2, BASE, RB | ||
4252 | | daddu CARG3, BASE, RC | ||
4253 | | ld TAB:RB, 0(CARG2) | ||
4254 | | ld TMP2, 0(CARG3) | ||
4255 | | daddu RA, BASE, RA | ||
4256 | | checktab TAB:RB, ->vmeta_tgetv | ||
4257 | | gettp TMP3, TMP2 | ||
4258 | | bne TMP3, TISNUM, >5 // Integer key? | ||
4259 | |. lw TMP0, TAB:RB->asize | ||
4260 | | sextw TMP2, TMP2 | ||
4261 | | ld TMP1, TAB:RB->array | ||
4262 | | sltu AT, TMP2, TMP0 | ||
4263 | | sll TMP2, TMP2, 3 | ||
4264 | | beqz AT, ->vmeta_tgetv // Integer key and in array part? | ||
4265 | |. daddu TMP2, TMP1, TMP2 | ||
4266 | | ld AT, 0(TMP2) | ||
4267 | | beq AT, TISNIL, >2 | ||
4268 | |. ld CRET1, 0(TMP2) | ||
4269 | |1: | ||
4270 | | ins_next1 | ||
4271 | | sd CRET1, 0(RA) | ||
4272 | | ins_next2 | ||
4273 | | | ||
4274 | |2: // Check for __index if table value is nil. | ||
4275 | | ld TAB:TMP2, TAB:RB->metatable | ||
4276 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4277 | |. nop | ||
4278 | | lbu TMP0, TAB:TMP2->nomm | ||
4279 | | andi TMP0, TMP0, 1<<MM_index | ||
4280 | | bnez TMP0, <1 // 'no __index' flag set: done. | ||
4281 | |. nop | ||
4282 | | b ->vmeta_tgetv | ||
4283 | |. nop | ||
4284 | | | ||
4285 | |5: | ||
4286 | | li AT, LJ_TSTR | ||
4287 | | bne TMP3, AT, ->vmeta_tgetv | ||
4288 | |. cleartp RC, TMP2 | ||
4289 | | b ->BC_TGETS_Z // String key? | ||
4290 | |. nop | ||
4291 | break; | ||
4292 | case BC_TGETS: | ||
4293 | | // RA = dst*8, RB = table*8, RC = str_const*8 (~) | ||
4294 | | decode_RB8a RB, INS | ||
4295 | | decode_RB8b RB | ||
4296 | | decode_RC8a RC, INS | ||
4297 | | daddu CARG2, BASE, RB | ||
4298 | | decode_RC8b RC | ||
4299 | | ld TAB:RB, 0(CARG2) | ||
4300 | | dsubu CARG3, KBASE, RC | ||
4301 | | daddu RA, BASE, RA | ||
4302 | | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 | ||
4303 | | checktab TAB:RB, ->vmeta_tgets1 | ||
4304 | |->BC_TGETS_Z: | ||
4305 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+dst*8 | ||
4306 | | lw TMP0, TAB:RB->hmask | ||
4307 | | lw TMP1, STR:RC->sid | ||
4308 | | ld NODE:TMP2, TAB:RB->node | ||
4309 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | ||
4310 | | sll TMP0, TMP1, 5 | ||
4311 | | sll TMP1, TMP1, 3 | ||
4312 | | subu TMP1, TMP0, TMP1 | ||
4313 | | li TMP3, LJ_TSTR | ||
4314 | | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | ||
4315 | | settp STR:RC, TMP3 // Tagged key to look for. | ||
4316 | |1: | ||
4317 | | ld CARG1, NODE:TMP2->key | ||
4318 | | ld CRET1, NODE:TMP2->val | ||
4319 | | ld NODE:TMP1, NODE:TMP2->next | ||
4320 | | bne CARG1, RC, >4 | ||
4321 | |. ld TAB:TMP3, TAB:RB->metatable | ||
4322 | | beq CRET1, TISNIL, >5 // Key found, but nil value? | ||
4323 | |. nop | ||
4324 | |3: | ||
4325 | | ins_next1 | ||
4326 | | sd CRET1, 0(RA) | ||
4327 | | ins_next2 | ||
4328 | | | ||
4329 | |4: // Follow hash chain. | ||
4330 | | bnez NODE:TMP1, <1 | ||
4331 | |. move NODE:TMP2, NODE:TMP1 | ||
4332 | | // End of hash chain: key not found, nil result. | ||
4333 | | | ||
4334 | |5: // Check for __index if table value is nil. | ||
4335 | | beqz TAB:TMP3, <3 // No metatable: done. | ||
4336 | |. move CRET1, TISNIL | ||
4337 | | lbu TMP0, TAB:TMP3->nomm | ||
4338 | | andi TMP0, TMP0, 1<<MM_index | ||
4339 | | bnez TMP0, <3 // 'no __index' flag set: done. | ||
4340 | |. nop | ||
4341 | | b ->vmeta_tgets | ||
4342 | |. nop | ||
4343 | break; | ||
4344 | case BC_TGETB: | ||
4345 | | // RA = dst*8, RB = table*8, RC = index*8 | ||
4346 | | decode_RB8a RB, INS | ||
4347 | | decode_RB8b RB | ||
4348 | | daddu CARG2, BASE, RB | ||
4349 | | decode_RDtoRC8 RC, RD | ||
4350 | | ld TAB:RB, 0(CARG2) | ||
4351 | | daddu RA, BASE, RA | ||
4352 | | srl TMP0, RC, 3 | ||
4353 | | checktab TAB:RB, ->vmeta_tgetb | ||
4354 | | lw TMP1, TAB:RB->asize | ||
4355 | | ld TMP2, TAB:RB->array | ||
4356 | | sltu AT, TMP0, TMP1 | ||
4357 | | beqz AT, ->vmeta_tgetb | ||
4358 | |. daddu RC, TMP2, RC | ||
4359 | | ld AT, 0(RC) | ||
4360 | | beq AT, TISNIL, >5 | ||
4361 | |. ld CRET1, 0(RC) | ||
4362 | |1: | ||
4363 | | ins_next1 | ||
4364 | | sd CRET1, 0(RA) | ||
4365 | | ins_next2 | ||
4366 | | | ||
4367 | |5: // Check for __index if table value is nil. | ||
4368 | | ld TAB:TMP2, TAB:RB->metatable | ||
4369 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4370 | |. nop | ||
4371 | | lbu TMP1, TAB:TMP2->nomm | ||
4372 | | andi TMP1, TMP1, 1<<MM_index | ||
4373 | | bnez TMP1, <1 // 'no __index' flag set: done. | ||
4374 | |. nop | ||
4375 | | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! | ||
4376 | |. nop | ||
4377 | break; | ||
4378 | case BC_TGETR: | ||
4379 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4380 | | decode_RB8a RB, INS | ||
4381 | | decode_RB8b RB | ||
4382 | | decode_RDtoRC8 RC, RD | ||
4383 | | daddu RB, BASE, RB | ||
4384 | | daddu RC, BASE, RC | ||
4385 | | ld TAB:CARG1, 0(RB) | ||
4386 | | lw CARG2, LO(RC) | ||
4387 | | daddu RA, BASE, RA | ||
4388 | | cleartp TAB:CARG1 | ||
4389 | | lw TMP0, TAB:CARG1->asize | ||
4390 | | ld TMP1, TAB:CARG1->array | ||
4391 | | sltu AT, CARG2, TMP0 | ||
4392 | | sll TMP2, CARG2, 3 | ||
4393 | | beqz AT, ->vmeta_tgetr // In array part? | ||
4394 | |. daddu CRET1, TMP1, TMP2 | ||
4395 | | ld CARG2, 0(CRET1) | ||
4396 | |->BC_TGETR_Z: | ||
4397 | | ins_next1 | ||
4398 | | sd CARG2, 0(RA) | ||
4399 | | ins_next2 | ||
4400 | break; | ||
4401 | |||
4402 | case BC_TSETV: | ||
4403 | | // RA = src*8, RB = table*8, RC = key*8 | ||
4404 | | decode_RB8a RB, INS | ||
4405 | | decode_RB8b RB | ||
4406 | | decode_RDtoRC8 RC, RD | ||
4407 | | daddu CARG2, BASE, RB | ||
4408 | | daddu CARG3, BASE, RC | ||
4409 | | ld RB, 0(CARG2) | ||
4410 | | ld TMP2, 0(CARG3) | ||
4411 | | daddu RA, BASE, RA | ||
4412 | | checktab RB, ->vmeta_tsetv | ||
4413 | | checkint TMP2, >5 | ||
4414 | |. sextw RC, TMP2 | ||
4415 | | lw TMP0, TAB:RB->asize | ||
4416 | | ld TMP1, TAB:RB->array | ||
4417 | | sltu AT, RC, TMP0 | ||
4418 | | sll TMP2, RC, 3 | ||
4419 | | beqz AT, ->vmeta_tsetv // Integer key and in array part? | ||
4420 | |. daddu TMP1, TMP1, TMP2 | ||
4421 | | ld TMP0, 0(TMP1) | ||
4422 | | lbu TMP3, TAB:RB->marked | ||
4423 | | beq TMP0, TISNIL, >3 | ||
4424 | |. ld CRET1, 0(RA) | ||
4425 | |1: | ||
4426 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4427 | | bnez AT, >7 | ||
4428 | |. sd CRET1, 0(TMP1) | ||
4429 | |2: | ||
4430 | | ins_next | ||
4431 | | | ||
4432 | |3: // Check for __newindex if previous value is nil. | ||
4433 | | ld TAB:TMP2, TAB:RB->metatable | ||
4434 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4435 | |. nop | ||
4436 | | lbu TMP2, TAB:TMP2->nomm | ||
4437 | | andi TMP2, TMP2, 1<<MM_newindex | ||
4438 | | bnez TMP2, <1 // 'no __newindex' flag set: done. | ||
4439 | |. nop | ||
4440 | | b ->vmeta_tsetv | ||
4441 | |. nop | ||
4442 | | | ||
4443 | |5: | ||
4444 | | gettp AT, TMP2 | ||
4445 | | daddiu AT, AT, -LJ_TSTR | ||
4446 | | bnez AT, ->vmeta_tsetv | ||
4447 | |. nop | ||
4448 | | b ->BC_TSETS_Z // String key? | ||
4449 | |. cleartp STR:RC, TMP2 | ||
4450 | | | ||
4451 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4452 | | barrierback TAB:RB, TMP3, TMP0, <2 | ||
4453 | break; | ||
4454 | case BC_TSETS: | ||
4455 | | // RA = src*8, RB = table*8, RC = str_const*8 (~) | ||
4456 | | decode_RB8a RB, INS | ||
4457 | | decode_RB8b RB | ||
4458 | | daddu CARG2, BASE, RB | ||
4459 | | decode_RC8a RC, INS | ||
4460 | | ld TAB:RB, 0(CARG2) | ||
4461 | | decode_RC8b RC | ||
4462 | | dsubu CARG3, KBASE, RC | ||
4463 | | ld RC, -8(CARG3) // KBASE-8-str_const*8 | ||
4464 | | daddu RA, BASE, RA | ||
4465 | | cleartp STR:RC | ||
4466 | | checktab TAB:RB, ->vmeta_tsets1 | ||
4467 | |->BC_TSETS_Z: | ||
4468 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | ||
4469 | | lw TMP0, TAB:RB->hmask | ||
4470 | | lw TMP1, STR:RC->sid | ||
4471 | | ld NODE:TMP2, TAB:RB->node | ||
4472 | | sb r0, TAB:RB->nomm // Clear metamethod cache. | ||
4473 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | ||
4474 | | sll TMP0, TMP1, 5 | ||
4475 | | sll TMP1, TMP1, 3 | ||
4476 | | subu TMP1, TMP0, TMP1 | ||
4477 | | li TMP3, LJ_TSTR | ||
4478 | | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | ||
4479 | | settp STR:RC, TMP3 // Tagged key to look for. | ||
4480 | |.if FPU | ||
4481 | | ldc1 FTMP0, 0(RA) | ||
4482 | |.else | ||
4483 | | ld CRET1, 0(RA) | ||
4484 | |.endif | ||
4485 | |1: | ||
4486 | | ld TMP0, NODE:TMP2->key | ||
4487 | | ld CARG2, NODE:TMP2->val | ||
4488 | | ld NODE:TMP1, NODE:TMP2->next | ||
4489 | | bne TMP0, RC, >5 | ||
4490 | |. lbu TMP3, TAB:RB->marked | ||
4491 | | beq CARG2, TISNIL, >4 // Key found, but nil value? | ||
4492 | |. ld TAB:TMP0, TAB:RB->metatable | ||
4493 | |2: | ||
4494 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4495 | | bnez AT, >7 | ||
4496 | |.if FPU | ||
4497 | |. sdc1 FTMP0, NODE:TMP2->val | ||
4498 | |.else | ||
4499 | |. sd CRET1, NODE:TMP2->val | ||
4500 | |.endif | ||
4501 | |3: | ||
4502 | | ins_next | ||
4503 | | | ||
4504 | |4: // Check for __newindex if previous value is nil. | ||
4505 | | beqz TAB:TMP0, <2 // No metatable: done. | ||
4506 | |. nop | ||
4507 | | lbu TMP0, TAB:TMP0->nomm | ||
4508 | | andi TMP0, TMP0, 1<<MM_newindex | ||
4509 | | bnez TMP0, <2 // 'no __newindex' flag set: done. | ||
4510 | |. nop | ||
4511 | | b ->vmeta_tsets | ||
4512 | |. nop | ||
4513 | | | ||
4514 | |5: // Follow hash chain. | ||
4515 | | bnez NODE:TMP1, <1 | ||
4516 | |. move NODE:TMP2, NODE:TMP1 | ||
4517 | | // End of hash chain: key not found, add a new one. | ||
4518 | | | ||
4519 | | // But check for __newindex first. | ||
4520 | | ld TAB:TMP2, TAB:RB->metatable | ||
4521 | | beqz TAB:TMP2, >6 // No metatable: continue. | ||
4522 | |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
4523 | | lbu TMP0, TAB:TMP2->nomm | ||
4524 | | andi TMP0, TMP0, 1<<MM_newindex | ||
4525 | | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
4526 | |6: | ||
4527 | | load_got lj_tab_newkey | ||
4528 | | sd RC, 0(CARG3) | ||
4529 | | sd BASE, L->base | ||
4530 | | move CARG2, TAB:RB | ||
4531 | | sd PC, SAVE_PC | ||
4532 | | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
4533 | |. move CARG1, L | ||
4534 | | // Returns TValue *. | ||
4535 | | ld BASE, L->base | ||
4536 | |.if FPU | ||
4537 | | b <3 // No 2nd write barrier needed. | ||
4538 | |. sdc1 FTMP0, 0(CRET1) | ||
4539 | |.else | ||
4540 | | ld CARG1, 0(RA) | ||
4541 | | b <3 // No 2nd write barrier needed. | ||
4542 | |. sd CARG1, 0(CRET1) | ||
4543 | |.endif | ||
4544 | | | ||
4545 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4546 | | barrierback TAB:RB, TMP3, TMP0, <3 | ||
4547 | break; | ||
4548 | case BC_TSETB: | ||
4549 | | // RA = src*8, RB = table*8, RC = index*8 | ||
4550 | | decode_RB8a RB, INS | ||
4551 | | decode_RB8b RB | ||
4552 | | daddu CARG2, BASE, RB | ||
4553 | | decode_RDtoRC8 RC, RD | ||
4554 | | ld TAB:RB, 0(CARG2) | ||
4555 | | daddu RA, BASE, RA | ||
4556 | | srl TMP0, RC, 3 | ||
4557 | | checktab RB, ->vmeta_tsetb | ||
4558 | | lw TMP1, TAB:RB->asize | ||
4559 | | ld TMP2, TAB:RB->array | ||
4560 | | sltu AT, TMP0, TMP1 | ||
4561 | | beqz AT, ->vmeta_tsetb | ||
4562 | |. daddu RC, TMP2, RC | ||
4563 | | ld TMP1, 0(RC) | ||
4564 | | lbu TMP3, TAB:RB->marked | ||
4565 | | beq TMP1, TISNIL, >5 | ||
4566 | |1: | ||
4567 | |. ld CRET1, 0(RA) | ||
4568 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4569 | | bnez AT, >7 | ||
4570 | |. sd CRET1, 0(RC) | ||
4571 | |2: | ||
4572 | | ins_next | ||
4573 | | | ||
4574 | |5: // Check for __newindex if previous value is nil. | ||
4575 | | ld TAB:TMP2, TAB:RB->metatable | ||
4576 | | beqz TAB:TMP2, <1 // No metatable: done. | ||
4577 | |. nop | ||
4578 | | lbu TMP1, TAB:TMP2->nomm | ||
4579 | | andi TMP1, TMP1, 1<<MM_newindex | ||
4580 | | bnez TMP1, <1 // 'no __newindex' flag set: done. | ||
4581 | |. nop | ||
4582 | | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! | ||
4583 | |. nop | ||
4584 | | | ||
4585 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4586 | | barrierback TAB:RB, TMP3, TMP0, <2 | ||
4587 | break; | ||
4588 | case BC_TSETR: | ||
4589 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4590 | | decode_RB8a RB, INS | ||
4591 | | decode_RB8b RB | ||
4592 | | decode_RDtoRC8 RC, RD | ||
4593 | | daddu CARG1, BASE, RB | ||
4594 | | daddu CARG3, BASE, RC | ||
4595 | | ld TAB:CARG2, 0(CARG1) | ||
4596 | | lw CARG3, LO(CARG3) | ||
4597 | | cleartp TAB:CARG2 | ||
4598 | | lbu TMP3, TAB:CARG2->marked | ||
4599 | | lw TMP0, TAB:CARG2->asize | ||
4600 | | ld TMP1, TAB:CARG2->array | ||
4601 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4602 | | bnez AT, >7 | ||
4603 | |. daddu RA, BASE, RA | ||
4604 | |2: | ||
4605 | | sltu AT, CARG3, TMP0 | ||
4606 | | sll TMP2, CARG3, 3 | ||
4607 | | beqz AT, ->vmeta_tsetr // In array part? | ||
4608 | |. daddu CRET1, TMP1, TMP2 | ||
4609 | |->BC_TSETR_Z: | ||
4610 | | ld CARG1, 0(RA) | ||
4611 | | ins_next1 | ||
4612 | | sd CARG1, 0(CRET1) | ||
4613 | | ins_next2 | ||
4614 | | | ||
4615 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4616 | | barrierback TAB:CARG2, TMP3, CRET1, <2 | ||
4617 | break; | ||
4618 | |||
4619 | case BC_TSETM: | ||
4620 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | ||
4621 | | daddu RA, BASE, RA | ||
4622 | |1: | ||
4623 | | daddu TMP3, KBASE, RD | ||
4624 | | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. | ||
4625 | | addiu TMP0, MULTRES, -8 | ||
4626 | | lw TMP3, LO(TMP3) // Integer constant is in lo-word. | ||
4627 | | beqz TMP0, >4 // Nothing to copy? | ||
4628 | |. srl CARG3, TMP0, 3 | ||
4629 | | cleartp CARG2 | ||
4630 | | addu CARG3, CARG3, TMP3 | ||
4631 | | lw TMP2, TAB:CARG2->asize | ||
4632 | | sll TMP1, TMP3, 3 | ||
4633 | | lbu TMP3, TAB:CARG2->marked | ||
4634 | | ld CARG1, TAB:CARG2->array | ||
4635 | | sltu AT, TMP2, CARG3 | ||
4636 | | bnez AT, >5 | ||
4637 | |. daddu TMP2, RA, TMP0 | ||
4638 | | daddu TMP1, TMP1, CARG1 | ||
4639 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) | ||
4640 | |3: // Copy result slots to table. | ||
4641 | | ld CRET1, 0(RA) | ||
4642 | | daddiu RA, RA, 8 | ||
4643 | | sltu AT, RA, TMP2 | ||
4644 | | sd CRET1, 0(TMP1) | ||
4645 | | bnez AT, <3 | ||
4646 | |. daddiu TMP1, TMP1, 8 | ||
4647 | | bnez TMP0, >7 | ||
4648 | |. nop | ||
4649 | |4: | ||
4650 | | ins_next | ||
4651 | | | ||
4652 | |5: // Need to resize array part. | ||
4653 | | load_got lj_tab_reasize | ||
4654 | | sd BASE, L->base | ||
4655 | | sd PC, SAVE_PC | ||
4656 | | move BASE, RD | ||
4657 | | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
4658 | |. move CARG1, L | ||
4659 | | // Must not reallocate the stack. | ||
4660 | | move RD, BASE | ||
4661 | | b <1 | ||
4662 | |. ld BASE, L->base // Reload BASE for lack of a saved register. | ||
4663 | | | ||
4664 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
4665 | | barrierback TAB:CARG2, TMP3, TMP0, <4 | ||
4666 | break; | ||
4667 | |||
4668 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
4669 | |||
4670 | case BC_CALLM: | ||
4671 | | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 | ||
4672 | | decode_RDtoRC8 NARGS8:RC, RD | ||
4673 | | b ->BC_CALL_Z | ||
4674 | |. addu NARGS8:RC, NARGS8:RC, MULTRES | ||
4675 | break; | ||
4676 | case BC_CALL: | ||
4677 | | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 | ||
4678 | | decode_RDtoRC8 NARGS8:RC, RD | ||
4679 | |->BC_CALL_Z: | ||
4680 | | move TMP2, BASE | ||
4681 | | daddu BASE, BASE, RA | ||
4682 | | ld LFUNC:RB, 0(BASE) | ||
4683 | | daddiu BASE, BASE, 16 | ||
4684 | | addiu NARGS8:RC, NARGS8:RC, -8 | ||
4685 | | checkfunc RB, ->vmeta_call | ||
4686 | | ins_call | ||
4687 | break; | ||
4688 | |||
4689 | case BC_CALLMT: | ||
4690 | | // RA = base*8, (RB = 0,) RC = extra_nargs*8 | ||
4691 | | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. | ||
4692 | | // Fall through. Assumes BC_CALLT follows. | ||
4693 | break; | ||
4694 | case BC_CALLT: | ||
4695 | | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 | ||
4696 | | daddu RA, BASE, RA | ||
4697 | | ld RB, 0(RA) | ||
4698 | | move NARGS8:RC, RD | ||
4699 | | ld TMP1, FRAME_PC(BASE) | ||
4700 | | daddiu RA, RA, 16 | ||
4701 | | addiu NARGS8:RC, NARGS8:RC, -8 | ||
4702 | | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt | ||
4703 | |->BC_CALLT_Z: | ||
4704 | | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. | ||
4705 | | lbu TMP3, LFUNC:CARG3->ffid | ||
4706 | | bnez TMP0, >7 | ||
4707 | |. xori TMP2, TMP1, FRAME_VARG | ||
4708 | |1: | ||
4709 | | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. | ||
4710 | | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? | ||
4711 | | move TMP2, BASE | ||
4712 | | move RB, CARG3 | ||
4713 | | beqz NARGS8:RC, >3 | ||
4714 | |. move TMP3, NARGS8:RC | ||
4715 | |2: | ||
4716 | | ld CRET1, 0(RA) | ||
4717 | | daddiu RA, RA, 8 | ||
4718 | | addiu TMP3, TMP3, -8 | ||
4719 | | sd CRET1, 0(TMP2) | ||
4720 | | bnez TMP3, <2 | ||
4721 | |. daddiu TMP2, TMP2, 8 | ||
4722 | |3: | ||
4723 | | or TMP0, TMP0, AT | ||
4724 | | beqz TMP0, >5 | ||
4725 | |. nop | ||
4726 | |4: | ||
4727 | | ins_callt | ||
4728 | | | ||
4729 | |5: // Tailcall to a fast function with a Lua frame below. | ||
4730 | | lw INS, -4(TMP1) | ||
4731 | | decode_RA8a RA, INS | ||
4732 | | decode_RA8b RA | ||
4733 | | dsubu TMP1, BASE, RA | ||
4734 | | ld TMP1, -32(TMP1) | ||
4735 | | cleartp LFUNC:TMP1 | ||
4736 | | ld TMP1, LFUNC:TMP1->pc | ||
4737 | | b <4 | ||
4738 | |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. | ||
4739 | | | ||
4740 | |7: // Tailcall from a vararg function. | ||
4741 | | andi AT, TMP2, FRAME_TYPEP | ||
4742 | | bnez AT, <1 // Vararg frame below? | ||
4743 | |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. | ||
4744 | | move BASE, TMP2 | ||
4745 | | ld TMP1, FRAME_PC(TMP2) | ||
4746 | | b <1 | ||
4747 | |. andi TMP0, TMP1, FRAME_TYPE | ||
4748 | break; | ||
4749 | |||
4750 | case BC_ITERC: | ||
4751 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) | ||
4752 | | move TMP2, BASE // Save old BASE for vmeta_call. | ||
4753 | | daddu BASE, BASE, RA | ||
4754 | | ld RB, -24(BASE) | ||
4755 | | ld CARG1, -16(BASE) | ||
4756 | | ld CARG2, -8(BASE) | ||
4757 | | li NARGS8:RC, 16 // Iterators get 2 arguments. | ||
4758 | | sd RB, 0(BASE) // Copy callable. | ||
4759 | | sd CARG1, 16(BASE) // Copy state. | ||
4760 | | sd CARG2, 24(BASE) // Copy control var. | ||
4761 | | daddiu BASE, BASE, 16 | ||
4762 | | checkfunc RB, ->vmeta_call | ||
4763 | | ins_call | ||
4764 | break; | ||
4765 | |||
4766 | case BC_ITERN: | ||
4767 | |.if JIT and ENDIAN_LE | ||
4768 | | hotloop | ||
4769 | |.endif | ||
4770 | |->vm_IITERN: | ||
4771 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | ||
4772 | | daddu RA, BASE, RA | ||
4773 | | ld TAB:RB, -16(RA) | ||
4774 | | lw RC, -8+LO(RA) // Get index from control var. | ||
4775 | | cleartp TAB:RB | ||
4776 | | daddiu PC, PC, 4 | ||
4777 | | lw TMP0, TAB:RB->asize | ||
4778 | | ld TMP1, TAB:RB->array | ||
4779 | | dsll CARG3, TISNUM, 47 | ||
4780 | |1: // Traverse array part. | ||
4781 | | sltu AT, RC, TMP0 | ||
4782 | | beqz AT, >5 // Index points after array part? | ||
4783 | |. sll TMP3, RC, 3 | ||
4784 | | daddu TMP3, TMP1, TMP3 | ||
4785 | | ld CARG1, 0(TMP3) | ||
4786 | | lhu RD, -4+OFS_RD(PC) | ||
4787 | | or TMP2, RC, CARG3 | ||
4788 | | beq CARG1, TISNIL, <1 // Skip holes in array part. | ||
4789 | |. addiu RC, RC, 1 | ||
4790 | | sd TMP2, 0(RA) | ||
4791 | | sd CARG1, 8(RA) | ||
4792 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
4793 | | decode_RD4b RD | ||
4794 | | daddu RD, RD, TMP3 | ||
4795 | | sw RC, -8+LO(RA) // Update control var. | ||
4796 | | daddu PC, PC, RD | ||
4797 | |3: | ||
4798 | | ins_next | ||
4799 | | | ||
4800 | |5: // Traverse hash part. | ||
4801 | | lw TMP1, TAB:RB->hmask | ||
4802 | | subu RC, RC, TMP0 | ||
4803 | | ld TMP2, TAB:RB->node | ||
4804 | |6: | ||
4805 | | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. | ||
4806 | | bnez AT, <3 | ||
4807 | |. sll TMP3, RC, 5 | ||
4808 | | sll RB, RC, 3 | ||
4809 | | subu TMP3, TMP3, RB | ||
4810 | | daddu NODE:TMP3, TMP3, TMP2 | ||
4811 | | ld CARG1, 0(NODE:TMP3) | ||
4812 | | lhu RD, -4+OFS_RD(PC) | ||
4813 | | beq CARG1, TISNIL, <6 // Skip holes in hash part. | ||
4814 | |. addiu RC, RC, 1 | ||
4815 | | ld CARG2, NODE:TMP3->key | ||
4816 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
4817 | | sd CARG1, 8(RA) | ||
4818 | | addu RC, RC, TMP0 | ||
4819 | | decode_RD4b RD | ||
4820 | | addu RD, RD, TMP3 | ||
4821 | | sd CARG2, 0(RA) | ||
4822 | | daddu PC, PC, RD | ||
4823 | | b <3 | ||
4824 | |. sw RC, -8+LO(RA) // Update control var. | ||
4825 | break; | ||
4826 | |||
4827 | case BC_ISNEXT: | ||
4828 | | // RA = base*8, RD = target (points to ITERN) | ||
4829 | | daddu RA, BASE, RA | ||
4830 | | srl TMP0, RD, 1 | ||
4831 | | ld CFUNC:CARG1, -24(RA) | ||
4832 | | daddu TMP0, PC, TMP0 | ||
4833 | | ld CARG2, -16(RA) | ||
4834 | | ld CARG3, -8(RA) | ||
4835 | | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) | ||
4836 | | checkfunc CFUNC:CARG1, >5 | ||
4837 | | gettp CARG2, CARG2 | ||
4838 | | daddiu CARG2, CARG2, -LJ_TTAB | ||
4839 | | lbu TMP1, CFUNC:CARG1->ffid | ||
4840 | | daddiu CARG3, CARG3, -LJ_TNIL | ||
4841 | | or AT, CARG2, CARG3 | ||
4842 | | daddiu TMP1, TMP1, -FF_next_N | ||
4843 | | or AT, AT, TMP1 | ||
4844 | | bnez AT, >5 | ||
4845 | |. lui TMP1, (LJ_KEYINDEX >> 16) | ||
4846 | | daddu PC, TMP0, TMP2 | ||
4847 | | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) | ||
4848 | | dsll TMP1, TMP1, 32 | ||
4849 | | sd TMP1, -8(RA) | ||
4850 | |1: | ||
4851 | | ins_next | ||
4852 | |5: // Despecialize bytecode if any of the checks fail. | ||
4853 | | li TMP3, BC_JMP | ||
4854 | | li TMP1, BC_ITERC | ||
4855 | | sb TMP3, -4+OFS_OP(PC) | ||
4856 | | daddu PC, TMP0, TMP2 | ||
4857 | |.if JIT | ||
4858 | | lb TMP0, OFS_OP(PC) | ||
4859 | | li AT, BC_ITERN | ||
4860 | | bne TMP0, AT, >6 | ||
4861 | |. lhu TMP2, OFS_RD(PC) | ||
4862 | |.endif | ||
4863 | | b <1 | ||
4864 | |. sb TMP1, OFS_OP(PC) | ||
4865 | |.if JIT | ||
4866 | |6: // Unpatch JLOOP. | ||
4867 | | ld TMP0, DISPATCH_J(trace)(DISPATCH) | ||
4868 | | sll TMP2, TMP2, 3 | ||
4869 | | daddu TMP0, TMP0, TMP2 | ||
4870 | | ld TRACE:TMP2, 0(TMP0) | ||
4871 | | lw TMP0, TRACE:TMP2->startins | ||
4872 | | li AT, -256 | ||
4873 | | and TMP0, TMP0, AT | ||
4874 | | or TMP0, TMP0, TMP1 | ||
4875 | | b <1 | ||
4876 | |. sw TMP0, 0(PC) | ||
4877 | |.endif | ||
4878 | break; | ||
4879 | |||
4880 | case BC_VARG: | ||
4881 | | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 | ||
4882 | | ld TMP0, FRAME_PC(BASE) | ||
4883 | | decode_RDtoRC8 RC, RD | ||
4884 | | decode_RB8a RB, INS | ||
4885 | | daddu RC, BASE, RC | ||
4886 | | decode_RB8b RB | ||
4887 | | daddu RA, BASE, RA | ||
4888 | | daddiu RC, RC, FRAME_VARG | ||
4889 | | daddu TMP2, RA, RB | ||
4890 | | daddiu TMP3, BASE, -16 // TMP3 = vtop | ||
4891 | | dsubu RC, RC, TMP0 // RC = vbase | ||
4892 | | // Note: RC may now be even _above_ BASE if nargs was < numparams. | ||
4893 | | beqz RB, >5 // Copy all varargs? | ||
4894 | |. dsubu TMP1, TMP3, RC | ||
4895 | | daddiu TMP2, TMP2, -16 | ||
4896 | |1: // Copy vararg slots to destination slots. | ||
4897 | | ld CARG1, 0(RC) | ||
4898 | | sltu AT, RC, TMP3 | ||
4899 | | daddiu RC, RC, 8 | ||
4900 | |.if MIPSR6 | ||
4901 | | selnez CARG1, CARG1, AT | ||
4902 | | seleqz AT, TISNIL, AT | ||
4903 | | or CARG1, CARG1, AT | ||
4904 | |.else | ||
4905 | | movz CARG1, TISNIL, AT | ||
4906 | |.endif | ||
4907 | | sd CARG1, 0(RA) | ||
4908 | | sltu AT, RA, TMP2 | ||
4909 | | bnez AT, <1 | ||
4910 | |. daddiu RA, RA, 8 | ||
4911 | |3: | ||
4912 | | ins_next | ||
4913 | | | ||
4914 | |5: // Copy all varargs. | ||
4915 | | ld TMP0, L->maxstack | ||
4916 | | blez TMP1, <3 // No vararg slots? | ||
4917 | |. li MULTRES, 8 // MULTRES = (0+1)*8 | ||
4918 | | daddu TMP2, RA, TMP1 | ||
4919 | | sltu AT, TMP0, TMP2 | ||
4920 | | bnez AT, >7 | ||
4921 | |. daddiu MULTRES, TMP1, 8 | ||
4922 | |6: | ||
4923 | | ld CRET1, 0(RC) | ||
4924 | | daddiu RC, RC, 8 | ||
4925 | | sd CRET1, 0(RA) | ||
4926 | | sltu AT, RC, TMP3 | ||
4927 | | bnez AT, <6 // More vararg slots? | ||
4928 | |. daddiu RA, RA, 8 | ||
4929 | | b <3 | ||
4930 | |. nop | ||
4931 | | | ||
4932 | |7: // Grow stack for varargs. | ||
4933 | | load_got lj_state_growstack | ||
4934 | | sd RA, L->top | ||
4935 | | dsubu RA, RA, BASE | ||
4936 | | sd BASE, L->base | ||
4937 | | dsubu BASE, RC, BASE // Need delta, because BASE may change. | ||
4938 | | sd PC, SAVE_PC | ||
4939 | | srl CARG2, TMP1, 3 | ||
4940 | | call_intern lj_state_growstack // (lua_State *L, int n) | ||
4941 | |. move CARG1, L | ||
4942 | | move RC, BASE | ||
4943 | | ld BASE, L->base | ||
4944 | | daddu RA, BASE, RA | ||
4945 | | daddu RC, BASE, RC | ||
4946 | | b <6 | ||
4947 | |. daddiu TMP3, BASE, -16 | ||
4948 | break; | ||
4949 | |||
4950 | /* -- Returns ----------------------------------------------------------- */ | ||
4951 | |||
4952 | case BC_RETM: | ||
4953 | | // RA = results*8, RD = extra_nresults*8 | ||
4954 | | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. | ||
4955 | | // Fall through. Assumes BC_RET follows. | ||
4956 | break; | ||
4957 | |||
4958 | case BC_RET: | ||
4959 | | // RA = results*8, RD = (nresults+1)*8 | ||
4960 | | ld PC, FRAME_PC(BASE) | ||
4961 | | daddu RA, BASE, RA | ||
4962 | | move MULTRES, RD | ||
4963 | |1: | ||
4964 | | andi TMP0, PC, FRAME_TYPE | ||
4965 | | bnez TMP0, ->BC_RETV_Z | ||
4966 | |. xori TMP1, PC, FRAME_VARG | ||
4967 | | | ||
4968 | |->BC_RET_Z: | ||
4969 | | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return | ||
4970 | | lw INS, -4(PC) | ||
4971 | | daddiu TMP2, BASE, -16 | ||
4972 | | daddiu RC, RD, -8 | ||
4973 | | decode_RA8a TMP0, INS | ||
4974 | | decode_RB8a RB, INS | ||
4975 | | decode_RA8b TMP0 | ||
4976 | | decode_RB8b RB | ||
4977 | | daddu TMP3, TMP2, RB | ||
4978 | | beqz RC, >3 | ||
4979 | |. dsubu BASE, TMP2, TMP0 | ||
4980 | |2: | ||
4981 | | ld CRET1, 0(RA) | ||
4982 | | daddiu RA, RA, 8 | ||
4983 | | daddiu RC, RC, -8 | ||
4984 | | sd CRET1, 0(TMP2) | ||
4985 | | bnez RC, <2 | ||
4986 | |. daddiu TMP2, TMP2, 8 | ||
4987 | |3: | ||
4988 | | daddiu TMP3, TMP3, -8 | ||
4989 | |5: | ||
4990 | | sltu AT, TMP2, TMP3 | ||
4991 | | bnez AT, >6 | ||
4992 | |. ld LFUNC:TMP1, FRAME_FUNC(BASE) | ||
4993 | | ins_next1 | ||
4994 | | cleartp LFUNC:TMP1 | ||
4995 | | ld TMP1, LFUNC:TMP1->pc | ||
4996 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
4997 | | ins_next2 | ||
4998 | | | ||
4999 | |6: // Fill up results with nil. | ||
5000 | | sd TISNIL, 0(TMP2) | ||
5001 | | b <5 | ||
5002 | |. daddiu TMP2, TMP2, 8 | ||
5003 | | | ||
5004 | |->BC_RETV_Z: // Non-standard return case. | ||
5005 | | andi TMP2, TMP1, FRAME_TYPEP | ||
5006 | | bnez TMP2, ->vm_return | ||
5007 | |. nop | ||
5008 | | // Return from vararg function: relocate BASE down. | ||
5009 | | dsubu BASE, BASE, TMP1 | ||
5010 | | b <1 | ||
5011 | |. ld PC, FRAME_PC(BASE) | ||
5012 | break; | ||
5013 | |||
5014 | case BC_RET0: case BC_RET1: | ||
5015 | | // RA = results*8, RD = (nresults+1)*8 | ||
5016 | | ld PC, FRAME_PC(BASE) | ||
5017 | | daddu RA, BASE, RA | ||
5018 | | move MULTRES, RD | ||
5019 | | andi TMP0, PC, FRAME_TYPE | ||
5020 | | bnez TMP0, ->BC_RETV_Z | ||
5021 | |. xori TMP1, PC, FRAME_VARG | ||
5022 | | lw INS, -4(PC) | ||
5023 | | daddiu TMP2, BASE, -16 | ||
5024 | if (op == BC_RET1) { | ||
5025 | | ld CRET1, 0(RA) | ||
5026 | } | ||
5027 | | decode_RB8a RB, INS | ||
5028 | | decode_RA8a RA, INS | ||
5029 | | decode_RB8b RB | ||
5030 | | decode_RA8b RA | ||
5031 | | dsubu BASE, TMP2, RA | ||
5032 | if (op == BC_RET1) { | ||
5033 | | sd CRET1, 0(TMP2) | ||
5034 | } | ||
5035 | |5: | ||
5036 | | sltu AT, RD, RB | ||
5037 | | bnez AT, >6 | ||
5038 | |. ld TMP1, FRAME_FUNC(BASE) | ||
5039 | | ins_next1 | ||
5040 | | cleartp LFUNC:TMP1 | ||
5041 | | ld TMP1, LFUNC:TMP1->pc | ||
5042 | | ld KBASE, PC2PROTO(k)(TMP1) | ||
5043 | | ins_next2 | ||
5044 | | | ||
5045 | |6: // Fill up results with nil. | ||
5046 | | daddiu TMP2, TMP2, 8 | ||
5047 | | daddiu RD, RD, 8 | ||
5048 | | b <5 | ||
5049 | if (op == BC_RET1) { | ||
5050 | |. sd TISNIL, 0(TMP2) | ||
5051 | } else { | ||
5052 | |. sd TISNIL, -8(TMP2) | ||
5053 | } | ||
5054 | break; | ||
5055 | |||
5056 | /* -- Loops and branches ------------------------------------------------ */ | ||
5057 | |||
5058 | case BC_FORL: | ||
5059 | |.if JIT | ||
5060 | | hotloop | ||
5061 | |.endif | ||
5062 | | // Fall through. Assumes BC_IFORL follows. | ||
5063 | break; | ||
5064 | |||
5065 | case BC_JFORI: | ||
5066 | case BC_JFORL: | ||
5067 | #if !LJ_HASJIT | ||
5068 | break; | ||
5069 | #endif | ||
5070 | case BC_FORI: | ||
5071 | case BC_IFORL: | ||
5072 | | // RA = base*8, RD = target (after end of loop or start of loop) | ||
5073 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
5074 | | daddu RA, BASE, RA | ||
5075 | | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type | ||
5076 | | gettp CARG3, CARG1 | ||
5077 | if (op != BC_JFORL) { | ||
5078 | | srl RD, RD, 1 | ||
5079 | | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) | ||
5080 | | daddu TMP2, RD, TMP2 | ||
5081 | } | ||
5082 | if (!vk) { | ||
5083 | | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type | ||
5084 | | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type | ||
5085 | | gettp CARG4, CARG2 | ||
5086 | | bne CARG3, TISNUM, >5 | ||
5087 | |. gettp CRET2, CRET1 | ||
5088 | | bne CARG4, TISNUM, ->vmeta_for | ||
5089 | |. sextw CARG3, CARG1 | ||
5090 | | bne CRET2, TISNUM, ->vmeta_for | ||
5091 | |. sextw CARG2, CARG2 | ||
5092 | | dext AT, CRET1, 31, 0 | ||
5093 | | slt CRET1, CARG2, CARG3 | ||
5094 | | slt TMP1, CARG3, CARG2 | ||
5095 | |.if MIPSR6 | ||
5096 | | selnez TMP1, TMP1, AT | ||
5097 | | seleqz CRET1, CRET1, AT | ||
5098 | | or CRET1, CRET1, TMP1 | ||
5099 | |.else | ||
5100 | | movn CRET1, TMP1, AT | ||
5101 | |.endif | ||
5102 | } else { | ||
5103 | | bne CARG3, TISNUM, >5 | ||
5104 | |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type | ||
5105 | | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type | ||
5106 | | sextw TMP3, CARG1 | ||
5107 | | sextw CARG2, CARG2 | ||
5108 | | sextw CRET1, CRET1 | ||
5109 | | addu CARG1, TMP3, CARG2 | ||
5110 | | xor TMP0, CARG1, TMP3 | ||
5111 | | xor TMP1, CARG1, CARG2 | ||
5112 | | and TMP0, TMP0, TMP1 | ||
5113 | | slt TMP1, CARG1, CRET1 | ||
5114 | | slt CRET1, CRET1, CARG1 | ||
5115 | | slt AT, CARG2, r0 | ||
5116 | | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. | ||
5117 | |.if MIPSR6 | ||
5118 | | selnez TMP1, TMP1, AT | ||
5119 | | seleqz CRET1, CRET1, AT | ||
5120 | | or CRET1, CRET1, TMP1 | ||
5121 | |.else | ||
5122 | | movn CRET1, TMP1, AT | ||
5123 | |.endif | ||
5124 | | or CRET1, CRET1, TMP0 | ||
5125 | | zextw CARG1, CARG1 | ||
5126 | | settp CARG1, TISNUM | ||
5127 | } | ||
5128 | |1: | ||
5129 | if (op == BC_FORI) { | ||
5130 | |.if MIPSR6 | ||
5131 | | selnez TMP2, TMP2, CRET1 | ||
5132 | |.else | ||
5133 | | movz TMP2, r0, CRET1 | ||
5134 | |.endif | ||
5135 | | daddu PC, PC, TMP2 | ||
5136 | } else if (op == BC_JFORI) { | ||
5137 | | daddu PC, PC, TMP2 | ||
5138 | | lhu RD, -4+OFS_RD(PC) | ||
5139 | } else if (op == BC_IFORL) { | ||
5140 | |.if MIPSR6 | ||
5141 | | seleqz TMP2, TMP2, CRET1 | ||
5142 | |.else | ||
5143 | | movn TMP2, r0, CRET1 | ||
5144 | |.endif | ||
5145 | | daddu PC, PC, TMP2 | ||
5146 | } | ||
5147 | if (vk) { | ||
5148 | | sd CARG1, FORL_IDX*8(RA) | ||
5149 | } | ||
5150 | | ins_next1 | ||
5151 | | sd CARG1, FORL_EXT*8(RA) | ||
5152 | |2: | ||
5153 | if (op == BC_JFORI) { | ||
5154 | | beqz CRET1, =>BC_JLOOP | ||
5155 | |. decode_RD8b RD | ||
5156 | } else if (op == BC_JFORL) { | ||
5157 | | beqz CRET1, =>BC_JLOOP | ||
5158 | } | ||
5159 | | ins_next2 | ||
5160 | | | ||
5161 | |5: // FP loop. | ||
5162 | |.if FPU | ||
5163 | if (!vk) { | ||
5164 | | ldc1 f0, FORL_IDX*8(RA) | ||
5165 | | ldc1 f2, FORL_STOP*8(RA) | ||
5166 | | sltiu TMP0, CARG3, LJ_TISNUM | ||
5167 | | sltiu TMP1, CARG4, LJ_TISNUM | ||
5168 | | sltiu AT, CRET2, LJ_TISNUM | ||
5169 | | ld TMP3, FORL_STEP*8(RA) | ||
5170 | | and TMP0, TMP0, TMP1 | ||
5171 | | and AT, AT, TMP0 | ||
5172 | | beqz AT, ->vmeta_for | ||
5173 | |. slt TMP3, TMP3, r0 | ||
5174 | |.if MIPSR6 | ||
5175 | | dmtc1 TMP3, FTMP2 | ||
5176 | | cmp.lt.d FTMP0, f0, f2 | ||
5177 | | cmp.lt.d FTMP1, f2, f0 | ||
5178 | | sel.d FTMP2, FTMP1, FTMP0 | ||
5179 | | b <1 | ||
5180 | |. dmfc1 CRET1, FTMP2 | ||
5181 | |.else | ||
5182 | | c.ole.d 0, f0, f2 | ||
5183 | | c.ole.d 1, f2, f0 | ||
5184 | | li CRET1, 1 | ||
5185 | | movt CRET1, r0, 0 | ||
5186 | | movt AT, r0, 1 | ||
5187 | | b <1 | ||
5188 | |. movn CRET1, AT, TMP3 | ||
5189 | |.endif | ||
5190 | } else { | ||
5191 | | ldc1 f0, FORL_IDX*8(RA) | ||
5192 | | ldc1 f4, FORL_STEP*8(RA) | ||
5193 | | ldc1 f2, FORL_STOP*8(RA) | ||
5194 | | ld TMP3, FORL_STEP*8(RA) | ||
5195 | | add.d f0, f0, f4 | ||
5196 | |.if MIPSR6 | ||
5197 | | slt TMP3, TMP3, r0 | ||
5198 | | dmtc1 TMP3, FTMP2 | ||
5199 | | cmp.lt.d FTMP0, f0, f2 | ||
5200 | | cmp.lt.d FTMP1, f2, f0 | ||
5201 | | sel.d FTMP2, FTMP1, FTMP0 | ||
5202 | | dmfc1 CRET1, FTMP2 | ||
5203 | if (op == BC_IFORL) { | ||
5204 | | seleqz TMP2, TMP2, CRET1 | ||
5205 | | daddu PC, PC, TMP2 | ||
5206 | } | ||
5207 | |.else | ||
5208 | | c.ole.d 0, f0, f2 | ||
5209 | | c.ole.d 1, f2, f0 | ||
5210 | | slt TMP3, TMP3, r0 | ||
5211 | | li CRET1, 1 | ||
5212 | | li AT, 1 | ||
5213 | | movt CRET1, r0, 0 | ||
5214 | | movt AT, r0, 1 | ||
5215 | | movn CRET1, AT, TMP3 | ||
5216 | if (op == BC_IFORL) { | ||
5217 | | movn TMP2, r0, CRET1 | ||
5218 | | daddu PC, PC, TMP2 | ||
5219 | } | ||
5220 | |.endif | ||
5221 | | sdc1 f0, FORL_IDX*8(RA) | ||
5222 | | ins_next1 | ||
5223 | | b <2 | ||
5224 | |. sdc1 f0, FORL_EXT*8(RA) | ||
5225 | } | ||
5226 | |.else | ||
5227 | if (!vk) { | ||
5228 | | sltiu TMP0, CARG3, LJ_TISNUM | ||
5229 | | sltiu TMP1, CARG4, LJ_TISNUM | ||
5230 | | sltiu AT, CRET2, LJ_TISNUM | ||
5231 | | and TMP0, TMP0, TMP1 | ||
5232 | | and AT, AT, TMP0 | ||
5233 | | beqz AT, ->vmeta_for | ||
5234 | |. nop | ||
5235 | | bal ->vm_sfcmpolex | ||
5236 | |. lw TMP3, FORL_STEP*8+HI(RA) | ||
5237 | | b <1 | ||
5238 | |. nop | ||
5239 | } else { | ||
5240 | | load_got __adddf3 | ||
5241 | | call_extern | ||
5242 | |. sw TMP2, TMPD | ||
5243 | | ld CARG2, FORL_STOP*8(RA) | ||
5244 | | move CARG1, CRET1 | ||
5245 | if ( op == BC_JFORL ) { | ||
5246 | | lhu RD, -4+OFS_RD(PC) | ||
5247 | | decode_RD8b RD | ||
5248 | } | ||
5249 | | bal ->vm_sfcmpolex | ||
5250 | |. lw TMP3, FORL_STEP*8+HI(RA) | ||
5251 | | b <1 | ||
5252 | |. lw TMP2, TMPD | ||
5253 | } | ||
5254 | |.endif | ||
5255 | break; | ||
5256 | |||
5257 | case BC_ITERL: | ||
5258 | |.if JIT | ||
5259 | | hotloop | ||
5260 | |.endif | ||
5261 | | // Fall through. Assumes BC_IITERL follows. | ||
5262 | break; | ||
5263 | |||
5264 | case BC_JITERL: | ||
5265 | #if !LJ_HASJIT | ||
5266 | break; | ||
5267 | #endif | ||
5268 | case BC_IITERL: | ||
5269 | | // RA = base*8, RD = target | ||
5270 | | daddu RA, BASE, RA | ||
5271 | | ld TMP1, 0(RA) | ||
5272 | | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. | ||
5273 | |. nop | ||
5274 | if (op == BC_JITERL) { | ||
5275 | | b =>BC_JLOOP | ||
5276 | |. sd TMP1, -8(RA) | ||
5277 | } else { | ||
5278 | | branch_RD // Otherwise save control var + branch. | ||
5279 | | sd TMP1, -8(RA) | ||
5280 | } | ||
5281 | |1: | ||
5282 | | ins_next | ||
5283 | break; | ||
5284 | |||
5285 | case BC_LOOP: | ||
5286 | | // RA = base*8, RD = target (loop extent) | ||
5287 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | ||
5288 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
5289 | |.if JIT | ||
5290 | | hotloop | ||
5291 | |.endif | ||
5292 | | // Fall through. Assumes BC_ILOOP follows. | ||
5293 | break; | ||
5294 | |||
5295 | case BC_ILOOP: | ||
5296 | | // RA = base*8, RD = target (loop extent) | ||
5297 | | ins_next | ||
5298 | break; | ||
5299 | |||
5300 | case BC_JLOOP: | ||
5301 | |.if JIT | ||
5302 | | // RA = base*8 (ignored), RD = traceno*8 | ||
5303 | | ld TMP1, DISPATCH_J(trace)(DISPATCH) | ||
5304 | | li AT, 0 | ||
5305 | | daddu TMP1, TMP1, RD | ||
5306 | | // Traces on MIPS don't store the trace number, so use 0. | ||
5307 | | sd AT, DISPATCH_GL(vmstate)(DISPATCH) | ||
5308 | | ld TRACE:TMP2, 0(TMP1) | ||
5309 | | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) | ||
5310 | | ld TMP2, TRACE:TMP2->mcode | ||
5311 | | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | ||
5312 | | jr TMP2 | ||
5313 | |. daddiu JGL, DISPATCH, GG_DISP2G+32768 | ||
5314 | |.endif | ||
5315 | break; | ||
5316 | |||
5317 | case BC_JMP: | ||
5318 | | // RA = base*8 (only used by trace recorder), RD = target | ||
5319 | | branch_RD | ||
5320 | | ins_next | ||
5321 | break; | ||
5322 | |||
5323 | /* -- Function headers -------------------------------------------------- */ | ||
5324 | |||
5325 | case BC_FUNCF: | ||
5326 | |.if JIT | ||
5327 | | hotcall | ||
5328 | |.endif | ||
5329 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | ||
5330 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. | ||
5331 | break; | ||
5332 | |||
5333 | case BC_JFUNCF: | ||
5334 | #if !LJ_HASJIT | ||
5335 | break; | ||
5336 | #endif | ||
5337 | case BC_IFUNCF: | ||
5338 | | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 | ||
5339 | | ld TMP2, L->maxstack | ||
5340 | | lbu TMP1, -4+PC2PROTO(numparams)(PC) | ||
5341 | | ld KBASE, -4+PC2PROTO(k)(PC) | ||
5342 | | sltu AT, TMP2, RA | ||
5343 | | bnez AT, ->vm_growstack_l | ||
5344 | |. sll TMP1, TMP1, 3 | ||
5345 | if (op != BC_JFUNCF) { | ||
5346 | | ins_next1 | ||
5347 | } | ||
5348 | |2: | ||
5349 | | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. | ||
5350 | | bnez AT, >3 | ||
5351 | |. daddu AT, BASE, NARGS8:RC | ||
5352 | if (op == BC_JFUNCF) { | ||
5353 | | decode_RD8a RD, INS | ||
5354 | | b =>BC_JLOOP | ||
5355 | |. decode_RD8b RD | ||
5356 | } else { | ||
5357 | | ins_next2 | ||
5358 | } | ||
5359 | | | ||
5360 | |3: // Clear missing parameters. | ||
5361 | | sd TISNIL, 0(AT) | ||
5362 | | b <2 | ||
5363 | |. addiu NARGS8:RC, NARGS8:RC, 8 | ||
5364 | break; | ||
5365 | |||
5366 | case BC_JFUNCV: | ||
5367 | #if !LJ_HASJIT | ||
5368 | break; | ||
5369 | #endif | ||
5370 | | NYI // NYI: compiled vararg functions | ||
5371 | break; /* NYI: compiled vararg functions. */ | ||
5372 | |||
5373 | case BC_IFUNCV: | ||
5374 | | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 | ||
5375 | | li TMP0, LJ_TFUNC | ||
5376 | | daddu TMP1, BASE, RC | ||
5377 | | ld TMP2, L->maxstack | ||
5378 | | settp LFUNC:RB, TMP0 | ||
5379 | | daddu TMP0, RA, RC | ||
5380 | | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. | ||
5381 | | daddiu TMP3, RC, 16+FRAME_VARG | ||
5382 | | sltu AT, TMP0, TMP2 | ||
5383 | | ld KBASE, -4+PC2PROTO(k)(PC) | ||
5384 | | beqz AT, ->vm_growstack_l | ||
5385 | |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. | ||
5386 | | lbu TMP2, -4+PC2PROTO(numparams)(PC) | ||
5387 | | move RA, BASE | ||
5388 | | move RC, TMP1 | ||
5389 | | ins_next1 | ||
5390 | | beqz TMP2, >3 | ||
5391 | |. daddiu BASE, TMP1, 16 | ||
5392 | |1: | ||
5393 | | ld TMP0, 0(RA) | ||
5394 | | sltu AT, RA, RC // Less args than parameters? | ||
5395 | | move CARG1, TMP0 | ||
5396 | |.if MIPSR6 | ||
5397 | | selnez TMP0, TMP0, AT | ||
5398 | | seleqz TMP3, TISNIL, AT | ||
5399 | | or TMP0, TMP0, TMP3 | ||
5400 | | seleqz TMP3, CARG1, AT | ||
5401 | | selnez CARG1, TISNIL, AT | ||
5402 | | or CARG1, CARG1, TMP3 | ||
5403 | |.else | ||
5404 | | movz TMP0, TISNIL, AT // Clear missing parameters. | ||
5405 | | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). | ||
5406 | |.endif | ||
5407 | | addiu TMP2, TMP2, -1 | ||
5408 | | sd TMP0, 16(TMP1) | ||
5409 | | daddiu TMP1, TMP1, 8 | ||
5410 | | sd CARG1, 0(RA) | ||
5411 | | bnez TMP2, <1 | ||
5412 | |. daddiu RA, RA, 8 | ||
5413 | |3: | ||
5414 | | ins_next2 | ||
5415 | break; | ||
5416 | |||
5417 | case BC_FUNCC: | ||
5418 | case BC_FUNCCW: | ||
5419 | | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 | ||
5420 | if (op == BC_FUNCC) { | ||
5421 | | ld CFUNCADDR, CFUNC:RB->f | ||
5422 | } else { | ||
5423 | | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) | ||
5424 | } | ||
5425 | | daddu TMP1, RA, NARGS8:RC | ||
5426 | | ld TMP2, L->maxstack | ||
5427 | | daddu RC, BASE, NARGS8:RC | ||
5428 | | sd BASE, L->base | ||
5429 | | sltu AT, TMP2, TMP1 | ||
5430 | | sd RC, L->top | ||
5431 | | li_vmstate C | ||
5432 | if (op == BC_FUNCCW) { | ||
5433 | | ld CARG2, CFUNC:RB->f | ||
5434 | } | ||
5435 | | bnez AT, ->vm_growstack_c // Need to grow stack. | ||
5436 | |. move CARG1, L | ||
5437 | | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) | ||
5438 | |. st_vmstate | ||
5439 | | // Returns nresults. | ||
5440 | | ld BASE, L->base | ||
5441 | | sll RD, CRET1, 3 | ||
5442 | | ld TMP1, L->top | ||
5443 | | li_vmstate INTERP | ||
5444 | | ld PC, FRAME_PC(BASE) // Fetch PC of caller. | ||
5445 | | dsubu RA, TMP1, RD // RA = L->top - nresults*8 | ||
5446 | | sd L, DISPATCH_GL(cur_L)(DISPATCH) | ||
5447 | | b ->vm_returnc | ||
5448 | |. st_vmstate | ||
5449 | break; | ||
5450 | |||
5451 | /* ---------------------------------------------------------------------- */ | ||
5452 | |||
5453 | default: | ||
5454 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
5455 | exit(2); | ||
5456 | break; | ||
5457 | } | ||
5458 | } | ||
5459 | |||
5460 | static int build_backend(BuildCtx *ctx) /* Drives backend generation: subroutines first, then one build_ins() call per bytecode op. Returns BC__MAX. */ | ||
5461 | { | ||
5462 | int op; | ||
5463 | |||
5464 | dasm_growpc(Dst, BC__MAX); /* Make room for BC__MAX DynASM pc labels (presumably the =>BC_xxx branch targets, e.g. =>BC_JLOOP). */ | ||
5465 | |||
5466 | build_subroutines(ctx); /* Defined elsewhere in this file; presumably emits the shared ->vm_xxx / ->vmeta_xxx helpers -- confirm. */ | ||
5467 | |||
5468 | |.code_op | ||
5469 | for (op = 0; op < BC__MAX; op++) /* Opcode order matters: several ops emit no terminator and fall through into the next op's code. */ | ||
5470 | build_ins(ctx, (BCOp)op, op); | ||
5471 | |||
5472 | return BC__MAX; | ||
5473 | } | ||
5474 | |||
5475 | /* Emit pseudo frame-info (a DWARF .debug_frame section) for all assembler functions. Only BUILD_elfasm mode writes anything; every other mode is a no-op. */ | ||
5476 | static void emit_asm_debug(BuildCtx *ctx) | ||
5477 | { | ||
5478 | int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of vm_ffi_call from the start of the generated code. */ | ||
5479 | int i; | ||
5480 | switch (ctx->mode) { | ||
5481 | case BUILD_elfasm: | ||
5482 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); | ||
5483 | fprintf(ctx->fp, /* CIE (.Lframe0), referenced by both FDEs below. */ | ||
5484 | ".Lframe0:\n" | ||
5485 | "\t.4byte .LECIE0-.LSCIE0\n" | ||
5486 | ".LSCIE0:\n" | ||
5487 | "\t.4byte 0xffffffff\n" /* CIE id. */ | ||
5488 | "\t.byte 0x1\n" /* Version. */ | ||
5489 | "\t.string \"\"\n" /* Empty augmentation string. */ | ||
5490 | "\t.uleb128 0x1\n" /* Code alignment factor. */ | ||
5491 | "\t.sleb128 -4\n" /* Data alignment factor. */ | ||
5492 | "\t.byte 31\n" /* Return address register column. */ | ||
5493 | "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" /* 0xc = DW_CFA_def_cfa: CFA = reg 29 + 0. */ | ||
5494 | "\t.align 2\n" | ||
5495 | ".LECIE0:\n\n"); | ||
5496 | fprintf(ctx->fp, /* FDE0: covers fcofs bytes starting at .Lbegin, i.e. everything before vm_ffi_call. */ | ||
5497 | ".LSFDE0:\n" | ||
5498 | "\t.4byte .LEFDE0-.LASFDE0\n" | ||
5499 | ".LASFDE0:\n" | ||
5500 | "\t.4byte .Lframe0\n" | ||
5501 | "\t.8byte .Lbegin\n" | ||
5502 | "\t.8byte %d\n" | ||
5503 | "\t.byte 0xe\n\t.uleb128 %d\n" /* 0xe = DW_CFA_def_cfa_offset, set to CFRAME_SIZE. */ | ||
5504 | "\t.byte 0x9f\n\t.sleb128 2*5\n" /* 0x80+31: save slot of reg 31. */ | ||
5505 | "\t.byte 0x9e\n\t.sleb128 2*6\n", /* 0x80+30: save slot of reg 30. */ | ||
5506 | fcofs, CFRAME_SIZE); | ||
5507 | for (i = 23; i >= 16; i--) /* 0x80+i = DW_CFA_offset for regs 23 down to 16; offset is in data-alignment units. */ | ||
5508 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); | ||
5509 | #if !LJ_SOFTFP | ||
5510 | for (i = 31; i >= 24; i--) /* Same for DWARF regs 32+31 down to 32+24 (presumably FPRs f31..f24 -- confirm). */ | ||
5511 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); | ||
5512 | #endif | ||
5513 | fprintf(ctx->fp, | ||
5514 | "\t.align 2\n" | ||
5515 | ".LEFDE0:\n\n"); | ||
5516 | #if LJ_HASFFI | ||
5517 | fprintf(ctx->fp, /* FDE1: covers lj_vm_ffi_call up to the end of the code. NOTE(review): location/range are .4byte here but .8byte in FDE0 -- confirm this is intended on a 64 bit target. */ | ||
5518 | ".LSFDE1:\n" | ||
5519 | "\t.4byte .LEFDE1-.LASFDE1\n" | ||
5520 | ".LASFDE1:\n" | ||
5521 | "\t.4byte .Lframe0\n" | ||
5522 | "\t.4byte lj_vm_ffi_call\n" | ||
5523 | "\t.4byte %d\n" | ||
5524 | "\t.byte 0x9f\n\t.uleb128 2*1\n" | ||
5525 | "\t.byte 0x90\n\t.uleb128 2*2\n" | ||
5526 | "\t.byte 0xd\n\t.uleb128 0x10\n" /* 0xd = DW_CFA_def_cfa_register: CFA is now based on reg 16. */ | ||
5527 | "\t.align 2\n" | ||
5528 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | ||
5529 | #endif | ||
5530 | #if !LJ_NO_UNWIND | ||
5531 | /* NYI: nothing is emitted for the !LJ_NO_UNWIND case yet. */ | ||
5532 | #endif | ||
5533 | break; | ||
5534 | default: | ||
5535 | break; | ||
5536 | } | ||
5537 | } | ||
5538 | |||
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 48d0ed0f..3cad37d2 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc | |||
@@ -1,4 +1,4 @@ | |||
1 | |// Low-level VM code for PowerPC CPUs. | 1 | |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. |
2 | |// Bytecode interpreter, fast functions and helper functions. | 2 | |// Bytecode interpreter, fast functions and helper functions. |
3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | 3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h |
4 | | | 4 | | |
@@ -18,7 +18,6 @@ | |||
18 | |// DynASM defines used by the PPC port: | 18 | |// DynASM defines used by the PPC port: |
19 | |// | 19 | |// |
20 | |// P64 64 bit pointers (only for GPR64 testing). | 20 | |// P64 64 bit pointers (only for GPR64 testing). |
21 | |// Note: a full PPC64 _LP64 port is not planned. | ||
22 | |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). | 21 | |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). |
23 | |// Affects reg saves, stack layout, carry/overflow/dot flags etc. | 22 | |// Affects reg saves, stack layout, carry/overflow/dot flags etc. |
24 | |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). | 23 | |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). |
@@ -103,6 +102,18 @@ | |||
103 | |// Fixed register assignments for the interpreter. | 102 | |// Fixed register assignments for the interpreter. |
104 | |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) | 103 | |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) |
105 | | | 104 | | |
105 | |.macro .FPU, a, b | ||
106 | |.if FPU | ||
107 | | a, b | ||
108 | |.endif | ||
109 | |.endmacro | ||
110 | | | ||
111 | |.macro .FPU, a, b, c | ||
112 | |.if FPU | ||
113 | | a, b, c | ||
114 | |.endif | ||
115 | |.endmacro | ||
116 | | | ||
106 | |// The following must be C callee-save (but BASE is often refetched). | 117 | |// The following must be C callee-save (but BASE is often refetched). |
107 | |.define BASE, r14 // Base of current Lua stack frame. | 118 | |.define BASE, r14 // Base of current Lua stack frame. |
108 | |.define KBASE, r15 // Constants of current Lua function. | 119 | |.define KBASE, r15 // Constants of current Lua function. |
@@ -116,8 +127,10 @@ | |||
116 | |.define TISNUM, r22 | 127 | |.define TISNUM, r22 |
117 | |.define TISNIL, r23 | 128 | |.define TISNIL, r23 |
118 | |.define ZERO, r24 | 129 | |.define ZERO, r24 |
130 | |.if FPU | ||
119 | |.define TOBIT, f30 // 2^52 + 2^51. | 131 | |.define TOBIT, f30 // 2^52 + 2^51. |
120 | |.define TONUM, f31 // 2^52 + 2^51 + 2^31. | 132 | |.define TONUM, f31 // 2^52 + 2^51 + 2^31. |
133 | |.endif | ||
121 | | | 134 | | |
122 | |// The following temporaries are not saved across C calls, except for RA. | 135 | |// The following temporaries are not saved across C calls, except for RA. |
123 | |.define RA, r20 // Callee-save. | 136 | |.define RA, r20 // Callee-save. |
@@ -133,6 +146,7 @@ | |||
133 | | | 146 | | |
134 | |// Saved temporaries. | 147 | |// Saved temporaries. |
135 | |.define SAVE0, r21 | 148 | |.define SAVE0, r21 |
149 | |.define SAVE1, r25 | ||
136 | | | 150 | | |
137 | |// Calling conventions. | 151 | |// Calling conventions. |
138 | |.define CARG1, r3 | 152 | |.define CARG1, r3 |
@@ -141,8 +155,10 @@ | |||
141 | |.define CARG4, r6 // Overlaps TMP3. | 155 | |.define CARG4, r6 // Overlaps TMP3. |
142 | |.define CARG5, r7 // Overlaps INS. | 156 | |.define CARG5, r7 // Overlaps INS. |
143 | | | 157 | | |
158 | |.if FPU | ||
144 | |.define FARG1, f1 | 159 | |.define FARG1, f1 |
145 | |.define FARG2, f2 | 160 | |.define FARG2, f2 |
161 | |.endif | ||
146 | | | 162 | | |
147 | |.define CRET1, r3 | 163 | |.define CRET1, r3 |
148 | |.define CRET2, r4 | 164 | |.define CRET2, r4 |
@@ -213,10 +229,16 @@ | |||
213 | |.endif | 229 | |.endif |
214 | |.else | 230 | |.else |
215 | | | 231 | | |
232 | |.if FPU | ||
216 | |.define SAVE_LR, 276(sp) | 233 | |.define SAVE_LR, 276(sp) |
217 | |.define CFRAME_SPACE, 272 // Delta for sp. | 234 | |.define CFRAME_SPACE, 272 // Delta for sp. |
218 | |// Back chain for sp: 272(sp) <-- sp entering interpreter | 235 | |// Back chain for sp: 272(sp) <-- sp entering interpreter |
219 | |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. | 236 | |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. |
237 | |.else | ||
238 | |.define SAVE_LR, 132(sp) | ||
239 | |.define CFRAME_SPACE, 128 // Delta for sp. | ||
240 | |// Back chain for sp: 128(sp) <-- sp entering interpreter | ||
241 | |.endif | ||
220 | |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. | 242 | |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. |
221 | |.define SAVE_CR, 52(sp) // 32 bit CR save. | 243 | |.define SAVE_CR, 52(sp) // 32 bit CR save. |
222 | |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. | 244 | |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. |
@@ -226,16 +248,25 @@ | |||
226 | |.define SAVE_PC, 32(sp) | 248 | |.define SAVE_PC, 32(sp) |
227 | |.define SAVE_MULTRES, 28(sp) | 249 | |.define SAVE_MULTRES, 28(sp) |
228 | |.define UNUSED1, 24(sp) | 250 | |.define UNUSED1, 24(sp) |
251 | |.if FPU | ||
229 | |.define TMPD_LO, 20(sp) | 252 | |.define TMPD_LO, 20(sp) |
230 | |.define TMPD_HI, 16(sp) | 253 | |.define TMPD_HI, 16(sp) |
231 | |.define TONUM_LO, 12(sp) | 254 | |.define TONUM_LO, 12(sp) |
232 | |.define TONUM_HI, 8(sp) | 255 | |.define TONUM_HI, 8(sp) |
256 | |.else | ||
257 | |.define SFSAVE_4, 20(sp) | ||
258 | |.define SFSAVE_3, 16(sp) | ||
259 | |.define SFSAVE_2, 12(sp) | ||
260 | |.define SFSAVE_1, 8(sp) | ||
261 | |.endif | ||
233 | |// Next frame lr: 4(sp) | 262 | |// Next frame lr: 4(sp) |
234 | |// Back chain for sp: 0(sp) <-- sp while in interpreter | 263 | |// Back chain for sp: 0(sp) <-- sp while in interpreter |
235 | | | 264 | | |
265 | |.if FPU | ||
236 | |.define TMPD_BLO, 23(sp) | 266 | |.define TMPD_BLO, 23(sp) |
237 | |.define TMPD, TMPD_HI | 267 | |.define TMPD, TMPD_HI |
238 | |.define TONUM_D, TONUM_HI | 268 | |.define TONUM_D, TONUM_HI |
269 | |.endif | ||
239 | | | 270 | | |
240 | |.endif | 271 | |.endif |
241 | | | 272 | | |
@@ -245,7 +276,7 @@ | |||
245 | |.else | 276 | |.else |
246 | | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) | 277 | | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) |
247 | |.endif | 278 | |.endif |
248 | | stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) | 279 | | .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |
249 | |.endmacro | 280 | |.endmacro |
250 | |.macro rest_, reg | 281 | |.macro rest_, reg |
251 | |.if GPR64 | 282 | |.if GPR64 |
@@ -253,7 +284,7 @@ | |||
253 | |.else | 284 | |.else |
254 | | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) | 285 | | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) |
255 | |.endif | 286 | |.endif |
256 | | lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) | 287 | | .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |
257 | |.endmacro | 288 | |.endmacro |
258 | | | 289 | | |
259 | |.macro saveregs | 290 | |.macro saveregs |
@@ -316,19 +347,14 @@ | |||
316 | |.type NODE, Node | 347 | |.type NODE, Node |
317 | |.type NARGS8, int | 348 | |.type NARGS8, int |
318 | |.type TRACE, GCtrace | 349 | |.type TRACE, GCtrace |
350 | |.type SBUF, SBuf | ||
319 | | | 351 | | |
320 | |//----------------------------------------------------------------------- | 352 | |//----------------------------------------------------------------------- |
321 | | | 353 | | |
322 | |// These basic macros should really be part of DynASM. | ||
323 | |.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro | ||
324 | |.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro | ||
325 | |.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro | ||
326 | |.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro | ||
327 | |.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro | ||
328 | | | ||
329 | |// Trap for not-yet-implemented parts. | 354 | |// Trap for not-yet-implemented parts. |
330 | |.macro NYI; tw 4, sp, sp; .endmacro | 355 | |.macro NYI; tw 4, sp, sp; .endmacro |
331 | | | 356 | | |
357 | |.if FPU | ||
332 | |// int/FP conversions. | 358 | |// int/FP conversions. |
333 | |.macro tonum_i, freg, reg | 359 | |.macro tonum_i, freg, reg |
334 | | xoris reg, reg, 0x8000 | 360 | | xoris reg, reg, 0x8000 |
@@ -352,6 +378,7 @@ | |||
352 | |.macro toint, reg, freg | 378 | |.macro toint, reg, freg |
353 | | toint reg, freg, freg | 379 | | toint reg, freg, freg |
354 | |.endmacro | 380 | |.endmacro |
381 | |.endif | ||
355 | | | 382 | | |
356 | |//----------------------------------------------------------------------- | 383 | |//----------------------------------------------------------------------- |
357 | | | 384 | | |
@@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
539 | | beq >2 | 566 | | beq >2 |
540 | |1: | 567 | |1: |
541 | | addic. TMP1, TMP1, -8 | 568 | | addic. TMP1, TMP1, -8 |
569 | |.if FPU | ||
542 | | lfd f0, 0(RA) | 570 | | lfd f0, 0(RA) |
571 | |.else | ||
572 | | lwz CARG1, 0(RA) | ||
573 | | lwz CARG2, 4(RA) | ||
574 | |.endif | ||
543 | | addi RA, RA, 8 | 575 | | addi RA, RA, 8 |
576 | |.if FPU | ||
544 | | stfd f0, 0(BASE) | 577 | | stfd f0, 0(BASE) |
578 | |.else | ||
579 | | stw CARG1, 0(BASE) | ||
580 | | stw CARG2, 4(BASE) | ||
581 | |.endif | ||
545 | | addi BASE, BASE, 8 | 582 | | addi BASE, BASE, 8 |
546 | | bney <1 | 583 | | bney <1 |
547 | | | 584 | | |
@@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
619 | | .toc ld TOCREG, SAVE_TOC | 656 | | .toc ld TOCREG, SAVE_TOC |
620 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 657 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
621 | | lp BASE, L->base | 658 | | lp BASE, L->base |
622 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 659 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
623 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | 660 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. |
624 | | li ZERO, 0 | 661 | | li ZERO, 0 |
625 | | stw TMP3, TMPD | 662 | | .FPU stw TMP3, TMPD |
626 | | li TMP1, LJ_TFALSE | 663 | | li TMP1, LJ_TFALSE |
627 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 664 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
628 | | li TISNIL, LJ_TNIL | 665 | | li TISNIL, LJ_TNIL |
629 | | li_vmstate INTERP | 666 | | li_vmstate INTERP |
630 | | lfs TOBIT, TMPD | 667 | | .FPU lfs TOBIT, TMPD |
631 | | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. | 668 | | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. |
632 | | la RA, -8(BASE) // Results start at BASE-8. | 669 | | la RA, -8(BASE) // Results start at BASE-8. |
633 | | stw TMP3, TMPD | 670 | | .FPU stw TMP3, TMPD |
634 | | addi DISPATCH, DISPATCH, GG_G2DISP | 671 | | addi DISPATCH, DISPATCH, GG_G2DISP |
635 | | stw TMP1, 0(RA) // Prepend false to error message. | 672 | | stw TMP1, 0(RA) // Prepend false to error message. |
636 | | li RD, 16 // 2 results: false + error message. | 673 | | li RD, 16 // 2 results: false + error message. |
637 | | st_vmstate | 674 | | st_vmstate |
638 | | lfs TONUM, TMPD | 675 | | .FPU lfs TONUM, TMPD |
639 | | b ->vm_returnc | 676 | | b ->vm_returnc |
640 | | | 677 | | |
641 | |//----------------------------------------------------------------------- | 678 | |//----------------------------------------------------------------------- |
@@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
684 | | stw CARG3, SAVE_NRES | 721 | | stw CARG3, SAVE_NRES |
685 | | cmplwi TMP1, 0 | 722 | | cmplwi TMP1, 0 |
686 | | stw CARG3, SAVE_ERRF | 723 | | stw CARG3, SAVE_ERRF |
687 | | stp TMP0, L->cframe | ||
688 | | stp CARG3, SAVE_CFRAME | 724 | | stp CARG3, SAVE_CFRAME |
689 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 725 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
726 | | stp TMP0, L->cframe | ||
690 | | beq >3 | 727 | | beq >3 |
691 | | | 728 | | |
692 | | // Resume after yield (like a return). | 729 | | // Resume after yield (like a return). |
730 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
693 | | mr RA, BASE | 731 | | mr RA, BASE |
694 | | lp BASE, L->base | 732 | | lp BASE, L->base |
695 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 733 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
696 | | lp TMP1, L->top | 734 | | lp TMP1, L->top |
697 | | lwz PC, FRAME_PC(BASE) | 735 | | lwz PC, FRAME_PC(BASE) |
698 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 736 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
699 | | stb CARG3, L->status | 737 | | stb CARG3, L->status |
700 | | stw TMP3, TMPD | 738 | | .FPU stw TMP3, TMPD |
701 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 739 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
702 | | lfs TOBIT, TMPD | 740 | | .FPU lfs TOBIT, TMPD |
703 | | sub RD, TMP1, BASE | 741 | | sub RD, TMP1, BASE |
704 | | stw TMP3, TMPD | 742 | | .FPU stw TMP3, TMPD |
705 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 743 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
706 | | addi RD, RD, 8 | 744 | | addi RD, RD, 8 |
707 | | stw TMP0, TONUM_HI | 745 | | .FPU stw TMP0, TONUM_HI |
708 | | li_vmstate INTERP | 746 | | li_vmstate INTERP |
709 | | li ZERO, 0 | 747 | | li ZERO, 0 |
710 | | st_vmstate | 748 | | st_vmstate |
711 | | andix. TMP0, PC, FRAME_TYPE | 749 | | andix. TMP0, PC, FRAME_TYPE |
712 | | mr MULTRES, RD | 750 | | mr MULTRES, RD |
713 | | lfs TONUM, TMPD | 751 | | .FPU lfs TONUM, TMPD |
714 | | li TISNIL, LJ_TNIL | 752 | | li TISNIL, LJ_TNIL |
715 | | beq ->BC_RET_Z | 753 | | beq ->BC_RET_Z |
716 | | b ->vm_return | 754 | | b ->vm_return |
@@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
729 | | | 767 | | |
730 | |1: // Entry point for vm_pcall above (PC = ftype). | 768 | |1: // Entry point for vm_pcall above (PC = ftype). |
731 | | lp TMP1, L:CARG1->cframe | 769 | | lp TMP1, L:CARG1->cframe |
732 | | stw CARG3, SAVE_NRES | ||
733 | | mr L, CARG1 | 770 | | mr L, CARG1 |
734 | | stw CARG1, SAVE_L | 771 | | stw CARG3, SAVE_NRES |
735 | | mr BASE, CARG2 | ||
736 | | stp sp, L->cframe // Add our C frame to cframe chain. | ||
737 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | 772 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. |
773 | | stw CARG1, SAVE_L | ||
774 | | mr BASE, CARG2 | ||
775 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
738 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 776 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
739 | | stp TMP1, SAVE_CFRAME | 777 | | stp TMP1, SAVE_CFRAME |
740 | | addi DISPATCH, DISPATCH, GG_G2DISP | 778 | | stp sp, L->cframe // Add our C frame to cframe chain. |
741 | | | 779 | | |
742 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | 780 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). |
781 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
743 | | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). | 782 | | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). |
744 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 783 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
745 | | lp TMP1, L->top | 784 | | lp TMP1, L->top |
746 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 785 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
747 | | add PC, PC, BASE | 786 | | add PC, PC, BASE |
748 | | stw TMP3, TMPD | 787 | | .FPU stw TMP3, TMPD |
749 | | li ZERO, 0 | 788 | | li ZERO, 0 |
750 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 789 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
751 | | lfs TOBIT, TMPD | 790 | | .FPU lfs TOBIT, TMPD |
752 | | sub PC, PC, TMP2 // PC = frame delta + frame type | 791 | | sub PC, PC, TMP2 // PC = frame delta + frame type |
753 | | stw TMP3, TMPD | 792 | | .FPU stw TMP3, TMPD |
754 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 793 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
755 | | sub NARGS8:RC, TMP1, BASE | 794 | | sub NARGS8:RC, TMP1, BASE |
756 | | stw TMP0, TONUM_HI | 795 | | .FPU stw TMP0, TONUM_HI |
757 | | li_vmstate INTERP | 796 | | li_vmstate INTERP |
758 | | lfs TONUM, TMPD | 797 | | .FPU lfs TONUM, TMPD |
759 | | li TISNIL, LJ_TNIL | 798 | | li TISNIL, LJ_TNIL |
760 | | st_vmstate | 799 | | st_vmstate |
761 | | | 800 | | |
@@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
776 | | lwz TMP0, L:CARG1->stack | 815 | | lwz TMP0, L:CARG1->stack |
777 | | stw CARG1, SAVE_L | 816 | | stw CARG1, SAVE_L |
778 | | lp TMP1, L->top | 817 | | lp TMP1, L->top |
818 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | ||
779 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | 819 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. |
780 | | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | 820 | | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). |
781 | | lp TMP1, L->cframe | 821 | | lp TMP1, L->cframe |
782 | | stp sp, L->cframe // Add our C frame to cframe chain. | 822 | | addi DISPATCH, DISPATCH, GG_G2DISP |
783 | | .toc lp CARG4, 0(CARG4) | 823 | | .toc lp CARG4, 0(CARG4) |
784 | | li TMP2, 0 | 824 | | li TMP2, 0 |
785 | | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | 825 | | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. |
786 | | stw TMP2, SAVE_ERRF // No error function. | 826 | | stw TMP2, SAVE_ERRF // No error function. |
787 | | stp TMP1, SAVE_CFRAME | 827 | | stp TMP1, SAVE_CFRAME |
828 | | stp sp, L->cframe // Add our C frame to cframe chain. | ||
829 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
788 | | mtctr CARG4 | 830 | | mtctr CARG4 |
789 | | bctrl // (lua_State *L, lua_CFunction func, void *ud) | 831 | | bctrl // (lua_State *L, lua_CFunction func, void *ud) |
790 | |.if PPE | 832 | |.if PPE |
@@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
793 | |.else | 835 | |.else |
794 | | mr. BASE, CRET1 | 836 | | mr. BASE, CRET1 |
795 | |.endif | 837 | |.endif |
796 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | 838 | | li PC, FRAME_CP |
797 | | li PC, FRAME_CP | ||
798 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
799 | | bne <3 // Else continue with the call. | 839 | | bne <3 // Else continue with the call. |
800 | | b ->vm_leave_cp // No base? Just remove C frame. | 840 | | b ->vm_leave_cp // No base? Just remove C frame. |
801 | | | 841 | | |
@@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx) | |||
842 | | lwz INS, -4(PC) | 882 | | lwz INS, -4(PC) |
843 | | subi CARG2, RB, 16 | 883 | | subi CARG2, RB, 16 |
844 | | decode_RB8 SAVE0, INS | 884 | | decode_RB8 SAVE0, INS |
885 | |.if FPU | ||
845 | | lfd f0, 0(RA) | 886 | | lfd f0, 0(RA) |
887 | |.else | ||
888 | | lwz TMP2, 0(RA) | ||
889 | | lwz TMP3, 4(RA) | ||
890 | |.endif | ||
846 | | add TMP1, BASE, SAVE0 | 891 | | add TMP1, BASE, SAVE0 |
847 | | stp BASE, L->base | 892 | | stp BASE, L->base |
848 | | cmplw TMP1, CARG2 | 893 | | cmplw TMP1, CARG2 |
849 | | sub CARG3, CARG2, TMP1 | 894 | | sub CARG3, CARG2, TMP1 |
850 | | decode_RA8 RA, INS | 895 | | decode_RA8 RA, INS |
896 | |.if FPU | ||
851 | | stfd f0, 0(CARG2) | 897 | | stfd f0, 0(CARG2) |
898 | |.else | ||
899 | | stw TMP2, 0(CARG2) | ||
900 | | stw TMP3, 4(CARG2) | ||
901 | |.endif | ||
852 | | bney ->BC_CAT_Z | 902 | | bney ->BC_CAT_Z |
903 | |.if FPU | ||
853 | | stfdx f0, BASE, RA | 904 | | stfdx f0, BASE, RA |
905 | |.else | ||
906 | | stwux TMP2, RA, BASE | ||
907 | | stw TMP3, 4(RA) | ||
908 | |.endif | ||
854 | | b ->cont_nop | 909 | | b ->cont_nop |
855 | | | 910 | | |
856 | |//-- Table indexing metamethods ----------------------------------------- | 911 | |//-- Table indexing metamethods ----------------------------------------- |
@@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
903 | | // Returns TValue * (finished) or NULL (metamethod). | 958 | | // Returns TValue * (finished) or NULL (metamethod). |
904 | | cmplwi CRET1, 0 | 959 | | cmplwi CRET1, 0 |
905 | | beq >3 | 960 | | beq >3 |
961 | |.if FPU | ||
906 | | lfd f0, 0(CRET1) | 962 | | lfd f0, 0(CRET1) |
963 | |.else | ||
964 | | lwz TMP0, 0(CRET1) | ||
965 | | lwz TMP1, 4(CRET1) | ||
966 | |.endif | ||
907 | | ins_next1 | 967 | | ins_next1 |
968 | |.if FPU | ||
908 | | stfdx f0, BASE, RA | 969 | | stfdx f0, BASE, RA |
970 | |.else | ||
971 | | stwux TMP0, RA, BASE | ||
972 | | stw TMP1, 4(RA) | ||
973 | |.endif | ||
909 | | ins_next2 | 974 | | ins_next2 |
910 | | | 975 | | |
911 | |3: // Call __index metamethod. | 976 | |3: // Call __index metamethod. |
@@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
918 | | li NARGS8:RC, 16 // 2 args for func(t, k). | 983 | | li NARGS8:RC, 16 // 2 args for func(t, k). |
919 | | b ->vm_call_dispatch_f | 984 | | b ->vm_call_dispatch_f |
920 | | | 985 | | |
986 | |->vmeta_tgetr: | ||
987 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
988 | | // Returns cTValue * or NULL. | ||
989 | | cmplwi CRET1, 0 | ||
990 | | beq >1 | ||
991 | |.if FPU | ||
992 | | lfd f14, 0(CRET1) | ||
993 | |.else | ||
994 | | lwz SAVE0, 0(CRET1) | ||
995 | | lwz SAVE1, 4(CRET1) | ||
996 | |.endif | ||
997 | | b ->BC_TGETR_Z | ||
998 | |1: | ||
999 | | stwx TISNIL, BASE, RA | ||
1000 | | b ->cont_nop | ||
1001 | | | ||
921 | |//----------------------------------------------------------------------- | 1002 | |//----------------------------------------------------------------------- |
922 | | | 1003 | | |
923 | |->vmeta_tsets1: | 1004 | |->vmeta_tsets1: |
@@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
967 | | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | 1048 | | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) |
968 | | // Returns TValue * (finished) or NULL (metamethod). | 1049 | | // Returns TValue * (finished) or NULL (metamethod). |
969 | | cmplwi CRET1, 0 | 1050 | | cmplwi CRET1, 0 |
1051 | |.if FPU | ||
970 | | lfdx f0, BASE, RA | 1052 | | lfdx f0, BASE, RA |
1053 | |.else | ||
1054 | | lwzux TMP2, RA, BASE | ||
1055 | | lwz TMP3, 4(RA) | ||
1056 | |.endif | ||
971 | | beq >3 | 1057 | | beq >3 |
972 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | 1058 | | // NOBARRIER: lj_meta_tset ensures the table is not black. |
973 | | ins_next1 | 1059 | | ins_next1 |
1060 | |.if FPU | ||
974 | | stfd f0, 0(CRET1) | 1061 | | stfd f0, 0(CRET1) |
1062 | |.else | ||
1063 | | stw TMP2, 0(CRET1) | ||
1064 | | stw TMP3, 4(CRET1) | ||
1065 | |.endif | ||
975 | | ins_next2 | 1066 | | ins_next2 |
976 | | | 1067 | | |
977 | |3: // Call __newindex metamethod. | 1068 | |3: // Call __newindex metamethod. |
@@ -982,9 +1073,28 @@ static void build_subroutines(BuildCtx *ctx) | |||
982 | | add PC, TMP1, BASE | 1073 | | add PC, TMP1, BASE |
983 | | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | 1074 | | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. |
984 | | li NARGS8:RC, 24 // 3 args for func(t, k, v) | 1075 | | li NARGS8:RC, 24 // 3 args for func(t, k, v) |
1076 | |.if FPU | ||
985 | | stfd f0, 16(BASE) // Copy value to third argument. | 1077 | | stfd f0, 16(BASE) // Copy value to third argument. |
1078 | |.else | ||
1079 | | stw TMP2, 16(BASE) | ||
1080 | | stw TMP3, 20(BASE) | ||
1081 | |.endif | ||
986 | | b ->vm_call_dispatch_f | 1082 | | b ->vm_call_dispatch_f |
987 | | | 1083 | | |
1084 | |->vmeta_tsetr: | ||
1085 | | stp BASE, L->base | ||
1086 | | mr CARG1, L | ||
1087 | | stw PC, SAVE_PC | ||
1088 | | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
1089 | | // Returns TValue *. | ||
1090 | |.if FPU | ||
1091 | | stfd f14, 0(CRET1) | ||
1092 | |.else | ||
1093 | | stw SAVE0, 0(CRET1) | ||
1094 | | stw SAVE1, 4(CRET1) | ||
1095 | |.endif | ||
1096 | | b ->cont_nop | ||
1097 | | | ||
988 | |//-- Comparison metamethods --------------------------------------------- | 1098 | |//-- Comparison metamethods --------------------------------------------- |
989 | | | 1099 | | |
990 | |->vmeta_comp: | 1100 | |->vmeta_comp: |
@@ -1021,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1021 | | | 1131 | | |
1022 | |->cont_ra: // RA = resultptr | 1132 | |->cont_ra: // RA = resultptr |
1023 | | lwz INS, -4(PC) | 1133 | | lwz INS, -4(PC) |
1134 | |.if FPU | ||
1024 | | lfd f0, 0(RA) | 1135 | | lfd f0, 0(RA) |
1136 | |.else | ||
1137 | | lwz CARG1, 0(RA) | ||
1138 | | lwz CARG2, 4(RA) | ||
1139 | |.endif | ||
1025 | | decode_RA8 TMP1, INS | 1140 | | decode_RA8 TMP1, INS |
1141 | |.if FPU | ||
1026 | | stfdx f0, BASE, TMP1 | 1142 | | stfdx f0, BASE, TMP1 |
1143 | |.else | ||
1144 | | stwux CARG1, TMP1, BASE | ||
1145 | | stw CARG2, 4(TMP1) | ||
1146 | |.endif | ||
1027 | | b ->cont_nop | 1147 | | b ->cont_nop |
1028 | | | 1148 | | |
1029 | |->cont_condt: // RA = resultptr | 1149 | |->cont_condt: // RA = resultptr |
@@ -1063,6 +1183,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
1063 | | b <3 | 1183 | | b <3 |
1064 | |.endif | 1184 | |.endif |
1065 | | | 1185 | | |
1186 | |->vmeta_istype: | ||
1187 | | subi PC, PC, 4 | ||
1188 | | stp BASE, L->base | ||
1189 | | srwi CARG2, RA, 3 | ||
1190 | | mr CARG1, L | ||
1191 | | srwi CARG3, RD, 3 | ||
1192 | | stw PC, SAVE_PC | ||
1193 | | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
1194 | | b ->cont_nop | ||
1195 | | | ||
1066 | |//-- Arithmetic metamethods --------------------------------------------- | 1196 | |//-- Arithmetic metamethods --------------------------------------------- |
1067 | | | 1197 | | |
1068 | |->vmeta_arith_nv: | 1198 | |->vmeta_arith_nv: |
@@ -1219,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1219 | |.macro .ffunc_n, name | 1349 | |.macro .ffunc_n, name |
1220 | |->ff_ .. name: | 1350 | |->ff_ .. name: |
1221 | | cmplwi NARGS8:RC, 8 | 1351 | | cmplwi NARGS8:RC, 8 |
1222 | | lwz CARG3, 0(BASE) | 1352 | | lwz CARG1, 0(BASE) |
1353 | |.if FPU | ||
1223 | | lfd FARG1, 0(BASE) | 1354 | | lfd FARG1, 0(BASE) |
1355 | |.else | ||
1356 | | lwz CARG2, 4(BASE) | ||
1357 | |.endif | ||
1224 | | blt ->fff_fallback | 1358 | | blt ->fff_fallback |
1225 | | checknum CARG3; bge ->fff_fallback | 1359 | | checknum CARG1; bge ->fff_fallback |
1226 | |.endmacro | 1360 | |.endmacro |
1227 | | | 1361 | | |
1228 | |.macro .ffunc_nn, name | 1362 | |.macro .ffunc_nn, name |
1229 | |->ff_ .. name: | 1363 | |->ff_ .. name: |
1230 | | cmplwi NARGS8:RC, 16 | 1364 | | cmplwi NARGS8:RC, 16 |
1231 | | lwz CARG3, 0(BASE) | 1365 | | lwz CARG1, 0(BASE) |
1366 | |.if FPU | ||
1232 | | lfd FARG1, 0(BASE) | 1367 | | lfd FARG1, 0(BASE) |
1233 | | lwz CARG4, 8(BASE) | 1368 | | lwz CARG3, 8(BASE) |
1234 | | lfd FARG2, 8(BASE) | 1369 | | lfd FARG2, 8(BASE) |
1370 | |.else | ||
1371 | | lwz CARG2, 4(BASE) | ||
1372 | | lwz CARG3, 8(BASE) | ||
1373 | | lwz CARG4, 12(BASE) | ||
1374 | |.endif | ||
1235 | | blt ->fff_fallback | 1375 | | blt ->fff_fallback |
1376 | | checknum CARG1; bge ->fff_fallback | ||
1236 | | checknum CARG3; bge ->fff_fallback | 1377 | | checknum CARG3; bge ->fff_fallback |
1237 | | checknum CARG4; bge ->fff_fallback | ||
1238 | |.endmacro | 1378 | |.endmacro |
1239 | | | 1379 | | |
1240 | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. | 1380 | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. |
@@ -1255,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
1255 | | bge cr1, ->fff_fallback | 1395 | | bge cr1, ->fff_fallback |
1256 | | stw CARG3, 0(RA) | 1396 | | stw CARG3, 0(RA) |
1257 | | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | 1397 | | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. |
1398 | | addi TMP1, BASE, 8 | ||
1399 | | add TMP2, RA, NARGS8:RC | ||
1258 | | stw CARG1, 4(RA) | 1400 | | stw CARG1, 4(RA) |
1259 | | beq ->fff_res // Done if exactly 1 argument. | 1401 | | beq ->fff_res // Done if exactly 1 argument. |
1260 | | li TMP1, 8 | ||
1261 | | subi RC, RC, 8 | ||
1262 | |1: | 1402 | |1: |
1263 | | cmplw TMP1, RC | 1403 | | cmplw TMP1, TMP2 |
1264 | | lfdx f0, BASE, TMP1 | 1404 | |.if FPU |
1265 | | stfdx f0, RA, TMP1 | 1405 | | lfd f0, 0(TMP1) |
1406 | | stfd f0, -8(TMP1) // Copy down one slot, same as the soft-float path. | ||
1407 | |.else | ||
1408 | | lwz CARG1, 0(TMP1) | ||
1409 | | lwz CARG2, 4(TMP1) | ||
1410 | | stw CARG1, -8(TMP1) | ||
1411 | | stw CARG2, -4(TMP1) | ||
1412 | |.endif | ||
1266 | | addi TMP1, TMP1, 8 | 1413 | | addi TMP1, TMP1, 8 |
1267 | | bney <1 | 1414 | | bney <1 |
1268 | | b ->fff_res | 1415 | | b ->fff_res |
@@ -1277,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1277 | | orc TMP1, TMP2, TMP0 | 1424 | | orc TMP1, TMP2, TMP0 |
1278 | | addi TMP1, TMP1, ~LJ_TISNUM+1 | 1425 | | addi TMP1, TMP1, ~LJ_TISNUM+1 |
1279 | | slwi TMP1, TMP1, 3 | 1426 | | slwi TMP1, TMP1, 3 |
1427 | |.if FPU | ||
1280 | | la TMP2, CFUNC:RB->upvalue | 1428 | | la TMP2, CFUNC:RB->upvalue |
1281 | | lfdx FARG1, TMP2, TMP1 | 1429 | | lfdx FARG1, TMP2, TMP1 |
1430 | |.else | ||
1431 | | add TMP1, CFUNC:RB, TMP1 | ||
1432 | | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi | ||
1433 | | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo | ||
1434 | |.endif | ||
1282 | | b ->fff_resn | 1435 | | b ->fff_resn |
1283 | | | 1436 | | |
1284 | |//-- Base library: getters and setters --------------------------------- | 1437 | |//-- Base library: getters and setters --------------------------------- |
@@ -1294,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1294 | | beq ->fff_restv | 1447 | | beq ->fff_restv |
1295 | | lwz TMP0, TAB:CARG1->hmask | 1448 | | lwz TMP0, TAB:CARG1->hmask |
1296 | | li CARG3, LJ_TTAB // Use metatable as default result. | 1449 | | li CARG3, LJ_TTAB // Use metatable as default result. |
1297 | | lwz TMP1, STR:RC->hash | 1450 | | lwz TMP1, STR:RC->sid |
1298 | | lwz NODE:TMP2, TAB:CARG1->node | 1451 | | lwz NODE:TMP2, TAB:CARG1->node |
1299 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 1452 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask |
1300 | | slwi TMP0, TMP1, 5 | 1453 | | slwi TMP0, TMP1, 5 |
1301 | | slwi TMP1, TMP1, 3 | 1454 | | slwi TMP1, TMP1, 3 |
1302 | | sub TMP1, TMP0, TMP1 | 1455 | | sub TMP1, TMP0, TMP1 |
@@ -1356,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1356 | | mr CARG1, L | 1509 | | mr CARG1, L |
1357 | | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | 1510 | | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |
1358 | | // Returns cTValue *. | 1511 | | // Returns cTValue *. |
1512 | |.if FPU | ||
1359 | | lfd FARG1, 0(CRET1) | 1513 | | lfd FARG1, 0(CRET1) |
1514 | |.else | ||
1515 | | lwz CARG2, 4(CRET1) | ||
1516 | | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. | ||
1517 | |.endif | ||
1360 | | b ->fff_resn | 1518 | | b ->fff_resn |
1361 | | | 1519 | | |
1362 | |//-- Base library: conversions ------------------------------------------ | 1520 | |//-- Base library: conversions ------------------------------------------ |
@@ -1365,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1365 | | // Only handles the number case inline (without a base argument). | 1523 | | // Only handles the number case inline (without a base argument). |
1366 | | cmplwi NARGS8:RC, 8 | 1524 | | cmplwi NARGS8:RC, 8 |
1367 | | lwz CARG1, 0(BASE) | 1525 | | lwz CARG1, 0(BASE) |
1526 | |.if FPU | ||
1368 | | lfd FARG1, 0(BASE) | 1527 | | lfd FARG1, 0(BASE) |
1528 | |.else | ||
1529 | | lwz CARG2, 4(BASE) | ||
1530 | |.endif | ||
1369 | | bne ->fff_fallback // Exactly one argument. | 1531 | | bne ->fff_fallback // Exactly one argument. |
1370 | | checknum CARG1; bgt ->fff_fallback | 1532 | | checknum CARG1; bgt ->fff_fallback |
1371 | | b ->fff_resn | 1533 | | b ->fff_resn |
@@ -1387,9 +1549,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1387 | | mr CARG1, L | 1549 | | mr CARG1, L |
1388 | | mr CARG2, BASE | 1550 | | mr CARG2, BASE |
1389 | |.if DUALNUM | 1551 | |.if DUALNUM |
1390 | | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) | 1552 | | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) |
1391 | |.else | 1553 | |.else |
1392 | | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) | 1554 | | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np) |
1393 | |.endif | 1555 | |.endif |
1394 | | // Returns GCstr *. | 1556 | | // Returns GCstr *. |
1395 | | li CARG3, LJ_TSTR | 1557 | | li CARG3, LJ_TSTR |
@@ -1397,32 +1559,24 @@ static void build_subroutines(BuildCtx *ctx) | |||
1397 | | | 1559 | | |
1398 | |//-- Base library: iterators ------------------------------------------- | 1560 | |//-- Base library: iterators ------------------------------------------- |
1399 | | | 1561 | | |
1400 | |.ffunc next | 1562 | |.ffunc_1 next |
1401 | | cmplwi NARGS8:RC, 8 | ||
1402 | | lwz CARG1, 0(BASE) | ||
1403 | | lwz TAB:CARG2, 4(BASE) | ||
1404 | | blt ->fff_fallback | ||
1405 | | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. | 1563 | | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. |
1406 | | checktab CARG1 | 1564 | | checktab CARG3 |
1407 | | lwz PC, FRAME_PC(BASE) | 1565 | | lwz PC, FRAME_PC(BASE) |
1408 | | bne ->fff_fallback | 1566 | | bne ->fff_fallback |
1409 | | stp BASE, L->base // Add frame since C call can throw. | 1567 | | la CARG2, 8(BASE) |
1410 | | mr CARG1, L | 1568 | | la CARG3, -8(BASE) |
1411 | | stp BASE, L->top // Dummy frame length is ok. | 1569 | | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) |
1412 | | la CARG3, 8(BASE) | 1570 | | // Returns 1=found, 0=end, -1=error. |
1413 | | stw PC, SAVE_PC | 1571 | | cmpwi CRET1, 0 |
1414 | | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | ||
1415 | | // Returns 0 at end of traversal. | ||
1416 | | cmplwi CRET1, 0 | ||
1417 | | li CARG3, LJ_TNIL | ||
1418 | | beq ->fff_restv // End of traversal: return nil. | ||
1419 | | lfd f0, 8(BASE) // Copy key and value to results. | ||
1420 | | la RA, -8(BASE) | 1572 | | la RA, -8(BASE) |
1421 | | lfd f1, 16(BASE) | ||
1422 | | stfd f0, 0(RA) | ||
1423 | | li RD, (2+1)*8 | 1573 | | li RD, (2+1)*8 |
1424 | | stfd f1, 8(RA) | 1574 | | bgt ->fff_res // Found key/value. |
1425 | | b ->fff_res | 1575 | | li CARG3, LJ_TNIL |
1576 | | beq ->fff_restv // End of traversal: return nil. | ||
1577 | | lwz CFUNC:RB, FRAME_FUNC(BASE) | ||
1578 | | li NARGS8:RC, 2*8 | ||
1579 | | b ->fff_fallback // Invalid key. | ||
1426 | | | 1580 | | |
1427 | |.ffunc_1 pairs | 1581 | |.ffunc_1 pairs |
1428 | | checktab CARG3 | 1582 | | checktab CARG3 |
@@ -1430,17 +1584,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1430 | | bne ->fff_fallback | 1584 | | bne ->fff_fallback |
1431 | #if LJ_52 | 1585 | #if LJ_52 |
1432 | | lwz TAB:TMP2, TAB:CARG1->metatable | 1586 | | lwz TAB:TMP2, TAB:CARG1->metatable |
1587 | |.if FPU | ||
1433 | | lfd f0, CFUNC:RB->upvalue[0] | 1588 | | lfd f0, CFUNC:RB->upvalue[0] |
1589 | |.else | ||
1590 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1591 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1592 | |.endif | ||
1434 | | cmplwi TAB:TMP2, 0 | 1593 | | cmplwi TAB:TMP2, 0 |
1435 | | la RA, -8(BASE) | 1594 | | la RA, -8(BASE) |
1436 | | bne ->fff_fallback | 1595 | | bne ->fff_fallback |
1437 | #else | 1596 | #else |
1597 | |.if FPU | ||
1438 | | lfd f0, CFUNC:RB->upvalue[0] | 1598 | | lfd f0, CFUNC:RB->upvalue[0] |
1599 | |.else | ||
1600 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1601 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1602 | |.endif | ||
1439 | | la RA, -8(BASE) | 1603 | | la RA, -8(BASE) |
1440 | #endif | 1604 | #endif |
1441 | | stw TISNIL, 8(BASE) | 1605 | | stw TISNIL, 8(BASE) |
1442 | | li RD, (3+1)*8 | 1606 | | li RD, (3+1)*8 |
1607 | |.if FPU | ||
1443 | | stfd f0, 0(RA) | 1608 | | stfd f0, 0(RA) |
1609 | |.else | ||
1610 | | stw TMP0, 0(RA) | ||
1611 | | stw TMP1, 4(RA) | ||
1612 | |.endif | ||
1444 | | b ->fff_res | 1613 | | b ->fff_res |
1445 | | | 1614 | | |
1446 | |.ffunc ipairs_aux | 1615 | |.ffunc ipairs_aux |
@@ -1486,14 +1655,24 @@ static void build_subroutines(BuildCtx *ctx) | |||
1486 | | stfd FARG2, 0(RA) | 1655 | | stfd FARG2, 0(RA) |
1487 | |.endif | 1656 | |.endif |
1488 | | ble >2 // Not in array part? | 1657 | | ble >2 // Not in array part? |
1658 | |.if FPU | ||
1489 | | lwzx TMP2, TMP1, TMP3 | 1659 | | lwzx TMP2, TMP1, TMP3 |
1490 | | lfdx f0, TMP1, TMP3 | 1660 | | lfdx f0, TMP1, TMP3 |
1661 | |.else | ||
1662 | | lwzux TMP2, TMP1, TMP3 | ||
1663 | | lwz TMP3, 4(TMP1) | ||
1664 | |.endif | ||
1491 | |1: | 1665 | |1: |
1492 | | checknil TMP2 | 1666 | | checknil TMP2 |
1493 | | li RD, (0+1)*8 | 1667 | | li RD, (0+1)*8 |
1494 | | beq ->fff_res // End of iteration, return 0 results. | 1668 | | beq ->fff_res // End of iteration, return 0 results. |
1495 | | li RD, (2+1)*8 | 1669 | | li RD, (2+1)*8 |
1670 | |.if FPU | ||
1496 | | stfd f0, 8(RA) | 1671 | | stfd f0, 8(RA) |
1672 | |.else | ||
1673 | | stw TMP2, 8(RA) | ||
1674 | | stw TMP3, 12(RA) | ||
1675 | |.endif | ||
1497 | | b ->fff_res | 1676 | | b ->fff_res |
1498 | |2: // Check for empty hash part first. Otherwise call C function. | 1677 | |2: // Check for empty hash part first. Otherwise call C function. |
1499 | | lwz TMP0, TAB:CARG1->hmask | 1678 | | lwz TMP0, TAB:CARG1->hmask |
@@ -1507,7 +1686,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1507 | | li RD, (0+1)*8 | 1686 | | li RD, (0+1)*8 |
1508 | | beq ->fff_res | 1687 | | beq ->fff_res |
1509 | | lwz TMP2, 0(CRET1) | 1688 | | lwz TMP2, 0(CRET1) |
1689 | |.if FPU | ||
1510 | | lfd f0, 0(CRET1) | 1690 | | lfd f0, 0(CRET1) |
1691 | |.else | ||
1692 | | lwz TMP3, 4(CRET1) | ||
1693 | |.endif | ||
1511 | | b <1 | 1694 | | b <1 |
1512 | | | 1695 | | |
1513 | |.ffunc_1 ipairs | 1696 | |.ffunc_1 ipairs |
@@ -1516,12 +1699,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1516 | | bne ->fff_fallback | 1699 | | bne ->fff_fallback |
1517 | #if LJ_52 | 1700 | #if LJ_52 |
1518 | | lwz TAB:TMP2, TAB:CARG1->metatable | 1701 | | lwz TAB:TMP2, TAB:CARG1->metatable |
1702 | |.if FPU | ||
1519 | | lfd f0, CFUNC:RB->upvalue[0] | 1703 | | lfd f0, CFUNC:RB->upvalue[0] |
1704 | |.else | ||
1705 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1706 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1707 | |.endif | ||
1520 | | cmplwi TAB:TMP2, 0 | 1708 | | cmplwi TAB:TMP2, 0 |
1521 | | la RA, -8(BASE) | 1709 | | la RA, -8(BASE) |
1522 | | bne ->fff_fallback | 1710 | | bne ->fff_fallback |
1523 | #else | 1711 | #else |
1712 | |.if FPU | ||
1524 | | lfd f0, CFUNC:RB->upvalue[0] | 1713 | | lfd f0, CFUNC:RB->upvalue[0] |
1714 | |.else | ||
1715 | | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi | ||
1716 | | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo | ||
1717 | |.endif | ||
1525 | | la RA, -8(BASE) | 1718 | | la RA, -8(BASE) |
1526 | #endif | 1719 | #endif |
1527 | |.if DUALNUM | 1720 | |.if DUALNUM |
@@ -1531,7 +1724,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1531 | |.endif | 1724 | |.endif |
1532 | | stw ZERO, 12(BASE) | 1725 | | stw ZERO, 12(BASE) |
1533 | | li RD, (3+1)*8 | 1726 | | li RD, (3+1)*8 |
1727 | |.if FPU | ||
1534 | | stfd f0, 0(RA) | 1728 | | stfd f0, 0(RA) |
1729 | |.else | ||
1730 | | stw TMP0, 0(RA) | ||
1731 | | stw TMP1, 4(RA) | ||
1732 | |.endif | ||
1535 | | b ->fff_res | 1733 | | b ->fff_res |
1536 | | | 1734 | | |
1537 | |//-- Base library: catch errors ---------------------------------------- | 1735 | |//-- Base library: catch errors ---------------------------------------- |
@@ -1550,19 +1748,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1550 | | | 1748 | | |
1551 | |.ffunc xpcall | 1749 | |.ffunc xpcall |
1552 | | cmplwi NARGS8:RC, 16 | 1750 | | cmplwi NARGS8:RC, 16 |
1553 | | lwz CARG4, 8(BASE) | 1751 | | lwz CARG3, 8(BASE) |
1752 | |.if FPU | ||
1554 | | lfd FARG2, 8(BASE) | 1753 | | lfd FARG2, 8(BASE) |
1555 | | lfd FARG1, 0(BASE) | 1754 | | lfd FARG1, 0(BASE) |
1755 | |.else | ||
1756 | | lwz CARG1, 0(BASE) | ||
1757 | | lwz CARG2, 4(BASE) | ||
1758 | | lwz CARG4, 12(BASE) | ||
1759 | |.endif | ||
1556 | | blt ->fff_fallback | 1760 | | blt ->fff_fallback |
1557 | | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) | 1761 | | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) |
1558 | | mr TMP2, BASE | 1762 | | mr TMP2, BASE |
1559 | | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. | 1763 | | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. |
1560 | | la BASE, 16(BASE) | 1764 | | la BASE, 16(BASE) |
1561 | | // Remember active hook before pcall. | 1765 | | // Remember active hook before pcall. |
1562 | | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 | 1766 | | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 |
1767 | |.if FPU | ||
1563 | | stfd FARG2, 0(TMP2) // Swap function and traceback. | 1768 | | stfd FARG2, 0(TMP2) // Swap function and traceback. |
1564 | | subi NARGS8:RC, NARGS8:RC, 16 | ||
1565 | | stfd FARG1, 8(TMP2) | 1769 | | stfd FARG1, 8(TMP2) |
1770 | |.else | ||
1771 | | stw CARG3, 0(TMP2) | ||
1772 | | stw CARG4, 4(TMP2) | ||
1773 | | stw CARG1, 8(TMP2) | ||
1774 | | stw CARG2, 12(TMP2) | ||
1775 | |.endif | ||
1776 | | subi NARGS8:RC, NARGS8:RC, 16 | ||
1566 | | addi PC, TMP1, 16+FRAME_PCALL | 1777 | | addi PC, TMP1, 16+FRAME_PCALL |
1567 | | b ->vm_call_dispatch | 1778 | | b ->vm_call_dispatch |
1568 | | | 1779 | | |
@@ -1605,9 +1816,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
1605 | | stp BASE, L->top | 1816 | | stp BASE, L->top |
1606 | |2: // Move args to coroutine. | 1817 | |2: // Move args to coroutine. |
1607 | | cmpw TMP1, NARGS8:RC | 1818 | | cmpw TMP1, NARGS8:RC |
1819 | |.if FPU | ||
1608 | | lfdx f0, BASE, TMP1 | 1820 | | lfdx f0, BASE, TMP1 |
1821 | |.else | ||
1822 | | add CARG3, BASE, TMP1 | ||
1823 | | lwz TMP2, 0(CARG3) | ||
1824 | | lwz TMP3, 4(CARG3) | ||
1825 | |.endif | ||
1609 | | beq >3 | 1826 | | beq >3 |
1827 | |.if FPU | ||
1610 | | stfdx f0, CARG2, TMP1 | 1828 | | stfdx f0, CARG2, TMP1 |
1829 | |.else | ||
1830 | | add CARG3, CARG2, TMP1 | ||
1831 | | stw TMP2, 0(CARG3) | ||
1832 | | stw TMP3, 4(CARG3) | ||
1833 | |.endif | ||
1611 | | addi TMP1, TMP1, 8 | 1834 | | addi TMP1, TMP1, 8 |
1612 | | b <2 | 1835 | | b <2 |
1613 | |3: | 1836 | |3: |
@@ -1622,6 +1845,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1622 | | lp TMP3, L:SAVE0->top | 1845 | | lp TMP3, L:SAVE0->top |
1623 | | li_vmstate INTERP | 1846 | | li_vmstate INTERP |
1624 | | lp BASE, L->base | 1847 | | lp BASE, L->base |
1848 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
1625 | | st_vmstate | 1849 | | st_vmstate |
1626 | | bgt >8 | 1850 | | bgt >8 |
1627 | | sub RD, TMP3, TMP2 | 1851 | | sub RD, TMP3, TMP2 |
@@ -1637,8 +1861,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1637 | | stp TMP2, L:SAVE0->top // Clear coroutine stack. | 1861 | | stp TMP2, L:SAVE0->top // Clear coroutine stack. |
1638 | |5: // Move results from coroutine. | 1862 | |5: // Move results from coroutine. |
1639 | | cmplw TMP1, TMP3 | 1863 | | cmplw TMP1, TMP3 |
1864 | |.if FPU | ||
1640 | | lfdx f0, TMP2, TMP1 | 1865 | | lfdx f0, TMP2, TMP1 |
1641 | | stfdx f0, BASE, TMP1 | 1866 | | stfdx f0, BASE, TMP1 |
1867 | |.else | ||
1868 | | add CARG3, TMP2, TMP1 | ||
1869 | | lwz CARG1, 0(CARG3) | ||
1870 | | lwz CARG2, 4(CARG3) | ||
1871 | | add CARG3, BASE, TMP1 | ||
1872 | | stw CARG1, 0(CARG3) | ||
1873 | | stw CARG2, 4(CARG3) | ||
1874 | |.endif | ||
1642 | | addi TMP1, TMP1, 8 | 1875 | | addi TMP1, TMP1, 8 |
1643 | | bne <5 | 1876 | | bne <5 |
1644 | |6: | 1877 | |6: |
@@ -1663,12 +1896,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1663 | | andix. TMP0, PC, FRAME_TYPE | 1896 | | andix. TMP0, PC, FRAME_TYPE |
1664 | | la TMP3, -8(TMP3) | 1897 | | la TMP3, -8(TMP3) |
1665 | | li TMP1, LJ_TFALSE | 1898 | | li TMP1, LJ_TFALSE |
1899 | |.if FPU | ||
1666 | | lfd f0, 0(TMP3) | 1900 | | lfd f0, 0(TMP3) |
1901 | |.else | ||
1902 | | lwz CARG1, 0(TMP3) | ||
1903 | | lwz CARG2, 4(TMP3) | ||
1904 | |.endif | ||
1667 | | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. | 1905 | | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. |
1668 | | li RD, (2+1)*8 | 1906 | | li RD, (2+1)*8 |
1669 | | stw TMP1, -8(BASE) // Prepend false to results. | 1907 | | stw TMP1, -8(BASE) // Prepend false to results. |
1670 | | la RA, -8(BASE) | 1908 | | la RA, -8(BASE) |
1909 | |.if FPU | ||
1671 | | stfd f0, 0(BASE) // Copy error message. | 1910 | | stfd f0, 0(BASE) // Copy error message. |
1911 | |.else | ||
1912 | | stw CARG1, 0(BASE) // Copy error message. | ||
1913 | | stw CARG2, 4(BASE) | ||
1914 | |.endif | ||
1672 | | b <7 | 1915 | | b <7 |
1673 | |.else | 1916 | |.else |
1674 | | mr CARG1, L | 1917 | | mr CARG1, L |
@@ -1847,7 +2090,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1847 | | lus CARG1, 0x8000 // -(2^31). | 2090 | | lus CARG1, 0x8000 // -(2^31). |
1848 | | beqy ->fff_resi | 2091 | | beqy ->fff_resi |
1849 | |5: | 2092 | |5: |
2093 | |.if FPU | ||
1850 | | lfd FARG1, 0(BASE) | 2094 | | lfd FARG1, 0(BASE) |
2095 | |.else | ||
2096 | | lwz CARG1, 0(BASE) | ||
2097 | | lwz CARG2, 4(BASE) | ||
2098 | |.endif | ||
1851 | | blex func | 2099 | | blex func |
1852 | | b ->fff_resn | 2100 | | b ->fff_resn |
1853 | |.endmacro | 2101 | |.endmacro |
@@ -1871,10 +2119,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1871 | | | 2119 | | |
1872 | |.ffunc math_log | 2120 | |.ffunc math_log |
1873 | | cmplwi NARGS8:RC, 8 | 2121 | | cmplwi NARGS8:RC, 8 |
1874 | | lwz CARG3, 0(BASE) | 2122 | | lwz CARG1, 0(BASE) |
1875 | | lfd FARG1, 0(BASE) | ||
1876 | | bne ->fff_fallback // Need exactly 1 argument. | 2123 | | bne ->fff_fallback // Need exactly 1 argument. |
1877 | | checknum CARG3; bge ->fff_fallback | 2124 | | checknum CARG1; bge ->fff_fallback |
2125 | |.if FPU | ||
2126 | | lfd FARG1, 0(BASE) | ||
2127 | |.else | ||
2128 | | lwz CARG2, 4(BASE) | ||
2129 | |.endif | ||
1878 | | blex log | 2130 | | blex log |
1879 | | b ->fff_resn | 2131 | | b ->fff_resn |
1880 | | | 2132 | | |
@@ -1893,26 +2145,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
1893 | | math_extern2 atan2 | 2145 | | math_extern2 atan2 |
1894 | | math_extern2 fmod | 2146 | | math_extern2 fmod |
1895 | | | 2147 | | |
1896 | |->ff_math_deg: | ||
1897 | |.ffunc_n math_rad | ||
1898 | | lfd FARG2, CFUNC:RB->upvalue[0] | ||
1899 | | fmul FARG1, FARG1, FARG2 | ||
1900 | | b ->fff_resn | ||
1901 | | | ||
1902 | |.if DUALNUM | 2148 | |.if DUALNUM |
1903 | |.ffunc math_ldexp | 2149 | |.ffunc math_ldexp |
1904 | | cmplwi NARGS8:RC, 16 | 2150 | | cmplwi NARGS8:RC, 16 |
1905 | | lwz CARG3, 0(BASE) | 2151 | | lwz TMP0, 0(BASE) |
2152 | |.if FPU | ||
1906 | | lfd FARG1, 0(BASE) | 2153 | | lfd FARG1, 0(BASE) |
1907 | | lwz CARG4, 8(BASE) | 2154 | |.else |
2155 | | lwz CARG1, 0(BASE) | ||
2156 | | lwz CARG2, 4(BASE) | ||
2157 | |.endif | ||
2158 | | lwz TMP1, 8(BASE) | ||
1908 | |.if GPR64 | 2159 | |.if GPR64 |
1909 | | lwz CARG2, 12(BASE) | 2160 | | lwz CARG2, 12(BASE) |
1910 | |.else | 2161 | |.elif FPU |
1911 | | lwz CARG1, 12(BASE) | 2162 | | lwz CARG1, 12(BASE) |
2163 | |.else | ||
2164 | | lwz CARG3, 12(BASE) | ||
1912 | |.endif | 2165 | |.endif |
1913 | | blt ->fff_fallback | 2166 | | blt ->fff_fallback |
1914 | | checknum CARG3; bge ->fff_fallback | 2167 | | checknum TMP0; bge ->fff_fallback |
1915 | | checknum CARG4; bne ->fff_fallback | 2168 | | checknum TMP1; bne ->fff_fallback |
1916 | |.else | 2169 | |.else |
1917 | |.ffunc_nn math_ldexp | 2170 | |.ffunc_nn math_ldexp |
1918 | |.if GPR64 | 2171 | |.if GPR64 |
@@ -1927,8 +2180,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1927 | |.ffunc_n math_frexp | 2180 | |.ffunc_n math_frexp |
1928 | |.if GPR64 | 2181 | |.if GPR64 |
1929 | | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | 2182 | | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) |
1930 | |.else | 2183 | |.elif FPU |
1931 | | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) | 2184 | | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) |
2185 | |.else | ||
2186 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
1932 | |.endif | 2187 | |.endif |
1933 | | lwz PC, FRAME_PC(BASE) | 2188 | | lwz PC, FRAME_PC(BASE) |
1934 | | blex frexp | 2189 | | blex frexp |
@@ -1937,7 +2192,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1937 | |.if not DUALNUM | 2192 | |.if not DUALNUM |
1938 | | tonum_i FARG2, TMP1 | 2193 | | tonum_i FARG2, TMP1 |
1939 | |.endif | 2194 | |.endif |
2195 | |.if FPU | ||
1940 | | stfd FARG1, 0(RA) | 2196 | | stfd FARG1, 0(RA) |
2197 | |.else | ||
2198 | | stw CRET1, 0(RA) | ||
2199 | | stw CRET2, 4(RA) | ||
2200 | |.endif | ||
1941 | | li RD, (2+1)*8 | 2201 | | li RD, (2+1)*8 |
1942 | |.if DUALNUM | 2202 | |.if DUALNUM |
1943 | | stw TISNUM, 8(RA) | 2203 | | stw TISNUM, 8(RA) |
@@ -1950,13 +2210,20 @@ static void build_subroutines(BuildCtx *ctx) | |||
1950 | |.ffunc_n math_modf | 2210 | |.ffunc_n math_modf |
1951 | |.if GPR64 | 2211 | |.if GPR64 |
1952 | | la CARG2, -8(BASE) | 2212 | | la CARG2, -8(BASE) |
1953 | |.else | 2213 | |.elif FPU |
1954 | | la CARG1, -8(BASE) | 2214 | | la CARG1, -8(BASE) |
2215 | |.else | ||
2216 | | la CARG3, -8(BASE) | ||
1955 | |.endif | 2217 | |.endif |
1956 | | lwz PC, FRAME_PC(BASE) | 2218 | | lwz PC, FRAME_PC(BASE) |
1957 | | blex modf | 2219 | | blex modf |
1958 | | la RA, -8(BASE) | 2220 | | la RA, -8(BASE) |
2221 | |.if FPU | ||
1959 | | stfd FARG1, 0(BASE) | 2222 | | stfd FARG1, 0(BASE) |
2223 | |.else | ||
2224 | | stw CRET1, 0(BASE) | ||
2225 | | stw CRET2, 4(BASE) | ||
2226 | |.endif | ||
1960 | | li RD, (2+1)*8 | 2227 | | li RD, (2+1)*8 |
1961 | | b ->fff_res | 2228 | | b ->fff_res |
1962 | | | 2229 | | |
@@ -1964,13 +2231,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
1964 | |.if DUALNUM | 2231 | |.if DUALNUM |
1965 | | .ffunc_1 name | 2232 | | .ffunc_1 name |
1966 | | checknum CARG3 | 2233 | | checknum CARG3 |
1967 | | addi TMP1, BASE, 8 | 2234 | | addi SAVE0, BASE, 8 |
1968 | | add TMP2, BASE, NARGS8:RC | 2235 | | add SAVE1, BASE, NARGS8:RC |
1969 | | bne >4 | 2236 | | bne >4 |
1970 | |1: // Handle integers. | 2237 | |1: // Handle integers. |
1971 | | lwz CARG4, 0(TMP1) | 2238 | | lwz CARG4, 0(SAVE0) |
1972 | | cmplw cr1, TMP1, TMP2 | 2239 | | cmplw cr1, SAVE0, SAVE1 |
1973 | | lwz CARG2, 4(TMP1) | 2240 | | lwz CARG2, 4(SAVE0) |
1974 | | bge cr1, ->fff_resi | 2241 | | bge cr1, ->fff_resi |
1975 | | checknum CARG4 | 2242 | | checknum CARG4 |
1976 | | xoris TMP0, CARG1, 0x8000 | 2243 | | xoris TMP0, CARG1, 0x8000 |
@@ -1987,36 +2254,76 @@ static void build_subroutines(BuildCtx *ctx) | |||
1987 | |.if GPR64 | 2254 | |.if GPR64 |
1988 | | rldicl CARG1, CARG1, 0, 32 | 2255 | | rldicl CARG1, CARG1, 0, 32 |
1989 | |.endif | 2256 | |.endif |
1990 | | addi TMP1, TMP1, 8 | 2257 | | addi SAVE0, SAVE0, 8 |
1991 | | b <1 | 2258 | | b <1 |
1992 | |3: | 2259 | |3: |
1993 | | bge ->fff_fallback | 2260 | | bge ->fff_fallback |
1994 | | // Convert intermediate result to number and continue below. | 2261 | | // Convert intermediate result to number and continue below. |
2262 | |.if FPU | ||
1995 | | tonum_i FARG1, CARG1 | 2263 | | tonum_i FARG1, CARG1 |
1996 | | lfd FARG2, 0(TMP1) | 2264 | | lfd FARG2, 0(SAVE0) |
2265 | |.else | ||
2266 | | mr CARG2, CARG1 | ||
2267 | | bl ->vm_sfi2d_1 | ||
2268 | | lwz CARG3, 0(SAVE0) | ||
2269 | | lwz CARG4, 4(SAVE0) | ||
2270 | |.endif | ||
1997 | | b >6 | 2271 | | b >6 |
1998 | |4: | 2272 | |4: |
2273 | |.if FPU | ||
1999 | | lfd FARG1, 0(BASE) | 2274 | | lfd FARG1, 0(BASE) |
2275 | |.else | ||
2276 | | lwz CARG1, 0(BASE) | ||
2277 | | lwz CARG2, 4(BASE) | ||
2278 | |.endif | ||
2000 | | bge ->fff_fallback | 2279 | | bge ->fff_fallback |
2001 | |5: // Handle numbers. | 2280 | |5: // Handle numbers. |
2002 | | lwz CARG4, 0(TMP1) | 2281 | | lwz CARG3, 0(SAVE0) |
2003 | | cmplw cr1, TMP1, TMP2 | 2282 | | cmplw cr1, SAVE0, SAVE1 |
2004 | | lfd FARG2, 0(TMP1) | 2283 | |.if FPU |
2284 | | lfd FARG2, 0(SAVE0) | ||
2285 | |.else | ||
2286 | | lwz CARG4, 4(SAVE0) | ||
2287 | |.endif | ||
2005 | | bge cr1, ->fff_resn | 2288 | | bge cr1, ->fff_resn |
2006 | | checknum CARG4; bge >7 | 2289 | | checknum CARG3; bge >7 |
2007 | |6: | 2290 | |6: |
2008 | | fsub f0, FARG1, FARG2 | 2291 | | addi SAVE0, SAVE0, 8 |
2009 | | addi TMP1, TMP1, 8 | 2292 | |.if FPU |
2010 | |.if ismax | 2293 | |.if ismax |
2294 | | fsub f0, FARG1, FARG2 | ||
2295 | |.else | ||
2296 | | fsub f0, FARG2, FARG1 | ||
2297 | |.endif | ||
2011 | | fsel FARG1, f0, FARG1, FARG2 | 2298 | | fsel FARG1, f0, FARG1, FARG2 |
2012 | |.else | 2299 | |.else |
2013 | | fsel FARG1, f0, FARG2, FARG1 | 2300 | | stw CARG1, SFSAVE_1 |
2301 | | stw CARG2, SFSAVE_2 | ||
2302 | | stw CARG3, SFSAVE_3 | ||
2303 | | stw CARG4, SFSAVE_4 | ||
2304 | | blex __ledf2 | ||
2305 | | cmpwi CRET1, 0 | ||
2306 | |.if ismax | ||
2307 | | blt >8 | ||
2308 | |.else | ||
2309 | | bge >8 | ||
2310 | |.endif | ||
2311 | | lwz CARG1, SFSAVE_1 | ||
2312 | | lwz CARG2, SFSAVE_2 | ||
2313 | | b <5 | ||
2314 | |8: | ||
2315 | | lwz CARG1, SFSAVE_3 | ||
2316 | | lwz CARG2, SFSAVE_4 | ||
2014 | |.endif | 2317 | |.endif |
2015 | | b <5 | 2318 | | b <5 |
2016 | |7: // Convert integer to number and continue above. | 2319 | |7: // Convert integer to number and continue above. |
2017 | | lwz CARG2, 4(TMP1) | 2320 | | lwz CARG3, 4(SAVE0) |
2018 | | bne ->fff_fallback | 2321 | | bne ->fff_fallback |
2019 | | tonum_i FARG2, CARG2 | 2322 | |.if FPU |
2323 | | tonum_i FARG2, CARG3 | ||
2324 | |.else | ||
2325 | | bl ->vm_sfi2d_2 | ||
2326 | |.endif | ||
2020 | | b <6 | 2327 | | b <6 |
2021 | |.else | 2328 | |.else |
2022 | | .ffunc_n name | 2329 | | .ffunc_n name |
@@ -2028,13 +2335,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2028 | | checknum CARG2 | 2335 | | checknum CARG2 |
2029 | | bge cr1, ->fff_resn | 2336 | | bge cr1, ->fff_resn |
2030 | | bge ->fff_fallback | 2337 | | bge ->fff_fallback |
2031 | | fsub f0, FARG1, FARG2 | ||
2032 | | addi TMP1, TMP1, 8 | ||
2033 | |.if ismax | 2338 | |.if ismax |
2034 | | fsel FARG1, f0, FARG1, FARG2 | 2339 | | fsub f0, FARG1, FARG2 |
2035 | |.else | 2340 | |.else |
2036 | | fsel FARG1, f0, FARG2, FARG1 | 2341 | | fsub f0, FARG2, FARG1 |
2037 | |.endif | 2342 | |.endif |
2343 | | addi TMP1, TMP1, 8 | ||
2344 | | fsel FARG1, f0, FARG1, FARG2 | ||
2038 | | b <1 | 2345 | | b <1 |
2039 | |.endif | 2346 | |.endif |
2040 | |.endmacro | 2347 | |.endmacro |
@@ -2044,11 +2351,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2044 | | | 2351 | | |
2045 | |//-- String library ----------------------------------------------------- | 2352 | |//-- String library ----------------------------------------------------- |
2046 | | | 2353 | | |
2047 | |.ffunc_1 string_len | ||
2048 | | checkstr CARG3; bne ->fff_fallback | ||
2049 | | lwz CRET1, STR:CARG1->len | ||
2050 | | b ->fff_resi | ||
2051 | | | ||
2052 | |.ffunc string_byte // Only handle the 1-arg case here. | 2354 | |.ffunc string_byte // Only handle the 1-arg case here. |
2053 | | cmplwi NARGS8:RC, 8 | 2355 | | cmplwi NARGS8:RC, 8 |
2054 | | lwz CARG3, 0(BASE) | 2356 | | lwz CARG3, 0(BASE) |
@@ -2103,6 +2405,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2103 | | stp BASE, L->base | 2405 | | stp BASE, L->base |
2104 | | stw PC, SAVE_PC | 2406 | | stw PC, SAVE_PC |
2105 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) | 2407 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) |
2408 | |->fff_resstr: | ||
2106 | | // Returns GCstr *. | 2409 | | // Returns GCstr *. |
2107 | | lp BASE, L->base | 2410 | | lp BASE, L->base |
2108 | | li CARG3, LJ_TSTR | 2411 | | li CARG3, LJ_TSTR |
@@ -2180,114 +2483,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
2180 | | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) | 2483 | | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) |
2181 | | b <3 | 2484 | | b <3 |
2182 | | | 2485 | | |
2183 | |.ffunc string_rep // Only handle the 1-char case inline. | 2486 | |.macro ffstring_op, name |
2184 | | ffgccheck | 2487 | | .ffunc string_ .. name |
2185 | | cmplwi NARGS8:RC, 16 | ||
2186 | | lwz TMP0, 0(BASE) | ||
2187 | | lwz STR:CARG1, 4(BASE) | ||
2188 | | lwz CARG4, 8(BASE) | ||
2189 | |.if DUALNUM | ||
2190 | | lwz CARG3, 12(BASE) | ||
2191 | |.else | ||
2192 | | lfd FARG2, 8(BASE) | ||
2193 | |.endif | ||
2194 | | bne ->fff_fallback // Exactly 2 arguments. | ||
2195 | | checkstr TMP0; bne ->fff_fallback | ||
2196 | |.if DUALNUM | ||
2197 | | checknum CARG4; bne ->fff_fallback | ||
2198 | |.else | ||
2199 | | checknum CARG4; bge ->fff_fallback | ||
2200 | | toint CARG3, FARG2 | ||
2201 | |.endif | ||
2202 | | lwz TMP0, STR:CARG1->len | ||
2203 | | cmpwi CARG3, 0 | ||
2204 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
2205 | | ble >2 // Count <= 0? (or non-int) | ||
2206 | | cmplwi TMP0, 1 | ||
2207 | | subi TMP2, CARG3, 1 | ||
2208 | | blt >2 // Zero length string? | ||
2209 | | cmplw cr1, TMP1, CARG3 | ||
2210 | | bne ->fff_fallback // Fallback for > 1-char strings. | ||
2211 | | lbz TMP0, STR:CARG1[1] | ||
2212 | | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
2213 | | blt cr1, ->fff_fallback | ||
2214 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
2215 | | cmplwi TMP2, 0 | ||
2216 | | stbx TMP0, CARG2, TMP2 | ||
2217 | | subi TMP2, TMP2, 1 | ||
2218 | | bne <1 | ||
2219 | | b ->fff_newstr | ||
2220 | |2: // Return empty string. | ||
2221 | | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH) | ||
2222 | | li CARG3, LJ_TSTR | ||
2223 | | b ->fff_restv | ||
2224 | | | ||
2225 | |.ffunc string_reverse | ||
2226 | | ffgccheck | 2488 | | ffgccheck |
2227 | | cmplwi NARGS8:RC, 8 | 2489 | | cmplwi NARGS8:RC, 8 |
2228 | | lwz CARG3, 0(BASE) | 2490 | | lwz CARG3, 0(BASE) |
2229 | | lwz STR:CARG1, 4(BASE) | 2491 | | lwz STR:CARG2, 4(BASE) |
2230 | | blt ->fff_fallback | 2492 | | blt ->fff_fallback |
2231 | | checkstr CARG3 | 2493 | | checkstr CARG3 |
2232 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | 2494 | | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) |
2233 | | bne ->fff_fallback | 2495 | | bne ->fff_fallback |
2234 | | lwz CARG3, STR:CARG1->len | 2496 | | lwz TMP0, SBUF:CARG1->b |
2235 | | la CARG1, #STR(STR:CARG1) | 2497 | | stw L, SBUF:CARG1->L |
2236 | | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | 2498 | | stp BASE, L->base |
2237 | | li TMP2, 0 | 2499 | | stw PC, SAVE_PC |
2238 | | cmplw TMP1, CARG3 | 2500 | | stw TMP0, SBUF:CARG1->w |
2239 | | subi TMP3, CARG3, 1 | 2501 | | bl extern lj_buf_putstr_ .. name |
2240 | | blt ->fff_fallback | 2502 | | bl extern lj_buf_tostr |
2241 | |1: // Reverse string copy. | 2503 | | b ->fff_resstr |
2242 | | cmpwi TMP3, 0 | ||
2243 | | lbzx TMP1, CARG1, TMP2 | ||
2244 | | blty ->fff_newstr | ||
2245 | | stbx TMP1, CARG2, TMP3 | ||
2246 | | subi TMP3, TMP3, 1 | ||
2247 | | addi TMP2, TMP2, 1 | ||
2248 | | b <1 | ||
2249 | | | ||
2250 | |.macro ffstring_case, name, lo | ||
2251 | | .ffunc name | ||
2252 | | ffgccheck | ||
2253 | | cmplwi NARGS8:RC, 8 | ||
2254 | | lwz CARG3, 0(BASE) | ||
2255 | | lwz STR:CARG1, 4(BASE) | ||
2256 | | blt ->fff_fallback | ||
2257 | | checkstr CARG3 | ||
2258 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
2259 | | bne ->fff_fallback | ||
2260 | | lwz CARG3, STR:CARG1->len | ||
2261 | | la CARG1, #STR(STR:CARG1) | ||
2262 | | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
2263 | | cmplw TMP1, CARG3 | ||
2264 | | li TMP2, 0 | ||
2265 | | blt ->fff_fallback | ||
2266 | |1: // ASCII case conversion. | ||
2267 | | cmplw TMP2, CARG3 | ||
2268 | | lbzx TMP1, CARG1, TMP2 | ||
2269 | | bgey ->fff_newstr | ||
2270 | | subi TMP0, TMP1, lo | ||
2271 | | xori TMP3, TMP1, 0x20 | ||
2272 | | addic TMP0, TMP0, -26 | ||
2273 | | subfe TMP3, TMP3, TMP3 | ||
2274 | | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20. | ||
2275 | | xor TMP1, TMP1, TMP3 | ||
2276 | | stbx TMP1, CARG2, TMP2 | ||
2277 | | addi TMP2, TMP2, 1 | ||
2278 | | b <1 | ||
2279 | |.endmacro | 2504 | |.endmacro |
2280 | | | 2505 | | |
2281 | |ffstring_case string_lower, 65 | 2506 | |ffstring_op reverse |
2282 | |ffstring_case string_upper, 97 | 2507 | |ffstring_op lower |
2283 | | | 2508 | |ffstring_op upper |
2284 | |//-- Table library ------------------------------------------------------ | ||
2285 | | | ||
2286 | |.ffunc_1 table_getn | ||
2287 | | checktab CARG3; bne ->fff_fallback | ||
2288 | | bl extern lj_tab_len // (GCtab *t) | ||
2289 | | // Returns uint32_t (but less than 2^31). | ||
2290 | | b ->fff_resi | ||
2291 | | | 2509 | | |
2292 | |//-- Bit library -------------------------------------------------------- | 2510 | |//-- Bit library -------------------------------------------------------- |
2293 | | | 2511 | | |
@@ -2305,28 +2523,37 @@ static void build_subroutines(BuildCtx *ctx) | |||
2305 | | | 2523 | | |
2306 | |.macro .ffunc_bit_op, name, ins | 2524 | |.macro .ffunc_bit_op, name, ins |
2307 | | .ffunc_bit name | 2525 | | .ffunc_bit name |
2308 | | addi TMP1, BASE, 8 | 2526 | | addi SAVE0, BASE, 8 |
2309 | | add TMP2, BASE, NARGS8:RC | 2527 | | add SAVE1, BASE, NARGS8:RC |
2310 | |1: | 2528 | |1: |
2311 | | lwz CARG4, 0(TMP1) | 2529 | | lwz CARG4, 0(SAVE0) |
2312 | | cmplw cr1, TMP1, TMP2 | 2530 | | cmplw cr1, SAVE0, SAVE1 |
2313 | |.if DUALNUM | 2531 | |.if DUALNUM |
2314 | | lwz CARG2, 4(TMP1) | 2532 | | lwz CARG2, 4(SAVE0) |
2315 | |.else | 2533 | |.else |
2316 | | lfd FARG1, 0(TMP1) | 2534 | | lfd FARG1, 0(SAVE0) |
2317 | |.endif | 2535 | |.endif |
2318 | | bgey cr1, ->fff_resi | 2536 | | bgey cr1, ->fff_resi |
2319 | | checknum CARG4 | 2537 | | checknum CARG4 |
2320 | |.if DUALNUM | 2538 | |.if DUALNUM |
2539 | |.if FPU | ||
2321 | | bnel ->fff_bitop_fb | 2540 | | bnel ->fff_bitop_fb |
2322 | |.else | 2541 | |.else |
2542 | | beq >3 | ||
2543 | | stw CARG1, SFSAVE_1 | ||
2544 | | bl ->fff_bitop_fb | ||
2545 | | mr CARG2, CARG1 | ||
2546 | | lwz CARG1, SFSAVE_1 | ||
2547 | |3: | ||
2548 | |.endif | ||
2549 | |.else | ||
2323 | | fadd FARG1, FARG1, TOBIT | 2550 | | fadd FARG1, FARG1, TOBIT |
2324 | | bge ->fff_fallback | 2551 | | bge ->fff_fallback |
2325 | | stfd FARG1, TMPD | 2552 | | stfd FARG1, TMPD |
2326 | | lwz CARG2, TMPD_LO | 2553 | | lwz CARG2, TMPD_LO |
2327 | |.endif | 2554 | |.endif |
2328 | | ins CARG1, CARG1, CARG2 | 2555 | | ins CARG1, CARG1, CARG2 |
2329 | | addi TMP1, TMP1, 8 | 2556 | | addi SAVE0, SAVE0, 8 |
2330 | | b <1 | 2557 | | b <1 |
2331 | |.endmacro | 2558 | |.endmacro |
2332 | | | 2559 | | |
@@ -2348,7 +2575,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2348 | |.macro .ffunc_bit_sh, name, ins, shmod | 2575 | |.macro .ffunc_bit_sh, name, ins, shmod |
2349 | |.if DUALNUM | 2576 | |.if DUALNUM |
2350 | | .ffunc_2 bit_..name | 2577 | | .ffunc_2 bit_..name |
2578 | |.if FPU | ||
2351 | | checknum CARG3; bnel ->fff_tobit_fb | 2579 | | checknum CARG3; bnel ->fff_tobit_fb |
2580 | |.else | ||
2581 | | checknum CARG3; beq >1 | ||
2582 | | bl ->fff_tobit_fb | ||
2583 | | lwz CARG2, 12(BASE) // Conversion polluted CARG2. | ||
2584 | |1: | ||
2585 | |.endif | ||
2352 | | // Note: no inline conversion from number for 2nd argument! | 2586 | | // Note: no inline conversion from number for 2nd argument! |
2353 | | checknum CARG4; bne ->fff_fallback | 2587 | | checknum CARG4; bne ->fff_fallback |
2354 | |.else | 2588 | |.else |
@@ -2385,27 +2619,77 @@ static void build_subroutines(BuildCtx *ctx) | |||
2385 | |->fff_resn: | 2619 | |->fff_resn: |
2386 | | lwz PC, FRAME_PC(BASE) | 2620 | | lwz PC, FRAME_PC(BASE) |
2387 | | la RA, -8(BASE) | 2621 | | la RA, -8(BASE) |
2622 | |.if FPU | ||
2388 | | stfd FARG1, -8(BASE) | 2623 | | stfd FARG1, -8(BASE) |
2624 | |.else | ||
2625 | | stw CARG1, -8(BASE) | ||
2626 | | stw CARG2, -4(BASE) | ||
2627 | |.endif | ||
2389 | | b ->fff_res1 | 2628 | | b ->fff_res1 |
2390 | | | 2629 | | |
2391 | |// Fallback FP number to bit conversion. | 2630 | |// Fallback FP number to bit conversion. |
2392 | |->fff_tobit_fb: | 2631 | |->fff_tobit_fb: |
2393 | |.if DUALNUM | 2632 | |.if DUALNUM |
2633 | |.if FPU | ||
2394 | | lfd FARG1, 0(BASE) | 2634 | | lfd FARG1, 0(BASE) |
2395 | | bgt ->fff_fallback | 2635 | | bgt ->fff_fallback |
2396 | | fadd FARG1, FARG1, TOBIT | 2636 | | fadd FARG1, FARG1, TOBIT |
2397 | | stfd FARG1, TMPD | 2637 | | stfd FARG1, TMPD |
2398 | | lwz CARG1, TMPD_LO | 2638 | | lwz CARG1, TMPD_LO |
2399 | | blr | 2639 | | blr |
2640 | |.else | ||
2641 | | bgt ->fff_fallback | ||
2642 | | mr CARG2, CARG1 | ||
2643 | | mr CARG1, CARG3 | ||
2644 | |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. | ||
2645 | |->vm_tobit: | ||
2646 | | slwi TMP2, CARG1, 1 | ||
2647 | | addis TMP2, TMP2, 0x0020 | ||
2648 | | cmpwi TMP2, 0 | ||
2649 | | bge >2 | ||
2650 | | li TMP1, 0x3e0 | ||
2651 | | srawi TMP2, TMP2, 21 | ||
2652 | | not TMP1, TMP1 | ||
2653 | | sub. TMP2, TMP1, TMP2 | ||
2654 | | cmpwi cr7, CARG1, 0 | ||
2655 | | blt >1 | ||
2656 | | slwi TMP1, CARG1, 11 | ||
2657 | | srwi TMP0, CARG2, 21 | ||
2658 | | oris TMP1, TMP1, 0x8000 | ||
2659 | | or TMP1, TMP1, TMP0 | ||
2660 | | srw CARG1, TMP1, TMP2 | ||
2661 | | bclr 4, 28 // Return if cr7[lt] == 0, no hint. | ||
2662 | | neg CARG1, CARG1 | ||
2663 | | blr | ||
2664 | |1: | ||
2665 | | addi TMP2, TMP2, 21 | ||
2666 | | srw TMP1, CARG2, TMP2 | ||
2667 | | slwi CARG2, CARG1, 12 | ||
2668 | | subfic TMP2, TMP2, 20 | ||
2669 | | slw TMP0, CARG2, TMP2 | ||
2670 | | or CARG1, TMP1, TMP0 | ||
2671 | | bclr 4, 28 // Return if cr7[lt] == 0, no hint. | ||
2672 | | neg CARG1, CARG1 | ||
2673 | | blr | ||
2674 | |2: | ||
2675 | | li CARG1, 0 | ||
2676 | | blr | ||
2677 | |.endif | ||
2400 | |.endif | 2678 | |.endif |
2401 | |->fff_bitop_fb: | 2679 | |->fff_bitop_fb: |
2402 | |.if DUALNUM | 2680 | |.if DUALNUM |
2403 | | lfd FARG1, 0(TMP1) | 2681 | |.if FPU |
2682 | | lfd FARG1, 0(SAVE0) | ||
2404 | | bgt ->fff_fallback | 2683 | | bgt ->fff_fallback |
2405 | | fadd FARG1, FARG1, TOBIT | 2684 | | fadd FARG1, FARG1, TOBIT |
2406 | | stfd FARG1, TMPD | 2685 | | stfd FARG1, TMPD |
2407 | | lwz CARG2, TMPD_LO | 2686 | | lwz CARG2, TMPD_LO |
2408 | | blr | 2687 | | blr |
2688 | |.else | ||
2689 | | bgt ->fff_fallback | ||
2690 | | mr CARG1, CARG4 | ||
2691 | | b ->vm_tobit | ||
2692 | |.endif | ||
2409 | |.endif | 2693 | |.endif |
2410 | | | 2694 | | |
2411 | |//----------------------------------------------------------------------- | 2695 | |//----------------------------------------------------------------------- |
@@ -2589,15 +2873,88 @@ static void build_subroutines(BuildCtx *ctx) | |||
2589 | | mtctr CRET1 | 2873 | | mtctr CRET1 |
2590 | | bctr | 2874 | | bctr |
2591 | | | 2875 | | |
2876 | |->cont_stitch: // Trace stitching. | ||
2877 | |.if JIT | ||
2878 | | // RA = resultptr, RB = meta base | ||
2879 | | lwz INS, -4(PC) | ||
2880 | | lwz TRACE:TMP2, -20(RB) // Save previous trace. | ||
2881 | | addic. TMP1, MULTRES, -8 | ||
2882 | | decode_RA8 RC, INS // Call base. | ||
2883 | | beq >2 | ||
2884 | |1: // Move results down. | ||
2885 | |.if FPU | ||
2886 | | lfd f0, 0(RA) | ||
2887 | |.else | ||
2888 | | lwz CARG1, 0(RA) | ||
2889 | | lwz CARG2, 4(RA) | ||
2890 | |.endif | ||
2891 | | addic. TMP1, TMP1, -8 | ||
2892 | | addi RA, RA, 8 | ||
2893 | |.if FPU | ||
2894 | | stfdx f0, BASE, RC | ||
2895 | |.else | ||
2896 | | add CARG3, BASE, RC | ||
2897 | | stw CARG1, 0(CARG3) | ||
2898 | | stw CARG2, 4(CARG3) | ||
2899 | |.endif | ||
2900 | | addi RC, RC, 8 | ||
2901 | | bne <1 | ||
2902 | |2: | ||
2903 | | decode_RA8 RA, INS | ||
2904 | | decode_RB8 RB, INS | ||
2905 | | add RA, RA, RB | ||
2906 | |3: | ||
2907 | | cmplw RA, RC | ||
2908 | | bgt >9 // More results wanted? | ||
2909 | | | ||
2910 | | lhz TMP3, TRACE:TMP2->traceno | ||
2911 | | lhz RD, TRACE:TMP2->link | ||
2912 | | cmpw RD, TMP3 | ||
2913 | | cmpwi cr1, RD, 0 | ||
2914 | | beq ->cont_nop // Blacklisted. | ||
2915 | | slwi RD, RD, 3 | ||
2916 | | bne cr1, =>BC_JLOOP // Jump to stitched trace. | ||
2917 | | | ||
2918 | | // Stitch a new trace to the previous trace. | ||
2919 | | stw TMP3, DISPATCH_J(exitno)(DISPATCH) | ||
2920 | | stp L, DISPATCH_J(L)(DISPATCH) | ||
2921 | | stp BASE, L->base | ||
2922 | | addi CARG1, DISPATCH, GG_DISP2J | ||
2923 | | mr CARG2, PC | ||
2924 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2925 | | lp BASE, L->base | ||
2926 | | b ->cont_nop | ||
2927 | | | ||
2928 | |9: | ||
2929 | | stwx TISNIL, BASE, RC | ||
2930 | | addi RC, RC, 8 | ||
2931 | | b <3 | ||
2932 | |.endif | ||
2933 | | | ||
2934 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2935 | #if LJ_HASPROFILE | ||
2936 | | mr CARG1, L | ||
2937 | | stw MULTRES, SAVE_MULTRES | ||
2938 | | mr CARG2, PC | ||
2939 | | stp BASE, L->base | ||
2940 | | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2941 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2942 | | lp BASE, L->base | ||
2943 | | subi PC, PC, 4 | ||
2944 | | b ->cont_nop | ||
2945 | #endif | ||
2946 | | | ||
2592 | |//----------------------------------------------------------------------- | 2947 | |//----------------------------------------------------------------------- |
2593 | |//-- Trace exit handler ------------------------------------------------- | 2948 | |//-- Trace exit handler ------------------------------------------------- |
2594 | |//----------------------------------------------------------------------- | 2949 | |//----------------------------------------------------------------------- |
2595 | | | 2950 | | |
2596 | |.macro savex_, a, b, c, d | 2951 | |.macro savex_, a, b, c, d |
2952 | |.if FPU | ||
2597 | | stfd f..a, 16+a*8(sp) | 2953 | | stfd f..a, 16+a*8(sp) |
2598 | | stfd f..b, 16+b*8(sp) | 2954 | | stfd f..b, 16+b*8(sp) |
2599 | | stfd f..c, 16+c*8(sp) | 2955 | | stfd f..c, 16+c*8(sp) |
2600 | | stfd f..d, 16+d*8(sp) | 2956 | | stfd f..d, 16+d*8(sp) |
2957 | |.endif | ||
2601 | |.endmacro | 2958 | |.endmacro |
2602 | | | 2959 | | |
2603 | |->vm_exit_handler: | 2960 | |->vm_exit_handler: |
@@ -2623,16 +2980,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
2623 | | savex_ 20,21,22,23 | 2980 | | savex_ 20,21,22,23 |
2624 | | lhz CARG4, 2(CARG3) // Load trace number. | 2981 | | lhz CARG4, 2(CARG3) // Load trace number. |
2625 | | savex_ 24,25,26,27 | 2982 | | savex_ 24,25,26,27 |
2626 | | lwz L, DISPATCH_GL(jit_L)(DISPATCH) | 2983 | | lwz L, DISPATCH_GL(cur_L)(DISPATCH) |
2627 | | savex_ 28,29,30,31 | 2984 | | savex_ 28,29,30,31 |
2628 | | sub CARG3, TMP0, CARG3 // Compute exit number. | 2985 | | sub CARG3, TMP0, CARG3 // Compute exit number. |
2629 | | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) | 2986 | | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) |
2630 | | srwi CARG3, CARG3, 2 | 2987 | | srwi CARG3, CARG3, 2 |
2631 | | stw L, DISPATCH_J(L)(DISPATCH) | 2988 | | stp L, DISPATCH_J(L)(DISPATCH) |
2632 | | subi CARG3, CARG3, 2 | 2989 | | subi CARG3, CARG3, 2 |
2633 | | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH) | ||
2634 | | stw CARG4, DISPATCH_J(parent)(DISPATCH) | ||
2635 | | stp BASE, L->base | 2990 | | stp BASE, L->base |
2991 | | stw CARG4, DISPATCH_J(parent)(DISPATCH) | ||
2992 | | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) | ||
2636 | | addi CARG1, DISPATCH, GG_DISP2J | 2993 | | addi CARG1, DISPATCH, GG_DISP2J |
2637 | | stw CARG3, DISPATCH_J(exitno)(DISPATCH) | 2994 | | stw CARG3, DISPATCH_J(exitno)(DISPATCH) |
2638 | | addi CARG2, sp, 16 | 2995 | | addi CARG2, sp, 16 |
@@ -2656,28 +3013,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
2656 | | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. | 3013 | | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. |
2657 | | lwz L, SAVE_L | 3014 | | lwz L, SAVE_L |
2658 | | addi DISPATCH, JGL, -GG_DISP2G-32768 | 3015 | | addi DISPATCH, JGL, -GG_DISP2G-32768 |
3016 | | stp BASE, L->base | ||
2659 | |1: | 3017 | |1: |
2660 | | cmpwi CARG1, 0 | 3018 | | cmpwi CARG1, 0 |
2661 | | blt >3 // Check for error from exit. | 3019 | | blt >9 // Check for error from exit. |
2662 | | lwz LFUNC:TMP1, FRAME_FUNC(BASE) | 3020 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
2663 | | slwi MULTRES, CARG1, 3 | 3021 | | slwi MULTRES, CARG1, 3 |
2664 | | li TMP2, 0 | 3022 | | li TMP2, 0 |
2665 | | stw MULTRES, SAVE_MULTRES | 3023 | | stw MULTRES, SAVE_MULTRES |
2666 | | lwz TMP1, LFUNC:TMP1->pc | 3024 | | lwz TMP1, LFUNC:RB->pc |
2667 | | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) | 3025 | | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) |
2668 | | lwz KBASE, PC2PROTO(k)(TMP1) | 3026 | | lwz KBASE, PC2PROTO(k)(TMP1) |
2669 | | // Setup type comparison constants. | 3027 | | // Setup type comparison constants. |
2670 | | li TISNUM, LJ_TISNUM | 3028 | | li TISNUM, LJ_TISNUM |
2671 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 3029 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2672 | | stw TMP3, TMPD | 3030 | | .FPU stw TMP3, TMPD |
2673 | | li ZERO, 0 | 3031 | | li ZERO, 0 |
2674 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 3032 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
2675 | | lfs TOBIT, TMPD | 3033 | | .FPU lfs TOBIT, TMPD |
2676 | | stw TMP3, TMPD | 3034 | | .FPU stw TMP3, TMPD |
2677 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 3035 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
2678 | | li TISNIL, LJ_TNIL | 3036 | | li TISNIL, LJ_TNIL |
2679 | | stw TMP0, TONUM_HI | 3037 | | .FPU stw TMP0, TONUM_HI |
2680 | | lfs TONUM, TMPD | 3038 | | .FPU lfs TONUM, TMPD |
2681 | | // Modified copy of ins_next which handles function header dispatch, too. | 3039 | | // Modified copy of ins_next which handles function header dispatch, too. |
2682 | | lwz INS, 0(PC) | 3040 | | lwz INS, 0(PC) |
2683 | | addi PC, PC, 4 | 3041 | | addi PC, PC, 4 |
@@ -2694,20 +3052,63 @@ static void build_subroutines(BuildCtx *ctx) | |||
2694 | | decode_RC8 RC, INS | 3052 | | decode_RC8 RC, INS |
2695 | | bctr | 3053 | | bctr |
2696 | |2: | 3054 | |2: |
3055 | | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? | ||
3056 | | blt >3 | ||
3057 | | // Check frame below fast function. | ||
3058 | | lwz TMP1, FRAME_PC(BASE) | ||
3059 | | andix. TMP0, TMP1, FRAME_TYPE | ||
3060 | | bney >3 // Trace stitching continuation? | ||
3061 | | // Otherwise set KBASE for Lua function below fast function. | ||
3062 | | lwz TMP2, -4(TMP1) | ||
3063 | | decode_RA8 TMP0, TMP2 | ||
3064 | | sub TMP1, BASE, TMP0 | ||
3065 | | lwz LFUNC:TMP2, -12(TMP1) | ||
3066 | | lwz TMP1, LFUNC:TMP2->pc | ||
3067 | | lwz KBASE, PC2PROTO(k)(TMP1) | ||
3068 | |3: | ||
2697 | | subi RC, MULTRES, 8 | 3069 | | subi RC, MULTRES, 8 |
2698 | | add RA, RA, BASE | 3070 | | add RA, RA, BASE |
2699 | | bctr | 3071 | | bctr |
2700 | | | 3072 | | |
2701 | |3: // Rethrow error from the right C frame. | 3073 | |9: // Rethrow error from the right C frame. |
3074 | | neg CARG2, CARG1 | ||
2702 | | mr CARG1, L | 3075 | | mr CARG1, L |
2703 | | bl extern lj_err_run // (lua_State *L) | 3076 | | bl extern lj_err_trace // (lua_State *L, int errcode) |
2704 | |.endif | 3077 | |.endif |
2705 | | | 3078 | | |
2706 | |//----------------------------------------------------------------------- | 3079 | |//----------------------------------------------------------------------- |
2707 | |//-- Math helper functions ---------------------------------------------- | 3080 | |//-- Math helper functions ---------------------------------------------- |
2708 | |//----------------------------------------------------------------------- | 3081 | |//----------------------------------------------------------------------- |
2709 | | | 3082 | | |
2710 | |// NYI: Use internal implementations of floor, ceil, trunc. | 3083 | |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. |
3084 | | | ||
3085 | |.macro sfi2d, AHI, ALO | ||
3086 | |.if not FPU | ||
3087 | | mr. AHI, ALO | ||
3088 | | bclr 12, 2 // Handle zero first. | ||
3089 | | srawi TMP0, ALO, 31 | ||
3090 | | xor TMP1, ALO, TMP0 | ||
3091 | | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. | ||
3092 | | cntlzw AHI, TMP1 | ||
3093 | | andix. TMP0, TMP0, 0x800 // Mask sign bit. | ||
3094 | | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. | ||
3095 | | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. | ||
3096 | | slwi ALO, TMP1, 21 | ||
3097 | | or AHI, AHI, TMP0 // Sign | Exponent. | ||
3098 | | srwi TMP1, TMP1, 11 | ||
3099 | | slwi AHI, AHI, 20 // Align left. | ||
3100 | | add AHI, AHI, TMP1 // Add mantissa, increment exponent. | ||
3101 | | blr | ||
3102 | |.endif | ||
3103 | |.endmacro | ||
3104 | | | ||
3105 | |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. | ||
3106 | |->vm_sfi2d_1: | ||
3107 | | sfi2d CARG1, CARG2 | ||
3108 | | | ||
3109 | |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. | ||
3110 | |->vm_sfi2d_2: | ||
3111 | | sfi2d CARG3, CARG4 | ||
2711 | | | 3112 | | |
2712 | |->vm_modi: | 3113 | |->vm_modi: |
2713 | | divwo. TMP0, CARG1, CARG2 | 3114 | | divwo. TMP0, CARG1, CARG2 |
@@ -2762,6 +3163,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2762 | | blr | 3163 | | blr |
2763 | |.endif | 3164 | |.endif |
2764 | | | 3165 | | |
3166 | |->vm_next: | ||
3167 | |.if JIT | ||
3168 | | NYI // On big-endian. | ||
3169 | |.endif | ||
3170 | | | ||
2765 | |//----------------------------------------------------------------------- | 3171 | |//----------------------------------------------------------------------- |
2766 | |//-- FFI helper functions ----------------------------------------------- | 3172 | |//-- FFI helper functions ----------------------------------------------- |
2767 | |//----------------------------------------------------------------------- | 3173 | |//----------------------------------------------------------------------- |
@@ -2775,21 +3181,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
2775 | | addi DISPATCH, r12, GG_G2DISP | 3181 | | addi DISPATCH, r12, GG_G2DISP |
2776 | | stw r11, CTSTATE->cb.slot | 3182 | | stw r11, CTSTATE->cb.slot |
2777 | | stw r3, CTSTATE->cb.gpr[0] | 3183 | | stw r3, CTSTATE->cb.gpr[0] |
2778 | | stfd f1, CTSTATE->cb.fpr[0] | 3184 | | .FPU stfd f1, CTSTATE->cb.fpr[0] |
2779 | | stw r4, CTSTATE->cb.gpr[1] | 3185 | | stw r4, CTSTATE->cb.gpr[1] |
2780 | | stfd f2, CTSTATE->cb.fpr[1] | 3186 | | .FPU stfd f2, CTSTATE->cb.fpr[1] |
2781 | | stw r5, CTSTATE->cb.gpr[2] | 3187 | | stw r5, CTSTATE->cb.gpr[2] |
2782 | | stfd f3, CTSTATE->cb.fpr[2] | 3188 | | .FPU stfd f3, CTSTATE->cb.fpr[2] |
2783 | | stw r6, CTSTATE->cb.gpr[3] | 3189 | | stw r6, CTSTATE->cb.gpr[3] |
2784 | | stfd f4, CTSTATE->cb.fpr[3] | 3190 | | .FPU stfd f4, CTSTATE->cb.fpr[3] |
2785 | | stw r7, CTSTATE->cb.gpr[4] | 3191 | | stw r7, CTSTATE->cb.gpr[4] |
2786 | | stfd f5, CTSTATE->cb.fpr[4] | 3192 | | .FPU stfd f5, CTSTATE->cb.fpr[4] |
2787 | | stw r8, CTSTATE->cb.gpr[5] | 3193 | | stw r8, CTSTATE->cb.gpr[5] |
2788 | | stfd f6, CTSTATE->cb.fpr[5] | 3194 | | .FPU stfd f6, CTSTATE->cb.fpr[5] |
2789 | | stw r9, CTSTATE->cb.gpr[6] | 3195 | | stw r9, CTSTATE->cb.gpr[6] |
2790 | | stfd f7, CTSTATE->cb.fpr[6] | 3196 | | .FPU stfd f7, CTSTATE->cb.fpr[6] |
2791 | | stw r10, CTSTATE->cb.gpr[7] | 3197 | | stw r10, CTSTATE->cb.gpr[7] |
2792 | | stfd f8, CTSTATE->cb.fpr[7] | 3198 | | .FPU stfd f8, CTSTATE->cb.fpr[7] |
2793 | | addi TMP0, sp, CFRAME_SPACE+8 | 3199 | | addi TMP0, sp, CFRAME_SPACE+8 |
2794 | | stw TMP0, CTSTATE->cb.stack | 3200 | | stw TMP0, CTSTATE->cb.stack |
2795 | | mr CARG1, CTSTATE | 3201 | | mr CARG1, CTSTATE |
@@ -2800,21 +3206,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
2800 | | lp BASE, L:CRET1->base | 3206 | | lp BASE, L:CRET1->base |
2801 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. | 3207 | | li TISNUM, LJ_TISNUM // Setup type comparison constants. |
2802 | | lp RC, L:CRET1->top | 3208 | | lp RC, L:CRET1->top |
2803 | | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 3209 | | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2804 | | li ZERO, 0 | 3210 | | li ZERO, 0 |
2805 | | mr L, CRET1 | 3211 | | mr L, CRET1 |
2806 | | stw TMP3, TMPD | 3212 | | .FPU stw TMP3, TMPD |
2807 | | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | 3213 | | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) |
2808 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 3214 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
2809 | | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | 3215 | | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). |
2810 | | stw TMP0, TONUM_HI | 3216 | | .FPU stw TMP0, TONUM_HI |
2811 | | li TISNIL, LJ_TNIL | 3217 | | li TISNIL, LJ_TNIL |
2812 | | li_vmstate INTERP | 3218 | | li_vmstate INTERP |
2813 | | lfs TOBIT, TMPD | 3219 | | .FPU lfs TOBIT, TMPD |
2814 | | stw TMP3, TMPD | 3220 | | .FPU stw TMP3, TMPD |
2815 | | sub RC, RC, BASE | 3221 | | sub RC, RC, BASE |
2816 | | st_vmstate | 3222 | | st_vmstate |
2817 | | lfs TONUM, TMPD | 3223 | | .FPU lfs TONUM, TMPD |
2818 | | ins_callt | 3224 | | ins_callt |
2819 | |.endif | 3225 | |.endif |
2820 | | | 3226 | | |
@@ -2828,7 +3234,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2828 | | mr CARG2, RA | 3234 | | mr CARG2, RA |
2829 | | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | 3235 | | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) |
2830 | | lwz CRET1, CTSTATE->cb.gpr[0] | 3236 | | lwz CRET1, CTSTATE->cb.gpr[0] |
2831 | | lfd FARG1, CTSTATE->cb.fpr[0] | 3237 | | .FPU lfd FARG1, CTSTATE->cb.fpr[0] |
2832 | | lwz CRET2, CTSTATE->cb.gpr[1] | 3238 | | lwz CRET2, CTSTATE->cb.gpr[1] |
2833 | | b ->vm_leave_unw | 3239 | | b ->vm_leave_unw |
2834 | |.endif | 3240 | |.endif |
@@ -2862,14 +3268,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2862 | | bge <1 | 3268 | | bge <1 |
2863 | |2: | 3269 | |2: |
2864 | | bney cr1, >3 | 3270 | | bney cr1, >3 |
2865 | | lfd f1, CCSTATE->fpr[0] | 3271 | | .FPU lfd f1, CCSTATE->fpr[0] |
2866 | | lfd f2, CCSTATE->fpr[1] | 3272 | | .FPU lfd f2, CCSTATE->fpr[1] |
2867 | | lfd f3, CCSTATE->fpr[2] | 3273 | | .FPU lfd f3, CCSTATE->fpr[2] |
2868 | | lfd f4, CCSTATE->fpr[3] | 3274 | | .FPU lfd f4, CCSTATE->fpr[3] |
2869 | | lfd f5, CCSTATE->fpr[4] | 3275 | | .FPU lfd f5, CCSTATE->fpr[4] |
2870 | | lfd f6, CCSTATE->fpr[5] | 3276 | | .FPU lfd f6, CCSTATE->fpr[5] |
2871 | | lfd f7, CCSTATE->fpr[6] | 3277 | | .FPU lfd f7, CCSTATE->fpr[6] |
2872 | | lfd f8, CCSTATE->fpr[7] | 3278 | | .FPU lfd f8, CCSTATE->fpr[7] |
2873 | |3: | 3279 | |3: |
2874 | | lp TMP0, CCSTATE->func | 3280 | | lp TMP0, CCSTATE->func |
2875 | | lwz CARG2, CCSTATE->gpr[1] | 3281 | | lwz CARG2, CCSTATE->gpr[1] |
@@ -2886,7 +3292,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2886 | | lwz TMP2, -4(r14) | 3292 | | lwz TMP2, -4(r14) |
2887 | | lwz TMP0, 4(r14) | 3293 | | lwz TMP0, 4(r14) |
2888 | | stw CARG1, CCSTATE:TMP1->gpr[0] | 3294 | | stw CARG1, CCSTATE:TMP1->gpr[0] |
2889 | | stfd FARG1, CCSTATE:TMP1->fpr[0] | 3295 | | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] |
2890 | | stw CARG2, CCSTATE:TMP1->gpr[1] | 3296 | | stw CARG2, CCSTATE:TMP1->gpr[1] |
2891 | | mtlr TMP0 | 3297 | | mtlr TMP0 |
2892 | | stw CARG3, CCSTATE:TMP1->gpr[2] | 3298 | | stw CARG3, CCSTATE:TMP1->gpr[2] |
@@ -2915,19 +3321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2915 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 3321 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
2916 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 3322 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2917 | |.if DUALNUM | 3323 | |.if DUALNUM |
2918 | | lwzux TMP0, RA, BASE | 3324 | | lwzux CARG1, RA, BASE |
2919 | | addi PC, PC, 4 | 3325 | | addi PC, PC, 4 |
2920 | | lwz CARG2, 4(RA) | 3326 | | lwz CARG2, 4(RA) |
2921 | | lwzux TMP1, RD, BASE | 3327 | | lwzux CARG3, RD, BASE |
2922 | | lwz TMP2, -4(PC) | 3328 | | lwz TMP2, -4(PC) |
2923 | | checknum cr0, TMP0 | 3329 | | checknum cr0, CARG1 |
2924 | | lwz CARG3, 4(RD) | 3330 | | lwz CARG4, 4(RD) |
2925 | | decode_RD4 TMP2, TMP2 | 3331 | | decode_RD4 TMP2, TMP2 |
2926 | | checknum cr1, TMP1 | 3332 | | checknum cr1, CARG3 |
2927 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3333 | | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) |
2928 | | bne cr0, >7 | 3334 | | bne cr0, >7 |
2929 | | bne cr1, >8 | 3335 | | bne cr1, >8 |
2930 | | cmpw CARG2, CARG3 | 3336 | | cmpw CARG2, CARG4 |
2931 | if (op == BC_ISLT) { | 3337 | if (op == BC_ISLT) { |
2932 | | bge >2 | 3338 | | bge >2 |
2933 | } else if (op == BC_ISGE) { | 3339 | } else if (op == BC_ISGE) { |
@@ -2938,28 +3344,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2938 | | ble >2 | 3344 | | ble >2 |
2939 | } | 3345 | } |
2940 | |1: | 3346 | |1: |
2941 | | add PC, PC, TMP2 | 3347 | | add PC, PC, SAVE0 |
2942 | |2: | 3348 | |2: |
2943 | | ins_next | 3349 | | ins_next |
2944 | | | 3350 | | |
2945 | |7: // RA is not an integer. | 3351 | |7: // RA is not an integer. |
2946 | | bgt cr0, ->vmeta_comp | 3352 | | bgt cr0, ->vmeta_comp |
2947 | | // RA is a number. | 3353 | | // RA is a number. |
2948 | | lfd f0, 0(RA) | 3354 | | .FPU lfd f0, 0(RA) |
2949 | | bgt cr1, ->vmeta_comp | 3355 | | bgt cr1, ->vmeta_comp |
2950 | | blt cr1, >4 | 3356 | | blt cr1, >4 |
2951 | | // RA is a number, RD is an integer. | 3357 | | // RA is a number, RD is an integer. |
2952 | | tonum_i f1, CARG3 | 3358 | |.if FPU |
3359 | | tonum_i f1, CARG4 | ||
3360 | |.else | ||
3361 | | bl ->vm_sfi2d_2 | ||
3362 | |.endif | ||
2953 | | b >5 | 3363 | | b >5 |
2954 | | | 3364 | | |
2955 | |8: // RA is an integer, RD is not an integer. | 3365 | |8: // RA is an integer, RD is not an integer. |
2956 | | bgt cr1, ->vmeta_comp | 3366 | | bgt cr1, ->vmeta_comp |
2957 | | // RA is an integer, RD is a number. | 3367 | | // RA is an integer, RD is a number. |
3368 | |.if FPU | ||
2958 | | tonum_i f0, CARG2 | 3369 | | tonum_i f0, CARG2 |
3370 | |.else | ||
3371 | | bl ->vm_sfi2d_1 | ||
3372 | |.endif | ||
2959 | |4: | 3373 | |4: |
2960 | | lfd f1, 0(RD) | 3374 | | .FPU lfd f1, 0(RD) |
2961 | |5: | 3375 | |5: |
3376 | |.if FPU | ||
2962 | | fcmpu cr0, f0, f1 | 3377 | | fcmpu cr0, f0, f1 |
3378 | |.else | ||
3379 | | blex __ledf2 | ||
3380 | | cmpwi CRET1, 0 | ||
3381 | |.endif | ||
2963 | if (op == BC_ISLT) { | 3382 | if (op == BC_ISLT) { |
2964 | | bge <2 | 3383 | | bge <2 |
2965 | } else if (op == BC_ISGE) { | 3384 | } else if (op == BC_ISGE) { |
@@ -3007,42 +3426,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3007 | vk = op == BC_ISEQV; | 3426 | vk = op == BC_ISEQV; |
3008 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 3427 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
3009 | |.if DUALNUM | 3428 | |.if DUALNUM |
3010 | | lwzux TMP0, RA, BASE | 3429 | | lwzux CARG1, RA, BASE |
3011 | | addi PC, PC, 4 | 3430 | | addi PC, PC, 4 |
3012 | | lwz CARG2, 4(RA) | 3431 | | lwz CARG2, 4(RA) |
3013 | | lwzux TMP1, RD, BASE | 3432 | | lwzux CARG3, RD, BASE |
3014 | | checknum cr0, TMP0 | 3433 | | checknum cr0, CARG1 |
3015 | | lwz TMP2, -4(PC) | 3434 | | lwz SAVE0, -4(PC) |
3016 | | checknum cr1, TMP1 | 3435 | | checknum cr1, CARG3 |
3017 | | decode_RD4 TMP2, TMP2 | 3436 | | decode_RD4 SAVE0, SAVE0 |
3018 | | lwz CARG3, 4(RD) | 3437 | | lwz CARG4, 4(RD) |
3019 | | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt | 3438 | | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt |
3020 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3439 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3021 | if (vk) { | 3440 | if (vk) { |
3022 | | ble cr7, ->BC_ISEQN_Z | 3441 | | ble cr7, ->BC_ISEQN_Z |
3023 | } else { | 3442 | } else { |
3024 | | ble cr7, ->BC_ISNEN_Z | 3443 | | ble cr7, ->BC_ISNEN_Z |
3025 | } | 3444 | } |
3026 | |.else | 3445 | |.else |
3027 | | lwzux TMP0, RA, BASE | 3446 | | lwzux CARG1, RA, BASE |
3028 | | lwz TMP2, 0(PC) | 3447 | | lwz SAVE0, 0(PC) |
3029 | | lfd f0, 0(RA) | 3448 | | lfd f0, 0(RA) |
3030 | | addi PC, PC, 4 | 3449 | | addi PC, PC, 4 |
3031 | | lwzux TMP1, RD, BASE | 3450 | | lwzux CARG3, RD, BASE |
3032 | | checknum cr0, TMP0 | 3451 | | checknum cr0, CARG1 |
3033 | | decode_RD4 TMP2, TMP2 | 3452 | | decode_RD4 SAVE0, SAVE0 |
3034 | | lfd f1, 0(RD) | 3453 | | lfd f1, 0(RD) |
3035 | | checknum cr1, TMP1 | 3454 | | checknum cr1, CARG3 |
3036 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3455 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3037 | | bge cr0, >5 | 3456 | | bge cr0, >5 |
3038 | | bge cr1, >5 | 3457 | | bge cr1, >5 |
3039 | | fcmpu cr0, f0, f1 | 3458 | | fcmpu cr0, f0, f1 |
3040 | if (vk) { | 3459 | if (vk) { |
3041 | | bne >1 | 3460 | | bne >1 |
3042 | | add PC, PC, TMP2 | 3461 | | add PC, PC, SAVE0 |
3043 | } else { | 3462 | } else { |
3044 | | beq >1 | 3463 | | beq >1 |
3045 | | add PC, PC, TMP2 | 3464 | | add PC, PC, SAVE0 |
3046 | } | 3465 | } |
3047 | |1: | 3466 | |1: |
3048 | | ins_next | 3467 | | ins_next |
@@ -3050,36 +3469,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3050 | |5: // Either or both types are not numbers. | 3469 | |5: // Either or both types are not numbers. |
3051 | |.if not DUALNUM | 3470 | |.if not DUALNUM |
3052 | | lwz CARG2, 4(RA) | 3471 | | lwz CARG2, 4(RA) |
3053 | | lwz CARG3, 4(RD) | 3472 | | lwz CARG4, 4(RD) |
3054 | |.endif | 3473 | |.endif |
3055 | |.if FFI | 3474 | |.if FFI |
3056 | | cmpwi cr7, TMP0, LJ_TCDATA | 3475 | | cmpwi cr7, CARG1, LJ_TCDATA |
3057 | | cmpwi cr5, TMP1, LJ_TCDATA | 3476 | | cmpwi cr5, CARG3, LJ_TCDATA |
3058 | |.endif | 3477 | |.endif |
3059 | | not TMP3, TMP0 | 3478 | | not TMP2, CARG1 |
3060 | | cmplw TMP0, TMP1 | 3479 | | cmplw CARG1, CARG3 |
3061 | | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? | 3480 | | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? |
3062 | |.if FFI | 3481 | |.if FFI |
3063 | | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq | 3482 | | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq |
3064 | |.endif | 3483 | |.endif |
3065 | | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? | 3484 | | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? |
3066 | |.if FFI | 3485 | |.if FFI |
3067 | | beq cr7, ->vmeta_equal_cd | 3486 | | beq cr7, ->vmeta_equal_cd |
3068 | |.endif | 3487 | |.endif |
3069 | | cmplw cr5, CARG2, CARG3 | 3488 | | cmplw cr5, CARG2, CARG4 |
3070 | | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. | 3489 | | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. |
3071 | | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. | 3490 | | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. |
3072 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. | 3491 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. |
3073 | | mr SAVE0, PC | 3492 | | mr SAVE1, PC |
3074 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. | 3493 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. |
3075 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. | 3494 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. |
3076 | if (vk) { | 3495 | if (vk) { |
3077 | | bne cr0, >6 | 3496 | | bne cr0, >6 |
3078 | | add PC, PC, TMP2 | 3497 | | add PC, PC, SAVE0 |
3079 | |6: | 3498 | |6: |
3080 | } else { | 3499 | } else { |
3081 | | beq cr0, >6 | 3500 | | beq cr0, >6 |
3082 | | add PC, PC, TMP2 | 3501 | | add PC, PC, SAVE0 |
3083 | |6: | 3502 | |6: |
3084 | } | 3503 | } |
3085 | |.if DUALNUM | 3504 | |.if DUALNUM |
@@ -3094,6 +3513,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3094 | | | 3513 | | |
3095 | | // Different tables or userdatas. Need to check __eq metamethod. | 3514 | | // Different tables or userdatas. Need to check __eq metamethod. |
3096 | | // Field metatable must be at same offset for GCtab and GCudata! | 3515 | | // Field metatable must be at same offset for GCtab and GCudata! |
3516 | | mr CARG3, CARG4 | ||
3097 | | lwz TAB:TMP2, TAB:CARG2->metatable | 3517 | | lwz TAB:TMP2, TAB:CARG2->metatable |
3098 | | li CARG4, 1-vk // ne = 0 or 1. | 3518 | | li CARG4, 1-vk // ne = 0 or 1. |
3099 | | cmplwi TAB:TMP2, 0 | 3519 | | cmplwi TAB:TMP2, 0 |
@@ -3101,7 +3521,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3101 | | lbz TMP2, TAB:TMP2->nomm | 3521 | | lbz TMP2, TAB:TMP2->nomm |
3102 | | andix. TMP2, TMP2, 1<<MM_eq | 3522 | | andix. TMP2, TMP2, 1<<MM_eq |
3103 | | bne <1 // Or 'no __eq' flag set? | 3523 | | bne <1 // Or 'no __eq' flag set? |
3104 | | mr PC, SAVE0 // Restore old PC. | 3524 | | mr PC, SAVE1 // Restore old PC. |
3105 | | b ->vmeta_equal // Handle __eq metamethod. | 3525 | | b ->vmeta_equal // Handle __eq metamethod. |
3106 | break; | 3526 | break; |
3107 | 3527 | ||
@@ -3142,16 +3562,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3142 | vk = op == BC_ISEQN; | 3562 | vk = op == BC_ISEQN; |
3143 | | // RA = src*8, RD = num_const*8, JMP with RD = target | 3563 | | // RA = src*8, RD = num_const*8, JMP with RD = target |
3144 | |.if DUALNUM | 3564 | |.if DUALNUM |
3145 | | lwzux TMP0, RA, BASE | 3565 | | lwzux CARG1, RA, BASE |
3146 | | addi PC, PC, 4 | 3566 | | addi PC, PC, 4 |
3147 | | lwz CARG2, 4(RA) | 3567 | | lwz CARG2, 4(RA) |
3148 | | lwzux TMP1, RD, KBASE | 3568 | | lwzux CARG3, RD, KBASE |
3149 | | checknum cr0, TMP0 | 3569 | | checknum cr0, CARG1 |
3150 | | lwz TMP2, -4(PC) | 3570 | | lwz SAVE0, -4(PC) |
3151 | | checknum cr1, TMP1 | 3571 | | checknum cr1, CARG3 |
3152 | | decode_RD4 TMP2, TMP2 | 3572 | | decode_RD4 SAVE0, SAVE0 |
3153 | | lwz CARG3, 4(RD) | 3573 | | lwz CARG4, 4(RD) |
3154 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3574 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3155 | if (vk) { | 3575 | if (vk) { |
3156 | |->BC_ISEQN_Z: | 3576 | |->BC_ISEQN_Z: |
3157 | } else { | 3577 | } else { |
@@ -3159,7 +3579,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3159 | } | 3579 | } |
3160 | | bne cr0, >7 | 3580 | | bne cr0, >7 |
3161 | | bne cr1, >8 | 3581 | | bne cr1, >8 |
3162 | | cmpw CARG2, CARG3 | 3582 | | cmpw CARG2, CARG4 |
3163 | |4: | 3583 | |4: |
3164 | |.else | 3584 | |.else |
3165 | if (vk) { | 3585 | if (vk) { |
@@ -3167,20 +3587,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3167 | } else { | 3587 | } else { |
3168 | |->BC_ISNEN_Z: // Dummy label. | 3588 | |->BC_ISNEN_Z: // Dummy label. |
3169 | } | 3589 | } |
3170 | | lwzx TMP0, BASE, RA | 3590 | | lwzx CARG1, BASE, RA |
3171 | | addi PC, PC, 4 | 3591 | | addi PC, PC, 4 |
3172 | | lfdx f0, BASE, RA | 3592 | | lfdx f0, BASE, RA |
3173 | | lwz TMP2, -4(PC) | 3593 | | lwz SAVE0, -4(PC) |
3174 | | lfdx f1, KBASE, RD | 3594 | | lfdx f1, KBASE, RD |
3175 | | decode_RD4 TMP2, TMP2 | 3595 | | decode_RD4 SAVE0, SAVE0 |
3176 | | checknum TMP0 | 3596 | | checknum CARG1 |
3177 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 3597 | | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) |
3178 | | bge >3 | 3598 | | bge >3 |
3179 | | fcmpu cr0, f0, f1 | 3599 | | fcmpu cr0, f0, f1 |
3180 | |.endif | 3600 | |.endif |
3181 | if (vk) { | 3601 | if (vk) { |
3182 | | bne >1 | 3602 | | bne >1 |
3183 | | add PC, PC, TMP2 | 3603 | | add PC, PC, SAVE0 |
3184 | |1: | 3604 | |1: |
3185 | |.if not FFI | 3605 | |.if not FFI |
3186 | |3: | 3606 | |3: |
@@ -3191,13 +3611,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3191 | |.if not FFI | 3611 | |.if not FFI |
3192 | |3: | 3612 | |3: |
3193 | |.endif | 3613 | |.endif |
3194 | | add PC, PC, TMP2 | 3614 | | add PC, PC, SAVE0 |
3195 | |2: | 3615 | |2: |
3196 | } | 3616 | } |
3197 | | ins_next | 3617 | | ins_next |
3198 | |.if FFI | 3618 | |.if FFI |
3199 | |3: | 3619 | |3: |
3200 | | cmpwi TMP0, LJ_TCDATA | 3620 | | cmpwi CARG1, LJ_TCDATA |
3201 | | beq ->vmeta_equal_cd | 3621 | | beq ->vmeta_equal_cd |
3202 | | b <1 | 3622 | | b <1 |
3203 | |.endif | 3623 | |.endif |
@@ -3205,18 +3625,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3205 | |7: // RA is not an integer. | 3625 | |7: // RA is not an integer. |
3206 | | bge cr0, <3 | 3626 | | bge cr0, <3 |
3207 | | // RA is a number. | 3627 | | // RA is a number. |
3208 | | lfd f0, 0(RA) | 3628 | | .FPU lfd f0, 0(RA) |
3209 | | blt cr1, >1 | 3629 | | blt cr1, >1 |
3210 | | // RA is a number, RD is an integer. | 3630 | | // RA is a number, RD is an integer. |
3211 | | tonum_i f1, CARG3 | 3631 | |.if FPU |
3632 | | tonum_i f1, CARG4 | ||
3633 | |.else | ||
3634 | | bl ->vm_sfi2d_2 | ||
3635 | |.endif | ||
3212 | | b >2 | 3636 | | b >2 |
3213 | | | 3637 | | |
3214 | |8: // RA is an integer, RD is a number. | 3638 | |8: // RA is an integer, RD is a number. |
3639 | |.if FPU | ||
3215 | | tonum_i f0, CARG2 | 3640 | | tonum_i f0, CARG2 |
3641 | |.else | ||
3642 | | bl ->vm_sfi2d_1 | ||
3643 | |.endif | ||
3216 | |1: | 3644 | |1: |
3217 | | lfd f1, 0(RD) | 3645 | | .FPU lfd f1, 0(RD) |
3218 | |2: | 3646 | |2: |
3647 | |.if FPU | ||
3219 | | fcmpu cr0, f0, f1 | 3648 | | fcmpu cr0, f0, f1 |
3649 | |.else | ||
3650 | | blex __ledf2 | ||
3651 | | cmpwi CRET1, 0 | ||
3652 | |.endif | ||
3220 | | b <4 | 3653 | | b <4 |
3221 | |.endif | 3654 | |.endif |
3222 | break; | 3655 | break; |
@@ -3271,7 +3704,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3271 | | add PC, PC, TMP2 | 3704 | | add PC, PC, TMP2 |
3272 | } else { | 3705 | } else { |
3273 | | li TMP1, LJ_TFALSE | 3706 | | li TMP1, LJ_TFALSE |
3707 | |.if FPU | ||
3274 | | lfdx f0, BASE, RD | 3708 | | lfdx f0, BASE, RD |
3709 | |.else | ||
3710 | | lwzux CARG1, RD, BASE | ||
3711 | | lwz CARG2, 4(RD) | ||
3712 | |.endif | ||
3275 | | cmplw TMP0, TMP1 | 3713 | | cmplw TMP0, TMP1 |
3276 | if (op == BC_ISTC) { | 3714 | if (op == BC_ISTC) { |
3277 | | bge >1 | 3715 | | bge >1 |
@@ -3280,20 +3718,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3280 | } | 3718 | } |
3281 | | addis PC, PC, -(BCBIAS_J*4 >> 16) | 3719 | | addis PC, PC, -(BCBIAS_J*4 >> 16) |
3282 | | decode_RD4 TMP2, INS | 3720 | | decode_RD4 TMP2, INS |
3721 | |.if FPU | ||
3283 | | stfdx f0, BASE, RA | 3722 | | stfdx f0, BASE, RA |
3723 | |.else | ||
3724 | | stwux CARG1, RA, BASE | ||
3725 | | stw CARG2, 4(RA) | ||
3726 | |.endif | ||
3284 | | add PC, PC, TMP2 | 3727 | | add PC, PC, TMP2 |
3285 | |1: | 3728 | |1: |
3286 | } | 3729 | } |
3287 | | ins_next | 3730 | | ins_next |
3288 | break; | 3731 | break; |
3289 | 3732 | ||
3733 | case BC_ISTYPE: | ||
3734 | | // RA = src*8, RD = -type*8 | ||
3735 | | lwzx TMP0, BASE, RA | ||
3736 | | srwi TMP1, RD, 3 | ||
3737 | | ins_next1 | ||
3738 | |.if not PPE and not GPR64 | ||
3739 | | add. TMP0, TMP0, TMP1 | ||
3740 | |.else | ||
3741 | | neg TMP1, TMP1 | ||
3742 | | cmpw TMP0, TMP1 | ||
3743 | |.endif | ||
3744 | | bne ->vmeta_istype | ||
3745 | | ins_next2 | ||
3746 | break; | ||
3747 | case BC_ISNUM: | ||
3748 | | // RA = src*8, RD = -(TISNUM-1)*8 | ||
3749 | | lwzx TMP0, BASE, RA | ||
3750 | | ins_next1 | ||
3751 | | checknum TMP0 | ||
3752 | | bge ->vmeta_istype | ||
3753 | | ins_next2 | ||
3754 | break; | ||
3755 | |||
3290 | /* -- Unary ops --------------------------------------------------------- */ | 3756 | /* -- Unary ops --------------------------------------------------------- */ |
3291 | 3757 | ||
3292 | case BC_MOV: | 3758 | case BC_MOV: |
3293 | | // RA = dst*8, RD = src*8 | 3759 | | // RA = dst*8, RD = src*8 |
3294 | | ins_next1 | 3760 | | ins_next1 |
3761 | |.if FPU | ||
3295 | | lfdx f0, BASE, RD | 3762 | | lfdx f0, BASE, RD |
3296 | | stfdx f0, BASE, RA | 3763 | | stfdx f0, BASE, RA |
3764 | |.else | ||
3765 | | lwzux TMP0, RD, BASE | ||
3766 | | lwz TMP1, 4(RD) | ||
3767 | | stwux TMP0, RA, BASE | ||
3768 | | stw TMP1, 4(RA) | ||
3769 | |.endif | ||
3297 | | ins_next2 | 3770 | | ins_next2 |
3298 | break; | 3771 | break; |
3299 | case BC_NOT: | 3772 | case BC_NOT: |
@@ -3395,44 +3868,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3395 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3868 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
3396 | ||switch (vk) { | 3869 | ||switch (vk) { |
3397 | ||case 0: | 3870 | ||case 0: |
3398 | | lwzx TMP1, BASE, RB | 3871 | | lwzx CARG1, BASE, RB |
3399 | | .if DUALNUM | 3872 | | .if DUALNUM |
3400 | | lwzx TMP2, KBASE, RC | 3873 | | lwzx CARG3, KBASE, RC |
3401 | | .endif | 3874 | | .endif |
3875 | | .if FPU | ||
3402 | | lfdx f14, BASE, RB | 3876 | | lfdx f14, BASE, RB |
3403 | | lfdx f15, KBASE, RC | 3877 | | lfdx f15, KBASE, RC |
3878 | | .else | ||
3879 | | add TMP1, BASE, RB | ||
3880 | | add TMP2, KBASE, RC | ||
3881 | | lwz CARG2, 4(TMP1) | ||
3882 | | lwz CARG4, 4(TMP2) | ||
3883 | | .endif | ||
3404 | | .if DUALNUM | 3884 | | .if DUALNUM |
3405 | | checknum cr0, TMP1 | 3885 | | checknum cr0, CARG1 |
3406 | | checknum cr1, TMP2 | 3886 | | checknum cr1, CARG3 |
3407 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3887 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3408 | | bge ->vmeta_arith_vn | 3888 | | bge ->vmeta_arith_vn |
3409 | | .else | 3889 | | .else |
3410 | | checknum TMP1; bge ->vmeta_arith_vn | 3890 | | checknum CARG1; bge ->vmeta_arith_vn |
3411 | | .endif | 3891 | | .endif |
3412 | || break; | 3892 | || break; |
3413 | ||case 1: | 3893 | ||case 1: |
3414 | | lwzx TMP1, BASE, RB | 3894 | | lwzx CARG1, BASE, RB |
3415 | | .if DUALNUM | 3895 | | .if DUALNUM |
3416 | | lwzx TMP2, KBASE, RC | 3896 | | lwzx CARG3, KBASE, RC |
3417 | | .endif | 3897 | | .endif |
3898 | | .if FPU | ||
3418 | | lfdx f15, BASE, RB | 3899 | | lfdx f15, BASE, RB |
3419 | | lfdx f14, KBASE, RC | 3900 | | lfdx f14, KBASE, RC |
3901 | | .else | ||
3902 | | add TMP1, BASE, RB | ||
3903 | | add TMP2, KBASE, RC | ||
3904 | | lwz CARG2, 4(TMP1) | ||
3905 | | lwz CARG4, 4(TMP2) | ||
3906 | | .endif | ||
3420 | | .if DUALNUM | 3907 | | .if DUALNUM |
3421 | | checknum cr0, TMP1 | 3908 | | checknum cr0, CARG1 |
3422 | | checknum cr1, TMP2 | 3909 | | checknum cr1, CARG3 |
3423 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3910 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3424 | | bge ->vmeta_arith_nv | 3911 | | bge ->vmeta_arith_nv |
3425 | | .else | 3912 | | .else |
3426 | | checknum TMP1; bge ->vmeta_arith_nv | 3913 | | checknum CARG1; bge ->vmeta_arith_nv |
3427 | | .endif | 3914 | | .endif |
3428 | || break; | 3915 | || break; |
3429 | ||default: | 3916 | ||default: |
3430 | | lwzx TMP1, BASE, RB | 3917 | | lwzx CARG1, BASE, RB |
3431 | | lwzx TMP2, BASE, RC | 3918 | | lwzx CARG3, BASE, RC |
3919 | | .if FPU | ||
3432 | | lfdx f14, BASE, RB | 3920 | | lfdx f14, BASE, RB |
3433 | | lfdx f15, BASE, RC | 3921 | | lfdx f15, BASE, RC |
3434 | | checknum cr0, TMP1 | 3922 | | .else |
3435 | | checknum cr1, TMP2 | 3923 | | add TMP1, BASE, RB |
3924 | | add TMP2, BASE, RC | ||
3925 | | lwz CARG2, 4(TMP1) | ||
3926 | | lwz CARG4, 4(TMP2) | ||
3927 | | .endif | ||
3928 | | checknum cr0, CARG1 | ||
3929 | | checknum cr1, CARG3 | ||
3436 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3930 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3437 | | bge ->vmeta_arith_vv | 3931 | | bge ->vmeta_arith_vv |
3438 | || break; | 3932 | || break; |
@@ -3466,48 +3960,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3466 | | fsub a, b, a // b - floor(b/c)*c | 3960 | | fsub a, b, a // b - floor(b/c)*c |
3467 | |.endmacro | 3961 | |.endmacro |
3468 | | | 3962 | | |
3963 | |.macro sfpmod | ||
3964 | |->BC_MODVN_Z: | ||
3965 | | stw CARG1, SFSAVE_1 | ||
3966 | | stw CARG2, SFSAVE_2 | ||
3967 | | mr SAVE0, CARG3 | ||
3968 | | mr SAVE1, CARG4 | ||
3969 | | blex __divdf3 | ||
3970 | | blex floor | ||
3971 | | mr CARG3, SAVE0 | ||
3972 | | mr CARG4, SAVE1 | ||
3973 | | blex __muldf3 | ||
3974 | | mr CARG3, CRET1 | ||
3975 | | mr CARG4, CRET2 | ||
3976 | | lwz CARG1, SFSAVE_1 | ||
3977 | | lwz CARG2, SFSAVE_2 | ||
3978 | | blex __subdf3 | ||
3979 | |.endmacro | ||
3980 | | | ||
3469 | |.macro ins_arithfp, fpins | 3981 | |.macro ins_arithfp, fpins |
3470 | | ins_arithpre | 3982 | | ins_arithpre |
3471 | |.if "fpins" == "fpmod_" | 3983 | |.if "fpins" == "fpmod_" |
3472 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3984 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
3473 | |.else | 3985 | |.elif FPU |
3474 | | fpins f0, f14, f15 | 3986 | | fpins f0, f14, f15 |
3475 | | ins_next1 | 3987 | | ins_next1 |
3476 | | stfdx f0, BASE, RA | 3988 | | stfdx f0, BASE, RA |
3477 | | ins_next2 | 3989 | | ins_next2 |
3990 | |.else | ||
3991 | | blex __divdf3 // Only soft-float div uses this macro. | ||
3992 | | ins_next1 | ||
3993 | | stwux CRET1, RA, BASE | ||
3994 | | stw CRET2, 4(RA) | ||
3995 | | ins_next2 | ||
3478 | |.endif | 3996 | |.endif |
3479 | |.endmacro | 3997 | |.endmacro |
3480 | | | 3998 | | |
3481 | |.macro ins_arithdn, intins, fpins | 3999 | |.macro ins_arithdn, intins, fpins, fpcall |
3482 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | 4000 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 |
3483 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 4001 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
3484 | ||switch (vk) { | 4002 | ||switch (vk) { |
3485 | ||case 0: | 4003 | ||case 0: |
3486 | | lwzux TMP1, RB, BASE | 4004 | | lwzux CARG1, RB, BASE |
3487 | | lwzux TMP2, RC, KBASE | 4005 | | lwzux CARG3, RC, KBASE |
3488 | | lwz CARG1, 4(RB) | 4006 | | lwz CARG2, 4(RB) |
3489 | | checknum cr0, TMP1 | 4007 | | checknum cr0, CARG1 |
3490 | | lwz CARG2, 4(RC) | 4008 | | lwz CARG4, 4(RC) |
4009 | | checknum cr1, CARG3 | ||
3491 | || break; | 4010 | || break; |
3492 | ||case 1: | 4011 | ||case 1: |
3493 | | lwzux TMP1, RB, BASE | 4012 | | lwzux CARG3, RB, BASE |
3494 | | lwzux TMP2, RC, KBASE | 4013 | | lwzux CARG1, RC, KBASE |
3495 | | lwz CARG2, 4(RB) | 4014 | | lwz CARG4, 4(RB) |
3496 | | checknum cr0, TMP1 | 4015 | | checknum cr0, CARG3 |
3497 | | lwz CARG1, 4(RC) | 4016 | | lwz CARG2, 4(RC) |
4017 | | checknum cr1, CARG1 | ||
3498 | || break; | 4018 | || break; |
3499 | ||default: | 4019 | ||default: |
3500 | | lwzux TMP1, RB, BASE | 4020 | | lwzux CARG1, RB, BASE |
3501 | | lwzux TMP2, RC, BASE | 4021 | | lwzux CARG3, RC, BASE |
3502 | | lwz CARG1, 4(RB) | 4022 | | lwz CARG2, 4(RB) |
3503 | | checknum cr0, TMP1 | 4023 | | checknum cr0, CARG1 |
3504 | | lwz CARG2, 4(RC) | 4024 | | lwz CARG4, 4(RC) |
4025 | | checknum cr1, CARG3 | ||
3505 | || break; | 4026 | || break; |
3506 | ||} | 4027 | ||} |
3507 | | checknum cr1, TMP2 | ||
3508 | | bne >5 | 4028 | | bne >5 |
3509 | | bne cr1, >5 | 4029 | | bne cr1, >5 |
3510 | | intins CARG1, CARG1, CARG2 | 4030 | |.if "intins" == "intmod" |
4031 | | mr CARG1, CARG2 | ||
4032 | | mr CARG2, CARG4 | ||
4033 | |.endif | ||
4034 | | intins CARG1, CARG2, CARG4 | ||
3511 | | bso >4 | 4035 | | bso >4 |
3512 | |1: | 4036 | |1: |
3513 | | ins_next1 | 4037 | | ins_next1 |
@@ -3519,29 +4043,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3519 | | checkov TMP0, <1 // Ignore unrelated overflow. | 4043 | | checkov TMP0, <1 // Ignore unrelated overflow. |
3520 | | ins_arithfallback b | 4044 | | ins_arithfallback b |
3521 | |5: // FP variant. | 4045 | |5: // FP variant. |
4046 | |.if FPU | ||
3522 | ||if (vk == 1) { | 4047 | ||if (vk == 1) { |
3523 | | lfd f15, 0(RB) | 4048 | | lfd f15, 0(RB) |
3524 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
3525 | | lfd f14, 0(RC) | 4049 | | lfd f14, 0(RC) |
3526 | ||} else { | 4050 | ||} else { |
3527 | | lfd f14, 0(RB) | 4051 | | lfd f14, 0(RB) |
3528 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
3529 | | lfd f15, 0(RC) | 4052 | | lfd f15, 0(RC) |
3530 | ||} | 4053 | ||} |
4054 | |.endif | ||
4055 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
3531 | | ins_arithfallback bge | 4056 | | ins_arithfallback bge |
3532 | |.if "fpins" == "fpmod_" | 4057 | |.if "fpins" == "fpmod_" |
3533 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 4058 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
3534 | |.else | 4059 | |.else |
4060 | |.if FPU | ||
3535 | | fpins f0, f14, f15 | 4061 | | fpins f0, f14, f15 |
3536 | | ins_next1 | ||
3537 | | stfdx f0, BASE, RA | 4062 | | stfdx f0, BASE, RA |
4063 | |.else | ||
4064 | |.if "fpcall" == "sfpmod" | ||
4065 | | sfpmod | ||
4066 | |.else | ||
4067 | | blex fpcall | ||
4068 | |.endif | ||
4069 | | stwux CRET1, RA, BASE | ||
4070 | | stw CRET2, 4(RA) | ||
4071 | |.endif | ||
4072 | | ins_next1 | ||
3538 | | b <2 | 4073 | | b <2 |
3539 | |.endif | 4074 | |.endif |
3540 | |.endmacro | 4075 | |.endmacro |
3541 | | | 4076 | | |
3542 | |.macro ins_arith, intins, fpins | 4077 | |.macro ins_arith, intins, fpins, fpcall |
3543 | |.if DUALNUM | 4078 | |.if DUALNUM |
3544 | | ins_arithdn intins, fpins | 4079 | | ins_arithdn intins, fpins, fpcall |
3545 | |.else | 4080 | |.else |
3546 | | ins_arithfp fpins | 4081 | | ins_arithfp fpins |
3547 | |.endif | 4082 | |.endif |
@@ -3556,9 +4091,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3556 | | addo. TMP0, TMP0, TMP1 | 4091 | | addo. TMP0, TMP0, TMP1 |
3557 | | add y, a, b | 4092 | | add y, a, b |
3558 | |.endmacro | 4093 | |.endmacro |
3559 | | ins_arith addo32., fadd | 4094 | | ins_arith addo32., fadd, __adddf3 |
3560 | |.else | 4095 | |.else |
3561 | | ins_arith addo., fadd | 4096 | | ins_arith addo., fadd, __adddf3 |
3562 | |.endif | 4097 | |.endif |
3563 | break; | 4098 | break; |
3564 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 4099 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
@@ -3570,36 +4105,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3570 | | subo. TMP0, TMP0, TMP1 | 4105 | | subo. TMP0, TMP0, TMP1 |
3571 | | sub y, a, b | 4106 | | sub y, a, b |
3572 | |.endmacro | 4107 | |.endmacro |
3573 | | ins_arith subo32., fsub | 4108 | | ins_arith subo32., fsub, __subdf3 |
3574 | |.else | 4109 | |.else |
3575 | | ins_arith subo., fsub | 4110 | | ins_arith subo., fsub, __subdf3 |
3576 | |.endif | 4111 | |.endif |
3577 | break; | 4112 | break; |
3578 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 4113 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
3579 | | ins_arith mullwo., fmul | 4114 | | ins_arith mullwo., fmul, __muldf3 |
3580 | break; | 4115 | break; |
3581 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 4116 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
3582 | | ins_arithfp fdiv | 4117 | | ins_arithfp fdiv |
3583 | break; | 4118 | break; |
3584 | case BC_MODVN: | 4119 | case BC_MODVN: |
3585 | | ins_arith intmod, fpmod | 4120 | | ins_arith intmod, fpmod, sfpmod |
3586 | break; | 4121 | break; |
3587 | case BC_MODNV: case BC_MODVV: | 4122 | case BC_MODNV: case BC_MODVV: |
3588 | | ins_arith intmod, fpmod_ | 4123 | | ins_arith intmod, fpmod_, sfpmod |
3589 | break; | 4124 | break; |
3590 | case BC_POW: | 4125 | case BC_POW: |
3591 | | // NYI: (partial) integer arithmetic. | 4126 | | // NYI: (partial) integer arithmetic. |
3592 | | lwzx TMP1, BASE, RB | 4127 | | lwzx CARG1, BASE, RB |
4128 | | lwzx CARG3, BASE, RC | ||
4129 | |.if FPU | ||
3593 | | lfdx FARG1, BASE, RB | 4130 | | lfdx FARG1, BASE, RB |
3594 | | lwzx TMP2, BASE, RC | ||
3595 | | lfdx FARG2, BASE, RC | 4131 | | lfdx FARG2, BASE, RC |
3596 | | checknum cr0, TMP1 | 4132 | |.else |
3597 | | checknum cr1, TMP2 | 4133 | | add TMP1, BASE, RB |
4134 | | add TMP2, BASE, RC | ||
4135 | | lwz CARG2, 4(TMP1) | ||
4136 | | lwz CARG4, 4(TMP2) | ||
4137 | |.endif | ||
4138 | | checknum cr0, CARG1 | ||
4139 | | checknum cr1, CARG3 | ||
3598 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 4140 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3599 | | bge ->vmeta_arith_vv | 4141 | | bge ->vmeta_arith_vv |
3600 | | blex pow | 4142 | | blex pow |
3601 | | ins_next1 | 4143 | | ins_next1 |
4144 | |.if FPU | ||
3602 | | stfdx FARG1, BASE, RA | 4145 | | stfdx FARG1, BASE, RA |
4146 | |.else | ||
4147 | | stwux CARG1, RA, BASE | ||
4148 | | stw CARG2, 4(RA) | ||
4149 | |.endif | ||
3603 | | ins_next2 | 4150 | | ins_next2 |
3604 | break; | 4151 | break; |
3605 | 4152 | ||
@@ -3619,8 +4166,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3619 | | lp BASE, L->base | 4166 | | lp BASE, L->base |
3620 | | bne ->vmeta_binop | 4167 | | bne ->vmeta_binop |
3621 | | ins_next1 | 4168 | | ins_next1 |
4169 | |.if FPU | ||
3622 | | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. | 4170 | | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. |
3623 | | stfdx f0, BASE, RA | 4171 | | stfdx f0, BASE, RA |
4172 | |.else | ||
4173 | | lwzux TMP0, SAVE0, BASE | ||
4174 | | lwz TMP1, 4(SAVE0) | ||
4175 | | stwux TMP0, RA, BASE | ||
4176 | | stw TMP1, 4(RA) | ||
4177 | |.endif | ||
3624 | | ins_next2 | 4178 | | ins_next2 |
3625 | break; | 4179 | break; |
3626 | 4180 | ||
@@ -3683,8 +4237,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3683 | case BC_KNUM: | 4237 | case BC_KNUM: |
3684 | | // RA = dst*8, RD = num_const*8 | 4238 | | // RA = dst*8, RD = num_const*8 |
3685 | | ins_next1 | 4239 | | ins_next1 |
4240 | |.if FPU | ||
3686 | | lfdx f0, KBASE, RD | 4241 | | lfdx f0, KBASE, RD |
3687 | | stfdx f0, BASE, RA | 4242 | | stfdx f0, BASE, RA |
4243 | |.else | ||
4244 | | lwzux TMP0, RD, KBASE | ||
4245 | | lwz TMP1, 4(RD) | ||
4246 | | stwux TMP0, RA, BASE | ||
4247 | | stw TMP1, 4(RA) | ||
4248 | |.endif | ||
3688 | | ins_next2 | 4249 | | ins_next2 |
3689 | break; | 4250 | break; |
3690 | case BC_KPRI: | 4251 | case BC_KPRI: |
@@ -3717,8 +4278,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3717 | | lwzx UPVAL:RB, LFUNC:RB, RD | 4278 | | lwzx UPVAL:RB, LFUNC:RB, RD |
3718 | | ins_next1 | 4279 | | ins_next1 |
3719 | | lwz TMP1, UPVAL:RB->v | 4280 | | lwz TMP1, UPVAL:RB->v |
4281 | |.if FPU | ||
3720 | | lfd f0, 0(TMP1) | 4282 | | lfd f0, 0(TMP1) |
3721 | | stfdx f0, BASE, RA | 4283 | | stfdx f0, BASE, RA |
4284 | |.else | ||
4285 | | lwz TMP2, 0(TMP1) | ||
4286 | | lwz TMP3, 4(TMP1) | ||
4287 | | stwux TMP2, RA, BASE | ||
4288 | | stw TMP3, 4(RA) | ||
4289 | |.endif | ||
3722 | | ins_next2 | 4290 | | ins_next2 |
3723 | break; | 4291 | break; |
3724 | case BC_USETV: | 4292 | case BC_USETV: |
@@ -3726,14 +4294,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3726 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 4294 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3727 | | srwi RA, RA, 1 | 4295 | | srwi RA, RA, 1 |
3728 | | addi RA, RA, offsetof(GCfuncL, uvptr) | 4296 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
4297 | |.if FPU | ||
3729 | | lfdux f0, RD, BASE | 4298 | | lfdux f0, RD, BASE |
4299 | |.else | ||
4300 | | lwzux CARG1, RD, BASE | ||
4301 | | lwz CARG3, 4(RD) | ||
4302 | |.endif | ||
3730 | | lwzx UPVAL:RB, LFUNC:RB, RA | 4303 | | lwzx UPVAL:RB, LFUNC:RB, RA |
3731 | | lbz TMP3, UPVAL:RB->marked | 4304 | | lbz TMP3, UPVAL:RB->marked |
3732 | | lwz CARG2, UPVAL:RB->v | 4305 | | lwz CARG2, UPVAL:RB->v |
3733 | | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | 4306 | | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) |
3734 | | lbz TMP0, UPVAL:RB->closed | 4307 | | lbz TMP0, UPVAL:RB->closed |
3735 | | lwz TMP2, 0(RD) | 4308 | | lwz TMP2, 0(RD) |
4309 | |.if FPU | ||
3736 | | stfd f0, 0(CARG2) | 4310 | | stfd f0, 0(CARG2) |
4311 | |.else | ||
4312 | | stw CARG1, 0(CARG2) | ||
4313 | | stw CARG3, 4(CARG2) | ||
4314 | |.endif | ||
3737 | | cmplwi cr1, TMP0, 0 | 4315 | | cmplwi cr1, TMP0, 0 |
3738 | | lwz TMP1, 4(RD) | 4316 | | lwz TMP1, 4(RD) |
3739 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | 4317 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq |
@@ -3789,11 +4367,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3789 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 4367 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3790 | | srwi RA, RA, 1 | 4368 | | srwi RA, RA, 1 |
3791 | | addi RA, RA, offsetof(GCfuncL, uvptr) | 4369 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
4370 | |.if FPU | ||
3792 | | lfdx f0, KBASE, RD | 4371 | | lfdx f0, KBASE, RD |
4372 | |.else | ||
4373 | | lwzux TMP2, RD, KBASE | ||
4374 | | lwz TMP3, 4(RD) | ||
4375 | |.endif | ||
3793 | | lwzx UPVAL:RB, LFUNC:RB, RA | 4376 | | lwzx UPVAL:RB, LFUNC:RB, RA |
3794 | | ins_next1 | 4377 | | ins_next1 |
3795 | | lwz TMP1, UPVAL:RB->v | 4378 | | lwz TMP1, UPVAL:RB->v |
4379 | |.if FPU | ||
3796 | | stfd f0, 0(TMP1) | 4380 | | stfd f0, 0(TMP1) |
4381 | |.else | ||
4382 | | stw TMP2, 0(TMP1) | ||
4383 | | stw TMP3, 4(TMP1) | ||
4384 | |.endif | ||
3797 | | ins_next2 | 4385 | | ins_next2 |
3798 | break; | 4386 | break; |
3799 | case BC_USETP: | 4387 | case BC_USETP: |
@@ -3941,11 +4529,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3941 | |.endif | 4529 | |.endif |
3942 | | ble ->vmeta_tgetv // Integer key and in array part? | 4530 | | ble ->vmeta_tgetv // Integer key and in array part? |
3943 | | lwzx TMP0, TMP1, TMP2 | 4531 | | lwzx TMP0, TMP1, TMP2 |
4532 | |.if FPU | ||
3944 | | lfdx f14, TMP1, TMP2 | 4533 | | lfdx f14, TMP1, TMP2 |
4534 | |.else | ||
4535 | | lwzux SAVE0, TMP1, TMP2 | ||
4536 | | lwz SAVE1, 4(TMP1) | ||
4537 | |.endif | ||
3945 | | checknil TMP0; beq >2 | 4538 | | checknil TMP0; beq >2 |
3946 | |1: | 4539 | |1: |
3947 | | ins_next1 | 4540 | | ins_next1 |
4541 | |.if FPU | ||
3948 | | stfdx f14, BASE, RA | 4542 | | stfdx f14, BASE, RA |
4543 | |.else | ||
4544 | | stwux SAVE0, RA, BASE | ||
4545 | | stw SAVE1, 4(RA) | ||
4546 | |.endif | ||
3949 | | ins_next2 | 4547 | | ins_next2 |
3950 | | | 4548 | | |
3951 | |2: // Check for __index if table value is nil. | 4549 | |2: // Check for __index if table value is nil. |
@@ -3976,9 +4574,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3976 | |->BC_TGETS_Z: | 4574 | |->BC_TGETS_Z: |
3977 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | 4575 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 |
3978 | | lwz TMP0, TAB:RB->hmask | 4576 | | lwz TMP0, TAB:RB->hmask |
3979 | | lwz TMP1, STR:RC->hash | 4577 | | lwz TMP1, STR:RC->sid |
3980 | | lwz NODE:TMP2, TAB:RB->node | 4578 | | lwz NODE:TMP2, TAB:RB->node |
3981 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 4579 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask |
3982 | | slwi TMP0, TMP1, 5 | 4580 | | slwi TMP0, TMP1, 5 |
3983 | | slwi TMP1, TMP1, 3 | 4581 | | slwi TMP1, TMP1, 3 |
3984 | | sub TMP1, TMP0, TMP1 | 4582 | | sub TMP1, TMP0, TMP1 |
@@ -4021,12 +4619,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4021 | | lwz TMP1, TAB:RB->asize | 4619 | | lwz TMP1, TAB:RB->asize |
4022 | | lwz TMP2, TAB:RB->array | 4620 | | lwz TMP2, TAB:RB->array |
4023 | | cmplw TMP0, TMP1; bge ->vmeta_tgetb | 4621 | | cmplw TMP0, TMP1; bge ->vmeta_tgetb |
4622 | |.if FPU | ||
4024 | | lwzx TMP1, TMP2, RC | 4623 | | lwzx TMP1, TMP2, RC |
4025 | | lfdx f0, TMP2, RC | 4624 | | lfdx f0, TMP2, RC |
4625 | |.else | ||
4626 | | lwzux TMP1, TMP2, RC | ||
4627 | | lwz TMP3, 4(TMP2) | ||
4628 | |.endif | ||
4026 | | checknil TMP1; beq >5 | 4629 | | checknil TMP1; beq >5 |
4027 | |1: | 4630 | |1: |
4028 | | ins_next1 | 4631 | | ins_next1 |
4632 | |.if FPU | ||
4029 | | stfdx f0, BASE, RA | 4633 | | stfdx f0, BASE, RA |
4634 | |.else | ||
4635 | | stwux TMP1, RA, BASE | ||
4636 | | stw TMP3, 4(RA) | ||
4637 | |.endif | ||
4030 | | ins_next2 | 4638 | | ins_next2 |
4031 | | | 4639 | | |
4032 | |5: // Check for __index if table value is nil. | 4640 | |5: // Check for __index if table value is nil. |
@@ -4038,6 +4646,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4038 | | bne <1 // 'no __index' flag set: done. | 4646 | | bne <1 // 'no __index' flag set: done. |
4039 | | b ->vmeta_tgetb // Caveat: preserve TMP0! | 4647 | | b ->vmeta_tgetb // Caveat: preserve TMP0! |
4040 | break; | 4648 | break; |
4649 | case BC_TGETR: | ||
4650 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4651 | | add RB, BASE, RB | ||
4652 | | lwz TAB:CARG1, 4(RB) | ||
4653 | |.if DUALNUM | ||
4654 | | add RC, BASE, RC | ||
4655 | | lwz TMP0, TAB:CARG1->asize | ||
4656 | | lwz CARG2, 4(RC) | ||
4657 | | lwz TMP1, TAB:CARG1->array | ||
4658 | |.else | ||
4659 | | lfdx f0, BASE, RC | ||
4660 | | lwz TMP0, TAB:CARG1->asize | ||
4661 | | toint CARG2, f0 | ||
4662 | | lwz TMP1, TAB:CARG1->array | ||
4663 | |.endif | ||
4664 | | cmplw TMP0, CARG2 | ||
4665 | | slwi TMP2, CARG2, 3 | ||
4666 | | ble ->vmeta_tgetr // In array part? | ||
4667 | |.if FPU | ||
4668 | | lfdx f14, TMP1, TMP2 | ||
4669 | |.else | ||
4670 | | lwzux SAVE0, TMP2, TMP1 | ||
4671 | | lwz SAVE1, 4(TMP2) | ||
4672 | |.endif | ||
4673 | |->BC_TGETR_Z: | ||
4674 | | ins_next1 | ||
4675 | |.if FPU | ||
4676 | | stfdx f14, BASE, RA | ||
4677 | |.else | ||
4678 | | stwux SAVE0, RA, BASE | ||
4679 | | stw SAVE1, 4(RA) | ||
4680 | |.endif | ||
4681 | | ins_next2 | ||
4682 | break; | ||
4041 | 4683 | ||
4042 | case BC_TSETV: | 4684 | case BC_TSETV: |
4043 | | // RA = src*8, RB = table*8, RC = key*8 | 4685 | | // RA = src*8, RB = table*8, RC = key*8 |
@@ -4076,11 +4718,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4076 | | ble ->vmeta_tsetv // Integer key and in array part? | 4718 | | ble ->vmeta_tsetv // Integer key and in array part? |
4077 | | lwzx TMP2, TMP1, TMP0 | 4719 | | lwzx TMP2, TMP1, TMP0 |
4078 | | lbz TMP3, TAB:RB->marked | 4720 | | lbz TMP3, TAB:RB->marked |
4721 | |.if FPU | ||
4079 | | lfdx f14, BASE, RA | 4722 | | lfdx f14, BASE, RA |
4723 | |.else | ||
4724 | | add SAVE1, BASE, RA | ||
4725 | | lwz SAVE0, 0(SAVE1) | ||
4726 | | lwz SAVE1, 4(SAVE1) | ||
4727 | |.endif | ||
4080 | | checknil TMP2; beq >3 | 4728 | | checknil TMP2; beq >3 |
4081 | |1: | 4729 | |1: |
4082 | | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) | 4730 | | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) |
4731 | |.if FPU | ||
4083 | | stfdx f14, TMP1, TMP0 | 4732 | | stfdx f14, TMP1, TMP0 |
4733 | |.else | ||
4734 | | stwux SAVE0, TMP1, TMP0 | ||
4735 | | stw SAVE1, 4(TMP1) | ||
4736 | |.endif | ||
4084 | | bne >7 | 4737 | | bne >7 |
4085 | |2: | 4738 | |2: |
4086 | | ins_next | 4739 | | ins_next |
@@ -4117,11 +4770,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4117 | |->BC_TSETS_Z: | 4770 | |->BC_TSETS_Z: |
4118 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 | 4771 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 |
4119 | | lwz TMP0, TAB:RB->hmask | 4772 | | lwz TMP0, TAB:RB->hmask |
4120 | | lwz TMP1, STR:RC->hash | 4773 | | lwz TMP1, STR:RC->sid |
4121 | | lwz NODE:TMP2, TAB:RB->node | 4774 | | lwz NODE:TMP2, TAB:RB->node |
4122 | | stb ZERO, TAB:RB->nomm // Clear metamethod cache. | 4775 | | stb ZERO, TAB:RB->nomm // Clear metamethod cache. |
4123 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | 4776 | | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask |
4777 | |.if FPU | ||
4124 | | lfdx f14, BASE, RA | 4778 | | lfdx f14, BASE, RA |
4779 | |.else | ||
4780 | | add CARG2, BASE, RA | ||
4781 | | lwz SAVE0, 0(CARG2) | ||
4782 | | lwz SAVE1, 4(CARG2) | ||
4783 | |.endif | ||
4125 | | slwi TMP0, TMP1, 5 | 4784 | | slwi TMP0, TMP1, 5 |
4126 | | slwi TMP1, TMP1, 3 | 4785 | | slwi TMP1, TMP1, 3 |
4127 | | sub TMP1, TMP0, TMP1 | 4786 | | sub TMP1, TMP0, TMP1 |
@@ -4137,7 +4796,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4137 | | checknil CARG2; beq >4 // Key found, but nil value? | 4796 | | checknil CARG2; beq >4 // Key found, but nil value? |
4138 | |2: | 4797 | |2: |
4139 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4798 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
4799 | |.if FPU | ||
4140 | | stfd f14, NODE:TMP2->val | 4800 | | stfd f14, NODE:TMP2->val |
4801 | |.else | ||
4802 | | stw SAVE0, NODE:TMP2->val.u32.hi | ||
4803 | | stw SAVE1, NODE:TMP2->val.u32.lo | ||
4804 | |.endif | ||
4141 | | bne >7 | 4805 | | bne >7 |
4142 | |3: | 4806 | |3: |
4143 | | ins_next | 4807 | | ins_next |
@@ -4176,7 +4840,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4176 | | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | 4840 | | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) |
4177 | | // Returns TValue *. | 4841 | | // Returns TValue *. |
4178 | | lp BASE, L->base | 4842 | | lp BASE, L->base |
4843 | |.if FPU | ||
4179 | | stfd f14, 0(CRET1) | 4844 | | stfd f14, 0(CRET1) |
4845 | |.else | ||
4846 | | stw SAVE0, 0(CRET1) | ||
4847 | | stw SAVE1, 4(CRET1) | ||
4848 | |.endif | ||
4180 | | b <3 // No 2nd write barrier needed. | 4849 | | b <3 // No 2nd write barrier needed. |
4181 | | | 4850 | | |
4182 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4851 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
@@ -4193,13 +4862,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4193 | | lwz TMP2, TAB:RB->array | 4862 | | lwz TMP2, TAB:RB->array |
4194 | | lbz TMP3, TAB:RB->marked | 4863 | | lbz TMP3, TAB:RB->marked |
4195 | | cmplw TMP0, TMP1 | 4864 | | cmplw TMP0, TMP1 |
4865 | |.if FPU | ||
4196 | | lfdx f14, BASE, RA | 4866 | | lfdx f14, BASE, RA |
4867 | |.else | ||
4868 | | add CARG2, BASE, RA | ||
4869 | | lwz SAVE0, 0(CARG2) | ||
4870 | | lwz SAVE1, 4(CARG2) | ||
4871 | |.endif | ||
4197 | | bge ->vmeta_tsetb | 4872 | | bge ->vmeta_tsetb |
4198 | | lwzx TMP1, TMP2, RC | 4873 | | lwzx TMP1, TMP2, RC |
4199 | | checknil TMP1; beq >5 | 4874 | | checknil TMP1; beq >5 |
4200 | |1: | 4875 | |1: |
4201 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4876 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
4877 | |.if FPU | ||
4202 | | stfdx f14, TMP2, RC | 4878 | | stfdx f14, TMP2, RC |
4879 | |.else | ||
4880 | | stwux SAVE0, RC, TMP2 | ||
4881 | | stw SAVE1, 4(RC) | ||
4882 | |.endif | ||
4203 | | bne >7 | 4883 | | bne >7 |
4204 | |2: | 4884 | |2: |
4205 | | ins_next | 4885 | | ins_next |
@@ -4217,6 +4897,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4217 | | barrierback TAB:RB, TMP3, TMP0 | 4897 | | barrierback TAB:RB, TMP3, TMP0 |
4218 | | b <2 | 4898 | | b <2 |
4219 | break; | 4899 | break; |
4900 | case BC_TSETR: | ||
4901 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
4902 | | add RB, BASE, RB | ||
4903 | | lwz TAB:CARG2, 4(RB) | ||
4904 | |.if DUALNUM | ||
4905 | | add RC, BASE, RC | ||
4906 | | lbz TMP3, TAB:CARG2->marked | ||
4907 | | lwz TMP0, TAB:CARG2->asize | ||
4908 | | lwz CARG3, 4(RC) | ||
4909 | | lwz TMP1, TAB:CARG2->array | ||
4910 | |.else | ||
4911 | | lfdx f0, BASE, RC | ||
4912 | | lbz TMP3, TAB:CARG2->marked | ||
4913 | | lwz TMP0, TAB:CARG2->asize | ||
4914 | | toint CARG3, f0 | ||
4915 | | lwz TMP1, TAB:CARG2->array | ||
4916 | |.endif | ||
4917 | | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) | ||
4918 | | bne >7 | ||
4919 | |2: | ||
4920 | | cmplw TMP0, CARG3 | ||
4921 | | slwi TMP2, CARG3, 3 | ||
4922 | |.if FPU | ||
4923 | | lfdx f14, BASE, RA | ||
4924 | |.else | ||
4925 | | lwzux SAVE0, RA, BASE | ||
4926 | | lwz SAVE1, 4(RA) | ||
4927 | |.endif | ||
4928 | | ble ->vmeta_tsetr // In array part? | ||
4929 | | ins_next1 | ||
4930 | |.if FPU | ||
4931 | | stfdx f14, TMP1, TMP2 | ||
4932 | |.else | ||
4933 | | stwux SAVE0, TMP1, TMP2 | ||
4934 | | stw SAVE1, 4(TMP1) | ||
4935 | |.endif | ||
4936 | | ins_next2 | ||
4937 | | | ||
4938 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4939 | | barrierback TAB:CARG2, TMP3, TMP2 | ||
4940 | | b <2 | ||
4941 | break; | ||
4942 | |||
4220 | 4943 | ||
4221 | case BC_TSETM: | 4944 | case BC_TSETM: |
4222 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | 4945 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) |
@@ -4239,10 +4962,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4239 | | add TMP1, TMP1, TMP0 | 4962 | | add TMP1, TMP1, TMP0 |
4240 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4963 | | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
4241 | |3: // Copy result slots to table. | 4964 | |3: // Copy result slots to table. |
4965 | |.if FPU | ||
4242 | | lfd f0, 0(RA) | 4966 | | lfd f0, 0(RA) |
4967 | |.else | ||
4968 | | lwz SAVE0, 0(RA) | ||
4969 | | lwz SAVE1, 4(RA) | ||
4970 | |.endif | ||
4243 | | addi RA, RA, 8 | 4971 | | addi RA, RA, 8 |
4244 | | cmpw cr1, RA, TMP2 | 4972 | | cmpw cr1, RA, TMP2 |
4973 | |.if FPU | ||
4245 | | stfd f0, 0(TMP1) | 4974 | | stfd f0, 0(TMP1) |
4975 | |.else | ||
4976 | | stw SAVE0, 0(TMP1) | ||
4977 | | stw SAVE1, 4(TMP1) | ||
4978 | |.endif | ||
4246 | | addi TMP1, TMP1, 8 | 4979 | | addi TMP1, TMP1, 8 |
4247 | | blt cr1, <3 | 4980 | | blt cr1, <3 |
4248 | | bne >7 | 4981 | | bne >7 |
@@ -4309,9 +5042,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4309 | | beq cr1, >3 | 5042 | | beq cr1, >3 |
4310 | |2: | 5043 | |2: |
4311 | | addi TMP3, TMP2, 8 | 5044 | | addi TMP3, TMP2, 8 |
5045 | |.if FPU | ||
4312 | | lfdx f0, RA, TMP2 | 5046 | | lfdx f0, RA, TMP2 |
5047 | |.else | ||
5048 | | add CARG3, RA, TMP2 | ||
5049 | | lwz CARG1, 0(CARG3) | ||
5050 | | lwz CARG2, 4(CARG3) | ||
5051 | |.endif | ||
4313 | | cmplw cr1, TMP3, NARGS8:RC | 5052 | | cmplw cr1, TMP3, NARGS8:RC |
5053 | |.if FPU | ||
4314 | | stfdx f0, BASE, TMP2 | 5054 | | stfdx f0, BASE, TMP2 |
5055 | |.else | ||
5056 | | stwux CARG1, TMP2, BASE | ||
5057 | | stw CARG2, 4(TMP2) | ||
5058 | |.endif | ||
4315 | | mr TMP2, TMP3 | 5059 | | mr TMP2, TMP3 |
4316 | | bne cr1, <2 | 5060 | | bne cr1, <2 |
4317 | |3: | 5061 | |3: |
@@ -4344,14 +5088,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4344 | | add BASE, BASE, RA | 5088 | | add BASE, BASE, RA |
4345 | | lwz TMP1, -24(BASE) | 5089 | | lwz TMP1, -24(BASE) |
4346 | | lwz LFUNC:RB, -20(BASE) | 5090 | | lwz LFUNC:RB, -20(BASE) |
5091 | |.if FPU | ||
4347 | | lfd f1, -8(BASE) | 5092 | | lfd f1, -8(BASE) |
4348 | | lfd f0, -16(BASE) | 5093 | | lfd f0, -16(BASE) |
5094 | |.else | ||
5095 | | lwz CARG1, -8(BASE) | ||
5096 | | lwz CARG2, -4(BASE) | ||
5097 | | lwz CARG3, -16(BASE) | ||
5098 | | lwz CARG4, -12(BASE) | ||
5099 | |.endif | ||
4349 | | stw TMP1, 0(BASE) // Copy callable. | 5100 | | stw TMP1, 0(BASE) // Copy callable. |
4350 | | stw LFUNC:RB, 4(BASE) | 5101 | | stw LFUNC:RB, 4(BASE) |
4351 | | checkfunc TMP1 | 5102 | | checkfunc TMP1 |
4352 | | stfd f1, 16(BASE) // Copy control var. | ||
4353 | | li NARGS8:RC, 16 // Iterators get 2 arguments. | 5103 | | li NARGS8:RC, 16 // Iterators get 2 arguments. |
5104 | |.if FPU | ||
5105 | | stfd f1, 16(BASE) // Copy control var. | ||
4354 | | stfdu f0, 8(BASE) // Copy state. | 5106 | | stfdu f0, 8(BASE) // Copy state. |
5107 | |.else | ||
5108 | | stw CARG1, 16(BASE) // Copy control var. | ||
5109 | | stw CARG2, 20(BASE) | ||
5110 | | stwu CARG3, 8(BASE) // Copy state. | ||
5111 | | stw CARG4, 4(BASE) | ||
5112 | |.endif | ||
4355 | | bne ->vmeta_call | 5113 | | bne ->vmeta_call |
4356 | | ins_call | 5114 | | ins_call |
4357 | break; | 5115 | break; |
@@ -4359,8 +5117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4359 | case BC_ITERN: | 5117 | case BC_ITERN: |
4360 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | 5118 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) |
4361 | |.if JIT | 5119 | |.if JIT |
4362 | | // NYI: add hotloop, record BC_ITERN. | 5120 | | // NYI on big-endian |
4363 | |.endif | 5121 | |.endif |
5122 | |->vm_IITERN: | ||
4364 | | add RA, BASE, RA | 5123 | | add RA, BASE, RA |
4365 | | lwz TAB:RB, -12(RA) | 5124 | | lwz TAB:RB, -12(RA) |
4366 | | lwz RC, -4(RA) // Get index from control var. | 5125 | | lwz RC, -4(RA) // Get index from control var. |
@@ -4372,7 +5131,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4372 | | slwi TMP3, RC, 3 | 5131 | | slwi TMP3, RC, 3 |
4373 | | bge >5 // Index points after array part? | 5132 | | bge >5 // Index points after array part? |
4374 | | lwzx TMP2, TMP1, TMP3 | 5133 | | lwzx TMP2, TMP1, TMP3 |
5134 | |.if FPU | ||
4375 | | lfdx f0, TMP1, TMP3 | 5135 | | lfdx f0, TMP1, TMP3 |
5136 | |.else | ||
5137 | | lwzux CARG1, TMP3, TMP1 | ||
5138 | | lwz CARG2, 4(TMP3) | ||
5139 | |.endif | ||
4376 | | checknil TMP2 | 5140 | | checknil TMP2 |
4377 | | lwz INS, -4(PC) | 5141 | | lwz INS, -4(PC) |
4378 | | beq >4 | 5142 | | beq >4 |
@@ -4384,7 +5148,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4384 | |.endif | 5148 | |.endif |
4385 | | addi RC, RC, 1 | 5149 | | addi RC, RC, 1 |
4386 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | 5150 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) |
5151 | |.if FPU | ||
4387 | | stfd f0, 8(RA) | 5152 | | stfd f0, 8(RA) |
5153 | |.else | ||
5154 | | stw CARG1, 8(RA) | ||
5155 | | stw CARG2, 12(RA) | ||
5156 | |.endif | ||
4388 | | decode_RD4 TMP1, INS | 5157 | | decode_RD4 TMP1, INS |
4389 | | stw RC, -4(RA) // Update control var. | 5158 | | stw RC, -4(RA) // Update control var. |
4390 | | add PC, TMP1, TMP3 | 5159 | | add PC, TMP1, TMP3 |
@@ -4409,17 +5178,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4409 | | slwi RB, RC, 3 | 5178 | | slwi RB, RC, 3 |
4410 | | sub TMP3, TMP3, RB | 5179 | | sub TMP3, TMP3, RB |
4411 | | lwzx RB, TMP2, TMP3 | 5180 | | lwzx RB, TMP2, TMP3 |
5181 | |.if FPU | ||
4412 | | lfdx f0, TMP2, TMP3 | 5182 | | lfdx f0, TMP2, TMP3 |
5183 | |.else | ||
5184 | | add CARG3, TMP2, TMP3 | ||
5185 | | lwz CARG1, 0(CARG3) | ||
5186 | | lwz CARG2, 4(CARG3) | ||
5187 | |.endif | ||
4413 | | add NODE:TMP3, TMP2, TMP3 | 5188 | | add NODE:TMP3, TMP2, TMP3 |
4414 | | checknil RB | 5189 | | checknil RB |
4415 | | lwz INS, -4(PC) | 5190 | | lwz INS, -4(PC) |
4416 | | beq >7 | 5191 | | beq >7 |
5192 | |.if FPU | ||
4417 | | lfd f1, NODE:TMP3->key | 5193 | | lfd f1, NODE:TMP3->key |
5194 | |.else | ||
5195 | | lwz CARG3, NODE:TMP3->key.u32.hi | ||
5196 | | lwz CARG4, NODE:TMP3->key.u32.lo | ||
5197 | |.endif | ||
4418 | | addis TMP2, PC, -(BCBIAS_J*4 >> 16) | 5198 | | addis TMP2, PC, -(BCBIAS_J*4 >> 16) |
5199 | |.if FPU | ||
4419 | | stfd f0, 8(RA) | 5200 | | stfd f0, 8(RA) |
5201 | |.else | ||
5202 | | stw CARG1, 8(RA) | ||
5203 | | stw CARG2, 12(RA) | ||
5204 | |.endif | ||
4420 | | add RC, RC, TMP0 | 5205 | | add RC, RC, TMP0 |
4421 | | decode_RD4 TMP1, INS | 5206 | | decode_RD4 TMP1, INS |
5207 | |.if FPU | ||
4422 | | stfd f1, 0(RA) | 5208 | | stfd f1, 0(RA) |
5209 | |.else | ||
5210 | | stw CARG3, 0(RA) | ||
5211 | | stw CARG4, 4(RA) | ||
5212 | |.endif | ||
4423 | | addi RC, RC, 1 | 5213 | | addi RC, RC, 1 |
4424 | | add PC, TMP1, TMP2 | 5214 | | add PC, TMP1, TMP2 |
4425 | | stw RC, -4(RA) // Update control var. | 5215 | | stw RC, -4(RA) // Update control var. |
@@ -4448,8 +5238,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4448 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | 5238 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq |
4449 | | add TMP3, PC, TMP0 | 5239 | | add TMP3, PC, TMP0 |
4450 | | bne cr0, >5 | 5240 | | bne cr0, >5 |
4451 | | lus TMP1, 0xfffe | 5241 | | lus TMP1, (LJ_KEYINDEX >> 16) |
4452 | | ori TMP1, TMP1, 0x7fff | 5242 | | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) |
4453 | | stw ZERO, -4(RA) // Initialize control var. | 5243 | | stw ZERO, -4(RA) // Initialize control var. |
4454 | | stw TMP1, -8(RA) | 5244 | | stw TMP1, -8(RA) |
4455 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) | 5245 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) |
@@ -4460,6 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4460 | | li TMP1, BC_ITERC | 5250 | | li TMP1, BC_ITERC |
4461 | | stb TMP0, -1(PC) | 5251 | | stb TMP0, -1(PC) |
4462 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) | 5252 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) |
5253 | | // NYI on big-endian: unpatch JLOOP. | ||
4463 | | stb TMP1, 3(PC) | 5254 | | stb TMP1, 3(PC) |
4464 | | b <1 | 5255 | | b <1 |
4465 | break; | 5256 | break; |
@@ -4485,9 +5276,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4485 | | subi TMP2, TMP2, 16 | 5276 | | subi TMP2, TMP2, 16 |
4486 | | ble >2 // No vararg slots? | 5277 | | ble >2 // No vararg slots? |
4487 | |1: // Copy vararg slots to destination slots. | 5278 | |1: // Copy vararg slots to destination slots. |
5279 | |.if FPU | ||
4488 | | lfd f0, 0(RC) | 5280 | | lfd f0, 0(RC) |
5281 | |.else | ||
5282 | | lwz CARG1, 0(RC) | ||
5283 | | lwz CARG2, 4(RC) | ||
5284 | |.endif | ||
4489 | | addi RC, RC, 8 | 5285 | | addi RC, RC, 8 |
5286 | |.if FPU | ||
4490 | | stfd f0, 0(RA) | 5287 | | stfd f0, 0(RA) |
5288 | |.else | ||
5289 | | stw CARG1, 0(RA) | ||
5290 | | stw CARG2, 4(RA) | ||
5291 | |.endif | ||
4491 | | cmplw RA, TMP2 | 5292 | | cmplw RA, TMP2 |
4492 | | cmplw cr1, RC, TMP3 | 5293 | | cmplw cr1, RC, TMP3 |
4493 | | bge >3 // All destination slots filled? | 5294 | | bge >3 // All destination slots filled? |
@@ -4510,9 +5311,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4510 | | addi MULTRES, TMP1, 8 | 5311 | | addi MULTRES, TMP1, 8 |
4511 | | bgt >7 | 5312 | | bgt >7 |
4512 | |6: | 5313 | |6: |
5314 | |.if FPU | ||
4513 | | lfd f0, 0(RC) | 5315 | | lfd f0, 0(RC) |
5316 | |.else | ||
5317 | | lwz CARG1, 0(RC) | ||
5318 | | lwz CARG2, 4(RC) | ||
5319 | |.endif | ||
4514 | | addi RC, RC, 8 | 5320 | | addi RC, RC, 8 |
5321 | |.if FPU | ||
4515 | | stfd f0, 0(RA) | 5322 | | stfd f0, 0(RA) |
5323 | |.else | ||
5324 | | stw CARG1, 0(RA) | ||
5325 | | stw CARG2, 4(RA) | ||
5326 | |.endif | ||
4516 | | cmplw RC, TMP3 | 5327 | | cmplw RC, TMP3 |
4517 | | addi RA, RA, 8 | 5328 | | addi RA, RA, 8 |
4518 | | blt <6 // More vararg slots? | 5329 | | blt <6 // More vararg slots? |
@@ -4563,14 +5374,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4563 | | li TMP1, 0 | 5374 | | li TMP1, 0 |
4564 | |2: | 5375 | |2: |
4565 | | addi TMP3, TMP1, 8 | 5376 | | addi TMP3, TMP1, 8 |
5377 | |.if FPU | ||
4566 | | lfdx f0, RA, TMP1 | 5378 | | lfdx f0, RA, TMP1 |
5379 | |.else | ||
5380 | | add CARG3, RA, TMP1 | ||
5381 | | lwz CARG1, 0(CARG3) | ||
5382 | | lwz CARG2, 4(CARG3) | ||
5383 | |.endif | ||
4567 | | cmpw TMP3, RC | 5384 | | cmpw TMP3, RC |
5385 | |.if FPU | ||
4568 | | stfdx f0, TMP2, TMP1 | 5386 | | stfdx f0, TMP2, TMP1 |
5387 | |.else | ||
5388 | | add CARG3, TMP2, TMP1 | ||
5389 | | stw CARG1, 0(CARG3) | ||
5390 | | stw CARG2, 4(CARG3) | ||
5391 | |.endif | ||
4569 | | beq >3 | 5392 | | beq >3 |
4570 | | addi TMP1, TMP3, 8 | 5393 | | addi TMP1, TMP3, 8 |
5394 | |.if FPU | ||
4571 | | lfdx f1, RA, TMP3 | 5395 | | lfdx f1, RA, TMP3 |
5396 | |.else | ||
5397 | | add CARG3, RA, TMP3 | ||
5398 | | lwz CARG1, 0(CARG3) | ||
5399 | | lwz CARG2, 4(CARG3) | ||
5400 | |.endif | ||
4572 | | cmpw TMP1, RC | 5401 | | cmpw TMP1, RC |
5402 | |.if FPU | ||
4573 | | stfdx f1, TMP2, TMP3 | 5403 | | stfdx f1, TMP2, TMP3 |
5404 | |.else | ||
5405 | | add CARG3, TMP2, TMP3 | ||
5406 | | stw CARG1, 0(CARG3) | ||
5407 | | stw CARG2, 4(CARG3) | ||
5408 | |.endif | ||
4574 | | bne <2 | 5409 | | bne <2 |
4575 | |3: | 5410 | |3: |
4576 | |5: | 5411 | |5: |
@@ -4612,8 +5447,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4612 | | subi TMP2, BASE, 8 | 5447 | | subi TMP2, BASE, 8 |
4613 | | decode_RB8 RB, INS | 5448 | | decode_RB8 RB, INS |
4614 | if (op == BC_RET1) { | 5449 | if (op == BC_RET1) { |
5450 | |.if FPU | ||
4615 | | lfd f0, 0(RA) | 5451 | | lfd f0, 0(RA) |
4616 | | stfd f0, 0(TMP2) | 5452 | | stfd f0, 0(TMP2) |
5453 | |.else | ||
5454 | | lwz CARG1, 0(RA) | ||
5455 | | lwz CARG2, 4(RA) | ||
5456 | | stw CARG1, 0(TMP2) | ||
5457 | | stw CARG2, 4(TMP2) | ||
5458 | |.endif | ||
4617 | } | 5459 | } |
4618 | |5: | 5460 | |5: |
4619 | | cmplw RB, RD | 5461 | | cmplw RB, RD |
@@ -4674,11 +5516,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4674 | |4: | 5516 | |4: |
4675 | | stw CARG1, FORL_IDX*8+4(RA) | 5517 | | stw CARG1, FORL_IDX*8+4(RA) |
4676 | } else { | 5518 | } else { |
4677 | | lwz TMP3, FORL_STEP*8(RA) | 5519 | | lwz SAVE0, FORL_STEP*8(RA) |
4678 | | lwz CARG3, FORL_STEP*8+4(RA) | 5520 | | lwz CARG3, FORL_STEP*8+4(RA) |
4679 | | lwz TMP2, FORL_STOP*8(RA) | 5521 | | lwz TMP2, FORL_STOP*8(RA) |
4680 | | lwz CARG2, FORL_STOP*8+4(RA) | 5522 | | lwz CARG2, FORL_STOP*8+4(RA) |
4681 | | cmplw cr7, TMP3, TISNUM | 5523 | | cmplw cr7, SAVE0, TISNUM |
4682 | | cmplw cr1, TMP2, TISNUM | 5524 | | cmplw cr1, TMP2, TISNUM |
4683 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | 5525 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq |
4684 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | 5526 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq |
@@ -4721,41 +5563,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4721 | if (vk) { | 5563 | if (vk) { |
4722 | |.if DUALNUM | 5564 | |.if DUALNUM |
4723 | |9: // FP loop. | 5565 | |9: // FP loop. |
5566 | |.if FPU | ||
4724 | | lfd f1, FORL_IDX*8(RA) | 5567 | | lfd f1, FORL_IDX*8(RA) |
4725 | |.else | 5568 | |.else |
5569 | | lwz CARG1, FORL_IDX*8(RA) | ||
5570 | | lwz CARG2, FORL_IDX*8+4(RA) | ||
5571 | |.endif | ||
5572 | |.else | ||
4726 | | lfdux f1, RA, BASE | 5573 | | lfdux f1, RA, BASE |
4727 | |.endif | 5574 | |.endif |
5575 | |.if FPU | ||
4728 | | lfd f3, FORL_STEP*8(RA) | 5576 | | lfd f3, FORL_STEP*8(RA) |
4729 | | lfd f2, FORL_STOP*8(RA) | 5577 | | lfd f2, FORL_STOP*8(RA) |
4730 | | lwz TMP3, FORL_STEP*8(RA) | ||
4731 | | fadd f1, f1, f3 | 5578 | | fadd f1, f1, f3 |
4732 | | stfd f1, FORL_IDX*8(RA) | 5579 | | stfd f1, FORL_IDX*8(RA) |
5580 | |.else | ||
5581 | | lwz CARG3, FORL_STEP*8(RA) | ||
5582 | | lwz CARG4, FORL_STEP*8+4(RA) | ||
5583 | | mr SAVE1, RD | ||
5584 | | blex __adddf3 | ||
5585 | | mr RD, SAVE1 | ||
5586 | | stw CRET1, FORL_IDX*8(RA) | ||
5587 | | stw CRET2, FORL_IDX*8+4(RA) | ||
5588 | | lwz CARG3, FORL_STOP*8(RA) | ||
5589 | | lwz CARG4, FORL_STOP*8+4(RA) | ||
5590 | |.endif | ||
5591 | | lwz SAVE0, FORL_STEP*8(RA) | ||
4733 | } else { | 5592 | } else { |
4734 | |.if DUALNUM | 5593 | |.if DUALNUM |
4735 | |9: // FP loop. | 5594 | |9: // FP loop. |
4736 | |.else | 5595 | |.else |
4737 | | lwzux TMP1, RA, BASE | 5596 | | lwzux TMP1, RA, BASE |
4738 | | lwz TMP3, FORL_STEP*8(RA) | 5597 | | lwz SAVE0, FORL_STEP*8(RA) |
4739 | | lwz TMP2, FORL_STOP*8(RA) | 5598 | | lwz TMP2, FORL_STOP*8(RA) |
4740 | | cmplw cr0, TMP1, TISNUM | 5599 | | cmplw cr0, TMP1, TISNUM |
4741 | | cmplw cr7, TMP3, TISNUM | 5600 | | cmplw cr7, SAVE0, TISNUM |
4742 | | cmplw cr1, TMP2, TISNUM | 5601 | | cmplw cr1, TMP2, TISNUM |
4743 | |.endif | 5602 | |.endif |
5603 | |.if FPU | ||
4744 | | lfd f1, FORL_IDX*8(RA) | 5604 | | lfd f1, FORL_IDX*8(RA) |
5605 | |.else | ||
5606 | | lwz CARG1, FORL_IDX*8(RA) | ||
5607 | | lwz CARG2, FORL_IDX*8+4(RA) | ||
5608 | |.endif | ||
4745 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt | 5609 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt |
4746 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 5610 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
5611 | |.if FPU | ||
4747 | | lfd f2, FORL_STOP*8(RA) | 5612 | | lfd f2, FORL_STOP*8(RA) |
5613 | |.else | ||
5614 | | lwz CARG3, FORL_STOP*8(RA) | ||
5615 | | lwz CARG4, FORL_STOP*8+4(RA) | ||
5616 | |.endif | ||
4748 | | bge ->vmeta_for | 5617 | | bge ->vmeta_for |
4749 | } | 5618 | } |
4750 | | cmpwi cr6, TMP3, 0 | 5619 | | cmpwi cr6, SAVE0, 0 |
4751 | if (op != BC_JFORL) { | 5620 | if (op != BC_JFORL) { |
4752 | | srwi RD, RD, 1 | 5621 | | srwi RD, RD, 1 |
4753 | } | 5622 | } |
5623 | |.if FPU | ||
4754 | | stfd f1, FORL_EXT*8(RA) | 5624 | | stfd f1, FORL_EXT*8(RA) |
5625 | |.else | ||
5626 | | stw CARG1, FORL_EXT*8(RA) | ||
5627 | | stw CARG2, FORL_EXT*8+4(RA) | ||
5628 | |.endif | ||
4755 | if (op != BC_JFORL) { | 5629 | if (op != BC_JFORL) { |
4756 | | add RD, PC, RD | 5630 | | add RD, PC, RD |
4757 | } | 5631 | } |
5632 | |.if FPU | ||
4758 | | fcmpu cr0, f1, f2 | 5633 | | fcmpu cr0, f1, f2 |
5634 | |.else | ||
5635 | | mr SAVE1, RD | ||
5636 | | blex __ledf2 | ||
5637 | | cmpwi CRET1, 0 | ||
5638 | | mr RD, SAVE1 | ||
5639 | |.endif | ||
4759 | if (op == BC_JFORI) { | 5640 | if (op == BC_JFORI) { |
4760 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | 5641 | | addis PC, RD, -(BCBIAS_J*4 >> 16) |
4761 | } | 5642 | } |
@@ -4858,8 +5739,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4858 | | lp TMP2, TRACE:TMP2->mcode | 5739 | | lp TMP2, TRACE:TMP2->mcode |
4859 | | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) | 5740 | | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) |
4860 | | mtctr TMP2 | 5741 | | mtctr TMP2 |
4861 | | stw L, DISPATCH_GL(jit_L)(DISPATCH) | ||
4862 | | addi JGL, DISPATCH, GG_DISP2G+32768 | 5742 | | addi JGL, DISPATCH, GG_DISP2G+32768 |
5743 | | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | ||
4863 | | bctr | 5744 | | bctr |
4864 | |.endif | 5745 | |.endif |
4865 | break; | 5746 | break; |
@@ -4994,6 +5875,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4994 | | lp TMP1, L->top | 5875 | | lp TMP1, L->top |
4995 | | li_vmstate INTERP | 5876 | | li_vmstate INTERP |
4996 | | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. | 5877 | | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. |
5878 | | stw L, DISPATCH_GL(cur_L)(DISPATCH) | ||
4997 | | sub RA, TMP1, RD // RA = L->top - nresults*8 | 5879 | | sub RA, TMP1, RD // RA = L->top - nresults*8 |
4998 | | st_vmstate | 5880 | | st_vmstate |
4999 | | b ->vm_returnc | 5881 | | b ->vm_returnc |
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc deleted file mode 100644 index 1d8f70f0..00000000 --- a/src/vm_ppcspe.dasc +++ /dev/null | |||
@@ -1,3691 +0,0 @@ | |||
1 | |// Low-level VM code for PowerPC/e500 CPUs. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | | | ||
5 | |.arch ppc | ||
6 | |.section code_op, code_sub | ||
7 | | | ||
8 | |.actionlist build_actionlist | ||
9 | |.globals GLOB_ | ||
10 | |.globalnames globnames | ||
11 | |.externnames extnames | ||
12 | | | ||
13 | |// Note: The ragged indentation of the instructions is intentional. | ||
14 | |// The starting columns indicate data dependencies. | ||
15 | | | ||
16 | |//----------------------------------------------------------------------- | ||
17 | | | ||
18 | |// Fixed register assignments for the interpreter. | ||
19 | |// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr | ||
20 | | | ||
21 | |// The following must be C callee-save (but BASE is often refetched). | ||
22 | |.define BASE, r14 // Base of current Lua stack frame. | ||
23 | |.define KBASE, r15 // Constants of current Lua function. | ||
24 | |.define PC, r16 // Next PC. | ||
25 | |.define DISPATCH, r17 // Opcode dispatch table. | ||
26 | |.define LREG, r18 // Register holding lua_State (also in SAVE_L). | ||
27 | |.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. | ||
28 | | | ||
29 | |// Constants for vectorized type-comparisons (hi+low GPR). C callee-save. | ||
30 | |.define TISNUM, r22 | ||
31 | |.define TISSTR, r23 | ||
32 | |.define TISTAB, r24 | ||
33 | |.define TISFUNC, r25 | ||
34 | |.define TISNIL, r26 | ||
35 | |.define TOBIT, r27 | ||
36 | |.define ZERO, TOBIT // Zero in lo word. | ||
37 | | | ||
38 | |// The following temporaries are not saved across C calls, except for RA. | ||
39 | |.define RA, r20 // Callee-save. | ||
40 | |.define RB, r10 | ||
41 | |.define RC, r11 | ||
42 | |.define RD, r12 | ||
43 | |.define INS, r7 // Overlaps CARG5. | ||
44 | | | ||
45 | |.define TMP0, r0 | ||
46 | |.define TMP1, r8 | ||
47 | |.define TMP2, r9 | ||
48 | |.define TMP3, r6 // Overlaps CARG4. | ||
49 | | | ||
50 | |// Saved temporaries. | ||
51 | |.define SAVE0, r21 | ||
52 | | | ||
53 | |// Calling conventions. | ||
54 | |.define CARG1, r3 | ||
55 | |.define CARG2, r4 | ||
56 | |.define CARG3, r5 | ||
57 | |.define CARG4, r6 // Overlaps TMP3. | ||
58 | |.define CARG5, r7 // Overlaps INS. | ||
59 | | | ||
60 | |.define CRET1, r3 | ||
61 | |.define CRET2, r4 | ||
62 | | | ||
63 | |// Stack layout while in interpreter. Must match with lj_frame.h. | ||
64 | |.define SAVE_LR, 188(sp) | ||
65 | |.define CFRAME_SPACE, 184 // Delta for sp. | ||
66 | |// Back chain for sp: 184(sp) <-- sp entering interpreter | ||
67 | |.define SAVE_r31, 176(sp) // 64 bit register saves. | ||
68 | |.define SAVE_r30, 168(sp) | ||
69 | |.define SAVE_r29, 160(sp) | ||
70 | |.define SAVE_r28, 152(sp) | ||
71 | |.define SAVE_r27, 144(sp) | ||
72 | |.define SAVE_r26, 136(sp) | ||
73 | |.define SAVE_r25, 128(sp) | ||
74 | |.define SAVE_r24, 120(sp) | ||
75 | |.define SAVE_r23, 112(sp) | ||
76 | |.define SAVE_r22, 104(sp) | ||
77 | |.define SAVE_r21, 96(sp) | ||
78 | |.define SAVE_r20, 88(sp) | ||
79 | |.define SAVE_r19, 80(sp) | ||
80 | |.define SAVE_r18, 72(sp) | ||
81 | |.define SAVE_r17, 64(sp) | ||
82 | |.define SAVE_r16, 56(sp) | ||
83 | |.define SAVE_r15, 48(sp) | ||
84 | |.define SAVE_r14, 40(sp) | ||
85 | |.define SAVE_CR, 36(sp) | ||
86 | |.define UNUSED1, 32(sp) | ||
87 | |.define SAVE_ERRF, 28(sp) // 32 bit C frame info. | ||
88 | |.define SAVE_NRES, 24(sp) | ||
89 | |.define SAVE_CFRAME, 20(sp) | ||
90 | |.define SAVE_L, 16(sp) | ||
91 | |.define SAVE_PC, 12(sp) | ||
92 | |.define SAVE_MULTRES, 8(sp) | ||
93 | |// Next frame lr: 4(sp) | ||
94 | |// Back chain for sp: 0(sp) <-- sp while in interpreter | ||
95 | | | ||
96 | |.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro | ||
97 | |.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro | ||
98 | | | ||
99 | |.macro saveregs | ||
100 | | stwu sp, -CFRAME_SPACE(sp) | ||
101 | | save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19 | ||
102 | | mflr r0; mfcr r12 | ||
103 | | save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25 | ||
104 | | stw r0, SAVE_LR; stw r12, SAVE_CR | ||
105 | | save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31 | ||
106 | |.endmacro | ||
107 | | | ||
108 | |.macro restoreregs | ||
109 | | lwz r0, SAVE_LR; lwz r12, SAVE_CR | ||
110 | | rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19 | ||
111 | | mtlr r0; mtcrf 0x38, r12 | ||
112 | | rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25 | ||
113 | | rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31 | ||
114 | | addi sp, sp, CFRAME_SPACE | ||
115 | |.endmacro | ||
116 | | | ||
117 | |// Type definitions. Some of these are only used for documentation. | ||
118 | |.type L, lua_State, LREG | ||
119 | |.type GL, global_State | ||
120 | |.type TVALUE, TValue | ||
121 | |.type GCOBJ, GCobj | ||
122 | |.type STR, GCstr | ||
123 | |.type TAB, GCtab | ||
124 | |.type LFUNC, GCfuncL | ||
125 | |.type CFUNC, GCfuncC | ||
126 | |.type PROTO, GCproto | ||
127 | |.type UPVAL, GCupval | ||
128 | |.type NODE, Node | ||
129 | |.type NARGS8, int | ||
130 | |.type TRACE, GCtrace | ||
131 | | | ||
132 | |//----------------------------------------------------------------------- | ||
133 | | | ||
134 | |// These basic macros should really be part of DynASM. | ||
135 | |.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro | ||
136 | |.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro | ||
137 | |.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro | ||
138 | |.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro | ||
139 | |.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro | ||
140 | | | ||
141 | |// Trap for not-yet-implemented parts. | ||
142 | |.macro NYI; tw 4, sp, sp; .endmacro | ||
143 | | | ||
144 | |//----------------------------------------------------------------------- | ||
145 | | | ||
146 | |// Access to frame relative to BASE. | ||
147 | |.define FRAME_PC, -8 | ||
148 | |.define FRAME_FUNC, -4 | ||
149 | | | ||
150 | |// Instruction decode. | ||
151 | |.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro | ||
152 | |.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro | ||
153 | |.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro | ||
154 | |.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro | ||
155 | |.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro | ||
156 | | | ||
157 | |.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro | ||
158 | |.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro | ||
159 | | | ||
160 | |// Instruction fetch. | ||
161 | |.macro ins_NEXT1 | ||
162 | | lwz INS, 0(PC) | ||
163 | | addi PC, PC, 4 | ||
164 | |.endmacro | ||
165 | |// Instruction decode+dispatch. | ||
166 | |.macro ins_NEXT2 | ||
167 | | decode_OP4 TMP1, INS | ||
168 | | decode_RB8 RB, INS | ||
169 | | decode_RD8 RD, INS | ||
170 | | lwzx TMP0, DISPATCH, TMP1 | ||
171 | | decode_RA8 RA, INS | ||
172 | | decode_RC8 RC, INS | ||
173 | | mtctr TMP0 | ||
174 | | bctr | ||
175 | |.endmacro | ||
176 | |.macro ins_NEXT | ||
177 | | ins_NEXT1 | ||
178 | | ins_NEXT2 | ||
179 | |.endmacro | ||
180 | | | ||
181 | |// Instruction footer. | ||
182 | |.if 1 | ||
183 | | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | ||
184 | | .define ins_next, ins_NEXT | ||
185 | | .define ins_next_, ins_NEXT | ||
186 | | .define ins_next1, ins_NEXT1 | ||
187 | | .define ins_next2, ins_NEXT2 | ||
188 | |.else | ||
189 | | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. | ||
190 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
191 | | .macro ins_next | ||
192 | | b ->ins_next | ||
193 | | .endmacro | ||
194 | | .macro ins_next1 | ||
195 | | .endmacro | ||
196 | | .macro ins_next2 | ||
197 | | b ->ins_next | ||
198 | | .endmacro | ||
199 | | .macro ins_next_ | ||
200 | | ->ins_next: | ||
201 | | ins_NEXT | ||
202 | | .endmacro | ||
203 | |.endif | ||
204 | | | ||
205 | |// Call decode and dispatch. | ||
206 | |.macro ins_callt | ||
207 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC | ||
208 | | lwz PC, LFUNC:RB->pc | ||
209 | | lwz INS, 0(PC) | ||
210 | | addi PC, PC, 4 | ||
211 | | decode_OP4 TMP1, INS | ||
212 | | decode_RA8 RA, INS | ||
213 | | lwzx TMP0, DISPATCH, TMP1 | ||
214 | | add RA, RA, BASE | ||
215 | | mtctr TMP0 | ||
216 | | bctr | ||
217 | |.endmacro | ||
218 | | | ||
219 | |.macro ins_call | ||
220 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC | ||
221 | | stw PC, FRAME_PC(BASE) | ||
222 | | ins_callt | ||
223 | |.endmacro | ||
224 | | | ||
225 | |//----------------------------------------------------------------------- | ||
226 | | | ||
227 | |// Macros to test operand types. | ||
228 | |.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro | ||
229 | |.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro | ||
230 | |.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro | ||
231 | |.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro | ||
232 | |.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro | ||
233 | |.macro checkok, label; blt label; .endmacro | ||
234 | |.macro checkfail, label; bge label; .endmacro | ||
235 | |.macro checkanyfail, label; bns label; .endmacro | ||
236 | |.macro checkallok, label; bso label; .endmacro | ||
237 | | | ||
238 | |.macro branch_RD | ||
239 | | srwi TMP0, RD, 1 | ||
240 | | add PC, PC, TMP0 | ||
241 | | addis PC, PC, -(BCBIAS_J*4 >> 16) | ||
242 | |.endmacro | ||
243 | | | ||
244 | |// Assumes DISPATCH is relative to GL. | ||
245 | #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) | ||
246 | #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) | ||
247 | | | ||
248 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | ||
249 | | | ||
250 | |.macro hotloop | ||
251 | | NYI | ||
252 | |.endmacro | ||
253 | | | ||
254 | |.macro hotcall | ||
255 | | NYI | ||
256 | |.endmacro | ||
257 | | | ||
258 | |// Set current VM state. Uses TMP0. | ||
259 | |.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro | ||
260 | |.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro | ||
261 | | | ||
262 | |// Move table write barrier back. Overwrites mark and tmp. | ||
263 | |.macro barrierback, tab, mark, tmp | ||
264 | | lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) | ||
265 | | // Assumes LJ_GC_BLACK is 0x04. | ||
266 | | rlwinm mark, mark, 0, 30, 28 // black2gray(tab) | ||
267 | | stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) | ||
268 | | stb mark, tab->marked | ||
269 | | stw tmp, tab->gclist | ||
270 | |.endmacro | ||
271 | | | ||
272 | |//----------------------------------------------------------------------- | ||
273 | |||
274 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
275 | /* The .code_sub section should be last to help static branch prediction. */ | ||
276 | static void build_subroutines(BuildCtx *ctx) | ||
277 | { | ||
278 | |.code_sub | ||
279 | | | ||
280 | |//----------------------------------------------------------------------- | ||
281 | |//-- Return handling ---------------------------------------------------- | ||
282 | |//----------------------------------------------------------------------- | ||
283 | | | ||
284 | |->vm_returnp: | ||
285 | | // See vm_return. Also: TMP2 = previous base. | ||
286 | | andi. TMP0, PC, FRAME_P | ||
287 | | evsplati TMP1, LJ_TTRUE | ||
288 | | beq ->cont_dispatch | ||
289 | | | ||
290 | | // Return from pcall or xpcall fast func. | ||
291 | | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. | ||
292 | | mr BASE, TMP2 // Restore caller base. | ||
293 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
294 | | stwu TMP1, FRAME_PC(RA) // Prepend true to results. | ||
295 | | | ||
296 | |->vm_returnc: | ||
297 | | addi RD, RD, 8 // RD = (nresults+1)*8. | ||
298 | | andi. TMP0, PC, FRAME_TYPE | ||
299 | | cmpwi cr1, RD, 0 | ||
300 | | li CRET1, LUA_YIELD | ||
301 | | beq cr1, ->vm_unwind_c_eh | ||
302 | | mr MULTRES, RD | ||
303 | | beq ->BC_RET_Z // Handle regular return to Lua. | ||
304 | | | ||
305 | |->vm_return: | ||
306 | | // Return from the VM to a C frame. BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return | ||
307 | | // TMP0 = PC & FRAME_TYPE. Anything other than FRAME_C is passed on to vm_returnp. | ||
308 | | cmpwi TMP0, FRAME_C | ||
309 | | rlwinm TMP2, PC, 0, 0, 28 | ||
310 | | li_vmstate C | ||
311 | | sub TMP2, BASE, TMP2 // TMP2 = previous base (BASE minus PC with its low 3 bits masked off). | ||
312 | | bne ->vm_returnp | ||
313 | | | ||
314 | | addic. TMP1, RD, -8 | ||
315 | | stw TMP2, L->base | ||
316 | | lwz TMP2, SAVE_NRES | ||
317 | | subi BASE, BASE, 8 | ||
318 | | st_vmstate | ||
319 | | slwi TMP2, TMP2, 3 | ||
320 | | beq >2 | ||
321 | |1: | ||
322 | | addic. TMP1, TMP1, -8 | ||
323 | | evldd TMP0, 0(RA) | ||
324 | | addi RA, RA, 8 | ||
325 | | evstdd TMP0, 0(BASE) | ||
326 | | addi BASE, BASE, 8 | ||
327 | | bne <1 | ||
328 | | | ||
329 | |2: | ||
330 | | cmpw TMP2, RD // More/less results wanted? TMP2 = SAVE_NRES*8, RD = (nresults+1)*8. | ||
331 | | bne >6 | ||
332 | |3: | ||
333 | | stw BASE, L->top // Store new top (BASE now points past the last copied result). | ||
334 | | | ||
335 | |->vm_leave_cp: | ||
336 | | lwz TMP0, SAVE_CFRAME // Restore previous C frame from the SAVE_CFRAME slot. | ||
337 | | li CRET1, 0 // Ok return status for vm_pcall. | ||
338 | | stw TMP0, L->cframe | ||
339 | | | ||
340 | |->vm_leave_unw: | ||
341 | | restoreregs | ||
342 | | blr | ||
343 | | | ||
344 | |6: | ||
345 | | ble >7 // Less results wanted? (Flags are still those of the cmpw at label 2.) | ||
346 | | // More results wanted. Check stack size and fill up results with nil, one slot per pass through label 2. | ||
347 | | lwz TMP1, L->maxstack | ||
348 | | cmplw BASE, TMP1 | ||
349 | | bge >8 | ||
350 | | evstdd TISNIL, 0(BASE) | ||
351 | | addi RD, RD, 8 | ||
352 | | addi BASE, BASE, 8 | ||
353 | | b <2 | ||
354 | | | ||
355 | |7: // Less results wanted. | ||
356 | | sub TMP0, RD, TMP2 | ||
357 | | cmpwi TMP2, 0 // LUA_MULTRET+1 case? (TMP2 == 0 keeps every result.) | ||
358 | | sub TMP0, BASE, TMP0 // Subtract the difference. | ||
359 | | iseleq BASE, BASE, TMP0 // Either keep top or shrink it. | ||
360 | | b <3 | ||
361 | | | ||
362 | |8: // Corner case: need to grow stack for filling up results. | ||
363 | | // This can happen if: | ||
364 | | // - A C function grows the stack (a lot). | ||
365 | | // - The GC shrinks the stack in between. | ||
366 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
367 | | stw BASE, L->top // Save current top held in BASE (yes). | ||
368 | | mr SAVE0, RD | ||
369 | | mr CARG2, TMP2 | ||
370 | | mr CARG1, L | ||
371 | | bl extern lj_state_growstack // (lua_State *L, int n); RD is kept in SAVE0 across the call. | ||
372 | | lwz TMP2, SAVE_NRES | ||
373 | | mr RD, SAVE0 | ||
374 | | slwi TMP2, TMP2, 3 | ||
375 | | lwz BASE, L->top // Need the (realloced) L->top in BASE. | ||
376 | | b <2 | ||
377 | | | ||
378 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
379 | | // (void *cframe, int errcode): sp is reset to cframe and errcode becomes the return value. | ||
380 | | mr sp, CARG1 | ||
381 | | mr CRET1, CARG2 | ||
382 | |->vm_unwind_c_eh: // Landing pad for external unwinder. Stores ~LJ_VMST_C as vmstate, then leaves via vm_leave_unw. | ||
383 | | lwz L, SAVE_L | ||
384 | | li TMP0, ~LJ_VMST_C | ||
385 | | lwz GL:TMP1, L->glref | ||
386 | | stw TMP0, GL:TMP1->vmstate | ||
387 | | b ->vm_leave_unw | ||
388 | | | ||
389 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
390 | | // (void *cframe): the low 2 bits of cframe are masked off before it becomes sp. | ||
391 | | rlwinm sp, CARG1, 0, 0, 29 | ||
392 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | ||
393 | | lwz L, SAVE_L | ||
394 | | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants (TISNUM, TISFUNC, TOBIT, TISTAB, TISSTR, TISNIL). | ||
395 | | evsplati TISFUNC, LJ_TFUNC | ||
396 | | lus TOBIT, 0x4338 | ||
397 | | evsplati TISTAB, LJ_TTAB | ||
398 | | li TMP0, 0 | ||
399 | | lwz BASE, L->base | ||
400 | | evmergelo TOBIT, TOBIT, TMP0 | ||
401 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table (glref + GG_G2DISP). | ||
402 | | evsplati TISSTR, LJ_TSTR | ||
403 | | li TMP1, LJ_TFALSE | ||
404 | | evsplati TISNIL, LJ_TNIL | ||
405 | | li_vmstate INTERP | ||
406 | | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. | ||
407 | | la RA, -8(BASE) // Results start at BASE-8. | ||
408 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
409 | | stw TMP1, 0(RA) // Prepend false to error message (writes only the tag word at RA). | ||
410 | | li RD, 16 // 2 results: false + error message. RD = (nresults+1)*8 would be 24 for a plain count, so this relies on vm_returnc's convention. TODO confirm. | ||
411 | | st_vmstate | ||
412 | | b ->vm_returnc | ||
413 | | | ||
414 | |//----------------------------------------------------------------------- | ||
415 | |//-- Grow stack for calls ----------------------------------------------- | ||
416 | |//----------------------------------------------------------------------- | ||
417 | | | ||
418 | |->vm_growstack_c: // Grow stack for C function. Requests LUA_MINSTACK slots and joins the common path at label 2. | ||
419 | | li CARG2, LUA_MINSTACK | ||
420 | | b >2 | ||
421 | | | ||
422 | |->vm_growstack_l: // Grow stack for Lua function. Requests (RA-BASE)/8 slots, i.e. the frame size. | ||
423 | | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC | ||
424 | | add RC, BASE, RC | ||
425 | | sub RA, RA, BASE | ||
426 | | stw BASE, L->base | ||
427 | | addi PC, PC, 4 // Must point after first instruction. | ||
428 | | stw RC, L->top | ||
429 | | srwi CARG2, RA, 3 | ||
430 | |2: | ||
431 | | // L->base = new base, L->top = top, CARG2 = n for lj_state_growstack | ||
432 | | stw PC, SAVE_PC | ||
433 | | mr CARG1, L | ||
434 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
435 | | lwz BASE, L->base | ||
436 | | lwz RC, L->top | ||
437 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
438 | | sub RC, RC, BASE | ||
439 | | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC (all reloaded from L after the call) | ||
440 | | ins_callt // Just retry the call. | ||
441 | | | ||
442 | |//----------------------------------------------------------------------- | ||
443 | |//-- Entry points into the assembler VM --------------------------------- | ||
444 | |//----------------------------------------------------------------------- | ||
445 | | | ||
446 | |->vm_resume: // Setup C frame and resume thread. | ||
447 | | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0): CARG3 is stored to SAVE_NRES, SAVE_ERRF and SAVE_CFRAME alike. | ||
448 | | saveregs | ||
449 | | mr L, CARG1 | ||
450 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | ||
451 | | mr BASE, CARG2 | ||
452 | | lbz TMP1, L->status | ||
453 | | stw L, SAVE_L | ||
454 | | li PC, FRAME_CP | ||
455 | | addi TMP0, sp, CFRAME_RESUME | ||
456 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
457 | | stw CARG3, SAVE_NRES | ||
458 | | cmplwi TMP1, 0 | ||
459 | | stw CARG3, SAVE_ERRF | ||
460 | | stw TMP0, L->cframe | ||
461 | | stw CARG3, SAVE_CFRAME | ||
462 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
463 | | beq >3 | ||
464 | | | ||
465 | | // Resume after yield (like a return). Reached only when L->status is non-zero; otherwise the branch above enters at label 3. | ||
466 | | mr RA, BASE | ||
467 | | lwz BASE, L->base | ||
468 | | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. | ||
469 | | lwz TMP1, L->top | ||
470 | | evsplati TISFUNC, LJ_TFUNC | ||
471 | | lus TOBIT, 0x4338 | ||
472 | | evsplati TISTAB, LJ_TTAB | ||
473 | | lwz PC, FRAME_PC(BASE) | ||
474 | | li TMP2, 0 | ||
475 | | evsplati TISSTR, LJ_TSTR | ||
476 | | sub RD, TMP1, BASE | ||
477 | | evmergelo TOBIT, TOBIT, TMP2 | ||
478 | | stb CARG3, L->status | ||
479 | | andi. TMP0, PC, FRAME_TYPE | ||
480 | | li_vmstate INTERP | ||
481 | | addi RD, RD, 8 | ||
482 | | evsplati TISNIL, LJ_TNIL | ||
483 | | mr MULTRES, RD | ||
484 | | st_vmstate | ||
485 | | beq ->BC_RET_Z | ||
486 | | b ->vm_return | ||
487 | | | ||
488 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
489 | | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef): ef is stored to SAVE_ERRF, frame type is FRAME_CP. | ||
490 | | saveregs | ||
491 | | li PC, FRAME_CP | ||
492 | | stw CARG4, SAVE_ERRF | ||
493 | | b >1 | ||
494 | | | ||
495 | |->vm_call: // Setup C frame and enter VM. | ||
496 | | // (lua_State *L, TValue *base, int nres1): frame type is FRAME_C, SAVE_ERRF is not written. | ||
497 | | saveregs | ||
498 | | li PC, FRAME_C | ||
499 | | | ||
500 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
501 | | lwz TMP1, L:CARG1->cframe | ||
502 | | stw CARG3, SAVE_NRES | ||
503 | | mr L, CARG1 | ||
504 | | stw CARG1, SAVE_L | ||
505 | | mr BASE, CARG2 | ||
506 | | stw sp, L->cframe // Add our C frame to cframe chain (the previous L->cframe goes to SAVE_CFRAME below). | ||
507 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | ||
508 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
509 | | stw TMP1, SAVE_CFRAME | ||
510 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
511 | | | ||
512 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | ||
513 | | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call). | ||
514 | | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. | ||
515 | | lwz TMP1, L->top | ||
516 | | evsplati TISFUNC, LJ_TFUNC | ||
517 | | add PC, PC, BASE | ||
518 | | evsplati TISTAB, LJ_TTAB | ||
519 | | lus TOBIT, 0x4338 | ||
520 | | li TMP0, 0 | ||
521 | | sub PC, PC, TMP2 // PC = frame delta + frame type, i.e. ftype + (BASE - old base). | ||
522 | | evsplati TISSTR, LJ_TSTR | ||
523 | | sub NARGS8:RC, TMP1, BASE | ||
524 | | evmergelo TOBIT, TOBIT, TMP0 | ||
525 | | li_vmstate INTERP | ||
526 | | evsplati TISNIL, LJ_TNIL | ||
527 | | st_vmstate | ||
528 | | | ||
529 | |->vm_call_dispatch: | ||
530 | | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC. Loads the callee from BASE-8; non-functions go to vmeta_call. | ||
531 | | li TMP0, -8 | ||
532 | | evlddx LFUNC:RB, BASE, TMP0 | ||
533 | | checkfunc LFUNC:RB | ||
534 | | checkfail ->vmeta_call | ||
535 | | | ||
536 | |->vm_call_dispatch_f: | ||
537 | | ins_call | ||
538 | | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC (register state expected on entry to ins_call). | ||
539 | | | ||
540 | |->vm_cpcall: // Setup protected C frame, call C. | ||
541 | | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp): cp (CARG4) is the function actually called here. | ||
542 | | saveregs | ||
543 | | mr L, CARG1 | ||
544 | | lwz TMP0, L:CARG1->stack | ||
545 | | stw CARG1, SAVE_L | ||
546 | | lwz TMP1, L->top | ||
547 | | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | ||
548 | | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top), i.e. L->stack - L->top. | ||
549 | | lwz TMP1, L->cframe | ||
550 | | stw sp, L->cframe // Add our C frame to cframe chain. | ||
551 | | li TMP2, 0 | ||
552 | | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | ||
553 | | stw TMP2, SAVE_ERRF // No error function. | ||
554 | | stw TMP1, SAVE_CFRAME | ||
555 | | mtctr CARG4 | ||
556 | | bctrl // (lua_State *L, lua_CFunction func, void *ud); CARG1-CARG3 are passed through unchanged. | ||
557 | | mr. BASE, CRET1 | ||
558 | | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | ||
559 | | li PC, FRAME_CP | ||
560 | | addi DISPATCH, DISPATCH, GG_G2DISP | ||
561 | | bne <3 // Else continue with the call (non-zero return value is used as the new BASE). | ||
562 | | b ->vm_leave_cp // No base? Just remove C frame. | ||
563 | | | ||
564 | |//----------------------------------------------------------------------- | ||
565 | |//-- Metamethod handling ------------------------------------------------ | ||
566 | |//----------------------------------------------------------------------- | ||
567 | | | ||
568 | |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the | ||
569 | |// stack, so BASE doesn't need to be reloaded across these calls. | ||
570 | | | ||
571 | |//-- Continuation dispatch ---------------------------------------------- | ||
572 | | | ||
573 | |->cont_dispatch: | ||
574 | | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8. TMP2 must hold the caller's base on entry (it is copied to BASE below). | ||
575 | | lwz TMP0, -12(BASE) // Continuation. | ||
576 | | mr RB, BASE | ||
577 | | mr BASE, TMP2 // Restore caller BASE. | ||
578 | | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) | ||
579 | | cmplwi TMP0, 0 | ||
580 | | lwz PC, -16(RB) // Restore PC from [cont|PC]. | ||
581 | | beq >1 | ||
582 | | subi TMP2, RD, 8 | ||
583 | | lwz TMP1, LFUNC:TMP1->pc | ||
584 | | evstddx TISNIL, RA, TMP2 // Ensure one valid arg: stores nil at RA+RD-8, the slot just past the results. | ||
585 | | lwz KBASE, PC2PROTO(k)(TMP1) | ||
586 | | // BASE = base, RA = resultptr, RB = meta base | ||
587 | | mtctr TMP0 | ||
588 | | bctr // Jump to continuation. | ||
589 | | | ||
590 | |1: // Tail call from C function. Taken when the continuation word is 0. | ||
591 | | subi TMP1, RB, 16 | ||
592 | | sub RC, TMP1, BASE | ||
593 | | b ->vm_call_tail | ||
594 | | | ||
595 | |->cont_cat: // RA = resultptr, RB = meta base. The result is copied to RB-16; unless that slot is BASE + the B operand of the instruction at PC-4, control goes to BC_CAT_Z, otherwise the result is stored to the instruction's A slot. | ||
596 | | lwz INS, -4(PC) | ||
597 | | subi CARG2, RB, 16 | ||
598 | | decode_RB8 SAVE0, INS | ||
599 | | evldd TMP0, 0(RA) | ||
600 | | add TMP1, BASE, SAVE0 | ||
601 | | stw BASE, L->base | ||
602 | | cmplw TMP1, CARG2 | ||
603 | | sub CARG3, CARG2, TMP1 | ||
604 | | decode_RA8 RA, INS | ||
605 | | evstdd TMP0, 0(CARG2) | ||
606 | | bne ->BC_CAT_Z | ||
607 | | evstddx TMP0, BASE, RA | ||
608 | | b ->cont_nop | ||
609 | | | ||
610 | |//-- Table indexing metamethods ----------------------------------------- | ||
611 | | | ||
612 | |->vmeta_tgets1: | ||
613 | | evmergelo STR:RC, TISSTR, STR:RC | ||
614 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
615 | | decode_RB8 RB, INS | ||
616 | | evstdd STR:RC, 0(CARG3) | ||
617 | | add CARG2, BASE, RB | ||
618 | | b >1 | ||
619 | | | ||
620 | |->vmeta_tgets: | ||
621 | | evmergelo TAB:RB, TISTAB, TAB:RB | ||
622 | | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | ||
623 | | evmergelo STR:RC, TISSTR, STR:RC | ||
624 | | evstdd TAB:RB, 0(CARG2) | ||
625 | | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) | ||
626 | | evstdd STR:RC, 0(CARG3) | ||
627 | | b >1 | ||
628 | | | ||
629 | |->vmeta_tgetb: // TMP0 = index (integer; converted to a double and stored in tmptv as the key). | ||
630 | | efdcfsi TMP0, TMP0 | ||
631 | | decode_RB8 RB, INS | ||
632 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
633 | | add CARG2, BASE, RB | ||
634 | | evstdd TMP0, 0(CARG3) | ||
635 | | b >1 | ||
636 | | | ||
637 | |->vmeta_tgetv: | ||
638 | | decode_RB8 RB, INS | ||
639 | | decode_RC8 RC, INS | ||
640 | | add CARG2, BASE, RB | ||
641 | | add CARG3, BASE, RC | ||
642 | |1: | ||
643 | | stw BASE, L->base | ||
644 | | mr CARG1, L | ||
645 | | stw PC, SAVE_PC | ||
646 | | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k); every vmeta_tget* entry above arrives with CARG2 = o, CARG3 = k. | ||
647 | | // Returns TValue * (finished) or NULL (metamethod). A non-NULL result is copied to BASE+RA. | ||
648 | | cmplwi CRET1, 0 | ||
649 | | beq >3 | ||
650 | | evldd TMP0, 0(CRET1) | ||
651 | | evstddx TMP0, BASE, RA | ||
652 | | ins_next | ||
653 | | | ||
654 | |3: // Call __index metamethod. | ||
655 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
656 | | subfic TMP1, BASE, FRAME_CONT | ||
657 | | lwz BASE, L->top | ||
658 | | stw PC, -16(BASE) // [cont|PC] | ||
659 | | add PC, TMP1, BASE | ||
660 | | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
661 | | li NARGS8:RC, 16 // 2 args for func(t, k). | ||
662 | | b ->vm_call_dispatch_f | ||
663 | | | ||
664 | |//----------------------------------------------------------------------- | ||
665 | | | ||
666 | |->vmeta_tsets1: | ||
667 | | evmergelo STR:RC, TISSTR, STR:RC | ||
668 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
669 | | decode_RB8 RB, INS | ||
670 | | evstdd STR:RC, 0(CARG3) | ||
671 | | add CARG2, BASE, RB | ||
672 | | b >1 | ||
673 | | | ||
674 | |->vmeta_tsets: | ||
675 | | evmergelo TAB:RB, TISTAB, TAB:RB | ||
676 | | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | ||
677 | | evmergelo STR:RC, TISSTR, STR:RC | ||
678 | | evstdd TAB:RB, 0(CARG2) | ||
679 | | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) | ||
680 | | evstdd STR:RC, 0(CARG3) | ||
681 | | b >1 | ||
682 | | | ||
683 | |->vmeta_tsetb: // TMP0 = index (integer; converted to a double and stored in tmptv as the key). | ||
684 | | efdcfsi TMP0, TMP0 | ||
685 | | decode_RB8 RB, INS | ||
686 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
687 | | add CARG2, BASE, RB | ||
688 | | evstdd TMP0, 0(CARG3) | ||
689 | | b >1 | ||
690 | | | ||
691 | |->vmeta_tsetv: | ||
692 | | decode_RB8 RB, INS | ||
693 | | decode_RC8 RC, INS | ||
694 | | add CARG2, BASE, RB | ||
695 | | add CARG3, BASE, RC | ||
696 | |1: | ||
697 | | stw BASE, L->base | ||
698 | | mr CARG1, L | ||
699 | | stw PC, SAVE_PC | ||
700 | | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k); every vmeta_tset* entry above arrives with CARG2 = o, CARG3 = k. | ||
701 | | // Returns TValue * (finished) or NULL (metamethod). The value to store is loaded from BASE+RA after the call. | ||
702 | | cmplwi CRET1, 0 | ||
703 | | evlddx TMP0, BASE, RA | ||
704 | | beq >3 | ||
705 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
706 | | evstdd TMP0, 0(CRET1) | ||
707 | | ins_next | ||
708 | | | ||
709 | |3: // Call __newindex metamethod. | ||
710 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
711 | | subfic TMP1, BASE, FRAME_CONT | ||
712 | | lwz BASE, L->top | ||
713 | | stw PC, -16(BASE) // [cont|PC] | ||
714 | | add PC, TMP1, BASE | ||
715 | | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
716 | | li NARGS8:RC, 24 // 3 args for func(t, k, v) | ||
717 | | evstdd TMP0, 16(BASE) // Copy value to third argument. | ||
718 | | b ->vm_call_dispatch_f | ||
719 | | | ||
720 | |//-- Comparison metamethods --------------------------------------------- | ||
721 | | | ||
722 | |->vmeta_comp: | ||
723 | | mr CARG1, L | ||
724 | | subi PC, PC, 4 | ||
725 | | add CARG2, BASE, RA | ||
726 | | stw PC, SAVE_PC | ||
727 | | add CARG3, BASE, RD | ||
728 | | stw BASE, L->base | ||
729 | | decode_OP1 CARG4, INS | ||
730 | | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op); o1 = BASE+RA, o2 = BASE+RD, op = opcode of INS. | ||
731 | | // Returns 0/1 or TValue * (metamethod). Values above 1 are treated as a metamethod base and go to vmeta_binop. | ||
732 | |3: | ||
733 | | cmplwi CRET1, 1 | ||
734 | | bgt ->vmeta_binop | ||
735 | |4: | ||
736 | | lwz INS, 0(PC) | ||
737 | | addi PC, PC, 4 | ||
738 | | decode_RD4 TMP2, INS | ||
739 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
740 | | add TMP2, TMP2, TMP3 | ||
741 | | isellt PC, PC, TMP2 | ||
742 | |->cont_nop: | ||
743 | | ins_next | ||
744 | | | ||
745 | |->cont_ra: // RA = resultptr. Copies the result to the A slot of the instruction at PC-4. | ||
746 | | lwz INS, -4(PC) | ||
747 | | evldd TMP0, 0(RA) | ||
748 | | decode_RA8 TMP1, INS | ||
749 | | evstddx TMP0, BASE, TMP1 | ||
750 | | b ->cont_nop | ||
751 | | | ||
752 | |->cont_condt: // RA = resultptr. Compares the result's tag word against LJ_TTRUE and reuses the jump logic at label 4. | ||
753 | | lwz TMP0, 0(RA) | ||
754 | | li TMP1, LJ_TTRUE | ||
755 | | cmplw TMP1, TMP0 // Branch if result is true. | ||
756 | | b <4 | ||
757 | | | ||
758 | |->cont_condf: // RA = resultptr. Compares the result's tag word against LJ_TFALSE and reuses the jump logic at label 4. | ||
759 | | lwz TMP0, 0(RA) | ||
760 | | li TMP1, LJ_TFALSE | ||
761 | | cmplw TMP0, TMP1 // Branch if result is false. | ||
762 | | b <4 | ||
763 | | | ||
764 | |->vmeta_equal: | ||
765 | | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. | ||
766 | | subi PC, PC, 4 | ||
767 | | stw BASE, L->base | ||
768 | | mr CARG1, L | ||
769 | | stw PC, SAVE_PC | ||
770 | | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
771 | | // Returns 0/1 or TValue * (metamethod). The result is handled by label 3 of vmeta_comp. | ||
772 | | b <3 | ||
773 | | | ||
774 | |//-- Arithmetic metamethods --------------------------------------------- | ||
775 | | | ||
776 | |->vmeta_arith_vn: | ||
777 | | add CARG3, BASE, RB | ||
778 | | add CARG4, KBASE, RC | ||
779 | | b >1 | ||
780 | | | ||
781 | |->vmeta_arith_nv: | ||
782 | | add CARG3, KBASE, RC | ||
783 | | add CARG4, BASE, RB | ||
784 | | b >1 | ||
785 | | | ||
786 | |->vmeta_unm: | ||
787 | | add CARG3, BASE, RD | ||
788 | | mr CARG4, CARG3 | ||
789 | | b >1 | ||
790 | | | ||
791 | |->vmeta_arith_vv: | ||
792 | | add CARG3, BASE, RB | ||
793 | | add CARG4, BASE, RC | ||
794 | |1: | ||
795 | | add CARG2, BASE, RA | ||
796 | | stw BASE, L->base | ||
797 | | mr CARG1, L | ||
798 | | stw PC, SAVE_PC | ||
799 | | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS, so INS is clobbered from here on. | ||
800 | | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op); the entries above set rb/rc in CARG3/CARG4. | ||
801 | | // Returns NULL (finished) or TValue * (metamethod). NULL continues at cont_nop. | ||
802 | | cmplwi CRET1, 0 | ||
803 | | beq ->cont_nop | ||
804 | | | ||
805 | | // Call metamethod for binary op. Falls through from above and is also a branch target of vmeta_comp and vmeta_len. | ||
806 | |->vmeta_binop: | ||
807 | | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 | ||
808 | | sub TMP1, CRET1, BASE | ||
809 | | stw PC, -16(CRET1) // [cont|PC] | ||
810 | | mr TMP2, BASE | ||
811 | | addi PC, TMP1, FRAME_CONT | ||
812 | | mr BASE, CRET1 | ||
813 | | li NARGS8:RC, 16 // 2 args for func(o1, o2). | ||
814 | | b ->vm_call_dispatch | ||
815 | | | ||
816 | |->vmeta_len: | ||
817 | #if LJ_52 | ||
818 | | mr SAVE0, CARG1 | ||
819 | #endif | ||
820 | | add CARG2, BASE, RD | ||
821 | | stw BASE, L->base | ||
822 | | mr CARG1, L | ||
823 | | stw PC, SAVE_PC | ||
824 | | bl extern lj_meta_len // (lua_State *L, TValue *o); o = BASE+RD. | ||
825 | | // Returns NULL (retry) or TValue * (metamethod base). Only the LJ_52 build tests for NULL. | ||
826 | #if LJ_52 | ||
827 | | cmplwi CRET1, 0 | ||
828 | | bne ->vmeta_binop // Binop call for compatibility. | ||
829 | | mr CARG1, SAVE0 | ||
830 | | b ->BC_LEN_Z | ||
831 | #else | ||
832 | | b ->vmeta_binop // Binop call for compatibility. | ||
833 | #endif | ||
834 | | | ||
835 | |//-- Call metamethod ---------------------------------------------------- | ||
836 | | | ||
837 | |->vmeta_call: // Resolve and call __call metamethod. | ||
838 | | // TMP2 = old base, BASE = new base, RC = nargs*8 | ||
839 | | mr CARG1, L | ||
840 | | stw TMP2, L->base // This is the caller's base! | ||
841 | | subi CARG2, BASE, 8 | ||
842 | | stw PC, SAVE_PC | ||
843 | | add CARG3, BASE, RC | ||
844 | | mr SAVE0, NARGS8:RC | ||
845 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top); func = BASE-8, top = BASE+RC. | ||
846 | | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | ||
847 | | addi NARGS8:RC, SAVE0, 8 // Got one more argument now (nargs*8 was kept in SAVE0 across the call). | ||
848 | | ins_call | ||
849 | | | ||
850 | |->vmeta_callt: // Resolve __call for BC_CALLT. | ||
851 | | // BASE = old base, RA = new base, RC = nargs*8 | ||
852 | | mr CARG1, L | ||
853 | | stw BASE, L->base | ||
854 | | subi CARG2, RA, 8 | ||
855 | | stw PC, SAVE_PC | ||
856 | | add CARG3, RA, RC | ||
857 | | mr SAVE0, NARGS8:RC | ||
858 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top); func = RA-8, top = RA+RC. | ||
859 | | lwz TMP1, FRAME_PC(BASE) | ||
860 | | addi NARGS8:RC, SAVE0, 8 // Got one more argument now (nargs*8 was kept in SAVE0 across the call). | ||
861 | | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. | ||
862 | | b ->BC_CALLT_Z | ||
863 | | | ||
864 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
865 | | | ||
866 | |->vmeta_for: | ||
867 | | mr CARG1, L | ||
868 | | stw BASE, L->base | ||
869 | | mr CARG2, RA | ||
870 | | stw PC, SAVE_PC | ||
871 | | mr SAVE0, INS | ||
872 | | bl extern lj_meta_for // (lua_State *L, TValue *base). INS is kept in SAVE0 across the call; RA/RD are re-decoded from it before re-dispatching to BC_FORI (or BC_JFORI in JIT builds). | ||
873 | |.if JIT | ||
874 | | decode_OP1 TMP0, SAVE0 | ||
875 | |.endif | ||
876 | | decode_RA8 RA, SAVE0 | ||
877 | |.if JIT | ||
878 | | cmpwi TMP0, BC_JFORI | ||
879 | |.endif | ||
880 | | decode_RD8 RD, SAVE0 | ||
881 | |.if JIT | ||
882 | | beq =>BC_JFORI | ||
883 | |.endif | ||
884 | | b =>BC_FORI | ||
885 | | | ||
886 | |//----------------------------------------------------------------------- | ||
887 | |//-- Fast functions ----------------------------------------------------- | ||
888 | |//----------------------------------------------------------------------- | ||
889 | | | ||
890 | |.macro .ffunc, name | ||
891 | |->ff_ .. name: | ||
892 | |.endmacro | ||
893 | | | ||
894 | |.macro .ffunc_1, name | ||
895 | |->ff_ .. name: | ||
896 | | cmplwi NARGS8:RC, 8 | ||
897 | | evldd CARG1, 0(BASE) | ||
898 | | blt ->fff_fallback | ||
899 | |.endmacro | ||
900 | | | ||
901 | |.macro .ffunc_2, name | ||
902 | |->ff_ .. name: | ||
903 | | cmplwi NARGS8:RC, 16 | ||
904 | | evldd CARG1, 0(BASE) | ||
905 | | evldd CARG2, 8(BASE) | ||
906 | | blt ->fff_fallback | ||
907 | |.endmacro | ||
908 | | | ||
909 | |.macro .ffunc_n, name | ||
910 | | .ffunc_1 name | ||
911 | | checknum CARG1 | ||
912 | | checkfail ->fff_fallback | ||
913 | |.endmacro | ||
914 | | | ||
915 | |.macro .ffunc_nn, name | ||
916 | | .ffunc_2 name | ||
917 | | evmergehi TMP0, CARG1, CARG2 | ||
918 | | checknum TMP0 | ||
919 | | checkanyfail ->fff_fallback | ||
920 | |.endmacro | ||
921 | | | ||
922 | |// Inlined GC threshold check: calls fff_gcstep (branch-and-link) when gc.total >= gc.threshold. Caveat: uses TMP0 and TMP1. | ||
923 | |.macro ffgccheck | ||
924 | | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) | ||
925 | | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | ||
926 | | cmplw TMP0, TMP1 | ||
927 | | bgel ->fff_gcstep | ||
928 | |.endmacro | ||
929 | | | ||
930 | |//-- Base library: checks ----------------------------------------------- | ||
931 | | | ||
932 | |.ffunc assert | ||
933 | | cmplwi NARGS8:RC, 8 | ||
934 | | evldd TMP0, 0(BASE) | ||
935 | | blt ->fff_fallback | ||
936 | | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE. A first argument whose tag is not below it (unsigned) takes the fallback. | ||
937 | | la RA, -8(BASE) | ||
938 | | evcmpltu cr1, TMP0, TMP1 | ||
939 | | lwz PC, FRAME_PC(BASE) | ||
940 | | bge cr1, ->fff_fallback | ||
941 | | evstdd TMP0, 0(RA) | ||
942 | | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8: all arguments are returned. | ||
943 | | beq ->fff_res // Done if exactly 1 argument (cr0 still holds the NARGS8 vs. 8 compare). | ||
944 | | li TMP1, 8 | ||
945 | | subi RC, RC, 8 | ||
946 | |1: | ||
947 | | cmplw TMP1, RC | ||
948 | | evlddx TMP0, BASE, TMP1 | ||
949 | | evstddx TMP0, RA, TMP1 | ||
950 | | addi TMP1, TMP1, 8 | ||
951 | | bne <1 | ||
952 | | b ->fff_res | ||
953 | | | ||
954 | |.ffunc type | ||
955 | | cmplwi NARGS8:RC, 8 | ||
956 | | lwz CARG1, 0(BASE) | ||
957 | | blt ->fff_fallback | ||
958 | | li TMP2, ~LJ_TNUMX | ||
959 | | cmplw CARG1, TISNUM | ||
960 | | not TMP1, CARG1 | ||
961 | | isellt TMP1, TMP2, TMP1 | ||
962 | | slwi TMP1, TMP1, 3 | ||
963 | | la TMP2, CFUNC:RB->upvalue | ||
964 | | evlddx STR:CRET1, TMP2, TMP1 | ||
965 | | b ->fff_restv | ||
966 | | | ||
967 | |//-- Base library: getters and setters --------------------------------- | ||
968 | | | ||
969 | |.ffunc_1 getmetatable | ||
970 | | checktab CARG1 | ||
971 | | evmergehi TMP1, CARG1, CARG1 | ||
972 | | checkfail >6 | ||
973 | |1: // Field metatable must be at same offset for GCtab and GCudata! (Label 6 branches back here for userdata.) | ||
974 | | lwz TAB:RB, TAB:CARG1->metatable | ||
975 | |2: | ||
976 | | evmr CRET1, TISNIL | ||
977 | | cmplwi TAB:RB, 0 | ||
978 | | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) | ||
979 | | beq ->fff_restv | ||
980 | | lwz TMP0, TAB:RB->hmask | ||
981 | | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result. | ||
982 | | lwz TMP1, STR:RC->hash | ||
983 | | lwz NODE:TMP2, TAB:RB->node | ||
984 | | evmergelo STR:RC, TISSTR, STR:RC | ||
985 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | ||
986 | | slwi TMP0, TMP1, 5 | ||
987 | | slwi TMP1, TMP1, 3 | ||
988 | | sub TMP1, TMP0, TMP1 | ||
989 | | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8), i.e. idx*24 bytes into the node array. | ||
990 | |3: // Rearranged logic, because we expect _not_ to find the key. Walks the node chain via node->next. | ||
991 | | evldd TMP0, NODE:TMP2->key | ||
992 | | evldd TMP1, NODE:TMP2->val | ||
993 | | evcmpeq TMP0, STR:RC | ||
994 | | lwz NODE:TMP2, NODE:TMP2->next | ||
995 | | checkallok >5 | ||
996 | | cmplwi NODE:TMP2, 0 | ||
997 | | beq ->fff_restv // Not found, keep default result. | ||
998 | | b <3 | ||
999 | |5: | ||
1000 | | checknil TMP1 | ||
1001 | | checkok ->fff_restv // Ditto for nil value. | ||
1002 | | evmr CRET1, TMP1 // Return value of mt.__metatable. | ||
1003 | | b ->fff_restv | ||
1004 | | | ||
1005 | |6: | ||
1006 | | cmpwi TMP1, LJ_TUDATA | ||
1007 | | not TMP1, TMP1 | ||
1008 | | beq <1 | ||
1009 | | checknum CARG1 | ||
1010 | | slwi TMP1, TMP1, 2 | ||
1011 | | li TMP2, 4*~LJ_TNUMX | ||
1012 | | isellt TMP1, TMP2, TMP1 | ||
1013 | | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) | ||
1014 | | lwzx TAB:RB, TMP2, TMP1 | ||
1015 | | b <2 | ||
1016 | | | ||
1017 | |.ffunc_2 setmetatable | ||
1018 | | // Fast path: no mt for table yet and not clearing the mt. Both arguments must be tables; everything else takes fff_fallback. | ||
1019 | | evmergehi TMP0, TAB:CARG1, TAB:CARG2 | ||
1020 | | checktab TMP0 | ||
1021 | | checkanyfail ->fff_fallback | ||
1022 | | lwz TAB:TMP1, TAB:CARG1->metatable | ||
1023 | | cmplwi TAB:TMP1, 0 | ||
1024 | | lbz TMP3, TAB:CARG1->marked | ||
1025 | | bne ->fff_fallback | ||
1026 | | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table): barrierback is only run when this bit is set. | ||
1027 | | stw TAB:CARG2, TAB:CARG1->metatable | ||
1028 | | beq ->fff_restv | ||
1029 | | barrierback TAB:CARG1, TMP3, TMP0 | ||
1030 | | b ->fff_restv | ||
1031 | | | ||
1032 | |.ffunc rawget | ||
1033 | | cmplwi NARGS8:RC, 16 | ||
1034 | | evldd CARG2, 0(BASE) | ||
1035 | | blt ->fff_fallback | ||
1036 | | checktab CARG2 | ||
1037 | | la CARG3, 8(BASE) | ||
1038 | | checkfail ->fff_fallback | ||
1039 | | mr CARG1, L | ||
1040 | | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key); t = first argument, key = address of the second argument slot. | ||
1041 | | // Returns cTValue *. The pointed-to value is loaded and returned via fff_restv. | ||
1042 | | evldd CRET1, 0(CRET1) | ||
1043 | | b ->fff_restv | ||
1044 | | | ||
1045 | |//-- Base library: conversions ------------------------------------------ | ||
1046 | | | ||
1047 | |.ffunc tonumber | ||
1048 | | // Only handles the number case inline (without a base argument). Everything else takes fff_fallback. | ||
1049 | | cmplwi NARGS8:RC, 8 | ||
1050 | | evldd CARG1, 0(BASE) | ||
1051 | | bne ->fff_fallback // Exactly one argument. | ||
1052 | | checknum CARG1 | ||
1053 | | checkok ->fff_restv | ||
1054 | | b ->fff_fallback | ||
1055 | | | ||
1056 | |.ffunc_1 tostring | ||
1057 | | // Only handles the string or number case inline. | ||
1058 | | checkstr CARG1 | ||
1059 | | // A __tostring method in the string base metatable is ignored. | ||
1060 | | checkok ->fff_restv // String key? If so the argument itself is returned. | ||
1061 | | // Handle numbers inline, unless a number base metatable is present (gcroot[GCROOT_BASEMT_NUM] != 0). | ||
1062 | | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) | ||
1063 | | checknum CARG1 | ||
1064 | | cmplwi cr1, TMP0, 0 | ||
1065 | | stw BASE, L->base // Add frame since C call can throw. | ||
1066 | | crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq | ||
1067 | | stw PC, SAVE_PC // Redundant (but a defined value). | ||
1068 | | bne ->fff_fallback | ||
1069 | | ffgccheck | ||
1070 | | mr CARG1, L | ||
1071 | | mr CARG2, BASE | ||
1072 | | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np); np = BASE, the first argument slot. | ||
1073 | | // Returns GCstr *. It is tagged with TISSTR before being returned. | ||
1074 | | evmergelo STR:CRET1, TISSTR, STR:CRET1 | ||
1075 | | b ->fff_restv | ||
1076 | | | ||
1077 | |//-- Base library: iterators ------------------------------------------- | ||
1078 | | | ||
1079 | |.ffunc next | ||
1080 | | cmplwi NARGS8:RC, 8 | ||
1081 | | evldd CARG2, 0(BASE) | ||
1082 | | blt ->fff_fallback | ||
1083 | | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil (writes the slot just past the last argument). | ||
1084 | | checktab TAB:CARG2 | ||
1085 | | lwz PC, FRAME_PC(BASE) | ||
1086 | | checkfail ->fff_fallback | ||
1087 | | stw BASE, L->base // Add frame since C call can throw. | ||
1088 | | mr CARG1, L | ||
1089 | | stw BASE, L->top // Dummy frame length is ok. | ||
1090 | | la CARG3, 8(BASE) | ||
1091 | | stw PC, SAVE_PC | ||
1092 | | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key); key = BASE+8. | ||
1093 | | // Returns 0 at end of traversal. | ||
1094 | | cmplwi CRET1, 0 | ||
1095 | | evmr CRET1, TISNIL | ||
1096 | | beq ->fff_restv // End of traversal: return nil. | ||
1097 | | evldd TMP0, 8(BASE) // Copy key and value to results (read from BASE+8 and BASE+16). | ||
1098 | | la RA, -8(BASE) | ||
1099 | | evldd TMP1, 16(BASE) | ||
1100 | | evstdd TMP0, 0(RA) | ||
1101 | | li RD, (2+1)*8 | ||
1102 | | evstdd TMP1, 8(RA) | ||
1103 | | b ->fff_res | ||
1104 | | | ||
1105 | |.ffunc_1 pairs | ||
1106 | | checktab TAB:CARG1 | ||
1107 | | lwz PC, FRAME_PC(BASE) | ||
1108 | | checkfail ->fff_fallback | ||
1109 | #if LJ_52 | ||
1110 | | lwz TAB:TMP2, TAB:CARG1->metatable | ||
1111 | | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] | ||
1112 | | cmplwi TAB:TMP2, 0 | ||
1113 | | la RA, -8(BASE) | ||
1114 | | bne ->fff_fallback | ||
1115 | #else | ||
1116 | | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] | ||
1117 | | la RA, -8(BASE) | ||
1118 | #endif | ||
1119 | | evstdd TISNIL, 8(BASE) | ||
1120 | | li RD, (3+1)*8 | ||
1121 | | evstdd CFUNC:TMP0, 0(RA) | ||
1122 | | b ->fff_res | ||
1123 | | | ||
1124 | |.ffunc_2 ipairs_aux | ||
1125 | | checktab TAB:CARG1 | ||
1126 | | lwz PC, FRAME_PC(BASE) | ||
1127 | | checkfail ->fff_fallback | ||
1128 | | checknum CARG2 | ||
1129 | | lus TMP3, 0x3ff0 | ||
1130 | | checkfail ->fff_fallback | ||
1131 | | efdctsi TMP2, CARG2 | ||
1132 | | lwz TMP0, TAB:CARG1->asize | ||
1133 | | evmergelo TMP3, TMP3, ZERO | ||
1134 | | lwz TMP1, TAB:CARG1->array | ||
1135 | | efdadd CARG2, CARG2, TMP3 | ||
1136 | | addi TMP2, TMP2, 1 | ||
1137 | | la RA, -8(BASE) | ||
1138 | | cmplw TMP0, TMP2 | ||
1139 | | slwi TMP3, TMP2, 3 | ||
1140 | | evstdd CARG2, 0(RA) | ||
1141 | | ble >2 // Not in array part? (asize <= index+1, unsigned compare.) | ||
1142 | | evlddx TMP1, TMP1, TMP3 | ||
1143 | |1: | ||
1144 | | checknil TMP1 | ||
1145 | | li RD, (0+1)*8 | ||
1146 | | checkok ->fff_res // End of iteration, return 0 results. | ||
1147 | | li RD, (2+1)*8 | ||
1148 | | evstdd TMP1, 8(RA) | ||
1149 | | b ->fff_res | ||
1150 | |2: // Check for empty hash part first (hmask == 0 ends the iteration). Otherwise call C function. | ||
1151 | | lwz TMP0, TAB:CARG1->hmask | ||
1152 | | cmplwi TMP0, 0 | ||
1153 | | li RD, (0+1)*8 | ||
1154 | | beq ->fff_res | ||
1155 | | mr CARG2, TMP2 | ||
1156 | | bl extern lj_tab_getinth // (GCtab *t, int32_t key); key = the incremented index held in TMP2. | ||
1157 | | // Returns cTValue * or NULL. NULL ends the iteration with 0 results. | ||
1158 | | cmplwi CRET1, 0 | ||
1159 | | li RD, (0+1)*8 | ||
1160 | | beq ->fff_res | ||
1161 | | evldd TMP1, 0(CRET1) | ||
1162 | | b <1 | ||
1163 | | | ||
1164 | |.ffunc_1 ipairs | ||
1165 | | checktab TAB:CARG1 | ||
1166 | | lwz PC, FRAME_PC(BASE) | ||
1167 | | checkfail ->fff_fallback | ||
1168 | #if LJ_52 | ||
1169 | | lwz TAB:TMP2, TAB:CARG1->metatable | ||
1170 | | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] | ||
1171 | | cmplwi TAB:TMP2, 0 | ||
1172 | | la RA, -8(BASE) | ||
1173 | | bne ->fff_fallback | ||
1174 | #else | ||
1175 | | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] | ||
1176 | | la RA, -8(BASE) | ||
1177 | #endif | ||
1178 | | evsplati TMP1, 0 | ||
1179 | | li RD, (3+1)*8 | ||
1180 | | evstdd TMP1, 8(BASE) | ||
1181 | | evstdd CFUNC:TMP0, 0(RA) | ||
1182 | | b ->fff_res | ||
1183 | | | ||
1184 | |//-- Base library: catch errors ---------------------------------------- | ||
1185 | | | ||
1186 | |.ffunc pcall | ||
1187 | | cmplwi NARGS8:RC, 8 | ||
1188 | | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
1189 | | blt ->fff_fallback | ||
1190 | | mr TMP2, BASE | ||
1191 | | la BASE, 8(BASE) | ||
1192 | | // Remember active hook before pcall: the hookmask bit at HOOK_ACTIVE_SHIFT is moved to bit 0 and added into PC below. | ||
1193 | | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 | ||
1194 | | subi NARGS8:RC, NARGS8:RC, 8 | ||
1195 | | addi PC, TMP3, 8+FRAME_PCALL | ||
1196 | | b ->vm_call_dispatch | ||
1197 | | | ||
1198 | |.ffunc_2 xpcall | ||
1199 | | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
1200 | | mr TMP2, BASE | ||
1201 | | checkfunc CARG2 // Traceback must be a function; otherwise fff_fallback is taken. | ||
1202 | | checkfail ->fff_fallback | ||
1203 | | la BASE, 16(BASE) | ||
1204 | | // Remember active hook before pcall: the hookmask bit at HOOK_ACTIVE_SHIFT is moved to bit 0 and added into PC below. | ||
1205 | | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 | ||
1206 | | evstdd CARG2, 0(TMP2) // Swap function and traceback (traceback to slot 0, function to slot 1 of the old base). | ||
1207 | | subi NARGS8:RC, NARGS8:RC, 16 | ||
1208 | | evstdd CARG1, 8(TMP2) | ||
1209 | | addi PC, TMP3, 16+FRAME_PCALL | ||
1210 | | b ->vm_call_dispatch | ||
1211 | | | ||
1212 | |//-- Coroutine library -------------------------------------------------- | ||
1213 | | | ||
1214 | |.macro coroutine_resume_wrap, resume | ||
1215 | |.if resume | ||
1216 | |.ffunc_1 coroutine_resume | ||
1217 | | evmergehi TMP0, L:CARG1, L:CARG1 // Copy hi word of arg 1 into TMP0 for the type check below. | ||
1218 | |.else | ||
1219 | |.ffunc coroutine_wrap_aux | ||
1220 | | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr // Coroutine comes from upvalue 0 of the wrapper. | ||
1221 | |.endif | ||
1222 | |.if resume | ||
1223 | | cmpwi TMP0, LJ_TTHREAD | ||
1224 | | bne ->fff_fallback | ||
1225 | |.endif | ||
1226 | | lbz TMP0, L:CARG1->status | ||
1227 | | lwz TMP1, L:CARG1->cframe | ||
1228 | | lwz CARG2, L:CARG1->top | ||
1229 | | cmplwi cr0, TMP0, LUA_YIELD | ||
1230 | | lwz TMP2, L:CARG1->base | ||
1231 | | cmplwi cr1, TMP1, 0 | ||
1232 | | lwz TMP0, L:CARG1->maxstack | ||
1233 | | cmplw cr7, CARG2, TMP2 | ||
1234 | | lwz PC, FRAME_PC(BASE) | ||
1235 | | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 | ||
1236 | | add TMP2, CARG2, NARGS8:RC | ||
1237 | | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD | ||
1238 | | cmplw cr1, TMP2, TMP0 // Coroutine top + nargs*8 beyond its maxstack? | ||
1239 | | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt | ||
1240 | | stw PC, SAVE_PC | ||
1241 | | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov | ||
1242 | | stw BASE, L->base | ||
1243 | | blt cr6, ->fff_fallback // Any condition set: leave it to the fallback handler. | ||
1244 | |1: | ||
1245 | |.if resume | ||
1246 | | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. | ||
1247 | | subi NARGS8:RC, NARGS8:RC, 8 | ||
1248 | | subi TMP2, TMP2, 8 | ||
1249 | |.endif | ||
1250 | | stw TMP2, L:CARG1->top | ||
1251 | | li TMP1, 0 | ||
1252 | | stw BASE, L->top | ||
1253 | |2: // Move args to coroutine. | ||
1254 | | cmpw TMP1, NARGS8:RC | ||
1255 | | evlddx TMP0, BASE, TMP1 | ||
1256 | | beq >3 | ||
1257 | | evstddx TMP0, CARG2, TMP1 | ||
1258 | | addi TMP1, TMP1, 8 | ||
1259 | | b <2 | ||
1260 | |3: | ||
1261 | | li CARG3, 0 | ||
1262 | | mr L:SAVE0, L:CARG1 | ||
1263 | | li CARG4, 0 | ||
1264 | | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) | ||
1265 | | // Returns thread status. | ||
1266 | |4: | ||
1267 | | lwz TMP2, L:SAVE0->base | ||
1268 | | cmplwi CRET1, LUA_YIELD // Status above LUA_YIELD takes the error path at 8:. | ||
1269 | | lwz TMP3, L:SAVE0->top | ||
1270 | | li_vmstate INTERP | ||
1271 | | lwz BASE, L->base | ||
1272 | | st_vmstate | ||
1273 | | bgt >8 | ||
1274 | | sub RD, TMP3, TMP2 // RD = size of the results in bytes (co->top - co->base). | ||
1275 | | lwz TMP0, L->maxstack | ||
1276 | | cmplwi RD, 0 | ||
1277 | | add TMP1, BASE, RD | ||
1278 | | beq >6 // No results? | ||
1279 | | cmplw TMP1, TMP0 | ||
1280 | | li TMP1, 0 | ||
1281 | | bgt >9 // Need to grow stack? | ||
1282 | | | ||
1283 | | subi TMP3, RD, 8 | ||
1284 | | stw TMP2, L:SAVE0->top // Clear coroutine stack. | ||
1285 | |5: // Move results from coroutine. | ||
1286 | | cmplw TMP1, TMP3 | ||
1287 | | evlddx TMP0, TMP2, TMP1 | ||
1288 | | evstddx TMP0, BASE, TMP1 | ||
1289 | | addi TMP1, TMP1, 8 | ||
1290 | | bne <5 | ||
1291 | |6: | ||
1292 | | andi. TMP0, PC, FRAME_TYPE | ||
1293 | |.if resume | ||
1294 | | li TMP1, LJ_TTRUE | ||
1295 | | la RA, -8(BASE) | ||
1296 | | stw TMP1, -8(BASE) // Prepend true to results. | ||
1297 | | addi RD, RD, 16 | ||
1298 | |.else | ||
1299 | | mr RA, BASE | ||
1300 | | addi RD, RD, 8 | ||
1301 | |.endif | ||
1302 | |7: | ||
1303 | | stw PC, SAVE_PC | ||
1304 | | mr MULTRES, RD | ||
1305 | | beq ->BC_RET_Z // cr0 still holds the FRAME_TYPE test from the andi. above. | ||
1306 | | b ->vm_return | ||
1307 | | | ||
1308 | |8: // Coroutine returned with error (at co->top-1). | ||
1309 | |.if resume | ||
1310 | | andi. TMP0, PC, FRAME_TYPE | ||
1311 | | la TMP3, -8(TMP3) | ||
1312 | | li TMP1, LJ_TFALSE | ||
1313 | | evldd TMP0, 0(TMP3) | ||
1314 | | stw TMP3, L:SAVE0->top // Remove error from coroutine stack. | ||
1315 | | li RD, (2+1)*8 | ||
1316 | | stw TMP1, -8(BASE) // Prepend false to results. | ||
1317 | | la RA, -8(BASE) | ||
1318 | | evstdd TMP0, 0(BASE) // Copy error message. | ||
1319 | | b <7 | ||
1320 | |.else | ||
1321 | | mr CARG1, L | ||
1322 | | mr CARG2, L:SAVE0 | ||
1323 | | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | ||
1324 | |.endif | ||
1325 | | | ||
1326 | |9: // Handle stack expansion on return from yield. | ||
1327 | | mr CARG1, L | ||
1328 | | srwi CARG2, RD, 3 | ||
1329 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
1330 | | li CRET1, 0 // Status 0, so the retry at 4: cannot take the error branch. | ||
1331 | | b <4 | ||
1332 | |.endmacro | ||
1333 | | | ||
1334 | | coroutine_resume_wrap 1 // coroutine.resume | ||
1335 | | coroutine_resume_wrap 0 // coroutine.wrap | ||
1336 | | | ||
1337 | |.ffunc coroutine_yield | ||
1338 | | lwz TMP0, L->cframe | ||
1339 | | add TMP1, BASE, NARGS8:RC | ||
1340 | | stw BASE, L->base | ||
1341 | | andi. TMP0, TMP0, CFRAME_RESUME | ||
1342 | | stw TMP1, L->top | ||
1343 | | li CRET1, LUA_YIELD | ||
1344 | | beq ->fff_fallback | ||
1345 | | stw ZERO, L->cframe | ||
1346 | | stb CRET1, L->status | ||
1347 | | b ->vm_leave_unw | ||
1348 | | | ||
1349 | |//-- Math library ------------------------------------------------------- | ||
1350 | | | ||
1351 | |.ffunc_n math_abs | ||
1352 | | efdabs CRET1, CARG1 | ||
1353 | | // Fallthrough. | ||
1354 | | | ||
1355 | |->fff_restv: | ||
1356 | | // CRET1 = TValue result. | ||
1357 | | lwz PC, FRAME_PC(BASE) | ||
1358 | | la RA, -8(BASE) | ||
1359 | | evstdd CRET1, 0(RA) | ||
1360 | |->fff_res1: | ||
1361 | | // RA = results, PC = return. | ||
1362 | | li RD, (1+1)*8 | ||
1363 | |->fff_res: | ||
1364 | | // RA = results, RD = (nresults+1)*8, PC = return. | ||
1365 | | andi. TMP0, PC, FRAME_TYPE | ||
1366 | | mr MULTRES, RD | ||
1367 | | bne ->vm_return | ||
1368 | | lwz INS, -4(PC) | ||
1369 | | decode_RB8 RB, INS | ||
1370 | |5: | ||
1371 | | cmplw RB, RD // More results expected? | ||
1372 | | decode_RA8 TMP0, INS | ||
1373 | | bgt >6 | ||
1374 | | ins_next1 | ||
1375 | | // Adjust BASE. KBASE is assumed to be set for the calling frame. | ||
1376 | | sub BASE, RA, TMP0 | ||
1377 | | ins_next2 | ||
1378 | | | ||
1379 | |6: // Fill up results with nil. | ||
1380 | | subi TMP1, RD, 8 | ||
1381 | | addi RD, RD, 8 | ||
1382 | | evstddx TISNIL, RA, TMP1 | ||
1383 | | b <5 | ||
1384 | | | ||
1385 | |.macro math_extern, func | ||
1386 | | .ffunc math_ .. func | ||
1387 | | cmplwi NARGS8:RC, 8 // NARGS8 is nargs*8: at least 1 argument is required. | ||
1388 | | evldd CARG2, 0(BASE) // Load argument 1 (done before the branch resolves). | ||
1389 | | blt ->fff_fallback | ||
1390 | | checknum CARG2 | ||
1391 | | evmergehi CARG1, CARG2, CARG2 // CARG1 = hi word of the double; CARG2 keeps (hi, lo). | ||
1392 | | checkfail ->fff_fallback // Argument is not a number: use the fallback handler. | ||
1393 | | bl extern func@plt // Call the external function named by the macro argument. | ||
1394 | | evmergelo CRET1, CRET1, CRET2 // Pack the low words of CRET1 (hi) and CRET2 (lo) into CRET1. | ||
1395 | | b ->fff_restv // Return CRET1 as the single result. | ||
1396 | |.endmacro | ||
1397 | | | ||
1398 | |.macro math_extern2, func | ||
1399 | | .ffunc math_ .. func | ||
1400 | | cmplwi NARGS8:RC, 16 // NARGS8 is nargs*8: at least 2 arguments are required. | ||
1401 | | evldd CARG2, 0(BASE) // Argument 1. | ||
1402 | | evldd CARG4, 8(BASE) // Argument 2. | ||
1403 | | blt ->fff_fallback | ||
1404 | | evmergehi CARG1, CARG4, CARG2 // CARG1 = (hi word of arg 2, hi word of arg 1). | ||
1405 | | checknum CARG1 // Tests both hi words with one check. | ||
1406 | | evmergehi CARG3, CARG4, CARG4 // CARG3 = hi word of arg 2; CARG4 keeps (hi, lo). | ||
1407 | | checkanyfail ->fff_fallback // Either argument is not a number: fallback. | ||
1408 | | bl extern func@plt // Call the external function named by the macro argument. | ||
1409 | | evmergelo CRET1, CRET1, CRET2 // Pack the low words of CRET1 (hi) and CRET2 (lo) into CRET1. | ||
1410 | | b ->fff_restv // Return CRET1 as the single result. | ||
1411 | |.endmacro | ||
1412 | | | ||
1413 | |.macro math_round, func | ||
1414 | | .ffunc math_ .. func | ||
1415 | | cmplwi NARGS8:RC, 8 // NARGS8 is nargs*8: at least 1 argument is required. | ||
1416 | | evldd CARG2, 0(BASE) | ||
1417 | | blt ->fff_fallback | ||
1418 | | checknum CARG2 | ||
1419 | | evmergehi CARG1, CARG2, CARG2 // Set up the _hilo inputs: CARG1 = hi, CARG2 = (hi, lo). | ||
1420 | | checkfail ->fff_fallback // Argument is not a number: use the fallback handler. | ||
1421 | | lwz PC, FRAME_PC(BASE) // fff_res1 expects PC = return. | ||
1422 | | bl ->vm_..func.._hilo; | ||
1423 | | la RA, -8(BASE) | ||
1424 | | evstdd CRET2, 0(RA) // The rounding helper leaves its result in CRET2. | ||
1425 | | b ->fff_res1 | ||
1426 | |.endmacro | ||
1427 | | | ||
1428 | | math_round floor | ||
1429 | | math_round ceil | ||
1430 | | | ||
1431 | | math_extern sqrt | ||
1432 | | | ||
1433 | |.ffunc math_log | ||
1434 | | cmplwi NARGS8:RC, 8 | ||
1435 | | evldd CARG2, 0(BASE) | ||
1436 | | bne ->fff_fallback // Need exactly 1 argument. | ||
1437 | | checknum CARG2 | ||
1438 | | evmergehi CARG1, CARG2, CARG2 | ||
1439 | | checkfail ->fff_fallback | ||
1440 | | bl extern log@plt | ||
1441 | | evmergelo CRET1, CRET1, CRET2 | ||
1442 | | b ->fff_restv | ||
1443 | | | ||
1444 | | math_extern log10 | ||
1445 | | math_extern exp | ||
1446 | | math_extern sin | ||
1447 | | math_extern cos | ||
1448 | | math_extern tan | ||
1449 | | math_extern asin | ||
1450 | | math_extern acos | ||
1451 | | math_extern atan | ||
1452 | | math_extern sinh | ||
1453 | | math_extern cosh | ||
1454 | | math_extern tanh | ||
1455 | | math_extern2 pow | ||
1456 | | math_extern2 atan2 | ||
1457 | | math_extern2 fmod | ||
1458 | | | ||
1459 | |->ff_math_deg: | ||
1460 | |.ffunc_n math_rad | ||
1461 | | evldd CARG2, CFUNC:RB->upvalue[0] | ||
1462 | | efdmul CRET1, CARG1, CARG2 | ||
1463 | | b ->fff_restv | ||
1464 | | | ||
1465 | |.ffunc math_ldexp | ||
1466 | | cmplwi NARGS8:RC, 16 | ||
1467 | | evldd CARG2, 0(BASE) | ||
1468 | | evldd CARG4, 8(BASE) | ||
1469 | | blt ->fff_fallback | ||
1470 | | evmergehi CARG1, CARG4, CARG2 | ||
1471 | | checknum CARG1 | ||
1472 | | checkanyfail ->fff_fallback | ||
1473 | | efdctsi CARG3, CARG4 | ||
1474 | | bl extern ldexp@plt | ||
1475 | | evmergelo CRET1, CRET1, CRET2 | ||
1476 | | b ->fff_restv | ||
1477 | | | ||
1478 | |.ffunc math_frexp | ||
1479 | | cmplwi NARGS8:RC, 8 | ||
1480 | | evldd CARG2, 0(BASE) | ||
1481 | | blt ->fff_fallback | ||
1482 | | checknum CARG2 | ||
1483 | | evmergehi CARG1, CARG2, CARG2 | ||
1484 | | checkfail ->fff_fallback | ||
1485 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
1486 | | lwz PC, FRAME_PC(BASE) | ||
1487 | | bl extern frexp@plt | ||
1488 | | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) | ||
1489 | | evmergelo CRET1, CRET1, CRET2 | ||
1490 | | efdcfsi CRET2, TMP1 | ||
1491 | | la RA, -8(BASE) | ||
1492 | | evstdd CRET1, 0(RA) | ||
1493 | | li RD, (2+1)*8 | ||
1494 | | evstdd CRET2, 8(RA) | ||
1495 | | b ->fff_res | ||
1496 | | | ||
1497 | |.ffunc math_modf | ||
1498 | | cmplwi NARGS8:RC, 8 | ||
1499 | | evldd CARG2, 0(BASE) | ||
1500 | | blt ->fff_fallback | ||
1501 | | checknum CARG2 | ||
1502 | | evmergehi CARG1, CARG2, CARG2 | ||
1503 | | checkfail ->fff_fallback | ||
1504 | | la CARG3, -8(BASE) | ||
1505 | | lwz PC, FRAME_PC(BASE) | ||
1506 | | bl extern modf@plt | ||
1507 | | evmergelo CRET1, CRET1, CRET2 | ||
1508 | | la RA, -8(BASE) | ||
1509 | | evstdd CRET1, 0(BASE) | ||
1510 | | li RD, (2+1)*8 | ||
1511 | | b ->fff_res | ||
1512 | | | ||
1513 | |.macro math_minmax, name, cmpop | ||
1514 | | .ffunc_1 name | ||
1515 | | checknum CARG1 | ||
1516 | | li TMP1, 8 // Byte offset of the next argument to examine. | ||
1517 | | checkfail ->fff_fallback | ||
1518 | |1: | ||
1519 | | evlddx CARG2, BASE, TMP1 | ||
1520 | | cmplw cr1, TMP1, NARGS8:RC // Offset reached nargs*8, i.e. all arguments seen? | ||
1521 | | checknum CARG2 | ||
1522 | | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1. | ||
1523 | | checkfail ->fff_fallback | ||
1524 | | cmpop CARG2, CARG1 | ||
1525 | | addi TMP1, TMP1, 8 | ||
1526 | | crmove 4*cr0+lt, 4*cr0+gt // Duplicate the test bit so evsel selects both words alike. | ||
1527 | | evsel CARG1, CARG2, CARG1 // Running result = test true ? CARG2 : CARG1. | ||
1528 | | b <1 | ||
1529 | |.endmacro | ||
1530 | | | ||
1531 | | math_minmax math_min, efdtstlt | ||
1532 | | math_minmax math_max, efdtstgt | ||
1533 | | | ||
1534 | |//-- String library ----------------------------------------------------- | ||
1535 | | | ||
1536 | |.ffunc_1 string_len | ||
1537 | | checkstr STR:CARG1 | ||
1538 | | checkfail ->fff_fallback | ||
1539 | | lwz TMP0, STR:CARG1->len | ||
1540 | | efdcfsi CRET1, TMP0 | ||
1541 | | b ->fff_restv | ||
1542 | | | ||
1543 | |.ffunc string_byte // Only handle the 1-arg case here. | ||
1544 | | cmplwi NARGS8:RC, 8 | ||
1545 | | evldd STR:CARG1, 0(BASE) | ||
1546 | | bne ->fff_fallback // Need exactly 1 argument. | ||
1547 | | checkstr STR:CARG1 | ||
1548 | | la RA, -8(BASE) | ||
1549 | | checkfail ->fff_fallback | ||
1550 | | lwz TMP0, STR:CARG1->len | ||
1551 | | li RD, (0+1)*8 | ||
1552 | | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | ||
1553 | | li TMP2, (1+1)*8 | ||
1554 | | cmplwi TMP0, 0 | ||
1555 | | lwz PC, FRAME_PC(BASE) | ||
1556 | | efdcfsi CRET1, TMP1 | ||
1557 | | iseleq RD, RD, TMP2 | ||
1558 | | evstdd CRET1, 0(RA) | ||
1559 | | b ->fff_res | ||
1560 | | | ||
1561 | |.ffunc string_char // Only handle the 1-arg case here. | ||
1562 | | ffgccheck | ||
1563 | | cmplwi NARGS8:RC, 8 | ||
1564 | | evldd CARG1, 0(BASE) | ||
1565 | | bne ->fff_fallback // Exactly 1 argument. | ||
1566 | | checknum CARG1 | ||
1567 | | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) | ||
1568 | | checkfail ->fff_fallback | ||
1569 | | efdctsiz TMP0, CARG1 | ||
1570 | | li CARG3, 1 | ||
1571 | | cmplwi TMP0, 255 | ||
1572 | | stb TMP0, 0(CARG2) | ||
1573 | | bgt ->fff_fallback | ||
1574 | |->fff_newstr: | ||
1575 | | mr CARG1, L | ||
1576 | | stw BASE, L->base | ||
1577 | | stw PC, SAVE_PC | ||
1578 | | bl extern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1579 | | // Returns GCstr *. | ||
1580 | | lwz BASE, L->base | ||
1581 | | evmergelo STR:CRET1, TISSTR, STR:CRET1 | ||
1582 | | b ->fff_restv | ||
1583 | | | ||
1584 | |.ffunc string_sub | ||
1585 | | ffgccheck | ||
1586 | | cmplwi NARGS8:RC, 16 | ||
1587 | | evldd CARG3, 16(BASE) | ||
1588 | | evldd STR:CARG1, 0(BASE) | ||
1589 | | blt ->fff_fallback | ||
1590 | | evldd CARG2, 8(BASE) | ||
1591 | | li TMP2, -1 | ||
1592 | | beq >1 | ||
1593 | | checknum CARG3 | ||
1594 | | checkfail ->fff_fallback | ||
1595 | | efdctsiz TMP2, CARG3 | ||
1596 | |1: | ||
1597 | | checknum CARG2 | ||
1598 | | checkfail ->fff_fallback | ||
1599 | | checkstr STR:CARG1 | ||
1600 | | efdctsiz TMP1, CARG2 | ||
1601 | | checkfail ->fff_fallback | ||
1602 | | lwz TMP0, STR:CARG1->len | ||
1603 | | cmplw TMP0, TMP2 // len < end? (unsigned compare) | ||
1604 | | add TMP3, TMP2, TMP0 | ||
1605 | | blt >5 | ||
1606 | |2: | ||
1607 | | cmpwi TMP1, 0 // start <= 0? | ||
1608 | | add TMP3, TMP1, TMP0 | ||
1609 | | ble >7 | ||
1610 | |3: | ||
1611 | | sub. CARG3, TMP2, TMP1 | ||
1612 | | addi CARG2, STR:CARG1, #STR-1 | ||
1613 | | addi CARG3, CARG3, 1 | ||
1614 | | add CARG2, CARG2, TMP1 | ||
1615 | | isellt CARG3, r0, CARG3 | ||
1616 | | b ->fff_newstr | ||
1617 | | | ||
1618 | |5: // Negative end or overflow. | ||
1619 | | cmpw TMP0, TMP2 | ||
1620 | | addi TMP3, TMP3, 1 | ||
1621 | | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1 | ||
1622 | | b <2 | ||
1623 | | | ||
1624 | |7: // Negative start or underflow. | ||
1625 | | cmpwi cr1, TMP3, 0 | ||
1626 | | iseleq TMP1, r0, TMP3 | ||
1627 | | isel TMP1, r0, TMP1, 4*cr1+lt | ||
1628 | | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) | ||
1629 | | b <3 | ||
1630 | | | ||
1631 | |.ffunc string_rep // Only handle the 1-char case inline. | ||
1632 | | ffgccheck | ||
1633 | | cmplwi NARGS8:RC, 16 | ||
1634 | | evldd CARG1, 0(BASE) | ||
1635 | | evldd CARG2, 8(BASE) | ||
1636 | | bne ->fff_fallback // Exactly 2 arguments. | ||
1637 | | checknum CARG2 | ||
1638 | | checkfail ->fff_fallback | ||
1639 | | checkstr STR:CARG1 | ||
1640 | | efdctsiz CARG3, CARG2 | ||
1641 | | checkfail ->fff_fallback | ||
1642 | | lwz TMP0, STR:CARG1->len | ||
1643 | | cmpwi CARG3, 0 | ||
1644 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
1645 | | ble >2 // Count <= 0? (or non-int) | ||
1646 | | cmplwi TMP0, 1 | ||
1647 | | subi TMP2, CARG3, 1 | ||
1648 | | blt >2 // Zero length string? | ||
1649 | | cmplw cr1, TMP1, CARG3 | ||
1650 | | bne ->fff_fallback // Fallback for > 1-char strings. | ||
1651 | | lbz TMP0, STR:CARG1[1] | ||
1652 | | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
1653 | | blt cr1, ->fff_fallback | ||
1654 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
1655 | | cmplwi TMP2, 0 | ||
1656 | | stbx TMP0, CARG2, TMP2 | ||
1657 | | subi TMP2, TMP2, 1 | ||
1658 | | bne <1 | ||
1659 | | b ->fff_newstr | ||
1660 | |2: // Return empty string. | ||
1661 | | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH) | ||
1662 | | evmergelo CRET1, TISSTR, STR:CRET1 | ||
1663 | | b ->fff_restv | ||
1664 | | | ||
1665 | |.ffunc string_reverse | ||
1666 | | ffgccheck | ||
1667 | | cmplwi NARGS8:RC, 8 | ||
1668 | | evldd CARG1, 0(BASE) | ||
1669 | | blt ->fff_fallback | ||
1670 | | checkstr STR:CARG1 | ||
1671 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
1672 | | checkfail ->fff_fallback | ||
1673 | | lwz CARG3, STR:CARG1->len | ||
1674 | | la CARG1, #STR(STR:CARG1) | ||
1675 | | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
1676 | | li TMP2, 0 | ||
1677 | | cmplw TMP1, CARG3 | ||
1678 | | subi TMP3, CARG3, 1 | ||
1679 | | blt ->fff_fallback | ||
1680 | |1: // Reverse string copy. | ||
1681 | | cmpwi TMP3, 0 | ||
1682 | | lbzx TMP1, CARG1, TMP2 | ||
1683 | | blt ->fff_newstr | ||
1684 | | stbx TMP1, CARG2, TMP3 | ||
1685 | | subi TMP3, TMP3, 1 | ||
1686 | | addi TMP2, TMP2, 1 | ||
1687 | | b <1 | ||
1688 | | | ||
1689 | |.macro ffstring_case, name, lo | ||
1690 | | .ffunc name | ||
1691 | | ffgccheck | ||
1692 | | cmplwi NARGS8:RC, 8 | ||
1693 | | evldd CARG1, 0(BASE) | ||
1694 | | blt ->fff_fallback // No argument at all: use the fallback handler. | ||
1695 | | checkstr STR:CARG1 | ||
1696 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | ||
1697 | | checkfail ->fff_fallback | ||
1698 | | lwz CARG3, STR:CARG1->len | ||
1699 | | la CARG1, #STR(STR:CARG1) // CARG1 = start of the character data after the header. | ||
1700 | | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) | ||
1701 | | cmplw TMP1, CARG3 | ||
1702 | | li TMP2, 0 | ||
1703 | | blt ->fff_fallback // Temp buffer is smaller than the string: fallback. | ||
1704 | |1: // ASCII case conversion. | ||
1705 | | cmplw TMP2, CARG3 | ||
1706 | | lbzx TMP1, CARG1, TMP2 | ||
1707 | | bge ->fff_newstr // All chars copied: CARG2 = buffer, CARG3 = length. | ||
1708 | | subi TMP0, TMP1, lo | ||
1709 | | xori TMP3, TMP1, 0x20 // Same char with bit 0x20 (the ASCII case bit) flipped. | ||
1710 | | cmplwi TMP0, 26 // Unsigned test: lo <= char < lo+26? | ||
1711 | | isellt TMP1, TMP3, TMP1 // Flip the case only for chars inside that range. | ||
1712 | | stbx TMP1, CARG2, TMP2 | ||
1713 | | addi TMP2, TMP2, 1 | ||
1714 | | b <1 | ||
1715 | |.endmacro | ||
1716 | | | ||
1717 | |ffstring_case string_lower, 65 | ||
1718 | |ffstring_case string_upper, 97 | ||
1719 | | | ||
1720 | |//-- Table library ------------------------------------------------------ | ||
1721 | | | ||
1722 | |.ffunc_1 table_getn | ||
1723 | | checktab CARG1 | ||
1724 | | checkfail ->fff_fallback | ||
1725 | | bl extern lj_tab_len // (GCtab *t) | ||
1726 | | // Returns uint32_t (but less than 2^31). | ||
1727 | | efdcfsi CRET1, CRET1 | ||
1728 | | b ->fff_restv | ||
1729 | | | ||
1730 | |//-- Bit library -------------------------------------------------------- | ||
1731 | | | ||
1732 | |.macro .ffunc_bit, name | ||
1733 | | .ffunc_n bit_..name | ||
1734 | | efdadd CARG1, CARG1, TOBIT // Add the TOBIT bias; users then treat CARG1's low word as an int32. | ||
1735 | |.endmacro | ||
1736 | | | ||
1737 | |.ffunc_bit tobit | ||
1738 | |->fff_resbit: | ||
1739 | | efdcfsi CRET1, CARG1 | ||
1740 | | b ->fff_restv | ||
1741 | | | ||
1742 | |.macro .ffunc_bit_op, name, ins | ||
1743 | | .ffunc_bit name | ||
1744 | | li TMP1, 8 // Byte offset of the next argument to fold in. | ||
1745 | |1: | ||
1746 | | evlddx CARG2, BASE, TMP1 | ||
1747 | | cmplw cr1, TMP1, NARGS8:RC // Offset reached nargs*8, i.e. all arguments seen? | ||
1748 | | checknum CARG2 | ||
1749 | | bge cr1, ->fff_resbit // No more arguments: return the accumulated CARG1. | ||
1750 | | checkfail ->fff_fallback | ||
1751 | | efdadd CARG2, CARG2, TOBIT // Same TOBIT bias that .ffunc_bit applied to CARG1. | ||
1752 | | ins CARG1, CARG1, CARG2 | ||
1753 | | addi TMP1, TMP1, 8 | ||
1754 | | b <1 | ||
1755 | |.endmacro | ||
1756 | | | ||
1757 | |.ffunc_bit_op band, and | ||
1758 | |.ffunc_bit_op bor, or | ||
1759 | |.ffunc_bit_op bxor, xor | ||
1760 | | | ||
1761 | |.ffunc_bit bswap | ||
1762 | | rotlwi TMP0, CARG1, 8 | ||
1763 | | rlwimi TMP0, CARG1, 24, 0, 7 | ||
1764 | | rlwimi TMP0, CARG1, 24, 16, 23 | ||
1765 | | efdcfsi CRET1, TMP0 | ||
1766 | | b ->fff_restv | ||
1767 | | | ||
1768 | |.ffunc_bit bnot | ||
1769 | | not TMP0, CARG1 | ||
1770 | | efdcfsi CRET1, TMP0 | ||
1771 | | b ->fff_restv | ||
1772 | | | ||
1773 | |.macro .ffunc_bit_sh, name, ins, shmod | ||
1774 | | .ffunc_nn bit_..name | ||
1775 | | efdadd CARG2, CARG2, TOBIT | ||
1776 | | efdadd CARG1, CARG1, TOBIT | ||
1777 | |.if shmod == 1 | ||
1778 | | rlwinm CARG2, CARG2, 0, 27, 31 // Keep only the low 5 bits of the shift count. | ||
1779 | |.elif shmod == 2 | ||
1780 | | neg CARG2, CARG2 // Negate the count so the left rotate acts as a right rotate. | ||
1781 | |.endif | ||
1782 | | ins TMP0, CARG1, CARG2 | ||
1783 | | efdcfsi CRET1, TMP0 // Convert the signed 32 bit result back to a double. | ||
1784 | | b ->fff_restv | ||
1785 | |.endmacro | ||
1786 | | | ||
1787 | |.ffunc_bit_sh lshift, slw, 1 | ||
1788 | |.ffunc_bit_sh rshift, srw, 1 | ||
1789 | |.ffunc_bit_sh arshift, sraw, 1 | ||
1790 | |.ffunc_bit_sh rol, rotlw, 0 | ||
1791 | |.ffunc_bit_sh ror, rotlw, 2 | ||
1792 | | | ||
1793 | |//----------------------------------------------------------------------- | ||
1794 | | | ||
1795 | |->fff_fallback: // Call fast function fallback handler. | ||
1796 | | // BASE = new base, RB = CFUNC, RC = nargs*8 | ||
1797 | | lwz TMP3, CFUNC:RB->f | ||
1798 | | add TMP1, BASE, NARGS8:RC | ||
1799 | | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. | ||
1800 | | addi TMP0, TMP1, 8*LUA_MINSTACK | ||
1801 | | lwz TMP2, L->maxstack | ||
1802 | | stw PC, SAVE_PC // Redundant (but a defined value). | ||
1803 | | cmplw TMP0, TMP2 | ||
1804 | | stw BASE, L->base | ||
1805 | | stw TMP1, L->top | ||
1806 | | mr CARG1, L | ||
1807 | | bgt >5 // Need to grow stack. | ||
1808 | | mtctr TMP3 | ||
1809 | | bctrl // (lua_State *L) | ||
1810 | | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ||
1811 | | lwz BASE, L->base | ||
1812 | | cmpwi CRET1, 0 | ||
1813 | | slwi RD, CRET1, 3 | ||
1814 | | la RA, -8(BASE) | ||
1815 | | bgt ->fff_res // Returned nresults+1? | ||
1816 | |1: // Returned 0 or -1: retry fast path. | ||
1817 | | lwz TMP0, L->top | ||
1818 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
1819 | | sub NARGS8:RC, TMP0, BASE | ||
1820 | | bne ->vm_call_tail // Returned -1? | ||
1821 | | ins_callt // Returned 0: retry fast path. | ||
1822 | | | ||
1823 | |// Reconstruct previous base for vmeta_call during tailcall. | ||
1824 | |->vm_call_tail: | ||
1825 | | andi. TMP0, PC, FRAME_TYPE | ||
1826 | | rlwinm TMP1, PC, 0, 0, 28 | ||
1827 | | bne >3 | ||
1828 | | lwz INS, -4(PC) | ||
1829 | | decode_RA8 TMP1, INS | ||
1830 | | addi TMP1, TMP1, 8 | ||
1831 | |3: | ||
1832 | | sub TMP2, BASE, TMP1 | ||
1833 | | b ->vm_call_dispatch // Resolve again for tailcall. | ||
1834 | | | ||
1835 | |5: // Grow stack for fallback handler. | ||
1836 | | li CARG2, LUA_MINSTACK | ||
1837 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
1838 | | lwz BASE, L->base | ||
1839 | | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry. | ||
1840 | | b <1 | ||
1841 | | | ||
1842 | |->fff_gcstep: // Call GC step function. | ||
1843 | | // BASE = new base, RC = nargs*8 | ||
1844 | | mflr SAVE0 | ||
1845 | | stw BASE, L->base | ||
1846 | | add TMP0, BASE, NARGS8:RC | ||
1847 | | stw PC, SAVE_PC // Redundant (but a defined value). | ||
1848 | | stw TMP0, L->top | ||
1849 | | mr CARG1, L | ||
1850 | | bl extern lj_gc_step // (lua_State *L) | ||
1851 | | lwz BASE, L->base | ||
1852 | | mtlr SAVE0 | ||
1853 | | lwz TMP0, L->top | ||
1854 | | sub NARGS8:RC, TMP0, BASE | ||
1855 | | lwz CFUNC:RB, FRAME_FUNC(BASE) | ||
1856 | | blr | ||
1857 | | | ||
1858 | |//----------------------------------------------------------------------- | ||
1859 | |//-- Special dispatch targets ------------------------------------------- | ||
1860 | |//----------------------------------------------------------------------- | ||
1861 | | | ||
1862 | |->vm_record: // Dispatch target for recording phase. | ||
1863 | |.if JIT | ||
1864 | | NYI | ||
1865 | |.endif | ||
1866 | | | ||
1867 | |->vm_rethook: // Dispatch target for return hooks. | ||
1868 | | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
1869 | | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active? | ||
1870 | | beq >1 | ||
1871 | |5: // Re-dispatch to static ins. | ||
1872 | | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP4 TMP1, INS. | ||
1873 | | lwzx TMP0, DISPATCH, TMP1 | ||
1874 | | mtctr TMP0 | ||
1875 | | bctr | ||
1876 | | | ||
1877 | |->vm_inshook: // Dispatch target for instr/line hooks. | ||
1878 | | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) | ||
1879 | | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
1880 | | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active? | ||
1881 | | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 | ||
1882 | | bne <5 | ||
1883 | | | ||
1884 | | cmpwi cr1, TMP0, 0 | ||
1885 | | addic. TMP2, TMP2, -1 | ||
1886 | | beq cr1, <5 | ||
1887 | | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) | ||
1888 | | beq >1 | ||
1889 | | bge cr1, <5 | ||
1890 | |1: | ||
1891 | | mr CARG1, L | ||
1892 | | stw MULTRES, SAVE_MULTRES | ||
1893 | | mr CARG2, PC | ||
1894 | | stw BASE, L->base | ||
1895 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
1896 | | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) | ||
1897 | |3: | ||
1898 | | lwz BASE, L->base | ||
1899 | |4: // Re-dispatch to static ins. | ||
1900 | | lwz INS, -4(PC) | ||
1901 | | decode_OP4 TMP1, INS | ||
1902 | | decode_RB8 RB, INS | ||
1903 | | addi TMP1, TMP1, GG_DISP2STATIC | ||
1904 | | decode_RD8 RD, INS | ||
1905 | | lwzx TMP0, DISPATCH, TMP1 | ||
1906 | | decode_RA8 RA, INS | ||
1907 | | decode_RC8 RC, INS | ||
1908 | | mtctr TMP0 | ||
1909 | | bctr | ||
1910 | | | ||
1911 | |->cont_hook: // Continue from hook yield. | ||
1912 | | addi PC, PC, 4 | ||
1913 | | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. | ||
1914 | | b <4 | ||
1915 | | | ||
1916 | |->vm_hotloop: // Hot loop counter underflow. | ||
1917 | |.if JIT | ||
1918 | | NYI | ||
1919 | |.endif | ||
1920 | | | ||
1921 | |->vm_callhook: // Dispatch target for call hooks. | ||
1922 | | mr CARG2, PC | ||
1923 | |.if JIT | ||
1924 | | b >1 | ||
1925 | |.endif | ||
1926 | | | ||
1927 | |->vm_hotcall: // Hot call counter underflow. | ||
1928 | |.if JIT | ||
1929 | | ori CARG2, PC, 1 | ||
1930 | |1: | ||
1931 | |.endif | ||
1932 | | add TMP0, BASE, RC | ||
1933 | | stw PC, SAVE_PC | ||
1934 | | mr CARG1, L | ||
1935 | | stw BASE, L->base | ||
1936 | | sub RA, RA, BASE | ||
1937 | | stw TMP0, L->top | ||
1938 | | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) | ||
1939 | | // Returns ASMFunction. | ||
1940 | | lwz BASE, L->base | ||
1941 | | lwz TMP0, L->top | ||
1942 | | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. | ||
1943 | | sub NARGS8:RC, TMP0, BASE | ||
1944 | | add RA, BASE, RA | ||
1945 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
1946 | | mtctr CRET1 | ||
1947 | | bctr | ||
1948 | | | ||
1949 | |//----------------------------------------------------------------------- | ||
1950 | |//-- Trace exit handler ------------------------------------------------- | ||
1951 | |//----------------------------------------------------------------------- | ||
1952 | | | ||
1953 | |->vm_exit_handler: | ||
1954 | |.if JIT | ||
1955 | | NYI | ||
1956 | |.endif | ||
1957 | |->vm_exit_interp: | ||
1958 | |.if JIT | ||
1959 | | NYI | ||
1960 | |.endif | ||
1961 | | | ||
1962 | |//----------------------------------------------------------------------- | ||
1963 | |//-- Math helper functions ---------------------------------------------- | ||
1964 | |//----------------------------------------------------------------------- | ||
1965 | | | ||
1966 | |// FP value rounding. Called by math.floor/math.ceil fast functions | ||
1967 | |// and from JIT code. | ||
1968 | |// | ||
1969 | |// This can be inlined if the CPU has the frin/friz/frip/frim instructions. | ||
1970 | |// The alternative hard-float approaches have a deep dependency chain. | ||
1971 | |// The resulting latency is at least 3x-7x the double-precision FP latency | ||
1972 | |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles. | ||
1973 | |// | ||
1974 | |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy). | ||
1975 | |// However it relies on a fast way to transfer the FP value to GPRs | ||
1976 | |// (e500v2: 0cy for lo-word, 1cy for hi-word). | ||
1977 | |// | ||
1978 | |.macro vm_round, name, mode | ||
1979 | | // Used temporaries: TMP0, TMP1, TMP2, TMP3. | ||
1980 | |->name.._efd: // Input: CARG2, output: CRET2 | ||
1981 | | evmergehi CARG1, CARG2, CARG2 | ||
1982 | |->name.._hilo: | ||
1983 | | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2 | ||
1984 | | rlwinm TMP2, CARG1, 12, 21, 31 // Extract the 11 bit biased exponent from the hi word. | ||
1985 | | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 | ||
1986 | | li TMP1, -1 | ||
1987 | | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51? | ||
1988 | | subfic TMP0, TMP2, 52 // Shift count 52-exp for lomask. | ||
1989 | | bgt cr1, >1 // Unsigned compare: taken for exp < 0 and for exp > 51. | ||
1990 | | lus TMP3, 0xfff0 | ||
1991 | | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp) | ||
1992 | | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp | ||
1993 | |.if mode == 2 // trunc(x): | ||
1994 | | evmergelo TMP0, TMP1, TMP0 | ||
1995 | | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask | ||
1996 | |.else | ||
1997 | | andc TMP2, CARG2, TMP0 | ||
1998 | | andc TMP3, CARG1, TMP1 | ||
1999 | | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask) | ||
2000 | | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31 | ||
2001 | |.if mode == 0 // floor(x): | ||
2002 | | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0) | ||
2003 | |.else // ceil(x): | ||
2004 | | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0) | ||
2005 | |.endif | ||
2006 | | and CARG2, CARG2, TMP0 // lo &= lomask | ||
2007 | | and CARG1, CARG1, TMP1 // hi &= himask | ||
2008 | | subc TMP0, CARG2, TMP0 // lo-lomask; sets the carry consumed by sube below. | ||
2009 | | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask | ||
2010 | | sube TMP1, CARG1, TMP1 | ||
2011 | | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry | ||
2012 | | evmergelo CRET2, TMP1, TMP0 // CRET2 = (hi, lo). | ||
2013 | |.endif | ||
2014 | | blr | ||
2015 | |1: | ||
2016 | | bgtlr // Already done if >=2^52, +-inf or nan. | ||
2017 | |.if mode == 2 // trunc(x): | ||
2018 | | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x) | ||
2019 | | li TMP0, 0 | ||
2020 | | evmergelo CRET2, TMP1, TMP0 | ||
2021 | |.else | ||
2022 | | rlwinm TMP2, CARG1, 0, 1, 31 // Clear the sign bit of the hi word. | ||
2023 | | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31 | ||
2024 | | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo | ||
2025 | | lus TMP1, 0x3ff0 // Hi word of the double 1.0. | ||
2026 | |.if mode == 0 // floor(x): | ||
2027 | | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0) | ||
2028 | |.else // ceil(x): | ||
2029 | | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0) | ||
2030 | |.endif | ||
2031 | | li TMP0, 0 | ||
2032 | | iseleq TMP1, r0, TMP1 | ||
2033 | | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0) | ||
2034 | | evmergelo CRET2, CARG1, TMP0 | ||
2035 | |.endif | ||
2036 | | blr | ||
2037 | |.endmacro | ||
2038 | | | ||
2039 | |->vm_floor: | ||
2040 | | mflr CARG3 | ||
2041 | | evmergelo CARG2, CARG1, CARG2 | ||
2042 | | bl ->vm_floor_hilo | ||
2043 | | mtlr CARG3 | ||
2044 | | evmergehi CRET1, CRET2, CRET2 | ||
2045 | | blr | ||
2046 | | | ||
2047 | | vm_round vm_floor, 0 | ||
2048 | | vm_round vm_ceil, 1 | ||
2049 | |.if JIT | ||
2050 | | vm_round vm_trunc, 2 | ||
2051 | |.else | ||
2052 | |->vm_trunc_efd: | ||
2053 | |->vm_trunc_hilo: | ||
2054 | |.endif | ||
2055 | | | ||
2056 | |//----------------------------------------------------------------------- | ||
2057 | |//-- Miscellaneous functions -------------------------------------------- | ||
2058 | |//----------------------------------------------------------------------- | ||
2059 | | | ||
2060 | |//----------------------------------------------------------------------- | ||
2061 | |//-- FFI helper functions ----------------------------------------------- | ||
2062 | |//----------------------------------------------------------------------- | ||
2063 | | | ||
2064 | |->vm_ffi_call: | ||
2065 | |.if FFI | ||
2066 | | NYI | ||
2067 | |.endif | ||
2068 | | | ||
2069 | |//----------------------------------------------------------------------- | ||
2070 | } | ||
2071 | |||
2072 | /* Generate the code for a single instruction. */ | ||
2073 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ||
2074 | { | ||
2075 | int vk = 0; | ||
2076 | |=>defop: | ||
2077 | |||
2078 | switch (op) { | ||
2079 | |||
2080 | /* -- Comparison ops ---------------------------------------------------- */ | ||
2081 | |||
2082 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
2083 | |||
2084 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
2085 | | // RA = src1*8, RD = src2*8, JMP with RD = target | ||
2086 | | evlddx TMP0, BASE, RA | ||
2087 | | addi PC, PC, 4 | ||
2088 | | evlddx TMP1, BASE, RD | ||
2089 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
2090 | | lwz TMP2, -4(PC) | ||
2091 | | evmergehi RB, TMP0, TMP1 | ||
2092 | | decode_RD4 TMP2, TMP2 | ||
2093 | | checknum RB | ||
2094 | | add TMP2, TMP2, TMP3 | ||
2095 | | checkanyfail ->vmeta_comp | ||
2096 | | efdcmplt TMP0, TMP1 | ||
2097 | if (op == BC_ISLE || op == BC_ISGT) { | ||
2098 | | efdcmpeq cr1, TMP0, TMP1 | ||
2099 | | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt | ||
2100 | } | ||
2101 | if (op == BC_ISLT || op == BC_ISLE) { | ||
2102 | | iselgt PC, TMP2, PC | ||
2103 | } else { | ||
2104 | | iselgt PC, PC, TMP2 | ||
2105 | } | ||
2106 | | ins_next | ||
2107 | break; | ||
2108 | |||
2109 | case BC_ISEQV: case BC_ISNEV: | ||
2110 | vk = op == BC_ISEQV; | ||
2111 | | // RA = src1*8, RD = src2*8, JMP with RD = target | ||
2112 | | evlddx CARG2, BASE, RA | ||
2113 | | addi PC, PC, 4 | ||
2114 | | evlddx CARG3, BASE, RD | ||
2115 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
2116 | | lwz TMP2, -4(PC) | ||
2117 | | evmergehi RB, CARG2, CARG3 | ||
2118 | | decode_RD4 TMP2, TMP2 | ||
2119 | | checknum RB | ||
2120 | | add TMP2, TMP2, TMP3 | ||
2121 | | checkanyfail >5 | ||
2122 | | efdcmpeq CARG2, CARG3 | ||
2123 | if (vk) { | ||
2124 | | iselgt PC, TMP2, PC | ||
2125 | } else { | ||
2126 | | iselgt PC, PC, TMP2 | ||
2127 | } | ||
2128 | |1: | ||
2129 | | ins_next | ||
2130 | | | ||
2131 | |5: // Either or both types are not numbers. | ||
2132 | | evcmpeq CARG2, CARG3 | ||
2133 | | not TMP3, RB | ||
2134 | | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? | ||
2135 | | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type. | ||
2136 | | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? | ||
2137 | | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive. | ||
2138 | | mr SAVE0, PC | ||
2139 | if (vk) { | ||
2140 | | isel PC, TMP2, PC, 4*cr7+gt | ||
2141 | } else { | ||
2142 | | isel TMP2, PC, TMP2, 4*cr7+gt | ||
2143 | } | ||
2144 | | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2. | ||
2145 | if (vk) { | ||
2146 | | isel PC, TMP2, PC, 4*cr0+so | ||
2147 | } else { | ||
2148 | | isel PC, PC, TMP2, 4*cr0+so | ||
2149 | } | ||
2150 | | blt cr7, <1 // Done if 1 or 2. | ||
2151 | | blt cr6, <1 // Done if not tab/ud. | ||
2152 | | | ||
2153 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
2154 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
2155 | | lwz TAB:TMP2, TAB:CARG2->metatable | ||
2156 | | li CARG4, 1-vk // ne = 0 or 1. | ||
2157 | | cmplwi TAB:TMP2, 0 | ||
2158 | | beq <1 // No metatable? | ||
2159 | | lbz TMP2, TAB:TMP2->nomm | ||
2160 | | andi. TMP2, TMP2, 1<<MM_eq | ||
2161 | | bne <1 // Or 'no __eq' flag set? | ||
2162 | | mr PC, SAVE0 // Restore old PC. | ||
2163 | | b ->vmeta_equal // Handle __eq metamethod. | ||
2164 | break; | ||
2165 | |||
2166 | case BC_ISEQS: case BC_ISNES: | ||
2167 | vk = op == BC_ISEQS; | ||
2168 | | // RA = src*8, RD = str_const*8 (~), JMP with RD = target | ||
2169 | | evlddx TMP0, BASE, RA | ||
2170 | | srwi RD, RD, 1 | ||
2171 | | lwz INS, 0(PC) | ||
2172 | | subfic RD, RD, -4 | ||
2173 | | addi PC, PC, 4 | ||
2174 | | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 | ||
2175 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
2176 | | decode_RD4 TMP2, INS | ||
2177 | | evmergelo STR:TMP1, TISSTR, STR:TMP1 | ||
2178 | | add TMP2, TMP2, TMP3 | ||
2179 | | evcmpeq TMP0, STR:TMP1 | ||
2180 | if (vk) { | ||
2181 | | isel PC, TMP2, PC, 4*cr0+so | ||
2182 | } else { | ||
2183 | | isel PC, PC, TMP2, 4*cr0+so | ||
2184 | } | ||
2185 | | ins_next | ||
2186 | break; | ||
2187 | |||
2188 | case BC_ISEQN: case BC_ISNEN: | ||
2189 | vk = op == BC_ISEQN; | ||
2190 | | // RA = src*8, RD = num_const*8, JMP with RD = target | ||
2191 | | evlddx TMP0, BASE, RA | ||
2192 | | addi PC, PC, 4 | ||
2193 | | evlddx TMP1, KBASE, RD | ||
2194 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
2195 | | lwz INS, -4(PC) | ||
2196 | | checknum TMP0 | ||
2197 | | checkfail >5 | ||
2198 | | efdcmpeq TMP0, TMP1 | ||
2199 | |1: | ||
2200 | | decode_RD4 TMP2, INS | ||
2201 | | add TMP2, TMP2, TMP3 | ||
2202 | if (vk) { | ||
2203 | | iselgt PC, TMP2, PC | ||
2204 | |5: | ||
2205 | } else { | ||
2206 | | iselgt PC, PC, TMP2 | ||
2207 | } | ||
2208 | |3: | ||
2209 | | ins_next | ||
2210 | if (!vk) { | ||
2211 | |5: | ||
2212 | | decode_RD4 TMP2, INS | ||
2213 | | add PC, TMP2, TMP3 | ||
2214 | | b <3 | ||
2215 | } | ||
2216 | break; | ||
2217 | |||
2218 | case BC_ISEQP: case BC_ISNEP: | ||
2219 | vk = op == BC_ISEQP; | ||
2220 | | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target | ||
2221 | | lwzx TMP0, BASE, RA | ||
2222 | | srwi TMP1, RD, 3 | ||
2223 | | lwz INS, 0(PC) | ||
2224 | | addi PC, PC, 4 | ||
2225 | | not TMP1, TMP1 | ||
2226 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
2227 | | cmplw TMP0, TMP1 | ||
2228 | | decode_RD4 TMP2, INS | ||
2229 | | add TMP2, TMP2, TMP3 | ||
2230 | if (vk) { | ||
2231 | | iseleq PC, TMP2, PC | ||
2232 | } else { | ||
2233 | | iseleq PC, PC, TMP2 | ||
2234 | } | ||
2235 | | ins_next | ||
2236 | break; | ||
2237 | |||
2238 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
2239 | |||
2240 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
2241 | | // RA = dst*8 or unused, RD = src*8, JMP with RD = target | ||
2242 | | evlddx TMP0, BASE, RD | ||
2243 | | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE. | ||
2244 | | lwz INS, 0(PC) | ||
2245 | | evcmpltu TMP0, TMP1 | ||
2246 | | addi PC, PC, 4 | ||
2247 | if (op == BC_IST || op == BC_ISF) { | ||
2248 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
2249 | | decode_RD4 TMP2, INS | ||
2250 | | add TMP2, TMP2, TMP3 | ||
2251 | if (op == BC_IST) { | ||
2252 | | isellt PC, TMP2, PC | ||
2253 | } else { | ||
2254 | | isellt PC, PC, TMP2 | ||
2255 | } | ||
2256 | } else { | ||
2257 | if (op == BC_ISTC) { | ||
2258 | | checkfail >1 | ||
2259 | } else { | ||
2260 | | checkok >1 | ||
2261 | } | ||
2262 | | addis PC, PC, -(BCBIAS_J*4 >> 16) | ||
2263 | | decode_RD4 TMP2, INS | ||
2264 | | evstddx TMP0, BASE, RA | ||
2265 | | add PC, PC, TMP2 | ||
2266 | |1: | ||
2267 | } | ||
2268 | | ins_next | ||
2269 | break; | ||
2270 | |||
2271 | /* -- Unary ops --------------------------------------------------------- */ | ||
2272 | |||
2273 | case BC_MOV: | ||
2274 | | // RA = dst*8, RD = src*8 | ||
2275 | | ins_next1 | ||
2276 | | evlddx TMP0, BASE, RD | ||
2277 | | evstddx TMP0, BASE, RA | ||
2278 | | ins_next2 | ||
2279 | break; | ||
2280 | case BC_NOT: | ||
2281 | | // RA = dst*8, RD = src*8 | ||
2282 | | ins_next1 | ||
2283 | | lwzx TMP0, BASE, RD | ||
2284 | | subfic TMP1, TMP0, LJ_TTRUE | ||
2285 | | adde TMP0, TMP0, TMP1 | ||
2286 | | stwx TMP0, BASE, RA | ||
2287 | | ins_next2 | ||
2288 | break; | ||
2289 | case BC_UNM: | ||
2290 | | // RA = dst*8, RD = src*8 | ||
2291 | | evlddx TMP0, BASE, RD | ||
2292 | | checknum TMP0 | ||
2293 | | checkfail ->vmeta_unm | ||
2294 | | efdneg TMP0, TMP0 | ||
2295 | | ins_next1 | ||
2296 | | evstddx TMP0, BASE, RA | ||
2297 | | ins_next2 | ||
2298 | break; | ||
2299 | case BC_LEN: | ||
2300 | | // RA = dst*8, RD = src*8 | ||
2301 | | evlddx CARG1, BASE, RD | ||
2302 | | checkstr CARG1 | ||
2303 | | checkfail >2 | ||
2304 | | lwz CRET1, STR:CARG1->len | ||
2305 | |1: | ||
2306 | | ins_next1 | ||
2307 | | efdcfsi TMP0, CRET1 | ||
2308 | | evstddx TMP0, BASE, RA | ||
2309 | | ins_next2 | ||
2310 | |2: | ||
2311 | | checktab CARG1 | ||
2312 | | checkfail ->vmeta_len | ||
2313 | #if LJ_52 | ||
2314 | | lwz TAB:TMP2, TAB:CARG1->metatable | ||
2315 | | cmplwi TAB:TMP2, 0 | ||
2316 | | bne >9 | ||
2317 | |3: | ||
2318 | #endif | ||
2319 | |->BC_LEN_Z: | ||
2320 | | bl extern lj_tab_len // (GCtab *t) | ||
2321 | | // Returns uint32_t (but less than 2^31). | ||
2322 | | b <1 | ||
2323 | #if LJ_52 | ||
2324 | |9: | ||
2325 | | lbz TMP0, TAB:TMP2->nomm | ||
2326 | | andi. TMP0, TMP0, 1<<MM_len | ||
2327 | | bne <3 // 'no __len' flag set: done. | ||
2328 | | b ->vmeta_len | ||
2329 | #endif | ||
2330 | break; | ||
2331 | |||
2332 | /* -- Binary ops -------------------------------------------------------- */ | ||
2333 | |||
2334 | |.macro ins_arithpre, t0, t1 | ||
2335 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | ||
2336 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
2337 | ||switch (vk) { | ||
2338 | ||case 0: | ||
2339 | | evlddx t0, BASE, RB | ||
2340 | | checknum t0 | ||
2341 | | evlddx t1, KBASE, RC | ||
2342 | | checkfail ->vmeta_arith_vn | ||
2343 | || break; | ||
2344 | ||case 1: | ||
2345 | | evlddx t1, BASE, RB | ||
2346 | | checknum t1 | ||
2347 | | evlddx t0, KBASE, RC | ||
2348 | | checkfail ->vmeta_arith_nv | ||
2349 | || break; | ||
2350 | ||default: | ||
2351 | | evlddx t0, BASE, RB | ||
2352 | | evlddx t1, BASE, RC | ||
2353 | | evmergehi TMP2, t0, t1 | ||
2354 | | checknum TMP2 | ||
2355 | | checkanyfail ->vmeta_arith_vv | ||
2356 | || break; | ||
2357 | ||} | ||
2358 | |.endmacro | ||
2359 | | | ||
2360 | |.macro ins_arith, ins | ||
2361 | | ins_arithpre TMP0, TMP1 | ||
2362 | | ins_next1 | ||
2363 | | ins TMP0, TMP0, TMP1 | ||
2364 | | evstddx TMP0, BASE, RA | ||
2365 | | ins_next2 | ||
2366 | |.endmacro | ||
2367 | |||
2368 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | ||
2369 | | ins_arith efdadd | ||
2370 | break; | ||
2371 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | ||
2372 | | ins_arith efdsub | ||
2373 | break; | ||
2374 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | ||
2375 | | ins_arith efdmul | ||
2376 | break; | ||
2377 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | ||
2378 | | ins_arith efddiv | ||
2379 | break; | ||
2380 | case BC_MODVN: | ||
2381 | | ins_arithpre RD, SAVE0 | ||
2382 | |->BC_MODVN_Z: | ||
2383 | | efddiv CARG2, RD, SAVE0 | ||
2384 | | bl ->vm_floor_efd // floor(b/c) | ||
2385 | | efdmul TMP0, CRET2, SAVE0 | ||
2386 | | ins_next1 | ||
2387 | | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c | ||
2388 | | evstddx TMP0, BASE, RA | ||
2389 | | ins_next2 | ||
2390 | break; | ||
2391 | case BC_MODNV: case BC_MODVV: | ||
2392 | | ins_arithpre RD, SAVE0 | ||
2393 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | ||
2394 | break; | ||
2395 | case BC_POW: | ||
2396 | | evlddx CARG2, BASE, RB | ||
2397 | | evlddx CARG4, BASE, RC | ||
2398 | | evmergehi CARG1, CARG4, CARG2 | ||
2399 | | checknum CARG1 | ||
2400 | | evmergehi CARG3, CARG4, CARG4 | ||
2401 | | checkanyfail ->vmeta_arith_vv | ||
2402 | | bl extern pow@plt | ||
2403 | | evmergelo CRET2, CRET1, CRET2 | ||
2404 | | evstddx CRET2, BASE, RA | ||
2405 | | ins_next | ||
2406 | break; | ||
2407 | |||
2408 | case BC_CAT: | ||
2409 | | // RA = dst*8, RB = src_start*8, RC = src_end*8 | ||
2410 | | sub CARG3, RC, RB | ||
2411 | | stw BASE, L->base | ||
2412 | | add CARG2, BASE, RC | ||
2413 | | mr SAVE0, RB | ||
2414 | |->BC_CAT_Z: | ||
2415 | | stw PC, SAVE_PC | ||
2416 | | mr CARG1, L | ||
2417 | | srwi CARG3, CARG3, 3 | ||
2418 | | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
2419 | | // Returns NULL (finished) or TValue * (metamethod). | ||
2420 | | cmplwi CRET1, 0 | ||
2421 | | lwz BASE, L->base | ||
2422 | | bne ->vmeta_binop | ||
2423 | | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA. | ||
2424 | | evstddx TMP0, BASE, RA | ||
2425 | | ins_next | ||
2426 | break; | ||
2427 | |||
2428 | /* -- Constant ops ------------------------------------------------------ */ | ||
2429 | |||
2430 | case BC_KSTR: | ||
2431 | | // RA = dst*8, RD = str_const*8 (~) | ||
2432 | | ins_next1 | ||
2433 | | srwi TMP1, RD, 1 | ||
2434 | | subfic TMP1, TMP1, -4 | ||
2435 | | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 | ||
2436 | | evmergelo TMP0, TISSTR, TMP0 | ||
2437 | | evstddx TMP0, BASE, RA | ||
2438 | | ins_next2 | ||
2439 | break; | ||
2440 | case BC_KCDATA: | ||
2441 | |.if FFI | ||
2442 | | // RA = dst*8, RD = cdata_const*8 (~) | ||
2443 | | ins_next1 | ||
2444 | | srwi TMP1, RD, 1 | ||
2445 | | subfic TMP1, TMP1, -4 | ||
2446 | | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 | ||
2447 | | li TMP2, LJ_TCDATA | ||
2448 | | evmergelo TMP0, TMP2, TMP0 | ||
2449 | | evstddx TMP0, BASE, RA | ||
2450 | | ins_next2 | ||
2451 | |.endif | ||
2452 | break; | ||
2453 | case BC_KSHORT: | ||
2454 | | // RA = dst*8, RD = int16_literal*8 | ||
2455 | | srwi TMP1, RD, 3 | ||
2456 | | extsh TMP1, TMP1 | ||
2457 | | ins_next1 | ||
2458 | | efdcfsi TMP0, TMP1 | ||
2459 | | evstddx TMP0, BASE, RA | ||
2460 | | ins_next2 | ||
2461 | break; | ||
2462 | case BC_KNUM: | ||
2463 | | // RA = dst*8, RD = num_const*8 | ||
2464 | | evlddx TMP0, KBASE, RD | ||
2465 | | ins_next1 | ||
2466 | | evstddx TMP0, BASE, RA | ||
2467 | | ins_next2 | ||
2468 | break; | ||
2469 | case BC_KPRI: | ||
2470 | | // RA = dst*8, RD = primitive_type*8 (~) | ||
2471 | | srwi TMP1, RD, 3 | ||
2472 | | not TMP0, TMP1 | ||
2473 | | ins_next1 | ||
2474 | | stwx TMP0, BASE, RA | ||
2475 | | ins_next2 | ||
2476 | break; | ||
2477 | case BC_KNIL: | ||
2478 | | // RA = base*8, RD = end*8 | ||
2479 | | evstddx TISNIL, BASE, RA | ||
2480 | | addi RA, RA, 8 | ||
2481 | |1: | ||
2482 | | evstddx TISNIL, BASE, RA | ||
2483 | | cmpw RA, RD | ||
2484 | | addi RA, RA, 8 | ||
2485 | | blt <1 | ||
2486 | | ins_next_ | ||
2487 | break; | ||
2488 | |||
2489 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
2490 | |||
2491 | case BC_UGET: | ||
2492 | | // RA = dst*8, RD = uvnum*8 | ||
2493 | | ins_next1 | ||
2494 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
2495 | | srwi RD, RD, 1 | ||
2496 | | addi RD, RD, offsetof(GCfuncL, uvptr) | ||
2497 | | lwzx UPVAL:RB, LFUNC:RB, RD | ||
2498 | | lwz TMP1, UPVAL:RB->v | ||
2499 | | evldd TMP0, 0(TMP1) | ||
2500 | | evstddx TMP0, BASE, RA | ||
2501 | | ins_next2 | ||
2502 | break; | ||
2503 | case BC_USETV: | ||
2504 | | // RA = uvnum*8, RD = src*8 | ||
2505 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
2506 | | srwi RA, RA, 1 | ||
2507 | | addi RA, RA, offsetof(GCfuncL, uvptr) | ||
2508 | | evlddx TMP1, BASE, RD | ||
2509 | | lwzx UPVAL:RB, LFUNC:RB, RA | ||
2510 | | lbz TMP3, UPVAL:RB->marked | ||
2511 | | lwz CARG2, UPVAL:RB->v | ||
2512 | | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | ||
2513 | | lbz TMP0, UPVAL:RB->closed | ||
2514 | | evmergehi TMP2, TMP1, TMP1 | ||
2515 | | evstdd TMP1, 0(CARG2) | ||
2516 | | cmplwi cr1, TMP0, 0 | ||
2517 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | ||
2518 | | subi TMP2, TMP2, (LJ_TISNUM+1) | ||
2519 | | bne >2 // Upvalue is closed and black? | ||
2520 | |1: | ||
2521 | | ins_next | ||
2522 | | | ||
2523 | |2: // Check if new value is collectable. | ||
2524 | | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1) | ||
2525 | | bge <1 // tvisgcv(v) | ||
2526 | | lbz TMP3, GCOBJ:TMP1->gch.marked | ||
2527 | | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | ||
2528 | | la CARG1, GG_DISP2G(DISPATCH) | ||
2529 | | // Crossed a write barrier. Move the barrier forward. | ||
2530 | | beq <1 | ||
2531 | | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
2532 | | b <1 | ||
2533 | break; | ||
2534 | case BC_USETS: | ||
2535 | | // RA = uvnum*8, RD = str_const*8 (~) | ||
2536 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
2537 | | srwi TMP1, RD, 1 | ||
2538 | | srwi RA, RA, 1 | ||
2539 | | subfic TMP1, TMP1, -4 | ||
2540 | | addi RA, RA, offsetof(GCfuncL, uvptr) | ||
2541 | | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 | ||
2542 | | lwzx UPVAL:RB, LFUNC:RB, RA | ||
2543 | | evmergelo STR:TMP1, TISSTR, STR:TMP1 | ||
2544 | | lbz TMP3, UPVAL:RB->marked | ||
2545 | | lwz CARG2, UPVAL:RB->v | ||
2546 | | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | ||
2547 | | lbz TMP3, STR:TMP1->marked | ||
2548 | | lbz TMP2, UPVAL:RB->closed | ||
2549 | | evstdd STR:TMP1, 0(CARG2) | ||
2550 | | bne >2 | ||
2551 | |1: | ||
2552 | | ins_next | ||
2553 | | | ||
2554 | |2: // Check if string is white and ensure upvalue is closed. | ||
2555 | | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) | ||
2556 | | cmplwi cr1, TMP2, 0 | ||
2557 | | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | ||
2558 | | la CARG1, GG_DISP2G(DISPATCH) | ||
2559 | | // Crossed a write barrier. Move the barrier forward. | ||
2560 | | beq <1 | ||
2561 | | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
2562 | | b <1 | ||
2563 | break; | ||
2564 | case BC_USETN: | ||
2565 | | // RA = uvnum*8, RD = num_const*8 | ||
2566 | | ins_next1 | ||
2567 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
2568 | | srwi RA, RA, 1 | ||
2569 | | addi RA, RA, offsetof(GCfuncL, uvptr) | ||
2570 | | evlddx TMP0, KBASE, RD | ||
2571 | | lwzx UPVAL:RB, LFUNC:RB, RA | ||
2572 | | lwz TMP1, UPVAL:RB->v | ||
2573 | | evstdd TMP0, 0(TMP1) | ||
2574 | | ins_next2 | ||
2575 | break; | ||
2576 | case BC_USETP: | ||
2577 | | // RA = uvnum*8, RD = primitive_type*8 (~) | ||
2578 | | ins_next1 | ||
2579 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | ||
2580 | | srwi RA, RA, 1 | ||
2581 | | addi RA, RA, offsetof(GCfuncL, uvptr) | ||
2582 | | srwi TMP0, RD, 3 | ||
2583 | | lwzx UPVAL:RB, LFUNC:RB, RA | ||
2584 | | not TMP0, TMP0 | ||
2585 | | lwz TMP1, UPVAL:RB->v | ||
2586 | | stw TMP0, 0(TMP1) | ||
2587 | | ins_next2 | ||
2588 | break; | ||
2589 | |||
2590 | case BC_UCLO: | ||
2591 | | // RA = level*8, RD = target | ||
2592 | | lwz TMP1, L->openupval | ||
2593 | | branch_RD // Do this first since RD is not saved. | ||
2594 | | stw BASE, L->base | ||
2595 | | cmplwi TMP1, 0 | ||
2596 | | mr CARG1, L | ||
2597 | | beq >1 | ||
2598 | | add CARG2, BASE, RA | ||
2599 | | bl extern lj_func_closeuv // (lua_State *L, TValue *level) | ||
2600 | | lwz BASE, L->base | ||
2601 | |1: | ||
2602 | | ins_next | ||
2603 | break; | ||
2604 | |||
2605 | case BC_FNEW: | ||
2606 | | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) | ||
2607 | | srwi TMP1, RD, 1 | ||
2608 | | stw BASE, L->base | ||
2609 | | subfic TMP1, TMP1, -4 | ||
2610 | | stw PC, SAVE_PC | ||
2611 | | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 | ||
2612 | | mr CARG1, L | ||
2613 | | lwz CARG3, FRAME_FUNC(BASE) | ||
2614 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
2615 | | bl extern lj_func_newL_gc | ||
2616 | | // Returns GCfuncL *. | ||
2617 | | lwz BASE, L->base | ||
2618 | | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1 | ||
2619 | | evstddx LFUNC:CRET1, BASE, RA | ||
2620 | | ins_next | ||
2621 | break; | ||
2622 | |||
2623 | /* -- Table ops --------------------------------------------------------- */ | ||
2624 | |||
2625 | case BC_TNEW: | ||
2626 | case BC_TDUP: | ||
2627 | | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) | ||
2628 | | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) | ||
2629 | | mr CARG1, L | ||
2630 | | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | ||
2631 | | stw BASE, L->base | ||
2632 | | cmplw TMP0, TMP1 | ||
2633 | | stw PC, SAVE_PC | ||
2634 | | bge >5 | ||
2635 | |1: | ||
2636 | if (op == BC_TNEW) { | ||
2637 | | rlwinm CARG2, RD, 29, 21, 31 | ||
2638 | | rlwinm CARG3, RD, 18, 27, 31 | ||
2639 | | cmpwi CARG2, 0x7ff | ||
2640 | | li TMP1, 0x801 | ||
2641 | | iseleq CARG2, TMP1, CARG2 | ||
2642 | | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) | ||
2643 | | // Returns Table *. | ||
2644 | } else { | ||
2645 | | srwi TMP1, RD, 1 | ||
2646 | | subfic TMP1, TMP1, -4 | ||
2647 | | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 | ||
2648 | | bl extern lj_tab_dup // (lua_State *L, Table *kt) | ||
2649 | | // Returns Table *. | ||
2650 | } | ||
2651 | | lwz BASE, L->base | ||
2652 | | evmergelo TAB:CRET1, TISTAB, TAB:CRET1 | ||
2653 | | evstddx TAB:CRET1, BASE, RA | ||
2654 | | ins_next | ||
2655 | |5: | ||
2656 | | mr SAVE0, RD | ||
2657 | | bl extern lj_gc_step_fixtop // (lua_State *L) | ||
2658 | | mr RD, SAVE0 | ||
2659 | | mr CARG1, L | ||
2660 | | b <1 | ||
2661 | break; | ||
2662 | |||
2663 | case BC_GGET: | ||
2664 | | // RA = dst*8, RD = str_const*8 (~) | ||
2665 | case BC_GSET: | ||
2666 | | // RA = src*8, RD = str_const*8 (~) | ||
2667 | | lwz LFUNC:TMP2, FRAME_FUNC(BASE) | ||
2668 | | srwi TMP1, RD, 1 | ||
2669 | | lwz TAB:RB, LFUNC:TMP2->env | ||
2670 | | subfic TMP1, TMP1, -4 | ||
2671 | | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 | ||
2672 | if (op == BC_GGET) { | ||
2673 | | b ->BC_TGETS_Z | ||
2674 | } else { | ||
2675 | | b ->BC_TSETS_Z | ||
2676 | } | ||
2677 | break; | ||
2678 | |||
2679 | case BC_TGETV: | ||
2680 | | // RA = dst*8, RB = table*8, RC = key*8 | ||
2681 | | evlddx TAB:RB, BASE, RB | ||
2682 | | evlddx RC, BASE, RC | ||
2683 | | checktab TAB:RB | ||
2684 | | checkfail ->vmeta_tgetv | ||
2685 | | checknum RC | ||
2686 | | checkfail >5 | ||
2687 | | // Convert number key to integer | ||
2688 | | efdctsi TMP2, RC | ||
2689 | | lwz TMP0, TAB:RB->asize | ||
2690 | | efdcfsi TMP1, TMP2 | ||
2691 | | cmplw cr0, TMP0, TMP2 | ||
2692 | | efdcmpeq cr1, RC, TMP1 | ||
2693 | | lwz TMP1, TAB:RB->array | ||
2694 | | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt | ||
2695 | | slwi TMP2, TMP2, 3 | ||
2696 | | ble ->vmeta_tgetv // Integer key and in array part? | ||
2697 | | evlddx TMP1, TMP1, TMP2 | ||
2698 | | checknil TMP1 | ||
2699 | | checkok >2 | ||
2700 | |1: | ||
2701 | | evstddx TMP1, BASE, RA | ||
2702 | | ins_next | ||
2703 | | | ||
2704 | |2: // Check for __index if table value is nil. | ||
2705 | | lwz TAB:TMP2, TAB:RB->metatable | ||
2706 | | cmplwi TAB:TMP2, 0 | ||
2707 | | beq <1 // No metatable: done. | ||
2708 | | lbz TMP0, TAB:TMP2->nomm | ||
2709 | | andi. TMP0, TMP0, 1<<MM_index | ||
2710 | | bne <1 // 'no __index' flag set: done. | ||
2711 | | b ->vmeta_tgetv | ||
2712 | | | ||
2713 | |5: | ||
2714 | | checkstr STR:RC // String key? | ||
2715 | | checkok ->BC_TGETS_Z | ||
2716 | | b ->vmeta_tgetv | ||
2717 | break; | ||
2718 | case BC_TGETS: | ||
2719 | | // RA = dst*8, RB = table*8, RC = str_const*8 (~) | ||
2720 | | evlddx TAB:RB, BASE, RB | ||
2721 | | srwi TMP1, RC, 1 | ||
2722 | | checktab TAB:RB | ||
2723 | | subfic TMP1, TMP1, -4 | ||
2724 | | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 | ||
2725 | | checkfail ->vmeta_tgets1 | ||
2726 | |->BC_TGETS_Z: | ||
2727 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | ||
2728 | | lwz TMP0, TAB:RB->hmask | ||
2729 | | lwz TMP1, STR:RC->hash | ||
2730 | | lwz NODE:TMP2, TAB:RB->node | ||
2731 | | evmergelo STR:RC, TISSTR, STR:RC | ||
2732 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | ||
2733 | | slwi TMP0, TMP1, 5 | ||
2734 | | slwi TMP1, TMP1, 3 | ||
2735 | | sub TMP1, TMP0, TMP1 | ||
2736 | | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | ||
2737 | |1: | ||
2738 | | evldd TMP0, NODE:TMP2->key | ||
2739 | | evldd TMP1, NODE:TMP2->val | ||
2740 | | evcmpeq TMP0, STR:RC | ||
2741 | | checkanyfail >4 | ||
2742 | | checknil TMP1 | ||
2743 | | checkok >5 // Key found, but nil value? | ||
2744 | |3: | ||
2745 | | evstddx TMP1, BASE, RA | ||
2746 | | ins_next | ||
2747 | | | ||
2748 | |4: // Follow hash chain. | ||
2749 | | lwz NODE:TMP2, NODE:TMP2->next | ||
2750 | | cmplwi NODE:TMP2, 0 | ||
2751 | | bne <1 | ||
2752 | | // End of hash chain: key not found, nil result. | ||
2753 | | evmr TMP1, TISNIL | ||
2754 | | | ||
2755 | |5: // Check for __index if table value is nil. | ||
2756 | | lwz TAB:TMP2, TAB:RB->metatable | ||
2757 | | cmplwi TAB:TMP2, 0 | ||
2758 | | beq <3 // No metatable: done. | ||
2759 | | lbz TMP0, TAB:TMP2->nomm | ||
2760 | | andi. TMP0, TMP0, 1<<MM_index | ||
2761 | | bne <3 // 'no __index' flag set: done. | ||
2762 | | b ->vmeta_tgets | ||
2763 | break; | ||
2764 | case BC_TGETB: | ||
2765 | | // RA = dst*8, RB = table*8, RC = index*8 | ||
2766 | | evlddx TAB:RB, BASE, RB | ||
2767 | | srwi TMP0, RC, 3 | ||
2768 | | checktab TAB:RB | ||
2769 | | checkfail ->vmeta_tgetb | ||
2770 | | lwz TMP1, TAB:RB->asize | ||
2771 | | lwz TMP2, TAB:RB->array | ||
2772 | | cmplw TMP0, TMP1 | ||
2773 | | bge ->vmeta_tgetb | ||
2774 | | evlddx TMP1, TMP2, RC | ||
2775 | | checknil TMP1 | ||
2776 | | checkok >5 | ||
2777 | |1: | ||
2778 | | ins_next1 | ||
2779 | | evstddx TMP1, BASE, RA | ||
2780 | | ins_next2 | ||
2781 | | | ||
2782 | |5: // Check for __index if table value is nil. | ||
2783 | | lwz TAB:TMP2, TAB:RB->metatable | ||
2784 | | cmplwi TAB:TMP2, 0 | ||
2785 | | beq <1 // No metatable: done. | ||
2786 | | lbz TMP2, TAB:TMP2->nomm | ||
2787 | | andi. TMP2, TMP2, 1<<MM_index | ||
2788 | | bne <1 // 'no __index' flag set: done. | ||
2789 | | b ->vmeta_tgetb // Caveat: preserve TMP0! | ||
2790 | break; | ||
2791 | |||
2792 | case BC_TSETV: | ||
2793 | | // RA = src*8, RB = table*8, RC = key*8 | ||
2794 | | evlddx TAB:RB, BASE, RB | ||
2795 | | evlddx RC, BASE, RC | ||
2796 | | checktab TAB:RB | ||
2797 | | checkfail ->vmeta_tsetv | ||
2798 | | checknum RC | ||
2799 | | checkfail >5 | ||
2800 | | // Convert number key to integer | ||
2801 | | efdctsi TMP2, RC | ||
2802 | | evlddx SAVE0, BASE, RA | ||
2803 | | lwz TMP0, TAB:RB->asize | ||
2804 | | efdcfsi TMP1, TMP2 | ||
2805 | | cmplw cr0, TMP0, TMP2 | ||
2806 | | efdcmpeq cr1, RC, TMP1 | ||
2807 | | lwz TMP1, TAB:RB->array | ||
2808 | | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt | ||
2809 | | slwi TMP0, TMP2, 3 | ||
2810 | | ble ->vmeta_tsetv // Integer key and in array part? | ||
2811 | | lbz TMP3, TAB:RB->marked | ||
2812 | | evlddx TMP2, TMP1, TMP0 | ||
2813 | | checknil TMP2 | ||
2814 | | checkok >3 | ||
2815 | |1: | ||
2816 | | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table) | ||
2817 | | evstddx SAVE0, TMP1, TMP0 | ||
2818 | | bne >7 | ||
2819 | |2: | ||
2820 | | ins_next | ||
2821 | | | ||
2822 | |3: // Check for __newindex if previous value is nil. | ||
2823 | | lwz TAB:TMP2, TAB:RB->metatable | ||
2824 | | cmplwi TAB:TMP2, 0 | ||
2825 | | beq <1 // No metatable: done. | ||
2826 | | lbz TMP2, TAB:TMP2->nomm | ||
2827 | | andi. TMP2, TMP2, 1<<MM_newindex | ||
2828 | | bne <1 // 'no __newindex' flag set: done. | ||
2829 | | b ->vmeta_tsetv | ||
2830 | | | ||
2831 | |5: | ||
2832 | | checkstr STR:RC // String key? | ||
2833 | | checkok ->BC_TSETS_Z | ||
2834 | | b ->vmeta_tsetv | ||
2835 | | | ||
2836 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
2837 | | barrierback TAB:RB, TMP3, TMP0 | ||
2838 | | b <2 | ||
2839 | break; | ||
2840 | case BC_TSETS: | ||
2841 | | // RA = src*8, RB = table*8, RC = str_const*8 (~) | ||
2842 | | evlddx TAB:RB, BASE, RB | ||
2843 | | srwi TMP1, RC, 1 | ||
2844 | | checktab TAB:RB | ||
2845 | | subfic TMP1, TMP1, -4 | ||
2846 | | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 | ||
2847 | | checkfail ->vmeta_tsets1 | ||
2848 | |->BC_TSETS_Z: | ||
2849 | | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 | ||
2850 | | lwz TMP0, TAB:RB->hmask | ||
2851 | | lwz TMP1, STR:RC->hash | ||
2852 | | lwz NODE:TMP2, TAB:RB->node | ||
2853 | | evmergelo STR:RC, TISSTR, STR:RC | ||
2854 | | stb ZERO, TAB:RB->nomm // Clear metamethod cache. | ||
2855 | | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask | ||
2856 | | evlddx SAVE0, BASE, RA | ||
2857 | | slwi TMP0, TMP1, 5 | ||
2858 | | slwi TMP1, TMP1, 3 | ||
2859 | | sub TMP1, TMP0, TMP1 | ||
2860 | | lbz TMP3, TAB:RB->marked | ||
2861 | | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | ||
2862 | |1: | ||
2863 | | evldd TMP0, NODE:TMP2->key | ||
2864 | | evldd TMP1, NODE:TMP2->val | ||
2865 | | evcmpeq TMP0, STR:RC | ||
2866 | | checkanyfail >5 | ||
2867 | | checknil TMP1 | ||
2868 | | checkok >4 // Key found, but nil value? | ||
2869 | |2: | ||
2870 | | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | ||
2871 | | evstdd SAVE0, NODE:TMP2->val | ||
2872 | | bne >7 | ||
2873 | |3: | ||
2874 | | ins_next | ||
2875 | | | ||
2876 | |4: // Check for __newindex if previous value is nil. | ||
2877 | | lwz TAB:TMP1, TAB:RB->metatable | ||
2878 | | cmplwi TAB:TMP1, 0 | ||
2879 | | beq <2 // No metatable: done. | ||
2880 | | lbz TMP0, TAB:TMP1->nomm | ||
2881 | | andi. TMP0, TMP0, 1<<MM_newindex | ||
2882 | | bne <2 // 'no __newindex' flag set: done. | ||
2883 | | b ->vmeta_tsets | ||
2884 | | | ||
2885 | |5: // Follow hash chain. | ||
2886 | | lwz NODE:TMP2, NODE:TMP2->next | ||
2887 | | cmplwi NODE:TMP2, 0 | ||
2888 | | bne <1 | ||
2889 | | // End of hash chain: key not found, add a new one. | ||
2890 | | | ||
2891 | | // But check for __newindex first. | ||
2892 | | lwz TAB:TMP1, TAB:RB->metatable | ||
2893 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | ||
2894 | | stw PC, SAVE_PC | ||
2895 | | mr CARG1, L | ||
2896 | | cmplwi TAB:TMP1, 0 | ||
2897 | | stw BASE, L->base | ||
2898 | | beq >6 // No metatable: continue. | ||
2899 | | lbz TMP0, TAB:TMP1->nomm | ||
2900 | | andi. TMP0, TMP0, 1<<MM_newindex | ||
2901 | | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
2902 | |6: | ||
2903 | | mr CARG2, TAB:RB | ||
2904 | | evstdd STR:RC, 0(CARG3) | ||
2905 | | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
2906 | | // Returns TValue *. | ||
2907 | | lwz BASE, L->base | ||
2908 | | evstdd SAVE0, 0(CRET1) | ||
2909 | | b <3 // No 2nd write barrier needed. | ||
2910 | | | ||
2911 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
2912 | | barrierback TAB:RB, TMP3, TMP0 | ||
2913 | | b <3 | ||
2914 | break; | ||
2915 | case BC_TSETB: | ||
2916 | | // RA = src*8, RB = table*8, RC = index*8 | ||
2917 | | evlddx TAB:RB, BASE, RB | ||
2918 | | srwi TMP0, RC, 3 | ||
2919 | | checktab TAB:RB | ||
2920 | | checkfail ->vmeta_tsetb | ||
2921 | | lwz TMP1, TAB:RB->asize | ||
2922 | | lwz TMP2, TAB:RB->array | ||
2923 | | lbz TMP3, TAB:RB->marked | ||
2924 | | cmplw TMP0, TMP1 | ||
2925 | | evlddx SAVE0, BASE, RA | ||
2926 | | bge ->vmeta_tsetb | ||
2927 | | evlddx TMP1, TMP2, RC | ||
2928 | | checknil TMP1 | ||
2929 | | checkok >5 | ||
2930 | |1: | ||
2931 | | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | ||
2932 | | evstddx SAVE0, TMP2, RC | ||
2933 | | bne >7 | ||
2934 | |2: | ||
2935 | | ins_next | ||
2936 | | | ||
2937 | |5: // Check for __newindex if previous value is nil. | ||
2938 | | lwz TAB:TMP1, TAB:RB->metatable | ||
2939 | | cmplwi TAB:TMP1, 0 | ||
2940 | | beq <1 // No metatable: done. | ||
2941 | | lbz TMP1, TAB:TMP1->nomm | ||
2942 | | andi. TMP1, TMP1, 1<<MM_newindex | ||
2943 | | bne <1 // 'no __newindex' flag set: done. | ||
2944 | | b ->vmeta_tsetb // Caveat: preserve TMP0! | ||
2945 | | | ||
2946 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
2947 | | barrierback TAB:RB, TMP3, TMP0 | ||
2948 | | b <2 | ||
2949 | break; | ||
2950 | |||
2951 | case BC_TSETM: | ||
2952 | | // RA = base*8 (table at base-1), RD = num_const*8 (start index) | ||
2953 | | add RA, BASE, RA | ||
2954 | |1: | ||
2955 | | add TMP3, KBASE, RD | ||
2956 | | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. | ||
2957 | | addic. TMP0, MULTRES, -8 | ||
2958 | | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. | ||
2959 | | srwi CARG3, TMP0, 3 | ||
2960 | | beq >4 // Nothing to copy? | ||
2961 | | add CARG3, CARG3, TMP3 | ||
2962 | | lwz TMP2, TAB:CARG2->asize | ||
2963 | | slwi TMP1, TMP3, 3 | ||
2964 | | lbz TMP3, TAB:CARG2->marked | ||
2965 | | cmplw CARG3, TMP2 | ||
2966 | | add TMP2, RA, TMP0 | ||
2967 | | lwz TMP0, TAB:CARG2->array | ||
2968 | | bgt >5 | ||
2969 | | add TMP1, TMP1, TMP0 | ||
2970 | | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) | ||
2971 | |3: // Copy result slots to table. | ||
2972 | | evldd TMP0, 0(RA) | ||
2973 | | addi RA, RA, 8 | ||
2974 | | cmpw cr1, RA, TMP2 | ||
2975 | | evstdd TMP0, 0(TMP1) | ||
2976 | | addi TMP1, TMP1, 8 | ||
2977 | | blt cr1, <3 | ||
2978 | | bne >7 | ||
2979 | |4: | ||
2980 | | ins_next | ||
2981 | | | ||
2982 | |5: // Need to resize array part. | ||
2983 | | stw BASE, L->base | ||
2984 | | mr CARG1, L | ||
2985 | | stw PC, SAVE_PC | ||
2986 | | mr SAVE0, RD | ||
2987 | | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
2988 | | // Must not reallocate the stack. | ||
2989 | | mr RD, SAVE0 | ||
2990 | | b <1 | ||
2991 | | | ||
2992 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
2993 | | barrierback TAB:CARG2, TMP3, TMP0 | ||
2994 | | b <4 | ||
2995 | break; | ||
2996 | |||
2997 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
2998 | |||
2999 | case BC_CALLM: | ||
3000 | | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 | ||
3001 | | add NARGS8:RC, NARGS8:RC, MULTRES | ||
3002 | | // Fall through. Assumes BC_CALL follows. | ||
3003 | break; | ||
3004 | case BC_CALL: | ||
3005 | | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 | ||
3006 | | evlddx LFUNC:RB, BASE, RA | ||
3007 | | mr TMP2, BASE | ||
3008 | | add BASE, BASE, RA | ||
3009 | | subi NARGS8:RC, NARGS8:RC, 8 | ||
3010 | | checkfunc LFUNC:RB | ||
3011 | | addi BASE, BASE, 8 | ||
3012 | | checkfail ->vmeta_call | ||
3013 | | ins_call | ||
3014 | break; | ||
3015 | |||
3016 | case BC_CALLMT: | ||
3017 | | // RA = base*8, (RB = 0,) RC = extra_nargs*8 | ||
3018 | | add NARGS8:RC, NARGS8:RC, MULTRES | ||
3019 | | // Fall through. Assumes BC_CALLT follows. | ||
3020 | break; | ||
3021 | case BC_CALLT: | ||
3022 | | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 | ||
3023 | | evlddx LFUNC:RB, BASE, RA | ||
3024 | | add RA, BASE, RA | ||
3025 | | lwz TMP1, FRAME_PC(BASE) | ||
3026 | | subi NARGS8:RC, NARGS8:RC, 8 | ||
3027 | | checkfunc LFUNC:RB | ||
3028 | | addi RA, RA, 8 | ||
3029 | | checkfail ->vmeta_callt | ||
3030 | |->BC_CALLT_Z: | ||
3031 | | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. | ||
3032 | | lbz TMP3, LFUNC:RB->ffid | ||
3033 | | xori TMP2, TMP1, FRAME_VARG | ||
3034 | | cmplwi cr1, NARGS8:RC, 0 | ||
3035 | | bne >7 | ||
3036 | |1: | ||
3037 | | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. | ||
3038 | | li TMP2, 0 | ||
3039 | | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? | ||
3040 | | beq cr1, >3 | ||
3041 | |2: | ||
3042 | | addi TMP3, TMP2, 8 | ||
3043 | | evlddx TMP0, RA, TMP2 | ||
3044 | | cmplw cr1, TMP3, NARGS8:RC | ||
3045 | | evstddx TMP0, BASE, TMP2 | ||
3046 | | mr TMP2, TMP3 | ||
3047 | | bne cr1, <2 | ||
3048 | |3: | ||
3049 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt | ||
3050 | | beq >5 | ||
3051 | |4: | ||
3052 | | ins_callt | ||
3053 | | | ||
3054 | |5: // Tailcall to a fast function with a Lua frame below. | ||
3055 | | lwz INS, -4(TMP1) | ||
3056 | | decode_RA8 RA, INS | ||
3057 | | sub TMP1, BASE, RA | ||
3058 | | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) | ||
3059 | | lwz TMP1, LFUNC:TMP1->pc | ||
3060 | | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. | ||
3061 | | b <4 | ||
3062 | | | ||
3063 | |7: // Tailcall from a vararg function. | ||
3064 | | andi. TMP0, TMP2, FRAME_TYPEP | ||
3065 | | bne <1 // Vararg frame below? | ||
3066 | | sub BASE, BASE, TMP2 // Relocate BASE down. | ||
3067 | | lwz TMP1, FRAME_PC(BASE) | ||
3068 | | andi. TMP0, TMP1, FRAME_TYPE | ||
3069 | | b <1 | ||
3070 | break; | ||
3071 | |||
3072 | case BC_ITERC: | ||
3073 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) | ||
3074 | | subi RA, RA, 24 // evldd doesn't support neg. offsets. | ||
3075 | | mr TMP2, BASE | ||
3076 | | evlddx LFUNC:RB, BASE, RA | ||
3077 | | add BASE, BASE, RA | ||
3078 | | evldd TMP0, 8(BASE) | ||
3079 | | evldd TMP1, 16(BASE) | ||
3080 | | evstdd LFUNC:RB, 24(BASE) // Copy callable. | ||
3081 | | checkfunc LFUNC:RB | ||
3082 | | evstdd TMP0, 32(BASE) // Copy state. | ||
3083 | | li NARGS8:RC, 16 // Iterators get 2 arguments. | ||
3084 | | evstdd TMP1, 40(BASE) // Copy control var. | ||
3085 | | addi BASE, BASE, 32 | ||
3086 | | checkfail ->vmeta_call | ||
3087 | | ins_call | ||
3088 | break; | ||
3089 | |||
3090 | case BC_ITERN: | ||
3091 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | ||
3092 | |.if JIT | ||
3093 | | // NYI: add hotloop, record BC_ITERN. | ||
3094 | |.endif | ||
3095 | | add RA, BASE, RA | ||
3096 | | lwz TAB:RB, -12(RA) | ||
3097 | | lwz RC, -4(RA) // Get index from control var. | ||
3098 | | lwz TMP0, TAB:RB->asize | ||
3099 | | lwz TMP1, TAB:RB->array | ||
3100 | | addi PC, PC, 4 | ||
3101 | |1: // Traverse array part. | ||
3102 | | cmplw RC, TMP0 | ||
3103 | | slwi TMP3, RC, 3 | ||
3104 | | bge >5 // Index points after array part? | ||
3105 | | evlddx TMP2, TMP1, TMP3 | ||
3106 | | checknil TMP2 | ||
3107 | | lwz INS, -4(PC) | ||
3108 | | checkok >4 | ||
3109 | | efdcfsi TMP0, RC | ||
3110 | | addi RC, RC, 1 | ||
3111 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | ||
3112 | | evstdd TMP2, 8(RA) | ||
3113 | | decode_RD4 TMP1, INS | ||
3114 | | stw RC, -4(RA) // Update control var. | ||
3115 | | add PC, TMP1, TMP3 | ||
3116 | | evstdd TMP0, 0(RA) | ||
3117 | |3: | ||
3118 | | ins_next | ||
3119 | | | ||
3120 | |4: // Skip holes in array part. | ||
3121 | | addi RC, RC, 1 | ||
3122 | | b <1 | ||
3123 | | | ||
3124 | |5: // Traverse hash part. | ||
3125 | | lwz TMP1, TAB:RB->hmask | ||
3126 | | sub RC, RC, TMP0 | ||
3127 | | lwz TMP2, TAB:RB->node | ||
3128 | |6: | ||
3129 | | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. | ||
3130 | | slwi TMP3, RC, 5 | ||
3131 | | bgt <3 | ||
3132 | | slwi RB, RC, 3 | ||
3133 | | sub TMP3, TMP3, RB | ||
3134 | | evlddx RB, TMP2, TMP3 | ||
3135 | | add NODE:TMP3, TMP2, TMP3 | ||
3136 | | checknil RB | ||
3137 | | lwz INS, -4(PC) | ||
3138 | | checkok >7 | ||
3139 | | evldd TMP3, NODE:TMP3->key | ||
3140 | | addis TMP2, PC, -(BCBIAS_J*4 >> 16) | ||
3141 | | evstdd RB, 8(RA) | ||
3142 | | add RC, RC, TMP0 | ||
3143 | | decode_RD4 TMP1, INS | ||
3144 | | evstdd TMP3, 0(RA) | ||
3145 | | addi RC, RC, 1 | ||
3146 | | add PC, TMP1, TMP2 | ||
3147 | | stw RC, -4(RA) // Update control var. | ||
3148 | | b <3 | ||
3149 | | | ||
3150 | |7: // Skip holes in hash part. | ||
3151 | | addi RC, RC, 1 | ||
3152 | | b <6 | ||
3153 | break; | ||
3154 | |||
3155 | case BC_ISNEXT: | ||
3156 | | // RA = base*8, RD = target (points to ITERN) | ||
3157 | | add RA, BASE, RA | ||
3158 | | li TMP2, -24 | ||
3159 | | evlddx CFUNC:TMP1, RA, TMP2 | ||
3160 | | lwz TMP2, -16(RA) | ||
3161 | | lwz TMP3, -8(RA) | ||
3162 | | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1 | ||
3163 | | cmpwi cr0, TMP2, LJ_TTAB | ||
3164 | | cmpwi cr1, TMP0, LJ_TFUNC | ||
3165 | | cmpwi cr6, TMP3, LJ_TNIL | ||
3166 | | bne cr1, >5 | ||
3167 | | lbz TMP1, CFUNC:TMP1->ffid | ||
3168 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq | ||
3169 | | cmpwi cr7, TMP1, FF_next_N | ||
3170 | | srwi TMP0, RD, 1 | ||
3171 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | ||
3172 | | add TMP3, PC, TMP0 | ||
3173 | | bne cr0, >5 | ||
3174 | | lus TMP1, 0xfffe | ||
3175 | | ori TMP1, TMP1, 0x7fff | ||
3176 | | stw ZERO, -4(RA) // Initialize control var. | ||
3177 | | stw TMP1, -8(RA) | ||
3178 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) | ||
3179 | |1: | ||
3180 | | ins_next | ||
3181 | |5: // Despecialize bytecode if any of the checks fail. | ||
3182 | | li TMP0, BC_JMP | ||
3183 | | li TMP1, BC_ITERC | ||
3184 | | stb TMP0, -1(PC) | ||
3185 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) | ||
3186 | | stb TMP1, 3(PC) | ||
3187 | | b <1 | ||
3188 | break; | ||
3189 | |||
3190 | case BC_VARG: | ||
3191 | | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 | ||
3192 | | lwz TMP0, FRAME_PC(BASE) | ||
3193 | | add RC, BASE, RC | ||
3194 | | add RA, BASE, RA | ||
3195 | | addi RC, RC, FRAME_VARG | ||
3196 | | add TMP2, RA, RB | ||
3197 | | subi TMP3, BASE, 8 // TMP3 = vtop | ||
3198 | | sub RC, RC, TMP0 // RC = vbase | ||
3199 | | // Note: RC may now be even _above_ BASE if nargs was < numparams. | ||
3200 | | cmplwi cr1, RB, 0 | ||
3201 | | sub. TMP1, TMP3, RC | ||
3202 | | beq cr1, >5 // Copy all varargs? | ||
3203 | | subi TMP2, TMP2, 16 | ||
3204 | | ble >2 // No vararg slots? | ||
3205 | |1: // Copy vararg slots to destination slots. | ||
3206 | | evldd TMP0, 0(RC) | ||
3207 | | addi RC, RC, 8 | ||
3208 | | evstdd TMP0, 0(RA) | ||
3209 | | cmplw RA, TMP2 | ||
3210 | | cmplw cr1, RC, TMP3 | ||
3211 | | bge >3 // All destination slots filled? | ||
3212 | | addi RA, RA, 8 | ||
3213 | | blt cr1, <1 // More vararg slots? | ||
3214 | |2: // Fill up remainder with nil. | ||
3215 | | evstdd TISNIL, 0(RA) | ||
3216 | | cmplw RA, TMP2 | ||
3217 | | addi RA, RA, 8 | ||
3218 | | blt <2 | ||
3219 | |3: | ||
3220 | | ins_next | ||
3221 | | | ||
3222 | |5: // Copy all varargs. | ||
3223 | | lwz TMP0, L->maxstack | ||
3224 | | li MULTRES, 8 // MULTRES = (0+1)*8 | ||
3225 | | ble <3 // No vararg slots? | ||
3226 | | add TMP2, RA, TMP1 | ||
3227 | | cmplw TMP2, TMP0 | ||
3228 | | addi MULTRES, TMP1, 8 | ||
3229 | | bgt >7 | ||
3230 | |6: | ||
3231 | | evldd TMP0, 0(RC) | ||
3232 | | addi RC, RC, 8 | ||
3233 | | evstdd TMP0, 0(RA) | ||
3234 | | cmplw RC, TMP3 | ||
3235 | | addi RA, RA, 8 | ||
3236 | | blt <6 // More vararg slots? | ||
3237 | | b <3 | ||
3238 | | | ||
3239 | |7: // Grow stack for varargs. | ||
3240 | | mr CARG1, L | ||
3241 | | stw RA, L->top | ||
3242 | | sub SAVE0, RC, BASE // Need delta, because BASE may change. | ||
3243 | | stw BASE, L->base | ||
3244 | | sub RA, RA, BASE | ||
3245 | | stw PC, SAVE_PC | ||
3246 | | srwi CARG2, TMP1, 3 | ||
3247 | | bl extern lj_state_growstack // (lua_State *L, int n) | ||
3248 | | lwz BASE, L->base | ||
3249 | | add RA, BASE, RA | ||
3250 | | add RC, BASE, SAVE0 | ||
3251 | | subi TMP3, BASE, 8 | ||
3252 | | b <6 | ||
3253 | break; | ||
3254 | |||
3255 | /* -- Returns ----------------------------------------------------------- */ | ||
3256 | |||
3257 | case BC_RETM: | ||
3258 | | // RA = results*8, RD = extra_nresults*8 | ||
3259 | | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. | ||
3260 | | // Fall through. Assumes BC_RET follows. | ||
3261 | break; | ||
3262 | |||
3263 | case BC_RET: | ||
3264 | | // RA = results*8, RD = (nresults+1)*8 | ||
3265 | | lwz PC, FRAME_PC(BASE) | ||
3266 | | add RA, BASE, RA | ||
3267 | | mr MULTRES, RD | ||
3268 | |1: | ||
3269 | | andi. TMP0, PC, FRAME_TYPE | ||
3270 | | xori TMP1, PC, FRAME_VARG | ||
3271 | | bne ->BC_RETV_Z | ||
3272 | | | ||
3273 | |->BC_RET_Z: | ||
3274 | | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return | ||
3275 | | lwz INS, -4(PC) | ||
3276 | | cmpwi RD, 8 | ||
3277 | | subi TMP2, BASE, 8 | ||
3278 | | subi RC, RD, 8 | ||
3279 | | decode_RB8 RB, INS | ||
3280 | | beq >3 | ||
3281 | | li TMP1, 0 | ||
3282 | |2: | ||
3283 | | addi TMP3, TMP1, 8 | ||
3284 | | evlddx TMP0, RA, TMP1 | ||
3285 | | cmpw TMP3, RC | ||
3286 | | evstddx TMP0, TMP2, TMP1 | ||
3287 | | beq >3 | ||
3288 | | addi TMP1, TMP3, 8 | ||
3289 | | evlddx TMP0, RA, TMP3 | ||
3290 | | cmpw TMP1, RC | ||
3291 | | evstddx TMP0, TMP2, TMP3 | ||
3292 | | bne <2 | ||
3293 | |3: | ||
3294 | |5: | ||
3295 | | cmplw RB, RD | ||
3296 | | decode_RA8 RA, INS | ||
3297 | | bgt >6 | ||
3298 | | sub BASE, TMP2, RA | ||
3299 | | lwz LFUNC:TMP1, FRAME_FUNC(BASE) | ||
3300 | | ins_next1 | ||
3301 | | lwz TMP1, LFUNC:TMP1->pc | ||
3302 | | lwz KBASE, PC2PROTO(k)(TMP1) | ||
3303 | | ins_next2 | ||
3304 | | | ||
3305 | |6: // Fill up results with nil. | ||
3306 | | subi TMP1, RD, 8 | ||
3307 | | addi RD, RD, 8 | ||
3308 | | evstddx TISNIL, TMP2, TMP1 | ||
3309 | | b <5 | ||
3310 | | | ||
3311 | |->BC_RETV_Z: // Non-standard return case. | ||
3312 | | andi. TMP2, TMP1, FRAME_TYPEP | ||
3313 | | bne ->vm_return | ||
3314 | | // Return from vararg function: relocate BASE down. | ||
3315 | | sub BASE, BASE, TMP1 | ||
3316 | | lwz PC, FRAME_PC(BASE) | ||
3317 | | b <1 | ||
3318 | break; | ||
3319 | |||
3320 | case BC_RET0: case BC_RET1: | ||
3321 | | // RA = results*8, RD = (nresults+1)*8 | ||
3322 | | lwz PC, FRAME_PC(BASE) | ||
3323 | | add RA, BASE, RA | ||
3324 | | mr MULTRES, RD | ||
3325 | | andi. TMP0, PC, FRAME_TYPE | ||
3326 | | xori TMP1, PC, FRAME_VARG | ||
3327 | | bne ->BC_RETV_Z | ||
3328 | | | ||
3329 | | lwz INS, -4(PC) | ||
3330 | | subi TMP2, BASE, 8 | ||
3331 | | decode_RB8 RB, INS | ||
3332 | if (op == BC_RET1) { | ||
3333 | | evldd TMP0, 0(RA) | ||
3334 | | evstdd TMP0, 0(TMP2) | ||
3335 | } | ||
3336 | |5: | ||
3337 | | cmplw RB, RD | ||
3338 | | decode_RA8 RA, INS | ||
3339 | | bgt >6 | ||
3340 | | sub BASE, TMP2, RA | ||
3341 | | lwz LFUNC:TMP1, FRAME_FUNC(BASE) | ||
3342 | | ins_next1 | ||
3343 | | lwz TMP1, LFUNC:TMP1->pc | ||
3344 | | lwz KBASE, PC2PROTO(k)(TMP1) | ||
3345 | | ins_next2 | ||
3346 | | | ||
3347 | |6: // Fill up results with nil. | ||
3348 | | subi TMP1, RD, 8 | ||
3349 | | addi RD, RD, 8 | ||
3350 | | evstddx TISNIL, TMP2, TMP1 | ||
3351 | | b <5 | ||
3352 | break; | ||
3353 | |||
3354 | /* -- Loops and branches ------------------------------------------------ */ | ||
3355 | |||
3356 | case BC_FORL: | ||
3357 | |.if JIT | ||
3358 | | hotloop | ||
3359 | |.endif | ||
3360 | | // Fall through. Assumes BC_IFORL follows. | ||
3361 | break; | ||
3362 | |||
3363 | case BC_JFORI: | ||
3364 | case BC_JFORL: | ||
3365 | #if !LJ_HASJIT | ||
3366 | break; | ||
3367 | #endif | ||
3368 | case BC_FORI: | ||
3369 | case BC_IFORL: | ||
3370 | | // RA = base*8, RD = target (after end of loop or start of loop) | ||
3371 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
3372 | | add RA, BASE, RA | ||
3373 | | evldd TMP1, FORL_IDX*8(RA) | ||
3374 | | evldd TMP3, FORL_STEP*8(RA) | ||
3375 | | evldd TMP2, FORL_STOP*8(RA) | ||
3376 | if (!vk) { | ||
3377 | | evcmpgtu cr0, TMP1, TISNUM | ||
3378 | | evcmpgtu cr7, TMP3, TISNUM | ||
3379 | | evcmpgtu cr1, TMP2, TISNUM | ||
3380 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt | ||
3381 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
3382 | | blt ->vmeta_for | ||
3383 | } | ||
3384 | if (vk) { | ||
3385 | | efdadd TMP1, TMP1, TMP3 | ||
3386 | | evstdd TMP1, FORL_IDX*8(RA) | ||
3387 | } | ||
3388 | | evcmpgts TMP3, TISNIL | ||
3389 | | evstdd TMP1, FORL_EXT*8(RA) | ||
3390 | | bge >2 | ||
3391 | | efdcmpgt TMP1, TMP2 | ||
3392 | |1: | ||
3393 | if (op != BC_JFORL) { | ||
3394 | | srwi RD, RD, 1 | ||
3395 | | add RD, PC, RD | ||
3396 | if (op == BC_JFORI) { | ||
3397 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | ||
3398 | } else { | ||
3399 | | addis RD, RD, -(BCBIAS_J*4 >> 16) | ||
3400 | } | ||
3401 | } | ||
3402 | if (op == BC_FORI) { | ||
3403 | | iselgt PC, RD, PC | ||
3404 | } else if (op == BC_IFORL) { | ||
3405 | | iselgt PC, PC, RD | ||
3406 | } else { | ||
3407 | | ble =>BC_JLOOP | ||
3408 | } | ||
3409 | | ins_next | ||
3410 | |2: | ||
3411 | | efdcmpgt TMP2, TMP1 | ||
3412 | | b <1 | ||
3413 | break; | ||
3414 | |||
3415 | case BC_ITERL: | ||
3416 | |.if JIT | ||
3417 | | hotloop | ||
3418 | |.endif | ||
3419 | | // Fall through. Assumes BC_IITERL follows. | ||
3420 | break; | ||
3421 | |||
3422 | case BC_JITERL: | ||
3423 | #if !LJ_HASJIT | ||
3424 | break; | ||
3425 | #endif | ||
3426 | case BC_IITERL: | ||
3427 | | // RA = base*8, RD = target | ||
3428 | | evlddx TMP1, BASE, RA | ||
3429 | | subi RA, RA, 8 | ||
3430 | | checknil TMP1 | ||
3431 | | checkok >1 // Stop if iterator returned nil. | ||
3432 | if (op == BC_JITERL) { | ||
3433 | | NYI | ||
3434 | } else { | ||
3435 | | branch_RD // Otherwise save control var + branch. | ||
3436 | | evstddx TMP1, BASE, RA | ||
3437 | } | ||
3438 | |1: | ||
3439 | | ins_next | ||
3440 | break; | ||
3441 | |||
3442 | case BC_LOOP: | ||
3443 | | // RA = base*8, RD = target (loop extent) | ||
3444 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | ||
3445 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
3446 | |.if JIT | ||
3447 | | hotloop | ||
3448 | |.endif | ||
3449 | | // Fall through. Assumes BC_ILOOP follows. | ||
3450 | break; | ||
3451 | |||
3452 | case BC_ILOOP: | ||
3453 | | // RA = base*8, RD = target (loop extent) | ||
3454 | | ins_next | ||
3455 | break; | ||
3456 | |||
3457 | case BC_JLOOP: | ||
3458 | |.if JIT | ||
3459 | | NYI | ||
3460 | |.endif | ||
3461 | break; | ||
3462 | |||
3463 | case BC_JMP: | ||
3464 | | // RA = base*8 (only used by trace recorder), RD = target | ||
3465 | | branch_RD | ||
3466 | | ins_next | ||
3467 | break; | ||
3468 | |||
3469 | /* -- Function headers -------------------------------------------------- */ | ||
3470 | |||
3471 | case BC_FUNCF: | ||
3472 | |.if JIT | ||
3473 | | hotcall | ||
3474 | |.endif | ||
3475 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | ||
3476 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. | ||
3477 | break; | ||
3478 | |||
3479 | case BC_JFUNCF: | ||
3480 | #if !LJ_HASJIT | ||
3481 | break; | ||
3482 | #endif | ||
3483 | case BC_IFUNCF: | ||
3484 | | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 | ||
3485 | | lwz TMP2, L->maxstack | ||
3486 | | lbz TMP1, -4+PC2PROTO(numparams)(PC) | ||
3487 | | lwz KBASE, -4+PC2PROTO(k)(PC) | ||
3488 | | cmplw RA, TMP2 | ||
3489 | | slwi TMP1, TMP1, 3 | ||
3490 | | bgt ->vm_growstack_l | ||
3491 | | ins_next1 | ||
3492 | |2: | ||
3493 | | cmplw NARGS8:RC, TMP1 // Check for missing parameters. | ||
3494 | | ble >3 | ||
3495 | if (op == BC_JFUNCF) { | ||
3496 | | NYI | ||
3497 | } else { | ||
3498 | | ins_next2 | ||
3499 | } | ||
3500 | | | ||
3501 | |3: // Clear missing parameters. | ||
3502 | | evstddx TISNIL, BASE, NARGS8:RC | ||
3503 | | addi NARGS8:RC, NARGS8:RC, 8 | ||
3504 | | b <2 | ||
3505 | break; | ||
3506 | |||
3507 | case BC_JFUNCV: | ||
3508 | #if !LJ_HASJIT | ||
3509 | break; | ||
3510 | #endif | ||
3511 | | NYI // NYI: compiled vararg functions | ||
3512 | break; /* NYI: compiled vararg functions. */ | ||
3513 | |||
3514 | case BC_IFUNCV: | ||
3515 | | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 | ||
3516 | | lwz TMP2, L->maxstack | ||
3517 | | add TMP1, BASE, RC | ||
3518 | | add TMP0, RA, RC | ||
3519 | | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. | ||
3520 | | addi TMP3, RC, 8+FRAME_VARG | ||
3521 | | lwz KBASE, -4+PC2PROTO(k)(PC) | ||
3522 | | cmplw TMP0, TMP2 | ||
3523 | | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. | ||
3524 | | bge ->vm_growstack_l | ||
3525 | | lbz TMP2, -4+PC2PROTO(numparams)(PC) | ||
3526 | | mr RA, BASE | ||
3527 | | mr RC, TMP1 | ||
3528 | | ins_next1 | ||
3529 | | cmpwi TMP2, 0 | ||
3530 | | addi BASE, TMP1, 8 | ||
3531 | | beq >3 | ||
3532 | |1: | ||
3533 | | cmplw RA, RC // Less args than parameters? | ||
3534 | | evldd TMP0, 0(RA) | ||
3535 | | bge >4 | ||
3536 | | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC). | ||
3537 | | addi RA, RA, 8 | ||
3538 | |2: | ||
3539 | | addic. TMP2, TMP2, -1 | ||
3540 | | evstdd TMP0, 8(TMP1) | ||
3541 | | addi TMP1, TMP1, 8 | ||
3542 | | bne <1 | ||
3543 | |3: | ||
3544 | | ins_next2 | ||
3545 | | | ||
3546 | |4: // Clear missing parameters. | ||
3547 | | evmr TMP0, TISNIL | ||
3548 | | b <2 | ||
3549 | break; | ||
3550 | |||
3551 | case BC_FUNCC: | ||
3552 | case BC_FUNCCW: | ||
3553 | | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 | ||
3554 | if (op == BC_FUNCC) { | ||
3555 | | lwz TMP3, CFUNC:RB->f | ||
3556 | } else { | ||
3557 | | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH) | ||
3558 | } | ||
3559 | | add TMP1, RA, NARGS8:RC | ||
3560 | | lwz TMP2, L->maxstack | ||
3561 | | add RC, BASE, NARGS8:RC | ||
3562 | | stw BASE, L->base | ||
3563 | | cmplw TMP1, TMP2 | ||
3564 | | stw RC, L->top | ||
3565 | | li_vmstate C | ||
3566 | | mtctr TMP3 | ||
3567 | if (op == BC_FUNCCW) { | ||
3568 | | lwz CARG2, CFUNC:RB->f | ||
3569 | } | ||
3570 | | mr CARG1, L | ||
3571 | | bgt ->vm_growstack_c // Need to grow stack. | ||
3572 | | st_vmstate | ||
3573 | | bctrl // (lua_State *L [, lua_CFunction f]) | ||
3574 | | // Returns nresults. | ||
3575 | | lwz TMP1, L->top | ||
3576 | | slwi RD, CRET1, 3 | ||
3577 | | lwz BASE, L->base | ||
3578 | | li_vmstate INTERP | ||
3579 | | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. | ||
3580 | | sub RA, TMP1, RD // RA = L->top - nresults*8 | ||
3581 | | st_vmstate | ||
3582 | | b ->vm_returnc | ||
3583 | break; | ||
3584 | |||
3585 | /* ---------------------------------------------------------------------- */ | ||
3586 | |||
3587 | default: | ||
3588 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
3589 | exit(2); | ||
3590 | break; | ||
3591 | } | ||
3592 | } | ||
3593 | |||
3594 | static int build_backend(BuildCtx *ctx) /* Emit the subroutines, then one code fragment per bytecode opcode; returns BC__MAX. */ | ||
3595 | { | ||
3596 | int op; /* Opcode number driving the emit loop below. */ | ||
3597 |||
3598 | dasm_growpc(Dst, BC__MAX); /* Make room for BC__MAX DynASM pc labels (the =>BC_xxx targets used by build_ins). */ | ||
3599 |||
3600 | build_subroutines(ctx); /* Defined elsewhere in this file; runs before any per-opcode code is emitted. */ | ||
3601 |||
3602 | |.code_op | ||
3603 | for (op = 0; op < BC__MAX; op++) /* Strictly ascending opcode order: several build_ins cases fall through into the next opcode's code. */ | ||
3604 | build_ins(ctx, (BCOp)op, op); /* The same opcode number is passed as the BCOp and as the trailing int argument. */ | ||
3605 |||
3606 | return BC__MAX; /* NOTE(review): presumably the number of pc labels in use -- confirm against the caller. */ | ||
3607 | } | ||
3608 | |||
3609 | /* Emit pseudo frame-info for all assembler functions. For BUILD_elfasm output this writes a .debug_frame and an .eh_frame section to ctx->fp, each holding one CIE and one FDE that spans ctx->codesz bytes from .Lbegin; every other build mode emits nothing. */ | ||
3610 | static void emit_asm_debug(BuildCtx *ctx) | ||
3611 | { | ||
3612 | int i; /* Register number for the save-slot loops (14..31). */ | ||
3613 | switch (ctx->mode) { | ||
3614 | case BUILD_elfasm: | ||
3615 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); | ||
3616 | fprintf(ctx->fp, /* CIE for .debug_frame. */ | ||
3617 | ".Lframe0:\n" | ||
3618 | "\t.long .LECIE0-.LSCIE0\n" /* CIE length */ | ||
3619 | ".LSCIE0:\n" | ||
3620 | "\t.long 0xffffffff\n" /* CIE id as used in .debug_frame */ | ||
3621 | "\t.byte 0x1\n" /* CIE version */ | ||
3622 | "\t.string \"\"\n" /* empty augmentation string */ | ||
3623 | "\t.uleb128 0x1\n" /* code alignment factor */ | ||
3624 | "\t.sleb128 -4\n" /* data alignment factor: factored offsets below count in units of -4 bytes */ | ||
3625 | "\t.byte 65\n" /* return address column; presumably LR in the PPC DWARF numbering -- confirm */ | ||
3626 | "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa: register 1, offset 0 */ | ||
3627 | "\t.align 2\n" | ||
3628 | ".LECIE0:\n\n"); | ||
3629 | fprintf(ctx->fp, /* FDE for .debug_frame, head part. */ | ||
3630 | ".LSFDE0:\n" | ||
3631 | "\t.long .LEFDE0-.LASFDE0\n" /* FDE length */ | ||
3632 | ".LASFDE0:\n" | ||
3633 | "\t.long .Lframe0\n" /* CIE pointer */ | ||
3634 | "\t.long .Lbegin\n" /* initial location */ | ||
3635 | "\t.long %d\n" /* address range, filled from ctx->codesz */ | ||
3636 | "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset CFRAME_SIZE */ | ||
3637 | "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" /* DW_CFA_offset_extended_sf: register 65 saved at CFA+4 */ | ||
3638 | "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", /* DW_CFA_offset_extended: register 70 (presumably CR -- confirm), factored offset 37 */ | ||
3639 | (int)ctx->codesz, CFRAME_SIZE); | ||
3640 | for (i = 14; i <= 31; i++) /* Two save rules per register 14..31. */ | ||
3641 | fprintf(ctx->fp, | ||
3642 | "\t.byte %d\n\t.uleb128 %d\n" /* DW_CFA_offset (0x80 + register) with a factored offset */ | ||
3643 | "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", /* DW_CFA_offset_extended for register 1200+i */ | ||
3644 | 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i)); /* Register 31 gets factored offsets 1 and 2, register 14 gets 35 and 36; 1200+i is presumably the SPE upper half of GPR i -- confirm. */ | ||
3645 | fprintf(ctx->fp, | ||
3646 | "\t.align 2\n" | ||
3647 | ".LEFDE0:\n\n"); | ||
3648 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); | ||
3649 | fprintf(ctx->fp, /* CIE for .eh_frame: same rules as above plus a personality routine. */ | ||
3650 | ".Lframe1:\n" | ||
3651 | "\t.long .LECIE1-.LSCIE1\n" | ||
3652 | ".LSCIE1:\n" | ||
3653 | "\t.long 0\n" /* CIE id as used in .eh_frame */ | ||
3654 | "\t.byte 0x1\n" | ||
3655 | "\t.string \"zPR\"\n" /* augmentation: z = augmentation data present, P = personality, R = FDE pointer encoding */ | ||
3656 | "\t.uleb128 0x1\n" | ||
3657 | "\t.sleb128 -4\n" | ||
3658 | "\t.byte 65\n" | ||
3659 | "\t.uleb128 6\n" /* augmentation length: 1 + 4 + 1 bytes for the three items below */ | ||
3660 | "\t.byte 0x1b\n" /* personality pointer encoding: pcrel|sdata4 */ | ||
3661 | "\t.long lj_err_unwind_dwarf-.\n" /* personality routine, pc-relative */ | ||
3662 | "\t.byte 0x1b\n" /* FDE pointer encoding: pcrel|sdata4 */ | ||
3663 | "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" | ||
3664 | "\t.align 2\n" | ||
3665 | ".LECIE1:\n\n"); | ||
3666 | fprintf(ctx->fp, /* FDE for .eh_frame, head part. */ | ||
3667 | ".LSFDE1:\n" | ||
3668 | "\t.long .LEFDE1-.LASFDE1\n" | ||
3669 | ".LASFDE1:\n" | ||
3670 | "\t.long .LASFDE1-.Lframe1\n" /* CIE pointer, here a relative distance instead of an absolute label */ | ||
3671 | "\t.long .Lbegin-.\n" /* initial location, pc-relative as selected by the R encoding */ | ||
3672 | "\t.long %d\n" | ||
3673 | "\t.uleb128 0\n" /* augmentation length */ | ||
3674 | "\t.byte 0xe\n\t.uleb128 %d\n" | ||
3675 | "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" | ||
3676 | "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", | ||
3677 | (int)ctx->codesz, CFRAME_SIZE); | ||
3678 | for (i = 14; i <= 31; i++) /* Same per-register save rules as in the .debug_frame FDE. */ | ||
3679 | fprintf(ctx->fp, | ||
3680 | "\t.byte %d\n\t.uleb128 %d\n" | ||
3681 | "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", | ||
3682 | 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i)); | ||
3683 | fprintf(ctx->fp, | ||
3684 | "\t.align 2\n" | ||
3685 | ".LEFDE1:\n\n"); | ||
3686 | break; | ||
3687 | default: /* No frame info is written for the other build modes. */ | ||
3688 | break; | ||
3689 | } | ||
3690 | } | ||
3691 | |||
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc new file mode 100644 index 00000000..b222190a --- /dev/null +++ b/src/vm_x64.dasc | |||
@@ -0,0 +1,4971 @@ | |||
1 | |// Low-level VM code for x64 CPUs in LJ_GC64 mode. | ||
2 | |// Bytecode interpreter, fast functions and helper functions. | ||
3 | |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | ||
4 | | | ||
5 | |.arch x64 | ||
6 | |.section code_op, code_sub | ||
7 | | | ||
8 | |.actionlist build_actionlist | ||
9 | |.globals GLOB_ | ||
10 | |.globalnames globnames | ||
11 | |.externnames extnames | ||
12 | | | ||
13 | |//----------------------------------------------------------------------- | ||
14 | | | ||
15 | |.if WIN | ||
16 | |.define X64WIN, 1 // Windows/x64 calling conventions. | ||
17 | |.endif | ||
18 | | | ||
19 | |// Fixed register assignments for the interpreter. | ||
20 | |// This is very fragile and has many dependencies. Caveat emptor. | ||
21 | |.define BASE, rdx // Not C callee-save, refetched anyway. | ||
22 | |.if X64WIN | ||
23 | |.define KBASE, rdi // Must be C callee-save. | ||
24 | |.define PC, rsi // Must be C callee-save. | ||
25 | |.define DISPATCH, rbx // Must be C callee-save. | ||
26 | |.define KBASEd, edi | ||
27 | |.define PCd, esi | ||
28 | |.define DISPATCHd, ebx | ||
29 | |.else | ||
30 | |.define KBASE, r15 // Must be C callee-save. | ||
31 | |.define PC, rbx // Must be C callee-save. | ||
32 | |.define DISPATCH, r14 // Must be C callee-save. | ||
33 | |.define KBASEd, r15d | ||
34 | |.define PCd, ebx | ||
35 | |.define DISPATCHd, r14d | ||
36 | |.endif | ||
37 | | | ||
38 | |.define RA, rcx | ||
39 | |.define RAd, ecx | ||
40 | |.define RAH, ch | ||
41 | |.define RAL, cl | ||
42 | |.define RB, rbp // Must be rbp (C callee-save). | ||
43 | |.define RBd, ebp | ||
44 | |.define RC, rax // Must be rax. | ||
45 | |.define RCd, eax | ||
46 | |.define RCW, ax | ||
47 | |.define RCH, ah | ||
48 | |.define RCL, al | ||
49 | |.define OP, RBd | ||
50 | |.define RD, RC | ||
51 | |.define RDd, RCd | ||
52 | |.define RDW, RCW | ||
53 | |.define RDL, RCL | ||
54 | |.define TMPR, r10 | ||
55 | |.define TMPRd, r10d | ||
56 | |.define ITYPE, r11 | ||
57 | |.define ITYPEd, r11d | ||
58 | | | ||
59 | |.if X64WIN | ||
60 | |.define CARG1, rcx // x64/WIN64 C call arguments. | ||
61 | |.define CARG2, rdx | ||
62 | |.define CARG3, r8 | ||
63 | |.define CARG4, r9 | ||
64 | |.define CARG1d, ecx | ||
65 | |.define CARG2d, edx | ||
66 | |.define CARG3d, r8d | ||
67 | |.define CARG4d, r9d | ||
68 | |.else | ||
69 | |.define CARG1, rdi // x64/POSIX C call arguments. | ||
70 | |.define CARG2, rsi | ||
71 | |.define CARG3, rdx | ||
72 | |.define CARG4, rcx | ||
73 | |.define CARG5, r8 | ||
74 | |.define CARG6, r9 | ||
75 | |.define CARG1d, edi | ||
76 | |.define CARG2d, esi | ||
77 | |.define CARG3d, edx | ||
78 | |.define CARG4d, ecx | ||
79 | |.define CARG5d, r8d | ||
80 | |.define CARG6d, r9d | ||
81 | |.endif | ||
82 | | | ||
83 | |// Type definitions. Some of these are only used for documentation. | ||
84 | |.type L, lua_State | ||
85 | |.type GL, global_State | ||
86 | |.type TVALUE, TValue | ||
87 | |.type GCOBJ, GCobj | ||
88 | |.type STR, GCstr | ||
89 | |.type TAB, GCtab | ||
90 | |.type LFUNC, GCfuncL | ||
91 | |.type CFUNC, GCfuncC | ||
92 | |.type PROTO, GCproto | ||
93 | |.type UPVAL, GCupval | ||
94 | |.type NODE, Node | ||
95 | |.type NARGS, int | ||
96 | |.type TRACE, GCtrace | ||
97 | |.type SBUF, SBuf | ||
98 | | | ||
99 | |// Stack layout while in interpreter. Must match with lj_frame.h. | ||
100 | |//----------------------------------------------------------------------- | ||
101 | |.if X64WIN // x64/Windows stack layout | ||
102 | | | ||
103 | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). saveregs pushes rbp, rdi, rsi and rbx and then lowers rsp by this amount; saveregs_ is the same without the rbp push; restoreregs undoes all of it in reverse order. | ||
104 | |.macro saveregs_ | ||
105 | | push rdi; push rsi; push rbx | ||
106 | | sub rsp, CFRAME_SPACE | ||
107 | |.endmacro | ||
108 | |.macro saveregs | ||
109 | | push rbp; saveregs_ | ||
110 | |.endmacro | ||
111 | |.macro restoreregs | ||
112 | | add rsp, CFRAME_SPACE | ||
113 | | pop rbx; pop rsi; pop rdi; pop rbp | ||
114 | |.endmacro | ||
115 | | | ||
116 | |.define SAVE_CFRAME, aword [rsp+aword*13] | ||
117 | |.define SAVE_PC, aword [rsp+aword*12] | ||
118 | |.define SAVE_L, aword [rsp+aword*11] | ||
119 | |.define SAVE_ERRF, dword [rsp+dword*21] | ||
120 | |.define SAVE_NRES, dword [rsp+dword*20] | ||
121 | |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter | ||
122 | |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. | ||
123 | |.define SAVE_R4, aword [rsp+aword*8] | ||
124 | |.define SAVE_R3, aword [rsp+aword*7] | ||
125 | |.define SAVE_R2, aword [rsp+aword*6] | ||
126 | |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. | ||
127 | |.define ARG5, aword [rsp+aword*4] | ||
128 | |.define CSAVE_4, aword [rsp+aword*3] | ||
129 | |.define CSAVE_3, aword [rsp+aword*2] | ||
130 | |.define CSAVE_2, aword [rsp+aword*1] | ||
131 | |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. | ||
132 | |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee | ||
133 | | | ||
134 | |.define ARG5d, dword [rsp+dword*8] | ||
135 | |.define TMP1, ARG5 // TMP1 overlaps ARG5 | ||
136 | |.define TMP1d, ARG5d | ||
137 | |.define TMP1hi, dword [rsp+dword*9] | ||
138 | |.define MULTRES, TMP1d // MULTRES overlaps TMP1d. | ||
139 | | | ||
140 | |//----------------------------------------------------------------------- | ||
141 | |.else // x64/POSIX stack layout | ||
142 | | | ||
143 | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). saveregs pushes rbp, rbx, r15 and r14 (plus r13 and r12 when NO_UNWIND is set) and then lowers rsp by this amount; saveregs_ is the same without the rbp push; restoreregs undoes all of it in reverse order. | ||
144 | |.macro saveregs_ | ||
145 | | push rbx; push r15; push r14 | ||
146 | |.if NO_UNWIND | ||
147 | | push r13; push r12 | ||
148 | |.endif | ||
149 | | sub rsp, CFRAME_SPACE | ||
150 | |.endmacro | ||
151 | |.macro saveregs | ||
152 | | push rbp; saveregs_ | ||
153 | |.endmacro | ||
154 | |.macro restoreregs | ||
155 | | add rsp, CFRAME_SPACE | ||
156 | |.if NO_UNWIND | ||
157 | | pop r12; pop r13 | ||
158 | |.endif | ||
159 | | pop r14; pop r15; pop rbx; pop rbp | ||
160 | |.endmacro | ||
161 | | | ||
162 | |//----- 16 byte aligned, | ||
163 | |.if NO_UNWIND | ||
164 | |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. | ||
165 | |.define SAVE_R4, aword [rsp+aword*10] | ||
166 | |.define SAVE_R3, aword [rsp+aword*9] | ||
167 | |.define SAVE_R2, aword [rsp+aword*8] | ||
168 | |.define SAVE_R1, aword [rsp+aword*7] | ||
169 | |.define SAVE_RU2, aword [rsp+aword*6] | ||
170 | |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. | ||
171 | |.else | ||
172 | |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. | ||
173 | |.define SAVE_R4, aword [rsp+aword*8] | ||
174 | |.define SAVE_R3, aword [rsp+aword*7] | ||
175 | |.define SAVE_R2, aword [rsp+aword*6] | ||
176 | |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. | ||
177 | |.endif | ||
178 | |.define SAVE_CFRAME, aword [rsp+aword*4] | ||
179 | |.define SAVE_PC, aword [rsp+aword*3] | ||
180 | |.define SAVE_L, aword [rsp+aword*2] | ||
181 | |.define SAVE_ERRF, dword [rsp+dword*3] | ||
182 | |.define SAVE_NRES, dword [rsp+dword*2] | ||
183 | |.define TMP1, aword [rsp] //<-- rsp while in interpreter. | ||
184 | |//----- 16 byte aligned | ||
185 | | | ||
186 | |.define TMP1d, dword [rsp] | ||
187 | |.define TMP1hi, dword [rsp+dword*1] | ||
188 | |.define MULTRES, TMP1d // MULTRES overlaps TMP1d. | ||
189 | | | ||
190 | |.endif | ||
191 | | | ||
192 | |//----------------------------------------------------------------------- | ||
193 | | | ||
194 | |// Instruction headers. | ||
195 | |.macro ins_A; .endmacro | ||
196 | |.macro ins_AD; .endmacro | ||
197 | |.macro ins_AJ; .endmacro | ||
198 | |.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro | ||
199 | |.macro ins_AB_; movzx RBd, RCH; .endmacro | ||
200 | |.macro ins_A_C; movzx RCd, RCL; .endmacro | ||
201 | |.macro ins_AND; not RD; .endmacro | ||
202 | | | ||
203 | |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). Loads the 32 bit instruction at [PC], leaves OP = byte 0, RA = byte 1 and RC (= RD) = the upper 16 bits, advances PC by 4 and jumps through the 8-byte entries of the DISPATCH table indexed by OP. | ||
204 | |.macro ins_NEXT | ||
205 | | mov RCd, [PC] | ||
206 | | movzx RAd, RCH | ||
207 | | movzx OP, RCL | ||
208 | | add PC, 4 | ||
209 | | shr RCd, 16 | ||
210 | | jmp aword [DISPATCH+OP*8] | ||
211 | |.endmacro | ||
212 | | | ||
213 | |// Instruction footer. ins_next and ins_next_ both dispatch to the next instruction; the constant .if 1 below selects the replicated variant, so the common-dispatch variant in the .else branch is never assembled. | ||
214 | |.if 1 | ||
215 | | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | ||
216 | | .define ins_next, ins_NEXT | ||
217 | | .define ins_next_, ins_NEXT | ||
218 | |.else | ||
219 | | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. | ||
220 | | // Affects only certain kinds of benchmarks (and only with -j off). | ||
221 | | // Around 10%-30% slower on Core2, a lot slower on P4. | ||
222 | | .macro ins_next | ||
223 | | jmp ->ins_next | ||
224 | | .endmacro | ||
225 | | .macro ins_next_ | ||
226 | | ->ins_next: | ||
227 | | ins_NEXT | ||
228 | | .endmacro | ||
229 | |.endif | ||
230 | | | ||
231 | |// Call decode and dispatch. ins_callt loads PC from the function in RB, decodes OP and RA from the instruction at that PC (RD is not written), advances PC by 4 and dispatches; ins_call first stores the current PC at [BASE-8]. | ||
232 | |.macro ins_callt | ||
233 | | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC | ||
234 | | mov PC, LFUNC:RB->pc | ||
235 | | mov RAd, [PC] | ||
236 | | movzx OP, RAL | ||
237 | | movzx RAd, RAH | ||
238 | | add PC, 4 | ||
239 | | jmp aword [DISPATCH+OP*8] | ||
240 | |.endmacro | ||
241 | | | ||
242 | |.macro ins_call | ||
243 | | // BASE = new base, RB = LFUNC, RD = nargs+1 | ||
244 | | mov [BASE-8], PC | ||
245 | | ins_callt | ||
246 | |.endmacro | ||
247 | | | ||
248 | |//----------------------------------------------------------------------- | ||
249 | | | ||
250 | |// Macros to clear or set tags. cleartp zeroes the upper 17 bits of reg (shl 17, then shr 17); settp ORs in tp<<47. The 2-operand settp/setint clobber ITYPE; the 3-operand forms build the result in dst and leave reg unchanged. setint is settp with LJ_TISNUM. | ||
251 | |.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro | ||
252 | |.macro settp, reg, tp | ||
253 | | mov64 ITYPE, ((uint64_t)tp<<47) | ||
254 | | or reg, ITYPE | ||
255 | |.endmacro | ||
256 | |.macro settp, dst, reg, tp | ||
257 | | mov64 dst, ((uint64_t)tp<<47) | ||
258 | | or dst, reg | ||
259 | |.endmacro | ||
260 | |.macro setint, reg | ||
261 | | settp reg, LJ_TISNUM | ||
262 | |.endmacro | ||
263 | |.macro setint, dst, reg | ||
264 | | settp dst, reg, LJ_TISNUM | ||
265 | |.endmacro | ||
266 | | | ||
267 | |// Macros to test operand types. Each copies the value to ITYPE, shifts it right arithmetically by 47 and compares ITYPEd with the tag; all clobber ITYPE. checktp_nc/checktptp (identical bodies) jump to target on mismatch and leave the operand unchanged; checktp additionally strips the tag from reg via cleartp. checknumx compares with LJ_TISNUM using a caller-chosen jump: jne (checkint/checkinttp), jae (checknum/checknumtp), ja (checknumber). mov_false/mov_true load ~(1<<47) and ~(2<<47) into reg. | ||
268 | |.macro checktp_nc, reg, tp, target | ||
269 | | mov ITYPE, reg | ||
270 | | sar ITYPE, 47 | ||
271 | | cmp ITYPEd, tp | ||
272 | | jne target | ||
273 | |.endmacro | ||
274 | |.macro checktp, reg, tp, target | ||
275 | | mov ITYPE, reg | ||
276 | | cleartp reg | ||
277 | | sar ITYPE, 47 | ||
278 | | cmp ITYPEd, tp | ||
279 | | jne target | ||
280 | |.endmacro | ||
281 | |.macro checktptp, src, tp, target | ||
282 | | mov ITYPE, src | ||
283 | | sar ITYPE, 47 | ||
284 | | cmp ITYPEd, tp | ||
285 | | jne target | ||
286 | |.endmacro | ||
287 | |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro | ||
288 | |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro | ||
289 | |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro | ||
290 | | | ||
291 | |.macro checknumx, reg, target, jump | ||
292 | | mov ITYPE, reg | ||
293 | | sar ITYPE, 47 | ||
294 | | cmp ITYPEd, LJ_TISNUM | ||
295 | | jump target | ||
296 | |.endmacro | ||
297 | |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro | ||
298 | |.macro checkinttp, src, target; checknumx src, target, jne; .endmacro | ||
299 | |.macro checknum, reg, target; checknumx reg, target, jae; .endmacro | ||
300 | |.macro checknumtp, src, target; checknumx src, target, jae; .endmacro | ||
301 | |.macro checknumber, src, target; checknumx src, target, ja; .endmacro | ||
302 | | | ||
303 | |.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro | ||
304 | |.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro | ||
305 | | | ||
306 | |// Byte/word operands addressed relative to PC at offsets -4..-1, i.e. the instruction just before PC (ins_NEXT advances PC by 4 before dispatching). These operands must be used with movzx. branchPC (below) adds (reg-BCBIAS_J)*4 to PC. | ||
307 | |.define PC_OP, byte [PC-4] | ||
308 | |.define PC_RA, byte [PC-3] | ||
309 | |.define PC_RB, byte [PC-1] | ||
310 | |.define PC_RC, byte [PC-2] | ||
311 | |.define PC_RD, word [PC-2] | ||
312 | | | ||
313 | |.macro branchPC, reg | ||
314 | | lea PC, [PC+reg*4-BCBIAS_J*4] | ||
315 | |.endmacro | ||
316 | | | ||
317 | |// Assumes DISPATCH is relative to GL. DISPATCH_GL/DISPATCH_J add GG_DISP2G/GG_DISP2J to the offset of a global_State/jit_State field, for use as a displacement from DISPATCH. PC2PROTO is the (negative) offset of a GCproto field measured from the end of the GCproto struct. | ||
318 | #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) | ||
319 | #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) | ||
320 | | | ||
321 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | ||
322 | | | ||
323 | |// Decrement hashed hotcount and trigger trace recorder if zero. The 16 bit counter lives at DISPATCH+GG_DISP2HOT, indexed by (PCd>>1)&HOTCOUNT_PCMASK; it is reduced by HOTCOUNT_LOOP (hotloop) or HOTCOUNT_CALL (hotcall) and the macro jumps to ->vm_hotloop/->vm_hotcall when the subtraction borrows (jb). Overwrites reg. | ||
324 | |.macro hotloop, reg | ||
325 | | mov reg, PCd | ||
326 | | shr reg, 1 | ||
327 | | and reg, HOTCOUNT_PCMASK | ||
328 | | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP | ||
329 | | jb ->vm_hotloop | ||
330 | |.endmacro | ||
331 | | | ||
332 | |.macro hotcall, reg | ||
333 | | mov reg, PCd | ||
334 | | shr reg, 1 | ||
335 | | and reg, HOTCOUNT_PCMASK | ||
336 | | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL | ||
337 | | jb ->vm_hotcall | ||
338 | |.endmacro | ||
339 | | | ||
340 | |// Set current VM state: stores the complement of LJ_VMST_<st> (st is appended to the LJ_VMST_ prefix) as a dword into the vmstate field of global_State, addressed via DISPATCH. fpop1 (below) is 'fstp st1', which stores st0 into st1 and pops the x87 stack. | ||
341 | |.macro set_vmstate, st | ||
342 | | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st | ||
343 | |.endmacro | ||
344 | | | ||
345 | |.macro fpop1; fstp st1; .endmacro | ||
346 | | | ||
347 | |// Synthesize SSE FP constants. Each macro loads a 64 bit immediate into tmp with mov64 and copies it to reg with movd, so tmp is overwritten. | ||
348 | |.macro sseconst_abs, reg, tmp // Synthesize abs mask (all bits set except bit 63). | ||
349 | | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp | ||
350 | |.endmacro | ||
351 | | | ||
352 | |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const: val is 8 hex digits for the upper half, lower half is zero. | ||
353 | | mov64 tmp, U64x(val,00000000); movd reg, tmp | ||
354 | |.endmacro | ||
355 | | | ||
356 | |.macro sseconst_sign, reg, tmp // Synthesize sign mask (only bit 63 set). | ||
357 | | sseconst_hi reg, tmp, 80000000 | ||
358 | |.endmacro | ||
359 | |.macro sseconst_1, reg, tmp // Synthesize 1.0. | ||
360 | | sseconst_hi reg, tmp, 3ff00000 | ||
361 | |.endmacro | ||
362 | |.macro sseconst_m1, reg, tmp // Synthesize -1.0. | ||
363 | | sseconst_hi reg, tmp, bff00000 | ||
364 | |.endmacro | ||
365 | |.macro sseconst_2p52, reg, tmp // Synthesize 2^52. | ||
366 | | sseconst_hi reg, tmp, 43300000 | ||
367 | |.endmacro | ||
368 | |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. | ||
369 | | sseconst_hi reg, tmp, 43380000 | ||
370 | |.endmacro | ||
371 | | | ||
372 | |// Move table write barrier back: clears LJ_GC_BLACK in tab->marked, then makes tab the new head of gc.grayagain and stores the previous head in tab->gclist. Overwrites reg. | ||
373 | |.macro barrierback, tab, reg | ||
374 | | and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) | ||
375 | | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] | ||
376 | | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab | ||
377 | | mov tab->gclist, reg | ||
378 | |.endmacro | ||
379 | | | ||
380 | |//----------------------------------------------------------------------- | ||
381 | |||
382 | /* Generate subroutines used by opcodes and other parts of the VM. */ | ||
383 | /* The .code_sub section should be last to help static branch prediction. */ | ||
384 | static void build_subroutines(BuildCtx *ctx) | ||
385 | { | ||
386 | |.code_sub | ||
387 | | | ||
388 | |//----------------------------------------------------------------------- | ||
389 | |//-- Return handling ---------------------------------------------------- | ||
390 | |//----------------------------------------------------------------------- | ||
391 | | | ||
392 | |->vm_returnp: | ||
393 | | test PCd, FRAME_P | ||
394 | | jz ->cont_dispatch | ||
395 | | | ||
396 | | // Return from pcall or xpcall fast func. | ||
397 | | and PC, -8 | ||
398 | | sub BASE, PC // Restore caller base. | ||
399 | | lea RA, [RA+PC-8] // Rebase RA and prepend one result. | ||
400 | | mov PC, [BASE-8] // Fetch PC of previous frame. | ||
401 | | // Prepending may overwrite the pcall frame, so do it at the end. | ||
402 | | mov_true ITYPE | ||
403 | | mov aword [BASE+RA], ITYPE // Prepend true to results. | ||
404 | | | ||
405 | |->vm_returnc: | ||
406 | | add RDd, 1 // RD = nresults+1 | ||
407 | | jz ->vm_unwind_yield | ||
408 | | mov MULTRES, RDd | ||
409 | | test PC, FRAME_TYPE | ||
410 | | jz ->BC_RET_Z // Handle regular return to Lua. | ||
411 | | | ||
412 | |->vm_return: | ||
413 | | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return | ||
414 | | xor PC, FRAME_C | ||
415 | | test PCd, FRAME_TYPE | ||
416 | | jnz ->vm_returnp | ||
417 | | | ||
418 | | // Return to C. | ||
419 | | set_vmstate C | ||
420 | | and PC, -8 | ||
421 | | sub PC, BASE | ||
422 | | neg PC // Previous base = BASE - delta. | ||
423 | | | ||
424 | | sub RDd, 1 | ||
425 | | jz >2 | ||
426 | |1: // Move results down. | ||
427 | | mov RB, [BASE+RA] | ||
428 | | mov [BASE-16], RB | ||
429 | | add BASE, 8 | ||
430 | | sub RDd, 1 | ||
431 | | jnz <1 | ||
432 | |2: | ||
433 | | mov L:RB, SAVE_L | ||
434 | | mov L:RB->base, PC | ||
435 | |3: | ||
436 | | mov RDd, MULTRES | ||
437 | | mov RAd, SAVE_NRES // RA = wanted nresults+1 | ||
438 | |4: | ||
439 | | cmp RAd, RDd | ||
440 | | jne >6 // More/less results wanted? | ||
441 | |5: | ||
442 | | sub BASE, 16 | ||
443 | | mov L:RB->top, BASE | ||
444 | | | ||
445 | |->vm_leave_cp: | ||
446 | | mov RA, SAVE_CFRAME // Restore previous C frame. | ||
447 | | mov L:RB->cframe, RA | ||
448 | | xor eax, eax // Ok return status for vm_pcall. | ||
449 | | | ||
450 | |->vm_leave_unw: | ||
451 | | restoreregs | ||
452 | | ret | ||
453 | | | ||
454 | |6: | ||
455 | | jb >7 // Less results wanted? | ||
456 | | // More results wanted. Check stack size and fill up results with nil. | ||
457 | | cmp BASE, L:RB->maxstack | ||
458 | | ja >8 | ||
459 | | mov aword [BASE-16], LJ_TNIL | ||
460 | | add BASE, 8 | ||
461 | | add RDd, 1 | ||
462 | | jmp <4 | ||
463 | | | ||
464 | |7: // Less results wanted. | ||
465 | | test RAd, RAd | ||
466 | | jz <5 // But check for LUA_MULTRET+1. | ||
467 | | sub RA, RD // Negative result! | ||
468 | | lea BASE, [BASE+RA*8] // Correct top. | ||
469 | | jmp <5 | ||
470 | | | ||
471 | |8: // Corner case: need to grow stack for filling up results. | ||
472 | | // This can happen if: | ||
473 | | // - A C function grows the stack (a lot). | ||
474 | | // - The GC shrinks the stack in between. | ||
475 | | // - A return back from a lua_call() with (high) nresults adjustment. | ||
476 | | mov L:RB->top, BASE // Save current top held in BASE (yes). | ||
477 | | mov MULTRES, RDd // Need to fill only remainder with nil. | ||
478 | | mov CARG2d, RAd | ||
479 | | mov CARG1, L:RB | ||
480 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
481 | | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. | ||
482 | | jmp <3 | ||
483 | | | ||
484 | |->vm_unwind_yield: | ||
485 | | mov al, LUA_YIELD | ||
486 | | jmp ->vm_unwind_c_eh | ||
487 | | | ||
488 | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | ||
489 | | // (void *cframe, int errcode) | ||
490 | | mov eax, CARG2d // Error return status for vm_pcall. | ||
491 | | mov rsp, CARG1 | ||
492 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | ||
493 | | mov L:RB, SAVE_L | ||
494 | | mov GL:RB, L:RB->glref | ||
495 | | mov dword GL:RB->vmstate, ~LJ_VMST_C | ||
496 | | jmp ->vm_leave_unw | ||
497 | | | ||
498 | |->vm_unwind_rethrow: | ||
499 | |.if not X64WIN | ||
500 | | mov CARG1, SAVE_L | ||
501 | | mov CARG2d, eax | ||
502 | | restoreregs | ||
503 | | jmp extern lj_err_throw // (lua_State *L, int errcode) | ||
504 | |.endif | ||
505 | | | ||
506 | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | ||
507 | | // (void *cframe) | ||
508 | | and CARG1, CFRAME_RAWMASK | ||
509 | | mov rsp, CARG1 | ||
510 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | ||
511 | | mov L:RB, SAVE_L | ||
512 | | mov RDd, 1+1 // Really 1+2 results, incr. later. | ||
513 | | mov BASE, L:RB->base | ||
514 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
515 | | add DISPATCH, GG_G2DISP | ||
516 | | mov PC, [BASE-8] // Fetch PC of previous frame. | ||
517 | | mov_false RA | ||
518 | | mov RB, [BASE] | ||
519 | | mov [BASE-16], RA // Prepend false to error message. | ||
520 | | mov [BASE-8], RB | ||
521 | | mov RA, -16 // Results start at BASE+RA = BASE-16. | ||
522 | | set_vmstate INTERP | ||
523 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. | ||
524 | | | ||
525 | |//----------------------------------------------------------------------- | ||
526 | |//-- Grow stack for calls ----------------------------------------------- | ||
527 | |//----------------------------------------------------------------------- | ||
528 | | | ||
529 | |->vm_growstack_c: // Grow stack for C function. | ||
530 | | mov CARG2d, LUA_MINSTACK | ||
531 | | jmp >2 | ||
532 | | | ||
533 | |->vm_growstack_v: // Grow stack for vararg Lua function. | ||
534 | | sub RD, 16 // LJ_FR2 | ||
535 | | jmp >1 | ||
536 | | | ||
537 | |->vm_growstack_f: // Grow stack for fixarg Lua function. | ||
538 | | // BASE = new base, RD = nargs+1, RB = L, PC = first PC | ||
539 | | lea RD, [BASE+NARGS:RD*8-8] | ||
540 | |1: | ||
541 | | movzx RAd, byte [PC-4+PC2PROTO(framesize)] | ||
542 | | add PC, 4 // Must point after first instruction. | ||
543 | | mov L:RB->base, BASE | ||
544 | | mov L:RB->top, RD | ||
545 | | mov SAVE_PC, PC | ||
546 | | mov CARG2, RA | ||
547 | |2: | ||
548 | | // RB = L, L->base = new base, L->top = top | ||
549 | | mov CARG1, L:RB | ||
550 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
551 | | mov BASE, L:RB->base | ||
552 | | mov RD, L:RB->top | ||
553 | | mov LFUNC:RB, [BASE-16] | ||
554 | | cleartp LFUNC:RB | ||
555 | | sub RD, BASE | ||
556 | | shr RDd, 3 | ||
557 | | add NARGS:RDd, 1 | ||
558 | | // BASE = new base, RB = LFUNC, RD = nargs+1 | ||
559 | | ins_callt // Just retry the call. | ||
560 | | | ||
561 | |//----------------------------------------------------------------------- | ||
562 | |//-- Entry points into the assembler VM --------------------------------- | ||
563 | |//----------------------------------------------------------------------- | ||
564 | | | ||
565 | |->vm_resume: // Setup C frame and resume thread. | ||
566 | | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) | ||
567 | | saveregs | ||
568 | | mov L:RB, CARG1 // Caveat: CARG1 may be RA. | ||
569 | | mov SAVE_L, CARG1 | ||
570 | | mov RA, CARG2 | ||
571 | | mov PCd, FRAME_CP | ||
572 | | xor RDd, RDd | ||
573 | | lea KBASE, [esp+CFRAME_RESUME] | ||
574 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
575 | | add DISPATCH, GG_G2DISP | ||
576 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. | ||
577 | | mov SAVE_CFRAME, RD | ||
578 | | mov SAVE_NRES, RDd | ||
579 | | mov SAVE_ERRF, RDd | ||
580 | | mov L:RB->cframe, KBASE | ||
581 | | cmp byte L:RB->status, RDL | ||
582 | | je >2 // Initial resume (like a call). | ||
583 | | | ||
584 | | // Resume after yield (like a return). | ||
585 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
586 | | set_vmstate INTERP | ||
587 | | mov byte L:RB->status, RDL | ||
588 | | mov BASE, L:RB->base | ||
589 | | mov RD, L:RB->top | ||
590 | | sub RD, RA | ||
591 | | shr RDd, 3 | ||
592 | | add RDd, 1 // RD = nresults+1 | ||
593 | | sub RA, BASE // RA = resultofs | ||
594 | | mov PC, [BASE-8] | ||
595 | | mov MULTRES, RDd | ||
596 | | test PCd, FRAME_TYPE | ||
597 | | jz ->BC_RET_Z | ||
598 | | jmp ->vm_return | ||
599 | | | ||
600 | |->vm_pcall: // Setup protected C frame and enter VM. | ||
601 | | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) | ||
602 | | saveregs | ||
603 | | mov PCd, FRAME_CP | ||
604 | | mov SAVE_ERRF, CARG4d | ||
605 | | jmp >1 | ||
606 | | | ||
607 | |->vm_call: // Setup C frame and enter VM. | ||
608 | | // (lua_State *L, TValue *base, int nres1) | ||
609 | | saveregs | ||
610 | | mov PCd, FRAME_C | ||
611 | | | ||
612 | |1: // Entry point for vm_pcall above (PC = ftype). | ||
613 | | mov SAVE_NRES, CARG3d | ||
614 | | mov L:RB, CARG1 // Caveat: CARG1 may be RA. | ||
615 | | mov SAVE_L, CARG1 | ||
616 | | mov RA, CARG2 | ||
617 | | | ||
618 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
619 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | ||
620 | | mov SAVE_CFRAME, KBASE | ||
621 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | ||
622 | | add DISPATCH, GG_G2DISP | ||
623 | | mov L:RB->cframe, rsp | ||
624 | | | ||
625 | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). | ||
626 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
627 | | set_vmstate INTERP | ||
628 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | ||
629 | | add PC, RA | ||
630 | | sub PC, BASE // PC = frame delta + frame type | ||
631 | | | ||
632 | | mov RD, L:RB->top | ||
633 | | sub RD, RA | ||
634 | | shr NARGS:RDd, 3 | ||
635 | | add NARGS:RDd, 1 // RD = nargs+1 | ||
636 | | | ||
637 | |->vm_call_dispatch: | ||
638 | | mov LFUNC:RB, [RA-16] | ||
639 | | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. | ||
640 | | | ||
641 | |->vm_call_dispatch_f: | ||
642 | | mov BASE, RA | ||
643 | | ins_call | ||
644 | | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC | ||
645 | | | ||
646 | |->vm_cpcall: // Setup protected C frame, call C. | ||
647 | | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) | ||
648 | | saveregs | ||
649 | | mov L:RB, CARG1 // Caveat: CARG1 may be RA. | ||
650 | | mov SAVE_L, CARG1 | ||
651 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | ||
652 | | | ||
653 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | ||
654 | | sub KBASE, L:RB->top | ||
655 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
656 | | mov SAVE_ERRF, 0 // No error function. | ||
657 | | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame. | ||
658 | | add DISPATCH, GG_G2DISP | ||
659 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | ||
660 | | | ||
661 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | ||
662 | | mov SAVE_CFRAME, KBASE | ||
663 | | mov L:RB->cframe, rsp | ||
664 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
665 | | | ||
666 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) | ||
667 | | // TValue * (new base) or NULL returned in eax (RC). | ||
668 | | test RC, RC | ||
669 | | jz ->vm_leave_cp // No base? Just remove C frame. | ||
670 | | mov RA, RC | ||
671 | | mov PCd, FRAME_CP | ||
672 | | jmp <2 // Else continue with the call. | ||
673 | | | ||
674 | |//----------------------------------------------------------------------- | ||
675 | |//-- Metamethod handling ------------------------------------------------ | ||
676 | |//----------------------------------------------------------------------- | ||
677 | | | ||
678 | |//-- Continuation dispatch ---------------------------------------------- | ||
679 | | | ||
680 | |->cont_dispatch: | ||
681 | | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) | ||
682 | | add RA, BASE | ||
683 | | and PC, -8 | ||
684 | | mov RB, BASE | ||
685 | | sub BASE, PC // Restore caller BASE. | ||
686 | | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg. | ||
687 | | mov RC, RA // ... in [RC] | ||
688 | | mov PC, [RB-24] // Restore PC from [cont|PC]. | ||
689 | | mov RA, qword [RB-32] // May be negative on WIN64 with debug. | ||
690 | |.if FFI | ||
691 | | cmp RA, 1 | ||
692 | | jbe >1 | ||
693 | |.endif | ||
694 | | mov LFUNC:KBASE, [BASE-16] | ||
695 | | cleartp LFUNC:KBASE | ||
696 | | mov KBASE, LFUNC:KBASE->pc | ||
697 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
698 | | // BASE = base, RC = result, RB = meta base | ||
699 | | jmp RA // Jump to continuation. | ||
700 | | | ||
701 | |.if FFI | ||
702 | |1: | ||
703 | | je ->cont_ffi_callback // cont = 1: return from FFI callback. | ||
704 | | // cont = 0: Tail call from C function. | ||
705 | | sub RB, BASE | ||
706 | | shr RBd, 3 | ||
707 | | lea RDd, [RBd-3] | ||
708 | | jmp ->vm_call_tail | ||
709 | |.endif | ||
710 | | | ||
711 | |->cont_cat: // BASE = base, RC = result, RB = mbase | ||
712 | | movzx RAd, PC_RB | ||
713 | | sub RB, 32 | ||
714 | | lea RA, [BASE+RA*8] | ||
715 | | sub RA, RB | ||
716 | | je ->cont_ra | ||
717 | | neg RA | ||
718 | | shr RAd, 3 | ||
719 | |.if X64WIN | ||
720 | | mov CARG3d, RAd | ||
721 | | mov L:CARG1, SAVE_L | ||
722 | | mov L:CARG1->base, BASE | ||
723 | | mov RC, [RC] | ||
724 | | mov [RB], RC | ||
725 | | mov CARG2, RB | ||
726 | |.else | ||
727 | | mov L:CARG1, SAVE_L | ||
728 | | mov L:CARG1->base, BASE | ||
729 | | mov CARG3d, RAd | ||
730 | | mov RA, [RC] | ||
731 | | mov [RB], RA | ||
732 | | mov CARG2, RB | ||
733 | |.endif | ||
734 | | jmp ->BC_CAT_Z | ||
735 | | | ||
736 | |//-- Table indexing metamethods ----------------------------------------- | ||
737 | | | ||
738 | |->vmeta_tgets: | ||
739 | | settp STR:RC, LJ_TSTR // STR:RC = GCstr * | ||
740 | | mov TMP1, STR:RC | ||
741 | | lea RC, TMP1 | ||
742 | | cmp PC_OP, BC_GGET | ||
743 | | jne >1 | ||
744 | | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * | ||
745 | | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | ||
746 | | mov [RB], TAB:RA | ||
747 | | jmp >2 | ||
748 | | | ||
749 | |->vmeta_tgetb: | ||
750 | | movzx RCd, PC_RC | ||
751 | |.if DUALNUM | ||
752 | | setint RC | ||
753 | | mov TMP1, RC | ||
754 | |.else | ||
755 | | cvtsi2sd xmm0, RCd | ||
756 | | movsd TMP1, xmm0 | ||
757 | |.endif | ||
758 | | lea RC, TMP1 | ||
759 | | jmp >1 | ||
760 | | | ||
761 | |->vmeta_tgetv: | ||
762 | | movzx RCd, PC_RC // Reload TValue *k from RC. | ||
763 | | lea RC, [BASE+RC*8] | ||
764 | |1: | ||
765 | | movzx RBd, PC_RB // Reload TValue *t from RB. | ||
766 | | lea RB, [BASE+RB*8] | ||
767 | |2: | ||
768 | | mov L:CARG1, SAVE_L | ||
769 | | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
770 | | mov CARG2, RB | ||
771 | | mov CARG3, RC | ||
772 | | mov L:RB, L:CARG1 | ||
773 | | mov SAVE_PC, PC | ||
774 | | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | ||
775 | | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | ||
776 | | mov BASE, L:RB->base | ||
777 | | test RC, RC | ||
778 | | jz >3 | ||
779 | |->cont_ra: // BASE = base, RC = result | ||
780 | | movzx RAd, PC_RA | ||
781 | | mov RB, [RC] | ||
782 | | mov [BASE+RA*8], RB | ||
783 | | ins_next | ||
784 | | | ||
785 | |3: // Call __index metamethod. | ||
786 | | // BASE = base, L->top = new base, stack = cont/func/t/k | ||
787 | | mov RA, L:RB->top | ||
788 | | mov [RA-24], PC // [cont|PC] | ||
789 | | lea PC, [RA+FRAME_CONT] | ||
790 | | sub PC, BASE | ||
791 | | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. | ||
792 | | mov NARGS:RDd, 2+1 // 2 args for func(t, k). | ||
793 | | cleartp LFUNC:RB | ||
794 | | jmp ->vm_call_dispatch_f | ||
795 | | | ||
796 | |->vmeta_tgetr: | ||
797 | | mov CARG1, TAB:RB | ||
798 | | mov RB, BASE // Save BASE. | ||
799 | | mov CARG2d, RCd // Caveat: CARG2 == BASE | ||
800 | | call extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
801 | | // cTValue * or NULL returned in eax (RC). | ||
802 | | movzx RAd, PC_RA | ||
803 | | mov BASE, RB // Restore BASE. | ||
804 | | test RC, RC | ||
805 | | jnz ->BC_TGETR_Z | ||
806 | | mov ITYPE, LJ_TNIL | ||
807 | | jmp ->BC_TGETR2_Z | ||
808 | | | ||
809 | |//----------------------------------------------------------------------- | ||
810 | | | ||
811 | |->vmeta_tsets: | ||
812 | | settp STR:RC, LJ_TSTR // STR:RC = GCstr * | ||
813 | | mov TMP1, STR:RC | ||
814 | | lea RC, TMP1 | ||
815 | | cmp PC_OP, BC_GSET | ||
816 | | jne >1 | ||
817 | | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * | ||
818 | | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | ||
819 | | mov [RB], TAB:RA | ||
820 | | jmp >2 | ||
821 | | | ||
822 | |->vmeta_tsetb: | ||
823 | | movzx RCd, PC_RC | ||
824 | |.if DUALNUM | ||
825 | | setint RC | ||
826 | | mov TMP1, RC | ||
827 | |.else | ||
828 | | cvtsi2sd xmm0, RCd | ||
829 | | movsd TMP1, xmm0 | ||
830 | |.endif | ||
831 | | lea RC, TMP1 | ||
832 | | jmp >1 | ||
833 | | | ||
834 | |->vmeta_tsetv: | ||
835 | | movzx RCd, PC_RC // Reload TValue *k from RC. | ||
836 | | lea RC, [BASE+RC*8] | ||
837 | |1: | ||
838 | | movzx RBd, PC_RB // Reload TValue *t from RB. | ||
839 | | lea RB, [BASE+RB*8] | ||
840 | |2: | ||
841 | | mov L:CARG1, SAVE_L | ||
842 | | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
843 | | mov CARG2, RB | ||
844 | | mov CARG3, RC | ||
845 | | mov L:RB, L:CARG1 | ||
846 | | mov SAVE_PC, PC | ||
847 | | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | ||
848 | | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | ||
849 | | mov BASE, L:RB->base | ||
850 | | test RC, RC | ||
851 | | jz >3 | ||
852 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | ||
853 | | movzx RAd, PC_RA | ||
854 | | mov RB, [BASE+RA*8] | ||
855 | | mov [RC], RB | ||
856 | |->cont_nop: // BASE = base, (RC = result) | ||
857 | | ins_next | ||
858 | | | ||
859 | |3: // Call __newindex metamethod. | ||
860 | | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | ||
861 | | mov RA, L:RB->top | ||
862 | | mov [RA-24], PC // [cont|PC] | ||
863 | | movzx RCd, PC_RA | ||
864 | | // Copy value to third argument. | ||
865 | | mov RB, [BASE+RC*8] | ||
866 | | mov [RA+16], RB | ||
867 | | lea PC, [RA+FRAME_CONT] | ||
868 | | sub PC, BASE | ||
869 | | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. | ||
870 | | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v). | ||
871 | | cleartp LFUNC:RB | ||
872 | | jmp ->vm_call_dispatch_f | ||
873 | | | ||
874 | |->vmeta_tsetr: | ||
875 | |.if X64WIN | ||
876 | | mov L:CARG1, SAVE_L | ||
877 | | mov CARG3d, RCd | ||
878 | | mov L:CARG1->base, BASE | ||
879 | | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. | ||
880 | |.else | ||
881 | | mov L:CARG1, SAVE_L | ||
882 | | mov CARG2, TAB:RB | ||
883 | | mov L:CARG1->base, BASE | ||
884 | | mov RB, BASE // Save BASE. | ||
885 | | mov CARG3d, RCd // Caveat: CARG3 == BASE. | ||
886 | |.endif | ||
887 | | mov SAVE_PC, PC | ||
888 | | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
889 | | // TValue * returned in eax (RC). | ||
890 | | movzx RAd, PC_RA | ||
891 | | mov BASE, RB // Restore BASE. | ||
892 | | jmp ->BC_TSETR_Z | ||
893 | | | ||
894 | |//-- Comparison metamethods --------------------------------------------- | ||
895 | | | ||
896 | |->vmeta_comp: | ||
897 | | movzx RDd, PC_RD | ||
898 | | movzx RAd, PC_RA | ||
899 | | mov L:RB, SAVE_L | ||
900 | | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE. | ||
901 | |.if X64WIN | ||
902 | | lea CARG3, [BASE+RD*8] | ||
903 | | lea CARG2, [BASE+RA*8] | ||
904 | |.else | ||
905 | | lea CARG2, [BASE+RA*8] | ||
906 | | lea CARG3, [BASE+RD*8] | ||
907 | |.endif | ||
908 | | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA. | ||
909 | | movzx CARG4d, PC_OP | ||
910 | | mov SAVE_PC, PC | ||
911 | | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | ||
912 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | ||
913 | |3: | ||
914 | | mov BASE, L:RB->base | ||
915 | | cmp RC, 1 | ||
916 | | ja ->vmeta_binop | ||
917 | |4: | ||
918 | | lea PC, [PC+4] | ||
919 | | jb >6 | ||
920 | |5: | ||
921 | | movzx RDd, PC_RD | ||
922 | | branchPC RD | ||
923 | |6: | ||
924 | | ins_next | ||
925 | | | ||
926 | |->cont_condt: // BASE = base, RC = result | ||
927 | | add PC, 4 | ||
928 | | mov ITYPE, [RC] | ||
929 | | sar ITYPE, 47 | ||
930 | | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true. | ||
931 | | jb <5 | ||
932 | | jmp <6 | ||
933 | | | ||
934 | |->cont_condf: // BASE = base, RC = result | ||
935 | | mov ITYPE, [RC] | ||
936 | | sar ITYPE, 47 | ||
937 | | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. | ||
938 | | jmp <4 | ||
939 | | | ||
940 | |->vmeta_equal: | ||
941 | | cleartp TAB:RD | ||
942 | | sub PC, 4 | ||
943 | |.if X64WIN | ||
944 | | mov CARG3, RD | ||
945 | | mov CARG4d, RBd | ||
946 | | mov L:RB, SAVE_L | ||
947 | | mov L:RB->base, BASE // Caveat: CARG2 == BASE. | ||
948 | | mov CARG2, RA | ||
949 | | mov CARG1, L:RB // Caveat: CARG1 == RA. | ||
950 | |.else | ||
951 | | mov CARG2, RA | ||
952 | | mov CARG4d, RBd // Caveat: CARG4 == RA. | ||
953 | | mov L:RB, SAVE_L | ||
954 | | mov L:RB->base, BASE // Caveat: CARG3 == BASE. | ||
955 | | mov CARG3, RD | ||
956 | | mov CARG1, L:RB | ||
957 | |.endif | ||
958 | | mov SAVE_PC, PC | ||
959 | | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | ||
960 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | ||
961 | | jmp <3 | ||
962 | | | ||
963 | |->vmeta_equal_cd: | ||
964 | |.if FFI | ||
965 | | sub PC, 4 | ||
966 | | mov L:RB, SAVE_L | ||
967 | | mov L:RB->base, BASE | ||
968 | | mov CARG1, L:RB | ||
969 | | mov CARG2d, dword [PC-4] | ||
970 | | mov SAVE_PC, PC | ||
971 | | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins) | ||
972 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | ||
973 | | jmp <3 | ||
974 | |.endif | ||
975 | | | ||
976 | |->vmeta_istype: | ||
977 | | mov L:RB, SAVE_L | ||
978 | | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
979 | | mov CARG2d, RAd | ||
980 | | mov CARG3d, RDd | ||
981 | | mov L:CARG1, L:RB | ||
982 | | mov SAVE_PC, PC | ||
983 | | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
984 | | mov BASE, L:RB->base | ||
985 | | jmp <6 | ||
986 | | | ||
987 | |//-- Arithmetic metamethods --------------------------------------------- | ||
988 | | | ||
989 | |->vmeta_arith_vno: | ||
990 | |.if DUALNUM | ||
991 | | movzx RBd, PC_RB | ||
992 | | movzx RCd, PC_RC | ||
993 | |.endif | ||
994 | |->vmeta_arith_vn: | ||
995 | | lea RC, [KBASE+RC*8] | ||
996 | | jmp >1 | ||
997 | | | ||
998 | |->vmeta_arith_nvo: | ||
999 | |.if DUALNUM | ||
1000 | | movzx RBd, PC_RB | ||
1001 | | movzx RCd, PC_RC | ||
1002 | |.endif | ||
1003 | |->vmeta_arith_nv: | ||
1004 | | lea TMPR, [KBASE+RC*8] | ||
1005 | | lea RC, [BASE+RB*8] | ||
1006 | | mov RB, TMPR | ||
1007 | | jmp >2 | ||
1008 | | | ||
1009 | |->vmeta_unm: | ||
1010 | | lea RC, [BASE+RD*8] | ||
1011 | | mov RB, RC | ||
1012 | | jmp >2 | ||
1013 | | | ||
1014 | |->vmeta_arith_vvo: | ||
1015 | |.if DUALNUM | ||
1016 | | movzx RBd, PC_RB | ||
1017 | | movzx RCd, PC_RC | ||
1018 | |.endif | ||
1019 | |->vmeta_arith_vv: | ||
1020 | | lea RC, [BASE+RC*8] | ||
1021 | |1: | ||
1022 | | lea RB, [BASE+RB*8] | ||
1023 | |2: | ||
1024 | | lea RA, [BASE+RA*8] | ||
1025 | |.if X64WIN | ||
1026 | | mov CARG3, RB | ||
1027 | | mov CARG4, RC | ||
1028 | | movzx RCd, PC_OP | ||
1029 | | mov ARG5d, RCd | ||
1030 | | mov L:RB, SAVE_L | ||
1031 | | mov L:RB->base, BASE // Caveat: CARG2 == BASE. | ||
1032 | | mov CARG2, RA | ||
1033 | | mov CARG1, L:RB // Caveat: CARG1 == RA. | ||
1034 | |.else | ||
1035 | | movzx CARG5d, PC_OP | ||
1036 | | mov CARG2, RA | ||
1037 | | mov CARG4, RC // Caveat: CARG4 == RA. | ||
1038 | | mov L:CARG1, SAVE_L | ||
1039 | | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE. | ||
1040 | | mov CARG3, RB | ||
1041 | | mov L:RB, L:CARG1 | ||
1042 | |.endif | ||
1043 | | mov SAVE_PC, PC | ||
1044 | | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | ||
1045 | | // NULL (finished) or TValue * (metamethod) returned in eax (RC). | ||
1046 | | mov BASE, L:RB->base | ||
1047 | | test RC, RC | ||
1048 | | jz ->cont_nop | ||
1049 | | | ||
1050 | | // Call metamethod for binary op. | ||
1051 | |->vmeta_binop: | ||
1052 | | // BASE = base, RC = new base, stack = cont/func/o1/o2 | ||
1053 | | mov RA, RC | ||
1054 | | sub RC, BASE | ||
1055 | | mov [RA-24], PC // [cont|PC] | ||
1056 | | lea PC, [RC+FRAME_CONT] | ||
1057 | | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2). | ||
1058 | | jmp ->vm_call_dispatch | ||
1059 | | | ||
// vmeta_len: calls lj_meta_len(L, o) with o = &BASE[RD], RD re-read from PC_RD.
// L->base and SAVE_PC are stored before the call; BASE is reloaded from
// L->base afterwards.
1060 | |->vmeta_len: | ||
1061 | | movzx RDd, PC_RD | ||
1062 | | mov L:RB, SAVE_L | ||
1063 | | mov L:RB->base, BASE | ||
1064 | | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE | ||
1065 | | mov L:CARG1, L:RB | ||
1066 | | mov SAVE_PC, PC | ||
1067 | | call extern lj_meta_len // (lua_State *L, TValue *o) | ||
1068 | | // NULL (retry) or TValue * (metamethod) returned in eax (RC). | ||
1069 | | mov BASE, L:RB->base | ||
1070 | #if LJ_52 | ||
// LJ_52 build: a non-NULL result is dispatched through vmeta_binop; a NULL
// result reloads the operand, clears its type tag and retries at BC_LEN_Z.
1071 | | test RC, RC | ||
1072 | | jne ->vmeta_binop // Binop call for compatibility. | ||
1073 | | movzx RDd, PC_RD | ||
1074 | | mov TAB:CARG1, [BASE+RD*8] | ||
1075 | | cleartp TAB:CARG1 | ||
1076 | | jmp ->BC_LEN_Z | ||
1077 | #else | ||
// Non-LJ_52 build: the result is always dispatched through vmeta_binop.
1078 | | jmp ->vmeta_binop // Binop call for compatibility. | ||
1079 | #endif | ||
1080 | | | ||
1081 | |//-- Call metamethod ---------------------------------------------------- | ||
1082 | | | ||
// vmeta_call_ra: entry point that first converts the slot index in RA into
// the address BASE+RA*8+16 and then falls through into vmeta_call.
1083 | |->vmeta_call_ra: | ||
1084 | | lea RA, [BASE+RA*8+16] | ||
1085 | |->vmeta_call: // Resolve and call __call metamethod. | ||
1086 | | // BASE = old base, RA = new base, RC = nargs+1, PC = return | ||
// NARGS is parked in TMP1d and RA in RB so both can be restored after the
// C call below.
1087 | | mov TMP1d, NARGS:RDd // Save RA, RC for us. | ||
1088 | | mov RB, RA | ||
1089 | |.if X64WIN | ||
1090 | | mov L:TMPR, SAVE_L | ||
1091 | | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE. | ||
1092 | | lea CARG2, [RA-16] | ||
1093 | | lea CARG3, [RA+NARGS:RD*8-8] | ||
1094 | | mov CARG1, L:TMPR // Caveat: CARG1 is RA. | ||
1095 | |.else | ||
1096 | | mov L:CARG1, SAVE_L | ||
1097 | | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE. | ||
1098 | | lea CARG2, [RA-16] | ||
1099 | | lea CARG3, [RA+NARGS:RD*8-8] | ||
1100 | |.endif | ||
1101 | | mov SAVE_PC, PC | ||
1102 | | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | ||
// Restore RA/BASE/NARGS, reload the function slot at [RA-16] and bump the
// argument count by one.
// NOTE(review): the +1 presumably accounts for the called object being passed
// as an extra first argument by lj_meta_call -- confirm against its source.
1103 | | mov RA, RB | ||
1104 | | mov L:RB, SAVE_L | ||
1105 | | mov BASE, L:RB->base | ||
1106 | | mov NARGS:RDd, TMP1d | ||
1107 | | mov LFUNC:RB, [RA-16] | ||
1108 | | add NARGS:RDd, 1 | ||
1109 | | // This is fragile. L->base must not move, KBASE must always be defined. | ||
1110 | | cmp KBASE, BASE // Continue with CALLT if flag set. | ||
1111 | | je ->BC_CALLT_Z | ||
1112 | | cleartp LFUNC:RB | ||
1113 | | mov BASE, RA | ||
1114 | | ins_call // Otherwise call resolved metamethod. | ||
1115 | | | ||
1116 | |//-- Argument coercion for 'for' statement ------------------------------ | ||
1117 | | | ||
// vmeta_for: calls lj_meta_for(L, base) with base = RA, then re-decodes the
// instruction word at PC-4 (RA from RCH, opcode from RCL, upper 16 bits left
// in RC) and re-dispatches it through the static dispatch table.
1118 | |->vmeta_for: | ||
1119 | | mov L:RB, SAVE_L | ||
1120 | | mov L:RB->base, BASE | ||
1121 | | mov CARG2, RA // Caveat: CARG2 == BASE | ||
1122 | | mov L:CARG1, L:RB // Caveat: CARG1 == RA | ||
1123 | | mov SAVE_PC, PC | ||
1124 | | call extern lj_meta_for // (lua_State *L, TValue *base) | ||
1125 | | mov BASE, L:RB->base | ||
1126 | | mov RCd, [PC-4] | ||
1127 | | movzx RAd, RCH | ||
1128 | | movzx OP, RCL | ||
1129 | | shr RCd, 16 | ||
1130 | | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. | ||
1131 | | | ||
1132 | |//----------------------------------------------------------------------- | ||
1133 | |//-- Fast functions ----------------------------------------------------- | ||
1134 | |//----------------------------------------------------------------------- | ||
1135 | | | ||
// Fast-function prologue macros. Each defines the global label ff_<name>;
// the numbered variants additionally branch to fff_fallback when too few
// arguments were passed (NARGS:RD holds nargs+1, hence the "+1" compares).
//
// .ffunc: label only, no argument checks.
1136 | |.macro .ffunc, name | ||
1137 | |->ff_ .. name: | ||
1138 | |.endmacro | ||
1139 | | | ||
// .ffunc_1: requires at least one argument.
1140 | |.macro .ffunc_1, name | ||
1141 | |->ff_ .. name: | ||
1142 | | cmp NARGS:RDd, 1+1; jb ->fff_fallback | ||
1143 | |.endmacro | ||
1144 | | | ||
// .ffunc_2: requires at least two arguments.
1145 | |.macro .ffunc_2, name | ||
1146 | |->ff_ .. name: | ||
1147 | | cmp NARGS:RDd, 2+1; jb ->fff_fallback | ||
1148 | |.endmacro | ||
1149 | | | ||
// .ffunc_n name, op: one argument that must pass checknumtp (presumably a
// number-type check); applies the SSE instruction "op" with [BASE] as source
// and xmm0 as destination.
1150 | |.macro .ffunc_n, name, op | ||
1151 | | .ffunc_1 name | ||
1152 | | checknumtp [BASE], ->fff_fallback | ||
1153 | | op xmm0, qword [BASE] | ||
1154 | |.endmacro | ||
1155 | | | ||
// .ffunc_n name: same as above with op defaulting to movsd (plain load).
1156 | |.macro .ffunc_n, name | ||
1157 | | .ffunc_n name, movsd | ||
1158 | |.endmacro | ||
1159 | | | ||
// .ffunc_nn: two arguments, both checked with checknumtp, loaded into
// xmm0 (first) and xmm1 (second).
1160 | |.macro .ffunc_nn, name | ||
1161 | | .ffunc_2 name | ||
1162 | | checknumtp [BASE], ->fff_fallback | ||
1163 | | checknumtp [BASE+8], ->fff_fallback | ||
1164 | | movsd xmm0, qword [BASE] | ||
1165 | | movsd xmm1, qword [BASE+8] | ||
1166 | |.endmacro | ||
1167 | | | ||
1168 | | // Inlined GC threshold check. Caveat: uses label 1. | ||
// Calls fff_gcstep only when gc.total is not below gc.threshold (unsigned
// compare). Overwrites RB with gc.total, so callers must not keep a live
// value in RB across this macro.
1169 | |.macro ffgccheck | ||
1170 | | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] | ||
1171 | | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
1172 | | jb >1 | ||
1173 | | call ->fff_gcstep | ||
1174 | |1: | ||
1175 | |.endmacro | ||
1176 | | | ||
1177 | |//-- Base library: checks ----------------------------------------------- | ||
1178 | | | ||
// assert fast path: falls back when the first argument's type tag (value
// shifted right arithmetically by 47) is >= LJ_TISTRUECOND as an unsigned
// compare. Otherwise every argument is copied two slots down (to BASE-16
// onward) and returned as the results, with the count taken from MULTRES.
1179 | |.ffunc_1 assert | ||
1180 | | mov ITYPE, [BASE] | ||
1181 | | mov RB, ITYPE | ||
1182 | | sar ITYPE, 47 | ||
1183 | | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback | ||
1184 | | mov PC, [BASE-8] | ||
1185 | | mov MULTRES, RDd | ||
1186 | | mov RB, [BASE] | ||
1187 | | mov [BASE-16], RB | ||
// RDd = nargs+1; after the first copy above nargs-1 arguments remain.
1188 | | sub RDd, 2 | ||
1189 | | jz >2 | ||
1190 | | mov RA, BASE | ||
1191 | |1: | ||
1192 | | add RA, 8 | ||
1193 | | mov RB, [RA] | ||
1194 | | mov [RA-16], RB | ||
1195 | | sub RDd, 1 | ||
1196 | | jnz <1 | ||
1197 | |2: | ||
// Enters at fff_res_ (not fff_res) because MULTRES has already been stored.
1198 | | mov RDd, MULTRES | ||
1199 | | jmp ->fff_res_ | ||
1200 | | | ||
// type fast path: derives an index from the argument's type tag and returns
// the string stored at that index in this C function's upvalue array.
// The tag ([BASE] >> 47) is raised to LJ_TISNUM when it is below it in an
// unsigned compare (cmovb), then bitwise inverted to form the index.
1201 | |.ffunc_1 type | ||
1202 | | mov RC, [BASE] | ||
1203 | | sar RC, 47 | ||
1204 | | mov RBd, LJ_TISNUM | ||
1205 | | cmp RCd, RBd | ||
1206 | | cmovb RCd, RBd | ||
1207 | | not RCd | ||
1208 | |2: | ||
// [BASE-16] holds the called function object; its tag is cleared to get the
// GCfuncC pointer used for the upvalue lookup.
1209 | | mov CFUNC:RB, [BASE-16] | ||
1210 | | cleartp CFUNC:RB | ||
1211 | | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] | ||
1212 | | mov PC, [BASE-8] | ||
1213 | | settp STR:RC, LJ_TSTR | ||
1214 | | mov [BASE-16], STR:RC | ||
1215 | | jmp ->fff_res1 | ||
1216 | | | ||
1217 | |//-- Base library: getters and setters --------------------------------- | ||
1218 | | | ||
// getmetatable fast path.
//  - Tables and userdata: the metatable is read from the object (label 1).
//  - Other types: the metatable comes from gcroot[GCROOT_BASEMT + ~itype]
//    (labels 6/7); tags not above LJ_TISNUM are first forced to LJ_TISNUM.
//  - No metatable: returns nil.
//  - Otherwise the metatable itself is the default result, replaced by the
//    value stored under the MM_metatable name string when that key exists in
//    the metatable's hash part with a non-nil value.
1219 | |.ffunc_1 getmetatable | ||
1220 | | mov TAB:RB, [BASE] | ||
1221 | | mov PC, [BASE-8] | ||
1222 | | checktab TAB:RB, >6 | ||
1223 | |1: // Field metatable must be at same offset for GCtab and GCudata! | ||
1224 | | mov TAB:RB, TAB:RB->metatable | ||
1225 | |2: | ||
// The nil store does not affect flags, so the jz still tests TAB:RB.
1226 | | test TAB:RB, TAB:RB | ||
1227 | | mov aword [BASE-16], LJ_TNIL | ||
1228 | | jz ->fff_res1 | ||
1229 | | settp TAB:RC, TAB:RB, LJ_TTAB | ||
1230 | | mov [BASE-16], TAB:RC // Store metatable as default result. | ||
// Compute the head of the hash chain for the key string:
// node + (hmask & str->sid) * sizeof(Node).
1231 | | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] | ||
1232 | | mov RAd, TAB:RB->hmask | ||
1233 | | and RAd, STR:RC->sid | ||
1234 | | settp STR:RC, LJ_TSTR | ||
1235 | | imul RAd, #NODE | ||
1236 | | add NODE:RA, TAB:RB->node | ||
1237 | |3: // Rearranged logic, because we expect _not_ to find the key. | ||
1238 | | cmp NODE:RA->key, STR:RC | ||
1239 | | je >5 | ||
1240 | |4: | ||
1241 | | mov NODE:RA, NODE:RA->next | ||
1242 | | test NODE:RA, NODE:RA | ||
1243 | | jnz <3 | ||
1244 | | jmp ->fff_res1 // Not found, keep default result. | ||
1245 | |5: | ||
1246 | | mov RB, NODE:RA->val | ||
1247 | | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. | ||
1248 | | mov [BASE-16], RB // Return value of mt.__metatable. | ||
1249 | | jmp ->fff_res1 | ||
1250 | | | ||
// Not a table. NOTE(review): relies on checktab leaving the type tag in
// ITYPE -- confirm against the macro definition.
1251 | |6: | ||
1252 | | cmp ITYPEd, LJ_TUDATA; je <1 | ||
1253 | | cmp ITYPEd, LJ_TISNUM; ja >7 | ||
1254 | | mov ITYPEd, LJ_TISNUM | ||
1255 | |7: | ||
1256 | | not ITYPEd | ||
1257 | | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] | ||
1258 | | jmp <2 | ||
1259 | | | ||
// setmetatable fast path: handles only "table without a metatable gets a
// table as metatable". Any other combination goes to fff_fallback. The
// original tagged table value (kept in TMPR) is returned as the result.
1260 | |.ffunc_2 setmetatable | ||
1261 | | mov TAB:RB, [BASE] | ||
1262 | | mov TAB:TMPR, TAB:RB | ||
1263 | | checktab TAB:RB, ->fff_fallback | ||
1264 | | // Fast path: no mt for table yet and not clearing the mt. | ||
1265 | | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback | ||
1266 | | mov TAB:RA, [BASE+8] | ||
1267 | | checktab TAB:RA, ->fff_fallback | ||
1268 | | mov TAB:RB->metatable, TAB:RA | ||
1269 | | mov PC, [BASE-8] | ||
1270 | | mov [BASE-16], TAB:TMPR // Return original table. | ||
1271 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
1272 | | jz >1 | ||
1273 | | // Possible write barrier. Table is black, but skip iswhite(mt) check. | ||
1274 | | barrierback TAB:RB, RC | ||
1275 | |1: | ||
1276 | | jmp ->fff_res1 | ||
1277 | | | ||
// rawget fast path: first argument must be a table (else fff_fallback);
// calls lj_tab_get(L, t, &BASE[1]) and returns a copy of the slot it points
// to. BASE is parked in RB across the C call because an argument register
// aliases it (see the per-ABI caveats below).
1278 | |.ffunc_2 rawget | ||
1279 | |.if X64WIN | ||
1280 | | mov TAB:RA, [BASE] | ||
1281 | | checktab TAB:RA, ->fff_fallback | ||
1282 | | mov RB, BASE // Save BASE. | ||
1283 | | lea CARG3, [BASE+8] | ||
1284 | | mov CARG2, TAB:RA // Caveat: CARG2 == BASE. | ||
1285 | | mov CARG1, SAVE_L | ||
1286 | |.else | ||
1287 | | mov TAB:CARG2, [BASE] | ||
1288 | | checktab TAB:CARG2, ->fff_fallback | ||
1289 | | mov RB, BASE // Save BASE. | ||
1290 | | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. | ||
1291 | | mov CARG1, SAVE_L | ||
1292 | |.endif | ||
1293 | | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | ||
1294 | | // cTValue * returned in eax (RD). | ||
1295 | | mov BASE, RB // Restore BASE. | ||
1296 | | // Copy table slot. | ||
1297 | | mov RB, [RD] | ||
1298 | | mov PC, [BASE-8] | ||
1299 | | mov [BASE-16], RB | ||
1300 | | jmp ->fff_res1 | ||
1301 | | | ||
1302 | |//-- Base library: conversions ------------------------------------------ | ||
1303 | | | ||
// tonumber fast path: with exactly one argument that passes checknumber,
// the argument is returned unchanged. Everything else goes to fff_fallback.
1304 | |.ffunc tonumber | ||
1305 | | // Only handles the number case inline (without a base argument). | ||
1306 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. | ||
1307 | | mov RB, [BASE] | ||
1308 | | checknumber RB, ->fff_fallback | ||
1309 | | mov PC, [BASE-8] | ||
1310 | | mov [BASE-16], RB | ||
1311 | | jmp ->fff_res1 | ||
1312 | | | ||
// tostring fast path.
//  - String argument: returned as is.
//  - Number argument with no number base metatable installed: converted via
//    lj_strfmt_number (DUALNUM) or lj_strfmt_num, after a GC threshold check.
//  - Anything else: fff_fallback / fff_fallback_1.
1313 | |.ffunc_1 tostring | ||
1314 | | // Only handles the string or number case inline. | ||
1315 | | mov PC, [BASE-8] | ||
1316 | | mov STR:RB, [BASE] | ||
1317 | | checktp_nc STR:RB, LJ_TSTR, >3 | ||
1318 | | // A __tostring method in the string base metatable is ignored. | ||
1319 | |2: | ||
1320 | | mov [BASE-16], STR:RB | ||
1321 | | jmp ->fff_res1 | ||
1322 | |3: // Handle numbers inline, unless a number base metatable is present. | ||
1323 | | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1 | ||
1324 | | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 | ||
1325 | | jne ->fff_fallback | ||
1326 | | ffgccheck // Caveat: uses label 1. | ||
1327 | | mov L:RB, SAVE_L | ||
1328 | | mov L:RB->base, BASE // Add frame since C call can throw. | ||
1329 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
// The second C argument is the address of the argument slot (BASE). On
// X64WIN CARG2 already aliases BASE, so no move is needed there.
1330 | |.if not X64WIN | ||
1331 | | mov CARG2, BASE // Otherwise: CARG2 == BASE | ||
1332 | |.endif | ||
1333 | | mov L:CARG1, L:RB | ||
1334 | |.if DUALNUM | ||
1335 | | call extern lj_strfmt_number // (lua_State *L, cTValue *o) | ||
1336 | |.else | ||
1337 | | call extern lj_strfmt_num // (lua_State *L, lua_Number *np) | ||
1338 | |.endif | ||
1339 | | // GCstr returned in eax (RD). | ||
1340 | | mov BASE, L:RB->base | ||
// Tag the returned string pointer and reuse the store/return code at label 2.
1341 | | settp STR:RB, RD, LJ_TSTR | ||
1342 | | jmp <2 | ||
1343 | | | ||
1344 | |//-- Base library: iterators ------------------------------------------- | ||
1345 | | | ||
// next fast path: calls lj_tab_next(t, &BASE[1], &BASE[-2]) so the C helper
// writes its output starting at the result slot BASE-16. A missing second
// argument is replaced by nil first. The return code selects: >0 two results
// (fff_res2), <0 fff_fallback_2, 0 a single nil result.
1346 | |.ffunc_1 next | ||
// Flags are still those of the "cmp NARGS:RDd, 1+1" inside .ffunc_1:
// equal means exactly one argument was passed.
1347 | | je >2 // Missing 2nd arg? | ||
1348 | |1: | ||
1349 | | mov CARG1, [BASE] | ||
1350 | | mov PC, [BASE-8] | ||
1351 | | checktab CARG1, ->fff_fallback | ||
1352 | | mov RB, BASE // Save BASE. | ||
1353 | |.if X64WIN | ||
1354 | | lea CARG3, [BASE-16] | ||
1355 | | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE. | ||
1356 | |.else | ||
1357 | | lea CARG2, [BASE+8] | ||
1358 | | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE. | ||
1359 | |.endif | ||
1360 | | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) | ||
1361 | | // 1=found, 0=end, -1=error returned in eax (RD). | ||
1362 | | mov BASE, RB // Restore BASE. | ||
1363 | | test RDd, RDd; jg ->fff_res2 // Found key/value. | ||
1364 | | js ->fff_fallback_2 // Invalid key. | ||
1365 | | // End of traversal: return nil. | ||
1366 | | mov aword [BASE-16], LJ_TNIL | ||
1367 | | jmp ->fff_res1 | ||
1368 | |2: // Set missing 2nd arg to nil. | ||
1369 | | mov aword [BASE+8], LJ_TNIL | ||
1370 | | jmp <1 | ||
1371 | | | ||
// pairs fast path: argument must be a table (and, under LJ_52, one without a
// metatable). Returns three values: this function's upvalue[0] tagged as a
// function, the original table, and nil.
1372 | |.ffunc_1 pairs | ||
1373 | | mov TAB:RB, [BASE] | ||
1374 | | mov TMPR, TAB:RB | ||
1375 | | checktab TAB:RB, ->fff_fallback | ||
1376 | #if LJ_52 | ||
1377 | | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback | ||
1378 | #endif | ||
1379 | | mov CFUNC:RD, [BASE-16] | ||
1380 | | cleartp CFUNC:RD | ||
1381 | | mov CFUNC:RD, CFUNC:RD->upvalue[0] | ||
1382 | | settp CFUNC:RD, LJ_TFUNC | ||
// PC must be loaded before [BASE-8] is overwritten with the second result.
1383 | | mov PC, [BASE-8] | ||
1384 | | mov [BASE-16], CFUNC:RD | ||
1385 | | mov [BASE-8], TMPR | ||
1386 | | mov aword [BASE], LJ_TNIL | ||
1387 | | mov RDd, 1+3 | ||
1388 | | jmp ->fff_res | ||
1389 | | | ||
// ipairs_aux fast path: given (table, index), stores index+1 as the first
// result and looks up table[index+1]:
//  - index+1 below asize (unsigned): read from the array part;
//  - otherwise, if hmask != 0: lj_tab_getinth(t, index+1);
//  - nil or missing slot: return no results (fff_res0).
// Also defines the shared exits fff_res2 (two results) and fff_res0 (none).
1390 | |.ffunc_2 ipairs_aux | ||
1391 | | mov TAB:RB, [BASE] | ||
1392 | | checktab TAB:RB, ->fff_fallback | ||
1393 | |.if DUALNUM | ||
1394 | | mov RA, [BASE+8] | ||
1395 | | checkint RA, ->fff_fallback | ||
1396 | |.else | ||
1397 | | checknumtp [BASE+8], ->fff_fallback | ||
1398 | | movsd xmm0, qword [BASE+8] | ||
1399 | |.endif | ||
1400 | | mov PC, [BASE-8] | ||
1401 | |.if DUALNUM | ||
1402 | | add RAd, 1 | ||
1403 | | setint ITYPE, RA | ||
1404 | | mov [BASE-16], ITYPE | ||
1405 | |.else | ||
1406 | | sseconst_1 xmm1, TMPR | ||
1407 | | addsd xmm0, xmm1 | ||
1408 | | cvttsd2si RAd, xmm0 | ||
1409 | | movsd qword [BASE-16], xmm0 | ||
1410 | |.endif | ||
1411 | | cmp RAd, TAB:RB->asize; jae >2 // Not in array part? | ||
1412 | | mov RD, TAB:RB->array | ||
1413 | | lea RD, [RD+RA*8] | ||
// Label 1 is also the re-entry point after a successful hash lookup below;
// RD points at the value slot in both cases.
1414 | |1: | ||
1415 | | cmp aword [RD], LJ_TNIL; je ->fff_res0 | ||
1416 | | // Copy array slot. | ||
1417 | | mov RB, [RD] | ||
1418 | | mov [BASE-8], RB | ||
1419 | |->fff_res2: | ||
1420 | | mov RDd, 1+2 | ||
1421 | | jmp ->fff_res | ||
1422 | |2: // Check for empty hash part first. Otherwise call C function. | ||
1423 | | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | ||
// BASE is kept in RB across the C call. On X64WIN it goes through TMPR first
// because CARG2 aliases BASE and RB still holds the table at that point.
1424 | |.if X64WIN | ||
1425 | | mov TMPR, BASE | ||
1426 | | mov CARG2d, RAd | ||
1427 | | mov CARG1, TAB:RB | ||
1428 | | mov RB, TMPR | ||
1429 | |.else | ||
1430 | | mov CARG1, TAB:RB | ||
1431 | | mov RB, BASE // Save BASE. | ||
1432 | | mov CARG2d, RAd // Caveat: CARG2 == BASE | ||
1433 | |.endif | ||
1434 | | call extern lj_tab_getinth // (GCtab *t, int32_t key) | ||
1435 | | // cTValue * or NULL returned in eax (RD). | ||
1436 | | mov BASE, RB | ||
1437 | | test RD, RD | ||
1438 | | jnz <1 | ||
1439 | |->fff_res0: | ||
1440 | | mov RDd, 1+0 | ||
1441 | | jmp ->fff_res | ||
1442 | | | ||
// ipairs fast path: argument must be a table (and, under LJ_52, one without a
// metatable). Returns three values: this function's upvalue[0] tagged as a
// function, the original table, and an initial control value of zero
// (an LJ_TISNUM-tagged integer 0 under DUALNUM, otherwise the all-zero
// bit pattern, i.e. the double 0.0).
1443 | |.ffunc_1 ipairs | ||
1444 | | mov TAB:RB, [BASE] | ||
1445 | | mov TMPR, TAB:RB | ||
1446 | | checktab TAB:RB, ->fff_fallback | ||
1447 | #if LJ_52 | ||
1448 | | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback | ||
1449 | #endif | ||
1450 | | mov CFUNC:RD, [BASE-16] | ||
1451 | | cleartp CFUNC:RD | ||
1452 | | mov CFUNC:RD, CFUNC:RD->upvalue[0] | ||
1453 | | settp CFUNC:RD, LJ_TFUNC | ||
1454 | | mov PC, [BASE-8] | ||
1455 | | mov [BASE-16], CFUNC:RD | ||
1456 | | mov [BASE-8], TMPR | ||
1457 | |.if DUALNUM | ||
1458 | | mov64 RD, ((uint64_t)LJ_TISNUM<<47) | ||
1459 | | mov [BASE], RD | ||
1460 | |.else | ||
1461 | | mov qword [BASE], 0 | ||
1462 | |.endif | ||
1463 | | mov RDd, 1+3 | ||
1464 | | jmp ->fff_res | ||
1465 | | | ||
1466 | |//-- Base library: catch errors ---------------------------------------- | ||
1467 | | | ||
// pcall fast path: builds a protected-call frame and dispatches the call.
// RA = new base (BASE+16), NARGS is reduced by one (the called function),
// PC = frame delta 16 plus FRAME_PCALL. Label 1 is a shared tail that xpcall
// jumps into with its own RA/NARGS/PC values.
1468 | |.ffunc_1 pcall | ||
1469 | | lea RA, [BASE+16] | ||
1470 | | sub NARGS:RDd, 1 | ||
1471 | | mov PCd, 16+FRAME_PCALL | ||
1472 | |1: | ||
// Add the hook-active bit (0 or 1) from hookmask to the frame type in PC.
1473 | | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
1474 | | shr RB, HOOK_ACTIVE_SHIFT | ||
1475 | | and RB, 1 | ||
1476 | | add PC, RB // Remember active hook before pcall. | ||
1477 | | // Note: this does a (harmless) copy of the function to the PC slot, too. | ||
// Shift RD slots up by one slot (8 bytes), highest address first, so the
// overlapping source and destination ranges are copied correctly.
1478 | | mov KBASE, RD | ||
1479 | |2: | ||
1480 | | mov RB, [RA+KBASE*8-24] | ||
1481 | | mov [RA+KBASE*8-16], RB | ||
1482 | | sub KBASE, 1 | ||
1483 | | ja <2 | ||
1484 | | jmp ->vm_call_dispatch | ||
1485 | | | ||
// xpcall fast path: the second argument must be a function (else
// fff_fallback). Swaps the first two argument slots, then joins the shared
// tail at label 1 of the preceding pcall code with RA = BASE+24, two fewer
// arguments and a frame delta of 24.
1486 | |.ffunc_2 xpcall | ||
1487 | | mov LFUNC:RA, [BASE+8] | ||
1488 | | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback | ||
1489 | | mov LFUNC:RB, [BASE] // Swap function and traceback. | ||
1490 | | mov [BASE], LFUNC:RA | ||
1491 | | mov [BASE+8], LFUNC:RB | ||
1492 | | lea RA, [BASE+24] | ||
1493 | | sub NARGS:RDd, 2 | ||
1494 | | mov PCd, 24+FRAME_PCALL | ||
1495 | | jmp <1 | ||
1496 | | | ||
1497 | |//-- Coroutine library -------------------------------------------------- | ||
1498 | | | ||
// coroutine_resume_wrap: macro expanded twice below to generate
// ff_coroutine_resume (resume = 1) and ff_coroutine_wrap_aux (resume = 0).
// Outline of the generated code:
//   1. Fetch the target thread (first argument for resume, upvalue[0] for
//      wrap) and keep it in TMP1.
//   2. Fall back if it is not resumable: non-zero cframe, status above
//      LUA_YIELD, no initial function, or not enough stack space.
//   3. Move the arguments onto the coroutine's stack and call vm_resume.
//   4. On status <= LUA_YIELD move the results back (growing the caller's
//      stack if needed); otherwise take the error path at label 8.
1499 | |.macro coroutine_resume_wrap, resume | ||
1500 | |.if resume | ||
1501 | |.ffunc_1 coroutine_resume | ||
1502 | | mov L:RB, [BASE] | ||
1503 | | cleartp L:RB | ||
1504 | |.else | ||
1505 | |.ffunc coroutine_wrap_aux | ||
1506 | | mov CFUNC:RB, [BASE-16] | ||
1507 | | cleartp CFUNC:RB | ||
1508 | | mov L:RB, CFUNC:RB->upvalue[0].gcr | ||
1509 | | cleartp L:RB | ||
1510 | |.endif | ||
1511 | | mov PC, [BASE-8] | ||
1512 | | mov SAVE_PC, PC | ||
1513 | | mov TMP1, L:RB | ||
1514 | |.if resume | ||
1515 | | checktptp [BASE], LJ_TTHREAD, ->fff_fallback | ||
1516 | |.endif | ||
1517 | | cmp aword L:RB->cframe, 0; jne ->fff_fallback | ||
1518 | | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback | ||
// The mov below leaves the flags of the status compare intact for the je.
1519 | | mov RA, L:RB->top | ||
1520 | | je >1 // Status != LUA_YIELD (i.e. 0)? | ||
1521 | | cmp RA, L:RB->base // Check for presence of initial func. | ||
1522 | | je ->fff_fallback | ||
1523 | | mov PC, [RA-8] // Move initial function up. | ||
1524 | | mov [RA], PC | ||
1525 | | add RA, 8 | ||
1526 | |1: | ||
// PC = prospective new top of the coroutine stack after the args are moved.
1527 | |.if resume | ||
1528 | | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). | ||
1529 | |.else | ||
1530 | | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). | ||
1531 | |.endif | ||
1532 | | cmp PC, L:RB->maxstack; ja ->fff_fallback | ||
1533 | | mov L:RB->top, PC | ||
1534 | | | ||
// From here on L:RB is the calling thread (SAVE_L), not the coroutine.
1535 | | mov L:RB, SAVE_L | ||
1536 | | mov L:RB->base, BASE | ||
1537 | |.if resume | ||
1538 | | add BASE, 8 // Keep resumed thread in stack for GC. | ||
1539 | |.endif | ||
1540 | | mov L:RB->top, BASE | ||
1541 | |.if resume | ||
1542 | | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. | ||
1543 | |.else | ||
1544 | | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. | ||
1545 | |.endif | ||
1546 | | sub RB, PC // Relative to PC. | ||
1547 | | | ||
1548 | | cmp PC, RA | ||
1549 | | je >3 | ||
1550 | |2: // Move args to coroutine. | ||
1551 | | mov RC, [PC+RB] | ||
1552 | | mov [PC-8], RC | ||
1553 | | sub PC, 8 | ||
1554 | | cmp PC, RA | ||
1555 | | jne <2 | ||
1556 | |3: | ||
1557 | | mov CARG2, RA | ||
1558 | | mov CARG1, TMP1 | ||
1559 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) | ||
1560 | | | ||
// Back in the caller: L:RB = calling thread, L:PC = coroutine (from TMP1).
// eax holds the status returned by vm_resume.
1561 | | mov L:RB, SAVE_L | ||
1562 | | mov L:PC, TMP1 | ||
1563 | | mov BASE, L:RB->base | ||
1564 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
1565 | | set_vmstate INTERP | ||
1566 | | | ||
1567 | | cmp eax, LUA_YIELD | ||
1568 | | ja >8 | ||
1569 | |4: | ||
// RA = coroutine base, KBASE = coroutine top, PC = size of results in bytes
// (converted to a count by the shr below).
1570 | | mov RA, L:PC->base | ||
1571 | | mov KBASE, L:PC->top | ||
1572 | | mov L:PC->top, RA // Clear coroutine stack. | ||
1573 | | mov PC, KBASE | ||
1574 | | sub PC, RA | ||
1575 | | je >6 // No results? | ||
1576 | | lea RD, [BASE+PC] | ||
1577 | | shr PCd, 3 | ||
1578 | | cmp RD, L:RB->maxstack | ||
1579 | | ja >9 // Need to grow stack? | ||
1580 | | | ||
1581 | | mov RB, BASE | ||
1582 | | sub RB, RA | ||
1583 | |5: // Move results from coroutine. | ||
1584 | | mov RD, [RA] | ||
1585 | | mov [RA+RB], RD | ||
1586 | | add RA, 8 | ||
1587 | | cmp RA, KBASE | ||
1588 | | jne <5 | ||
1589 | |6: | ||
1590 | |.if resume | ||
1591 | | lea RDd, [PCd+2] // nresults+1 = 1 + true + results. | ||
1592 | | mov_true ITYPE // Prepend true to results. | ||
1593 | | mov [BASE-8], ITYPE | ||
1594 | |.else | ||
1595 | | lea RDd, [PCd+1] // nresults+1 = 1 + results. | ||
1596 | |.endif | ||
1597 | |7: | ||
// Common return: RA is the byte offset of the first result relative to BASE
// (-8 for resume because of the prepended boolean, 0 for wrap).
1598 | | mov PC, SAVE_PC | ||
1599 | | mov MULTRES, RDd | ||
1600 | |.if resume | ||
1601 | | mov RA, -8 | ||
1602 | |.else | ||
1603 | | xor RAd, RAd | ||
1604 | |.endif | ||
1605 | | test PCd, FRAME_TYPE | ||
1606 | | jz ->BC_RET_Z | ||
1607 | | jmp ->vm_return | ||
1608 | | | ||
1609 | |8: // Coroutine returned with error (at co->top-1). | ||
1610 | |.if resume | ||
1611 | | mov_false ITYPE // Prepend false to results. | ||
1612 | | mov [BASE-8], ITYPE | ||
1613 | | mov RA, L:PC->top | ||
1614 | | sub RA, 8 | ||
1615 | | mov L:PC->top, RA // Clear error from coroutine stack. | ||
1616 | | // Copy error message. | ||
1617 | | mov RD, [RA] | ||
1618 | | mov [BASE], RD | ||
1619 | | mov RDd, 1+2 // nresults+1 = 1 + false + error. | ||
1620 | | jmp <7 | ||
1621 | |.else | ||
1622 | | mov CARG2, L:PC | ||
1623 | | mov CARG1, L:RB | ||
1624 | | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) | ||
1625 | | // Error function does not return. | ||
1626 | |.endif | ||
1627 | | | ||
1628 | |9: // Handle stack expansion on return from yield. | ||
// PC holds the result count here (already shifted) and is passed as n.
1629 | | mov L:RA, TMP1 | ||
1630 | | mov L:RA->top, KBASE // Undo coroutine stack clearing. | ||
1631 | | mov CARG2, PC | ||
1632 | | mov CARG1, L:RB | ||
1633 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
1634 | | mov L:PC, TMP1 | ||
1635 | | mov BASE, L:RB->base | ||
1636 | | jmp <4 // Retry the stack move. | ||
1637 | |.endmacro | ||
1638 | | | ||
1639 | | coroutine_resume_wrap 1 // coroutine.resume | ||
1640 | | coroutine_resume_wrap 0 // coroutine.wrap | ||
1641 | | | ||
// coroutine.yield fast path: falls back unless CFRAME_RESUME is set in
// L->cframe. Otherwise records base/top (top = BASE + nargs*8), clears
// L->cframe, sets L->status to LUA_YIELD and leaves through vm_leave_unw.
1642 | |.ffunc coroutine_yield | ||
1643 | | mov L:RB, SAVE_L | ||
1644 | | test aword L:RB->cframe, CFRAME_RESUME | ||
1645 | | jz ->fff_fallback | ||
1646 | | mov L:RB->base, BASE | ||
1647 | | lea RD, [BASE+NARGS:RD*8-8] | ||
1648 | | mov L:RB->top, RD | ||
// RD is zeroed to clear cframe; al is then set to LUA_YIELD.
// NOTE(review): RD presumably aliases rax (other comments here say "returned
// in eax (RD)"), which would make eax == LUA_YIELD on exit -- confirm.
1649 | | xor RDd, RDd | ||
1650 | | mov aword L:RB->cframe, RD | ||
1651 | | mov al, LUA_YIELD | ||
1652 | | mov byte L:RB->status, al | ||
1653 | | jmp ->vm_leave_unw | ||
1654 | | | ||
1655 | |//-- Math library ------------------------------------------------------- | ||
1656 | | | ||
// math.abs fast path.
//  DUALNUM: non-negative integers are returned as is; negative ones are
//  negated, and if the negation is still negative (sign flag set after neg)
//  the constant double 2^31 is returned instead. Non-integers continue at
//  label 3, which falls back for tags above the number range.
//  Numbers (both modes): the sign bit is cleared with a shl/shr pair.
// Also defines the shared exits fff_resbit/fff_resi (integer result in RBd)
// and fff_resRB (tagged value in RB).
1657 | | .ffunc_1 math_abs | ||
1658 | | mov RB, [BASE] | ||
1659 | |.if DUALNUM | ||
1660 | | checkint RB, >3 | ||
1661 | | cmp RBd, 0; jns ->fff_resi | ||
1662 | | neg RBd; js >2 | ||
1663 | |->fff_resbit: | ||
1664 | |->fff_resi: | ||
1665 | | setint RB | ||
1666 | |->fff_resRB: | ||
1667 | | mov PC, [BASE-8] | ||
1668 | | mov [BASE-16], RB | ||
1669 | | jmp ->fff_res1 | ||
1670 | |2: | ||
1671 | | mov64 RB, U64x(41e00000,00000000) // 2^31. | ||
1672 | | jmp ->fff_resRB | ||
1673 | |3: | ||
1674 | | ja ->fff_fallback | ||
1675 | |.else | ||
1676 | | checknum RB, ->fff_fallback | ||
1677 | |.endif | ||
// Shift the top bit out and back in as zero: clears the IEEE-754 sign bit.
1678 | | shl RB, 1 | ||
1679 | | shr RB, 1 | ||
1680 | | mov PC, [BASE-8] | ||
1681 | | mov [BASE-16], RB | ||
1682 | | jmp ->fff_res1 | ||
1683 | | | ||
// math.sqrt fast path followed by the shared fast-function return path.
// Entry points, each falling through into the next:
//   fff_resxmm0: store xmm0 as the single result, load PC from [BASE-8].
//   fff_res1:    set RD = 1+1 (one result).
//   fff_res:     store RD in MULTRES.
//   fff_res_:    return to the caller (MULTRES already set).
1684 | |.ffunc_n math_sqrt, sqrtsd | ||
1685 | |->fff_resxmm0: | ||
1686 | | mov PC, [BASE-8] | ||
1687 | | movsd qword [BASE-16], xmm0 | ||
1688 | | // fallthrough | ||
1689 | | | ||
1690 | |->fff_res1: | ||
1691 | | mov RDd, 1+1 | ||
1692 | |->fff_res: | ||
1693 | | mov MULTRES, RDd | ||
1694 | |->fff_res_: | ||
// PC with any FRAME_TYPE bit set takes the generic vm_return path at label 7.
1695 | | test PCd, FRAME_TYPE | ||
1696 | | jnz >7 | ||
1697 | |5: | ||
// Loop: while PC_RB is above the low byte of RD, append nil results.
1698 | | cmp PC_RB, RDL // More results expected? | ||
1699 | | ja >6 | ||
1700 | | // Adjust BASE. KBASE is assumed to be set for the calling frame. | ||
1701 | | movzx RAd, PC_RA | ||
1702 | | neg RA | ||
1703 | | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 | ||
1704 | | ins_next | ||
1705 | | | ||
1706 | |6: // Fill up results with nil. | ||
1707 | | mov aword [BASE+RD*8-24], LJ_TNIL | ||
1708 | | add RD, 1 | ||
1709 | | jmp <5 | ||
1710 | | | ||
1711 | |7: // Non-standard return case. | ||
1712 | | mov RA, -16 // Results start at BASE+RA = BASE-16. | ||
1713 | | jmp ->vm_return | ||
1714 | | | ||
// math_round: generates ff_math_<func> (instantiated for floor and ceil).
// Calls vm_<func>_sse on the argument in xmm0.
// DUALNUM: integer arguments are returned unchanged via fff_resRB; the
// rounded result is returned as an integer when it converts exactly,
// otherwise as a double.
// NOTE(review): uses .ffunc, so no argument-count check is emitted here.
1715 | |.macro math_round, func | ||
1716 | | .ffunc math_ .. func | ||
1717 | |.if DUALNUM | ||
1718 | | mov RB, [BASE] | ||
1719 | | checknumx RB, ->fff_resRB, je | ||
1720 | | ja ->fff_fallback | ||
1721 | |.else | ||
1722 | | checknumtp [BASE], ->fff_fallback | ||
1723 | |.endif | ||
1724 | | movsd xmm0, qword [BASE] | ||
1725 | | call ->vm_ .. func .. _sse | ||
1726 | |.if DUALNUM | ||
// cvttsd2si yields 0x80000000 for out-of-range or NaN inputs as well as for
// a genuine -2^31, so that value is verified by converting back and
// comparing; an unordered compare (parity flag) keeps the double result.
1727 | | cvttsd2si RBd, xmm0 | ||
1728 | | cmp RBd, 0x80000000 | ||
1729 | | jne ->fff_resi | ||
1730 | | cvtsi2sd xmm1, RBd | ||
1731 | | ucomisd xmm0, xmm1 | ||
1732 | | jp ->fff_resxmm0 | ||
1733 | | je ->fff_resi | ||
1734 | |.endif | ||
1735 | | jmp ->fff_resxmm0 | ||
1736 | |.endmacro | ||
1737 | | | ||
1738 | | math_round floor | ||
1739 | | math_round ceil | ||
1740 | | | ||
// math.log fast path: handles only the single-argument form with a number
// argument by calling the C library log(). BASE is kept in RB across the
// call and restored afterwards.
1741 | |.ffunc math_log | ||
1742 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. | ||
1743 | | checknumtp [BASE], ->fff_fallback | ||
1744 | | movsd xmm0, qword [BASE] | ||
1745 | | mov RB, BASE | ||
1746 | | call extern log | ||
1747 | | mov BASE, RB | ||
1748 | | jmp ->fff_resxmm0 | ||
1749 | | | ||
// math_extern func: generates ff_math_<func> for a one-argument C math
// function. The number argument is loaded into xmm0 by .ffunc_n, BASE is
// kept in RB across the call, and the result in xmm0 is returned.
1750 | |.macro math_extern, func | ||
1751 | | .ffunc_n math_ .. func | ||
1752 | | mov RB, BASE | ||
1753 | | call extern func | ||
1754 | | mov BASE, RB | ||
1755 | | jmp ->fff_resxmm0 | ||
1756 | |.endmacro | ||
1757 | | | ||
// math_extern2 func: same for two-argument functions; .ffunc_nn loads the
// arguments into xmm0 and xmm1.
1758 | |.macro math_extern2, func | ||
1759 | | .ffunc_nn math_ .. func | ||
1760 | | mov RB, BASE | ||
1761 | | call extern func | ||
1762 | | mov BASE, RB | ||
1763 | | jmp ->fff_resxmm0 | ||
1764 | |.endmacro | ||
1765 | | | ||
1766 | | math_extern log10 | ||
1767 | | math_extern exp | ||
1768 | | math_extern sin | ||
1769 | | math_extern cos | ||
1770 | | math_extern tan | ||
1771 | | math_extern asin | ||
1772 | | math_extern acos | ||
1773 | | math_extern atan | ||
1774 | | math_extern sinh | ||
1775 | | math_extern cosh | ||
1776 | | math_extern tanh | ||
1777 | | math_extern2 pow | ||
1778 | | math_extern2 atan2 | ||
1779 | | math_extern2 fmod | ||
1780 | | | ||
// math.ldexp fast path using the x87 FPU: both arguments must be numbers.
// The second argument is pushed first, so st0 = first argument and
// st1 = second argument when fscale executes.
1781 | |.ffunc_2 math_ldexp | ||
1782 | | checknumtp [BASE], ->fff_fallback | ||
1783 | | checknumtp [BASE+8], ->fff_fallback | ||
1784 | | fld qword [BASE+8] | ||
1785 | | fld qword [BASE] | ||
1786 | | fscale | ||
// NOTE(review): fpop1 presumably drops st1 and keeps the scaled value in
// st0 for the fstp below -- confirm against the macro definition.
1787 | | fpop1 | ||
1788 | | mov PC, [BASE-8] | ||
1789 | | fstp qword [BASE-16] | ||
1790 | | jmp ->fff_res1 | ||
1791 | | | ||
// math.frexp fast path: calls C frexp(x, &TMP1) and returns two results:
// the double returned in xmm0, and the 32-bit value written to TMP1 (as an
// integer under DUALNUM, otherwise converted to a double).
// The pointer argument is placed in CARG2 on X64WIN and in CARG1 otherwise.
1792 | |.ffunc_n math_frexp | ||
1793 | | mov RB, BASE | ||
1794 | |.if X64WIN | ||
1795 | | lea CARG2, TMP1 // Caveat: CARG2 == BASE | ||
1796 | |.else | ||
1797 | | lea CARG1, TMP1 | ||
1798 | |.endif | ||
1799 | | call extern frexp | ||
1800 | | mov BASE, RB | ||
1801 | | mov RBd, TMP1d | ||
// PC is loaded before [BASE-8] is overwritten with the second result.
1802 | | mov PC, [BASE-8] | ||
1803 | | movsd qword [BASE-16], xmm0 | ||
1804 | |.if DUALNUM | ||
1805 | | setint RB | ||
1806 | | mov [BASE-8], RB | ||
1807 | |.else | ||
1808 | | cvtsi2sd xmm1, RBd | ||
1809 | | movsd qword [BASE-8], xmm1 | ||
1810 | |.endif | ||
1811 | | mov RDd, 1+2 | ||
1812 | | jmp ->fff_res | ||
1813 | | | ||
// math.modf fast path: calls C modf(x, &BASE[-2]), so the C function writes
// its output-parameter value directly into the first result slot (BASE-16).
// The value returned in xmm0 becomes the second result at BASE-8.
// The pointer argument is placed in CARG2 on X64WIN and in CARG1 otherwise.
1814 | |.ffunc_n math_modf | ||
1815 | | mov RB, BASE | ||
1816 | |.if X64WIN | ||
1817 | | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE | ||
1818 | |.else | ||
1819 | | lea CARG1, [BASE-16] | ||
1820 | |.endif | ||
1821 | | call extern modf | ||
1822 | | mov BASE, RB | ||
// PC is loaded before [BASE-8] is overwritten with the second result.
1823 | | mov PC, [BASE-8] | ||
1824 | | movsd qword [BASE-8], xmm0 | ||
1825 | | mov RDd, 1+2 | ||
1826 | | jmp ->fff_res | ||
1827 | | | ||
// math_minmax name, cmovop, sseop: generates ff_<name> for math.min/max.
// RA counts arguments starting at 2 and is compared against RD (nargs+1);
// argument k lives at [BASE+k*8-8].
// DUALNUM: starts in an integer loop (label 1) that uses cmovop to select
// the running result; on the first non-integer argument the running result
// is converted to a double and the code continues in the number loop.
// Number loop (label 5): combines xmm0 with each argument using sseop.
1828 | |.macro math_minmax, name, cmovop, sseop | ||
1829 | | .ffunc_1 name | ||
1830 | | mov RAd, 2 | ||
1831 | |.if DUALNUM | ||
1832 | | mov RB, [BASE] | ||
1833 | | checkint RB, >4 | ||
1834 | |1: // Handle integers. | ||
1835 | | cmp RAd, RDd; jae ->fff_resRB | ||
1836 | | mov TMPR, [BASE+RA*8-8] | ||
1837 | | checkint TMPR, >3 | ||
1838 | | cmp RBd, TMPRd | ||
1839 | | cmovop RB, TMPR | ||
1840 | | add RAd, 1 | ||
1841 | | jmp <1 | ||
1842 | |3: | ||
1843 | | ja ->fff_fallback | ||
1844 | | // Convert intermediate result to number and continue below. | ||
1845 | | cvtsi2sd xmm0, RBd | ||
1846 | | jmp >6 | ||
1847 | |4: | ||
1848 | | ja ->fff_fallback | ||
1849 | |.else | ||
1850 | | checknumtp [BASE], ->fff_fallback | ||
1851 | |.endif | ||
1852 | | | ||
1853 | | movsd xmm0, qword [BASE] | ||
1854 | |5: // Handle numbers or integers. | ||
1855 | | cmp RAd, RDd; jae ->fff_resxmm0 | ||
1856 | |.if DUALNUM | ||
// Doubles are loaded at label 6; integers are converted into xmm1 and skip
// the load by jumping to label 7.
1857 | | mov RB, [BASE+RA*8-8] | ||
1858 | | checknumx RB, >6, jb | ||
1859 | | ja ->fff_fallback | ||
1860 | | cvtsi2sd xmm1, RBd | ||
1861 | | jmp >7 | ||
1862 | |.else | ||
1863 | | checknumtp [BASE+RA*8-8], ->fff_fallback | ||
1864 | |.endif | ||
1865 | |6: | ||
1866 | | movsd xmm1, qword [BASE+RA*8-8] | ||
1867 | |7: | ||
1868 | | sseop xmm0, xmm1 | ||
1869 | | add RAd, 1 | ||
1870 | | jmp <5 | ||
1871 | |.endmacro | ||
1872 | | | ||
1873 | | math_minmax math_min, cmovg, minsd | ||
1874 | | math_minmax math_max, cmovl, maxsd | ||
1875 | | | ||
1876 | |//-- String library ----------------------------------------------------- | ||
1877 | | | ||
// string.byte fast path: exactly one string argument. Returns no results for
// an empty string, otherwise the first byte of the string data (read at
// STR:RB[1], i.e. directly after the GCstr header), zero-extended.
1878 | |.ffunc string_byte // Only handle the 1-arg case here. | ||
1879 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback | ||
1880 | | mov STR:RB, [BASE] | ||
1881 | | checkstr STR:RB, ->fff_fallback | ||
1882 | | mov PC, [BASE-8] | ||
1883 | | cmp dword STR:RB->len, 1 | ||
1884 | | jb ->fff_res0 // Return no results for empty string. | ||
1885 | | movzx RBd, byte STR:RB[1] | ||
1886 | |.if DUALNUM | ||
1887 | | jmp ->fff_resi | ||
1888 | |.else | ||
1889 | | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0 | ||
1890 | |.endif | ||
1891 | | | ||
// string.char fast path: exactly one argument whose integer value is in
// 0..255 (unsigned compare, so negative values also fall back). The byte is
// written to the stack temporary TMP1 and turned into a 1-byte string.
// Also defines two shared tails:
//   fff_newstr: RD = data pointer, TMPRd = length -> lj_str_new(L, RD, TMPR)
//   fff_resstr: return the GCstr * in RD as a tagged string result.
1892 | |.ffunc string_char // Only handle the 1-arg case here. | ||
1893 | | ffgccheck | ||
1894 | | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | ||
1895 | |.if DUALNUM | ||
1896 | | mov RB, [BASE] | ||
1897 | | checkint RB, ->fff_fallback | ||
1898 | |.else | ||
1899 | | checknumtp [BASE], ->fff_fallback | ||
1900 | | cvttsd2si RBd, qword [BASE] | ||
1901 | |.endif | ||
1902 | | cmp RBd, 255; ja ->fff_fallback | ||
1903 | | mov TMP1d, RBd | ||
1904 | | mov TMPRd, 1 | ||
1905 | | lea RD, TMP1 // Points to stack. Little-endian. | ||
1906 | |->fff_newstr: | ||
1907 | | mov L:RB, SAVE_L | ||
1908 | | mov L:RB->base, BASE | ||
1909 | | mov CARG3d, TMPRd // Zero-extended to size_t. | ||
1910 | | mov CARG2, RD | ||
1911 | | mov CARG1, L:RB | ||
1912 | | mov SAVE_PC, PC | ||
1913 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | ||
1914 | |->fff_resstr: | ||
1915 | | // GCstr * returned in eax (RD). | ||
// Expects L:RB to still hold the lua_State so BASE can be reloaded.
1916 | | mov BASE, L:RB->base | ||
1917 | | mov PC, [BASE-8] | ||
1918 | | settp STR:RD, LJ_TSTR | ||
1919 | | mov [BASE-16], STR:RD | ||
1920 | | jmp ->fff_res1 | ||
1921 | | | ||
// string.sub fast path: needs a string plus a start index; the end index
// defaults to -1 when only two arguments are given.
// Working registers: RA = start, TMPR = end, RC = string length.
// Both indexes are normalised (negative values count from the end, values
// out of range are clamped) and the result is created through fff_newstr
// with RD = pointer to the first byte and TMPR = length.
1922 | |.ffunc string_sub | ||
1923 | | ffgccheck | ||
1924 | | mov TMPRd, -1 | ||
// The mov above does not change flags; jb rejects fewer than two arguments,
// jna skips loading the third argument when exactly two were passed.
1925 | | cmp NARGS:RDd, 1+2; jb ->fff_fallback | ||
1926 | | jna >1 | ||
1927 | |.if DUALNUM | ||
1928 | | mov TMPR, [BASE+16] | ||
1929 | | checkint TMPR, ->fff_fallback | ||
1930 | |.else | ||
1931 | | checknumtp [BASE+16], ->fff_fallback | ||
1932 | | cvttsd2si TMPRd, qword [BASE+16] | ||
1933 | |.endif | ||
1934 | |1: | ||
1935 | | mov STR:RB, [BASE] | ||
1936 | | checkstr STR:RB, ->fff_fallback | ||
1937 | |.if DUALNUM | ||
1938 | | mov ITYPE, [BASE+8] | ||
1939 | | mov RAd, ITYPEd // Must clear hiword for lea below. | ||
1940 | | sar ITYPE, 47 | ||
1941 | | cmp ITYPEd, LJ_TISNUM | ||
1942 | | jne ->fff_fallback | ||
1943 | |.else | ||
1944 | | checknumtp [BASE+8], ->fff_fallback | ||
1945 | | cvttsd2si RAd, qword [BASE+8] | ||
1946 | |.endif | ||
1947 | | mov RCd, STR:RB->len | ||
1948 | | cmp RCd, TMPRd // len < end? (unsigned compare) | ||
1949 | | jb >5 | ||
1950 | |2: | ||
1951 | | test RAd, RAd // start <= 0? | ||
1952 | | jle >7 | ||
1953 | |3: | ||
// TMPR becomes end-start; the +1 below turns it into the substring length.
1954 | | sub TMPRd, RAd // start > end? | ||
1955 | | jl ->fff_emptystr | ||
1956 | | lea RD, [STR:RB+RAd+#STR-1] | ||
1957 | | add TMPRd, 1 | ||
1958 | |4: | ||
1959 | | jmp ->fff_newstr | ||
1960 | | | ||
// Label 5 reuses the flags of the "cmp RCd, TMPRd" above: the signed "less"
// case means end > len (clamp at label 6); otherwise end is negative.
1961 | |5: // Negative end or overflow. | ||
1962 | | jl >6 | ||
1963 | | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1) | ||
1964 | | jmp <2 | ||
1965 | |6: // Overflow. | ||
1966 | | mov TMPRd, RCd // end = len | ||
1967 | | jmp <2 | ||
1968 | | | ||
// Label 7 reuses the flags of the "test RAd, RAd" above: zero goes straight
// to label 8, negative values are made relative to the end of the string.
1969 | |7: // Negative start or underflow. | ||
1970 | | je >8 | ||
1971 | | add RAd, RCd // start = start+(len+1) | ||
1972 | | add RAd, 1 | ||
1973 | | jg <3 // start > 0? | ||
1974 | |8: // Underflow. | ||
1975 | | mov RAd, 1 // start = 1 | ||
1976 | | jmp <3 | ||
1977 | | | ||
1978 | |->fff_emptystr: // Range underflow. | ||
1979 | | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok. | ||
1980 | | jmp <4 | ||
1981 | | | ||
// ffstring_op name: generates ff_string_<name> (reverse, lower, upper).
// Requires a string argument, resets the global temporary buffer (w = b,
// L = current state), then calls lj_buf_putstr_<name>(sbuf, str) followed by
// lj_buf_tostr() on the returned buffer, and returns the resulting string
// through fff_resstr.
1982 | |.macro ffstring_op, name | ||
1983 | | .ffunc_1 string_ .. name | ||
1984 | | ffgccheck | ||
// On X64WIN the string is held in TMPR until L->base has been stored,
// because CARG2 aliases BASE there.
1985 | |.if X64WIN | ||
1986 | | mov STR:TMPR, [BASE] | ||
1987 | | checkstr STR:TMPR, ->fff_fallback | ||
1988 | |.else | ||
1989 | | mov STR:CARG2, [BASE] | ||
1990 | | checkstr STR:CARG2, ->fff_fallback | ||
1991 | |.endif | ||
1992 | | mov L:RB, SAVE_L | ||
1993 | | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] | ||
1994 | | mov L:RB->base, BASE | ||
1995 | |.if X64WIN | ||
1996 | | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE | ||
1997 | |.endif | ||
1998 | | mov RC, SBUF:CARG1->b | ||
1999 | | mov SBUF:CARG1->L, L:RB | ||
2000 | | mov SBUF:CARG1->w, RC | ||
2001 | | mov SAVE_PC, PC | ||
2002 | | call extern lj_buf_putstr_ .. name | ||
2003 | | mov CARG1, rax | ||
2004 | | call extern lj_buf_tostr | ||
2005 | | jmp ->fff_resstr | ||
2006 | |.endmacro | ||
2007 | | | ||
2008 | |ffstring_op reverse | ||
2009 | |ffstring_op lower | ||
2010 | |ffstring_op upper | ||
2011 | | | ||
2012 | |//-- Bit library -------------------------------------------------------- | ||
2013 | | | ||
2014 | |.macro .ffunc_bit, name, kind, fdef | ||
2015 | | fdef name | ||
2016 | |.if kind == 2 | ||
2017 | | sseconst_tobit xmm1, RB | ||
2018 | |.endif | ||
2019 | |.if DUALNUM | ||
2020 | | mov RB, [BASE] | ||
2021 | | checkint RB, >1 | ||
2022 | |.if kind > 0 | ||
2023 | | jmp >2 | ||
2024 | |.else | ||
2025 | | jmp ->fff_resbit | ||
2026 | |.endif | ||
2027 | |1: | ||
2028 | | ja ->fff_fallback | ||
2029 | | movd xmm0, RB | ||
2030 | |.else | ||
2031 | | checknumtp [BASE], ->fff_fallback | ||
2032 | | movsd xmm0, qword [BASE] | ||
2033 | |.endif | ||
2034 | |.if kind < 2 | ||
2035 | | sseconst_tobit xmm1, RB | ||
2036 | |.endif | ||
2037 | | addsd xmm0, xmm1 | ||
2038 | | movd RBd, xmm0 | ||
2039 | |2: | ||
2040 | |.endmacro | ||
2041 | | | ||
2042 | |.macro .ffunc_bit, name, kind | ||
2043 | | .ffunc_bit name, kind, .ffunc_1 | ||
2044 | |.endmacro | ||
2045 | | | ||
2046 | |.ffunc_bit bit_tobit, 0 | ||
2047 | | jmp ->fff_resbit | ||
2048 | | | ||
2049 | |.macro .ffunc_bit_op, name, ins | ||
2050 | | .ffunc_bit name, 2 | ||
2051 | | mov TMPRd, NARGS:RDd // Save for fallback. | ||
2052 | | lea RD, [BASE+NARGS:RD*8-16] | ||
2053 | |1: | ||
2054 | | cmp RD, BASE | ||
2055 | | jbe ->fff_resbit | ||
2056 | |.if DUALNUM | ||
2057 | | mov RA, [RD] | ||
2058 | | checkint RA, >2 | ||
2059 | | ins RBd, RAd | ||
2060 | | sub RD, 8 | ||
2061 | | jmp <1 | ||
2062 | |2: | ||
2063 | | ja ->fff_fallback_bit_op | ||
2064 | | movd xmm0, RA | ||
2065 | |.else | ||
2066 | | checknumtp [RD], ->fff_fallback_bit_op | ||
2067 | | movsd xmm0, qword [RD] | ||
2068 | |.endif | ||
2069 | | addsd xmm0, xmm1 | ||
2070 | | movd RAd, xmm0 | ||
2071 | | ins RBd, RAd | ||
2072 | | sub RD, 8 | ||
2073 | | jmp <1 | ||
2074 | |.endmacro | ||
2075 | | | ||
2076 | |.ffunc_bit_op bit_band, and | ||
2077 | |.ffunc_bit_op bit_bor, or | ||
2078 | |.ffunc_bit_op bit_bxor, xor | ||
2079 | | | ||
2080 | |.ffunc_bit bit_bswap, 1 | ||
2081 | | bswap RBd | ||
2082 | | jmp ->fff_resbit | ||
2083 | | | ||
2084 | |.ffunc_bit bit_bnot, 1 | ||
2085 | | not RBd | ||
2086 | |.if DUALNUM | ||
2087 | | jmp ->fff_resbit | ||
2088 | |.else | ||
2089 | |->fff_resbit: | ||
2090 | | cvtsi2sd xmm0, RBd | ||
2091 | | jmp ->fff_resxmm0 | ||
2092 | |.endif | ||
2093 | | | ||
2094 | |->fff_fallback_bit_op: | ||
2095 | | mov NARGS:RDd, TMPRd // Restore for fallback | ||
2096 | | jmp ->fff_fallback | ||
2097 | | | ||
2098 | |.macro .ffunc_bit_sh, name, ins | ||
2099 | |.if DUALNUM | ||
2100 | | .ffunc_bit name, 1, .ffunc_2 | ||
2101 | | // Note: no inline conversion from number for 2nd argument! | ||
2102 | | mov RA, [BASE+8] | ||
2103 | | checkint RA, ->fff_fallback | ||
2104 | |.else | ||
2105 | | .ffunc_nn name | ||
2106 | | sseconst_tobit xmm2, RB | ||
2107 | | addsd xmm0, xmm2 | ||
2108 | | addsd xmm1, xmm2 | ||
2109 | | movd RBd, xmm0 | ||
2110 | | movd RAd, xmm1 | ||
2111 | |.endif | ||
2112 | | ins RBd, cl // Assumes RA is ecx. | ||
2113 | | jmp ->fff_resbit | ||
2114 | |.endmacro | ||
2115 | | | ||
2116 | |.ffunc_bit_sh bit_lshift, shl | ||
2117 | |.ffunc_bit_sh bit_rshift, shr | ||
2118 | |.ffunc_bit_sh bit_arshift, sar | ||
2119 | |.ffunc_bit_sh bit_rol, rol | ||
2120 | |.ffunc_bit_sh bit_ror, ror | ||
2121 | | | ||
2122 | |//----------------------------------------------------------------------- | ||
2123 | | | ||
2124 | |->fff_fallback_2: | ||
2125 | | mov NARGS:RDd, 1+2 // Other args are ignored, anyway. | ||
2126 | | jmp ->fff_fallback | ||
2127 | |->fff_fallback_1: | ||
2128 | | mov NARGS:RDd, 1+1 // Other args are ignored, anyway. | ||
2129 | |->fff_fallback: // Call fast function fallback handler. | ||
2130 | | // BASE = new base, RD = nargs+1 | ||
2131 | | mov L:RB, SAVE_L | ||
2132 | | mov PC, [BASE-8] // Fallback may overwrite PC. | ||
2133 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
2134 | | mov L:RB->base, BASE | ||
2135 | | lea RD, [BASE+NARGS:RD*8-8] | ||
2136 | | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. | ||
2137 | | mov L:RB->top, RD | ||
2138 | | mov CFUNC:RD, [BASE-16] | ||
2139 | | cleartp CFUNC:RD | ||
2140 | | cmp RA, L:RB->maxstack | ||
2141 | | ja >5 // Need to grow stack. | ||
2142 | | mov CARG1, L:RB | ||
2143 | | call aword CFUNC:RD->f // (lua_State *L) | ||
2144 | | mov BASE, L:RB->base | ||
2145 | | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ||
2146 | | test RDd, RDd; jg ->fff_res // Returned nresults+1? | ||
2147 | |1: | ||
2148 | | mov RA, L:RB->top | ||
2149 | | sub RA, BASE | ||
2150 | | shr RAd, 3 | ||
2151 | | test RDd, RDd | ||
2152 | | lea NARGS:RDd, [RAd+1] | ||
2153 | | mov LFUNC:RB, [BASE-16] | ||
2154 | | jne ->vm_call_tail // Returned -1? | ||
2155 | | cleartp LFUNC:RB | ||
2156 | | ins_callt // Returned 0: retry fast path. | ||
2157 | | | ||
2158 | |// Reconstruct previous base for vmeta_call during tailcall. RA receives the current BASE; BASE is rewound by (PC_RA+2)*8 if the FRAME_TYPE bits of PC are clear, else by PC with its low 3 bits masked off. | ||
2159 | |->vm_call_tail: | ||
2160 | | mov RA, BASE | ||
2161 | | test PCd, FRAME_TYPE | ||
2162 | | jnz >3 | ||
2163 | | movzx RBd, PC_RA | ||
2164 | | neg RB | ||
2165 | | lea BASE, [BASE+RB*8-16] // base = base - (PC_RA+2)*8 (RB holds -PC_RA here). | ||
2166 | | jmp ->vm_call_dispatch // Resolve again for tailcall. | ||
2167 | |3: | ||
2168 | | mov RB, PC | ||
2169 | | and RB, -8 | ||
2170 | | sub BASE, RB | ||
2171 | | jmp ->vm_call_dispatch // Resolve again for tailcall. | ||
2172 | | | ||
2173 | |5: // Grow stack for fallback handler. | ||
2174 | | mov CARG2d, LUA_MINSTACK | ||
2175 | | mov CARG1, L:RB | ||
2176 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
2177 | | mov BASE, L:RB->base | ||
2178 | | xor RDd, RDd // Simulate a return 0. | ||
2179 | | jmp <1 // Dumb retry (goes through ff first). | ||
2180 | | | ||
2181 | |->fff_gcstep: // Call GC step function. | ||
2182 | | // In: BASE = new base, RD = nargs+1. Out: BASE reloaded from L->base, RD = (L->top - BASE)/8 + 1. | ||
2183 | | pop RB // Must keep stack at same level. | ||
2184 | | mov TMP1, RB // Save return address across the call (RB is reused for L below). | ||
2185 | | mov L:RB, SAVE_L | ||
2186 | | mov SAVE_PC, PC // Redundant (but a defined value). | ||
2187 | | mov L:RB->base, BASE | ||
2188 | | lea RD, [BASE+NARGS:RD*8-8] | ||
2189 | | mov CARG1, L:RB | ||
2190 | | mov L:RB->top, RD | ||
2191 | | call extern lj_gc_step // (lua_State *L) | ||
2192 | | mov BASE, L:RB->base | ||
2193 | | mov RD, L:RB->top | ||
2194 | | sub RD, BASE | ||
2195 | | shr RDd, 3 | ||
2196 | | add NARGS:RDd, 1 | ||
2197 | | mov RB, TMP1 | ||
2198 | | push RB // Restore return address. | ||
2199 | | ret | ||
2200 | | | ||
2201 | |//----------------------------------------------------------------------- | ||
2202 | |//-- Special dispatch targets ------------------------------------------- | ||
2203 | |//----------------------------------------------------------------------- | ||
2204 | | | ||
2205 | |->vm_record: // Dispatch target for recording phase. | ||
2206 | |.if JIT | ||
2207 | | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
2208 | | test RDL, HOOK_VMEVENT // No recording while in vmevent. | ||
2209 | | jnz >5 | ||
2210 | | // Decrement the hookcount for consistency, but always do the call. | ||
2211 | | test RDL, HOOK_ACTIVE | ||
2212 | | jnz >1 | ||
2213 | | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | ||
2214 | | jz >1 | ||
2215 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | ||
2216 | | jmp >1 | ||
2217 | |.endif | ||
2218 | | | ||
2219 | |->vm_rethook: // Dispatch target for return hooks. | ||
2220 | | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
2221 | | test RDL, HOOK_ACTIVE // Hook already active? | ||
2222 | | jnz >5 | ||
2223 | | jmp >1 | ||
2224 | | | ||
2225 | |->vm_inshook: // Dispatch target for instr/line hooks. | ||
2226 | | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | ||
2227 | | test RDL, HOOK_ACTIVE // Hook already active? | ||
2228 | | jnz >5 | ||
2229 | | | ||
2230 | | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | ||
2231 | | jz >5 | ||
2232 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | ||
2233 | | jz >1 | ||
2234 | | test RDL, LUA_MASKLINE | ||
2235 | | jz >5 | ||
2236 | |1: | ||
2237 | | mov L:RB, SAVE_L | ||
2238 | | mov L:RB->base, BASE | ||
2239 | | mov CARG2, PC // Caveat: CARG2 == BASE | ||
2240 | | mov CARG1, L:RB | ||
2241 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | ||
2242 | | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) | ||
2243 | |3: | ||
2244 | | mov BASE, L:RB->base | ||
2245 | |4: | ||
2246 | | movzx RAd, PC_RA | ||
2247 | |5: | ||
2248 | | movzx OP, PC_OP | ||
2249 | | movzx RDd, PC_RD | ||
2250 | | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. | ||
2251 | | | ||
2252 | |->cont_hook: // Continue from hook yield. | ||
2253 | | add PC, 4 | ||
2254 | | mov RA, [RB-40] | ||
2255 | | mov MULTRES, RAd // Restore MULTRES for *M ins. | ||
2256 | | jmp <4 | ||
2257 | | | ||
2258 | |->vm_hotloop: // Hot loop counter underflow. | ||
2259 | |.if JIT | ||
2260 | | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L). | ||
2261 | | cleartp LFUNC:RB | ||
2262 | | mov RB, LFUNC:RB->pc | ||
2263 | | movzx RDd, byte [RB+PC2PROTO(framesize)] | ||
2264 | | lea RD, [BASE+RD*8] | ||
2265 | | mov L:RB, SAVE_L | ||
2266 | | mov L:RB->base, BASE | ||
2267 | | mov L:RB->top, RD | ||
2268 | | mov CARG2, PC | ||
2269 | | lea CARG1, [DISPATCH+GG_DISP2J] | ||
2270 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RB | ||
2271 | | mov SAVE_PC, PC | ||
2272 | | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) | ||
2273 | | jmp <3 | ||
2274 | |.endif | ||
2275 | | | ||
2276 | |->vm_callhook: // Dispatch target for call hooks. | ||
2277 | | mov SAVE_PC, PC | ||
2278 | |.if JIT | ||
2279 | | jmp >1 | ||
2280 | |.endif | ||
2281 | | | ||
2282 | |->vm_hotcall: // Hot call counter underflow. | ||
2283 | |.if JIT | ||
2284 | | mov SAVE_PC, PC | ||
2285 | | or PC, 1 // Marker for hot call. | ||
2286 | |1: | ||
2287 | |.endif | ||
2288 | | lea RD, [BASE+NARGS:RD*8-8] | ||
2289 | | mov L:RB, SAVE_L | ||
2290 | | mov L:RB->base, BASE | ||
2291 | | mov L:RB->top, RD | ||
2292 | | mov CARG2, PC | ||
2293 | | mov CARG1, L:RB | ||
2294 | | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc) | ||
2295 | | // ASMFunction returned in eax/rax (RD). | ||
2296 | | mov SAVE_PC, 0 // Invalidate for subsequent line hook. | ||
2297 | |.if JIT | ||
2298 | | and PC, -2 | ||
2299 | |.endif | ||
2300 | | mov BASE, L:RB->base | ||
2301 | | mov RA, RD | ||
2302 | | mov RD, L:RB->top | ||
2303 | | sub RD, BASE | ||
2304 | | mov RB, RA | ||
2305 | | movzx RAd, PC_RA | ||
2306 | | shr RDd, 3 | ||
2307 | | add NARGS:RDd, 1 | ||
2308 | | jmp RB | ||
2309 | | | ||
2310 | |->cont_stitch: // Trace stitching. | ||
2311 | |.if JIT | ||
2312 | | // BASE = base, RC = result, RB = mbase | ||
2313 | | mov TRACE:ITYPE, [RB-40] // Save previous trace. | ||
2314 | | cleartp TRACE:ITYPE | ||
2315 | | mov TMPRd, MULTRES | ||
2316 | | movzx RAd, PC_RA | ||
2317 | | lea RA, [BASE+RA*8] // Call base. | ||
2318 | | sub TMPRd, 1 | ||
2319 | | jz >2 | ||
2320 | |1: // Move results down. | ||
2321 | | mov RB, [RC] | ||
2322 | | mov [RA], RB | ||
2323 | | add RC, 8 | ||
2324 | | add RA, 8 | ||
2325 | | sub TMPRd, 1 | ||
2326 | | jnz <1 | ||
2327 | |2: | ||
2328 | | movzx RCd, PC_RA | ||
2329 | | movzx RBd, PC_RB | ||
2330 | | add RC, RB | ||
2331 | | lea RC, [BASE+RC*8-8] | ||
2332 | |3: | ||
2333 | | cmp RC, RA | ||
2334 | | ja >9 // More results wanted? | ||
2335 | | | ||
2336 | | test TRACE:ITYPE, TRACE:ITYPE | ||
2337 | | jz ->cont_nop | ||
2338 | | movzx RBd, word TRACE:ITYPE->traceno | ||
2339 | | movzx RDd, word TRACE:ITYPE->link | ||
2340 | | cmp RDd, RBd | ||
2341 | | je ->cont_nop // Blacklisted. | ||
2342 | | test RDd, RDd | ||
2343 | | jne =>BC_JLOOP // Jump to stitched trace. | ||
2344 | | | ||
2345 | | // Stitch a new trace to the previous trace. | ||
2346 | | mov [DISPATCH+DISPATCH_J(exitno)], RB | ||
2347 | | mov L:RB, SAVE_L | ||
2348 | | mov L:RB->base, BASE | ||
2349 | | mov CARG2, PC | ||
2350 | | lea CARG1, [DISPATCH+GG_DISP2J] | ||
2351 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RB | ||
2352 | | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | ||
2353 | | mov BASE, L:RB->base | ||
2354 | | jmp ->cont_nop | ||
2355 | | | ||
2356 | |9: // Fill up results with nil. | ||
2357 | | mov aword [RA], LJ_TNIL | ||
2358 | | add RA, 8 | ||
2359 | | jmp <3 | ||
2360 | |.endif | ||
2361 | | | ||
2362 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2363 | #if LJ_HASPROFILE | ||
2364 | | mov L:RB, SAVE_L | ||
2365 | | mov L:RB->base, BASE | ||
2366 | | mov CARG2, PC // Caveat: CARG2 == BASE | ||
2367 | | mov CARG1, L:RB | ||
2368 | | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) | ||
2369 | | mov BASE, L:RB->base | ||
2370 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2371 | | sub PC, 4 | ||
2372 | | jmp ->cont_nop | ||
2373 | #endif | ||
2374 | | | ||
2375 | |//----------------------------------------------------------------------- | ||
2376 | |//-- Trace exit handler ------------------------------------------------- | ||
2377 | |//----------------------------------------------------------------------- | ||
2378 | | | ||
2379 | |// Called from an exit stub with the exit number on the stack. | ||
2380 | |// The 16 bit exit number is stored with two (sign-extended) push imm8. | ||
2381 | |->vm_exit_handler: | ||
2382 | |.if JIT | ||
2383 | | push r13; push r12 | ||
2384 | | push r11; push r10; push r9; push r8 | ||
2385 | | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp | ||
2386 | | push rbx; push rdx; push rcx; push rax | ||
2387 | | movzx RCd, byte [rbp-8] // Reconstruct exit number. | ||
2388 | | mov RCH, byte [rbp-16] | ||
2389 | | mov [rbp-8], r15; mov [rbp-16], r14 | ||
2390 | | // DISPATCH is preserved on-trace in LJ_GC64 mode. | ||
2391 | | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | ||
2392 | | set_vmstate EXIT | ||
2393 | | mov [DISPATCH+DISPATCH_J(exitno)], RCd | ||
2394 | | mov [DISPATCH+DISPATCH_J(parent)], RAd | ||
2395 | |.if X64WIN | ||
2396 | | sub rsp, 16*8+4*8 // Room for SSE regs + save area. | ||
2397 | |.else | ||
2398 | | sub rsp, 16*8 // Room for SSE regs. | ||
2399 | |.endif | ||
2400 | | add rbp, -128 | ||
2401 | | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 | ||
2402 | | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 | ||
2403 | | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 | ||
2404 | | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 | ||
2405 | | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 | ||
2406 | | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 | ||
2407 | | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 | ||
2408 | | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 | ||
2409 | | // Caveat: RB is rbp. | ||
2410 | | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] | ||
2411 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | ||
2412 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RB | ||
2413 | | mov L:RB->base, BASE | ||
2414 | |.if X64WIN | ||
2415 | | lea CARG2, [rsp+4*8] | ||
2416 | |.else | ||
2417 | | mov CARG2, rsp | ||
2418 | |.endif | ||
2419 | | lea CARG1, [DISPATCH+GG_DISP2J] | ||
2420 | | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
2421 | | call extern lj_trace_exit // (jit_State *J, ExitState *ex) | ||
2422 | | // MULTRES or negated error code returned in eax (RD). | ||
2423 | | mov RA, L:RB->cframe | ||
2424 | | and RA, CFRAME_RAWMASK | ||
2425 | | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). | ||
2426 | | mov BASE, L:RB->base | ||
2427 | | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC. | ||
2428 | | jmp >1 | ||
2429 | |.endif | ||
2430 | |->vm_exit_interp: | ||
2431 | | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. | ||
2432 | |.if JIT | ||
2433 | | // Restore additional callee-save registers only used in compiled code. | ||
2434 | |.if X64WIN | ||
2435 | | lea RA, [rsp+10*16+4*8] | ||
2436 | |1: | ||
2437 | | movdqa xmm15, [RA-10*16] | ||
2438 | | movdqa xmm14, [RA-9*16] | ||
2439 | | movdqa xmm13, [RA-8*16] | ||
2440 | | movdqa xmm12, [RA-7*16] | ||
2441 | | movdqa xmm11, [RA-6*16] | ||
2442 | | movdqa xmm10, [RA-5*16] | ||
2443 | | movdqa xmm9, [RA-4*16] | ||
2444 | | movdqa xmm8, [RA-3*16] | ||
2445 | | movdqa xmm7, [RA-2*16] | ||
2446 | | mov rsp, RA // Reposition stack to C frame. | ||
2447 | | movdqa xmm6, [RA-1*16] | ||
2448 | | mov r15, CSAVE_1 | ||
2449 | | mov r14, CSAVE_2 | ||
2450 | | mov r13, CSAVE_3 | ||
2451 | | mov r12, CSAVE_4 | ||
2452 | |.else | ||
2453 | | lea RA, [rsp+16] | ||
2454 | |1: | ||
2455 | | mov r13, [RA-8] | ||
2456 | | mov r12, [RA] | ||
2457 | | mov rsp, RA // Reposition stack to C frame. | ||
2458 | |.endif | ||
2459 | | test RDd, RDd; js >9 // Check for error from exit. | ||
2460 | | mov L:RB, SAVE_L | ||
2461 | | mov MULTRES, RDd | ||
2462 | | mov LFUNC:KBASE, [BASE-16] | ||
2463 | | cleartp LFUNC:KBASE | ||
2464 | | mov KBASE, LFUNC:KBASE->pc | ||
2465 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
2466 | | mov L:RB->base, BASE | ||
2467 | | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
2468 | | set_vmstate INTERP | ||
2469 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
2470 | | mov RCd, [PC] | ||
2471 | | movzx RAd, RCH | ||
2472 | | movzx OP, RCL | ||
2473 | | add PC, 4 | ||
2474 | | shr RCd, 16 | ||
2475 | | cmp OP, BC_FUNCF // Function header? | ||
2476 | | jb >3 | ||
2477 | | cmp OP, BC_FUNCC+2 // Fast function? | ||
2478 | | jae >4 | ||
2479 | |2: | ||
2480 | | mov RCd, MULTRES // RC/RD holds nres+1. | ||
2481 | |3: | ||
2482 | | jmp aword [DISPATCH+OP*8] | ||
2483 | | | ||
2484 | |4: // Check frame below fast function. | ||
2485 | | mov RC, [BASE-8] | ||
2486 | | test RCd, FRAME_TYPE | ||
2487 | | jnz <2 // Trace stitching continuation? | ||
2488 | | // Otherwise set KBASE for Lua function below fast function. | ||
2489 | | movzx RCd, byte [RC-3] | ||
2490 | | neg RC | ||
2491 | | mov LFUNC:KBASE, [BASE+RC*8-32] | ||
2492 | | cleartp LFUNC:KBASE | ||
2493 | | mov KBASE, LFUNC:KBASE->pc | ||
2494 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
2495 | | jmp <2 | ||
2496 | | | ||
2497 | |9: // Rethrow error from the right C frame. | ||
2498 | | mov CARG2d, RDd | ||
2499 | | mov CARG1, L:RB | ||
2500 | | neg CARG2d | ||
2501 | | call extern lj_err_trace // (lua_State *L, int errcode) | ||
2502 | |.endif | ||
2503 | | | ||
2504 | |//----------------------------------------------------------------------- | ||
2505 | |//-- Math helper functions ---------------------------------------------- | ||
2506 | |//----------------------------------------------------------------------- | ||
2507 | | | ||
2508 | |// FP value rounding (mode: 0 = floor, 1 = ceil, 2 = trunc). Called by math.floor/math.ceil fast functions | ||
2509 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. The cond argument is not referenced in the macro body. | ||
2510 | |.macro vm_round, name, mode, cond | ||
2511 | |->name: | ||
2512 | |->name .. _sse: | ||
2513 | | sseconst_abs xmm2, RD | ||
2514 | | sseconst_2p52 xmm3, RD | ||
2515 | | movaps xmm1, xmm0 | ||
2516 | | andpd xmm1, xmm2 // |x| | ||
2517 | | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x| (an unordered compare, i.e. NaN, also takes the jbe and returns x unchanged). | ||
2518 | | jbe >1 | ||
2519 | | andnpd xmm2, xmm0 // Isolate sign bit. | ||
2520 | |.if mode == 2 // trunc(x)? | ||
2521 | | movaps xmm0, xmm1 | ||
2522 | | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | ||
2523 | | subsd xmm1, xmm3 | ||
2524 | | sseconst_1 xmm3, RD | ||
2525 | | cmpsd xmm0, xmm1, 1 // |x| < result? (predicate 1 = less-than) | ||
2526 | | andpd xmm0, xmm3 | ||
2527 | | subsd xmm1, xmm0 // If yes, subtract 1 (mask ANDed with the sseconst_1 value). | ||
2528 | | orpd xmm1, xmm2 // Merge sign bit back in. | ||
2529 | |.else | ||
2530 | | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | ||
2531 | | subsd xmm1, xmm3 | ||
2532 | | orpd xmm1, xmm2 // Merge sign bit back in. | ||
2533 | | .if mode == 1 // ceil(x)? | ||
2534 | | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0. | ||
2535 | | cmpsd xmm0, xmm1, 6 // x > result? (predicate 6 = not-less-or-equal) | ||
2536 | | .else // floor(x)? | ||
2537 | | sseconst_1 xmm2, RD | ||
2538 | | cmpsd xmm0, xmm1, 1 // x < result? (predicate 1 = less-than) | ||
2539 | | .endif | ||
2540 | | andpd xmm0, xmm2 | ||
2541 | | subsd xmm1, xmm0 // If yes, subtract +-1 (+1 for floor, -1 for ceil). | ||
2542 | |.endif | ||
2543 | | movaps xmm0, xmm1 | ||
2544 | |1: | ||
2545 | | ret | ||
2546 | |.endmacro | ||
2547 | | | ||
2548 | | vm_round vm_floor, 0, 1 | ||
2549 | | vm_round vm_ceil, 1, JIT | ||
2550 | | vm_round vm_trunc, 2, JIT | ||
2551 | | | ||
2552 | |// FP modulo x%y, computed as x - floor(x/y)*y. Called by BC_MOD* and vm_arith. | ||
2553 | |->vm_mod: | ||
2554 | |// Args in xmm0/xmm1 (x/y), return value in xmm0. x is kept in xmm5 while the quotient is rounded. | ||
2555 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | ||
2556 | | movaps xmm5, xmm0 | ||
2557 | | divsd xmm0, xmm1 | ||
2558 | | sseconst_abs xmm2, RD | ||
2559 | | sseconst_2p52 xmm3, RD | ||
2560 | | movaps xmm4, xmm0 | ||
2561 | | andpd xmm4, xmm2 // |x/y| | ||
2562 | | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y| (or unordered): label 1 below uses the quotient as-is. | ||
2563 | | jbe >1 | ||
2564 | | andnpd xmm2, xmm0 // Isolate sign bit. | ||
2565 | | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 | ||
2566 | | subsd xmm4, xmm3 | ||
2567 | | orpd xmm4, xmm2 // Merge sign bit back in. | ||
2568 | | sseconst_1 xmm2, RD | ||
2569 | | cmpsd xmm0, xmm4, 1 // x/y < result? | ||
2570 | | andpd xmm0, xmm2 | ||
2571 | | subsd xmm4, xmm0 // If yes, subtract 1.0. | ||
2572 | | movaps xmm0, xmm5 | ||
2573 | | mulsd xmm1, xmm4 | ||
2574 | | subsd xmm0, xmm1 | ||
2575 | | ret | ||
2576 | |1: | ||
2577 | | mulsd xmm1, xmm0 | ||
2578 | | movaps xmm0, xmm5 | ||
2579 | | subsd xmm0, xmm1 | ||
2580 | | ret | ||
2581 | | | ||
2582 | |// Integer power x^i by repeated squaring. Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
2583 | |->vm_powi_sse: | ||
2584 | | cmp eax, 1; jle >6 // i<=1? (signed compare; the flags are reused at label 6) | ||
2585 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
2586 | |1: // Handle leading zeros. Also the entry point for the recursive call with the negated exponent. | ||
2587 | | test eax, 1; jnz >2 | ||
2588 | | mulsd xmm0, xmm0 | ||
2589 | | shr eax, 1 | ||
2590 | | jmp <1 | ||
2591 | |2: | ||
2592 | | shr eax, 1; jz >5 | ||
2593 | | movaps xmm1, xmm0 | ||
2594 | |3: // Handle trailing bits. xmm1 accumulates the product, xmm0 holds the running square. | ||
2595 | | mulsd xmm0, xmm0 | ||
2596 | | shr eax, 1; jz >4 | ||
2597 | | jnc <3 | ||
2598 | | mulsd xmm1, xmm0 | ||
2599 | | jmp <3 | ||
2600 | |4: | ||
2601 | | mulsd xmm0, xmm1 | ||
2602 | |5: | ||
2603 | | ret | ||
2604 | |6: | ||
2605 | | je <5 // x^1 ==> x | ||
2606 | | jb >7 // x^0 ==> 1 (unsigned below 1, i.e. i == 0). Otherwise i < 0: fall through and return 1/x^(-i). | ||
2607 | | neg eax | ||
2608 | | call <1 | ||
2609 | | sseconst_1 xmm1, RD | ||
2610 | | divsd xmm1, xmm0 | ||
2611 | | movaps xmm0, xmm1 | ||
2612 | | ret | ||
2613 | |7: | ||
2614 | | sseconst_1 xmm0, RD | ||
2615 | | ret | ||
2616 | | | ||
2617 | |//----------------------------------------------------------------------- | ||
2618 | |//-- Miscellaneous functions -------------------------------------------- | ||
2619 | |//----------------------------------------------------------------------- | ||
2620 | | | ||
2621 | |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]): runs cpuid with eax = f and ecx = 0, then stores eax/ebx/ecx/edx to the four dwords at rsi (X64WIN copies CARG2 into rsi first; elsewhere rsi is used directly, presumably because it already is CARG2). rbx is saved and restored. | ||
2622 | |->vm_cpuid: | ||
2623 | | mov eax, CARG1d | ||
2624 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif | ||
2625 | | push rbx | ||
2626 | | xor ecx, ecx | ||
2627 | | cpuid | ||
2628 | | mov [rsi], eax | ||
2629 | | mov [rsi+4], ebx | ||
2630 | | mov [rsi+8], ecx | ||
2631 | | mov [rsi+12], edx | ||
2632 | | pop rbx | ||
2633 | | .if X64WIN; pop rsi; .endif | ||
2634 | | ret | ||
2635 | | | ||
2636 | |.define NEXT_TAB, TAB:CARG1 | ||
2637 | |.define NEXT_IDX, CARG2d | ||
2638 | |.define NEXT_IDXa, CARG2 | ||
2639 | |.define NEXT_PTR, RC | ||
2640 | |.define NEXT_PTRd, RCd | ||
2641 | |.define NEXT_TMP, CARG3 | ||
2642 | |.define NEXT_ASIZE, CARG4d | ||
2643 | |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
2644 | |.if X64WIN | ||
2645 | |.define NEXT_RES_PTR, [rsp+aword*5] | ||
2646 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
2647 | |.else | ||
2648 | |.define NEXT_RES_PTR, [rsp+aword*1] | ||
2649 | |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
2650 | |.endif | ||
2651 | | | ||
2652 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2653 | |// Next idx returned in edx. | ||
2654 | |->vm_next: | ||
2655 | |.if JIT | ||
2656 | | mov NEXT_ASIZE, NEXT_TAB->asize | ||
2657 | |1: // Traverse array part. | ||
2658 | | cmp NEXT_IDX, NEXT_ASIZE; jae >5 | ||
2659 | | mov NEXT_TMP, NEXT_TAB->array | ||
2660 | | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] | ||
2661 | | cmp NEXT_TMP, LJ_TNIL; je >2 | ||
2662 | | lea NEXT_PTR, NEXT_RES_PTR | ||
2663 | | mov qword [NEXT_PTR], NEXT_TMP | ||
2664 | |.if DUALNUM | ||
2665 | | setint NEXT_TMP, NEXT_IDXa | ||
2666 | | mov qword [NEXT_PTR+qword*1], NEXT_TMP | ||
2667 | |.else | ||
2668 | | cvtsi2sd xmm0, NEXT_IDX | ||
2669 | | movsd qword [NEXT_PTR+qword*1], xmm0 | ||
2670 | |.endif | ||
2671 | | NEXT_RES_IDX 1 | ||
2672 | | ret | ||
2673 | |2: // Skip holes in array part. | ||
2674 | | add NEXT_IDX, 1 | ||
2675 | | jmp <1 | ||
2676 | | | ||
2677 | |5: // Traverse hash part. | ||
2678 | | sub NEXT_IDX, NEXT_ASIZE | ||
2679 | |6: | ||
2680 | | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 | ||
2681 | | imul NEXT_PTRd, NEXT_IDX, #NODE | ||
2682 | | add NODE:NEXT_PTR, NEXT_TAB->node | ||
2683 | | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7 | ||
2684 | | NEXT_RES_IDXL NEXT_ASIZE+1 | ||
2685 | | ret | ||
2686 | |7: // Skip holes in hash part. | ||
2687 | | add NEXT_IDX, 1 | ||
2688 | | jmp <6 | ||
2689 | | | ||
2690 | |9: // End of iteration. Set the key to nil (not the value). | ||
2691 | | NEXT_RES_IDX NEXT_ASIZE | ||
2692 | | lea NEXT_PTR, NEXT_RES_PTR | ||
2693 | | mov qword [NEXT_PTR+qword*1], LJ_TNIL | ||
2694 | | ret | ||
2695 | |.endif | ||
2696 | | | ||
2697 | |//----------------------------------------------------------------------- | ||
2698 | |//-- Assertions --------------------------------------------------------- | ||
2699 | |//----------------------------------------------------------------------- | ||
2700 | | | ||
2701 | |->assert_bad_for_arg_type: | ||
2702 | #ifdef LUA_USE_ASSERT | ||
2703 | | int3 | ||
2704 | #endif | ||
2705 | | int3 | ||
2706 | | | ||
2707 | |//----------------------------------------------------------------------- | ||
2708 | |//-- FFI helper functions ----------------------------------------------- | ||
2709 | |//----------------------------------------------------------------------- | ||
2710 | | | ||
2711 | |// Handler for callback functions. Callback slot number in ah/al. | ||
2712 | |->vm_ffi_callback: | ||
2713 | |.if FFI | ||
2714 | |.type CTSTATE, CTState, PC | ||
2715 | | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. | ||
2716 | | lea DISPATCH, [ebp+GG_G2DISP] | ||
2717 | | mov CTSTATE, GL:ebp->ctype_state | ||
2718 | | movzx eax, ax | ||
2719 | | mov CTSTATE->cb.slot, eax | ||
2720 | | mov CTSTATE->cb.gpr[0], CARG1 | ||
2721 | | mov CTSTATE->cb.gpr[1], CARG2 | ||
2722 | | mov CTSTATE->cb.gpr[2], CARG3 | ||
2723 | | mov CTSTATE->cb.gpr[3], CARG4 | ||
2724 | | movsd qword CTSTATE->cb.fpr[0], xmm0 | ||
2725 | | movsd qword CTSTATE->cb.fpr[1], xmm1 | ||
2726 | | movsd qword CTSTATE->cb.fpr[2], xmm2 | ||
2727 | | movsd qword CTSTATE->cb.fpr[3], xmm3 | ||
2728 | |.if X64WIN | ||
2729 | | lea rax, [rsp+CFRAME_SIZE+4*8] | ||
2730 | |.else | ||
2731 | | lea rax, [rsp+CFRAME_SIZE] | ||
2732 | | mov CTSTATE->cb.gpr[4], CARG5 | ||
2733 | | mov CTSTATE->cb.gpr[5], CARG6 | ||
2734 | | movsd qword CTSTATE->cb.fpr[4], xmm4 | ||
2735 | | movsd qword CTSTATE->cb.fpr[5], xmm5 | ||
2736 | | movsd qword CTSTATE->cb.fpr[6], xmm6 | ||
2737 | | movsd qword CTSTATE->cb.fpr[7], xmm7 | ||
2738 | |.endif | ||
2739 | | mov CTSTATE->cb.stack, rax | ||
2740 | | mov CARG2, rsp | ||
2741 | | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. | ||
2742 | | mov CARG1, CTSTATE | ||
2743 | | call extern lj_ccallback_enter // (CTState *cts, void *cf) | ||
2744 | | // lua_State * returned in eax (RD). | ||
2745 | | set_vmstate INTERP | ||
2746 | | mov BASE, L:RD->base | ||
2747 | | mov RD, L:RD->top | ||
2748 | | sub RD, BASE | ||
2749 | | mov LFUNC:RB, [BASE-16] | ||
2750 | | cleartp LFUNC:RB | ||
2751 | | shr RD, 3 | ||
2752 | | add RD, 1 | ||
2753 | | ins_callt | ||
2754 | |.endif | ||
2755 | | | ||
2756 | |->cont_ffi_callback: // Return from FFI callback. | ||
2757 | |.if FFI | ||
2758 | | mov L:RA, SAVE_L | ||
2759 | | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] | ||
2760 | | mov aword CTSTATE->L, L:RA | ||
2761 | | mov L:RA->base, BASE | ||
2762 | | mov L:RA->top, RB | ||
2763 | | mov CARG1, CTSTATE | ||
2764 | | mov CARG2, RC | ||
2765 | | call extern lj_ccallback_leave // (CTState *cts, TValue *o) | ||
2766 | | mov rax, CTSTATE->cb.gpr[0] | ||
2767 | | movsd xmm0, qword CTSTATE->cb.fpr[0] | ||
2768 | | jmp ->vm_leave_unw | ||
2769 | |.endif | ||
2770 | | | ||
2771 | |->vm_ffi_call: // Call C function via FFI. | ||
2772 | | // Caveat: needs special frame unwinding, see below. | ||
2773 | |.if FFI | ||
2774 | | .type CCSTATE, CCallState, rbx | ||
2775 | | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 | ||
2776 | | | ||
2777 | | // Readjust stack. | ||
2778 | | mov eax, CCSTATE->spadj | ||
2779 | | sub rsp, rax | ||
2780 | | | ||
2781 | | // Copy stack slots. | ||
2782 | | movzx ecx, byte CCSTATE->nsp | ||
2783 | | sub ecx, 1 | ||
2784 | | js >2 | ||
2785 | |1: | ||
2786 | | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] | ||
2787 | | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax | ||
2788 | | sub ecx, 1 | ||
2789 | | jns <1 | ||
2790 | |2: | ||
2791 | | | ||
2792 | | movzx eax, byte CCSTATE->nfpr | ||
2793 | | mov CARG1, CCSTATE->gpr[0] | ||
2794 | | mov CARG2, CCSTATE->gpr[1] | ||
2795 | | mov CARG3, CCSTATE->gpr[2] | ||
2796 | | mov CARG4, CCSTATE->gpr[3] | ||
2797 | |.if not X64WIN | ||
2798 | | mov CARG5, CCSTATE->gpr[4] | ||
2799 | | mov CARG6, CCSTATE->gpr[5] | ||
2800 | |.endif | ||
2801 | | test eax, eax; jz >5 | ||
2802 | | movaps xmm0, CCSTATE->fpr[0] | ||
2803 | | movaps xmm1, CCSTATE->fpr[1] | ||
2804 | | movaps xmm2, CCSTATE->fpr[2] | ||
2805 | | movaps xmm3, CCSTATE->fpr[3] | ||
2806 | |.if not X64WIN | ||
2807 | | cmp eax, 4; jbe >5 | ||
2808 | | movaps xmm4, CCSTATE->fpr[4] | ||
2809 | | movaps xmm5, CCSTATE->fpr[5] | ||
2810 | | movaps xmm6, CCSTATE->fpr[6] | ||
2811 | | movaps xmm7, CCSTATE->fpr[7] | ||
2812 | |.endif | ||
2813 | |5: | ||
2814 | | | ||
2815 | | call aword CCSTATE->func | ||
2816 | | | ||
2817 | | mov CCSTATE->gpr[0], rax | ||
2818 | | movaps CCSTATE->fpr[0], xmm0 | ||
2819 | |.if not X64WIN | ||
2820 | | mov CCSTATE->gpr[1], rdx | ||
2821 | | movaps CCSTATE->fpr[1], xmm1 | ||
2822 | |.endif | ||
2823 | | | ||
2824 | | mov rbx, [rbp-8]; leave; ret | ||
2825 | |.endif | ||
2826 | |// Note: vm_ffi_call must be the last function in this object file! | ||
2827 | | | ||
2828 | |//----------------------------------------------------------------------- | ||
2829 | } | ||
2830 | |||
2831 | /* Generate the code for a single instruction. */ | ||
2832 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ||
2833 | { | ||
2834 | int vk = 0; | ||
2835 | |// Note: aligning all instructions does not pay off. | ||
2836 | |=>defop: | ||
2837 | |||
2838 | switch (op) { | ||
2839 | |||
2840 | /* -- Comparison ops ---------------------------------------------------- */ | ||
2841 | |||
2842 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | ||
2843 | |||
2844 | |.macro jmp_comp, lt, ge, le, gt, target | ||
2845 | ||switch (op) { | ||
2846 | ||case BC_ISLT: | ||
2847 | | lt target | ||
2848 | ||break; | ||
2849 | ||case BC_ISGE: | ||
2850 | | ge target | ||
2851 | ||break; | ||
2852 | ||case BC_ISLE: | ||
2853 | | le target | ||
2854 | ||break; | ||
2855 | ||case BC_ISGT: | ||
2856 | | gt target | ||
2857 | ||break; | ||
2858 | ||default: break; /* Shut up GCC. */ | ||
2859 | ||} | ||
2860 | |.endmacro | ||
2861 | |||
2862 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | ||
2863 | | // RA = src1, RD = src2, JMP with RD = target | ||
2864 | | ins_AD | ||
2865 | | mov ITYPE, [BASE+RA*8] | ||
2866 | | mov RB, [BASE+RD*8] | ||
2867 | | mov RA, ITYPE | ||
2868 | | mov RD, RB | ||
2869 | | sar ITYPE, 47 | ||
2870 | | sar RB, 47 | ||
2871 | |.if DUALNUM | ||
2872 | | cmp ITYPEd, LJ_TISNUM; jne >7 | ||
2873 | | cmp RBd, LJ_TISNUM; jne >8 | ||
2874 | | add PC, 4 | ||
2875 | | cmp RAd, RDd | ||
2876 | | jmp_comp jge, jl, jg, jle, >9 | ||
2877 | |6: | ||
2878 | | movzx RDd, PC_RD | ||
2879 | | branchPC RD | ||
2880 | |9: | ||
2881 | | ins_next | ||
2882 | | | ||
2883 | |7: // RA is not an integer. | ||
2884 | | ja ->vmeta_comp | ||
2885 | | // RA is a number. | ||
2886 | | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp | ||
2887 | | // RA is a number, RD is an integer. | ||
2888 | | cvtsi2sd xmm0, RDd | ||
2889 | | jmp >2 | ||
2890 | | | ||
2891 | |8: // RA is an integer, RD is not an integer. | ||
2892 | | ja ->vmeta_comp | ||
2893 | | // RA is an integer, RD is a number. | ||
2894 | | cvtsi2sd xmm1, RAd | ||
2895 | | movd xmm0, RD | ||
2896 | | jmp >3 | ||
2897 | |.else | ||
2898 | | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp | ||
2899 | | cmp RBd, LJ_TISNUM; jae ->vmeta_comp | ||
2900 | |.endif | ||
2901 | |1: | ||
2902 | | movd xmm0, RD | ||
2903 | |2: | ||
2904 | | movd xmm1, RA | ||
2905 | |3: | ||
2906 | | add PC, 4 | ||
2907 | | ucomisd xmm0, xmm1 | ||
2908 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | ||
2909 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | ||
2910 | |.if DUALNUM | ||
2911 | | jmp_comp jbe, ja, jb, jae, <9 | ||
2912 | | jmp <6 | ||
2913 | |.else | ||
2914 | | jmp_comp jbe, ja, jb, jae, >1 | ||
2915 | | movzx RDd, PC_RD | ||
2916 | | branchPC RD | ||
2917 | |1: | ||
2918 | | ins_next | ||
2919 | |.endif | ||
2920 | break; | ||
2921 | |||
2922 | case BC_ISEQV: case BC_ISNEV: | ||
2923 | vk = op == BC_ISEQV; | ||
2924 | | ins_AD // RA = src1, RD = src2, JMP with RD = target | ||
2925 | | mov RB, [BASE+RD*8] | ||
2926 | | mov ITYPE, [BASE+RA*8] | ||
2927 | | add PC, 4 | ||
2928 | | mov RD, RB | ||
2929 | | mov RA, ITYPE | ||
2930 | | sar RB, 47 | ||
2931 | | sar ITYPE, 47 | ||
2932 | |.if DUALNUM | ||
2933 | | cmp RBd, LJ_TISNUM; jne >7 | ||
2934 | | cmp ITYPEd, LJ_TISNUM; jne >8 | ||
2935 | | cmp RDd, RAd | ||
2936 | if (vk) { | ||
2937 | | jne >9 | ||
2938 | } else { | ||
2939 | | je >9 | ||
2940 | } | ||
2941 | | movzx RDd, PC_RD | ||
2942 | | branchPC RD | ||
2943 | |9: | ||
2944 | | ins_next | ||
2945 | | | ||
2946 | |7: // RD is not an integer. | ||
2947 | | ja >5 | ||
2948 | | // RD is a number. | ||
2949 | | movd xmm1, RD | ||
2950 | | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5 | ||
2951 | | // RD is a number, RA is an integer. | ||
2952 | | cvtsi2sd xmm0, RAd | ||
2953 | | jmp >2 | ||
2954 | | | ||
2955 | |8: // RD is an integer, RA is not an integer. | ||
2956 | | ja >5 | ||
2957 | | // RD is an integer, RA is a number. | ||
2958 | | cvtsi2sd xmm1, RDd | ||
2959 | | jmp >1 | ||
2960 | | | ||
2961 | |.else | ||
2962 | | cmp RBd, LJ_TISNUM; jae >5 | ||
2963 | | cmp ITYPEd, LJ_TISNUM; jae >5 | ||
2964 | | movd xmm1, RD | ||
2965 | |.endif | ||
2966 | |1: | ||
2967 | | movd xmm0, RA | ||
2968 | |2: | ||
2969 | | ucomisd xmm0, xmm1 | ||
2970 | |4: | ||
2971 | iseqne_fp: | ||
2972 | if (vk) { | ||
2973 | | jp >2 // Unordered means not equal. | ||
2974 | | jne >2 | ||
2975 | } else { | ||
2976 | | jp >2 // Unordered means not equal. | ||
2977 | | je >1 | ||
2978 | } | ||
2979 | iseqne_end: | ||
2980 | if (vk) { | ||
2981 | |1: // EQ: Branch to the target. | ||
2982 | | movzx RDd, PC_RD | ||
2983 | | branchPC RD | ||
2984 | |2: // NE: Fallthrough to next instruction. | ||
2985 | |.if not FFI | ||
2986 | |3: | ||
2987 | |.endif | ||
2988 | } else { | ||
2989 | |.if not FFI | ||
2990 | |3: | ||
2991 | |.endif | ||
2992 | |2: // NE: Branch to the target. | ||
2993 | | movzx RDd, PC_RD | ||
2994 | | branchPC RD | ||
2995 | |1: // EQ: Fallthrough to next instruction. | ||
2996 | } | ||
2997 | if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || | ||
2998 | op == BC_ISEQN || op == BC_ISNEN)) { | ||
2999 | | jmp <9 | ||
3000 | } else { | ||
3001 | | ins_next | ||
3002 | } | ||
3003 | | | ||
3004 | if (op == BC_ISEQV || op == BC_ISNEV) { | ||
3005 | |5: // Either or both types are not numbers. | ||
3006 | |.if FFI | ||
3007 | | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd | ||
3008 | | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd | ||
3009 | |.endif | ||
3010 | | cmp RA, RD | ||
3011 | | je <1 // Same GCobjs or pvalues? | ||
3012 | | cmp RBd, ITYPEd | ||
3013 | | jne <2 // Not the same type? | ||
3014 | | cmp RBd, LJ_TISTABUD | ||
3015 | | ja <2 // Different objects and not table/ud? | ||
3016 | | | ||
3017 | | // Different tables or userdatas. Need to check __eq metamethod. | ||
3018 | | // Field metatable must be at same offset for GCtab and GCudata! | ||
3019 | | cleartp TAB:RA | ||
3020 | | mov TAB:RB, TAB:RA->metatable | ||
3021 | | test TAB:RB, TAB:RB | ||
3022 | | jz <2 // No metatable? | ||
3023 | | test byte TAB:RB->nomm, 1<<MM_eq | ||
3024 | | jnz <2 // Or 'no __eq' flag set? | ||
3025 | if (vk) { | ||
3026 | | xor RBd, RBd // ne = 0 | ||
3027 | } else { | ||
3028 | | mov RBd, 1 // ne = 1 | ||
3029 | } | ||
3030 | | jmp ->vmeta_equal // Handle __eq metamethod. | ||
3031 | } else { | ||
3032 | |.if FFI | ||
3033 | |3: | ||
3034 | | cmp ITYPEd, LJ_TCDATA | ||
3035 | if (LJ_DUALNUM && vk) { | ||
3036 | | jne <9 | ||
3037 | } else { | ||
3038 | | jne <2 | ||
3039 | } | ||
3040 | | jmp ->vmeta_equal_cd | ||
3041 | |.endif | ||
3042 | } | ||
3043 | break; | ||
3044 | case BC_ISEQS: case BC_ISNES: | ||
3045 | vk = op == BC_ISEQS; | ||
3046 | | ins_AND // RA = src, RD = str const, JMP with RD = target | ||
3047 | | mov RB, [BASE+RA*8] | ||
3048 | | add PC, 4 | ||
3049 | | checkstr RB, >3 | ||
3050 | | cmp RB, [KBASE+RD*8] | ||
3051 | iseqne_test: | ||
3052 | if (vk) { | ||
3053 | | jne >2 | ||
3054 | } else { | ||
3055 | | je >1 | ||
3056 | } | ||
3057 | goto iseqne_end; | ||
3058 | case BC_ISEQN: case BC_ISNEN: | ||
3059 | vk = op == BC_ISEQN; | ||
3060 | | ins_AD // RA = src, RD = num const, JMP with RD = target | ||
3061 | | mov RB, [BASE+RA*8] | ||
3062 | | add PC, 4 | ||
3063 | |.if DUALNUM | ||
3064 | | checkint RB, >7 | ||
3065 | | mov RD, [KBASE+RD*8] | ||
3066 | | checkint RD, >8 | ||
3067 | | cmp RBd, RDd | ||
3068 | if (vk) { | ||
3069 | | jne >9 | ||
3070 | } else { | ||
3071 | | je >9 | ||
3072 | } | ||
3073 | | movzx RDd, PC_RD | ||
3074 | | branchPC RD | ||
3075 | |9: | ||
3076 | | ins_next | ||
3077 | | | ||
3078 | |7: // RA is not an integer. | ||
3079 | | ja >3 | ||
3080 | | // RA is a number. | ||
3081 | | mov RD, [KBASE+RD*8] | ||
3082 | | checkint RD, >1 | ||
3083 | | // RA is a number, RD is an integer. | ||
3084 | | cvtsi2sd xmm0, RDd | ||
3085 | | jmp >2 | ||
3086 | | | ||
3087 | |8: // RA is an integer, RD is a number. | ||
3088 | | cvtsi2sd xmm0, RBd | ||
3089 | | movd xmm1, RD | ||
3090 | | ucomisd xmm0, xmm1 | ||
3091 | | jmp >4 | ||
3092 | |1: | ||
3093 | | movd xmm0, RD | ||
3094 | |.else | ||
3095 | | checknum RB, >3 | ||
3096 | |1: | ||
3097 | | movsd xmm0, qword [KBASE+RD*8] | ||
3098 | |.endif | ||
3099 | |2: | ||
3100 | | ucomisd xmm0, qword [BASE+RA*8] | ||
3101 | |4: | ||
3102 | goto iseqne_fp; | ||
3103 | case BC_ISEQP: case BC_ISNEP: | ||
3104 | vk = op == BC_ISEQP; | ||
3105 | | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target | ||
3106 | | mov RB, [BASE+RA*8] | ||
3107 | | sar RB, 47 | ||
3108 | | add PC, 4 | ||
3109 | | cmp RBd, RDd | ||
3110 | if (!LJ_HASFFI) goto iseqne_test; | ||
3111 | if (vk) { | ||
3112 | | jne >3 | ||
3113 | | movzx RDd, PC_RD | ||
3114 | | branchPC RD | ||
3115 | |2: | ||
3116 | | ins_next | ||
3117 | |3: | ||
3118 | | cmp RBd, LJ_TCDATA; jne <2 | ||
3119 | | jmp ->vmeta_equal_cd | ||
3120 | } else { | ||
3121 | | je >2 | ||
3122 | | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd | ||
3123 | | movzx RDd, PC_RD | ||
3124 | | branchPC RD | ||
3125 | |2: | ||
3126 | | ins_next | ||
3127 | } | ||
3128 | break; | ||
3129 | |||
3130 | /* -- Unary test and copy ops ------------------------------------------- */ | ||
3131 | |||
3132 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | ||
3133 | | ins_AD // RA = dst or unused, RD = src, JMP with RD = target | ||
3134 | | mov ITYPE, [BASE+RD*8] | ||
3135 | | add PC, 4 | ||
3136 | if (op == BC_ISTC || op == BC_ISFC) { | ||
3137 | | mov RB, ITYPE | ||
3138 | } | ||
3139 | | sar ITYPE, 47 | ||
3140 | | cmp ITYPEd, LJ_TISTRUECOND | ||
3141 | if (op == BC_IST || op == BC_ISTC) { | ||
3142 | | jae >1 | ||
3143 | } else { | ||
3144 | | jb >1 | ||
3145 | } | ||
3146 | if (op == BC_ISTC || op == BC_ISFC) { | ||
3147 | | mov [BASE+RA*8], RB | ||
3148 | } | ||
3149 | | movzx RDd, PC_RD | ||
3150 | | branchPC RD | ||
3151 | |1: // Fallthrough to the next instruction. | ||
3152 | | ins_next | ||
3153 | break; | ||
3154 | |||
3155 | case BC_ISTYPE: | ||
3156 | | ins_AD // RA = src, RD = -type | ||
3157 | | mov RB, [BASE+RA*8] | ||
3158 | | sar RB, 47 | ||
3159 | | add RBd, RDd | ||
3160 | | jne ->vmeta_istype | ||
3161 | | ins_next | ||
3162 | break; | ||
3163 | case BC_ISNUM: | ||
3164 | | ins_AD // RA = src, RD = -(TISNUM-1) | ||
3165 | | checknumtp [BASE+RA*8], ->vmeta_istype | ||
3166 | | ins_next | ||
3167 | break; | ||
3168 | |||
3169 | /* -- Unary ops --------------------------------------------------------- */ | ||
3170 | |||
3171 | case BC_MOV: | ||
3172 | | ins_AD // RA = dst, RD = src | ||
3173 | | mov RB, [BASE+RD*8] | ||
3174 | | mov [BASE+RA*8], RB | ||
3175 | | ins_next_ | ||
3176 | break; | ||
3177 | case BC_NOT: | ||
3178 | | ins_AD // RA = dst, RD = src | ||
3179 | | mov RB, [BASE+RD*8] | ||
3180 | | sar RB, 47 | ||
3181 | | mov RCd, 2 | ||
3182 | | cmp RB, LJ_TISTRUECOND | ||
3183 | | sbb RCd, 0 | ||
3184 | | shl RC, 47 | ||
3185 | | not RC | ||
3186 | | mov [BASE+RA*8], RC | ||
3187 | | ins_next | ||
3188 | break; | ||
3189 | case BC_UNM: | ||
3190 | | ins_AD // RA = dst, RD = src | ||
3191 | | mov RB, [BASE+RD*8] | ||
3192 | |.if DUALNUM | ||
3193 | | checkint RB, >5 | ||
3194 | | neg RBd | ||
3195 | | jo >4 | ||
3196 | | setint RB | ||
3197 | |9: | ||
3198 | | mov [BASE+RA*8], RB | ||
3199 | | ins_next | ||
3200 | |4: | ||
3201 | | mov64 RB, U64x(41e00000,00000000) // 2^31. | ||
3202 | | jmp <9 | ||
3203 | |5: | ||
3204 | | ja ->vmeta_unm | ||
3205 | |.else | ||
3206 | | checknum RB, ->vmeta_unm | ||
3207 | |.endif | ||
3208 | | mov64 RD, U64x(80000000,00000000) | ||
3209 | | xor RB, RD | ||
3210 | |.if DUALNUM | ||
3211 | | jmp <9 | ||
3212 | |.else | ||
3213 | | mov [BASE+RA*8], RB | ||
3214 | | ins_next | ||
3215 | |.endif | ||
3216 | break; | ||
3217 | case BC_LEN: | ||
3218 | | ins_AD // RA = dst, RD = src | ||
3219 | | mov RD, [BASE+RD*8] | ||
3220 | | checkstr RD, >2 | ||
3221 | |.if DUALNUM | ||
3222 | | mov RDd, dword STR:RD->len | ||
3223 | |1: | ||
3224 | | setint RD | ||
3225 | | mov [BASE+RA*8], RD | ||
3226 | |.else | ||
3227 | | xorps xmm0, xmm0 | ||
3228 | | cvtsi2sd xmm0, dword STR:RD->len | ||
3229 | |1: | ||
3230 | | movsd qword [BASE+RA*8], xmm0 | ||
3231 | |.endif | ||
3232 | | ins_next | ||
3233 | |2: | ||
3234 | | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len | ||
3235 | | mov TAB:CARG1, TAB:RD | ||
3236 | #if LJ_52 | ||
3237 | | mov TAB:RB, TAB:RD->metatable | ||
3238 | | cmp TAB:RB, 0 | ||
3239 | | jnz >9 | ||
3240 | |3: | ||
3241 | #endif | ||
3242 | |->BC_LEN_Z: | ||
3243 | | mov RB, BASE // Save BASE. | ||
3244 | | call extern lj_tab_len // (GCtab *t) | ||
3245 | | // Length of table returned in eax (RD). | ||
3246 | |.if DUALNUM | ||
3247 | | // Nothing to do. | ||
3248 | |.else | ||
3249 | | cvtsi2sd xmm0, RDd | ||
3250 | |.endif | ||
3251 | | mov BASE, RB // Restore BASE. | ||
3252 | | movzx RAd, PC_RA | ||
3253 | | jmp <1 | ||
3254 | #if LJ_52 | ||
3255 | |9: // Check for __len. | ||
3256 | | test byte TAB:RB->nomm, 1<<MM_len | ||
3257 | | jnz <3 | ||
3258 | | jmp ->vmeta_len // 'no __len' flag NOT set: check. | ||
3259 | #endif | ||
3260 | break; | ||
3261 | |||
3262 | /* -- Binary ops -------------------------------------------------------- */ | ||
3263 | |||
3264 | |.macro ins_arithpre, sseins, ssereg | ||
3265 | | ins_ABC | ||
3266 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
3267 | ||switch (vk) { | ||
3268 | ||case 0: | ||
3269 | | checknumtp [BASE+RB*8], ->vmeta_arith_vn | ||
3270 | | .if DUALNUM | ||
3271 | | checknumtp [KBASE+RC*8], ->vmeta_arith_vn | ||
3272 | | .endif | ||
3273 | | movsd xmm0, qword [BASE+RB*8] | ||
3274 | | sseins ssereg, qword [KBASE+RC*8] | ||
3275 | || break; | ||
3276 | ||case 1: | ||
3277 | | checknumtp [BASE+RB*8], ->vmeta_arith_nv | ||
3278 | | .if DUALNUM | ||
3279 | | checknumtp [KBASE+RC*8], ->vmeta_arith_nv | ||
3280 | | .endif | ||
3281 | | movsd xmm0, qword [KBASE+RC*8] | ||
3282 | | sseins ssereg, qword [BASE+RB*8] | ||
3283 | || break; | ||
3284 | ||default: | ||
3285 | | checknumtp [BASE+RB*8], ->vmeta_arith_vv | ||
3286 | | checknumtp [BASE+RC*8], ->vmeta_arith_vv | ||
3287 | | movsd xmm0, qword [BASE+RB*8] | ||
3288 | | sseins ssereg, qword [BASE+RC*8] | ||
3289 | || break; | ||
3290 | ||} | ||
3291 | |.endmacro | ||
3292 | | | ||
3293 | |.macro ins_arithdn, intins | ||
3294 | | ins_ABC | ||
3295 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
3296 | ||switch (vk) { | ||
3297 | ||case 0: | ||
3298 | | mov RB, [BASE+RB*8] | ||
3299 | | mov RC, [KBASE+RC*8] | ||
3300 | | checkint RB, ->vmeta_arith_vno | ||
3301 | | checkint RC, ->vmeta_arith_vno | ||
3302 | | intins RBd, RCd; jo ->vmeta_arith_vno | ||
3303 | || break; | ||
3304 | ||case 1: | ||
3305 | | mov RB, [BASE+RB*8] | ||
3306 | | mov RC, [KBASE+RC*8] | ||
3307 | | checkint RB, ->vmeta_arith_nvo | ||
3308 | | checkint RC, ->vmeta_arith_nvo | ||
3309 | | intins RCd, RBd; jo ->vmeta_arith_nvo | ||
3310 | || break; | ||
3311 | ||default: | ||
3312 | | mov RB, [BASE+RB*8] | ||
3313 | | mov RC, [BASE+RC*8] | ||
3314 | | checkint RB, ->vmeta_arith_vvo | ||
3315 | | checkint RC, ->vmeta_arith_vvo | ||
3316 | | intins RBd, RCd; jo ->vmeta_arith_vvo | ||
3317 | || break; | ||
3318 | ||} | ||
3319 | ||if (vk == 1) { | ||
3320 | | setint RC | ||
3321 | | mov [BASE+RA*8], RC | ||
3322 | ||} else { | ||
3323 | | setint RB | ||
3324 | | mov [BASE+RA*8], RB | ||
3325 | ||} | ||
3326 | | ins_next | ||
3327 | |.endmacro | ||
3328 | | | ||
3329 | |.macro ins_arithpost | ||
3330 | | movsd qword [BASE+RA*8], xmm0 | ||
3331 | |.endmacro | ||
3332 | | | ||
3333 | |.macro ins_arith, sseins | ||
3334 | | ins_arithpre sseins, xmm0 | ||
3335 | | ins_arithpost | ||
3336 | | ins_next | ||
3337 | |.endmacro | ||
3338 | | | ||
3339 | |.macro ins_arith, intins, sseins | ||
3340 | |.if DUALNUM | ||
3341 | | ins_arithdn intins | ||
3342 | |.else | ||
3343 | | ins_arith, sseins | ||
3344 | |.endif | ||
3345 | |.endmacro | ||
3346 | |||
3347 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | ||
3348 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | ||
3349 | | ins_arith add, addsd | ||
3350 | break; | ||
3351 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | ||
3352 | | ins_arith sub, subsd | ||
3353 | break; | ||
3354 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | ||
3355 | | ins_arith imul, mulsd | ||
3356 | break; | ||
3357 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | ||
3358 | | ins_arith divsd | ||
3359 | break; | ||
3360 | case BC_MODVN: | ||
3361 | | ins_arithpre movsd, xmm1 | ||
3362 | |->BC_MODVN_Z: | ||
3363 | | call ->vm_mod | ||
3364 | | ins_arithpost | ||
3365 | | ins_next | ||
3366 | break; | ||
3367 | case BC_MODNV: case BC_MODVV: | ||
3368 | | ins_arithpre movsd, xmm1 | ||
3369 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | ||
3370 | break; | ||
3371 | case BC_POW: | ||
3372 | | ins_arithpre movsd, xmm1 | ||
3373 | | mov RB, BASE | ||
3374 | | call extern pow | ||
3375 | | movzx RAd, PC_RA | ||
3376 | | mov BASE, RB | ||
3377 | | ins_arithpost | ||
3378 | | ins_next | ||
3379 | break; | ||
3380 | |||
3381 | case BC_CAT: | ||
3382 | | ins_ABC // RA = dst, RB = src_start, RC = src_end | ||
3383 | | mov L:CARG1, SAVE_L | ||
3384 | | mov L:CARG1->base, BASE | ||
3385 | | lea CARG2, [BASE+RC*8] | ||
3386 | | mov CARG3d, RCd | ||
3387 | | sub CARG3d, RBd | ||
3388 | |->BC_CAT_Z: | ||
3389 | | mov L:RB, L:CARG1 | ||
3390 | | mov SAVE_PC, PC | ||
3391 | | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) | ||
3392 | | // NULL (finished) or TValue * (metamethod) returned in rax (RC). | ||
3393 | | mov BASE, L:RB->base | ||
3394 | | test RC, RC | ||
3395 | | jnz ->vmeta_binop | ||
3396 | | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB]. | ||
3397 | | movzx RAd, PC_RA | ||
3398 | | mov RC, [BASE+RB*8] | ||
3399 | | mov [BASE+RA*8], RC | ||
3400 | | ins_next | ||
3401 | break; | ||
3402 | |||
3403 | /* -- Constant ops ------------------------------------------------------ */ | ||
3404 | |||
3405 | case BC_KSTR: | ||
3406 | | ins_AND // RA = dst, RD = str const (~) | ||
3407 | | mov RD, [KBASE+RD*8] | ||
3408 | | settp RD, LJ_TSTR | ||
3409 | | mov [BASE+RA*8], RD | ||
3410 | | ins_next | ||
3411 | break; | ||
3412 | case BC_KCDATA: | ||
3413 | |.if FFI | ||
3414 | | ins_AND // RA = dst, RD = cdata const (~) | ||
3415 | | mov RD, [KBASE+RD*8] | ||
3416 | | settp RD, LJ_TCDATA | ||
3417 | | mov [BASE+RA*8], RD | ||
3418 | | ins_next | ||
3419 | |.endif | ||
3420 | break; | ||
3421 | case BC_KSHORT: | ||
3422 | | ins_AD // RA = dst, RD = signed int16 literal | ||
3423 | |.if DUALNUM | ||
3424 | | movsx RDd, RDW | ||
3425 | | setint RD | ||
3426 | | mov [BASE+RA*8], RD | ||
3427 | |.else | ||
3428 | | movsx RDd, RDW // Sign-extend literal. | ||
3429 | | cvtsi2sd xmm0, RDd | ||
3430 | | movsd qword [BASE+RA*8], xmm0 | ||
3431 | |.endif | ||
3432 | | ins_next | ||
3433 | break; | ||
3434 | case BC_KNUM: | ||
3435 | | ins_AD // RA = dst, RD = num const | ||
3436 | | movsd xmm0, qword [KBASE+RD*8] | ||
3437 | | movsd qword [BASE+RA*8], xmm0 | ||
3438 | | ins_next | ||
3439 | break; | ||
3440 | case BC_KPRI: | ||
3441 | | ins_AD // RA = dst, RD = primitive type (~) | ||
3442 | | shl RD, 47 | ||
3443 | | not RD | ||
3444 | | mov [BASE+RA*8], RD | ||
3445 | | ins_next | ||
3446 | break; | ||
3447 | case BC_KNIL: | ||
3448 | | ins_AD // RA = dst_start, RD = dst_end | ||
3449 | | lea RA, [BASE+RA*8+8] | ||
3450 | | lea RD, [BASE+RD*8] | ||
3451 | | mov RB, LJ_TNIL | ||
3452 | | mov [RA-8], RB // Sets minimum 2 slots. | ||
3453 | |1: | ||
3454 | | mov [RA], RB | ||
3455 | | add RA, 8 | ||
3456 | | cmp RA, RD | ||
3457 | | jbe <1 | ||
3458 | | ins_next | ||
3459 | break; | ||
3460 | |||
3461 | /* -- Upvalue and function ops ------------------------------------------ */ | ||
3462 | |||
3463 | case BC_UGET: | ||
3464 | | ins_AD // RA = dst, RD = upvalue # | ||
3465 | | mov LFUNC:RB, [BASE-16] | ||
3466 | | cleartp LFUNC:RB | ||
3467 | | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] | ||
3468 | | mov RB, UPVAL:RB->v | ||
3469 | | mov RD, [RB] | ||
3470 | | mov [BASE+RA*8], RD | ||
3471 | | ins_next | ||
3472 | break; | ||
3473 | case BC_USETV: | ||
3474 | #define TV2MARKOFS \ | ||
3475 | ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) | ||
3476 | | ins_AD // RA = upvalue #, RD = src | ||
3477 | | mov LFUNC:RB, [BASE-16] | ||
3478 | | cleartp LFUNC:RB | ||
3479 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] | ||
3480 | | cmp byte UPVAL:RB->closed, 0 | ||
3481 | | mov RB, UPVAL:RB->v | ||
3482 | | mov RA, [BASE+RD*8] | ||
3483 | | mov [RB], RA | ||
3484 | | jz >1 | ||
3485 | | // Check barrier for closed upvalue. | ||
3486 | | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) | ||
3487 | | jnz >2 | ||
3488 | |1: | ||
3489 | | ins_next | ||
3490 | | | ||
3491 | |2: // Upvalue is black. Check if new value is collectable and white. | ||
3492 | | mov RD, RA | ||
3493 | | sar RD, 47 | ||
3494 | | sub RDd, LJ_TISGCV | ||
3495 | | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) | ||
3496 | | jbe <1 | ||
3497 | | cleartp GCOBJ:RA | ||
3498 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | ||
3499 | | jz <1 | ||
3500 | | // Crossed a write barrier. Move the barrier forward. | ||
3501 | |.if not X64WIN | ||
3502 | | mov CARG2, RB | ||
3503 | | mov RB, BASE // Save BASE. | ||
3504 | |.else | ||
3505 | | xchg CARG2, RB // Save BASE (CARG2 == BASE). | ||
3506 | |.endif | ||
3507 | | lea GL:CARG1, [DISPATCH+GG_DISP2G] | ||
3508 | | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
3509 | | mov BASE, RB // Restore BASE. | ||
3510 | | jmp <1 | ||
3511 | break; | ||
3512 | #undef TV2MARKOFS | ||
3513 | case BC_USETS: | ||
3514 | | ins_AND // RA = upvalue #, RD = str const (~) | ||
3515 | | mov LFUNC:RB, [BASE-16] | ||
3516 | | cleartp LFUNC:RB | ||
3517 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] | ||
3518 | | mov STR:RA, [KBASE+RD*8] | ||
3519 | | mov RD, UPVAL:RB->v | ||
3520 | | settp STR:ITYPE, STR:RA, LJ_TSTR | ||
3521 | | mov [RD], STR:ITYPE | ||
3522 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | ||
3523 | | jnz >2 | ||
3524 | |1: | ||
3525 | | ins_next | ||
3526 | | | ||
3527 | |2: // Check if string is white and ensure upvalue is closed. | ||
3528 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) | ||
3529 | | jz <1 | ||
3530 | | cmp byte UPVAL:RB->closed, 0 | ||
3531 | | jz <1 | ||
3532 | | // Crossed a write barrier. Move the barrier forward. | ||
3533 | | mov RB, BASE // Save BASE (CARG2 == BASE). | ||
3534 | | mov CARG2, RD | ||
3535 | | lea GL:CARG1, [DISPATCH+GG_DISP2G] | ||
3536 | | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) | ||
3537 | | mov BASE, RB // Restore BASE. | ||
3538 | | jmp <1 | ||
3539 | break; | ||
3540 | case BC_USETN: | ||
3541 | | ins_AD // RA = upvalue #, RD = num const | ||
3542 | | mov LFUNC:RB, [BASE-16] | ||
3543 | | cleartp LFUNC:RB | ||
3544 | | movsd xmm0, qword [KBASE+RD*8] | ||
3545 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] | ||
3546 | | mov RA, UPVAL:RB->v | ||
3547 | | movsd qword [RA], xmm0 | ||
3548 | | ins_next | ||
3549 | break; | ||
3550 | case BC_USETP: | ||
3551 | | ins_AD // RA = upvalue #, RD = primitive type (~) | ||
3552 | | mov LFUNC:RB, [BASE-16] | ||
3553 | | cleartp LFUNC:RB | ||
3554 | | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] | ||
3555 | | shl RD, 47 | ||
3556 | | not RD | ||
3557 | | mov RA, UPVAL:RB->v | ||
3558 | | mov [RA], RD | ||
3559 | | ins_next | ||
3560 | break; | ||
3561 | case BC_UCLO: | ||
3562 | | ins_AD // RA = level, RD = target | ||
3563 | | branchPC RD // Do this first to free RD. | ||
3564 | | mov L:RB, SAVE_L | ||
3565 | | cmp aword L:RB->openupval, 0 | ||
3566 | | je >1 | ||
3567 | | mov L:RB->base, BASE | ||
3568 | | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE | ||
3569 | | mov L:CARG1, L:RB // Caveat: CARG1 == RA | ||
3570 | | call extern lj_func_closeuv // (lua_State *L, TValue *level) | ||
3571 | | mov BASE, L:RB->base | ||
3572 | |1: | ||
3573 | | ins_next | ||
3574 | break; | ||
3575 | |||
3576 | case BC_FNEW: | ||
3577 | | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) | ||
3578 | | mov L:RB, SAVE_L | ||
3579 | | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
3580 | | mov CARG3, [BASE-16] | ||
3581 | | cleartp CARG3 | ||
3582 | | mov CARG2, [KBASE+RD*8] // Fetch GCproto *. | ||
3583 | | mov CARG1, L:RB | ||
3584 | | mov SAVE_PC, PC | ||
3585 | | // (lua_State *L, GCproto *pt, GCfuncL *parent) | ||
3586 | | call extern lj_func_newL_gc | ||
3587 | | // GCfuncL * returned in rax (RC). | ||
3588 | | mov BASE, L:RB->base | ||
3589 | | movzx RAd, PC_RA | ||
3590 | | settp LFUNC:RC, LJ_TFUNC | ||
3591 | | mov [BASE+RA*8], LFUNC:RC | ||
3592 | | ins_next | ||
3593 | break; | ||
3594 | |||
3595 | /* -- Table ops --------------------------------------------------------- */ | ||
3596 | |||
3597 | case BC_TNEW: | ||
3598 | | ins_AD // RA = dst, RD = hbits|asize | ||
3599 | | mov L:RB, SAVE_L | ||
3600 | | mov L:RB->base, BASE | ||
3601 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | ||
3602 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
3603 | | mov SAVE_PC, PC | ||
3604 | | jae >5 | ||
3605 | |1: | ||
3606 | | mov CARG3d, RDd | ||
3607 | | and RDd, 0x7ff | ||
3608 | | shr CARG3d, 11 | ||
3609 | | cmp RDd, 0x7ff | ||
3610 | | je >3 | ||
3611 | |2: | ||
3612 | | mov L:CARG1, L:RB | ||
3613 | | mov CARG2d, RDd | ||
3614 | | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) | ||
3615 | | // Table * returned in rax (RC). | ||
3616 | | mov BASE, L:RB->base | ||
3617 | | movzx RAd, PC_RA | ||
3618 | | settp TAB:RC, LJ_TTAB | ||
3619 | | mov [BASE+RA*8], TAB:RC | ||
3620 | | ins_next | ||
3621 | |3: // Turn 0x7ff into 0x801. | ||
3622 | | mov RDd, 0x801 | ||
3623 | | jmp <2 | ||
3624 | |5: | ||
3625 | | mov L:CARG1, L:RB | ||
3626 | | call extern lj_gc_step_fixtop // (lua_State *L) | ||
3627 | | movzx RDd, PC_RD | ||
3628 | | jmp <1 | ||
3629 | break; | ||
3630 | case BC_TDUP: | ||
3631 | | ins_AND // RA = dst, RD = table const (~) (holding template table) | ||
3632 | | mov L:RB, SAVE_L | ||
3633 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | ||
3634 | | mov SAVE_PC, PC | ||
3635 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | ||
3636 | | mov L:RB->base, BASE | ||
3637 | | jae >3 | ||
3638 | |2: | ||
3639 | | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE | ||
3640 | | mov L:CARG1, L:RB // Caveat: CARG1 == RA | ||
3641 | | call extern lj_tab_dup // (lua_State *L, Table *kt) | ||
3642 | | // Table * returned in rax (RC). | ||
3643 | | mov BASE, L:RB->base | ||
3644 | | movzx RAd, PC_RA | ||
3645 | | settp TAB:RC, LJ_TTAB | ||
3646 | | mov [BASE+RA*8], TAB:RC | ||
3647 | | ins_next | ||
3648 | |3: | ||
3649 | | mov L:CARG1, L:RB | ||
3650 | | call extern lj_gc_step_fixtop // (lua_State *L) | ||
3651 | | movzx RDd, PC_RD // Need to reload RD. | ||
3652 | | not RD | ||
3653 | | jmp <2 | ||
3654 | break; | ||
3655 | |||
3656 | case BC_GGET: | ||
3657 | | ins_AND // RA = dst, RD = str const (~) | ||
3658 | | mov LFUNC:RB, [BASE-16] | ||
3659 | | cleartp LFUNC:RB | ||
3660 | | mov TAB:RB, LFUNC:RB->env | ||
3661 | | mov STR:RC, [KBASE+RD*8] | ||
3662 | | jmp ->BC_TGETS_Z | ||
3663 | break; | ||
3664 | case BC_GSET: | ||
3665 | | ins_AND // RA = src, RD = str const (~) | ||
3666 | | mov LFUNC:RB, [BASE-16] | ||
3667 | | cleartp LFUNC:RB | ||
3668 | | mov TAB:RB, LFUNC:RB->env | ||
3669 | | mov STR:RC, [KBASE+RD*8] | ||
3670 | | jmp ->BC_TSETS_Z | ||
3671 | break; | ||
3672 | |||
3673 | case BC_TGETV: | ||
3674 | | ins_ABC // RA = dst, RB = table, RC = key | ||
3675 | | mov TAB:RB, [BASE+RB*8] | ||
3676 | | mov RC, [BASE+RC*8] | ||
3677 | | checktab TAB:RB, ->vmeta_tgetv | ||
3678 | | | ||
3679 | | // Integer key? | ||
3680 | |.if DUALNUM | ||
3681 | | checkint RC, >5 | ||
3682 | |.else | ||
3683 | | // Convert number to int and back and compare. | ||
3684 | | checknum RC, >5 | ||
3685 | | movd xmm0, RC | ||
3686 | | cvttsd2si RCd, xmm0 | ||
3687 | | cvtsi2sd xmm1, RCd | ||
3688 | | ucomisd xmm0, xmm1 | ||
3689 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | ||
3690 | |.endif | ||
3691 | | cmp RCd, TAB:RB->asize // Takes care of unordered, too. | ||
3692 | | jae ->vmeta_tgetv // Not in array part? Use fallback. | ||
3693 | | shl RCd, 3 | ||
3694 | | add RC, TAB:RB->array | ||
3695 | | // Get array slot. | ||
3696 | | mov ITYPE, [RC] | ||
3697 | | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. | ||
3698 | | je >2 | ||
3699 | |1: | ||
3700 | | mov [BASE+RA*8], ITYPE | ||
3701 | | ins_next | ||
3702 | | | ||
3703 | |2: // Check for __index if table value is nil. | ||
3704 | | mov TAB:TMPR, TAB:RB->metatable | ||
3705 | | test TAB:TMPR, TAB:TMPR | ||
3706 | | jz <1 | ||
3707 | | test byte TAB:TMPR->nomm, 1<<MM_index | ||
3708 | | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. | ||
3709 | | jmp <1 | ||
3710 | | | ||
3711 | |5: // String key? | ||
3712 | | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv | ||
3713 | | cleartp STR:RC | ||
3714 | | jmp ->BC_TGETS_Z | ||
3715 | break; | ||
3716 | case BC_TGETS: | ||
3717 | | ins_ABC // RA = dst, RB = table, RC = str const (~) | ||
3718 | | mov TAB:RB, [BASE+RB*8] | ||
3719 | | not RC | ||
3720 | | mov STR:RC, [KBASE+RC*8] | ||
3721 | | checktab TAB:RB, ->vmeta_tgets | ||
3722 | |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * | ||
3723 | | mov TMPRd, TAB:RB->hmask | ||
3724 | | and TMPRd, STR:RC->sid | ||
3725 | | imul TMPRd, #NODE | ||
3726 | | add NODE:TMPR, TAB:RB->node | ||
3727 | | settp ITYPE, STR:RC, LJ_TSTR | ||
3728 | |1: | ||
3729 | | cmp NODE:TMPR->key, ITYPE | ||
3730 | | jne >4 | ||
3731 | | // Get node value. | ||
3732 | | mov ITYPE, NODE:TMPR->val | ||
3733 | | cmp ITYPE, LJ_TNIL | ||
3734 | | je >5 // Key found, but nil value? | ||
3735 | |2: | ||
3736 | | mov [BASE+RA*8], ITYPE | ||
3737 | | ins_next | ||
3738 | | | ||
3739 | |4: // Follow hash chain. | ||
3740 | | mov NODE:TMPR, NODE:TMPR->next | ||
3741 | | test NODE:TMPR, NODE:TMPR | ||
3742 | | jnz <1 | ||
3743 | | // End of hash chain: key not found, nil result. | ||
3744 | | mov ITYPE, LJ_TNIL | ||
3745 | | | ||
3746 | |5: // Check for __index if table value is nil. | ||
3747 | | mov TAB:TMPR, TAB:RB->metatable | ||
3748 | | test TAB:TMPR, TAB:TMPR | ||
3749 | | jz <2 // No metatable: done. | ||
3750 | | test byte TAB:TMPR->nomm, 1<<MM_index | ||
3751 | | jnz <2 // 'no __index' flag set: done. | ||
3752 | | jmp ->vmeta_tgets // Caveat: preserve STR:RC. | ||
3753 | break; | ||
3754 | case BC_TGETB: | ||
3755 | | ins_ABC // RA = dst, RB = table, RC = byte literal | ||
3756 | | mov TAB:RB, [BASE+RB*8] | ||
3757 | | checktab TAB:RB, ->vmeta_tgetb | ||
3758 | | cmp RCd, TAB:RB->asize | ||
3759 | | jae ->vmeta_tgetb | ||
3760 | | shl RCd, 3 | ||
3761 | | add RC, TAB:RB->array | ||
3762 | | // Get array slot. | ||
3763 | | mov ITYPE, [RC] | ||
3764 | | cmp ITYPE, LJ_TNIL | ||
3765 | | je >2 | ||
3766 | |1: | ||
3767 | | mov [BASE+RA*8], ITYPE | ||
3768 | | ins_next | ||
3769 | | | ||
3770 | |2: // Check for __index if table value is nil. | ||
3771 | | mov TAB:TMPR, TAB:RB->metatable | ||
3772 | | test TAB:TMPR, TAB:TMPR | ||
3773 | | jz <1 | ||
3774 | | test byte TAB:TMPR->nomm, 1<<MM_index | ||
3775 | | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. | ||
3776 | | jmp <1 | ||
3777 | break; | ||
3778 | case BC_TGETR: | ||
3779 | | ins_ABC // RA = dst, RB = table, RC = key | ||
3780 | | mov TAB:RB, [BASE+RB*8] | ||
3781 | | cleartp TAB:RB | ||
3782 | |.if DUALNUM | ||
3783 | | mov RCd, dword [BASE+RC*8] | ||
3784 | |.else | ||
3785 | | cvttsd2si RCd, qword [BASE+RC*8] | ||
3786 | |.endif | ||
3787 | | cmp RCd, TAB:RB->asize | ||
3788 | | jae ->vmeta_tgetr // Not in array part? Use fallback. | ||
3789 | | shl RCd, 3 | ||
3790 | | add RC, TAB:RB->array | ||
3791 | | // Get array slot. | ||
3792 | |->BC_TGETR_Z: | ||
3793 | | mov ITYPE, [RC] | ||
3794 | |->BC_TGETR2_Z: | ||
3795 | | mov [BASE+RA*8], ITYPE | ||
3796 | | ins_next | ||
3797 | break; | ||
3798 | |||
3799 | case BC_TSETV: | ||
3800 | | ins_ABC // RA = src, RB = table, RC = key | ||
3801 | | mov TAB:RB, [BASE+RB*8] | ||
3802 | | mov RC, [BASE+RC*8] | ||
3803 | | checktab TAB:RB, ->vmeta_tsetv | ||
3804 | | | ||
3805 | | // Integer key? | ||
3806 | |.if DUALNUM | ||
3807 | | checkint RC, >5 | ||
3808 | |.else | ||
3809 | | // Convert number to int and back and compare. | ||
3810 | | checknum RC, >5 | ||
3811 | | movd xmm0, RC | ||
3812 | | cvttsd2si RCd, xmm0 | ||
3813 | | cvtsi2sd xmm1, RCd | ||
3814 | | ucomisd xmm0, xmm1 | ||
3815 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | ||
3816 | |.endif | ||
3817 | | cmp RCd, TAB:RB->asize // Takes care of unordered, too. | ||
3818 | | jae ->vmeta_tsetv | ||
3819 | | shl RCd, 3 | ||
3820 | | add RC, TAB:RB->array | ||
3821 | | cmp aword [RC], LJ_TNIL | ||
3822 | | je >3 // Previous value is nil? | ||
3823 | |1: | ||
3824 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3825 | | jnz >7 | ||
3826 | |2: // Set array slot. | ||
3827 | | mov RB, [BASE+RA*8] | ||
3828 | | mov [RC], RB | ||
3829 | | ins_next | ||
3830 | | | ||
3831 | |3: // Check for __newindex if previous value is nil. | ||
3832 | | mov TAB:TMPR, TAB:RB->metatable | ||
3833 | | test TAB:TMPR, TAB:TMPR | ||
3834 | | jz <1 | ||
3835 | | test byte TAB:TMPR->nomm, 1<<MM_newindex | ||
3836 | | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. | ||
3837 | | jmp <1 | ||
3838 | | | ||
3839 | |5: // String key? | ||
3840 | | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv | ||
3841 | | cleartp STR:RC | ||
3842 | | jmp ->BC_TSETS_Z | ||
3843 | | | ||
3844 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3845 | | barrierback TAB:RB, TMPR | ||
3846 | | jmp <2 | ||
3847 | break; | ||
3848 | case BC_TSETS: | ||
3849 | | ins_ABC // RA = src, RB = table, RC = str const (~) | ||
3850 | | mov TAB:RB, [BASE+RB*8] | ||
3851 | | not RC | ||
3852 | | mov STR:RC, [KBASE+RC*8] | ||
3853 | | checktab TAB:RB, ->vmeta_tsets | ||
3854 | |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * | ||
3855 | | mov TMPRd, TAB:RB->hmask | ||
3856 | | and TMPRd, STR:RC->sid | ||
3857 | | imul TMPRd, #NODE | ||
3858 | | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | ||
3859 | | add NODE:TMPR, TAB:RB->node | ||
3860 | | settp ITYPE, STR:RC, LJ_TSTR | ||
3861 | |1: | ||
3862 | | cmp NODE:TMPR->key, ITYPE | ||
3863 | | jne >5 | ||
3864 | | // Ok, key found. Assumes: offsetof(Node, val) == 0 | ||
3865 | | cmp aword [TMPR], LJ_TNIL | ||
3866 | | je >4 // Previous value is nil? | ||
3867 | |2: | ||
3868 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3869 | | jnz >7 | ||
3870 | |3: // Set node value. | ||
3871 | | mov ITYPE, [BASE+RA*8] | ||
3872 | | mov [TMPR], ITYPE | ||
3873 | | ins_next | ||
3874 | | | ||
3875 | |4: // Check for __newindex if previous value is nil. | ||
3876 | | mov TAB:ITYPE, TAB:RB->metatable | ||
3877 | | test TAB:ITYPE, TAB:ITYPE | ||
3878 | | jz <2 | ||
3879 | | test byte TAB:ITYPE->nomm, 1<<MM_newindex | ||
3880 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
3881 | | jmp <2 | ||
3882 | | | ||
3883 | |5: // Follow hash chain. | ||
3884 | | mov NODE:TMPR, NODE:TMPR->next | ||
3885 | | test NODE:TMPR, NODE:TMPR | ||
3886 | | jnz <1 | ||
3887 | | // End of hash chain: key not found, add a new one. | ||
3888 | | | ||
3889 | | // But check for __newindex first. | ||
3890 | | mov TAB:TMPR, TAB:RB->metatable | ||
3891 | | test TAB:TMPR, TAB:TMPR | ||
3892 | | jz >6 // No metatable: continue. | ||
3893 | | test byte TAB:TMPR->nomm, 1<<MM_newindex | ||
3894 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | ||
3895 | |6: | ||
3896 | | mov TMP1, ITYPE | ||
3897 | | mov L:CARG1, SAVE_L | ||
3898 | | mov L:CARG1->base, BASE | ||
3899 | | lea CARG3, TMP1 | ||
3900 | | mov CARG2, TAB:RB | ||
3901 | | mov SAVE_PC, PC | ||
3902 | | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | ||
3903 | | // Handles write barrier for the new key. TValue * returned in rax (RC). | ||
3904 | | mov L:CARG1, SAVE_L | ||
3905 | | mov BASE, L:CARG1->base | ||
3906 | | mov TMPR, rax | ||
3907 | | movzx RAd, PC_RA | ||
3908 | | jmp <2 // Must check write barrier for value. | ||
3909 | | | ||
3910 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3911 | | barrierback TAB:RB, ITYPE | ||
3912 | | jmp <3 | ||
3913 | break; | ||
3914 | case BC_TSETB: | ||
3915 | | ins_ABC // RA = src, RB = table, RC = byte literal | ||
3916 | | mov TAB:RB, [BASE+RB*8] | ||
3917 | | checktab TAB:RB, ->vmeta_tsetb | ||
3918 | | cmp RCd, TAB:RB->asize | ||
3919 | | jae ->vmeta_tsetb | ||
3920 | | shl RCd, 3 | ||
3921 | | add RC, TAB:RB->array | ||
3922 | | cmp aword [RC], LJ_TNIL | ||
3923 | | je >3 // Previous value is nil? | ||
3924 | |1: | ||
3925 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3926 | | jnz >7 | ||
3927 | |2: // Set array slot. | ||
3928 | | mov ITYPE, [BASE+RA*8] | ||
3929 | | mov [RC], ITYPE | ||
3930 | | ins_next | ||
3931 | | | ||
3932 | |3: // Check for __newindex if previous value is nil. | ||
3933 | | mov TAB:TMPR, TAB:RB->metatable | ||
3934 | | test TAB:TMPR, TAB:TMPR | ||
3935 | | jz <1 | ||
3936 | | test byte TAB:TMPR->nomm, 1<<MM_newindex | ||
3937 | | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. | ||
3938 | | jmp <1 | ||
3939 | | | ||
3940 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3941 | | barrierback TAB:RB, TMPR | ||
3942 | | jmp <2 | ||
3943 | break; | ||
3944 | case BC_TSETR: | ||
3945 | | ins_ABC // RA = src, RB = table, RC = key | ||
3946 | | mov TAB:RB, [BASE+RB*8] | ||
3947 | | cleartp TAB:RB | ||
3948 | |.if DUALNUM | ||
3949 | | mov RC, [BASE+RC*8] | ||
3950 | |.else | ||
3951 | | cvttsd2si RCd, qword [BASE+RC*8] | ||
3952 | |.endif | ||
3953 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3954 | | jnz >7 | ||
3955 | |2: | ||
3956 | | cmp RCd, TAB:RB->asize | ||
3957 | | jae ->vmeta_tsetr | ||
3958 | | shl RCd, 3 | ||
3959 | | add RC, TAB:RB->array | ||
3960 | | // Set array slot. | ||
3961 | |->BC_TSETR_Z: | ||
3962 | | mov ITYPE, [BASE+RA*8] | ||
3963 | | mov [RC], ITYPE | ||
3964 | | ins_next | ||
3965 | | | ||
3966 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
3967 | | barrierback TAB:RB, TMPR | ||
3968 | | jmp <2 | ||
3969 | break; | ||
3970 | |||
3971 | case BC_TSETM: | ||
3972 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | ||
3973 | |1: | ||
3974 | | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word. | ||
3975 | | lea RA, [BASE+RA*8] | ||
3976 | | mov TAB:RB, [RA-8] // Guaranteed to be a table. | ||
3977 | | cleartp TAB:RB | ||
3978 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
3979 | | jnz >7 | ||
3980 | |2: | ||
3981 | | mov RDd, MULTRES | ||
3982 | | sub RDd, 1 | ||
3983 | | jz >4 // Nothing to copy? | ||
3984 | | add RDd, TMPRd // Compute needed size. | ||
3985 | | cmp RDd, TAB:RB->asize | ||
3986 | | ja >5 // Doesn't fit into array part? | ||
3987 | | sub RDd, TMPRd | ||
3988 | | shl TMPRd, 3 | ||
3989 | | add TMPR, TAB:RB->array | ||
3990 | |3: // Copy result slots to table. | ||
3991 | | mov RB, [RA] | ||
3992 | | add RA, 8 | ||
3993 | | mov [TMPR], RB | ||
3994 | | add TMPR, 8 | ||
3995 | | sub RDd, 1 | ||
3996 | | jnz <3 | ||
3997 | |4: | ||
3998 | | ins_next | ||
3999 | | | ||
4000 | |5: // Need to resize array part. | ||
4001 | | mov L:CARG1, SAVE_L | ||
4002 | | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. | ||
4003 | | mov CARG2, TAB:RB | ||
4004 | | mov CARG3d, RDd | ||
4005 | | mov L:RB, L:CARG1 | ||
4006 | | mov SAVE_PC, PC | ||
4007 | | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | ||
4008 | | mov BASE, L:RB->base | ||
4009 | | movzx RAd, PC_RA // Restore RA. | ||
4010 | | movzx RDd, PC_RD // Restore RD. | ||
4011 | | jmp <1 // Retry. | ||
4012 | | | ||
4013 | |7: // Possible table write barrier for any value. Skip valiswhite check. | ||
4014 | | barrierback TAB:RB, RD | ||
4015 | | jmp <2 | ||
4016 | break; | ||
4017 | |||
4018 | /* -- Calls and vararg handling ----------------------------------------- */ | ||
4019 | |||
4020 | case BC_CALL: case BC_CALLM: | ||
4021 | | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs | ||
4022 | if (op == BC_CALLM) { | ||
4023 | | add NARGS:RDd, MULTRES | ||
4024 | } | ||
4025 | | mov LFUNC:RB, [BASE+RA*8] | ||
4026 | | checkfunc LFUNC:RB, ->vmeta_call_ra | ||
4027 | | lea BASE, [BASE+RA*8+16] | ||
4028 | | ins_call | ||
4029 | break; | ||
4030 | |||
4031 | case BC_CALLMT: | ||
4032 | | ins_AD // RA = base, RD = extra_nargs | ||
4033 | | add NARGS:RDd, MULTRES | ||
4034 | | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. | ||
4035 | break; | ||
4036 | case BC_CALLT: | ||
4037 | | ins_AD // RA = base, RD = nargs+1 | ||
4038 | | lea RA, [BASE+RA*8+16] | ||
4039 | | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. | ||
4040 | | mov LFUNC:RB, [RA-16] | ||
4041 | | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call | ||
4042 | |->BC_CALLT_Z: | ||
4043 | | mov PC, [BASE-8] | ||
4044 | | test PCd, FRAME_TYPE | ||
4045 | | jnz >7 | ||
4046 | |1: | ||
4047 | | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below. | ||
4048 | | mov MULTRES, NARGS:RDd | ||
4049 | | sub NARGS:RDd, 1 | ||
4050 | | jz >3 | ||
4051 | |2: // Move args down. | ||
4052 | | mov RB, [RA] | ||
4053 | | add RA, 8 | ||
4054 | | mov [KBASE], RB | ||
4055 | | add KBASE, 8 | ||
4056 | | sub NARGS:RDd, 1 | ||
4057 | | jnz <2 | ||
4058 | | | ||
4059 | | mov LFUNC:RB, [BASE-16] | ||
4060 | |3: | ||
4061 | | cleartp LFUNC:RB | ||
4062 | | mov NARGS:RDd, MULTRES | ||
4063 | | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? | ||
4064 | | ja >5 | ||
4065 | |4: | ||
4066 | | ins_callt | ||
4067 | | | ||
4068 | |5: // Tailcall to a fast function. | ||
4069 | | test PCd, FRAME_TYPE // Lua frame below? | ||
4070 | | jnz <4 | ||
4071 | | movzx RAd, PC_RA | ||
4072 | | neg RA | ||
4073 | | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE. | ||
4074 | | cleartp LFUNC:KBASE | ||
4075 | | mov KBASE, LFUNC:KBASE->pc | ||
4076 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
4077 | | jmp <4 | ||
4078 | | | ||
4079 | |7: // Tailcall from a vararg function. | ||
4080 | | sub PC, FRAME_VARG | ||
4081 | | test PCd, FRAME_TYPEP | ||
4082 | | jnz >8 // Vararg frame below? | ||
4083 | | sub BASE, PC // Need to relocate BASE/KBASE down. | ||
4084 | | mov KBASE, BASE | ||
4085 | | mov PC, [BASE-8] | ||
4086 | | jmp <1 | ||
4087 | |8: | ||
4088 | | add PCd, FRAME_VARG | ||
4089 | | jmp <1 | ||
4090 | break; | ||
4091 | |||
4092 | case BC_ITERC: | ||
4093 | | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) | ||
4094 | | lea RA, [BASE+RA*8+16] // fb = base+2 | ||
4095 | | mov RB, [RA-32] // Copy state. fb[0] = fb[-4]. | ||
4096 | | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3]. | ||
4097 | | mov [RA], RB | ||
4098 | | mov [RA+8], RC | ||
4099 | | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5] | ||
4100 | | mov [RA-16], LFUNC:RB | ||
4101 | | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call. | ||
4102 | | checkfunc LFUNC:RB, ->vmeta_call | ||
4103 | | mov BASE, RA | ||
4104 | | ins_call | ||
4105 | break; | ||
4106 | |||
4107 | case BC_ITERN: | ||
4108 | |.if JIT | ||
4109 | | hotloop RBd | ||
4110 | |.endif | ||
4111 | |->vm_IITERN: | ||
4112 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
4113 | | mov TAB:RB, [BASE+RA*8-16] | ||
4114 | | cleartp TAB:RB | ||
4115 | | mov RCd, [BASE+RA*8-8] // Get index from control var. | ||
4116 | | mov TMPRd, TAB:RB->asize | ||
4117 | | add PC, 4 | ||
4118 | | mov ITYPE, TAB:RB->array | ||
4119 | |1: // Traverse array part. | ||
4120 | | cmp RCd, TMPRd; jae >5 // Index points after array part? | ||
4121 | | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4 | ||
4122 | |.if not DUALNUM | ||
4123 | | cvtsi2sd xmm0, RCd | ||
4124 | |.endif | ||
4125 | | // Copy array slot to returned value. | ||
4126 | | mov RB, [ITYPE+RC*8] | ||
4127 | | mov [BASE+RA*8+8], RB | ||
4128 | | // Return array index as a numeric key. | ||
4129 | |.if DUALNUM | ||
4130 | | setint ITYPE, RC | ||
4131 | | mov [BASE+RA*8], ITYPE | ||
4132 | |.else | ||
4133 | | movsd qword [BASE+RA*8], xmm0 | ||
4134 | |.endif | ||
4135 | | add RCd, 1 | ||
4136 | | mov [BASE+RA*8-8], RCd // Update control var. | ||
4137 | |2: | ||
4138 | | movzx RDd, PC_RD // Get target from ITERL. | ||
4139 | | branchPC RD | ||
4140 | |3: | ||
4141 | | ins_next | ||
4142 | | | ||
4143 | |4: // Skip holes in array part. | ||
4144 | | add RCd, 1 | ||
4145 | | jmp <1 | ||
4146 | | | ||
4147 | |5: // Traverse hash part. | ||
4148 | | sub RCd, TMPRd | ||
4149 | |6: | ||
4150 | | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. | ||
4151 | | imul ITYPEd, RCd, #NODE | ||
4152 | | add NODE:ITYPE, TAB:RB->node | ||
4153 | | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7 | ||
4154 | | lea TMPRd, [RCd+TMPRd+1] | ||
4155 | | // Copy key and value from hash slot. | ||
4156 | | mov RB, NODE:ITYPE->key | ||
4157 | | mov RC, NODE:ITYPE->val | ||
4158 | | mov [BASE+RA*8], RB | ||
4159 | | mov [BASE+RA*8+8], RC | ||
4160 | | mov [BASE+RA*8-8], TMPRd | ||
4161 | | jmp <2 | ||
4162 | | | ||
4163 | |7: // Skip holes in hash part. | ||
4164 | | add RCd, 1 | ||
4165 | | jmp <6 | ||
4166 | break; | ||
4167 | |||
4168 | case BC_ISNEXT: | ||
4169 | | ins_AD // RA = base, RD = target (points to ITERN) | ||
4170 | | mov CFUNC:RB, [BASE+RA*8-24] | ||
4171 | | checkfunc CFUNC:RB, >5 | ||
4172 | | checktptp [BASE+RA*8-16], LJ_TTAB, >5 | ||
4173 | | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 | ||
4174 | | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 | ||
4175 | | branchPC RD | ||
4176 | | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32) | ||
4177 | | mov [BASE+RA*8-8], TMPR // Initialize control var. | ||
4178 | |1: | ||
4179 | | ins_next | ||
4180 | |5: // Despecialize bytecode if any of the checks fail. | ||
4181 | | mov PC_OP, BC_JMP | ||
4182 | | branchPC RD | ||
4183 | |.if JIT | ||
4184 | | cmp byte [PC], BC_ITERN | ||
4185 | | jne >6 | ||
4186 | |.endif | ||
4187 | | mov byte [PC], BC_ITERC | ||
4188 | | jmp <1 | ||
4189 | |.if JIT | ||
4190 | |6: // Unpatch JLOOP. | ||
4191 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
4192 | | movzx RCd, word [PC+2] | ||
4193 | | mov TRACE:RA, [RA+RC*8] | ||
4194 | | mov eax, TRACE:RA->startins | ||
4195 | | mov al, BC_ITERC | ||
4196 | | mov dword [PC], eax | ||
4197 | | jmp <1 | ||
4198 | |.endif | ||
4199 | break; | ||
4200 | |||
4201 | case BC_VARG: | ||
4202 | | ins_ABC // RA = base, RB = nresults+1, RC = numparams | ||
4203 | | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)] | ||
4204 | | lea RA, [BASE+RA*8] | ||
4205 | | sub TMPR, [BASE-8] | ||
4206 | | // Note: TMPR may now be even _above_ BASE if nargs was < numparams. | ||
4207 | | test RB, RB | ||
4208 | | jz >5 // Copy all varargs? | ||
4209 | | lea RB, [RA+RB*8-8] | ||
4210 | | cmp TMPR, BASE // No vararg slots? | ||
4211 | | jnb >2 | ||
4212 | |1: // Copy vararg slots to destination slots. | ||
4213 | | mov RC, [TMPR-16] | ||
4214 | | add TMPR, 8 | ||
4215 | | mov [RA], RC | ||
4216 | | add RA, 8 | ||
4217 | | cmp RA, RB // All destination slots filled? | ||
4218 | | jnb >3 | ||
4219 | | cmp TMPR, BASE // No more vararg slots? | ||
4220 | | jb <1 | ||
4221 | |2: // Fill up remainder with nil. | ||
4222 | | mov aword [RA], LJ_TNIL | ||
4223 | | add RA, 8 | ||
4224 | | cmp RA, RB | ||
4225 | | jb <2 | ||
4226 | |3: | ||
4227 | | ins_next | ||
4228 | | | ||
4229 | |5: // Copy all varargs. | ||
4230 | | mov MULTRES, 1 // MULTRES = 0+1 | ||
4231 | | mov RC, BASE | ||
4232 | | sub RC, TMPR | ||
4233 | | jbe <3 // No vararg slots? | ||
4234 | | mov RBd, RCd | ||
4235 | | shr RBd, 3 | ||
4236 | | add RBd, 1 | ||
4237 | | mov MULTRES, RBd // MULTRES = #varargs+1 | ||
4238 | | mov L:RB, SAVE_L | ||
4239 | | add RC, RA | ||
4240 | | cmp RC, L:RB->maxstack | ||
4241 | | ja >7 // Need to grow stack? | ||
4242 | |6: // Copy all vararg slots. | ||
4243 | | mov RC, [TMPR-16] | ||
4244 | | add TMPR, 8 | ||
4245 | | mov [RA], RC | ||
4246 | | add RA, 8 | ||
4247 | | cmp TMPR, BASE // No more vararg slots? | ||
4248 | | jb <6 | ||
4249 | | jmp <3 | ||
4250 | | | ||
4251 | |7: // Grow stack for varargs. | ||
4252 | | mov L:RB->base, BASE | ||
4253 | | mov L:RB->top, RA | ||
4254 | | mov SAVE_PC, PC | ||
4255 | | sub TMPR, BASE // Need delta, because BASE may change. | ||
4256 | | mov TMP1hi, TMPRd | ||
4257 | | mov CARG2d, MULTRES | ||
4258 | | sub CARG2d, 1 | ||
4259 | | mov CARG1, L:RB | ||
4260 | | call extern lj_state_growstack // (lua_State *L, int n) | ||
4261 | | mov BASE, L:RB->base | ||
4262 | | movsxd TMPR, TMP1hi | ||
4263 | | mov RA, L:RB->top | ||
4264 | | add TMPR, BASE | ||
4265 | | jmp <6 | ||
4266 | break; | ||
4267 | |||
4268 | /* -- Returns ----------------------------------------------------------- */ | ||
4269 | |||
4270 | case BC_RETM: | ||
4271 | | ins_AD // RA = results, RD = extra_nresults | ||
4272 | | add RDd, MULTRES // MULTRES >=1, so RD >=1. | ||
4273 | | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. | ||
4274 | break; | ||
4275 | |||
4276 | case BC_RET: case BC_RET0: case BC_RET1: | ||
4277 | | ins_AD // RA = results, RD = nresults+1 | ||
4278 | if (op != BC_RET0) { | ||
4279 | | shl RAd, 3 | ||
4280 | } | ||
4281 | |1: | ||
4282 | | mov PC, [BASE-8] | ||
4283 | | mov MULTRES, RDd // Save nresults+1. | ||
4284 | | test PCd, FRAME_TYPE // Check frame type marker. | ||
4285 | | jnz >7 // Not returning to a fixarg Lua func? | ||
4286 | switch (op) { | ||
4287 | case BC_RET: | ||
4288 | |->BC_RET_Z: | ||
4289 | | mov KBASE, BASE // Use KBASE for result move. | ||
4290 | | sub RDd, 1 | ||
4291 | | jz >3 | ||
4292 | |2: // Move results down. | ||
4293 | | mov RB, [KBASE+RA] | ||
4294 | | mov [KBASE-16], RB | ||
4295 | | add KBASE, 8 | ||
4296 | | sub RDd, 1 | ||
4297 | | jnz <2 | ||
4298 | |3: | ||
4299 | | mov RDd, MULTRES // Note: MULTRES may be >255. | ||
4300 | | movzx RBd, PC_RB // So cannot compare with RDL! | ||
4301 | |5: | ||
4302 | | cmp RBd, RDd // More results expected? | ||
4303 | | ja >6 | ||
4304 | break; | ||
4305 | case BC_RET1: | ||
4306 | | mov RB, [BASE+RA] | ||
4307 | | mov [BASE-16], RB | ||
4308 | /* fallthrough */ | ||
4309 | case BC_RET0: | ||
4310 | |5: | ||
4311 | | cmp PC_RB, RDL // More results expected? | ||
4312 | | ja >6 | ||
4313 | default: | ||
4314 | break; | ||
4315 | } | ||
4316 | | movzx RAd, PC_RA | ||
4317 | | neg RA | ||
4318 | | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 | ||
4319 | | mov LFUNC:KBASE, [BASE-16] | ||
4320 | | cleartp LFUNC:KBASE | ||
4321 | | mov KBASE, LFUNC:KBASE->pc | ||
4322 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
4323 | | ins_next | ||
4324 | | | ||
4325 | |6: // Fill up results with nil. | ||
4326 | if (op == BC_RET) { | ||
4327 | | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base. | ||
4328 | | add KBASE, 8 | ||
4329 | } else { | ||
4330 | | mov aword [BASE+RD*8-24], LJ_TNIL | ||
4331 | } | ||
4332 | | add RD, 1 | ||
4333 | | jmp <5 | ||
4334 | | | ||
4335 | |7: // Non-standard return case. | ||
4336 | | lea RB, [PC-FRAME_VARG] | ||
4337 | | test RBd, FRAME_TYPEP | ||
4338 | | jnz ->vm_return | ||
4339 | | // Return from vararg function: relocate BASE down and RA up. | ||
4340 | | sub BASE, RB | ||
4341 | if (op != BC_RET0) { | ||
4342 | | add RA, RB | ||
4343 | } | ||
4344 | | jmp <1 | ||
4345 | break; | ||
4346 | |||
4347 | /* -- Loops and branches ------------------------------------------------ */ | ||
4348 | |||
4349 | |.define FOR_IDX, [RA] | ||
4350 | |.define FOR_STOP, [RA+8] | ||
4351 | |.define FOR_STEP, [RA+16] | ||
4352 | |.define FOR_EXT, [RA+24] | ||
4353 | |||
4354 | case BC_FORL: | ||
4355 | |.if JIT | ||
4356 | | hotloop RBd | ||
4357 | |.endif | ||
4358 | | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. | ||
4359 | break; | ||
4360 | |||
4361 | case BC_JFORI: | ||
4362 | case BC_JFORL: | ||
4363 | #if !LJ_HASJIT | ||
4364 | break; | ||
4365 | #endif | ||
4366 | case BC_FORI: | ||
4367 | case BC_IFORL: | ||
4368 | vk = (op == BC_IFORL || op == BC_JFORL); | ||
4369 | | ins_AJ // RA = base, RD = target (after end of loop or start of loop) | ||
4370 | | lea RA, [BASE+RA*8] | ||
4371 | if (LJ_DUALNUM) { | ||
4372 | | mov RB, FOR_IDX | ||
4373 | | checkint RB, >9 | ||
4374 | | mov TMPR, FOR_STOP | ||
4375 | if (!vk) { | ||
4376 | | checkint TMPR, ->vmeta_for | ||
4377 | | mov ITYPE, FOR_STEP | ||
4378 | | test ITYPEd, ITYPEd; js >5 | ||
4379 | | sar ITYPE, 47; | ||
4380 | | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for | ||
4381 | } else { | ||
4382 | #ifdef LUA_USE_ASSERT | ||
4383 | | checkinttp FOR_STOP, ->assert_bad_for_arg_type | ||
4384 | | checkinttp FOR_STEP, ->assert_bad_for_arg_type | ||
4385 | #endif | ||
4386 | | mov ITYPE, FOR_STEP | ||
4387 | | test ITYPEd, ITYPEd; js >5 | ||
4388 | | add RBd, ITYPEd; jo >1 | ||
4389 | | setint RB | ||
4390 | | mov FOR_IDX, RB | ||
4391 | } | ||
4392 | | cmp RBd, TMPRd | ||
4393 | | mov FOR_EXT, RB | ||
4394 | if (op == BC_FORI) { | ||
4395 | | jle >7 | ||
4396 | |1: | ||
4397 | |6: | ||
4398 | | branchPC RD | ||
4399 | } else if (op == BC_JFORI) { | ||
4400 | | branchPC RD | ||
4401 | | movzx RDd, PC_RD | ||
4402 | | jle =>BC_JLOOP | ||
4403 | |1: | ||
4404 | |6: | ||
4405 | } else if (op == BC_IFORL) { | ||
4406 | | jg >7 | ||
4407 | |6: | ||
4408 | | branchPC RD | ||
4409 | |1: | ||
4410 | } else { | ||
4411 | | jle =>BC_JLOOP | ||
4412 | |1: | ||
4413 | |6: | ||
4414 | } | ||
4415 | |7: | ||
4416 | | ins_next | ||
4417 | | | ||
4418 | |5: // Invert check for negative step. | ||
4419 | if (!vk) { | ||
4420 | | sar ITYPE, 47; | ||
4421 | | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for | ||
4422 | } else { | ||
4423 | | add RBd, ITYPEd; jo <1 | ||
4424 | | setint RB | ||
4425 | | mov FOR_IDX, RB | ||
4426 | } | ||
4427 | | cmp RBd, TMPRd | ||
4428 | | mov FOR_EXT, RB | ||
4429 | if (op == BC_FORI) { | ||
4430 | | jge <7 | ||
4431 | } else if (op == BC_JFORI) { | ||
4432 | | branchPC RD | ||
4433 | | movzx RDd, PC_RD | ||
4434 | | jge =>BC_JLOOP | ||
4435 | } else if (op == BC_IFORL) { | ||
4436 | | jl <7 | ||
4437 | } else { | ||
4438 | | jge =>BC_JLOOP | ||
4439 | } | ||
4440 | | jmp <6 | ||
4441 | |9: // Fallback to FP variant. | ||
4442 | if (!vk) { | ||
4443 | | jae ->vmeta_for | ||
4444 | } | ||
4445 | } else if (!vk) { | ||
4446 | | checknumtp FOR_IDX, ->vmeta_for | ||
4447 | } | ||
4448 | if (!vk) { | ||
4449 | | checknumtp FOR_STOP, ->vmeta_for | ||
4450 | } else { | ||
4451 | #ifdef LUA_USE_ASSERT | ||
4452 | | checknumtp FOR_STOP, ->assert_bad_for_arg_type | ||
4453 | | checknumtp FOR_STEP, ->assert_bad_for_arg_type | ||
4454 | #endif | ||
4455 | } | ||
4456 | | mov RB, FOR_STEP | ||
4457 | if (!vk) { | ||
4458 | | checknum RB, ->vmeta_for | ||
4459 | } | ||
4460 | | movsd xmm0, qword FOR_IDX | ||
4461 | | movsd xmm1, qword FOR_STOP | ||
4462 | if (vk) { | ||
4463 | | addsd xmm0, qword FOR_STEP | ||
4464 | | movsd qword FOR_IDX, xmm0 | ||
4465 | | test RB, RB; js >3 | ||
4466 | } else { | ||
4467 | | jl >3 | ||
4468 | } | ||
4469 | | ucomisd xmm1, xmm0 | ||
4470 | |1: | ||
4471 | | movsd qword FOR_EXT, xmm0 | ||
4472 | if (op == BC_FORI) { | ||
4473 | |.if DUALNUM | ||
4474 | | jnb <7 | ||
4475 | |.else | ||
4476 | | jnb >2 | ||
4477 | | branchPC RD | ||
4478 | |.endif | ||
4479 | } else if (op == BC_JFORI) { | ||
4480 | | branchPC RD | ||
4481 | | movzx RDd, PC_RD | ||
4482 | | jnb =>BC_JLOOP | ||
4483 | } else if (op == BC_IFORL) { | ||
4484 | |.if DUALNUM | ||
4485 | | jb <7 | ||
4486 | |.else | ||
4487 | | jb >2 | ||
4488 | | branchPC RD | ||
4489 | |.endif | ||
4490 | } else { | ||
4491 | | jnb =>BC_JLOOP | ||
4492 | } | ||
4493 | |.if DUALNUM | ||
4494 | | jmp <6 | ||
4495 | |.else | ||
4496 | |2: | ||
4497 | | ins_next | ||
4498 | |.endif | ||
4499 | | | ||
4500 | |3: // Invert comparison if step is negative. | ||
4501 | | ucomisd xmm0, xmm1 | ||
4502 | | jmp <1 | ||
4503 | break; | ||
4504 | |||
4505 | case BC_ITERL: | ||
4506 | |.if JIT | ||
4507 | | hotloop RBd | ||
4508 | |.endif | ||
4509 | | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. | ||
4510 | break; | ||
4511 | |||
4512 | case BC_JITERL: | ||
4513 | #if !LJ_HASJIT | ||
4514 | break; | ||
4515 | #endif | ||
4516 | case BC_IITERL: | ||
4517 | | ins_AJ // RA = base, RD = target | ||
4518 | | lea RA, [BASE+RA*8] | ||
4519 | | mov RB, [RA] | ||
4520 | | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. | ||
4521 | if (op == BC_JITERL) { | ||
4522 | | mov [RA-8], RB | ||
4523 | | jmp =>BC_JLOOP | ||
4524 | } else { | ||
4525 | | branchPC RD // Otherwise save control var + branch. | ||
4526 | | mov [RA-8], RB | ||
4527 | } | ||
4528 | |1: | ||
4529 | | ins_next | ||
4530 | break; | ||
4531 | |||
4532 | case BC_LOOP: | ||
4533 | | ins_A // RA = base, RD = target (loop extent) | ||
4534 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | ||
4535 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | ||
4536 | |.if JIT | ||
4537 | | hotloop RBd | ||
4538 | |.endif | ||
4539 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | ||
4540 | break; | ||
4541 | |||
4542 | case BC_ILOOP: | ||
4543 | | ins_A // RA = base, RD = target (loop extent) | ||
4544 | | ins_next | ||
4545 | break; | ||
4546 | |||
4547 | case BC_JLOOP: | ||
4548 | |.if JIT | ||
4549 | | ins_AD // RA = base (ignored), RD = traceno | ||
4550 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
4551 | | mov TRACE:RD, [RA+RD*8] | ||
4552 | | mov RD, TRACE:RD->mcode | ||
4553 | | mov L:RB, SAVE_L | ||
4554 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | ||
4555 | | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB | ||
4556 | | // Save additional callee-save registers only used in compiled code. | ||
4557 | |.if X64WIN | ||
4558 | | mov CSAVE_4, r12 | ||
4559 | | mov CSAVE_3, r13 | ||
4560 | | mov CSAVE_2, r14 | ||
4561 | | mov CSAVE_1, r15 | ||
4562 | | mov RA, rsp | ||
4563 | | sub rsp, 10*16+4*8 | ||
4564 | | movdqa [RA-1*16], xmm6 | ||
4565 | | movdqa [RA-2*16], xmm7 | ||
4566 | | movdqa [RA-3*16], xmm8 | ||
4567 | | movdqa [RA-4*16], xmm9 | ||
4568 | | movdqa [RA-5*16], xmm10 | ||
4569 | | movdqa [RA-6*16], xmm11 | ||
4570 | | movdqa [RA-7*16], xmm12 | ||
4571 | | movdqa [RA-8*16], xmm13 | ||
4572 | | movdqa [RA-9*16], xmm14 | ||
4573 | | movdqa [RA-10*16], xmm15 | ||
4574 | |.else | ||
4575 | | sub rsp, 16 | ||
4576 | | mov [rsp+16], r12 | ||
4577 | | mov [rsp+8], r13 | ||
4578 | |.endif | ||
4579 | | jmp RD | ||
4580 | |.endif | ||
4581 | break; | ||
4582 | |||
4583 | case BC_JMP: | ||
4584 | | ins_AJ // RA = unused, RD = target | ||
4585 | | branchPC RD | ||
4586 | | ins_next | ||
4587 | break; | ||
4588 | |||
4589 | /* -- Function headers -------------------------------------------------- */ | ||
4590 | |||
4591 | /* | ||
4592 | ** Reminder: A function may be called with func/args above L->maxstack, | ||
4593 | ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, | ||
4594 | ** too. This means all FUNC* ops (including fast functions) must check | ||
4595 | ** for stack overflow _before_ adding more slots! | ||
4596 | */ | ||
4597 | |||
4598 | case BC_FUNCF: | ||
4599 | |.if JIT | ||
4600 | | hotcall RBd | ||
4601 | |.endif | ||
4602 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | ||
4603 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. | ||
4604 | break; | ||
4605 | |||
4606 | case BC_JFUNCF: | ||
4607 | #if !LJ_HASJIT | ||
4608 | break; | ||
4609 | #endif | ||
4610 | case BC_IFUNCF: | ||
4611 | | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | ||
4612 | | mov KBASE, [PC-4+PC2PROTO(k)] | ||
4613 | | mov L:RB, SAVE_L | ||
4614 | | lea RA, [BASE+RA*8] // Top of frame. | ||
4615 | | cmp RA, L:RB->maxstack | ||
4616 | | ja ->vm_growstack_f | ||
4617 | | movzx RAd, byte [PC-4+PC2PROTO(numparams)] | ||
4618 | | cmp NARGS:RDd, RAd // Check for missing parameters. | ||
4619 | | jbe >3 | ||
4620 | |2: | ||
4621 | if (op == BC_JFUNCF) { | ||
4622 | | movzx RDd, PC_RD | ||
4623 | | jmp =>BC_JLOOP | ||
4624 | } else { | ||
4625 | | ins_next | ||
4626 | } | ||
4627 | | | ||
4628 | |3: // Clear missing parameters. | ||
4629 | | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL | ||
4630 | | add NARGS:RDd, 1 | ||
4631 | | cmp NARGS:RDd, RAd | ||
4632 | | jbe <3 | ||
4633 | | jmp <2 | ||
4634 | break; | ||
4635 | |||
4636 | case BC_JFUNCV: | ||
4637 | #if !LJ_HASJIT | ||
4638 | break; | ||
4639 | #endif | ||
4640 | | int3 // NYI: compiled vararg functions | ||
4641 | break; /* NYI: compiled vararg functions. */ | ||
4642 | |||
4643 | case BC_IFUNCV: | ||
4644 | | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | ||
4645 | | lea RBd, [NARGS:RD*8+FRAME_VARG+8] | ||
4646 | | lea RD, [BASE+NARGS:RD*8+8] | ||
4647 | | mov LFUNC:KBASE, [BASE-16] | ||
4648 | | mov [RD-8], RB // Store delta + FRAME_VARG. | ||
4649 | | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC. | ||
4650 | | mov L:RB, SAVE_L | ||
4651 | | lea RA, [RD+RA*8] | ||
4652 | | cmp RA, L:RB->maxstack | ||
4653 | | ja ->vm_growstack_v // Need to grow stack. | ||
4654 | | mov RA, BASE | ||
4655 | | mov BASE, RD | ||
4656 | | movzx RBd, byte [PC-4+PC2PROTO(numparams)] | ||
4657 | | test RBd, RBd | ||
4658 | | jz >2 | ||
4659 | | add RA, 8 | ||
4660 | |1: // Copy fixarg slots up to new frame. | ||
4661 | | add RA, 8 | ||
4662 | | cmp RA, BASE | ||
4663 | | jnb >3 // Less args than parameters? | ||
4664 | | mov KBASE, [RA-16] | ||
4665 | | mov [RD], KBASE | ||
4666 | | add RD, 8 | ||
4667 | | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC). | ||
4668 | | sub RBd, 1 | ||
4669 | | jnz <1 | ||
4670 | |2: | ||
4671 | if (op == BC_JFUNCV) { | ||
4672 | | movzx RDd, PC_RD | ||
4673 | | jmp =>BC_JLOOP | ||
4674 | } else { | ||
4675 | | mov KBASE, [PC-4+PC2PROTO(k)] | ||
4676 | | ins_next | ||
4677 | } | ||
4678 | | | ||
4679 | |3: // Clear missing parameters. | ||
4680 | | mov aword [RD], LJ_TNIL | ||
4681 | | add RD, 8 | ||
4682 | | sub RBd, 1 | ||
4683 | | jnz <3 | ||
4684 | | jmp <2 | ||
4685 | break; | ||
4686 | |||
4687 | case BC_FUNCC: | ||
4688 | case BC_FUNCCW: | ||
4689 | | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 | ||
4690 | | mov CFUNC:RB, [BASE-16] | ||
4691 | | cleartp CFUNC:RB | ||
4692 | | mov KBASE, CFUNC:RB->f | ||
4693 | | mov L:RB, SAVE_L | ||
4694 | | lea RD, [BASE+NARGS:RD*8-8] | ||
4695 | | mov L:RB->base, BASE | ||
4696 | | lea RA, [RD+8*LUA_MINSTACK] | ||
4697 | | cmp RA, L:RB->maxstack | ||
4698 | | mov L:RB->top, RD | ||
4699 | if (op == BC_FUNCC) { | ||
4700 | | mov CARG1, L:RB // Caveat: CARG1 may be RA. | ||
4701 | } else { | ||
4702 | | mov CARG2, KBASE | ||
4703 | | mov CARG1, L:RB // Caveat: CARG1 may be RA. | ||
4704 | } | ||
4705 | | ja ->vm_growstack_c // Need to grow stack. | ||
4706 | | set_vmstate C | ||
4707 | if (op == BC_FUNCC) { | ||
4708 | | call KBASE // (lua_State *L) | ||
4709 | } else { | ||
4710 | | // (lua_State *L, lua_CFunction f) | ||
4711 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] | ||
4712 | } | ||
4713 | | // nresults returned in eax (RD). | ||
4714 | | mov BASE, L:RB->base | ||
4715 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
4716 | | set_vmstate INTERP | ||
4717 | | lea RA, [BASE+RD*8] | ||
4718 | | neg RA | ||
4719 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | ||
4720 | | mov PC, [BASE-8] // Fetch PC of caller. | ||
4721 | | jmp ->vm_returnc | ||
4722 | break; | ||
4723 | |||
4724 | /* ---------------------------------------------------------------------- */ | ||
4725 | |||
4726 | default: | ||
4727 | fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | ||
4728 | exit(2); | ||
4729 | break; | ||
4730 | } | ||
4731 | } | ||
4732 | |||
/*
** Generate the complete VM backend: first the shared subroutines via
** build_subroutines(), then the code for every opcode in the range
** 0 <= op < BC__MAX via build_ins(). Always returns BC__MAX.
*/
4733 | static int build_backend(BuildCtx *ctx) | ||
4734 | { | ||
4735 | int op; | ||
/* Make room for BC__MAX DynASM pc labels before any code is emitted.
** Presumably one label per opcode: the opcode handlers reference each
** other through pc labels such as =>BC_JLOOP -- confirm against build_ins.
*/
4736 | dasm_growpc(Dst, BC__MAX); | ||
4737 | build_subroutines(ctx); | ||
/* Switch the DynASM output section; everything emitted by the loop below
** lands in .code_op (presumably the section reserved for opcode handlers).
*/
4738 | |.code_op | ||
/* The opcode is passed twice: once as the BCOp to generate and once as a
** plain int (presumably the pc label index for that opcode -- TODO confirm).
*/
4739 | for (op = 0; op < BC__MAX; op++) | ||
4740 | build_ins(ctx, (BCOp)op, op); | ||
4741 | return BC__MAX; | ||
4742 | } | ||
4743 | |||
4744 | /* Emit pseudo frame-info for all assembler functions. */ | ||
/* Writes hand-assembled call-frame records (CIEs and FDEs) as assembler
** text to ctx->fp. Output depends on ctx->mode:
**   BUILD_elfasm:  a .debug_frame section, plus an .eh_frame section unless
**                  LJ_NO_UNWIND is set.
**   BUILD_machasm: a __TEXT,__eh_frame section (case only compiled in when
**                  LJ_NO_UNWIND is not set).
**   anything else: nothing is written.
** With LJ_HASFFI, lj_vm_ffi_call gets its own FDE describing an rbp-based
** frame; all other code shares the CFRAME_SIZE frame description.
*/
4745 | static void emit_asm_debug(BuildCtx *ctx) | ||
4746 | { | ||
/* Byte offset of the vm_ffi_call global from the start of the code. Used as
** the length of the main FDE; the FFI FDE gets the remaining
** ctx->codesz - fcofs bytes.
*/
4747 | int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); | ||
4748 | switch (ctx->mode) { | ||
4749 | case BUILD_elfasm: | ||
/* .debug_frame: CIE .Lframe0 with an empty augmentation string. */
4750 | fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); | ||
4751 | fprintf(ctx->fp, | ||
4752 | ".Lframe0:\n" | ||
4753 | "\t.long .LECIE0-.LSCIE0\n" | ||
4754 | ".LSCIE0:\n" | ||
4755 | "\t.long 0xffffffff\n" | ||
4756 | "\t.byte 0x1\n" | ||
4757 | "\t.string \"\"\n" | ||
4758 | "\t.uleb128 0x1\n" | ||
4759 | "\t.sleb128 -8\n" | ||
4760 | "\t.byte 0x10\n" | ||
4761 | "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" | ||
4762 | "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" | ||
4763 | "\t.align 8\n" | ||
4764 | ".LECIE0:\n\n"); | ||
/* FDE0: fcofs bytes starting at .Lbegin, CFA offset CFRAME_SIZE. The r13/r12
** save slots are only described when LJ_NO_UNWIND is set.
*/
4765 | fprintf(ctx->fp, | ||
4766 | ".LSFDE0:\n" | ||
4767 | "\t.long .LEFDE0-.LASFDE0\n" | ||
4768 | ".LASFDE0:\n" | ||
4769 | "\t.long .Lframe0\n" | ||
4770 | "\t.quad .Lbegin\n" | ||
4771 | "\t.quad %d\n" | ||
4772 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | ||
4773 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4774 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4775 | "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ | ||
4776 | "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ | ||
4777 | #if LJ_NO_UNWIND | ||
4778 | "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ | ||
4779 | "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ | ||
4780 | #endif | ||
4781 | "\t.align 8\n" | ||
4782 | ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); | ||
4783 | #if LJ_HASFFI | ||
/* FDE1: lj_vm_ffi_call, covering the remaining codesz - fcofs bytes. */
4784 | fprintf(ctx->fp, | ||
4785 | ".LSFDE1:\n" | ||
4786 | "\t.long .LEFDE1-.LASFDE1\n" | ||
4787 | ".LASFDE1:\n" | ||
4788 | "\t.long .Lframe0\n" | ||
4789 | "\t.quad lj_vm_ffi_call\n" | ||
4790 | "\t.quad %d\n" | ||
4791 | "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ | ||
4792 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4793 | "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ | ||
4794 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4795 | "\t.align 8\n" | ||
4796 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | ||
4797 | #endif | ||
4798 | #if !LJ_NO_UNWIND | ||
/* .eh_frame: Solaris gets section type @unwind, everything else @progbits. */
4799 | #if LJ_TARGET_SOLARIS | ||
4800 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); | ||
4801 | #else | ||
4802 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); | ||
4803 | #endif | ||
/* CIE .Lframe1: "zPR" augmentation; its 6 bytes of augmentation data hold
** a pc-relative reference to lj_err_unwind_dwarf.
*/
4804 | fprintf(ctx->fp, | ||
4805 | ".Lframe1:\n" | ||
4806 | "\t.long .LECIE1-.LSCIE1\n" | ||
4807 | ".LSCIE1:\n" | ||
4808 | "\t.long 0\n" | ||
4809 | "\t.byte 0x1\n" | ||
4810 | "\t.string \"zPR\"\n" | ||
4811 | "\t.uleb128 0x1\n" | ||
4812 | "\t.sleb128 -8\n" | ||
4813 | "\t.byte 0x10\n" | ||
4814 | "\t.uleb128 6\n" /* augmentation length */ | ||
4815 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4816 | "\t.long lj_err_unwind_dwarf-.\n" | ||
4817 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4818 | "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" | ||
4819 | "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" | ||
4820 | "\t.align 8\n" | ||
4821 | ".LECIE1:\n\n"); | ||
/* FDE2: same range and frame size as FDE0, but with pc-relative 32 bit
** fields and without the r13/r12 entries.
*/
4822 | fprintf(ctx->fp, | ||
4823 | ".LSFDE2:\n" | ||
4824 | "\t.long .LEFDE2-.LASFDE2\n" | ||
4825 | ".LASFDE2:\n" | ||
4826 | "\t.long .LASFDE2-.Lframe1\n" | ||
4827 | "\t.long .Lbegin-.\n" | ||
4828 | "\t.long %d\n" | ||
4829 | "\t.uleb128 0\n" /* augmentation length */ | ||
4830 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | ||
4831 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4832 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4833 | "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ | ||
4834 | "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ | ||
4835 | "\t.align 8\n" | ||
4836 | ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); | ||
4837 | #if LJ_HASFFI | ||
/* Separate CIE .Lframe2 for lj_vm_ffi_call: "zR" augmentation, i.e. no
** lj_err_unwind_dwarf reference, followed by its FDE3.
*/
4838 | fprintf(ctx->fp, | ||
4839 | ".Lframe2:\n" | ||
4840 | "\t.long .LECIE2-.LSCIE2\n" | ||
4841 | ".LSCIE2:\n" | ||
4842 | "\t.long 0\n" | ||
4843 | "\t.byte 0x1\n" | ||
4844 | "\t.string \"zR\"\n" | ||
4845 | "\t.uleb128 0x1\n" | ||
4846 | "\t.sleb128 -8\n" | ||
4847 | "\t.byte 0x10\n" | ||
4848 | "\t.uleb128 1\n" /* augmentation length */ | ||
4849 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4850 | "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" | ||
4851 | "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" | ||
4852 | "\t.align 8\n" | ||
4853 | ".LECIE2:\n\n"); | ||
4854 | fprintf(ctx->fp, | ||
4855 | ".LSFDE3:\n" | ||
4856 | "\t.long .LEFDE3-.LASFDE3\n" | ||
4857 | ".LASFDE3:\n" | ||
4858 | "\t.long .LASFDE3-.Lframe2\n" | ||
4859 | "\t.long lj_vm_ffi_call-.\n" | ||
4860 | "\t.long %d\n" | ||
4861 | "\t.uleb128 0\n" /* augmentation length */ | ||
4862 | "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ | ||
4863 | "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | ||
4864 | "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ | ||
4865 | "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | ||
4866 | "\t.align 8\n" | ||
4867 | ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); | ||
4868 | #endif | ||
4869 | #endif | ||
4870 | break; | ||
4871 | #if !LJ_NO_UNWIND | ||
4872 | /* Mental note: never let Apple design an assembler. | ||
4873 | ** Or a linker. Or a plastic case. But I digress. | ||
4874 | */ | ||
4875 | case BUILD_machasm: { | ||
4876 | #if LJ_HASFFI | ||
/* Size of _lj_vm_ffi_call; stays 0 if no non-empty symbol of that name
** is seen in the loop below, which suppresses its CIE/FDE.
*/
4877 | int fcsize = 0; | ||
4878 | #endif | ||
4879 | int i; | ||
4880 | fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); | ||
/* CIE EH_frame1: "zPR" augmentation referencing _lj_err_unwind_dwarf via
** the GOT. Operands are written as plain .byte values in this mode.
*/
4881 | fprintf(ctx->fp, | ||
4882 | "EH_frame1:\n" | ||
4883 | "\t.set L$set$x,LECIEX-LSCIEX\n" | ||
4884 | "\t.long L$set$x\n" | ||
4885 | "LSCIEX:\n" | ||
4886 | "\t.long 0\n" | ||
4887 | "\t.byte 0x1\n" | ||
4888 | "\t.ascii \"zPR\\0\"\n" | ||
4889 | "\t.byte 0x1\n" | ||
4890 | "\t.byte 128-8\n" | ||
4891 | "\t.byte 0x10\n" | ||
4892 | "\t.byte 6\n" /* augmentation length */ | ||
4893 | "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ | ||
4894 | "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" | ||
4895 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4896 | "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" | ||
4897 | "\t.byte 0x80+0x10\n\t.byte 0x1\n" | ||
4898 | "\t.align 3\n" | ||
4899 | "LECIEX:\n\n"); | ||
/* One FDE per symbol with non-zero size; labels are made unique with the
** symbol index i.
** NOTE(review): the size computation reads ctx->sym[i+1] for i == nsym-1,
** so it presumably relies on an end-marker entry at ctx->sym[ctx->nsym] --
** confirm against the code that fills ctx->sym.
*/
4900 | for (i = 0; i < ctx->nsym; i++) { | ||
4901 | const char *name = ctx->sym[i].name; | ||
4902 | int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; | ||
4903 | if (size == 0) continue; | ||
4904 | #if LJ_HASFFI | ||
/* The FFI call stub is handled separately after the loop. */
4905 | if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } | ||
4906 | #endif | ||
4907 | fprintf(ctx->fp, | ||
4908 | "%s.eh:\n" | ||
4909 | "LSFDE%d:\n" | ||
4910 | "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" | ||
4911 | "\t.long L$set$%d\n" | ||
4912 | "LASFDE%d:\n" | ||
4913 | "\t.long LASFDE%d-EH_frame1\n" | ||
4914 | "\t.long %s-.\n" | ||
4915 | "\t.long %d\n" | ||
4916 | "\t.byte 0\n" /* augmentation length */ | ||
4917 | "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ | ||
4918 | "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ | ||
4919 | "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ | ||
4920 | "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ | ||
4921 | "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ | ||
4922 | "\t.align 3\n" | ||
4923 | "LEFDE%d:\n\n", | ||
4924 | name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); | ||
4925 | } | ||
4926 | #if LJ_HASFFI | ||
/* Second CIE ("zR", no _lj_err_unwind_dwarf reference) and the FDE for
** _lj_vm_ffi_call.
*/
4927 | if (fcsize) { | ||
4928 | fprintf(ctx->fp, | ||
4929 | "EH_frame2:\n" | ||
4930 | "\t.set L$set$y,LECIEY-LSCIEY\n" | ||
4931 | "\t.long L$set$y\n" | ||
4932 | "LSCIEY:\n" | ||
4933 | "\t.long 0\n" | ||
4934 | "\t.byte 0x1\n" | ||
4935 | "\t.ascii \"zR\\0\"\n" | ||
4936 | "\t.byte 0x1\n" | ||
4937 | "\t.byte 128-8\n" | ||
4938 | "\t.byte 0x10\n" | ||
4939 | "\t.byte 1\n" /* augmentation length */ | ||
4940 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
4941 | "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" | ||
4942 | "\t.byte 0x80+0x10\n\t.byte 0x1\n" | ||
4943 | "\t.align 3\n" | ||
4944 | "LECIEY:\n\n"); | ||
4945 | fprintf(ctx->fp, | ||
4946 | "_lj_vm_ffi_call.eh:\n" | ||
4947 | "LSFDEY:\n" | ||
4948 | "\t.set L$set$yy,LEFDEY-LASFDEY\n" | ||
4949 | "\t.long L$set$yy\n" | ||
4950 | "LASFDEY:\n" | ||
4951 | "\t.long LASFDEY-EH_frame2\n" | ||
4952 | "\t.long _lj_vm_ffi_call-.\n" | ||
4953 | "\t.long %d\n" | ||
4954 | "\t.byte 0\n" /* augmentation length */ | ||
4955 | "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ | ||
4956 | "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ | ||
4957 | "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ | ||
4958 | "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ | ||
4959 | "\t.align 3\n" | ||
4960 | "LEFDEY:\n\n", fcsize); | ||
4961 | } | ||
4962 | #endif | ||
4963 | fprintf(ctx->fp, ".subsections_via_symbols\n"); | ||
4964 | } | ||
4965 | break; | ||
4966 | #endif | ||
4967 | default: /* Difficult for other modes. */ | ||
4968 | break; | ||
4969 | } | ||
4970 | } | ||
4971 | |||
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 8c2740c3..eb56840a 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -18,7 +18,6 @@ | |||
18 | | | 18 | | |
19 | |.if P64 | 19 | |.if P64 |
20 | |.define X64, 1 | 20 | |.define X64, 1 |
21 | |.define SSE, 1 | ||
22 | |.if WIN | 21 | |.if WIN |
23 | |.define X64WIN, 1 | 22 | |.define X64WIN, 1 |
24 | |.endif | 23 | |.endif |
@@ -116,24 +115,74 @@ | |||
116 | |.type NODE, Node | 115 | |.type NODE, Node |
117 | |.type NARGS, int | 116 | |.type NARGS, int |
118 | |.type TRACE, GCtrace | 117 | |.type TRACE, GCtrace |
118 | |.type SBUF, SBuf | ||
119 | | | 119 | | |
120 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 120 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
121 | |//----------------------------------------------------------------------- | 121 | |//----------------------------------------------------------------------- |
122 | |.if not X64 // x86 stack layout. | 122 | |.if not X64 // x86 stack layout. |
123 | | | 123 | | |
124 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | 124 | |.if WIN |
125 | | | ||
126 | |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). | ||
125 | |.macro saveregs_ | 127 | |.macro saveregs_ |
126 | | push edi; push esi; push ebx | 128 | | push edi; push esi; push ebx |
129 | | push extern lj_err_unwind_win | ||
130 | | fs; push dword [0] | ||
131 | | fs; mov [0], esp | ||
127 | | sub esp, CFRAME_SPACE | 132 | | sub esp, CFRAME_SPACE |
128 | |.endmacro | 133 | |.endmacro |
129 | |.macro saveregs | 134 | |.macro restoreregs |
130 | | push ebp; saveregs_ | 135 | | add esp, CFRAME_SPACE |
136 | | fs; pop dword [0] | ||
137 | | pop edi // Short for esp += 4. | ||
138 | | pop ebx; pop esi; pop edi; pop ebp | ||
139 | |.endmacro | ||
140 | | | ||
141 | |.else | ||
142 | | | ||
143 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | ||
144 | |.macro saveregs_ | ||
145 | | push edi; push esi; push ebx | ||
146 | | sub esp, CFRAME_SPACE | ||
131 | |.endmacro | 147 | |.endmacro |
132 | |.macro restoreregs | 148 | |.macro restoreregs |
133 | | add esp, CFRAME_SPACE | 149 | | add esp, CFRAME_SPACE |
134 | | pop ebx; pop esi; pop edi; pop ebp | 150 | | pop ebx; pop esi; pop edi; pop ebp |
135 | |.endmacro | 151 | |.endmacro |
136 | | | 152 | | |
153 | |.endif | ||
154 | | | ||
155 | |.macro saveregs | ||
156 | | push ebp; saveregs_ | ||
157 | |.endmacro | ||
158 | | | ||
159 | |.if WIN | ||
160 | |.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. | ||
161 | |.define SAVE_NRES, aword [esp+aword*18] | ||
162 | |.define SAVE_CFRAME, aword [esp+aword*17] | ||
163 | |.define SAVE_L, aword [esp+aword*16] | ||
164 | |//----- 16 byte aligned, ^^^ arguments from C caller | ||
165 | |.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. | ||
166 | |.define SAVE_R4, aword [esp+aword*14] | ||
167 | |.define SAVE_R3, aword [esp+aword*13] | ||
168 | |.define SAVE_R2, aword [esp+aword*12] | ||
169 | |//----- 16 byte aligned | ||
170 | |.define SAVE_R1, aword [esp+aword*11] | ||
171 | |.define SEH_FUNC, aword [esp+aword*10] | ||
172 | |.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. | ||
173 | |.define UNUSED2, aword [esp+aword*8] | ||
174 | |//----- 16 byte aligned | ||
175 | |.define UNUSED1, aword [esp+aword*7] | ||
176 | |.define SAVE_PC, aword [esp+aword*6] | ||
177 | |.define TMP2, aword [esp+aword*5] | ||
178 | |.define TMP1, aword [esp+aword*4] | ||
179 | |//----- 16 byte aligned | ||
180 | |.define ARG4, aword [esp+aword*3] | ||
181 | |.define ARG3, aword [esp+aword*2] | ||
182 | |.define ARG2, aword [esp+aword*1] | ||
183 | |.define ARG1, aword [esp] //<-- esp while in interpreter. | ||
184 | |//----- 16 byte aligned, ^^^ arguments for C callee | ||
185 | |.else | ||
137 | |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. | 186 | |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |
138 | |.define SAVE_NRES, aword [esp+aword*14] | 187 | |.define SAVE_NRES, aword [esp+aword*14] |
139 | |.define SAVE_CFRAME, aword [esp+aword*13] | 188 | |.define SAVE_CFRAME, aword [esp+aword*13] |
@@ -154,6 +203,7 @@ | |||
154 | |.define ARG2, aword [esp+aword*1] | 203 | |.define ARG2, aword [esp+aword*1] |
155 | |.define ARG1, aword [esp] //<-- esp while in interpreter. | 204 | |.define ARG1, aword [esp] //<-- esp while in interpreter. |
156 | |//----- 16 byte aligned, ^^^ arguments for C callee | 205 | |//----- 16 byte aligned, ^^^ arguments for C callee |
206 | |.endif | ||
157 | | | 207 | | |
158 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. | 208 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. |
159 | |.define FPARG3, qword [esp+qword*1] | 209 | |.define FPARG3, qword [esp+qword*1] |
@@ -389,7 +439,6 @@ | |||
389 | | fpop | 439 | | fpop |
390 | |.endmacro | 440 | |.endmacro |
391 | | | 441 | | |
392 | |.macro fdup; fld st0; .endmacro | ||
393 | |.macro fpop1; fstp st1; .endmacro | 442 | |.macro fpop1; fstp st1; .endmacro |
394 | | | 443 | | |
395 | |// Synthesize SSE FP constants. | 444 | |// Synthesize SSE FP constants. |
@@ -555,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
555 | |.else | 604 | |.else |
556 | | mov eax, FCARG2 // Error return status for vm_pcall. | 605 | | mov eax, FCARG2 // Error return status for vm_pcall. |
557 | | mov esp, FCARG1 | 606 | | mov esp, FCARG1 |
607 | |.if WIN | ||
608 | | lea FCARG1, SEH_NEXT | ||
609 | | fs; mov [0], FCARG1 | ||
610 | |.endif | ||
558 | |.endif | 611 | |.endif |
559 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | 612 | |->vm_unwind_c_eh: // Landing pad for external unwinder. |
560 | | mov L:RB, SAVE_L | 613 | | mov L:RB, SAVE_L |
@@ -578,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
578 | |.else | 631 | |.else |
579 | | and FCARG1, CFRAME_RAWMASK | 632 | | and FCARG1, CFRAME_RAWMASK |
580 | | mov esp, FCARG1 | 633 | | mov esp, FCARG1 |
634 | |.if WIN | ||
635 | | lea FCARG1, SEH_NEXT | ||
636 | | fs; mov [0], FCARG1 | ||
637 | |.endif | ||
581 | |.endif | 638 | |.endif |
582 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | 639 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. |
583 | | mov L:RB, SAVE_L | 640 | | mov L:RB, SAVE_L |
@@ -591,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
591 | | set_vmstate INTERP | 648 | | set_vmstate INTERP |
592 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. | 649 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. |
593 | | | 650 | | |
651 | |.if WIN and not X64 | ||
652 | |->vm_rtlunwind@16: // Thin layer around RtlUnwind. | ||
653 | | // (void *cframe, void *excptrec, void *unwinder, int errcode) | ||
654 | | mov [esp], FCARG1 // Return value for RtlUnwind. | ||
655 | | push FCARG2 // Exception record for RtlUnwind. | ||
656 | | push 0 // Ignored by RtlUnwind. | ||
657 | | push dword [FCARG1+CFRAME_OFS_SEH] | ||
658 | | call extern RtlUnwind@16 // Violates ABI (clobbers too much). | ||
659 | | mov FCARG1, eax | ||
660 | | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). | ||
661 | | ret // Jump to unwinder. | ||
662 | |.endif | ||
663 | | | ||
594 | |//----------------------------------------------------------------------- | 664 | |//----------------------------------------------------------------------- |
595 | |//-- Grow stack for calls ----------------------------------------------- | 665 | |//-- Grow stack for calls ----------------------------------------------- |
596 | |//----------------------------------------------------------------------- | 666 | |//----------------------------------------------------------------------- |
@@ -646,17 +716,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
646 | | lea KBASEa, [esp+CFRAME_RESUME] | 716 | | lea KBASEa, [esp+CFRAME_RESUME] |
647 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 717 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. |
648 | | add DISPATCH, GG_G2DISP | 718 | | add DISPATCH, GG_G2DISP |
649 | | mov L:RB->cframe, KBASEa | ||
650 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. | 719 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. |
651 | | mov SAVE_CFRAME, RDa | 720 | | mov SAVE_CFRAME, RDa |
652 | |.if X64 | 721 | |.if X64 |
653 | | mov SAVE_NRES, RD | 722 | | mov SAVE_NRES, RD |
654 | | mov SAVE_ERRF, RD | 723 | | mov SAVE_ERRF, RD |
655 | |.endif | 724 | |.endif |
725 | | mov L:RB->cframe, KBASEa | ||
656 | | cmp byte L:RB->status, RDL | 726 | | cmp byte L:RB->status, RDL |
657 | | je >3 // Initial resume (like a call). | 727 | | je >2 // Initial resume (like a call). |
658 | | | 728 | | |
659 | | // Resume after yield (like a return). | 729 | | // Resume after yield (like a return). |
730 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
660 | | set_vmstate INTERP | 731 | | set_vmstate INTERP |
661 | | mov byte L:RB->status, RDL | 732 | | mov byte L:RB->status, RDL |
662 | | mov BASE, L:RB->base | 733 | | mov BASE, L:RB->base |
@@ -696,20 +767,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
696 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! | 767 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! |
697 | |.endif | 768 | |.endif |
698 | | | 769 | | |
770 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
699 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 771 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
700 | | mov SAVE_CFRAME, KBASEa | 772 | | mov SAVE_CFRAME, KBASEa |
701 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | 773 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. |
774 | | add DISPATCH, GG_G2DISP | ||
702 | |.if X64 | 775 | |.if X64 |
703 | | mov L:RB->cframe, rsp | 776 | | mov L:RB->cframe, rsp |
704 | |.else | 777 | |.else |
705 | | mov L:RB->cframe, esp | 778 | | mov L:RB->cframe, esp |
706 | |.endif | 779 | |.endif |
707 | | | 780 | | |
708 | |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). | 781 | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). |
709 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 782 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB |
710 | | add DISPATCH, GG_G2DISP | ||
711 | | | ||
712 | |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype). | ||
713 | | set_vmstate INTERP | 783 | | set_vmstate INTERP |
714 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | 784 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). |
715 | | add PC, RA | 785 | | add PC, RA |
@@ -747,14 +817,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
747 | | | 817 | | |
748 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | 818 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). |
749 | | sub KBASE, L:RB->top | 819 | | sub KBASE, L:RB->top |
820 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
750 | | mov SAVE_ERRF, 0 // No error function. | 821 | | mov SAVE_ERRF, 0 // No error function. |
751 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. | 822 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. |
823 | | add DISPATCH, GG_G2DISP | ||
752 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | 824 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). |
753 | | | 825 | | |
754 | |.if X64 | 826 | |.if X64 |
755 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 827 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
756 | | mov SAVE_CFRAME, KBASEa | 828 | | mov SAVE_CFRAME, KBASEa |
757 | | mov L:RB->cframe, rsp | 829 | | mov L:RB->cframe, rsp |
830 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
758 | | | 831 | | |
759 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 832 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
760 | |.else | 833 | |.else |
@@ -765,6 +838,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
765 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | 838 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. |
766 | | mov SAVE_CFRAME, KBASE | 839 | | mov SAVE_CFRAME, KBASE |
767 | | mov L:RB->cframe, esp | 840 | | mov L:RB->cframe, esp |
841 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
768 | | | 842 | | |
769 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) | 843 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) |
770 | |.endif | 844 | |.endif |
@@ -872,13 +946,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
872 | |.if DUALNUM | 946 | |.if DUALNUM |
873 | | mov TMP2, LJ_TISNUM | 947 | | mov TMP2, LJ_TISNUM |
874 | | mov TMP1, RC | 948 | | mov TMP1, RC |
875 | |.elif SSE | 949 | |.else |
876 | | cvtsi2sd xmm0, RC | 950 | | cvtsi2sd xmm0, RC |
877 | | movsd TMPQ, xmm0 | 951 | | movsd TMPQ, xmm0 |
878 | |.else | ||
879 | | mov ARG4, RC | ||
880 | | fild ARG4 | ||
881 | | fstp TMPQ | ||
882 | |.endif | 952 | |.endif |
883 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 953 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
884 | | jmp >1 | 954 | | jmp >1 |
@@ -932,6 +1002,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
932 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). | 1002 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). |
933 | | jmp ->vm_call_dispatch_f | 1003 | | jmp ->vm_call_dispatch_f |
934 | | | 1004 | | |
1005 | |->vmeta_tgetr: | ||
1006 | | mov FCARG1, TAB:RB | ||
1007 | | mov RB, BASE // Save BASE. | ||
1008 | | mov FCARG2, RC // Caveat: FCARG2 == BASE | ||
1009 | | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) | ||
1010 | | // cTValue * or NULL returned in eax (RC). | ||
1011 | | movzx RA, PC_RA | ||
1012 | | mov BASE, RB // Restore BASE. | ||
1013 | | test RC, RC | ||
1014 | | jnz ->BC_TGETR_Z | ||
1015 | | mov dword [BASE+RA*8+4], LJ_TNIL | ||
1016 | | jmp ->BC_TGETR2_Z | ||
1017 | | | ||
935 | |//----------------------------------------------------------------------- | 1018 | |//----------------------------------------------------------------------- |
936 | | | 1019 | | |
937 | |->vmeta_tsets: | 1020 | |->vmeta_tsets: |
@@ -951,13 +1034,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
951 | |.if DUALNUM | 1034 | |.if DUALNUM |
952 | | mov TMP2, LJ_TISNUM | 1035 | | mov TMP2, LJ_TISNUM |
953 | | mov TMP1, RC | 1036 | | mov TMP1, RC |
954 | |.elif SSE | 1037 | |.else |
955 | | cvtsi2sd xmm0, RC | 1038 | | cvtsi2sd xmm0, RC |
956 | | movsd TMPQ, xmm0 | 1039 | | movsd TMPQ, xmm0 |
957 | |.else | ||
958 | | mov ARG4, RC | ||
959 | | fild ARG4 | ||
960 | | fstp TMPQ | ||
961 | |.endif | 1040 | |.endif |
962 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 1041 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
963 | | jmp >1 | 1042 | | jmp >1 |
@@ -1023,6 +1102,33 @@ static void build_subroutines(BuildCtx *ctx) | |||
1023 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). | 1102 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). |
1024 | | jmp ->vm_call_dispatch_f | 1103 | | jmp ->vm_call_dispatch_f |
1025 | | | 1104 | | |
1105 | |->vmeta_tsetr: | ||
1106 | |.if X64WIN | ||
1107 | | mov L:CARG1d, SAVE_L | ||
1108 | | mov CARG3d, RC | ||
1109 | | mov L:CARG1d->base, BASE | ||
1110 | | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. | ||
1111 | |.elif X64 | ||
1112 | | mov L:CARG1d, SAVE_L | ||
1113 | | mov CARG2d, TAB:RB | ||
1114 | | mov L:CARG1d->base, BASE | ||
1115 | | mov RB, BASE // Save BASE. | ||
1116 | | mov CARG3d, RC // Caveat: CARG3d == BASE. | ||
1117 | |.else | ||
1118 | | mov L:RA, SAVE_L | ||
1119 | | mov ARG2, TAB:RB | ||
1120 | | mov RB, BASE // Save BASE. | ||
1121 | | mov ARG3, RC | ||
1122 | | mov ARG1, L:RA | ||
1123 | | mov L:RA->base, BASE | ||
1124 | |.endif | ||
1125 | | mov SAVE_PC, PC | ||
1126 | | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
1127 | | // TValue * returned in eax (RC). | ||
1128 | | movzx RA, PC_RA | ||
1129 | | mov BASE, RB // Restore BASE. | ||
1130 | | jmp ->BC_TSETR_Z | ||
1131 | | | ||
1026 | |//-- Comparison metamethods --------------------------------------------- | 1132 | |//-- Comparison metamethods --------------------------------------------- |
1027 | | | 1133 | | |
1028 | |->vmeta_comp: | 1134 | |->vmeta_comp: |
@@ -1117,6 +1223,26 @@ static void build_subroutines(BuildCtx *ctx) | |||
1117 | | jmp <3 | 1223 | | jmp <3 |
1118 | |.endif | 1224 | |.endif |
1119 | | | 1225 | | |
1226 | |->vmeta_istype: | ||
1227 | |.if X64 | ||
1228 | | mov L:RB, SAVE_L | ||
1229 | | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | ||
1230 | | mov CARG2d, RA | ||
1231 | | movzx CARG3d, PC_RD | ||
1232 | | mov L:CARG1d, L:RB | ||
1233 | |.else | ||
1234 | | movzx RD, PC_RD | ||
1235 | | mov ARG2, RA | ||
1236 | | mov L:RB, SAVE_L | ||
1237 | | mov ARG3, RD | ||
1238 | | mov ARG1, L:RB | ||
1239 | | mov L:RB->base, BASE | ||
1240 | |.endif | ||
1241 | | mov SAVE_PC, PC | ||
1242 | | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
1243 | | mov BASE, L:RB->base | ||
1244 | | jmp <6 | ||
1245 | | | ||
1120 | |//-- Arithmetic metamethods --------------------------------------------- | 1246 | |//-- Arithmetic metamethods --------------------------------------------- |
1121 | | | 1247 | | |
1122 | |->vmeta_arith_vno: | 1248 | |->vmeta_arith_vno: |
@@ -1293,19 +1419,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1293 | | cmp NARGS:RD, 2+1; jb ->fff_fallback | 1419 | | cmp NARGS:RD, 2+1; jb ->fff_fallback |
1294 | |.endmacro | 1420 | |.endmacro |
1295 | | | 1421 | | |
1296 | |.macro .ffunc_n, name | ||
1297 | | .ffunc_1 name | ||
1298 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1299 | | fld qword [BASE] | ||
1300 | |.endmacro | ||
1301 | | | ||
1302 | |.macro .ffunc_n, name, op | ||
1303 | | .ffunc_1 name | ||
1304 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1305 | | op | ||
1306 | | fld qword [BASE] | ||
1307 | |.endmacro | ||
1308 | | | ||
1309 | |.macro .ffunc_nsse, name, op | 1422 | |.macro .ffunc_nsse, name, op |
1310 | | .ffunc_1 name | 1423 | | .ffunc_1 name |
1311 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1424 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1316,14 +1429,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1316 | | .ffunc_nsse name, movsd | 1429 | | .ffunc_nsse name, movsd |
1317 | |.endmacro | 1430 | |.endmacro |
1318 | | | 1431 | | |
1319 | |.macro .ffunc_nn, name | ||
1320 | | .ffunc_2 name | ||
1321 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1322 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | ||
1323 | | fld qword [BASE] | ||
1324 | | fld qword [BASE+8] | ||
1325 | |.endmacro | ||
1326 | | | ||
1327 | |.macro .ffunc_nnsse, name | 1432 | |.macro .ffunc_nnsse, name |
1328 | | .ffunc_2 name | 1433 | | .ffunc_2 name |
1329 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1434 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1421,7 +1526,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1421 | | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. | 1526 | | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. |
1422 | | mov [BASE-8], TAB:RB | 1527 | | mov [BASE-8], TAB:RB |
1423 | | mov RA, TAB:RB->hmask | 1528 | | mov RA, TAB:RB->hmask |
1424 | | and RA, STR:RC->hash | 1529 | | and RA, STR:RC->sid |
1425 | | imul RA, #NODE | 1530 | | imul RA, #NODE |
1426 | | add NODE:RA, TAB:RB->node | 1531 | | add NODE:RA, TAB:RB->node |
1427 | |3: // Rearranged logic, because we expect _not_ to find the key. | 1532 | |3: // Rearranged logic, because we expect _not_ to find the key. |
@@ -1529,11 +1634,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1529 | |.else | 1634 | |.else |
1530 | | jae ->fff_fallback | 1635 | | jae ->fff_fallback |
1531 | |.endif | 1636 | |.endif |
1532 | |.if SSE | ||
1533 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1637 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
1534 | |.else | ||
1535 | | fld qword [BASE]; jmp ->fff_resn | ||
1536 | |.endif | ||
1537 | | | 1638 | | |
1538 | |.ffunc_1 tostring | 1639 | |.ffunc_1 tostring |
1539 | | // Only handles the string or number case inline. | 1640 | | // Only handles the string or number case inline. |
@@ -1558,9 +1659,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1558 | |.endif | 1659 | |.endif |
1559 | | mov L:FCARG1, L:RB | 1660 | | mov L:FCARG1, L:RB |
1560 | |.if DUALNUM | 1661 | |.if DUALNUM |
1561 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) | 1662 | | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) |
1562 | |.else | 1663 | |.else |
1563 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | 1664 | | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) |
1564 | |.endif | 1665 | |.endif |
1565 | | // GCstr returned in eax (RD). | 1666 | | // GCstr returned in eax (RD). |
1566 | | mov BASE, L:RB->base | 1667 | | mov BASE, L:RB->base |
@@ -1572,55 +1673,35 @@ static void build_subroutines(BuildCtx *ctx) | |||
1572 | | je >2 // Missing 2nd arg? | 1673 | | je >2 // Missing 2nd arg? |
1573 | |1: | 1674 | |1: |
1574 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | 1675 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback |
1575 | | mov L:RB, SAVE_L | ||
1576 | | mov L:RB->base, BASE // Add frame since C call can throw. | ||
1577 | | mov L:RB->top, BASE // Dummy frame length is ok. | ||
1578 | | mov PC, [BASE-4] | 1676 | | mov PC, [BASE-4] |
1677 | | mov RB, BASE // Save BASE. | ||
1579 | |.if X64WIN | 1678 | |.if X64WIN |
1580 | | lea CARG3d, [BASE+8] | 1679 | | mov CARG1d, [BASE] |
1581 | | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. | 1680 | | lea CARG3d, [BASE-8] |
1582 | | mov CARG1d, L:RB | 1681 | | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE. |
1583 | |.elif X64 | 1682 | |.elif X64 |
1584 | | mov CARG2d, [BASE] | 1683 | | mov CARG1d, [BASE] |
1585 | | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. | 1684 | | lea CARG2d, [BASE+8] |
1586 | | mov CARG1d, L:RB | 1685 | | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE. |
1587 | |.else | 1686 | |.else |
1588 | | mov TAB:RD, [BASE] | 1687 | | mov TAB:RD, [BASE] |
1589 | | mov ARG2, TAB:RD | 1688 | | mov ARG1, TAB:RD |
1590 | | mov ARG1, L:RB | ||
1591 | | add BASE, 8 | 1689 | | add BASE, 8 |
1690 | | mov ARG2, BASE | ||
1691 | | sub BASE, 8+8 | ||
1592 | | mov ARG3, BASE | 1692 | | mov ARG3, BASE |
1593 | |.endif | 1693 | |.endif |
1594 | | mov SAVE_PC, PC // Needed for ITERN fallback. | 1694 | | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) |
1595 | | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | 1695 | | // 1=found, 0=end, -1=error returned in eax (RD). |
1596 | | // Flag returned in eax (RD). | 1696 | | mov BASE, RB // Restore BASE. |
1597 | | mov BASE, L:RB->base | 1697 | | test RD, RD; jg ->fff_res2 // Found key/value. |
1598 | | test RD, RD; jz >3 // End of traversal? | 1698 | | js ->fff_fallback_2 // Invalid key. |
1599 | | // Copy key and value to results. | 1699 | | // End of traversal: return nil. |
1600 | |.if X64 | 1700 | | mov dword [BASE-4], LJ_TNIL |
1601 | | mov RBa, [BASE+8] | 1701 | | jmp ->fff_res1 |
1602 | | mov RDa, [BASE+16] | ||
1603 | | mov [BASE-8], RBa | ||
1604 | | mov [BASE], RDa | ||
1605 | |.else | ||
1606 | | mov RB, [BASE+8] | ||
1607 | | mov RD, [BASE+12] | ||
1608 | | mov [BASE-8], RB | ||
1609 | | mov [BASE-4], RD | ||
1610 | | mov RB, [BASE+16] | ||
1611 | | mov RD, [BASE+20] | ||
1612 | | mov [BASE], RB | ||
1613 | | mov [BASE+4], RD | ||
1614 | |.endif | ||
1615 | |->fff_res2: | ||
1616 | | mov RD, 1+2 | ||
1617 | | jmp ->fff_res | ||
1618 | |2: // Set missing 2nd arg to nil. | 1702 | |2: // Set missing 2nd arg to nil. |
1619 | | mov dword [BASE+12], LJ_TNIL | 1703 | | mov dword [BASE+12], LJ_TNIL |
1620 | | jmp <1 | 1704 | | jmp <1 |
1621 | |3: // End of traversal: return nil. | ||
1622 | | mov dword [BASE-4], LJ_TNIL | ||
1623 | | jmp ->fff_res1 | ||
1624 | | | 1705 | | |
1625 | |.ffunc_1 pairs | 1706 | |.ffunc_1 pairs |
1626 | | mov TAB:RB, [BASE] | 1707 | | mov TAB:RB, [BASE] |
@@ -1651,19 +1732,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1651 | | add RD, 1 | 1732 | | add RD, 1 |
1652 | | mov dword [BASE-4], LJ_TISNUM | 1733 | | mov dword [BASE-4], LJ_TISNUM |
1653 | | mov dword [BASE-8], RD | 1734 | | mov dword [BASE-8], RD |
1654 | |.elif SSE | 1735 | |.else |
1655 | | movsd xmm0, qword [BASE+8] | 1736 | | movsd xmm0, qword [BASE+8] |
1656 | | sseconst_1 xmm1, RBa | 1737 | | sseconst_1 xmm1, RBa |
1657 | | addsd xmm0, xmm1 | 1738 | | addsd xmm0, xmm1 |
1658 | | cvtsd2si RD, xmm0 | 1739 | | cvttsd2si RD, xmm0 |
1659 | | movsd qword [BASE-8], xmm0 | 1740 | | movsd qword [BASE-8], xmm0 |
1660 | |.else | ||
1661 | | fld qword [BASE+8] | ||
1662 | | fld1 | ||
1663 | | faddp st1 | ||
1664 | | fist ARG1 | ||
1665 | | fstp qword [BASE-8] | ||
1666 | | mov RD, ARG1 | ||
1667 | |.endif | 1741 | |.endif |
1668 | | mov TAB:RB, [BASE] | 1742 | | mov TAB:RB, [BASE] |
1669 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | 1743 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? |
@@ -1681,7 +1755,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1681 | | mov [BASE], RB | 1755 | | mov [BASE], RB |
1682 | | mov [BASE+4], RD | 1756 | | mov [BASE+4], RD |
1683 | |.endif | 1757 | |.endif |
1684 | | jmp ->fff_res2 | 1758 | |->fff_res2: |
1759 | | mov RD, 1+2 | ||
1760 | | jmp ->fff_res | ||
1685 | |2: // Check for empty hash part first. Otherwise call C function. | 1761 | |2: // Check for empty hash part first. Otherwise call C function. |
1686 | | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | 1762 | | cmp dword TAB:RB->hmask, 0; je ->fff_res0 |
1687 | | mov FCARG1, TAB:RB | 1763 | | mov FCARG1, TAB:RB |
@@ -1710,12 +1786,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1710 | |.if DUALNUM | 1786 | |.if DUALNUM |
1711 | | mov dword [BASE+12], LJ_TISNUM | 1787 | | mov dword [BASE+12], LJ_TISNUM |
1712 | | mov dword [BASE+8], 0 | 1788 | | mov dword [BASE+8], 0 |
1713 | |.elif SSE | 1789 | |.else |
1714 | | xorps xmm0, xmm0 | 1790 | | xorps xmm0, xmm0 |
1715 | | movsd qword [BASE+8], xmm0 | 1791 | | movsd qword [BASE+8], xmm0 |
1716 | |.else | ||
1717 | | fldz | ||
1718 | | fstp qword [BASE+8] | ||
1719 | |.endif | 1792 | |.endif |
1720 | | mov RD, 1+3 | 1793 | | mov RD, 1+3 |
1721 | | jmp ->fff_res | 1794 | | jmp ->fff_res |
@@ -1822,7 +1895,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1822 | | mov ARG3, RA | 1895 | | mov ARG3, RA |
1823 | |.endif | 1896 | |.endif |
1824 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) | 1897 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) |
1825 | | set_vmstate INTERP | ||
1826 | | | 1898 | | |
1827 | | mov L:RB, SAVE_L | 1899 | | mov L:RB, SAVE_L |
1828 | |.if X64 | 1900 | |.if X64 |
@@ -1831,6 +1903,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1831 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. | 1903 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. |
1832 | |.endif | 1904 | |.endif |
1833 | | mov BASE, L:RB->base | 1905 | | mov BASE, L:RB->base |
1906 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
1907 | | set_vmstate INTERP | ||
1908 | | | ||
1834 | | cmp eax, LUA_YIELD | 1909 | | cmp eax, LUA_YIELD |
1835 | | ja >8 | 1910 | | ja >8 |
1836 | |4: | 1911 | |4: |
@@ -1945,12 +2020,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1945 | |->fff_resi: // Dummy. | 2020 | |->fff_resi: // Dummy. |
1946 | |.endif | 2021 | |.endif |
1947 | | | 2022 | | |
1948 | |.if SSE | ||
1949 | |->fff_resn: | 2023 | |->fff_resn: |
1950 | | mov PC, [BASE-4] | 2024 | | mov PC, [BASE-4] |
1951 | | fstp qword [BASE-8] | 2025 | | fstp qword [BASE-8] |
1952 | | jmp ->fff_res1 | 2026 | | jmp ->fff_res1 |
1953 | |.endif | ||
1954 | | | 2027 | | |
1955 | | .ffunc_1 math_abs | 2028 | | .ffunc_1 math_abs |
1956 | |.if DUALNUM | 2029 | |.if DUALNUM |
@@ -1974,8 +2047,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1974 | |.else | 2047 | |.else |
1975 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2048 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1976 | |.endif | 2049 | |.endif |
1977 | | | ||
1978 | |.if SSE | ||
1979 | | movsd xmm0, qword [BASE] | 2050 | | movsd xmm0, qword [BASE] |
1980 | | sseconst_abs xmm1, RDa | 2051 | | sseconst_abs xmm1, RDa |
1981 | | andps xmm0, xmm1 | 2052 | | andps xmm0, xmm1 |
@@ -1983,15 +2054,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1983 | | mov PC, [BASE-4] | 2054 | | mov PC, [BASE-4] |
1984 | | movsd qword [BASE-8], xmm0 | 2055 | | movsd qword [BASE-8], xmm0 |
1985 | | // fallthrough | 2056 | | // fallthrough |
1986 | |.else | ||
1987 | | fld qword [BASE] | ||
1988 | | fabs | ||
1989 | | // fallthrough | ||
1990 | |->fff_resxmm0: // Dummy. | ||
1991 | |->fff_resn: | ||
1992 | | mov PC, [BASE-4] | ||
1993 | | fstp qword [BASE-8] | ||
1994 | |.endif | ||
1995 | | | 2057 | | |
1996 | |->fff_res1: | 2058 | |->fff_res1: |
1997 | | mov RD, 1+1 | 2059 | | mov RD, 1+1 |
@@ -2018,6 +2080,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2018 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. | 2080 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. |
2019 | | jmp ->vm_return | 2081 | | jmp ->vm_return |
2020 | | | 2082 | | |
2083 | |.if X64 | ||
2084 | |.define fff_resfp, fff_resxmm0 | ||
2085 | |.else | ||
2086 | |.define fff_resfp, fff_resn | ||
2087 | |.endif | ||
2088 | | | ||
2021 | |.macro math_round, func | 2089 | |.macro math_round, func |
2022 | | .ffunc math_ .. func | 2090 | | .ffunc math_ .. func |
2023 | |.if DUALNUM | 2091 | |.if DUALNUM |
@@ -2028,107 +2096,75 @@ static void build_subroutines(BuildCtx *ctx) | |||
2028 | |.else | 2096 | |.else |
2029 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2097 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2030 | |.endif | 2098 | |.endif |
2031 | |.if SSE | ||
2032 | | movsd xmm0, qword [BASE] | 2099 | | movsd xmm0, qword [BASE] |
2033 | | call ->vm_ .. func | 2100 | | call ->vm_ .. func .. _sse |
2034 | | .if DUALNUM | 2101 | |.if DUALNUM |
2035 | | cvtsd2si RB, xmm0 | 2102 | | cvttsd2si RB, xmm0 |
2036 | | cmp RB, 0x80000000 | 2103 | | cmp RB, 0x80000000 |
2037 | | jne ->fff_resi | 2104 | | jne ->fff_resi |
2038 | | cvtsi2sd xmm1, RB | 2105 | | cvtsi2sd xmm1, RB |
2039 | | ucomisd xmm0, xmm1 | 2106 | | ucomisd xmm0, xmm1 |
2040 | | jp ->fff_resxmm0 | 2107 | | jp ->fff_resxmm0 |
2041 | | je ->fff_resi | 2108 | | je ->fff_resi |
2042 | | .endif | ||
2043 | | jmp ->fff_resxmm0 | ||
2044 | |.else | ||
2045 | | fld qword [BASE] | ||
2046 | | call ->vm_ .. func | ||
2047 | | .if DUALNUM | ||
2048 | | fist ARG1 | ||
2049 | | mov RB, ARG1 | ||
2050 | | cmp RB, 0x80000000; jne >2 | ||
2051 | | fdup | ||
2052 | | fild ARG1 | ||
2053 | | fcomparepp | ||
2054 | | jp ->fff_resn | ||
2055 | | jne ->fff_resn | ||
2056 | |2: | ||
2057 | | fpop | ||
2058 | | jmp ->fff_resi | ||
2059 | | .else | ||
2060 | | jmp ->fff_resn | ||
2061 | | .endif | ||
2062 | |.endif | 2109 | |.endif |
2110 | | jmp ->fff_resxmm0 | ||
2063 | |.endmacro | 2111 | |.endmacro |
2064 | | | 2112 | | |
2065 | | math_round floor | 2113 | | math_round floor |
2066 | | math_round ceil | 2114 | | math_round ceil |
2067 | | | 2115 | | |
2068 | |.if SSE | ||
2069 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 2116 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
2070 | |.else | ||
2071 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | ||
2072 | |.endif | ||
2073 | | | 2117 | | |
2074 | |.ffunc math_log | 2118 | |.ffunc math_log |
2075 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 2119 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
2076 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2120 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2077 | | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn | 2121 | | movsd xmm0, qword [BASE] |
2078 | | | 2122 | |.if not X64 |
2079 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2123 | | movsd FPARG1, xmm0 |
2080 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2124 | |.endif |
2081 | | | 2125 | | mov RB, BASE |
2082 | |.ffunc_n math_sin; fsin; jmp ->fff_resn | 2126 | | call extern log |
2083 | |.ffunc_n math_cos; fcos; jmp ->fff_resn | 2127 | | mov BASE, RB |
2084 | |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn | 2128 | | jmp ->fff_resfp |
2085 | | | ||
2086 | |.ffunc_n math_asin | ||
2087 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan | ||
2088 | | jmp ->fff_resn | ||
2089 | |.ffunc_n math_acos | ||
2090 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan | ||
2091 | | jmp ->fff_resn | ||
2092 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | ||
2093 | | | 2129 | | |
2094 | |.macro math_extern, func | 2130 | |.macro math_extern, func |
2095 | |.if SSE | ||
2096 | | .ffunc_nsse math_ .. func | 2131 | | .ffunc_nsse math_ .. func |
2097 | | .if not X64 | 2132 | |.if not X64 |
2098 | | movsd FPARG1, xmm0 | 2133 | | movsd FPARG1, xmm0 |
2099 | | .endif | ||
2100 | |.else | ||
2101 | | .ffunc_n math_ .. func | ||
2102 | | fstp FPARG1 | ||
2103 | |.endif | 2134 | |.endif |
2104 | | mov RB, BASE | 2135 | | mov RB, BASE |
2105 | | call extern lj_vm_ .. func | 2136 | | call extern func |
2106 | | mov BASE, RB | 2137 | | mov BASE, RB |
2107 | | .if X64 | 2138 | | jmp ->fff_resfp |
2108 | | jmp ->fff_resxmm0 | ||
2109 | | .else | ||
2110 | | jmp ->fff_resn | ||
2111 | | .endif | ||
2112 | |.endmacro | 2139 | |.endmacro |
2113 | | | 2140 | | |
2141 | |.macro math_extern2, func | ||
2142 | | .ffunc_nnsse math_ .. func | ||
2143 | |.if not X64 | ||
2144 | | movsd FPARG1, xmm0 | ||
2145 | | movsd FPARG3, xmm1 | ||
2146 | |.endif | ||
2147 | | mov RB, BASE | ||
2148 | | call extern func | ||
2149 | | mov BASE, RB | ||
2150 | | jmp ->fff_resfp | ||
2151 | |.endmacro | ||
2152 | | | ||
2153 | | math_extern log10 | ||
2154 | | math_extern exp | ||
2155 | | math_extern sin | ||
2156 | | math_extern cos | ||
2157 | | math_extern tan | ||
2158 | | math_extern asin | ||
2159 | | math_extern acos | ||
2160 | | math_extern atan | ||
2114 | | math_extern sinh | 2161 | | math_extern sinh |
2115 | | math_extern cosh | 2162 | | math_extern cosh |
2116 | | math_extern tanh | 2163 | | math_extern tanh |
2164 | | math_extern2 pow | ||
2165 | | math_extern2 atan2 | ||
2166 | | math_extern2 fmod | ||
2117 | | | 2167 | | |
2118 | |->ff_math_deg: | ||
2119 | |.if SSE | ||
2120 | |.ffunc_nsse math_rad | ||
2121 | | mov CFUNC:RB, [BASE-8] | ||
2122 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] | ||
2123 | | jmp ->fff_resxmm0 | ||
2124 | |.else | ||
2125 | |.ffunc_n math_rad | ||
2126 | | mov CFUNC:RB, [BASE-8] | ||
2127 | | fmul qword CFUNC:RB->upvalue[0] | ||
2128 | | jmp ->fff_resn | ||
2129 | |.endif | ||
2130 | | | ||
2131 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | ||
2132 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2168 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
2133 | | | 2169 | | |
2134 | |.ffunc_1 math_frexp | 2170 | |.ffunc_1 math_frexp |
@@ -2143,65 +2179,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2143 | | cmp RB, 0x00200000; jb >4 | 2179 | | cmp RB, 0x00200000; jb >4 |
2144 | |1: | 2180 | |1: |
2145 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. | 2181 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. |
2146 | |.if SSE | ||
2147 | | cvtsi2sd xmm0, RB | 2182 | | cvtsi2sd xmm0, RB |
2148 | |.else | ||
2149 | | mov TMP1, RB; fild TMP1 | ||
2150 | |.endif | ||
2151 | | mov RB, [BASE-4] | 2183 | | mov RB, [BASE-4] |
2152 | | and RB, 0x800fffff // Mask off exponent. | 2184 | | and RB, 0x800fffff // Mask off exponent. |
2153 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | 2185 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. |
2154 | | mov [BASE-4], RB | 2186 | | mov [BASE-4], RB |
2155 | |2: | 2187 | |2: |
2156 | |.if SSE | ||
2157 | | movsd qword [BASE], xmm0 | 2188 | | movsd qword [BASE], xmm0 |
2158 | |.else | ||
2159 | | fstp qword [BASE] | ||
2160 | |.endif | ||
2161 | | mov RD, 1+2 | 2189 | | mov RD, 1+2 |
2162 | | jmp ->fff_res | 2190 | | jmp ->fff_res |
2163 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | 2191 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. |
2164 | |.if SSE | ||
2165 | | xorps xmm0, xmm0; jmp <2 | 2192 | | xorps xmm0, xmm0; jmp <2 |
2166 | |.else | ||
2167 | | fldz; jmp <2 | ||
2168 | |.endif | ||
2169 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | 2193 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. |
2170 | |.if SSE | ||
2171 | | movsd xmm0, qword [BASE] | 2194 | | movsd xmm0, qword [BASE] |
2172 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. | 2195 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. |
2173 | | mulsd xmm0, xmm1 | 2196 | | mulsd xmm0, xmm1 |
2174 | | movsd qword [BASE-8], xmm0 | 2197 | | movsd qword [BASE-8], xmm0 |
2175 | |.else | ||
2176 | | fld qword [BASE] | ||
2177 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 | ||
2178 | | fstp qword [BASE-8] | ||
2179 | |.endif | ||
2180 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | 2198 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 |
2181 | | | 2199 | | |
2182 | |.if SSE | ||
2183 | |.ffunc_nsse math_modf | 2200 | |.ffunc_nsse math_modf |
2184 | |.else | ||
2185 | |.ffunc_n math_modf | ||
2186 | |.endif | ||
2187 | | mov RB, [BASE+4] | 2201 | | mov RB, [BASE+4] |
2188 | | mov PC, [BASE-4] | 2202 | | mov PC, [BASE-4] |
2189 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | 2203 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? |
2190 | |.if SSE | ||
2191 | | movaps xmm4, xmm0 | 2204 | | movaps xmm4, xmm0 |
2192 | | call ->vm_trunc | 2205 | | call ->vm_trunc_sse |
2193 | | subsd xmm4, xmm0 | 2206 | | subsd xmm4, xmm0 |
2194 | |1: | 2207 | |1: |
2195 | | movsd qword [BASE-8], xmm0 | 2208 | | movsd qword [BASE-8], xmm0 |
2196 | | movsd qword [BASE], xmm4 | 2209 | | movsd qword [BASE], xmm4 |
2197 | |.else | ||
2198 | | fdup | ||
2199 | | call ->vm_trunc | ||
2200 | | fsub st1, st0 | ||
2201 | |1: | ||
2202 | | fstp qword [BASE-8] | ||
2203 | | fstp qword [BASE] | ||
2204 | |.endif | ||
2205 | | mov RC, [BASE-4]; mov RB, [BASE+4] | 2210 | | mov RC, [BASE-4]; mov RB, [BASE+4] |
2206 | | xor RC, RB; js >3 // Need to adjust sign? | 2211 | | xor RC, RB; js >3 // Need to adjust sign? |
2207 | |2: | 2212 | |2: |
@@ -2211,25 +2216,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2211 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | 2216 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. |
2212 | | jmp <2 | 2217 | | jmp <2 |
2213 | |4: | 2218 | |4: |
2214 | |.if SSE | ||
2215 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2219 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
2216 | |.else | ||
2217 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. | ||
2218 | |.endif | ||
2219 | | | ||
2220 | |.ffunc_nnr math_fmod | ||
2221 | |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 | ||
2222 | | fpop1 | ||
2223 | | jmp ->fff_resn | ||
2224 | | | ||
2225 | |.if SSE | ||
2226 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 | ||
2227 | |.else | ||
2228 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | ||
2229 | |.endif | ||
2230 | | | 2220 | | |
2231 | |.macro math_minmax, name, cmovop, fcmovop, sseop | 2221 | |.macro math_minmax, name, cmovop, sseop |
2232 | | .ffunc name | 2222 | | .ffunc_1 name |
2233 | | mov RA, 2 | 2223 | | mov RA, 2 |
2234 | | cmp dword [BASE+4], LJ_TISNUM | 2224 | | cmp dword [BASE+4], LJ_TISNUM |
2235 | |.if DUALNUM | 2225 | |.if DUALNUM |
@@ -2245,12 +2235,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2245 | |3: | 2235 | |3: |
2246 | | ja ->fff_fallback | 2236 | | ja ->fff_fallback |
2247 | | // Convert intermediate result to number and continue below. | 2237 | | // Convert intermediate result to number and continue below. |
2248 | |.if SSE | ||
2249 | | cvtsi2sd xmm0, RB | 2238 | | cvtsi2sd xmm0, RB |
2250 | |.else | ||
2251 | | mov TMP1, RB | ||
2252 | | fild TMP1 | ||
2253 | |.endif | ||
2254 | | jmp >6 | 2239 | | jmp >6 |
2255 | |4: | 2240 | |4: |
2256 | | ja ->fff_fallback | 2241 | | ja ->fff_fallback |
@@ -2258,7 +2243,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2258 | | jae ->fff_fallback | 2243 | | jae ->fff_fallback |
2259 | |.endif | 2244 | |.endif |
2260 | | | 2245 | | |
2261 | |.if SSE | ||
2262 | | movsd xmm0, qword [BASE] | 2246 | | movsd xmm0, qword [BASE] |
2263 | |5: // Handle numbers or integers. | 2247 | |5: // Handle numbers or integers. |
2264 | | cmp RA, RD; jae ->fff_resxmm0 | 2248 | | cmp RA, RD; jae ->fff_resxmm0 |
@@ -2277,48 +2261,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2277 | | sseop xmm0, xmm1 | 2261 | | sseop xmm0, xmm1 |
2278 | | add RA, 1 | 2262 | | add RA, 1 |
2279 | | jmp <5 | 2263 | | jmp <5 |
2280 | |.else | ||
2281 | | fld qword [BASE] | ||
2282 | |5: // Handle numbers or integers. | ||
2283 | | cmp RA, RD; jae ->fff_resn | ||
2284 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | ||
2285 | |.if DUALNUM | ||
2286 | | jb >6 | ||
2287 | | ja >9 | ||
2288 | | fild dword [BASE+RA*8-8] | ||
2289 | | jmp >7 | ||
2290 | |.else | ||
2291 | | jae >9 | ||
2292 | |.endif | ||
2293 | |6: | ||
2294 | | fld qword [BASE+RA*8-8] | ||
2295 | |7: | ||
2296 | | fucomi st1; fcmovop st1; fpop1 | ||
2297 | | add RA, 1 | ||
2298 | | jmp <5 | ||
2299 | |.endif | ||
2300 | |.endmacro | 2264 | |.endmacro |
2301 | | | 2265 | | |
2302 | | math_minmax math_min, cmovg, fcmovnbe, minsd | 2266 | | math_minmax math_min, cmovg, minsd |
2303 | | math_minmax math_max, cmovl, fcmovbe, maxsd | 2267 | | math_minmax math_max, cmovl, maxsd |
2304 | |.if not SSE | ||
2305 | |9: | ||
2306 | | fpop; jmp ->fff_fallback | ||
2307 | |.endif | ||
2308 | | | 2268 | | |
2309 | |//-- String library ----------------------------------------------------- | 2269 | |//-- String library ----------------------------------------------------- |
2310 | | | 2270 | | |
2311 | |.ffunc_1 string_len | ||
2312 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2313 | | mov STR:RB, [BASE] | ||
2314 | |.if DUALNUM | ||
2315 | | mov RB, dword STR:RB->len; jmp ->fff_resi | ||
2316 | |.elif SSE | ||
2317 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | ||
2318 | |.else | ||
2319 | | fild dword STR:RB->len; jmp ->fff_resn | ||
2320 | |.endif | ||
2321 | | | ||
2322 | |.ffunc string_byte // Only handle the 1-arg case here. | 2271 | |.ffunc string_byte // Only handle the 1-arg case here. |
2323 | | cmp NARGS:RD, 1+1; jne ->fff_fallback | 2272 | | cmp NARGS:RD, 1+1; jne ->fff_fallback |
2324 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2273 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2329,10 +2278,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2329 | | movzx RB, byte STR:RB[1] | 2278 | | movzx RB, byte STR:RB[1] |
2330 | |.if DUALNUM | 2279 | |.if DUALNUM |
2331 | | jmp ->fff_resi | 2280 | | jmp ->fff_resi |
2332 | |.elif SSE | ||
2333 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | ||
2334 | |.else | 2281 | |.else |
2335 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2282 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
2336 | |.endif | 2283 | |.endif |
2337 | | | 2284 | | |
2338 | |.ffunc string_char // Only handle the 1-arg case here. | 2285 | |.ffunc string_char // Only handle the 1-arg case here. |
@@ -2344,16 +2291,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2344 | | mov RB, dword [BASE] | 2291 | | mov RB, dword [BASE] |
2345 | | cmp RB, 255; ja ->fff_fallback | 2292 | | cmp RB, 255; ja ->fff_fallback |
2346 | | mov TMP2, RB | 2293 | | mov TMP2, RB |
2347 | |.elif SSE | 2294 | |.else |
2348 | | jae ->fff_fallback | 2295 | | jae ->fff_fallback |
2349 | | cvttsd2si RB, qword [BASE] | 2296 | | cvttsd2si RB, qword [BASE] |
2350 | | cmp RB, 255; ja ->fff_fallback | 2297 | | cmp RB, 255; ja ->fff_fallback |
2351 | | mov TMP2, RB | 2298 | | mov TMP2, RB |
2352 | |.else | ||
2353 | | jae ->fff_fallback | ||
2354 | | fld qword [BASE] | ||
2355 | | fistp TMP2 | ||
2356 | | cmp TMP2, 255; ja ->fff_fallback | ||
2357 | |.endif | 2299 | |.endif |
2358 | |.if X64 | 2300 | |.if X64 |
2359 | | mov TMP3, 1 | 2301 | | mov TMP3, 1 |
@@ -2374,6 +2316,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2374 | |.endif | 2316 | |.endif |
2375 | | mov SAVE_PC, PC | 2317 | | mov SAVE_PC, PC |
2376 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | 2318 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) |
2319 | |->fff_resstr: | ||
2377 | | // GCstr * returned in eax (RD). | 2320 | | // GCstr * returned in eax (RD). |
2378 | | mov BASE, L:RB->base | 2321 | | mov BASE, L:RB->base |
2379 | | mov PC, [BASE-4] | 2322 | | mov PC, [BASE-4] |
@@ -2391,14 +2334,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2391 | | jne ->fff_fallback | 2334 | | jne ->fff_fallback |
2392 | | mov RB, dword [BASE+16] | 2335 | | mov RB, dword [BASE+16] |
2393 | | mov TMP2, RB | 2336 | | mov TMP2, RB |
2394 | |.elif SSE | 2337 | |.else |
2395 | | jae ->fff_fallback | 2338 | | jae ->fff_fallback |
2396 | | cvttsd2si RB, qword [BASE+16] | 2339 | | cvttsd2si RB, qword [BASE+16] |
2397 | | mov TMP2, RB | 2340 | | mov TMP2, RB |
2398 | |.else | ||
2399 | | jae ->fff_fallback | ||
2400 | | fld qword [BASE+16] | ||
2401 | | fistp TMP2 | ||
2402 | |.endif | 2341 | |.endif |
2403 | |1: | 2342 | |1: |
2404 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2343 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2413,12 +2352,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2413 | | mov RB, STR:RB->len | 2352 | | mov RB, STR:RB->len |
2414 | |.if DUALNUM | 2353 | |.if DUALNUM |
2415 | | mov RA, dword [BASE+8] | 2354 | | mov RA, dword [BASE+8] |
2416 | |.elif SSE | ||
2417 | | cvttsd2si RA, qword [BASE+8] | ||
2418 | |.else | 2355 | |.else |
2419 | | fld qword [BASE+8] | 2356 | | cvttsd2si RA, qword [BASE+8] |
2420 | | fistp ARG3 | ||
2421 | | mov RA, ARG3 | ||
2422 | |.endif | 2357 | |.endif |
2423 | | mov RC, TMP2 | 2358 | | mov RC, TMP2 |
2424 | | cmp RB, RC // len < end? (unsigned compare) | 2359 | | cmp RB, RC // len < end? (unsigned compare) |
@@ -2462,136 +2397,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2462 | | xor RC, RC // Zero length. Any ptr in RB is ok. | 2397 | | xor RC, RC // Zero length. Any ptr in RB is ok. |
2463 | | jmp <4 | 2398 | | jmp <4 |
2464 | | | 2399 | | |
2465 | |.ffunc string_rep // Only handle the 1-char case inline. | 2400 | |.macro ffstring_op, name |
2466 | | ffgccheck | 2401 | | .ffunc_1 string_ .. name |
2467 | | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments. | ||
2468 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2469 | | cmp dword [BASE+12], LJ_TISNUM | ||
2470 | | mov STR:RB, [BASE] | ||
2471 | |.if DUALNUM | ||
2472 | | jne ->fff_fallback | ||
2473 | | mov RC, dword [BASE+8] | ||
2474 | |.elif SSE | ||
2475 | | jae ->fff_fallback | ||
2476 | | cvttsd2si RC, qword [BASE+8] | ||
2477 | |.else | ||
2478 | | jae ->fff_fallback | ||
2479 | | fld qword [BASE+8] | ||
2480 | | fistp TMP2 | ||
2481 | | mov RC, TMP2 | ||
2482 | |.endif | ||
2483 | | test RC, RC | ||
2484 | | jle ->fff_emptystr // Count <= 0? (or non-int) | ||
2485 | | cmp dword STR:RB->len, 1 | ||
2486 | | jb ->fff_emptystr // Zero length string? | ||
2487 | | jne ->fff_fallback_2 // Fallback for > 1-char strings. | ||
2488 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 | ||
2489 | | movzx RA, byte STR:RB[1] | ||
2490 | | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2491 | |.if X64 | ||
2492 | | mov TMP3, RC | ||
2493 | |.else | ||
2494 | | mov ARG3, RC | ||
2495 | |.endif | ||
2496 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
2497 | | mov [RB], RAL | ||
2498 | | add RB, 1 | ||
2499 | | sub RC, 1 | ||
2500 | | jnz <1 | ||
2501 | | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2502 | | jmp ->fff_newstr | ||
2503 | | | ||
2504 | |.ffunc_1 string_reverse | ||
2505 | | ffgccheck | 2402 | | ffgccheck |
2506 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2403 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2507 | | mov STR:RB, [BASE] | 2404 | | mov L:RB, SAVE_L |
2508 | | mov RC, STR:RB->len | 2405 | | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] |
2509 | | test RC, RC | 2406 | | mov L:RB->base, BASE |
2510 | | jz ->fff_emptystr // Zero length string? | 2407 | | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE |
2511 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | 2408 | | mov RCa, SBUF:FCARG1->b |
2512 | | add RB, #STR | 2409 | | mov SBUF:FCARG1->L, L:RB |
2513 | | mov TMP2, PC // Need another temp register. | 2410 | | mov SBUF:FCARG1->w, RCa |
2514 | |.if X64 | 2411 | | mov SAVE_PC, PC |
2515 | | mov TMP3, RC | 2412 | | call extern lj_buf_putstr_ .. name .. @8 |
2516 | |.else | 2413 | | mov FCARG1, eax |
2517 | | mov ARG3, RC | 2414 | | call extern lj_buf_tostr@4 |
2518 | |.endif | 2415 | | jmp ->fff_resstr |
2519 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2520 | |1: | ||
2521 | | movzx RA, byte [RB] | ||
2522 | | add RB, 1 | ||
2523 | | sub RC, 1 | ||
2524 | | mov [PC+RC], RAL | ||
2525 | | jnz <1 | ||
2526 | | mov RD, PC | ||
2527 | | mov PC, TMP2 | ||
2528 | | jmp ->fff_newstr | ||
2529 | | | ||
2530 | |.macro ffstring_case, name, lo, hi | ||
2531 | | .ffunc_1 name | ||
2532 | | ffgccheck | ||
2533 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2534 | | mov STR:RB, [BASE] | ||
2535 | | mov RC, STR:RB->len | ||
2536 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
2537 | | add RB, #STR | ||
2538 | | mov TMP2, PC // Need another temp register. | ||
2539 | |.if X64 | ||
2540 | | mov TMP3, RC | ||
2541 | |.else | ||
2542 | | mov ARG3, RC | ||
2543 | |.endif | ||
2544 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2545 | | jmp >3 | ||
2546 | |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). | ||
2547 | | movzx RA, byte [RB+RC] | ||
2548 | | cmp RA, lo | ||
2549 | | jb >2 | ||
2550 | | cmp RA, hi | ||
2551 | | ja >2 | ||
2552 | | xor RA, 0x20 | ||
2553 | |2: | ||
2554 | | mov [PC+RC], RAL | ||
2555 | |3: | ||
2556 | | sub RC, 1 | ||
2557 | | jns <1 | ||
2558 | | mov RD, PC | ||
2559 | | mov PC, TMP2 | ||
2560 | | jmp ->fff_newstr | ||
2561 | |.endmacro | 2416 | |.endmacro |
2562 | | | 2417 | | |
2563 | |ffstring_case string_lower, 0x41, 0x5a | 2418 | |ffstring_op reverse |
2564 | |ffstring_case string_upper, 0x61, 0x7a | 2419 | |ffstring_op lower |
2565 | | | 2420 | |ffstring_op upper |
2566 | |//-- Table library ------------------------------------------------------ | ||
2567 | | | ||
2568 | |.ffunc_1 table_getn | ||
2569 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | ||
2570 | | mov RB, BASE // Save BASE. | ||
2571 | | mov TAB:FCARG1, [BASE] | ||
2572 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | ||
2573 | | // Length of table returned in eax (RD). | ||
2574 | | mov BASE, RB // Restore BASE. | ||
2575 | |.if DUALNUM | ||
2576 | | mov RB, RD; jmp ->fff_resi | ||
2577 | |.elif SSE | ||
2578 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | ||
2579 | |.else | ||
2580 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn | ||
2581 | |.endif | ||
2582 | | | 2421 | | |
2583 | |//-- Bit library -------------------------------------------------------- | 2422 | |//-- Bit library -------------------------------------------------------- |
2584 | | | 2423 | | |
2585 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). | ||
2586 | | | ||
2587 | |.macro .ffunc_bit, name, kind, fdef | 2424 | |.macro .ffunc_bit, name, kind, fdef |
2588 | | fdef name | 2425 | | fdef name |
2589 | |.if kind == 2 | 2426 | |.if kind == 2 |
2590 | |.if SSE | ||
2591 | | sseconst_tobit xmm1, RBa | 2427 | | sseconst_tobit xmm1, RBa |
2592 | |.else | ||
2593 | | mov TMP1, TOBIT_BIAS | ||
2594 | |.endif | ||
2595 | |.endif | 2428 | |.endif |
2596 | | cmp dword [BASE+4], LJ_TISNUM | 2429 | | cmp dword [BASE+4], LJ_TISNUM |
2597 | |.if DUALNUM | 2430 | |.if DUALNUM |
@@ -2607,24 +2440,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2607 | |.else | 2440 | |.else |
2608 | | jae ->fff_fallback | 2441 | | jae ->fff_fallback |
2609 | |.endif | 2442 | |.endif |
2610 | |.if SSE | ||
2611 | | movsd xmm0, qword [BASE] | 2443 | | movsd xmm0, qword [BASE] |
2612 | |.if kind < 2 | 2444 | |.if kind < 2 |
2613 | | sseconst_tobit xmm1, RBa | 2445 | | sseconst_tobit xmm1, RBa |
2614 | |.endif | 2446 | |.endif |
2615 | | addsd xmm0, xmm1 | 2447 | | addsd xmm0, xmm1 |
2616 | | movd RB, xmm0 | 2448 | | movd RB, xmm0 |
2617 | |.else | ||
2618 | | fld qword [BASE] | ||
2619 | |.if kind < 2 | ||
2620 | | mov TMP1, TOBIT_BIAS | ||
2621 | |.endif | ||
2622 | | fadd TMP1 | ||
2623 | | fstp FPARG1 | ||
2624 | |.if kind > 0 | ||
2625 | | mov RB, ARG1 | ||
2626 | |.endif | ||
2627 | |.endif | ||
2628 | |2: | 2449 | |2: |
2629 | |.endmacro | 2450 | |.endmacro |
2630 | | | 2451 | | |
@@ -2633,15 +2454,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2633 | |.endmacro | 2454 | |.endmacro |
2634 | | | 2455 | | |
2635 | |.ffunc_bit bit_tobit, 0 | 2456 | |.ffunc_bit bit_tobit, 0 |
2636 | |.if DUALNUM or SSE | ||
2637 | |.if not SSE | ||
2638 | | mov RB, ARG1 | ||
2639 | |.endif | ||
2640 | | jmp ->fff_resbit | 2457 | | jmp ->fff_resbit |
2641 | |.else | ||
2642 | | fild ARG1 | ||
2643 | | jmp ->fff_resn | ||
2644 | |.endif | ||
2645 | | | 2458 | | |
2646 | |.macro .ffunc_bit_op, name, ins | 2459 | |.macro .ffunc_bit_op, name, ins |
2647 | | .ffunc_bit name, 2 | 2460 | | .ffunc_bit name, 2 |
@@ -2661,17 +2474,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2661 | |.else | 2474 | |.else |
2662 | | jae ->fff_fallback_bit_op | 2475 | | jae ->fff_fallback_bit_op |
2663 | |.endif | 2476 | |.endif |
2664 | |.if SSE | ||
2665 | | movsd xmm0, qword [RD] | 2477 | | movsd xmm0, qword [RD] |
2666 | | addsd xmm0, xmm1 | 2478 | | addsd xmm0, xmm1 |
2667 | | movd RA, xmm0 | 2479 | | movd RA, xmm0 |
2668 | | ins RB, RA | 2480 | | ins RB, RA |
2669 | |.else | ||
2670 | | fld qword [RD] | ||
2671 | | fadd TMP1 | ||
2672 | | fstp FPARG1 | ||
2673 | | ins RB, ARG1 | ||
2674 | |.endif | ||
2675 | | sub RD, 8 | 2481 | | sub RD, 8 |
2676 | | jmp <1 | 2482 | | jmp <1 |
2677 | |.endmacro | 2483 | |.endmacro |
@@ -2688,15 +2494,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2688 | | not RB | 2494 | | not RB |
2689 | |.if DUALNUM | 2495 | |.if DUALNUM |
2690 | | jmp ->fff_resbit | 2496 | | jmp ->fff_resbit |
2691 | |.elif SSE | 2497 | |.else |
2692 | |->fff_resbit: | 2498 | |->fff_resbit: |
2693 | | cvtsi2sd xmm0, RB | 2499 | | cvtsi2sd xmm0, RB |
2694 | | jmp ->fff_resxmm0 | 2500 | | jmp ->fff_resxmm0 |
2695 | |.else | ||
2696 | |->fff_resbit: | ||
2697 | | mov ARG1, RB | ||
2698 | | fild ARG1 | ||
2699 | | jmp ->fff_resn | ||
2700 | |.endif | 2501 | |.endif |
2701 | | | 2502 | | |
2702 | |->fff_fallback_bit_op: | 2503 | |->fff_fallback_bit_op: |
@@ -2709,22 +2510,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2709 | | // Note: no inline conversion from number for 2nd argument! | 2510 | | // Note: no inline conversion from number for 2nd argument! |
2710 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | 2511 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback |
2711 | | mov RA, dword [BASE+8] | 2512 | | mov RA, dword [BASE+8] |
2712 | |.elif SSE | 2513 | |.else |
2713 | | .ffunc_nnsse name | 2514 | | .ffunc_nnsse name |
2714 | | sseconst_tobit xmm2, RBa | 2515 | | sseconst_tobit xmm2, RBa |
2715 | | addsd xmm0, xmm2 | 2516 | | addsd xmm0, xmm2 |
2716 | | addsd xmm1, xmm2 | 2517 | | addsd xmm1, xmm2 |
2717 | | movd RB, xmm0 | 2518 | | movd RB, xmm0 |
2718 | | movd RA, xmm1 | 2519 | | movd RA, xmm1 |
2719 | |.else | ||
2720 | | .ffunc_nn name | ||
2721 | | mov TMP1, TOBIT_BIAS | ||
2722 | | fadd TMP1 | ||
2723 | | fstp FPARG3 | ||
2724 | | fadd TMP1 | ||
2725 | | fstp FPARG1 | ||
2726 | | mov RA, ARG3 | ||
2727 | | mov RB, ARG1 | ||
2728 | |.endif | 2520 | |.endif |
2729 | | ins RB, cl // Assumes RA is ecx. | 2521 | | ins RB, cl // Assumes RA is ecx. |
2730 | | jmp ->fff_resbit | 2522 | | jmp ->fff_resbit |
@@ -2858,7 +2650,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2858 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | 2650 | | mov FCARG2, PC // Caveat: FCARG2 == BASE |
2859 | | mov FCARG1, L:RB | 2651 | | mov FCARG1, L:RB |
2860 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | 2652 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. |
2861 | | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) | 2653 | | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) |
2862 | |3: | 2654 | |3: |
2863 | | mov BASE, L:RB->base | 2655 | | mov BASE, L:RB->base |
2864 | |4: | 2656 | |4: |
@@ -2929,6 +2721,79 @@ static void build_subroutines(BuildCtx *ctx) | |||
2929 | | add NARGS:RD, 1 | 2721 | | add NARGS:RD, 1 |
2930 | | jmp RBa | 2722 | | jmp RBa |
2931 | | | 2723 | | |
2724 | |->cont_stitch: // Trace stitching. | ||
2725 | |.if JIT | ||
2726 | | // BASE = base, RC = result, RB = mbase | ||
2727 | | mov TRACE:RA, [RB-24] // Save previous trace. | ||
2728 | | mov TMP1, TRACE:RA | ||
2729 | | mov TMP3, DISPATCH // Need one more register. | ||
2730 | | mov DISPATCH, MULTRES | ||
2731 | | movzx RA, PC_RA | ||
2732 | | lea RA, [BASE+RA*8] // Call base. | ||
2733 | | sub DISPATCH, 1 | ||
2734 | | jz >2 | ||
2735 | |1: // Move results down. | ||
2736 | |.if X64 | ||
2737 | | mov RBa, [RC] | ||
2738 | | mov [RA], RBa | ||
2739 | |.else | ||
2740 | | mov RB, [RC] | ||
2741 | | mov [RA], RB | ||
2742 | | mov RB, [RC+4] | ||
2743 | | mov [RA+4], RB | ||
2744 | |.endif | ||
2745 | | add RC, 8 | ||
2746 | | add RA, 8 | ||
2747 | | sub DISPATCH, 1 | ||
2748 | | jnz <1 | ||
2749 | |2: | ||
2750 | | movzx RC, PC_RA | ||
2751 | | movzx RB, PC_RB | ||
2752 | | add RC, RB | ||
2753 | | lea RC, [BASE+RC*8-8] | ||
2754 | |3: | ||
2755 | | cmp RC, RA | ||
2756 | | ja >9 // More results wanted? | ||
2757 | | | ||
2758 | | mov DISPATCH, TMP3 | ||
2759 | | mov TRACE:RD, TMP1 // Get previous trace. | ||
2760 | | movzx RB, word TRACE:RD->traceno | ||
2761 | | movzx RD, word TRACE:RD->link | ||
2762 | | cmp RD, RB | ||
2763 | | je ->cont_nop // Blacklisted. | ||
2764 | | test RD, RD | ||
2765 | | jne =>BC_JLOOP // Jump to stitched trace. | ||
2766 | | | ||
2767 | | // Stitch a new trace to the previous trace. | ||
2768 | | mov [DISPATCH+DISPATCH_J(exitno)], RB | ||
2769 | | mov L:RB, SAVE_L | ||
2770 | | mov L:RB->base, BASE | ||
2771 | | mov FCARG2, PC | ||
2772 | | lea FCARG1, [DISPATCH+GG_DISP2J] | ||
2773 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | ||
2774 | | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) | ||
2775 | | mov BASE, L:RB->base | ||
2776 | | jmp ->cont_nop | ||
2777 | | | ||
2778 | |9: // Fill up results with nil. | ||
2779 | | mov dword [RA+4], LJ_TNIL | ||
2780 | | add RA, 8 | ||
2781 | | jmp <3 | ||
2782 | |.endif | ||
2783 | | | ||
2784 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2785 | #if LJ_HASPROFILE | ||
2786 | | mov L:RB, SAVE_L | ||
2787 | | mov L:RB->base, BASE | ||
2788 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | ||
2789 | | mov FCARG1, L:RB | ||
2790 | | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) | ||
2791 | | mov BASE, L:RB->base | ||
2792 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2793 | | sub PC, 4 | ||
2794 | | jmp ->cont_nop | ||
2795 | #endif | ||
2796 | | | ||
2932 | |//----------------------------------------------------------------------- | 2797 | |//----------------------------------------------------------------------- |
2933 | |//-- Trace exit handler ------------------------------------------------- | 2798 | |//-- Trace exit handler ------------------------------------------------- |
2934 | |//----------------------------------------------------------------------- | 2799 | |//----------------------------------------------------------------------- |
@@ -2981,10 +2846,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2981 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 | 2846 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 |
2982 | |.endif | 2847 | |.endif |
2983 | | // Caveat: RB is ebp. | 2848 | | // Caveat: RB is ebp. |
2984 | | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] | 2849 | | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] |
2985 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | 2850 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] |
2986 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | 2851 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa |
2987 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | ||
2988 | | mov L:RB->base, BASE | 2852 | | mov L:RB->base, BASE |
2989 | |.if X64WIN | 2853 | |.if X64WIN |
2990 | | lea CARG2, [rsp+4*8] | 2854 | | lea CARG2, [rsp+4*8] |
@@ -2994,6 +2858,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2994 | | lea FCARG2, [esp+16] | 2858 | | lea FCARG2, [esp+16] |
2995 | |.endif | 2859 | |.endif |
2996 | | lea FCARG1, [DISPATCH+GG_DISP2J] | 2860 | | lea FCARG1, [DISPATCH+GG_DISP2J] |
2861 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
2997 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) | 2862 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) |
2998 | | // MULTRES or negated error code returned in eax (RD). | 2863 | | // MULTRES or negated error code returned in eax (RD). |
2999 | | mov RAa, L:RB->cframe | 2864 | | mov RAa, L:RB->cframe |
@@ -3040,12 +2905,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
3040 | | mov r13, TMPa | 2905 | | mov r13, TMPa |
3041 | | mov r12, TMPQ | 2906 | | mov r12, TMPQ |
3042 | |.endif | 2907 | |.endif |
3043 | | test RD, RD; js >3 // Check for error from exit. | 2908 | | test RD, RD; js >9 // Check for error from exit. |
2909 | | mov L:RB, SAVE_L | ||
3044 | | mov MULTRES, RD | 2910 | | mov MULTRES, RD |
3045 | | mov LFUNC:KBASE, [BASE-8] | 2911 | | mov LFUNC:KBASE, [BASE-8] |
3046 | | mov KBASE, LFUNC:KBASE->pc | 2912 | | mov KBASE, LFUNC:KBASE->pc |
3047 | | mov KBASE, [KBASE+PC2PROTO(k)] | 2913 | | mov KBASE, [KBASE+PC2PROTO(k)] |
3048 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | 2914 | | mov L:RB->base, BASE |
2915 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
3049 | | set_vmstate INTERP | 2916 | | set_vmstate INTERP |
3050 | | // Modified copy of ins_next which handles function header dispatch, too. | 2917 | | // Modified copy of ins_next which handles function header dispatch, too. |
3051 | | mov RC, [PC] | 2918 | | mov RC, [PC] |
@@ -3054,18 +2921,35 @@ static void build_subroutines(BuildCtx *ctx) | |||
3054 | | add PC, 4 | 2921 | | add PC, 4 |
3055 | | shr RC, 16 | 2922 | | shr RC, 16 |
3056 | | cmp OP, BC_FUNCF // Function header? | 2923 | | cmp OP, BC_FUNCF // Function header? |
3057 | | jb >2 | 2924 | | jb >3 |
3058 | | mov RC, MULTRES // RC/RD holds nres+1. | 2925 | | cmp OP, BC_FUNCC+2 // Fast function? |
2926 | | jae >4 | ||
3059 | |2: | 2927 | |2: |
2928 | | mov RC, MULTRES // RC/RD holds nres+1. | ||
2929 | |3: | ||
3060 | |.if X64 | 2930 | |.if X64 |
3061 | | jmp aword [DISPATCH+OP*8] | 2931 | | jmp aword [DISPATCH+OP*8] |
3062 | |.else | 2932 | |.else |
3063 | | jmp aword [DISPATCH+OP*4] | 2933 | | jmp aword [DISPATCH+OP*4] |
3064 | |.endif | 2934 | |.endif |
3065 | | | 2935 | | |
3066 | |3: // Rethrow error from the right C frame. | 2936 | |4: // Check frame below fast function. |
2937 | | mov RC, [BASE-4] | ||
2938 | | test RC, FRAME_TYPE | ||
2939 | | jnz <2 // Trace stitching continuation? | ||
2940 | | // Otherwise set KBASE for Lua function below fast function. | ||
2941 | | movzx RC, byte [RC-3] | ||
2942 | | not RCa | ||
2943 | | mov LFUNC:KBASE, [BASE+RC*8-8] | ||
2944 | | mov KBASE, LFUNC:KBASE->pc | ||
2945 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
2946 | | jmp <2 | ||
2947 | | | ||
2948 | |9: // Rethrow error from the right C frame. | ||
2949 | | mov FCARG2, RD | ||
3067 | | mov FCARG1, L:RB | 2950 | | mov FCARG1, L:RB |
3068 | | call extern lj_err_run@4 // (lua_State *L) | 2951 | | neg FCARG2 |
2952 | | call extern lj_err_trace@8 // (lua_State *L, int errcode) | ||
3069 | |.endif | 2953 | |.endif |
3070 | | | 2954 | | |
3071 | |//----------------------------------------------------------------------- | 2955 | |//----------------------------------------------------------------------- |
@@ -3073,27 +2957,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
3073 | |//----------------------------------------------------------------------- | 2957 | |//----------------------------------------------------------------------- |
3074 | | | 2958 | | |
3075 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2959 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
3076 | |// and from JIT code. | 2960 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
3077 | | | 2961 | |.macro vm_round, name, mode, cond |
3078 | |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. | 2962 | |->name: |
3079 | |.macro vm_round_x87, mode1, mode2 | 2963 | |.if not X64 and cond |
3080 | | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. | 2964 | | movsd xmm0, qword [esp+4] |
3081 | | mov [esp+8], eax | 2965 | | call ->name .. _sse |
3082 | | mov ax, mode1 | 2966 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. |
3083 | | or ax, [esp+4] | 2967 | | fld qword [esp+4] |
3084 | |.if mode2 ~= 0xffff | ||
3085 | | and ax, mode2 | ||
3086 | |.endif | ||
3087 | | mov [esp+6], ax | ||
3088 | | fldcw word [esp+6] | ||
3089 | | frndint | ||
3090 | | fldcw word [esp+4] | ||
3091 | | mov eax, [esp+8] | ||
3092 | | ret | 2968 | | ret |
3093 | |.endmacro | 2969 | |.endif |
3094 | | | 2970 | | |
3095 | |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | 2971 | |->name .. _sse: |
3096 | |.macro vm_round_sse, mode | ||
3097 | | sseconst_abs xmm2, RDa | 2972 | | sseconst_abs xmm2, RDa |
3098 | | sseconst_2p52 xmm3, RDa | 2973 | | sseconst_2p52 xmm3, RDa |
3099 | | movaps xmm1, xmm0 | 2974 | | movaps xmm1, xmm0 |
@@ -3129,22 +3004,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
3129 | | ret | 3004 | | ret |
3130 | |.endmacro | 3005 | |.endmacro |
3131 | | | 3006 | | |
3132 | |.macro vm_round, name, ssemode, mode1, mode2 | 3007 | | vm_round vm_floor, 0, 1 |
3133 | |->name: | 3008 | | vm_round vm_ceil, 1, JIT |
3134 | |.if not SSE | 3009 | | vm_round vm_trunc, 2, JIT |
3135 | | vm_round_x87 mode1, mode2 | ||
3136 | |.endif | ||
3137 | |->name .. _sse: | ||
3138 | | vm_round_sse ssemode | ||
3139 | |.endmacro | ||
3140 | | | ||
3141 | | vm_round vm_floor, 0, 0x0400, 0xf7ff | ||
3142 | | vm_round vm_ceil, 1, 0x0800, 0xfbff | ||
3143 | | vm_round vm_trunc, 2, 0x0c00, 0xffff | ||
3144 | | | 3010 | | |
3145 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 3011 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
3146 | |->vm_mod: | 3012 | |->vm_mod: |
3147 | |.if SSE | ||
3148 | |// Args in xmm0/xmm1, return value in xmm0. | 3013 | |// Args in xmm0/xmm1, return value in xmm0. |
3149 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | 3014 | |// Caveat: xmm0-xmm5 and RC (eax) modified! |
3150 | | movaps xmm5, xmm0 | 3015 | | movaps xmm5, xmm0 |
@@ -3172,172 +3037,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3172 | | movaps xmm0, xmm5 | 3037 | | movaps xmm0, xmm5 |
3173 | | subsd xmm0, xmm1 | 3038 | | subsd xmm0, xmm1 |
3174 | | ret | 3039 | | ret |
3175 | |.else | ||
3176 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | ||
3177 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | ||
3178 | | fld st1 | ||
3179 | | fdiv st1 | ||
3180 | | fnstcw word [esp+4] | ||
3181 | | mov ax, 0x0400 | ||
3182 | | or ax, [esp+4] | ||
3183 | | and ax, 0xf7ff | ||
3184 | | mov [esp+6], ax | ||
3185 | | fldcw word [esp+6] | ||
3186 | | frndint | ||
3187 | | fldcw word [esp+4] | ||
3188 | | fmulp st1 | ||
3189 | | fsubp st1 | ||
3190 | | ret | ||
3191 | |.endif | ||
3192 | | | ||
3193 | |// FP log2(x). Called by math.log(x, base). | ||
3194 | |->vm_log2: | ||
3195 | |.if X64WIN | ||
3196 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3197 | | fld1 | ||
3198 | | fld qword [rsp+8] | ||
3199 | | fyl2x | ||
3200 | | fstp qword [rsp+8] | ||
3201 | | movsd xmm0, qword [rsp+8] | ||
3202 | |.elif X64 | ||
3203 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3204 | | fld1 | ||
3205 | | fld qword [rsp-8] | ||
3206 | | fyl2x | ||
3207 | | fstp qword [rsp-8] | ||
3208 | | movsd xmm0, qword [rsp-8] | ||
3209 | |.else | ||
3210 | | fld1 | ||
3211 | | fld qword [esp+4] | ||
3212 | | fyl2x | ||
3213 | |.endif | ||
3214 | | ret | ||
3215 | | | ||
3216 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | ||
3217 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | ||
3218 | |// Caveat: needs 3 slots on x87 stack! | ||
3219 | |->vm_exp_x87: | ||
3220 | | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) | ||
3221 | |->vm_exp2_x87: | ||
3222 | | .if X64WIN | ||
3223 | | .define expscratch, dword [rsp+8] // Use scratch area. | ||
3224 | | .elif X64 | ||
3225 | | .define expscratch, dword [rsp-8] // Use red zone. | ||
3226 | | .else | ||
3227 | | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. | ||
3228 | | .endif | ||
3229 | | fst expscratch // Caveat: overwrites ARG1. | ||
3230 | | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf | ||
3231 | | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 | ||
3232 | |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. | ||
3233 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3234 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3235 | |1: | ||
3236 | | ret | ||
3237 | |2: | ||
3238 | | fpop; fldz; ret | ||
3239 | | | ||
3240 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | ||
3241 | |// and vm_arith. | ||
3242 | |// Args/ret on x87 stack (y on top). RC (eax) modified. | ||
3243 | |// Caveat: needs 3 slots on x87 stack! | ||
3244 | |->vm_pow: | ||
3245 | |.if not SSE | ||
3246 | | fist dword [esp+4] // Store/reload int before comparison. | ||
3247 | | fild dword [esp+4] // Integral exponent used in vm_powi. | ||
3248 | | fucomip st1 | ||
3249 | | jnz >8 // Branch for FP exponents. | ||
3250 | | jp >9 // Branch for NaN exponent. | ||
3251 | | fpop // Pop y and fallthrough to vm_powi. | ||
3252 | | | ||
3253 | |// FP/int power function x^i. Arg1/ret on x87 stack. | ||
3254 | |// Arg2 (int) on C stack. RC (eax) modified. | ||
3255 | |// Caveat: needs 2 slots on x87 stack! | ||
3256 | | mov eax, [esp+4] | ||
3257 | | cmp eax, 1; jle >6 // i<=1? | ||
3258 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
3259 | |1: // Handle leading zeros. | ||
3260 | | test eax, 1; jnz >2 | ||
3261 | | fmul st0 | ||
3262 | | shr eax, 1 | ||
3263 | | jmp <1 | ||
3264 | |2: | ||
3265 | | shr eax, 1; jz >5 | ||
3266 | | fdup | ||
3267 | |3: // Handle trailing bits. | ||
3268 | | fmul st0 | ||
3269 | | shr eax, 1; jz >4 | ||
3270 | | jnc <3 | ||
3271 | | fmul st1, st0 | ||
3272 | | jmp <3 | ||
3273 | |4: | ||
3274 | | fmulp st1 | ||
3275 | |5: | ||
3276 | | ret | ||
3277 | |6: | ||
3278 | | je <5 // x^1 ==> x | ||
3279 | | jb >7 | ||
3280 | | fld1; fdivrp st1 | ||
3281 | | neg eax | ||
3282 | | cmp eax, 1; je <5 // x^-1 ==> 1/x | ||
3283 | | jmp <1 // x^-i ==> (1/x)^i | ||
3284 | |7: | ||
3285 | | fpop; fld1 // x^0 ==> 1 | ||
3286 | | ret | ||
3287 | | | ||
3288 | |8: // FP/FP power function x^y. | ||
3289 | | fst dword [esp+4] | ||
3290 | | fxch | ||
3291 | | fst dword [esp+8] | ||
3292 | | mov eax, [esp+4]; shl eax, 1 | ||
3293 | | cmp eax, 0xff000000; je >2 // x^+-Inf? | ||
3294 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3295 | | cmp eax, 0xff000000; je >4 // +-Inf^y? | ||
3296 | | fyl2x | ||
3297 | | jmp ->vm_exp2raw | ||
3298 | | | ||
3299 | |9: // Handle x^NaN. | ||
3300 | | fld1 | ||
3301 | | fucomip st2 | ||
3302 | | je >1 // 1^NaN ==> 1 | ||
3303 | | fxch // x^NaN ==> NaN | ||
3304 | |1: | ||
3305 | | fpop | ||
3306 | | ret | ||
3307 | | | ||
3308 | |2: // Handle x^+-Inf. | ||
3309 | | fabs | ||
3310 | | fld1 | ||
3311 | | fucomip st1 | ||
3312 | | je >3 // +-1^+-Inf ==> 1 | ||
3313 | | fpop; fabs; fldz; mov eax, 0; setc al | ||
3314 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3315 | | fxch | ||
3316 | |3: | ||
3317 | | fpop1; fabs | ||
3318 | | ret | ||
3319 | | | ||
3320 | |4: // Handle +-0^y or +-Inf^y. | ||
3321 | | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| | ||
3322 | | fpop; fpop | ||
3323 | | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf | ||
3324 | | fldz // y < 0, +-Inf^y ==> 0 | ||
3325 | | ret | ||
3326 | |5: | ||
3327 | | mov dword [esp+4], 0x7f800000 // Return +Inf. | ||
3328 | | fld dword [esp+4] | ||
3329 | | ret | ||
3330 | |.endif | ||
3331 | | | ||
3332 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | ||
3333 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | ||
3334 | |->vm_pow_sse: | ||
3335 | | cvtsd2si eax, xmm1 | ||
3336 | | cvtsi2sd xmm2, eax | ||
3337 | | ucomisd xmm1, xmm2 | ||
3338 | | jnz >8 // Branch for FP exponents. | ||
3339 | | jp >9 // Branch for NaN exponent. | ||
3340 | | // Fallthrough to vm_powi_sse. | ||
3341 | | | 3040 | | |
3342 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | 3041 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |
3343 | |->vm_powi_sse: | 3042 | |->vm_powi_sse: |
@@ -3374,287 +3073,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3374 | | sseconst_1 xmm0, RDa | 3073 | | sseconst_1 xmm0, RDa |
3375 | | ret | 3074 | | ret |
3376 | | | 3075 | | |
3377 | |8: // FP/FP power function x^y. | ||
3378 | |.if X64 | ||
3379 | | movd rax, xmm1; shl rax, 1 | ||
3380 | | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf? | ||
3381 | | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? | ||
3382 | | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y? | ||
3383 | | .if X64WIN | ||
3384 | | movsd qword [rsp+16], xmm1 // Use scratch area. | ||
3385 | | movsd qword [rsp+8], xmm0 | ||
3386 | | fld qword [rsp+16] | ||
3387 | | fld qword [rsp+8] | ||
3388 | | .else | ||
3389 | | movsd qword [rsp-16], xmm1 // Use red zone. | ||
3390 | | movsd qword [rsp-8], xmm0 | ||
3391 | | fld qword [rsp-16] | ||
3392 | | fld qword [rsp-8] | ||
3393 | | .endif | ||
3394 | |.else | ||
3395 | | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. | ||
3396 | | movsd qword [esp+4], xmm0 | ||
3397 | | cmp dword [esp+12], 0; jne >1 | ||
3398 | | mov eax, [esp+16]; shl eax, 1 | ||
3399 | | cmp eax, 0xffe00000; je >2 // x^+-Inf? | ||
3400 | |1: | ||
3401 | | cmp dword [esp+4], 0; jne >1 | ||
3402 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3403 | | cmp eax, 0xffe00000; je >5 // +-Inf^y? | ||
3404 | |1: | ||
3405 | | fld qword [esp+12] | ||
3406 | | fld qword [esp+4] | ||
3407 | |.endif | ||
3408 | | fyl2x // y*log2(x) | ||
3409 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3410 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3411 | |.if X64WIN | ||
3412 | | fstp qword [rsp+8] // Use scratch area. | ||
3413 | | movsd xmm0, qword [rsp+8] | ||
3414 | |.elif X64 | ||
3415 | | fstp qword [rsp-8] // Use red zone. | ||
3416 | | movsd xmm0, qword [rsp-8] | ||
3417 | |.else | ||
3418 | | fstp qword [esp+4] // Needs 8 byte scratch area. | ||
3419 | | movsd xmm0, qword [esp+4] | ||
3420 | |.endif | ||
3421 | | ret | ||
3422 | | | ||
3423 | |9: // Handle x^NaN. | ||
3424 | | sseconst_1 xmm2, RDa | ||
3425 | | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 | ||
3426 | | movaps xmm0, xmm1 // x^NaN ==> NaN | ||
3427 | |1: | ||
3428 | | ret | ||
3429 | | | ||
3430 | |2: // Handle x^+-Inf. | ||
3431 | | sseconst_abs xmm2, RDa | ||
3432 | | andpd xmm0, xmm2 // |x| | ||
3433 | | sseconst_1 xmm2, RDa | ||
3434 | | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 | ||
3435 | | movmskpd eax, xmm1 | ||
3436 | | xorps xmm0, xmm0 | ||
3437 | | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3438 | |3: | ||
3439 | | sseconst_hi xmm0, RDa, 7ff00000 // +Inf | ||
3440 | | ret | ||
3441 | | | ||
3442 | |4: // Handle +-0^y. | ||
3443 | | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf | ||
3444 | | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 | ||
3445 | | ret | ||
3446 | | | ||
3447 | |5: // Handle +-Inf^y. | ||
3448 | | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf | ||
3449 | | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 | ||
3450 | | ret | ||
3451 | | | ||
3452 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | ||
3453 | |// Computes fpm(x) for extended math functions. ORDER FPM. | ||
3454 | |->vm_foldfpm: | ||
3455 | |.if JIT | ||
3456 | |.if X64 | ||
3457 | | .if X64WIN | ||
3458 | | .define fpmop, CARG2d | ||
3459 | | .else | ||
3460 | | .define fpmop, CARG1d | ||
3461 | | .endif | ||
3462 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
3463 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
3464 | | sqrtsd xmm0, xmm0; ret | ||
3465 | |2: | ||
3466 | | .if X64WIN | ||
3467 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3468 | | fld qword [rsp+8] | ||
3469 | | .else | ||
3470 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3471 | | fld qword [rsp-8] | ||
3472 | | .endif | ||
3473 | | cmp fpmop, 5; ja >2 | ||
3474 | | .if X64WIN; pop rax; .endif | ||
3475 | | je >1 | ||
3476 | | call ->vm_exp_x87 | ||
3477 | | .if X64WIN; push rax; .endif | ||
3478 | | jmp >7 | ||
3479 | |1: | ||
3480 | | call ->vm_exp2_x87 | ||
3481 | | .if X64WIN; push rax; .endif | ||
3482 | | jmp >7 | ||
3483 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3484 | | fldln2; fxch; fyl2x; jmp >7 | ||
3485 | |1: ; fld1; fxch; fyl2x; jmp >7 | ||
3486 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3487 | | fldlg2; fxch; fyl2x; jmp >7 | ||
3488 | |1: ; fsin; jmp >7 | ||
3489 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3490 | | fcos; jmp >7 | ||
3491 | |1: ; fptan; fpop | ||
3492 | |7: | ||
3493 | | .if X64WIN | ||
3494 | | fstp qword [rsp+8] // Use scratch area. | ||
3495 | | movsd xmm0, qword [rsp+8] | ||
3496 | | .else | ||
3497 | | fstp qword [rsp-8] // Use red zone. | ||
3498 | | movsd xmm0, qword [rsp-8] | ||
3499 | | .endif | ||
3500 | | ret | ||
3501 | |.else // x86 calling convention. | ||
3502 | | .define fpmop, eax | ||
3503 | |.if SSE | ||
3504 | | mov fpmop, [esp+12] | ||
3505 | | movsd xmm0, qword [esp+4] | ||
3506 | | cmp fpmop, 1; je >1; ja >2 | ||
3507 | | call ->vm_floor; jmp >7 | ||
3508 | |1: ; call ->vm_ceil; jmp >7 | ||
3509 | |2: ; cmp fpmop, 3; je >1; ja >2 | ||
3510 | | call ->vm_trunc; jmp >7 | ||
3511 | |1: | ||
3512 | | sqrtsd xmm0, xmm0 | ||
3513 | |7: | ||
3514 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3515 | | fld qword [esp+4] | ||
3516 | | ret | ||
3517 | |2: ; fld qword [esp+4] | ||
3518 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3519 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3520 | | fldln2; fxch; fyl2x; ret | ||
3521 | |1: ; fld1; fxch; fyl2x; ret | ||
3522 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3523 | | fldlg2; fxch; fyl2x; ret | ||
3524 | |1: ; fsin; ret | ||
3525 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3526 | | fcos; ret | ||
3527 | |1: ; fptan; fpop; ret | ||
3528 | |.else | ||
3529 | | mov fpmop, [esp+12] | ||
3530 | | fld qword [esp+4] | ||
3531 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
3532 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
3533 | | fsqrt; ret | ||
3534 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3535 | | cmp fpmop, 7; je >1; ja >2 | ||
3536 | | fldln2; fxch; fyl2x; ret | ||
3537 | |1: ; fld1; fxch; fyl2x; ret | ||
3538 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3539 | | fldlg2; fxch; fyl2x; ret | ||
3540 | |1: ; fsin; ret | ||
3541 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3542 | | fcos; ret | ||
3543 | |1: ; fptan; fpop; ret | ||
3544 | |.endif | ||
3545 | |.endif | ||
3546 | |9: ; int3 // Bad fpm. | ||
3547 | |.endif | ||
3548 | | | ||
3549 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | ||
3550 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | ||
3551 | |// and basic math functions. ORDER ARITH | ||
3552 | |->vm_foldarith: | ||
3553 | |.if X64 | ||
3554 | | | ||
3555 | | .if X64WIN | ||
3556 | | .define foldop, CARG3d | ||
3557 | | .else | ||
3558 | | .define foldop, CARG1d | ||
3559 | | .endif | ||
3560 | | cmp foldop, 1; je >1; ja >2 | ||
3561 | | addsd xmm0, xmm1; ret | ||
3562 | |1: ; subsd xmm0, xmm1; ret | ||
3563 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3564 | | mulsd xmm0, xmm1; ret | ||
3565 | |1: ; divsd xmm0, xmm1; ret | ||
3566 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow | ||
3567 | | cmp foldop, 7; je >1; ja >2 | ||
3568 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | ||
3569 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | ||
3570 | |2: ; cmp foldop, 9; ja >2 | ||
3571 | |.if X64WIN | ||
3572 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3573 | | movsd qword [rsp+16], xmm1 | ||
3574 | | fld qword [rsp+8] | ||
3575 | | fld qword [rsp+16] | ||
3576 | |.else | ||
3577 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3578 | | movsd qword [rsp-16], xmm1 | ||
3579 | | fld qword [rsp-8] | ||
3580 | | fld qword [rsp-16] | ||
3581 | |.endif | ||
3582 | | je >1 | ||
3583 | | fpatan | ||
3584 | |7: | ||
3585 | |.if X64WIN | ||
3586 | | fstp qword [rsp+8] // Use scratch area. | ||
3587 | | movsd xmm0, qword [rsp+8] | ||
3588 | |.else | ||
3589 | | fstp qword [rsp-8] // Use red zone. | ||
3590 | | movsd xmm0, qword [rsp-8] | ||
3591 | |.endif | ||
3592 | | ret | ||
3593 | |1: ; fxch; fscale; fpop1; jmp <7 | ||
3594 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3595 | | minsd xmm0, xmm1; ret | ||
3596 | |1: ; maxsd xmm0, xmm1; ret | ||
3597 | |9: ; int3 // Bad op. | ||
3598 | | | ||
3599 | |.elif SSE // x86 calling convention with SSE ops. | ||
3600 | | | ||
3601 | | .define foldop, eax | ||
3602 | | mov foldop, [esp+20] | ||
3603 | | movsd xmm0, qword [esp+4] | ||
3604 | | movsd xmm1, qword [esp+12] | ||
3605 | | cmp foldop, 1; je >1; ja >2 | ||
3606 | | addsd xmm0, xmm1 | ||
3607 | |7: | ||
3608 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3609 | | fld qword [esp+4] | ||
3610 | | ret | ||
3611 | |1: ; subsd xmm0, xmm1; jmp <7 | ||
3612 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3613 | | mulsd xmm0, xmm1; jmp <7 | ||
3614 | |1: ; divsd xmm0, xmm1; jmp <7 | ||
3615 | |2: ; cmp foldop, 5 | ||
3616 | | je >1; ja >2 | ||
3617 | | call ->vm_mod; jmp <7 | ||
3618 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. | ||
3619 | |2: ; cmp foldop, 7; je >1; ja >2 | ||
3620 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | ||
3621 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | ||
3622 | |2: ; cmp foldop, 9; ja >2 | ||
3623 | | fld qword [esp+4] // Reload from stack | ||
3624 | | fld qword [esp+12] | ||
3625 | | je >1 | ||
3626 | | fpatan; ret | ||
3627 | |1: ; fxch; fscale; fpop1; ret | ||
3628 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3629 | | minsd xmm0, xmm1; jmp <7 | ||
3630 | |1: ; maxsd xmm0, xmm1; jmp <7 | ||
3631 | |9: ; int3 // Bad op. | ||
3632 | | | ||
3633 | |.else // x86 calling convention with x87 ops. | ||
3634 | | | ||
3635 | | mov eax, [esp+20] | ||
3636 | | fld qword [esp+4] | ||
3637 | | fld qword [esp+12] | ||
3638 | | cmp eax, 1; je >1; ja >2 | ||
3639 | | faddp st1; ret | ||
3640 | |1: ; fsubp st1; ret | ||
3641 | |2: ; cmp eax, 3; je >1; ja >2 | ||
3642 | | fmulp st1; ret | ||
3643 | |1: ; fdivp st1; ret | ||
3644 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | ||
3645 | | cmp eax, 7; je >1; ja >2 | ||
3646 | | fpop; fchs; ret | ||
3647 | |1: ; fpop; fabs; ret | ||
3648 | |2: ; cmp eax, 9; je >1; ja >2 | ||
3649 | | fpatan; ret | ||
3650 | |1: ; fxch; fscale; fpop1; ret | ||
3651 | |2: ; cmp eax, 11; je >1; ja >9 | ||
3652 | | fucomi st1; fcmovnbe st1; fpop1; ret | ||
3653 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | ||
3654 | |9: ; int3 // Bad op. | ||
3655 | | | ||
3656 | |.endif | ||
3657 | | | ||
3658 | |//----------------------------------------------------------------------- | 3076 | |//----------------------------------------------------------------------- |
3659 | |//-- Miscellaneous functions -------------------------------------------- | 3077 | |//-- Miscellaneous functions -------------------------------------------- |
3660 | |//----------------------------------------------------------------------- | 3078 | |//----------------------------------------------------------------------- |
@@ -3665,6 +3083,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3665 | | mov eax, CARG1d | 3083 | | mov eax, CARG1d |
3666 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif | 3084 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif |
3667 | | push rbx | 3085 | | push rbx |
3086 | | xor ecx, ecx | ||
3668 | | cpuid | 3087 | | cpuid |
3669 | | mov [rsi], eax | 3088 | | mov [rsi], eax |
3670 | | mov [rsi+4], ebx | 3089 | | mov [rsi+4], ebx |
@@ -3688,6 +3107,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3688 | | mov eax, [esp+4] // Argument 1 is function number. | 3107 | | mov eax, [esp+4] // Argument 1 is function number. |
3689 | | push edi | 3108 | | push edi |
3690 | | push ebx | 3109 | | push ebx |
3110 | | xor ecx, ecx | ||
3691 | | cpuid | 3111 | | cpuid |
3692 | | mov edi, [esp+16] // Argument 2 is result area. | 3112 | | mov edi, [esp+16] // Argument 2 is result area. |
3693 | | mov [edi], eax | 3113 | | mov [edi], eax |
@@ -3700,6 +3120,86 @@ static void build_subroutines(BuildCtx *ctx) | |||
3700 | | ret | 3120 | | ret |
3701 | |.endif | 3121 | |.endif |
3702 | | | 3122 | | |
3123 | |.define NEXT_TAB, TAB:FCARG1 | ||
3124 | |.define NEXT_IDX, FCARG2 | ||
3125 | |.define NEXT_PTR, RCa | ||
3126 | |.define NEXT_PTRd, RC | ||
3127 | |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
3128 | |.if X64 | ||
3129 | |.define NEXT_TMP, CARG3d | ||
3130 | |.define NEXT_TMPq, CARG3 | ||
3131 | |.define NEXT_ASIZE, CARG4d | ||
3132 | |.macro NEXT_ENTER; .endmacro | ||
3133 | |.macro NEXT_LEAVE; ret; .endmacro | ||
3134 | |.if X64WIN | ||
3135 | |.define NEXT_RES_PTR, [rsp+aword*5] | ||
3136 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
3137 | |.else | ||
3138 | |.define NEXT_RES_PTR, [rsp+aword*1] | ||
3139 | |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
3140 | |.endif | ||
3141 | |.else | ||
3142 | |.define NEXT_ASIZE, esi | ||
3143 | |.define NEXT_TMP, edi | ||
3144 | |.macro NEXT_ENTER; push esi; push edi; .endmacro | ||
3145 | |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro | ||
3146 | |.define NEXT_RES_PTR, [esp+dword*3] | ||
3147 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
3148 | |.endif | ||
3149 | | | ||
3150 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
3151 | |// Next idx returned in edx. | ||
3152 | |->vm_next: | ||
3153 | |.if JIT | ||
3154 | | NEXT_ENTER | ||
3155 | | mov NEXT_ASIZE, NEXT_TAB->asize | ||
3156 | |1: // Traverse array part. | ||
3157 | | cmp NEXT_IDX, NEXT_ASIZE; jae >5 | ||
3158 | | mov NEXT_TMP, NEXT_TAB->array | ||
3159 | | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 | ||
3160 | | lea NEXT_PTR, NEXT_RES_PTR | ||
3161 | |.if X64 | ||
3162 | | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] | ||
3163 | | mov qword [NEXT_PTR], NEXT_TMPq | ||
3164 | |.else | ||
3165 | | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] | ||
3166 | | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] | ||
3167 | | mov dword [NEXT_PTR+4], NEXT_ASIZE | ||
3168 | | mov dword [NEXT_PTR], NEXT_TMP | ||
3169 | |.endif | ||
3170 | |.if DUALNUM | ||
3171 | | mov dword [NEXT_PTR+dword*3], LJ_TISNUM | ||
3172 | | mov dword [NEXT_PTR+dword*2], NEXT_IDX | ||
3173 | |.else | ||
3174 | | cvtsi2sd xmm0, NEXT_IDX | ||
3175 | | movsd qword [NEXT_PTR+dword*2], xmm0 | ||
3176 | |.endif | ||
3177 | | NEXT_RES_IDX 1 | ||
3178 | | NEXT_LEAVE | ||
3179 | |2: // Skip holes in array part. | ||
3180 | | add NEXT_IDX, 1 | ||
3181 | | jmp <1 | ||
3182 | | | ||
3183 | |5: // Traverse hash part. | ||
3184 | | sub NEXT_IDX, NEXT_ASIZE | ||
3185 | |6: | ||
3186 | | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 | ||
3187 | | imul NEXT_PTRd, NEXT_IDX, #NODE | ||
3188 | | add NODE:NEXT_PTRd, dword NEXT_TAB->node | ||
3189 | | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 | ||
3190 | | NEXT_RES_IDXL NEXT_ASIZE+1 | ||
3191 | | NEXT_LEAVE | ||
3192 | |7: // Skip holes in hash part. | ||
3193 | | add NEXT_IDX, 1 | ||
3194 | | jmp <6 | ||
3195 | | | ||
3196 | |9: // End of iteration. Set the key to nil (not the value). | ||
3197 | | NEXT_RES_IDX NEXT_ASIZE | ||
3198 | | lea NEXT_PTR, NEXT_RES_PTR | ||
3199 | | mov dword [NEXT_PTR+dword*3], LJ_TNIL | ||
3200 | | NEXT_LEAVE | ||
3201 | |.endif | ||
3202 | | | ||
3703 | |//----------------------------------------------------------------------- | 3203 | |//----------------------------------------------------------------------- |
3704 | |//-- Assertions --------------------------------------------------------- | 3204 | |//-- Assertions --------------------------------------------------------- |
3705 | |//----------------------------------------------------------------------- | 3205 | |//----------------------------------------------------------------------- |
@@ -3965,19 +3465,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3965 | | // RA is a number. | 3465 | | // RA is a number. |
3966 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | 3466 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp |
3967 | | // RA is a number, RD is an integer. | 3467 | | // RA is a number, RD is an integer. |
3968 | |.if SSE | ||
3969 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3468 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
3970 | | jmp >2 | 3469 | | jmp >2 |
3971 | |.else | ||
3972 | | fld qword [BASE+RA*8] | ||
3973 | | fild dword [BASE+RD*8] | ||
3974 | | jmp >3 | ||
3975 | |.endif | ||
3976 | | | 3470 | | |
3977 | |8: // RA is an integer, RD is not an integer. | 3471 | |8: // RA is an integer, RD is not an integer. |
3978 | | ja ->vmeta_comp | 3472 | | ja ->vmeta_comp |
3979 | | // RA is an integer, RD is a number. | 3473 | | // RA is an integer, RD is a number. |
3980 | |.if SSE | ||
3981 | | cvtsi2sd xmm1, dword [BASE+RA*8] | 3474 | | cvtsi2sd xmm1, dword [BASE+RA*8] |
3982 | | movsd xmm0, qword [BASE+RD*8] | 3475 | | movsd xmm0, qword [BASE+RD*8] |
3983 | | add PC, 4 | 3476 | | add PC, 4 |
@@ -3985,29 +3478,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3985 | | jmp_comp jbe, ja, jb, jae, <9 | 3478 | | jmp_comp jbe, ja, jb, jae, <9 |
3986 | | jmp <6 | 3479 | | jmp <6 |
3987 | |.else | 3480 | |.else |
3988 | | fild dword [BASE+RA*8] | ||
3989 | | jmp >2 | ||
3990 | |.endif | ||
3991 | |.else | ||
3992 | | checknum RA, ->vmeta_comp | 3481 | | checknum RA, ->vmeta_comp |
3993 | | checknum RD, ->vmeta_comp | 3482 | | checknum RD, ->vmeta_comp |
3994 | |.endif | 3483 | |.endif |
3995 | |.if SSE | ||
3996 | |1: | 3484 | |1: |
3997 | | movsd xmm0, qword [BASE+RD*8] | 3485 | | movsd xmm0, qword [BASE+RD*8] |
3998 | |2: | 3486 | |2: |
3999 | | add PC, 4 | 3487 | | add PC, 4 |
4000 | | ucomisd xmm0, qword [BASE+RA*8] | 3488 | | ucomisd xmm0, qword [BASE+RA*8] |
4001 | |3: | 3489 | |3: |
4002 | |.else | ||
4003 | |1: | ||
4004 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | ||
4005 | |2: | ||
4006 | | fld qword [BASE+RD*8] | ||
4007 | |3: | ||
4008 | | add PC, 4 | ||
4009 | | fcomparepp | ||
4010 | |.endif | ||
4011 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3490 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
4012 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3491 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
4013 | |.if DUALNUM | 3492 | |.if DUALNUM |
@@ -4047,43 +3526,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4047 | | // RD is a number. | 3526 | | // RD is a number. |
4048 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | 3527 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 |
4049 | | // RD is a number, RA is an integer. | 3528 | | // RD is a number, RA is an integer. |
4050 | |.if SSE | ||
4051 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3529 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4052 | |.else | ||
4053 | | fild dword [BASE+RA*8] | ||
4054 | |.endif | ||
4055 | | jmp >2 | 3530 | | jmp >2 |
4056 | | | 3531 | | |
4057 | |8: // RD is an integer, RA is not an integer. | 3532 | |8: // RD is an integer, RA is not an integer. |
4058 | | ja >5 | 3533 | | ja >5 |
4059 | | // RD is an integer, RA is a number. | 3534 | | // RD is an integer, RA is a number. |
4060 | |.if SSE | ||
4061 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3535 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
4062 | | ucomisd xmm0, qword [BASE+RA*8] | 3536 | | ucomisd xmm0, qword [BASE+RA*8] |
4063 | |.else | ||
4064 | | fild dword [BASE+RD*8] | ||
4065 | | fld qword [BASE+RA*8] | ||
4066 | |.endif | ||
4067 | | jmp >4 | 3537 | | jmp >4 |
4068 | | | 3538 | | |
4069 | |.else | 3539 | |.else |
4070 | | cmp RB, LJ_TISNUM; jae >5 | 3540 | | cmp RB, LJ_TISNUM; jae >5 |
4071 | | checknum RA, >5 | 3541 | | checknum RA, >5 |
4072 | |.endif | 3542 | |.endif |
4073 | |.if SSE | ||
4074 | |1: | 3543 | |1: |
4075 | | movsd xmm0, qword [BASE+RA*8] | 3544 | | movsd xmm0, qword [BASE+RA*8] |
4076 | |2: | 3545 | |2: |
4077 | | ucomisd xmm0, qword [BASE+RD*8] | 3546 | | ucomisd xmm0, qword [BASE+RD*8] |
4078 | |4: | 3547 | |4: |
4079 | |.else | ||
4080 | |1: | ||
4081 | | fld qword [BASE+RA*8] | ||
4082 | |2: | ||
4083 | | fld qword [BASE+RD*8] | ||
4084 | |4: | ||
4085 | | fcomparepp | ||
4086 | |.endif | ||
4087 | iseqne_fp: | 3548 | iseqne_fp: |
4088 | if (vk) { | 3549 | if (vk) { |
4089 | | jp >2 // Unordered means not equal. | 3550 | | jp >2 // Unordered means not equal. |
@@ -4206,39 +3667,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4206 | | // RA is a number. | 3667 | | // RA is a number. |
4207 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | 3668 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 |
4208 | | // RA is a number, RD is an integer. | 3669 | | // RA is a number, RD is an integer. |
4209 | |.if SSE | ||
4210 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | 3670 | | cvtsi2sd xmm0, dword [KBASE+RD*8] |
4211 | |.else | ||
4212 | | fild dword [KBASE+RD*8] | ||
4213 | |.endif | ||
4214 | | jmp >2 | 3671 | | jmp >2 |
4215 | | | 3672 | | |
4216 | |8: // RA is an integer, RD is a number. | 3673 | |8: // RA is an integer, RD is a number. |
4217 | |.if SSE | ||
4218 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3674 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4219 | | ucomisd xmm0, qword [KBASE+RD*8] | 3675 | | ucomisd xmm0, qword [KBASE+RD*8] |
4220 | |.else | ||
4221 | | fild dword [BASE+RA*8] | ||
4222 | | fld qword [KBASE+RD*8] | ||
4223 | |.endif | ||
4224 | | jmp >4 | 3676 | | jmp >4 |
4225 | |.else | 3677 | |.else |
4226 | | cmp RB, LJ_TISNUM; jae >3 | 3678 | | cmp RB, LJ_TISNUM; jae >3 |
4227 | |.endif | 3679 | |.endif |
4228 | |.if SSE | ||
4229 | |1: | 3680 | |1: |
4230 | | movsd xmm0, qword [KBASE+RD*8] | 3681 | | movsd xmm0, qword [KBASE+RD*8] |
4231 | |2: | 3682 | |2: |
4232 | | ucomisd xmm0, qword [BASE+RA*8] | 3683 | | ucomisd xmm0, qword [BASE+RA*8] |
4233 | |4: | 3684 | |4: |
4234 | |.else | ||
4235 | |1: | ||
4236 | | fld qword [KBASE+RD*8] | ||
4237 | |2: | ||
4238 | | fld qword [BASE+RA*8] | ||
4239 | |4: | ||
4240 | | fcomparepp | ||
4241 | |.endif | ||
4242 | goto iseqne_fp; | 3685 | goto iseqne_fp; |
4243 | case BC_ISEQP: case BC_ISNEP: | 3686 | case BC_ISEQP: case BC_ISNEP: |
4244 | vk = op == BC_ISEQP; | 3687 | vk = op == BC_ISEQP; |
@@ -4289,6 +3732,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4289 | | ins_next | 3732 | | ins_next |
4290 | break; | 3733 | break; |
4291 | 3734 | ||
3735 | case BC_ISTYPE: | ||
3736 | | ins_AD // RA = src, RD = -type | ||
3737 | | add RD, [BASE+RA*8+4] | ||
3738 | | jne ->vmeta_istype | ||
3739 | | ins_next | ||
3740 | break; | ||
3741 | case BC_ISNUM: | ||
3742 | | ins_AD // RA = src, RD = -(TISNUM-1) | ||
3743 | | checknum RA, ->vmeta_istype | ||
3744 | | ins_next | ||
3745 | break; | ||
3746 | |||
4292 | /* -- Unary ops --------------------------------------------------------- */ | 3747 | /* -- Unary ops --------------------------------------------------------- */ |
4293 | 3748 | ||
4294 | case BC_MOV: | 3749 | case BC_MOV: |
@@ -4332,16 +3787,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4332 | |.else | 3787 | |.else |
4333 | | checknum RD, ->vmeta_unm | 3788 | | checknum RD, ->vmeta_unm |
4334 | |.endif | 3789 | |.endif |
4335 | |.if SSE | ||
4336 | | movsd xmm0, qword [BASE+RD*8] | 3790 | | movsd xmm0, qword [BASE+RD*8] |
4337 | | sseconst_sign xmm1, RDa | 3791 | | sseconst_sign xmm1, RDa |
4338 | | xorps xmm0, xmm1 | 3792 | | xorps xmm0, xmm1 |
4339 | | movsd qword [BASE+RA*8], xmm0 | 3793 | | movsd qword [BASE+RA*8], xmm0 |
4340 | |.else | ||
4341 | | fld qword [BASE+RD*8] | ||
4342 | | fchs | ||
4343 | | fstp qword [BASE+RA*8] | ||
4344 | |.endif | ||
4345 | |.if DUALNUM | 3794 | |.if DUALNUM |
4346 | | jmp <9 | 3795 | | jmp <9 |
4347 | |.else | 3796 | |.else |
@@ -4357,15 +3806,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4357 | |1: | 3806 | |1: |
4358 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 3807 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4359 | | mov dword [BASE+RA*8], RD | 3808 | | mov dword [BASE+RA*8], RD |
4360 | |.elif SSE | 3809 | |.else |
4361 | | xorps xmm0, xmm0 | 3810 | | xorps xmm0, xmm0 |
4362 | | cvtsi2sd xmm0, dword STR:RD->len | 3811 | | cvtsi2sd xmm0, dword STR:RD->len |
4363 | |1: | 3812 | |1: |
4364 | | movsd qword [BASE+RA*8], xmm0 | 3813 | | movsd qword [BASE+RA*8], xmm0 |
4365 | |.else | ||
4366 | | fild dword STR:RD->len | ||
4367 | |1: | ||
4368 | | fstp qword [BASE+RA*8] | ||
4369 | |.endif | 3814 | |.endif |
4370 | | ins_next | 3815 | | ins_next |
4371 | |2: | 3816 | |2: |
@@ -4383,11 +3828,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4383 | | // Length of table returned in eax (RD). | 3828 | | // Length of table returned in eax (RD). |
4384 | |.if DUALNUM | 3829 | |.if DUALNUM |
4385 | | // Nothing to do. | 3830 | | // Nothing to do. |
4386 | |.elif SSE | ||
4387 | | cvtsi2sd xmm0, RD | ||
4388 | |.else | 3831 | |.else |
4389 | | mov ARG1, RD | 3832 | | cvtsi2sd xmm0, RD |
4390 | | fild ARG1 | ||
4391 | |.endif | 3833 | |.endif |
4392 | | mov BASE, RB // Restore BASE. | 3834 | | mov BASE, RB // Restore BASE. |
4393 | | movzx RA, PC_RA | 3835 | | movzx RA, PC_RA |
@@ -4402,7 +3844,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4402 | 3844 | ||
4403 | /* -- Binary ops -------------------------------------------------------- */ | 3845 | /* -- Binary ops -------------------------------------------------------- */ |
4404 | 3846 | ||
4405 | |.macro ins_arithpre, x87ins, sseins, ssereg | 3847 | |.macro ins_arithpre, sseins, ssereg |
4406 | | ins_ABC | 3848 | | ins_ABC |
4407 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3849 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
4408 | ||switch (vk) { | 3850 | ||switch (vk) { |
@@ -4411,37 +3853,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4411 | | .if DUALNUM | 3853 | | .if DUALNUM |
4412 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | 3854 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn |
4413 | | .endif | 3855 | | .endif |
4414 | | .if SSE | 3856 | | movsd xmm0, qword [BASE+RB*8] |
4415 | | movsd xmm0, qword [BASE+RB*8] | 3857 | | sseins ssereg, qword [KBASE+RC*8] |
4416 | | sseins ssereg, qword [KBASE+RC*8] | ||
4417 | | .else | ||
4418 | | fld qword [BASE+RB*8] | ||
4419 | | x87ins qword [KBASE+RC*8] | ||
4420 | | .endif | ||
4421 | || break; | 3858 | || break; |
4422 | ||case 1: | 3859 | ||case 1: |
4423 | | checknum RB, ->vmeta_arith_nv | 3860 | | checknum RB, ->vmeta_arith_nv |
4424 | | .if DUALNUM | 3861 | | .if DUALNUM |
4425 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | 3862 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv |
4426 | | .endif | 3863 | | .endif |
4427 | | .if SSE | 3864 | | movsd xmm0, qword [KBASE+RC*8] |
4428 | | movsd xmm0, qword [KBASE+RC*8] | 3865 | | sseins ssereg, qword [BASE+RB*8] |
4429 | | sseins ssereg, qword [BASE+RB*8] | ||
4430 | | .else | ||
4431 | | fld qword [KBASE+RC*8] | ||
4432 | | x87ins qword [BASE+RB*8] | ||
4433 | | .endif | ||
4434 | || break; | 3866 | || break; |
4435 | ||default: | 3867 | ||default: |
4436 | | checknum RB, ->vmeta_arith_vv | 3868 | | checknum RB, ->vmeta_arith_vv |
4437 | | checknum RC, ->vmeta_arith_vv | 3869 | | checknum RC, ->vmeta_arith_vv |
4438 | | .if SSE | 3870 | | movsd xmm0, qword [BASE+RB*8] |
4439 | | movsd xmm0, qword [BASE+RB*8] | 3871 | | sseins ssereg, qword [BASE+RC*8] |
4440 | | sseins ssereg, qword [BASE+RC*8] | ||
4441 | | .else | ||
4442 | | fld qword [BASE+RB*8] | ||
4443 | | x87ins qword [BASE+RC*8] | ||
4444 | | .endif | ||
4445 | || break; | 3872 | || break; |
4446 | ||} | 3873 | ||} |
4447 | |.endmacro | 3874 | |.endmacro |
@@ -4479,55 +3906,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4479 | |.endmacro | 3906 | |.endmacro |
4480 | | | 3907 | | |
4481 | |.macro ins_arithpost | 3908 | |.macro ins_arithpost |
4482 | |.if SSE | ||
4483 | | movsd qword [BASE+RA*8], xmm0 | 3909 | | movsd qword [BASE+RA*8], xmm0 |
4484 | |.else | ||
4485 | | fstp qword [BASE+RA*8] | ||
4486 | |.endif | ||
4487 | |.endmacro | 3910 | |.endmacro |
4488 | | | 3911 | | |
4489 | |.macro ins_arith, x87ins, sseins | 3912 | |.macro ins_arith, sseins |
4490 | | ins_arithpre x87ins, sseins, xmm0 | 3913 | | ins_arithpre sseins, xmm0 |
4491 | | ins_arithpost | 3914 | | ins_arithpost |
4492 | | ins_next | 3915 | | ins_next |
4493 | |.endmacro | 3916 | |.endmacro |
4494 | | | 3917 | | |
4495 | |.macro ins_arith, intins, x87ins, sseins | 3918 | |.macro ins_arith, intins, sseins |
4496 | |.if DUALNUM | 3919 | |.if DUALNUM |
4497 | | ins_arithdn intins | 3920 | | ins_arithdn intins |
4498 | |.else | 3921 | |.else |
4499 | | ins_arith, x87ins, sseins | 3922 | | ins_arith, sseins |
4500 | |.endif | 3923 | |.endif |
4501 | |.endmacro | 3924 | |.endmacro |
4502 | 3925 | ||
4503 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 3926 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
4504 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3927 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
4505 | | ins_arith add, fadd, addsd | 3928 | | ins_arith add, addsd |
4506 | break; | 3929 | break; |
4507 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3930 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
4508 | | ins_arith sub, fsub, subsd | 3931 | | ins_arith sub, subsd |
4509 | break; | 3932 | break; |
4510 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3933 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
4511 | | ins_arith imul, fmul, mulsd | 3934 | | ins_arith imul, mulsd |
4512 | break; | 3935 | break; |
4513 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3936 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
4514 | | ins_arith fdiv, divsd | 3937 | | ins_arith divsd |
4515 | break; | 3938 | break; |
4516 | case BC_MODVN: | 3939 | case BC_MODVN: |
4517 | | ins_arithpre fld, movsd, xmm1 | 3940 | | ins_arithpre movsd, xmm1 |
4518 | |->BC_MODVN_Z: | 3941 | |->BC_MODVN_Z: |
4519 | | call ->vm_mod | 3942 | | call ->vm_mod |
4520 | | ins_arithpost | 3943 | | ins_arithpost |
4521 | | ins_next | 3944 | | ins_next |
4522 | break; | 3945 | break; |
4523 | case BC_MODNV: case BC_MODVV: | 3946 | case BC_MODNV: case BC_MODVV: |
4524 | | ins_arithpre fld, movsd, xmm1 | 3947 | | ins_arithpre movsd, xmm1 |
4525 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3948 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
4526 | break; | 3949 | break; |
4527 | case BC_POW: | 3950 | case BC_POW: |
4528 | | ins_arithpre fld, movsd, xmm1 | 3951 | | ins_arithpre movsd, xmm1 |
4529 | | call ->vm_pow | 3952 | | mov RB, BASE |
3953 | |.if not X64 | ||
3954 | | movsd FPARG1, xmm0 | ||
3955 | | movsd FPARG3, xmm1 | ||
3956 | |.endif | ||
3957 | | call extern pow | ||
3958 | | movzx RA, PC_RA | ||
3959 | | mov BASE, RB | ||
3960 | |.if X64 | ||
4530 | | ins_arithpost | 3961 | | ins_arithpost |
3962 | |.else | ||
3963 | | fstp qword [BASE+RA*8] | ||
3964 | |.endif | ||
4531 | | ins_next | 3965 | | ins_next |
4532 | break; | 3966 | break; |
4533 | 3967 | ||
@@ -4595,25 +4029,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4595 | | movsx RD, RDW | 4029 | | movsx RD, RDW |
4596 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4030 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4597 | | mov dword [BASE+RA*8], RD | 4031 | | mov dword [BASE+RA*8], RD |
4598 | |.elif SSE | 4032 | |.else |
4599 | | movsx RD, RDW // Sign-extend literal. | 4033 | | movsx RD, RDW // Sign-extend literal. |
4600 | | cvtsi2sd xmm0, RD | 4034 | | cvtsi2sd xmm0, RD |
4601 | | movsd qword [BASE+RA*8], xmm0 | 4035 | | movsd qword [BASE+RA*8], xmm0 |
4602 | |.else | ||
4603 | | fild PC_RD // Refetch signed RD from instruction. | ||
4604 | | fstp qword [BASE+RA*8] | ||
4605 | |.endif | 4036 | |.endif |
4606 | | ins_next | 4037 | | ins_next |
4607 | break; | 4038 | break; |
4608 | case BC_KNUM: | 4039 | case BC_KNUM: |
4609 | | ins_AD // RA = dst, RD = num const | 4040 | | ins_AD // RA = dst, RD = num const |
4610 | |.if SSE | ||
4611 | | movsd xmm0, qword [KBASE+RD*8] | 4041 | | movsd xmm0, qword [KBASE+RD*8] |
4612 | | movsd qword [BASE+RA*8], xmm0 | 4042 | | movsd qword [BASE+RA*8], xmm0 |
4613 | |.else | ||
4614 | | fld qword [KBASE+RD*8] | ||
4615 | | fstp qword [BASE+RA*8] | ||
4616 | |.endif | ||
4617 | | ins_next | 4043 | | ins_next |
4618 | break; | 4044 | break; |
4619 | case BC_KPRI: | 4045 | case BC_KPRI: |
@@ -4720,18 +4146,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4720 | case BC_USETN: | 4146 | case BC_USETN: |
4721 | | ins_AD // RA = upvalue #, RD = num const | 4147 | | ins_AD // RA = upvalue #, RD = num const |
4722 | | mov LFUNC:RB, [BASE-8] | 4148 | | mov LFUNC:RB, [BASE-8] |
4723 | |.if SSE | ||
4724 | | movsd xmm0, qword [KBASE+RD*8] | 4149 | | movsd xmm0, qword [KBASE+RD*8] |
4725 | |.else | ||
4726 | | fld qword [KBASE+RD*8] | ||
4727 | |.endif | ||
4728 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 4150 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
4729 | | mov RA, UPVAL:RB->v | 4151 | | mov RA, UPVAL:RB->v |
4730 | |.if SSE | ||
4731 | | movsd qword [RA], xmm0 | 4152 | | movsd qword [RA], xmm0 |
4732 | |.else | ||
4733 | | fstp qword [RA] | ||
4734 | |.endif | ||
4735 | | ins_next | 4153 | | ins_next |
4736 | break; | 4154 | break; |
4737 | case BC_USETP: | 4155 | case BC_USETP: |
@@ -4885,18 +4303,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4885 | |.else | 4303 | |.else |
4886 | | // Convert number to int and back and compare. | 4304 | | // Convert number to int and back and compare. |
4887 | | checknum RC, >5 | 4305 | | checknum RC, >5 |
4888 | |.if SSE | ||
4889 | | movsd xmm0, qword [BASE+RC*8] | 4306 | | movsd xmm0, qword [BASE+RC*8] |
4890 | | cvtsd2si RC, xmm0 | 4307 | | cvttsd2si RC, xmm0 |
4891 | | cvtsi2sd xmm1, RC | 4308 | | cvtsi2sd xmm1, RC |
4892 | | ucomisd xmm0, xmm1 | 4309 | | ucomisd xmm0, xmm1 |
4893 | |.else | ||
4894 | | fld qword [BASE+RC*8] | ||
4895 | | fist ARG1 | ||
4896 | | fild ARG1 | ||
4897 | | fcomparepp | ||
4898 | | mov RC, ARG1 | ||
4899 | |.endif | ||
4900 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | 4310 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |
4901 | |.endif | 4311 | |.endif |
4902 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4312 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -4942,7 +4352,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4942 | | mov TAB:RB, [BASE+RB*8] | 4352 | | mov TAB:RB, [BASE+RB*8] |
4943 | |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | 4353 | |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. |
4944 | | mov RA, TAB:RB->hmask | 4354 | | mov RA, TAB:RB->hmask |
4945 | | and RA, STR:RC->hash | 4355 | | and RA, STR:RC->sid |
4946 | | imul RA, #NODE | 4356 | | imul RA, #NODE |
4947 | | add NODE:RA, TAB:RB->node | 4357 | | add NODE:RA, TAB:RB->node |
4948 | |1: | 4358 | |1: |
@@ -5020,6 +4430,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5020 | | mov dword [BASE+RA*8+4], LJ_TNIL | 4430 | | mov dword [BASE+RA*8+4], LJ_TNIL |
5021 | | jmp <1 | 4431 | | jmp <1 |
5022 | break; | 4432 | break; |
4433 | case BC_TGETR: | ||
4434 | | ins_ABC // RA = dst, RB = table, RC = key | ||
4435 | | mov TAB:RB, [BASE+RB*8] | ||
4436 | |.if DUALNUM | ||
4437 | | mov RC, dword [BASE+RC*8] | ||
4438 | |.else | ||
4439 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4440 | |.endif | ||
4441 | | cmp RC, TAB:RB->asize | ||
4442 | | jae ->vmeta_tgetr // Not in array part? Use fallback. | ||
4443 | | shl RC, 3 | ||
4444 | | add RC, TAB:RB->array | ||
4445 | | // Get array slot. | ||
4446 | |->BC_TGETR_Z: | ||
4447 | |.if X64 | ||
4448 | | mov RBa, [RC] | ||
4449 | | mov [BASE+RA*8], RBa | ||
4450 | |.else | ||
4451 | | mov RB, [RC] | ||
4452 | | mov RC, [RC+4] | ||
4453 | | mov [BASE+RA*8], RB | ||
4454 | | mov [BASE+RA*8+4], RC | ||
4455 | |.endif | ||
4456 | |->BC_TGETR2_Z: | ||
4457 | | ins_next | ||
4458 | break; | ||
5023 | 4459 | ||
5024 | case BC_TSETV: | 4460 | case BC_TSETV: |
5025 | | ins_ABC // RA = src, RB = table, RC = key | 4461 | | ins_ABC // RA = src, RB = table, RC = key |
@@ -5033,18 +4469,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5033 | |.else | 4469 | |.else |
5034 | | // Convert number to int and back and compare. | 4470 | | // Convert number to int and back and compare. |
5035 | | checknum RC, >5 | 4471 | | checknum RC, >5 |
5036 | |.if SSE | ||
5037 | | movsd xmm0, qword [BASE+RC*8] | 4472 | | movsd xmm0, qword [BASE+RC*8] |
5038 | | cvtsd2si RC, xmm0 | 4473 | | cvttsd2si RC, xmm0 |
5039 | | cvtsi2sd xmm1, RC | 4474 | | cvtsi2sd xmm1, RC |
5040 | | ucomisd xmm0, xmm1 | 4475 | | ucomisd xmm0, xmm1 |
5041 | |.else | ||
5042 | | fld qword [BASE+RC*8] | ||
5043 | | fist ARG1 | ||
5044 | | fild ARG1 | ||
5045 | | fcomparepp | ||
5046 | | mov RC, ARG1 | ||
5047 | |.endif | ||
5048 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | 4476 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |
5049 | |.endif | 4477 | |.endif |
5050 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4478 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -5095,7 +4523,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5095 | | mov TAB:RB, [BASE+RB*8] | 4523 | | mov TAB:RB, [BASE+RB*8] |
5096 | |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | 4524 | |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. |
5097 | | mov RA, TAB:RB->hmask | 4525 | | mov RA, TAB:RB->hmask |
5098 | | and RA, STR:RC->hash | 4526 | | and RA, STR:RC->sid |
5099 | | imul RA, #NODE | 4527 | | imul RA, #NODE |
5100 | | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | 4528 | | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. |
5101 | | add NODE:RA, TAB:RB->node | 4529 | | add NODE:RA, TAB:RB->node |
@@ -5214,6 +4642,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5214 | | movzx RA, PC_RA // Restore RA. | 4642 | | movzx RA, PC_RA // Restore RA. |
5215 | | jmp <2 | 4643 | | jmp <2 |
5216 | break; | 4644 | break; |
4645 | case BC_TSETR: | ||
4646 | | ins_ABC // RA = src, RB = table, RC = key | ||
4647 | | mov TAB:RB, [BASE+RB*8] | ||
4648 | |.if DUALNUM | ||
4649 | | mov RC, dword [BASE+RC*8] | ||
4650 | |.else | ||
4651 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4652 | |.endif | ||
4653 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
4654 | | jnz >7 | ||
4655 | |2: | ||
4656 | | cmp RC, TAB:RB->asize | ||
4657 | | jae ->vmeta_tsetr | ||
4658 | | shl RC, 3 | ||
4659 | | add RC, TAB:RB->array | ||
4660 | | // Set array slot. | ||
4661 | |->BC_TSETR_Z: | ||
4662 | |.if X64 | ||
4663 | | mov RBa, [BASE+RA*8] | ||
4664 | | mov [RC], RBa | ||
4665 | |.else | ||
4666 | | mov RB, [BASE+RA*8+4] | ||
4667 | | mov RA, [BASE+RA*8] | ||
4668 | | mov [RC+4], RB | ||
4669 | | mov [RC], RA | ||
4670 | |.endif | ||
4671 | | ins_next | ||
4672 | | | ||
4673 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4674 | | barrierback TAB:RB, RA | ||
4675 | | movzx RA, PC_RA // Restore RA. | ||
4676 | | jmp <2 | ||
4677 | break; | ||
5217 | 4678 | ||
5218 | case BC_TSETM: | 4679 | case BC_TSETM: |
5219 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | 4680 | | ins_AD // RA = base (table at base-1), RD = num const (start index) |
@@ -5390,10 +4851,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5390 | break; | 4851 | break; |
5391 | 4852 | ||
5392 | case BC_ITERN: | 4853 | case BC_ITERN: |
5393 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
5394 | |.if JIT | 4854 | |.if JIT |
5395 | | // NYI: add hotloop, record BC_ITERN. | 4855 | | hotloop RB |
5396 | |.endif | 4856 | |.endif |
4857 | |->vm_IITERN: | ||
4858 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
5397 | | mov TMP1, KBASE // Need two more free registers. | 4859 | | mov TMP1, KBASE // Need two more free registers. |
5398 | | mov TMP2, DISPATCH | 4860 | | mov TMP2, DISPATCH |
5399 | | mov TAB:RB, [BASE+RA*8-16] | 4861 | | mov TAB:RB, [BASE+RA*8-16] |
@@ -5407,10 +4869,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5407 | |.if DUALNUM | 4869 | |.if DUALNUM |
5408 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4870 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
5409 | | mov dword [BASE+RA*8], RC | 4871 | | mov dword [BASE+RA*8], RC |
5410 | |.elif SSE | ||
5411 | | cvtsi2sd xmm0, RC | ||
5412 | |.else | 4872 | |.else |
5413 | | fild dword [BASE+RA*8-8] | 4873 | | cvtsi2sd xmm0, RC |
5414 | |.endif | 4874 | |.endif |
5415 | | // Copy array slot to returned value. | 4875 | | // Copy array slot to returned value. |
5416 | |.if X64 | 4876 | |.if X64 |
@@ -5426,10 +4886,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5426 | | // Return array index as a numeric key. | 4886 | | // Return array index as a numeric key. |
5427 | |.if DUALNUM | 4887 | |.if DUALNUM |
5428 | | // See above. | 4888 | | // See above. |
5429 | |.elif SSE | ||
5430 | | movsd qword [BASE+RA*8], xmm0 | ||
5431 | |.else | 4889 | |.else |
5432 | | fstp qword [BASE+RA*8] | 4890 | | movsd qword [BASE+RA*8], xmm0 |
5433 | |.endif | 4891 | |.endif |
5434 | | mov [BASE+RA*8-8], RC // Update control var. | 4892 | | mov [BASE+RA*8-8], RC // Update control var. |
5435 | |2: | 4893 | |2: |
@@ -5442,9 +4900,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5442 | | | 4900 | | |
5443 | |4: // Skip holes in array part. | 4901 | |4: // Skip holes in array part. |
5444 | | add RC, 1 | 4902 | | add RC, 1 |
5445 | |.if not (DUALNUM or SSE) | ||
5446 | | mov [BASE+RA*8-8], RC | ||
5447 | |.endif | ||
5448 | | jmp <1 | 4903 | | jmp <1 |
5449 | | | 4904 | | |
5450 | |5: // Traverse hash part. | 4905 | |5: // Traverse hash part. |
@@ -5488,14 +4943,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5488 | | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 | 4943 | | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 |
5489 | | branchPC RD | 4944 | | branchPC RD |
5490 | | mov dword [BASE+RA*8-8], 0 // Initialize control var. | 4945 | | mov dword [BASE+RA*8-8], 0 // Initialize control var. |
5491 | | mov dword [BASE+RA*8-4], 0xfffe7fff | 4946 | | mov dword [BASE+RA*8-4], LJ_KEYINDEX |
5492 | |1: | 4947 | |1: |
5493 | | ins_next | 4948 | | ins_next |
5494 | |5: // Despecialize bytecode if any of the checks fail. | 4949 | |5: // Despecialize bytecode if any of the checks fail. |
5495 | | mov PC_OP, BC_JMP | 4950 | | mov PC_OP, BC_JMP |
5496 | | branchPC RD | 4951 | | branchPC RD |
4952 | |.if JIT | ||
4953 | | cmp byte [PC], BC_ITERN | ||
4954 | | jne >6 | ||
4955 | |.endif | ||
5497 | | mov byte [PC], BC_ITERC | 4956 | | mov byte [PC], BC_ITERC |
5498 | | jmp <1 | 4957 | | jmp <1 |
4958 | |.if JIT | ||
4959 | |6: // Unpatch JLOOP. | ||
4960 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
4961 | | movzx RC, word [PC+2] | ||
4962 | | mov TRACE:RA, [RA+RC*4] | ||
4963 | | mov eax, TRACE:RA->startins | ||
4964 | | mov al, BC_ITERC | ||
4965 | | mov dword [PC], eax | ||
4966 | | jmp <1 | ||
4967 | |.endif | ||
5499 | break; | 4968 | break; |
5500 | 4969 | ||
5501 | case BC_VARG: | 4970 | case BC_VARG: |
@@ -5778,7 +5247,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5778 | if (!vk) { | 5247 | if (!vk) { |
5779 | | cmp RB, LJ_TISNUM; jae ->vmeta_for | 5248 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
5780 | } | 5249 | } |
5781 | |.if SSE | ||
5782 | | movsd xmm0, qword FOR_IDX | 5250 | | movsd xmm0, qword FOR_IDX |
5783 | | movsd xmm1, qword FOR_STOP | 5251 | | movsd xmm1, qword FOR_STOP |
5784 | if (vk) { | 5252 | if (vk) { |
@@ -5791,22 +5259,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5791 | | ucomisd xmm1, xmm0 | 5259 | | ucomisd xmm1, xmm0 |
5792 | |1: | 5260 | |1: |
5793 | | movsd qword FOR_EXT, xmm0 | 5261 | | movsd qword FOR_EXT, xmm0 |
5794 | |.else | ||
5795 | | fld qword FOR_STOP | ||
5796 | | fld qword FOR_IDX | ||
5797 | if (vk) { | ||
5798 | | fadd qword FOR_STEP // nidx = idx + step | ||
5799 | | fst qword FOR_IDX | ||
5800 | | fst qword FOR_EXT | ||
5801 | | test RB, RB; js >1 | ||
5802 | } else { | ||
5803 | | fst qword FOR_EXT | ||
5804 | | jl >1 | ||
5805 | } | ||
5806 | | fxch // Swap lim/(n)idx if step non-negative. | ||
5807 | |1: | ||
5808 | | fcomparepp | ||
5809 | |.endif | ||
5810 | if (op == BC_FORI) { | 5262 | if (op == BC_FORI) { |
5811 | |.if DUALNUM | 5263 | |.if DUALNUM |
5812 | | jnb <7 | 5264 | | jnb <7 |
@@ -5834,11 +5286,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5834 | |2: | 5286 | |2: |
5835 | | ins_next | 5287 | | ins_next |
5836 | |.endif | 5288 | |.endif |
5837 | |.if SSE | 5289 | | |
5838 | |3: // Invert comparison if step is negative. | 5290 | |3: // Invert comparison if step is negative. |
5839 | | ucomisd xmm0, xmm1 | 5291 | | ucomisd xmm0, xmm1 |
5840 | | jmp <1 | 5292 | | jmp <1 |
5841 | |.endif | ||
5842 | break; | 5293 | break; |
5843 | 5294 | ||
5844 | case BC_ITERL: | 5295 | case BC_ITERL: |
@@ -5876,7 +5327,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5876 | | ins_A // RA = base, RD = target (loop extent) | 5327 | | ins_A // RA = base, RD = target (loop extent) |
5877 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | 5328 | | // Note: RA/RD is only used by trace recorder to determine scope/extent |
5878 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | 5329 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. |
5879 | |.if JIT | 5330 | |.if JIT |
5880 | | hotloop RB | 5331 | | hotloop RB |
5881 | |.endif | 5332 | |.endif |
5882 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | 5333 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. |
@@ -5895,7 +5346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5895 | | mov RDa, TRACE:RD->mcode | 5346 | | mov RDa, TRACE:RD->mcode |
5896 | | mov L:RB, SAVE_L | 5347 | | mov L:RB, SAVE_L |
5897 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | 5348 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE |
5898 | | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB | 5349 | | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB |
5899 | | // Save additional callee-save registers only used in compiled code. | 5350 | | // Save additional callee-save registers only used in compiled code. |
5900 | |.if X64WIN | 5351 | |.if X64WIN |
5901 | | mov TMPQ, r12 | 5352 | | mov TMPQ, r12 |
@@ -6062,9 +5513,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
6062 | | // (lua_State *L, lua_CFunction f) | 5513 | | // (lua_State *L, lua_CFunction f) |
6063 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] | 5514 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] |
6064 | } | 5515 | } |
6065 | | set_vmstate INTERP | ||
6066 | | // nresults returned in eax (RD). | 5516 | | // nresults returned in eax (RD). |
6067 | | mov BASE, L:RB->base | 5517 | | mov BASE, L:RB->base |
5518 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
5519 | | set_vmstate INTERP | ||
6068 | | lea RA, [BASE+RD*8] | 5520 | | lea RA, [BASE+RD*8] |
6069 | | neg RA | 5521 | | neg RA |
6070 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | 5522 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 |
@@ -6177,7 +5629,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
6177 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | 5629 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); |
6178 | #endif | 5630 | #endif |
6179 | #if !LJ_NO_UNWIND | 5631 | #if !LJ_NO_UNWIND |
6180 | #if (defined(__sun__) && defined(__svr4__)) | 5632 | #if LJ_TARGET_SOLARIS |
6181 | #if LJ_64 | 5633 | #if LJ_64 |
6182 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); | 5634 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); |
6183 | #else | 5635 | #else |
@@ -6384,15 +5836,21 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
6384 | "LEFDEY:\n\n", fcsize); | 5836 | "LEFDEY:\n\n", fcsize); |
6385 | } | 5837 | } |
6386 | #endif | 5838 | #endif |
6387 | #if LJ_64 | 5839 | #if !LJ_64 |
6388 | fprintf(ctx->fp, "\t.subsections_via_symbols\n"); | ||
6389 | #else | ||
6390 | fprintf(ctx->fp, | 5840 | fprintf(ctx->fp, |
6391 | "\t.non_lazy_symbol_pointer\n" | 5841 | "\t.non_lazy_symbol_pointer\n" |
6392 | "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" | 5842 | "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" |
6393 | ".indirect_symbol _lj_err_unwind_dwarf\n" | 5843 | ".indirect_symbol _lj_err_unwind_dwarf\n" |
6394 | ".long 0\n"); | 5844 | ".long 0\n\n"); |
5845 | fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); | ||
5846 | { | ||
5847 | const char *const *xn; | ||
5848 | for (xn = ctx->extnames; *xn; xn++) | ||
5849 | if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) | ||
5850 | fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); | ||
5851 | } | ||
6395 | #endif | 5852 | #endif |
5853 | fprintf(ctx->fp, ".subsections_via_symbols\n"); | ||
6396 | } | 5854 | } |
6397 | break; | 5855 | break; |
6398 | #endif | 5856 | #endif |
diff --git a/src/xb1build.bat b/src/xb1build.bat new file mode 100644 index 00000000..2eb68171 --- /dev/null +++ b/src/xb1build.bat | |||
@@ -0,0 +1,101 @@ | |||
1 | @rem Script to build LuaJIT with the Xbox One SDK. | ||
2 | @rem Donated to the public domain. | ||
3 | @rem | ||
4 | @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) | ||
5 | @rem Then cd to this directory and run this script. | ||
6 | |||
7 | @if not defined INCLUDE goto :FAIL | ||
8 | @if not defined DurangoXDK goto :FAIL | ||
9 | |||
10 | @setlocal | ||
11 | @echo ---- Host compiler ---- | ||
12 | @set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE | ||
13 | @set LJLINK=link /nologo | ||
14 | @set LJMT=mt /nologo | ||
15 | @set DASMDIR=..\dynasm | ||
16 | @set DASM=%DASMDIR%\dynasm.lua | ||
17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c | ||
18 | |||
19 | %LJCOMPILE% host\minilua.c | ||
20 | @if errorlevel 1 goto :BAD | ||
21 | %LJLINK% /out:minilua.exe minilua.obj | ||
22 | @if errorlevel 1 goto :BAD | ||
23 | if exist minilua.exe.manifest^ | ||
24 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe | ||
25 | |||
26 | @rem Error out unless the host compiler is 64 bit | |
27 | @minilua | ||
28 | @if not errorlevel 8 goto :FAIL | ||
29 | |||
30 | @set DASMFLAGS=-D WIN -D FFI -D P64 | ||
31 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc | ||
32 | @if errorlevel 1 goto :BAD | ||
33 | |||
34 | %LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c | ||
35 | @if errorlevel 1 goto :BAD | ||
36 | %LJLINK% /out:buildvm.exe buildvm*.obj | ||
37 | @if errorlevel 1 goto :BAD | ||
38 | if exist buildvm.exe.manifest^ | ||
39 | %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe | ||
40 | |||
41 | buildvm -m peobj -o lj_vm.obj | ||
42 | @if errorlevel 1 goto :BAD | ||
43 | buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% | ||
44 | @if errorlevel 1 goto :BAD | ||
45 | buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% | ||
46 | @if errorlevel 1 goto :BAD | ||
47 | buildvm -m libdef -o lj_libdef.h %ALL_LIB% | ||
48 | @if errorlevel 1 goto :BAD | ||
49 | buildvm -m recdef -o lj_recdef.h %ALL_LIB% | ||
50 | @if errorlevel 1 goto :BAD | ||
51 | buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% | ||
52 | @if errorlevel 1 goto :BAD | ||
53 | buildvm -m folddef -o lj_folddef.h lj_opt_fold.c | ||
54 | @if errorlevel 1 goto :BAD | ||
55 | |||
56 | @echo ---- Cross compiler ---- | ||
57 | |||
58 | @set CWD=%cd% | ||
59 | @call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK | ||
60 | @cd /D "%CWD%" | ||
61 | @shift | ||
62 | |||
63 | @set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO | ||
64 | @set LJLIB="lib" /nologo | ||
65 | |||
66 | @if "%1"=="debug" ( | ||
67 | @shift | ||
68 | @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od | ||
69 | @set LJLINK=%LJLINK% /debug | ||
70 | ) else ( | ||
71 | @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG | ||
72 | ) | ||
73 | |||
74 | @if "%1"=="amalg" goto :AMALG | ||
75 | %LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c | ||
76 | @if errorlevel 1 goto :BAD | ||
77 | %LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj | ||
78 | @if errorlevel 1 goto :BAD | ||
79 | @goto :NOAMALG | ||
80 | :AMALG | ||
81 | %LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c | ||
82 | @if errorlevel 1 goto :BAD | ||
83 | %LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj | ||
84 | @if errorlevel 1 goto :BAD | ||
85 | :NOAMALG | ||
86 | |||
87 | @del *.obj *.manifest minilua.exe buildvm.exe | ||
88 | @echo. | ||
89 | @echo === Successfully built LuaJIT for Xbox One === | ||
90 | |||
91 | @goto :END | ||
92 | :BAD | ||
93 | @echo. | ||
94 | @echo ******************************************************* | ||
95 | @echo *** Build FAILED -- Please check the error messages *** | ||
96 | @echo ******************************************************* | ||
97 | @goto :END | ||
98 | :FAIL | ||
99 | @echo To run this script you must open a "Visual Studio .NET Command Prompt" | ||
100 | @echo (64 bit host compiler). The Xbox One SDK must be installed, too. | ||
101 | :END | ||
diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat index 240ec878..37322d03 100644 --- a/src/xedkbuild.bat +++ b/src/xedkbuild.bat | |||
@@ -14,7 +14,7 @@ | |||
14 | @set LJMT=mt /nologo | 14 | @set LJMT=mt /nologo |
15 | @set DASMDIR=..\dynasm | 15 | @set DASMDIR=..\dynasm |
16 | @set DASM=%DASMDIR%\dynasm.lua | 16 | @set DASM=%DASMDIR%\dynasm.lua |
17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c | 17 | @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c |
18 | 18 | ||
19 | %LJCOMPILE% host\minilua.c | 19 | %LJCOMPILE% host\minilua.c |
20 | @if errorlevel 1 goto :BAD | 20 | @if errorlevel 1 goto :BAD |