diff options
| author | Mike Pall <mike> | 2013-02-21 16:56:59 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2013-02-21 16:56:59 +0100 |
| commit | 57768cd5882eb8d39c673d9dd8598946ef7c1843 (patch) | |
| tree | f3f5663e8fb76b965e704aca33347d72cbfa3532 /src | |
| parent | 61fb587d2c1646cae4c90990b9c4c1f1bff09e5b (diff) | |
| download | luajit-57768cd5882eb8d39c673d9dd8598946ef7c1843.tar.gz luajit-57768cd5882eb8d39c673d9dd8598946ef7c1843.tar.bz2 luajit-57768cd5882eb8d39c673d9dd8598946ef7c1843.zip | |
x86: Remove x87 support from interpreter.
SSE2 required from now on.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile | 11 | ||||
| -rw-r--r-- | src/lib_jit.c | 22 | ||||
| -rw-r--r-- | src/lj_asm.c | 2 | ||||
| -rw-r--r-- | src/lj_jit.h | 18 | ||||
| -rw-r--r-- | src/lj_vm.h | 4 | ||||
| -rw-r--r-- | src/msvcbuild.bat | 1 | ||||
| -rw-r--r-- | src/vm_x86.dasc | 687 |
7 files changed, 100 insertions, 645 deletions
diff --git a/src/Makefile b/src/Makefile index 278324a1..4ea8c85e 100644 --- a/src/Makefile +++ b/src/Makefile | |||
| @@ -42,13 +42,10 @@ CCOPT= -O2 -fomit-frame-pointer | |||
| 42 | # | 42 | # |
| 43 | # Target-specific compiler options: | 43 | # Target-specific compiler options: |
| 44 | # | 44 | # |
| 45 | # x86 only: it's recommended to compile at least for i686. Better yet, | ||
| 46 | # compile for an architecture that has SSE2, too (-msse -msse2). | ||
| 47 | # | ||
| 48 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute | 45 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute |
| 49 | # the binaries to a different machine you could also use: -march=native | 46 | # the binaries to a different machine you could also use: -march=native |
| 50 | # | 47 | # |
| 51 | CCOPT_x86= -march=i686 | 48 | CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse |
| 52 | CCOPT_x64= | 49 | CCOPT_x64= |
| 53 | CCOPT_arm= | 50 | CCOPT_arm= |
| 54 | CCOPT_ppc= | 51 | CCOPT_ppc= |
| @@ -394,11 +391,6 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs | |||
| 394 | ifeq (Windows,$(TARGET_SYS)) | 391 | ifeq (Windows,$(TARGET_SYS)) |
| 395 | DASM_AFLAGS+= -D WIN | 392 | DASM_AFLAGS+= -D WIN |
| 396 | endif | 393 | endif |
| 397 | ifeq (x86,$(TARGET_LJARCH)) | ||
| 398 | ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH))) | ||
| 399 | DASM_AFLAGS+= -D SSE | ||
| 400 | endif | ||
| 401 | else | ||
| 402 | ifeq (x64,$(TARGET_LJARCH)) | 394 | ifeq (x64,$(TARGET_LJARCH)) |
| 403 | DASM_ARCH= x86 | 395 | DASM_ARCH= x86 |
| 404 | else | 396 | else |
| @@ -423,7 +415,6 @@ ifeq (ppc,$(TARGET_LJARCH)) | |||
| 423 | endif | 415 | endif |
| 424 | endif | 416 | endif |
| 425 | endif | 417 | endif |
| 426 | endif | ||
| 427 | 418 | ||
| 428 | DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) | 419 | DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) |
| 429 | DASM_DASC= vm_$(DASM_ARCH).dasc | 420 | DASM_DASC= vm_$(DASM_ARCH).dasc |
diff --git a/src/lib_jit.c b/src/lib_jit.c index 82e68258..1b69caa5 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
| @@ -538,18 +538,14 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
| 538 | uint32_t features[4]; | 538 | uint32_t features[4]; |
| 539 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { | 539 | if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { |
| 540 | #if !LJ_HASJIT | 540 | #if !LJ_HASJIT |
| 541 | #define JIT_F_CMOV 1 | ||
| 542 | #define JIT_F_SSE2 2 | 541 | #define JIT_F_SSE2 2 |
| 543 | #endif | 542 | #endif |
| 544 | flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; | ||
| 545 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; | 543 | flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; |
| 546 | #if LJ_HASJIT | 544 | #if LJ_HASJIT |
| 547 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; | 545 | flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; |
| 548 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; | 546 | flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; |
| 549 | if (vendor[2] == 0x6c65746e) { /* Intel. */ | 547 | if (vendor[2] == 0x6c65746e) { /* Intel. */ |
| 550 | if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ | 548 | if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ |
| 551 | flags |= JIT_F_P4; /* Currently unused. */ | ||
| 552 | else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ | ||
| 553 | flags |= JIT_F_LEA_AGU; | 549 | flags |= JIT_F_LEA_AGU; |
| 554 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ | 550 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ |
| 555 | uint32_t fam = (features[0] & 0x0ff00f00); | 551 | uint32_t fam = (features[0] & 0x0ff00f00); |
| @@ -562,14 +558,8 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
| 562 | } | 558 | } |
| 563 | /* Check for required instruction set support on x86 (unnecessary on x64). */ | 559 | /* Check for required instruction set support on x86 (unnecessary on x64). */ |
| 564 | #if LJ_TARGET_X86 | 560 | #if LJ_TARGET_X86 |
| 565 | #if !defined(LUAJIT_CPU_NOCMOV) | ||
| 566 | if (!(flags & JIT_F_CMOV)) | ||
| 567 | luaL_error(L, "CPU not supported"); | ||
| 568 | #endif | ||
| 569 | #if defined(LUAJIT_CPU_SSE2) | ||
| 570 | if (!(flags & JIT_F_SSE2)) | 561 | if (!(flags & JIT_F_SSE2)) |
| 571 | luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); | 562 | luaL_error(L, "CPU with SSE2 required"); |
| 572 | #endif | ||
| 573 | #endif | 563 | #endif |
| 574 | #elif LJ_TARGET_ARM | 564 | #elif LJ_TARGET_ARM |
| 575 | #if LJ_HASJIT | 565 | #if LJ_HASJIT |
| @@ -631,11 +621,7 @@ static void jit_init(lua_State *L) | |||
| 631 | uint32_t flags = jit_cpudetect(L); | 621 | uint32_t flags = jit_cpudetect(L); |
| 632 | #if LJ_HASJIT | 622 | #if LJ_HASJIT |
| 633 | jit_State *J = L2J(L); | 623 | jit_State *J = L2J(L); |
| 634 | #if LJ_TARGET_X86 | 624 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; |
| 635 | /* Silently turn off the JIT compiler on CPUs without SSE2. */ | ||
| 636 | if ((flags & JIT_F_SSE2)) | ||
| 637 | #endif | ||
| 638 | J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; | ||
| 639 | memcpy(J->param, jit_param_default, sizeof(J->param)); | 625 | memcpy(J->param, jit_param_default, sizeof(J->param)); |
| 640 | lj_dispatch_update(G(L)); | 626 | lj_dispatch_update(G(L)); |
| 641 | #else | 627 | #else |
| @@ -645,6 +631,7 @@ static void jit_init(lua_State *L) | |||
| 645 | 631 | ||
| 646 | LUALIB_API int luaopen_jit(lua_State *L) | 632 | LUALIB_API int luaopen_jit(lua_State *L) |
| 647 | { | 633 | { |
| 634 | jit_init(L); | ||
| 648 | lua_pushliteral(L, LJ_OS_NAME); | 635 | lua_pushliteral(L, LJ_OS_NAME); |
| 649 | lua_pushliteral(L, LJ_ARCH_NAME); | 636 | lua_pushliteral(L, LJ_ARCH_NAME); |
| 650 | lua_pushinteger(L, LUAJIT_VERSION_NUM); | 637 | lua_pushinteger(L, LUAJIT_VERSION_NUM); |
| @@ -657,7 +644,6 @@ LUALIB_API int luaopen_jit(lua_State *L) | |||
| 657 | LJ_LIB_REG(L, "jit.opt", jit_opt); | 644 | LJ_LIB_REG(L, "jit.opt", jit_opt); |
| 658 | #endif | 645 | #endif |
| 659 | L->top -= 2; | 646 | L->top -= 2; |
| 660 | jit_init(L); | ||
| 661 | return 1; | 647 | return 1; |
| 662 | } | 648 | } |
| 663 | 649 | ||
diff --git a/src/lj_asm.c b/src/lj_asm.c index c7365404..a01b4e52 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -1730,7 +1730,7 @@ static void asm_setup_regsp(ASMState *as) | |||
| 1730 | break; | 1730 | break; |
| 1731 | case IR_FPMATH: | 1731 | case IR_FPMATH: |
| 1732 | #if LJ_TARGET_X86ORX64 | 1732 | #if LJ_TARGET_X86ORX64 |
| 1733 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ | 1733 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */ |
| 1734 | ir->prev = REGSP_HINT(RID_XMM0); | 1734 | ir->prev = REGSP_HINT(RID_XMM0); |
| 1735 | #if !LJ_64 | 1735 | #if !LJ_64 |
| 1736 | if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ | 1736 | if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ |
diff --git a/src/lj_jit.h b/src/lj_jit.h index c0b1c41e..8b42dd4e 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
| @@ -14,18 +14,16 @@ | |||
| 14 | 14 | ||
| 15 | /* CPU-specific JIT engine flags. */ | 15 | /* CPU-specific JIT engine flags. */ |
| 16 | #if LJ_TARGET_X86ORX64 | 16 | #if LJ_TARGET_X86ORX64 |
| 17 | #define JIT_F_CMOV 0x00000010 | 17 | #define JIT_F_SSE2 0x00000010 |
| 18 | #define JIT_F_SSE2 0x00000020 | 18 | #define JIT_F_SSE3 0x00000020 |
| 19 | #define JIT_F_SSE3 0x00000040 | 19 | #define JIT_F_SSE4_1 0x00000040 |
| 20 | #define JIT_F_SSE4_1 0x00000080 | 20 | #define JIT_F_PREFER_IMUL 0x00000080 |
| 21 | #define JIT_F_P4 0x00000100 | 21 | #define JIT_F_SPLIT_XMM 0x00000100 |
| 22 | #define JIT_F_PREFER_IMUL 0x00000200 | 22 | #define JIT_F_LEA_AGU 0x00000200 |
| 23 | #define JIT_F_SPLIT_XMM 0x00000400 | ||
| 24 | #define JIT_F_LEA_AGU 0x00000800 | ||
| 25 | 23 | ||
| 26 | /* Names for the CPU-specific flags. Must match the order above. */ | 24 | /* Names for the CPU-specific flags. Must match the order above. */ |
| 27 | #define JIT_F_CPU_FIRST JIT_F_CMOV | 25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 |
| 28 | #define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" | 26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\2K8\4ATOM" |
| 29 | #elif LJ_TARGET_ARM | 27 | #elif LJ_TARGET_ARM |
| 30 | #define JIT_F_ARMV6_ 0x00000010 | 28 | #define JIT_F_ARMV6_ 0x00000010 |
| 31 | #define JIT_F_ARMV6T2_ 0x00000020 | 29 | #define JIT_F_ARMV6T2_ 0x00000020 |
diff --git a/src/lj_vm.h b/src/lj_vm.h index c5d05de4..948d63c2 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
| @@ -49,12 +49,14 @@ LJ_ASMF void lj_vm_exit_handler(void); | |||
| 49 | LJ_ASMF void lj_vm_exit_interp(void); | 49 | LJ_ASMF void lj_vm_exit_interp(void); |
| 50 | 50 | ||
| 51 | /* Internal math helper functions. */ | 51 | /* Internal math helper functions. */ |
| 52 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC | 52 | #if LJ_TARGET_PPC |
| 53 | #define lj_vm_floor floor | 53 | #define lj_vm_floor floor |
| 54 | #define lj_vm_ceil ceil | 54 | #define lj_vm_ceil ceil |
| 55 | #else | 55 | #else |
| 56 | LJ_ASMF double lj_vm_floor(double); | 56 | LJ_ASMF double lj_vm_floor(double); |
| 57 | #if !LJ_TARGET_X86ORX64 | ||
| 57 | LJ_ASMF double lj_vm_ceil(double); | 58 | LJ_ASMF double lj_vm_ceil(double); |
| 59 | #endif | ||
| 58 | #if LJ_TARGET_ARM | 60 | #if LJ_TARGET_ARM |
| 59 | LJ_ASMF double lj_vm_floor_sf(double); | 61 | LJ_ASMF double lj_vm_floor_sf(double); |
| 60 | LJ_ASMF double lj_vm_ceil_sf(double); | 62 | LJ_ASMF double lj_vm_ceil_sf(double); |
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 745c93ff..1d5bd55a 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat | |||
| @@ -35,6 +35,7 @@ if exist minilua.exe.manifest^ | |||
| 35 | @if errorlevel 8 goto :X64 | 35 | @if errorlevel 8 goto :X64 |
| 36 | @set DASMFLAGS=-D WIN -D JIT -D FFI | 36 | @set DASMFLAGS=-D WIN -D JIT -D FFI |
| 37 | @set LJARCH=x86 | 37 | @set LJARCH=x86 |
| 38 | @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 | ||
| 38 | :X64 | 39 | :X64 |
| 39 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc | 40 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc |
| 40 | @if errorlevel 1 goto :BAD | 41 | @if errorlevel 1 goto :BAD |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index b4674e2b..7020eb27 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | | | 18 | | |
| 19 | |.if P64 | 19 | |.if P64 |
| 20 | |.define X64, 1 | 20 | |.define X64, 1 |
| 21 | |.define SSE, 1 | ||
| 22 | |.if WIN | 21 | |.if WIN |
| 23 | |.define X64WIN, 1 | 22 | |.define X64WIN, 1 |
| 24 | |.endif | 23 | |.endif |
| @@ -856,13 +855,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 856 | |.if DUALNUM | 855 | |.if DUALNUM |
| 857 | | mov TMP2, LJ_TISNUM | 856 | | mov TMP2, LJ_TISNUM |
| 858 | | mov TMP1, RC | 857 | | mov TMP1, RC |
| 859 | |.elif SSE | 858 | |.else |
| 860 | | cvtsi2sd xmm0, RC | 859 | | cvtsi2sd xmm0, RC |
| 861 | | movsd TMPQ, xmm0 | 860 | | movsd TMPQ, xmm0 |
| 862 | |.else | ||
| 863 | | mov ARG4, RC | ||
| 864 | | fild ARG4 | ||
| 865 | | fstp TMPQ | ||
| 866 | |.endif | 861 | |.endif |
| 867 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 862 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
| 868 | | jmp >1 | 863 | | jmp >1 |
| @@ -935,13 +930,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 935 | |.if DUALNUM | 930 | |.if DUALNUM |
| 936 | | mov TMP2, LJ_TISNUM | 931 | | mov TMP2, LJ_TISNUM |
| 937 | | mov TMP1, RC | 932 | | mov TMP1, RC |
| 938 | |.elif SSE | 933 | |.else |
| 939 | | cvtsi2sd xmm0, RC | 934 | | cvtsi2sd xmm0, RC |
| 940 | | movsd TMPQ, xmm0 | 935 | | movsd TMPQ, xmm0 |
| 941 | |.else | ||
| 942 | | mov ARG4, RC | ||
| 943 | | fild ARG4 | ||
| 944 | | fstp TMPQ | ||
| 945 | |.endif | 936 | |.endif |
| 946 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 937 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
| 947 | | jmp >1 | 938 | | jmp >1 |
| @@ -1509,11 +1500,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1509 | |.else | 1500 | |.else |
| 1510 | | jae ->fff_fallback | 1501 | | jae ->fff_fallback |
| 1511 | |.endif | 1502 | |.endif |
| 1512 | |.if SSE | ||
| 1513 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1503 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
| 1514 | |.else | ||
| 1515 | | fld qword [BASE]; jmp ->fff_resn | ||
| 1516 | |.endif | ||
| 1517 | | | 1504 | | |
| 1518 | |.ffunc_1 tostring | 1505 | |.ffunc_1 tostring |
| 1519 | | // Only handles the string or number case inline. | 1506 | | // Only handles the string or number case inline. |
| @@ -1631,19 +1618,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1631 | | add RD, 1 | 1618 | | add RD, 1 |
| 1632 | | mov dword [BASE-4], LJ_TISNUM | 1619 | | mov dword [BASE-4], LJ_TISNUM |
| 1633 | | mov dword [BASE-8], RD | 1620 | | mov dword [BASE-8], RD |
| 1634 | |.elif SSE | 1621 | |.else |
| 1635 | | movsd xmm0, qword [BASE+8] | 1622 | | movsd xmm0, qword [BASE+8] |
| 1636 | | sseconst_1 xmm1, RBa | 1623 | | sseconst_1 xmm1, RBa |
| 1637 | | addsd xmm0, xmm1 | 1624 | | addsd xmm0, xmm1 |
| 1638 | | cvtsd2si RD, xmm0 | 1625 | | cvtsd2si RD, xmm0 |
| 1639 | | movsd qword [BASE-8], xmm0 | 1626 | | movsd qword [BASE-8], xmm0 |
| 1640 | |.else | ||
| 1641 | | fld qword [BASE+8] | ||
| 1642 | | fld1 | ||
| 1643 | | faddp st1 | ||
| 1644 | | fist ARG1 | ||
| 1645 | | fstp qword [BASE-8] | ||
| 1646 | | mov RD, ARG1 | ||
| 1647 | |.endif | 1627 | |.endif |
| 1648 | | mov TAB:RB, [BASE] | 1628 | | mov TAB:RB, [BASE] |
| 1649 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | 1629 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? |
| @@ -1690,12 +1670,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1690 | |.if DUALNUM | 1670 | |.if DUALNUM |
| 1691 | | mov dword [BASE+12], LJ_TISNUM | 1671 | | mov dword [BASE+12], LJ_TISNUM |
| 1692 | | mov dword [BASE+8], 0 | 1672 | | mov dword [BASE+8], 0 |
| 1693 | |.elif SSE | 1673 | |.else |
| 1694 | | xorps xmm0, xmm0 | 1674 | | xorps xmm0, xmm0 |
| 1695 | | movsd qword [BASE+8], xmm0 | 1675 | | movsd qword [BASE+8], xmm0 |
| 1696 | |.else | ||
| 1697 | | fldz | ||
| 1698 | | fstp qword [BASE+8] | ||
| 1699 | |.endif | 1676 | |.endif |
| 1700 | | mov RD, 1+3 | 1677 | | mov RD, 1+3 |
| 1701 | | jmp ->fff_res | 1678 | | jmp ->fff_res |
| @@ -1925,12 +1902,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1925 | |->fff_resi: // Dummy. | 1902 | |->fff_resi: // Dummy. |
| 1926 | |.endif | 1903 | |.endif |
| 1927 | | | 1904 | | |
| 1928 | |.if SSE | ||
| 1929 | |->fff_resn: | 1905 | |->fff_resn: |
| 1930 | | mov PC, [BASE-4] | 1906 | | mov PC, [BASE-4] |
| 1931 | | fstp qword [BASE-8] | 1907 | | fstp qword [BASE-8] |
| 1932 | | jmp ->fff_res1 | 1908 | | jmp ->fff_res1 |
| 1933 | |.endif | ||
| 1934 | | | 1909 | | |
| 1935 | | .ffunc_1 math_abs | 1910 | | .ffunc_1 math_abs |
| 1936 | |.if DUALNUM | 1911 | |.if DUALNUM |
| @@ -1954,8 +1929,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1954 | |.else | 1929 | |.else |
| 1955 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1930 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
| 1956 | |.endif | 1931 | |.endif |
| 1957 | | | ||
| 1958 | |.if SSE | ||
| 1959 | | movsd xmm0, qword [BASE] | 1932 | | movsd xmm0, qword [BASE] |
| 1960 | | sseconst_abs xmm1, RDa | 1933 | | sseconst_abs xmm1, RDa |
| 1961 | | andps xmm0, xmm1 | 1934 | | andps xmm0, xmm1 |
| @@ -1963,15 +1936,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1963 | | mov PC, [BASE-4] | 1936 | | mov PC, [BASE-4] |
| 1964 | | movsd qword [BASE-8], xmm0 | 1937 | | movsd qword [BASE-8], xmm0 |
| 1965 | | // fallthrough | 1938 | | // fallthrough |
| 1966 | |.else | ||
| 1967 | | fld qword [BASE] | ||
| 1968 | | fabs | ||
| 1969 | | // fallthrough | ||
| 1970 | |->fff_resxmm0: // Dummy. | ||
| 1971 | |->fff_resn: | ||
| 1972 | | mov PC, [BASE-4] | ||
| 1973 | | fstp qword [BASE-8] | ||
| 1974 | |.endif | ||
| 1975 | | | 1939 | | |
| 1976 | |->fff_res1: | 1940 | |->fff_res1: |
| 1977 | | mov RD, 1+1 | 1941 | | mov RD, 1+1 |
| @@ -2008,48 +1972,24 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2008 | |.else | 1972 | |.else |
| 2009 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1973 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
| 2010 | |.endif | 1974 | |.endif |
| 2011 | |.if SSE | ||
| 2012 | | movsd xmm0, qword [BASE] | 1975 | | movsd xmm0, qword [BASE] |
| 2013 | | call ->vm_ .. func | 1976 | | call ->vm_ .. func .. _sse |
| 2014 | | .if DUALNUM | 1977 | |.if DUALNUM |
| 2015 | | cvtsd2si RB, xmm0 | 1978 | | cvtsd2si RB, xmm0 |
| 2016 | | cmp RB, 0x80000000 | 1979 | | cmp RB, 0x80000000 |
| 2017 | | jne ->fff_resi | 1980 | | jne ->fff_resi |
| 2018 | | cvtsi2sd xmm1, RB | 1981 | | cvtsi2sd xmm1, RB |
| 2019 | | ucomisd xmm0, xmm1 | 1982 | | ucomisd xmm0, xmm1 |
| 2020 | | jp ->fff_resxmm0 | 1983 | | jp ->fff_resxmm0 |
| 2021 | | je ->fff_resi | 1984 | | je ->fff_resi |
| 2022 | | .endif | ||
| 2023 | | jmp ->fff_resxmm0 | ||
| 2024 | |.else | ||
| 2025 | | fld qword [BASE] | ||
| 2026 | | call ->vm_ .. func | ||
| 2027 | | .if DUALNUM | ||
| 2028 | | fist ARG1 | ||
| 2029 | | mov RB, ARG1 | ||
| 2030 | | cmp RB, 0x80000000; jne >2 | ||
| 2031 | | fdup | ||
| 2032 | | fild ARG1 | ||
| 2033 | | fcomparepp | ||
| 2034 | | jp ->fff_resn | ||
| 2035 | | jne ->fff_resn | ||
| 2036 | |2: | ||
| 2037 | | fpop | ||
| 2038 | | jmp ->fff_resi | ||
| 2039 | | .else | ||
| 2040 | | jmp ->fff_resn | ||
| 2041 | | .endif | ||
| 2042 | |.endif | 1985 | |.endif |
| 1986 | | jmp ->fff_resxmm0 | ||
| 2043 | |.endmacro | 1987 | |.endmacro |
| 2044 | | | 1988 | | |
| 2045 | | math_round floor | 1989 | | math_round floor |
| 2046 | | math_round ceil | 1990 | | math_round ceil |
| 2047 | | | 1991 | | |
| 2048 | |.if SSE | ||
| 2049 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 1992 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
| 2050 | |.else | ||
| 2051 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | ||
| 2052 | |.endif | ||
| 2053 | | | 1993 | | |
| 2054 | |.ffunc math_log | 1994 | |.ffunc math_log |
| 2055 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 1995 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
| @@ -2072,23 +2012,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2072 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | 2012 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn |
| 2073 | | | 2013 | | |
| 2074 | |.macro math_extern, func | 2014 | |.macro math_extern, func |
| 2075 | |.if SSE | ||
| 2076 | | .ffunc_nsse math_ .. func | 2015 | | .ffunc_nsse math_ .. func |
| 2077 | | .if not X64 | 2016 | |.if not X64 |
| 2078 | | movsd FPARG1, xmm0 | 2017 | | movsd FPARG1, xmm0 |
| 2079 | | .endif | ||
| 2080 | |.else | ||
| 2081 | | .ffunc_n math_ .. func | ||
| 2082 | | fstp FPARG1 | ||
| 2083 | |.endif | 2018 | |.endif |
| 2084 | | mov RB, BASE | 2019 | | mov RB, BASE |
| 2085 | | call extern lj_vm_ .. func | 2020 | | call extern lj_vm_ .. func |
| 2086 | | mov BASE, RB | 2021 | | mov BASE, RB |
| 2087 | | .if X64 | 2022 | |.if X64 |
| 2088 | | jmp ->fff_resxmm0 | 2023 | | jmp ->fff_resxmm0 |
| 2089 | | .else | 2024 | |.else |
| 2090 | | jmp ->fff_resn | 2025 | | jmp ->fff_resn |
| 2091 | | .endif | 2026 | |.endif |
| 2092 | |.endmacro | 2027 | |.endmacro |
| 2093 | | | 2028 | | |
| 2094 | | math_extern sinh | 2029 | | math_extern sinh |
| @@ -2096,17 +2031,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2096 | | math_extern tanh | 2031 | | math_extern tanh |
| 2097 | | | 2032 | | |
| 2098 | |->ff_math_deg: | 2033 | |->ff_math_deg: |
| 2099 | |.if SSE | ||
| 2100 | |.ffunc_nsse math_rad | 2034 | |.ffunc_nsse math_rad |
| 2101 | | mov CFUNC:RB, [BASE-8] | 2035 | | mov CFUNC:RB, [BASE-8] |
| 2102 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] | 2036 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] |
| 2103 | | jmp ->fff_resxmm0 | 2037 | | jmp ->fff_resxmm0 |
| 2104 | |.else | ||
| 2105 | |.ffunc_n math_rad | ||
| 2106 | | mov CFUNC:RB, [BASE-8] | ||
| 2107 | | fmul qword CFUNC:RB->upvalue[0] | ||
| 2108 | | jmp ->fff_resn | ||
| 2109 | |.endif | ||
| 2110 | | | 2038 | | |
| 2111 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | 2039 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn |
| 2112 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2040 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
| @@ -2123,65 +2051,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2123 | | cmp RB, 0x00200000; jb >4 | 2051 | | cmp RB, 0x00200000; jb >4 |
| 2124 | |1: | 2052 | |1: |
| 2125 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. | 2053 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. |
| 2126 | |.if SSE | ||
| 2127 | | cvtsi2sd xmm0, RB | 2054 | | cvtsi2sd xmm0, RB |
| 2128 | |.else | ||
| 2129 | | mov TMP1, RB; fild TMP1 | ||
| 2130 | |.endif | ||
| 2131 | | mov RB, [BASE-4] | 2055 | | mov RB, [BASE-4] |
| 2132 | | and RB, 0x800fffff // Mask off exponent. | 2056 | | and RB, 0x800fffff // Mask off exponent. |
| 2133 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | 2057 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. |
| 2134 | | mov [BASE-4], RB | 2058 | | mov [BASE-4], RB |
| 2135 | |2: | 2059 | |2: |
| 2136 | |.if SSE | ||
| 2137 | | movsd qword [BASE], xmm0 | 2060 | | movsd qword [BASE], xmm0 |
| 2138 | |.else | ||
| 2139 | | fstp qword [BASE] | ||
| 2140 | |.endif | ||
| 2141 | | mov RD, 1+2 | 2061 | | mov RD, 1+2 |
| 2142 | | jmp ->fff_res | 2062 | | jmp ->fff_res |
| 2143 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | 2063 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. |
| 2144 | |.if SSE | ||
| 2145 | | xorps xmm0, xmm0; jmp <2 | 2064 | | xorps xmm0, xmm0; jmp <2 |
| 2146 | |.else | ||
| 2147 | | fldz; jmp <2 | ||
| 2148 | |.endif | ||
| 2149 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | 2065 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. |
| 2150 | |.if SSE | ||
| 2151 | | movsd xmm0, qword [BASE] | 2066 | | movsd xmm0, qword [BASE] |
| 2152 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. | 2067 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. |
| 2153 | | mulsd xmm0, xmm1 | 2068 | | mulsd xmm0, xmm1 |
| 2154 | | movsd qword [BASE-8], xmm0 | 2069 | | movsd qword [BASE-8], xmm0 |
| 2155 | |.else | ||
| 2156 | | fld qword [BASE] | ||
| 2157 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 | ||
| 2158 | | fstp qword [BASE-8] | ||
| 2159 | |.endif | ||
| 2160 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | 2070 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 |
| 2161 | | | 2071 | | |
| 2162 | |.if SSE | ||
| 2163 | |.ffunc_nsse math_modf | 2072 | |.ffunc_nsse math_modf |
| 2164 | |.else | ||
| 2165 | |.ffunc_n math_modf | ||
| 2166 | |.endif | ||
| 2167 | | mov RB, [BASE+4] | 2073 | | mov RB, [BASE+4] |
| 2168 | | mov PC, [BASE-4] | 2074 | | mov PC, [BASE-4] |
| 2169 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | 2075 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? |
| 2170 | |.if SSE | ||
| 2171 | | movaps xmm4, xmm0 | 2076 | | movaps xmm4, xmm0 |
| 2172 | | call ->vm_trunc | 2077 | | call ->vm_trunc_sse |
| 2173 | | subsd xmm4, xmm0 | 2078 | | subsd xmm4, xmm0 |
| 2174 | |1: | 2079 | |1: |
| 2175 | | movsd qword [BASE-8], xmm0 | 2080 | | movsd qword [BASE-8], xmm0 |
| 2176 | | movsd qword [BASE], xmm4 | 2081 | | movsd qword [BASE], xmm4 |
| 2177 | |.else | ||
| 2178 | | fdup | ||
| 2179 | | call ->vm_trunc | ||
| 2180 | | fsub st1, st0 | ||
| 2181 | |1: | ||
| 2182 | | fstp qword [BASE-8] | ||
| 2183 | | fstp qword [BASE] | ||
| 2184 | |.endif | ||
| 2185 | | mov RC, [BASE-4]; mov RB, [BASE+4] | 2082 | | mov RC, [BASE-4]; mov RB, [BASE+4] |
| 2186 | | xor RC, RB; js >3 // Need to adjust sign? | 2083 | | xor RC, RB; js >3 // Need to adjust sign? |
| 2187 | |2: | 2084 | |2: |
| @@ -2191,24 +2088,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2191 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | 2088 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. |
| 2192 | | jmp <2 | 2089 | | jmp <2 |
| 2193 | |4: | 2090 | |4: |
| 2194 | |.if SSE | ||
| 2195 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2091 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
| 2196 | |.else | ||
| 2197 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. | ||
| 2198 | |.endif | ||
| 2199 | | | 2092 | | |
| 2200 | |.ffunc_nnr math_fmod | 2093 | |.ffunc_nnr math_fmod |
| 2201 | |1: ; fprem; fnstsw ax; sahf; jp <1 | 2094 | |1: ; fprem; fnstsw ax; sahf; jp <1 |
| 2202 | | fpop1 | 2095 | | fpop1 |
| 2203 | | jmp ->fff_resn | 2096 | | jmp ->fff_resn |
| 2204 | | | 2097 | | |
| 2205 | |.if SSE | 2098 | |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0 |
| 2206 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 | ||
| 2207 | |.else | ||
| 2208 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | ||
| 2209 | |.endif | ||
| 2210 | | | 2099 | | |
| 2211 | |.macro math_minmax, name, cmovop, fcmovop, sseop | 2100 | |.macro math_minmax, name, cmovop, sseop |
| 2212 | | .ffunc name | 2101 | | .ffunc name |
| 2213 | | mov RA, 2 | 2102 | | mov RA, 2 |
| 2214 | | cmp dword [BASE+4], LJ_TISNUM | 2103 | | cmp dword [BASE+4], LJ_TISNUM |
| @@ -2225,12 +2114,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2225 | |3: | 2114 | |3: |
| 2226 | | ja ->fff_fallback | 2115 | | ja ->fff_fallback |
| 2227 | | // Convert intermediate result to number and continue below. | 2116 | | // Convert intermediate result to number and continue below. |
| 2228 | |.if SSE | ||
| 2229 | | cvtsi2sd xmm0, RB | 2117 | | cvtsi2sd xmm0, RB |
| 2230 | |.else | ||
| 2231 | | mov TMP1, RB | ||
| 2232 | | fild TMP1 | ||
| 2233 | |.endif | ||
| 2234 | | jmp >6 | 2118 | | jmp >6 |
| 2235 | |4: | 2119 | |4: |
| 2236 | | ja ->fff_fallback | 2120 | | ja ->fff_fallback |
| @@ -2238,7 +2122,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2238 | | jae ->fff_fallback | 2122 | | jae ->fff_fallback |
| 2239 | |.endif | 2123 | |.endif |
| 2240 | | | 2124 | | |
| 2241 | |.if SSE | ||
| 2242 | | movsd xmm0, qword [BASE] | 2125 | | movsd xmm0, qword [BASE] |
| 2243 | |5: // Handle numbers or integers. | 2126 | |5: // Handle numbers or integers. |
| 2244 | | cmp RA, RD; jae ->fff_resxmm0 | 2127 | | cmp RA, RD; jae ->fff_resxmm0 |
| @@ -2257,34 +2140,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2257 | | sseop xmm0, xmm1 | 2140 | | sseop xmm0, xmm1 |
| 2258 | | add RA, 1 | 2141 | | add RA, 1 |
| 2259 | | jmp <5 | 2142 | | jmp <5 |
| 2260 | |.else | ||
| 2261 | | fld qword [BASE] | ||
| 2262 | |5: // Handle numbers or integers. | ||
| 2263 | | cmp RA, RD; jae ->fff_resn | ||
| 2264 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | ||
| 2265 | |.if DUALNUM | ||
| 2266 | | jb >6 | ||
| 2267 | | ja >9 | ||
| 2268 | | fild dword [BASE+RA*8-8] | ||
| 2269 | | jmp >7 | ||
| 2270 | |.else | ||
| 2271 | | jae >9 | ||
| 2272 | |.endif | ||
| 2273 | |6: | ||
| 2274 | | fld qword [BASE+RA*8-8] | ||
| 2275 | |7: | ||
| 2276 | | fucomi st1; fcmovop st1; fpop1 | ||
| 2277 | | add RA, 1 | ||
| 2278 | | jmp <5 | ||
| 2279 | |.endif | ||
| 2280 | |.endmacro | 2143 | |.endmacro |
| 2281 | | | 2144 | | |
| 2282 | | math_minmax math_min, cmovg, fcmovnbe, minsd | 2145 | | math_minmax math_min, cmovg, minsd |
| 2283 | | math_minmax math_max, cmovl, fcmovbe, maxsd | 2146 | | math_minmax math_max, cmovl, maxsd |
| 2284 | |.if not SSE | ||
| 2285 | |9: | ||
| 2286 | | fpop; jmp ->fff_fallback | ||
| 2287 | |.endif | ||
| 2288 | | | 2147 | | |
| 2289 | |//-- String library ----------------------------------------------------- | 2148 | |//-- String library ----------------------------------------------------- |
| 2290 | | | 2149 | | |
| @@ -2293,10 +2152,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2293 | | mov STR:RB, [BASE] | 2152 | | mov STR:RB, [BASE] |
| 2294 | |.if DUALNUM | 2153 | |.if DUALNUM |
| 2295 | | mov RB, dword STR:RB->len; jmp ->fff_resi | 2154 | | mov RB, dword STR:RB->len; jmp ->fff_resi |
| 2296 | |.elif SSE | ||
| 2297 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | ||
| 2298 | |.else | 2155 | |.else |
| 2299 | | fild dword STR:RB->len; jmp ->fff_resn | 2156 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 |
| 2300 | |.endif | 2157 | |.endif |
| 2301 | | | 2158 | | |
| 2302 | |.ffunc string_byte // Only handle the 1-arg case here. | 2159 | |.ffunc string_byte // Only handle the 1-arg case here. |
| @@ -2309,10 +2166,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2309 | | movzx RB, byte STR:RB[1] | 2166 | | movzx RB, byte STR:RB[1] |
| 2310 | |.if DUALNUM | 2167 | |.if DUALNUM |
| 2311 | | jmp ->fff_resi | 2168 | | jmp ->fff_resi |
| 2312 | |.elif SSE | ||
| 2313 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | ||
| 2314 | |.else | 2169 | |.else |
| 2315 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2170 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
| 2316 | |.endif | 2171 | |.endif |
| 2317 | | | 2172 | | |
| 2318 | |.ffunc string_char // Only handle the 1-arg case here. | 2173 | |.ffunc string_char // Only handle the 1-arg case here. |
| @@ -2324,16 +2179,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2324 | | mov RB, dword [BASE] | 2179 | | mov RB, dword [BASE] |
| 2325 | | cmp RB, 255; ja ->fff_fallback | 2180 | | cmp RB, 255; ja ->fff_fallback |
| 2326 | | mov TMP2, RB | 2181 | | mov TMP2, RB |
| 2327 | |.elif SSE | 2182 | |.else |
| 2328 | | jae ->fff_fallback | 2183 | | jae ->fff_fallback |
| 2329 | | cvttsd2si RB, qword [BASE] | 2184 | | cvttsd2si RB, qword [BASE] |
| 2330 | | cmp RB, 255; ja ->fff_fallback | 2185 | | cmp RB, 255; ja ->fff_fallback |
| 2331 | | mov TMP2, RB | 2186 | | mov TMP2, RB |
| 2332 | |.else | ||
| 2333 | | jae ->fff_fallback | ||
| 2334 | | fld qword [BASE] | ||
| 2335 | | fistp TMP2 | ||
| 2336 | | cmp TMP2, 255; ja ->fff_fallback | ||
| 2337 | |.endif | 2187 | |.endif |
| 2338 | |.if X64 | 2188 | |.if X64 |
| 2339 | | mov TMP3, 1 | 2189 | | mov TMP3, 1 |
| @@ -2371,14 +2221,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2371 | | jne ->fff_fallback | 2221 | | jne ->fff_fallback |
| 2372 | | mov RB, dword [BASE+16] | 2222 | | mov RB, dword [BASE+16] |
| 2373 | | mov TMP2, RB | 2223 | | mov TMP2, RB |
| 2374 | |.elif SSE | 2224 | |.else |
| 2375 | | jae ->fff_fallback | 2225 | | jae ->fff_fallback |
| 2376 | | cvttsd2si RB, qword [BASE+16] | 2226 | | cvttsd2si RB, qword [BASE+16] |
| 2377 | | mov TMP2, RB | 2227 | | mov TMP2, RB |
| 2378 | |.else | ||
| 2379 | | jae ->fff_fallback | ||
| 2380 | | fld qword [BASE+16] | ||
| 2381 | | fistp TMP2 | ||
| 2382 | |.endif | 2228 | |.endif |
| 2383 | |1: | 2229 | |1: |
| 2384 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2230 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
| @@ -2393,12 +2239,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2393 | | mov RB, STR:RB->len | 2239 | | mov RB, STR:RB->len |
| 2394 | |.if DUALNUM | 2240 | |.if DUALNUM |
| 2395 | | mov RA, dword [BASE+8] | 2241 | | mov RA, dword [BASE+8] |
| 2396 | |.elif SSE | ||
| 2397 | | cvttsd2si RA, qword [BASE+8] | ||
| 2398 | |.else | 2242 | |.else |
| 2399 | | fld qword [BASE+8] | 2243 | | cvttsd2si RA, qword [BASE+8] |
| 2400 | | fistp ARG3 | ||
| 2401 | | mov RA, ARG3 | ||
| 2402 | |.endif | 2244 | |.endif |
| 2403 | | mov RC, TMP2 | 2245 | | mov RC, TMP2 |
| 2404 | | cmp RB, RC // len < end? (unsigned compare) | 2246 | | cmp RB, RC // len < end? (unsigned compare) |
| @@ -2451,14 +2293,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2451 | |.if DUALNUM | 2293 | |.if DUALNUM |
| 2452 | | jne ->fff_fallback | 2294 | | jne ->fff_fallback |
| 2453 | | mov RC, dword [BASE+8] | 2295 | | mov RC, dword [BASE+8] |
| 2454 | |.elif SSE | ||
| 2455 | | jae ->fff_fallback | ||
| 2456 | | cvttsd2si RC, qword [BASE+8] | ||
| 2457 | |.else | 2296 | |.else |
| 2458 | | jae ->fff_fallback | 2297 | | jae ->fff_fallback |
| 2459 | | fld qword [BASE+8] | 2298 | | cvttsd2si RC, qword [BASE+8] |
| 2460 | | fistp TMP2 | ||
| 2461 | | mov RC, TMP2 | ||
| 2462 | |.endif | 2299 | |.endif |
| 2463 | | test RC, RC | 2300 | | test RC, RC |
| 2464 | | jle ->fff_emptystr // Count <= 0? (or non-int) | 2301 | | jle ->fff_emptystr // Count <= 0? (or non-int) |
| @@ -2554,10 +2391,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2554 | | mov BASE, RB // Restore BASE. | 2391 | | mov BASE, RB // Restore BASE. |
| 2555 | |.if DUALNUM | 2392 | |.if DUALNUM |
| 2556 | | mov RB, RD; jmp ->fff_resi | 2393 | | mov RB, RD; jmp ->fff_resi |
| 2557 | |.elif SSE | ||
| 2558 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | ||
| 2559 | |.else | 2394 | |.else |
| 2560 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn | 2395 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 |
| 2561 | |.endif | 2396 | |.endif |
| 2562 | | | 2397 | | |
| 2563 | |//-- Bit library -------------------------------------------------------- | 2398 | |//-- Bit library -------------------------------------------------------- |
| @@ -2567,11 +2402,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2567 | |.macro .ffunc_bit, name, kind | 2402 | |.macro .ffunc_bit, name, kind |
| 2568 | | .ffunc_1 name | 2403 | | .ffunc_1 name |
| 2569 | |.if kind == 2 | 2404 | |.if kind == 2 |
| 2570 | |.if SSE | ||
| 2571 | | sseconst_tobit xmm1, RBa | 2405 | | sseconst_tobit xmm1, RBa |
| 2572 | |.else | ||
| 2573 | | mov TMP1, TOBIT_BIAS | ||
| 2574 | |.endif | ||
| 2575 | |.endif | 2406 | |.endif |
| 2576 | | cmp dword [BASE+4], LJ_TISNUM | 2407 | | cmp dword [BASE+4], LJ_TISNUM |
| 2577 | |.if DUALNUM | 2408 | |.if DUALNUM |
| @@ -2587,37 +2418,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2587 | |.else | 2418 | |.else |
| 2588 | | jae ->fff_fallback | 2419 | | jae ->fff_fallback |
| 2589 | |.endif | 2420 | |.endif |
| 2590 | |.if SSE | ||
| 2591 | | movsd xmm0, qword [BASE] | 2421 | | movsd xmm0, qword [BASE] |
| 2592 | |.if kind < 2 | 2422 | |.if kind < 2 |
| 2593 | | sseconst_tobit xmm1, RBa | 2423 | | sseconst_tobit xmm1, RBa |
| 2594 | |.endif | 2424 | |.endif |
| 2595 | | addsd xmm0, xmm1 | 2425 | | addsd xmm0, xmm1 |
| 2596 | | movd RB, xmm0 | 2426 | | movd RB, xmm0 |
| 2597 | |.else | ||
| 2598 | | fld qword [BASE] | ||
| 2599 | |.if kind < 2 | ||
| 2600 | | mov TMP1, TOBIT_BIAS | ||
| 2601 | |.endif | ||
| 2602 | | fadd TMP1 | ||
| 2603 | | fstp FPARG1 | ||
| 2604 | |.if kind > 0 | ||
| 2605 | | mov RB, ARG1 | ||
| 2606 | |.endif | ||
| 2607 | |.endif | ||
| 2608 | |2: | 2427 | |2: |
| 2609 | |.endmacro | 2428 | |.endmacro |
| 2610 | | | 2429 | | |
| 2611 | |.ffunc_bit bit_tobit, 0 | 2430 | |.ffunc_bit bit_tobit, 0 |
| 2612 | |.if DUALNUM or SSE | ||
| 2613 | |.if not SSE | ||
| 2614 | | mov RB, ARG1 | ||
| 2615 | |.endif | ||
| 2616 | | jmp ->fff_resbit | 2431 | | jmp ->fff_resbit |
| 2617 | |.else | ||
| 2618 | | fild ARG1 | ||
| 2619 | | jmp ->fff_resn | ||
| 2620 | |.endif | ||
| 2621 | | | 2432 | | |
| 2622 | |.macro .ffunc_bit_op, name, ins | 2433 | |.macro .ffunc_bit_op, name, ins |
| 2623 | | .ffunc_bit name, 2 | 2434 | | .ffunc_bit name, 2 |
| @@ -2637,17 +2448,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2637 | |.else | 2448 | |.else |
| 2638 | | jae ->fff_fallback_bit_op | 2449 | | jae ->fff_fallback_bit_op |
| 2639 | |.endif | 2450 | |.endif |
| 2640 | |.if SSE | ||
| 2641 | | movsd xmm0, qword [RD] | 2451 | | movsd xmm0, qword [RD] |
| 2642 | | addsd xmm0, xmm1 | 2452 | | addsd xmm0, xmm1 |
| 2643 | | movd RA, xmm0 | 2453 | | movd RA, xmm0 |
| 2644 | | ins RB, RA | 2454 | | ins RB, RA |
| 2645 | |.else | ||
| 2646 | | fld qword [RD] | ||
| 2647 | | fadd TMP1 | ||
| 2648 | | fstp FPARG1 | ||
| 2649 | | ins RB, ARG1 | ||
| 2650 | |.endif | ||
| 2651 | | sub RD, 8 | 2455 | | sub RD, 8 |
| 2652 | | jmp <1 | 2456 | | jmp <1 |
| 2653 | |.endmacro | 2457 | |.endmacro |
| @@ -2664,15 +2468,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2664 | | not RB | 2468 | | not RB |
| 2665 | |.if DUALNUM | 2469 | |.if DUALNUM |
| 2666 | | jmp ->fff_resbit | 2470 | | jmp ->fff_resbit |
| 2667 | |.elif SSE | 2471 | |.else |
| 2668 | |->fff_resbit: | 2472 | |->fff_resbit: |
| 2669 | | cvtsi2sd xmm0, RB | 2473 | | cvtsi2sd xmm0, RB |
| 2670 | | jmp ->fff_resxmm0 | 2474 | | jmp ->fff_resxmm0 |
| 2671 | |.else | ||
| 2672 | |->fff_resbit: | ||
| 2673 | | mov ARG1, RB | ||
| 2674 | | fild ARG1 | ||
| 2675 | | jmp ->fff_resn | ||
| 2676 | |.endif | 2475 | |.endif |
| 2677 | | | 2476 | | |
| 2678 | |->fff_fallback_bit_op: | 2477 | |->fff_fallback_bit_op: |
| @@ -2685,22 +2484,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2685 | | // Note: no inline conversion from number for 2nd argument! | 2484 | | // Note: no inline conversion from number for 2nd argument! |
| 2686 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | 2485 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback |
| 2687 | | mov RA, dword [BASE+8] | 2486 | | mov RA, dword [BASE+8] |
| 2688 | |.elif SSE | 2487 | |.else |
| 2689 | | .ffunc_nnsse name | 2488 | | .ffunc_nnsse name |
| 2690 | | sseconst_tobit xmm2, RBa | 2489 | | sseconst_tobit xmm2, RBa |
| 2691 | | addsd xmm0, xmm2 | 2490 | | addsd xmm0, xmm2 |
| 2692 | | addsd xmm1, xmm2 | 2491 | | addsd xmm1, xmm2 |
| 2693 | | movd RB, xmm0 | 2492 | | movd RB, xmm0 |
| 2694 | | movd RA, xmm1 | 2493 | | movd RA, xmm1 |
| 2695 | |.else | ||
| 2696 | | .ffunc_nn name | ||
| 2697 | | mov TMP1, TOBIT_BIAS | ||
| 2698 | | fadd TMP1 | ||
| 2699 | | fstp FPARG3 | ||
| 2700 | | fadd TMP1 | ||
| 2701 | | fstp FPARG1 | ||
| 2702 | | mov RA, ARG3 | ||
| 2703 | | mov RB, ARG1 | ||
| 2704 | |.endif | 2494 | |.endif |
| 2705 | | ins RB, cl // Assumes RA is ecx. | 2495 | | ins RB, cl // Assumes RA is ecx. |
| 2706 | | jmp ->fff_resbit | 2496 | | jmp ->fff_resbit |
| @@ -3051,27 +2841,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3051 | |//----------------------------------------------------------------------- | 2841 | |//----------------------------------------------------------------------- |
| 3052 | | | 2842 | | |
| 3053 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2843 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
| 3054 | |// and from JIT code. | 2844 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
| 3055 | | | 2845 | |.macro vm_round, name, mode |
| 3056 | |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. | 2846 | |->name .. _sse: |
| 3057 | |.macro vm_round_x87, mode1, mode2 | ||
| 3058 | | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. | ||
| 3059 | | mov [esp+8], eax | ||
| 3060 | | mov ax, mode1 | ||
| 3061 | | or ax, [esp+4] | ||
| 3062 | |.if mode2 ~= 0xffff | ||
| 3063 | | and ax, mode2 | ||
| 3064 | |.endif | ||
| 3065 | | mov [esp+6], ax | ||
| 3066 | | fldcw word [esp+6] | ||
| 3067 | | frndint | ||
| 3068 | | fldcw word [esp+4] | ||
| 3069 | | mov eax, [esp+8] | ||
| 3070 | | ret | ||
| 3071 | |.endmacro | ||
| 3072 | | | ||
| 3073 | |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | ||
| 3074 | |.macro vm_round_sse, mode | ||
| 3075 | | sseconst_abs xmm2, RDa | 2847 | | sseconst_abs xmm2, RDa |
| 3076 | | sseconst_2p52 xmm3, RDa | 2848 | | sseconst_2p52 xmm3, RDa |
| 3077 | | movaps xmm1, xmm0 | 2849 | | movaps xmm1, xmm0 |
| @@ -3107,22 +2879,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3107 | | ret | 2879 | | ret |
| 3108 | |.endmacro | 2880 | |.endmacro |
| 3109 | | | 2881 | | |
| 3110 | |.macro vm_round, name, ssemode, mode1, mode2 | 2882 | |->vm_floor: |
| 3111 | |->name: | 2883 | |.if not X64 |
| 3112 | |.if not SSE | 2884 | | movsd xmm0, qword [esp+4] |
| 3113 | | vm_round_x87 mode1, mode2 | 2885 | | call ->vm_floor_sse |
| 2886 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
| 2887 | | fld qword [esp+4] | ||
| 2888 | | ret | ||
| 3114 | |.endif | 2889 | |.endif |
| 3115 | |->name .. _sse: | ||
| 3116 | | vm_round_sse ssemode | ||
| 3117 | |.endmacro | ||
| 3118 | | | 2890 | | |
| 3119 | | vm_round vm_floor, 0, 0x0400, 0xf7ff | 2891 | | vm_round vm_floor, 0 |
| 3120 | | vm_round vm_ceil, 1, 0x0800, 0xfbff | 2892 | | vm_round vm_ceil, 1 |
| 3121 | | vm_round vm_trunc, 2, 0x0c00, 0xffff | 2893 | | vm_round vm_trunc, 2 |
| 3122 | | | 2894 | | |
| 3123 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 2895 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
| 3124 | |->vm_mod: | 2896 | |->vm_mod: |
| 3125 | |.if SSE | ||
| 3126 | |// Args in xmm0/xmm1, return value in xmm0. | 2897 | |// Args in xmm0/xmm1, return value in xmm0. |
| 3127 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | 2898 | |// Caveat: xmm0-xmm5 and RC (eax) modified! |
| 3128 | | movaps xmm5, xmm0 | 2899 | | movaps xmm5, xmm0 |
| @@ -3150,23 +2921,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3150 | | movaps xmm0, xmm5 | 2921 | | movaps xmm0, xmm5 |
| 3151 | | subsd xmm0, xmm1 | 2922 | | subsd xmm0, xmm1 |
| 3152 | | ret | 2923 | | ret |
| 3153 | |.else | ||
| 3154 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | ||
| 3155 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | ||
| 3156 | | fld st1 | ||
| 3157 | | fdiv st1 | ||
| 3158 | | fnstcw word [esp+4] | ||
| 3159 | | mov ax, 0x0400 | ||
| 3160 | | or ax, [esp+4] | ||
| 3161 | | and ax, 0xf7ff | ||
| 3162 | | mov [esp+6], ax | ||
| 3163 | | fldcw word [esp+6] | ||
| 3164 | | frndint | ||
| 3165 | | fldcw word [esp+4] | ||
| 3166 | | fmulp st1 | ||
| 3167 | | fsubp st1 | ||
| 3168 | | ret | ||
| 3169 | |.endif | ||
| 3170 | | | 2924 | | |
| 3171 | |// FP log2(x). Called by math.log(x, base). | 2925 | |// FP log2(x). Called by math.log(x, base). |
| 3172 | |->vm_log2: | 2926 | |->vm_log2: |
| @@ -3217,96 +2971,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3217 | | | 2971 | | |
| 3218 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | 2972 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, |
| 3219 | |// and vm_arith. | 2973 | |// and vm_arith. |
| 3220 | |// Args/ret on x87 stack (y on top). RC (eax) modified. | ||
| 3221 | |// Caveat: needs 3 slots on x87 stack! | ||
| 3222 | |->vm_pow: | ||
| 3223 | |.if not SSE | ||
| 3224 | | fist dword [esp+4] // Store/reload int before comparison. | ||
| 3225 | | fild dword [esp+4] // Integral exponent used in vm_powi. | ||
| 3226 | | fucomip st1 | ||
| 3227 | | jnz >8 // Branch for FP exponents. | ||
| 3228 | | jp >9 // Branch for NaN exponent. | ||
| 3229 | | fpop // Pop y and fallthrough to vm_powi. | ||
| 3230 | | | ||
| 3231 | |// FP/int power function x^i. Arg1/ret on x87 stack. | ||
| 3232 | |// Arg2 (int) on C stack. RC (eax) modified. | ||
| 3233 | |// Caveat: needs 2 slots on x87 stack! | ||
| 3234 | | mov eax, [esp+4] | ||
| 3235 | | cmp eax, 1; jle >6 // i<=1? | ||
| 3236 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
| 3237 | |1: // Handle leading zeros. | ||
| 3238 | | test eax, 1; jnz >2 | ||
| 3239 | | fmul st0 | ||
| 3240 | | shr eax, 1 | ||
| 3241 | | jmp <1 | ||
| 3242 | |2: | ||
| 3243 | | shr eax, 1; jz >5 | ||
| 3244 | | fdup | ||
| 3245 | |3: // Handle trailing bits. | ||
| 3246 | | fmul st0 | ||
| 3247 | | shr eax, 1; jz >4 | ||
| 3248 | | jnc <3 | ||
| 3249 | | fmul st1, st0 | ||
| 3250 | | jmp <3 | ||
| 3251 | |4: | ||
| 3252 | | fmulp st1 | ||
| 3253 | |5: | ||
| 3254 | | ret | ||
| 3255 | |6: | ||
| 3256 | | je <5 // x^1 ==> x | ||
| 3257 | | jb >7 | ||
| 3258 | | fld1; fdivrp st1 | ||
| 3259 | | neg eax | ||
| 3260 | | cmp eax, 1; je <5 // x^-1 ==> 1/x | ||
| 3261 | | jmp <1 // x^-i ==> (1/x)^i | ||
| 3262 | |7: | ||
| 3263 | | fpop; fld1 // x^0 ==> 1 | ||
| 3264 | | ret | ||
| 3265 | | | ||
| 3266 | |8: // FP/FP power function x^y. | ||
| 3267 | | fst dword [esp+4] | ||
| 3268 | | fxch | ||
| 3269 | | fst dword [esp+8] | ||
| 3270 | | mov eax, [esp+4]; shl eax, 1 | ||
| 3271 | | cmp eax, 0xff000000; je >2 // x^+-Inf? | ||
| 3272 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
| 3273 | | cmp eax, 0xff000000; je >4 // +-Inf^y? | ||
| 3274 | | fyl2x | ||
| 3275 | | jmp ->vm_exp2raw | ||
| 3276 | | | ||
| 3277 | |9: // Handle x^NaN. | ||
| 3278 | | fld1 | ||
| 3279 | | fucomip st2 | ||
| 3280 | | je >1 // 1^NaN ==> 1 | ||
| 3281 | | fxch // x^NaN ==> NaN | ||
| 3282 | |1: | ||
| 3283 | | fpop | ||
| 3284 | | ret | ||
| 3285 | | | ||
| 3286 | |2: // Handle x^+-Inf. | ||
| 3287 | | fabs | ||
| 3288 | | fld1 | ||
| 3289 | | fucomip st1 | ||
| 3290 | | je >3 // +-1^+-Inf ==> 1 | ||
| 3291 | | fpop; fabs; fldz; mov eax, 0; setc al | ||
| 3292 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
| 3293 | | fxch | ||
| 3294 | |3: | ||
| 3295 | | fpop1; fabs | ||
| 3296 | | ret | ||
| 3297 | | | ||
| 3298 | |4: // Handle +-0^y or +-Inf^y. | ||
| 3299 | | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| | ||
| 3300 | | fpop; fpop | ||
| 3301 | | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf | ||
| 3302 | | fldz // y < 0, +-Inf^y ==> 0 | ||
| 3303 | | ret | ||
| 3304 | |5: | ||
| 3305 | | mov dword [esp+4], 0x7f800000 // Return +Inf. | ||
| 3306 | | fld dword [esp+4] | ||
| 3307 | | ret | ||
| 3308 | |.endif | ||
| 3309 | | | ||
| 3310 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | 2974 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. |
| 3311 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | 2975 | |// Needs 16 byte scratch area for x86. Also called from JIT code. |
| 3312 | |->vm_pow_sse: | 2976 | |->vm_pow_sse: |
| @@ -3315,7 +2979,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3315 | | ucomisd xmm1, xmm2 | 2979 | | ucomisd xmm1, xmm2 |
| 3316 | | jnz >8 // Branch for FP exponents. | 2980 | | jnz >8 // Branch for FP exponents. |
| 3317 | | jp >9 // Branch for NaN exponent. | 2981 | | jp >9 // Branch for NaN exponent. |
| 3318 | | // Fallthrough to vm_powi_sse. | 2982 | | // Fallthrough. |
| 3319 | | | 2983 | | |
| 3320 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | 2984 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |
| 3321 | |->vm_powi_sse: | 2985 | |->vm_powi_sse: |
| @@ -3437,8 +3101,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3437 | | .else | 3101 | | .else |
| 3438 | | .define fpmop, CARG1d | 3102 | | .define fpmop, CARG1d |
| 3439 | | .endif | 3103 | | .endif |
| 3440 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | 3104 | | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse |
| 3441 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | 3105 | | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2 |
| 3442 | | sqrtsd xmm0, xmm0; ret | 3106 | | sqrtsd xmm0, xmm0; ret |
| 3443 | |2: | 3107 | |2: |
| 3444 | | .if X64WIN | 3108 | | .if X64WIN |
| @@ -3478,14 +3142,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3478 | | ret | 3142 | | ret |
| 3479 | |.else // x86 calling convention. | 3143 | |.else // x86 calling convention. |
| 3480 | | .define fpmop, eax | 3144 | | .define fpmop, eax |
| 3481 | |.if SSE | ||
| 3482 | | mov fpmop, [esp+12] | 3145 | | mov fpmop, [esp+12] |
| 3483 | | movsd xmm0, qword [esp+4] | 3146 | | movsd xmm0, qword [esp+4] |
| 3484 | | cmp fpmop, 1; je >1; ja >2 | 3147 | | cmp fpmop, 1; je >1; ja >2 |
| 3485 | | call ->vm_floor; jmp >7 | 3148 | | call ->vm_floor_sse; jmp >7 |
| 3486 | |1: ; call ->vm_ceil; jmp >7 | 3149 | |1: ; call ->vm_ceil_sse; jmp >7 |
| 3487 | |2: ; cmp fpmop, 3; je >1; ja >2 | 3150 | |2: ; cmp fpmop, 3; je >1; ja >2 |
| 3488 | | call ->vm_trunc; jmp >7 | 3151 | | call ->vm_trunc_sse; jmp >7 |
| 3489 | |1: | 3152 | |1: |
| 3490 | | sqrtsd xmm0, xmm0 | 3153 | | sqrtsd xmm0, xmm0 |
| 3491 | |7: | 3154 | |7: |
| @@ -3503,23 +3166,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3503 | |2: ; cmp fpmop, 11; je >1; ja >9 | 3166 | |2: ; cmp fpmop, 11; je >1; ja >9 |
| 3504 | | fcos; ret | 3167 | | fcos; ret |
| 3505 | |1: ; fptan; fpop; ret | 3168 | |1: ; fptan; fpop; ret |
| 3506 | |.else | ||
| 3507 | | mov fpmop, [esp+12] | ||
| 3508 | | fld qword [esp+4] | ||
| 3509 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
| 3510 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
| 3511 | | fsqrt; ret | ||
| 3512 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
| 3513 | | cmp fpmop, 7; je >1; ja >2 | ||
| 3514 | | fldln2; fxch; fyl2x; ret | ||
| 3515 | |1: ; fld1; fxch; fyl2x; ret | ||
| 3516 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
| 3517 | | fldlg2; fxch; fyl2x; ret | ||
| 3518 | |1: ; fsin; ret | ||
| 3519 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
| 3520 | | fcos; ret | ||
| 3521 | |1: ; fptan; fpop; ret | ||
| 3522 | |.endif | ||
| 3523 | |.endif | 3169 | |.endif |
| 3524 | |9: ; int3 // Bad fpm. | 3170 | |9: ; int3 // Bad fpm. |
| 3525 | |.endif | 3171 | |.endif |
| @@ -3541,7 +3187,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3541 | |2: ; cmp foldop, 3; je >1; ja >2 | 3187 | |2: ; cmp foldop, 3; je >1; ja >2 |
| 3542 | | mulsd xmm0, xmm1; ret | 3188 | | mulsd xmm0, xmm1; ret |
| 3543 | |1: ; divsd xmm0, xmm1; ret | 3189 | |1: ; divsd xmm0, xmm1; ret |
| 3544 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow | 3190 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse |
| 3545 | | cmp foldop, 7; je >1; ja >2 | 3191 | | cmp foldop, 7; je >1; ja >2 |
| 3546 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | 3192 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret |
| 3547 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | 3193 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret |
| @@ -3574,7 +3220,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3574 | |1: ; maxsd xmm0, xmm1; ret | 3220 | |1: ; maxsd xmm0, xmm1; ret |
| 3575 | |9: ; int3 // Bad op. | 3221 | |9: ; int3 // Bad op. |
| 3576 | | | 3222 | | |
| 3577 | |.elif SSE // x86 calling convention with SSE ops. | 3223 | |.else // x86 calling convention. |
| 3578 | | | 3224 | | |
| 3579 | | .define foldop, eax | 3225 | | .define foldop, eax |
| 3580 | | mov foldop, [esp+20] | 3226 | | mov foldop, [esp+20] |
| @@ -3593,7 +3239,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3593 | |2: ; cmp foldop, 5 | 3239 | |2: ; cmp foldop, 5 |
| 3594 | | je >1; ja >2 | 3240 | | je >1; ja >2 |
| 3595 | | call ->vm_mod; jmp <7 | 3241 | | call ->vm_mod; jmp <7 |
| 3596 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. | 3242 | |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area. |
| 3597 | |2: ; cmp foldop, 7; je >1; ja >2 | 3243 | |2: ; cmp foldop, 7; je >1; ja >2 |
| 3598 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | 3244 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 |
| 3599 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | 3245 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 |
| @@ -3608,29 +3254,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3608 | |1: ; maxsd xmm0, xmm1; jmp <7 | 3254 | |1: ; maxsd xmm0, xmm1; jmp <7 |
| 3609 | |9: ; int3 // Bad op. | 3255 | |9: ; int3 // Bad op. |
| 3610 | | | 3256 | | |
| 3611 | |.else // x86 calling convention with x87 ops. | ||
| 3612 | | | ||
| 3613 | | mov eax, [esp+20] | ||
| 3614 | | fld qword [esp+4] | ||
| 3615 | | fld qword [esp+12] | ||
| 3616 | | cmp eax, 1; je >1; ja >2 | ||
| 3617 | | faddp st1; ret | ||
| 3618 | |1: ; fsubp st1; ret | ||
| 3619 | |2: ; cmp eax, 3; je >1; ja >2 | ||
| 3620 | | fmulp st1; ret | ||
| 3621 | |1: ; fdivp st1; ret | ||
| 3622 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | ||
| 3623 | | cmp eax, 7; je >1; ja >2 | ||
| 3624 | | fpop; fchs; ret | ||
| 3625 | |1: ; fpop; fabs; ret | ||
| 3626 | |2: ; cmp eax, 9; je >1; ja >2 | ||
| 3627 | | fpatan; ret | ||
| 3628 | |1: ; fxch; fscale; fpop1; ret | ||
| 3629 | |2: ; cmp eax, 11; je >1; ja >9 | ||
| 3630 | | fucomi st1; fcmovnbe st1; fpop1; ret | ||
| 3631 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | ||
| 3632 | |9: ; int3 // Bad op. | ||
| 3633 | | | ||
| 3634 | |.endif | 3257 | |.endif |
| 3635 | | | 3258 | | |
| 3636 | |//----------------------------------------------------------------------- | 3259 | |//----------------------------------------------------------------------- |
| @@ -3943,19 +3566,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3943 | | // RA is a number. | 3566 | | // RA is a number. |
| 3944 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | 3567 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp |
| 3945 | | // RA is a number, RD is an integer. | 3568 | | // RA is a number, RD is an integer. |
| 3946 | |.if SSE | ||
| 3947 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3569 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
| 3948 | | jmp >2 | 3570 | | jmp >2 |
| 3949 | |.else | ||
| 3950 | | fld qword [BASE+RA*8] | ||
| 3951 | | fild dword [BASE+RD*8] | ||
| 3952 | | jmp >3 | ||
| 3953 | |.endif | ||
| 3954 | | | 3571 | | |
| 3955 | |8: // RA is an integer, RD is not an integer. | 3572 | |8: // RA is an integer, RD is not an integer. |
| 3956 | | ja ->vmeta_comp | 3573 | | ja ->vmeta_comp |
| 3957 | | // RA is an integer, RD is a number. | 3574 | | // RA is an integer, RD is a number. |
| 3958 | |.if SSE | ||
| 3959 | | cvtsi2sd xmm1, dword [BASE+RA*8] | 3575 | | cvtsi2sd xmm1, dword [BASE+RA*8] |
| 3960 | | movsd xmm0, qword [BASE+RD*8] | 3576 | | movsd xmm0, qword [BASE+RD*8] |
| 3961 | | add PC, 4 | 3577 | | add PC, 4 |
| @@ -3963,29 +3579,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3963 | | jmp_comp jbe, ja, jb, jae, <9 | 3579 | | jmp_comp jbe, ja, jb, jae, <9 |
| 3964 | | jmp <6 | 3580 | | jmp <6 |
| 3965 | |.else | 3581 | |.else |
| 3966 | | fild dword [BASE+RA*8] | ||
| 3967 | | jmp >2 | ||
| 3968 | |.endif | ||
| 3969 | |.else | ||
| 3970 | | checknum RA, ->vmeta_comp | 3582 | | checknum RA, ->vmeta_comp |
| 3971 | | checknum RD, ->vmeta_comp | 3583 | | checknum RD, ->vmeta_comp |
| 3972 | |.endif | 3584 | |.endif |
| 3973 | |.if SSE | ||
| 3974 | |1: | 3585 | |1: |
| 3975 | | movsd xmm0, qword [BASE+RD*8] | 3586 | | movsd xmm0, qword [BASE+RD*8] |
| 3976 | |2: | 3587 | |2: |
| 3977 | | add PC, 4 | 3588 | | add PC, 4 |
| 3978 | | ucomisd xmm0, qword [BASE+RA*8] | 3589 | | ucomisd xmm0, qword [BASE+RA*8] |
| 3979 | |3: | 3590 | |3: |
| 3980 | |.else | ||
| 3981 | |1: | ||
| 3982 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | ||
| 3983 | |2: | ||
| 3984 | | fld qword [BASE+RD*8] | ||
| 3985 | |3: | ||
| 3986 | | add PC, 4 | ||
| 3987 | | fcomparepp | ||
| 3988 | |.endif | ||
| 3989 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3591 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
| 3990 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3592 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
| 3991 | |.if DUALNUM | 3593 | |.if DUALNUM |
| @@ -4025,43 +3627,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4025 | | // RD is a number. | 3627 | | // RD is a number. |
| 4026 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | 3628 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 |
| 4027 | | // RD is a number, RA is an integer. | 3629 | | // RD is a number, RA is an integer. |
| 4028 | |.if SSE | ||
| 4029 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3630 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
| 4030 | |.else | ||
| 4031 | | fild dword [BASE+RA*8] | ||
| 4032 | |.endif | ||
| 4033 | | jmp >2 | 3631 | | jmp >2 |
| 4034 | | | 3632 | | |
| 4035 | |8: // RD is an integer, RA is not an integer. | 3633 | |8: // RD is an integer, RA is not an integer. |
| 4036 | | ja >5 | 3634 | | ja >5 |
| 4037 | | // RD is an integer, RA is a number. | 3635 | | // RD is an integer, RA is a number. |
| 4038 | |.if SSE | ||
| 4039 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3636 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
| 4040 | | ucomisd xmm0, qword [BASE+RA*8] | 3637 | | ucomisd xmm0, qword [BASE+RA*8] |
| 4041 | |.else | ||
| 4042 | | fild dword [BASE+RD*8] | ||
| 4043 | | fld qword [BASE+RA*8] | ||
| 4044 | |.endif | ||
| 4045 | | jmp >4 | 3638 | | jmp >4 |
| 4046 | | | 3639 | | |
| 4047 | |.else | 3640 | |.else |
| 4048 | | cmp RB, LJ_TISNUM; jae >5 | 3641 | | cmp RB, LJ_TISNUM; jae >5 |
| 4049 | | checknum RA, >5 | 3642 | | checknum RA, >5 |
| 4050 | |.endif | 3643 | |.endif |
| 4051 | |.if SSE | ||
| 4052 | |1: | 3644 | |1: |
| 4053 | | movsd xmm0, qword [BASE+RA*8] | 3645 | | movsd xmm0, qword [BASE+RA*8] |
| 4054 | |2: | 3646 | |2: |
| 4055 | | ucomisd xmm0, qword [BASE+RD*8] | 3647 | | ucomisd xmm0, qword [BASE+RD*8] |
| 4056 | |4: | 3648 | |4: |
| 4057 | |.else | ||
| 4058 | |1: | ||
| 4059 | | fld qword [BASE+RA*8] | ||
| 4060 | |2: | ||
| 4061 | | fld qword [BASE+RD*8] | ||
| 4062 | |4: | ||
| 4063 | | fcomparepp | ||
| 4064 | |.endif | ||
| 4065 | iseqne_fp: | 3649 | iseqne_fp: |
| 4066 | if (vk) { | 3650 | if (vk) { |
| 4067 | | jp >2 // Unordered means not equal. | 3651 | | jp >2 // Unordered means not equal. |
| @@ -4184,39 +3768,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4184 | | // RA is a number. | 3768 | | // RA is a number. |
| 4185 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | 3769 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 |
| 4186 | | // RA is a number, RD is an integer. | 3770 | | // RA is a number, RD is an integer. |
| 4187 | |.if SSE | ||
| 4188 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | 3771 | | cvtsi2sd xmm0, dword [KBASE+RD*8] |
| 4189 | |.else | ||
| 4190 | | fild dword [KBASE+RD*8] | ||
| 4191 | |.endif | ||
| 4192 | | jmp >2 | 3772 | | jmp >2 |
| 4193 | | | 3773 | | |
| 4194 | |8: // RA is an integer, RD is a number. | 3774 | |8: // RA is an integer, RD is a number. |
| 4195 | |.if SSE | ||
| 4196 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3775 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
| 4197 | | ucomisd xmm0, qword [KBASE+RD*8] | 3776 | | ucomisd xmm0, qword [KBASE+RD*8] |
| 4198 | |.else | ||
| 4199 | | fild dword [BASE+RA*8] | ||
| 4200 | | fld qword [KBASE+RD*8] | ||
| 4201 | |.endif | ||
| 4202 | | jmp >4 | 3777 | | jmp >4 |
| 4203 | |.else | 3778 | |.else |
| 4204 | | cmp RB, LJ_TISNUM; jae >3 | 3779 | | cmp RB, LJ_TISNUM; jae >3 |
| 4205 | |.endif | 3780 | |.endif |
| 4206 | |.if SSE | ||
| 4207 | |1: | 3781 | |1: |
| 4208 | | movsd xmm0, qword [KBASE+RD*8] | 3782 | | movsd xmm0, qword [KBASE+RD*8] |
| 4209 | |2: | 3783 | |2: |
| 4210 | | ucomisd xmm0, qword [BASE+RA*8] | 3784 | | ucomisd xmm0, qword [BASE+RA*8] |
| 4211 | |4: | 3785 | |4: |
| 4212 | |.else | ||
| 4213 | |1: | ||
| 4214 | | fld qword [KBASE+RD*8] | ||
| 4215 | |2: | ||
| 4216 | | fld qword [BASE+RA*8] | ||
| 4217 | |4: | ||
| 4218 | | fcomparepp | ||
| 4219 | |.endif | ||
| 4220 | goto iseqne_fp; | 3786 | goto iseqne_fp; |
| 4221 | case BC_ISEQP: case BC_ISNEP: | 3787 | case BC_ISEQP: case BC_ISNEP: |
| 4222 | vk = op == BC_ISEQP; | 3788 | vk = op == BC_ISEQP; |
| @@ -4310,16 +3876,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4310 | |.else | 3876 | |.else |
| 4311 | | checknum RD, ->vmeta_unm | 3877 | | checknum RD, ->vmeta_unm |
| 4312 | |.endif | 3878 | |.endif |
| 4313 | |.if SSE | ||
| 4314 | | movsd xmm0, qword [BASE+RD*8] | 3879 | | movsd xmm0, qword [BASE+RD*8] |
| 4315 | | sseconst_sign xmm1, RDa | 3880 | | sseconst_sign xmm1, RDa |
| 4316 | | xorps xmm0, xmm1 | 3881 | | xorps xmm0, xmm1 |
| 4317 | | movsd qword [BASE+RA*8], xmm0 | 3882 | | movsd qword [BASE+RA*8], xmm0 |
| 4318 | |.else | ||
| 4319 | | fld qword [BASE+RD*8] | ||
| 4320 | | fchs | ||
| 4321 | | fstp qword [BASE+RA*8] | ||
| 4322 | |.endif | ||
| 4323 | |.if DUALNUM | 3883 | |.if DUALNUM |
| 4324 | | jmp <9 | 3884 | | jmp <9 |
| 4325 | |.else | 3885 | |.else |
| @@ -4335,15 +3895,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4335 | |1: | 3895 | |1: |
| 4336 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 3896 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
| 4337 | | mov dword [BASE+RA*8], RD | 3897 | | mov dword [BASE+RA*8], RD |
| 4338 | |.elif SSE | 3898 | |.else |
| 4339 | | xorps xmm0, xmm0 | 3899 | | xorps xmm0, xmm0 |
| 4340 | | cvtsi2sd xmm0, dword STR:RD->len | 3900 | | cvtsi2sd xmm0, dword STR:RD->len |
| 4341 | |1: | 3901 | |1: |
| 4342 | | movsd qword [BASE+RA*8], xmm0 | 3902 | | movsd qword [BASE+RA*8], xmm0 |
| 4343 | |.else | ||
| 4344 | | fild dword STR:RD->len | ||
| 4345 | |1: | ||
| 4346 | | fstp qword [BASE+RA*8] | ||
| 4347 | |.endif | 3903 | |.endif |
| 4348 | | ins_next | 3904 | | ins_next |
| 4349 | |2: | 3905 | |2: |
| @@ -4361,11 +3917,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4361 | | // Length of table returned in eax (RD). | 3917 | | // Length of table returned in eax (RD). |
| 4362 | |.if DUALNUM | 3918 | |.if DUALNUM |
| 4363 | | // Nothing to do. | 3919 | | // Nothing to do. |
| 4364 | |.elif SSE | ||
| 4365 | | cvtsi2sd xmm0, RD | ||
| 4366 | |.else | 3920 | |.else |
| 4367 | | mov ARG1, RD | 3921 | | cvtsi2sd xmm0, RD |
| 4368 | | fild ARG1 | ||
| 4369 | |.endif | 3922 | |.endif |
| 4370 | | mov BASE, RB // Restore BASE. | 3923 | | mov BASE, RB // Restore BASE. |
| 4371 | | movzx RA, PC_RA | 3924 | | movzx RA, PC_RA |
| @@ -4380,7 +3933,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4380 | 3933 | ||
| 4381 | /* -- Binary ops -------------------------------------------------------- */ | 3934 | /* -- Binary ops -------------------------------------------------------- */ |
| 4382 | 3935 | ||
| 4383 | |.macro ins_arithpre, x87ins, sseins, ssereg | 3936 | |.macro ins_arithpre, sseins, ssereg |
| 4384 | | ins_ABC | 3937 | | ins_ABC |
| 4385 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3938 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
| 4386 | ||switch (vk) { | 3939 | ||switch (vk) { |
| @@ -4389,37 +3942,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4389 | | .if DUALNUM | 3942 | | .if DUALNUM |
| 4390 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | 3943 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn |
| 4391 | | .endif | 3944 | | .endif |
| 4392 | | .if SSE | 3945 | | movsd xmm0, qword [BASE+RB*8] |
| 4393 | | movsd xmm0, qword [BASE+RB*8] | 3946 | | sseins ssereg, qword [KBASE+RC*8] |
| 4394 | | sseins ssereg, qword [KBASE+RC*8] | ||
| 4395 | | .else | ||
| 4396 | | fld qword [BASE+RB*8] | ||
| 4397 | | x87ins qword [KBASE+RC*8] | ||
| 4398 | | .endif | ||
| 4399 | || break; | 3947 | || break; |
| 4400 | ||case 1: | 3948 | ||case 1: |
| 4401 | | checknum RB, ->vmeta_arith_nv | 3949 | | checknum RB, ->vmeta_arith_nv |
| 4402 | | .if DUALNUM | 3950 | | .if DUALNUM |
| 4403 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | 3951 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv |
| 4404 | | .endif | 3952 | | .endif |
| 4405 | | .if SSE | 3953 | | movsd xmm0, qword [KBASE+RC*8] |
| 4406 | | movsd xmm0, qword [KBASE+RC*8] | 3954 | | sseins ssereg, qword [BASE+RB*8] |
| 4407 | | sseins ssereg, qword [BASE+RB*8] | ||
| 4408 | | .else | ||
| 4409 | | fld qword [KBASE+RC*8] | ||
| 4410 | | x87ins qword [BASE+RB*8] | ||
| 4411 | | .endif | ||
| 4412 | || break; | 3955 | || break; |
| 4413 | ||default: | 3956 | ||default: |
| 4414 | | checknum RB, ->vmeta_arith_vv | 3957 | | checknum RB, ->vmeta_arith_vv |
| 4415 | | checknum RC, ->vmeta_arith_vv | 3958 | | checknum RC, ->vmeta_arith_vv |
| 4416 | | .if SSE | 3959 | | movsd xmm0, qword [BASE+RB*8] |
| 4417 | | movsd xmm0, qword [BASE+RB*8] | 3960 | | sseins ssereg, qword [BASE+RC*8] |
| 4418 | | sseins ssereg, qword [BASE+RC*8] | ||
| 4419 | | .else | ||
| 4420 | | fld qword [BASE+RB*8] | ||
| 4421 | | x87ins qword [BASE+RC*8] | ||
| 4422 | | .endif | ||
| 4423 | || break; | 3961 | || break; |
| 4424 | ||} | 3962 | ||} |
| 4425 | |.endmacro | 3963 | |.endmacro |
| @@ -4457,54 +3995,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4457 | |.endmacro | 3995 | |.endmacro |
| 4458 | | | 3996 | | |
| 4459 | |.macro ins_arithpost | 3997 | |.macro ins_arithpost |
| 4460 | |.if SSE | ||
| 4461 | | movsd qword [BASE+RA*8], xmm0 | 3998 | | movsd qword [BASE+RA*8], xmm0 |
| 4462 | |.else | ||
| 4463 | | fstp qword [BASE+RA*8] | ||
| 4464 | |.endif | ||
| 4465 | |.endmacro | 3999 | |.endmacro |
| 4466 | | | 4000 | | |
| 4467 | |.macro ins_arith, x87ins, sseins | 4001 | |.macro ins_arith, sseins |
| 4468 | | ins_arithpre x87ins, sseins, xmm0 | 4002 | | ins_arithpre sseins, xmm0 |
| 4469 | | ins_arithpost | 4003 | | ins_arithpost |
| 4470 | | ins_next | 4004 | | ins_next |
| 4471 | |.endmacro | 4005 | |.endmacro |
| 4472 | | | 4006 | | |
| 4473 | |.macro ins_arith, intins, x87ins, sseins | 4007 | |.macro ins_arith, intins, sseins |
| 4474 | |.if DUALNUM | 4008 | |.if DUALNUM |
| 4475 | | ins_arithdn intins | 4009 | | ins_arithdn intins |
| 4476 | |.else | 4010 | |.else |
| 4477 | | ins_arith, x87ins, sseins | 4011 | | ins_arith, sseins |
| 4478 | |.endif | 4012 | |.endif |
| 4479 | |.endmacro | 4013 | |.endmacro |
| 4480 | 4014 | ||
| 4481 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 4015 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
| 4482 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 4016 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
| 4483 | | ins_arith add, fadd, addsd | 4017 | | ins_arith add, addsd |
| 4484 | break; | 4018 | break; |
| 4485 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 4019 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
| 4486 | | ins_arith sub, fsub, subsd | 4020 | | ins_arith sub, subsd |
| 4487 | break; | 4021 | break; |
| 4488 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 4022 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
| 4489 | | ins_arith imul, fmul, mulsd | 4023 | | ins_arith imul, mulsd |
| 4490 | break; | 4024 | break; |
| 4491 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 4025 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
| 4492 | | ins_arith fdiv, divsd | 4026 | | ins_arith divsd |
| 4493 | break; | 4027 | break; |
| 4494 | case BC_MODVN: | 4028 | case BC_MODVN: |
| 4495 | | ins_arithpre fld, movsd, xmm1 | 4029 | | ins_arithpre movsd, xmm1 |
| 4496 | |->BC_MODVN_Z: | 4030 | |->BC_MODVN_Z: |
| 4497 | | call ->vm_mod | 4031 | | call ->vm_mod |
| 4498 | | ins_arithpost | 4032 | | ins_arithpost |
| 4499 | | ins_next | 4033 | | ins_next |
| 4500 | break; | 4034 | break; |
| 4501 | case BC_MODNV: case BC_MODVV: | 4035 | case BC_MODNV: case BC_MODVV: |
| 4502 | | ins_arithpre fld, movsd, xmm1 | 4036 | | ins_arithpre movsd, xmm1 |
| 4503 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 4037 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
| 4504 | break; | 4038 | break; |
| 4505 | case BC_POW: | 4039 | case BC_POW: |
| 4506 | | ins_arithpre fld, movsd, xmm1 | 4040 | | ins_arithpre movsd, xmm1 |
| 4507 | | call ->vm_pow | 4041 | | call ->vm_pow_sse |
| 4508 | | ins_arithpost | 4042 | | ins_arithpost |
| 4509 | | ins_next | 4043 | | ins_next |
| 4510 | break; | 4044 | break; |
| @@ -4573,25 +4107,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4573 | | movsx RD, RDW | 4107 | | movsx RD, RDW |
| 4574 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4108 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
| 4575 | | mov dword [BASE+RA*8], RD | 4109 | | mov dword [BASE+RA*8], RD |
| 4576 | |.elif SSE | 4110 | |.else |
| 4577 | | movsx RD, RDW // Sign-extend literal. | 4111 | | movsx RD, RDW // Sign-extend literal. |
| 4578 | | cvtsi2sd xmm0, RD | 4112 | | cvtsi2sd xmm0, RD |
| 4579 | | movsd qword [BASE+RA*8], xmm0 | 4113 | | movsd qword [BASE+RA*8], xmm0 |
| 4580 | |.else | ||
| 4581 | | fild PC_RD // Refetch signed RD from instruction. | ||
| 4582 | | fstp qword [BASE+RA*8] | ||
| 4583 | |.endif | 4114 | |.endif |
| 4584 | | ins_next | 4115 | | ins_next |
| 4585 | break; | 4116 | break; |
| 4586 | case BC_KNUM: | 4117 | case BC_KNUM: |
| 4587 | | ins_AD // RA = dst, RD = num const | 4118 | | ins_AD // RA = dst, RD = num const |
| 4588 | |.if SSE | ||
| 4589 | | movsd xmm0, qword [KBASE+RD*8] | 4119 | | movsd xmm0, qword [KBASE+RD*8] |
| 4590 | | movsd qword [BASE+RA*8], xmm0 | 4120 | | movsd qword [BASE+RA*8], xmm0 |
| 4591 | |.else | ||
| 4592 | | fld qword [KBASE+RD*8] | ||
| 4593 | | fstp qword [BASE+RA*8] | ||
| 4594 | |.endif | ||
| 4595 | | ins_next | 4121 | | ins_next |
| 4596 | break; | 4122 | break; |
| 4597 | case BC_KPRI: | 4123 | case BC_KPRI: |
| @@ -4698,18 +4224,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4698 | case BC_USETN: | 4224 | case BC_USETN: |
| 4699 | | ins_AD // RA = upvalue #, RD = num const | 4225 | | ins_AD // RA = upvalue #, RD = num const |
| 4700 | | mov LFUNC:RB, [BASE-8] | 4226 | | mov LFUNC:RB, [BASE-8] |
| 4701 | |.if SSE | ||
| 4702 | | movsd xmm0, qword [KBASE+RD*8] | 4227 | | movsd xmm0, qword [KBASE+RD*8] |
| 4703 | |.else | ||
| 4704 | | fld qword [KBASE+RD*8] | ||
| 4705 | |.endif | ||
| 4706 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 4228 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
| 4707 | | mov RA, UPVAL:RB->v | 4229 | | mov RA, UPVAL:RB->v |
| 4708 | |.if SSE | ||
| 4709 | | movsd qword [RA], xmm0 | 4230 | | movsd qword [RA], xmm0 |
| 4710 | |.else | ||
| 4711 | | fstp qword [RA] | ||
| 4712 | |.endif | ||
| 4713 | | ins_next | 4231 | | ins_next |
| 4714 | break; | 4232 | break; |
| 4715 | case BC_USETP: | 4233 | case BC_USETP: |
| @@ -4863,18 +4381,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4863 | |.else | 4381 | |.else |
| 4864 | | // Convert number to int and back and compare. | 4382 | | // Convert number to int and back and compare. |
| 4865 | | checknum RC, >5 | 4383 | | checknum RC, >5 |
| 4866 | |.if SSE | ||
| 4867 | | movsd xmm0, qword [BASE+RC*8] | 4384 | | movsd xmm0, qword [BASE+RC*8] |
| 4868 | | cvtsd2si RC, xmm0 | 4385 | | cvtsd2si RC, xmm0 |
| 4869 | | cvtsi2sd xmm1, RC | 4386 | | cvtsi2sd xmm1, RC |
| 4870 | | ucomisd xmm0, xmm1 | 4387 | | ucomisd xmm0, xmm1 |
| 4871 | |.else | ||
| 4872 | | fld qword [BASE+RC*8] | ||
| 4873 | | fist ARG1 | ||
| 4874 | | fild ARG1 | ||
| 4875 | | fcomparepp | ||
| 4876 | | mov RC, ARG1 | ||
| 4877 | |.endif | ||
| 4878 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | 4388 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |
| 4879 | |.endif | 4389 | |.endif |
| 4880 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4390 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
| @@ -5011,18 +4521,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 5011 | |.else | 4521 | |.else |
| 5012 | | // Convert number to int and back and compare. | 4522 | | // Convert number to int and back and compare. |
| 5013 | | checknum RC, >5 | 4523 | | checknum RC, >5 |
| 5014 | |.if SSE | ||
| 5015 | | movsd xmm0, qword [BASE+RC*8] | 4524 | | movsd xmm0, qword [BASE+RC*8] |
| 5016 | | cvtsd2si RC, xmm0 | 4525 | | cvtsd2si RC, xmm0 |
| 5017 | | cvtsi2sd xmm1, RC | 4526 | | cvtsi2sd xmm1, RC |
| 5018 | | ucomisd xmm0, xmm1 | 4527 | | ucomisd xmm0, xmm1 |
| 5019 | |.else | ||
| 5020 | | fld qword [BASE+RC*8] | ||
| 5021 | | fist ARG1 | ||
| 5022 | | fild ARG1 | ||
| 5023 | | fcomparepp | ||
| 5024 | | mov RC, ARG1 | ||
| 5025 | |.endif | ||
| 5026 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | 4528 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |
| 5027 | |.endif | 4529 | |.endif |
| 5028 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4530 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
| @@ -5386,10 +4888,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 5386 | |.if DUALNUM | 4888 | |.if DUALNUM |
| 5387 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4889 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
| 5388 | | mov dword [BASE+RA*8], RC | 4890 | | mov dword [BASE+RA*8], RC |
| 5389 | |.elif SSE | ||
| 5390 | | cvtsi2sd xmm0, RC | ||
| 5391 | |.else | 4891 | |.else |
| 5392 | | fild dword [BASE+RA*8-8] | 4892 | | cvtsi2sd xmm0, RC |
| 5393 | |.endif | 4893 | |.endif |
| 5394 | | // Copy array slot to returned value. | 4894 | | // Copy array slot to returned value. |
| 5395 | |.if X64 | 4895 | |.if X64 |
| @@ -5405,10 +4905,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 5405 | | // Return array index as a numeric key. | 4905 | | // Return array index as a numeric key. |
| 5406 | |.if DUALNUM | 4906 | |.if DUALNUM |
| 5407 | | // See above. | 4907 | | // See above. |
| 5408 | |.elif SSE | ||
| 5409 | | movsd qword [BASE+RA*8], xmm0 | ||
| 5410 | |.else | 4908 | |.else |
| 5411 | | fstp qword [BASE+RA*8] | 4909 | | movsd qword [BASE+RA*8], xmm0 |
| 5412 | |.endif | 4910 | |.endif |
| 5413 | | mov [BASE+RA*8-8], RC // Update control var. | 4911 | | mov [BASE+RA*8-8], RC // Update control var. |
| 5414 | |2: | 4912 | |2: |
| @@ -5421,9 +4919,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 5421 | | | 4919 | | |
| 5422 | |4: // Skip holes in array part. | 4920 | |4: // Skip holes in array part. |
| 5423 | | add RC, 1 | 4921 | | add RC, 1 |
| 5424 | |.if not (DUALNUM or SSE) | ||
| 5425 | | mov [BASE+RA*8-8], RC | ||
| 5426 | |.endif | ||
| 5427 | | jmp <1 | 4922 | | jmp <1 |
| 5428 | | | 4923 | | |
| 5429 | |5: // Traverse hash part. | 4924 | |5: // Traverse hash part. |
| @@ -5757,7 +5252,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 5757 | if (!vk) { | 5252 | if (!vk) { |
| 5758 | | cmp RB, LJ_TISNUM; jae ->vmeta_for | 5253 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
| 5759 | } | 5254 | } |
| 5760 | |.if SSE | ||
| 5761 | | movsd xmm0, qword FOR_IDX | 5255 | | movsd xmm0, qword FOR_IDX |
| 5762 | | movsd xmm1, qword FOR_STOP | 5256 | | movsd xmm1, qword FOR_STOP |
| 5763 | if (vk) { | 5257 | if (vk) { |
| @@ -5770,22 +5264,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 5770 | | ucomisd xmm1, xmm0 | 5264 | | ucomisd xmm1, xmm0 |
| 5771 | |1: | 5265 | |1: |
| 5772 | | movsd qword FOR_EXT, xmm0 | 5266 | | movsd qword FOR_EXT, xmm0 |
| 5773 | |.else | ||
| 5774 | | fld qword FOR_STOP | ||
| 5775 | | fld qword FOR_IDX | ||
| 5776 | if (vk) { | ||
| 5777 | | fadd qword FOR_STEP // nidx = idx + step | ||
| 5778 | | fst qword FOR_IDX | ||
| 5779 | | fst qword FOR_EXT | ||
| 5780 | | test RB, RB; js >1 | ||
| 5781 | } else { | ||
| 5782 | | fst qword FOR_EXT | ||
| 5783 | | jl >1 | ||
| 5784 | } | ||
| 5785 | | fxch // Swap lim/(n)idx if step non-negative. | ||
| 5786 | |1: | ||
| 5787 | | fcomparepp | ||
| 5788 | |.endif | ||
| 5789 | if (op == BC_FORI) { | 5267 | if (op == BC_FORI) { |
| 5790 | |.if DUALNUM | 5268 | |.if DUALNUM |
| 5791 | | jnb <7 | 5269 | | jnb <7 |
| @@ -5813,11 +5291,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 5813 | |2: | 5291 | |2: |
| 5814 | | ins_next | 5292 | | ins_next |
| 5815 | |.endif | 5293 | |.endif |
| 5816 | |.if SSE | 5294 | | |
| 5817 | |3: // Invert comparison if step is negative. | 5295 | |3: // Invert comparison if step is negative. |
| 5818 | | ucomisd xmm0, xmm1 | 5296 | | ucomisd xmm0, xmm1 |
| 5819 | | jmp <1 | 5297 | | jmp <1 |
| 5820 | |.endif | ||
| 5821 | break; | 5298 | break; |
| 5822 | 5299 | ||
| 5823 | case BC_ITERL: | 5300 | case BC_ITERL: |
