about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author: Mike Pall <mike> 2012-06-10 16:44:33 +0200
committer: Mike Pall <mike> 2012-06-10 16:50:46 +0200
commit: 58ec704f78e311e6af97841a9e26cd7187955494 (patch)
tree: c66d9aedcbd7ed7945573b571c4e2737050e31b3 /src
parent: e496a502b0686af25053c161752c044074edc44e (diff)
download: luajit-58ec704f78e311e6af97841a9e26cd7187955494.tar.gz
luajit-58ec704f78e311e6af97841a9e26cd7187955494.tar.bz2
luajit-58ec704f78e311e6af97841a9e26cd7187955494.zip
x86/x64: Clean up interpreter.
Use DynASM defines instead of C defines. Remove support for ancient CPUs without CMOV (before Pentium Pro).
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile 19
-rw-r--r-- src/msvcbuild.bat 7
-rw-r--r-- src/vm_x86.dasc 2226
3 files changed, 1076 insertions, 1176 deletions
diff --git a/src/Makefile b/src/Makefile
index d9bb178b..9d21c3fb 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -42,9 +42,8 @@ CCOPT= -O2 -fomit-frame-pointer
42# 42#
43# Target-specific compiler options: 43# Target-specific compiler options:
44# 44#
45# x86 only: it's recommended to compile at least for i686. By default the 45# x86 only: it's recommended to compile at least for i686. Better yet,
46# assembler part of the interpreter makes use of CMOV/FCOMI*/FUCOMI* 46# compile for an architecture that has SSE2, too (-msse -msse2).
47# instructions, anyway.
48# 47#
49# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 48# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
50# the binaries to a different machine you could also use: -march=native 49# the binaries to a different machine you could also use: -march=native
@@ -105,20 +104,6 @@ XCFLAGS=
105# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. 104# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter.
106#XCFLAGS+= -DLUAJIT_DISABLE_JIT 105#XCFLAGS+= -DLUAJIT_DISABLE_JIT
107# 106#
108# x86 only: use SSE2 instead of x87 instructions in the interpreter
109# (always enabled for x64). A pure interpreter built with this flag won't
110# run on older CPUs (before P4 or K8). There isn't much of a speed
111# difference, so this is not enabled by default.
112# The JIT compiler is not affected by this flag. It always uses runtime
113# CPU feature detection before emitting code for SSE2 up to SSE4.1.
114#XCFLAGS+= -DLUAJIT_CPU_SSE2
115#
116# x86 only: Disable the use of CMOV and FCOMI*/FUCOMI* instructions in the
117# interpreter. Do this only if you intend to use REALLY ANCIENT CPUs
118# (before Pentium Pro, or on the VIA C3). This generally slows down the
119# interpreter. Don't bother if your OS wouldn't run on them, anyway.
120#XCFLAGS+= -DLUAJIT_CPU_NOCMOV
121#
122# Some architectures (e.g. PPC) can use either single-number (1) or 107# Some architectures (e.g. PPC) can use either single-number (1) or
123# dual-number (2) mode. Uncomment one of these lines to override the 108# dual-number (2) mode. Uncomment one of these lines to override the
124# default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. 109# default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details.
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index ad6f2113..ca943a63 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -29,15 +29,16 @@
29if exist minilua.exe.manifest^ 29if exist minilua.exe.manifest^
30 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe 30 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
31 31
32@set DASMFLAGS=-D X64 -D WIN 32@set DASMFLAGS=-D WIN -D JIT -D FFI
33@set DASMX64=-D X64
33@if defined CPU goto :XCPU 34@if defined CPU goto :XCPU
34@set CPU=%PROCESSOR_ARCHITECTURE% 35@set CPU=%PROCESSOR_ARCHITECTURE%
35:XCPU 36:XCPU
36@if "%CPU%"=="AMD64" goto :X64 37@if "%CPU%"=="AMD64" goto :X64
37@if "%CPU%"=="X64" goto :X64 38@if "%CPU%"=="X64" goto :X64
38@set DASMFLAGS=-D WIN 39@set DASMX64=
39:X64 40:X64
40minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 41minilua %DASM% -LN %DASMFLAGS% %DASMX64% -o host\buildvm_arch.h vm_x86.dasc
41@if errorlevel 1 goto :BAD 42@if errorlevel 1 goto :BAD
42 43
43%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c 44%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 1cab76eb..38b268d4 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -50,7 +50,7 @@
50|.define RAH, ch 50|.define RAH, ch
51|.define RAL, cl 51|.define RAL, cl
52|.define RB, ebp // Must be ebp (C callee-save). 52|.define RB, ebp // Must be ebp (C callee-save).
53|.define RC, eax // Must be eax (fcomparepp and others). 53|.define RC, eax // Must be eax.
54|.define RCW, ax 54|.define RCW, ax
55|.define RCH, ah 55|.define RCH, ah
56|.define RCL, al 56|.define RCL, al
@@ -366,16 +366,10 @@
366| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st 366| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
367|.endmacro 367|.endmacro
368| 368|
369|// Annoying x87 stuff: support for two compare variants. 369|// x87 compares.
370|.macro fcomparepp // Compare and pop st0 >< st1. 370|.macro fcomparepp // Compare and pop st0 >< st1.
371||if (cmov) {
372| fucomip st1 371| fucomip st1
373| fpop 372| fpop
374||} else {
375| fucompp
376| fnstsw ax // eax modified!
377| sahf
378||}
379|.endmacro 373|.endmacro
380| 374|
381|.macro fdup; fld st0; .endmacro 375|.macro fdup; fld st0; .endmacro
@@ -426,7 +420,7 @@
426 420
427/* Generate subroutines used by opcodes and other parts of the VM. */ 421/* Generate subroutines used by opcodes and other parts of the VM. */
428/* The .code_sub section should be last to help static branch prediction. */ 422/* The .code_sub section should be last to help static branch prediction. */
429static void build_subroutines(BuildCtx *ctx, int cmov, int sse) 423static void build_subroutines(BuildCtx *ctx)
430{ 424{
431 |.code_sub 425 |.code_sub
432 | 426 |
@@ -776,18 +770,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
776 | mov PC, [RB-12] // Restore PC from [cont|PC]. 770 | mov PC, [RB-12] // Restore PC from [cont|PC].
777 |.if X64 771 |.if X64
778 | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. 772 | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
779#if LJ_HASFFI 773 |.if FFI
780 | cmp RA, 1 774 | cmp RA, 1
781 | jbe >1 775 | jbe >1
782#endif 776 |.endif
783 | lea KBASEa, qword [=>0] 777 | lea KBASEa, qword [=>0]
784 | add RAa, KBASEa 778 | add RAa, KBASEa
785 |.else 779 |.else
786 | mov RA, dword [RB-16] 780 | mov RA, dword [RB-16]
787#if LJ_HASFFI 781 |.if FFI
788 | cmp RA, 1 782 | cmp RA, 1
789 | jbe >1 783 | jbe >1
790#endif 784 |.endif
791 |.endif 785 |.endif
792 | mov LFUNC:KBASE, [BASE-8] 786 | mov LFUNC:KBASE, [BASE-8]
793 | mov KBASE, LFUNC:KBASE->pc 787 | mov KBASE, LFUNC:KBASE->pc
@@ -795,7 +789,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
795 | // BASE = base, RC = result, RB = meta base 789 | // BASE = base, RC = result, RB = meta base
796 | jmp RAa // Jump to continuation. 790 | jmp RAa // Jump to continuation.
797 | 791 |
798#if LJ_HASFFI 792 |.if FFI
799 |1: 793 |1:
800 | je ->cont_ffi_callback // cont = 1: return from FFI callback. 794 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
801 | // cont = 0: Tail call from C function. 795 | // cont = 0: Tail call from C function.
@@ -803,7 +797,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
803 | shr RB, 3 797 | shr RB, 3
804 | lea RD, [RB-1] 798 | lea RD, [RB-1]
805 | jmp ->vm_call_tail 799 | jmp ->vm_call_tail
806#endif 800 |.endif
807 | 801 |
808 |->cont_cat: // BASE = base, RC = result, RB = mbase 802 |->cont_cat: // BASE = base, RC = result, RB = mbase
809 | movzx RA, PC_RB 803 | movzx RA, PC_RB
@@ -853,19 +847,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
853 | 847 |
854 |->vmeta_tgetb: 848 |->vmeta_tgetb:
855 | movzx RC, PC_RC 849 | movzx RC, PC_RC
856 if (LJ_DUALNUM) { 850 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 851 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 852 | mov TMP1, RC
859 } else if (sse) { 853 |.elif SSE
860 | cvtsi2sd xmm0, RC 854 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 855 | movsd TMPQ, xmm0
862 } else { 856 |.else
863 |.if not X64 857 | mov ARG4, RC
864 | mov ARG4, RC 858 | fild ARG4
865 | fild ARG4 859 | fstp TMPQ
866 | fstp TMPQ 860 |.endif
867 |.endif
868 }
869 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 861 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
870 | jmp >1 862 | jmp >1
871 | 863 |
@@ -934,19 +926,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
934 | 926 |
935 |->vmeta_tsetb: 927 |->vmeta_tsetb:
936 | movzx RC, PC_RC 928 | movzx RC, PC_RC
937 if (LJ_DUALNUM) { 929 |.if DUALNUM
938 | mov TMP2, LJ_TISNUM 930 | mov TMP2, LJ_TISNUM
939 | mov TMP1, RC 931 | mov TMP1, RC
940 } else if (sse) { 932 |.elif SSE
941 | cvtsi2sd xmm0, RC 933 | cvtsi2sd xmm0, RC
942 | movsd TMPQ, xmm0 934 | movsd TMPQ, xmm0
943 } else { 935 |.else
944 |.if not X64 936 | mov ARG4, RC
945 | mov ARG4, RC 937 | fild ARG4
946 | fild ARG4 938 | fstp TMPQ
947 | fstp TMPQ 939 |.endif
948 |.endif
949 }
950 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 940 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
951 | jmp >1 941 | jmp >1
952 | 942 |
@@ -1093,7 +1083,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1093 | jmp <3 1083 | jmp <3
1094 | 1084 |
1095 |->vmeta_equal_cd: 1085 |->vmeta_equal_cd:
1096#if LJ_HASFFI 1086 |.if FFI
1097 | sub PC, 4 1087 | sub PC, 4
1098 | mov L:RB, SAVE_L 1088 | mov L:RB, SAVE_L
1099 | mov L:RB->base, BASE 1089 | mov L:RB->base, BASE
@@ -1103,22 +1093,22 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1103 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) 1093 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
1104 | // 0/1 or TValue * (metamethod) returned in eax (RC). 1094 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1105 | jmp <3 1095 | jmp <3
1106#endif 1096 |.endif
1107 | 1097 |
1108 |//-- Arithmetic metamethods --------------------------------------------- 1098 |//-- Arithmetic metamethods ---------------------------------------------
1109 | 1099 |
1110 |->vmeta_arith_vno: 1100 |->vmeta_arith_vno:
1111#if LJ_DUALNUM 1101 |.if DUALNUM
1112 | movzx RB, PC_RB 1102 | movzx RB, PC_RB
1113#endif 1103 |.endif
1114 |->vmeta_arith_vn: 1104 |->vmeta_arith_vn:
1115 | lea RC, [KBASE+RC*8] 1105 | lea RC, [KBASE+RC*8]
1116 | jmp >1 1106 | jmp >1
1117 | 1107 |
1118 |->vmeta_arith_nvo: 1108 |->vmeta_arith_nvo:
1119#if LJ_DUALNUM 1109 |.if DUALNUM
1120 | movzx RC, PC_RC 1110 | movzx RC, PC_RC
1121#endif 1111 |.endif
1122 |->vmeta_arith_nv: 1112 |->vmeta_arith_nv:
1123 | lea RC, [KBASE+RC*8] 1113 | lea RC, [KBASE+RC*8]
1124 | lea RB, [BASE+RB*8] 1114 | lea RB, [BASE+RB*8]
@@ -1131,9 +1121,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1131 | jmp >2 1121 | jmp >2
1132 | 1122 |
1133 |->vmeta_arith_vvo: 1123 |->vmeta_arith_vvo:
1134#if LJ_DUALNUM 1124 |.if DUALNUM
1135 | movzx RB, PC_RB 1125 | movzx RB, PC_RB
1136#endif 1126 |.endif
1137 |->vmeta_arith_vv: 1127 |->vmeta_arith_vv:
1138 | lea RC, [BASE+RC*8] 1128 | lea RC, [BASE+RC*8]
1139 |1: 1129 |1:
@@ -1374,11 +1364,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1374 | mov RC, ~LJ_TNUMX 1364 | mov RC, ~LJ_TNUMX
1375 | not RB 1365 | not RB
1376 | cmp RC, RB 1366 | cmp RC, RB
1377 ||if (cmov) {
1378 | cmova RC, RB 1367 | cmova RC, RB
1379 ||} else {
1380 | jbe >1; mov RC, RB; 1:
1381 ||}
1382 |2: 1368 |2:
1383 | mov CFUNC:RB, [BASE-8] 1369 | mov CFUNC:RB, [BASE-8]
1384 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] 1370 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
@@ -1509,19 +1495,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1509 | // Only handles the number case inline (without a base argument). 1495 | // Only handles the number case inline (without a base argument).
1510 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 1496 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
1511 | cmp dword [BASE+4], LJ_TISNUM 1497 | cmp dword [BASE+4], LJ_TISNUM
1512 if (LJ_DUALNUM) { 1498 |.if DUALNUM
1513 | jne >1 1499 | jne >1
1514 | mov RB, dword [BASE]; jmp ->fff_resi 1500 | mov RB, dword [BASE]; jmp ->fff_resi
1515 |1: 1501 |1:
1516 | ja ->fff_fallback 1502 | ja ->fff_fallback
1517 } else { 1503 |.else
1518 | jae ->fff_fallback 1504 | jae ->fff_fallback
1519 } 1505 |.endif
1520 if (sse) { 1506 |.if SSE
1521 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1507 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1522 } else { 1508 |.else
1523 | fld qword [BASE]; jmp ->fff_resn 1509 | fld qword [BASE]; jmp ->fff_resn
1524 } 1510 |.endif
1525 | 1511 |
1526 |.ffunc_1 tostring 1512 |.ffunc_1 tostring
1527 | // Only handles the string or number case inline. 1513 | // Only handles the string or number case inline.
@@ -1545,11 +1531,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1545 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE 1531 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
1546 |.endif 1532 |.endif
1547 | mov L:FCARG1, L:RB 1533 | mov L:FCARG1, L:RB
1548 if (LJ_DUALNUM) { 1534 |.if DUALNUM
1549 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1535 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o)
1550 } else { 1536 |.else
1551 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1537 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
1552 } 1538 |.endif
1553 | // GCstr returned in eax (RD). 1539 | // GCstr returned in eax (RD).
1554 | mov BASE, L:RB->base 1540 | mov BASE, L:RB->base
1555 | jmp <2 1541 | jmp <2
@@ -1628,33 +1614,31 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1628 |.ffunc_1 ipairs_aux 1614 |.ffunc_1 ipairs_aux
1629 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1615 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1630 | cmp dword [BASE+12], LJ_TISNUM 1616 | cmp dword [BASE+12], LJ_TISNUM
1631 if (LJ_DUALNUM) { 1617 |.if DUALNUM
1632 | jne ->fff_fallback 1618 | jne ->fff_fallback
1633 } else { 1619 |.else
1634 | jae ->fff_fallback 1620 | jae ->fff_fallback
1635 } 1621 |.endif
1636 | mov PC, [BASE-4] 1622 | mov PC, [BASE-4]
1637 if (LJ_DUALNUM) { 1623 |.if DUALNUM
1638 | mov RD, dword [BASE+8] 1624 | mov RD, dword [BASE+8]
1639 | add RD, 1 1625 | add RD, 1
1640 | mov dword [BASE-4], LJ_TISNUM 1626 | mov dword [BASE-4], LJ_TISNUM
1641 | mov dword [BASE-8], RD 1627 | mov dword [BASE-8], RD
1642 } else if (sse) { 1628 |.elif SSE
1643 | movsd xmm0, qword [BASE+8] 1629 | movsd xmm0, qword [BASE+8]
1644 | sseconst_1 xmm1, RBa 1630 | sseconst_1 xmm1, RBa
1645 | addsd xmm0, xmm1 1631 | addsd xmm0, xmm1
1646 | cvtsd2si RD, xmm0 1632 | cvtsd2si RD, xmm0
1647 | movsd qword [BASE-8], xmm0 1633 | movsd qword [BASE-8], xmm0
1648 } else { 1634 |.else
1649 |.if not X64 1635 | fld qword [BASE+8]
1650 | fld qword [BASE+8] 1636 | fld1
1651 | fld1 1637 | faddp st1
1652 | faddp st1 1638 | fist ARG1
1653 | fist ARG1 1639 | fstp qword [BASE-8]
1654 | fstp qword [BASE-8] 1640 | mov RD, ARG1
1655 | mov RD, ARG1 1641 |.endif
1656 |.endif
1657 }
1658 | mov TAB:RB, [BASE] 1642 | mov TAB:RB, [BASE]
1659 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1643 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
1660 | shl RD, 3 1644 | shl RD, 3
@@ -1697,16 +1681,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1697 | mov PC, [BASE-4] 1681 | mov PC, [BASE-4]
1698 | mov dword [BASE-4], LJ_TFUNC 1682 | mov dword [BASE-4], LJ_TFUNC
1699 | mov [BASE-8], CFUNC:RD 1683 | mov [BASE-8], CFUNC:RD
1700 if (LJ_DUALNUM) { 1684 |.if DUALNUM
1701 | mov dword [BASE+12], LJ_TISNUM 1685 | mov dword [BASE+12], LJ_TISNUM
1702 | mov dword [BASE+8], 0 1686 | mov dword [BASE+8], 0
1703 } else if (sse) { 1687 |.elif SSE
1704 | xorps xmm0, xmm0 1688 | xorps xmm0, xmm0
1705 | movsd qword [BASE+8], xmm0 1689 | movsd qword [BASE+8], xmm0
1706 } else { 1690 |.else
1707 | fldz 1691 | fldz
1708 | fstp qword [BASE+8] 1692 | fstp qword [BASE+8]
1709 } 1693 |.endif
1710 | mov RD, 1+3 1694 | mov RD, 1+3
1711 | jmp ->fff_res 1695 | jmp ->fff_res
1712 | 1696 |
@@ -1931,54 +1915,58 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1931 | 1915 |
1932 |//-- Math library ------------------------------------------------------- 1916 |//-- Math library -------------------------------------------------------
1933 | 1917 |
1934 if (!LJ_DUALNUM) { 1918 |.if not DUALNUM
1935 |->fff_resi: // Dummy. 1919 |->fff_resi: // Dummy.
1936 } 1920 |.endif
1937 if (sse) { 1921 |
1938 |->fff_resn: 1922 |.if SSE
1939 | mov PC, [BASE-4] 1923 |->fff_resn:
1940 | fstp qword [BASE-8] 1924 | mov PC, [BASE-4]
1941 | jmp ->fff_res1 1925 | fstp qword [BASE-8]
1942 } 1926 | jmp ->fff_res1
1927 |.endif
1928 |
1943 | .ffunc_1 math_abs 1929 | .ffunc_1 math_abs
1944 if (LJ_DUALNUM) { 1930 |.if DUALNUM
1945 | cmp dword [BASE+4], LJ_TISNUM; jne >2 1931 | cmp dword [BASE+4], LJ_TISNUM; jne >2
1946 | mov RB, dword [BASE] 1932 | mov RB, dword [BASE]
1947 | cmp RB, 0; jns ->fff_resi 1933 | cmp RB, 0; jns ->fff_resi
1948 | neg RB; js >1 1934 | neg RB; js >1
1949 |->fff_resbit: 1935 |->fff_resbit:
1950 |->fff_resi: 1936 |->fff_resi:
1951 | mov PC, [BASE-4] 1937 | mov PC, [BASE-4]
1952 | mov dword [BASE-4], LJ_TISNUM 1938 | mov dword [BASE-4], LJ_TISNUM
1953 | mov dword [BASE-8], RB 1939 | mov dword [BASE-8], RB
1954 | jmp ->fff_res1 1940 | jmp ->fff_res1
1955 |1: 1941 |1:
1956 | mov PC, [BASE-4] 1942 | mov PC, [BASE-4]
1957 | mov dword [BASE-4], 0x41e00000 // 2^31. 1943 | mov dword [BASE-4], 0x41e00000 // 2^31.
1958 | mov dword [BASE-8], 0 1944 | mov dword [BASE-8], 0
1959 | jmp ->fff_res1 1945 | jmp ->fff_res1
1960 |2: 1946 |2:
1961 | ja ->fff_fallback 1947 | ja ->fff_fallback
1962 } else { 1948 |.else
1963 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1949 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1964 } 1950 |.endif
1965 if (sse) { 1951 |
1966 | movsd xmm0, qword [BASE] 1952 |.if SSE
1967 | sseconst_abs xmm1, RDa 1953 | movsd xmm0, qword [BASE]
1968 | andps xmm0, xmm1 1954 | sseconst_abs xmm1, RDa
1969 |->fff_resxmm0: 1955 | andps xmm0, xmm1
1970 | mov PC, [BASE-4] 1956 |->fff_resxmm0:
1971 | movsd qword [BASE-8], xmm0 1957 | mov PC, [BASE-4]
1972 | // fallthrough 1958 | movsd qword [BASE-8], xmm0
1973 } else { 1959 | // fallthrough
1974 | fld qword [BASE] 1960 |.else
1975 | fabs 1961 | fld qword [BASE]
1976 | // fallthrough 1962 | fabs
1977 |->fff_resxmm0: // Dummy. 1963 | // fallthrough
1978 |->fff_resn: 1964 |->fff_resxmm0: // Dummy.
1979 | mov PC, [BASE-4] 1965 |->fff_resn:
1980 | fstp qword [BASE-8] 1966 | mov PC, [BASE-4]
1981 } 1967 | fstp qword [BASE-8]
1968 |.endif
1969 |
1982 |->fff_res1: 1970 |->fff_res1:
1983 | mov RD, 1+1 1971 | mov RD, 1+1
1984 |->fff_res: 1972 |->fff_res:
@@ -2006,18 +1994,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2006 | 1994 |
2007 |.macro math_round, func 1995 |.macro math_round, func
2008 | .ffunc math_ .. func 1996 | .ffunc math_ .. func
2009 ||if (LJ_DUALNUM) { 1997 |.if DUALNUM
2010 | cmp dword [BASE+4], LJ_TISNUM; jne >1 1998 | cmp dword [BASE+4], LJ_TISNUM; jne >1
2011 | mov RB, dword [BASE]; jmp ->fff_resi 1999 | mov RB, dword [BASE]; jmp ->fff_resi
2012 |1: 2000 |1:
2013 | ja ->fff_fallback 2001 | ja ->fff_fallback
2014 ||} else { 2002 |.else
2015 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2003 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2016 ||} 2004 |.endif
2017 ||if (sse) { 2005 |.if SSE
2018 | movsd xmm0, qword [BASE] 2006 | movsd xmm0, qword [BASE]
2019 | call ->vm_ .. func 2007 | call ->vm_ .. func
2020 || if (LJ_DUALNUM) { 2008 | .if DUALNUM
2021 | cvtsd2si RB, xmm0 2009 | cvtsd2si RB, xmm0
2022 | cmp RB, 0x80000000 2010 | cmp RB, 0x80000000
2023 | jne ->fff_resi 2011 | jne ->fff_resi
@@ -2025,13 +2013,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2025 | ucomisd xmm0, xmm1 2013 | ucomisd xmm0, xmm1
2026 | jp ->fff_resxmm0 2014 | jp ->fff_resxmm0
2027 | je ->fff_resi 2015 | je ->fff_resi
2028 || } 2016 | .endif
2029 | jmp ->fff_resxmm0 2017 | jmp ->fff_resxmm0
2030 ||} else { 2018 |.else
2031 | fld qword [BASE] 2019 | fld qword [BASE]
2032 | call ->vm_ .. func 2020 | call ->vm_ .. func
2033 || if (LJ_DUALNUM) { 2021 | .if DUALNUM
2034 |.if not X64
2035 | fist ARG1 2022 | fist ARG1
2036 | mov RB, ARG1 2023 | mov RB, ARG1
2037 | cmp RB, 0x80000000; jne >2 2024 | cmp RB, 0x80000000; jne >2
@@ -2043,21 +2030,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2043 |2: 2030 |2:
2044 | fpop 2031 | fpop
2045 | jmp ->fff_resi 2032 | jmp ->fff_resi
2046 |.endif 2033 | .else
2047 || } else {
2048 | jmp ->fff_resn 2034 | jmp ->fff_resn
2049 || } 2035 | .endif
2050 ||} 2036 |.endif
2051 |.endmacro 2037 |.endmacro
2052 | 2038 |
2053 | math_round floor 2039 | math_round floor
2054 | math_round ceil 2040 | math_round ceil
2055 | 2041 |
2056 if (sse) { 2042 |.if SSE
2057 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2043 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2058 } else { 2044 |.else
2059 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn 2045 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2060 } 2046 |.endif
2061 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn 2047 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn
2062 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2048 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
2063 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2049 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn
@@ -2075,17 +2061,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2075 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2061 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2076 | 2062 |
2077 |.macro math_extern, func 2063 |.macro math_extern, func
2078 ||if (sse) { 2064 |.if SSE
2079 | .ffunc_nsse math_ .. func 2065 | .ffunc_nsse math_ .. func
2080 | .if not X64 2066 | .if not X64
2081 | movsd FPARG1, xmm0 2067 | movsd FPARG1, xmm0
2082 | .endif 2068 | .endif
2083 ||} else { 2069 |.else
2084 | .if not X64 2070 | .ffunc_n math_ .. func
2085 | .ffunc_n math_ .. func 2071 | fstp FPARG1
2086 | fstp FPARG1 2072 |.endif
2087 | .endif
2088 ||}
2089 | mov RB, BASE 2073 | mov RB, BASE
2090 | call extern lj_vm_ .. func 2074 | call extern lj_vm_ .. func
2091 | mov BASE, RB 2075 | mov BASE, RB
@@ -2101,17 +2085,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2101 | math_extern tanh 2085 | math_extern tanh
2102 | 2086 |
2103 |->ff_math_deg: 2087 |->ff_math_deg:
2104 if (sse) { 2088 |.if SSE
2105 |.ffunc_nsse math_rad 2089 |.ffunc_nsse math_rad
2106 | mov CFUNC:RB, [BASE-8] 2090 | mov CFUNC:RB, [BASE-8]
2107 | mulsd xmm0, qword CFUNC:RB->upvalue[0] 2091 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resxmm0 2092 | jmp ->fff_resxmm0
2109 } else { 2093 |.else
2110 |.ffunc_n math_rad 2094 |.ffunc_n math_rad
2111 | mov CFUNC:RB, [BASE-8] 2095 | mov CFUNC:RB, [BASE-8]
2112 | fmul qword CFUNC:RB->upvalue[0] 2096 | fmul qword CFUNC:RB->upvalue[0]
2113 | jmp ->fff_resn 2097 | jmp ->fff_resn
2114 } 2098 |.endif
2115 | 2099 |
2116 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2100 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2117 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2101 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
@@ -2128,65 +2112,65 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2128 | cmp RB, 0x00200000; jb >4 2112 | cmp RB, 0x00200000; jb >4
2129 |1: 2113 |1:
2130 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2114 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2131 if (sse) { 2115 |.if SSE
2132 | cvtsi2sd xmm0, RB 2116 | cvtsi2sd xmm0, RB
2133 } else { 2117 |.else
2134 | mov TMP1, RB; fild TMP1 2118 | mov TMP1, RB; fild TMP1
2135 } 2119 |.endif
2136 | mov RB, [BASE-4] 2120 | mov RB, [BASE-4]
2137 | and RB, 0x800fffff // Mask off exponent. 2121 | and RB, 0x800fffff // Mask off exponent.
2138 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2122 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2139 | mov [BASE-4], RB 2123 | mov [BASE-4], RB
2140 |2: 2124 |2:
2141 if (sse) { 2125 |.if SSE
2142 | movsd qword [BASE], xmm0 2126 | movsd qword [BASE], xmm0
2143 } else { 2127 |.else
2144 | fstp qword [BASE] 2128 | fstp qword [BASE]
2145 } 2129 |.endif
2146 | mov RD, 1+2 2130 | mov RD, 1+2
2147 | jmp ->fff_res 2131 | jmp ->fff_res
2148 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2132 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2149 if (sse) { 2133 |.if SSE
2150 | xorps xmm0, xmm0; jmp <2 2134 | xorps xmm0, xmm0; jmp <2
2151 } else { 2135 |.else
2152 | fldz; jmp <2 2136 | fldz; jmp <2
2153 } 2137 |.endif
2154 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2138 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2155 if (sse) { 2139 |.if SSE
2156 | movsd xmm0, qword [BASE] 2140 | movsd xmm0, qword [BASE]
2157 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2141 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2158 | mulsd xmm0, xmm1 2142 | mulsd xmm0, xmm1
2159 | movsd qword [BASE-8], xmm0 2143 | movsd qword [BASE-8], xmm0
2160 } else { 2144 |.else
2161 | fld qword [BASE] 2145 | fld qword [BASE]
2162 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 2146 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2163 | fstp qword [BASE-8] 2147 | fstp qword [BASE-8]
2164 } 2148 |.endif
2165 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2149 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2166 | 2150 |
2167 if (sse) { 2151 |.if SSE
2168 |.ffunc_nsse math_modf 2152 |.ffunc_nsse math_modf
2169 } else { 2153 |.else
2170 |.ffunc_n math_modf 2154 |.ffunc_n math_modf
2171 } 2155 |.endif
2172 | mov RB, [BASE+4] 2156 | mov RB, [BASE+4]
2173 | mov PC, [BASE-4] 2157 | mov PC, [BASE-4]
2174 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2158 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2175 if (sse) { 2159 |.if SSE
2176 | movaps xmm4, xmm0 2160 | movaps xmm4, xmm0
2177 | call ->vm_trunc 2161 | call ->vm_trunc
2178 | subsd xmm4, xmm0 2162 | subsd xmm4, xmm0
2179 |1: 2163 |1:
2180 | movsd qword [BASE-8], xmm0 2164 | movsd qword [BASE-8], xmm0
2181 | movsd qword [BASE], xmm4 2165 | movsd qword [BASE], xmm4
2182 } else { 2166 |.else
2183 | fdup 2167 | fdup
2184 | call ->vm_trunc 2168 | call ->vm_trunc
2185 | fsub st1, st0 2169 | fsub st1, st0
2186 |1: 2170 |1:
2187 | fstp qword [BASE-8] 2171 | fstp qword [BASE-8]
2188 | fstp qword [BASE] 2172 | fstp qword [BASE]
2189 } 2173 |.endif
2190 | mov RC, [BASE-4]; mov RB, [BASE+4] 2174 | mov RC, [BASE-4]; mov RB, [BASE+4]
2191 | xor RC, RB; js >3 // Need to adjust sign? 2175 | xor RC, RB; js >3 // Need to adjust sign?
2192 |2: 2176 |2:
@@ -2196,28 +2180,28 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2196 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2180 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2197 | jmp <2 2181 | jmp <2
2198 |4: 2182 |4:
2199 if (sse) { 2183 |.if SSE
2200 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2184 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2201 } else { 2185 |.else
2202 | fldz; fxch; jmp <1 // Return +-Inf and +-0. 2186 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2203 } 2187 |.endif
2204 | 2188 |
2205 |.ffunc_nnr math_fmod 2189 |.ffunc_nnr math_fmod
2206 |1: ; fprem; fnstsw ax; sahf; jp <1 2190 |1: ; fprem; fnstsw ax; sahf; jp <1
2207 | fpop1 2191 | fpop1
2208 | jmp ->fff_resn 2192 | jmp ->fff_resn
2209 | 2193 |
2210 if (sse) { 2194 |.if SSE
2211 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 2195 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2212 } else { 2196 |.else
2213 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn 2197 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2214 } 2198 |.endif
2215 | 2199 |
2216 |.macro math_minmax, name, cmovop, fcmovop, nofcmovop, sseop 2200 |.macro math_minmax, name, cmovop, fcmovop, sseop
2217 | .ffunc name 2201 | .ffunc name
2218 | mov RA, 2 2202 | mov RA, 2
2219 | cmp dword [BASE+4], LJ_TISNUM 2203 | cmp dword [BASE+4], LJ_TISNUM
2220 ||if (LJ_DUALNUM) { 2204 |.if DUALNUM
2221 | jne >4 2205 | jne >4
2222 | mov RB, dword [BASE] 2206 | mov RB, dword [BASE]
2223 |1: // Handle integers. 2207 |1: // Handle integers.
@@ -2230,89 +2214,79 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2230 |3: 2214 |3:
2231 | ja ->fff_fallback 2215 | ja ->fff_fallback
2232 | // Convert intermediate result to number and continue below. 2216 | // Convert intermediate result to number and continue below.
2233 ||if (sse) { 2217 |.if SSE
2234 | cvtsi2sd xmm0, RB 2218 | cvtsi2sd xmm0, RB
2235 ||} else { 2219 |.else
2236 |.if not X64 2220 | mov TMP1, RB
2237 | mov TMP1, RB 2221 | fild TMP1
2238 | fild TMP1
2239 |.endif 2222 |.endif
2240 ||}
2241 | jmp >6 2223 | jmp >6
2242 |4: 2224 |4:
2243 | ja ->fff_fallback 2225 | ja ->fff_fallback
2244 ||} else { 2226 |.else
2245 | jae ->fff_fallback 2227 | jae ->fff_fallback
2246 ||} 2228 |.endif
2247 | 2229 |
2248 ||if (sse) { 2230 |.if SSE
2249 | movsd xmm0, qword [BASE] 2231 | movsd xmm0, qword [BASE]
2250 |5: // Handle numbers or integers. 2232 |5: // Handle numbers or integers.
2251 | cmp RA, RD; jae ->fff_resxmm0 2233 | cmp RA, RD; jae ->fff_resxmm0
2252 | cmp dword [BASE+RA*8-4], LJ_TISNUM 2234 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2253 ||if (LJ_DUALNUM) { 2235 |.if DUALNUM
2254 | jb >6 2236 | jb >6
2255 | ja ->fff_fallback 2237 | ja ->fff_fallback
2256 | cvtsi2sd xmm1, dword [BASE+RA*8-8] 2238 | cvtsi2sd xmm1, dword [BASE+RA*8-8]
2257 | jmp >7 2239 | jmp >7
2258 ||} else { 2240 |.else
2259 | jae ->fff_fallback 2241 | jae ->fff_fallback
2260 ||} 2242 |.endif
2261 |6: 2243 |6:
2262 | movsd xmm1, qword [BASE+RA*8-8] 2244 | movsd xmm1, qword [BASE+RA*8-8]
2263 |7: 2245 |7:
2264 | sseop xmm0, xmm1 2246 | sseop xmm0, xmm1
2265 | add RA, 1 2247 | add RA, 1
2266 | jmp <5 2248 | jmp <5
2267 ||} else { 2249 |.else
2268 |.if not X64
2269 | fld qword [BASE] 2250 | fld qword [BASE]
2270 |5: // Handle numbers or integers. 2251 |5: // Handle numbers or integers.
2271 | cmp RA, RD; jae ->fff_resn 2252 | cmp RA, RD; jae ->fff_resn
2272 | cmp dword [BASE+RA*8-4], LJ_TISNUM 2253 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2273 ||if (LJ_DUALNUM) { 2254 |.if DUALNUM
2274 | jb >6 2255 | jb >6
2275 | ja >9 2256 | ja >9
2276 | fild dword [BASE+RA*8-8] 2257 | fild dword [BASE+RA*8-8]
2277 | jmp >7 2258 | jmp >7
2278 ||} else { 2259 |.else
2279 | jae >9 2260 | jae >9
2280 ||} 2261 |.endif
2281 |6: 2262 |6:
2282 | fld qword [BASE+RA*8-8] 2263 | fld qword [BASE+RA*8-8]
2283 |7: 2264 |7:
2284 ||if (cmov) {
2285 | fucomi st1; fcmovop st1; fpop1 2265 | fucomi st1; fcmovop st1; fpop1
2286 ||} else {
2287 | push eax
2288 | fucom st1; fnstsw ax; test ah, 1; nofcmovop >2; fxch; 2: ; fpop
2289 | pop eax
2290 ||}
2291 | add RA, 1 2266 | add RA, 1
2292 | jmp <5 2267 | jmp <5
2293 |.endif 2268 |.endif
2294 ||}
2295 |.endmacro 2269 |.endmacro
2296 | 2270 |
2297 | math_minmax math_min, cmovg, fcmovnbe, jz, minsd 2271 | math_minmax math_min, cmovg, fcmovnbe, minsd
2298 | math_minmax math_max, cmovl, fcmovbe, jnz, maxsd 2272 | math_minmax math_max, cmovl, fcmovbe, maxsd
2299 if (!sse) { 2273 |.if not SSE
2300 |9: 2274 |9:
2301 | fpop; jmp ->fff_fallback 2275 | fpop; jmp ->fff_fallback
2302 } 2276 |.endif
2303 | 2277 |
2304 |//-- String library ----------------------------------------------------- 2278 |//-- String library -----------------------------------------------------
2305 | 2279 |
2306 |.ffunc_1 string_len 2280 |.ffunc_1 string_len
2307 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2281 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2308 | mov STR:RB, [BASE] 2282 | mov STR:RB, [BASE]
2309 if (LJ_DUALNUM) { 2283 |.if DUALNUM
2310 | mov RB, dword STR:RB->len; jmp ->fff_resi 2284 | mov RB, dword STR:RB->len; jmp ->fff_resi
2311 } else if (sse) { 2285 |.elif SSE
2312 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 2286 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2313 } else { 2287 |.else
2314 | fild dword STR:RB->len; jmp ->fff_resn 2288 | fild dword STR:RB->len; jmp ->fff_resn
2315 } 2289 |.endif
2316 | 2290 |
2317 |.ffunc string_byte // Only handle the 1-arg case here. 2291 |.ffunc string_byte // Only handle the 1-arg case here.
2318 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2292 | cmp NARGS:RD, 1+1; jne ->fff_fallback
@@ -2322,34 +2296,34 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2322 | cmp dword STR:RB->len, 1 2296 | cmp dword STR:RB->len, 1
2323 | jb ->fff_res0 // Return no results for empty string. 2297 | jb ->fff_res0 // Return no results for empty string.
2324 | movzx RB, byte STR:RB[1] 2298 | movzx RB, byte STR:RB[1]
2325 if (LJ_DUALNUM) { 2299 |.if DUALNUM
2326 | jmp ->fff_resi 2300 | jmp ->fff_resi
2327 } else if (sse) { 2301 |.elif SSE
2328 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 2302 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2329 } else { 2303 |.else
2330 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2304 | mov TMP1, RB; fild TMP1; jmp ->fff_resn
2331 } 2305 |.endif
2332 | 2306 |
2333 |.ffunc string_char // Only handle the 1-arg case here. 2307 |.ffunc string_char // Only handle the 1-arg case here.
2334 | ffgccheck 2308 | ffgccheck
2335 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. 2309 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
2336 | cmp dword [BASE+4], LJ_TISNUM 2310 | cmp dword [BASE+4], LJ_TISNUM
2337 if (LJ_DUALNUM) { 2311 |.if DUALNUM
2338 | jne ->fff_fallback 2312 | jne ->fff_fallback
2339 | mov RB, dword [BASE] 2313 | mov RB, dword [BASE]
2340 | cmp RB, 255; ja ->fff_fallback 2314 | cmp RB, 255; ja ->fff_fallback
2341 | mov TMP2, RB 2315 | mov TMP2, RB
2342 } else if (sse) { 2316 |.elif SSE
2343 | jae ->fff_fallback 2317 | jae ->fff_fallback
2344 | cvttsd2si RB, qword [BASE] 2318 | cvttsd2si RB, qword [BASE]
2345 | cmp RB, 255; ja ->fff_fallback 2319 | cmp RB, 255; ja ->fff_fallback
2346 | mov TMP2, RB 2320 | mov TMP2, RB
2347 } else { 2321 |.else
2348 | jae ->fff_fallback 2322 | jae ->fff_fallback
2349 | fld qword [BASE] 2323 | fld qword [BASE]
2350 | fistp TMP2 2324 | fistp TMP2
2351 | cmp TMP2, 255; ja ->fff_fallback 2325 | cmp TMP2, 255; ja ->fff_fallback
2352 } 2326 |.endif
2353 |.if X64 2327 |.if X64
2354 | mov TMP3, 1 2328 | mov TMP3, 1
2355 |.else 2329 |.else
@@ -2382,41 +2356,39 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2382 | cmp NARGS:RD, 1+2; jb ->fff_fallback 2356 | cmp NARGS:RD, 1+2; jb ->fff_fallback
2383 | jna >1 2357 | jna >1
2384 | cmp dword [BASE+20], LJ_TISNUM 2358 | cmp dword [BASE+20], LJ_TISNUM
2385 if (LJ_DUALNUM) { 2359 |.if DUALNUM
2386 | jne ->fff_fallback 2360 | jne ->fff_fallback
2387 | mov RB, dword [BASE+16] 2361 | mov RB, dword [BASE+16]
2388 | mov TMP2, RB 2362 | mov TMP2, RB
2389 } else if (sse) { 2363 |.elif SSE
2390 | jae ->fff_fallback 2364 | jae ->fff_fallback
2391 | cvttsd2si RB, qword [BASE+16] 2365 | cvttsd2si RB, qword [BASE+16]
2392 | mov TMP2, RB 2366 | mov TMP2, RB
2393 } else { 2367 |.else
2394 | jae ->fff_fallback 2368 | jae ->fff_fallback
2395 | fld qword [BASE+16] 2369 | fld qword [BASE+16]
2396 | fistp TMP2 2370 | fistp TMP2
2397 } 2371 |.endif
2398 |1: 2372 |1:
2399 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2373 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2400 | cmp dword [BASE+12], LJ_TISNUM 2374 | cmp dword [BASE+12], LJ_TISNUM
2401 if (LJ_DUALNUM) { 2375 |.if DUALNUM
2402 | jne ->fff_fallback 2376 | jne ->fff_fallback
2403 } else { 2377 |.else
2404 | jae ->fff_fallback 2378 | jae ->fff_fallback
2405 } 2379 |.endif
2406 | mov STR:RB, [BASE] 2380 | mov STR:RB, [BASE]
2407 | mov TMP3, STR:RB 2381 | mov TMP3, STR:RB
2408 | mov RB, STR:RB->len 2382 | mov RB, STR:RB->len
2409 if (LJ_DUALNUM) { 2383 |.if DUALNUM
2410 | mov RA, dword [BASE+8] 2384 | mov RA, dword [BASE+8]
2411 } else if (sse) { 2385 |.elif SSE
2412 | cvttsd2si RA, qword [BASE+8] 2386 | cvttsd2si RA, qword [BASE+8]
2413 } else { 2387 |.else
2414 |.if not X64 2388 | fld qword [BASE+8]
2415 | fld qword [BASE+8] 2389 | fistp ARG3
2416 | fistp ARG3 2390 | mov RA, ARG3
2417 | mov RA, ARG3 2391 |.endif
2418 |.endif
2419 }
2420 | mov RC, TMP2 2392 | mov RC, TMP2
2421 | cmp RB, RC // len < end? (unsigned compare) 2393 | cmp RB, RC // len < end? (unsigned compare)
2422 | jb >5 2394 | jb >5
@@ -2464,18 +2436,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2464 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2436 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2465 | cmp dword [BASE+12], LJ_TISNUM 2437 | cmp dword [BASE+12], LJ_TISNUM
2466 | mov STR:RB, [BASE] 2438 | mov STR:RB, [BASE]
2467 if (LJ_DUALNUM) { 2439 |.if DUALNUM
2468 | jne ->fff_fallback 2440 | jne ->fff_fallback
2469 | mov RC, dword [BASE+8] 2441 | mov RC, dword [BASE+8]
2470 } else if (sse) { 2442 |.elif SSE
2471 | jae ->fff_fallback 2443 | jae ->fff_fallback
2472 | cvttsd2si RC, qword [BASE+8] 2444 | cvttsd2si RC, qword [BASE+8]
2473 } else { 2445 |.else
2474 | jae ->fff_fallback 2446 | jae ->fff_fallback
2475 | fld qword [BASE+8] 2447 | fld qword [BASE+8]
2476 | fistp TMP2 2448 | fistp TMP2
2477 | mov RC, TMP2 2449 | mov RC, TMP2
2478 } 2450 |.endif
2479 | test RC, RC 2451 | test RC, RC
2480 | jle ->fff_emptystr // Count <= 0? (or non-int) 2452 | jle ->fff_emptystr // Count <= 0? (or non-int)
2481 | cmp dword STR:RB->len, 1 2453 | cmp dword STR:RB->len, 1
@@ -2568,15 +2540,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2568 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) 2540 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2569 | // Length of table returned in eax (RD). 2541 | // Length of table returned in eax (RD).
2570 | mov BASE, RB // Restore BASE. 2542 | mov BASE, RB // Restore BASE.
2571 if (LJ_DUALNUM) { 2543 |.if DUALNUM
2572 | mov RB, RD; jmp ->fff_resi 2544 | mov RB, RD; jmp ->fff_resi
2573 } else if (sse) { 2545 |.elif SSE
2574 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 2546 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2575 } else { 2547 |.else
2576 |.if not X64 2548 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2577 | mov ARG1, RD; fild ARG1; jmp ->fff_resn 2549 |.endif
2578 |.endif
2579 }
2580 | 2550 |
2581 |//-- Bit library -------------------------------------------------------- 2551 |//-- Bit library --------------------------------------------------------
2582 | 2552 |
@@ -2585,14 +2555,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2585 |.macro .ffunc_bit, name, kind 2555 |.macro .ffunc_bit, name, kind
2586 | .ffunc_1 name 2556 | .ffunc_1 name
2587 |.if kind == 2 2557 |.if kind == 2
2588 ||if (sse) { 2558 |.if SSE
2589 | sseconst_tobit xmm1, RBa 2559 | sseconst_tobit xmm1, RBa
2590 ||} else { 2560 |.else
2591 | mov TMP1, TOBIT_BIAS 2561 | mov TMP1, TOBIT_BIAS
2592 ||} 2562 |.endif
2593 |.endif 2563 |.endif
2594 | cmp dword [BASE+4], LJ_TISNUM 2564 | cmp dword [BASE+4], LJ_TISNUM
2595 ||if (LJ_DUALNUM) { 2565 |.if DUALNUM
2596 | jne >1 2566 | jne >1
2597 | mov RB, dword [BASE] 2567 | mov RB, dword [BASE]
2598 |.if kind > 0 2568 |.if kind > 0
@@ -2602,18 +2572,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2602 |.endif 2572 |.endif
2603 |1: 2573 |1:
2604 | ja ->fff_fallback 2574 | ja ->fff_fallback
2605 ||} else { 2575 |.else
2606 | jae ->fff_fallback 2576 | jae ->fff_fallback
2607 ||} 2577 |.endif
2608 ||if (sse) { 2578 |.if SSE
2609 | movsd xmm0, qword [BASE] 2579 | movsd xmm0, qword [BASE]
2610 |.if kind < 2 2580 |.if kind < 2
2611 | sseconst_tobit xmm1, RBa 2581 | sseconst_tobit xmm1, RBa
2612 |.endif 2582 |.endif
2613 | addsd xmm0, xmm1 2583 | addsd xmm0, xmm1
2614 | movd RB, xmm0 2584 | movd RB, xmm0
2615 ||} else { 2585 |.else
2616 |.if not X64
2617 | fld qword [BASE] 2586 | fld qword [BASE]
2618 |.if kind < 2 2587 |.if kind < 2
2619 | mov TMP1, TOBIT_BIAS 2588 | mov TMP1, TOBIT_BIAS
@@ -2624,24 +2593,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2624 | mov RB, ARG1 2593 | mov RB, ARG1
2625 |.endif 2594 |.endif
2626 |.endif 2595 |.endif
2627 ||}
2628 |2: 2596 |2:
2629 |.endmacro 2597 |.endmacro
2630 | 2598 |
2631 |.ffunc_bit bit_tobit, 0 2599 |.ffunc_bit bit_tobit, 0
2632 if (LJ_DUALNUM || sse) { 2600 |.if DUALNUM or SSE
2633 if (!sse) { 2601 |.if not SSE
2634 |.if not X64 2602 | mov RB, ARG1
2635 | mov RB, ARG1 2603 |.endif
2636 |.endif 2604 | jmp ->fff_resbit
2637 } 2605 |.else
2638 | jmp ->fff_resbit 2606 | fild ARG1
2639 } else { 2607 | jmp ->fff_resn
2640 |.if not X64 2608 |.endif
2641 | fild ARG1
2642 | jmp ->fff_resn
2643 |.endif
2644 }
2645 | 2609 |
2646 |.macro .ffunc_bit_op, name, ins 2610 |.macro .ffunc_bit_op, name, ins
2647 | .ffunc_bit name, 2 2611 | .ffunc_bit name, 2
@@ -2651,29 +2615,27 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2651 | cmp RD, BASE 2615 | cmp RD, BASE
2652 | jbe ->fff_resbit 2616 | jbe ->fff_resbit
2653 | cmp dword [RD+4], LJ_TISNUM 2617 | cmp dword [RD+4], LJ_TISNUM
2654 ||if (LJ_DUALNUM) { 2618 |.if DUALNUM
2655 | jne >2 2619 | jne >2
2656 | ins RB, dword [RD] 2620 | ins RB, dword [RD]
2657 | sub RD, 8 2621 | sub RD, 8
2658 | jmp <1 2622 | jmp <1
2659 |2: 2623 |2:
2660 | ja ->fff_fallback_bit_op 2624 | ja ->fff_fallback_bit_op
2661 ||} else { 2625 |.else
2662 | jae ->fff_fallback_bit_op 2626 | jae ->fff_fallback_bit_op
2663 ||} 2627 |.endif
2664 ||if (sse) { 2628 |.if SSE
2665 | movsd xmm0, qword [RD] 2629 | movsd xmm0, qword [RD]
2666 | addsd xmm0, xmm1 2630 | addsd xmm0, xmm1
2667 | movd RA, xmm0 2631 | movd RA, xmm0
2668 | ins RB, RA 2632 | ins RB, RA
2669 ||} else { 2633 |.else
2670 |.if not X64
2671 | fld qword [RD] 2634 | fld qword [RD]
2672 | fadd TMP1 2635 | fadd TMP1
2673 | fstp FPARG1 2636 | fstp FPARG1
2674 | ins RB, ARG1 2637 | ins RB, ARG1
2675 |.endif 2638 |.endif
2676 ||}
2677 | sub RD, 8 2639 | sub RD, 8
2678 | jmp <1 2640 | jmp <1
2679 |.endmacro 2641 |.endmacro
@@ -2688,40 +2650,37 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2688 | 2650 |
2689 |.ffunc_bit bit_bnot, 1 2651 |.ffunc_bit bit_bnot, 1
2690 | not RB 2652 | not RB
2691 if (LJ_DUALNUM) { 2653 |.if DUALNUM
2692 | jmp ->fff_resbit 2654 | jmp ->fff_resbit
2693 } else if (sse) { 2655 |.elif SSE
2694 |->fff_resbit: 2656 |->fff_resbit:
2695 | cvtsi2sd xmm0, RB 2657 | cvtsi2sd xmm0, RB
2696 | jmp ->fff_resxmm0 2658 | jmp ->fff_resxmm0
2697 } else { 2659 |.else
2698 |.if not X64 2660 |->fff_resbit:
2699 |->fff_resbit: 2661 | mov ARG1, RB
2700 | mov ARG1, RB 2662 | fild ARG1
2701 | fild ARG1 2663 | jmp ->fff_resn
2702 | jmp ->fff_resn 2664 |.endif
2703 |.endif
2704 }
2705 | 2665 |
2706 |->fff_fallback_bit_op: 2666 |->fff_fallback_bit_op:
2707 | mov NARGS:RD, TMP2 // Restore for fallback 2667 | mov NARGS:RD, TMP2 // Restore for fallback
2708 | jmp ->fff_fallback 2668 | jmp ->fff_fallback
2709 | 2669 |
2710 |.macro .ffunc_bit_sh, name, ins 2670 |.macro .ffunc_bit_sh, name, ins
2711 ||if (LJ_DUALNUM) { 2671 |.if DUALNUM
2712 | .ffunc_bit name, 1 2672 | .ffunc_bit name, 1
2713 | // Note: no inline conversion from number for 2nd argument! 2673 | // Note: no inline conversion from number for 2nd argument!
2714 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2674 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2715 | mov RA, dword [BASE+8] 2675 | mov RA, dword [BASE+8]
2716 ||} else if (sse) { 2676 |.elif SSE
2717 | .ffunc_nnsse name 2677 | .ffunc_nnsse name
2718 | sseconst_tobit xmm2, RBa 2678 | sseconst_tobit xmm2, RBa
2719 | addsd xmm0, xmm2 2679 | addsd xmm0, xmm2
2720 | addsd xmm1, xmm2 2680 | addsd xmm1, xmm2
2721 | movd RB, xmm0 2681 | movd RB, xmm0
2722 | movd RA, xmm1 2682 | movd RA, xmm1
2723 ||} else { 2683 |.else
2724 |.if not X64
2725 | .ffunc_nn name 2684 | .ffunc_nn name
2726 | mov TMP1, TOBIT_BIAS 2685 | mov TMP1, TOBIT_BIAS
2727 | fadd TMP1 2686 | fadd TMP1
@@ -2731,7 +2690,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2731 | mov RA, ARG3 2690 | mov RA, ARG3
2732 | mov RB, ARG1 2691 | mov RB, ARG1
2733 |.endif 2692 |.endif
2734 ||}
2735 | ins RB, cl // Assumes RA is ecx. 2693 | ins RB, cl // Assumes RA is ecx.
2736 | jmp ->fff_resbit 2694 | jmp ->fff_resbit
2737 |.endmacro 2695 |.endmacro
@@ -2828,7 +2786,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2828 |//----------------------------------------------------------------------- 2786 |//-----------------------------------------------------------------------
2829 | 2787 |
2830 |->vm_record: // Dispatch target for recording phase. 2788 |->vm_record: // Dispatch target for recording phase.
2831#if LJ_HASJIT 2789 |.if JIT
2832 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2790 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2833 | test RDL, HOOK_VMEVENT // No recording while in vmevent. 2791 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2834 | jnz >5 2792 | jnz >5
@@ -2839,7 +2797,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2839 | jz >1 2797 | jz >1
2840 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] 2798 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2841 | jmp >1 2799 | jmp >1
2842#endif 2800 |.endif
2843 | 2801 |
2844 |->vm_rethook: // Dispatch target for return hooks. 2802 |->vm_rethook: // Dispatch target for return hooks.
2845 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2803 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
@@ -2885,7 +2843,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2885 | jmp <4 2843 | jmp <4
2886 | 2844 |
2887 |->vm_hotloop: // Hot loop counter underflow. 2845 |->vm_hotloop: // Hot loop counter underflow.
2888#if LJ_HASJIT 2846 |.if JIT
2889 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). 2847 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
2890 | mov RB, LFUNC:RB->pc 2848 | mov RB, LFUNC:RB->pc
2891 | movzx RD, byte [RB+PC2PROTO(framesize)] 2849 | movzx RD, byte [RB+PC2PROTO(framesize)]
@@ -2899,20 +2857,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2899 | mov SAVE_PC, PC 2857 | mov SAVE_PC, PC
2900 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) 2858 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
2901 | jmp <3 2859 | jmp <3
2902#endif 2860 |.endif
2903 | 2861 |
2904 |->vm_callhook: // Dispatch target for call hooks. 2862 |->vm_callhook: // Dispatch target for call hooks.
2905 | mov SAVE_PC, PC 2863 | mov SAVE_PC, PC
2906#if LJ_HASJIT 2864 |.if JIT
2907 | jmp >1 2865 | jmp >1
2908#endif 2866 |.endif
2909 | 2867 |
2910 |->vm_hotcall: // Hot call counter underflow. 2868 |->vm_hotcall: // Hot call counter underflow.
2911#if LJ_HASJIT 2869 |.if JIT
2912 | mov SAVE_PC, PC 2870 | mov SAVE_PC, PC
2913 | or PC, 1 // Marker for hot call. 2871 | or PC, 1 // Marker for hot call.
2914 |1: 2872 |1:
2915#endif 2873 |.endif
2916 | lea RD, [BASE+NARGS:RD*8-8] 2874 | lea RD, [BASE+NARGS:RD*8-8]
2917 | mov L:RB, SAVE_L 2875 | mov L:RB, SAVE_L
2918 | mov L:RB->base, BASE 2876 | mov L:RB->base, BASE
@@ -2922,9 +2880,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2922 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) 2880 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
2923 | // ASMFunction returned in eax/rax (RDa). 2881 | // ASMFunction returned in eax/rax (RDa).
2924 | mov SAVE_PC, 0 // Invalidate for subsequent line hook. 2882 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2925#if LJ_HASJIT 2883 |.if JIT
2926 | and PC, -2 2884 | and PC, -2
2927#endif 2885 |.endif
2928 | mov BASE, L:RB->base 2886 | mov BASE, L:RB->base
2929 | mov RAa, RDa 2887 | mov RAa, RDa
2930 | mov RD, L:RB->top 2888 | mov RD, L:RB->top
@@ -2942,7 +2900,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2942 |// Called from an exit stub with the exit number on the stack. 2900 |// Called from an exit stub with the exit number on the stack.
2943 |// The 16 bit exit number is stored with two (sign-extended) push imm8. 2901 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2944 |->vm_exit_handler: 2902 |->vm_exit_handler:
2945#if LJ_HASJIT 2903 |.if JIT
2946 |.if X64 2904 |.if X64
2947 | push r13; push r12 2905 | push r13; push r12
2948 | push r11; push r10; push r9; push r8 2906 | push r11; push r10; push r9; push r8
@@ -3017,10 +2975,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3017 |.if X64 2975 |.if X64
3018 | jmp >1 2976 | jmp >1
3019 |.endif 2977 |.endif
3020#endif 2978 |.endif
3021 |->vm_exit_interp: 2979 |->vm_exit_interp:
3022 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. 2980 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
3023#if LJ_HASJIT 2981 |.if JIT
3024 |.if X64 2982 |.if X64
3025 | // Restore additional callee-save registers only used in compiled code. 2983 | // Restore additional callee-save registers only used in compiled code.
3026 |.if X64WIN 2984 |.if X64WIN
@@ -3074,7 +3032,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3074 | mov FCARG1, L:RB 3032 | mov FCARG1, L:RB
3075 | mov FCARG2, RD 3033 | mov FCARG2, RD
3076 | call extern lj_err_throw@8 // (lua_State *L, int errcode) 3034 | call extern lj_err_throw@8 // (lua_State *L, int errcode)
3077#endif 3035 |.endif
3078 | 3036 |
3079 |//----------------------------------------------------------------------- 3037 |//-----------------------------------------------------------------------
3080 |//-- Math helper functions ---------------------------------------------- 3038 |//-- Math helper functions ----------------------------------------------
@@ -3139,9 +3097,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3139 | 3097 |
3140 |.macro vm_round, name, ssemode, mode1, mode2 3098 |.macro vm_round, name, ssemode, mode1, mode2
3141 |->name: 3099 |->name:
3142 ||if (!sse) { 3100 |.if not SSE
3143 | vm_round_x87 mode1, mode2 3101 | vm_round_x87 mode1, mode2
3144 ||} 3102 |.endif
3145 |->name .. _sse: 3103 |->name .. _sse:
3146 | vm_round_sse ssemode 3104 | vm_round_sse ssemode
3147 |.endmacro 3105 |.endmacro
@@ -3152,51 +3110,51 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3152 | 3110 |
3153 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3111 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3154 |->vm_mod: 3112 |->vm_mod:
3155 if (sse) { 3113 |.if SSE
3156 |// Args in xmm0/xmm1, return value in xmm0. 3114 |// Args in xmm0/xmm1, return value in xmm0.
3157 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3115 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3158 | movaps xmm5, xmm0 3116 | movaps xmm5, xmm0
3159 | divsd xmm0, xmm1 3117 | divsd xmm0, xmm1
3160 | sseconst_abs xmm2, RDa 3118 | sseconst_abs xmm2, RDa
3161 | sseconst_2p52 xmm3, RDa 3119 | sseconst_2p52 xmm3, RDa
3162 | movaps xmm4, xmm0 3120 | movaps xmm4, xmm0
3163 | andpd xmm4, xmm2 // |x/y| 3121 | andpd xmm4, xmm2 // |x/y|
3164 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. 3122 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
3165 | jbe >1 3123 | jbe >1
3166 | andnpd xmm2, xmm0 // Isolate sign bit. 3124 | andnpd xmm2, xmm0 // Isolate sign bit.
3167 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 3125 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
3168 | subsd xmm4, xmm3 3126 | subsd xmm4, xmm3
3169 | orpd xmm4, xmm2 // Merge sign bit back in. 3127 | orpd xmm4, xmm2 // Merge sign bit back in.
3170 | sseconst_1 xmm2, RDa 3128 | sseconst_1 xmm2, RDa
3171 | cmpsd xmm0, xmm4, 1 // x/y < result? 3129 | cmpsd xmm0, xmm4, 1 // x/y < result?
3172 | andpd xmm0, xmm2 3130 | andpd xmm0, xmm2
3173 | subsd xmm4, xmm0 // If yes, subtract 1.0. 3131 | subsd xmm4, xmm0 // If yes, subtract 1.0.
3174 | movaps xmm0, xmm5 3132 | movaps xmm0, xmm5
3175 | mulsd xmm1, xmm4 3133 | mulsd xmm1, xmm4
3176 | subsd xmm0, xmm1 3134 | subsd xmm0, xmm1
3177 | ret 3135 | ret
3178 |1: 3136 |1:
3179 | mulsd xmm1, xmm0 3137 | mulsd xmm1, xmm0
3180 | movaps xmm0, xmm5 3138 | movaps xmm0, xmm5
3181 | subsd xmm0, xmm1 3139 | subsd xmm0, xmm1
3182 | ret 3140 | ret
3183 } else { 3141 |.else
3184 |// Args/ret on x87 stack (y on top). No xmm registers modified. 3142 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3185 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! 3143 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3186 | fld st1 3144 | fld st1
3187 | fdiv st1 3145 | fdiv st1
3188 | fnstcw word [esp+4] 3146 | fnstcw word [esp+4]
3189 | mov ax, 0x0400 3147 | mov ax, 0x0400
3190 | or ax, [esp+4] 3148 | or ax, [esp+4]
3191 | and ax, 0xf7ff 3149 | and ax, 0xf7ff
3192 | mov [esp+6], ax 3150 | mov [esp+6], ax
3193 | fldcw word [esp+6] 3151 | fldcw word [esp+6]
3194 | frndint 3152 | frndint
3195 | fldcw word [esp+4] 3153 | fldcw word [esp+4]
3196 | fmulp st1 3154 | fmulp st1
3197 | fsubp st1 3155 | fsubp st1
3198 | ret 3156 | ret
3199 } 3157 |.endif
3200 | 3158 |
3201 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and 3159 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3202 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. 3160 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
@@ -3224,18 +3182,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3224 | 3182 |
3225 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 3183 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3226 |// and vm_arith. 3184 |// and vm_arith.
3227 if (!sse) {
3228 |.if not X64
3229 |// Args/ret on x87 stack (y on top). RC (eax) modified. 3185 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3230 |// Caveat: needs 3 slots on x87 stack! 3186 |// Caveat: needs 3 slots on x87 stack!
3231 |->vm_pow: 3187 |->vm_pow:
3188 |.if not SSE
3232 | fist dword [esp+4] // Store/reload int before comparison. 3189 | fist dword [esp+4] // Store/reload int before comparison.
3233 | fild dword [esp+4] // Integral exponent used in vm_powi. 3190 | fild dword [esp+4] // Integral exponent used in vm_powi.
3234 ||if (cmov) {
3235 | fucomip st1 3191 | fucomip st1
3236 ||} else {
3237 | fucomp st1; fnstsw ax; sahf
3238 ||}
3239 | jnz >8 // Branch for FP exponents. 3192 | jnz >8 // Branch for FP exponents.
3240 | jp >9 // Branch for NaN exponent. 3193 | jp >9 // Branch for NaN exponent.
3241 | fpop // Pop y and fallthrough to vm_powi. 3194 | fpop // Pop y and fallthrough to vm_powi.
@@ -3288,11 +3241,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3288 | 3241 |
3289 |9: // Handle x^NaN. 3242 |9: // Handle x^NaN.
3290 | fld1 3243 | fld1
3291 ||if (cmov) {
3292 | fucomip st2 3244 | fucomip st2
3293 ||} else {
3294 | fucomp st2; fnstsw ax; sahf
3295 ||}
3296 | je >1 // 1^NaN ==> 1 3245 | je >1 // 1^NaN ==> 1
3297 | fxch // x^NaN ==> NaN 3246 | fxch // x^NaN ==> NaN
3298 |1: 3247 |1:
@@ -3302,11 +3251,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3302 |2: // Handle x^+-Inf. 3251 |2: // Handle x^+-Inf.
3303 | fabs 3252 | fabs
3304 | fld1 3253 | fld1
3305 ||if (cmov) {
3306 | fucomip st1 3254 | fucomip st1
3307 ||} else {
3308 | fucomp st1; fnstsw ax; sahf
3309 ||}
3310 | je >3 // +-1^+-Inf ==> 1 3255 | je >3 // +-1^+-Inf ==> 1
3311 | fpop; fabs; fldz; mov eax, 0; setc al 3256 | fpop; fabs; fldz; mov eax, 0; setc al
3312 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 3257 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
@@ -3326,9 +3271,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3326 | fld dword [esp+4] 3271 | fld dword [esp+4]
3327 | ret 3272 | ret
3328 |.endif 3273 |.endif
3329 } else {
3330 |->vm_pow:
3331 }
3332 | 3274 |
3333 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 3275 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3334 |// Needs 16 byte scratch area for x86. Also called from JIT code. 3276 |// Needs 16 byte scratch area for x86. Also called from JIT code.
@@ -3453,217 +3395,208 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3453 |// Callable from C: double lj_vm_foldfpm(double x, int fpm) 3395 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3454 |// Computes fpm(x) for extended math functions. ORDER FPM. 3396 |// Computes fpm(x) for extended math functions. ORDER FPM.
3455 |->vm_foldfpm: 3397 |->vm_foldfpm:
3456#if LJ_HASJIT 3398 |.if JIT
3457 if (sse) { 3399 |.if X64
3458 |.if X64 3400 | .if X64WIN
3459 | 3401 | .define fpmop, CARG2d
3460 | .if X64WIN 3402 | .else
3461 | .define fpmop, CARG2d 3403 | .define fpmop, CARG1d
3462 | .else 3404 | .endif
3463 | .define fpmop, CARG1d 3405 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3464 | .endif 3406 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3465 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3407 | sqrtsd xmm0, xmm0; ret
3466 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3408 |2:
3467 | sqrtsd xmm0, xmm0; ret 3409 | .if X64WIN
3468 |2: 3410 | movsd qword [rsp+8], xmm0 // Use scratch area.
3469 | .if X64WIN 3411 | fld qword [rsp+8]
3470 | movsd qword [rsp+8], xmm0 // Use scratch area. 3412 | .else
3471 | fld qword [rsp+8] 3413 | movsd qword [rsp-8], xmm0 // Use red zone.
3472 | .else 3414 | fld qword [rsp-8]
3473 | movsd qword [rsp-8], xmm0 // Use red zone. 3415 | .endif
3474 | fld qword [rsp-8] 3416 | cmp fpmop, 5; ja >2
3475 | .endif 3417 | .if X64WIN; pop rax; .endif
3476 | cmp fpmop, 5; ja >2 3418 | je >1
3477 | .if X64WIN; pop rax; .endif 3419 | call ->vm_exp_x87
3478 | je >1 3420 | .if X64WIN; push rax; .endif
3479 | call ->vm_exp_x87 3421 | jmp >7
3480 | .if X64WIN; push rax; .endif 3422 |1:
3481 | jmp >7 3423 | call ->vm_exp2_x87
3482 |1: 3424 | .if X64WIN; push rax; .endif
3483 | call ->vm_exp2_x87 3425 | jmp >7
3484 | .if X64WIN; push rax; .endif 3426 |2: ; cmp fpmop, 7; je >1; ja >2
3485 | jmp >7 3427 | fldln2; fxch; fyl2x; jmp >7
3486 |2: ; cmp fpmop, 7; je >1; ja >2 3428 |1: ; fld1; fxch; fyl2x; jmp >7
3487 | fldln2; fxch; fyl2x; jmp >7 3429 |2: ; cmp fpmop, 9; je >1; ja >2
3488 |1: ; fld1; fxch; fyl2x; jmp >7 3430 | fldlg2; fxch; fyl2x; jmp >7
3489 |2: ; cmp fpmop, 9; je >1; ja >2 3431 |1: ; fsin; jmp >7
3490 | fldlg2; fxch; fyl2x; jmp >7 3432 |2: ; cmp fpmop, 11; je >1; ja >9
3491 |1: ; fsin; jmp >7 3433 | fcos; jmp >7
3492 |2: ; cmp fpmop, 11; je >1; ja >9 3434 |1: ; fptan; fpop
3493 | fcos; jmp >7 3435 |7:
3494 |1: ; fptan; fpop 3436 | .if X64WIN
3495 |7: 3437 | fstp qword [rsp+8] // Use scratch area.
3496 | .if X64WIN 3438 | movsd xmm0, qword [rsp+8]
3497 | fstp qword [rsp+8] // Use scratch area. 3439 | .else
3498 | movsd xmm0, qword [rsp+8] 3440 | fstp qword [rsp-8] // Use red zone.
3499 | .else 3441 | movsd xmm0, qword [rsp-8]
3500 | fstp qword [rsp-8] // Use red zone. 3442 | .endif
3501 | movsd xmm0, qword [rsp-8] 3443 | ret
3502 | .endif 3444 |.else // x86 calling convention.
3503 | ret 3445 | .define fpmop, eax
3504 | 3446 |.if SSE
3505 |.else // x86 calling convention. 3447 | mov fpmop, [esp+12]
3506 | 3448 | movsd xmm0, qword [esp+4]
3507 | .define fpmop, eax 3449 | cmp fpmop, 1; je >1; ja >2
3508 | mov fpmop, [esp+12] 3450 | call ->vm_floor; jmp >7
3509 | movsd xmm0, qword [esp+4] 3451 |1: ; call ->vm_ceil; jmp >7
3510 | cmp fpmop, 1; je >1; ja >2 3452 |2: ; cmp fpmop, 3; je >1; ja >2
3511 | call ->vm_floor; jmp >7 3453 | call ->vm_trunc; jmp >7
3512 |1: ; call ->vm_ceil; jmp >7 3454 |1:
3513 |2: ; cmp fpmop, 3; je >1; ja >2 3455 | sqrtsd xmm0, xmm0
3514 | call ->vm_trunc; jmp >7 3456 |7:
3515 |1: 3457 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3516 | sqrtsd xmm0, xmm0 3458 | fld qword [esp+4]
3517 |7: 3459 | ret
3518 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. 3460 |2: ; fld qword [esp+4]
3519 | fld qword [esp+4] 3461 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3520 | ret 3462 |2: ; cmp fpmop, 7; je >1; ja >2
3521 |2: ; fld qword [esp+4] 3463 | fldln2; fxch; fyl2x; ret
3522 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 3464 |1: ; fld1; fxch; fyl2x; ret
3523 |2: ; cmp fpmop, 7; je >1; ja >2 3465 |2: ; cmp fpmop, 9; je >1; ja >2
3524 | fldln2; fxch; fyl2x; ret 3466 | fldlg2; fxch; fyl2x; ret
3525 |1: ; fld1; fxch; fyl2x; ret 3467 |1: ; fsin; ret
3526 |2: ; cmp fpmop, 9; je >1; ja >2 3468 |2: ; cmp fpmop, 11; je >1; ja >9
3527 | fldlg2; fxch; fyl2x; ret 3469 | fcos; ret
3528 |1: ; fsin; ret 3470 |1: ; fptan; fpop; ret
3529 |2: ; cmp fpmop, 11; je >1; ja >9 3471 |.else
3530 | fcos; ret 3472 | mov fpmop, [esp+12]
3531 |1: ; fptan; fpop; ret 3473 | fld qword [esp+4]
3532 | 3474 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3533 |.endif 3475 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3534 } else { 3476 | fsqrt; ret
3535 | mov fpmop, [esp+12] 3477 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3536 | fld qword [esp+4] 3478 | cmp fpmop, 7; je >1; ja >2
3537 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3479 | fldln2; fxch; fyl2x; ret
3538 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3480 |1: ; fld1; fxch; fyl2x; ret
3539 | fsqrt; ret 3481 |2: ; cmp fpmop, 9; je >1; ja >2
3540 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 3482 | fldlg2; fxch; fyl2x; ret
3541 | cmp fpmop, 7; je >1; ja >2 3483 |1: ; fsin; ret
3542 | fldln2; fxch; fyl2x; ret 3484 |2: ; cmp fpmop, 11; je >1; ja >9
3543 |1: ; fld1; fxch; fyl2x; ret 3485 | fcos; ret
3544 |2: ; cmp fpmop, 9; je >1; ja >2 3486 |1: ; fptan; fpop; ret
3545 | fldlg2; fxch; fyl2x; ret 3487 |.endif
3546 |1: ; fsin; ret 3488 |.endif
3547 |2: ; cmp fpmop, 11; je >1; ja >9
3548 | fcos; ret
3549 |1: ; fptan; fpop; ret
3550 }
3551 |9: ; int3 // Bad fpm. 3489 |9: ; int3 // Bad fpm.
3552#endif 3490 |.endif
3553 | 3491 |
3554 |// Callable from C: double lj_vm_foldarith(double x, double y, int op) 3492 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3555 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) 3493 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3556 |// and basic math functions. ORDER ARITH 3494 |// and basic math functions. ORDER ARITH
3557 |->vm_foldarith: 3495 |->vm_foldarith:
3558 if (sse) { 3496 |.if X64
3559 |.if X64 3497 |
3560 | 3498 | .if X64WIN
3561 | .if X64WIN 3499 | .define foldop, CARG3d
3562 | .define foldop, CARG3d 3500 | .else
3563 | .else 3501 | .define foldop, CARG1d
3564 | .define foldop, CARG1d 3502 | .endif
3565 | .endif 3503 | cmp foldop, 1; je >1; ja >2
3566 | cmp foldop, 1; je >1; ja >2 3504 | addsd xmm0, xmm1; ret
3567 | addsd xmm0, xmm1; ret 3505 |1: ; subsd xmm0, xmm1; ret
3568 |1: ; subsd xmm0, xmm1; ret 3506 |2: ; cmp foldop, 3; je >1; ja >2
3569 |2: ; cmp foldop, 3; je >1; ja >2 3507 | mulsd xmm0, xmm1; ret
3570 | mulsd xmm0, xmm1; ret 3508 |1: ; divsd xmm0, xmm1; ret
3571 |1: ; divsd xmm0, xmm1; ret 3509 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3572 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3510 | cmp foldop, 7; je >1; ja >2
3573 | cmp foldop, 7; je >1; ja >2 3511 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3574 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3512 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3575 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3513 |2: ; cmp foldop, 9; ja >2
3576 |2: ; cmp foldop, 9; ja >2 3514 |.if X64WIN
3577 |.if X64WIN 3515 | movsd qword [rsp+8], xmm0 // Use scratch area.
3578 | movsd qword [rsp+8], xmm0 // Use scratch area. 3516 | movsd qword [rsp+16], xmm1
3579 | movsd qword [rsp+16], xmm1 3517 | fld qword [rsp+8]
3580 | fld qword [rsp+8] 3518 | fld qword [rsp+16]
3581 | fld qword [rsp+16] 3519 |.else
3582 |.else 3520 | movsd qword [rsp-8], xmm0 // Use red zone.
3583 | movsd qword [rsp-8], xmm0 // Use red zone. 3521 | movsd qword [rsp-16], xmm1
3584 | movsd qword [rsp-16], xmm1 3522 | fld qword [rsp-8]
3585 | fld qword [rsp-8] 3523 | fld qword [rsp-16]
3586 | fld qword [rsp-16] 3524 |.endif
3587 |.endif 3525 | je >1
3588 | je >1 3526 | fpatan
3589 | fpatan 3527 |7:
3590 |7: 3528 |.if X64WIN
3591 |.if X64WIN 3529 | fstp qword [rsp+8] // Use scratch area.
3592 | fstp qword [rsp+8] // Use scratch area. 3530 | movsd xmm0, qword [rsp+8]
3593 | movsd xmm0, qword [rsp+8] 3531 |.else
3594 |.else 3532 | fstp qword [rsp-8] // Use red zone.
3595 | fstp qword [rsp-8] // Use red zone. 3533 | movsd xmm0, qword [rsp-8]
3596 | movsd xmm0, qword [rsp-8] 3534 |.endif
3597 |.endif 3535 | ret
3598 | ret 3536 |1: ; fxch; fscale; fpop1; jmp <7
3599 |1: ; fxch; fscale; fpop1; jmp <7 3537 |2: ; cmp foldop, 11; je >1; ja >9
3600 |2: ; cmp foldop, 11; je >1; ja >9 3538 | minsd xmm0, xmm1; ret
3601 | minsd xmm0, xmm1; ret 3539 |1: ; maxsd xmm0, xmm1; ret
3602 |1: ; maxsd xmm0, xmm1; ret 3540 |9: ; int3 // Bad op.
3603 |9: ; int3 // Bad op. 3541 |
3604 | 3542 |.elif SSE // x86 calling convention with SSE ops.
3605 |.else // x86 calling convention. 3543 |
3606 | 3544 | .define foldop, eax
3607 | .define foldop, eax 3545 | mov foldop, [esp+20]
3608 | mov foldop, [esp+20] 3546 | movsd xmm0, qword [esp+4]
3609 | movsd xmm0, qword [esp+4] 3547 | movsd xmm1, qword [esp+12]
3610 | movsd xmm1, qword [esp+12] 3548 | cmp foldop, 1; je >1; ja >2
3611 | cmp foldop, 1; je >1; ja >2 3549 | addsd xmm0, xmm1
3612 | addsd xmm0, xmm1 3550 |7:
3613 |7: 3551 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3614 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. 3552 | fld qword [esp+4]
3615 | fld qword [esp+4] 3553 | ret
3616 | ret 3554 |1: ; subsd xmm0, xmm1; jmp <7
3617 |1: ; subsd xmm0, xmm1; jmp <7 3555 |2: ; cmp foldop, 3; je >1; ja >2
3618 |2: ; cmp foldop, 3; je >1; ja >2 3556 | mulsd xmm0, xmm1; jmp <7
3619 | mulsd xmm0, xmm1; jmp <7 3557 |1: ; divsd xmm0, xmm1; jmp <7
3620 |1: ; divsd xmm0, xmm1; jmp <7 3558 |2: ; cmp foldop, 5
3621 |2: ; cmp foldop, 5 3559 | je >1; ja >2
3622 | je >1; ja >2 3560 | call ->vm_mod; jmp <7
3623 | call ->vm_mod; jmp <7 3561 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3624 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3562 |2: ; cmp foldop, 7; je >1; ja >2
3625 |2: ; cmp foldop, 7; je >1; ja >2 3563 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3626 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3564 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3627 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3565 |2: ; cmp foldop, 9; ja >2
3628 |2: ; cmp foldop, 9; ja >2 3566 | fld qword [esp+4] // Reload from stack
3629 | fld qword [esp+4] // Reload from stack 3567 | fld qword [esp+12]
3630 | fld qword [esp+12] 3568 | je >1
3631 | je >1 3569 | fpatan; ret
3632 | fpatan; ret 3570 |1: ; fxch; fscale; fpop1; ret
3633 |1: ; fxch; fscale; fpop1; ret 3571 |2: ; cmp foldop, 11; je >1; ja >9
3634 |2: ; cmp foldop, 11; je >1; ja >9 3572 | minsd xmm0, xmm1; jmp <7
3635 | minsd xmm0, xmm1; jmp <7 3573 |1: ; maxsd xmm0, xmm1; jmp <7
3636 |1: ; maxsd xmm0, xmm1; jmp <7 3574 |9: ; int3 // Bad op.
3637 |9: ; int3 // Bad op. 3575 |
3638 | 3576 |.else // x86 calling convention with x87 ops.
3639 |.endif 3577 |
3640 } else { 3578 | mov eax, [esp+20]
3641 | mov eax, [esp+20] 3579 | fld qword [esp+4]
3642 | fld qword [esp+4] 3580 | fld qword [esp+12]
3643 | fld qword [esp+12] 3581 | cmp eax, 1; je >1; ja >2
3644 | cmp eax, 1; je >1; ja >2 3582 | faddp st1; ret
3645 | faddp st1; ret 3583 |1: ; fsubp st1; ret
3646 |1: ; fsubp st1; ret 3584 |2: ; cmp eax, 3; je >1; ja >2
3647 |2: ; cmp eax, 3; je >1; ja >2 3585 | fmulp st1; ret
3648 | fmulp st1; ret 3586 |1: ; fdivp st1; ret
3649 |1: ; fdivp st1; ret 3587 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3650 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow 3588 | cmp eax, 7; je >1; ja >2
3651 | cmp eax, 7; je >1; ja >2 3589 | fpop; fchs; ret
3652 | fpop; fchs; ret 3590 |1: ; fpop; fabs; ret
3653 |1: ; fpop; fabs; ret 3591 |2: ; cmp eax, 9; je >1; ja >2
3654 |2: ; cmp eax, 9; je >1; ja >2 3592 | fpatan; ret
3655 | fpatan; ret 3593 |1: ; fxch; fscale; fpop1; ret
3656 |1: ; fxch; fscale; fpop1; ret 3594 |2: ; cmp eax, 11; je >1; ja >9
3657 |2: ; cmp eax, 11; je >1; ja >9 3595 | fucomi st1; fcmovnbe st1; fpop1; ret
3658 ||if (cmov) { 3596 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3659 | fucomi st1; fcmovnbe st1; fpop1; ret 3597 |9: ; int3 // Bad op.
3660 |1: ; fucomi st1; fcmovbe st1; fpop1; ret 3598 |
3661 ||} else { 3599 |.endif
3662 | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret
3663 |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret
3664 ||}
3665 |9: ; int3 // Bad op.
3666 }
3667 | 3600 |
3668 |//----------------------------------------------------------------------- 3601 |//-----------------------------------------------------------------------
3669 |//-- Miscellaneous functions -------------------------------------------- 3602 |//-- Miscellaneous functions --------------------------------------------
@@ -3726,7 +3659,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3726 | 3659 |
3727 |// Handler for callback functions. Callback slot number in ah/al. 3660 |// Handler for callback functions. Callback slot number in ah/al.
3728 |->vm_ffi_callback: 3661 |->vm_ffi_callback:
3729#if LJ_HASFFI 3662 |.if FFI
3730 |.type CTSTATE, CTState, PC 3663 |.type CTSTATE, CTState, PC
3731 |.if not X64 3664 |.if not X64
3732 | sub esp, 16 // Leave room for SAVE_ERRF etc. 3665 | sub esp, 16 // Leave room for SAVE_ERRF etc.
@@ -3781,10 +3714,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3781 | shr RD, 3 3714 | shr RD, 3
3782 | add RD, 1 3715 | add RD, 1
3783 | ins_callt 3716 | ins_callt
3784#endif 3717 |.endif
3785 | 3718 |
3786 |->cont_ffi_callback: // Return from FFI callback. 3719 |->cont_ffi_callback: // Return from FFI callback.
3787#if LJ_HASFFI 3720 |.if FFI
3788 | mov L:RA, SAVE_L 3721 | mov L:RA, SAVE_L
3789 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] 3722 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
3790 | mov aword CTSTATE->L, L:RAa 3723 | mov aword CTSTATE->L, L:RAa
@@ -3819,11 +3752,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3819 | push ecx 3752 | push ecx
3820 | ret 3753 | ret
3821 |.endif 3754 |.endif
3822#endif 3755 |.endif
3823 | 3756 |
3824 |->vm_ffi_call@4: // Call C function via FFI. 3757 |->vm_ffi_call@4: // Call C function via FFI.
3825 | // Caveat: needs special frame unwinding, see below. 3758 | // Caveat: needs special frame unwinding, see below.
3826#if LJ_HASFFI 3759 |.if FFI
3827 |.if X64 3760 |.if X64
3828 | .type CCSTATE, CCallState, rbx 3761 | .type CCSTATE, CCallState, rbx
3829 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 3762 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
@@ -3838,9 +3771,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3838 | sub rsp, rax 3771 | sub rsp, rax
3839 |.else 3772 |.else
3840 | sub esp, CCSTATE->spadj 3773 | sub esp, CCSTATE->spadj
3841#if LJ_TARGET_WINDOWS 3774 |.if WIN
3842 | mov CCSTATE->spadj, esp 3775 | mov CCSTATE->spadj, esp
3843#endif 3776 |.endif
3844 |.endif 3777 |.endif
3845 | 3778 |
3846 | // Copy stack slots. 3779 | // Copy stack slots.
@@ -3907,9 +3840,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3907 |6: 3840 |6:
3908 | fstp dword CCSTATE->fpr[0].f[0] 3841 | fstp dword CCSTATE->fpr[0].f[0]
3909 |7: 3842 |7:
3910#if LJ_TARGET_WINDOWS 3843 |.if WIN
3911 | sub CCSTATE->spadj, esp 3844 | sub CCSTATE->spadj, esp
3912#endif 3845 |.endif
3913 |.endif 3846 |.endif
3914 | 3847 |
3915 |.if X64 3848 |.if X64
@@ -3917,14 +3850,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3917 |.else 3850 |.else
3918 | mov ebx, [ebp-4]; leave; ret 3851 | mov ebx, [ebp-4]; leave; ret
3919 |.endif 3852 |.endif
3920#endif 3853 |.endif
3921 |// Note: vm_ffi_call must be the last function in this object file! 3854 |// Note: vm_ffi_call must be the last function in this object file!
3922 | 3855 |
3923 |//----------------------------------------------------------------------- 3856 |//-----------------------------------------------------------------------
3924} 3857}
3925 3858
3926/* Generate the code for a single instruction. */ 3859/* Generate the code for a single instruction. */
3927static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) 3860static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3928{ 3861{
3929 int vk = 0; 3862 int vk = 0;
3930 |// Note: aligning all instructions does not pay off. 3863 |// Note: aligning all instructions does not pay off.
@@ -3957,79 +3890,79 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3957 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3890 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3958 | // RA = src1, RD = src2, JMP with RD = target 3891 | // RA = src1, RD = src2, JMP with RD = target
3959 | ins_AD 3892 | ins_AD
3960 if (LJ_DUALNUM) { 3893 |.if DUALNUM
3961 | checkint RA, >7 3894 | checkint RA, >7
3962 | checkint RD, >8 3895 | checkint RD, >8
3963 | mov RB, dword [BASE+RA*8] 3896 | mov RB, dword [BASE+RA*8]
3964 | add PC, 4 3897 | add PC, 4
3965 | cmp RB, dword [BASE+RD*8] 3898 | cmp RB, dword [BASE+RD*8]
3966 | jmp_comp jge, jl, jg, jle, >9 3899 | jmp_comp jge, jl, jg, jle, >9
3967 |6: 3900 |6:
3968 | movzx RD, PC_RD 3901 | movzx RD, PC_RD
3969 | branchPC RD 3902 | branchPC RD
3970 |9: 3903 |9:
3971 | ins_next 3904 | ins_next
3972 | 3905 |
3973 |7: // RA is not an integer. 3906 |7: // RA is not an integer.
3974 | ja ->vmeta_comp 3907 | ja ->vmeta_comp
3975 | // RA is a number. 3908 | // RA is a number.
3976 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3909 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3977 | // RA is a number, RD is an integer. 3910 | // RA is a number, RD is an integer.
3978 if (sse) { 3911 |.if SSE
3979 | cvtsi2sd xmm0, dword [BASE+RD*8] 3912 | cvtsi2sd xmm0, dword [BASE+RD*8]
3980 | jmp >2 3913 | jmp >2
3981 } else { 3914 |.else
3982 | fld qword [BASE+RA*8] 3915 | fld qword [BASE+RA*8]
3983 | fild dword [BASE+RD*8] 3916 | fild dword [BASE+RD*8]
3984 | jmp >3 3917 | jmp >3
3985 } 3918 |.endif
3986 | 3919 |
3987 |8: // RA is an integer, RD is not an integer. 3920 |8: // RA is an integer, RD is not an integer.
3988 | ja ->vmeta_comp 3921 | ja ->vmeta_comp
3989 | // RA is an integer, RD is a number. 3922 | // RA is an integer, RD is a number.
3990 if (sse) { 3923 |.if SSE
3991 | cvtsi2sd xmm1, dword [BASE+RA*8] 3924 | cvtsi2sd xmm1, dword [BASE+RA*8]
3992 | movsd xmm0, qword [BASE+RD*8] 3925 | movsd xmm0, qword [BASE+RD*8]
3993 | add PC, 4 3926 | add PC, 4
3994 | ucomisd xmm0, xmm1 3927 | ucomisd xmm0, xmm1
3995 | jmp_comp jbe, ja, jb, jae, <9 3928 | jmp_comp jbe, ja, jb, jae, <9
3996 | jmp <6 3929 | jmp <6
3997 } else { 3930 |.else
3998 | fild dword [BASE+RA*8] 3931 | fild dword [BASE+RA*8]
3999 | jmp >2 3932 | jmp >2
4000 } 3933 |.endif
4001 } else { 3934 |.else
4002 | checknum RA, ->vmeta_comp 3935 | checknum RA, ->vmeta_comp
4003 | checknum RD, ->vmeta_comp 3936 | checknum RD, ->vmeta_comp
4004 } 3937 |.endif
4005 if (sse) { 3938 |.if SSE
4006 |1: 3939 |1:
4007 | movsd xmm0, qword [BASE+RD*8] 3940 | movsd xmm0, qword [BASE+RD*8]
4008 |2: 3941 |2:
4009 | add PC, 4 3942 | add PC, 4
4010 | ucomisd xmm0, qword [BASE+RA*8] 3943 | ucomisd xmm0, qword [BASE+RA*8]
4011 |3: 3944 |3:
4012 } else { 3945 |.else
4013 |1: 3946 |1:
4014 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. 3947 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4015 |2: 3948 |2:
4016 | fld qword [BASE+RD*8] 3949 | fld qword [BASE+RD*8]
4017 |3: 3950 |3:
4018 | add PC, 4 3951 | add PC, 4
4019 | fcomparepp // eax (RD) modified! 3952 | fcomparepp
4020 } 3953 |.endif
4021 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3954 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4022 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3955 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4023 if (LJ_DUALNUM) { 3956 |.if DUALNUM
4024 | jmp_comp jbe, ja, jb, jae, <9 3957 | jmp_comp jbe, ja, jb, jae, <9
4025 | jmp <6 3958 | jmp <6
4026 } else { 3959 |.else
4027 | jmp_comp jbe, ja, jb, jae, >1 3960 | jmp_comp jbe, ja, jb, jae, >1
4028 | movzx RD, PC_RD 3961 | movzx RD, PC_RD
4029 | branchPC RD 3962 | branchPC RD
4030 |1: 3963 |1:
4031 | ins_next 3964 | ins_next
4032 } 3965 |.endif
4033 break; 3966 break;
4034 3967
4035 case BC_ISEQV: case BC_ISNEV: 3968 case BC_ISEQV: case BC_ISNEV:
@@ -4037,63 +3970,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4037 | ins_AD // RA = src1, RD = src2, JMP with RD = target 3970 | ins_AD // RA = src1, RD = src2, JMP with RD = target
4038 | mov RB, [BASE+RD*8+4] 3971 | mov RB, [BASE+RD*8+4]
4039 | add PC, 4 3972 | add PC, 4
4040 if (LJ_DUALNUM) { 3973 |.if DUALNUM
4041 | cmp RB, LJ_TISNUM; jne >7 3974 | cmp RB, LJ_TISNUM; jne >7
4042 | checkint RA, >8 3975 | checkint RA, >8
4043 | mov RB, dword [BASE+RD*8] 3976 | mov RB, dword [BASE+RD*8]
4044 | cmp RB, dword [BASE+RA*8] 3977 | cmp RB, dword [BASE+RA*8]
4045 if (vk) { 3978 if (vk) {
4046 | jne >9 3979 | jne >9
4047 } else {
4048 | je >9
4049 }
4050 | movzx RD, PC_RD
4051 | branchPC RD
4052 |9:
4053 | ins_next
4054 |
4055 |7: // RD is not an integer.
4056 | ja >5
4057 | // RD is a number.
4058 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4059 | // RD is a number, RA is an integer.
4060 if (sse) {
4061 | cvtsi2sd xmm0, dword [BASE+RA*8]
4062 } else {
4063 | fild dword [BASE+RA*8]
4064 }
4065 | jmp >2
4066 |
4067 |8: // RD is an integer, RA is not an integer.
4068 | ja >5
4069 | // RD is an integer, RA is a number.
4070 if (sse) {
4071 | cvtsi2sd xmm0, dword [BASE+RD*8]
4072 | ucomisd xmm0, qword [BASE+RA*8]
4073 } else {
4074 | fild dword [BASE+RD*8]
4075 | fld qword [BASE+RA*8]
4076 }
4077 | jmp >4
4078 |
4079 } else {
4080 | cmp RB, LJ_TISNUM; jae >5
4081 | checknum RA, >5
4082 }
4083 if (sse) {
4084 |1:
4085 | movsd xmm0, qword [BASE+RA*8]
4086 |2:
4087 | ucomisd xmm0, qword [BASE+RD*8]
4088 |4:
4089 } else { 3980 } else {
4090 |1: 3981 | je >9
4091 | fld qword [BASE+RA*8]
4092 |2:
4093 | fld qword [BASE+RD*8]
4094 |4:
4095 | fcomparepp // eax (RD) modified!
4096 } 3982 }
3983 | movzx RD, PC_RD
3984 | branchPC RD
3985 |9:
3986 | ins_next
3987 |
3988 |7: // RD is not an integer.
3989 | ja >5
3990 | // RD is a number.
3991 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
3992 | // RD is a number, RA is an integer.
3993 |.if SSE
3994 | cvtsi2sd xmm0, dword [BASE+RA*8]
3995 |.else
3996 | fild dword [BASE+RA*8]
3997 |.endif
3998 | jmp >2
3999 |
4000 |8: // RD is an integer, RA is not an integer.
4001 | ja >5
4002 | // RD is an integer, RA is a number.
4003 |.if SSE
4004 | cvtsi2sd xmm0, dword [BASE+RD*8]
4005 | ucomisd xmm0, qword [BASE+RA*8]
4006 |.else
4007 | fild dword [BASE+RD*8]
4008 | fld qword [BASE+RA*8]
4009 |.endif
4010 | jmp >4
4011 |
4012 |.else
4013 | cmp RB, LJ_TISNUM; jae >5
4014 | checknum RA, >5
4015 |.endif
4016 |.if SSE
4017 |1:
4018 | movsd xmm0, qword [BASE+RA*8]
4019 |2:
4020 | ucomisd xmm0, qword [BASE+RD*8]
4021 |4:
4022 |.else
4023 |1:
4024 | fld qword [BASE+RA*8]
4025 |2:
4026 | fld qword [BASE+RD*8]
4027 |4:
4028 | fcomparepp
4029 |.endif
4097 iseqne_fp: 4030 iseqne_fp:
4098 if (vk) { 4031 if (vk) {
4099 | jp >2 // Unordered means not equal. 4032 | jp >2 // Unordered means not equal.
@@ -4129,10 +4062,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4129 | 4062 |
4130 if (op == BC_ISEQV || op == BC_ISNEV) { 4063 if (op == BC_ISEQV || op == BC_ISNEV) {
4131 |5: // Either or both types are not numbers. 4064 |5: // Either or both types are not numbers.
4132 if (LJ_HASFFI) { 4065 |.if FFI
4133 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd 4066 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
4134 | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd 4067 | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
4135 } 4068 |.endif
4136 | checktp RA, RB // Compare types. 4069 | checktp RA, RB // Compare types.
4137 | jne <2 // Not the same type? 4070 | jne <2 // Not the same type?
4138 | cmp RB, LJ_TISPRI 4071 | cmp RB, LJ_TISPRI
@@ -4163,7 +4096,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4163 | mov RB, 1 // ne = 1 4096 | mov RB, 1 // ne = 1
4164 } 4097 }
4165 | jmp ->vmeta_equal // Handle __eq metamethod. 4098 | jmp ->vmeta_equal // Handle __eq metamethod.
4166 } else if (LJ_HASFFI) { 4099 } else {
4100 |.if FFI
4167 |3: 4101 |3:
4168 | cmp RB, LJ_TCDATA 4102 | cmp RB, LJ_TCDATA
4169 if (LJ_DUALNUM && vk) { 4103 if (LJ_DUALNUM && vk) {
@@ -4172,6 +4106,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4172 | jne <2 4106 | jne <2
4173 } 4107 }
4174 | jmp ->vmeta_equal_cd 4108 | jmp ->vmeta_equal_cd
4109 |.endif
4175 } 4110 }
4176 break; 4111 break;
4177 case BC_ISEQS: case BC_ISNES: 4112 case BC_ISEQS: case BC_ISNES:
@@ -4194,59 +4129,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4194 | ins_AD // RA = src, RD = num const, JMP with RD = target 4129 | ins_AD // RA = src, RD = num const, JMP with RD = target
4195 | mov RB, [BASE+RA*8+4] 4130 | mov RB, [BASE+RA*8+4]
4196 | add PC, 4 4131 | add PC, 4
4197 if (LJ_DUALNUM) { 4132 |.if DUALNUM
4198 | cmp RB, LJ_TISNUM; jne >7 4133 | cmp RB, LJ_TISNUM; jne >7
4199 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 4134 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
4200 | mov RB, dword [KBASE+RD*8] 4135 | mov RB, dword [KBASE+RD*8]
4201 | cmp RB, dword [BASE+RA*8] 4136 | cmp RB, dword [BASE+RA*8]
4202 if (vk) { 4137 if (vk) {
4203 | jne >9 4138 | jne >9
4204 } else {
4205 | je >9
4206 }
4207 | movzx RD, PC_RD
4208 | branchPC RD
4209 |9:
4210 | ins_next
4211 |
4212 |7: // RA is not an integer.
4213 | ja >3
4214 | // RA is a number.
4215 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4216 | // RA is a number, RD is an integer.
4217 if (sse) {
4218 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4219 } else {
4220 | fild dword [KBASE+RD*8]
4221 }
4222 | jmp >2
4223 |
4224 |8: // RA is an integer, RD is a number.
4225 if (sse) {
4226 | cvtsi2sd xmm0, dword [BASE+RA*8]
4227 | ucomisd xmm0, qword [KBASE+RD*8]
4228 } else {
4229 | fild dword [BASE+RA*8]
4230 | fld qword [KBASE+RD*8]
4231 }
4232 | jmp >4
4233 } else {
4234 | cmp RB, LJ_TISNUM; jae >3
4235 }
4236 if (sse) {
4237 |1:
4238 | movsd xmm0, qword [KBASE+RD*8]
4239 |2:
4240 | ucomisd xmm0, qword [BASE+RA*8]
4241 |4:
4242 } else { 4139 } else {
4243 |1: 4140 | je >9
4244 | fld qword [KBASE+RD*8]
4245 |2:
4246 | fld qword [BASE+RA*8]
4247 |4:
4248 | fcomparepp // eax (RD) modified!
4249 } 4141 }
4142 | movzx RD, PC_RD
4143 | branchPC RD
4144 |9:
4145 | ins_next
4146 |
4147 |7: // RA is not an integer.
4148 | ja >3
4149 | // RA is a number.
4150 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4151 | // RA is a number, RD is an integer.
4152 |.if SSE
4153 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4154 |.else
4155 | fild dword [KBASE+RD*8]
4156 |.endif
4157 | jmp >2
4158 |
4159 |8: // RA is an integer, RD is a number.
4160 |.if SSE
4161 | cvtsi2sd xmm0, dword [BASE+RA*8]
4162 | ucomisd xmm0, qword [KBASE+RD*8]
4163 |.else
4164 | fild dword [BASE+RA*8]
4165 | fld qword [KBASE+RD*8]
4166 |.endif
4167 | jmp >4
4168 |.else
4169 | cmp RB, LJ_TISNUM; jae >3
4170 |.endif
4171 |.if SSE
4172 |1:
4173 | movsd xmm0, qword [KBASE+RD*8]
4174 |2:
4175 | ucomisd xmm0, qword [BASE+RA*8]
4176 |4:
4177 |.else
4178 |1:
4179 | fld qword [KBASE+RD*8]
4180 |2:
4181 | fld qword [BASE+RA*8]
4182 |4:
4183 | fcomparepp
4184 |.endif
4250 goto iseqne_fp; 4185 goto iseqne_fp;
4251 case BC_ISEQP: case BC_ISNEP: 4186 case BC_ISEQP: case BC_ISNEP:
4252 vk = op == BC_ISEQP; 4187 vk = op == BC_ISEQP;
@@ -4322,59 +4257,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4322 break; 4257 break;
4323 case BC_UNM: 4258 case BC_UNM:
4324 | ins_AD // RA = dst, RD = src 4259 | ins_AD // RA = dst, RD = src
4325 if (LJ_DUALNUM) { 4260 |.if DUALNUM
4326 | checkint RD, >5 4261 | checkint RD, >5
4327 | mov RB, [BASE+RD*8] 4262 | mov RB, [BASE+RD*8]
4328 | neg RB 4263 | neg RB
4329 | jo >4 4264 | jo >4
4330 | mov dword [BASE+RA*8+4], LJ_TISNUM 4265 | mov dword [BASE+RA*8+4], LJ_TISNUM
4331 | mov dword [BASE+RA*8], RB 4266 | mov dword [BASE+RA*8], RB
4332 |9: 4267 |9:
4333 | ins_next 4268 | ins_next
4334 |4: 4269 |4:
4335 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. 4270 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
4336 | mov dword [BASE+RA*8], 0 4271 | mov dword [BASE+RA*8], 0
4337 | jmp <9 4272 | jmp <9
4338 |5: 4273 |5:
4339 | ja ->vmeta_unm 4274 | ja ->vmeta_unm
4340 } else { 4275 |.else
4341 | checknum RD, ->vmeta_unm 4276 | checknum RD, ->vmeta_unm
4342 } 4277 |.endif
4343 if (sse) { 4278 |.if SSE
4344 | movsd xmm0, qword [BASE+RD*8] 4279 | movsd xmm0, qword [BASE+RD*8]
4345 | sseconst_sign xmm1, RDa 4280 | sseconst_sign xmm1, RDa
4346 | xorps xmm0, xmm1 4281 | xorps xmm0, xmm1
4347 | movsd qword [BASE+RA*8], xmm0 4282 | movsd qword [BASE+RA*8], xmm0
4348 } else { 4283 |.else
4349 | fld qword [BASE+RD*8] 4284 | fld qword [BASE+RD*8]
4350 | fchs 4285 | fchs
4351 | fstp qword [BASE+RA*8] 4286 | fstp qword [BASE+RA*8]
4352 } 4287 |.endif
4353 if (LJ_DUALNUM) { 4288 |.if DUALNUM
4354 | jmp <9 4289 | jmp <9
4355 } else { 4290 |.else
4356 | ins_next 4291 | ins_next
4357 } 4292 |.endif
4358 break; 4293 break;
4359 case BC_LEN: 4294 case BC_LEN:
4360 | ins_AD // RA = dst, RD = src 4295 | ins_AD // RA = dst, RD = src
4361 | checkstr RD, >2 4296 | checkstr RD, >2
4362 | mov STR:RD, [BASE+RD*8] 4297 | mov STR:RD, [BASE+RD*8]
4363 if (LJ_DUALNUM) { 4298 |.if DUALNUM
4364 | mov RD, dword STR:RD->len 4299 | mov RD, dword STR:RD->len
4365 |1: 4300 |1:
4366 | mov dword [BASE+RA*8+4], LJ_TISNUM 4301 | mov dword [BASE+RA*8+4], LJ_TISNUM
4367 | mov dword [BASE+RA*8], RD 4302 | mov dword [BASE+RA*8], RD
4368 } else if (sse) { 4303 |.elif SSE
4369 | xorps xmm0, xmm0 4304 | xorps xmm0, xmm0
4370 | cvtsi2sd xmm0, dword STR:RD->len 4305 | cvtsi2sd xmm0, dword STR:RD->len
4371 |1: 4306 |1:
4372 | movsd qword [BASE+RA*8], xmm0 4307 | movsd qword [BASE+RA*8], xmm0
4373 } else { 4308 |.else
4374 | fild dword STR:RD->len 4309 | fild dword STR:RD->len
4375 |1: 4310 |1:
4376 | fstp qword [BASE+RA*8] 4311 | fstp qword [BASE+RA*8]
4377 } 4312 |.endif
4378 | ins_next 4313 | ins_next
4379 |2: 4314 |2:
4380 | checktab RD, ->vmeta_len 4315 | checktab RD, ->vmeta_len
@@ -4389,16 +4324,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4389 | mov RB, BASE // Save BASE. 4324 | mov RB, BASE // Save BASE.
4390 | call extern lj_tab_len@4 // (GCtab *t) 4325 | call extern lj_tab_len@4 // (GCtab *t)
4391 | // Length of table returned in eax (RD). 4326 | // Length of table returned in eax (RD).
4392 if (LJ_DUALNUM) { 4327 |.if DUALNUM
4393 | // Nothing to do. 4328 | // Nothing to do.
4394 } else if (sse) { 4329 |.elif SSE
4395 | cvtsi2sd xmm0, RD 4330 | cvtsi2sd xmm0, RD
4396 } else { 4331 |.else
4397 |.if not X64 4332 | mov ARG1, RD
4398 | mov ARG1, RD 4333 | fild ARG1
4399 | fild ARG1 4334 |.endif
4400 |.endif
4401 }
4402 | mov BASE, RB // Restore BASE. 4335 | mov BASE, RB // Restore BASE.
4403 | movzx RA, PC_RA 4336 | movzx RA, PC_RA
4404 | jmp <1 4337 | jmp <1
@@ -4418,40 +4351,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4418 ||switch (vk) { 4351 ||switch (vk) {
4419 ||case 0: 4352 ||case 0:
4420 | checknum RB, ->vmeta_arith_vn 4353 | checknum RB, ->vmeta_arith_vn
4421 ||if (LJ_DUALNUM) { 4354 | .if DUALNUM
4422 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 4355 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4423 ||} 4356 | .endif
4424 ||if (sse) { 4357 | .if SSE
4425 | movsd xmm0, qword [BASE+RB*8] 4358 | movsd xmm0, qword [BASE+RB*8]
4426 | sseins ssereg, qword [KBASE+RC*8] 4359 | sseins ssereg, qword [KBASE+RC*8]
4427 ||} else { 4360 | .else
4428 | fld qword [BASE+RB*8] 4361 | fld qword [BASE+RB*8]
4429 | x87ins qword [KBASE+RC*8] 4362 | x87ins qword [KBASE+RC*8]
4430 ||} 4363 | .endif
4431 || break; 4364 || break;
4432 ||case 1: 4365 ||case 1:
4433 | checknum RB, ->vmeta_arith_nv 4366 | checknum RB, ->vmeta_arith_nv
4434 ||if (LJ_DUALNUM) { 4367 | .if DUALNUM
4435 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 4368 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4436 ||} 4369 | .endif
4437 ||if (sse) { 4370 | .if SSE
4438 | movsd xmm0, qword [KBASE+RC*8] 4371 | movsd xmm0, qword [KBASE+RC*8]
4439 | sseins ssereg, qword [BASE+RB*8] 4372 | sseins ssereg, qword [BASE+RB*8]
4440 ||} else { 4373 | .else
4441 | fld qword [KBASE+RC*8] 4374 | fld qword [KBASE+RC*8]
4442 | x87ins qword [BASE+RB*8] 4375 | x87ins qword [BASE+RB*8]
4443 ||} 4376 | .endif
4444 || break; 4377 || break;
4445 ||default: 4378 ||default:
4446 | checknum RB, ->vmeta_arith_vv 4379 | checknum RB, ->vmeta_arith_vv
4447 | checknum RC, ->vmeta_arith_vv 4380 | checknum RC, ->vmeta_arith_vv
4448 ||if (sse) { 4381 | .if SSE
4449 | movsd xmm0, qword [BASE+RB*8] 4382 | movsd xmm0, qword [BASE+RB*8]
4450 | sseins ssereg, qword [BASE+RC*8] 4383 | sseins ssereg, qword [BASE+RC*8]
4451 ||} else { 4384 | .else
4452 | fld qword [BASE+RB*8] 4385 | fld qword [BASE+RB*8]
4453 | x87ins qword [BASE+RC*8] 4386 | x87ins qword [BASE+RC*8]
4454 ||} 4387 | .endif
4455 || break; 4388 || break;
4456 ||} 4389 ||}
4457 |.endmacro 4390 |.endmacro
@@ -4489,11 +4422,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4489 |.endmacro 4422 |.endmacro
4490 | 4423 |
4491 |.macro ins_arithpost 4424 |.macro ins_arithpost
4492 ||if (sse) { 4425 |.if SSE
4493 | movsd qword [BASE+RA*8], xmm0 4426 | movsd qword [BASE+RA*8], xmm0
4494 ||} else { 4427 |.else
4495 | fstp qword [BASE+RA*8] 4428 | fstp qword [BASE+RA*8]
4496 ||} 4429 |.endif
4497 |.endmacro 4430 |.endmacro
4498 | 4431 |
4499 |.macro ins_arith, x87ins, sseins 4432 |.macro ins_arith, x87ins, sseins
@@ -4503,11 +4436,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4503 |.endmacro 4436 |.endmacro
4504 | 4437 |
4505 |.macro ins_arith, intins, x87ins, sseins 4438 |.macro ins_arith, intins, x87ins, sseins
4506 ||if (LJ_DUALNUM) { 4439 |.if DUALNUM
4507 | ins_arithdn intins 4440 | ins_arithdn intins
4508 ||} else { 4441 |.else
4509 | ins_arith, x87ins, sseins 4442 | ins_arith, x87ins, sseins
4510 ||} 4443 |.endif
4511 |.endmacro 4444 |.endmacro
4512 4445
4513 | // RA = dst, RB = src1 or num const, RC = src2 or num const 4446 | // RA = dst, RB = src1 or num const, RC = src2 or num const
@@ -4591,39 +4524,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4591 | ins_next 4524 | ins_next
4592 break; 4525 break;
4593 case BC_KCDATA: 4526 case BC_KCDATA:
4594#if LJ_HASFFI 4527 |.if FFI
4595 | ins_AND // RA = dst, RD = cdata const (~) 4528 | ins_AND // RA = dst, RD = cdata const (~)
4596 | mov RD, [KBASE+RD*4] 4529 | mov RD, [KBASE+RD*4]
4597 | mov dword [BASE+RA*8+4], LJ_TCDATA 4530 | mov dword [BASE+RA*8+4], LJ_TCDATA
4598 | mov [BASE+RA*8], RD 4531 | mov [BASE+RA*8], RD
4599 | ins_next 4532 | ins_next
4600#endif 4533 |.endif
4601 break; 4534 break;
4602 case BC_KSHORT: 4535 case BC_KSHORT:
4603 | ins_AD // RA = dst, RD = signed int16 literal 4536 | ins_AD // RA = dst, RD = signed int16 literal
4604 if (LJ_DUALNUM) { 4537 |.if DUALNUM
4605 | movsx RD, RDW 4538 | movsx RD, RDW
4606 | mov dword [BASE+RA*8+4], LJ_TISNUM 4539 | mov dword [BASE+RA*8+4], LJ_TISNUM
4607 | mov dword [BASE+RA*8], RD 4540 | mov dword [BASE+RA*8], RD
4608 } else if (sse) { 4541 |.elif SSE
4609 | movsx RD, RDW // Sign-extend literal. 4542 | movsx RD, RDW // Sign-extend literal.
4610 | cvtsi2sd xmm0, RD 4543 | cvtsi2sd xmm0, RD
4611 | movsd qword [BASE+RA*8], xmm0 4544 | movsd qword [BASE+RA*8], xmm0
4612 } else { 4545 |.else
4613 | fild PC_RD // Refetch signed RD from instruction. 4546 | fild PC_RD // Refetch signed RD from instruction.
4614 | fstp qword [BASE+RA*8] 4547 | fstp qword [BASE+RA*8]
4615 } 4548 |.endif
4616 | ins_next 4549 | ins_next
4617 break; 4550 break;
4618 case BC_KNUM: 4551 case BC_KNUM:
4619 | ins_AD // RA = dst, RD = num const 4552 | ins_AD // RA = dst, RD = num const
4620 if (sse) { 4553 |.if SSE
4621 | movsd xmm0, qword [KBASE+RD*8] 4554 | movsd xmm0, qword [KBASE+RD*8]
4622 | movsd qword [BASE+RA*8], xmm0 4555 | movsd qword [BASE+RA*8], xmm0
4623 } else { 4556 |.else
4624 | fld qword [KBASE+RD*8] 4557 | fld qword [KBASE+RD*8]
4625 | fstp qword [BASE+RA*8] 4558 | fstp qword [BASE+RA*8]
4626 } 4559 |.endif
4627 | ins_next 4560 | ins_next
4628 break; 4561 break;
4629 case BC_KPRI: 4562 case BC_KPRI:
@@ -4730,18 +4663,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4730 case BC_USETN: 4663 case BC_USETN:
4731 | ins_AD // RA = upvalue #, RD = num const 4664 | ins_AD // RA = upvalue #, RD = num const
4732 | mov LFUNC:RB, [BASE-8] 4665 | mov LFUNC:RB, [BASE-8]
4733 if (sse) { 4666 |.if SSE
4734 | movsd xmm0, qword [KBASE+RD*8] 4667 | movsd xmm0, qword [KBASE+RD*8]
4735 } else { 4668 |.else
4736 | fld qword [KBASE+RD*8] 4669 | fld qword [KBASE+RD*8]
4737 } 4670 |.endif
4738 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4671 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4739 | mov RA, UPVAL:RB->v 4672 | mov RA, UPVAL:RB->v
4740 if (sse) { 4673 |.if SSE
4741 | movsd qword [RA], xmm0 4674 | movsd qword [RA], xmm0
4742 } else { 4675 |.else
4743 | fstp qword [RA] 4676 | fstp qword [RA]
4744 } 4677 |.endif
4745 | ins_next 4678 | ins_next
4746 break; 4679 break;
4747 case BC_USETP: 4680 case BC_USETP:
@@ -4889,28 +4822,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4889 | mov TAB:RB, [BASE+RB*8] 4822 | mov TAB:RB, [BASE+RB*8]
4890 | 4823 |
4891 | // Integer key? 4824 | // Integer key?
4892 if (LJ_DUALNUM) { 4825 |.if DUALNUM
4893 | checkint RC, >5 4826 | checkint RC, >5
4894 | mov RC, dword [BASE+RC*8] 4827 | mov RC, dword [BASE+RC*8]
4895 } else { 4828 |.else
4896 | // Convert number to int and back and compare. 4829 | // Convert number to int and back and compare.
4897 | checknum RC, >5 4830 | checknum RC, >5
4898 if (sse) { 4831 |.if SSE
4899 | movsd xmm0, qword [BASE+RC*8] 4832 | movsd xmm0, qword [BASE+RC*8]
4900 | cvtsd2si RC, xmm0 4833 | cvtsd2si RC, xmm0
4901 | cvtsi2sd xmm1, RC 4834 | cvtsi2sd xmm1, RC
4902 | ucomisd xmm0, xmm1 4835 | ucomisd xmm0, xmm1
4903 } else { 4836 |.else
4904 |.if not X64 4837 | fld qword [BASE+RC*8]
4905 | fld qword [BASE+RC*8] 4838 | fist ARG1
4906 | fist ARG1 4839 | fild ARG1
4907 | fild ARG1 4840 | fcomparepp
4908 | fcomparepp // eax (RC) modified! 4841 | mov RC, ARG1
4909 | mov RC, ARG1 4842 |.endif
4910 |.endif 4843 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4911 } 4844 |.endif
4912 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4913 }
4914 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4845 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4915 | jae ->vmeta_tgetv // Not in array part? Use fallback. 4846 | jae ->vmeta_tgetv // Not in array part? Use fallback.
4916 | shl RC, 3 4847 | shl RC, 3
@@ -5039,28 +4970,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5039 | mov TAB:RB, [BASE+RB*8] 4970 | mov TAB:RB, [BASE+RB*8]
5040 | 4971 |
5041 | // Integer key? 4972 | // Integer key?
5042 if (LJ_DUALNUM) { 4973 |.if DUALNUM
5043 | checkint RC, >5 4974 | checkint RC, >5
5044 | mov RC, dword [BASE+RC*8] 4975 | mov RC, dword [BASE+RC*8]
5045 } else { 4976 |.else
5046 | // Convert number to int and back and compare. 4977 | // Convert number to int and back and compare.
5047 | checknum RC, >5 4978 | checknum RC, >5
5048 if (sse) { 4979 |.if SSE
5049 | movsd xmm0, qword [BASE+RC*8] 4980 | movsd xmm0, qword [BASE+RC*8]
5050 | cvtsd2si RC, xmm0 4981 | cvtsd2si RC, xmm0
5051 | cvtsi2sd xmm1, RC 4982 | cvtsi2sd xmm1, RC
5052 | ucomisd xmm0, xmm1 4983 | ucomisd xmm0, xmm1
5053 } else { 4984 |.else
5054 |.if not X64 4985 | fld qword [BASE+RC*8]
5055 | fld qword [BASE+RC*8] 4986 | fist ARG1
5056 | fist ARG1 4987 | fild ARG1
5057 | fild ARG1 4988 | fcomparepp
5058 | fcomparepp // eax (RC) modified! 4989 | mov RC, ARG1
5059 | mov RC, ARG1 4990 |.endif
5060 |.endif 4991 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5061 } 4992 |.endif
5062 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5063 }
5064 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4993 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
5065 | jae ->vmeta_tsetv 4994 | jae ->vmeta_tsetv
5066 | shl RC, 3 4995 | shl RC, 3
@@ -5406,9 +5335,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5406 5335
5407 case BC_ITERN: 5336 case BC_ITERN:
5408 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) 5337 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5409#if LJ_HASJIT 5338 |.if JIT
5410 | // NYI: add hotloop, record BC_ITERN. 5339 | // NYI: add hotloop, record BC_ITERN.
5411#endif 5340 |.endif
5412 | mov TMP1, KBASE // Need two more free registers. 5341 | mov TMP1, KBASE // Need two more free registers.
5413 | mov TMP2, DISPATCH 5342 | mov TMP2, DISPATCH
5414 | mov TAB:RB, [BASE+RA*8-16] 5343 | mov TAB:RB, [BASE+RA*8-16]
@@ -5419,14 +5348,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5419 |1: // Traverse array part. 5348 |1: // Traverse array part.
5420 | cmp RC, DISPATCH; jae >5 // Index points after array part? 5349 | cmp RC, DISPATCH; jae >5 // Index points after array part?
5421 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 5350 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
5422 if (LJ_DUALNUM) { 5351 |.if DUALNUM
5423 | mov dword [BASE+RA*8+4], LJ_TISNUM 5352 | mov dword [BASE+RA*8+4], LJ_TISNUM
5424 | mov dword [BASE+RA*8], RC 5353 | mov dword [BASE+RA*8], RC
5425 } else if (sse) { 5354 |.elif SSE
5426 | cvtsi2sd xmm0, RC 5355 | cvtsi2sd xmm0, RC
5427 } else { 5356 |.else
5428 | fild dword [BASE+RA*8-8] 5357 | fild dword [BASE+RA*8-8]
5429 } 5358 |.endif
5430 | // Copy array slot to returned value. 5359 | // Copy array slot to returned value.
5431 |.if X64 5360 |.if X64
5432 | mov RBa, [KBASE+RC*8] 5361 | mov RBa, [KBASE+RC*8]
@@ -5439,13 +5368,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5439 |.endif 5368 |.endif
5440 | add RC, 1 5369 | add RC, 1
5441 | // Return array index as a numeric key. 5370 | // Return array index as a numeric key.
5442 if (LJ_DUALNUM) { 5371 |.if DUALNUM
5443 | // See above. 5372 | // See above.
5444 } else if (sse) { 5373 |.elif SSE
5445 | movsd qword [BASE+RA*8], xmm0 5374 | movsd qword [BASE+RA*8], xmm0
5446 } else { 5375 |.else
5447 | fstp qword [BASE+RA*8] 5376 | fstp qword [BASE+RA*8]
5448 } 5377 |.endif
5449 | mov [BASE+RA*8-8], RC // Update control var. 5378 | mov [BASE+RA*8-8], RC // Update control var.
5450 |2: 5379 |2:
5451 | movzx RD, PC_RD // Get target from ITERL. 5380 | movzx RD, PC_RD // Get target from ITERL.
@@ -5457,9 +5386,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5457 | 5386 |
5458 |4: // Skip holes in array part. 5387 |4: // Skip holes in array part.
5459 | add RC, 1 5388 | add RC, 1
5460 if (!LJ_DUALNUM && !sse) { 5389 |.if not (DUALNUM or SSE)
5461 | mov [BASE+RA*8-8], RC 5390 | mov [BASE+RA*8-8], RC
5462 } 5391 |.endif
5463 | jmp <1 5392 | jmp <1
5464 | 5393 |
5465 |5: // Traverse hash part. 5394 |5: // Traverse hash part.
@@ -5695,9 +5624,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5695 |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] 5624 |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28]
5696 5625
5697 case BC_FORL: 5626 case BC_FORL:
5698#if LJ_HASJIT 5627 |.if JIT
5699 | hotloop RB 5628 | hotloop RB
5700#endif 5629 |.endif
5701 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 5630 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
5702 break; 5631 break;
5703 5632
@@ -5792,76 +5721,73 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5792 if (!vk) { 5721 if (!vk) {
5793 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5722 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5794 } 5723 }
5795 if (sse) { 5724 |.if SSE
5796 | movsd xmm0, qword FOR_IDX 5725 | movsd xmm0, qword FOR_IDX
5797 | movsd xmm1, qword FOR_STOP 5726 | movsd xmm1, qword FOR_STOP
5798 if (vk) { 5727 if (vk) {
5799 | addsd xmm0, qword FOR_STEP 5728 | addsd xmm0, qword FOR_STEP
5800 | movsd qword FOR_IDX, xmm0 5729 | movsd qword FOR_IDX, xmm0
5801 | test RB, RB; js >3 5730 | test RB, RB; js >3
5802 } else {
5803 | jl >3
5804 }
5805 | ucomisd xmm1, xmm0
5806 |1:
5807 | movsd qword FOR_EXT, xmm0
5808 } else { 5731 } else {
5809 | fld qword FOR_STOP 5732 | jl >3
5810 | fld qword FOR_IDX 5733 }
5811 if (vk) { 5734 | ucomisd xmm1, xmm0
5812 | fadd qword FOR_STEP // nidx = idx + step 5735 |1:
5813 | fst qword FOR_IDX 5736 | movsd qword FOR_EXT, xmm0
5814 | fst qword FOR_EXT 5737 |.else
5815 | test RB, RB; js >1 5738 | fld qword FOR_STOP
5816 } else { 5739 | fld qword FOR_IDX
5817 | fst qword FOR_EXT 5740 if (vk) {
5818 | jl >1 5741 | fadd qword FOR_STEP // nidx = idx + step
5819 } 5742 | fst qword FOR_IDX
5820 | fxch // Swap lim/(n)idx if step non-negative. 5743 | fst qword FOR_EXT
5821 |1: 5744 | test RB, RB; js >1
5822 | fcomparepp // eax (RD) modified if !cmov. 5745 } else {
5823 if (!cmov) { 5746 | fst qword FOR_EXT
5824 | movzx RD, PC_RD // Need to reload RD. 5747 | jl >1
5825 }
5826 } 5748 }
5749 | fxch // Swap lim/(n)idx if step non-negative.
5750 |1:
5751 | fcomparepp
5752 |.endif
5827 if (op == BC_FORI) { 5753 if (op == BC_FORI) {
5828 if (LJ_DUALNUM) { 5754 |.if DUALNUM
5829 | jnb <7 5755 | jnb <7
5830 } else { 5756 |.else
5831 | jnb >2 5757 | jnb >2
5832 | branchPC RD 5758 | branchPC RD
5833 } 5759 |.endif
5834 } else if (op == BC_JFORI) { 5760 } else if (op == BC_JFORI) {
5835 | branchPC RD 5761 | branchPC RD
5836 | movzx RD, PC_RD 5762 | movzx RD, PC_RD
5837 | jnb =>BC_JLOOP 5763 | jnb =>BC_JLOOP
5838 } else if (op == BC_IFORL) { 5764 } else if (op == BC_IFORL) {
5839 if (LJ_DUALNUM) { 5765 |.if DUALNUM
5840 | jb <7 5766 | jb <7
5841 } else { 5767 |.else
5842 | jb >2 5768 | jb >2
5843 | branchPC RD 5769 | branchPC RD
5844 } 5770 |.endif
5845 } else { 5771 } else {
5846 | jnb =>BC_JLOOP 5772 | jnb =>BC_JLOOP
5847 } 5773 }
5848 if (LJ_DUALNUM) { 5774 |.if DUALNUM
5849 | jmp <6 5775 | jmp <6
5850 } else { 5776 |.else
5851 |2: 5777 |2:
5852 | ins_next 5778 | ins_next
5853 } 5779 |.endif
5854 if (sse) { 5780 |.if SSE
5855 |3: // Invert comparison if step is negative. 5781 |3: // Invert comparison if step is negative.
5856 | ucomisd xmm0, xmm1 5782 | ucomisd xmm0, xmm1
5857 | jmp <1 5783 | jmp <1
5858 } 5784 |.endif
5859 break; 5785 break;
5860 5786
5861 case BC_ITERL: 5787 case BC_ITERL:
5862#if LJ_HASJIT 5788 |.if JIT
5863 | hotloop RB 5789 | hotloop RB
5864#endif 5790 |.endif
5865 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. 5791 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
5866 break; 5792 break;
5867 5793
@@ -5893,9 +5819,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5893 | ins_A // RA = base, RD = target (loop extent) 5819 | ins_A // RA = base, RD = target (loop extent)
5894 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5820 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5895 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 5821 | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
5896#if LJ_HASJIT 5822 |.if JIT
5897 | hotloop RB 5823 | hotloop RB
5898#endif 5824 |.endif
5899 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5825 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
5900 break; 5826 break;
5901 5827
@@ -5905,7 +5831,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5905 break; 5831 break;
5906 5832
5907 case BC_JLOOP: 5833 case BC_JLOOP:
5908#if LJ_HASJIT 5834 |.if JIT
5909 | ins_AD // RA = base (ignored), RD = traceno 5835 | ins_AD // RA = base (ignored), RD = traceno
5910 | mov RA, [DISPATCH+DISPATCH_J(trace)] 5836 | mov RA, [DISPATCH+DISPATCH_J(trace)]
5911 | mov TRACE:RD, [RA+RD*4] 5837 | mov TRACE:RD, [RA+RD*4]
@@ -5937,7 +5863,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5937 | sub rsp, 16 5863 | sub rsp, 16
5938 |.endif 5864 |.endif
5939 | jmp RDa 5865 | jmp RDa
5940#endif 5866 |.endif
5941 break; 5867 break;
5942 5868
5943 case BC_JMP: 5869 case BC_JMP:
@@ -5956,9 +5882,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5956 */ 5882 */
5957 5883
5958 case BC_FUNCF: 5884 case BC_FUNCF:
5959#if LJ_HASJIT 5885 |.if JIT
5960 | hotcall RB 5886 | hotcall RB
5961#endif 5887 |.endif
5962 case BC_FUNCV: /* NYI: compiled vararg functions. */ 5888 case BC_FUNCV: /* NYI: compiled vararg functions. */
5963 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. 5889 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
5964 break; 5890 break;
@@ -6101,23 +6027,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
6101static int build_backend(BuildCtx *ctx) 6027static int build_backend(BuildCtx *ctx)
6102{ 6028{
6103 int op; 6029 int op;
6104 int cmov = 1;
6105 int sse = 0;
6106#ifdef LUAJIT_CPU_NOCMOV
6107 cmov = 0;
6108#endif
6109#if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64)
6110 sse = 1;
6111#endif
6112
6113 dasm_growpc(Dst, BC__MAX); 6030 dasm_growpc(Dst, BC__MAX);
6114 6031 build_subroutines(ctx);
6115 build_subroutines(ctx, cmov, sse);
6116
6117 |.code_op 6032 |.code_op
6118 for (op = 0; op < BC__MAX; op++) 6033 for (op = 0; op < BC__MAX; op++)
6119 build_ins(ctx, (BCOp)op, op, cmov, sse); 6034 build_ins(ctx, (BCOp)op, op);
6120
6121 return BC__MAX; 6035 return BC__MAX;
6122} 6036}
6123 6037