diff options
author | Mike Pall <mike> | 2012-06-10 16:44:33 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2012-06-10 16:50:46 +0200 |
commit | 58ec704f78e311e6af97841a9e26cd7187955494 (patch) | |
tree | c66d9aedcbd7ed7945573b571c4e2737050e31b3 /src | |
parent | e496a502b0686af25053c161752c044074edc44e (diff) | |
download | luajit-58ec704f78e311e6af97841a9e26cd7187955494.tar.gz luajit-58ec704f78e311e6af97841a9e26cd7187955494.tar.bz2 luajit-58ec704f78e311e6af97841a9e26cd7187955494.zip |
x86/x64: Clean up interpreter.
Use DynASM defines instead of C defines.
Remove support for ancient CPUs without CMOV (before Pentium Pro).
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile | 19 | ||||
-rw-r--r-- | src/msvcbuild.bat | 7 | ||||
-rw-r--r-- | src/vm_x86.dasc | 2226 |
3 files changed, 1076 insertions, 1176 deletions
diff --git a/src/Makefile b/src/Makefile index d9bb178b..9d21c3fb 100644 --- a/src/Makefile +++ b/src/Makefile | |||
@@ -42,9 +42,8 @@ CCOPT= -O2 -fomit-frame-pointer | |||
42 | # | 42 | # |
43 | # Target-specific compiler options: | 43 | # Target-specific compiler options: |
44 | # | 44 | # |
45 | # x86 only: it's recommended to compile at least for i686. By default the | 45 | # x86 only: it's recommended to compile at least for i686. Better yet, |
46 | # assembler part of the interpreter makes use of CMOV/FCOMI*/FUCOMI* | 46 | # compile for an architecture that has SSE2, too (-msse -msse2). |
47 | # instructions, anyway. | ||
48 | # | 47 | # |
49 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute | 48 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute |
50 | # the binaries to a different machine you could also use: -march=native | 49 | # the binaries to a different machine you could also use: -march=native |
@@ -105,20 +104,6 @@ XCFLAGS= | |||
105 | # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. | 104 | # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. |
106 | #XCFLAGS+= -DLUAJIT_DISABLE_JIT | 105 | #XCFLAGS+= -DLUAJIT_DISABLE_JIT |
107 | # | 106 | # |
108 | # x86 only: use SSE2 instead of x87 instructions in the interpreter | ||
109 | # (always enabled for x64). A pure interpreter built with this flag won't | ||
110 | # run on older CPUs (before P4 or K8). There isn't much of a speed | ||
111 | # difference, so this is not enabled by default. | ||
112 | # The JIT compiler is not affected by this flag. It always uses runtime | ||
113 | # CPU feature detection before emitting code for SSE2 up to SSE4.1. | ||
114 | #XCFLAGS+= -DLUAJIT_CPU_SSE2 | ||
115 | # | ||
116 | # x86 only: Disable the use of CMOV and FCOMI*/FUCOMI* instructions in the | ||
117 | # interpreter. Do this only if you intend to use REALLY ANCIENT CPUs | ||
118 | # (before Pentium Pro, or on the VIA C3). This generally slows down the | ||
119 | # interpreter. Don't bother if your OS wouldn't run on them, anyway. | ||
120 | #XCFLAGS+= -DLUAJIT_CPU_NOCMOV | ||
121 | # | ||
122 | # Some architectures (e.g. PPC) can use either single-number (1) or | 107 | # Some architectures (e.g. PPC) can use either single-number (1) or |
123 | # dual-number (2) mode. Uncomment one of these lines to override the | 108 | # dual-number (2) mode. Uncomment one of these lines to override the |
124 | # default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. | 109 | # default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. |
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index ad6f2113..ca943a63 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat | |||
@@ -29,15 +29,16 @@ | |||
29 | if exist minilua.exe.manifest^ | 29 | if exist minilua.exe.manifest^ |
30 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe | 30 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe |
31 | 31 | ||
32 | @set DASMFLAGS=-D X64 -D WIN | 32 | @set DASMFLAGS=-D WIN -D JIT -D FFI |
33 | @set DASMX64=-D X64 | ||
33 | @if defined CPU goto :XCPU | 34 | @if defined CPU goto :XCPU |
34 | @set CPU=%PROCESSOR_ARCHITECTURE% | 35 | @set CPU=%PROCESSOR_ARCHITECTURE% |
35 | :XCPU | 36 | :XCPU |
36 | @if "%CPU%"=="AMD64" goto :X64 | 37 | @if "%CPU%"=="AMD64" goto :X64 |
37 | @if "%CPU%"=="X64" goto :X64 | 38 | @if "%CPU%"=="X64" goto :X64 |
38 | @set DASMFLAGS=-D WIN | 39 | @set DASMX64= |
39 | :X64 | 40 | :X64 |
40 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc | 41 | minilua %DASM% -LN %DASMFLAGS% %DASMX64% -o host\buildvm_arch.h vm_x86.dasc |
41 | @if errorlevel 1 goto :BAD | 42 | @if errorlevel 1 goto :BAD |
42 | 43 | ||
43 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c | 44 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 1cab76eb..38b268d4 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -50,7 +50,7 @@ | |||
50 | |.define RAH, ch | 50 | |.define RAH, ch |
51 | |.define RAL, cl | 51 | |.define RAL, cl |
52 | |.define RB, ebp // Must be ebp (C callee-save). | 52 | |.define RB, ebp // Must be ebp (C callee-save). |
53 | |.define RC, eax // Must be eax (fcomparepp and others). | 53 | |.define RC, eax // Must be eax. |
54 | |.define RCW, ax | 54 | |.define RCW, ax |
55 | |.define RCH, ah | 55 | |.define RCH, ah |
56 | |.define RCL, al | 56 | |.define RCL, al |
@@ -366,16 +366,10 @@ | |||
366 | | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st | 366 | | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st |
367 | |.endmacro | 367 | |.endmacro |
368 | | | 368 | | |
369 | |// Annoying x87 stuff: support for two compare variants. | 369 | |// x87 compares. |
370 | |.macro fcomparepp // Compare and pop st0 >< st1. | 370 | |.macro fcomparepp // Compare and pop st0 >< st1. |
371 | ||if (cmov) { | ||
372 | | fucomip st1 | 371 | | fucomip st1 |
373 | | fpop | 372 | | fpop |
374 | ||} else { | ||
375 | | fucompp | ||
376 | | fnstsw ax // eax modified! | ||
377 | | sahf | ||
378 | ||} | ||
379 | |.endmacro | 373 | |.endmacro |
380 | | | 374 | | |
381 | |.macro fdup; fld st0; .endmacro | 375 | |.macro fdup; fld st0; .endmacro |
@@ -426,7 +420,7 @@ | |||
426 | 420 | ||
427 | /* Generate subroutines used by opcodes and other parts of the VM. */ | 421 | /* Generate subroutines used by opcodes and other parts of the VM. */ |
428 | /* The .code_sub section should be last to help static branch prediction. */ | 422 | /* The .code_sub section should be last to help static branch prediction. */ |
429 | static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | 423 | static void build_subroutines(BuildCtx *ctx) |
430 | { | 424 | { |
431 | |.code_sub | 425 | |.code_sub |
432 | | | 426 | | |
@@ -776,18 +770,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
776 | | mov PC, [RB-12] // Restore PC from [cont|PC]. | 770 | | mov PC, [RB-12] // Restore PC from [cont|PC]. |
777 | |.if X64 | 771 | |.if X64 |
778 | | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. | 772 | | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. |
779 | #if LJ_HASFFI | 773 | |.if FFI |
780 | | cmp RA, 1 | 774 | | cmp RA, 1 |
781 | | jbe >1 | 775 | | jbe >1 |
782 | #endif | 776 | |.endif |
783 | | lea KBASEa, qword [=>0] | 777 | | lea KBASEa, qword [=>0] |
784 | | add RAa, KBASEa | 778 | | add RAa, KBASEa |
785 | |.else | 779 | |.else |
786 | | mov RA, dword [RB-16] | 780 | | mov RA, dword [RB-16] |
787 | #if LJ_HASFFI | 781 | |.if FFI |
788 | | cmp RA, 1 | 782 | | cmp RA, 1 |
789 | | jbe >1 | 783 | | jbe >1 |
790 | #endif | 784 | |.endif |
791 | |.endif | 785 | |.endif |
792 | | mov LFUNC:KBASE, [BASE-8] | 786 | | mov LFUNC:KBASE, [BASE-8] |
793 | | mov KBASE, LFUNC:KBASE->pc | 787 | | mov KBASE, LFUNC:KBASE->pc |
@@ -795,7 +789,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
795 | | // BASE = base, RC = result, RB = meta base | 789 | | // BASE = base, RC = result, RB = meta base |
796 | | jmp RAa // Jump to continuation. | 790 | | jmp RAa // Jump to continuation. |
797 | | | 791 | | |
798 | #if LJ_HASFFI | 792 | |.if FFI |
799 | |1: | 793 | |1: |
800 | | je ->cont_ffi_callback // cont = 1: return from FFI callback. | 794 | | je ->cont_ffi_callback // cont = 1: return from FFI callback. |
801 | | // cont = 0: Tail call from C function. | 795 | | // cont = 0: Tail call from C function. |
@@ -803,7 +797,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
803 | | shr RB, 3 | 797 | | shr RB, 3 |
804 | | lea RD, [RB-1] | 798 | | lea RD, [RB-1] |
805 | | jmp ->vm_call_tail | 799 | | jmp ->vm_call_tail |
806 | #endif | 800 | |.endif |
807 | | | 801 | | |
808 | |->cont_cat: // BASE = base, RC = result, RB = mbase | 802 | |->cont_cat: // BASE = base, RC = result, RB = mbase |
809 | | movzx RA, PC_RB | 803 | | movzx RA, PC_RB |
@@ -853,19 +847,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
853 | | | 847 | | |
854 | |->vmeta_tgetb: | 848 | |->vmeta_tgetb: |
855 | | movzx RC, PC_RC | 849 | | movzx RC, PC_RC |
856 | if (LJ_DUALNUM) { | 850 | |.if DUALNUM |
857 | | mov TMP2, LJ_TISNUM | 851 | | mov TMP2, LJ_TISNUM |
858 | | mov TMP1, RC | 852 | | mov TMP1, RC |
859 | } else if (sse) { | 853 | |.elif SSE |
860 | | cvtsi2sd xmm0, RC | 854 | | cvtsi2sd xmm0, RC |
861 | | movsd TMPQ, xmm0 | 855 | | movsd TMPQ, xmm0 |
862 | } else { | 856 | |.else |
863 | |.if not X64 | 857 | | mov ARG4, RC |
864 | | mov ARG4, RC | 858 | | fild ARG4 |
865 | | fild ARG4 | 859 | | fstp TMPQ |
866 | | fstp TMPQ | 860 | |.endif |
867 | |.endif | ||
868 | } | ||
869 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 861 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
870 | | jmp >1 | 862 | | jmp >1 |
871 | | | 863 | | |
@@ -934,19 +926,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
934 | | | 926 | | |
935 | |->vmeta_tsetb: | 927 | |->vmeta_tsetb: |
936 | | movzx RC, PC_RC | 928 | | movzx RC, PC_RC |
937 | if (LJ_DUALNUM) { | 929 | |.if DUALNUM |
938 | | mov TMP2, LJ_TISNUM | 930 | | mov TMP2, LJ_TISNUM |
939 | | mov TMP1, RC | 931 | | mov TMP1, RC |
940 | } else if (sse) { | 932 | |.elif SSE |
941 | | cvtsi2sd xmm0, RC | 933 | | cvtsi2sd xmm0, RC |
942 | | movsd TMPQ, xmm0 | 934 | | movsd TMPQ, xmm0 |
943 | } else { | 935 | |.else |
944 | |.if not X64 | 936 | | mov ARG4, RC |
945 | | mov ARG4, RC | 937 | | fild ARG4 |
946 | | fild ARG4 | 938 | | fstp TMPQ |
947 | | fstp TMPQ | 939 | |.endif |
948 | |.endif | ||
949 | } | ||
950 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 940 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
951 | | jmp >1 | 941 | | jmp >1 |
952 | | | 942 | | |
@@ -1093,7 +1083,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1093 | | jmp <3 | 1083 | | jmp <3 |
1094 | | | 1084 | | |
1095 | |->vmeta_equal_cd: | 1085 | |->vmeta_equal_cd: |
1096 | #if LJ_HASFFI | 1086 | |.if FFI |
1097 | | sub PC, 4 | 1087 | | sub PC, 4 |
1098 | | mov L:RB, SAVE_L | 1088 | | mov L:RB, SAVE_L |
1099 | | mov L:RB->base, BASE | 1089 | | mov L:RB->base, BASE |
@@ -1103,22 +1093,22 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1103 | | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) | 1093 | | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) |
1104 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | 1094 | | // 0/1 or TValue * (metamethod) returned in eax (RC). |
1105 | | jmp <3 | 1095 | | jmp <3 |
1106 | #endif | 1096 | |.endif |
1107 | | | 1097 | | |
1108 | |//-- Arithmetic metamethods --------------------------------------------- | 1098 | |//-- Arithmetic metamethods --------------------------------------------- |
1109 | | | 1099 | | |
1110 | |->vmeta_arith_vno: | 1100 | |->vmeta_arith_vno: |
1111 | #if LJ_DUALNUM | 1101 | |.if DUALNUM |
1112 | | movzx RB, PC_RB | 1102 | | movzx RB, PC_RB |
1113 | #endif | 1103 | |.endif |
1114 | |->vmeta_arith_vn: | 1104 | |->vmeta_arith_vn: |
1115 | | lea RC, [KBASE+RC*8] | 1105 | | lea RC, [KBASE+RC*8] |
1116 | | jmp >1 | 1106 | | jmp >1 |
1117 | | | 1107 | | |
1118 | |->vmeta_arith_nvo: | 1108 | |->vmeta_arith_nvo: |
1119 | #if LJ_DUALNUM | 1109 | |.if DUALNUM |
1120 | | movzx RC, PC_RC | 1110 | | movzx RC, PC_RC |
1121 | #endif | 1111 | |.endif |
1122 | |->vmeta_arith_nv: | 1112 | |->vmeta_arith_nv: |
1123 | | lea RC, [KBASE+RC*8] | 1113 | | lea RC, [KBASE+RC*8] |
1124 | | lea RB, [BASE+RB*8] | 1114 | | lea RB, [BASE+RB*8] |
@@ -1131,9 +1121,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1131 | | jmp >2 | 1121 | | jmp >2 |
1132 | | | 1122 | | |
1133 | |->vmeta_arith_vvo: | 1123 | |->vmeta_arith_vvo: |
1134 | #if LJ_DUALNUM | 1124 | |.if DUALNUM |
1135 | | movzx RB, PC_RB | 1125 | | movzx RB, PC_RB |
1136 | #endif | 1126 | |.endif |
1137 | |->vmeta_arith_vv: | 1127 | |->vmeta_arith_vv: |
1138 | | lea RC, [BASE+RC*8] | 1128 | | lea RC, [BASE+RC*8] |
1139 | |1: | 1129 | |1: |
@@ -1374,11 +1364,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1374 | | mov RC, ~LJ_TNUMX | 1364 | | mov RC, ~LJ_TNUMX |
1375 | | not RB | 1365 | | not RB |
1376 | | cmp RC, RB | 1366 | | cmp RC, RB |
1377 | ||if (cmov) { | ||
1378 | | cmova RC, RB | 1367 | | cmova RC, RB |
1379 | ||} else { | ||
1380 | | jbe >1; mov RC, RB; 1: | ||
1381 | ||} | ||
1382 | |2: | 1368 | |2: |
1383 | | mov CFUNC:RB, [BASE-8] | 1369 | | mov CFUNC:RB, [BASE-8] |
1384 | | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] | 1370 | | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] |
@@ -1509,19 +1495,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1509 | | // Only handles the number case inline (without a base argument). | 1495 | | // Only handles the number case inline (without a base argument). |
1510 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 1496 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
1511 | | cmp dword [BASE+4], LJ_TISNUM | 1497 | | cmp dword [BASE+4], LJ_TISNUM |
1512 | if (LJ_DUALNUM) { | 1498 | |.if DUALNUM |
1513 | | jne >1 | 1499 | | jne >1 |
1514 | | mov RB, dword [BASE]; jmp ->fff_resi | 1500 | | mov RB, dword [BASE]; jmp ->fff_resi |
1515 | |1: | 1501 | |1: |
1516 | | ja ->fff_fallback | 1502 | | ja ->fff_fallback |
1517 | } else { | 1503 | |.else |
1518 | | jae ->fff_fallback | 1504 | | jae ->fff_fallback |
1519 | } | 1505 | |.endif |
1520 | if (sse) { | 1506 | |.if SSE |
1521 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1507 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
1522 | } else { | 1508 | |.else |
1523 | | fld qword [BASE]; jmp ->fff_resn | 1509 | | fld qword [BASE]; jmp ->fff_resn |
1524 | } | 1510 | |.endif |
1525 | | | 1511 | | |
1526 | |.ffunc_1 tostring | 1512 | |.ffunc_1 tostring |
1527 | | // Only handles the string or number case inline. | 1513 | | // Only handles the string or number case inline. |
@@ -1545,11 +1531,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1545 | | mov FCARG2, BASE // Otherwise: FCARG2 == BASE | 1531 | | mov FCARG2, BASE // Otherwise: FCARG2 == BASE |
1546 | |.endif | 1532 | |.endif |
1547 | | mov L:FCARG1, L:RB | 1533 | | mov L:FCARG1, L:RB |
1548 | if (LJ_DUALNUM) { | 1534 | |.if DUALNUM |
1549 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) | 1535 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) |
1550 | } else { | 1536 | |.else |
1551 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | 1537 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) |
1552 | } | 1538 | |.endif |
1553 | | // GCstr returned in eax (RD). | 1539 | | // GCstr returned in eax (RD). |
1554 | | mov BASE, L:RB->base | 1540 | | mov BASE, L:RB->base |
1555 | | jmp <2 | 1541 | | jmp <2 |
@@ -1628,33 +1614,31 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1628 | |.ffunc_1 ipairs_aux | 1614 | |.ffunc_1 ipairs_aux |
1629 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | 1615 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback |
1630 | | cmp dword [BASE+12], LJ_TISNUM | 1616 | | cmp dword [BASE+12], LJ_TISNUM |
1631 | if (LJ_DUALNUM) { | 1617 | |.if DUALNUM |
1632 | | jne ->fff_fallback | 1618 | | jne ->fff_fallback |
1633 | } else { | 1619 | |.else |
1634 | | jae ->fff_fallback | 1620 | | jae ->fff_fallback |
1635 | } | 1621 | |.endif |
1636 | | mov PC, [BASE-4] | 1622 | | mov PC, [BASE-4] |
1637 | if (LJ_DUALNUM) { | 1623 | |.if DUALNUM |
1638 | | mov RD, dword [BASE+8] | 1624 | | mov RD, dword [BASE+8] |
1639 | | add RD, 1 | 1625 | | add RD, 1 |
1640 | | mov dword [BASE-4], LJ_TISNUM | 1626 | | mov dword [BASE-4], LJ_TISNUM |
1641 | | mov dword [BASE-8], RD | 1627 | | mov dword [BASE-8], RD |
1642 | } else if (sse) { | 1628 | |.elif SSE |
1643 | | movsd xmm0, qword [BASE+8] | 1629 | | movsd xmm0, qword [BASE+8] |
1644 | | sseconst_1 xmm1, RBa | 1630 | | sseconst_1 xmm1, RBa |
1645 | | addsd xmm0, xmm1 | 1631 | | addsd xmm0, xmm1 |
1646 | | cvtsd2si RD, xmm0 | 1632 | | cvtsd2si RD, xmm0 |
1647 | | movsd qword [BASE-8], xmm0 | 1633 | | movsd qword [BASE-8], xmm0 |
1648 | } else { | 1634 | |.else |
1649 | |.if not X64 | 1635 | | fld qword [BASE+8] |
1650 | | fld qword [BASE+8] | 1636 | | fld1 |
1651 | | fld1 | 1637 | | faddp st1 |
1652 | | faddp st1 | 1638 | | fist ARG1 |
1653 | | fist ARG1 | 1639 | | fstp qword [BASE-8] |
1654 | | fstp qword [BASE-8] | 1640 | | mov RD, ARG1 |
1655 | | mov RD, ARG1 | 1641 | |.endif |
1656 | |.endif | ||
1657 | } | ||
1658 | | mov TAB:RB, [BASE] | 1642 | | mov TAB:RB, [BASE] |
1659 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | 1643 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? |
1660 | | shl RD, 3 | 1644 | | shl RD, 3 |
@@ -1697,16 +1681,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1697 | | mov PC, [BASE-4] | 1681 | | mov PC, [BASE-4] |
1698 | | mov dword [BASE-4], LJ_TFUNC | 1682 | | mov dword [BASE-4], LJ_TFUNC |
1699 | | mov [BASE-8], CFUNC:RD | 1683 | | mov [BASE-8], CFUNC:RD |
1700 | if (LJ_DUALNUM) { | 1684 | |.if DUALNUM |
1701 | | mov dword [BASE+12], LJ_TISNUM | 1685 | | mov dword [BASE+12], LJ_TISNUM |
1702 | | mov dword [BASE+8], 0 | 1686 | | mov dword [BASE+8], 0 |
1703 | } else if (sse) { | 1687 | |.elif SSE |
1704 | | xorps xmm0, xmm0 | 1688 | | xorps xmm0, xmm0 |
1705 | | movsd qword [BASE+8], xmm0 | 1689 | | movsd qword [BASE+8], xmm0 |
1706 | } else { | 1690 | |.else |
1707 | | fldz | 1691 | | fldz |
1708 | | fstp qword [BASE+8] | 1692 | | fstp qword [BASE+8] |
1709 | } | 1693 | |.endif |
1710 | | mov RD, 1+3 | 1694 | | mov RD, 1+3 |
1711 | | jmp ->fff_res | 1695 | | jmp ->fff_res |
1712 | | | 1696 | | |
@@ -1931,54 +1915,58 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1931 | | | 1915 | | |
1932 | |//-- Math library ------------------------------------------------------- | 1916 | |//-- Math library ------------------------------------------------------- |
1933 | | | 1917 | | |
1934 | if (!LJ_DUALNUM) { | 1918 | |.if not DUALNUM |
1935 | |->fff_resi: // Dummy. | 1919 | |->fff_resi: // Dummy. |
1936 | } | 1920 | |.endif |
1937 | if (sse) { | 1921 | | |
1938 | |->fff_resn: | 1922 | |.if SSE |
1939 | | mov PC, [BASE-4] | 1923 | |->fff_resn: |
1940 | | fstp qword [BASE-8] | 1924 | | mov PC, [BASE-4] |
1941 | | jmp ->fff_res1 | 1925 | | fstp qword [BASE-8] |
1942 | } | 1926 | | jmp ->fff_res1 |
1927 | |.endif | ||
1928 | | | ||
1943 | | .ffunc_1 math_abs | 1929 | | .ffunc_1 math_abs |
1944 | if (LJ_DUALNUM) { | 1930 | |.if DUALNUM |
1945 | | cmp dword [BASE+4], LJ_TISNUM; jne >2 | 1931 | | cmp dword [BASE+4], LJ_TISNUM; jne >2 |
1946 | | mov RB, dword [BASE] | 1932 | | mov RB, dword [BASE] |
1947 | | cmp RB, 0; jns ->fff_resi | 1933 | | cmp RB, 0; jns ->fff_resi |
1948 | | neg RB; js >1 | 1934 | | neg RB; js >1 |
1949 | |->fff_resbit: | 1935 | |->fff_resbit: |
1950 | |->fff_resi: | 1936 | |->fff_resi: |
1951 | | mov PC, [BASE-4] | 1937 | | mov PC, [BASE-4] |
1952 | | mov dword [BASE-4], LJ_TISNUM | 1938 | | mov dword [BASE-4], LJ_TISNUM |
1953 | | mov dword [BASE-8], RB | 1939 | | mov dword [BASE-8], RB |
1954 | | jmp ->fff_res1 | 1940 | | jmp ->fff_res1 |
1955 | |1: | 1941 | |1: |
1956 | | mov PC, [BASE-4] | 1942 | | mov PC, [BASE-4] |
1957 | | mov dword [BASE-4], 0x41e00000 // 2^31. | 1943 | | mov dword [BASE-4], 0x41e00000 // 2^31. |
1958 | | mov dword [BASE-8], 0 | 1944 | | mov dword [BASE-8], 0 |
1959 | | jmp ->fff_res1 | 1945 | | jmp ->fff_res1 |
1960 | |2: | 1946 | |2: |
1961 | | ja ->fff_fallback | 1947 | | ja ->fff_fallback |
1962 | } else { | 1948 | |.else |
1963 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1949 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1964 | } | 1950 | |.endif |
1965 | if (sse) { | 1951 | | |
1966 | | movsd xmm0, qword [BASE] | 1952 | |.if SSE |
1967 | | sseconst_abs xmm1, RDa | 1953 | | movsd xmm0, qword [BASE] |
1968 | | andps xmm0, xmm1 | 1954 | | sseconst_abs xmm1, RDa |
1969 | |->fff_resxmm0: | 1955 | | andps xmm0, xmm1 |
1970 | | mov PC, [BASE-4] | 1956 | |->fff_resxmm0: |
1971 | | movsd qword [BASE-8], xmm0 | 1957 | | mov PC, [BASE-4] |
1972 | | // fallthrough | 1958 | | movsd qword [BASE-8], xmm0 |
1973 | } else { | 1959 | | // fallthrough |
1974 | | fld qword [BASE] | 1960 | |.else |
1975 | | fabs | 1961 | | fld qword [BASE] |
1976 | | // fallthrough | 1962 | | fabs |
1977 | |->fff_resxmm0: // Dummy. | 1963 | | // fallthrough |
1978 | |->fff_resn: | 1964 | |->fff_resxmm0: // Dummy. |
1979 | | mov PC, [BASE-4] | 1965 | |->fff_resn: |
1980 | | fstp qword [BASE-8] | 1966 | | mov PC, [BASE-4] |
1981 | } | 1967 | | fstp qword [BASE-8] |
1968 | |.endif | ||
1969 | | | ||
1982 | |->fff_res1: | 1970 | |->fff_res1: |
1983 | | mov RD, 1+1 | 1971 | | mov RD, 1+1 |
1984 | |->fff_res: | 1972 | |->fff_res: |
@@ -2006,18 +1994,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2006 | | | 1994 | | |
2007 | |.macro math_round, func | 1995 | |.macro math_round, func |
2008 | | .ffunc math_ .. func | 1996 | | .ffunc math_ .. func |
2009 | ||if (LJ_DUALNUM) { | 1997 | |.if DUALNUM |
2010 | | cmp dword [BASE+4], LJ_TISNUM; jne >1 | 1998 | | cmp dword [BASE+4], LJ_TISNUM; jne >1 |
2011 | | mov RB, dword [BASE]; jmp ->fff_resi | 1999 | | mov RB, dword [BASE]; jmp ->fff_resi |
2012 | |1: | 2000 | |1: |
2013 | | ja ->fff_fallback | 2001 | | ja ->fff_fallback |
2014 | ||} else { | 2002 | |.else |
2015 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2003 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2016 | ||} | 2004 | |.endif |
2017 | ||if (sse) { | 2005 | |.if SSE |
2018 | | movsd xmm0, qword [BASE] | 2006 | | movsd xmm0, qword [BASE] |
2019 | | call ->vm_ .. func | 2007 | | call ->vm_ .. func |
2020 | || if (LJ_DUALNUM) { | 2008 | | .if DUALNUM |
2021 | | cvtsd2si RB, xmm0 | 2009 | | cvtsd2si RB, xmm0 |
2022 | | cmp RB, 0x80000000 | 2010 | | cmp RB, 0x80000000 |
2023 | | jne ->fff_resi | 2011 | | jne ->fff_resi |
@@ -2025,13 +2013,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2025 | | ucomisd xmm0, xmm1 | 2013 | | ucomisd xmm0, xmm1 |
2026 | | jp ->fff_resxmm0 | 2014 | | jp ->fff_resxmm0 |
2027 | | je ->fff_resi | 2015 | | je ->fff_resi |
2028 | || } | 2016 | | .endif |
2029 | | jmp ->fff_resxmm0 | 2017 | | jmp ->fff_resxmm0 |
2030 | ||} else { | 2018 | |.else |
2031 | | fld qword [BASE] | 2019 | | fld qword [BASE] |
2032 | | call ->vm_ .. func | 2020 | | call ->vm_ .. func |
2033 | || if (LJ_DUALNUM) { | 2021 | | .if DUALNUM |
2034 | |.if not X64 | ||
2035 | | fist ARG1 | 2022 | | fist ARG1 |
2036 | | mov RB, ARG1 | 2023 | | mov RB, ARG1 |
2037 | | cmp RB, 0x80000000; jne >2 | 2024 | | cmp RB, 0x80000000; jne >2 |
@@ -2043,21 +2030,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2043 | |2: | 2030 | |2: |
2044 | | fpop | 2031 | | fpop |
2045 | | jmp ->fff_resi | 2032 | | jmp ->fff_resi |
2046 | |.endif | 2033 | | .else |
2047 | || } else { | ||
2048 | | jmp ->fff_resn | 2034 | | jmp ->fff_resn |
2049 | || } | 2035 | | .endif |
2050 | ||} | 2036 | |.endif |
2051 | |.endmacro | 2037 | |.endmacro |
2052 | | | 2038 | | |
2053 | | math_round floor | 2039 | | math_round floor |
2054 | | math_round ceil | 2040 | | math_round ceil |
2055 | | | 2041 | | |
2056 | if (sse) { | 2042 | |.if SSE |
2057 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 2043 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
2058 | } else { | 2044 | |.else |
2059 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | 2045 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn |
2060 | } | 2046 | |.endif |
2061 | |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn | 2047 | |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn |
2062 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2048 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn |
2063 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2049 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn |
@@ -2075,17 +2061,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2075 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | 2061 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn |
2076 | | | 2062 | | |
2077 | |.macro math_extern, func | 2063 | |.macro math_extern, func |
2078 | ||if (sse) { | 2064 | |.if SSE |
2079 | | .ffunc_nsse math_ .. func | 2065 | | .ffunc_nsse math_ .. func |
2080 | | .if not X64 | 2066 | | .if not X64 |
2081 | | movsd FPARG1, xmm0 | 2067 | | movsd FPARG1, xmm0 |
2082 | | .endif | 2068 | | .endif |
2083 | ||} else { | 2069 | |.else |
2084 | | .if not X64 | 2070 | | .ffunc_n math_ .. func |
2085 | | .ffunc_n math_ .. func | 2071 | | fstp FPARG1 |
2086 | | fstp FPARG1 | 2072 | |.endif |
2087 | | .endif | ||
2088 | ||} | ||
2089 | | mov RB, BASE | 2073 | | mov RB, BASE |
2090 | | call extern lj_vm_ .. func | 2074 | | call extern lj_vm_ .. func |
2091 | | mov BASE, RB | 2075 | | mov BASE, RB |
@@ -2101,17 +2085,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2101 | | math_extern tanh | 2085 | | math_extern tanh |
2102 | | | 2086 | | |
2103 | |->ff_math_deg: | 2087 | |->ff_math_deg: |
2104 | if (sse) { | 2088 | |.if SSE |
2105 | |.ffunc_nsse math_rad | 2089 | |.ffunc_nsse math_rad |
2106 | | mov CFUNC:RB, [BASE-8] | 2090 | | mov CFUNC:RB, [BASE-8] |
2107 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] | 2091 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] |
2108 | | jmp ->fff_resxmm0 | 2092 | | jmp ->fff_resxmm0 |
2109 | } else { | 2093 | |.else |
2110 | |.ffunc_n math_rad | 2094 | |.ffunc_n math_rad |
2111 | | mov CFUNC:RB, [BASE-8] | 2095 | | mov CFUNC:RB, [BASE-8] |
2112 | | fmul qword CFUNC:RB->upvalue[0] | 2096 | | fmul qword CFUNC:RB->upvalue[0] |
2113 | | jmp ->fff_resn | 2097 | | jmp ->fff_resn |
2114 | } | 2098 | |.endif |
2115 | | | 2099 | | |
2116 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | 2100 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn |
2117 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2101 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
@@ -2128,65 +2112,65 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2128 | | cmp RB, 0x00200000; jb >4 | 2112 | | cmp RB, 0x00200000; jb >4 |
2129 | |1: | 2113 | |1: |
2130 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. | 2114 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. |
2131 | if (sse) { | 2115 | |.if SSE |
2132 | | cvtsi2sd xmm0, RB | 2116 | | cvtsi2sd xmm0, RB |
2133 | } else { | 2117 | |.else |
2134 | | mov TMP1, RB; fild TMP1 | 2118 | | mov TMP1, RB; fild TMP1 |
2135 | } | 2119 | |.endif |
2136 | | mov RB, [BASE-4] | 2120 | | mov RB, [BASE-4] |
2137 | | and RB, 0x800fffff // Mask off exponent. | 2121 | | and RB, 0x800fffff // Mask off exponent. |
2138 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | 2122 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. |
2139 | | mov [BASE-4], RB | 2123 | | mov [BASE-4], RB |
2140 | |2: | 2124 | |2: |
2141 | if (sse) { | 2125 | |.if SSE |
2142 | | movsd qword [BASE], xmm0 | 2126 | | movsd qword [BASE], xmm0 |
2143 | } else { | 2127 | |.else |
2144 | | fstp qword [BASE] | 2128 | | fstp qword [BASE] |
2145 | } | 2129 | |.endif |
2146 | | mov RD, 1+2 | 2130 | | mov RD, 1+2 |
2147 | | jmp ->fff_res | 2131 | | jmp ->fff_res |
2148 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | 2132 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. |
2149 | if (sse) { | 2133 | |.if SSE |
2150 | | xorps xmm0, xmm0; jmp <2 | 2134 | | xorps xmm0, xmm0; jmp <2 |
2151 | } else { | 2135 | |.else |
2152 | | fldz; jmp <2 | 2136 | | fldz; jmp <2 |
2153 | } | 2137 | |.endif |
2154 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | 2138 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. |
2155 | if (sse) { | 2139 | |.if SSE |
2156 | | movsd xmm0, qword [BASE] | 2140 | | movsd xmm0, qword [BASE] |
2157 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. | 2141 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. |
2158 | | mulsd xmm0, xmm1 | 2142 | | mulsd xmm0, xmm1 |
2159 | | movsd qword [BASE-8], xmm0 | 2143 | | movsd qword [BASE-8], xmm0 |
2160 | } else { | 2144 | |.else |
2161 | | fld qword [BASE] | 2145 | | fld qword [BASE] |
2162 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 | 2146 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 |
2163 | | fstp qword [BASE-8] | 2147 | | fstp qword [BASE-8] |
2164 | } | 2148 | |.endif |
2165 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | 2149 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 |
2166 | | | 2150 | | |
2167 | if (sse) { | 2151 | |.if SSE |
2168 | |.ffunc_nsse math_modf | 2152 | |.ffunc_nsse math_modf |
2169 | } else { | 2153 | |.else |
2170 | |.ffunc_n math_modf | 2154 | |.ffunc_n math_modf |
2171 | } | 2155 | |.endif |
2172 | | mov RB, [BASE+4] | 2156 | | mov RB, [BASE+4] |
2173 | | mov PC, [BASE-4] | 2157 | | mov PC, [BASE-4] |
2174 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | 2158 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? |
2175 | if (sse) { | 2159 | |.if SSE |
2176 | | movaps xmm4, xmm0 | 2160 | | movaps xmm4, xmm0 |
2177 | | call ->vm_trunc | 2161 | | call ->vm_trunc |
2178 | | subsd xmm4, xmm0 | 2162 | | subsd xmm4, xmm0 |
2179 | |1: | 2163 | |1: |
2180 | | movsd qword [BASE-8], xmm0 | 2164 | | movsd qword [BASE-8], xmm0 |
2181 | | movsd qword [BASE], xmm4 | 2165 | | movsd qword [BASE], xmm4 |
2182 | } else { | 2166 | |.else |
2183 | | fdup | 2167 | | fdup |
2184 | | call ->vm_trunc | 2168 | | call ->vm_trunc |
2185 | | fsub st1, st0 | 2169 | | fsub st1, st0 |
2186 | |1: | 2170 | |1: |
2187 | | fstp qword [BASE-8] | 2171 | | fstp qword [BASE-8] |
2188 | | fstp qword [BASE] | 2172 | | fstp qword [BASE] |
2189 | } | 2173 | |.endif |
2190 | | mov RC, [BASE-4]; mov RB, [BASE+4] | 2174 | | mov RC, [BASE-4]; mov RB, [BASE+4] |
2191 | | xor RC, RB; js >3 // Need to adjust sign? | 2175 | | xor RC, RB; js >3 // Need to adjust sign? |
2192 | |2: | 2176 | |2: |
@@ -2196,28 +2180,28 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2196 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | 2180 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. |
2197 | | jmp <2 | 2181 | | jmp <2 |
2198 | |4: | 2182 | |4: |
2199 | if (sse) { | 2183 | |.if SSE |
2200 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2184 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
2201 | } else { | 2185 | |.else |
2202 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. | 2186 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. |
2203 | } | 2187 | |.endif |
2204 | | | 2188 | | |
2205 | |.ffunc_nnr math_fmod | 2189 | |.ffunc_nnr math_fmod |
2206 | |1: ; fprem; fnstsw ax; sahf; jp <1 | 2190 | |1: ; fprem; fnstsw ax; sahf; jp <1 |
2207 | | fpop1 | 2191 | | fpop1 |
2208 | | jmp ->fff_resn | 2192 | | jmp ->fff_resn |
2209 | | | 2193 | | |
2210 | if (sse) { | 2194 | |.if SSE |
2211 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 | 2195 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 |
2212 | } else { | 2196 | |.else |
2213 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | 2197 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn |
2214 | } | 2198 | |.endif |
2215 | | | 2199 | | |
2216 | |.macro math_minmax, name, cmovop, fcmovop, nofcmovop, sseop | 2200 | |.macro math_minmax, name, cmovop, fcmovop, sseop |
2217 | | .ffunc name | 2201 | | .ffunc name |
2218 | | mov RA, 2 | 2202 | | mov RA, 2 |
2219 | | cmp dword [BASE+4], LJ_TISNUM | 2203 | | cmp dword [BASE+4], LJ_TISNUM |
2220 | ||if (LJ_DUALNUM) { | 2204 | |.if DUALNUM |
2221 | | jne >4 | 2205 | | jne >4 |
2222 | | mov RB, dword [BASE] | 2206 | | mov RB, dword [BASE] |
2223 | |1: // Handle integers. | 2207 | |1: // Handle integers. |
@@ -2230,89 +2214,79 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2230 | |3: | 2214 | |3: |
2231 | | ja ->fff_fallback | 2215 | | ja ->fff_fallback |
2232 | | // Convert intermediate result to number and continue below. | 2216 | | // Convert intermediate result to number and continue below. |
2233 | ||if (sse) { | 2217 | |.if SSE |
2234 | | cvtsi2sd xmm0, RB | 2218 | | cvtsi2sd xmm0, RB |
2235 | ||} else { | 2219 | |.else |
2236 | |.if not X64 | 2220 | | mov TMP1, RB |
2237 | | mov TMP1, RB | 2221 | | fild TMP1 |
2238 | | fild TMP1 | ||
2239 | |.endif | 2222 | |.endif |
2240 | ||} | ||
2241 | | jmp >6 | 2223 | | jmp >6 |
2242 | |4: | 2224 | |4: |
2243 | | ja ->fff_fallback | 2225 | | ja ->fff_fallback |
2244 | ||} else { | 2226 | |.else |
2245 | | jae ->fff_fallback | 2227 | | jae ->fff_fallback |
2246 | ||} | 2228 | |.endif |
2247 | | | 2229 | | |
2248 | ||if (sse) { | 2230 | |.if SSE |
2249 | | movsd xmm0, qword [BASE] | 2231 | | movsd xmm0, qword [BASE] |
2250 | |5: // Handle numbers or integers. | 2232 | |5: // Handle numbers or integers. |
2251 | | cmp RA, RD; jae ->fff_resxmm0 | 2233 | | cmp RA, RD; jae ->fff_resxmm0 |
2252 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | 2234 | | cmp dword [BASE+RA*8-4], LJ_TISNUM |
2253 | ||if (LJ_DUALNUM) { | 2235 | |.if DUALNUM |
2254 | | jb >6 | 2236 | | jb >6 |
2255 | | ja ->fff_fallback | 2237 | | ja ->fff_fallback |
2256 | | cvtsi2sd xmm1, dword [BASE+RA*8-8] | 2238 | | cvtsi2sd xmm1, dword [BASE+RA*8-8] |
2257 | | jmp >7 | 2239 | | jmp >7 |
2258 | ||} else { | 2240 | |.else |
2259 | | jae ->fff_fallback | 2241 | | jae ->fff_fallback |
2260 | ||} | 2242 | |.endif |
2261 | |6: | 2243 | |6: |
2262 | | movsd xmm1, qword [BASE+RA*8-8] | 2244 | | movsd xmm1, qword [BASE+RA*8-8] |
2263 | |7: | 2245 | |7: |
2264 | | sseop xmm0, xmm1 | 2246 | | sseop xmm0, xmm1 |
2265 | | add RA, 1 | 2247 | | add RA, 1 |
2266 | | jmp <5 | 2248 | | jmp <5 |
2267 | ||} else { | 2249 | |.else |
2268 | |.if not X64 | ||
2269 | | fld qword [BASE] | 2250 | | fld qword [BASE] |
2270 | |5: // Handle numbers or integers. | 2251 | |5: // Handle numbers or integers. |
2271 | | cmp RA, RD; jae ->fff_resn | 2252 | | cmp RA, RD; jae ->fff_resn |
2272 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | 2253 | | cmp dword [BASE+RA*8-4], LJ_TISNUM |
2273 | ||if (LJ_DUALNUM) { | 2254 | |.if DUALNUM |
2274 | | jb >6 | 2255 | | jb >6 |
2275 | | ja >9 | 2256 | | ja >9 |
2276 | | fild dword [BASE+RA*8-8] | 2257 | | fild dword [BASE+RA*8-8] |
2277 | | jmp >7 | 2258 | | jmp >7 |
2278 | ||} else { | 2259 | |.else |
2279 | | jae >9 | 2260 | | jae >9 |
2280 | ||} | 2261 | |.endif |
2281 | |6: | 2262 | |6: |
2282 | | fld qword [BASE+RA*8-8] | 2263 | | fld qword [BASE+RA*8-8] |
2283 | |7: | 2264 | |7: |
2284 | ||if (cmov) { | ||
2285 | | fucomi st1; fcmovop st1; fpop1 | 2265 | | fucomi st1; fcmovop st1; fpop1 |
2286 | ||} else { | ||
2287 | | push eax | ||
2288 | | fucom st1; fnstsw ax; test ah, 1; nofcmovop >2; fxch; 2: ; fpop | ||
2289 | | pop eax | ||
2290 | ||} | ||
2291 | | add RA, 1 | 2266 | | add RA, 1 |
2292 | | jmp <5 | 2267 | | jmp <5 |
2293 | |.endif | 2268 | |.endif |
2294 | ||} | ||
2295 | |.endmacro | 2269 | |.endmacro |
2296 | | | 2270 | | |
2297 | | math_minmax math_min, cmovg, fcmovnbe, jz, minsd | 2271 | | math_minmax math_min, cmovg, fcmovnbe, minsd |
2298 | | math_minmax math_max, cmovl, fcmovbe, jnz, maxsd | 2272 | | math_minmax math_max, cmovl, fcmovbe, maxsd |
2299 | if (!sse) { | 2273 | |.if not SSE |
2300 | |9: | 2274 | |9: |
2301 | | fpop; jmp ->fff_fallback | 2275 | | fpop; jmp ->fff_fallback |
2302 | } | 2276 | |.endif |
2303 | | | 2277 | | |
2304 | |//-- String library ----------------------------------------------------- | 2278 | |//-- String library ----------------------------------------------------- |
2305 | | | 2279 | | |
2306 | |.ffunc_1 string_len | 2280 | |.ffunc_1 string_len |
2307 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2281 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2308 | | mov STR:RB, [BASE] | 2282 | | mov STR:RB, [BASE] |
2309 | if (LJ_DUALNUM) { | 2283 | |.if DUALNUM |
2310 | | mov RB, dword STR:RB->len; jmp ->fff_resi | 2284 | | mov RB, dword STR:RB->len; jmp ->fff_resi |
2311 | } else if (sse) { | 2285 | |.elif SSE |
2312 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | 2286 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 |
2313 | } else { | 2287 | |.else |
2314 | | fild dword STR:RB->len; jmp ->fff_resn | 2288 | | fild dword STR:RB->len; jmp ->fff_resn |
2315 | } | 2289 | |.endif |
2316 | | | 2290 | | |
2317 | |.ffunc string_byte // Only handle the 1-arg case here. | 2291 | |.ffunc string_byte // Only handle the 1-arg case here. |
2318 | | cmp NARGS:RD, 1+1; jne ->fff_fallback | 2292 | | cmp NARGS:RD, 1+1; jne ->fff_fallback |
@@ -2322,34 +2296,34 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2322 | | cmp dword STR:RB->len, 1 | 2296 | | cmp dword STR:RB->len, 1 |
2323 | | jb ->fff_res0 // Return no results for empty string. | 2297 | | jb ->fff_res0 // Return no results for empty string. |
2324 | | movzx RB, byte STR:RB[1] | 2298 | | movzx RB, byte STR:RB[1] |
2325 | if (LJ_DUALNUM) { | 2299 | |.if DUALNUM |
2326 | | jmp ->fff_resi | 2300 | | jmp ->fff_resi |
2327 | } else if (sse) { | 2301 | |.elif SSE |
2328 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | 2302 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
2329 | } else { | 2303 | |.else |
2330 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2304 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn |
2331 | } | 2305 | |.endif |
2332 | | | 2306 | | |
2333 | |.ffunc string_char // Only handle the 1-arg case here. | 2307 | |.ffunc string_char // Only handle the 1-arg case here. |
2334 | | ffgccheck | 2308 | | ffgccheck |
2335 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | 2309 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. |
2336 | | cmp dword [BASE+4], LJ_TISNUM | 2310 | | cmp dword [BASE+4], LJ_TISNUM |
2337 | if (LJ_DUALNUM) { | 2311 | |.if DUALNUM |
2338 | | jne ->fff_fallback | 2312 | | jne ->fff_fallback |
2339 | | mov RB, dword [BASE] | 2313 | | mov RB, dword [BASE] |
2340 | | cmp RB, 255; ja ->fff_fallback | 2314 | | cmp RB, 255; ja ->fff_fallback |
2341 | | mov TMP2, RB | 2315 | | mov TMP2, RB |
2342 | } else if (sse) { | 2316 | |.elif SSE |
2343 | | jae ->fff_fallback | 2317 | | jae ->fff_fallback |
2344 | | cvttsd2si RB, qword [BASE] | 2318 | | cvttsd2si RB, qword [BASE] |
2345 | | cmp RB, 255; ja ->fff_fallback | 2319 | | cmp RB, 255; ja ->fff_fallback |
2346 | | mov TMP2, RB | 2320 | | mov TMP2, RB |
2347 | } else { | 2321 | |.else |
2348 | | jae ->fff_fallback | 2322 | | jae ->fff_fallback |
2349 | | fld qword [BASE] | 2323 | | fld qword [BASE] |
2350 | | fistp TMP2 | 2324 | | fistp TMP2 |
2351 | | cmp TMP2, 255; ja ->fff_fallback | 2325 | | cmp TMP2, 255; ja ->fff_fallback |
2352 | } | 2326 | |.endif |
2353 | |.if X64 | 2327 | |.if X64 |
2354 | | mov TMP3, 1 | 2328 | | mov TMP3, 1 |
2355 | |.else | 2329 | |.else |
@@ -2382,41 +2356,39 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2382 | | cmp NARGS:RD, 1+2; jb ->fff_fallback | 2356 | | cmp NARGS:RD, 1+2; jb ->fff_fallback |
2383 | | jna >1 | 2357 | | jna >1 |
2384 | | cmp dword [BASE+20], LJ_TISNUM | 2358 | | cmp dword [BASE+20], LJ_TISNUM |
2385 | if (LJ_DUALNUM) { | 2359 | |.if DUALNUM |
2386 | | jne ->fff_fallback | 2360 | | jne ->fff_fallback |
2387 | | mov RB, dword [BASE+16] | 2361 | | mov RB, dword [BASE+16] |
2388 | | mov TMP2, RB | 2362 | | mov TMP2, RB |
2389 | } else if (sse) { | 2363 | |.elif SSE |
2390 | | jae ->fff_fallback | 2364 | | jae ->fff_fallback |
2391 | | cvttsd2si RB, qword [BASE+16] | 2365 | | cvttsd2si RB, qword [BASE+16] |
2392 | | mov TMP2, RB | 2366 | | mov TMP2, RB |
2393 | } else { | 2367 | |.else |
2394 | | jae ->fff_fallback | 2368 | | jae ->fff_fallback |
2395 | | fld qword [BASE+16] | 2369 | | fld qword [BASE+16] |
2396 | | fistp TMP2 | 2370 | | fistp TMP2 |
2397 | } | 2371 | |.endif |
2398 | |1: | 2372 | |1: |
2399 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2373 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2400 | | cmp dword [BASE+12], LJ_TISNUM | 2374 | | cmp dword [BASE+12], LJ_TISNUM |
2401 | if (LJ_DUALNUM) { | 2375 | |.if DUALNUM |
2402 | | jne ->fff_fallback | 2376 | | jne ->fff_fallback |
2403 | } else { | 2377 | |.else |
2404 | | jae ->fff_fallback | 2378 | | jae ->fff_fallback |
2405 | } | 2379 | |.endif |
2406 | | mov STR:RB, [BASE] | 2380 | | mov STR:RB, [BASE] |
2407 | | mov TMP3, STR:RB | 2381 | | mov TMP3, STR:RB |
2408 | | mov RB, STR:RB->len | 2382 | | mov RB, STR:RB->len |
2409 | if (LJ_DUALNUM) { | 2383 | |.if DUALNUM |
2410 | | mov RA, dword [BASE+8] | 2384 | | mov RA, dword [BASE+8] |
2411 | } else if (sse) { | 2385 | |.elif SSE |
2412 | | cvttsd2si RA, qword [BASE+8] | 2386 | | cvttsd2si RA, qword [BASE+8] |
2413 | } else { | 2387 | |.else |
2414 | |.if not X64 | 2388 | | fld qword [BASE+8] |
2415 | | fld qword [BASE+8] | 2389 | | fistp ARG3 |
2416 | | fistp ARG3 | 2390 | | mov RA, ARG3 |
2417 | | mov RA, ARG3 | 2391 | |.endif |
2418 | |.endif | ||
2419 | } | ||
2420 | | mov RC, TMP2 | 2392 | | mov RC, TMP2 |
2421 | | cmp RB, RC // len < end? (unsigned compare) | 2393 | | cmp RB, RC // len < end? (unsigned compare) |
2422 | | jb >5 | 2394 | | jb >5 |
@@ -2464,18 +2436,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2464 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2436 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2465 | | cmp dword [BASE+12], LJ_TISNUM | 2437 | | cmp dword [BASE+12], LJ_TISNUM |
2466 | | mov STR:RB, [BASE] | 2438 | | mov STR:RB, [BASE] |
2467 | if (LJ_DUALNUM) { | 2439 | |.if DUALNUM |
2468 | | jne ->fff_fallback | 2440 | | jne ->fff_fallback |
2469 | | mov RC, dword [BASE+8] | 2441 | | mov RC, dword [BASE+8] |
2470 | } else if (sse) { | 2442 | |.elif SSE |
2471 | | jae ->fff_fallback | 2443 | | jae ->fff_fallback |
2472 | | cvttsd2si RC, qword [BASE+8] | 2444 | | cvttsd2si RC, qword [BASE+8] |
2473 | } else { | 2445 | |.else |
2474 | | jae ->fff_fallback | 2446 | | jae ->fff_fallback |
2475 | | fld qword [BASE+8] | 2447 | | fld qword [BASE+8] |
2476 | | fistp TMP2 | 2448 | | fistp TMP2 |
2477 | | mov RC, TMP2 | 2449 | | mov RC, TMP2 |
2478 | } | 2450 | |.endif |
2479 | | test RC, RC | 2451 | | test RC, RC |
2480 | | jle ->fff_emptystr // Count <= 0? (or non-int) | 2452 | | jle ->fff_emptystr // Count <= 0? (or non-int) |
2481 | | cmp dword STR:RB->len, 1 | 2453 | | cmp dword STR:RB->len, 1 |
@@ -2568,15 +2540,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2568 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | 2540 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) |
2569 | | // Length of table returned in eax (RD). | 2541 | | // Length of table returned in eax (RD). |
2570 | | mov BASE, RB // Restore BASE. | 2542 | | mov BASE, RB // Restore BASE. |
2571 | if (LJ_DUALNUM) { | 2543 | |.if DUALNUM |
2572 | | mov RB, RD; jmp ->fff_resi | 2544 | | mov RB, RD; jmp ->fff_resi |
2573 | } else if (sse) { | 2545 | |.elif SSE |
2574 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | 2546 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 |
2575 | } else { | 2547 | |.else |
2576 | |.if not X64 | 2548 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn |
2577 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn | 2549 | |.endif |
2578 | |.endif | ||
2579 | } | ||
2580 | | | 2550 | | |
2581 | |//-- Bit library -------------------------------------------------------- | 2551 | |//-- Bit library -------------------------------------------------------- |
2582 | | | 2552 | | |
@@ -2585,14 +2555,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2585 | |.macro .ffunc_bit, name, kind | 2555 | |.macro .ffunc_bit, name, kind |
2586 | | .ffunc_1 name | 2556 | | .ffunc_1 name |
2587 | |.if kind == 2 | 2557 | |.if kind == 2 |
2588 | ||if (sse) { | 2558 | |.if SSE |
2589 | | sseconst_tobit xmm1, RBa | 2559 | | sseconst_tobit xmm1, RBa |
2590 | ||} else { | 2560 | |.else |
2591 | | mov TMP1, TOBIT_BIAS | 2561 | | mov TMP1, TOBIT_BIAS |
2592 | ||} | 2562 | |.endif |
2593 | |.endif | 2563 | |.endif |
2594 | | cmp dword [BASE+4], LJ_TISNUM | 2564 | | cmp dword [BASE+4], LJ_TISNUM |
2595 | ||if (LJ_DUALNUM) { | 2565 | |.if DUALNUM |
2596 | | jne >1 | 2566 | | jne >1 |
2597 | | mov RB, dword [BASE] | 2567 | | mov RB, dword [BASE] |
2598 | |.if kind > 0 | 2568 | |.if kind > 0 |
@@ -2602,18 +2572,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2602 | |.endif | 2572 | |.endif |
2603 | |1: | 2573 | |1: |
2604 | | ja ->fff_fallback | 2574 | | ja ->fff_fallback |
2605 | ||} else { | 2575 | |.else |
2606 | | jae ->fff_fallback | 2576 | | jae ->fff_fallback |
2607 | ||} | 2577 | |.endif |
2608 | ||if (sse) { | 2578 | |.if SSE |
2609 | | movsd xmm0, qword [BASE] | 2579 | | movsd xmm0, qword [BASE] |
2610 | |.if kind < 2 | 2580 | |.if kind < 2 |
2611 | | sseconst_tobit xmm1, RBa | 2581 | | sseconst_tobit xmm1, RBa |
2612 | |.endif | 2582 | |.endif |
2613 | | addsd xmm0, xmm1 | 2583 | | addsd xmm0, xmm1 |
2614 | | movd RB, xmm0 | 2584 | | movd RB, xmm0 |
2615 | ||} else { | 2585 | |.else |
2616 | |.if not X64 | ||
2617 | | fld qword [BASE] | 2586 | | fld qword [BASE] |
2618 | |.if kind < 2 | 2587 | |.if kind < 2 |
2619 | | mov TMP1, TOBIT_BIAS | 2588 | | mov TMP1, TOBIT_BIAS |
@@ -2624,24 +2593,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2624 | | mov RB, ARG1 | 2593 | | mov RB, ARG1 |
2625 | |.endif | 2594 | |.endif |
2626 | |.endif | 2595 | |.endif |
2627 | ||} | ||
2628 | |2: | 2596 | |2: |
2629 | |.endmacro | 2597 | |.endmacro |
2630 | | | 2598 | | |
2631 | |.ffunc_bit bit_tobit, 0 | 2599 | |.ffunc_bit bit_tobit, 0 |
2632 | if (LJ_DUALNUM || sse) { | 2600 | |.if DUALNUM or SSE |
2633 | if (!sse) { | 2601 | |.if not SSE |
2634 | |.if not X64 | 2602 | | mov RB, ARG1 |
2635 | | mov RB, ARG1 | 2603 | |.endif |
2636 | |.endif | 2604 | | jmp ->fff_resbit |
2637 | } | 2605 | |.else |
2638 | | jmp ->fff_resbit | 2606 | | fild ARG1 |
2639 | } else { | 2607 | | jmp ->fff_resn |
2640 | |.if not X64 | 2608 | |.endif |
2641 | | fild ARG1 | ||
2642 | | jmp ->fff_resn | ||
2643 | |.endif | ||
2644 | } | ||
2645 | | | 2609 | | |
2646 | |.macro .ffunc_bit_op, name, ins | 2610 | |.macro .ffunc_bit_op, name, ins |
2647 | | .ffunc_bit name, 2 | 2611 | | .ffunc_bit name, 2 |
@@ -2651,29 +2615,27 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2651 | | cmp RD, BASE | 2615 | | cmp RD, BASE |
2652 | | jbe ->fff_resbit | 2616 | | jbe ->fff_resbit |
2653 | | cmp dword [RD+4], LJ_TISNUM | 2617 | | cmp dword [RD+4], LJ_TISNUM |
2654 | ||if (LJ_DUALNUM) { | 2618 | |.if DUALNUM |
2655 | | jne >2 | 2619 | | jne >2 |
2656 | | ins RB, dword [RD] | 2620 | | ins RB, dword [RD] |
2657 | | sub RD, 8 | 2621 | | sub RD, 8 |
2658 | | jmp <1 | 2622 | | jmp <1 |
2659 | |2: | 2623 | |2: |
2660 | | ja ->fff_fallback_bit_op | 2624 | | ja ->fff_fallback_bit_op |
2661 | ||} else { | 2625 | |.else |
2662 | | jae ->fff_fallback_bit_op | 2626 | | jae ->fff_fallback_bit_op |
2663 | ||} | 2627 | |.endif |
2664 | ||if (sse) { | 2628 | |.if SSE |
2665 | | movsd xmm0, qword [RD] | 2629 | | movsd xmm0, qword [RD] |
2666 | | addsd xmm0, xmm1 | 2630 | | addsd xmm0, xmm1 |
2667 | | movd RA, xmm0 | 2631 | | movd RA, xmm0 |
2668 | | ins RB, RA | 2632 | | ins RB, RA |
2669 | ||} else { | 2633 | |.else |
2670 | |.if not X64 | ||
2671 | | fld qword [RD] | 2634 | | fld qword [RD] |
2672 | | fadd TMP1 | 2635 | | fadd TMP1 |
2673 | | fstp FPARG1 | 2636 | | fstp FPARG1 |
2674 | | ins RB, ARG1 | 2637 | | ins RB, ARG1 |
2675 | |.endif | 2638 | |.endif |
2676 | ||} | ||
2677 | | sub RD, 8 | 2639 | | sub RD, 8 |
2678 | | jmp <1 | 2640 | | jmp <1 |
2679 | |.endmacro | 2641 | |.endmacro |
@@ -2688,40 +2650,37 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2688 | | | 2650 | | |
2689 | |.ffunc_bit bit_bnot, 1 | 2651 | |.ffunc_bit bit_bnot, 1 |
2690 | | not RB | 2652 | | not RB |
2691 | if (LJ_DUALNUM) { | 2653 | |.if DUALNUM |
2692 | | jmp ->fff_resbit | 2654 | | jmp ->fff_resbit |
2693 | } else if (sse) { | 2655 | |.elif SSE |
2694 | |->fff_resbit: | 2656 | |->fff_resbit: |
2695 | | cvtsi2sd xmm0, RB | 2657 | | cvtsi2sd xmm0, RB |
2696 | | jmp ->fff_resxmm0 | 2658 | | jmp ->fff_resxmm0 |
2697 | } else { | 2659 | |.else |
2698 | |.if not X64 | 2660 | |->fff_resbit: |
2699 | |->fff_resbit: | 2661 | | mov ARG1, RB |
2700 | | mov ARG1, RB | 2662 | | fild ARG1 |
2701 | | fild ARG1 | 2663 | | jmp ->fff_resn |
2702 | | jmp ->fff_resn | 2664 | |.endif |
2703 | |.endif | ||
2704 | } | ||
2705 | | | 2665 | | |
2706 | |->fff_fallback_bit_op: | 2666 | |->fff_fallback_bit_op: |
2707 | | mov NARGS:RD, TMP2 // Restore for fallback | 2667 | | mov NARGS:RD, TMP2 // Restore for fallback |
2708 | | jmp ->fff_fallback | 2668 | | jmp ->fff_fallback |
2709 | | | 2669 | | |
2710 | |.macro .ffunc_bit_sh, name, ins | 2670 | |.macro .ffunc_bit_sh, name, ins |
2711 | ||if (LJ_DUALNUM) { | 2671 | |.if DUALNUM |
2712 | | .ffunc_bit name, 1 | 2672 | | .ffunc_bit name, 1 |
2713 | | // Note: no inline conversion from number for 2nd argument! | 2673 | | // Note: no inline conversion from number for 2nd argument! |
2714 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | 2674 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback |
2715 | | mov RA, dword [BASE+8] | 2675 | | mov RA, dword [BASE+8] |
2716 | ||} else if (sse) { | 2676 | |.elif SSE |
2717 | | .ffunc_nnsse name | 2677 | | .ffunc_nnsse name |
2718 | | sseconst_tobit xmm2, RBa | 2678 | | sseconst_tobit xmm2, RBa |
2719 | | addsd xmm0, xmm2 | 2679 | | addsd xmm0, xmm2 |
2720 | | addsd xmm1, xmm2 | 2680 | | addsd xmm1, xmm2 |
2721 | | movd RB, xmm0 | 2681 | | movd RB, xmm0 |
2722 | | movd RA, xmm1 | 2682 | | movd RA, xmm1 |
2723 | ||} else { | 2683 | |.else |
2724 | |.if not X64 | ||
2725 | | .ffunc_nn name | 2684 | | .ffunc_nn name |
2726 | | mov TMP1, TOBIT_BIAS | 2685 | | mov TMP1, TOBIT_BIAS |
2727 | | fadd TMP1 | 2686 | | fadd TMP1 |
@@ -2731,7 +2690,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2731 | | mov RA, ARG3 | 2690 | | mov RA, ARG3 |
2732 | | mov RB, ARG1 | 2691 | | mov RB, ARG1 |
2733 | |.endif | 2692 | |.endif |
2734 | ||} | ||
2735 | | ins RB, cl // Assumes RA is ecx. | 2693 | | ins RB, cl // Assumes RA is ecx. |
2736 | | jmp ->fff_resbit | 2694 | | jmp ->fff_resbit |
2737 | |.endmacro | 2695 | |.endmacro |
@@ -2828,7 +2786,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2828 | |//----------------------------------------------------------------------- | 2786 | |//----------------------------------------------------------------------- |
2829 | | | 2787 | | |
2830 | |->vm_record: // Dispatch target for recording phase. | 2788 | |->vm_record: // Dispatch target for recording phase. |
2831 | #if LJ_HASJIT | 2789 | |.if JIT |
2832 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | 2790 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] |
2833 | | test RDL, HOOK_VMEVENT // No recording while in vmevent. | 2791 | | test RDL, HOOK_VMEVENT // No recording while in vmevent. |
2834 | | jnz >5 | 2792 | | jnz >5 |
@@ -2839,7 +2797,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2839 | | jz >1 | 2797 | | jz >1 |
2840 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | 2798 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] |
2841 | | jmp >1 | 2799 | | jmp >1 |
2842 | #endif | 2800 | |.endif |
2843 | | | 2801 | | |
2844 | |->vm_rethook: // Dispatch target for return hooks. | 2802 | |->vm_rethook: // Dispatch target for return hooks. |
2845 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | 2803 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] |
@@ -2885,7 +2843,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2885 | | jmp <4 | 2843 | | jmp <4 |
2886 | | | 2844 | | |
2887 | |->vm_hotloop: // Hot loop counter underflow. | 2845 | |->vm_hotloop: // Hot loop counter underflow. |
2888 | #if LJ_HASJIT | 2846 | |.if JIT |
2889 | | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). | 2847 | | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). |
2890 | | mov RB, LFUNC:RB->pc | 2848 | | mov RB, LFUNC:RB->pc |
2891 | | movzx RD, byte [RB+PC2PROTO(framesize)] | 2849 | | movzx RD, byte [RB+PC2PROTO(framesize)] |
@@ -2899,20 +2857,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2899 | | mov SAVE_PC, PC | 2857 | | mov SAVE_PC, PC |
2900 | | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) | 2858 | | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) |
2901 | | jmp <3 | 2859 | | jmp <3 |
2902 | #endif | 2860 | |.endif |
2903 | | | 2861 | | |
2904 | |->vm_callhook: // Dispatch target for call hooks. | 2862 | |->vm_callhook: // Dispatch target for call hooks. |
2905 | | mov SAVE_PC, PC | 2863 | | mov SAVE_PC, PC |
2906 | #if LJ_HASJIT | 2864 | |.if JIT |
2907 | | jmp >1 | 2865 | | jmp >1 |
2908 | #endif | 2866 | |.endif |
2909 | | | 2867 | | |
2910 | |->vm_hotcall: // Hot call counter underflow. | 2868 | |->vm_hotcall: // Hot call counter underflow. |
2911 | #if LJ_HASJIT | 2869 | |.if JIT |
2912 | | mov SAVE_PC, PC | 2870 | | mov SAVE_PC, PC |
2913 | | or PC, 1 // Marker for hot call. | 2871 | | or PC, 1 // Marker for hot call. |
2914 | |1: | 2872 | |1: |
2915 | #endif | 2873 | |.endif |
2916 | | lea RD, [BASE+NARGS:RD*8-8] | 2874 | | lea RD, [BASE+NARGS:RD*8-8] |
2917 | | mov L:RB, SAVE_L | 2875 | | mov L:RB, SAVE_L |
2918 | | mov L:RB->base, BASE | 2876 | | mov L:RB->base, BASE |
@@ -2922,9 +2880,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2922 | | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) | 2880 | | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) |
2923 | | // ASMFunction returned in eax/rax (RDa). | 2881 | | // ASMFunction returned in eax/rax (RDa). |
2924 | | mov SAVE_PC, 0 // Invalidate for subsequent line hook. | 2882 | | mov SAVE_PC, 0 // Invalidate for subsequent line hook. |
2925 | #if LJ_HASJIT | 2883 | |.if JIT |
2926 | | and PC, -2 | 2884 | | and PC, -2 |
2927 | #endif | 2885 | |.endif |
2928 | | mov BASE, L:RB->base | 2886 | | mov BASE, L:RB->base |
2929 | | mov RAa, RDa | 2887 | | mov RAa, RDa |
2930 | | mov RD, L:RB->top | 2888 | | mov RD, L:RB->top |
@@ -2942,7 +2900,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2942 | |// Called from an exit stub with the exit number on the stack. | 2900 | |// Called from an exit stub with the exit number on the stack. |
2943 | |// The 16 bit exit number is stored with two (sign-extended) push imm8. | 2901 | |// The 16 bit exit number is stored with two (sign-extended) push imm8. |
2944 | |->vm_exit_handler: | 2902 | |->vm_exit_handler: |
2945 | #if LJ_HASJIT | 2903 | |.if JIT |
2946 | |.if X64 | 2904 | |.if X64 |
2947 | | push r13; push r12 | 2905 | | push r13; push r12 |
2948 | | push r11; push r10; push r9; push r8 | 2906 | | push r11; push r10; push r9; push r8 |
@@ -3017,10 +2975,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3017 | |.if X64 | 2975 | |.if X64 |
3018 | | jmp >1 | 2976 | | jmp >1 |
3019 | |.endif | 2977 | |.endif |
3020 | #endif | 2978 | |.endif |
3021 | |->vm_exit_interp: | 2979 | |->vm_exit_interp: |
3022 | | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. | 2980 | | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. |
3023 | #if LJ_HASJIT | 2981 | |.if JIT |
3024 | |.if X64 | 2982 | |.if X64 |
3025 | | // Restore additional callee-save registers only used in compiled code. | 2983 | | // Restore additional callee-save registers only used in compiled code. |
3026 | |.if X64WIN | 2984 | |.if X64WIN |
@@ -3074,7 +3032,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3074 | | mov FCARG1, L:RB | 3032 | | mov FCARG1, L:RB |
3075 | | mov FCARG2, RD | 3033 | | mov FCARG2, RD |
3076 | | call extern lj_err_throw@8 // (lua_State *L, int errcode) | 3034 | | call extern lj_err_throw@8 // (lua_State *L, int errcode) |
3077 | #endif | 3035 | |.endif |
3078 | | | 3036 | | |
3079 | |//----------------------------------------------------------------------- | 3037 | |//----------------------------------------------------------------------- |
3080 | |//-- Math helper functions ---------------------------------------------- | 3038 | |//-- Math helper functions ---------------------------------------------- |
@@ -3139,9 +3097,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3139 | | | 3097 | | |
3140 | |.macro vm_round, name, ssemode, mode1, mode2 | 3098 | |.macro vm_round, name, ssemode, mode1, mode2 |
3141 | |->name: | 3099 | |->name: |
3142 | ||if (!sse) { | 3100 | |.if not SSE |
3143 | | vm_round_x87 mode1, mode2 | 3101 | | vm_round_x87 mode1, mode2 |
3144 | ||} | 3102 | |.endif |
3145 | |->name .. _sse: | 3103 | |->name .. _sse: |
3146 | | vm_round_sse ssemode | 3104 | | vm_round_sse ssemode |
3147 | |.endmacro | 3105 | |.endmacro |
@@ -3152,51 +3110,51 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3152 | | | 3110 | | |
3153 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 3111 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
3154 | |->vm_mod: | 3112 | |->vm_mod: |
3155 | if (sse) { | 3113 | |.if SSE |
3156 | |// Args in xmm0/xmm1, return value in xmm0. | 3114 | |// Args in xmm0/xmm1, return value in xmm0. |
3157 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | 3115 | |// Caveat: xmm0-xmm5 and RC (eax) modified! |
3158 | | movaps xmm5, xmm0 | 3116 | | movaps xmm5, xmm0 |
3159 | | divsd xmm0, xmm1 | 3117 | | divsd xmm0, xmm1 |
3160 | | sseconst_abs xmm2, RDa | 3118 | | sseconst_abs xmm2, RDa |
3161 | | sseconst_2p52 xmm3, RDa | 3119 | | sseconst_2p52 xmm3, RDa |
3162 | | movaps xmm4, xmm0 | 3120 | | movaps xmm4, xmm0 |
3163 | | andpd xmm4, xmm2 // |x/y| | 3121 | | andpd xmm4, xmm2 // |x/y| |
3164 | | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. | 3122 | | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. |
3165 | | jbe >1 | 3123 | | jbe >1 |
3166 | | andnpd xmm2, xmm0 // Isolate sign bit. | 3124 | | andnpd xmm2, xmm0 // Isolate sign bit. |
3167 | | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 | 3125 | | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 |
3168 | | subsd xmm4, xmm3 | 3126 | | subsd xmm4, xmm3 |
3169 | | orpd xmm4, xmm2 // Merge sign bit back in. | 3127 | | orpd xmm4, xmm2 // Merge sign bit back in. |
3170 | | sseconst_1 xmm2, RDa | 3128 | | sseconst_1 xmm2, RDa |
3171 | | cmpsd xmm0, xmm4, 1 // x/y < result? | 3129 | | cmpsd xmm0, xmm4, 1 // x/y < result? |
3172 | | andpd xmm0, xmm2 | 3130 | | andpd xmm0, xmm2 |
3173 | | subsd xmm4, xmm0 // If yes, subtract 1.0. | 3131 | | subsd xmm4, xmm0 // If yes, subtract 1.0. |
3174 | | movaps xmm0, xmm5 | 3132 | | movaps xmm0, xmm5 |
3175 | | mulsd xmm1, xmm4 | 3133 | | mulsd xmm1, xmm4 |
3176 | | subsd xmm0, xmm1 | 3134 | | subsd xmm0, xmm1 |
3177 | | ret | 3135 | | ret |
3178 | |1: | 3136 | |1: |
3179 | | mulsd xmm1, xmm0 | 3137 | | mulsd xmm1, xmm0 |
3180 | | movaps xmm0, xmm5 | 3138 | | movaps xmm0, xmm5 |
3181 | | subsd xmm0, xmm1 | 3139 | | subsd xmm0, xmm1 |
3182 | | ret | 3140 | | ret |
3183 | } else { | 3141 | |.else |
3184 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | 3142 | |// Args/ret on x87 stack (y on top). No xmm registers modified. |
3185 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | 3143 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! |
3186 | | fld st1 | 3144 | | fld st1 |
3187 | | fdiv st1 | 3145 | | fdiv st1 |
3188 | | fnstcw word [esp+4] | 3146 | | fnstcw word [esp+4] |
3189 | | mov ax, 0x0400 | 3147 | | mov ax, 0x0400 |
3190 | | or ax, [esp+4] | 3148 | | or ax, [esp+4] |
3191 | | and ax, 0xf7ff | 3149 | | and ax, 0xf7ff |
3192 | | mov [esp+6], ax | 3150 | | mov [esp+6], ax |
3193 | | fldcw word [esp+6] | 3151 | | fldcw word [esp+6] |
3194 | | frndint | 3152 | | frndint |
3195 | | fldcw word [esp+4] | 3153 | | fldcw word [esp+4] |
3196 | | fmulp st1 | 3154 | | fmulp st1 |
3197 | | fsubp st1 | 3155 | | fsubp st1 |
3198 | | ret | 3156 | | ret |
3199 | } | 3157 | |.endif |
3200 | | | 3158 | | |
3201 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | 3159 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and |
3202 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | 3160 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. |
@@ -3224,18 +3182,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3224 | | | 3182 | | |
3225 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | 3183 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, |
3226 | |// and vm_arith. | 3184 | |// and vm_arith. |
3227 | if (!sse) { | ||
3228 | |.if not X64 | ||
3229 | |// Args/ret on x87 stack (y on top). RC (eax) modified. | 3185 | |// Args/ret on x87 stack (y on top). RC (eax) modified. |
3230 | |// Caveat: needs 3 slots on x87 stack! | 3186 | |// Caveat: needs 3 slots on x87 stack! |
3231 | |->vm_pow: | 3187 | |->vm_pow: |
3188 | |.if not SSE | ||
3232 | | fist dword [esp+4] // Store/reload int before comparison. | 3189 | | fist dword [esp+4] // Store/reload int before comparison. |
3233 | | fild dword [esp+4] // Integral exponent used in vm_powi. | 3190 | | fild dword [esp+4] // Integral exponent used in vm_powi. |
3234 | ||if (cmov) { | ||
3235 | | fucomip st1 | 3191 | | fucomip st1 |
3236 | ||} else { | ||
3237 | | fucomp st1; fnstsw ax; sahf | ||
3238 | ||} | ||
3239 | | jnz >8 // Branch for FP exponents. | 3192 | | jnz >8 // Branch for FP exponents. |
3240 | | jp >9 // Branch for NaN exponent. | 3193 | | jp >9 // Branch for NaN exponent. |
3241 | | fpop // Pop y and fallthrough to vm_powi. | 3194 | | fpop // Pop y and fallthrough to vm_powi. |
@@ -3288,11 +3241,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3288 | | | 3241 | | |
3289 | |9: // Handle x^NaN. | 3242 | |9: // Handle x^NaN. |
3290 | | fld1 | 3243 | | fld1 |
3291 | ||if (cmov) { | ||
3292 | | fucomip st2 | 3244 | | fucomip st2 |
3293 | ||} else { | ||
3294 | | fucomp st2; fnstsw ax; sahf | ||
3295 | ||} | ||
3296 | | je >1 // 1^NaN ==> 1 | 3245 | | je >1 // 1^NaN ==> 1 |
3297 | | fxch // x^NaN ==> NaN | 3246 | | fxch // x^NaN ==> NaN |
3298 | |1: | 3247 | |1: |
@@ -3302,11 +3251,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3302 | |2: // Handle x^+-Inf. | 3251 | |2: // Handle x^+-Inf. |
3303 | | fabs | 3252 | | fabs |
3304 | | fld1 | 3253 | | fld1 |
3305 | ||if (cmov) { | ||
3306 | | fucomip st1 | 3254 | | fucomip st1 |
3307 | ||} else { | ||
3308 | | fucomp st1; fnstsw ax; sahf | ||
3309 | ||} | ||
3310 | | je >3 // +-1^+-Inf ==> 1 | 3255 | | je >3 // +-1^+-Inf ==> 1 |
3311 | | fpop; fabs; fldz; mov eax, 0; setc al | 3256 | | fpop; fabs; fldz; mov eax, 0; setc al |
3312 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | 3257 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 |
@@ -3326,9 +3271,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3326 | | fld dword [esp+4] | 3271 | | fld dword [esp+4] |
3327 | | ret | 3272 | | ret |
3328 | |.endif | 3273 | |.endif |
3329 | } else { | ||
3330 | |->vm_pow: | ||
3331 | } | ||
3332 | | | 3274 | | |
3333 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | 3275 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. |
3334 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | 3276 | |// Needs 16 byte scratch area for x86. Also called from JIT code. |
@@ -3453,217 +3395,208 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3453 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | 3395 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) |
3454 | |// Computes fpm(x) for extended math functions. ORDER FPM. | 3396 | |// Computes fpm(x) for extended math functions. ORDER FPM. |
3455 | |->vm_foldfpm: | 3397 | |->vm_foldfpm: |
3456 | #if LJ_HASJIT | 3398 | |.if JIT |
3457 | if (sse) { | 3399 | |.if X64 |
3458 | |.if X64 | 3400 | | .if X64WIN |
3459 | | | 3401 | | .define fpmop, CARG2d |
3460 | | .if X64WIN | 3402 | | .else |
3461 | | .define fpmop, CARG2d | 3403 | | .define fpmop, CARG1d |
3462 | | .else | 3404 | | .endif |
3463 | | .define fpmop, CARG1d | 3405 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil |
3464 | | .endif | 3406 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 |
3465 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | 3407 | | sqrtsd xmm0, xmm0; ret |
3466 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | 3408 | |2: |
3467 | | sqrtsd xmm0, xmm0; ret | 3409 | | .if X64WIN |
3468 | |2: | 3410 | | movsd qword [rsp+8], xmm0 // Use scratch area. |
3469 | | .if X64WIN | 3411 | | fld qword [rsp+8] |
3470 | | movsd qword [rsp+8], xmm0 // Use scratch area. | 3412 | | .else |
3471 | | fld qword [rsp+8] | 3413 | | movsd qword [rsp-8], xmm0 // Use red zone. |
3472 | | .else | 3414 | | fld qword [rsp-8] |
3473 | | movsd qword [rsp-8], xmm0 // Use red zone. | 3415 | | .endif |
3474 | | fld qword [rsp-8] | 3416 | | cmp fpmop, 5; ja >2 |
3475 | | .endif | 3417 | | .if X64WIN; pop rax; .endif |
3476 | | cmp fpmop, 5; ja >2 | 3418 | | je >1 |
3477 | | .if X64WIN; pop rax; .endif | 3419 | | call ->vm_exp_x87 |
3478 | | je >1 | 3420 | | .if X64WIN; push rax; .endif |
3479 | | call ->vm_exp_x87 | 3421 | | jmp >7 |
3480 | | .if X64WIN; push rax; .endif | 3422 | |1: |
3481 | | jmp >7 | 3423 | | call ->vm_exp2_x87 |
3482 | |1: | 3424 | | .if X64WIN; push rax; .endif |
3483 | | call ->vm_exp2_x87 | 3425 | | jmp >7 |
3484 | | .if X64WIN; push rax; .endif | 3426 | |2: ; cmp fpmop, 7; je >1; ja >2 |
3485 | | jmp >7 | 3427 | | fldln2; fxch; fyl2x; jmp >7 |
3486 | |2: ; cmp fpmop, 7; je >1; ja >2 | 3428 | |1: ; fld1; fxch; fyl2x; jmp >7 |
3487 | | fldln2; fxch; fyl2x; jmp >7 | 3429 | |2: ; cmp fpmop, 9; je >1; ja >2 |
3488 | |1: ; fld1; fxch; fyl2x; jmp >7 | 3430 | | fldlg2; fxch; fyl2x; jmp >7 |
3489 | |2: ; cmp fpmop, 9; je >1; ja >2 | 3431 | |1: ; fsin; jmp >7 |
3490 | | fldlg2; fxch; fyl2x; jmp >7 | 3432 | |2: ; cmp fpmop, 11; je >1; ja >9 |
3491 | |1: ; fsin; jmp >7 | 3433 | | fcos; jmp >7 |
3492 | |2: ; cmp fpmop, 11; je >1; ja >9 | 3434 | |1: ; fptan; fpop |
3493 | | fcos; jmp >7 | 3435 | |7: |
3494 | |1: ; fptan; fpop | 3436 | | .if X64WIN |
3495 | |7: | 3437 | | fstp qword [rsp+8] // Use scratch area. |
3496 | | .if X64WIN | 3438 | | movsd xmm0, qword [rsp+8] |
3497 | | fstp qword [rsp+8] // Use scratch area. | 3439 | | .else |
3498 | | movsd xmm0, qword [rsp+8] | 3440 | | fstp qword [rsp-8] // Use red zone. |
3499 | | .else | 3441 | | movsd xmm0, qword [rsp-8] |
3500 | | fstp qword [rsp-8] // Use red zone. | 3442 | | .endif |
3501 | | movsd xmm0, qword [rsp-8] | 3443 | | ret |
3502 | | .endif | 3444 | |.else // x86 calling convention. |
3503 | | ret | 3445 | | .define fpmop, eax |
3504 | | | 3446 | |.if SSE |
3505 | |.else // x86 calling convention. | 3447 | | mov fpmop, [esp+12] |
3506 | | | 3448 | | movsd xmm0, qword [esp+4] |
3507 | | .define fpmop, eax | 3449 | | cmp fpmop, 1; je >1; ja >2 |
3508 | | mov fpmop, [esp+12] | 3450 | | call ->vm_floor; jmp >7 |
3509 | | movsd xmm0, qword [esp+4] | 3451 | |1: ; call ->vm_ceil; jmp >7 |
3510 | | cmp fpmop, 1; je >1; ja >2 | 3452 | |2: ; cmp fpmop, 3; je >1; ja >2 |
3511 | | call ->vm_floor; jmp >7 | 3453 | | call ->vm_trunc; jmp >7 |
3512 | |1: ; call ->vm_ceil; jmp >7 | 3454 | |1: |
3513 | |2: ; cmp fpmop, 3; je >1; ja >2 | 3455 | | sqrtsd xmm0, xmm0 |
3514 | | call ->vm_trunc; jmp >7 | 3456 | |7: |
3515 | |1: | 3457 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. |
3516 | | sqrtsd xmm0, xmm0 | 3458 | | fld qword [esp+4] |
3517 | |7: | 3459 | | ret |
3518 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | 3460 | |2: ; fld qword [esp+4] |
3519 | | fld qword [esp+4] | 3461 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 |
3520 | | ret | 3462 | |2: ; cmp fpmop, 7; je >1; ja >2 |
3521 | |2: ; fld qword [esp+4] | 3463 | | fldln2; fxch; fyl2x; ret |
3522 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | 3464 | |1: ; fld1; fxch; fyl2x; ret |
3523 | |2: ; cmp fpmop, 7; je >1; ja >2 | 3465 | |2: ; cmp fpmop, 9; je >1; ja >2 |
3524 | | fldln2; fxch; fyl2x; ret | 3466 | | fldlg2; fxch; fyl2x; ret |
3525 | |1: ; fld1; fxch; fyl2x; ret | 3467 | |1: ; fsin; ret |
3526 | |2: ; cmp fpmop, 9; je >1; ja >2 | 3468 | |2: ; cmp fpmop, 11; je >1; ja >9 |
3527 | | fldlg2; fxch; fyl2x; ret | 3469 | | fcos; ret |
3528 | |1: ; fsin; ret | 3470 | |1: ; fptan; fpop; ret |
3529 | |2: ; cmp fpmop, 11; je >1; ja >9 | 3471 | |.else |
3530 | | fcos; ret | 3472 | | mov fpmop, [esp+12] |
3531 | |1: ; fptan; fpop; ret | 3473 | | fld qword [esp+4] |
3532 | | | 3474 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil |
3533 | |.endif | 3475 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 |
3534 | } else { | 3476 | | fsqrt; ret |
3535 | | mov fpmop, [esp+12] | 3477 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 |
3536 | | fld qword [esp+4] | 3478 | | cmp fpmop, 7; je >1; ja >2 |
3537 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | 3479 | | fldln2; fxch; fyl2x; ret |
3538 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | 3480 | |1: ; fld1; fxch; fyl2x; ret |
3539 | | fsqrt; ret | 3481 | |2: ; cmp fpmop, 9; je >1; ja >2 |
3540 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | 3482 | | fldlg2; fxch; fyl2x; ret |
3541 | | cmp fpmop, 7; je >1; ja >2 | 3483 | |1: ; fsin; ret |
3542 | | fldln2; fxch; fyl2x; ret | 3484 | |2: ; cmp fpmop, 11; je >1; ja >9 |
3543 | |1: ; fld1; fxch; fyl2x; ret | 3485 | | fcos; ret |
3544 | |2: ; cmp fpmop, 9; je >1; ja >2 | 3486 | |1: ; fptan; fpop; ret |
3545 | | fldlg2; fxch; fyl2x; ret | 3487 | |.endif |
3546 | |1: ; fsin; ret | 3488 | |.endif |
3547 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3548 | | fcos; ret | ||
3549 | |1: ; fptan; fpop; ret | ||
3550 | } | ||
3551 | |9: ; int3 // Bad fpm. | 3489 | |9: ; int3 // Bad fpm. |
3552 | #endif | 3490 | |.endif |
3553 | | | 3491 | | |
3554 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | 3492 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) |
3555 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | 3493 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) |
3556 | |// and basic math functions. ORDER ARITH | 3494 | |// and basic math functions. ORDER ARITH |
3557 | |->vm_foldarith: | 3495 | |->vm_foldarith: |
3558 | if (sse) { | 3496 | |.if X64 |
3559 | |.if X64 | 3497 | | |
3560 | | | 3498 | | .if X64WIN |
3561 | | .if X64WIN | 3499 | | .define foldop, CARG3d |
3562 | | .define foldop, CARG3d | 3500 | | .else |
3563 | | .else | 3501 | | .define foldop, CARG1d |
3564 | | .define foldop, CARG1d | 3502 | | .endif |
3565 | | .endif | 3503 | | cmp foldop, 1; je >1; ja >2 |
3566 | | cmp foldop, 1; je >1; ja >2 | 3504 | | addsd xmm0, xmm1; ret |
3567 | | addsd xmm0, xmm1; ret | 3505 | |1: ; subsd xmm0, xmm1; ret |
3568 | |1: ; subsd xmm0, xmm1; ret | 3506 | |2: ; cmp foldop, 3; je >1; ja >2 |
3569 | |2: ; cmp foldop, 3; je >1; ja >2 | 3507 | | mulsd xmm0, xmm1; ret |
3570 | | mulsd xmm0, xmm1; ret | 3508 | |1: ; divsd xmm0, xmm1; ret |
3571 | |1: ; divsd xmm0, xmm1; ret | 3509 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow |
3572 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow | 3510 | | cmp foldop, 7; je >1; ja >2 |
3573 | | cmp foldop, 7; je >1; ja >2 | 3511 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret |
3574 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | 3512 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret |
3575 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | 3513 | |2: ; cmp foldop, 9; ja >2 |
3576 | |2: ; cmp foldop, 9; ja >2 | 3514 | |.if X64WIN |
3577 | |.if X64WIN | 3515 | | movsd qword [rsp+8], xmm0 // Use scratch area. |
3578 | | movsd qword [rsp+8], xmm0 // Use scratch area. | 3516 | | movsd qword [rsp+16], xmm1 |
3579 | | movsd qword [rsp+16], xmm1 | 3517 | | fld qword [rsp+8] |
3580 | | fld qword [rsp+8] | 3518 | | fld qword [rsp+16] |
3581 | | fld qword [rsp+16] | 3519 | |.else |
3582 | |.else | 3520 | | movsd qword [rsp-8], xmm0 // Use red zone. |
3583 | | movsd qword [rsp-8], xmm0 // Use red zone. | 3521 | | movsd qword [rsp-16], xmm1 |
3584 | | movsd qword [rsp-16], xmm1 | 3522 | | fld qword [rsp-8] |
3585 | | fld qword [rsp-8] | 3523 | | fld qword [rsp-16] |
3586 | | fld qword [rsp-16] | 3524 | |.endif |
3587 | |.endif | 3525 | | je >1 |
3588 | | je >1 | 3526 | | fpatan |
3589 | | fpatan | 3527 | |7: |
3590 | |7: | 3528 | |.if X64WIN |
3591 | |.if X64WIN | 3529 | | fstp qword [rsp+8] // Use scratch area. |
3592 | | fstp qword [rsp+8] // Use scratch area. | 3530 | | movsd xmm0, qword [rsp+8] |
3593 | | movsd xmm0, qword [rsp+8] | 3531 | |.else |
3594 | |.else | 3532 | | fstp qword [rsp-8] // Use red zone. |
3595 | | fstp qword [rsp-8] // Use red zone. | 3533 | | movsd xmm0, qword [rsp-8] |
3596 | | movsd xmm0, qword [rsp-8] | 3534 | |.endif |
3597 | |.endif | 3535 | | ret |
3598 | | ret | 3536 | |1: ; fxch; fscale; fpop1; jmp <7 |
3599 | |1: ; fxch; fscale; fpop1; jmp <7 | 3537 | |2: ; cmp foldop, 11; je >1; ja >9 |
3600 | |2: ; cmp foldop, 11; je >1; ja >9 | 3538 | | minsd xmm0, xmm1; ret |
3601 | | minsd xmm0, xmm1; ret | 3539 | |1: ; maxsd xmm0, xmm1; ret |
3602 | |1: ; maxsd xmm0, xmm1; ret | 3540 | |9: ; int3 // Bad op. |
3603 | |9: ; int3 // Bad op. | 3541 | | |
3604 | | | 3542 | |.elif SSE // x86 calling convention with SSE ops. |
3605 | |.else // x86 calling convention. | 3543 | | |
3606 | | | 3544 | | .define foldop, eax |
3607 | | .define foldop, eax | 3545 | | mov foldop, [esp+20] |
3608 | | mov foldop, [esp+20] | 3546 | | movsd xmm0, qword [esp+4] |
3609 | | movsd xmm0, qword [esp+4] | 3547 | | movsd xmm1, qword [esp+12] |
3610 | | movsd xmm1, qword [esp+12] | 3548 | | cmp foldop, 1; je >1; ja >2 |
3611 | | cmp foldop, 1; je >1; ja >2 | 3549 | | addsd xmm0, xmm1 |
3612 | | addsd xmm0, xmm1 | 3550 | |7: |
3613 | |7: | 3551 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. |
3614 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | 3552 | | fld qword [esp+4] |
3615 | | fld qword [esp+4] | 3553 | | ret |
3616 | | ret | 3554 | |1: ; subsd xmm0, xmm1; jmp <7 |
3617 | |1: ; subsd xmm0, xmm1; jmp <7 | 3555 | |2: ; cmp foldop, 3; je >1; ja >2 |
3618 | |2: ; cmp foldop, 3; je >1; ja >2 | 3556 | | mulsd xmm0, xmm1; jmp <7 |
3619 | | mulsd xmm0, xmm1; jmp <7 | 3557 | |1: ; divsd xmm0, xmm1; jmp <7 |
3620 | |1: ; divsd xmm0, xmm1; jmp <7 | 3558 | |2: ; cmp foldop, 5 |
3621 | |2: ; cmp foldop, 5 | 3559 | | je >1; ja >2 |
3622 | | je >1; ja >2 | 3560 | | call ->vm_mod; jmp <7 |
3623 | | call ->vm_mod; jmp <7 | 3561 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. |
3624 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. | 3562 | |2: ; cmp foldop, 7; je >1; ja >2 |
3625 | |2: ; cmp foldop, 7; je >1; ja >2 | 3563 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 |
3626 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | 3564 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 |
3627 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | 3565 | |2: ; cmp foldop, 9; ja >2 |
3628 | |2: ; cmp foldop, 9; ja >2 | 3566 | | fld qword [esp+4] // Reload from stack |
3629 | | fld qword [esp+4] // Reload from stack | 3567 | | fld qword [esp+12] |
3630 | | fld qword [esp+12] | 3568 | | je >1 |
3631 | | je >1 | 3569 | | fpatan; ret |
3632 | | fpatan; ret | 3570 | |1: ; fxch; fscale; fpop1; ret |
3633 | |1: ; fxch; fscale; fpop1; ret | 3571 | |2: ; cmp foldop, 11; je >1; ja >9 |
3634 | |2: ; cmp foldop, 11; je >1; ja >9 | 3572 | | minsd xmm0, xmm1; jmp <7 |
3635 | | minsd xmm0, xmm1; jmp <7 | 3573 | |1: ; maxsd xmm0, xmm1; jmp <7 |
3636 | |1: ; maxsd xmm0, xmm1; jmp <7 | 3574 | |9: ; int3 // Bad op. |
3637 | |9: ; int3 // Bad op. | 3575 | | |
3638 | | | 3576 | |.else // x86 calling convention with x87 ops. |
3639 | |.endif | 3577 | | |
3640 | } else { | 3578 | | mov eax, [esp+20] |
3641 | | mov eax, [esp+20] | 3579 | | fld qword [esp+4] |
3642 | | fld qword [esp+4] | 3580 | | fld qword [esp+12] |
3643 | | fld qword [esp+12] | 3581 | | cmp eax, 1; je >1; ja >2 |
3644 | | cmp eax, 1; je >1; ja >2 | 3582 | | faddp st1; ret |
3645 | | faddp st1; ret | 3583 | |1: ; fsubp st1; ret |
3646 | |1: ; fsubp st1; ret | 3584 | |2: ; cmp eax, 3; je >1; ja >2 |
3647 | |2: ; cmp eax, 3; je >1; ja >2 | 3585 | | fmulp st1; ret |
3648 | | fmulp st1; ret | 3586 | |1: ; fdivp st1; ret |
3649 | |1: ; fdivp st1; ret | 3587 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow |
3650 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | 3588 | | cmp eax, 7; je >1; ja >2 |
3651 | | cmp eax, 7; je >1; ja >2 | 3589 | | fpop; fchs; ret |
3652 | | fpop; fchs; ret | 3590 | |1: ; fpop; fabs; ret |
3653 | |1: ; fpop; fabs; ret | 3591 | |2: ; cmp eax, 9; je >1; ja >2 |
3654 | |2: ; cmp eax, 9; je >1; ja >2 | 3592 | | fpatan; ret |
3655 | | fpatan; ret | 3593 | |1: ; fxch; fscale; fpop1; ret |
3656 | |1: ; fxch; fscale; fpop1; ret | 3594 | |2: ; cmp eax, 11; je >1; ja >9 |
3657 | |2: ; cmp eax, 11; je >1; ja >9 | 3595 | | fucomi st1; fcmovnbe st1; fpop1; ret |
3658 | ||if (cmov) { | 3596 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret |
3659 | | fucomi st1; fcmovnbe st1; fpop1; ret | 3597 | |9: ; int3 // Bad op. |
3660 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | 3598 | | |
3661 | ||} else { | 3599 | |.endif |
3662 | | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret | ||
3663 | |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret | ||
3664 | ||} | ||
3665 | |9: ; int3 // Bad op. | ||
3666 | } | ||
3667 | | | 3600 | | |
3668 | |//----------------------------------------------------------------------- | 3601 | |//----------------------------------------------------------------------- |
3669 | |//-- Miscellaneous functions -------------------------------------------- | 3602 | |//-- Miscellaneous functions -------------------------------------------- |
@@ -3726,7 +3659,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3726 | | | 3659 | | |
3727 | |// Handler for callback functions. Callback slot number in ah/al. | 3660 | |// Handler for callback functions. Callback slot number in ah/al. |
3728 | |->vm_ffi_callback: | 3661 | |->vm_ffi_callback: |
3729 | #if LJ_HASFFI | 3662 | |.if FFI |
3730 | |.type CTSTATE, CTState, PC | 3663 | |.type CTSTATE, CTState, PC |
3731 | |.if not X64 | 3664 | |.if not X64 |
3732 | | sub esp, 16 // Leave room for SAVE_ERRF etc. | 3665 | | sub esp, 16 // Leave room for SAVE_ERRF etc. |
@@ -3781,10 +3714,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3781 | | shr RD, 3 | 3714 | | shr RD, 3 |
3782 | | add RD, 1 | 3715 | | add RD, 1 |
3783 | | ins_callt | 3716 | | ins_callt |
3784 | #endif | 3717 | |.endif |
3785 | | | 3718 | | |
3786 | |->cont_ffi_callback: // Return from FFI callback. | 3719 | |->cont_ffi_callback: // Return from FFI callback. |
3787 | #if LJ_HASFFI | 3720 | |.if FFI |
3788 | | mov L:RA, SAVE_L | 3721 | | mov L:RA, SAVE_L |
3789 | | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] | 3722 | | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] |
3790 | | mov aword CTSTATE->L, L:RAa | 3723 | | mov aword CTSTATE->L, L:RAa |
@@ -3819,11 +3752,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3819 | | push ecx | 3752 | | push ecx |
3820 | | ret | 3753 | | ret |
3821 | |.endif | 3754 | |.endif |
3822 | #endif | 3755 | |.endif |
3823 | | | 3756 | | |
3824 | |->vm_ffi_call@4: // Call C function via FFI. | 3757 | |->vm_ffi_call@4: // Call C function via FFI. |
3825 | | // Caveat: needs special frame unwinding, see below. | 3758 | | // Caveat: needs special frame unwinding, see below. |
3826 | #if LJ_HASFFI | 3759 | |.if FFI |
3827 | |.if X64 | 3760 | |.if X64 |
3828 | | .type CCSTATE, CCallState, rbx | 3761 | | .type CCSTATE, CCallState, rbx |
3829 | | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 | 3762 | | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 |
@@ -3838,9 +3771,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3838 | | sub rsp, rax | 3771 | | sub rsp, rax |
3839 | |.else | 3772 | |.else |
3840 | | sub esp, CCSTATE->spadj | 3773 | | sub esp, CCSTATE->spadj |
3841 | #if LJ_TARGET_WINDOWS | 3774 | |.if WIN |
3842 | | mov CCSTATE->spadj, esp | 3775 | | mov CCSTATE->spadj, esp |
3843 | #endif | 3776 | |.endif |
3844 | |.endif | 3777 | |.endif |
3845 | | | 3778 | | |
3846 | | // Copy stack slots. | 3779 | | // Copy stack slots. |
@@ -3907,9 +3840,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3907 | |6: | 3840 | |6: |
3908 | | fstp dword CCSTATE->fpr[0].f[0] | 3841 | | fstp dword CCSTATE->fpr[0].f[0] |
3909 | |7: | 3842 | |7: |
3910 | #if LJ_TARGET_WINDOWS | 3843 | |.if WIN |
3911 | | sub CCSTATE->spadj, esp | 3844 | | sub CCSTATE->spadj, esp |
3912 | #endif | 3845 | |.endif |
3913 | |.endif | 3846 | |.endif |
3914 | | | 3847 | | |
3915 | |.if X64 | 3848 | |.if X64 |
@@ -3917,14 +3850,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3917 | |.else | 3850 | |.else |
3918 | | mov ebx, [ebp-4]; leave; ret | 3851 | | mov ebx, [ebp-4]; leave; ret |
3919 | |.endif | 3852 | |.endif |
3920 | #endif | 3853 | |.endif |
3921 | |// Note: vm_ffi_call must be the last function in this object file! | 3854 | |// Note: vm_ffi_call must be the last function in this object file! |
3922 | | | 3855 | | |
3923 | |//----------------------------------------------------------------------- | 3856 | |//----------------------------------------------------------------------- |
3924 | } | 3857 | } |
3925 | 3858 | ||
3926 | /* Generate the code for a single instruction. */ | 3859 | /* Generate the code for a single instruction. */ |
3927 | static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | 3860 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) |
3928 | { | 3861 | { |
3929 | int vk = 0; | 3862 | int vk = 0; |
3930 | |// Note: aligning all instructions does not pay off. | 3863 | |// Note: aligning all instructions does not pay off. |
@@ -3957,79 +3890,79 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3957 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 3890 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
3958 | | // RA = src1, RD = src2, JMP with RD = target | 3891 | | // RA = src1, RD = src2, JMP with RD = target |
3959 | | ins_AD | 3892 | | ins_AD |
3960 | if (LJ_DUALNUM) { | 3893 | |.if DUALNUM |
3961 | | checkint RA, >7 | 3894 | | checkint RA, >7 |
3962 | | checkint RD, >8 | 3895 | | checkint RD, >8 |
3963 | | mov RB, dword [BASE+RA*8] | 3896 | | mov RB, dword [BASE+RA*8] |
3964 | | add PC, 4 | 3897 | | add PC, 4 |
3965 | | cmp RB, dword [BASE+RD*8] | 3898 | | cmp RB, dword [BASE+RD*8] |
3966 | | jmp_comp jge, jl, jg, jle, >9 | 3899 | | jmp_comp jge, jl, jg, jle, >9 |
3967 | |6: | 3900 | |6: |
3968 | | movzx RD, PC_RD | 3901 | | movzx RD, PC_RD |
3969 | | branchPC RD | 3902 | | branchPC RD |
3970 | |9: | 3903 | |9: |
3971 | | ins_next | 3904 | | ins_next |
3972 | | | 3905 | | |
3973 | |7: // RA is not an integer. | 3906 | |7: // RA is not an integer. |
3974 | | ja ->vmeta_comp | 3907 | | ja ->vmeta_comp |
3975 | | // RA is a number. | 3908 | | // RA is a number. |
3976 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | 3909 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp |
3977 | | // RA is a number, RD is an integer. | 3910 | | // RA is a number, RD is an integer. |
3978 | if (sse) { | 3911 | |.if SSE |
3979 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3912 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
3980 | | jmp >2 | 3913 | | jmp >2 |
3981 | } else { | 3914 | |.else |
3982 | | fld qword [BASE+RA*8] | 3915 | | fld qword [BASE+RA*8] |
3983 | | fild dword [BASE+RD*8] | 3916 | | fild dword [BASE+RD*8] |
3984 | | jmp >3 | 3917 | | jmp >3 |
3985 | } | 3918 | |.endif |
3986 | | | 3919 | | |
3987 | |8: // RA is an integer, RD is not an integer. | 3920 | |8: // RA is an integer, RD is not an integer. |
3988 | | ja ->vmeta_comp | 3921 | | ja ->vmeta_comp |
3989 | | // RA is an integer, RD is a number. | 3922 | | // RA is an integer, RD is a number. |
3990 | if (sse) { | 3923 | |.if SSE |
3991 | | cvtsi2sd xmm1, dword [BASE+RA*8] | 3924 | | cvtsi2sd xmm1, dword [BASE+RA*8] |
3992 | | movsd xmm0, qword [BASE+RD*8] | 3925 | | movsd xmm0, qword [BASE+RD*8] |
3993 | | add PC, 4 | 3926 | | add PC, 4 |
3994 | | ucomisd xmm0, xmm1 | 3927 | | ucomisd xmm0, xmm1 |
3995 | | jmp_comp jbe, ja, jb, jae, <9 | 3928 | | jmp_comp jbe, ja, jb, jae, <9 |
3996 | | jmp <6 | 3929 | | jmp <6 |
3997 | } else { | 3930 | |.else |
3998 | | fild dword [BASE+RA*8] | 3931 | | fild dword [BASE+RA*8] |
3999 | | jmp >2 | 3932 | | jmp >2 |
4000 | } | 3933 | |.endif |
4001 | } else { | 3934 | |.else |
4002 | | checknum RA, ->vmeta_comp | 3935 | | checknum RA, ->vmeta_comp |
4003 | | checknum RD, ->vmeta_comp | 3936 | | checknum RD, ->vmeta_comp |
4004 | } | 3937 | |.endif |
4005 | if (sse) { | 3938 | |.if SSE |
4006 | |1: | 3939 | |1: |
4007 | | movsd xmm0, qword [BASE+RD*8] | 3940 | | movsd xmm0, qword [BASE+RD*8] |
4008 | |2: | 3941 | |2: |
4009 | | add PC, 4 | 3942 | | add PC, 4 |
4010 | | ucomisd xmm0, qword [BASE+RA*8] | 3943 | | ucomisd xmm0, qword [BASE+RA*8] |
4011 | |3: | 3944 | |3: |
4012 | } else { | 3945 | |.else |
4013 | |1: | 3946 | |1: |
4014 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | 3947 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. |
4015 | |2: | 3948 | |2: |
4016 | | fld qword [BASE+RD*8] | 3949 | | fld qword [BASE+RD*8] |
4017 | |3: | 3950 | |3: |
4018 | | add PC, 4 | 3951 | | add PC, 4 |
4019 | | fcomparepp // eax (RD) modified! | 3952 | | fcomparepp |
4020 | } | 3953 | |.endif |
4021 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3954 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
4022 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3955 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
4023 | if (LJ_DUALNUM) { | 3956 | |.if DUALNUM |
4024 | | jmp_comp jbe, ja, jb, jae, <9 | 3957 | | jmp_comp jbe, ja, jb, jae, <9 |
4025 | | jmp <6 | 3958 | | jmp <6 |
4026 | } else { | 3959 | |.else |
4027 | | jmp_comp jbe, ja, jb, jae, >1 | 3960 | | jmp_comp jbe, ja, jb, jae, >1 |
4028 | | movzx RD, PC_RD | 3961 | | movzx RD, PC_RD |
4029 | | branchPC RD | 3962 | | branchPC RD |
4030 | |1: | 3963 | |1: |
4031 | | ins_next | 3964 | | ins_next |
4032 | } | 3965 | |.endif |
4033 | break; | 3966 | break; |
4034 | 3967 | ||
4035 | case BC_ISEQV: case BC_ISNEV: | 3968 | case BC_ISEQV: case BC_ISNEV: |
@@ -4037,63 +3970,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4037 | | ins_AD // RA = src1, RD = src2, JMP with RD = target | 3970 | | ins_AD // RA = src1, RD = src2, JMP with RD = target |
4038 | | mov RB, [BASE+RD*8+4] | 3971 | | mov RB, [BASE+RD*8+4] |
4039 | | add PC, 4 | 3972 | | add PC, 4 |
4040 | if (LJ_DUALNUM) { | 3973 | |.if DUALNUM |
4041 | | cmp RB, LJ_TISNUM; jne >7 | 3974 | | cmp RB, LJ_TISNUM; jne >7 |
4042 | | checkint RA, >8 | 3975 | | checkint RA, >8 |
4043 | | mov RB, dword [BASE+RD*8] | 3976 | | mov RB, dword [BASE+RD*8] |
4044 | | cmp RB, dword [BASE+RA*8] | 3977 | | cmp RB, dword [BASE+RA*8] |
4045 | if (vk) { | 3978 | if (vk) { |
4046 | | jne >9 | 3979 | | jne >9 |
4047 | } else { | ||
4048 | | je >9 | ||
4049 | } | ||
4050 | | movzx RD, PC_RD | ||
4051 | | branchPC RD | ||
4052 | |9: | ||
4053 | | ins_next | ||
4054 | | | ||
4055 | |7: // RD is not an integer. | ||
4056 | | ja >5 | ||
4057 | | // RD is a number. | ||
4058 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | ||
4059 | | // RD is a number, RA is an integer. | ||
4060 | if (sse) { | ||
4061 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
4062 | } else { | ||
4063 | | fild dword [BASE+RA*8] | ||
4064 | } | ||
4065 | | jmp >2 | ||
4066 | | | ||
4067 | |8: // RD is an integer, RA is not an integer. | ||
4068 | | ja >5 | ||
4069 | | // RD is an integer, RA is a number. | ||
4070 | if (sse) { | ||
4071 | | cvtsi2sd xmm0, dword [BASE+RD*8] | ||
4072 | | ucomisd xmm0, qword [BASE+RA*8] | ||
4073 | } else { | ||
4074 | | fild dword [BASE+RD*8] | ||
4075 | | fld qword [BASE+RA*8] | ||
4076 | } | ||
4077 | | jmp >4 | ||
4078 | | | ||
4079 | } else { | ||
4080 | | cmp RB, LJ_TISNUM; jae >5 | ||
4081 | | checknum RA, >5 | ||
4082 | } | ||
4083 | if (sse) { | ||
4084 | |1: | ||
4085 | | movsd xmm0, qword [BASE+RA*8] | ||
4086 | |2: | ||
4087 | | ucomisd xmm0, qword [BASE+RD*8] | ||
4088 | |4: | ||
4089 | } else { | 3980 | } else { |
4090 | |1: | 3981 | | je >9 |
4091 | | fld qword [BASE+RA*8] | ||
4092 | |2: | ||
4093 | | fld qword [BASE+RD*8] | ||
4094 | |4: | ||
4095 | | fcomparepp // eax (RD) modified! | ||
4096 | } | 3982 | } |
3983 | | movzx RD, PC_RD | ||
3984 | | branchPC RD | ||
3985 | |9: | ||
3986 | | ins_next | ||
3987 | | | ||
3988 | |7: // RD is not an integer. | ||
3989 | | ja >5 | ||
3990 | | // RD is a number. | ||
3991 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | ||
3992 | | // RD is a number, RA is an integer. | ||
3993 | |.if SSE | ||
3994 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
3995 | |.else | ||
3996 | | fild dword [BASE+RA*8] | ||
3997 | |.endif | ||
3998 | | jmp >2 | ||
3999 | | | ||
4000 | |8: // RD is an integer, RA is not an integer. | ||
4001 | | ja >5 | ||
4002 | | // RD is an integer, RA is a number. | ||
4003 | |.if SSE | ||
4004 | | cvtsi2sd xmm0, dword [BASE+RD*8] | ||
4005 | | ucomisd xmm0, qword [BASE+RA*8] | ||
4006 | |.else | ||
4007 | | fild dword [BASE+RD*8] | ||
4008 | | fld qword [BASE+RA*8] | ||
4009 | |.endif | ||
4010 | | jmp >4 | ||
4011 | | | ||
4012 | |.else | ||
4013 | | cmp RB, LJ_TISNUM; jae >5 | ||
4014 | | checknum RA, >5 | ||
4015 | |.endif | ||
4016 | |.if SSE | ||
4017 | |1: | ||
4018 | | movsd xmm0, qword [BASE+RA*8] | ||
4019 | |2: | ||
4020 | | ucomisd xmm0, qword [BASE+RD*8] | ||
4021 | |4: | ||
4022 | |.else | ||
4023 | |1: | ||
4024 | | fld qword [BASE+RA*8] | ||
4025 | |2: | ||
4026 | | fld qword [BASE+RD*8] | ||
4027 | |4: | ||
4028 | | fcomparepp | ||
4029 | |.endif | ||
4097 | iseqne_fp: | 4030 | iseqne_fp: |
4098 | if (vk) { | 4031 | if (vk) { |
4099 | | jp >2 // Unordered means not equal. | 4032 | | jp >2 // Unordered means not equal. |
@@ -4129,10 +4062,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4129 | | | 4062 | | |
4130 | if (op == BC_ISEQV || op == BC_ISNEV) { | 4063 | if (op == BC_ISEQV || op == BC_ISNEV) { |
4131 | |5: // Either or both types are not numbers. | 4064 | |5: // Either or both types are not numbers. |
4132 | if (LJ_HASFFI) { | 4065 | |.if FFI |
4133 | | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd | 4066 | | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd |
4134 | | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd | 4067 | | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd |
4135 | } | 4068 | |.endif |
4136 | | checktp RA, RB // Compare types. | 4069 | | checktp RA, RB // Compare types. |
4137 | | jne <2 // Not the same type? | 4070 | | jne <2 // Not the same type? |
4138 | | cmp RB, LJ_TISPRI | 4071 | | cmp RB, LJ_TISPRI |
@@ -4163,7 +4096,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4163 | | mov RB, 1 // ne = 1 | 4096 | | mov RB, 1 // ne = 1 |
4164 | } | 4097 | } |
4165 | | jmp ->vmeta_equal // Handle __eq metamethod. | 4098 | | jmp ->vmeta_equal // Handle __eq metamethod. |
4166 | } else if (LJ_HASFFI) { | 4099 | } else { |
4100 | |.if FFI | ||
4167 | |3: | 4101 | |3: |
4168 | | cmp RB, LJ_TCDATA | 4102 | | cmp RB, LJ_TCDATA |
4169 | if (LJ_DUALNUM && vk) { | 4103 | if (LJ_DUALNUM && vk) { |
@@ -4172,6 +4106,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4172 | | jne <2 | 4106 | | jne <2 |
4173 | } | 4107 | } |
4174 | | jmp ->vmeta_equal_cd | 4108 | | jmp ->vmeta_equal_cd |
4109 | |.endif | ||
4175 | } | 4110 | } |
4176 | break; | 4111 | break; |
4177 | case BC_ISEQS: case BC_ISNES: | 4112 | case BC_ISEQS: case BC_ISNES: |
@@ -4194,59 +4129,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4194 | | ins_AD // RA = src, RD = num const, JMP with RD = target | 4129 | | ins_AD // RA = src, RD = num const, JMP with RD = target |
4195 | | mov RB, [BASE+RA*8+4] | 4130 | | mov RB, [BASE+RA*8+4] |
4196 | | add PC, 4 | 4131 | | add PC, 4 |
4197 | if (LJ_DUALNUM) { | 4132 | |.if DUALNUM |
4198 | | cmp RB, LJ_TISNUM; jne >7 | 4133 | | cmp RB, LJ_TISNUM; jne >7 |
4199 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 | 4134 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 |
4200 | | mov RB, dword [KBASE+RD*8] | 4135 | | mov RB, dword [KBASE+RD*8] |
4201 | | cmp RB, dword [BASE+RA*8] | 4136 | | cmp RB, dword [BASE+RA*8] |
4202 | if (vk) { | 4137 | if (vk) { |
4203 | | jne >9 | 4138 | | jne >9 |
4204 | } else { | ||
4205 | | je >9 | ||
4206 | } | ||
4207 | | movzx RD, PC_RD | ||
4208 | | branchPC RD | ||
4209 | |9: | ||
4210 | | ins_next | ||
4211 | | | ||
4212 | |7: // RA is not an integer. | ||
4213 | | ja >3 | ||
4214 | | // RA is a number. | ||
4215 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | ||
4216 | | // RA is a number, RD is an integer. | ||
4217 | if (sse) { | ||
4218 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | ||
4219 | } else { | ||
4220 | | fild dword [KBASE+RD*8] | ||
4221 | } | ||
4222 | | jmp >2 | ||
4223 | | | ||
4224 | |8: // RA is an integer, RD is a number. | ||
4225 | if (sse) { | ||
4226 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
4227 | | ucomisd xmm0, qword [KBASE+RD*8] | ||
4228 | } else { | ||
4229 | | fild dword [BASE+RA*8] | ||
4230 | | fld qword [KBASE+RD*8] | ||
4231 | } | ||
4232 | | jmp >4 | ||
4233 | } else { | ||
4234 | | cmp RB, LJ_TISNUM; jae >3 | ||
4235 | } | ||
4236 | if (sse) { | ||
4237 | |1: | ||
4238 | | movsd xmm0, qword [KBASE+RD*8] | ||
4239 | |2: | ||
4240 | | ucomisd xmm0, qword [BASE+RA*8] | ||
4241 | |4: | ||
4242 | } else { | 4139 | } else { |
4243 | |1: | 4140 | | je >9 |
4244 | | fld qword [KBASE+RD*8] | ||
4245 | |2: | ||
4246 | | fld qword [BASE+RA*8] | ||
4247 | |4: | ||
4248 | | fcomparepp // eax (RD) modified! | ||
4249 | } | 4141 | } |
4142 | | movzx RD, PC_RD | ||
4143 | | branchPC RD | ||
4144 | |9: | ||
4145 | | ins_next | ||
4146 | | | ||
4147 | |7: // RA is not an integer. | ||
4148 | | ja >3 | ||
4149 | | // RA is a number. | ||
4150 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | ||
4151 | | // RA is a number, RD is an integer. | ||
4152 | |.if SSE | ||
4153 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | ||
4154 | |.else | ||
4155 | | fild dword [KBASE+RD*8] | ||
4156 | |.endif | ||
4157 | | jmp >2 | ||
4158 | | | ||
4159 | |8: // RA is an integer, RD is a number. | ||
4160 | |.if SSE | ||
4161 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
4162 | | ucomisd xmm0, qword [KBASE+RD*8] | ||
4163 | |.else | ||
4164 | | fild dword [BASE+RA*8] | ||
4165 | | fld qword [KBASE+RD*8] | ||
4166 | |.endif | ||
4167 | | jmp >4 | ||
4168 | |.else | ||
4169 | | cmp RB, LJ_TISNUM; jae >3 | ||
4170 | |.endif | ||
4171 | |.if SSE | ||
4172 | |1: | ||
4173 | | movsd xmm0, qword [KBASE+RD*8] | ||
4174 | |2: | ||
4175 | | ucomisd xmm0, qword [BASE+RA*8] | ||
4176 | |4: | ||
4177 | |.else | ||
4178 | |1: | ||
4179 | | fld qword [KBASE+RD*8] | ||
4180 | |2: | ||
4181 | | fld qword [BASE+RA*8] | ||
4182 | |4: | ||
4183 | | fcomparepp | ||
4184 | |.endif | ||
4250 | goto iseqne_fp; | 4185 | goto iseqne_fp; |
4251 | case BC_ISEQP: case BC_ISNEP: | 4186 | case BC_ISEQP: case BC_ISNEP: |
4252 | vk = op == BC_ISEQP; | 4187 | vk = op == BC_ISEQP; |
@@ -4322,59 +4257,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4322 | break; | 4257 | break; |
4323 | case BC_UNM: | 4258 | case BC_UNM: |
4324 | | ins_AD // RA = dst, RD = src | 4259 | | ins_AD // RA = dst, RD = src |
4325 | if (LJ_DUALNUM) { | 4260 | |.if DUALNUM |
4326 | | checkint RD, >5 | 4261 | | checkint RD, >5 |
4327 | | mov RB, [BASE+RD*8] | 4262 | | mov RB, [BASE+RD*8] |
4328 | | neg RB | 4263 | | neg RB |
4329 | | jo >4 | 4264 | | jo >4 |
4330 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4265 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4331 | | mov dword [BASE+RA*8], RB | 4266 | | mov dword [BASE+RA*8], RB |
4332 | |9: | 4267 | |9: |
4333 | | ins_next | 4268 | | ins_next |
4334 | |4: | 4269 | |4: |
4335 | | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. | 4270 | | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. |
4336 | | mov dword [BASE+RA*8], 0 | 4271 | | mov dword [BASE+RA*8], 0 |
4337 | | jmp <9 | 4272 | | jmp <9 |
4338 | |5: | 4273 | |5: |
4339 | | ja ->vmeta_unm | 4274 | | ja ->vmeta_unm |
4340 | } else { | 4275 | |.else |
4341 | | checknum RD, ->vmeta_unm | 4276 | | checknum RD, ->vmeta_unm |
4342 | } | 4277 | |.endif |
4343 | if (sse) { | 4278 | |.if SSE |
4344 | | movsd xmm0, qword [BASE+RD*8] | 4279 | | movsd xmm0, qword [BASE+RD*8] |
4345 | | sseconst_sign xmm1, RDa | 4280 | | sseconst_sign xmm1, RDa |
4346 | | xorps xmm0, xmm1 | 4281 | | xorps xmm0, xmm1 |
4347 | | movsd qword [BASE+RA*8], xmm0 | 4282 | | movsd qword [BASE+RA*8], xmm0 |
4348 | } else { | 4283 | |.else |
4349 | | fld qword [BASE+RD*8] | 4284 | | fld qword [BASE+RD*8] |
4350 | | fchs | 4285 | | fchs |
4351 | | fstp qword [BASE+RA*8] | 4286 | | fstp qword [BASE+RA*8] |
4352 | } | 4287 | |.endif |
4353 | if (LJ_DUALNUM) { | 4288 | |.if DUALNUM |
4354 | | jmp <9 | 4289 | | jmp <9 |
4355 | } else { | 4290 | |.else |
4356 | | ins_next | 4291 | | ins_next |
4357 | } | 4292 | |.endif |
4358 | break; | 4293 | break; |
4359 | case BC_LEN: | 4294 | case BC_LEN: |
4360 | | ins_AD // RA = dst, RD = src | 4295 | | ins_AD // RA = dst, RD = src |
4361 | | checkstr RD, >2 | 4296 | | checkstr RD, >2 |
4362 | | mov STR:RD, [BASE+RD*8] | 4297 | | mov STR:RD, [BASE+RD*8] |
4363 | if (LJ_DUALNUM) { | 4298 | |.if DUALNUM |
4364 | | mov RD, dword STR:RD->len | 4299 | | mov RD, dword STR:RD->len |
4365 | |1: | 4300 | |1: |
4366 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4301 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4367 | | mov dword [BASE+RA*8], RD | 4302 | | mov dword [BASE+RA*8], RD |
4368 | } else if (sse) { | 4303 | |.elif SSE |
4369 | | xorps xmm0, xmm0 | 4304 | | xorps xmm0, xmm0 |
4370 | | cvtsi2sd xmm0, dword STR:RD->len | 4305 | | cvtsi2sd xmm0, dword STR:RD->len |
4371 | |1: | 4306 | |1: |
4372 | | movsd qword [BASE+RA*8], xmm0 | 4307 | | movsd qword [BASE+RA*8], xmm0 |
4373 | } else { | 4308 | |.else |
4374 | | fild dword STR:RD->len | 4309 | | fild dword STR:RD->len |
4375 | |1: | 4310 | |1: |
4376 | | fstp qword [BASE+RA*8] | 4311 | | fstp qword [BASE+RA*8] |
4377 | } | 4312 | |.endif |
4378 | | ins_next | 4313 | | ins_next |
4379 | |2: | 4314 | |2: |
4380 | | checktab RD, ->vmeta_len | 4315 | | checktab RD, ->vmeta_len |
@@ -4389,16 +4324,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4389 | | mov RB, BASE // Save BASE. | 4324 | | mov RB, BASE // Save BASE. |
4390 | | call extern lj_tab_len@4 // (GCtab *t) | 4325 | | call extern lj_tab_len@4 // (GCtab *t) |
4391 | | // Length of table returned in eax (RD). | 4326 | | // Length of table returned in eax (RD). |
4392 | if (LJ_DUALNUM) { | 4327 | |.if DUALNUM |
4393 | | // Nothing to do. | 4328 | | // Nothing to do. |
4394 | } else if (sse) { | 4329 | |.elif SSE |
4395 | | cvtsi2sd xmm0, RD | 4330 | | cvtsi2sd xmm0, RD |
4396 | } else { | 4331 | |.else |
4397 | |.if not X64 | 4332 | | mov ARG1, RD |
4398 | | mov ARG1, RD | 4333 | | fild ARG1 |
4399 | | fild ARG1 | 4334 | |.endif |
4400 | |.endif | ||
4401 | } | ||
4402 | | mov BASE, RB // Restore BASE. | 4335 | | mov BASE, RB // Restore BASE. |
4403 | | movzx RA, PC_RA | 4336 | | movzx RA, PC_RA |
4404 | | jmp <1 | 4337 | | jmp <1 |
@@ -4418,40 +4351,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4418 | ||switch (vk) { | 4351 | ||switch (vk) { |
4419 | ||case 0: | 4352 | ||case 0: |
4420 | | checknum RB, ->vmeta_arith_vn | 4353 | | checknum RB, ->vmeta_arith_vn |
4421 | ||if (LJ_DUALNUM) { | 4354 | | .if DUALNUM |
4422 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | 4355 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn |
4423 | ||} | 4356 | | .endif |
4424 | ||if (sse) { | 4357 | | .if SSE |
4425 | | movsd xmm0, qword [BASE+RB*8] | 4358 | | movsd xmm0, qword [BASE+RB*8] |
4426 | | sseins ssereg, qword [KBASE+RC*8] | 4359 | | sseins ssereg, qword [KBASE+RC*8] |
4427 | ||} else { | 4360 | | .else |
4428 | | fld qword [BASE+RB*8] | 4361 | | fld qword [BASE+RB*8] |
4429 | | x87ins qword [KBASE+RC*8] | 4362 | | x87ins qword [KBASE+RC*8] |
4430 | ||} | 4363 | | .endif |
4431 | || break; | 4364 | || break; |
4432 | ||case 1: | 4365 | ||case 1: |
4433 | | checknum RB, ->vmeta_arith_nv | 4366 | | checknum RB, ->vmeta_arith_nv |
4434 | ||if (LJ_DUALNUM) { | 4367 | | .if DUALNUM |
4435 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | 4368 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv |
4436 | ||} | 4369 | | .endif |
4437 | ||if (sse) { | 4370 | | .if SSE |
4438 | | movsd xmm0, qword [KBASE+RC*8] | 4371 | | movsd xmm0, qword [KBASE+RC*8] |
4439 | | sseins ssereg, qword [BASE+RB*8] | 4372 | | sseins ssereg, qword [BASE+RB*8] |
4440 | ||} else { | 4373 | | .else |
4441 | | fld qword [KBASE+RC*8] | 4374 | | fld qword [KBASE+RC*8] |
4442 | | x87ins qword [BASE+RB*8] | 4375 | | x87ins qword [BASE+RB*8] |
4443 | ||} | 4376 | | .endif |
4444 | || break; | 4377 | || break; |
4445 | ||default: | 4378 | ||default: |
4446 | | checknum RB, ->vmeta_arith_vv | 4379 | | checknum RB, ->vmeta_arith_vv |
4447 | | checknum RC, ->vmeta_arith_vv | 4380 | | checknum RC, ->vmeta_arith_vv |
4448 | ||if (sse) { | 4381 | | .if SSE |
4449 | | movsd xmm0, qword [BASE+RB*8] | 4382 | | movsd xmm0, qword [BASE+RB*8] |
4450 | | sseins ssereg, qword [BASE+RC*8] | 4383 | | sseins ssereg, qword [BASE+RC*8] |
4451 | ||} else { | 4384 | | .else |
4452 | | fld qword [BASE+RB*8] | 4385 | | fld qword [BASE+RB*8] |
4453 | | x87ins qword [BASE+RC*8] | 4386 | | x87ins qword [BASE+RC*8] |
4454 | ||} | 4387 | | .endif |
4455 | || break; | 4388 | || break; |
4456 | ||} | 4389 | ||} |
4457 | |.endmacro | 4390 | |.endmacro |
@@ -4489,11 +4422,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4489 | |.endmacro | 4422 | |.endmacro |
4490 | | | 4423 | | |
4491 | |.macro ins_arithpost | 4424 | |.macro ins_arithpost |
4492 | ||if (sse) { | 4425 | |.if SSE |
4493 | | movsd qword [BASE+RA*8], xmm0 | 4426 | | movsd qword [BASE+RA*8], xmm0 |
4494 | ||} else { | 4427 | |.else |
4495 | | fstp qword [BASE+RA*8] | 4428 | | fstp qword [BASE+RA*8] |
4496 | ||} | 4429 | |.endif |
4497 | |.endmacro | 4430 | |.endmacro |
4498 | | | 4431 | | |
4499 | |.macro ins_arith, x87ins, sseins | 4432 | |.macro ins_arith, x87ins, sseins |
@@ -4503,11 +4436,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4503 | |.endmacro | 4436 | |.endmacro |
4504 | | | 4437 | | |
4505 | |.macro ins_arith, intins, x87ins, sseins | 4438 | |.macro ins_arith, intins, x87ins, sseins |
4506 | ||if (LJ_DUALNUM) { | 4439 | |.if DUALNUM |
4507 | | ins_arithdn intins | 4440 | | ins_arithdn intins |
4508 | ||} else { | 4441 | |.else |
4509 | | ins_arith, x87ins, sseins | 4442 | | ins_arith, x87ins, sseins |
4510 | ||} | 4443 | |.endif |
4511 | |.endmacro | 4444 | |.endmacro |
4512 | 4445 | ||
4513 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 4446 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
@@ -4591,39 +4524,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4591 | | ins_next | 4524 | | ins_next |
4592 | break; | 4525 | break; |
4593 | case BC_KCDATA: | 4526 | case BC_KCDATA: |
4594 | #if LJ_HASFFI | 4527 | |.if FFI |
4595 | | ins_AND // RA = dst, RD = cdata const (~) | 4528 | | ins_AND // RA = dst, RD = cdata const (~) |
4596 | | mov RD, [KBASE+RD*4] | 4529 | | mov RD, [KBASE+RD*4] |
4597 | | mov dword [BASE+RA*8+4], LJ_TCDATA | 4530 | | mov dword [BASE+RA*8+4], LJ_TCDATA |
4598 | | mov [BASE+RA*8], RD | 4531 | | mov [BASE+RA*8], RD |
4599 | | ins_next | 4532 | | ins_next |
4600 | #endif | 4533 | |.endif |
4601 | break; | 4534 | break; |
4602 | case BC_KSHORT: | 4535 | case BC_KSHORT: |
4603 | | ins_AD // RA = dst, RD = signed int16 literal | 4536 | | ins_AD // RA = dst, RD = signed int16 literal |
4604 | if (LJ_DUALNUM) { | 4537 | |.if DUALNUM |
4605 | | movsx RD, RDW | 4538 | | movsx RD, RDW |
4606 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4539 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4607 | | mov dword [BASE+RA*8], RD | 4540 | | mov dword [BASE+RA*8], RD |
4608 | } else if (sse) { | 4541 | |.elif SSE |
4609 | | movsx RD, RDW // Sign-extend literal. | 4542 | | movsx RD, RDW // Sign-extend literal. |
4610 | | cvtsi2sd xmm0, RD | 4543 | | cvtsi2sd xmm0, RD |
4611 | | movsd qword [BASE+RA*8], xmm0 | 4544 | | movsd qword [BASE+RA*8], xmm0 |
4612 | } else { | 4545 | |.else |
4613 | | fild PC_RD // Refetch signed RD from instruction. | 4546 | | fild PC_RD // Refetch signed RD from instruction. |
4614 | | fstp qword [BASE+RA*8] | 4547 | | fstp qword [BASE+RA*8] |
4615 | } | 4548 | |.endif |
4616 | | ins_next | 4549 | | ins_next |
4617 | break; | 4550 | break; |
4618 | case BC_KNUM: | 4551 | case BC_KNUM: |
4619 | | ins_AD // RA = dst, RD = num const | 4552 | | ins_AD // RA = dst, RD = num const |
4620 | if (sse) { | 4553 | |.if SSE |
4621 | | movsd xmm0, qword [KBASE+RD*8] | 4554 | | movsd xmm0, qword [KBASE+RD*8] |
4622 | | movsd qword [BASE+RA*8], xmm0 | 4555 | | movsd qword [BASE+RA*8], xmm0 |
4623 | } else { | 4556 | |.else |
4624 | | fld qword [KBASE+RD*8] | 4557 | | fld qword [KBASE+RD*8] |
4625 | | fstp qword [BASE+RA*8] | 4558 | | fstp qword [BASE+RA*8] |
4626 | } | 4559 | |.endif |
4627 | | ins_next | 4560 | | ins_next |
4628 | break; | 4561 | break; |
4629 | case BC_KPRI: | 4562 | case BC_KPRI: |
@@ -4730,18 +4663,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4730 | case BC_USETN: | 4663 | case BC_USETN: |
4731 | | ins_AD // RA = upvalue #, RD = num const | 4664 | | ins_AD // RA = upvalue #, RD = num const |
4732 | | mov LFUNC:RB, [BASE-8] | 4665 | | mov LFUNC:RB, [BASE-8] |
4733 | if (sse) { | 4666 | |.if SSE |
4734 | | movsd xmm0, qword [KBASE+RD*8] | 4667 | | movsd xmm0, qword [KBASE+RD*8] |
4735 | } else { | 4668 | |.else |
4736 | | fld qword [KBASE+RD*8] | 4669 | | fld qword [KBASE+RD*8] |
4737 | } | 4670 | |.endif |
4738 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 4671 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
4739 | | mov RA, UPVAL:RB->v | 4672 | | mov RA, UPVAL:RB->v |
4740 | if (sse) { | 4673 | |.if SSE |
4741 | | movsd qword [RA], xmm0 | 4674 | | movsd qword [RA], xmm0 |
4742 | } else { | 4675 | |.else |
4743 | | fstp qword [RA] | 4676 | | fstp qword [RA] |
4744 | } | 4677 | |.endif |
4745 | | ins_next | 4678 | | ins_next |
4746 | break; | 4679 | break; |
4747 | case BC_USETP: | 4680 | case BC_USETP: |
@@ -4889,28 +4822,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4889 | | mov TAB:RB, [BASE+RB*8] | 4822 | | mov TAB:RB, [BASE+RB*8] |
4890 | | | 4823 | | |
4891 | | // Integer key? | 4824 | | // Integer key? |
4892 | if (LJ_DUALNUM) { | 4825 | |.if DUALNUM |
4893 | | checkint RC, >5 | 4826 | | checkint RC, >5 |
4894 | | mov RC, dword [BASE+RC*8] | 4827 | | mov RC, dword [BASE+RC*8] |
4895 | } else { | 4828 | |.else |
4896 | | // Convert number to int and back and compare. | 4829 | | // Convert number to int and back and compare. |
4897 | | checknum RC, >5 | 4830 | | checknum RC, >5 |
4898 | if (sse) { | 4831 | |.if SSE |
4899 | | movsd xmm0, qword [BASE+RC*8] | 4832 | | movsd xmm0, qword [BASE+RC*8] |
4900 | | cvtsd2si RC, xmm0 | 4833 | | cvtsd2si RC, xmm0 |
4901 | | cvtsi2sd xmm1, RC | 4834 | | cvtsi2sd xmm1, RC |
4902 | | ucomisd xmm0, xmm1 | 4835 | | ucomisd xmm0, xmm1 |
4903 | } else { | 4836 | |.else |
4904 | |.if not X64 | 4837 | | fld qword [BASE+RC*8] |
4905 | | fld qword [BASE+RC*8] | 4838 | | fist ARG1 |
4906 | | fist ARG1 | 4839 | | fild ARG1 |
4907 | | fild ARG1 | 4840 | | fcomparepp |
4908 | | fcomparepp // eax (RC) modified! | 4841 | | mov RC, ARG1 |
4909 | | mov RC, ARG1 | 4842 | |.endif |
4910 | |.endif | 4843 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |
4911 | } | 4844 | |.endif |
4912 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | ||
4913 | } | ||
4914 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4845 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
4915 | | jae ->vmeta_tgetv // Not in array part? Use fallback. | 4846 | | jae ->vmeta_tgetv // Not in array part? Use fallback. |
4916 | | shl RC, 3 | 4847 | | shl RC, 3 |
@@ -5039,28 +4970,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5039 | | mov TAB:RB, [BASE+RB*8] | 4970 | | mov TAB:RB, [BASE+RB*8] |
5040 | | | 4971 | | |
5041 | | // Integer key? | 4972 | | // Integer key? |
5042 | if (LJ_DUALNUM) { | 4973 | |.if DUALNUM |
5043 | | checkint RC, >5 | 4974 | | checkint RC, >5 |
5044 | | mov RC, dword [BASE+RC*8] | 4975 | | mov RC, dword [BASE+RC*8] |
5045 | } else { | 4976 | |.else |
5046 | | // Convert number to int and back and compare. | 4977 | | // Convert number to int and back and compare. |
5047 | | checknum RC, >5 | 4978 | | checknum RC, >5 |
5048 | if (sse) { | 4979 | |.if SSE |
5049 | | movsd xmm0, qword [BASE+RC*8] | 4980 | | movsd xmm0, qword [BASE+RC*8] |
5050 | | cvtsd2si RC, xmm0 | 4981 | | cvtsd2si RC, xmm0 |
5051 | | cvtsi2sd xmm1, RC | 4982 | | cvtsi2sd xmm1, RC |
5052 | | ucomisd xmm0, xmm1 | 4983 | | ucomisd xmm0, xmm1 |
5053 | } else { | 4984 | |.else |
5054 | |.if not X64 | 4985 | | fld qword [BASE+RC*8] |
5055 | | fld qword [BASE+RC*8] | 4986 | | fist ARG1 |
5056 | | fist ARG1 | 4987 | | fild ARG1 |
5057 | | fild ARG1 | 4988 | | fcomparepp |
5058 | | fcomparepp // eax (RC) modified! | 4989 | | mov RC, ARG1 |
5059 | | mov RC, ARG1 | 4990 | |.endif |
5060 | |.endif | 4991 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |
5061 | } | 4992 | |.endif |
5062 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | ||
5063 | } | ||
5064 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4993 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
5065 | | jae ->vmeta_tsetv | 4994 | | jae ->vmeta_tsetv |
5066 | | shl RC, 3 | 4995 | | shl RC, 3 |
@@ -5406,9 +5335,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5406 | 5335 | ||
5407 | case BC_ITERN: | 5336 | case BC_ITERN: |
5408 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | 5337 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |
5409 | #if LJ_HASJIT | 5338 | |.if JIT |
5410 | | // NYI: add hotloop, record BC_ITERN. | 5339 | | // NYI: add hotloop, record BC_ITERN. |
5411 | #endif | 5340 | |.endif |
5412 | | mov TMP1, KBASE // Need two more free registers. | 5341 | | mov TMP1, KBASE // Need two more free registers. |
5413 | | mov TMP2, DISPATCH | 5342 | | mov TMP2, DISPATCH |
5414 | | mov TAB:RB, [BASE+RA*8-16] | 5343 | | mov TAB:RB, [BASE+RA*8-16] |
@@ -5419,14 +5348,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5419 | |1: // Traverse array part. | 5348 | |1: // Traverse array part. |
5420 | | cmp RC, DISPATCH; jae >5 // Index points after array part? | 5349 | | cmp RC, DISPATCH; jae >5 // Index points after array part? |
5421 | | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 | 5350 | | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 |
5422 | if (LJ_DUALNUM) { | 5351 | |.if DUALNUM |
5423 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 5352 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
5424 | | mov dword [BASE+RA*8], RC | 5353 | | mov dword [BASE+RA*8], RC |
5425 | } else if (sse) { | 5354 | |.elif SSE |
5426 | | cvtsi2sd xmm0, RC | 5355 | | cvtsi2sd xmm0, RC |
5427 | } else { | 5356 | |.else |
5428 | | fild dword [BASE+RA*8-8] | 5357 | | fild dword [BASE+RA*8-8] |
5429 | } | 5358 | |.endif |
5430 | | // Copy array slot to returned value. | 5359 | | // Copy array slot to returned value. |
5431 | |.if X64 | 5360 | |.if X64 |
5432 | | mov RBa, [KBASE+RC*8] | 5361 | | mov RBa, [KBASE+RC*8] |
@@ -5439,13 +5368,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5439 | |.endif | 5368 | |.endif |
5440 | | add RC, 1 | 5369 | | add RC, 1 |
5441 | | // Return array index as a numeric key. | 5370 | | // Return array index as a numeric key. |
5442 | if (LJ_DUALNUM) { | 5371 | |.if DUALNUM |
5443 | | // See above. | 5372 | | // See above. |
5444 | } else if (sse) { | 5373 | |.elif SSE |
5445 | | movsd qword [BASE+RA*8], xmm0 | 5374 | | movsd qword [BASE+RA*8], xmm0 |
5446 | } else { | 5375 | |.else |
5447 | | fstp qword [BASE+RA*8] | 5376 | | fstp qword [BASE+RA*8] |
5448 | } | 5377 | |.endif |
5449 | | mov [BASE+RA*8-8], RC // Update control var. | 5378 | | mov [BASE+RA*8-8], RC // Update control var. |
5450 | |2: | 5379 | |2: |
5451 | | movzx RD, PC_RD // Get target from ITERL. | 5380 | | movzx RD, PC_RD // Get target from ITERL. |
@@ -5457,9 +5386,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5457 | | | 5386 | | |
5458 | |4: // Skip holes in array part. | 5387 | |4: // Skip holes in array part. |
5459 | | add RC, 1 | 5388 | | add RC, 1 |
5460 | if (!LJ_DUALNUM && !sse) { | 5389 | |.if not (DUALNUM or SSE) |
5461 | | mov [BASE+RA*8-8], RC | 5390 | | mov [BASE+RA*8-8], RC |
5462 | } | 5391 | |.endif |
5463 | | jmp <1 | 5392 | | jmp <1 |
5464 | | | 5393 | | |
5465 | |5: // Traverse hash part. | 5394 | |5: // Traverse hash part. |
@@ -5695,9 +5624,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5695 | |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] | 5624 | |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] |
5696 | 5625 | ||
5697 | case BC_FORL: | 5626 | case BC_FORL: |
5698 | #if LJ_HASJIT | 5627 | |.if JIT |
5699 | | hotloop RB | 5628 | | hotloop RB |
5700 | #endif | 5629 | |.endif |
5701 | | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. | 5630 | | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. |
5702 | break; | 5631 | break; |
5703 | 5632 | ||
@@ -5792,76 +5721,73 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5792 | if (!vk) { | 5721 | if (!vk) { |
5793 | | cmp RB, LJ_TISNUM; jae ->vmeta_for | 5722 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
5794 | } | 5723 | } |
5795 | if (sse) { | 5724 | |.if SSE |
5796 | | movsd xmm0, qword FOR_IDX | 5725 | | movsd xmm0, qword FOR_IDX |
5797 | | movsd xmm1, qword FOR_STOP | 5726 | | movsd xmm1, qword FOR_STOP |
5798 | if (vk) { | 5727 | if (vk) { |
5799 | | addsd xmm0, qword FOR_STEP | 5728 | | addsd xmm0, qword FOR_STEP |
5800 | | movsd qword FOR_IDX, xmm0 | 5729 | | movsd qword FOR_IDX, xmm0 |
5801 | | test RB, RB; js >3 | 5730 | | test RB, RB; js >3 |
5802 | } else { | ||
5803 | | jl >3 | ||
5804 | } | ||
5805 | | ucomisd xmm1, xmm0 | ||
5806 | |1: | ||
5807 | | movsd qword FOR_EXT, xmm0 | ||
5808 | } else { | 5731 | } else { |
5809 | | fld qword FOR_STOP | 5732 | | jl >3 |
5810 | | fld qword FOR_IDX | 5733 | } |
5811 | if (vk) { | 5734 | | ucomisd xmm1, xmm0 |
5812 | | fadd qword FOR_STEP // nidx = idx + step | 5735 | |1: |
5813 | | fst qword FOR_IDX | 5736 | | movsd qword FOR_EXT, xmm0 |
5814 | | fst qword FOR_EXT | 5737 | |.else |
5815 | | test RB, RB; js >1 | 5738 | | fld qword FOR_STOP |
5816 | } else { | 5739 | | fld qword FOR_IDX |
5817 | | fst qword FOR_EXT | 5740 | if (vk) { |
5818 | | jl >1 | 5741 | | fadd qword FOR_STEP // nidx = idx + step |
5819 | } | 5742 | | fst qword FOR_IDX |
5820 | | fxch // Swap lim/(n)idx if step non-negative. | 5743 | | fst qword FOR_EXT |
5821 | |1: | 5744 | | test RB, RB; js >1 |
5822 | | fcomparepp // eax (RD) modified if !cmov. | 5745 | } else { |
5823 | if (!cmov) { | 5746 | | fst qword FOR_EXT |
5824 | | movzx RD, PC_RD // Need to reload RD. | 5747 | | jl >1 |
5825 | } | ||
5826 | } | 5748 | } |
5749 | | fxch // Swap lim/(n)idx if step non-negative. | ||
5750 | |1: | ||
5751 | | fcomparepp | ||
5752 | |.endif | ||
5827 | if (op == BC_FORI) { | 5753 | if (op == BC_FORI) { |
5828 | if (LJ_DUALNUM) { | 5754 | |.if DUALNUM |
5829 | | jnb <7 | 5755 | | jnb <7 |
5830 | } else { | 5756 | |.else |
5831 | | jnb >2 | 5757 | | jnb >2 |
5832 | | branchPC RD | 5758 | | branchPC RD |
5833 | } | 5759 | |.endif |
5834 | } else if (op == BC_JFORI) { | 5760 | } else if (op == BC_JFORI) { |
5835 | | branchPC RD | 5761 | | branchPC RD |
5836 | | movzx RD, PC_RD | 5762 | | movzx RD, PC_RD |
5837 | | jnb =>BC_JLOOP | 5763 | | jnb =>BC_JLOOP |
5838 | } else if (op == BC_IFORL) { | 5764 | } else if (op == BC_IFORL) { |
5839 | if (LJ_DUALNUM) { | 5765 | |.if DUALNUM |
5840 | | jb <7 | 5766 | | jb <7 |
5841 | } else { | 5767 | |.else |
5842 | | jb >2 | 5768 | | jb >2 |
5843 | | branchPC RD | 5769 | | branchPC RD |
5844 | } | 5770 | |.endif |
5845 | } else { | 5771 | } else { |
5846 | | jnb =>BC_JLOOP | 5772 | | jnb =>BC_JLOOP |
5847 | } | 5773 | } |
5848 | if (LJ_DUALNUM) { | 5774 | |.if DUALNUM |
5849 | | jmp <6 | 5775 | | jmp <6 |
5850 | } else { | 5776 | |.else |
5851 | |2: | 5777 | |2: |
5852 | | ins_next | 5778 | | ins_next |
5853 | } | 5779 | |.endif |
5854 | if (sse) { | 5780 | |.if SSE |
5855 | |3: // Invert comparison if step is negative. | 5781 | |3: // Invert comparison if step is negative. |
5856 | | ucomisd xmm0, xmm1 | 5782 | | ucomisd xmm0, xmm1 |
5857 | | jmp <1 | 5783 | | jmp <1 |
5858 | } | 5784 | |.endif |
5859 | break; | 5785 | break; |
5860 | 5786 | ||
5861 | case BC_ITERL: | 5787 | case BC_ITERL: |
5862 | #if LJ_HASJIT | 5788 | |.if JIT |
5863 | | hotloop RB | 5789 | | hotloop RB |
5864 | #endif | 5790 | |.endif |
5865 | | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. | 5791 | | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. |
5866 | break; | 5792 | break; |
5867 | 5793 | ||
@@ -5893,9 +5819,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5893 | | ins_A // RA = base, RD = target (loop extent) | 5819 | | ins_A // RA = base, RD = target (loop extent) |
5894 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | 5820 | | // Note: RA/RD is only used by trace recorder to determine scope/extent |
5895 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | 5821 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. |
5896 | #if LJ_HASJIT | 5822 | |.if JIT |
5897 | | hotloop RB | 5823 | | hotloop RB |
5898 | #endif | 5824 | |.endif |
5899 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | 5825 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. |
5900 | break; | 5826 | break; |
5901 | 5827 | ||
@@ -5905,7 +5831,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5905 | break; | 5831 | break; |
5906 | 5832 | ||
5907 | case BC_JLOOP: | 5833 | case BC_JLOOP: |
5908 | #if LJ_HASJIT | 5834 | |.if JIT |
5909 | | ins_AD // RA = base (ignored), RD = traceno | 5835 | | ins_AD // RA = base (ignored), RD = traceno |
5910 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | 5836 | | mov RA, [DISPATCH+DISPATCH_J(trace)] |
5911 | | mov TRACE:RD, [RA+RD*4] | 5837 | | mov TRACE:RD, [RA+RD*4] |
@@ -5937,7 +5863,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5937 | | sub rsp, 16 | 5863 | | sub rsp, 16 |
5938 | |.endif | 5864 | |.endif |
5939 | | jmp RDa | 5865 | | jmp RDa |
5940 | #endif | 5866 | |.endif |
5941 | break; | 5867 | break; |
5942 | 5868 | ||
5943 | case BC_JMP: | 5869 | case BC_JMP: |
@@ -5956,9 +5882,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5956 | */ | 5882 | */ |
5957 | 5883 | ||
5958 | case BC_FUNCF: | 5884 | case BC_FUNCF: |
5959 | #if LJ_HASJIT | 5885 | |.if JIT |
5960 | | hotcall RB | 5886 | | hotcall RB |
5961 | #endif | 5887 | |.endif |
5962 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | 5888 | case BC_FUNCV: /* NYI: compiled vararg functions. */ |
5963 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. | 5889 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. |
5964 | break; | 5890 | break; |
@@ -6101,23 +6027,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
6101 | static int build_backend(BuildCtx *ctx) | 6027 | static int build_backend(BuildCtx *ctx) |
6102 | { | 6028 | { |
6103 | int op; | 6029 | int op; |
6104 | int cmov = 1; | ||
6105 | int sse = 0; | ||
6106 | #ifdef LUAJIT_CPU_NOCMOV | ||
6107 | cmov = 0; | ||
6108 | #endif | ||
6109 | #if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64) | ||
6110 | sse = 1; | ||
6111 | #endif | ||
6112 | |||
6113 | dasm_growpc(Dst, BC__MAX); | 6030 | dasm_growpc(Dst, BC__MAX); |
6114 | 6031 | build_subroutines(ctx); | |
6115 | build_subroutines(ctx, cmov, sse); | ||
6116 | |||
6117 | |.code_op | 6032 | |.code_op |
6118 | for (op = 0; op < BC__MAX; op++) | 6033 | for (op = 0; op < BC__MAX; op++) |
6119 | build_ins(ctx, (BCOp)op, op, cmov, sse); | 6034 | build_ins(ctx, (BCOp)op, op); |
6120 | |||
6121 | return BC__MAX; | 6035 | return BC__MAX; |
6122 | } | 6036 | } |
6123 | 6037 | ||