diff options
| author | Mike Pall <mike> | 2012-06-10 16:44:33 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2012-06-10 16:50:46 +0200 |
| commit | 58ec704f78e311e6af97841a9e26cd7187955494 (patch) | |
| tree | c66d9aedcbd7ed7945573b571c4e2737050e31b3 /src | |
| parent | e496a502b0686af25053c161752c044074edc44e (diff) | |
| download | luajit-58ec704f78e311e6af97841a9e26cd7187955494.tar.gz luajit-58ec704f78e311e6af97841a9e26cd7187955494.tar.bz2 luajit-58ec704f78e311e6af97841a9e26cd7187955494.zip | |
x86/x64: Clean up interpreter.
Use DynASM defines instead of C defines.
Remove support for ancient CPUs without CMOV (before Pentium Pro).
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile | 19 | ||||
| -rw-r--r-- | src/msvcbuild.bat | 7 | ||||
| -rw-r--r-- | src/vm_x86.dasc | 2226 |
3 files changed, 1076 insertions, 1176 deletions
diff --git a/src/Makefile b/src/Makefile
index d9bb178b..9d21c3fb 100644
--- a/src/Makefile
+++ b/src/Makefile
| @@ -42,9 +42,8 @@ CCOPT= -O2 -fomit-frame-pointer | |||
| 42 | # | 42 | # |
| 43 | # Target-specific compiler options: | 43 | # Target-specific compiler options: |
| 44 | # | 44 | # |
| 45 | # x86 only: it's recommended to compile at least for i686. By default the | 45 | # x86 only: it's recommended to compile at least for i686. Better yet, |
| 46 | # assembler part of the interpreter makes use of CMOV/FCOMI*/FUCOMI* | 46 | # compile for an architecture that has SSE2, too (-msse -msse2). |
| 47 | # instructions, anyway. | ||
| 48 | # | 47 | # |
| 49 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute | 48 | # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute |
| 50 | # the binaries to a different machine you could also use: -march=native | 49 | # the binaries to a different machine you could also use: -march=native |
| @@ -105,20 +104,6 @@ XCFLAGS= | |||
| 105 | # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. | 104 | # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. |
| 106 | #XCFLAGS+= -DLUAJIT_DISABLE_JIT | 105 | #XCFLAGS+= -DLUAJIT_DISABLE_JIT |
| 107 | # | 106 | # |
| 108 | # x86 only: use SSE2 instead of x87 instructions in the interpreter | ||
| 109 | # (always enabled for x64). A pure interpreter built with this flag won't | ||
| 110 | # run on older CPUs (before P4 or K8). There isn't much of a speed | ||
| 111 | # difference, so this is not enabled by default. | ||
| 112 | # The JIT compiler is not affected by this flag. It always uses runtime | ||
| 113 | # CPU feature detection before emitting code for SSE2 up to SSE4.1. | ||
| 114 | #XCFLAGS+= -DLUAJIT_CPU_SSE2 | ||
| 115 | # | ||
| 116 | # x86 only: Disable the use of CMOV and FCOMI*/FUCOMI* instructions in the | ||
| 117 | # interpreter. Do this only if you intend to use REALLY ANCIENT CPUs | ||
| 118 | # (before Pentium Pro, or on the VIA C3). This generally slows down the | ||
| 119 | # interpreter. Don't bother if your OS wouldn't run on them, anyway. | ||
| 120 | #XCFLAGS+= -DLUAJIT_CPU_NOCMOV | ||
| 121 | # | ||
| 122 | # Some architectures (e.g. PPC) can use either single-number (1) or | 107 | # Some architectures (e.g. PPC) can use either single-number (1) or |
| 123 | # dual-number (2) mode. Uncomment one of these lines to override the | 108 | # dual-number (2) mode. Uncomment one of these lines to override the |
| 124 | # default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. | 109 | # default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. |
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index ad6f2113..ca943a63 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
| @@ -29,15 +29,16 @@ | |||
| 29 | if exist minilua.exe.manifest^ | 29 | if exist minilua.exe.manifest^ |
| 30 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe | 30 | %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe |
| 31 | 31 | ||
| 32 | @set DASMFLAGS=-D X64 -D WIN | 32 | @set DASMFLAGS=-D WIN -D JIT -D FFI |
| 33 | @set DASMX64=-D X64 | ||
| 33 | @if defined CPU goto :XCPU | 34 | @if defined CPU goto :XCPU |
| 34 | @set CPU=%PROCESSOR_ARCHITECTURE% | 35 | @set CPU=%PROCESSOR_ARCHITECTURE% |
| 35 | :XCPU | 36 | :XCPU |
| 36 | @if "%CPU%"=="AMD64" goto :X64 | 37 | @if "%CPU%"=="AMD64" goto :X64 |
| 37 | @if "%CPU%"=="X64" goto :X64 | 38 | @if "%CPU%"=="X64" goto :X64 |
| 38 | @set DASMFLAGS=-D WIN | 39 | @set DASMX64= |
| 39 | :X64 | 40 | :X64 |
| 40 | minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc | 41 | minilua %DASM% -LN %DASMFLAGS% %DASMX64% -o host\buildvm_arch.h vm_x86.dasc |
| 41 | @if errorlevel 1 goto :BAD | 42 | @if errorlevel 1 goto :BAD |
| 42 | 43 | ||
| 43 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c | 44 | %LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 1cab76eb..38b268d4 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
| @@ -50,7 +50,7 @@ | |||
| 50 | |.define RAH, ch | 50 | |.define RAH, ch |
| 51 | |.define RAL, cl | 51 | |.define RAL, cl |
| 52 | |.define RB, ebp // Must be ebp (C callee-save). | 52 | |.define RB, ebp // Must be ebp (C callee-save). |
| 53 | |.define RC, eax // Must be eax (fcomparepp and others). | 53 | |.define RC, eax // Must be eax. |
| 54 | |.define RCW, ax | 54 | |.define RCW, ax |
| 55 | |.define RCH, ah | 55 | |.define RCH, ah |
| 56 | |.define RCL, al | 56 | |.define RCL, al |
| @@ -366,16 +366,10 @@ | |||
| 366 | | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st | 366 | | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st |
| 367 | |.endmacro | 367 | |.endmacro |
| 368 | | | 368 | | |
| 369 | |// Annoying x87 stuff: support for two compare variants. | 369 | |// x87 compares. |
| 370 | |.macro fcomparepp // Compare and pop st0 >< st1. | 370 | |.macro fcomparepp // Compare and pop st0 >< st1. |
| 371 | ||if (cmov) { | ||
| 372 | | fucomip st1 | 371 | | fucomip st1 |
| 373 | | fpop | 372 | | fpop |
| 374 | ||} else { | ||
| 375 | | fucompp | ||
| 376 | | fnstsw ax // eax modified! | ||
| 377 | | sahf | ||
| 378 | ||} | ||
| 379 | |.endmacro | 373 | |.endmacro |
| 380 | | | 374 | | |
| 381 | |.macro fdup; fld st0; .endmacro | 375 | |.macro fdup; fld st0; .endmacro |
| @@ -426,7 +420,7 @@ | |||
| 426 | 420 | ||
| 427 | /* Generate subroutines used by opcodes and other parts of the VM. */ | 421 | /* Generate subroutines used by opcodes and other parts of the VM. */ |
| 428 | /* The .code_sub section should be last to help static branch prediction. */ | 422 | /* The .code_sub section should be last to help static branch prediction. */ |
| 429 | static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | 423 | static void build_subroutines(BuildCtx *ctx) |
| 430 | { | 424 | { |
| 431 | |.code_sub | 425 | |.code_sub |
| 432 | | | 426 | | |
| @@ -776,18 +770,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 776 | | mov PC, [RB-12] // Restore PC from [cont|PC]. | 770 | | mov PC, [RB-12] // Restore PC from [cont|PC]. |
| 777 | |.if X64 | 771 | |.if X64 |
| 778 | | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. | 772 | | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. |
| 779 | #if LJ_HASFFI | 773 | |.if FFI |
| 780 | | cmp RA, 1 | 774 | | cmp RA, 1 |
| 781 | | jbe >1 | 775 | | jbe >1 |
| 782 | #endif | 776 | |.endif |
| 783 | | lea KBASEa, qword [=>0] | 777 | | lea KBASEa, qword [=>0] |
| 784 | | add RAa, KBASEa | 778 | | add RAa, KBASEa |
| 785 | |.else | 779 | |.else |
| 786 | | mov RA, dword [RB-16] | 780 | | mov RA, dword [RB-16] |
| 787 | #if LJ_HASFFI | 781 | |.if FFI |
| 788 | | cmp RA, 1 | 782 | | cmp RA, 1 |
| 789 | | jbe >1 | 783 | | jbe >1 |
| 790 | #endif | 784 | |.endif |
| 791 | |.endif | 785 | |.endif |
| 792 | | mov LFUNC:KBASE, [BASE-8] | 786 | | mov LFUNC:KBASE, [BASE-8] |
| 793 | | mov KBASE, LFUNC:KBASE->pc | 787 | | mov KBASE, LFUNC:KBASE->pc |
| @@ -795,7 +789,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 795 | | // BASE = base, RC = result, RB = meta base | 789 | | // BASE = base, RC = result, RB = meta base |
| 796 | | jmp RAa // Jump to continuation. | 790 | | jmp RAa // Jump to continuation. |
| 797 | | | 791 | | |
| 798 | #if LJ_HASFFI | 792 | |.if FFI |
| 799 | |1: | 793 | |1: |
| 800 | | je ->cont_ffi_callback // cont = 1: return from FFI callback. | 794 | | je ->cont_ffi_callback // cont = 1: return from FFI callback. |
| 801 | | // cont = 0: Tail call from C function. | 795 | | // cont = 0: Tail call from C function. |
| @@ -803,7 +797,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 803 | | shr RB, 3 | 797 | | shr RB, 3 |
| 804 | | lea RD, [RB-1] | 798 | | lea RD, [RB-1] |
| 805 | | jmp ->vm_call_tail | 799 | | jmp ->vm_call_tail |
| 806 | #endif | 800 | |.endif |
| 807 | | | 801 | | |
| 808 | |->cont_cat: // BASE = base, RC = result, RB = mbase | 802 | |->cont_cat: // BASE = base, RC = result, RB = mbase |
| 809 | | movzx RA, PC_RB | 803 | | movzx RA, PC_RB |
| @@ -853,19 +847,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 853 | | | 847 | | |
| 854 | |->vmeta_tgetb: | 848 | |->vmeta_tgetb: |
| 855 | | movzx RC, PC_RC | 849 | | movzx RC, PC_RC |
| 856 | if (LJ_DUALNUM) { | 850 | |.if DUALNUM |
| 857 | | mov TMP2, LJ_TISNUM | 851 | | mov TMP2, LJ_TISNUM |
| 858 | | mov TMP1, RC | 852 | | mov TMP1, RC |
| 859 | } else if (sse) { | 853 | |.elif SSE |
| 860 | | cvtsi2sd xmm0, RC | 854 | | cvtsi2sd xmm0, RC |
| 861 | | movsd TMPQ, xmm0 | 855 | | movsd TMPQ, xmm0 |
| 862 | } else { | 856 | |.else |
| 863 | |.if not X64 | 857 | | mov ARG4, RC |
| 864 | | mov ARG4, RC | 858 | | fild ARG4 |
| 865 | | fild ARG4 | 859 | | fstp TMPQ |
| 866 | | fstp TMPQ | 860 | |.endif |
| 867 | |.endif | ||
| 868 | } | ||
| 869 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 861 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
| 870 | | jmp >1 | 862 | | jmp >1 |
| 871 | | | 863 | | |
| @@ -934,19 +926,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 934 | | | 926 | | |
| 935 | |->vmeta_tsetb: | 927 | |->vmeta_tsetb: |
| 936 | | movzx RC, PC_RC | 928 | | movzx RC, PC_RC |
| 937 | if (LJ_DUALNUM) { | 929 | |.if DUALNUM |
| 938 | | mov TMP2, LJ_TISNUM | 930 | | mov TMP2, LJ_TISNUM |
| 939 | | mov TMP1, RC | 931 | | mov TMP1, RC |
| 940 | } else if (sse) { | 932 | |.elif SSE |
| 941 | | cvtsi2sd xmm0, RC | 933 | | cvtsi2sd xmm0, RC |
| 942 | | movsd TMPQ, xmm0 | 934 | | movsd TMPQ, xmm0 |
| 943 | } else { | 935 | |.else |
| 944 | |.if not X64 | 936 | | mov ARG4, RC |
| 945 | | mov ARG4, RC | 937 | | fild ARG4 |
| 946 | | fild ARG4 | 938 | | fstp TMPQ |
| 947 | | fstp TMPQ | 939 | |.endif |
| 948 | |.endif | ||
| 949 | } | ||
| 950 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 940 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
| 951 | | jmp >1 | 941 | | jmp >1 |
| 952 | | | 942 | | |
| @@ -1093,7 +1083,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1093 | | jmp <3 | 1083 | | jmp <3 |
| 1094 | | | 1084 | | |
| 1095 | |->vmeta_equal_cd: | 1085 | |->vmeta_equal_cd: |
| 1096 | #if LJ_HASFFI | 1086 | |.if FFI |
| 1097 | | sub PC, 4 | 1087 | | sub PC, 4 |
| 1098 | | mov L:RB, SAVE_L | 1088 | | mov L:RB, SAVE_L |
| 1099 | | mov L:RB->base, BASE | 1089 | | mov L:RB->base, BASE |
| @@ -1103,22 +1093,22 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1103 | | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) | 1093 | | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) |
| 1104 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | 1094 | | // 0/1 or TValue * (metamethod) returned in eax (RC). |
| 1105 | | jmp <3 | 1095 | | jmp <3 |
| 1106 | #endif | 1096 | |.endif |
| 1107 | | | 1097 | | |
| 1108 | |//-- Arithmetic metamethods --------------------------------------------- | 1098 | |//-- Arithmetic metamethods --------------------------------------------- |
| 1109 | | | 1099 | | |
| 1110 | |->vmeta_arith_vno: | 1100 | |->vmeta_arith_vno: |
| 1111 | #if LJ_DUALNUM | 1101 | |.if DUALNUM |
| 1112 | | movzx RB, PC_RB | 1102 | | movzx RB, PC_RB |
| 1113 | #endif | 1103 | |.endif |
| 1114 | |->vmeta_arith_vn: | 1104 | |->vmeta_arith_vn: |
| 1115 | | lea RC, [KBASE+RC*8] | 1105 | | lea RC, [KBASE+RC*8] |
| 1116 | | jmp >1 | 1106 | | jmp >1 |
| 1117 | | | 1107 | | |
| 1118 | |->vmeta_arith_nvo: | 1108 | |->vmeta_arith_nvo: |
| 1119 | #if LJ_DUALNUM | 1109 | |.if DUALNUM |
| 1120 | | movzx RC, PC_RC | 1110 | | movzx RC, PC_RC |
| 1121 | #endif | 1111 | |.endif |
| 1122 | |->vmeta_arith_nv: | 1112 | |->vmeta_arith_nv: |
| 1123 | | lea RC, [KBASE+RC*8] | 1113 | | lea RC, [KBASE+RC*8] |
| 1124 | | lea RB, [BASE+RB*8] | 1114 | | lea RB, [BASE+RB*8] |
| @@ -1131,9 +1121,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1131 | | jmp >2 | 1121 | | jmp >2 |
| 1132 | | | 1122 | | |
| 1133 | |->vmeta_arith_vvo: | 1123 | |->vmeta_arith_vvo: |
| 1134 | #if LJ_DUALNUM | 1124 | |.if DUALNUM |
| 1135 | | movzx RB, PC_RB | 1125 | | movzx RB, PC_RB |
| 1136 | #endif | 1126 | |.endif |
| 1137 | |->vmeta_arith_vv: | 1127 | |->vmeta_arith_vv: |
| 1138 | | lea RC, [BASE+RC*8] | 1128 | | lea RC, [BASE+RC*8] |
| 1139 | |1: | 1129 | |1: |
| @@ -1374,11 +1364,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1374 | | mov RC, ~LJ_TNUMX | 1364 | | mov RC, ~LJ_TNUMX |
| 1375 | | not RB | 1365 | | not RB |
| 1376 | | cmp RC, RB | 1366 | | cmp RC, RB |
| 1377 | ||if (cmov) { | ||
| 1378 | | cmova RC, RB | 1367 | | cmova RC, RB |
| 1379 | ||} else { | ||
| 1380 | | jbe >1; mov RC, RB; 1: | ||
| 1381 | ||} | ||
| 1382 | |2: | 1368 | |2: |
| 1383 | | mov CFUNC:RB, [BASE-8] | 1369 | | mov CFUNC:RB, [BASE-8] |
| 1384 | | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] | 1370 | | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] |
| @@ -1509,19 +1495,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1509 | | // Only handles the number case inline (without a base argument). | 1495 | | // Only handles the number case inline (without a base argument). |
| 1510 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 1496 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
| 1511 | | cmp dword [BASE+4], LJ_TISNUM | 1497 | | cmp dword [BASE+4], LJ_TISNUM |
| 1512 | if (LJ_DUALNUM) { | 1498 | |.if DUALNUM |
| 1513 | | jne >1 | 1499 | | jne >1 |
| 1514 | | mov RB, dword [BASE]; jmp ->fff_resi | 1500 | | mov RB, dword [BASE]; jmp ->fff_resi |
| 1515 | |1: | 1501 | |1: |
| 1516 | | ja ->fff_fallback | 1502 | | ja ->fff_fallback |
| 1517 | } else { | 1503 | |.else |
| 1518 | | jae ->fff_fallback | 1504 | | jae ->fff_fallback |
| 1519 | } | 1505 | |.endif |
| 1520 | if (sse) { | 1506 | |.if SSE |
| 1521 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1507 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
| 1522 | } else { | 1508 | |.else |
| 1523 | | fld qword [BASE]; jmp ->fff_resn | 1509 | | fld qword [BASE]; jmp ->fff_resn |
| 1524 | } | 1510 | |.endif |
| 1525 | | | 1511 | | |
| 1526 | |.ffunc_1 tostring | 1512 | |.ffunc_1 tostring |
| 1527 | | // Only handles the string or number case inline. | 1513 | | // Only handles the string or number case inline. |
| @@ -1545,11 +1531,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1545 | | mov FCARG2, BASE // Otherwise: FCARG2 == BASE | 1531 | | mov FCARG2, BASE // Otherwise: FCARG2 == BASE |
| 1546 | |.endif | 1532 | |.endif |
| 1547 | | mov L:FCARG1, L:RB | 1533 | | mov L:FCARG1, L:RB |
| 1548 | if (LJ_DUALNUM) { | 1534 | |.if DUALNUM |
| 1549 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) | 1535 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) |
| 1550 | } else { | 1536 | |.else |
| 1551 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | 1537 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) |
| 1552 | } | 1538 | |.endif |
| 1553 | | // GCstr returned in eax (RD). | 1539 | | // GCstr returned in eax (RD). |
| 1554 | | mov BASE, L:RB->base | 1540 | | mov BASE, L:RB->base |
| 1555 | | jmp <2 | 1541 | | jmp <2 |
| @@ -1628,33 +1614,31 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1628 | |.ffunc_1 ipairs_aux | 1614 | |.ffunc_1 ipairs_aux |
| 1629 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | 1615 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback |
| 1630 | | cmp dword [BASE+12], LJ_TISNUM | 1616 | | cmp dword [BASE+12], LJ_TISNUM |
| 1631 | if (LJ_DUALNUM) { | 1617 | |.if DUALNUM |
| 1632 | | jne ->fff_fallback | 1618 | | jne ->fff_fallback |
| 1633 | } else { | 1619 | |.else |
| 1634 | | jae ->fff_fallback | 1620 | | jae ->fff_fallback |
| 1635 | } | 1621 | |.endif |
| 1636 | | mov PC, [BASE-4] | 1622 | | mov PC, [BASE-4] |
| 1637 | if (LJ_DUALNUM) { | 1623 | |.if DUALNUM |
| 1638 | | mov RD, dword [BASE+8] | 1624 | | mov RD, dword [BASE+8] |
| 1639 | | add RD, 1 | 1625 | | add RD, 1 |
| 1640 | | mov dword [BASE-4], LJ_TISNUM | 1626 | | mov dword [BASE-4], LJ_TISNUM |
| 1641 | | mov dword [BASE-8], RD | 1627 | | mov dword [BASE-8], RD |
| 1642 | } else if (sse) { | 1628 | |.elif SSE |
| 1643 | | movsd xmm0, qword [BASE+8] | 1629 | | movsd xmm0, qword [BASE+8] |
| 1644 | | sseconst_1 xmm1, RBa | 1630 | | sseconst_1 xmm1, RBa |
| 1645 | | addsd xmm0, xmm1 | 1631 | | addsd xmm0, xmm1 |
| 1646 | | cvtsd2si RD, xmm0 | 1632 | | cvtsd2si RD, xmm0 |
| 1647 | | movsd qword [BASE-8], xmm0 | 1633 | | movsd qword [BASE-8], xmm0 |
| 1648 | } else { | 1634 | |.else |
| 1649 | |.if not X64 | 1635 | | fld qword [BASE+8] |
| 1650 | | fld qword [BASE+8] | 1636 | | fld1 |
| 1651 | | fld1 | 1637 | | faddp st1 |
| 1652 | | faddp st1 | 1638 | | fist ARG1 |
| 1653 | | fist ARG1 | 1639 | | fstp qword [BASE-8] |
| 1654 | | fstp qword [BASE-8] | 1640 | | mov RD, ARG1 |
| 1655 | | mov RD, ARG1 | 1641 | |.endif |
| 1656 | |.endif | ||
| 1657 | } | ||
| 1658 | | mov TAB:RB, [BASE] | 1642 | | mov TAB:RB, [BASE] |
| 1659 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | 1643 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? |
| 1660 | | shl RD, 3 | 1644 | | shl RD, 3 |
| @@ -1697,16 +1681,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1697 | | mov PC, [BASE-4] | 1681 | | mov PC, [BASE-4] |
| 1698 | | mov dword [BASE-4], LJ_TFUNC | 1682 | | mov dword [BASE-4], LJ_TFUNC |
| 1699 | | mov [BASE-8], CFUNC:RD | 1683 | | mov [BASE-8], CFUNC:RD |
| 1700 | if (LJ_DUALNUM) { | 1684 | |.if DUALNUM |
| 1701 | | mov dword [BASE+12], LJ_TISNUM | 1685 | | mov dword [BASE+12], LJ_TISNUM |
| 1702 | | mov dword [BASE+8], 0 | 1686 | | mov dword [BASE+8], 0 |
| 1703 | } else if (sse) { | 1687 | |.elif SSE |
| 1704 | | xorps xmm0, xmm0 | 1688 | | xorps xmm0, xmm0 |
| 1705 | | movsd qword [BASE+8], xmm0 | 1689 | | movsd qword [BASE+8], xmm0 |
| 1706 | } else { | 1690 | |.else |
| 1707 | | fldz | 1691 | | fldz |
| 1708 | | fstp qword [BASE+8] | 1692 | | fstp qword [BASE+8] |
| 1709 | } | 1693 | |.endif |
| 1710 | | mov RD, 1+3 | 1694 | | mov RD, 1+3 |
| 1711 | | jmp ->fff_res | 1695 | | jmp ->fff_res |
| 1712 | | | 1696 | | |
| @@ -1931,54 +1915,58 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 1931 | | | 1915 | | |
| 1932 | |//-- Math library ------------------------------------------------------- | 1916 | |//-- Math library ------------------------------------------------------- |
| 1933 | | | 1917 | | |
| 1934 | if (!LJ_DUALNUM) { | 1918 | |.if not DUALNUM |
| 1935 | |->fff_resi: // Dummy. | 1919 | |->fff_resi: // Dummy. |
| 1936 | } | 1920 | |.endif |
| 1937 | if (sse) { | 1921 | | |
| 1938 | |->fff_resn: | 1922 | |.if SSE |
| 1939 | | mov PC, [BASE-4] | 1923 | |->fff_resn: |
| 1940 | | fstp qword [BASE-8] | 1924 | | mov PC, [BASE-4] |
| 1941 | | jmp ->fff_res1 | 1925 | | fstp qword [BASE-8] |
| 1942 | } | 1926 | | jmp ->fff_res1 |
| 1927 | |.endif | ||
| 1928 | | | ||
| 1943 | | .ffunc_1 math_abs | 1929 | | .ffunc_1 math_abs |
| 1944 | if (LJ_DUALNUM) { | 1930 | |.if DUALNUM |
| 1945 | | cmp dword [BASE+4], LJ_TISNUM; jne >2 | 1931 | | cmp dword [BASE+4], LJ_TISNUM; jne >2 |
| 1946 | | mov RB, dword [BASE] | 1932 | | mov RB, dword [BASE] |
| 1947 | | cmp RB, 0; jns ->fff_resi | 1933 | | cmp RB, 0; jns ->fff_resi |
| 1948 | | neg RB; js >1 | 1934 | | neg RB; js >1 |
| 1949 | |->fff_resbit: | 1935 | |->fff_resbit: |
| 1950 | |->fff_resi: | 1936 | |->fff_resi: |
| 1951 | | mov PC, [BASE-4] | 1937 | | mov PC, [BASE-4] |
| 1952 | | mov dword [BASE-4], LJ_TISNUM | 1938 | | mov dword [BASE-4], LJ_TISNUM |
| 1953 | | mov dword [BASE-8], RB | 1939 | | mov dword [BASE-8], RB |
| 1954 | | jmp ->fff_res1 | 1940 | | jmp ->fff_res1 |
| 1955 | |1: | 1941 | |1: |
| 1956 | | mov PC, [BASE-4] | 1942 | | mov PC, [BASE-4] |
| 1957 | | mov dword [BASE-4], 0x41e00000 // 2^31. | 1943 | | mov dword [BASE-4], 0x41e00000 // 2^31. |
| 1958 | | mov dword [BASE-8], 0 | 1944 | | mov dword [BASE-8], 0 |
| 1959 | | jmp ->fff_res1 | 1945 | | jmp ->fff_res1 |
| 1960 | |2: | 1946 | |2: |
| 1961 | | ja ->fff_fallback | 1947 | | ja ->fff_fallback |
| 1962 | } else { | 1948 | |.else |
| 1963 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1949 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
| 1964 | } | 1950 | |.endif |
| 1965 | if (sse) { | 1951 | | |
| 1966 | | movsd xmm0, qword [BASE] | 1952 | |.if SSE |
| 1967 | | sseconst_abs xmm1, RDa | 1953 | | movsd xmm0, qword [BASE] |
| 1968 | | andps xmm0, xmm1 | 1954 | | sseconst_abs xmm1, RDa |
| 1969 | |->fff_resxmm0: | 1955 | | andps xmm0, xmm1 |
| 1970 | | mov PC, [BASE-4] | 1956 | |->fff_resxmm0: |
| 1971 | | movsd qword [BASE-8], xmm0 | 1957 | | mov PC, [BASE-4] |
| 1972 | | // fallthrough | 1958 | | movsd qword [BASE-8], xmm0 |
| 1973 | } else { | 1959 | | // fallthrough |
| 1974 | | fld qword [BASE] | 1960 | |.else |
| 1975 | | fabs | 1961 | | fld qword [BASE] |
| 1976 | | // fallthrough | 1962 | | fabs |
| 1977 | |->fff_resxmm0: // Dummy. | 1963 | | // fallthrough |
| 1978 | |->fff_resn: | 1964 | |->fff_resxmm0: // Dummy. |
| 1979 | | mov PC, [BASE-4] | 1965 | |->fff_resn: |
| 1980 | | fstp qword [BASE-8] | 1966 | | mov PC, [BASE-4] |
| 1981 | } | 1967 | | fstp qword [BASE-8] |
| 1968 | |.endif | ||
| 1969 | | | ||
| 1982 | |->fff_res1: | 1970 | |->fff_res1: |
| 1983 | | mov RD, 1+1 | 1971 | | mov RD, 1+1 |
| 1984 | |->fff_res: | 1972 | |->fff_res: |
| @@ -2006,18 +1994,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2006 | | | 1994 | | |
| 2007 | |.macro math_round, func | 1995 | |.macro math_round, func |
| 2008 | | .ffunc math_ .. func | 1996 | | .ffunc math_ .. func |
| 2009 | ||if (LJ_DUALNUM) { | 1997 | |.if DUALNUM |
| 2010 | | cmp dword [BASE+4], LJ_TISNUM; jne >1 | 1998 | | cmp dword [BASE+4], LJ_TISNUM; jne >1 |
| 2011 | | mov RB, dword [BASE]; jmp ->fff_resi | 1999 | | mov RB, dword [BASE]; jmp ->fff_resi |
| 2012 | |1: | 2000 | |1: |
| 2013 | | ja ->fff_fallback | 2001 | | ja ->fff_fallback |
| 2014 | ||} else { | 2002 | |.else |
| 2015 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2003 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
| 2016 | ||} | 2004 | |.endif |
| 2017 | ||if (sse) { | 2005 | |.if SSE |
| 2018 | | movsd xmm0, qword [BASE] | 2006 | | movsd xmm0, qword [BASE] |
| 2019 | | call ->vm_ .. func | 2007 | | call ->vm_ .. func |
| 2020 | || if (LJ_DUALNUM) { | 2008 | | .if DUALNUM |
| 2021 | | cvtsd2si RB, xmm0 | 2009 | | cvtsd2si RB, xmm0 |
| 2022 | | cmp RB, 0x80000000 | 2010 | | cmp RB, 0x80000000 |
| 2023 | | jne ->fff_resi | 2011 | | jne ->fff_resi |
| @@ -2025,13 +2013,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2025 | | ucomisd xmm0, xmm1 | 2013 | | ucomisd xmm0, xmm1 |
| 2026 | | jp ->fff_resxmm0 | 2014 | | jp ->fff_resxmm0 |
| 2027 | | je ->fff_resi | 2015 | | je ->fff_resi |
| 2028 | || } | 2016 | | .endif |
| 2029 | | jmp ->fff_resxmm0 | 2017 | | jmp ->fff_resxmm0 |
| 2030 | ||} else { | 2018 | |.else |
| 2031 | | fld qword [BASE] | 2019 | | fld qword [BASE] |
| 2032 | | call ->vm_ .. func | 2020 | | call ->vm_ .. func |
| 2033 | || if (LJ_DUALNUM) { | 2021 | | .if DUALNUM |
| 2034 | |.if not X64 | ||
| 2035 | | fist ARG1 | 2022 | | fist ARG1 |
| 2036 | | mov RB, ARG1 | 2023 | | mov RB, ARG1 |
| 2037 | | cmp RB, 0x80000000; jne >2 | 2024 | | cmp RB, 0x80000000; jne >2 |
| @@ -2043,21 +2030,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2043 | |2: | 2030 | |2: |
| 2044 | | fpop | 2031 | | fpop |
| 2045 | | jmp ->fff_resi | 2032 | | jmp ->fff_resi |
| 2046 | |.endif | 2033 | | .else |
| 2047 | || } else { | ||
| 2048 | | jmp ->fff_resn | 2034 | | jmp ->fff_resn |
| 2049 | || } | 2035 | | .endif |
| 2050 | ||} | 2036 | |.endif |
| 2051 | |.endmacro | 2037 | |.endmacro |
| 2052 | | | 2038 | | |
| 2053 | | math_round floor | 2039 | | math_round floor |
| 2054 | | math_round ceil | 2040 | | math_round ceil |
| 2055 | | | 2041 | | |
| 2056 | if (sse) { | 2042 | |.if SSE |
| 2057 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 2043 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
| 2058 | } else { | 2044 | |.else |
| 2059 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | 2045 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn |
| 2060 | } | 2046 | |.endif |
| 2061 | |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn | 2047 | |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn |
| 2062 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2048 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn |
| 2063 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2049 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn |
| @@ -2075,17 +2061,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2075 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | 2061 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn |
| 2076 | | | 2062 | | |
| 2077 | |.macro math_extern, func | 2063 | |.macro math_extern, func |
| 2078 | ||if (sse) { | 2064 | |.if SSE |
| 2079 | | .ffunc_nsse math_ .. func | 2065 | | .ffunc_nsse math_ .. func |
| 2080 | | .if not X64 | 2066 | | .if not X64 |
| 2081 | | movsd FPARG1, xmm0 | 2067 | | movsd FPARG1, xmm0 |
| 2082 | | .endif | 2068 | | .endif |
| 2083 | ||} else { | 2069 | |.else |
| 2084 | | .if not X64 | 2070 | | .ffunc_n math_ .. func |
| 2085 | | .ffunc_n math_ .. func | 2071 | | fstp FPARG1 |
| 2086 | | fstp FPARG1 | 2072 | |.endif |
| 2087 | | .endif | ||
| 2088 | ||} | ||
| 2089 | | mov RB, BASE | 2073 | | mov RB, BASE |
| 2090 | | call extern lj_vm_ .. func | 2074 | | call extern lj_vm_ .. func |
| 2091 | | mov BASE, RB | 2075 | | mov BASE, RB |
| @@ -2101,17 +2085,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2101 | | math_extern tanh | 2085 | | math_extern tanh |
| 2102 | | | 2086 | | |
| 2103 | |->ff_math_deg: | 2087 | |->ff_math_deg: |
| 2104 | if (sse) { | 2088 | |.if SSE |
| 2105 | |.ffunc_nsse math_rad | 2089 | |.ffunc_nsse math_rad |
| 2106 | | mov CFUNC:RB, [BASE-8] | 2090 | | mov CFUNC:RB, [BASE-8] |
| 2107 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] | 2091 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] |
| 2108 | | jmp ->fff_resxmm0 | 2092 | | jmp ->fff_resxmm0 |
| 2109 | } else { | 2093 | |.else |
| 2110 | |.ffunc_n math_rad | 2094 | |.ffunc_n math_rad |
| 2111 | | mov CFUNC:RB, [BASE-8] | 2095 | | mov CFUNC:RB, [BASE-8] |
| 2112 | | fmul qword CFUNC:RB->upvalue[0] | 2096 | | fmul qword CFUNC:RB->upvalue[0] |
| 2113 | | jmp ->fff_resn | 2097 | | jmp ->fff_resn |
| 2114 | } | 2098 | |.endif |
| 2115 | | | 2099 | | |
| 2116 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | 2100 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn |
| 2117 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2101 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
| @@ -2128,65 +2112,65 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2128 | | cmp RB, 0x00200000; jb >4 | 2112 | | cmp RB, 0x00200000; jb >4 |
| 2129 | |1: | 2113 | |1: |
| 2130 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. | 2114 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. |
| 2131 | if (sse) { | 2115 | |.if SSE |
| 2132 | | cvtsi2sd xmm0, RB | 2116 | | cvtsi2sd xmm0, RB |
| 2133 | } else { | 2117 | |.else |
| 2134 | | mov TMP1, RB; fild TMP1 | 2118 | | mov TMP1, RB; fild TMP1 |
| 2135 | } | 2119 | |.endif |
| 2136 | | mov RB, [BASE-4] | 2120 | | mov RB, [BASE-4] |
| 2137 | | and RB, 0x800fffff // Mask off exponent. | 2121 | | and RB, 0x800fffff // Mask off exponent. |
| 2138 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | 2122 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. |
| 2139 | | mov [BASE-4], RB | 2123 | | mov [BASE-4], RB |
| 2140 | |2: | 2124 | |2: |
| 2141 | if (sse) { | 2125 | |.if SSE |
| 2142 | | movsd qword [BASE], xmm0 | 2126 | | movsd qword [BASE], xmm0 |
| 2143 | } else { | 2127 | |.else |
| 2144 | | fstp qword [BASE] | 2128 | | fstp qword [BASE] |
| 2145 | } | 2129 | |.endif |
| 2146 | | mov RD, 1+2 | 2130 | | mov RD, 1+2 |
| 2147 | | jmp ->fff_res | 2131 | | jmp ->fff_res |
| 2148 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | 2132 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. |
| 2149 | if (sse) { | 2133 | |.if SSE |
| 2150 | | xorps xmm0, xmm0; jmp <2 | 2134 | | xorps xmm0, xmm0; jmp <2 |
| 2151 | } else { | 2135 | |.else |
| 2152 | | fldz; jmp <2 | 2136 | | fldz; jmp <2 |
| 2153 | } | 2137 | |.endif |
| 2154 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | 2138 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. |
| 2155 | if (sse) { | 2139 | |.if SSE |
| 2156 | | movsd xmm0, qword [BASE] | 2140 | | movsd xmm0, qword [BASE] |
| 2157 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. | 2141 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. |
| 2158 | | mulsd xmm0, xmm1 | 2142 | | mulsd xmm0, xmm1 |
| 2159 | | movsd qword [BASE-8], xmm0 | 2143 | | movsd qword [BASE-8], xmm0 |
| 2160 | } else { | 2144 | |.else |
| 2161 | | fld qword [BASE] | 2145 | | fld qword [BASE] |
| 2162 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 | 2146 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 |
| 2163 | | fstp qword [BASE-8] | 2147 | | fstp qword [BASE-8] |
| 2164 | } | 2148 | |.endif |
| 2165 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | 2149 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 |
| 2166 | | | 2150 | | |
| 2167 | if (sse) { | 2151 | |.if SSE |
| 2168 | |.ffunc_nsse math_modf | 2152 | |.ffunc_nsse math_modf |
| 2169 | } else { | 2153 | |.else |
| 2170 | |.ffunc_n math_modf | 2154 | |.ffunc_n math_modf |
| 2171 | } | 2155 | |.endif |
| 2172 | | mov RB, [BASE+4] | 2156 | | mov RB, [BASE+4] |
| 2173 | | mov PC, [BASE-4] | 2157 | | mov PC, [BASE-4] |
| 2174 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | 2158 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? |
| 2175 | if (sse) { | 2159 | |.if SSE |
| 2176 | | movaps xmm4, xmm0 | 2160 | | movaps xmm4, xmm0 |
| 2177 | | call ->vm_trunc | 2161 | | call ->vm_trunc |
| 2178 | | subsd xmm4, xmm0 | 2162 | | subsd xmm4, xmm0 |
| 2179 | |1: | 2163 | |1: |
| 2180 | | movsd qword [BASE-8], xmm0 | 2164 | | movsd qword [BASE-8], xmm0 |
| 2181 | | movsd qword [BASE], xmm4 | 2165 | | movsd qword [BASE], xmm4 |
| 2182 | } else { | 2166 | |.else |
| 2183 | | fdup | 2167 | | fdup |
| 2184 | | call ->vm_trunc | 2168 | | call ->vm_trunc |
| 2185 | | fsub st1, st0 | 2169 | | fsub st1, st0 |
| 2186 | |1: | 2170 | |1: |
| 2187 | | fstp qword [BASE-8] | 2171 | | fstp qword [BASE-8] |
| 2188 | | fstp qword [BASE] | 2172 | | fstp qword [BASE] |
| 2189 | } | 2173 | |.endif |
| 2190 | | mov RC, [BASE-4]; mov RB, [BASE+4] | 2174 | | mov RC, [BASE-4]; mov RB, [BASE+4] |
| 2191 | | xor RC, RB; js >3 // Need to adjust sign? | 2175 | | xor RC, RB; js >3 // Need to adjust sign? |
| 2192 | |2: | 2176 | |2: |
| @@ -2196,28 +2180,28 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2196 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | 2180 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. |
| 2197 | | jmp <2 | 2181 | | jmp <2 |
| 2198 | |4: | 2182 | |4: |
| 2199 | if (sse) { | 2183 | |.if SSE |
| 2200 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2184 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
| 2201 | } else { | 2185 | |.else |
| 2202 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. | 2186 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. |
| 2203 | } | 2187 | |.endif |
| 2204 | | | 2188 | | |
| 2205 | |.ffunc_nnr math_fmod | 2189 | |.ffunc_nnr math_fmod |
| 2206 | |1: ; fprem; fnstsw ax; sahf; jp <1 | 2190 | |1: ; fprem; fnstsw ax; sahf; jp <1 |
| 2207 | | fpop1 | 2191 | | fpop1 |
| 2208 | | jmp ->fff_resn | 2192 | | jmp ->fff_resn |
| 2209 | | | 2193 | | |
| 2210 | if (sse) { | 2194 | |.if SSE |
| 2211 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 | 2195 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 |
| 2212 | } else { | 2196 | |.else |
| 2213 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | 2197 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn |
| 2214 | } | 2198 | |.endif |
| 2215 | | | 2199 | | |
| 2216 | |.macro math_minmax, name, cmovop, fcmovop, nofcmovop, sseop | 2200 | |.macro math_minmax, name, cmovop, fcmovop, sseop |
| 2217 | | .ffunc name | 2201 | | .ffunc name |
| 2218 | | mov RA, 2 | 2202 | | mov RA, 2 |
| 2219 | | cmp dword [BASE+4], LJ_TISNUM | 2203 | | cmp dword [BASE+4], LJ_TISNUM |
| 2220 | ||if (LJ_DUALNUM) { | 2204 | |.if DUALNUM |
| 2221 | | jne >4 | 2205 | | jne >4 |
| 2222 | | mov RB, dword [BASE] | 2206 | | mov RB, dword [BASE] |
| 2223 | |1: // Handle integers. | 2207 | |1: // Handle integers. |
| @@ -2230,89 +2214,79 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2230 | |3: | 2214 | |3: |
| 2231 | | ja ->fff_fallback | 2215 | | ja ->fff_fallback |
| 2232 | | // Convert intermediate result to number and continue below. | 2216 | | // Convert intermediate result to number and continue below. |
| 2233 | ||if (sse) { | 2217 | |.if SSE |
| 2234 | | cvtsi2sd xmm0, RB | 2218 | | cvtsi2sd xmm0, RB |
| 2235 | ||} else { | 2219 | |.else |
| 2236 | |.if not X64 | 2220 | | mov TMP1, RB |
| 2237 | | mov TMP1, RB | 2221 | | fild TMP1 |
| 2238 | | fild TMP1 | ||
| 2239 | |.endif | 2222 | |.endif |
| 2240 | ||} | ||
| 2241 | | jmp >6 | 2223 | | jmp >6 |
| 2242 | |4: | 2224 | |4: |
| 2243 | | ja ->fff_fallback | 2225 | | ja ->fff_fallback |
| 2244 | ||} else { | 2226 | |.else |
| 2245 | | jae ->fff_fallback | 2227 | | jae ->fff_fallback |
| 2246 | ||} | 2228 | |.endif |
| 2247 | | | 2229 | | |
| 2248 | ||if (sse) { | 2230 | |.if SSE |
| 2249 | | movsd xmm0, qword [BASE] | 2231 | | movsd xmm0, qword [BASE] |
| 2250 | |5: // Handle numbers or integers. | 2232 | |5: // Handle numbers or integers. |
| 2251 | | cmp RA, RD; jae ->fff_resxmm0 | 2233 | | cmp RA, RD; jae ->fff_resxmm0 |
| 2252 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | 2234 | | cmp dword [BASE+RA*8-4], LJ_TISNUM |
| 2253 | ||if (LJ_DUALNUM) { | 2235 | |.if DUALNUM |
| 2254 | | jb >6 | 2236 | | jb >6 |
| 2255 | | ja ->fff_fallback | 2237 | | ja ->fff_fallback |
| 2256 | | cvtsi2sd xmm1, dword [BASE+RA*8-8] | 2238 | | cvtsi2sd xmm1, dword [BASE+RA*8-8] |
| 2257 | | jmp >7 | 2239 | | jmp >7 |
| 2258 | ||} else { | 2240 | |.else |
| 2259 | | jae ->fff_fallback | 2241 | | jae ->fff_fallback |
| 2260 | ||} | 2242 | |.endif |
| 2261 | |6: | 2243 | |6: |
| 2262 | | movsd xmm1, qword [BASE+RA*8-8] | 2244 | | movsd xmm1, qword [BASE+RA*8-8] |
| 2263 | |7: | 2245 | |7: |
| 2264 | | sseop xmm0, xmm1 | 2246 | | sseop xmm0, xmm1 |
| 2265 | | add RA, 1 | 2247 | | add RA, 1 |
| 2266 | | jmp <5 | 2248 | | jmp <5 |
| 2267 | ||} else { | 2249 | |.else |
| 2268 | |.if not X64 | ||
| 2269 | | fld qword [BASE] | 2250 | | fld qword [BASE] |
| 2270 | |5: // Handle numbers or integers. | 2251 | |5: // Handle numbers or integers. |
| 2271 | | cmp RA, RD; jae ->fff_resn | 2252 | | cmp RA, RD; jae ->fff_resn |
| 2272 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | 2253 | | cmp dword [BASE+RA*8-4], LJ_TISNUM |
| 2273 | ||if (LJ_DUALNUM) { | 2254 | |.if DUALNUM |
| 2274 | | jb >6 | 2255 | | jb >6 |
| 2275 | | ja >9 | 2256 | | ja >9 |
| 2276 | | fild dword [BASE+RA*8-8] | 2257 | | fild dword [BASE+RA*8-8] |
| 2277 | | jmp >7 | 2258 | | jmp >7 |
| 2278 | ||} else { | 2259 | |.else |
| 2279 | | jae >9 | 2260 | | jae >9 |
| 2280 | ||} | 2261 | |.endif |
| 2281 | |6: | 2262 | |6: |
| 2282 | | fld qword [BASE+RA*8-8] | 2263 | | fld qword [BASE+RA*8-8] |
| 2283 | |7: | 2264 | |7: |
| 2284 | ||if (cmov) { | ||
| 2285 | | fucomi st1; fcmovop st1; fpop1 | 2265 | | fucomi st1; fcmovop st1; fpop1 |
| 2286 | ||} else { | ||
| 2287 | | push eax | ||
| 2288 | | fucom st1; fnstsw ax; test ah, 1; nofcmovop >2; fxch; 2: ; fpop | ||
| 2289 | | pop eax | ||
| 2290 | ||} | ||
| 2291 | | add RA, 1 | 2266 | | add RA, 1 |
| 2292 | | jmp <5 | 2267 | | jmp <5 |
| 2293 | |.endif | 2268 | |.endif |
| 2294 | ||} | ||
| 2295 | |.endmacro | 2269 | |.endmacro |
| 2296 | | | 2270 | | |
| 2297 | | math_minmax math_min, cmovg, fcmovnbe, jz, minsd | 2271 | | math_minmax math_min, cmovg, fcmovnbe, minsd |
| 2298 | | math_minmax math_max, cmovl, fcmovbe, jnz, maxsd | 2272 | | math_minmax math_max, cmovl, fcmovbe, maxsd |
| 2299 | if (!sse) { | 2273 | |.if not SSE |
| 2300 | |9: | 2274 | |9: |
| 2301 | | fpop; jmp ->fff_fallback | 2275 | | fpop; jmp ->fff_fallback |
| 2302 | } | 2276 | |.endif |
| 2303 | | | 2277 | | |
| 2304 | |//-- String library ----------------------------------------------------- | 2278 | |//-- String library ----------------------------------------------------- |
| 2305 | | | 2279 | | |
| 2306 | |.ffunc_1 string_len | 2280 | |.ffunc_1 string_len |
| 2307 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2281 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
| 2308 | | mov STR:RB, [BASE] | 2282 | | mov STR:RB, [BASE] |
| 2309 | if (LJ_DUALNUM) { | 2283 | |.if DUALNUM |
| 2310 | | mov RB, dword STR:RB->len; jmp ->fff_resi | 2284 | | mov RB, dword STR:RB->len; jmp ->fff_resi |
| 2311 | } else if (sse) { | 2285 | |.elif SSE |
| 2312 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | 2286 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 |
| 2313 | } else { | 2287 | |.else |
| 2314 | | fild dword STR:RB->len; jmp ->fff_resn | 2288 | | fild dword STR:RB->len; jmp ->fff_resn |
| 2315 | } | 2289 | |.endif |
| 2316 | | | 2290 | | |
| 2317 | |.ffunc string_byte // Only handle the 1-arg case here. | 2291 | |.ffunc string_byte // Only handle the 1-arg case here. |
| 2318 | | cmp NARGS:RD, 1+1; jne ->fff_fallback | 2292 | | cmp NARGS:RD, 1+1; jne ->fff_fallback |
| @@ -2322,34 +2296,34 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2322 | | cmp dword STR:RB->len, 1 | 2296 | | cmp dword STR:RB->len, 1 |
| 2323 | | jb ->fff_res0 // Return no results for empty string. | 2297 | | jb ->fff_res0 // Return no results for empty string. |
| 2324 | | movzx RB, byte STR:RB[1] | 2298 | | movzx RB, byte STR:RB[1] |
| 2325 | if (LJ_DUALNUM) { | 2299 | |.if DUALNUM |
| 2326 | | jmp ->fff_resi | 2300 | | jmp ->fff_resi |
| 2327 | } else if (sse) { | 2301 | |.elif SSE |
| 2328 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | 2302 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
| 2329 | } else { | 2303 | |.else |
| 2330 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2304 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn |
| 2331 | } | 2305 | |.endif |
| 2332 | | | 2306 | | |
| 2333 | |.ffunc string_char // Only handle the 1-arg case here. | 2307 | |.ffunc string_char // Only handle the 1-arg case here. |
| 2334 | | ffgccheck | 2308 | | ffgccheck |
| 2335 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | 2309 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. |
| 2336 | | cmp dword [BASE+4], LJ_TISNUM | 2310 | | cmp dword [BASE+4], LJ_TISNUM |
| 2337 | if (LJ_DUALNUM) { | 2311 | |.if DUALNUM |
| 2338 | | jne ->fff_fallback | 2312 | | jne ->fff_fallback |
| 2339 | | mov RB, dword [BASE] | 2313 | | mov RB, dword [BASE] |
| 2340 | | cmp RB, 255; ja ->fff_fallback | 2314 | | cmp RB, 255; ja ->fff_fallback |
| 2341 | | mov TMP2, RB | 2315 | | mov TMP2, RB |
| 2342 | } else if (sse) { | 2316 | |.elif SSE |
| 2343 | | jae ->fff_fallback | 2317 | | jae ->fff_fallback |
| 2344 | | cvttsd2si RB, qword [BASE] | 2318 | | cvttsd2si RB, qword [BASE] |
| 2345 | | cmp RB, 255; ja ->fff_fallback | 2319 | | cmp RB, 255; ja ->fff_fallback |
| 2346 | | mov TMP2, RB | 2320 | | mov TMP2, RB |
| 2347 | } else { | 2321 | |.else |
| 2348 | | jae ->fff_fallback | 2322 | | jae ->fff_fallback |
| 2349 | | fld qword [BASE] | 2323 | | fld qword [BASE] |
| 2350 | | fistp TMP2 | 2324 | | fistp TMP2 |
| 2351 | | cmp TMP2, 255; ja ->fff_fallback | 2325 | | cmp TMP2, 255; ja ->fff_fallback |
| 2352 | } | 2326 | |.endif |
| 2353 | |.if X64 | 2327 | |.if X64 |
| 2354 | | mov TMP3, 1 | 2328 | | mov TMP3, 1 |
| 2355 | |.else | 2329 | |.else |
| @@ -2382,41 +2356,39 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2382 | | cmp NARGS:RD, 1+2; jb ->fff_fallback | 2356 | | cmp NARGS:RD, 1+2; jb ->fff_fallback |
| 2383 | | jna >1 | 2357 | | jna >1 |
| 2384 | | cmp dword [BASE+20], LJ_TISNUM | 2358 | | cmp dword [BASE+20], LJ_TISNUM |
| 2385 | if (LJ_DUALNUM) { | 2359 | |.if DUALNUM |
| 2386 | | jne ->fff_fallback | 2360 | | jne ->fff_fallback |
| 2387 | | mov RB, dword [BASE+16] | 2361 | | mov RB, dword [BASE+16] |
| 2388 | | mov TMP2, RB | 2362 | | mov TMP2, RB |
| 2389 | } else if (sse) { | 2363 | |.elif SSE |
| 2390 | | jae ->fff_fallback | 2364 | | jae ->fff_fallback |
| 2391 | | cvttsd2si RB, qword [BASE+16] | 2365 | | cvttsd2si RB, qword [BASE+16] |
| 2392 | | mov TMP2, RB | 2366 | | mov TMP2, RB |
| 2393 | } else { | 2367 | |.else |
| 2394 | | jae ->fff_fallback | 2368 | | jae ->fff_fallback |
| 2395 | | fld qword [BASE+16] | 2369 | | fld qword [BASE+16] |
| 2396 | | fistp TMP2 | 2370 | | fistp TMP2 |
| 2397 | } | 2371 | |.endif |
| 2398 | |1: | 2372 | |1: |
| 2399 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2373 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
| 2400 | | cmp dword [BASE+12], LJ_TISNUM | 2374 | | cmp dword [BASE+12], LJ_TISNUM |
| 2401 | if (LJ_DUALNUM) { | 2375 | |.if DUALNUM |
| 2402 | | jne ->fff_fallback | 2376 | | jne ->fff_fallback |
| 2403 | } else { | 2377 | |.else |
| 2404 | | jae ->fff_fallback | 2378 | | jae ->fff_fallback |
| 2405 | } | 2379 | |.endif |
| 2406 | | mov STR:RB, [BASE] | 2380 | | mov STR:RB, [BASE] |
| 2407 | | mov TMP3, STR:RB | 2381 | | mov TMP3, STR:RB |
| 2408 | | mov RB, STR:RB->len | 2382 | | mov RB, STR:RB->len |
| 2409 | if (LJ_DUALNUM) { | 2383 | |.if DUALNUM |
| 2410 | | mov RA, dword [BASE+8] | 2384 | | mov RA, dword [BASE+8] |
| 2411 | } else if (sse) { | 2385 | |.elif SSE |
| 2412 | | cvttsd2si RA, qword [BASE+8] | 2386 | | cvttsd2si RA, qword [BASE+8] |
| 2413 | } else { | 2387 | |.else |
| 2414 | |.if not X64 | 2388 | | fld qword [BASE+8] |
| 2415 | | fld qword [BASE+8] | 2389 | | fistp ARG3 |
| 2416 | | fistp ARG3 | 2390 | | mov RA, ARG3 |
| 2417 | | mov RA, ARG3 | 2391 | |.endif |
| 2418 | |.endif | ||
| 2419 | } | ||
| 2420 | | mov RC, TMP2 | 2392 | | mov RC, TMP2 |
| 2421 | | cmp RB, RC // len < end? (unsigned compare) | 2393 | | cmp RB, RC // len < end? (unsigned compare) |
| 2422 | | jb >5 | 2394 | | jb >5 |
| @@ -2464,18 +2436,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2464 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2436 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
| 2465 | | cmp dword [BASE+12], LJ_TISNUM | 2437 | | cmp dword [BASE+12], LJ_TISNUM |
| 2466 | | mov STR:RB, [BASE] | 2438 | | mov STR:RB, [BASE] |
| 2467 | if (LJ_DUALNUM) { | 2439 | |.if DUALNUM |
| 2468 | | jne ->fff_fallback | 2440 | | jne ->fff_fallback |
| 2469 | | mov RC, dword [BASE+8] | 2441 | | mov RC, dword [BASE+8] |
| 2470 | } else if (sse) { | 2442 | |.elif SSE |
| 2471 | | jae ->fff_fallback | 2443 | | jae ->fff_fallback |
| 2472 | | cvttsd2si RC, qword [BASE+8] | 2444 | | cvttsd2si RC, qword [BASE+8] |
| 2473 | } else { | 2445 | |.else |
| 2474 | | jae ->fff_fallback | 2446 | | jae ->fff_fallback |
| 2475 | | fld qword [BASE+8] | 2447 | | fld qword [BASE+8] |
| 2476 | | fistp TMP2 | 2448 | | fistp TMP2 |
| 2477 | | mov RC, TMP2 | 2449 | | mov RC, TMP2 |
| 2478 | } | 2450 | |.endif |
| 2479 | | test RC, RC | 2451 | | test RC, RC |
| 2480 | | jle ->fff_emptystr // Count <= 0? (or non-int) | 2452 | | jle ->fff_emptystr // Count <= 0? (or non-int) |
| 2481 | | cmp dword STR:RB->len, 1 | 2453 | | cmp dword STR:RB->len, 1 |
| @@ -2568,15 +2540,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2568 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | 2540 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) |
| 2569 | | // Length of table returned in eax (RD). | 2541 | | // Length of table returned in eax (RD). |
| 2570 | | mov BASE, RB // Restore BASE. | 2542 | | mov BASE, RB // Restore BASE. |
| 2571 | if (LJ_DUALNUM) { | 2543 | |.if DUALNUM |
| 2572 | | mov RB, RD; jmp ->fff_resi | 2544 | | mov RB, RD; jmp ->fff_resi |
| 2573 | } else if (sse) { | 2545 | |.elif SSE |
| 2574 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | 2546 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 |
| 2575 | } else { | 2547 | |.else |
| 2576 | |.if not X64 | 2548 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn |
| 2577 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn | 2549 | |.endif |
| 2578 | |.endif | ||
| 2579 | } | ||
| 2580 | | | 2550 | | |
| 2581 | |//-- Bit library -------------------------------------------------------- | 2551 | |//-- Bit library -------------------------------------------------------- |
| 2582 | | | 2552 | | |
| @@ -2585,14 +2555,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2585 | |.macro .ffunc_bit, name, kind | 2555 | |.macro .ffunc_bit, name, kind |
| 2586 | | .ffunc_1 name | 2556 | | .ffunc_1 name |
| 2587 | |.if kind == 2 | 2557 | |.if kind == 2 |
| 2588 | ||if (sse) { | 2558 | |.if SSE |
| 2589 | | sseconst_tobit xmm1, RBa | 2559 | | sseconst_tobit xmm1, RBa |
| 2590 | ||} else { | 2560 | |.else |
| 2591 | | mov TMP1, TOBIT_BIAS | 2561 | | mov TMP1, TOBIT_BIAS |
| 2592 | ||} | 2562 | |.endif |
| 2593 | |.endif | 2563 | |.endif |
| 2594 | | cmp dword [BASE+4], LJ_TISNUM | 2564 | | cmp dword [BASE+4], LJ_TISNUM |
| 2595 | ||if (LJ_DUALNUM) { | 2565 | |.if DUALNUM |
| 2596 | | jne >1 | 2566 | | jne >1 |
| 2597 | | mov RB, dword [BASE] | 2567 | | mov RB, dword [BASE] |
| 2598 | |.if kind > 0 | 2568 | |.if kind > 0 |
| @@ -2602,18 +2572,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2602 | |.endif | 2572 | |.endif |
| 2603 | |1: | 2573 | |1: |
| 2604 | | ja ->fff_fallback | 2574 | | ja ->fff_fallback |
| 2605 | ||} else { | 2575 | |.else |
| 2606 | | jae ->fff_fallback | 2576 | | jae ->fff_fallback |
| 2607 | ||} | 2577 | |.endif |
| 2608 | ||if (sse) { | 2578 | |.if SSE |
| 2609 | | movsd xmm0, qword [BASE] | 2579 | | movsd xmm0, qword [BASE] |
| 2610 | |.if kind < 2 | 2580 | |.if kind < 2 |
| 2611 | | sseconst_tobit xmm1, RBa | 2581 | | sseconst_tobit xmm1, RBa |
| 2612 | |.endif | 2582 | |.endif |
| 2613 | | addsd xmm0, xmm1 | 2583 | | addsd xmm0, xmm1 |
| 2614 | | movd RB, xmm0 | 2584 | | movd RB, xmm0 |
| 2615 | ||} else { | 2585 | |.else |
| 2616 | |.if not X64 | ||
| 2617 | | fld qword [BASE] | 2586 | | fld qword [BASE] |
| 2618 | |.if kind < 2 | 2587 | |.if kind < 2 |
| 2619 | | mov TMP1, TOBIT_BIAS | 2588 | | mov TMP1, TOBIT_BIAS |
| @@ -2624,24 +2593,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2624 | | mov RB, ARG1 | 2593 | | mov RB, ARG1 |
| 2625 | |.endif | 2594 | |.endif |
| 2626 | |.endif | 2595 | |.endif |
| 2627 | ||} | ||
| 2628 | |2: | 2596 | |2: |
| 2629 | |.endmacro | 2597 | |.endmacro |
| 2630 | | | 2598 | | |
| 2631 | |.ffunc_bit bit_tobit, 0 | 2599 | |.ffunc_bit bit_tobit, 0 |
| 2632 | if (LJ_DUALNUM || sse) { | 2600 | |.if DUALNUM or SSE |
| 2633 | if (!sse) { | 2601 | |.if not SSE |
| 2634 | |.if not X64 | 2602 | | mov RB, ARG1 |
| 2635 | | mov RB, ARG1 | 2603 | |.endif |
| 2636 | |.endif | 2604 | | jmp ->fff_resbit |
| 2637 | } | 2605 | |.else |
| 2638 | | jmp ->fff_resbit | 2606 | | fild ARG1 |
| 2639 | } else { | 2607 | | jmp ->fff_resn |
| 2640 | |.if not X64 | 2608 | |.endif |
| 2641 | | fild ARG1 | ||
| 2642 | | jmp ->fff_resn | ||
| 2643 | |.endif | ||
| 2644 | } | ||
| 2645 | | | 2609 | | |
| 2646 | |.macro .ffunc_bit_op, name, ins | 2610 | |.macro .ffunc_bit_op, name, ins |
| 2647 | | .ffunc_bit name, 2 | 2611 | | .ffunc_bit name, 2 |
| @@ -2651,29 +2615,27 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2651 | | cmp RD, BASE | 2615 | | cmp RD, BASE |
| 2652 | | jbe ->fff_resbit | 2616 | | jbe ->fff_resbit |
| 2653 | | cmp dword [RD+4], LJ_TISNUM | 2617 | | cmp dword [RD+4], LJ_TISNUM |
| 2654 | ||if (LJ_DUALNUM) { | 2618 | |.if DUALNUM |
| 2655 | | jne >2 | 2619 | | jne >2 |
| 2656 | | ins RB, dword [RD] | 2620 | | ins RB, dword [RD] |
| 2657 | | sub RD, 8 | 2621 | | sub RD, 8 |
| 2658 | | jmp <1 | 2622 | | jmp <1 |
| 2659 | |2: | 2623 | |2: |
| 2660 | | ja ->fff_fallback_bit_op | 2624 | | ja ->fff_fallback_bit_op |
| 2661 | ||} else { | 2625 | |.else |
| 2662 | | jae ->fff_fallback_bit_op | 2626 | | jae ->fff_fallback_bit_op |
| 2663 | ||} | 2627 | |.endif |
| 2664 | ||if (sse) { | 2628 | |.if SSE |
| 2665 | | movsd xmm0, qword [RD] | 2629 | | movsd xmm0, qword [RD] |
| 2666 | | addsd xmm0, xmm1 | 2630 | | addsd xmm0, xmm1 |
| 2667 | | movd RA, xmm0 | 2631 | | movd RA, xmm0 |
| 2668 | | ins RB, RA | 2632 | | ins RB, RA |
| 2669 | ||} else { | 2633 | |.else |
| 2670 | |.if not X64 | ||
| 2671 | | fld qword [RD] | 2634 | | fld qword [RD] |
| 2672 | | fadd TMP1 | 2635 | | fadd TMP1 |
| 2673 | | fstp FPARG1 | 2636 | | fstp FPARG1 |
| 2674 | | ins RB, ARG1 | 2637 | | ins RB, ARG1 |
| 2675 | |.endif | 2638 | |.endif |
| 2676 | ||} | ||
| 2677 | | sub RD, 8 | 2639 | | sub RD, 8 |
| 2678 | | jmp <1 | 2640 | | jmp <1 |
| 2679 | |.endmacro | 2641 | |.endmacro |
| @@ -2688,40 +2650,37 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2688 | | | 2650 | | |
| 2689 | |.ffunc_bit bit_bnot, 1 | 2651 | |.ffunc_bit bit_bnot, 1 |
| 2690 | | not RB | 2652 | | not RB |
| 2691 | if (LJ_DUALNUM) { | 2653 | |.if DUALNUM |
| 2692 | | jmp ->fff_resbit | 2654 | | jmp ->fff_resbit |
| 2693 | } else if (sse) { | 2655 | |.elif SSE |
| 2694 | |->fff_resbit: | 2656 | |->fff_resbit: |
| 2695 | | cvtsi2sd xmm0, RB | 2657 | | cvtsi2sd xmm0, RB |
| 2696 | | jmp ->fff_resxmm0 | 2658 | | jmp ->fff_resxmm0 |
| 2697 | } else { | 2659 | |.else |
| 2698 | |.if not X64 | 2660 | |->fff_resbit: |
| 2699 | |->fff_resbit: | 2661 | | mov ARG1, RB |
| 2700 | | mov ARG1, RB | 2662 | | fild ARG1 |
| 2701 | | fild ARG1 | 2663 | | jmp ->fff_resn |
| 2702 | | jmp ->fff_resn | 2664 | |.endif |
| 2703 | |.endif | ||
| 2704 | } | ||
| 2705 | | | 2665 | | |
| 2706 | |->fff_fallback_bit_op: | 2666 | |->fff_fallback_bit_op: |
| 2707 | | mov NARGS:RD, TMP2 // Restore for fallback | 2667 | | mov NARGS:RD, TMP2 // Restore for fallback |
| 2708 | | jmp ->fff_fallback | 2668 | | jmp ->fff_fallback |
| 2709 | | | 2669 | | |
| 2710 | |.macro .ffunc_bit_sh, name, ins | 2670 | |.macro .ffunc_bit_sh, name, ins |
| 2711 | ||if (LJ_DUALNUM) { | 2671 | |.if DUALNUM |
| 2712 | | .ffunc_bit name, 1 | 2672 | | .ffunc_bit name, 1 |
| 2713 | | // Note: no inline conversion from number for 2nd argument! | 2673 | | // Note: no inline conversion from number for 2nd argument! |
| 2714 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | 2674 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback |
| 2715 | | mov RA, dword [BASE+8] | 2675 | | mov RA, dword [BASE+8] |
| 2716 | ||} else if (sse) { | 2676 | |.elif SSE |
| 2717 | | .ffunc_nnsse name | 2677 | | .ffunc_nnsse name |
| 2718 | | sseconst_tobit xmm2, RBa | 2678 | | sseconst_tobit xmm2, RBa |
| 2719 | | addsd xmm0, xmm2 | 2679 | | addsd xmm0, xmm2 |
| 2720 | | addsd xmm1, xmm2 | 2680 | | addsd xmm1, xmm2 |
| 2721 | | movd RB, xmm0 | 2681 | | movd RB, xmm0 |
| 2722 | | movd RA, xmm1 | 2682 | | movd RA, xmm1 |
| 2723 | ||} else { | 2683 | |.else |
| 2724 | |.if not X64 | ||
| 2725 | | .ffunc_nn name | 2684 | | .ffunc_nn name |
| 2726 | | mov TMP1, TOBIT_BIAS | 2685 | | mov TMP1, TOBIT_BIAS |
| 2727 | | fadd TMP1 | 2686 | | fadd TMP1 |
| @@ -2731,7 +2690,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2731 | | mov RA, ARG3 | 2690 | | mov RA, ARG3 |
| 2732 | | mov RB, ARG1 | 2691 | | mov RB, ARG1 |
| 2733 | |.endif | 2692 | |.endif |
| 2734 | ||} | ||
| 2735 | | ins RB, cl // Assumes RA is ecx. | 2693 | | ins RB, cl // Assumes RA is ecx. |
| 2736 | | jmp ->fff_resbit | 2694 | | jmp ->fff_resbit |
| 2737 | |.endmacro | 2695 | |.endmacro |
| @@ -2828,7 +2786,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2828 | |//----------------------------------------------------------------------- | 2786 | |//----------------------------------------------------------------------- |
| 2829 | | | 2787 | | |
| 2830 | |->vm_record: // Dispatch target for recording phase. | 2788 | |->vm_record: // Dispatch target for recording phase. |
| 2831 | #if LJ_HASJIT | 2789 | |.if JIT |
| 2832 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | 2790 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] |
| 2833 | | test RDL, HOOK_VMEVENT // No recording while in vmevent. | 2791 | | test RDL, HOOK_VMEVENT // No recording while in vmevent. |
| 2834 | | jnz >5 | 2792 | | jnz >5 |
| @@ -2839,7 +2797,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2839 | | jz >1 | 2797 | | jz >1 |
| 2840 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | 2798 | | dec dword [DISPATCH+DISPATCH_GL(hookcount)] |
| 2841 | | jmp >1 | 2799 | | jmp >1 |
| 2842 | #endif | 2800 | |.endif |
| 2843 | | | 2801 | | |
| 2844 | |->vm_rethook: // Dispatch target for return hooks. | 2802 | |->vm_rethook: // Dispatch target for return hooks. |
| 2845 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | 2803 | | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] |
| @@ -2885,7 +2843,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2885 | | jmp <4 | 2843 | | jmp <4 |
| 2886 | | | 2844 | | |
| 2887 | |->vm_hotloop: // Hot loop counter underflow. | 2845 | |->vm_hotloop: // Hot loop counter underflow. |
| 2888 | #if LJ_HASJIT | 2846 | |.if JIT |
| 2889 | | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). | 2847 | | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). |
| 2890 | | mov RB, LFUNC:RB->pc | 2848 | | mov RB, LFUNC:RB->pc |
| 2891 | | movzx RD, byte [RB+PC2PROTO(framesize)] | 2849 | | movzx RD, byte [RB+PC2PROTO(framesize)] |
| @@ -2899,20 +2857,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2899 | | mov SAVE_PC, PC | 2857 | | mov SAVE_PC, PC |
| 2900 | | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) | 2858 | | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) |
| 2901 | | jmp <3 | 2859 | | jmp <3 |
| 2902 | #endif | 2860 | |.endif |
| 2903 | | | 2861 | | |
| 2904 | |->vm_callhook: // Dispatch target for call hooks. | 2862 | |->vm_callhook: // Dispatch target for call hooks. |
| 2905 | | mov SAVE_PC, PC | 2863 | | mov SAVE_PC, PC |
| 2906 | #if LJ_HASJIT | 2864 | |.if JIT |
| 2907 | | jmp >1 | 2865 | | jmp >1 |
| 2908 | #endif | 2866 | |.endif |
| 2909 | | | 2867 | | |
| 2910 | |->vm_hotcall: // Hot call counter underflow. | 2868 | |->vm_hotcall: // Hot call counter underflow. |
| 2911 | #if LJ_HASJIT | 2869 | |.if JIT |
| 2912 | | mov SAVE_PC, PC | 2870 | | mov SAVE_PC, PC |
| 2913 | | or PC, 1 // Marker for hot call. | 2871 | | or PC, 1 // Marker for hot call. |
| 2914 | |1: | 2872 | |1: |
| 2915 | #endif | 2873 | |.endif |
| 2916 | | lea RD, [BASE+NARGS:RD*8-8] | 2874 | | lea RD, [BASE+NARGS:RD*8-8] |
| 2917 | | mov L:RB, SAVE_L | 2875 | | mov L:RB, SAVE_L |
| 2918 | | mov L:RB->base, BASE | 2876 | | mov L:RB->base, BASE |
| @@ -2922,9 +2880,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2922 | | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) | 2880 | | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) |
| 2923 | | // ASMFunction returned in eax/rax (RDa). | 2881 | | // ASMFunction returned in eax/rax (RDa). |
| 2924 | | mov SAVE_PC, 0 // Invalidate for subsequent line hook. | 2882 | | mov SAVE_PC, 0 // Invalidate for subsequent line hook. |
| 2925 | #if LJ_HASJIT | 2883 | |.if JIT |
| 2926 | | and PC, -2 | 2884 | | and PC, -2 |
| 2927 | #endif | 2885 | |.endif |
| 2928 | | mov BASE, L:RB->base | 2886 | | mov BASE, L:RB->base |
| 2929 | | mov RAa, RDa | 2887 | | mov RAa, RDa |
| 2930 | | mov RD, L:RB->top | 2888 | | mov RD, L:RB->top |
| @@ -2942,7 +2900,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 2942 | |// Called from an exit stub with the exit number on the stack. | 2900 | |// Called from an exit stub with the exit number on the stack. |
| 2943 | |// The 16 bit exit number is stored with two (sign-extended) push imm8. | 2901 | |// The 16 bit exit number is stored with two (sign-extended) push imm8. |
| 2944 | |->vm_exit_handler: | 2902 | |->vm_exit_handler: |
| 2945 | #if LJ_HASJIT | 2903 | |.if JIT |
| 2946 | |.if X64 | 2904 | |.if X64 |
| 2947 | | push r13; push r12 | 2905 | | push r13; push r12 |
| 2948 | | push r11; push r10; push r9; push r8 | 2906 | | push r11; push r10; push r9; push r8 |
| @@ -3017,10 +2975,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3017 | |.if X64 | 2975 | |.if X64 |
| 3018 | | jmp >1 | 2976 | | jmp >1 |
| 3019 | |.endif | 2977 | |.endif |
| 3020 | #endif | 2978 | |.endif |
| 3021 | |->vm_exit_interp: | 2979 | |->vm_exit_interp: |
| 3022 | | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. | 2980 | | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. |
| 3023 | #if LJ_HASJIT | 2981 | |.if JIT |
| 3024 | |.if X64 | 2982 | |.if X64 |
| 3025 | | // Restore additional callee-save registers only used in compiled code. | 2983 | | // Restore additional callee-save registers only used in compiled code. |
| 3026 | |.if X64WIN | 2984 | |.if X64WIN |
| @@ -3074,7 +3032,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3074 | | mov FCARG1, L:RB | 3032 | | mov FCARG1, L:RB |
| 3075 | | mov FCARG2, RD | 3033 | | mov FCARG2, RD |
| 3076 | | call extern lj_err_throw@8 // (lua_State *L, int errcode) | 3034 | | call extern lj_err_throw@8 // (lua_State *L, int errcode) |
| 3077 | #endif | 3035 | |.endif |
| 3078 | | | 3036 | | |
| 3079 | |//----------------------------------------------------------------------- | 3037 | |//----------------------------------------------------------------------- |
| 3080 | |//-- Math helper functions ---------------------------------------------- | 3038 | |//-- Math helper functions ---------------------------------------------- |
| @@ -3139,9 +3097,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3139 | | | 3097 | | |
| 3140 | |.macro vm_round, name, ssemode, mode1, mode2 | 3098 | |.macro vm_round, name, ssemode, mode1, mode2 |
| 3141 | |->name: | 3099 | |->name: |
| 3142 | ||if (!sse) { | 3100 | |.if not SSE |
| 3143 | | vm_round_x87 mode1, mode2 | 3101 | | vm_round_x87 mode1, mode2 |
| 3144 | ||} | 3102 | |.endif |
| 3145 | |->name .. _sse: | 3103 | |->name .. _sse: |
| 3146 | | vm_round_sse ssemode | 3104 | | vm_round_sse ssemode |
| 3147 | |.endmacro | 3105 | |.endmacro |
| @@ -3152,51 +3110,51 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3152 | | | 3110 | | |
| 3153 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 3111 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
| 3154 | |->vm_mod: | 3112 | |->vm_mod: |
| 3155 | if (sse) { | 3113 | |.if SSE |
| 3156 | |// Args in xmm0/xmm1, return value in xmm0. | 3114 | |// Args in xmm0/xmm1, return value in xmm0. |
| 3157 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | 3115 | |// Caveat: xmm0-xmm5 and RC (eax) modified! |
| 3158 | | movaps xmm5, xmm0 | 3116 | | movaps xmm5, xmm0 |
| 3159 | | divsd xmm0, xmm1 | 3117 | | divsd xmm0, xmm1 |
| 3160 | | sseconst_abs xmm2, RDa | 3118 | | sseconst_abs xmm2, RDa |
| 3161 | | sseconst_2p52 xmm3, RDa | 3119 | | sseconst_2p52 xmm3, RDa |
| 3162 | | movaps xmm4, xmm0 | 3120 | | movaps xmm4, xmm0 |
| 3163 | | andpd xmm4, xmm2 // |x/y| | 3121 | | andpd xmm4, xmm2 // |x/y| |
| 3164 | | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. | 3122 | | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. |
| 3165 | | jbe >1 | 3123 | | jbe >1 |
| 3166 | | andnpd xmm2, xmm0 // Isolate sign bit. | 3124 | | andnpd xmm2, xmm0 // Isolate sign bit. |
| 3167 | | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 | 3125 | | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 |
| 3168 | | subsd xmm4, xmm3 | 3126 | | subsd xmm4, xmm3 |
| 3169 | | orpd xmm4, xmm2 // Merge sign bit back in. | 3127 | | orpd xmm4, xmm2 // Merge sign bit back in. |
| 3170 | | sseconst_1 xmm2, RDa | 3128 | | sseconst_1 xmm2, RDa |
| 3171 | | cmpsd xmm0, xmm4, 1 // x/y < result? | 3129 | | cmpsd xmm0, xmm4, 1 // x/y < result? |
| 3172 | | andpd xmm0, xmm2 | 3130 | | andpd xmm0, xmm2 |
| 3173 | | subsd xmm4, xmm0 // If yes, subtract 1.0. | 3131 | | subsd xmm4, xmm0 // If yes, subtract 1.0. |
| 3174 | | movaps xmm0, xmm5 | 3132 | | movaps xmm0, xmm5 |
| 3175 | | mulsd xmm1, xmm4 | 3133 | | mulsd xmm1, xmm4 |
| 3176 | | subsd xmm0, xmm1 | 3134 | | subsd xmm0, xmm1 |
| 3177 | | ret | 3135 | | ret |
| 3178 | |1: | 3136 | |1: |
| 3179 | | mulsd xmm1, xmm0 | 3137 | | mulsd xmm1, xmm0 |
| 3180 | | movaps xmm0, xmm5 | 3138 | | movaps xmm0, xmm5 |
| 3181 | | subsd xmm0, xmm1 | 3139 | | subsd xmm0, xmm1 |
| 3182 | | ret | 3140 | | ret |
| 3183 | } else { | 3141 | |.else |
| 3184 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | 3142 | |// Args/ret on x87 stack (y on top). No xmm registers modified. |
| 3185 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | 3143 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! |
| 3186 | | fld st1 | 3144 | | fld st1 |
| 3187 | | fdiv st1 | 3145 | | fdiv st1 |
| 3188 | | fnstcw word [esp+4] | 3146 | | fnstcw word [esp+4] |
| 3189 | | mov ax, 0x0400 | 3147 | | mov ax, 0x0400 |
| 3190 | | or ax, [esp+4] | 3148 | | or ax, [esp+4] |
| 3191 | | and ax, 0xf7ff | 3149 | | and ax, 0xf7ff |
| 3192 | | mov [esp+6], ax | 3150 | | mov [esp+6], ax |
| 3193 | | fldcw word [esp+6] | 3151 | | fldcw word [esp+6] |
| 3194 | | frndint | 3152 | | frndint |
| 3195 | | fldcw word [esp+4] | 3153 | | fldcw word [esp+4] |
| 3196 | | fmulp st1 | 3154 | | fmulp st1 |
| 3197 | | fsubp st1 | 3155 | | fsubp st1 |
| 3198 | | ret | 3156 | | ret |
| 3199 | } | 3157 | |.endif |
| 3200 | | | 3158 | | |
| 3201 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | 3159 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and |
| 3202 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | 3160 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. |
| @@ -3224,18 +3182,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3224 | | | 3182 | | |
| 3225 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | 3183 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, |
| 3226 | |// and vm_arith. | 3184 | |// and vm_arith. |
| 3227 | if (!sse) { | ||
| 3228 | |.if not X64 | ||
| 3229 | |// Args/ret on x87 stack (y on top). RC (eax) modified. | 3185 | |// Args/ret on x87 stack (y on top). RC (eax) modified. |
| 3230 | |// Caveat: needs 3 slots on x87 stack! | 3186 | |// Caveat: needs 3 slots on x87 stack! |
| 3231 | |->vm_pow: | 3187 | |->vm_pow: |
| 3188 | |.if not SSE | ||
| 3232 | | fist dword [esp+4] // Store/reload int before comparison. | 3189 | | fist dword [esp+4] // Store/reload int before comparison. |
| 3233 | | fild dword [esp+4] // Integral exponent used in vm_powi. | 3190 | | fild dword [esp+4] // Integral exponent used in vm_powi. |
| 3234 | ||if (cmov) { | ||
| 3235 | | fucomip st1 | 3191 | | fucomip st1 |
| 3236 | ||} else { | ||
| 3237 | | fucomp st1; fnstsw ax; sahf | ||
| 3238 | ||} | ||
| 3239 | | jnz >8 // Branch for FP exponents. | 3192 | | jnz >8 // Branch for FP exponents. |
| 3240 | | jp >9 // Branch for NaN exponent. | 3193 | | jp >9 // Branch for NaN exponent. |
| 3241 | | fpop // Pop y and fallthrough to vm_powi. | 3194 | | fpop // Pop y and fallthrough to vm_powi. |
| @@ -3288,11 +3241,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3288 | | | 3241 | | |
| 3289 | |9: // Handle x^NaN. | 3242 | |9: // Handle x^NaN. |
| 3290 | | fld1 | 3243 | | fld1 |
| 3291 | ||if (cmov) { | ||
| 3292 | | fucomip st2 | 3244 | | fucomip st2 |
| 3293 | ||} else { | ||
| 3294 | | fucomp st2; fnstsw ax; sahf | ||
| 3295 | ||} | ||
| 3296 | | je >1 // 1^NaN ==> 1 | 3245 | | je >1 // 1^NaN ==> 1 |
| 3297 | | fxch // x^NaN ==> NaN | 3246 | | fxch // x^NaN ==> NaN |
| 3298 | |1: | 3247 | |1: |
| @@ -3302,11 +3251,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3302 | |2: // Handle x^+-Inf. | 3251 | |2: // Handle x^+-Inf. |
| 3303 | | fabs | 3252 | | fabs |
| 3304 | | fld1 | 3253 | | fld1 |
| 3305 | ||if (cmov) { | ||
| 3306 | | fucomip st1 | 3254 | | fucomip st1 |
| 3307 | ||} else { | ||
| 3308 | | fucomp st1; fnstsw ax; sahf | ||
| 3309 | ||} | ||
| 3310 | | je >3 // +-1^+-Inf ==> 1 | 3255 | | je >3 // +-1^+-Inf ==> 1 |
| 3311 | | fpop; fabs; fldz; mov eax, 0; setc al | 3256 | | fpop; fabs; fldz; mov eax, 0; setc al |
| 3312 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | 3257 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 |
| @@ -3326,9 +3271,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3326 | | fld dword [esp+4] | 3271 | | fld dword [esp+4] |
| 3327 | | ret | 3272 | | ret |
| 3328 | |.endif | 3273 | |.endif |
| 3329 | } else { | ||
| 3330 | |->vm_pow: | ||
| 3331 | } | ||
| 3332 | | | 3274 | | |
| 3333 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | 3275 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. |
| 3334 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | 3276 | |// Needs 16 byte scratch area for x86. Also called from JIT code. |
| @@ -3453,217 +3395,208 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3453 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | 3395 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) |
| 3454 | |// Computes fpm(x) for extended math functions. ORDER FPM. | 3396 | |// Computes fpm(x) for extended math functions. ORDER FPM. |
| 3455 | |->vm_foldfpm: | 3397 | |->vm_foldfpm: |
| 3456 | #if LJ_HASJIT | 3398 | |.if JIT |
| 3457 | if (sse) { | 3399 | |.if X64 |
| 3458 | |.if X64 | 3400 | | .if X64WIN |
| 3459 | | | 3401 | | .define fpmop, CARG2d |
| 3460 | | .if X64WIN | 3402 | | .else |
| 3461 | | .define fpmop, CARG2d | 3403 | | .define fpmop, CARG1d |
| 3462 | | .else | 3404 | | .endif |
| 3463 | | .define fpmop, CARG1d | 3405 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil |
| 3464 | | .endif | 3406 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 |
| 3465 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | 3407 | | sqrtsd xmm0, xmm0; ret |
| 3466 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | 3408 | |2: |
| 3467 | | sqrtsd xmm0, xmm0; ret | 3409 | | .if X64WIN |
| 3468 | |2: | 3410 | | movsd qword [rsp+8], xmm0 // Use scratch area. |
| 3469 | | .if X64WIN | 3411 | | fld qword [rsp+8] |
| 3470 | | movsd qword [rsp+8], xmm0 // Use scratch area. | 3412 | | .else |
| 3471 | | fld qword [rsp+8] | 3413 | | movsd qword [rsp-8], xmm0 // Use red zone. |
| 3472 | | .else | 3414 | | fld qword [rsp-8] |
| 3473 | | movsd qword [rsp-8], xmm0 // Use red zone. | 3415 | | .endif |
| 3474 | | fld qword [rsp-8] | 3416 | | cmp fpmop, 5; ja >2 |
| 3475 | | .endif | 3417 | | .if X64WIN; pop rax; .endif |
| 3476 | | cmp fpmop, 5; ja >2 | 3418 | | je >1 |
| 3477 | | .if X64WIN; pop rax; .endif | 3419 | | call ->vm_exp_x87 |
| 3478 | | je >1 | 3420 | | .if X64WIN; push rax; .endif |
| 3479 | | call ->vm_exp_x87 | 3421 | | jmp >7 |
| 3480 | | .if X64WIN; push rax; .endif | 3422 | |1: |
| 3481 | | jmp >7 | 3423 | | call ->vm_exp2_x87 |
| 3482 | |1: | 3424 | | .if X64WIN; push rax; .endif |
| 3483 | | call ->vm_exp2_x87 | 3425 | | jmp >7 |
| 3484 | | .if X64WIN; push rax; .endif | 3426 | |2: ; cmp fpmop, 7; je >1; ja >2 |
| 3485 | | jmp >7 | 3427 | | fldln2; fxch; fyl2x; jmp >7 |
| 3486 | |2: ; cmp fpmop, 7; je >1; ja >2 | 3428 | |1: ; fld1; fxch; fyl2x; jmp >7 |
| 3487 | | fldln2; fxch; fyl2x; jmp >7 | 3429 | |2: ; cmp fpmop, 9; je >1; ja >2 |
| 3488 | |1: ; fld1; fxch; fyl2x; jmp >7 | 3430 | | fldlg2; fxch; fyl2x; jmp >7 |
| 3489 | |2: ; cmp fpmop, 9; je >1; ja >2 | 3431 | |1: ; fsin; jmp >7 |
| 3490 | | fldlg2; fxch; fyl2x; jmp >7 | 3432 | |2: ; cmp fpmop, 11; je >1; ja >9 |
| 3491 | |1: ; fsin; jmp >7 | 3433 | | fcos; jmp >7 |
| 3492 | |2: ; cmp fpmop, 11; je >1; ja >9 | 3434 | |1: ; fptan; fpop |
| 3493 | | fcos; jmp >7 | 3435 | |7: |
| 3494 | |1: ; fptan; fpop | 3436 | | .if X64WIN |
| 3495 | |7: | 3437 | | fstp qword [rsp+8] // Use scratch area. |
| 3496 | | .if X64WIN | 3438 | | movsd xmm0, qword [rsp+8] |
| 3497 | | fstp qword [rsp+8] // Use scratch area. | 3439 | | .else |
| 3498 | | movsd xmm0, qword [rsp+8] | 3440 | | fstp qword [rsp-8] // Use red zone. |
| 3499 | | .else | 3441 | | movsd xmm0, qword [rsp-8] |
| 3500 | | fstp qword [rsp-8] // Use red zone. | 3442 | | .endif |
| 3501 | | movsd xmm0, qword [rsp-8] | 3443 | | ret |
| 3502 | | .endif | 3444 | |.else // x86 calling convention. |
| 3503 | | ret | 3445 | | .define fpmop, eax |
| 3504 | | | 3446 | |.if SSE |
| 3505 | |.else // x86 calling convention. | 3447 | | mov fpmop, [esp+12] |
| 3506 | | | 3448 | | movsd xmm0, qword [esp+4] |
| 3507 | | .define fpmop, eax | 3449 | | cmp fpmop, 1; je >1; ja >2 |
| 3508 | | mov fpmop, [esp+12] | 3450 | | call ->vm_floor; jmp >7 |
| 3509 | | movsd xmm0, qword [esp+4] | 3451 | |1: ; call ->vm_ceil; jmp >7 |
| 3510 | | cmp fpmop, 1; je >1; ja >2 | 3452 | |2: ; cmp fpmop, 3; je >1; ja >2 |
| 3511 | | call ->vm_floor; jmp >7 | 3453 | | call ->vm_trunc; jmp >7 |
| 3512 | |1: ; call ->vm_ceil; jmp >7 | 3454 | |1: |
| 3513 | |2: ; cmp fpmop, 3; je >1; ja >2 | 3455 | | sqrtsd xmm0, xmm0 |
| 3514 | | call ->vm_trunc; jmp >7 | 3456 | |7: |
| 3515 | |1: | 3457 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. |
| 3516 | | sqrtsd xmm0, xmm0 | 3458 | | fld qword [esp+4] |
| 3517 | |7: | 3459 | | ret |
| 3518 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | 3460 | |2: ; fld qword [esp+4] |
| 3519 | | fld qword [esp+4] | 3461 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 |
| 3520 | | ret | 3462 | |2: ; cmp fpmop, 7; je >1; ja >2 |
| 3521 | |2: ; fld qword [esp+4] | 3463 | | fldln2; fxch; fyl2x; ret |
| 3522 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | 3464 | |1: ; fld1; fxch; fyl2x; ret |
| 3523 | |2: ; cmp fpmop, 7; je >1; ja >2 | 3465 | |2: ; cmp fpmop, 9; je >1; ja >2 |
| 3524 | | fldln2; fxch; fyl2x; ret | 3466 | | fldlg2; fxch; fyl2x; ret |
| 3525 | |1: ; fld1; fxch; fyl2x; ret | 3467 | |1: ; fsin; ret |
| 3526 | |2: ; cmp fpmop, 9; je >1; ja >2 | 3468 | |2: ; cmp fpmop, 11; je >1; ja >9 |
| 3527 | | fldlg2; fxch; fyl2x; ret | 3469 | | fcos; ret |
| 3528 | |1: ; fsin; ret | 3470 | |1: ; fptan; fpop; ret |
| 3529 | |2: ; cmp fpmop, 11; je >1; ja >9 | 3471 | |.else |
| 3530 | | fcos; ret | 3472 | | mov fpmop, [esp+12] |
| 3531 | |1: ; fptan; fpop; ret | 3473 | | fld qword [esp+4] |
| 3532 | | | 3474 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil |
| 3533 | |.endif | 3475 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 |
| 3534 | } else { | 3476 | | fsqrt; ret |
| 3535 | | mov fpmop, [esp+12] | 3477 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 |
| 3536 | | fld qword [esp+4] | 3478 | | cmp fpmop, 7; je >1; ja >2 |
| 3537 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | 3479 | | fldln2; fxch; fyl2x; ret |
| 3538 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | 3480 | |1: ; fld1; fxch; fyl2x; ret |
| 3539 | | fsqrt; ret | 3481 | |2: ; cmp fpmop, 9; je >1; ja >2 |
| 3540 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | 3482 | | fldlg2; fxch; fyl2x; ret |
| 3541 | | cmp fpmop, 7; je >1; ja >2 | 3483 | |1: ; fsin; ret |
| 3542 | | fldln2; fxch; fyl2x; ret | 3484 | |2: ; cmp fpmop, 11; je >1; ja >9 |
| 3543 | |1: ; fld1; fxch; fyl2x; ret | 3485 | | fcos; ret |
| 3544 | |2: ; cmp fpmop, 9; je >1; ja >2 | 3486 | |1: ; fptan; fpop; ret |
| 3545 | | fldlg2; fxch; fyl2x; ret | 3487 | |.endif |
| 3546 | |1: ; fsin; ret | 3488 | |.endif |
| 3547 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
| 3548 | | fcos; ret | ||
| 3549 | |1: ; fptan; fpop; ret | ||
| 3550 | } | ||
| 3551 | |9: ; int3 // Bad fpm. | 3489 | |9: ; int3 // Bad fpm. |
| 3552 | #endif | 3490 | |.endif |
| 3553 | | | 3491 | | |
| 3554 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | 3492 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) |
| 3555 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | 3493 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) |
| 3556 | |// and basic math functions. ORDER ARITH | 3494 | |// and basic math functions. ORDER ARITH |
| 3557 | |->vm_foldarith: | 3495 | |->vm_foldarith: |
| 3558 | if (sse) { | 3496 | |.if X64 |
| 3559 | |.if X64 | 3497 | | |
| 3560 | | | 3498 | | .if X64WIN |
| 3561 | | .if X64WIN | 3499 | | .define foldop, CARG3d |
| 3562 | | .define foldop, CARG3d | 3500 | | .else |
| 3563 | | .else | 3501 | | .define foldop, CARG1d |
| 3564 | | .define foldop, CARG1d | 3502 | | .endif |
| 3565 | | .endif | 3503 | | cmp foldop, 1; je >1; ja >2 |
| 3566 | | cmp foldop, 1; je >1; ja >2 | 3504 | | addsd xmm0, xmm1; ret |
| 3567 | | addsd xmm0, xmm1; ret | 3505 | |1: ; subsd xmm0, xmm1; ret |
| 3568 | |1: ; subsd xmm0, xmm1; ret | 3506 | |2: ; cmp foldop, 3; je >1; ja >2 |
| 3569 | |2: ; cmp foldop, 3; je >1; ja >2 | 3507 | | mulsd xmm0, xmm1; ret |
| 3570 | | mulsd xmm0, xmm1; ret | 3508 | |1: ; divsd xmm0, xmm1; ret |
| 3571 | |1: ; divsd xmm0, xmm1; ret | 3509 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow |
| 3572 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow | 3510 | | cmp foldop, 7; je >1; ja >2 |
| 3573 | | cmp foldop, 7; je >1; ja >2 | 3511 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret |
| 3574 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | 3512 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret |
| 3575 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | 3513 | |2: ; cmp foldop, 9; ja >2 |
| 3576 | |2: ; cmp foldop, 9; ja >2 | 3514 | |.if X64WIN |
| 3577 | |.if X64WIN | 3515 | | movsd qword [rsp+8], xmm0 // Use scratch area. |
| 3578 | | movsd qword [rsp+8], xmm0 // Use scratch area. | 3516 | | movsd qword [rsp+16], xmm1 |
| 3579 | | movsd qword [rsp+16], xmm1 | 3517 | | fld qword [rsp+8] |
| 3580 | | fld qword [rsp+8] | 3518 | | fld qword [rsp+16] |
| 3581 | | fld qword [rsp+16] | 3519 | |.else |
| 3582 | |.else | 3520 | | movsd qword [rsp-8], xmm0 // Use red zone. |
| 3583 | | movsd qword [rsp-8], xmm0 // Use red zone. | 3521 | | movsd qword [rsp-16], xmm1 |
| 3584 | | movsd qword [rsp-16], xmm1 | 3522 | | fld qword [rsp-8] |
| 3585 | | fld qword [rsp-8] | 3523 | | fld qword [rsp-16] |
| 3586 | | fld qword [rsp-16] | 3524 | |.endif |
| 3587 | |.endif | 3525 | | je >1 |
| 3588 | | je >1 | 3526 | | fpatan |
| 3589 | | fpatan | 3527 | |7: |
| 3590 | |7: | 3528 | |.if X64WIN |
| 3591 | |.if X64WIN | 3529 | | fstp qword [rsp+8] // Use scratch area. |
| 3592 | | fstp qword [rsp+8] // Use scratch area. | 3530 | | movsd xmm0, qword [rsp+8] |
| 3593 | | movsd xmm0, qword [rsp+8] | 3531 | |.else |
| 3594 | |.else | 3532 | | fstp qword [rsp-8] // Use red zone. |
| 3595 | | fstp qword [rsp-8] // Use red zone. | 3533 | | movsd xmm0, qword [rsp-8] |
| 3596 | | movsd xmm0, qword [rsp-8] | 3534 | |.endif |
| 3597 | |.endif | 3535 | | ret |
| 3598 | | ret | 3536 | |1: ; fxch; fscale; fpop1; jmp <7 |
| 3599 | |1: ; fxch; fscale; fpop1; jmp <7 | 3537 | |2: ; cmp foldop, 11; je >1; ja >9 |
| 3600 | |2: ; cmp foldop, 11; je >1; ja >9 | 3538 | | minsd xmm0, xmm1; ret |
| 3601 | | minsd xmm0, xmm1; ret | 3539 | |1: ; maxsd xmm0, xmm1; ret |
| 3602 | |1: ; maxsd xmm0, xmm1; ret | 3540 | |9: ; int3 // Bad op. |
| 3603 | |9: ; int3 // Bad op. | 3541 | | |
| 3604 | | | 3542 | |.elif SSE // x86 calling convention with SSE ops. |
| 3605 | |.else // x86 calling convention. | 3543 | | |
| 3606 | | | 3544 | | .define foldop, eax |
| 3607 | | .define foldop, eax | 3545 | | mov foldop, [esp+20] |
| 3608 | | mov foldop, [esp+20] | 3546 | | movsd xmm0, qword [esp+4] |
| 3609 | | movsd xmm0, qword [esp+4] | 3547 | | movsd xmm1, qword [esp+12] |
| 3610 | | movsd xmm1, qword [esp+12] | 3548 | | cmp foldop, 1; je >1; ja >2 |
| 3611 | | cmp foldop, 1; je >1; ja >2 | 3549 | | addsd xmm0, xmm1 |
| 3612 | | addsd xmm0, xmm1 | 3550 | |7: |
| 3613 | |7: | 3551 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. |
| 3614 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | 3552 | | fld qword [esp+4] |
| 3615 | | fld qword [esp+4] | 3553 | | ret |
| 3616 | | ret | 3554 | |1: ; subsd xmm0, xmm1; jmp <7 |
| 3617 | |1: ; subsd xmm0, xmm1; jmp <7 | 3555 | |2: ; cmp foldop, 3; je >1; ja >2 |
| 3618 | |2: ; cmp foldop, 3; je >1; ja >2 | 3556 | | mulsd xmm0, xmm1; jmp <7 |
| 3619 | | mulsd xmm0, xmm1; jmp <7 | 3557 | |1: ; divsd xmm0, xmm1; jmp <7 |
| 3620 | |1: ; divsd xmm0, xmm1; jmp <7 | 3558 | |2: ; cmp foldop, 5 |
| 3621 | |2: ; cmp foldop, 5 | 3559 | | je >1; ja >2 |
| 3622 | | je >1; ja >2 | 3560 | | call ->vm_mod; jmp <7 |
| 3623 | | call ->vm_mod; jmp <7 | 3561 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. |
| 3624 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. | 3562 | |2: ; cmp foldop, 7; je >1; ja >2 |
| 3625 | |2: ; cmp foldop, 7; je >1; ja >2 | 3563 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 |
| 3626 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | 3564 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 |
| 3627 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | 3565 | |2: ; cmp foldop, 9; ja >2 |
| 3628 | |2: ; cmp foldop, 9; ja >2 | 3566 | | fld qword [esp+4] // Reload from stack |
| 3629 | | fld qword [esp+4] // Reload from stack | 3567 | | fld qword [esp+12] |
| 3630 | | fld qword [esp+12] | 3568 | | je >1 |
| 3631 | | je >1 | 3569 | | fpatan; ret |
| 3632 | | fpatan; ret | 3570 | |1: ; fxch; fscale; fpop1; ret |
| 3633 | |1: ; fxch; fscale; fpop1; ret | 3571 | |2: ; cmp foldop, 11; je >1; ja >9 |
| 3634 | |2: ; cmp foldop, 11; je >1; ja >9 | 3572 | | minsd xmm0, xmm1; jmp <7 |
| 3635 | | minsd xmm0, xmm1; jmp <7 | 3573 | |1: ; maxsd xmm0, xmm1; jmp <7 |
| 3636 | |1: ; maxsd xmm0, xmm1; jmp <7 | 3574 | |9: ; int3 // Bad op. |
| 3637 | |9: ; int3 // Bad op. | 3575 | | |
| 3638 | | | 3576 | |.else // x86 calling convention with x87 ops. |
| 3639 | |.endif | 3577 | | |
| 3640 | } else { | 3578 | | mov eax, [esp+20] |
| 3641 | | mov eax, [esp+20] | 3579 | | fld qword [esp+4] |
| 3642 | | fld qword [esp+4] | 3580 | | fld qword [esp+12] |
| 3643 | | fld qword [esp+12] | 3581 | | cmp eax, 1; je >1; ja >2 |
| 3644 | | cmp eax, 1; je >1; ja >2 | 3582 | | faddp st1; ret |
| 3645 | | faddp st1; ret | 3583 | |1: ; fsubp st1; ret |
| 3646 | |1: ; fsubp st1; ret | 3584 | |2: ; cmp eax, 3; je >1; ja >2 |
| 3647 | |2: ; cmp eax, 3; je >1; ja >2 | 3585 | | fmulp st1; ret |
| 3648 | | fmulp st1; ret | 3586 | |1: ; fdivp st1; ret |
| 3649 | |1: ; fdivp st1; ret | 3587 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow |
| 3650 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | 3588 | | cmp eax, 7; je >1; ja >2 |
| 3651 | | cmp eax, 7; je >1; ja >2 | 3589 | | fpop; fchs; ret |
| 3652 | | fpop; fchs; ret | 3590 | |1: ; fpop; fabs; ret |
| 3653 | |1: ; fpop; fabs; ret | 3591 | |2: ; cmp eax, 9; je >1; ja >2 |
| 3654 | |2: ; cmp eax, 9; je >1; ja >2 | 3592 | | fpatan; ret |
| 3655 | | fpatan; ret | 3593 | |1: ; fxch; fscale; fpop1; ret |
| 3656 | |1: ; fxch; fscale; fpop1; ret | 3594 | |2: ; cmp eax, 11; je >1; ja >9 |
| 3657 | |2: ; cmp eax, 11; je >1; ja >9 | 3595 | | fucomi st1; fcmovnbe st1; fpop1; ret |
| 3658 | ||if (cmov) { | 3596 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret |
| 3659 | | fucomi st1; fcmovnbe st1; fpop1; ret | 3597 | |9: ; int3 // Bad op. |
| 3660 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | 3598 | | |
| 3661 | ||} else { | 3599 | |.endif |
| 3662 | | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret | ||
| 3663 | |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret | ||
| 3664 | ||} | ||
| 3665 | |9: ; int3 // Bad op. | ||
| 3666 | } | ||
| 3667 | | | 3600 | | |
| 3668 | |//----------------------------------------------------------------------- | 3601 | |//----------------------------------------------------------------------- |
| 3669 | |//-- Miscellaneous functions -------------------------------------------- | 3602 | |//-- Miscellaneous functions -------------------------------------------- |
| @@ -3726,7 +3659,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3726 | | | 3659 | | |
| 3727 | |// Handler for callback functions. Callback slot number in ah/al. | 3660 | |// Handler for callback functions. Callback slot number in ah/al. |
| 3728 | |->vm_ffi_callback: | 3661 | |->vm_ffi_callback: |
| 3729 | #if LJ_HASFFI | 3662 | |.if FFI |
| 3730 | |.type CTSTATE, CTState, PC | 3663 | |.type CTSTATE, CTState, PC |
| 3731 | |.if not X64 | 3664 | |.if not X64 |
| 3732 | | sub esp, 16 // Leave room for SAVE_ERRF etc. | 3665 | | sub esp, 16 // Leave room for SAVE_ERRF etc. |
| @@ -3781,10 +3714,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3781 | | shr RD, 3 | 3714 | | shr RD, 3 |
| 3782 | | add RD, 1 | 3715 | | add RD, 1 |
| 3783 | | ins_callt | 3716 | | ins_callt |
| 3784 | #endif | 3717 | |.endif |
| 3785 | | | 3718 | | |
| 3786 | |->cont_ffi_callback: // Return from FFI callback. | 3719 | |->cont_ffi_callback: // Return from FFI callback. |
| 3787 | #if LJ_HASFFI | 3720 | |.if FFI |
| 3788 | | mov L:RA, SAVE_L | 3721 | | mov L:RA, SAVE_L |
| 3789 | | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] | 3722 | | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] |
| 3790 | | mov aword CTSTATE->L, L:RAa | 3723 | | mov aword CTSTATE->L, L:RAa |
| @@ -3819,11 +3752,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3819 | | push ecx | 3752 | | push ecx |
| 3820 | | ret | 3753 | | ret |
| 3821 | |.endif | 3754 | |.endif |
| 3822 | #endif | 3755 | |.endif |
| 3823 | | | 3756 | | |
| 3824 | |->vm_ffi_call@4: // Call C function via FFI. | 3757 | |->vm_ffi_call@4: // Call C function via FFI. |
| 3825 | | // Caveat: needs special frame unwinding, see below. | 3758 | | // Caveat: needs special frame unwinding, see below. |
| 3826 | #if LJ_HASFFI | 3759 | |.if FFI |
| 3827 | |.if X64 | 3760 | |.if X64 |
| 3828 | | .type CCSTATE, CCallState, rbx | 3761 | | .type CCSTATE, CCallState, rbx |
| 3829 | | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 | 3762 | | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 |
| @@ -3838,9 +3771,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3838 | | sub rsp, rax | 3771 | | sub rsp, rax |
| 3839 | |.else | 3772 | |.else |
| 3840 | | sub esp, CCSTATE->spadj | 3773 | | sub esp, CCSTATE->spadj |
| 3841 | #if LJ_TARGET_WINDOWS | 3774 | |.if WIN |
| 3842 | | mov CCSTATE->spadj, esp | 3775 | | mov CCSTATE->spadj, esp |
| 3843 | #endif | 3776 | |.endif |
| 3844 | |.endif | 3777 | |.endif |
| 3845 | | | 3778 | | |
| 3846 | | // Copy stack slots. | 3779 | | // Copy stack slots. |
| @@ -3907,9 +3840,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3907 | |6: | 3840 | |6: |
| 3908 | | fstp dword CCSTATE->fpr[0].f[0] | 3841 | | fstp dword CCSTATE->fpr[0].f[0] |
| 3909 | |7: | 3842 | |7: |
| 3910 | #if LJ_TARGET_WINDOWS | 3843 | |.if WIN |
| 3911 | | sub CCSTATE->spadj, esp | 3844 | | sub CCSTATE->spadj, esp |
| 3912 | #endif | 3845 | |.endif |
| 3913 | |.endif | 3846 | |.endif |
| 3914 | | | 3847 | | |
| 3915 | |.if X64 | 3848 | |.if X64 |
| @@ -3917,14 +3850,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
| 3917 | |.else | 3850 | |.else |
| 3918 | | mov ebx, [ebp-4]; leave; ret | 3851 | | mov ebx, [ebp-4]; leave; ret |
| 3919 | |.endif | 3852 | |.endif |
| 3920 | #endif | 3853 | |.endif |
| 3921 | |// Note: vm_ffi_call must be the last function in this object file! | 3854 | |// Note: vm_ffi_call must be the last function in this object file! |
| 3922 | | | 3855 | | |
| 3923 | |//----------------------------------------------------------------------- | 3856 | |//----------------------------------------------------------------------- |
| 3924 | } | 3857 | } |
| 3925 | 3858 | ||
| 3926 | /* Generate the code for a single instruction. */ | 3859 | /* Generate the code for a single instruction. */ |
| 3927 | static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | 3860 | static void build_ins(BuildCtx *ctx, BCOp op, int defop) |
| 3928 | { | 3861 | { |
| 3929 | int vk = 0; | 3862 | int vk = 0; |
| 3930 | |// Note: aligning all instructions does not pay off. | 3863 | |// Note: aligning all instructions does not pay off. |
| @@ -3957,79 +3890,79 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 3957 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 3890 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
| 3958 | | // RA = src1, RD = src2, JMP with RD = target | 3891 | | // RA = src1, RD = src2, JMP with RD = target |
| 3959 | | ins_AD | 3892 | | ins_AD |
| 3960 | if (LJ_DUALNUM) { | 3893 | |.if DUALNUM |
| 3961 | | checkint RA, >7 | 3894 | | checkint RA, >7 |
| 3962 | | checkint RD, >8 | 3895 | | checkint RD, >8 |
| 3963 | | mov RB, dword [BASE+RA*8] | 3896 | | mov RB, dword [BASE+RA*8] |
| 3964 | | add PC, 4 | 3897 | | add PC, 4 |
| 3965 | | cmp RB, dword [BASE+RD*8] | 3898 | | cmp RB, dword [BASE+RD*8] |
| 3966 | | jmp_comp jge, jl, jg, jle, >9 | 3899 | | jmp_comp jge, jl, jg, jle, >9 |
| 3967 | |6: | 3900 | |6: |
| 3968 | | movzx RD, PC_RD | 3901 | | movzx RD, PC_RD |
| 3969 | | branchPC RD | 3902 | | branchPC RD |
| 3970 | |9: | 3903 | |9: |
| 3971 | | ins_next | 3904 | | ins_next |
| 3972 | | | 3905 | | |
| 3973 | |7: // RA is not an integer. | 3906 | |7: // RA is not an integer. |
| 3974 | | ja ->vmeta_comp | 3907 | | ja ->vmeta_comp |
| 3975 | | // RA is a number. | 3908 | | // RA is a number. |
| 3976 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | 3909 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp |
| 3977 | | // RA is a number, RD is an integer. | 3910 | | // RA is a number, RD is an integer. |
| 3978 | if (sse) { | 3911 | |.if SSE |
| 3979 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3912 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
| 3980 | | jmp >2 | 3913 | | jmp >2 |
| 3981 | } else { | 3914 | |.else |
| 3982 | | fld qword [BASE+RA*8] | 3915 | | fld qword [BASE+RA*8] |
| 3983 | | fild dword [BASE+RD*8] | 3916 | | fild dword [BASE+RD*8] |
| 3984 | | jmp >3 | 3917 | | jmp >3 |
| 3985 | } | 3918 | |.endif |
| 3986 | | | 3919 | | |
| 3987 | |8: // RA is an integer, RD is not an integer. | 3920 | |8: // RA is an integer, RD is not an integer. |
| 3988 | | ja ->vmeta_comp | 3921 | | ja ->vmeta_comp |
| 3989 | | // RA is an integer, RD is a number. | 3922 | | // RA is an integer, RD is a number. |
| 3990 | if (sse) { | 3923 | |.if SSE |
| 3991 | | cvtsi2sd xmm1, dword [BASE+RA*8] | 3924 | | cvtsi2sd xmm1, dword [BASE+RA*8] |
| 3992 | | movsd xmm0, qword [BASE+RD*8] | 3925 | | movsd xmm0, qword [BASE+RD*8] |
| 3993 | | add PC, 4 | 3926 | | add PC, 4 |
| 3994 | | ucomisd xmm0, xmm1 | 3927 | | ucomisd xmm0, xmm1 |
| 3995 | | jmp_comp jbe, ja, jb, jae, <9 | 3928 | | jmp_comp jbe, ja, jb, jae, <9 |
| 3996 | | jmp <6 | 3929 | | jmp <6 |
| 3997 | } else { | 3930 | |.else |
| 3998 | | fild dword [BASE+RA*8] | 3931 | | fild dword [BASE+RA*8] |
| 3999 | | jmp >2 | 3932 | | jmp >2 |
| 4000 | } | 3933 | |.endif |
| 4001 | } else { | 3934 | |.else |
| 4002 | | checknum RA, ->vmeta_comp | 3935 | | checknum RA, ->vmeta_comp |
| 4003 | | checknum RD, ->vmeta_comp | 3936 | | checknum RD, ->vmeta_comp |
| 4004 | } | 3937 | |.endif |
| 4005 | if (sse) { | 3938 | |.if SSE |
| 4006 | |1: | 3939 | |1: |
| 4007 | | movsd xmm0, qword [BASE+RD*8] | 3940 | | movsd xmm0, qword [BASE+RD*8] |
| 4008 | |2: | 3941 | |2: |
| 4009 | | add PC, 4 | 3942 | | add PC, 4 |
| 4010 | | ucomisd xmm0, qword [BASE+RA*8] | 3943 | | ucomisd xmm0, qword [BASE+RA*8] |
| 4011 | |3: | 3944 | |3: |
| 4012 | } else { | 3945 | |.else |
| 4013 | |1: | 3946 | |1: |
| 4014 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | 3947 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. |
| 4015 | |2: | 3948 | |2: |
| 4016 | | fld qword [BASE+RD*8] | 3949 | | fld qword [BASE+RD*8] |
| 4017 | |3: | 3950 | |3: |
| 4018 | | add PC, 4 | 3951 | | add PC, 4 |
| 4019 | | fcomparepp // eax (RD) modified! | 3952 | | fcomparepp |
| 4020 | } | 3953 | |.endif |
| 4021 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3954 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
| 4022 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3955 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
| 4023 | if (LJ_DUALNUM) { | 3956 | |.if DUALNUM |
| 4024 | | jmp_comp jbe, ja, jb, jae, <9 | 3957 | | jmp_comp jbe, ja, jb, jae, <9 |
| 4025 | | jmp <6 | 3958 | | jmp <6 |
| 4026 | } else { | 3959 | |.else |
| 4027 | | jmp_comp jbe, ja, jb, jae, >1 | 3960 | | jmp_comp jbe, ja, jb, jae, >1 |
| 4028 | | movzx RD, PC_RD | 3961 | | movzx RD, PC_RD |
| 4029 | | branchPC RD | 3962 | | branchPC RD |
| 4030 | |1: | 3963 | |1: |
| 4031 | | ins_next | 3964 | | ins_next |
| 4032 | } | 3965 | |.endif |
| 4033 | break; | 3966 | break; |
| 4034 | 3967 | ||
| 4035 | case BC_ISEQV: case BC_ISNEV: | 3968 | case BC_ISEQV: case BC_ISNEV: |
| @@ -4037,63 +3970,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4037 | | ins_AD // RA = src1, RD = src2, JMP with RD = target | 3970 | | ins_AD // RA = src1, RD = src2, JMP with RD = target |
| 4038 | | mov RB, [BASE+RD*8+4] | 3971 | | mov RB, [BASE+RD*8+4] |
| 4039 | | add PC, 4 | 3972 | | add PC, 4 |
| 4040 | if (LJ_DUALNUM) { | 3973 | |.if DUALNUM |
| 4041 | | cmp RB, LJ_TISNUM; jne >7 | 3974 | | cmp RB, LJ_TISNUM; jne >7 |
| 4042 | | checkint RA, >8 | 3975 | | checkint RA, >8 |
| 4043 | | mov RB, dword [BASE+RD*8] | 3976 | | mov RB, dword [BASE+RD*8] |
| 4044 | | cmp RB, dword [BASE+RA*8] | 3977 | | cmp RB, dword [BASE+RA*8] |
| 4045 | if (vk) { | 3978 | if (vk) { |
| 4046 | | jne >9 | 3979 | | jne >9 |
| 4047 | } else { | ||
| 4048 | | je >9 | ||
| 4049 | } | ||
| 4050 | | movzx RD, PC_RD | ||
| 4051 | | branchPC RD | ||
| 4052 | |9: | ||
| 4053 | | ins_next | ||
| 4054 | | | ||
| 4055 | |7: // RD is not an integer. | ||
| 4056 | | ja >5 | ||
| 4057 | | // RD is a number. | ||
| 4058 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | ||
| 4059 | | // RD is a number, RA is an integer. | ||
| 4060 | if (sse) { | ||
| 4061 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
| 4062 | } else { | ||
| 4063 | | fild dword [BASE+RA*8] | ||
| 4064 | } | ||
| 4065 | | jmp >2 | ||
| 4066 | | | ||
| 4067 | |8: // RD is an integer, RA is not an integer. | ||
| 4068 | | ja >5 | ||
| 4069 | | // RD is an integer, RA is a number. | ||
| 4070 | if (sse) { | ||
| 4071 | | cvtsi2sd xmm0, dword [BASE+RD*8] | ||
| 4072 | | ucomisd xmm0, qword [BASE+RA*8] | ||
| 4073 | } else { | ||
| 4074 | | fild dword [BASE+RD*8] | ||
| 4075 | | fld qword [BASE+RA*8] | ||
| 4076 | } | ||
| 4077 | | jmp >4 | ||
| 4078 | | | ||
| 4079 | } else { | ||
| 4080 | | cmp RB, LJ_TISNUM; jae >5 | ||
| 4081 | | checknum RA, >5 | ||
| 4082 | } | ||
| 4083 | if (sse) { | ||
| 4084 | |1: | ||
| 4085 | | movsd xmm0, qword [BASE+RA*8] | ||
| 4086 | |2: | ||
| 4087 | | ucomisd xmm0, qword [BASE+RD*8] | ||
| 4088 | |4: | ||
| 4089 | } else { | 3980 | } else { |
| 4090 | |1: | 3981 | | je >9 |
| 4091 | | fld qword [BASE+RA*8] | ||
| 4092 | |2: | ||
| 4093 | | fld qword [BASE+RD*8] | ||
| 4094 | |4: | ||
| 4095 | | fcomparepp // eax (RD) modified! | ||
| 4096 | } | 3982 | } |
| 3983 | | movzx RD, PC_RD | ||
| 3984 | | branchPC RD | ||
| 3985 | |9: | ||
| 3986 | | ins_next | ||
| 3987 | | | ||
| 3988 | |7: // RD is not an integer. | ||
| 3989 | | ja >5 | ||
| 3990 | | // RD is a number. | ||
| 3991 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | ||
| 3992 | | // RD is a number, RA is an integer. | ||
| 3993 | |.if SSE | ||
| 3994 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
| 3995 | |.else | ||
| 3996 | | fild dword [BASE+RA*8] | ||
| 3997 | |.endif | ||
| 3998 | | jmp >2 | ||
| 3999 | | | ||
| 4000 | |8: // RD is an integer, RA is not an integer. | ||
| 4001 | | ja >5 | ||
| 4002 | | // RD is an integer, RA is a number. | ||
| 4003 | |.if SSE | ||
| 4004 | | cvtsi2sd xmm0, dword [BASE+RD*8] | ||
| 4005 | | ucomisd xmm0, qword [BASE+RA*8] | ||
| 4006 | |.else | ||
| 4007 | | fild dword [BASE+RD*8] | ||
| 4008 | | fld qword [BASE+RA*8] | ||
| 4009 | |.endif | ||
| 4010 | | jmp >4 | ||
| 4011 | | | ||
| 4012 | |.else | ||
| 4013 | | cmp RB, LJ_TISNUM; jae >5 | ||
| 4014 | | checknum RA, >5 | ||
| 4015 | |.endif | ||
| 4016 | |.if SSE | ||
| 4017 | |1: | ||
| 4018 | | movsd xmm0, qword [BASE+RA*8] | ||
| 4019 | |2: | ||
| 4020 | | ucomisd xmm0, qword [BASE+RD*8] | ||
| 4021 | |4: | ||
| 4022 | |.else | ||
| 4023 | |1: | ||
| 4024 | | fld qword [BASE+RA*8] | ||
| 4025 | |2: | ||
| 4026 | | fld qword [BASE+RD*8] | ||
| 4027 | |4: | ||
| 4028 | | fcomparepp | ||
| 4029 | |.endif | ||
| 4097 | iseqne_fp: | 4030 | iseqne_fp: |
| 4098 | if (vk) { | 4031 | if (vk) { |
| 4099 | | jp >2 // Unordered means not equal. | 4032 | | jp >2 // Unordered means not equal. |
| @@ -4129,10 +4062,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4129 | | | 4062 | | |
| 4130 | if (op == BC_ISEQV || op == BC_ISNEV) { | 4063 | if (op == BC_ISEQV || op == BC_ISNEV) { |
| 4131 | |5: // Either or both types are not numbers. | 4064 | |5: // Either or both types are not numbers. |
| 4132 | if (LJ_HASFFI) { | 4065 | |.if FFI |
| 4133 | | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd | 4066 | | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd |
| 4134 | | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd | 4067 | | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd |
| 4135 | } | 4068 | |.endif |
| 4136 | | checktp RA, RB // Compare types. | 4069 | | checktp RA, RB // Compare types. |
| 4137 | | jne <2 // Not the same type? | 4070 | | jne <2 // Not the same type? |
| 4138 | | cmp RB, LJ_TISPRI | 4071 | | cmp RB, LJ_TISPRI |
| @@ -4163,7 +4096,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4163 | | mov RB, 1 // ne = 1 | 4096 | | mov RB, 1 // ne = 1 |
| 4164 | } | 4097 | } |
| 4165 | | jmp ->vmeta_equal // Handle __eq metamethod. | 4098 | | jmp ->vmeta_equal // Handle __eq metamethod. |
| 4166 | } else if (LJ_HASFFI) { | 4099 | } else { |
| 4100 | |.if FFI | ||
| 4167 | |3: | 4101 | |3: |
| 4168 | | cmp RB, LJ_TCDATA | 4102 | | cmp RB, LJ_TCDATA |
| 4169 | if (LJ_DUALNUM && vk) { | 4103 | if (LJ_DUALNUM && vk) { |
| @@ -4172,6 +4106,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4172 | | jne <2 | 4106 | | jne <2 |
| 4173 | } | 4107 | } |
| 4174 | | jmp ->vmeta_equal_cd | 4108 | | jmp ->vmeta_equal_cd |
| 4109 | |.endif | ||
| 4175 | } | 4110 | } |
| 4176 | break; | 4111 | break; |
| 4177 | case BC_ISEQS: case BC_ISNES: | 4112 | case BC_ISEQS: case BC_ISNES: |
| @@ -4194,59 +4129,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4194 | | ins_AD // RA = src, RD = num const, JMP with RD = target | 4129 | | ins_AD // RA = src, RD = num const, JMP with RD = target |
| 4195 | | mov RB, [BASE+RA*8+4] | 4130 | | mov RB, [BASE+RA*8+4] |
| 4196 | | add PC, 4 | 4131 | | add PC, 4 |
| 4197 | if (LJ_DUALNUM) { | 4132 | |.if DUALNUM |
| 4198 | | cmp RB, LJ_TISNUM; jne >7 | 4133 | | cmp RB, LJ_TISNUM; jne >7 |
| 4199 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 | 4134 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 |
| 4200 | | mov RB, dword [KBASE+RD*8] | 4135 | | mov RB, dword [KBASE+RD*8] |
| 4201 | | cmp RB, dword [BASE+RA*8] | 4136 | | cmp RB, dword [BASE+RA*8] |
| 4202 | if (vk) { | 4137 | if (vk) { |
| 4203 | | jne >9 | 4138 | | jne >9 |
| 4204 | } else { | ||
| 4205 | | je >9 | ||
| 4206 | } | ||
| 4207 | | movzx RD, PC_RD | ||
| 4208 | | branchPC RD | ||
| 4209 | |9: | ||
| 4210 | | ins_next | ||
| 4211 | | | ||
| 4212 | |7: // RA is not an integer. | ||
| 4213 | | ja >3 | ||
| 4214 | | // RA is a number. | ||
| 4215 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | ||
| 4216 | | // RA is a number, RD is an integer. | ||
| 4217 | if (sse) { | ||
| 4218 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | ||
| 4219 | } else { | ||
| 4220 | | fild dword [KBASE+RD*8] | ||
| 4221 | } | ||
| 4222 | | jmp >2 | ||
| 4223 | | | ||
| 4224 | |8: // RA is an integer, RD is a number. | ||
| 4225 | if (sse) { | ||
| 4226 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
| 4227 | | ucomisd xmm0, qword [KBASE+RD*8] | ||
| 4228 | } else { | ||
| 4229 | | fild dword [BASE+RA*8] | ||
| 4230 | | fld qword [KBASE+RD*8] | ||
| 4231 | } | ||
| 4232 | | jmp >4 | ||
| 4233 | } else { | ||
| 4234 | | cmp RB, LJ_TISNUM; jae >3 | ||
| 4235 | } | ||
| 4236 | if (sse) { | ||
| 4237 | |1: | ||
| 4238 | | movsd xmm0, qword [KBASE+RD*8] | ||
| 4239 | |2: | ||
| 4240 | | ucomisd xmm0, qword [BASE+RA*8] | ||
| 4241 | |4: | ||
| 4242 | } else { | 4139 | } else { |
| 4243 | |1: | 4140 | | je >9 |
| 4244 | | fld qword [KBASE+RD*8] | ||
| 4245 | |2: | ||
| 4246 | | fld qword [BASE+RA*8] | ||
| 4247 | |4: | ||
| 4248 | | fcomparepp // eax (RD) modified! | ||
| 4249 | } | 4141 | } |
| 4142 | | movzx RD, PC_RD | ||
| 4143 | | branchPC RD | ||
| 4144 | |9: | ||
| 4145 | | ins_next | ||
| 4146 | | | ||
| 4147 | |7: // RA is not an integer. | ||
| 4148 | | ja >3 | ||
| 4149 | | // RA is a number. | ||
| 4150 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | ||
| 4151 | | // RA is a number, RD is an integer. | ||
| 4152 | |.if SSE | ||
| 4153 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | ||
| 4154 | |.else | ||
| 4155 | | fild dword [KBASE+RD*8] | ||
| 4156 | |.endif | ||
| 4157 | | jmp >2 | ||
| 4158 | | | ||
| 4159 | |8: // RA is an integer, RD is a number. | ||
| 4160 | |.if SSE | ||
| 4161 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
| 4162 | | ucomisd xmm0, qword [KBASE+RD*8] | ||
| 4163 | |.else | ||
| 4164 | | fild dword [BASE+RA*8] | ||
| 4165 | | fld qword [KBASE+RD*8] | ||
| 4166 | |.endif | ||
| 4167 | | jmp >4 | ||
| 4168 | |.else | ||
| 4169 | | cmp RB, LJ_TISNUM; jae >3 | ||
| 4170 | |.endif | ||
| 4171 | |.if SSE | ||
| 4172 | |1: | ||
| 4173 | | movsd xmm0, qword [KBASE+RD*8] | ||
| 4174 | |2: | ||
| 4175 | | ucomisd xmm0, qword [BASE+RA*8] | ||
| 4176 | |4: | ||
| 4177 | |.else | ||
| 4178 | |1: | ||
| 4179 | | fld qword [KBASE+RD*8] | ||
| 4180 | |2: | ||
| 4181 | | fld qword [BASE+RA*8] | ||
| 4182 | |4: | ||
| 4183 | | fcomparepp | ||
| 4184 | |.endif | ||
| 4250 | goto iseqne_fp; | 4185 | goto iseqne_fp; |
| 4251 | case BC_ISEQP: case BC_ISNEP: | 4186 | case BC_ISEQP: case BC_ISNEP: |
| 4252 | vk = op == BC_ISEQP; | 4187 | vk = op == BC_ISEQP; |
| @@ -4322,59 +4257,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4322 | break; | 4257 | break; |
| 4323 | case BC_UNM: | 4258 | case BC_UNM: |
| 4324 | | ins_AD // RA = dst, RD = src | 4259 | | ins_AD // RA = dst, RD = src |
| 4325 | if (LJ_DUALNUM) { | 4260 | |.if DUALNUM |
| 4326 | | checkint RD, >5 | 4261 | | checkint RD, >5 |
| 4327 | | mov RB, [BASE+RD*8] | 4262 | | mov RB, [BASE+RD*8] |
| 4328 | | neg RB | 4263 | | neg RB |
| 4329 | | jo >4 | 4264 | | jo >4 |
| 4330 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4265 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
| 4331 | | mov dword [BASE+RA*8], RB | 4266 | | mov dword [BASE+RA*8], RB |
| 4332 | |9: | 4267 | |9: |
| 4333 | | ins_next | 4268 | | ins_next |
| 4334 | |4: | 4269 | |4: |
| 4335 | | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. | 4270 | | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. |
| 4336 | | mov dword [BASE+RA*8], 0 | 4271 | | mov dword [BASE+RA*8], 0 |
| 4337 | | jmp <9 | 4272 | | jmp <9 |
| 4338 | |5: | 4273 | |5: |
| 4339 | | ja ->vmeta_unm | 4274 | | ja ->vmeta_unm |
| 4340 | } else { | 4275 | |.else |
| 4341 | | checknum RD, ->vmeta_unm | 4276 | | checknum RD, ->vmeta_unm |
| 4342 | } | 4277 | |.endif |
| 4343 | if (sse) { | 4278 | |.if SSE |
| 4344 | | movsd xmm0, qword [BASE+RD*8] | 4279 | | movsd xmm0, qword [BASE+RD*8] |
| 4345 | | sseconst_sign xmm1, RDa | 4280 | | sseconst_sign xmm1, RDa |
| 4346 | | xorps xmm0, xmm1 | 4281 | | xorps xmm0, xmm1 |
| 4347 | | movsd qword [BASE+RA*8], xmm0 | 4282 | | movsd qword [BASE+RA*8], xmm0 |
| 4348 | } else { | 4283 | |.else |
| 4349 | | fld qword [BASE+RD*8] | 4284 | | fld qword [BASE+RD*8] |
| 4350 | | fchs | 4285 | | fchs |
| 4351 | | fstp qword [BASE+RA*8] | 4286 | | fstp qword [BASE+RA*8] |
| 4352 | } | 4287 | |.endif |
| 4353 | if (LJ_DUALNUM) { | 4288 | |.if DUALNUM |
| 4354 | | jmp <9 | 4289 | | jmp <9 |
| 4355 | } else { | 4290 | |.else |
| 4356 | | ins_next | 4291 | | ins_next |
| 4357 | } | 4292 | |.endif |
| 4358 | break; | 4293 | break; |
| 4359 | case BC_LEN: | 4294 | case BC_LEN: |
| 4360 | | ins_AD // RA = dst, RD = src | 4295 | | ins_AD // RA = dst, RD = src |
| 4361 | | checkstr RD, >2 | 4296 | | checkstr RD, >2 |
| 4362 | | mov STR:RD, [BASE+RD*8] | 4297 | | mov STR:RD, [BASE+RD*8] |
| 4363 | if (LJ_DUALNUM) { | 4298 | |.if DUALNUM |
| 4364 | | mov RD, dword STR:RD->len | 4299 | | mov RD, dword STR:RD->len |
| 4365 | |1: | 4300 | |1: |
| 4366 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4301 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
| 4367 | | mov dword [BASE+RA*8], RD | 4302 | | mov dword [BASE+RA*8], RD |
| 4368 | } else if (sse) { | 4303 | |.elif SSE |
| 4369 | | xorps xmm0, xmm0 | 4304 | | xorps xmm0, xmm0 |
| 4370 | | cvtsi2sd xmm0, dword STR:RD->len | 4305 | | cvtsi2sd xmm0, dword STR:RD->len |
| 4371 | |1: | 4306 | |1: |
| 4372 | | movsd qword [BASE+RA*8], xmm0 | 4307 | | movsd qword [BASE+RA*8], xmm0 |
| 4373 | } else { | 4308 | |.else |
| 4374 | | fild dword STR:RD->len | 4309 | | fild dword STR:RD->len |
| 4375 | |1: | 4310 | |1: |
| 4376 | | fstp qword [BASE+RA*8] | 4311 | | fstp qword [BASE+RA*8] |
| 4377 | } | 4312 | |.endif |
| 4378 | | ins_next | 4313 | | ins_next |
| 4379 | |2: | 4314 | |2: |
| 4380 | | checktab RD, ->vmeta_len | 4315 | | checktab RD, ->vmeta_len |
| @@ -4389,16 +4324,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4389 | | mov RB, BASE // Save BASE. | 4324 | | mov RB, BASE // Save BASE. |
| 4390 | | call extern lj_tab_len@4 // (GCtab *t) | 4325 | | call extern lj_tab_len@4 // (GCtab *t) |
| 4391 | | // Length of table returned in eax (RD). | 4326 | | // Length of table returned in eax (RD). |
| 4392 | if (LJ_DUALNUM) { | 4327 | |.if DUALNUM |
| 4393 | | // Nothing to do. | 4328 | | // Nothing to do. |
| 4394 | } else if (sse) { | 4329 | |.elif SSE |
| 4395 | | cvtsi2sd xmm0, RD | 4330 | | cvtsi2sd xmm0, RD |
| 4396 | } else { | 4331 | |.else |
| 4397 | |.if not X64 | 4332 | | mov ARG1, RD |
| 4398 | | mov ARG1, RD | 4333 | | fild ARG1 |
| 4399 | | fild ARG1 | 4334 | |.endif |
| 4400 | |.endif | ||
| 4401 | } | ||
| 4402 | | mov BASE, RB // Restore BASE. | 4335 | | mov BASE, RB // Restore BASE. |
| 4403 | | movzx RA, PC_RA | 4336 | | movzx RA, PC_RA |
| 4404 | | jmp <1 | 4337 | | jmp <1 |
| @@ -4418,40 +4351,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4418 | ||switch (vk) { | 4351 | ||switch (vk) { |
| 4419 | ||case 0: | 4352 | ||case 0: |
| 4420 | | checknum RB, ->vmeta_arith_vn | 4353 | | checknum RB, ->vmeta_arith_vn |
| 4421 | ||if (LJ_DUALNUM) { | 4354 | | .if DUALNUM |
| 4422 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | 4355 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn |
| 4423 | ||} | 4356 | | .endif |
| 4424 | ||if (sse) { | 4357 | | .if SSE |
| 4425 | | movsd xmm0, qword [BASE+RB*8] | 4358 | | movsd xmm0, qword [BASE+RB*8] |
| 4426 | | sseins ssereg, qword [KBASE+RC*8] | 4359 | | sseins ssereg, qword [KBASE+RC*8] |
| 4427 | ||} else { | 4360 | | .else |
| 4428 | | fld qword [BASE+RB*8] | 4361 | | fld qword [BASE+RB*8] |
| 4429 | | x87ins qword [KBASE+RC*8] | 4362 | | x87ins qword [KBASE+RC*8] |
| 4430 | ||} | 4363 | | .endif |
| 4431 | || break; | 4364 | || break; |
| 4432 | ||case 1: | 4365 | ||case 1: |
| 4433 | | checknum RB, ->vmeta_arith_nv | 4366 | | checknum RB, ->vmeta_arith_nv |
| 4434 | ||if (LJ_DUALNUM) { | 4367 | | .if DUALNUM |
| 4435 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | 4368 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv |
| 4436 | ||} | 4369 | | .endif |
| 4437 | ||if (sse) { | 4370 | | .if SSE |
| 4438 | | movsd xmm0, qword [KBASE+RC*8] | 4371 | | movsd xmm0, qword [KBASE+RC*8] |
| 4439 | | sseins ssereg, qword [BASE+RB*8] | 4372 | | sseins ssereg, qword [BASE+RB*8] |
| 4440 | ||} else { | 4373 | | .else |
| 4441 | | fld qword [KBASE+RC*8] | 4374 | | fld qword [KBASE+RC*8] |
| 4442 | | x87ins qword [BASE+RB*8] | 4375 | | x87ins qword [BASE+RB*8] |
| 4443 | ||} | 4376 | | .endif |
| 4444 | || break; | 4377 | || break; |
| 4445 | ||default: | 4378 | ||default: |
| 4446 | | checknum RB, ->vmeta_arith_vv | 4379 | | checknum RB, ->vmeta_arith_vv |
| 4447 | | checknum RC, ->vmeta_arith_vv | 4380 | | checknum RC, ->vmeta_arith_vv |
| 4448 | ||if (sse) { | 4381 | | .if SSE |
| 4449 | | movsd xmm0, qword [BASE+RB*8] | 4382 | | movsd xmm0, qword [BASE+RB*8] |
| 4450 | | sseins ssereg, qword [BASE+RC*8] | 4383 | | sseins ssereg, qword [BASE+RC*8] |
| 4451 | ||} else { | 4384 | | .else |
| 4452 | | fld qword [BASE+RB*8] | 4385 | | fld qword [BASE+RB*8] |
| 4453 | | x87ins qword [BASE+RC*8] | 4386 | | x87ins qword [BASE+RC*8] |
| 4454 | ||} | 4387 | | .endif |
| 4455 | || break; | 4388 | || break; |
| 4456 | ||} | 4389 | ||} |
| 4457 | |.endmacro | 4390 | |.endmacro |
| @@ -4489,11 +4422,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4489 | |.endmacro | 4422 | |.endmacro |
| 4490 | | | 4423 | | |
| 4491 | |.macro ins_arithpost | 4424 | |.macro ins_arithpost |
| 4492 | ||if (sse) { | 4425 | |.if SSE |
| 4493 | | movsd qword [BASE+RA*8], xmm0 | 4426 | | movsd qword [BASE+RA*8], xmm0 |
| 4494 | ||} else { | 4427 | |.else |
| 4495 | | fstp qword [BASE+RA*8] | 4428 | | fstp qword [BASE+RA*8] |
| 4496 | ||} | 4429 | |.endif |
| 4497 | |.endmacro | 4430 | |.endmacro |
| 4498 | | | 4431 | | |
| 4499 | |.macro ins_arith, x87ins, sseins | 4432 | |.macro ins_arith, x87ins, sseins |
| @@ -4503,11 +4436,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4503 | |.endmacro | 4436 | |.endmacro |
| 4504 | | | 4437 | | |
| 4505 | |.macro ins_arith, intins, x87ins, sseins | 4438 | |.macro ins_arith, intins, x87ins, sseins |
| 4506 | ||if (LJ_DUALNUM) { | 4439 | |.if DUALNUM |
| 4507 | | ins_arithdn intins | 4440 | | ins_arithdn intins |
| 4508 | ||} else { | 4441 | |.else |
| 4509 | | ins_arith, x87ins, sseins | 4442 | | ins_arith, x87ins, sseins |
| 4510 | ||} | 4443 | |.endif |
| 4511 | |.endmacro | 4444 | |.endmacro |
| 4512 | 4445 | ||
| 4513 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 4446 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
| @@ -4591,39 +4524,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4591 | | ins_next | 4524 | | ins_next |
| 4592 | break; | 4525 | break; |
| 4593 | case BC_KCDATA: | 4526 | case BC_KCDATA: |
| 4594 | #if LJ_HASFFI | 4527 | |.if FFI |
| 4595 | | ins_AND // RA = dst, RD = cdata const (~) | 4528 | | ins_AND // RA = dst, RD = cdata const (~) |
| 4596 | | mov RD, [KBASE+RD*4] | 4529 | | mov RD, [KBASE+RD*4] |
| 4597 | | mov dword [BASE+RA*8+4], LJ_TCDATA | 4530 | | mov dword [BASE+RA*8+4], LJ_TCDATA |
| 4598 | | mov [BASE+RA*8], RD | 4531 | | mov [BASE+RA*8], RD |
| 4599 | | ins_next | 4532 | | ins_next |
| 4600 | #endif | 4533 | |.endif |
| 4601 | break; | 4534 | break; |
| 4602 | case BC_KSHORT: | 4535 | case BC_KSHORT: |
| 4603 | | ins_AD // RA = dst, RD = signed int16 literal | 4536 | | ins_AD // RA = dst, RD = signed int16 literal |
| 4604 | if (LJ_DUALNUM) { | 4537 | |.if DUALNUM |
| 4605 | | movsx RD, RDW | 4538 | | movsx RD, RDW |
| 4606 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4539 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
| 4607 | | mov dword [BASE+RA*8], RD | 4540 | | mov dword [BASE+RA*8], RD |
| 4608 | } else if (sse) { | 4541 | |.elif SSE |
| 4609 | | movsx RD, RDW // Sign-extend literal. | 4542 | | movsx RD, RDW // Sign-extend literal. |
| 4610 | | cvtsi2sd xmm0, RD | 4543 | | cvtsi2sd xmm0, RD |
| 4611 | | movsd qword [BASE+RA*8], xmm0 | 4544 | | movsd qword [BASE+RA*8], xmm0 |
| 4612 | } else { | 4545 | |.else |
| 4613 | | fild PC_RD // Refetch signed RD from instruction. | 4546 | | fild PC_RD // Refetch signed RD from instruction. |
| 4614 | | fstp qword [BASE+RA*8] | 4547 | | fstp qword [BASE+RA*8] |
| 4615 | } | 4548 | |.endif |
| 4616 | | ins_next | 4549 | | ins_next |
| 4617 | break; | 4550 | break; |
| 4618 | case BC_KNUM: | 4551 | case BC_KNUM: |
| 4619 | | ins_AD // RA = dst, RD = num const | 4552 | | ins_AD // RA = dst, RD = num const |
| 4620 | if (sse) { | 4553 | |.if SSE |
| 4621 | | movsd xmm0, qword [KBASE+RD*8] | 4554 | | movsd xmm0, qword [KBASE+RD*8] |
| 4622 | | movsd qword [BASE+RA*8], xmm0 | 4555 | | movsd qword [BASE+RA*8], xmm0 |
| 4623 | } else { | 4556 | |.else |
| 4624 | | fld qword [KBASE+RD*8] | 4557 | | fld qword [KBASE+RD*8] |
| 4625 | | fstp qword [BASE+RA*8] | 4558 | | fstp qword [BASE+RA*8] |
| 4626 | } | 4559 | |.endif |
| 4627 | | ins_next | 4560 | | ins_next |
| 4628 | break; | 4561 | break; |
| 4629 | case BC_KPRI: | 4562 | case BC_KPRI: |
| @@ -4730,18 +4663,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4730 | case BC_USETN: | 4663 | case BC_USETN: |
| 4731 | | ins_AD // RA = upvalue #, RD = num const | 4664 | | ins_AD // RA = upvalue #, RD = num const |
| 4732 | | mov LFUNC:RB, [BASE-8] | 4665 | | mov LFUNC:RB, [BASE-8] |
| 4733 | if (sse) { | 4666 | |.if SSE |
| 4734 | | movsd xmm0, qword [KBASE+RD*8] | 4667 | | movsd xmm0, qword [KBASE+RD*8] |
| 4735 | } else { | 4668 | |.else |
| 4736 | | fld qword [KBASE+RD*8] | 4669 | | fld qword [KBASE+RD*8] |
| 4737 | } | 4670 | |.endif |
| 4738 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 4671 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
| 4739 | | mov RA, UPVAL:RB->v | 4672 | | mov RA, UPVAL:RB->v |
| 4740 | if (sse) { | 4673 | |.if SSE |
| 4741 | | movsd qword [RA], xmm0 | 4674 | | movsd qword [RA], xmm0 |
| 4742 | } else { | 4675 | |.else |
| 4743 | | fstp qword [RA] | 4676 | | fstp qword [RA] |
| 4744 | } | 4677 | |.endif |
| 4745 | | ins_next | 4678 | | ins_next |
| 4746 | break; | 4679 | break; |
| 4747 | case BC_USETP: | 4680 | case BC_USETP: |
| @@ -4889,28 +4822,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 4889 | | mov TAB:RB, [BASE+RB*8] | 4822 | | mov TAB:RB, [BASE+RB*8] |
| 4890 | | | 4823 | | |
| 4891 | | // Integer key? | 4824 | | // Integer key? |
| 4892 | if (LJ_DUALNUM) { | 4825 | |.if DUALNUM |
| 4893 | | checkint RC, >5 | 4826 | | checkint RC, >5 |
| 4894 | | mov RC, dword [BASE+RC*8] | 4827 | | mov RC, dword [BASE+RC*8] |
| 4895 | } else { | 4828 | |.else |
| 4896 | | // Convert number to int and back and compare. | 4829 | | // Convert number to int and back and compare. |
| 4897 | | checknum RC, >5 | 4830 | | checknum RC, >5 |
| 4898 | if (sse) { | 4831 | |.if SSE |
| 4899 | | movsd xmm0, qword [BASE+RC*8] | 4832 | | movsd xmm0, qword [BASE+RC*8] |
| 4900 | | cvtsd2si RC, xmm0 | 4833 | | cvtsd2si RC, xmm0 |
| 4901 | | cvtsi2sd xmm1, RC | 4834 | | cvtsi2sd xmm1, RC |
| 4902 | | ucomisd xmm0, xmm1 | 4835 | | ucomisd xmm0, xmm1 |
| 4903 | } else { | 4836 | |.else |
| 4904 | |.if not X64 | 4837 | | fld qword [BASE+RC*8] |
| 4905 | | fld qword [BASE+RC*8] | 4838 | | fist ARG1 |
| 4906 | | fist ARG1 | 4839 | | fild ARG1 |
| 4907 | | fild ARG1 | 4840 | | fcomparepp |
| 4908 | | fcomparepp // eax (RC) modified! | 4841 | | mov RC, ARG1 |
| 4909 | | mov RC, ARG1 | 4842 | |.endif |
| 4910 | |.endif | 4843 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |
| 4911 | } | 4844 | |.endif |
| 4912 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | ||
| 4913 | } | ||
| 4914 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4845 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
| 4915 | | jae ->vmeta_tgetv // Not in array part? Use fallback. | 4846 | | jae ->vmeta_tgetv // Not in array part? Use fallback. |
| 4916 | | shl RC, 3 | 4847 | | shl RC, 3 |
| @@ -5039,28 +4970,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5039 | | mov TAB:RB, [BASE+RB*8] | 4970 | | mov TAB:RB, [BASE+RB*8] |
| 5040 | | | 4971 | | |
| 5041 | | // Integer key? | 4972 | | // Integer key? |
| 5042 | if (LJ_DUALNUM) { | 4973 | |.if DUALNUM |
| 5043 | | checkint RC, >5 | 4974 | | checkint RC, >5 |
| 5044 | | mov RC, dword [BASE+RC*8] | 4975 | | mov RC, dword [BASE+RC*8] |
| 5045 | } else { | 4976 | |.else |
| 5046 | | // Convert number to int and back and compare. | 4977 | | // Convert number to int and back and compare. |
| 5047 | | checknum RC, >5 | 4978 | | checknum RC, >5 |
| 5048 | if (sse) { | 4979 | |.if SSE |
| 5049 | | movsd xmm0, qword [BASE+RC*8] | 4980 | | movsd xmm0, qword [BASE+RC*8] |
| 5050 | | cvtsd2si RC, xmm0 | 4981 | | cvtsd2si RC, xmm0 |
| 5051 | | cvtsi2sd xmm1, RC | 4982 | | cvtsi2sd xmm1, RC |
| 5052 | | ucomisd xmm0, xmm1 | 4983 | | ucomisd xmm0, xmm1 |
| 5053 | } else { | 4984 | |.else |
| 5054 | |.if not X64 | 4985 | | fld qword [BASE+RC*8] |
| 5055 | | fld qword [BASE+RC*8] | 4986 | | fist ARG1 |
| 5056 | | fist ARG1 | 4987 | | fild ARG1 |
| 5057 | | fild ARG1 | 4988 | | fcomparepp |
| 5058 | | fcomparepp // eax (RC) modified! | 4989 | | mov RC, ARG1 |
| 5059 | | mov RC, ARG1 | 4990 | |.endif |
| 5060 | |.endif | 4991 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |
| 5061 | } | 4992 | |.endif |
| 5062 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | ||
| 5063 | } | ||
| 5064 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4993 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
| 5065 | | jae ->vmeta_tsetv | 4994 | | jae ->vmeta_tsetv |
| 5066 | | shl RC, 3 | 4995 | | shl RC, 3 |
| @@ -5406,9 +5335,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5406 | 5335 | ||
| 5407 | case BC_ITERN: | 5336 | case BC_ITERN: |
| 5408 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | 5337 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |
| 5409 | #if LJ_HASJIT | 5338 | |.if JIT |
| 5410 | | // NYI: add hotloop, record BC_ITERN. | 5339 | | // NYI: add hotloop, record BC_ITERN. |
| 5411 | #endif | 5340 | |.endif |
| 5412 | | mov TMP1, KBASE // Need two more free registers. | 5341 | | mov TMP1, KBASE // Need two more free registers. |
| 5413 | | mov TMP2, DISPATCH | 5342 | | mov TMP2, DISPATCH |
| 5414 | | mov TAB:RB, [BASE+RA*8-16] | 5343 | | mov TAB:RB, [BASE+RA*8-16] |
| @@ -5419,14 +5348,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5419 | |1: // Traverse array part. | 5348 | |1: // Traverse array part. |
| 5420 | | cmp RC, DISPATCH; jae >5 // Index points after array part? | 5349 | | cmp RC, DISPATCH; jae >5 // Index points after array part? |
| 5421 | | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 | 5350 | | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 |
| 5422 | if (LJ_DUALNUM) { | 5351 | |.if DUALNUM |
| 5423 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 5352 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
| 5424 | | mov dword [BASE+RA*8], RC | 5353 | | mov dword [BASE+RA*8], RC |
| 5425 | } else if (sse) { | 5354 | |.elif SSE |
| 5426 | | cvtsi2sd xmm0, RC | 5355 | | cvtsi2sd xmm0, RC |
| 5427 | } else { | 5356 | |.else |
| 5428 | | fild dword [BASE+RA*8-8] | 5357 | | fild dword [BASE+RA*8-8] |
| 5429 | } | 5358 | |.endif |
| 5430 | | // Copy array slot to returned value. | 5359 | | // Copy array slot to returned value. |
| 5431 | |.if X64 | 5360 | |.if X64 |
| 5432 | | mov RBa, [KBASE+RC*8] | 5361 | | mov RBa, [KBASE+RC*8] |
| @@ -5439,13 +5368,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5439 | |.endif | 5368 | |.endif |
| 5440 | | add RC, 1 | 5369 | | add RC, 1 |
| 5441 | | // Return array index as a numeric key. | 5370 | | // Return array index as a numeric key. |
| 5442 | if (LJ_DUALNUM) { | 5371 | |.if DUALNUM |
| 5443 | | // See above. | 5372 | | // See above. |
| 5444 | } else if (sse) { | 5373 | |.elif SSE |
| 5445 | | movsd qword [BASE+RA*8], xmm0 | 5374 | | movsd qword [BASE+RA*8], xmm0 |
| 5446 | } else { | 5375 | |.else |
| 5447 | | fstp qword [BASE+RA*8] | 5376 | | fstp qword [BASE+RA*8] |
| 5448 | } | 5377 | |.endif |
| 5449 | | mov [BASE+RA*8-8], RC // Update control var. | 5378 | | mov [BASE+RA*8-8], RC // Update control var. |
| 5450 | |2: | 5379 | |2: |
| 5451 | | movzx RD, PC_RD // Get target from ITERL. | 5380 | | movzx RD, PC_RD // Get target from ITERL. |
| @@ -5457,9 +5386,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5457 | | | 5386 | | |
| 5458 | |4: // Skip holes in array part. | 5387 | |4: // Skip holes in array part. |
| 5459 | | add RC, 1 | 5388 | | add RC, 1 |
| 5460 | if (!LJ_DUALNUM && !sse) { | 5389 | |.if not (DUALNUM or SSE) |
| 5461 | | mov [BASE+RA*8-8], RC | 5390 | | mov [BASE+RA*8-8], RC |
| 5462 | } | 5391 | |.endif |
| 5463 | | jmp <1 | 5392 | | jmp <1 |
| 5464 | | | 5393 | | |
| 5465 | |5: // Traverse hash part. | 5394 | |5: // Traverse hash part. |
| @@ -5695,9 +5624,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5695 | |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] | 5624 | |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] |
| 5696 | 5625 | ||
| 5697 | case BC_FORL: | 5626 | case BC_FORL: |
| 5698 | #if LJ_HASJIT | 5627 | |.if JIT |
| 5699 | | hotloop RB | 5628 | | hotloop RB |
| 5700 | #endif | 5629 | |.endif |
| 5701 | | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. | 5630 | | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. |
| 5702 | break; | 5631 | break; |
| 5703 | 5632 | ||
| @@ -5792,76 +5721,73 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5792 | if (!vk) { | 5721 | if (!vk) { |
| 5793 | | cmp RB, LJ_TISNUM; jae ->vmeta_for | 5722 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
| 5794 | } | 5723 | } |
| 5795 | if (sse) { | 5724 | |.if SSE |
| 5796 | | movsd xmm0, qword FOR_IDX | 5725 | | movsd xmm0, qword FOR_IDX |
| 5797 | | movsd xmm1, qword FOR_STOP | 5726 | | movsd xmm1, qword FOR_STOP |
| 5798 | if (vk) { | 5727 | if (vk) { |
| 5799 | | addsd xmm0, qword FOR_STEP | 5728 | | addsd xmm0, qword FOR_STEP |
| 5800 | | movsd qword FOR_IDX, xmm0 | 5729 | | movsd qword FOR_IDX, xmm0 |
| 5801 | | test RB, RB; js >3 | 5730 | | test RB, RB; js >3 |
| 5802 | } else { | ||
| 5803 | | jl >3 | ||
| 5804 | } | ||
| 5805 | | ucomisd xmm1, xmm0 | ||
| 5806 | |1: | ||
| 5807 | | movsd qword FOR_EXT, xmm0 | ||
| 5808 | } else { | 5731 | } else { |
| 5809 | | fld qword FOR_STOP | 5732 | | jl >3 |
| 5810 | | fld qword FOR_IDX | 5733 | } |
| 5811 | if (vk) { | 5734 | | ucomisd xmm1, xmm0 |
| 5812 | | fadd qword FOR_STEP // nidx = idx + step | 5735 | |1: |
| 5813 | | fst qword FOR_IDX | 5736 | | movsd qword FOR_EXT, xmm0 |
| 5814 | | fst qword FOR_EXT | 5737 | |.else |
| 5815 | | test RB, RB; js >1 | 5738 | | fld qword FOR_STOP |
| 5816 | } else { | 5739 | | fld qword FOR_IDX |
| 5817 | | fst qword FOR_EXT | 5740 | if (vk) { |
| 5818 | | jl >1 | 5741 | | fadd qword FOR_STEP // nidx = idx + step |
| 5819 | } | 5742 | | fst qword FOR_IDX |
| 5820 | | fxch // Swap lim/(n)idx if step non-negative. | 5743 | | fst qword FOR_EXT |
| 5821 | |1: | 5744 | | test RB, RB; js >1 |
| 5822 | | fcomparepp // eax (RD) modified if !cmov. | 5745 | } else { |
| 5823 | if (!cmov) { | 5746 | | fst qword FOR_EXT |
| 5824 | | movzx RD, PC_RD // Need to reload RD. | 5747 | | jl >1 |
| 5825 | } | ||
| 5826 | } | 5748 | } |
| 5749 | | fxch // Swap lim/(n)idx if step non-negative. | ||
| 5750 | |1: | ||
| 5751 | | fcomparepp | ||
| 5752 | |.endif | ||
| 5827 | if (op == BC_FORI) { | 5753 | if (op == BC_FORI) { |
| 5828 | if (LJ_DUALNUM) { | 5754 | |.if DUALNUM |
| 5829 | | jnb <7 | 5755 | | jnb <7 |
| 5830 | } else { | 5756 | |.else |
| 5831 | | jnb >2 | 5757 | | jnb >2 |
| 5832 | | branchPC RD | 5758 | | branchPC RD |
| 5833 | } | 5759 | |.endif |
| 5834 | } else if (op == BC_JFORI) { | 5760 | } else if (op == BC_JFORI) { |
| 5835 | | branchPC RD | 5761 | | branchPC RD |
| 5836 | | movzx RD, PC_RD | 5762 | | movzx RD, PC_RD |
| 5837 | | jnb =>BC_JLOOP | 5763 | | jnb =>BC_JLOOP |
| 5838 | } else if (op == BC_IFORL) { | 5764 | } else if (op == BC_IFORL) { |
| 5839 | if (LJ_DUALNUM) { | 5765 | |.if DUALNUM |
| 5840 | | jb <7 | 5766 | | jb <7 |
| 5841 | } else { | 5767 | |.else |
| 5842 | | jb >2 | 5768 | | jb >2 |
| 5843 | | branchPC RD | 5769 | | branchPC RD |
| 5844 | } | 5770 | |.endif |
| 5845 | } else { | 5771 | } else { |
| 5846 | | jnb =>BC_JLOOP | 5772 | | jnb =>BC_JLOOP |
| 5847 | } | 5773 | } |
| 5848 | if (LJ_DUALNUM) { | 5774 | |.if DUALNUM |
| 5849 | | jmp <6 | 5775 | | jmp <6 |
| 5850 | } else { | 5776 | |.else |
| 5851 | |2: | 5777 | |2: |
| 5852 | | ins_next | 5778 | | ins_next |
| 5853 | } | 5779 | |.endif |
| 5854 | if (sse) { | 5780 | |.if SSE |
| 5855 | |3: // Invert comparison if step is negative. | 5781 | |3: // Invert comparison if step is negative. |
| 5856 | | ucomisd xmm0, xmm1 | 5782 | | ucomisd xmm0, xmm1 |
| 5857 | | jmp <1 | 5783 | | jmp <1 |
| 5858 | } | 5784 | |.endif |
| 5859 | break; | 5785 | break; |
| 5860 | 5786 | ||
| 5861 | case BC_ITERL: | 5787 | case BC_ITERL: |
| 5862 | #if LJ_HASJIT | 5788 | |.if JIT |
| 5863 | | hotloop RB | 5789 | | hotloop RB |
| 5864 | #endif | 5790 | |.endif |
| 5865 | | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. | 5791 | | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. |
| 5866 | break; | 5792 | break; |
| 5867 | 5793 | ||
| @@ -5893,9 +5819,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5893 | | ins_A // RA = base, RD = target (loop extent) | 5819 | | ins_A // RA = base, RD = target (loop extent) |
| 5894 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | 5820 | | // Note: RA/RD is only used by trace recorder to determine scope/extent |
| 5895 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. | 5821 | | // This opcode does NOT jump, its only purpose is to detect a hot loop. |
| 5896 | #if LJ_HASJIT | 5822 | |.if JIT |
| 5897 | | hotloop RB | 5823 | | hotloop RB |
| 5898 | #endif | 5824 | |.endif |
| 5899 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | 5825 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. |
| 5900 | break; | 5826 | break; |
| 5901 | 5827 | ||
| @@ -5905,7 +5831,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5905 | break; | 5831 | break; |
| 5906 | 5832 | ||
| 5907 | case BC_JLOOP: | 5833 | case BC_JLOOP: |
| 5908 | #if LJ_HASJIT | 5834 | |.if JIT |
| 5909 | | ins_AD // RA = base (ignored), RD = traceno | 5835 | | ins_AD // RA = base (ignored), RD = traceno |
| 5910 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | 5836 | | mov RA, [DISPATCH+DISPATCH_J(trace)] |
| 5911 | | mov TRACE:RD, [RA+RD*4] | 5837 | | mov TRACE:RD, [RA+RD*4] |
| @@ -5937,7 +5863,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5937 | | sub rsp, 16 | 5863 | | sub rsp, 16 |
| 5938 | |.endif | 5864 | |.endif |
| 5939 | | jmp RDa | 5865 | | jmp RDa |
| 5940 | #endif | 5866 | |.endif |
| 5941 | break; | 5867 | break; |
| 5942 | 5868 | ||
| 5943 | case BC_JMP: | 5869 | case BC_JMP: |
| @@ -5956,9 +5882,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 5956 | */ | 5882 | */ |
| 5957 | 5883 | ||
| 5958 | case BC_FUNCF: | 5884 | case BC_FUNCF: |
| 5959 | #if LJ_HASJIT | 5885 | |.if JIT |
| 5960 | | hotcall RB | 5886 | | hotcall RB |
| 5961 | #endif | 5887 | |.endif |
| 5962 | case BC_FUNCV: /* NYI: compiled vararg functions. */ | 5888 | case BC_FUNCV: /* NYI: compiled vararg functions. */ |
| 5963 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. | 5889 | | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. |
| 5964 | break; | 5890 | break; |
| @@ -6101,23 +6027,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
| 6101 | static int build_backend(BuildCtx *ctx) | 6027 | static int build_backend(BuildCtx *ctx) |
| 6102 | { | 6028 | { |
| 6103 | int op; | 6029 | int op; |
| 6104 | int cmov = 1; | ||
| 6105 | int sse = 0; | ||
| 6106 | #ifdef LUAJIT_CPU_NOCMOV | ||
| 6107 | cmov = 0; | ||
| 6108 | #endif | ||
| 6109 | #if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64) | ||
| 6110 | sse = 1; | ||
| 6111 | #endif | ||
| 6112 | |||
| 6113 | dasm_growpc(Dst, BC__MAX); | 6030 | dasm_growpc(Dst, BC__MAX); |
| 6114 | 6031 | build_subroutines(ctx); | |
| 6115 | build_subroutines(ctx, cmov, sse); | ||
| 6116 | |||
| 6117 | |.code_op | 6032 | |.code_op |
| 6118 | for (op = 0; op < BC__MAX; op++) | 6033 | for (op = 0; op < BC__MAX; op++) |
| 6119 | build_ins(ctx, (BCOp)op, op, cmov, sse); | 6034 | build_ins(ctx, (BCOp)op, op); |
| 6120 | |||
| 6121 | return BC__MAX; | 6035 | return BC__MAX; |
| 6122 | } | 6036 | } |
| 6123 | 6037 | ||
