diff options
| author | Mike Pall <mike> | 2014-12-08 02:02:34 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2014-12-08 02:02:34 +0100 |
| commit | ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626 (patch) | |
| tree | 3404e1b148e08f2320a9937ca4849dc794b36bad /src | |
| parent | e03df1e3395bc719d43bd9196d0290757f992b2f (diff) | |
| download | luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.tar.gz luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.tar.bz2 luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.zip | |
x86/x64: Drop internal x87 math functions. Use libm functions.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_arch.h | 6 | ||||
| -rw-r--r-- | src/lj_asm.c | 4 | ||||
| -rw-r--r-- | src/lj_asm_x86.h | 82 | ||||
| -rw-r--r-- | src/lj_ircall.h | 24 | ||||
| -rw-r--r-- | src/lj_vm.h | 12 | ||||
| -rw-r--r-- | src/lj_vmmath.c | 16 | ||||
| -rw-r--r-- | src/vm_x86.dasc | 425 |
7 files changed, 114 insertions, 455 deletions
diff --git a/src/lj_arch.h b/src/lj_arch.h
index da16a193..36b38886 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
| @@ -426,11 +426,11 @@ | |||
| 426 | #define LJ_TARGET_UNALIGNED 0 | 426 | #define LJ_TARGET_UNALIGNED 0 |
| 427 | #endif | 427 | #endif |
| 428 | 428 | ||
| 429 | /* Various workarounds for embedded operating systems. */ | 429 | /* Various workarounds for embedded operating systems or weak C runtimes. */ |
| 430 | #if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 | 430 | #if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS |
| 431 | #define LUAJIT_NO_LOG2 | 431 | #define LUAJIT_NO_LOG2 |
| 432 | #endif | 432 | #endif |
| 433 | #if defined(__symbian__) | 433 | #if defined(__symbian__) || LJ_TARGET_WINDOWS |
| 434 | #define LUAJIT_NO_EXP2 | 434 | #define LUAJIT_NO_EXP2 |
| 435 | #endif | 435 | #endif |
| 436 | 436 | ||
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 0b6738da..aaab3255 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
| @@ -1262,9 +1262,6 @@ static void asm_call(ASMState *as, IRIns *ir) | |||
| 1262 | } | 1262 | } |
| 1263 | 1263 | ||
| 1264 | #if !LJ_SOFTFP | 1264 | #if !LJ_SOFTFP |
| 1265 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref); | ||
| 1266 | |||
| 1267 | #if !LJ_TARGET_X86ORX64 | ||
| 1268 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | 1265 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) |
| 1269 | { | 1266 | { |
| 1270 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1267 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; |
| @@ -1274,7 +1271,6 @@ static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | |||
| 1274 | asm_setupresult(as, ir, ci); | 1271 | asm_setupresult(as, ir, ci); |
| 1275 | asm_gencall(as, ci, args); | 1272 | asm_gencall(as, ci, args); |
| 1276 | } | 1273 | } |
| 1277 | #endif | ||
| 1278 | 1274 | ||
| 1279 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1275 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) |
| 1280 | { | 1276 | { |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 8b541250..bd97764f 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
| @@ -1593,26 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref) | |||
| 1593 | } | 1593 | } |
| 1594 | } | 1594 | } |
| 1595 | 1595 | ||
| 1596 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | ||
| 1597 | { | ||
| 1598 | /* The modified regs must match with the *.dasc implementation. */ | ||
| 1599 | RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
| 1600 | IRIns *irx; | ||
| 1601 | if (ra_hasreg(ir->r)) | ||
| 1602 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
| 1603 | ra_evictset(as, drop); | ||
| 1604 | ra_destreg(as, ir, RID_XMM0); | ||
| 1605 | emit_call(as, lj_vm_pow_sse); | ||
| 1606 | irx = IR(lref); | ||
| 1607 | if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1) | ||
| 1608 | irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */ | ||
| 1609 | ra_left(as, RID_XMM0, lref); | ||
| 1610 | ra_left(as, RID_XMM1, rref); | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | static void asm_fpmath(ASMState *as, IRIns *ir) | 1596 | static void asm_fpmath(ASMState *as, IRIns *ir) |
| 1614 | { | 1597 | { |
| 1615 | IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; | 1598 | IRFPMathOp fpm = (IRFPMathOp)ir->op2; |
| 1616 | if (fpm == IRFPM_SQRT) { | 1599 | if (fpm == IRFPM_SQRT) { |
| 1617 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1600 | Reg dest = ra_dest(as, ir, RSET_FPR); |
| 1618 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | 1601 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); |
| @@ -1645,53 +1628,28 @@ static void asm_fpmath(ASMState *as, IRIns *ir) | |||
| 1645 | } | 1628 | } |
| 1646 | } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { | 1629 | } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { |
| 1647 | /* Rejoined to pow(). */ | 1630 | /* Rejoined to pow(). */ |
| 1648 | } else { /* Handle x87 ops. */ | 1631 | } else { |
| 1649 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | 1632 | asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); |
| 1650 | Reg dest = ir->r; | ||
| 1651 | if (ra_hasreg(dest)) { | ||
| 1652 | ra_free(as, dest); | ||
| 1653 | ra_modified(as, dest); | ||
| 1654 | emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); | ||
| 1655 | } | ||
| 1656 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
| 1657 | switch (fpm) { /* st0 = lj_vm_*(st0) */ | ||
| 1658 | case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; | ||
| 1659 | case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break; | ||
| 1660 | case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; | ||
| 1661 | case IRFPM_COS: emit_x87op(as, XI_FCOS); break; | ||
| 1662 | case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; | ||
| 1663 | case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10: | ||
| 1664 | /* Note: the use of fyl2xp1 would be pointless here. When computing | ||
| 1665 | ** log(1.0+eps) the precision is already lost after 1.0 is added. | ||
| 1666 | ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense. | ||
| 1667 | */ | ||
| 1668 | emit_x87op(as, XI_FYL2X); break; | ||
| 1669 | case IRFPM_OTHER: | ||
| 1670 | switch (ir->o) { | ||
| 1671 | case IR_ATAN2: | ||
| 1672 | emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; | ||
| 1673 | case IR_LDEXP: | ||
| 1674 | emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; | ||
| 1675 | default: lua_assert(0); break; | ||
| 1676 | } | ||
| 1677 | break; | ||
| 1678 | default: lua_assert(0); break; | ||
| 1679 | } | ||
| 1680 | asm_x87load(as, ir->op1); | ||
| 1681 | switch (fpm) { | ||
| 1682 | case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break; | ||
| 1683 | case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break; | ||
| 1684 | case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break; | ||
| 1685 | case IRFPM_OTHER: | ||
| 1686 | if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2); | ||
| 1687 | break; | ||
| 1688 | default: break; | ||
| 1689 | } | ||
| 1690 | } | 1633 | } |
| 1691 | } | 1634 | } |
| 1692 | 1635 | ||
| 1693 | #define asm_atan2(as, ir) asm_fpmath(as, ir) | 1636 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) |
| 1694 | #define asm_ldexp(as, ir) asm_fpmath(as, ir) | 1637 | |
| 1638 | static void asm_ldexp(ASMState *as, IRIns *ir) | ||
| 1639 | { | ||
| 1640 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | ||
| 1641 | Reg dest = ir->r; | ||
| 1642 | if (ra_hasreg(dest)) { | ||
| 1643 | ra_free(as, dest); | ||
| 1644 | ra_modified(as, dest); | ||
| 1645 | emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); | ||
| 1646 | } | ||
| 1647 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
| 1648 | emit_x87op(as, XI_FPOP1); | ||
| 1649 | emit_x87op(as, XI_FSCALE); | ||
| 1650 | asm_x87load(as, ir->op1); | ||
| 1651 | asm_x87load(as, ir->op2); | ||
| 1652 | } | ||
| 1695 | 1653 | ||
| 1696 | static void asm_fppowi(ASMState *as, IRIns *ir) | 1654 | static void asm_fppowi(ASMState *as, IRIns *ir) |
| 1697 | { | 1655 | { |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 9bf46918..e71f0432 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
| @@ -169,18 +169,18 @@ typedef struct CCallInfo { | |||
| 169 | _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ | 169 | _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ |
| 170 | _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ | 170 | _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ |
| 171 | _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ | 171 | _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ |
| 172 | _(FPMATH, exp, 1, N, NUM, XA_FP) \ | 172 | _(ANY, exp, 1, N, NUM, XA_FP) \ |
| 173 | _(FPMATH, lj_vm_exp2, 1, N, NUM, XA_FP) \ | 173 | _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \ |
| 174 | _(FPMATH, log, 1, N, NUM, XA_FP) \ | 174 | _(ANY, log, 1, N, NUM, XA_FP) \ |
| 175 | _(FPMATH, lj_vm_log2, 1, N, NUM, XA_FP) \ | 175 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ |
| 176 | _(FPMATH, log10, 1, N, NUM, XA_FP) \ | 176 | _(ANY, log10, 1, N, NUM, XA_FP) \ |
| 177 | _(FPMATH, sin, 1, N, NUM, XA_FP) \ | 177 | _(ANY, sin, 1, N, NUM, XA_FP) \ |
| 178 | _(FPMATH, cos, 1, N, NUM, XA_FP) \ | 178 | _(ANY, cos, 1, N, NUM, XA_FP) \ |
| 179 | _(FPMATH, tan, 1, N, NUM, XA_FP) \ | 179 | _(ANY, tan, 1, N, NUM, XA_FP) \ |
| 180 | _(FPMATH, lj_vm_powi, 2, N, NUM, XA_FP) \ | 180 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ |
| 181 | _(FPMATH, pow, 2, N, NUM, XA2_FP) \ | 181 | _(ANY, pow, 2, N, NUM, XA2_FP) \ |
| 182 | _(FPMATH, atan2, 2, N, NUM, XA2_FP) \ | 182 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ |
| 183 | _(FPMATH, ldexp, 2, N, NUM, XA_FP) \ | 183 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ |
| 184 | _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ | 184 | _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ |
| 185 | _(SOFTFP, softfp_add, 4, N, NUM, 0) \ | 185 | _(SOFTFP, softfp_add, 4, N, NUM, 0) \ |
| 186 | _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ | 186 | _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ |
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 83883e2c..a69d699f 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
| @@ -55,15 +55,13 @@ LJ_ASMF void lj_vm_exit_interp(void); | |||
| 55 | #define lj_vm_ceil ceil | 55 | #define lj_vm_ceil ceil |
| 56 | #else | 56 | #else |
| 57 | LJ_ASMF double lj_vm_floor(double); | 57 | LJ_ASMF double lj_vm_floor(double); |
| 58 | #if !LJ_TARGET_X86ORX64 | ||
| 59 | LJ_ASMF double lj_vm_ceil(double); | 58 | LJ_ASMF double lj_vm_ceil(double); |
| 60 | #endif | ||
| 61 | #if LJ_TARGET_ARM | 59 | #if LJ_TARGET_ARM |
| 62 | LJ_ASMF double lj_vm_floor_sf(double); | 60 | LJ_ASMF double lj_vm_floor_sf(double); |
| 63 | LJ_ASMF double lj_vm_ceil_sf(double); | 61 | LJ_ASMF double lj_vm_ceil_sf(double); |
| 64 | #endif | 62 | #endif |
| 65 | #endif | 63 | #endif |
| 66 | #if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 | 64 | #ifdef LUAJIT_NO_LOG2 |
| 67 | LJ_ASMF double lj_vm_log2(double); | 65 | LJ_ASMF double lj_vm_log2(double); |
| 68 | #else | 66 | #else |
| 69 | #define lj_vm_log2 log2 | 67 | #define lj_vm_log2 log2 |
| @@ -74,11 +72,11 @@ LJ_ASMF double lj_vm_log2(double); | |||
| 74 | LJ_ASMF void lj_vm_floor_sse(void); | 72 | LJ_ASMF void lj_vm_floor_sse(void); |
| 75 | LJ_ASMF void lj_vm_ceil_sse(void); | 73 | LJ_ASMF void lj_vm_ceil_sse(void); |
| 76 | LJ_ASMF void lj_vm_trunc_sse(void); | 74 | LJ_ASMF void lj_vm_trunc_sse(void); |
| 77 | LJ_ASMF void lj_vm_exp_x87(void); | ||
| 78 | LJ_ASMF void lj_vm_exp2_x87(void); | ||
| 79 | LJ_ASMF void lj_vm_pow_sse(void); | ||
| 80 | LJ_ASMF void lj_vm_powi_sse(void); | 75 | LJ_ASMF void lj_vm_powi_sse(void); |
| 76 | #define lj_vm_powi NULL | ||
| 81 | #else | 77 | #else |
| 78 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
| 79 | #endif | ||
| 82 | #if LJ_TARGET_PPC | 80 | #if LJ_TARGET_PPC |
| 83 | #define lj_vm_trunc trunc | 81 | #define lj_vm_trunc trunc |
| 84 | #else | 82 | #else |
| @@ -87,13 +85,11 @@ LJ_ASMF double lj_vm_trunc(double); | |||
| 87 | LJ_ASMF double lj_vm_trunc_sf(double); | 85 | LJ_ASMF double lj_vm_trunc_sf(double); |
| 88 | #endif | 86 | #endif |
| 89 | #endif | 87 | #endif |
| 90 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
| 91 | #ifdef LUAJIT_NO_EXP2 | 88 | #ifdef LUAJIT_NO_EXP2 |
| 92 | LJ_ASMF double lj_vm_exp2(double); | 89 | LJ_ASMF double lj_vm_exp2(double); |
| 93 | #else | 90 | #else |
| 94 | #define lj_vm_exp2 exp2 | 91 | #define lj_vm_exp2 exp2 |
| 95 | #endif | 92 | #endif |
| 96 | #endif | ||
| 97 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | 93 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); |
| 98 | #if LJ_HASFFI | 94 | #if LJ_HASFFI |
| 99 | LJ_ASMF int lj_vm_errno(void); | 95 | LJ_ASMF int lj_vm_errno(void); |
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index b60858b2..6ea99d15 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
| @@ -17,14 +17,25 @@ | |||
| 17 | 17 | ||
| 18 | #if LJ_TARGET_X86 && __ELF__ && __PIC__ | 18 | #if LJ_TARGET_X86 && __ELF__ && __PIC__ |
| 19 | /* Wrapper functions to deal with the ELF/x86 PIC disaster. */ | 19 | /* Wrapper functions to deal with the ELF/x86 PIC disaster. */ |
| 20 | LJ_FUNCA double lj_wrap_log(double x) { return log(x); } | ||
| 21 | LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } | ||
| 22 | LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } | ||
| 23 | LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } | ||
| 24 | LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } | ||
| 25 | LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } | ||
| 26 | LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } | ||
| 27 | LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } | ||
| 28 | LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } | ||
| 20 | LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | 29 | LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } |
| 21 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | 30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } |
| 22 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | 31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } |
| 32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | ||
| 33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
| 34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | ||
| 23 | #endif | 35 | #endif |
| 24 | 36 | ||
| 25 | /* -- Helper functions for generated machine code ------------------------- */ | 37 | /* -- Helper functions for generated machine code ------------------------- */ |
| 26 | 38 | ||
| 27 | #if !LJ_TARGET_X86ORX64 | ||
| 28 | double lj_vm_foldarith(double x, double y, int op) | 39 | double lj_vm_foldarith(double x, double y, int op) |
| 29 | { | 40 | { |
| 30 | switch (op) { | 41 | switch (op) { |
| @@ -45,7 +56,6 @@ double lj_vm_foldarith(double x, double y, int op) | |||
| 45 | default: return x; | 56 | default: return x; |
| 46 | } | 57 | } |
| 47 | } | 58 | } |
| 48 | #endif | ||
| 49 | 59 | ||
| 50 | #if LJ_HASJIT | 60 | #if LJ_HASJIT |
| 51 | 61 | ||
| @@ -109,6 +119,7 @@ double lj_vm_powi(double x, int32_t k) | |||
| 109 | else | 119 | else |
| 110 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | 120 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); |
| 111 | } | 121 | } |
| 122 | #endif | ||
| 112 | 123 | ||
| 113 | /* Computes fpm(x) for extended math functions. */ | 124 | /* Computes fpm(x) for extended math functions. */ |
| 114 | double lj_vm_foldfpm(double x, int fpm) | 125 | double lj_vm_foldfpm(double x, int fpm) |
| @@ -130,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm) | |||
| 130 | } | 141 | } |
| 131 | return 0; | 142 | return 0; |
| 132 | } | 143 | } |
| 133 | #endif | ||
| 134 | 144 | ||
| 135 | #if LJ_HASFFI | 145 | #if LJ_HASFFI |
| 136 | int lj_vm_errno(void) | 146 | int lj_vm_errno(void) |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index cd43afbd..290054dc 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
| @@ -373,7 +373,6 @@ | |||
| 373 | | fpop | 373 | | fpop |
| 374 | |.endmacro | 374 | |.endmacro |
| 375 | | | 375 | | |
| 376 | |.macro fdup; fld st0; .endmacro | ||
| 377 | |.macro fpop1; fstp st1; .endmacro | 376 | |.macro fpop1; fstp st1; .endmacro |
| 378 | | | 377 | | |
| 379 | |// Synthesize SSE FP constants. | 378 | |// Synthesize SSE FP constants. |
| @@ -1329,19 +1328,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1329 | | cmp NARGS:RD, 2+1; jb ->fff_fallback | 1328 | | cmp NARGS:RD, 2+1; jb ->fff_fallback |
| 1330 | |.endmacro | 1329 | |.endmacro |
| 1331 | | | 1330 | | |
| 1332 | |.macro .ffunc_n, name | ||
| 1333 | | .ffunc_1 name | ||
| 1334 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
| 1335 | | fld qword [BASE] | ||
| 1336 | |.endmacro | ||
| 1337 | | | ||
| 1338 | |.macro .ffunc_n, name, op | ||
| 1339 | | .ffunc_1 name | ||
| 1340 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
| 1341 | | op | ||
| 1342 | | fld qword [BASE] | ||
| 1343 | |.endmacro | ||
| 1344 | | | ||
| 1345 | |.macro .ffunc_nsse, name, op | 1331 | |.macro .ffunc_nsse, name, op |
| 1346 | | .ffunc_1 name | 1332 | | .ffunc_1 name |
| 1347 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1333 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
| @@ -1352,14 +1338,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1352 | | .ffunc_nsse name, movsd | 1338 | | .ffunc_nsse name, movsd |
| 1353 | |.endmacro | 1339 | |.endmacro |
| 1354 | | | 1340 | | |
| 1355 | |.macro .ffunc_nn, name | ||
| 1356 | | .ffunc_2 name | ||
| 1357 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
| 1358 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | ||
| 1359 | | fld qword [BASE] | ||
| 1360 | | fld qword [BASE+8] | ||
| 1361 | |.endmacro | ||
| 1362 | | | ||
| 1363 | |.macro .ffunc_nnsse, name | 1341 | |.macro .ffunc_nnsse, name |
| 1364 | | .ffunc_2 name | 1342 | | .ffunc_2 name |
| 1365 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1343 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
| @@ -2029,6 +2007,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2029 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. | 2007 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. |
| 2030 | | jmp ->vm_return | 2008 | | jmp ->vm_return |
| 2031 | | | 2009 | | |
| 2010 | |.if X64 | ||
| 2011 | |.define fff_resfp, fff_resxmm0 | ||
| 2012 | |.else | ||
| 2013 | |.define fff_resfp, fff_resn | ||
| 2014 | |.endif | ||
| 2015 | | | ||
| 2032 | |.macro math_round, func | 2016 | |.macro math_round, func |
| 2033 | | .ffunc math_ .. func | 2017 | | .ffunc math_ .. func |
| 2034 | |.if DUALNUM | 2018 | |.if DUALNUM |
| @@ -2061,22 +2045,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2061 | |.ffunc math_log | 2045 | |.ffunc math_log |
| 2062 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 2046 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
| 2063 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2047 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
| 2064 | | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn | 2048 | | movsd xmm0, qword [BASE] |
| 2065 | | | 2049 | |.if not X64 |
| 2066 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2050 | | movsd FPARG1, xmm0 |
| 2067 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2051 | |.endif |
| 2068 | | | 2052 | | mov RB, BASE |
| 2069 | |.ffunc_n math_sin; fsin; jmp ->fff_resn | 2053 | | call extern log |
| 2070 | |.ffunc_n math_cos; fcos; jmp ->fff_resn | 2054 | | mov BASE, RB |
| 2071 | |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn | 2055 | | jmp ->fff_resfp |
| 2072 | | | ||
| 2073 | |.ffunc_n math_asin | ||
| 2074 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan | ||
| 2075 | | jmp ->fff_resn | ||
| 2076 | |.ffunc_n math_acos | ||
| 2077 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan | ||
| 2078 | | jmp ->fff_resn | ||
| 2079 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | ||
| 2080 | | | 2056 | | |
| 2081 | |.macro math_extern, func | 2057 | |.macro math_extern, func |
| 2082 | | .ffunc_nsse math_ .. func | 2058 | | .ffunc_nsse math_ .. func |
| @@ -2086,18 +2062,36 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2086 | | mov RB, BASE | 2062 | | mov RB, BASE |
| 2087 | | call extern func | 2063 | | call extern func |
| 2088 | | mov BASE, RB | 2064 | | mov BASE, RB |
| 2089 | |.if X64 | 2065 | | jmp ->fff_resfp |
| 2090 | | jmp ->fff_resxmm0 | 2066 | |.endmacro |
| 2091 | |.else | 2067 | | |
| 2092 | | jmp ->fff_resn | 2068 | |.macro math_extern2, func |
| 2069 | | .ffunc_nnsse math_ .. func | ||
| 2070 | |.if not X64 | ||
| 2071 | | movsd FPARG1, xmm0 | ||
| 2072 | | movsd FPARG3, xmm1 | ||
| 2093 | |.endif | 2073 | |.endif |
| 2074 | | mov RB, BASE | ||
| 2075 | | call extern func | ||
| 2076 | | mov BASE, RB | ||
| 2077 | | jmp ->fff_resfp | ||
| 2094 | |.endmacro | 2078 | |.endmacro |
| 2095 | | | 2079 | | |
| 2080 | | math_extern log10 | ||
| 2081 | | math_extern exp | ||
| 2082 | | math_extern sin | ||
| 2083 | | math_extern cos | ||
| 2084 | | math_extern tan | ||
| 2085 | | math_extern asin | ||
| 2086 | | math_extern acos | ||
| 2087 | | math_extern atan | ||
| 2096 | | math_extern sinh | 2088 | | math_extern sinh |
| 2097 | | math_extern cosh | 2089 | | math_extern cosh |
| 2098 | | math_extern tanh | 2090 | | math_extern tanh |
| 2091 | | math_extern2 pow | ||
| 2092 | | math_extern2 atan2 | ||
| 2093 | | math_extern2 fmod | ||
| 2099 | | | 2094 | | |
| 2100 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | ||
| 2101 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2095 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
| 2102 | | | 2096 | | |
| 2103 | |.ffunc_1 math_frexp | 2097 | |.ffunc_1 math_frexp |
| @@ -2151,13 +2145,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2151 | |4: | 2145 | |4: |
| 2152 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2146 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
| 2153 | | | 2147 | | |
| 2154 | |.ffunc_nnr math_fmod | ||
| 2155 | |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 | ||
| 2156 | | fpop1 | ||
| 2157 | | jmp ->fff_resn | ||
| 2158 | | | ||
| 2159 | |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0 | ||
| 2160 | | | ||
| 2161 | |.macro math_minmax, name, cmovop, sseop | 2148 | |.macro math_minmax, name, cmovop, sseop |
| 2162 | | .ffunc name | 2149 | | .ffunc name |
| 2163 | | mov RA, 2 | 2150 | | mov RA, 2 |
| @@ -2899,7 +2886,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2899 | | | 2886 | | |
| 2900 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2887 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
| 2901 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | 2888 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
| 2902 | |.macro vm_round, name, mode | 2889 | |.macro vm_round, name, mode, cond |
| 2890 | |->name: | ||
| 2891 | |.if not X64 and cond | ||
| 2892 | | movsd xmm0, qword [esp+4] | ||
| 2893 | | call ->name .. _sse | ||
| 2894 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
| 2895 | | fld qword [esp+4] | ||
| 2896 | | ret | ||
| 2897 | |.endif | ||
| 2898 | | | ||
| 2903 | |->name .. _sse: | 2899 | |->name .. _sse: |
| 2904 | | sseconst_abs xmm2, RDa | 2900 | | sseconst_abs xmm2, RDa |
| 2905 | | sseconst_2p52 xmm3, RDa | 2901 | | sseconst_2p52 xmm3, RDa |
| @@ -2936,18 +2932,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2936 | | ret | 2932 | | ret |
| 2937 | |.endmacro | 2933 | |.endmacro |
| 2938 | | | 2934 | | |
| 2939 | |->vm_floor: | 2935 | | vm_round vm_floor, 0, 1 |
| 2940 | |.if not X64 | 2936 | | vm_round vm_ceil, 1, JIT |
| 2941 | | movsd xmm0, qword [esp+4] | 2937 | | vm_round vm_trunc, 2, JIT |
| 2942 | | call ->vm_floor_sse | ||
| 2943 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
| 2944 | | fld qword [esp+4] | ||
| 2945 | | ret | ||
| 2946 | |.endif | ||
| 2947 | | | ||
| 2948 | | vm_round vm_floor, 0 | ||
| 2949 | | vm_round vm_ceil, 1 | ||
| 2950 | | vm_round vm_trunc, 2 | ||
| 2951 | | | 2938 | | |
| 2952 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 2939 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
| 2953 | |->vm_mod: | 2940 | |->vm_mod: |
| @@ -2979,65 +2966,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2979 | | subsd xmm0, xmm1 | 2966 | | subsd xmm0, xmm1 |
| 2980 | | ret | 2967 | | ret |
| 2981 | | | 2968 | | |
| 2982 | |// FP log2(x). Called by math.log(x, base). | ||
| 2983 | |->vm_log2: | ||
| 2984 | |.if X64WIN | ||
| 2985 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
| 2986 | | fld1 | ||
| 2987 | | fld qword [rsp+8] | ||
| 2988 | | fyl2x | ||
| 2989 | | fstp qword [rsp+8] | ||
| 2990 | | movsd xmm0, qword [rsp+8] | ||
| 2991 | |.elif X64 | ||
| 2992 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
| 2993 | | fld1 | ||
| 2994 | | fld qword [rsp-8] | ||
| 2995 | | fyl2x | ||
| 2996 | | fstp qword [rsp-8] | ||
| 2997 | | movsd xmm0, qword [rsp-8] | ||
| 2998 | |.else | ||
| 2999 | | fld1 | ||
| 3000 | | fld qword [esp+4] | ||
| 3001 | | fyl2x | ||
| 3002 | |.endif | ||
| 3003 | | ret | ||
| 3004 | | | ||
| 3005 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | ||
| 3006 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | ||
| 3007 | |// Caveat: needs 3 slots on x87 stack! | ||
| 3008 | |->vm_exp_x87: | ||
| 3009 | | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) | ||
| 3010 | |->vm_exp2_x87: | ||
| 3011 | | .if X64WIN | ||
| 3012 | | .define expscratch, dword [rsp+8] // Use scratch area. | ||
| 3013 | | .elif X64 | ||
| 3014 | | .define expscratch, dword [rsp-8] // Use red zone. | ||
| 3015 | | .else | ||
| 3016 | | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. | ||
| 3017 | | .endif | ||
| 3018 | | fst expscratch // Caveat: overwrites ARG1. | ||
| 3019 | | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf | ||
| 3020 | | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 | ||
| 3021 | |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. | ||
| 3022 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
| 3023 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
| 3024 | |1: | ||
| 3025 | | ret | ||
| 3026 | |2: | ||
| 3027 | | fpop; fldz; ret | ||
| 3028 | | | ||
| 3029 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | ||
| 3030 | |// and vm_arith. | ||
| 3031 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | ||
| 3032 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | ||
| 3033 | |->vm_pow_sse: | ||
| 3034 | | cvttsd2si eax, xmm1 | ||
| 3035 | | cvtsi2sd xmm2, eax | ||
| 3036 | | ucomisd xmm1, xmm2 | ||
| 3037 | | jnz >8 // Branch for FP exponents. | ||
| 3038 | | jp >9 // Branch for NaN exponent. | ||
| 3039 | | // Fallthrough. | ||
| 3040 | | | ||
| 3041 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | 2969 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |
| 3042 | |->vm_powi_sse: | 2970 | |->vm_powi_sse: |
| 3043 | | cmp eax, 1; jle >6 // i<=1? | 2971 | | cmp eax, 1; jle >6 // i<=1? |
| @@ -3073,246 +3001,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3073 | | sseconst_1 xmm0, RDa | 3001 | | sseconst_1 xmm0, RDa |
| 3074 | | ret | 3002 | | ret |
| 3075 | | | 3003 | | |
| 3076 | |8: // FP/FP power function x^y. | ||
| 3077 | |.if X64 | ||
| 3078 | | movd rax, xmm1; shl rax, 1 | ||
| 3079 | | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf? | ||
| 3080 | | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? | ||
| 3081 | | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y? | ||
| 3082 | | .if X64WIN | ||
| 3083 | | movsd qword [rsp+16], xmm1 // Use scratch area. | ||
| 3084 | | movsd qword [rsp+8], xmm0 | ||
| 3085 | | fld qword [rsp+16] | ||
| 3086 | | fld qword [rsp+8] | ||
| 3087 | | .else | ||
| 3088 | | movsd qword [rsp-16], xmm1 // Use red zone. | ||
| 3089 | | movsd qword [rsp-8], xmm0 | ||
| 3090 | | fld qword [rsp-16] | ||
| 3091 | | fld qword [rsp-8] | ||
| 3092 | | .endif | ||
| 3093 | |.else | ||
| 3094 | | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. | ||
| 3095 | | movsd qword [esp+4], xmm0 | ||
| 3096 | | cmp dword [esp+12], 0; jne >1 | ||
| 3097 | | mov eax, [esp+16]; shl eax, 1 | ||
| 3098 | | cmp eax, 0xffe00000; je >2 // x^+-Inf? | ||
| 3099 | |1: | ||
| 3100 | | cmp dword [esp+4], 0; jne >1 | ||
| 3101 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
| 3102 | | cmp eax, 0xffe00000; je >5 // +-Inf^y? | ||
| 3103 | |1: | ||
| 3104 | | fld qword [esp+12] | ||
| 3105 | | fld qword [esp+4] | ||
| 3106 | |.endif | ||
| 3107 | | fyl2x // y*log2(x) | ||
| 3108 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
| 3109 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
| 3110 | |.if X64WIN | ||
| 3111 | | fstp qword [rsp+8] // Use scratch area. | ||
| 3112 | | movsd xmm0, qword [rsp+8] | ||
| 3113 | |.elif X64 | ||
| 3114 | | fstp qword [rsp-8] // Use red zone. | ||
| 3115 | | movsd xmm0, qword [rsp-8] | ||
| 3116 | |.else | ||
| 3117 | | fstp qword [esp+4] // Needs 8 byte scratch area. | ||
| 3118 | | movsd xmm0, qword [esp+4] | ||
| 3119 | |.endif | ||
| 3120 | | ret | ||
| 3121 | | | ||
| 3122 | |9: // Handle x^NaN. | ||
| 3123 | | sseconst_1 xmm2, RDa | ||
| 3124 | | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 | ||
| 3125 | | movaps xmm0, xmm1 // x^NaN ==> NaN | ||
| 3126 | |1: | ||
| 3127 | | ret | ||
| 3128 | | | ||
| 3129 | |2: // Handle x^+-Inf. | ||
| 3130 | | sseconst_abs xmm2, RDa | ||
| 3131 | | andpd xmm0, xmm2 // |x| | ||
| 3132 | | sseconst_1 xmm2, RDa | ||
| 3133 | | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 | ||
| 3134 | | movmskpd eax, xmm1 | ||
| 3135 | | xorps xmm0, xmm0 | ||
| 3136 | | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
| 3137 | |3: | ||
| 3138 | | sseconst_hi xmm0, RDa, 7ff00000 // +Inf | ||
| 3139 | | ret | ||
| 3140 | | | ||
| 3141 | |4: // Handle +-0^y. | ||
| 3142 | | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf | ||
| 3143 | | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 | ||
| 3144 | | ret | ||
| 3145 | | | ||
| 3146 | |5: // Handle +-Inf^y. | ||
| 3147 | | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf | ||
| 3148 | | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 | ||
| 3149 | | ret | ||
| 3150 | | | ||
| 3151 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | ||
| 3152 | |// Computes fpm(x) for extended math functions. ORDER FPM. | ||
| 3153 | |->vm_foldfpm: | ||
| 3154 | |.if JIT | ||
| 3155 | |.if X64 | ||
| 3156 | | .if X64WIN | ||
| 3157 | | .define fpmop, CARG2d | ||
| 3158 | | .else | ||
| 3159 | | .define fpmop, CARG1d | ||
| 3160 | | .endif | ||
| 3161 | | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse | ||
| 3162 | | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2 | ||
| 3163 | | sqrtsd xmm0, xmm0; ret | ||
| 3164 | |2: | ||
| 3165 | | .if X64WIN | ||
| 3166 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
| 3167 | | fld qword [rsp+8] | ||
| 3168 | | .else | ||
| 3169 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
| 3170 | | fld qword [rsp-8] | ||
| 3171 | | .endif | ||
| 3172 | | cmp fpmop, 5; ja >2 | ||
| 3173 | | .if X64WIN; pop rax; .endif | ||
| 3174 | | je >1 | ||
| 3175 | | call ->vm_exp_x87 | ||
| 3176 | | .if X64WIN; push rax; .endif | ||
| 3177 | | jmp >7 | ||
| 3178 | |1: | ||
| 3179 | | call ->vm_exp2_x87 | ||
| 3180 | | .if X64WIN; push rax; .endif | ||
| 3181 | | jmp >7 | ||
| 3182 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
| 3183 | | fldln2; fxch; fyl2x; jmp >7 | ||
| 3184 | |1: ; fld1; fxch; fyl2x; jmp >7 | ||
| 3185 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
| 3186 | | fldlg2; fxch; fyl2x; jmp >7 | ||
| 3187 | |1: ; fsin; jmp >7 | ||
| 3188 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
| 3189 | | fcos; jmp >7 | ||
| 3190 | |1: ; fptan; fpop | ||
| 3191 | |7: | ||
| 3192 | | .if X64WIN | ||
| 3193 | | fstp qword [rsp+8] // Use scratch area. | ||
| 3194 | | movsd xmm0, qword [rsp+8] | ||
| 3195 | | .else | ||
| 3196 | | fstp qword [rsp-8] // Use red zone. | ||
| 3197 | | movsd xmm0, qword [rsp-8] | ||
| 3198 | | .endif | ||
| 3199 | | ret | ||
| 3200 | |.else // x86 calling convention. | ||
| 3201 | | .define fpmop, eax | ||
| 3202 | | mov fpmop, [esp+12] | ||
| 3203 | | movsd xmm0, qword [esp+4] | ||
| 3204 | | cmp fpmop, 1; je >1; ja >2 | ||
| 3205 | | call ->vm_floor_sse; jmp >7 | ||
| 3206 | |1: ; call ->vm_ceil_sse; jmp >7 | ||
| 3207 | |2: ; cmp fpmop, 3; je >1; ja >2 | ||
| 3208 | | call ->vm_trunc_sse; jmp >7 | ||
| 3209 | |1: | ||
| 3210 | | sqrtsd xmm0, xmm0 | ||
| 3211 | |7: | ||
| 3212 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
| 3213 | | fld qword [esp+4] | ||
| 3214 | | ret | ||
| 3215 | |2: ; fld qword [esp+4] | ||
| 3216 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
| 3217 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
| 3218 | | fldln2; fxch; fyl2x; ret | ||
| 3219 | |1: ; fld1; fxch; fyl2x; ret | ||
| 3220 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
| 3221 | | fldlg2; fxch; fyl2x; ret | ||
| 3222 | |1: ; fsin; ret | ||
| 3223 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
| 3224 | | fcos; ret | ||
| 3225 | |1: ; fptan; fpop; ret | ||
| 3226 | |.endif | ||
| 3227 | |9: ; int3 // Bad fpm. | ||
| 3228 | |.endif | ||
| 3229 | | | ||
| 3230 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | ||
| 3231 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | ||
| 3232 | |// and basic math functions. ORDER ARITH | ||
| 3233 | |->vm_foldarith: | ||
| 3234 | |.if X64 | ||
| 3235 | | | ||
| 3236 | | .if X64WIN | ||
| 3237 | | .define foldop, CARG3d | ||
| 3238 | | .else | ||
| 3239 | | .define foldop, CARG1d | ||
| 3240 | | .endif | ||
| 3241 | | cmp foldop, 1; je >1; ja >2 | ||
| 3242 | | addsd xmm0, xmm1; ret | ||
| 3243 | |1: ; subsd xmm0, xmm1; ret | ||
| 3244 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
| 3245 | | mulsd xmm0, xmm1; ret | ||
| 3246 | |1: ; divsd xmm0, xmm1; ret | ||
| 3247 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse | ||
| 3248 | | cmp foldop, 7; je >1; ja >2 | ||
| 3249 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | ||
| 3250 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | ||
| 3251 | |2: ; cmp foldop, 9; ja >2 | ||
| 3252 | |.if X64WIN | ||
| 3253 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
| 3254 | | movsd qword [rsp+16], xmm1 | ||
| 3255 | | fld qword [rsp+8] | ||
| 3256 | | fld qword [rsp+16] | ||
| 3257 | |.else | ||
| 3258 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
| 3259 | | movsd qword [rsp-16], xmm1 | ||
| 3260 | | fld qword [rsp-8] | ||
| 3261 | | fld qword [rsp-16] | ||
| 3262 | |.endif | ||
| 3263 | | je >1 | ||
| 3264 | | fpatan | ||
| 3265 | |7: | ||
| 3266 | |.if X64WIN | ||
| 3267 | | fstp qword [rsp+8] // Use scratch area. | ||
| 3268 | | movsd xmm0, qword [rsp+8] | ||
| 3269 | |.else | ||
| 3270 | | fstp qword [rsp-8] // Use red zone. | ||
| 3271 | | movsd xmm0, qword [rsp-8] | ||
| 3272 | |.endif | ||
| 3273 | | ret | ||
| 3274 | |1: ; fxch; fscale; fpop1; jmp <7 | ||
| 3275 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
| 3276 | | minsd xmm0, xmm1; ret | ||
| 3277 | |1: ; maxsd xmm0, xmm1; ret | ||
| 3278 | |9: ; int3 // Bad op. | ||
| 3279 | | | ||
| 3280 | |.else // x86 calling convention. | ||
| 3281 | | | ||
| 3282 | | .define foldop, eax | ||
| 3283 | | mov foldop, [esp+20] | ||
| 3284 | | movsd xmm0, qword [esp+4] | ||
| 3285 | | movsd xmm1, qword [esp+12] | ||
| 3286 | | cmp foldop, 1; je >1; ja >2 | ||
| 3287 | | addsd xmm0, xmm1 | ||
| 3288 | |7: | ||
| 3289 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
| 3290 | | fld qword [esp+4] | ||
| 3291 | | ret | ||
| 3292 | |1: ; subsd xmm0, xmm1; jmp <7 | ||
| 3293 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
| 3294 | | mulsd xmm0, xmm1; jmp <7 | ||
| 3295 | |1: ; divsd xmm0, xmm1; jmp <7 | ||
| 3296 | |2: ; cmp foldop, 5 | ||
| 3297 | | je >1; ja >2 | ||
| 3298 | | call ->vm_mod; jmp <7 | ||
| 3299 | |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area. | ||
| 3300 | |2: ; cmp foldop, 7; je >1; ja >2 | ||
| 3301 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | ||
| 3302 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | ||
| 3303 | |2: ; cmp foldop, 9; ja >2 | ||
| 3304 | | fld qword [esp+4] // Reload from stack | ||
| 3305 | | fld qword [esp+12] | ||
| 3306 | | je >1 | ||
| 3307 | | fpatan; ret | ||
| 3308 | |1: ; fxch; fscale; fpop1; ret | ||
| 3309 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
| 3310 | | minsd xmm0, xmm1; jmp <7 | ||
| 3311 | |1: ; maxsd xmm0, xmm1; jmp <7 | ||
| 3312 | |9: ; int3 // Bad op. | ||
| 3313 | | | ||
| 3314 | |.endif | ||
| 3315 | | | ||
| 3316 | |//----------------------------------------------------------------------- | 3004 | |//----------------------------------------------------------------------- |
| 3317 | |//-- Miscellaneous functions -------------------------------------------- | 3005 | |//-- Miscellaneous functions -------------------------------------------- |
| 3318 | |//----------------------------------------------------------------------- | 3006 | |//----------------------------------------------------------------------- |
| @@ -4107,8 +3795,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4107 | break; | 3795 | break; |
| 4108 | case BC_POW: | 3796 | case BC_POW: |
| 4109 | | ins_arithpre movsd, xmm1 | 3797 | | ins_arithpre movsd, xmm1 |
| 4110 | | call ->vm_pow_sse | 3798 | | mov RB, BASE |
| 3799 | |.if not X64 | ||
| 3800 | | movsd FPARG1, xmm0 | ||
| 3801 | | movsd FPARG3, xmm1 | ||
| 3802 | |.endif | ||
| 3803 | | call extern pow | ||
| 3804 | | movzx RA, PC_RA | ||
| 3805 | | mov BASE, RB | ||
| 3806 | |.if X64 | ||
| 4111 | | ins_arithpost | 3807 | | ins_arithpost |
| 3808 | |.else | ||
| 3809 | | fstp qword [BASE+RA*8] | ||
| 3810 | |.endif | ||
| 4112 | | ins_next | 3811 | | ins_next |
| 4113 | break; | 3812 | break; |
| 4114 | 3813 | ||
