diff options
author | Mike Pall <mike> | 2014-12-08 02:02:34 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2014-12-08 02:02:34 +0100 |
commit | ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626 (patch) | |
tree | 3404e1b148e08f2320a9937ca4849dc794b36bad | |
parent | e03df1e3395bc719d43bd9196d0290757f992b2f (diff) | |
download | luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.tar.gz luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.tar.bz2 luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.zip |
x86/x64: Drop internal x87 math functions. Use libm functions.
-rw-r--r-- | src/lj_arch.h | 6 | ||||
-rw-r--r-- | src/lj_asm.c | 4 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 82 | ||||
-rw-r--r-- | src/lj_ircall.h | 24 | ||||
-rw-r--r-- | src/lj_vm.h | 12 | ||||
-rw-r--r-- | src/lj_vmmath.c | 16 | ||||
-rw-r--r-- | src/vm_x86.dasc | 425 |
7 files changed, 114 insertions, 455 deletions
diff --git a/src/lj_arch.h b/src/lj_arch.h index da16a193..36b38886 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -426,11 +426,11 @@ | |||
426 | #define LJ_TARGET_UNALIGNED 0 | 426 | #define LJ_TARGET_UNALIGNED 0 |
427 | #endif | 427 | #endif |
428 | 428 | ||
429 | /* Various workarounds for embedded operating systems. */ | 429 | /* Various workarounds for embedded operating systems or weak C runtimes. */ |
430 | #if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 | 430 | #if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS |
431 | #define LUAJIT_NO_LOG2 | 431 | #define LUAJIT_NO_LOG2 |
432 | #endif | 432 | #endif |
433 | #if defined(__symbian__) | 433 | #if defined(__symbian__) || LJ_TARGET_WINDOWS |
434 | #define LUAJIT_NO_EXP2 | 434 | #define LUAJIT_NO_EXP2 |
435 | #endif | 435 | #endif |
436 | 436 | ||
diff --git a/src/lj_asm.c b/src/lj_asm.c index 0b6738da..aaab3255 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1262,9 +1262,6 @@ static void asm_call(ASMState *as, IRIns *ir) | |||
1262 | } | 1262 | } |
1263 | 1263 | ||
1264 | #if !LJ_SOFTFP | 1264 | #if !LJ_SOFTFP |
1265 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref); | ||
1266 | |||
1267 | #if !LJ_TARGET_X86ORX64 | ||
1268 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | 1265 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) |
1269 | { | 1266 | { |
1270 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1267 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; |
@@ -1274,7 +1271,6 @@ static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | |||
1274 | asm_setupresult(as, ir, ci); | 1271 | asm_setupresult(as, ir, ci); |
1275 | asm_gencall(as, ci, args); | 1272 | asm_gencall(as, ci, args); |
1276 | } | 1273 | } |
1277 | #endif | ||
1278 | 1274 | ||
1279 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1275 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) |
1280 | { | 1276 | { |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 8b541250..bd97764f 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -1593,26 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref) | |||
1593 | } | 1593 | } |
1594 | } | 1594 | } |
1595 | 1595 | ||
1596 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | ||
1597 | { | ||
1598 | /* The modified regs must match with the *.dasc implementation. */ | ||
1599 | RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
1600 | IRIns *irx; | ||
1601 | if (ra_hasreg(ir->r)) | ||
1602 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
1603 | ra_evictset(as, drop); | ||
1604 | ra_destreg(as, ir, RID_XMM0); | ||
1605 | emit_call(as, lj_vm_pow_sse); | ||
1606 | irx = IR(lref); | ||
1607 | if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1) | ||
1608 | irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */ | ||
1609 | ra_left(as, RID_XMM0, lref); | ||
1610 | ra_left(as, RID_XMM1, rref); | ||
1611 | } | ||
1612 | |||
1613 | static void asm_fpmath(ASMState *as, IRIns *ir) | 1596 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1614 | { | 1597 | { |
1615 | IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; | 1598 | IRFPMathOp fpm = (IRFPMathOp)ir->op2; |
1616 | if (fpm == IRFPM_SQRT) { | 1599 | if (fpm == IRFPM_SQRT) { |
1617 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1600 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1618 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | 1601 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); |
@@ -1645,53 +1628,28 @@ static void asm_fpmath(ASMState *as, IRIns *ir) | |||
1645 | } | 1628 | } |
1646 | } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { | 1629 | } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { |
1647 | /* Rejoined to pow(). */ | 1630 | /* Rejoined to pow(). */ |
1648 | } else { /* Handle x87 ops. */ | 1631 | } else { |
1649 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | 1632 | asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); |
1650 | Reg dest = ir->r; | ||
1651 | if (ra_hasreg(dest)) { | ||
1652 | ra_free(as, dest); | ||
1653 | ra_modified(as, dest); | ||
1654 | emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); | ||
1655 | } | ||
1656 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
1657 | switch (fpm) { /* st0 = lj_vm_*(st0) */ | ||
1658 | case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; | ||
1659 | case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break; | ||
1660 | case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; | ||
1661 | case IRFPM_COS: emit_x87op(as, XI_FCOS); break; | ||
1662 | case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; | ||
1663 | case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10: | ||
1664 | /* Note: the use of fyl2xp1 would be pointless here. When computing | ||
1665 | ** log(1.0+eps) the precision is already lost after 1.0 is added. | ||
1666 | ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense. | ||
1667 | */ | ||
1668 | emit_x87op(as, XI_FYL2X); break; | ||
1669 | case IRFPM_OTHER: | ||
1670 | switch (ir->o) { | ||
1671 | case IR_ATAN2: | ||
1672 | emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; | ||
1673 | case IR_LDEXP: | ||
1674 | emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; | ||
1675 | default: lua_assert(0); break; | ||
1676 | } | ||
1677 | break; | ||
1678 | default: lua_assert(0); break; | ||
1679 | } | ||
1680 | asm_x87load(as, ir->op1); | ||
1681 | switch (fpm) { | ||
1682 | case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break; | ||
1683 | case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break; | ||
1684 | case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break; | ||
1685 | case IRFPM_OTHER: | ||
1686 | if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2); | ||
1687 | break; | ||
1688 | default: break; | ||
1689 | } | ||
1690 | } | 1633 | } |
1691 | } | 1634 | } |
1692 | 1635 | ||
1693 | #define asm_atan2(as, ir) asm_fpmath(as, ir) | 1636 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) |
1694 | #define asm_ldexp(as, ir) asm_fpmath(as, ir) | 1637 | |
1638 | static void asm_ldexp(ASMState *as, IRIns *ir) | ||
1639 | { | ||
1640 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | ||
1641 | Reg dest = ir->r; | ||
1642 | if (ra_hasreg(dest)) { | ||
1643 | ra_free(as, dest); | ||
1644 | ra_modified(as, dest); | ||
1645 | emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); | ||
1646 | } | ||
1647 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
1648 | emit_x87op(as, XI_FPOP1); | ||
1649 | emit_x87op(as, XI_FSCALE); | ||
1650 | asm_x87load(as, ir->op1); | ||
1651 | asm_x87load(as, ir->op2); | ||
1652 | } | ||
1695 | 1653 | ||
1696 | static void asm_fppowi(ASMState *as, IRIns *ir) | 1654 | static void asm_fppowi(ASMState *as, IRIns *ir) |
1697 | { | 1655 | { |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 9bf46918..e71f0432 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -169,18 +169,18 @@ typedef struct CCallInfo { | |||
169 | _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ | 169 | _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ |
170 | _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ | 170 | _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ |
171 | _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ | 171 | _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ |
172 | _(FPMATH, exp, 1, N, NUM, XA_FP) \ | 172 | _(ANY, exp, 1, N, NUM, XA_FP) \ |
173 | _(FPMATH, lj_vm_exp2, 1, N, NUM, XA_FP) \ | 173 | _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \ |
174 | _(FPMATH, log, 1, N, NUM, XA_FP) \ | 174 | _(ANY, log, 1, N, NUM, XA_FP) \ |
175 | _(FPMATH, lj_vm_log2, 1, N, NUM, XA_FP) \ | 175 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ |
176 | _(FPMATH, log10, 1, N, NUM, XA_FP) \ | 176 | _(ANY, log10, 1, N, NUM, XA_FP) \ |
177 | _(FPMATH, sin, 1, N, NUM, XA_FP) \ | 177 | _(ANY, sin, 1, N, NUM, XA_FP) \ |
178 | _(FPMATH, cos, 1, N, NUM, XA_FP) \ | 178 | _(ANY, cos, 1, N, NUM, XA_FP) \ |
179 | _(FPMATH, tan, 1, N, NUM, XA_FP) \ | 179 | _(ANY, tan, 1, N, NUM, XA_FP) \ |
180 | _(FPMATH, lj_vm_powi, 2, N, NUM, XA_FP) \ | 180 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ |
181 | _(FPMATH, pow, 2, N, NUM, XA2_FP) \ | 181 | _(ANY, pow, 2, N, NUM, XA2_FP) \ |
182 | _(FPMATH, atan2, 2, N, NUM, XA2_FP) \ | 182 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ |
183 | _(FPMATH, ldexp, 2, N, NUM, XA_FP) \ | 183 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ |
184 | _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ | 184 | _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ |
185 | _(SOFTFP, softfp_add, 4, N, NUM, 0) \ | 185 | _(SOFTFP, softfp_add, 4, N, NUM, 0) \ |
186 | _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ | 186 | _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ |
diff --git a/src/lj_vm.h b/src/lj_vm.h index 83883e2c..a69d699f 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
@@ -55,15 +55,13 @@ LJ_ASMF void lj_vm_exit_interp(void); | |||
55 | #define lj_vm_ceil ceil | 55 | #define lj_vm_ceil ceil |
56 | #else | 56 | #else |
57 | LJ_ASMF double lj_vm_floor(double); | 57 | LJ_ASMF double lj_vm_floor(double); |
58 | #if !LJ_TARGET_X86ORX64 | ||
59 | LJ_ASMF double lj_vm_ceil(double); | 58 | LJ_ASMF double lj_vm_ceil(double); |
60 | #endif | ||
61 | #if LJ_TARGET_ARM | 59 | #if LJ_TARGET_ARM |
62 | LJ_ASMF double lj_vm_floor_sf(double); | 60 | LJ_ASMF double lj_vm_floor_sf(double); |
63 | LJ_ASMF double lj_vm_ceil_sf(double); | 61 | LJ_ASMF double lj_vm_ceil_sf(double); |
64 | #endif | 62 | #endif |
65 | #endif | 63 | #endif |
66 | #if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 | 64 | #ifdef LUAJIT_NO_LOG2 |
67 | LJ_ASMF double lj_vm_log2(double); | 65 | LJ_ASMF double lj_vm_log2(double); |
68 | #else | 66 | #else |
69 | #define lj_vm_log2 log2 | 67 | #define lj_vm_log2 log2 |
@@ -74,11 +72,11 @@ LJ_ASMF double lj_vm_log2(double); | |||
74 | LJ_ASMF void lj_vm_floor_sse(void); | 72 | LJ_ASMF void lj_vm_floor_sse(void); |
75 | LJ_ASMF void lj_vm_ceil_sse(void); | 73 | LJ_ASMF void lj_vm_ceil_sse(void); |
76 | LJ_ASMF void lj_vm_trunc_sse(void); | 74 | LJ_ASMF void lj_vm_trunc_sse(void); |
77 | LJ_ASMF void lj_vm_exp_x87(void); | ||
78 | LJ_ASMF void lj_vm_exp2_x87(void); | ||
79 | LJ_ASMF void lj_vm_pow_sse(void); | ||
80 | LJ_ASMF void lj_vm_powi_sse(void); | 75 | LJ_ASMF void lj_vm_powi_sse(void); |
76 | #define lj_vm_powi NULL | ||
81 | #else | 77 | #else |
78 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
79 | #endif | ||
82 | #if LJ_TARGET_PPC | 80 | #if LJ_TARGET_PPC |
83 | #define lj_vm_trunc trunc | 81 | #define lj_vm_trunc trunc |
84 | #else | 82 | #else |
@@ -87,13 +85,11 @@ LJ_ASMF double lj_vm_trunc(double); | |||
87 | LJ_ASMF double lj_vm_trunc_sf(double); | 85 | LJ_ASMF double lj_vm_trunc_sf(double); |
88 | #endif | 86 | #endif |
89 | #endif | 87 | #endif |
90 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
91 | #ifdef LUAJIT_NO_EXP2 | 88 | #ifdef LUAJIT_NO_EXP2 |
92 | LJ_ASMF double lj_vm_exp2(double); | 89 | LJ_ASMF double lj_vm_exp2(double); |
93 | #else | 90 | #else |
94 | #define lj_vm_exp2 exp2 | 91 | #define lj_vm_exp2 exp2 |
95 | #endif | 92 | #endif |
96 | #endif | ||
97 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | 93 | LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); |
98 | #if LJ_HASFFI | 94 | #if LJ_HASFFI |
99 | LJ_ASMF int lj_vm_errno(void); | 95 | LJ_ASMF int lj_vm_errno(void); |
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index b60858b2..6ea99d15 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
@@ -17,14 +17,25 @@ | |||
17 | 17 | ||
18 | #if LJ_TARGET_X86 && __ELF__ && __PIC__ | 18 | #if LJ_TARGET_X86 && __ELF__ && __PIC__ |
19 | /* Wrapper functions to deal with the ELF/x86 PIC disaster. */ | 19 | /* Wrapper functions to deal with the ELF/x86 PIC disaster. */ |
20 | LJ_FUNCA double lj_wrap_log(double x) { return log(x); } | ||
21 | LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } | ||
22 | LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } | ||
23 | LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } | ||
24 | LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } | ||
25 | LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } | ||
26 | LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } | ||
27 | LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } | ||
28 | LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } | ||
20 | LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | 29 | LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } |
21 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | 30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } |
22 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | 31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } |
32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | ||
33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | ||
23 | #endif | 35 | #endif |
24 | 36 | ||
25 | /* -- Helper functions for generated machine code ------------------------- */ | 37 | /* -- Helper functions for generated machine code ------------------------- */ |
26 | 38 | ||
27 | #if !LJ_TARGET_X86ORX64 | ||
28 | double lj_vm_foldarith(double x, double y, int op) | 39 | double lj_vm_foldarith(double x, double y, int op) |
29 | { | 40 | { |
30 | switch (op) { | 41 | switch (op) { |
@@ -45,7 +56,6 @@ double lj_vm_foldarith(double x, double y, int op) | |||
45 | default: return x; | 56 | default: return x; |
46 | } | 57 | } |
47 | } | 58 | } |
48 | #endif | ||
49 | 59 | ||
50 | #if LJ_HASJIT | 60 | #if LJ_HASJIT |
51 | 61 | ||
@@ -109,6 +119,7 @@ double lj_vm_powi(double x, int32_t k) | |||
109 | else | 119 | else |
110 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | 120 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); |
111 | } | 121 | } |
122 | #endif | ||
112 | 123 | ||
113 | /* Computes fpm(x) for extended math functions. */ | 124 | /* Computes fpm(x) for extended math functions. */ |
114 | double lj_vm_foldfpm(double x, int fpm) | 125 | double lj_vm_foldfpm(double x, int fpm) |
@@ -130,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm) | |||
130 | } | 141 | } |
131 | return 0; | 142 | return 0; |
132 | } | 143 | } |
133 | #endif | ||
134 | 144 | ||
135 | #if LJ_HASFFI | 145 | #if LJ_HASFFI |
136 | int lj_vm_errno(void) | 146 | int lj_vm_errno(void) |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index cd43afbd..290054dc 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -373,7 +373,6 @@ | |||
373 | | fpop | 373 | | fpop |
374 | |.endmacro | 374 | |.endmacro |
375 | | | 375 | | |
376 | |.macro fdup; fld st0; .endmacro | ||
377 | |.macro fpop1; fstp st1; .endmacro | 376 | |.macro fpop1; fstp st1; .endmacro |
378 | | | 377 | | |
379 | |// Synthesize SSE FP constants. | 378 | |// Synthesize SSE FP constants. |
@@ -1329,19 +1328,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1329 | | cmp NARGS:RD, 2+1; jb ->fff_fallback | 1328 | | cmp NARGS:RD, 2+1; jb ->fff_fallback |
1330 | |.endmacro | 1329 | |.endmacro |
1331 | | | 1330 | | |
1332 | |.macro .ffunc_n, name | ||
1333 | | .ffunc_1 name | ||
1334 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1335 | | fld qword [BASE] | ||
1336 | |.endmacro | ||
1337 | | | ||
1338 | |.macro .ffunc_n, name, op | ||
1339 | | .ffunc_1 name | ||
1340 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1341 | | op | ||
1342 | | fld qword [BASE] | ||
1343 | |.endmacro | ||
1344 | | | ||
1345 | |.macro .ffunc_nsse, name, op | 1331 | |.macro .ffunc_nsse, name, op |
1346 | | .ffunc_1 name | 1332 | | .ffunc_1 name |
1347 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1333 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1352,14 +1338,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1352 | | .ffunc_nsse name, movsd | 1338 | | .ffunc_nsse name, movsd |
1353 | |.endmacro | 1339 | |.endmacro |
1354 | | | 1340 | | |
1355 | |.macro .ffunc_nn, name | ||
1356 | | .ffunc_2 name | ||
1357 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1358 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | ||
1359 | | fld qword [BASE] | ||
1360 | | fld qword [BASE+8] | ||
1361 | |.endmacro | ||
1362 | | | ||
1363 | |.macro .ffunc_nnsse, name | 1341 | |.macro .ffunc_nnsse, name |
1364 | | .ffunc_2 name | 1342 | | .ffunc_2 name |
1365 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1343 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -2029,6 +2007,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2029 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. | 2007 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. |
2030 | | jmp ->vm_return | 2008 | | jmp ->vm_return |
2031 | | | 2009 | | |
2010 | |.if X64 | ||
2011 | |.define fff_resfp, fff_resxmm0 | ||
2012 | |.else | ||
2013 | |.define fff_resfp, fff_resn | ||
2014 | |.endif | ||
2015 | | | ||
2032 | |.macro math_round, func | 2016 | |.macro math_round, func |
2033 | | .ffunc math_ .. func | 2017 | | .ffunc math_ .. func |
2034 | |.if DUALNUM | 2018 | |.if DUALNUM |
@@ -2061,22 +2045,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2061 | |.ffunc math_log | 2045 | |.ffunc math_log |
2062 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 2046 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
2063 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2047 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2064 | | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn | 2048 | | movsd xmm0, qword [BASE] |
2065 | | | 2049 | |.if not X64 |
2066 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2050 | | movsd FPARG1, xmm0 |
2067 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2051 | |.endif |
2068 | | | 2052 | | mov RB, BASE |
2069 | |.ffunc_n math_sin; fsin; jmp ->fff_resn | 2053 | | call extern log |
2070 | |.ffunc_n math_cos; fcos; jmp ->fff_resn | 2054 | | mov BASE, RB |
2071 | |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn | 2055 | | jmp ->fff_resfp |
2072 | | | ||
2073 | |.ffunc_n math_asin | ||
2074 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan | ||
2075 | | jmp ->fff_resn | ||
2076 | |.ffunc_n math_acos | ||
2077 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan | ||
2078 | | jmp ->fff_resn | ||
2079 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | ||
2080 | | | 2056 | | |
2081 | |.macro math_extern, func | 2057 | |.macro math_extern, func |
2082 | | .ffunc_nsse math_ .. func | 2058 | | .ffunc_nsse math_ .. func |
@@ -2086,18 +2062,36 @@ static void build_subroutines(BuildCtx *ctx) | |||
2086 | | mov RB, BASE | 2062 | | mov RB, BASE |
2087 | | call extern func | 2063 | | call extern func |
2088 | | mov BASE, RB | 2064 | | mov BASE, RB |
2089 | |.if X64 | 2065 | | jmp ->fff_resfp |
2090 | | jmp ->fff_resxmm0 | 2066 | |.endmacro |
2091 | |.else | 2067 | | |
2092 | | jmp ->fff_resn | 2068 | |.macro math_extern2, func |
2069 | | .ffunc_nnsse math_ .. func | ||
2070 | |.if not X64 | ||
2071 | | movsd FPARG1, xmm0 | ||
2072 | | movsd FPARG3, xmm1 | ||
2093 | |.endif | 2073 | |.endif |
2074 | | mov RB, BASE | ||
2075 | | call extern func | ||
2076 | | mov BASE, RB | ||
2077 | | jmp ->fff_resfp | ||
2094 | |.endmacro | 2078 | |.endmacro |
2095 | | | 2079 | | |
2080 | | math_extern log10 | ||
2081 | | math_extern exp | ||
2082 | | math_extern sin | ||
2083 | | math_extern cos | ||
2084 | | math_extern tan | ||
2085 | | math_extern asin | ||
2086 | | math_extern acos | ||
2087 | | math_extern atan | ||
2096 | | math_extern sinh | 2088 | | math_extern sinh |
2097 | | math_extern cosh | 2089 | | math_extern cosh |
2098 | | math_extern tanh | 2090 | | math_extern tanh |
2091 | | math_extern2 pow | ||
2092 | | math_extern2 atan2 | ||
2093 | | math_extern2 fmod | ||
2099 | | | 2094 | | |
2100 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | ||
2101 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2095 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
2102 | | | 2096 | | |
2103 | |.ffunc_1 math_frexp | 2097 | |.ffunc_1 math_frexp |
@@ -2151,13 +2145,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2151 | |4: | 2145 | |4: |
2152 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2146 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
2153 | | | 2147 | | |
2154 | |.ffunc_nnr math_fmod | ||
2155 | |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 | ||
2156 | | fpop1 | ||
2157 | | jmp ->fff_resn | ||
2158 | | | ||
2159 | |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0 | ||
2160 | | | ||
2161 | |.macro math_minmax, name, cmovop, sseop | 2148 | |.macro math_minmax, name, cmovop, sseop |
2162 | | .ffunc name | 2149 | | .ffunc name |
2163 | | mov RA, 2 | 2150 | | mov RA, 2 |
@@ -2899,7 +2886,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
2899 | | | 2886 | | |
2900 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2887 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
2901 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | 2888 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
2902 | |.macro vm_round, name, mode | 2889 | |.macro vm_round, name, mode, cond |
2890 | |->name: | ||
2891 | |.if not X64 and cond | ||
2892 | | movsd xmm0, qword [esp+4] | ||
2893 | | call ->name .. _sse | ||
2894 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
2895 | | fld qword [esp+4] | ||
2896 | | ret | ||
2897 | |.endif | ||
2898 | | | ||
2903 | |->name .. _sse: | 2899 | |->name .. _sse: |
2904 | | sseconst_abs xmm2, RDa | 2900 | | sseconst_abs xmm2, RDa |
2905 | | sseconst_2p52 xmm3, RDa | 2901 | | sseconst_2p52 xmm3, RDa |
@@ -2936,18 +2932,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2936 | | ret | 2932 | | ret |
2937 | |.endmacro | 2933 | |.endmacro |
2938 | | | 2934 | | |
2939 | |->vm_floor: | 2935 | | vm_round vm_floor, 0, 1 |
2940 | |.if not X64 | 2936 | | vm_round vm_ceil, 1, JIT |
2941 | | movsd xmm0, qword [esp+4] | 2937 | | vm_round vm_trunc, 2, JIT |
2942 | | call ->vm_floor_sse | ||
2943 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
2944 | | fld qword [esp+4] | ||
2945 | | ret | ||
2946 | |.endif | ||
2947 | | | ||
2948 | | vm_round vm_floor, 0 | ||
2949 | | vm_round vm_ceil, 1 | ||
2950 | | vm_round vm_trunc, 2 | ||
2951 | | | 2938 | | |
2952 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 2939 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
2953 | |->vm_mod: | 2940 | |->vm_mod: |
@@ -2979,65 +2966,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2979 | | subsd xmm0, xmm1 | 2966 | | subsd xmm0, xmm1 |
2980 | | ret | 2967 | | ret |
2981 | | | 2968 | | |
2982 | |// FP log2(x). Called by math.log(x, base). | ||
2983 | |->vm_log2: | ||
2984 | |.if X64WIN | ||
2985 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
2986 | | fld1 | ||
2987 | | fld qword [rsp+8] | ||
2988 | | fyl2x | ||
2989 | | fstp qword [rsp+8] | ||
2990 | | movsd xmm0, qword [rsp+8] | ||
2991 | |.elif X64 | ||
2992 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
2993 | | fld1 | ||
2994 | | fld qword [rsp-8] | ||
2995 | | fyl2x | ||
2996 | | fstp qword [rsp-8] | ||
2997 | | movsd xmm0, qword [rsp-8] | ||
2998 | |.else | ||
2999 | | fld1 | ||
3000 | | fld qword [esp+4] | ||
3001 | | fyl2x | ||
3002 | |.endif | ||
3003 | | ret | ||
3004 | | | ||
3005 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | ||
3006 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | ||
3007 | |// Caveat: needs 3 slots on x87 stack! | ||
3008 | |->vm_exp_x87: | ||
3009 | | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) | ||
3010 | |->vm_exp2_x87: | ||
3011 | | .if X64WIN | ||
3012 | | .define expscratch, dword [rsp+8] // Use scratch area. | ||
3013 | | .elif X64 | ||
3014 | | .define expscratch, dword [rsp-8] // Use red zone. | ||
3015 | | .else | ||
3016 | | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. | ||
3017 | | .endif | ||
3018 | | fst expscratch // Caveat: overwrites ARG1. | ||
3019 | | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf | ||
3020 | | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 | ||
3021 | |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. | ||
3022 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3023 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3024 | |1: | ||
3025 | | ret | ||
3026 | |2: | ||
3027 | | fpop; fldz; ret | ||
3028 | | | ||
3029 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | ||
3030 | |// and vm_arith. | ||
3031 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | ||
3032 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | ||
3033 | |->vm_pow_sse: | ||
3034 | | cvttsd2si eax, xmm1 | ||
3035 | | cvtsi2sd xmm2, eax | ||
3036 | | ucomisd xmm1, xmm2 | ||
3037 | | jnz >8 // Branch for FP exponents. | ||
3038 | | jp >9 // Branch for NaN exponent. | ||
3039 | | // Fallthrough. | ||
3040 | | | ||
3041 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | 2969 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |
3042 | |->vm_powi_sse: | 2970 | |->vm_powi_sse: |
3043 | | cmp eax, 1; jle >6 // i<=1? | 2971 | | cmp eax, 1; jle >6 // i<=1? |
@@ -3073,246 +3001,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3073 | | sseconst_1 xmm0, RDa | 3001 | | sseconst_1 xmm0, RDa |
3074 | | ret | 3002 | | ret |
3075 | | | 3003 | | |
3076 | |8: // FP/FP power function x^y. | ||
3077 | |.if X64 | ||
3078 | | movd rax, xmm1; shl rax, 1 | ||
3079 | | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf? | ||
3080 | | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? | ||
3081 | | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y? | ||
3082 | | .if X64WIN | ||
3083 | | movsd qword [rsp+16], xmm1 // Use scratch area. | ||
3084 | | movsd qword [rsp+8], xmm0 | ||
3085 | | fld qword [rsp+16] | ||
3086 | | fld qword [rsp+8] | ||
3087 | | .else | ||
3088 | | movsd qword [rsp-16], xmm1 // Use red zone. | ||
3089 | | movsd qword [rsp-8], xmm0 | ||
3090 | | fld qword [rsp-16] | ||
3091 | | fld qword [rsp-8] | ||
3092 | | .endif | ||
3093 | |.else | ||
3094 | | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. | ||
3095 | | movsd qword [esp+4], xmm0 | ||
3096 | | cmp dword [esp+12], 0; jne >1 | ||
3097 | | mov eax, [esp+16]; shl eax, 1 | ||
3098 | | cmp eax, 0xffe00000; je >2 // x^+-Inf? | ||
3099 | |1: | ||
3100 | | cmp dword [esp+4], 0; jne >1 | ||
3101 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3102 | | cmp eax, 0xffe00000; je >5 // +-Inf^y? | ||
3103 | |1: | ||
3104 | | fld qword [esp+12] | ||
3105 | | fld qword [esp+4] | ||
3106 | |.endif | ||
3107 | | fyl2x // y*log2(x) | ||
3108 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3109 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3110 | |.if X64WIN | ||
3111 | | fstp qword [rsp+8] // Use scratch area. | ||
3112 | | movsd xmm0, qword [rsp+8] | ||
3113 | |.elif X64 | ||
3114 | | fstp qword [rsp-8] // Use red zone. | ||
3115 | | movsd xmm0, qword [rsp-8] | ||
3116 | |.else | ||
3117 | | fstp qword [esp+4] // Needs 8 byte scratch area. | ||
3118 | | movsd xmm0, qword [esp+4] | ||
3119 | |.endif | ||
3120 | | ret | ||
3121 | | | ||
3122 | |9: // Handle x^NaN. | ||
3123 | | sseconst_1 xmm2, RDa | ||
3124 | | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 | ||
3125 | | movaps xmm0, xmm1 // x^NaN ==> NaN | ||
3126 | |1: | ||
3127 | | ret | ||
3128 | | | ||
3129 | |2: // Handle x^+-Inf. | ||
3130 | | sseconst_abs xmm2, RDa | ||
3131 | | andpd xmm0, xmm2 // |x| | ||
3132 | | sseconst_1 xmm2, RDa | ||
3133 | | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 | ||
3134 | | movmskpd eax, xmm1 | ||
3135 | | xorps xmm0, xmm0 | ||
3136 | | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3137 | |3: | ||
3138 | | sseconst_hi xmm0, RDa, 7ff00000 // +Inf | ||
3139 | | ret | ||
3140 | | | ||
3141 | |4: // Handle +-0^y. | ||
3142 | | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf | ||
3143 | | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 | ||
3144 | | ret | ||
3145 | | | ||
3146 | |5: // Handle +-Inf^y. | ||
3147 | | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf | ||
3148 | | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 | ||
3149 | | ret | ||
3150 | | | ||
3151 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | ||
3152 | |// Computes fpm(x) for extended math functions. ORDER FPM. | ||
3153 | |->vm_foldfpm: | ||
3154 | |.if JIT | ||
3155 | |.if X64 | ||
3156 | | .if X64WIN | ||
3157 | | .define fpmop, CARG2d | ||
3158 | | .else | ||
3159 | | .define fpmop, CARG1d | ||
3160 | | .endif | ||
3161 | | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse | ||
3162 | | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2 | ||
3163 | | sqrtsd xmm0, xmm0; ret | ||
3164 | |2: | ||
3165 | | .if X64WIN | ||
3166 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3167 | | fld qword [rsp+8] | ||
3168 | | .else | ||
3169 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3170 | | fld qword [rsp-8] | ||
3171 | | .endif | ||
3172 | | cmp fpmop, 5; ja >2 | ||
3173 | | .if X64WIN; pop rax; .endif | ||
3174 | | je >1 | ||
3175 | | call ->vm_exp_x87 | ||
3176 | | .if X64WIN; push rax; .endif | ||
3177 | | jmp >7 | ||
3178 | |1: | ||
3179 | | call ->vm_exp2_x87 | ||
3180 | | .if X64WIN; push rax; .endif | ||
3181 | | jmp >7 | ||
3182 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3183 | | fldln2; fxch; fyl2x; jmp >7 | ||
3184 | |1: ; fld1; fxch; fyl2x; jmp >7 | ||
3185 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3186 | | fldlg2; fxch; fyl2x; jmp >7 | ||
3187 | |1: ; fsin; jmp >7 | ||
3188 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3189 | | fcos; jmp >7 | ||
3190 | |1: ; fptan; fpop | ||
3191 | |7: | ||
3192 | | .if X64WIN | ||
3193 | | fstp qword [rsp+8] // Use scratch area. | ||
3194 | | movsd xmm0, qword [rsp+8] | ||
3195 | | .else | ||
3196 | | fstp qword [rsp-8] // Use red zone. | ||
3197 | | movsd xmm0, qword [rsp-8] | ||
3198 | | .endif | ||
3199 | | ret | ||
3200 | |.else // x86 calling convention. | ||
3201 | | .define fpmop, eax | ||
3202 | | mov fpmop, [esp+12] | ||
3203 | | movsd xmm0, qword [esp+4] | ||
3204 | | cmp fpmop, 1; je >1; ja >2 | ||
3205 | | call ->vm_floor_sse; jmp >7 | ||
3206 | |1: ; call ->vm_ceil_sse; jmp >7 | ||
3207 | |2: ; cmp fpmop, 3; je >1; ja >2 | ||
3208 | | call ->vm_trunc_sse; jmp >7 | ||
3209 | |1: | ||
3210 | | sqrtsd xmm0, xmm0 | ||
3211 | |7: | ||
3212 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3213 | | fld qword [esp+4] | ||
3214 | | ret | ||
3215 | |2: ; fld qword [esp+4] | ||
3216 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3217 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3218 | | fldln2; fxch; fyl2x; ret | ||
3219 | |1: ; fld1; fxch; fyl2x; ret | ||
3220 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3221 | | fldlg2; fxch; fyl2x; ret | ||
3222 | |1: ; fsin; ret | ||
3223 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3224 | | fcos; ret | ||
3225 | |1: ; fptan; fpop; ret | ||
3226 | |.endif | ||
3227 | |9: ; int3 // Bad fpm. | ||
3228 | |.endif | ||
3229 | | | ||
3230 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | ||
3231 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | ||
3232 | |// and basic math functions. ORDER ARITH | ||
3233 | |->vm_foldarith: | ||
3234 | |.if X64 | ||
3235 | | | ||
3236 | | .if X64WIN | ||
3237 | | .define foldop, CARG3d | ||
3238 | | .else | ||
3239 | | .define foldop, CARG1d | ||
3240 | | .endif | ||
3241 | | cmp foldop, 1; je >1; ja >2 | ||
3242 | | addsd xmm0, xmm1; ret | ||
3243 | |1: ; subsd xmm0, xmm1; ret | ||
3244 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3245 | | mulsd xmm0, xmm1; ret | ||
3246 | |1: ; divsd xmm0, xmm1; ret | ||
3247 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse | ||
3248 | | cmp foldop, 7; je >1; ja >2 | ||
3249 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | ||
3250 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | ||
3251 | |2: ; cmp foldop, 9; ja >2 | ||
3252 | |.if X64WIN | ||
3253 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3254 | | movsd qword [rsp+16], xmm1 | ||
3255 | | fld qword [rsp+8] | ||
3256 | | fld qword [rsp+16] | ||
3257 | |.else | ||
3258 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3259 | | movsd qword [rsp-16], xmm1 | ||
3260 | | fld qword [rsp-8] | ||
3261 | | fld qword [rsp-16] | ||
3262 | |.endif | ||
3263 | | je >1 | ||
3264 | | fpatan | ||
3265 | |7: | ||
3266 | |.if X64WIN | ||
3267 | | fstp qword [rsp+8] // Use scratch area. | ||
3268 | | movsd xmm0, qword [rsp+8] | ||
3269 | |.else | ||
3270 | | fstp qword [rsp-8] // Use red zone. | ||
3271 | | movsd xmm0, qword [rsp-8] | ||
3272 | |.endif | ||
3273 | | ret | ||
3274 | |1: ; fxch; fscale; fpop1; jmp <7 | ||
3275 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3276 | | minsd xmm0, xmm1; ret | ||
3277 | |1: ; maxsd xmm0, xmm1; ret | ||
3278 | |9: ; int3 // Bad op. | ||
3279 | | | ||
3280 | |.else // x86 calling convention. | ||
3281 | | | ||
3282 | | .define foldop, eax | ||
3283 | | mov foldop, [esp+20] | ||
3284 | | movsd xmm0, qword [esp+4] | ||
3285 | | movsd xmm1, qword [esp+12] | ||
3286 | | cmp foldop, 1; je >1; ja >2 | ||
3287 | | addsd xmm0, xmm1 | ||
3288 | |7: | ||
3289 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3290 | | fld qword [esp+4] | ||
3291 | | ret | ||
3292 | |1: ; subsd xmm0, xmm1; jmp <7 | ||
3293 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3294 | | mulsd xmm0, xmm1; jmp <7 | ||
3295 | |1: ; divsd xmm0, xmm1; jmp <7 | ||
3296 | |2: ; cmp foldop, 5 | ||
3297 | | je >1; ja >2 | ||
3298 | | call ->vm_mod; jmp <7 | ||
3299 | |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area. | ||
3300 | |2: ; cmp foldop, 7; je >1; ja >2 | ||
3301 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | ||
3302 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | ||
3303 | |2: ; cmp foldop, 9; ja >2 | ||
3304 | | fld qword [esp+4] // Reload from stack | ||
3305 | | fld qword [esp+12] | ||
3306 | | je >1 | ||
3307 | | fpatan; ret | ||
3308 | |1: ; fxch; fscale; fpop1; ret | ||
3309 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3310 | | minsd xmm0, xmm1; jmp <7 | ||
3311 | |1: ; maxsd xmm0, xmm1; jmp <7 | ||
3312 | |9: ; int3 // Bad op. | ||
3313 | | | ||
3314 | |.endif | ||
3315 | | | ||
3316 | |//----------------------------------------------------------------------- | 3004 | |//----------------------------------------------------------------------- |
3317 | |//-- Miscellaneous functions -------------------------------------------- | 3005 | |//-- Miscellaneous functions -------------------------------------------- |
3318 | |//----------------------------------------------------------------------- | 3006 | |//----------------------------------------------------------------------- |
@@ -4107,8 +3795,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4107 | break; | 3795 | break; |
4108 | case BC_POW: | 3796 | case BC_POW: |
4109 | | ins_arithpre movsd, xmm1 | 3797 | | ins_arithpre movsd, xmm1 |
4110 | | call ->vm_pow_sse | 3798 | | mov RB, BASE |
3799 | |.if not X64 | ||
3800 | | movsd FPARG1, xmm0 | ||
3801 | | movsd FPARG3, xmm1 | ||
3802 | |.endif | ||
3803 | | call extern pow | ||
3804 | | movzx RA, PC_RA | ||
3805 | | mov BASE, RB | ||
3806 | |.if X64 | ||
4111 | | ins_arithpost | 3807 | | ins_arithpost |
3808 | |.else | ||
3809 | | fstp qword [BASE+RA*8] | ||
3810 | |.endif | ||
4112 | | ins_next | 3811 | | ins_next |
4113 | break; | 3812 | break; |
4114 | 3813 | ||