about | summary | refs | log | tree | commit | diff
path: root/src/lj_asm_x86.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_asm_x86.h')
-rw-r--r-- src/lj_asm_x86.h 82
1 files changed, 20 insertions, 62 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 8b541250..bd97764f 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1593,26 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
1593 } 1593 }
1594} 1594}
1595 1595
/*
** Set up a call to lj_vm_pow_sse for the instruction ir.
**
** as    assembler state passed through to the register allocator/emitter.
** ir    the instruction whose result the call produces.
** lref  reference of the first operand; requested in RID_XMM0 via ra_left().
** rref  reference of the second operand; requested in RID_XMM1 via ra_left().
**
** The register set RID_XMM0..RID_XMM2 plus RID_EAX (minus ir's own register,
** when it already has one) is handed to ra_evictset() before the call is
** emitted, and ra_destreg() is called with RID_XMM0 for the result.
**
** NOTE(review): this backend appears to emit machine code in reverse, so the
** ra_left() operand setup written after emit_call() would execute before the
** call at runtime -- confirm against the emitter before reordering anything.
*/
1596static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1597{
1598 /* The modified regs must match with the *.dasc implementation. */
1599 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1600 IRIns *irx;
 /* Keep ir's own register out of the evicted set; ra_destreg() deals with it. */
1601 if (ra_hasreg(ir->r))
1602 rset_clear(drop, ir->r); /* Dest reg handled below. */
1603 ra_evictset(as, drop);
 /* Presumably pins the result of the call to XMM0 -- TODO confirm in ra_destreg(). */
1604 ra_destreg(as, ir, RID_XMM0);
1605 emit_call(as, lj_vm_pow_sse);
 /* If the left operand has no register yet but is hinted at XMM1, reset its
 ** allocation state to RID_INIT so the hint is dropped: XMM1 is wanted for
 ** rref below.
 */
1606 irx = IR(lref);
1607 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1608 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
 /* Request the operands in the fixed argument registers: lref -> XMM0, rref -> XMM1. */
1609 ra_left(as, RID_XMM0, lref);
1610 ra_left(as, RID_XMM1, rref);
1611}
1612
1613static void asm_fpmath(ASMState *as, IRIns *ir) 1596static void asm_fpmath(ASMState *as, IRIns *ir)
1614{ 1597{
1615 IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; 1598 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1616 if (fpm == IRFPM_SQRT) { 1599 if (fpm == IRFPM_SQRT) {
1617 Reg dest = ra_dest(as, ir, RSET_FPR); 1600 Reg dest = ra_dest(as, ir, RSET_FPR);
1618 Reg left = asm_fuseload(as, ir->op1, RSET_FPR); 1601 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1645,53 +1628,28 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1645 } 1628 }
1646 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { 1629 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1647 /* Rejoined to pow(). */ 1630 /* Rejoined to pow(). */
1648 } else { /* Handle x87 ops. */ 1631 } else {
1649 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ 1632 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1650 Reg dest = ir->r;
1651 if (ra_hasreg(dest)) {
1652 ra_free(as, dest);
1653 ra_modified(as, dest);
1654 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1655 }
1656 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1657 switch (fpm) { /* st0 = lj_vm_*(st0) */
1658 case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
1659 case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
1660 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
1661 case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
1662 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
1663 case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
1664 /* Note: the use of fyl2xp1 would be pointless here. When computing
1665 ** log(1.0+eps) the precision is already lost after 1.0 is added.
1666 ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
1667 */
1668 emit_x87op(as, XI_FYL2X); break;
1669 case IRFPM_OTHER:
1670 switch (ir->o) {
1671 case IR_ATAN2:
1672 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
1673 case IR_LDEXP:
1674 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
1675 default: lua_assert(0); break;
1676 }
1677 break;
1678 default: lua_assert(0); break;
1679 }
1680 asm_x87load(as, ir->op1);
1681 switch (fpm) {
1682 case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
1683 case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
1684 case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
1685 case IRFPM_OTHER:
1686 if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
1687 break;
1688 default: break;
1689 }
1690 } 1633 }
1691} 1634}
1692 1635
1693#define asm_atan2(as, ir) asm_fpmath(as, ir) 1636#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1694#define asm_ldexp(as, ir) asm_fpmath(as, ir) 1637
/*
** Assemble the ldexp instruction ir using the x87 FPU.
**
** as  assembler state passed through to the emitter/register allocator.
** ir  the instruction; ir->op1 and ir->op2 are loaded onto the x87 stack,
**     ir->s selects the stack offset used for the result, and ir->r is the
**     destination register (if one is assigned).
**
** The result is stored with FSTPq to [RID_ESP+ofs]; if ir has a register, a
** MOVSD between that register and the same slot is emitted as well.
**
** NOTE(review): this backend appears to emit machine code in reverse, which
** would make the runtime order: load op2, load op1, FSCALE, FPOP1, FSTPq to
** the slot, then MOVSD from the slot into dest -- confirm against the emitter.
** Under that reading st0 = op1 and st1 = op2 when FSCALE runs, i.e. the x87
** computes op1 * 2^op2.
*/
1638static void asm_ldexp(ASMState *as, IRIns *ir)
1639{
1640 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
1641 Reg dest = ir->r;
 /* Only when a destination register is assigned: release it, mark it as
 ** modified and emit the MOVSD that pairs it with the stack slot at ofs.
 */
1642 if (ra_hasreg(dest)) {
1643 ra_free(as, dest);
1644 ra_modified(as, dest);
1645 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1646 }
 /* Store-and-pop the x87 result as a qword to [RID_ESP+ofs]. */
1647 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
 /* FPOP1 presumably discards the leftover exponent operand in st1 after
 ** FSCALE -- TODO confirm the XI_FPOP1 encoding.
 */
1648 emit_x87op(as, XI_FPOP1);
1649 emit_x87op(as, XI_FSCALE);
 /* Both operands go onto the x87 stack via asm_x87load(). */
1650 asm_x87load(as, ir->op1);
1651 asm_x87load(as, ir->op2);
1652}
1695 1653
1696static void asm_fppowi(ASMState *as, IRIns *ir) 1654static void asm_fppowi(ASMState *as, IRIns *ir)
1697{ 1655{