Diffstat (limited to 'src/lj_asm.c')
 -rw-r--r--  src/lj_asm.c | 444
 1 file changed, 363 insertions(+), 81 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index cc2ae597..441700d4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -347,6 +347,20 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
   }
 }
 
+/* op rm/mrm, i */
+static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
+{
+  x86Op xo;
+  if (checki8(i)) {
+    emit_i8(as, i);
+    xo = XG_TOXOi8(xg);
+  } else {
+    emit_i32(as, i);
+    xo = XG_TOXOi(xg);
+  }
+  emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
+}
+
 /* -- Emit moves ---------------------------------------------------------- */
 
 /* mov [base+ofs], i */
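
Note: the new emit_gmrmi helper picks between the two immediate encodings of the x86 group-1 ALU instructions, which is the same choice the old inline code in asm_comp_ made (see the hunk at -3008 below). A standalone sketch of the size trade-off it exploits, with encodings from the x86 manual:

    #include <stdint.h>

    /* Group-1 ALU ops (ADD/OR/ADC/.../CMP) have a short form (opcode
    ** 0x83, sign-extended imm8) and a long form (0x81, full imm32);
    ** the short form saves three bytes when the immediate fits int8_t,
    ** which is what checki8() tests.
    */
    static int fits_imm8(int32_t i) { return i == (int8_t)i; }
    /* Example encodings:
    **   cmp eax, 1    -> 83 F8 01            (imm8 form)
    **   cmp eax, 300  -> 81 F8 2C 01 00 00   (imm32 form)
    */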
@@ -371,7 +385,10 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 /* mov r, i / xor r, r */
 static void emit_loadi(ASMState *as, Reg r, int32_t i)
 {
-  if (i == 0) {
+  /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
+  if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
+                            (as->curins+1 < as->T->nins &&
+                             IR(as->curins+1)->o == IR_HIOP)))) {
     emit_rr(as, XO_ARITH(XOg_XOR), r, r);
   } else {
     MCode *p = as->mcp;
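
Note: the guard around the XOR form exists because `xor r,r` updates EFLAGS. When the current or adjacent instruction is a HIOP, the flags are live between the two halves of a split 64 bit op (e.g. the carry between ADD and ADC, see asm_hiop below), so a zero must then be materialized with the flag-neutral encoding instead:

    xor eax, eax   -> 31 C0            (2 bytes, clobbers EFLAGS)
    mov eax, 0     -> B8 00 00 00 00   (5 bytes, EFLAGS untouched)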
@@ -422,6 +439,19 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
 /* Label for short jumps. */
 typedef MCode *MCLabel;
 
+#if LJ_32 && LJ_HASFFI
+/* jmp short target */
+static void emit_sjmp(ASMState *as, MCLabel target)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = target - p;
+  lua_assert(delta == (int8_t)delta);
+  p[-1] = (MCode)(int8_t)delta;
+  p[-2] = XI_JMPs;
+  as->mcp = p - 2;
+}
+#endif
+
 /* jcc short target */
 static void emit_sjcc(ASMState *as, int cc, MCLabel target)
 {
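
Note: like all emit_* helpers in this file, emit_sjmp stores bytes downwards; the assembler generates machine code backwards, with as->mcp pointing at the most recently emitted instruction. A minimal sketch of the same pattern outside the assembler (names hypothetical; the caller must guarantee at least two bytes of room below p):

    #include <stddef.h>
    #include <stdint.h>

    /* Emit "JMP rel8" backwards: the displacement byte is written at
    ** the higher address, the 0xEB opcode just below it, and the code
    ** pointer moves down by the instruction length.
    */
    static uint8_t *emit_short_jmp(uint8_t *p, uint8_t *target)
    {
      ptrdiff_t delta = target - p;  /* rel8 is relative to the insn end. */
      p[-1] = (uint8_t)delta;
      p[-2] = 0xEB;                  /* XI_JMPs */
      return p - 2;                  /* New mcp. */
    }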
@@ -630,7 +660,7 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
   } else if (ir->o == IR_KPRI) {  /* REF_NIL stores ASMREF_L register. */
     lua_assert(irt_isnil(ir->t));
     emit_getgl(as, r, jit_L);
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
 #endif
@@ -681,8 +711,7 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 #if LJ_64
 #define REX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? REX_64 : 0))
 #else
-/* NYI: 32 bit register pairs. */
-#define REX_64IR(ir, r)		check_exp(!irt_is64((ir)->t), (r))
+#define REX_64IR(ir, r)		(r)
 #endif
 
 /* Generic move between two regs. */
@@ -939,7 +968,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
       emit_loadn(as, dest, tv);
       return;
     }
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, dest, ir_kint64(ir)->u64);
     return;
@@ -1463,7 +1492,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 #endif
     if (r) {  /* Argument is in a register. */
       if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
	if (ir->o == IR_KINT64)
	  emit_loadu64(as, r, ir_kint64(ir)->u64);
	else
@@ -1519,7 +1548,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     if (irt_isfp(ir->t)) {
-      int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
 #if LJ_64
       if ((ci->flags & CCI_CASTU64)) {
	Reg dest = ir->r;
@@ -1632,19 +1661,24 @@ static void asm_conv(ASMState *as, IRIns *ir)
   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
   IRRef lref = ir->op1;
   lua_assert(irt_type(ir->t) != st);
+  lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64)));  /* Handled by SPLIT. */
   if (irt_isfp(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     if (stfp) {  /* FP to FP conversion. */
       Reg left = asm_fuseload(as, lref, RSET_FPR);
       emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left);
       if (left == dest) return;  /* Avoid the XO_XORPS. */
-#if LJ_32
-    } else if (st >= IRT_U32) {
-      /* NYI: 64 bit integer or uint32_t to number conversion. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    } else if (LJ_32 && st == IRT_U32) {  /* U32 to FP conversion on x86. */
+      /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
+      cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
+      Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      if (irt_isfloat(ir->t))
+	emit_rr(as, XO_CVTSD2SS, dest, dest);
+      emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
+      emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
+      emit_loadn(as, bias, k);
+      emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
       return;
-#endif
     } else {  /* Integer to FP conversion. */
       Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
		 ra_alloc1(as, lref, RSET_GPR) :
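
Note: the `(2^52+2^51 .. u32)` comment is the classic IEEE-754 bias trick. MOVD puts the u32 into the low mantissa bits of an XMM register, XORPS merges in the bit pattern of 2^52+2^51 (U64x(43380000,00000000)), and subtracting that bias leaves exactly the integer value, with no branches. A self-contained check of the arithmetic (assumes little-endian IEEE-754 doubles):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
      uint32_t u = 0xdeadbeef;
      /* 0x4338000000000000 is 2^52+2^51; OR-ing in u (< 2^32) builds
      ** the same bits that MOVD+XORPS produce in the XMM register.
      */
      uint64_t bits = 0x4338000000000000ull | u;
      double d;
      memcpy(&d, &bits, sizeof(d));
      d -= 6755399441055744.0;  /* Subtract the 2^52+2^51 bias. */
      printf("%.0f == %u\n", d, u);  /* 3735928559 == 3735928559 */
      return 0;
    }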
@@ -1663,41 +1697,47 @@ static void asm_conv(ASMState *as, IRIns *ir)
     emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
-      lua_assert(!irt_is64(ir->t));  /* No support for checked 64 bit conv. */
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
-#if LJ_32
-    } else if (irt_isi64(ir->t) || irt_isu64(ir->t) || irt_isu32(ir->t)) {
-      /* NYI: number to 64 bit integer or uint32_t conversion. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#endif
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       x86Op op = st == IRT_NUM ?
		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
-      if (LJ_64 && irt_isu64(ir->t)) {
-	const void *k = lj_ir_k64_find(as->J, U64x(c3f00000,00000000));
-	MCLabel l_end = emit_label(as);
-	Reg left = IR(lref)->r;
+      if (LJ_32 && irt_isu32(ir->t)) {  /* FP to U32 conversion on x86. */
+	/* u32 = (int32_t)(number - 2^31) + 2^31 */
+	Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
+					  ra_scratch(as, RSET_FPR);
+	emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
+	emit_rr(as, op, dest, tmp);
+	if (st == IRT_NUM)
+	  emit_rma(as, XO_ADDSD, tmp,
+		   lj_ir_k64_find(as->J, U64x(c1e00000,00000000)));
+	else
+	  emit_rma(as, XO_ADDSS, tmp,
+		   lj_ir_k64_find(as->J, U64x(00000000,cf000000)));
+	ra_left(as, tmp, lref);
+      } else if (LJ_64 && irt_isu64(ir->t)) {
 	/* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
-	if (ra_hasreg(left)) {
-	  Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left));
-	  emit_rr(as, op, dest|REX_64, tmpn);
-	  emit_rr(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, tmpn, left);
-	  emit_rma(as, st == IRT_NUM ? XMM_MOVRM(as) : XO_MOVSS, tmpn, k);
-	} else {
-	  left = ra_allocref(as, lref, RSET_FPR);
-	  emit_rr(as, op, dest|REX_64, left);
-	  emit_rma(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, left, k);
-	}
+	Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
+					  ra_scratch(as, RSET_FPR);
+	MCLabel l_end = emit_label(as);
+	emit_rr(as, op, dest|REX_64, tmp);
+	if (st == IRT_NUM)
+	  emit_rma(as, XO_ADDSD, tmp,
+		   lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+	else
+	  emit_rma(as, XO_ADDSS, tmp,
+		   lj_ir_k64_find(as->J, U64x(00000000,df800000)));
 	emit_sjcc(as, CC_NS, l_end);
 	emit_rr(as, XO_TEST, dest|REX_64, dest);  /* Check if dest < 2^63. */
-	emit_rr(as, op, dest|REX_64, left);
+	emit_rr(as, op, dest|REX_64, tmp);
+	ra_left(as, tmp, lref);
       } else {
	Reg left = asm_fuseload(as, lref, RSET_FPR);
	if (LJ_64 && irt_isu32(ir->t))
-	  emit_rr(as, XO_MOV, dest, dest);  /* Zero upper 32 bits. */
+	  emit_rr(as, XO_MOV, dest, dest);  /* Zero hiword. */
	emit_mrm(as, op,
		 dest|((LJ_64 &&
			(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
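
Note: CVTTSD2SI can only produce signed 32 bit results on x86, hence the `u32 = (int32_t)(number - 2^31) + 2^31` rewrite: shift the value into int32_t range, convert, then undo the shift with a wrapping ADD of 0x80000000. The constants U64x(c1e00000,00000000) and U64x(00000000,cf000000) are -2^31 as a double and as a float, respectively. The same idea in plain C:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
      double x = 3735928559.0;  /* 0xdeadbeef, above int32_t range. */
      /* Shift into int32_t range, truncate, undo the shift mod 2^32. */
      uint32_t u = (uint32_t)((int32_t)(x - 2147483648.0) + 0x80000000u);
      printf("0x%08x\n", u);  /* 0xdeadbeef */
      return 0;
    }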
@@ -1728,12 +1768,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, op, dest, left);
     }
   } else {  /* 32/64 bit integer conversions. */
-    if (irt_is64(ir->t)) {
-#if LJ_32
-      /* NYI: conversion to 64 bit integers. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#else
+    if (LJ_32) {  /* Only need to handle 32/32 bit no-op (cast) on x86. */
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    } else if (irt_is64(ir->t)) {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       if (st64 || !(ir->op2 & IRCONV_SEXT)) {
	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
@@ -1742,21 +1780,14 @@ static void asm_conv(ASMState *as, IRIns *ir)
	Reg left = asm_fuseload(as, lref, RSET_GPR);
	emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
       }
-#endif
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       if (st64) {
-#if LJ_32
-	/* NYI: conversion from 64 bit integers. */
-	setintV(&as->J->errinfo, ir->o);
-	lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#else
	Reg left = asm_fuseload(as, lref, RSET_GPR);
-	/* This is either a 32 bit reg/reg mov which zeroes the hi-32 bits
-	** or a load of the lower 32 bits from a 64 bit address.
+	/* This is either a 32 bit reg/reg mov which zeroes the hiword
+	** or a load of the loword from a 64 bit address.
	*/
	emit_mrm(as, XO_MOV, dest, left);
-#endif
       } else {  /* 32/32 bit no-op (cast). */
	ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
       }
@@ -1764,6 +1795,93 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
+#if LJ_32 && LJ_HASFFI
+/* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */
+
+/* 64 bit integer to FP conversion in 32 bit mode. */
+static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
+{
+  Reg hi = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi));
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	      dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
+	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
+  if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
+    MCLabel l_end = emit_label(as);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_end);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if u64 >= 2^63. */
+  } else {
+    lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
+  }
+  emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
+  /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
+  emit_rmro(as, XO_MOVto, hi, RID_ESP, 4);
+  emit_rmro(as, XO_MOVto, lo, RID_ESP, 0);
+}
+
+/* FP to 64 bit integer conversion in 32 bit mode. */
+static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  Reg lo, hi;
+  lua_assert(st == IRT_NUM || st == IRT_FLOAT);
+  lua_assert(dt == IRT_I64 || dt == IRT_U64);
+  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
+  hi = ra_dest(as, ir, RSET_GPR);
+  lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
+  if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
+  /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */
+  if (!(as->flags & JIT_F_SSE3)) {  /* Set FPU rounding mode to default. */
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4);
+    emit_rmro(as, XO_MOVto, lo, RID_ESP, 4);
+    emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
+  }
+  if (dt == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
+    MCLabel l_pop, l_end = emit_label(as);
+    emit_x87op(as, XI_FPOP);
+    l_pop = emit_label(as);
+    emit_sjmp(as, l_end);
+    emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+    if ((as->flags & JIT_F_SSE3))
+      emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+    else
+      emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_pop);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if out-of-range (2^63). */
+  }
+  emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+  if ((as->flags & JIT_F_SSE3)) {  /* Truncation is easy with SSE3. */
+    emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+  } else {  /* Otherwise set FPU rounding mode to truncate before the store. */
+    emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0);
+    emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0);
+    emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0);
+    emit_loadi(as, lo, 0xc00);
+    emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0);
+  }
+  if (dt == IRT_U64)
+    emit_x87op(as, XI_FDUP);
+  emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd,
+	   st == IRT_NUM ? XOg_FLDq : XOg_FLDd,
+	   asm_fuseload(as, ir->op1, RSET_EMPTY));
+}
+#endif
+
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   /* Force a spill slot for the destination register (if any). */
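
Note: the compensation in asm_conv_fp_int64 is needed because FILD always interprets its operand as signed: a u64 in [2^63,2^64-1] loads as value-2^64, and the TEST/JNS pair adds 2^64 (U64x(43f00000,00000000)) back. The equivalent fixup in C:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
      uint64_t u = 0xfedcba9876543210ull;      /* u >= 2^63. */
      double d = (double)(int64_t)u;           /* What FILD qword yields. */
      if ((int64_t)u < 0)                      /* The CC_NS test above. */
        d += 18446744073709551616.0;           /* Add 2^64 to compensate. */
      printf("%.1f vs %.1f\n", d, (double)u);  /* Both print ~1.84e19. */
      return 0;
    }

The FLDCW sequences in asm_conv_int64_fp serve the converse direction: FISTP rounds according to the FPU control word, so without SSE3's truncating FISTTP the rounding-control bits (mask 0xc00) are forced to truncate and restored to the default afterwards.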
@@ -2644,6 +2762,18 @@ static void asm_powi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
+#if LJ_64 && LJ_HASFFI
+static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+#endif
+
 /* Find out whether swapping operands might be beneficial. */
 static int swapops(ASMState *as, IRIns *ir)
 {
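
Note: on x64, 64 bit integer POWI is not inlined at all; asm_arith64 simply routes it to the runtime via the IRCALL_lj_carith_powi64/powu64 call info (see the dispatch hunk at -3744 below). Presumably the callee is a conventional square-and-multiply; a sketch of that shape (hypothetical stand-in, not the actual lj_carith implementation):

    #include <stdint.h>

    /* Binary exponentiation over the wrapping 2^64 ring. */
    static uint64_t powu64(uint64_t x, uint64_t k)
    {
      uint64_t y = 1;
      for (; k; k >>= 1) {
        if (k & 1) y *= x;
        x *= x;
      }
      return y;
    }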
@@ -2877,12 +3007,30 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
-#define VCC_U	0x100		/* Unordered. */
-#define VCC_P	0x200		/* Needs extra CC_P branch. */
-#define VCC_S	0x400		/* Swap avoids CC_P branch. */
+#define VCC_U	0x1000		/* Unordered. */
+#define VCC_P	0x2000		/* Needs extra CC_P branch. */
+#define VCC_S	0x4000		/* Swap avoids CC_P branch. */
 #define VCC_PS	(VCC_P|VCC_S)
 
-static void asm_comp_(ASMState *as, IRIns *ir, int cc)
+/* Map of comparisons to flags. ORDER IR. */
+#define COMPFLAGS(ci, cin, cu, cf)	((ci)+((cu)<<4)+((cin)<<8)+(cf))
+static const uint16_t asm_compmap[IR_ABC+1] = {
+  /*                 signed non-eq unsigned flags */
+  /* LT  */ COMPFLAGS(CC_GE, CC_G,  CC_AE, VCC_PS),
+  /* GE  */ COMPFLAGS(CC_L,  CC_L,  CC_B,  0),
+  /* LE  */ COMPFLAGS(CC_G,  CC_G,  CC_A,  VCC_PS),
+  /* GT  */ COMPFLAGS(CC_LE, CC_L,  CC_BE, 0),
+  /* ULT */ COMPFLAGS(CC_AE, CC_A,  CC_AE, VCC_U),
+  /* UGE */ COMPFLAGS(CC_B,  CC_B,  CC_B,  VCC_U|VCC_PS),
+  /* ULE */ COMPFLAGS(CC_A,  CC_A,  CC_A,  VCC_U),
+  /* UGT */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS),
+  /* EQ  */ COMPFLAGS(CC_NE, CC_NE, CC_NE, VCC_P),
+  /* NE  */ COMPFLAGS(CC_E,  CC_E,  CC_E,  VCC_U|VCC_P),
+  /* ABC */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS)  /* Same as UGT. */
+};
+
+/* FP and integer comparisons. */
+static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
 {
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
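
Note: each asm_compmap entry packs four fields per the COMPFLAGS layout: bits 0-3 hold the condition for FP/signed compares, bits 4-7 the unsigned variant, bits 8-11 the hiword condition without the equality part (used by the 64 bit compare below), and bits 12-14 the VCC_* flags for unordered FP results. The conditions are negated relative to the IR op (LT stores CC_GE) because guards branch to the exit when the comparison does *not* hold. A decoding sketch:

    #include <stdint.h>

    typedef struct {
      int cc_signed;    /* Bits 0-3: negated cc for FP/signed compares. */
      int cc_unsigned;  /* Bits 4-7: negated cc for unsigned compares. */
      int cc_hiword;    /* Bits 8-11: hiword cc without equality check. */
      uint32_t vcc;     /* Bits 12-14: VCC_U/VCC_P/VCC_S NaN handling. */
    } CompDecode;

    static CompDecode comp_decode(uint32_t cc)
    {
      CompDecode d;
      d.cc_signed = (int)(cc & 15);
      d.cc_unsigned = (int)((cc >> 4) & 15);
      d.cc_hiword = (int)((cc >> 8) & 15);
      d.vcc = cc & 0x7000;
      return d;
    }

This also explains why the VCC_* values moved from 0x100/0x200/0x400 to 0x1000/0x2000/0x4000: bits 8-11 are now occupied by the hiword condition field.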
@@ -3008,15 +3156,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
	if (irl+1 == ir)  /* Referencing previous ins? */
	  as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
       } else {
-	x86Op xo;
-	if (checki8(imm)) {
-	  emit_i8(as, imm);
-	  xo = XO_ARITHi8;
-	} else {
-	  emit_i32(as, imm);
-	  xo = XO_ARITHi;
-	}
-	emit_mrm(as, xo, r64 + XOg_CMP, left);
+	emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm);
       }
     }
   } else {
@@ -3028,8 +3168,133 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
   }
 }
 
-#define asm_comp(as, ir, ci, cf, cu) \
-  asm_comp_(as, ir, (ci)+((cf)<<4)+(cu))
+#if LJ_32 && LJ_HASFFI
+/* 64 bit integer comparisons in 32 bit mode. */
+static void asm_comp_int64(ASMState *as, IRIns *ir)
+{
+  uint32_t cc = asm_compmap[(ir-1)->o];
+  RegSet allow = RSET_GPR;
+  Reg lefthi = RID_NONE, leftlo = RID_NONE;
+  Reg righthi = RID_NONE, rightlo = RID_NONE;
+  MCLabel l_around;
+  x86ModRM mrm;
+
+  as->curins--;  /* Skip loword ins. Avoids failing in noconflict(), too. */
+
+  /* Allocate/fuse hiword operands. */
+  if (irref_isk(ir->op2)) {
+    lefthi = asm_fuseload(as, ir->op1, allow);
+  } else {
+    lefthi = ra_alloc1(as, ir->op1, allow);
+    righthi = asm_fuseload(as, ir->op2, allow);
+    if (righthi == RID_MRM) {
+      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
+      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
+    } else {
+      rset_clear(allow, righthi);
+    }
+  }
+  mrm = as->mrm;  /* Save state for hiword instruction. */
+
+  /* Allocate/fuse loword operands. */
+  if (irref_isk((ir-1)->op2)) {
+    leftlo = asm_fuseload(as, (ir-1)->op1, allow);
+  } else {
+    leftlo = ra_alloc1(as, (ir-1)->op1, allow);
+    rightlo = asm_fuseload(as, (ir-1)->op2, allow);
+    if (rightlo == RID_MRM) {
+      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
+      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
+    } else {
+      rset_clear(allow, rightlo);
+    }
+  }
+
+  /* All register allocations must be performed _before_ this point. */
+  l_around = emit_label(as);
+  as->invmcp = as->testmcp = NULL;  /* Cannot use these optimizations. */
+
+  /* Loword comparison and branch. */
+  asm_guardcc(as, cc >> 4);  /* Always use unsigned compare for loword. */
+  if (ra_noreg(rightlo)) {
+    int32_t imm = IR((ir-1)->op2)->i;
+    if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM)
+      emit_rr(as, XO_TEST, leftlo, leftlo);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm);
+  } else {
+    emit_mrm(as, XO_CMP, leftlo, rightlo);
+  }
+
+  /* Hiword comparison and branches. */
+  if ((cc & 15) != CC_NE)
+    emit_sjcc(as, CC_NE, l_around);  /* Hiword unequal: skip loword compare. */
+  if ((cc & 15) != CC_E)
+    asm_guardcc(as, cc >> 8);  /* Hiword compare without equality check. */
+  as->mrm = mrm;  /* Restore state. */
+  if (ra_noreg(righthi)) {
+    int32_t imm = IR(ir->op2)->i;
+    if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM)
+      emit_rr(as, XO_TEST, lefthi, lefthi);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm);
+  } else {
+    emit_mrm(as, XO_CMP, lefthi, righthi);
+  }
+}
+#endif
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+#if LJ_32 && LJ_HASFFI
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
+    if (usehi || uselo) {
+      if (irt_isfp(ir->t))
+	asm_conv_fp_int64(as, ir);
+      else
+	asm_conv_int64_fp(as, ir);
+    }
+    as->curins--;  /* Always skip the CONV. */
+    return;
+  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
+    asm_comp_int64(as, ir);
+    return;
+  }
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_ADD:
+    asm_intarith(as, ir, uselo ? XOg_ADC : XOg_ADD);
+    break;
+  case IR_SUB:
+    asm_intarith(as, ir, uselo ? XOg_SBB : XOg_SUB);
+    break;
+  case IR_NEG: {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    emit_rr(as, XO_GROUP3, XOg_NEG, dest);
+    if (uselo) {
+      emit_i8(as, 0);
+      emit_rr(as, XO_ARITHi8, XOg_ADC, dest);
+    }
+    ra_left(as, dest, ir->op1);
+    break;
+  }
+  case IR_CALLN:
+    ra_destreg(as, ir, RID_RETHI);
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RET));  /* Mark call as used. */
+    break;
+  default: lua_assert(0); break;
+  }
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on x64 or without FFI. */
+#endif
+}
 
 /* -- Stack handling ------------------------------------------------------ */
 
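Note: asm_hiop is where the split halves meet again. A 64 bit compare first tests the hiwords and only falls through to an unsigned loword compare when they are equal, and a 64 bit add/sub re-emits the arithmetic helper with the carry-aware opcode: the loword ADD/SUB sets the carry flag and the hiword ADC/SBB consumes it (which is also why emit_loadi above must not clobber EFLAGS next to a HIOP). What the emitted ADD/ADC pair computes, written out in C:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
      uint32_t alo = 0xffffffffu, ahi = 1, blo = 2, bhi = 3;
      uint32_t lo = alo + blo;               /* ADD: sets the carry flag. */
      uint32_t hi = ahi + bhi + (lo < alo);  /* ADC: adds the carry in. */
      printf("0x%08x%08x\n", hi, lo);        /* 0x0000000500000001 */
      return 0;
    }
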
@@ -3682,21 +3947,16 @@ static void asm_ir(ASMState *as, IRIns *ir)
   switch ((IROp)ir->o) {
   /* Miscellaneous ops. */
   case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: break;
+  case IR_NOP: lua_assert(!ra_used(ir)); break;
   case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
 
   /* Guarded assertions. */
-  case IR_LT: asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break;
-  case IR_GE: asm_comp(as, ir, CC_L, CC_B, 0); break;
-  case IR_LE: asm_comp(as, ir, CC_G, CC_A, VCC_PS); break;
-  case IR_GT: asm_comp(as, ir, CC_LE, CC_BE, 0); break;
-  case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break;
-  case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break;
-  case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break;
-  case IR_ABC:
-  case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break;
-  case IR_EQ: asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break;
-  case IR_NE: asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break;
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_EQ: case IR_NE: case IR_ABC:
+    asm_comp(as, ir, asm_compmap[ir->o]);
+    break;
 
   case IR_RETF: asm_retf(as, ir); break;
 
@@ -3744,7 +4004,15 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
     asm_fpmath(as, ir);
     break;
-  case IR_POWI: asm_powi(as, ir); break;
+  case IR_POWI:
+#if LJ_64 && LJ_HASFFI
+    if (!irt_isnum(ir->t))
+      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					     IRCALL_lj_carith_powu64);
+    else
+#endif
+      asm_powi(as, ir);
+    break;
 
   /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
   case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
@@ -3801,6 +4069,7 @@ static void asm_trace(ASMState *as)
 {
   for (as->curins--; as->curins > as->stopins; as->curins--) {
     IRIns *ir = IR(as->curins);
+    lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
     if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
       continue;  /* Dead-code elimination can be soooo easy. */
     if (irt_isguard(ir->t))
@@ -3864,11 +4133,10 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
 #if LJ_64
-      /* NYI: add stack slots for x64 calls with many args. */
       lua_assert(CCI_NARGS(ci) <= (LJ_ABI_WIN ? 4 : 6));
       ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
-      /* NYI: not fastcall-aware, but doesn't matter (yet). */
+      lua_assert(!(ci->flags & CCI_FASTCALL) || CCI_NARGS(ci) <= 2);
       if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
	as->evenspill = (int32_t)CCI_NARGS(ci);
       ir->prev = REGSP_HINT(RID_RET);
@@ -3878,6 +4146,12 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
	(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
     }
+#if LJ_32 && LJ_HASFFI
+    case IR_HIOP:
+      if ((ir-1)->o == IR_CALLN)
+	ir->prev = REGSP_HINT(RID_RETHI);
+      break;
+#endif
     /* C calls evict all scratch regs and return results in RID_RET. */
     case IR_SNEW: case IR_NEWREF:
 #if !LJ_64
@@ -3894,6 +4168,14 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
       as->modset = RSET_SCRATCH;
       break;
     case IR_POWI:
+#if LJ_64 && LJ_HASFFI
+      if (!irt_isnum(ir->t)) {
+	ir->prev = REGSP_HINT(RID_RET);
+	if (inloop)
+	  as->modset |= (RSET_SCRATCH & RSET_GPR);
+	continue;
+      }
+#endif
       ir->prev = REGSP_HINT(RID_XMM0);
       if (inloop)
	as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
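
Note: the RID_RETHI hint reflects the i386 calling convention, where 64 bit integer results come back in the EDX:EAX register pair. The loword CALLN keeps its RID_RET (EAX) hint and the adjacent HIOP is steered to RID_RETHI (EDX on x86). Splitting a result the way the register allocator sees it:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
      uint64_t ret = 0x0123456789abcdefull;  /* A 64 bit C-call result. */
      uint32_t eax = (uint32_t)ret;          /* Loword: EAX (RID_RET). */
      uint32_t edx = (uint32_t)(ret >> 32);  /* Hiword: EDX (RID_RETHI). */
      printf("EDX:EAX = %08x:%08x\n", edx, eax);
      return 0;
    }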