author      Mike Pall <mike>                            2011-02-02 02:29:37 +0100
committer   Mike Pall <mike>                            2011-02-02 02:29:37 +0100
commit      b613216efc7447dae645d8834e4d6f3185cd1bcc (patch)
tree        0859fed377f00ebeada70ba45d02496b7fb4a249 /src/lj_asm.c
parent      c539c0cac8f668e66a5ce9e5fd645cb45e3c5063 (diff)
download    luajit-b613216efc7447dae645d8834e4d6f3185cd1bcc.tar.gz
            luajit-b613216efc7447dae645d8834e4d6f3185cd1bcc.tar.bz2
            luajit-b613216efc7447dae645d8834e4d6f3185cd1bcc.zip
Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.
Add generic HIOP instruction for extra backend functionality.
Add support for HIOP to x86 backend.
Use POWI for 64 bit integer x^k, too.
POWI is lowered to a call by SPLIT or the x64 backend.
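To illustrate the idea behind SPLIT and HIOP (not part of the patch): a 64 bit operation is rewritten as a loword op immediately followed by a HIOP that carries the hiword, and the backend then pairs the two, e.g. ADD with ADC. A minimal C sketch of the equivalent split add, assuming the 32 bit halves are passed separately:

#include <stdint.h>

/* Sketch only: models how SPLIT turns a 64 bit add into a loword add
** (IR_ADD) plus a hiword add-with-carry (IR_HIOP, emitted as ADC).
*/
static uint64_t add64_split(uint32_t alo, uint32_t ahi,
                            uint32_t blo, uint32_t bhi)
{
  uint32_t lo = alo + blo;          /* Loword op. */
  uint32_t carry = (lo < alo);      /* Carry out of the loword add. */
  uint32_t hi = ahi + bhi + carry;  /* Hiword op consumes the carry. */
  return ((uint64_t)hi << 32) | lo;
}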
Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c  444
1 file changed, 363 insertions, 81 deletions
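The conversion hunks below add an unsigned 32 bit integer to FP path that merges the integer into the bias constant 2^52+2^51 and then subtracts the bias. A minimal C sketch of the same trick, assuming IEEE-754 doubles (the function name and the memcpy-based bit merge are illustrative; the patch performs the merge in registers with MOVD/XORPS):

#include <stdint.h>
#include <string.h>

/* Sketch of the u32 -> number conversion: OR the integer into the mantissa
** of the bias constant 2^52+2^51 (0x4338000000000000) and subtract the
** bias, which leaves the exact integer value as a double.
*/
static double u32_to_num(uint32_t u)
{
  const double bias = 6755399441055744.0;            /* 2^52 + 2^51 */
  uint64_t bits = UINT64_C(0x4338000000000000) | u;  /* Merge bias and integer. */
  double d;
  memcpy(&d, &bits, sizeof(d));
  return d - bias;                                   /* Subtract the bias. */
}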
diff --git a/src/lj_asm.c b/src/lj_asm.c
index cc2ae597..441700d4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -347,6 +347,20 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | |||
347 | } | 347 | } |
348 | } | 348 | } |
349 | 349 | ||
350 | /* op rm/mrm, i */ | ||
351 | static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) | ||
352 | { | ||
353 | x86Op xo; | ||
354 | if (checki8(i)) { | ||
355 | emit_i8(as, i); | ||
356 | xo = XG_TOXOi8(xg); | ||
357 | } else { | ||
358 | emit_i32(as, i); | ||
359 | xo = XG_TOXOi(xg); | ||
360 | } | ||
361 | emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64)); | ||
362 | } | ||
363 | |||
350 | /* -- Emit moves ---------------------------------------------------------- */ | 364 | /* -- Emit moves ---------------------------------------------------------- */ |
351 | 365 | ||
352 | /* mov [base+ofs], i */ | 366 | /* mov [base+ofs], i */ |
@@ -371,7 +385,10 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | |||
371 | /* mov r, i / xor r, r */ | 385 | /* mov r, i / xor r, r */ |
372 | static void emit_loadi(ASMState *as, Reg r, int32_t i) | 386 | static void emit_loadi(ASMState *as, Reg r, int32_t i) |
373 | { | 387 | { |
374 | if (i == 0) { | 388 | /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */ |
389 | if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP || | ||
390 | (as->curins+1 < as->T->nins && | ||
391 | IR(as->curins+1)->o == IR_HIOP)))) { | ||
375 | emit_rr(as, XO_ARITH(XOg_XOR), r, r); | 392 | emit_rr(as, XO_ARITH(XOg_XOR), r, r); |
376 | } else { | 393 | } else { |
377 | MCode *p = as->mcp; | 394 | MCode *p = as->mcp; |
@@ -422,6 +439,19 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | |||
422 | /* Label for short jumps. */ | 439 | /* Label for short jumps. */ |
423 | typedef MCode *MCLabel; | 440 | typedef MCode *MCLabel; |
424 | 441 | ||
442 | #if LJ_32 && LJ_HASFFI | ||
443 | /* jmp short target */ | ||
444 | static void emit_sjmp(ASMState *as, MCLabel target) | ||
445 | { | ||
446 | MCode *p = as->mcp; | ||
447 | ptrdiff_t delta = target - p; | ||
448 | lua_assert(delta == (int8_t)delta); | ||
449 | p[-1] = (MCode)(int8_t)delta; | ||
450 | p[-2] = XI_JMPs; | ||
451 | as->mcp = p - 2; | ||
452 | } | ||
453 | #endif | ||
454 | |||
425 | /* jcc short target */ | 455 | /* jcc short target */ |
426 | static void emit_sjcc(ASMState *as, int cc, MCLabel target) | 456 | static void emit_sjcc(ASMState *as, int cc, MCLabel target) |
427 | { | 457 | { |
@@ -630,7 +660,7 @@ static Reg ra_rematk(ASMState *as, IRIns *ir) | |||
630 | } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */ | 660 | } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */ |
631 | lua_assert(irt_isnil(ir->t)); | 661 | lua_assert(irt_isnil(ir->t)); |
632 | emit_getgl(as, r, jit_L); | 662 | emit_getgl(as, r, jit_L); |
633 | #if LJ_64 /* NYI: 32 bit register pairs. */ | 663 | #if LJ_64 |
634 | } else if (ir->o == IR_KINT64) { | 664 | } else if (ir->o == IR_KINT64) { |
635 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 665 | emit_loadu64(as, r, ir_kint64(ir)->u64); |
636 | #endif | 666 | #endif |
@@ -681,8 +711,7 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) | |||
681 | #if LJ_64 | 711 | #if LJ_64 |
682 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) | 712 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) |
683 | #else | 713 | #else |
684 | /* NYI: 32 bit register pairs. */ | 714 | #define REX_64IR(ir, r) (r) |
685 | #define REX_64IR(ir, r) check_exp(!irt_is64((ir)->t), (r)) | ||
686 | #endif | 715 | #endif |
687 | 716 | ||
688 | /* Generic move between two regs. */ | 717 | /* Generic move between two regs. */ |
@@ -939,7 +968,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) | |||
939 | emit_loadn(as, dest, tv); | 968 | emit_loadn(as, dest, tv); |
940 | return; | 969 | return; |
941 | } | 970 | } |
942 | #if LJ_64 /* NYI: 32 bit register pairs. */ | 971 | #if LJ_64 |
943 | } else if (ir->o == IR_KINT64) { | 972 | } else if (ir->o == IR_KINT64) { |
944 | emit_loadu64(as, dest, ir_kint64(ir)->u64); | 973 | emit_loadu64(as, dest, ir_kint64(ir)->u64); |
945 | return; | 974 | return; |
@@ -1463,7 +1492,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
1463 | #endif | 1492 | #endif |
1464 | if (r) { /* Argument is in a register. */ | 1493 | if (r) { /* Argument is in a register. */ |
1465 | if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { | 1494 | if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { |
1466 | #if LJ_64 /* NYI: 32 bit register pairs. */ | 1495 | #if LJ_64 |
1467 | if (ir->o == IR_KINT64) | 1496 | if (ir->o == IR_KINT64) |
1468 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 1497 | emit_loadu64(as, r, ir_kint64(ir)->u64); |
1469 | else | 1498 | else |
@@ -1519,7 +1548,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
1519 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 1548 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
1520 | if (ra_used(ir)) { | 1549 | if (ra_used(ir)) { |
1521 | if (irt_isfp(ir->t)) { | 1550 | if (irt_isfp(ir->t)) { |
1522 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | 1551 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ |
1523 | #if LJ_64 | 1552 | #if LJ_64 |
1524 | if ((ci->flags & CCI_CASTU64)) { | 1553 | if ((ci->flags & CCI_CASTU64)) { |
1525 | Reg dest = ir->r; | 1554 | Reg dest = ir->r; |
@@ -1632,19 +1661,24 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
1632 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 1661 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
1633 | IRRef lref = ir->op1; | 1662 | IRRef lref = ir->op1; |
1634 | lua_assert(irt_type(ir->t) != st); | 1663 | lua_assert(irt_type(ir->t) != st); |
1664 | lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ | ||
1635 | if (irt_isfp(ir->t)) { | 1665 | if (irt_isfp(ir->t)) { |
1636 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1666 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1637 | if (stfp) { /* FP to FP conversion. */ | 1667 | if (stfp) { /* FP to FP conversion. */ |
1638 | Reg left = asm_fuseload(as, lref, RSET_FPR); | 1668 | Reg left = asm_fuseload(as, lref, RSET_FPR); |
1639 | emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left); | 1669 | emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left); |
1640 | if (left == dest) return; /* Avoid the XO_XORPS. */ | 1670 | if (left == dest) return; /* Avoid the XO_XORPS. */ |
1641 | #if LJ_32 | 1671 | } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ |
1642 | } else if (st >= IRT_U32) { | 1672 | /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ |
1643 | /* NYI: 64 bit integer or uint32_t to number conversion. */ | 1673 | cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); |
1644 | setintV(&as->J->errinfo, ir->o); | 1674 | Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
1645 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | 1675 | if (irt_isfloat(ir->t)) |
1676 | emit_rr(as, XO_CVTSD2SS, dest, dest); | ||
1677 | emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ | ||
1678 | emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ | ||
1679 | emit_loadn(as, bias, k); | ||
1680 | emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); | ||
1646 | return; | 1681 | return; |
1647 | #endif | ||
1648 | } else { /* Integer to FP conversion. */ | 1682 | } else { /* Integer to FP conversion. */ |
1649 | Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ? | 1683 | Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ? |
1650 | ra_alloc1(as, lref, RSET_GPR) : | 1684 | ra_alloc1(as, lref, RSET_GPR) : |
@@ -1663,41 +1697,47 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
1663 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ | 1697 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ |
1664 | } else if (stfp) { /* FP to integer conversion. */ | 1698 | } else if (stfp) { /* FP to integer conversion. */ |
1665 | if (irt_isguard(ir->t)) { | 1699 | if (irt_isguard(ir->t)) { |
1666 | lua_assert(!irt_is64(ir->t)); /* No support for checked 64 bit conv. */ | 1700 | /* Checked conversions are only supported from number to int. */ |
1701 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | ||
1667 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | 1702 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); |
1668 | #if LJ_32 | ||
1669 | } else if (irt_isi64(ir->t) || irt_isu64(ir->t) || irt_isu32(ir->t)) { | ||
1670 | /* NYI: number to 64 bit integer or uint32_t conversion. */ | ||
1671 | setintV(&as->J->errinfo, ir->o); | ||
1672 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1673 | #endif | ||
1674 | } else { | 1703 | } else { |
1675 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1704 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1676 | x86Op op = st == IRT_NUM ? | 1705 | x86Op op = st == IRT_NUM ? |
1677 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) : | 1706 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) : |
1678 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI); | 1707 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI); |
1679 | if (LJ_64 && irt_isu64(ir->t)) { | 1708 | if (LJ_32 && irt_isu32(ir->t)) { /* FP to U32 conversion on x86. */ |
1680 | const void *k = lj_ir_k64_find(as->J, U64x(c3f00000,00000000)); | 1709 | /* u32 = (int32_t)(number - 2^31) + 2^31 */ |
1681 | MCLabel l_end = emit_label(as); | 1710 | Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : |
1682 | Reg left = IR(lref)->r; | 1711 | ra_scratch(as, RSET_FPR); |
1712 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); | ||
1713 | emit_rr(as, op, dest, tmp); | ||
1714 | if (st == IRT_NUM) | ||
1715 | emit_rma(as, XO_ADDSD, tmp, | ||
1716 | lj_ir_k64_find(as->J, U64x(c1e00000,00000000))); | ||
1717 | else | ||
1718 | emit_rma(as, XO_ADDSS, tmp, | ||
1719 | lj_ir_k64_find(as->J, U64x(00000000,cf000000))); | ||
1720 | ra_left(as, tmp, lref); | ||
1721 | } else if (LJ_64 && irt_isu64(ir->t)) { | ||
1683 | /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ | 1722 | /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ |
1684 | if (ra_hasreg(left)) { | 1723 | Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : |
1685 | Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 1724 | ra_scratch(as, RSET_FPR); |
1686 | emit_rr(as, op, dest|REX_64, tmpn); | 1725 | MCLabel l_end = emit_label(as); |
1687 | emit_rr(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, tmpn, left); | 1726 | emit_rr(as, op, dest|REX_64, tmp); |
1688 | emit_rma(as, st == IRT_NUM ? XMM_MOVRM(as) : XO_MOVSS, tmpn, k); | 1727 | if (st == IRT_NUM) |
1689 | } else { | 1728 | emit_rma(as, XO_ADDSD, tmp, |
1690 | left = ra_allocref(as, lref, RSET_FPR); | 1729 | lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); |
1691 | emit_rr(as, op, dest|REX_64, left); | 1730 | else |
1692 | emit_rma(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, left, k); | 1731 | emit_rma(as, XO_ADDSS, tmp, |
1693 | } | 1732 | lj_ir_k64_find(as->J, U64x(00000000,df800000))); |
1694 | emit_sjcc(as, CC_NS, l_end); | 1733 | emit_sjcc(as, CC_NS, l_end); |
1695 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest < 2^63. */ | 1734 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest < 2^63. */ |
1696 | emit_rr(as, op, dest|REX_64, left); | 1735 | emit_rr(as, op, dest|REX_64, tmp); |
1736 | ra_left(as, tmp, lref); | ||
1697 | } else { | 1737 | } else { |
1698 | Reg left = asm_fuseload(as, lref, RSET_FPR); | 1738 | Reg left = asm_fuseload(as, lref, RSET_FPR); |
1699 | if (LJ_64 && irt_isu32(ir->t)) | 1739 | if (LJ_64 && irt_isu32(ir->t)) |
1700 | emit_rr(as, XO_MOV, dest, dest); /* Zero upper 32 bits. */ | 1740 | emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ |
1701 | emit_mrm(as, op, | 1741 | emit_mrm(as, op, |
1702 | dest|((LJ_64 && | 1742 | dest|((LJ_64 && |
1703 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), | 1743 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), |
@@ -1728,12 +1768,10 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
1728 | emit_mrm(as, op, dest, left); | 1768 | emit_mrm(as, op, dest, left); |
1729 | } | 1769 | } |
1730 | } else { /* 32/64 bit integer conversions. */ | 1770 | } else { /* 32/64 bit integer conversions. */ |
1731 | if (irt_is64(ir->t)) { | 1771 | if (LJ_32) { /* Only need to handle 32/32 bit no-op (cast) on x86. */ |
1732 | #if LJ_32 | 1772 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1733 | /* NYI: conversion to 64 bit integers. */ | 1773 | ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */ |
1734 | setintV(&as->J->errinfo, ir->o); | 1774 | } else if (irt_is64(ir->t)) { |
1735 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1736 | #else | ||
1737 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1775 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1738 | if (st64 || !(ir->op2 & IRCONV_SEXT)) { | 1776 | if (st64 || !(ir->op2 & IRCONV_SEXT)) { |
1739 | /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ | 1777 | /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ |
@@ -1742,21 +1780,14 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
1742 | Reg left = asm_fuseload(as, lref, RSET_GPR); | 1780 | Reg left = asm_fuseload(as, lref, RSET_GPR); |
1743 | emit_mrm(as, XO_MOVSXd, dest|REX_64, left); | 1781 | emit_mrm(as, XO_MOVSXd, dest|REX_64, left); |
1744 | } | 1782 | } |
1745 | #endif | ||
1746 | } else { | 1783 | } else { |
1747 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1784 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1748 | if (st64) { | 1785 | if (st64) { |
1749 | #if LJ_32 | ||
1750 | /* NYI: conversion from 64 bit integers. */ | ||
1751 | setintV(&as->J->errinfo, ir->o); | ||
1752 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1753 | #else | ||
1754 | Reg left = asm_fuseload(as, lref, RSET_GPR); | 1786 | Reg left = asm_fuseload(as, lref, RSET_GPR); |
1755 | /* This is either a 32 bit reg/reg mov which zeroes the hi-32 bits | 1787 | /* This is either a 32 bit reg/reg mov which zeroes the hiword |
1756 | ** or a load of the lower 32 bits from a 64 bit address. | 1788 | ** or a load of the loword from a 64 bit address. |
1757 | */ | 1789 | */ |
1758 | emit_mrm(as, XO_MOV, dest, left); | 1790 | emit_mrm(as, XO_MOV, dest, left); |
1759 | #endif | ||
1760 | } else { /* 32/32 bit no-op (cast). */ | 1791 | } else { /* 32/32 bit no-op (cast). */ |
1761 | ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */ | 1792 | ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */ |
1762 | } | 1793 | } |
@@ -1764,6 +1795,93 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
1764 | } | 1795 | } |
1765 | } | 1796 | } |
1766 | 1797 | ||
1798 | #if LJ_32 && LJ_HASFFI | ||
1799 | /* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */ | ||
1800 | |||
1801 | /* 64 bit integer to FP conversion in 32 bit mode. */ | ||
1802 | static void asm_conv_fp_int64(ASMState *as, IRIns *ir) | ||
1803 | { | ||
1804 | Reg hi = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1805 | Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi)); | ||
1806 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | ||
1807 | Reg dest = ir->r; | ||
1808 | if (ra_hasreg(dest)) { | ||
1809 | ra_free(as, dest); | ||
1810 | ra_modified(as, dest); | ||
1811 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | ||
1812 | dest, RID_ESP, ofs); | ||
1813 | } | ||
1814 | emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, | ||
1815 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); | ||
1816 | if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { | ||
1817 | /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ | ||
1818 | MCLabel l_end = emit_label(as); | ||
1819 | emit_rma(as, XO_FADDq, XOg_FADDq, | ||
1820 | lj_ir_k64_find(as->J, U64x(43f00000,00000000))); | ||
1821 | emit_sjcc(as, CC_NS, l_end); | ||
1822 | emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ | ||
1823 | } else { | ||
1824 | lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); | ||
1825 | } | ||
1826 | emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); | ||
1827 | /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ | ||
1828 | emit_rmro(as, XO_MOVto, hi, RID_ESP, 4); | ||
1829 | emit_rmro(as, XO_MOVto, lo, RID_ESP, 0); | ||
1830 | } | ||
1831 | |||
1832 | /* FP to 64 bit integer conversion in 32 bit mode. */ | ||
1833 | static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | ||
1834 | { | ||
1835 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
1836 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
1837 | Reg lo, hi; | ||
1838 | lua_assert(st == IRT_NUM || st == IRT_FLOAT); | ||
1839 | lua_assert(dt == IRT_I64 || dt == IRT_U64); | ||
1840 | lua_assert(((ir-1)->op2 & IRCONV_TRUNC)); | ||
1841 | hi = ra_dest(as, ir, RSET_GPR); | ||
1842 | lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); | ||
1843 | if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); | ||
1844 | /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */ | ||
1845 | if (!(as->flags & JIT_F_SSE3)) { /* Set FPU rounding mode to default. */ | ||
1846 | emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4); | ||
1847 | emit_rmro(as, XO_MOVto, lo, RID_ESP, 4); | ||
1848 | emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); | ||
1849 | } | ||
1850 | if (dt == IRT_U64) { | ||
1851 | /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ | ||
1852 | MCLabel l_pop, l_end = emit_label(as); | ||
1853 | emit_x87op(as, XI_FPOP); | ||
1854 | l_pop = emit_label(as); | ||
1855 | emit_sjmp(as, l_end); | ||
1856 | emit_rmro(as, XO_MOV, hi, RID_ESP, 4); | ||
1857 | if ((as->flags & JIT_F_SSE3)) | ||
1858 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); | ||
1859 | else | ||
1860 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); | ||
1861 | emit_rma(as, XO_FADDq, XOg_FADDq, | ||
1862 | lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); | ||
1863 | emit_sjcc(as, CC_NS, l_pop); | ||
1864 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ | ||
1865 | } | ||
1866 | emit_rmro(as, XO_MOV, hi, RID_ESP, 4); | ||
1867 | if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ | ||
1868 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); | ||
1869 | } else { /* Otherwise set FPU rounding mode to truncate before the store. */ | ||
1870 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); | ||
1871 | emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0); | ||
1872 | emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0); | ||
1873 | emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0); | ||
1874 | emit_loadi(as, lo, 0xc00); | ||
1875 | emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0); | ||
1876 | } | ||
1877 | if (dt == IRT_U64) | ||
1878 | emit_x87op(as, XI_FDUP); | ||
1879 | emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd, | ||
1880 | st == IRT_NUM ? XOg_FLDq: XOg_FLDd, | ||
1881 | asm_fuseload(as, ir->op1, RSET_EMPTY)); | ||
1882 | } | ||
1883 | #endif | ||
1884 | |||
1767 | static void asm_strto(ASMState *as, IRIns *ir) | 1885 | static void asm_strto(ASMState *as, IRIns *ir) |
1768 | { | 1886 | { |
1769 | /* Force a spill slot for the destination register (if any). */ | 1887 | /* Force a spill slot for the destination register (if any). */ |
@@ -2644,6 +2762,18 @@ static void asm_powi(ASMState *as, IRIns *ir) | |||
2644 | ra_left(as, RID_EAX, ir->op2); | 2762 | ra_left(as, RID_EAX, ir->op2); |
2645 | } | 2763 | } |
2646 | 2764 | ||
2765 | #if LJ_64 && LJ_HASFFI | ||
2766 | static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id) | ||
2767 | { | ||
2768 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
2769 | IRRef args[2]; | ||
2770 | args[0] = ir->op1; | ||
2771 | args[1] = ir->op2; | ||
2772 | asm_setupresult(as, ir, ci); | ||
2773 | asm_gencall(as, ci, args); | ||
2774 | } | ||
2775 | #endif | ||
2776 | |||
2647 | /* Find out whether swapping operands might be beneficial. */ | 2777 | /* Find out whether swapping operands might be beneficial. */ |
2648 | static int swapops(ASMState *as, IRIns *ir) | 2778 | static int swapops(ASMState *as, IRIns *ir) |
2649 | { | 2779 | { |
@@ -2877,12 +3007,30 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) | |||
2877 | /* -- Comparisons --------------------------------------------------------- */ | 3007 | /* -- Comparisons --------------------------------------------------------- */ |
2878 | 3008 | ||
2879 | /* Virtual flags for unordered FP comparisons. */ | 3009 | /* Virtual flags for unordered FP comparisons. */ |
2880 | #define VCC_U 0x100 /* Unordered. */ | 3010 | #define VCC_U 0x1000 /* Unordered. */ |
2881 | #define VCC_P 0x200 /* Needs extra CC_P branch. */ | 3011 | #define VCC_P 0x2000 /* Needs extra CC_P branch. */ |
2882 | #define VCC_S 0x400 /* Swap avoids CC_P branch. */ | 3012 | #define VCC_S 0x4000 /* Swap avoids CC_P branch. */ |
2883 | #define VCC_PS (VCC_P|VCC_S) | 3013 | #define VCC_PS (VCC_P|VCC_S) |
2884 | 3014 | ||
2885 | static void asm_comp_(ASMState *as, IRIns *ir, int cc) | 3015 | /* Map of comparisons to flags. ORDER IR. */ |
3016 | #define COMPFLAGS(ci, cin, cu, cf) ((ci)+((cu)<<4)+((cin)<<8)+(cf)) | ||
3017 | static const uint16_t asm_compmap[IR_ABC+1] = { | ||
3018 | /* signed non-eq unsigned flags */ | ||
3019 | /* LT */ COMPFLAGS(CC_GE, CC_G, CC_AE, VCC_PS), | ||
3020 | /* GE */ COMPFLAGS(CC_L, CC_L, CC_B, 0), | ||
3021 | /* LE */ COMPFLAGS(CC_G, CC_G, CC_A, VCC_PS), | ||
3022 | /* GT */ COMPFLAGS(CC_LE, CC_L, CC_BE, 0), | ||
3023 | /* ULT */ COMPFLAGS(CC_AE, CC_A, CC_AE, VCC_U), | ||
3024 | /* UGE */ COMPFLAGS(CC_B, CC_B, CC_B, VCC_U|VCC_PS), | ||
3025 | /* ULE */ COMPFLAGS(CC_A, CC_A, CC_A, VCC_U), | ||
3026 | /* UGT */ COMPFLAGS(CC_BE, CC_B, CC_BE, VCC_U|VCC_PS), | ||
3027 | /* EQ */ COMPFLAGS(CC_NE, CC_NE, CC_NE, VCC_P), | ||
3028 | /* NE */ COMPFLAGS(CC_E, CC_E, CC_E, VCC_U|VCC_P), | ||
3029 | /* ABC */ COMPFLAGS(CC_BE, CC_B, CC_BE, VCC_U|VCC_PS) /* Same as UGT. */ | ||
3030 | }; | ||
3031 | |||
3032 | /* FP and integer comparisons. */ | ||
3033 | static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | ||
2886 | { | 3034 | { |
2887 | if (irt_isnum(ir->t)) { | 3035 | if (irt_isnum(ir->t)) { |
2888 | IRRef lref = ir->op1; | 3036 | IRRef lref = ir->op1; |
@@ -3008,15 +3156,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
3008 | if (irl+1 == ir) /* Referencing previous ins? */ | 3156 | if (irl+1 == ir) /* Referencing previous ins? */ |
3009 | as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */ | 3157 | as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */ |
3010 | } else { | 3158 | } else { |
3011 | x86Op xo; | 3159 | emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm); |
3012 | if (checki8(imm)) { | ||
3013 | emit_i8(as, imm); | ||
3014 | xo = XO_ARITHi8; | ||
3015 | } else { | ||
3016 | emit_i32(as, imm); | ||
3017 | xo = XO_ARITHi; | ||
3018 | } | ||
3019 | emit_mrm(as, xo, r64 + XOg_CMP, left); | ||
3020 | } | 3160 | } |
3021 | } | 3161 | } |
3022 | } else { | 3162 | } else { |
@@ -3028,8 +3168,133 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
3028 | } | 3168 | } |
3029 | } | 3169 | } |
3030 | 3170 | ||
3031 | #define asm_comp(as, ir, ci, cf, cu) \ | 3171 | #if LJ_32 && LJ_HASFFI |
3032 | asm_comp_(as, ir, (ci)+((cf)<<4)+(cu)) | 3172 | /* 64 bit integer comparisons in 32 bit mode. */ |
3173 | static void asm_comp_int64(ASMState *as, IRIns *ir) | ||
3174 | { | ||
3175 | uint32_t cc = asm_compmap[(ir-1)->o]; | ||
3176 | RegSet allow = RSET_GPR; | ||
3177 | Reg lefthi = RID_NONE, leftlo = RID_NONE; | ||
3178 | Reg righthi = RID_NONE, rightlo = RID_NONE; | ||
3179 | MCLabel l_around; | ||
3180 | x86ModRM mrm; | ||
3181 | |||
3182 | as->curins--; /* Skip loword ins. Avoids failing in noconflict(), too. */ | ||
3183 | |||
3184 | /* Allocate/fuse hiword operands. */ | ||
3185 | if (irref_isk(ir->op2)) { | ||
3186 | lefthi = asm_fuseload(as, ir->op1, allow); | ||
3187 | } else { | ||
3188 | lefthi = ra_alloc1(as, ir->op1, allow); | ||
3189 | righthi = asm_fuseload(as, ir->op2, allow); | ||
3190 | if (righthi == RID_MRM) { | ||
3191 | if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base); | ||
3192 | if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx); | ||
3193 | } else { | ||
3194 | rset_clear(allow, righthi); | ||
3195 | } | ||
3196 | } | ||
3197 | mrm = as->mrm; /* Save state for hiword instruction. */ | ||
3198 | |||
3199 | /* Allocate/fuse loword operands. */ | ||
3200 | if (irref_isk((ir-1)->op2)) { | ||
3201 | leftlo = asm_fuseload(as, (ir-1)->op1, allow); | ||
3202 | } else { | ||
3203 | leftlo = ra_alloc1(as, (ir-1)->op1, allow); | ||
3204 | rightlo = asm_fuseload(as, (ir-1)->op2, allow); | ||
3205 | if (rightlo == RID_MRM) { | ||
3206 | if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base); | ||
3207 | if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx); | ||
3208 | } else { | ||
3209 | rset_clear(allow, rightlo); | ||
3210 | } | ||
3211 | } | ||
3212 | |||
3213 | /* All register allocations must be performed _before_ this point. */ | ||
3214 | l_around = emit_label(as); | ||
3215 | as->invmcp = as->testmcp = NULL; /* Cannot use these optimizations. */ | ||
3216 | |||
3217 | /* Loword comparison and branch. */ | ||
3218 | asm_guardcc(as, cc >> 4); /* Always use unsigned compare for loword. */ | ||
3219 | if (ra_noreg(rightlo)) { | ||
3220 | int32_t imm = IR((ir-1)->op2)->i; | ||
3221 | if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM) | ||
3222 | emit_rr(as, XO_TEST, leftlo, leftlo); | ||
3223 | else | ||
3224 | emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm); | ||
3225 | } else { | ||
3226 | emit_mrm(as, XO_CMP, leftlo, rightlo); | ||
3227 | } | ||
3228 | |||
3229 | /* Hiword comparison and branches. */ | ||
3230 | if ((cc & 15) != CC_NE) | ||
3231 | emit_sjcc(as, CC_NE, l_around); /* Hiword unequal: skip loword compare. */ | ||
3232 | if ((cc & 15) != CC_E) | ||
3233 | asm_guardcc(as, cc >> 8); /* Hiword compare without equality check. */ | ||
3234 | as->mrm = mrm; /* Restore state. */ | ||
3235 | if (ra_noreg(righthi)) { | ||
3236 | int32_t imm = IR(ir->op2)->i; | ||
3237 | if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM) | ||
3238 | emit_rr(as, XO_TEST, lefthi, lefthi); | ||
3239 | else | ||
3240 | emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm); | ||
3241 | } else { | ||
3242 | emit_mrm(as, XO_CMP, lefthi, righthi); | ||
3243 | } | ||
3244 | } | ||
3245 | #endif | ||
3246 | |||
3247 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | ||
3248 | |||
3249 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | ||
3250 | static void asm_hiop(ASMState *as, IRIns *ir) | ||
3251 | { | ||
3252 | #if LJ_32 && LJ_HASFFI | ||
3253 | /* HIOP is marked as a store because it needs its own DCE logic. */ | ||
3254 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | ||
3255 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | ||
3256 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | ||
3257 | if (usehi || uselo) { | ||
3258 | if (irt_isfp(ir->t)) | ||
3259 | asm_conv_fp_int64(as, ir); | ||
3260 | else | ||
3261 | asm_conv_int64_fp(as, ir); | ||
3262 | } | ||
3263 | as->curins--; /* Always skip the CONV. */ | ||
3264 | return; | ||
3265 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | ||
3266 | asm_comp_int64(as, ir); | ||
3267 | return; | ||
3268 | } | ||
3269 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | ||
3270 | switch ((ir-1)->o) { | ||
3271 | case IR_ADD: | ||
3272 | asm_intarith(as, ir, uselo ? XOg_ADC : XOg_ADD); | ||
3273 | break; | ||
3274 | case IR_SUB: | ||
3275 | asm_intarith(as, ir, uselo ? XOg_SBB : XOg_SUB); | ||
3276 | break; | ||
3277 | case IR_NEG: { | ||
3278 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
3279 | emit_rr(as, XO_GROUP3, XOg_NEG, dest); | ||
3280 | if (uselo) { | ||
3281 | emit_i8(as, 0); | ||
3282 | emit_rr(as, XO_ARITHi8, XOg_ADC, dest); | ||
3283 | } | ||
3284 | ra_left(as, dest, ir->op1); | ||
3285 | break; | ||
3286 | } | ||
3287 | case IR_CALLN: | ||
3288 | ra_destreg(as, ir, RID_RETHI); | ||
3289 | if (!uselo) | ||
3290 | ra_allocref(as, ir->op1, RID2RSET(RID_RET)); /* Mark call as used. */ | ||
3291 | break; | ||
3292 | default: lua_assert(0); break; | ||
3293 | } | ||
3294 | #else | ||
3295 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ | ||
3296 | #endif | ||
3297 | } | ||
3033 | 3298 | ||
3034 | /* -- Stack handling ------------------------------------------------------ */ | 3299 | /* -- Stack handling ------------------------------------------------------ */ |
3035 | 3300 | ||
@@ -3682,21 +3947,16 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
3682 | switch ((IROp)ir->o) { | 3947 | switch ((IROp)ir->o) { |
3683 | /* Miscellaneous ops. */ | 3948 | /* Miscellaneous ops. */ |
3684 | case IR_LOOP: asm_loop(as); break; | 3949 | case IR_LOOP: asm_loop(as); break; |
3685 | case IR_NOP: break; | 3950 | case IR_NOP: lua_assert(!ra_used(ir)); break; |
3686 | case IR_PHI: asm_phi(as, ir); break; | 3951 | case IR_PHI: asm_phi(as, ir); break; |
3952 | case IR_HIOP: asm_hiop(as, ir); break; | ||
3687 | 3953 | ||
3688 | /* Guarded assertions. */ | 3954 | /* Guarded assertions. */ |
3689 | case IR_LT: asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break; | 3955 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: |
3690 | case IR_GE: asm_comp(as, ir, CC_L, CC_B, 0); break; | 3956 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: |
3691 | case IR_LE: asm_comp(as, ir, CC_G, CC_A, VCC_PS); break; | 3957 | case IR_EQ: case IR_NE: case IR_ABC: |
3692 | case IR_GT: asm_comp(as, ir, CC_LE, CC_BE, 0); break; | 3958 | asm_comp(as, ir, asm_compmap[ir->o]); |
3693 | case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break; | 3959 | break; |
3694 | case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break; | ||
3695 | case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break; | ||
3696 | case IR_ABC: | ||
3697 | case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break; | ||
3698 | case IR_EQ: asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break; | ||
3699 | case IR_NE: asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break; | ||
3700 | 3960 | ||
3701 | case IR_RETF: asm_retf(as, ir); break; | 3961 | case IR_RETF: asm_retf(as, ir); break; |
3702 | 3962 | ||
@@ -3744,7 +4004,15 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
3744 | case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: | 4004 | case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: |
3745 | asm_fpmath(as, ir); | 4005 | asm_fpmath(as, ir); |
3746 | break; | 4006 | break; |
3747 | case IR_POWI: asm_powi(as, ir); break; | 4007 | case IR_POWI: |
4008 | #if LJ_64 && LJ_HASFFI | ||
4009 | if (!irt_isnum(ir->t)) | ||
4010 | asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
4011 | IRCALL_lj_carith_powu64); | ||
4012 | else | ||
4013 | #endif | ||
4014 | asm_powi(as, ir); | ||
4015 | break; | ||
3748 | 4016 | ||
3749 | /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ | 4017 | /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ |
3750 | case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; | 4018 | case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; |
@@ -3801,6 +4069,7 @@ static void asm_trace(ASMState *as) | |||
3801 | { | 4069 | { |
3802 | for (as->curins--; as->curins > as->stopins; as->curins--) { | 4070 | for (as->curins--; as->curins > as->stopins; as->curins--) { |
3803 | IRIns *ir = IR(as->curins); | 4071 | IRIns *ir = IR(as->curins); |
4072 | lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ | ||
3804 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) | 4073 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) |
3805 | continue; /* Dead-code elimination can be soooo easy. */ | 4074 | continue; /* Dead-code elimination can be soooo easy. */ |
3806 | if (irt_isguard(ir->t)) | 4075 | if (irt_isguard(ir->t)) |
@@ -3864,11 +4133,10 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
3864 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { | 4133 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { |
3865 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | 4134 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; |
3866 | #if LJ_64 | 4135 | #if LJ_64 |
3867 | /* NYI: add stack slots for x64 calls with many args. */ | ||
3868 | lua_assert(CCI_NARGS(ci) <= (LJ_ABI_WIN ? 4 : 6)); | 4136 | lua_assert(CCI_NARGS(ci) <= (LJ_ABI_WIN ? 4 : 6)); |
3869 | ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); | 4137 | ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); |
3870 | #else | 4138 | #else |
3871 | /* NYI: not fastcall-aware, but doesn't matter (yet). */ | 4139 | lua_assert(!(ci->flags & CCI_FASTCALL) || CCI_NARGS(ci) <= 2); |
3872 | if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */ | 4140 | if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */ |
3873 | as->evenspill = (int32_t)CCI_NARGS(ci); | 4141 | as->evenspill = (int32_t)CCI_NARGS(ci); |
3874 | ir->prev = REGSP_HINT(RID_RET); | 4142 | ir->prev = REGSP_HINT(RID_RET); |
@@ -3878,6 +4146,12 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
3878 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; | 4146 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; |
3879 | continue; | 4147 | continue; |
3880 | } | 4148 | } |
4149 | #if LJ_32 && LJ_HASFFI | ||
4150 | case IR_HIOP: | ||
4151 | if ((ir-1)->o == IR_CALLN) | ||
4152 | ir->prev = REGSP_HINT(RID_RETHI); | ||
4153 | break; | ||
4154 | #endif | ||
3881 | /* C calls evict all scratch regs and return results in RID_RET. */ | 4155 | /* C calls evict all scratch regs and return results in RID_RET. */ |
3882 | case IR_SNEW: case IR_NEWREF: | 4156 | case IR_SNEW: case IR_NEWREF: |
3883 | #if !LJ_64 | 4157 | #if !LJ_64 |
@@ -3894,6 +4168,14 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
3894 | as->modset = RSET_SCRATCH; | 4168 | as->modset = RSET_SCRATCH; |
3895 | break; | 4169 | break; |
3896 | case IR_POWI: | 4170 | case IR_POWI: |
4171 | #if LJ_64 && LJ_HASFFI | ||
4172 | if (!irt_isnum(ir->t)) { | ||
4173 | ir->prev = REGSP_HINT(RID_RET); | ||
4174 | if (inloop) | ||
4175 | as->modset |= (RSET_SCRATCH & RSET_GPR); | ||
4176 | continue; | ||
4177 | } | ||
4178 | #endif | ||
3897 | ir->prev = REGSP_HINT(RID_XMM0); | 4179 | ir->prev = REGSP_HINT(RID_XMM0); |
3898 | if (inloop) | 4180 | if (inloop) |
3899 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | 4181 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); |