diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm.c | 96 | ||||
-rw-r--r-- | src/lj_ir.h | 11 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 22 | ||||
-rw-r--r-- | src/lj_target_x86.h | 5 |
4 files changed, 133 insertions, 1 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 687363e1..d75858f8 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1654,6 +1654,99 @@ static void asm_toi64(ASMState *as, IRIns *ir) | |||
1654 | } | 1654 | } |
1655 | } | 1655 | } |
1656 | 1656 | ||
/* Assemble an IR_CONV type conversion for x86/x64.
**
** The source type is taken from the low 5 bits of ir->op2, the destination
** type from ir->t; the two must differ (asserted). Three groups are handled:
**   - FP destination: FP to FP via CVTSD2SS/CVTSS2SD, or integer to FP via
**     CVTSI2SD/CVTSI2SS.
**   - FP source, integer destination: guarded conversions are delegated to
**     asm_tointg(); otherwise a CVT(T)SD2SI or CVT(T)SS2SI is emitted,
**     selected by the source type and the IRCONV_TRUNC flag.
**   - Integer to integer: register move/no-op cast, 32 to 64 bit sign
**     extension (IRCONV_SEXT) or 64 to 32 bit truncation.
** Combinations marked NYI store ir->o in J->errinfo and call
** lj_trace_err_info() with LJ_TRERR_NYIIR.
**
** NOTE(review): the emit order suggests machine code is emitted in reverse
** of execution order (e.g. the zeroing MOV is emitted before the convert
** whose result it fixes up) -- confirm against the emitter.
*/
1657 | static void asm_conv(ASMState *as, IRIns *ir) | |
1658 | { | |
1659 | IRType st = (IRType)(ir->op2 & 0x1f); /* Source type: low 5 bits of op2. */ | |
1660 | int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); /* Source is I64/U64 (or P64 when LJ_64). */ | |
1661 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); /* Source is NUM or FLOAT. */ | |
1662 | IRRef lref = ir->op1; | |
1663 | lua_assert(irt_type(ir->t) != st); /* Same-type conversions are not expected here. */ | |
1664 | if (irt_isnum(ir->t) || irt_isfloat(ir->t)) { /* Destination is FP. */ | |
1665 | Reg dest = ra_dest(as, ir, RSET_FPR); | |
1666 | if (stfp) { /* FP to FP conversion. */ | |
1667 | Reg left = asm_fuseload(as, lref, RSET_FPR); | |
1668 | emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left); | |
1669 | if (left == dest) return; /* Avoid the XO_XORPS. */ | |
1670 | #if LJ_32 | |
1671 | } else if (st >= IRT_U32) { | |
1672 | /* NYI: 64 bit integer or uint32_t to number conversion. */ | |
1673 | setintV(&as->J->errinfo, ir->o); | |
1674 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | |
1675 | return; | |
1676 | #endif | |
1677 | } else { /* Integer to FP conversion. */ | |
1678 | Reg left = (LJ_64 && st == IRT_U32) ? ra_allocref(as, lref, RSET_GPR) : /* U32 source on x64 goes through ra_allocref() instead of asm_fuseload(). */ | |
1679 | asm_fuseload(as, lref, RSET_GPR); | |
1680 | emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, | |
1681 | dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); /* REX_64 for 64 bit and U32 sources on x64. */ | |
1682 | } | |
1683 | if (!(as->flags & JIT_F_SPLIT_XMM)) | |
1684 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ | |
1685 | } else if (stfp) { /* FP to integer conversion. */ | |
1686 | if (irt_isguard(ir->t)) { | |
1687 | lua_assert(!irt_is64(ir->t)); /* No support for checked 64 bit conv. */ | |
1688 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | |
1689 | #if LJ_32 | |
1690 | } else if (irt_isi64(ir->t) || irt_isu64(ir->t) || irt_isu32(ir->t)) { | |
1691 | /* NYI: number to 64 bit integer or uint32_t conversion. */ | |
1692 | setintV(&as->J->errinfo, ir->o); | |
1693 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | |
1694 | #else | |
1695 | } else if (irt_isu64(ir->t)) { | |
1696 | /* NYI: number to uint64_t conversion. */ | |
1697 | setintV(&as->J->errinfo, ir->o); | |
1698 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | |
1699 | #endif | |
1700 | } else { | |
1701 | Reg dest = ra_dest(as, ir, RSET_GPR); | |
1702 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); /* ir->op1 is the same ref as lref. */ | |
1703 | x86Op op = st == IRT_NUM ? | |
1704 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) : | |
1705 | ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI); | |
1706 | if (LJ_64 && irt_isu32(ir->t)) | |
1707 | emit_rr(as, XO_MOV, dest, dest); /* Zero upper 32 bits. */ | |
1708 | emit_mrm(as, op, | |
1709 | dest|((LJ_64 && | |
1710 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), /* REX_64 for 64 bit and U32 results on x64. */ | |
1711 | left); | |
1712 | } | |
1713 | } else { /* Integer to integer conversion. Only need 32/64 bit variants. */ | |
1714 | if (irt_is64(ir->t)) { | |
1715 | #if LJ_32 | |
1716 | /* NYI: conversion to 64 bit integers. */ | |
1717 | setintV(&as->J->errinfo, ir->o); | |
1718 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | |
1719 | #else | |
1720 | Reg dest = ra_dest(as, ir, RSET_GPR); | |
1721 | if (st64 || !(ir->op2 & IRCONV_SEXT)) { | |
1722 | /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ | |
1723 | ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */ | |
1724 | } else { /* 32 to 64 bit sign extension. */ | |
1725 | Reg left = asm_fuseload(as, lref, RSET_GPR); | |
1726 | emit_mrm(as, XO_MOVSXd, dest|REX_64, left); | |
1727 | } | |
1728 | #endif | |
1729 | } else { | |
1730 | Reg dest = ra_dest(as, ir, RSET_GPR); | |
1731 | if (st64) { | |
1732 | #if LJ_32 | |
1733 | /* NYI: conversion from 64 bit integers. */ | |
1734 | setintV(&as->J->errinfo, ir->o); | |
1735 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | |
1736 | #else | |
1737 | Reg left = asm_fuseload(as, lref, RSET_GPR); | |
1738 | /* This is either a 32 bit reg/reg mov which zeroes the hi-32 bits | |
1739 | ** or a load of the lower 32 bits from a 64 bit address. | |
1740 | */ | |
1741 | emit_mrm(as, XO_MOV, dest, left); | |
1742 | #endif | |
1743 | } else { /* 32/32 bit no-op (cast). */ | |
1744 | ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */ | |
1745 | } | |
1746 | } | |
1747 | } | |
1748 | } | |
1749 | |||
1657 | static void asm_strto(ASMState *as, IRIns *ir) | 1750 | static void asm_strto(ASMState *as, IRIns *ir) |
1658 | { | 1751 | { |
1659 | /* Force a spill slot for the destination register (if any). */ | 1752 | /* Force a spill slot for the destination register (if any). */ |
@@ -3666,6 +3759,7 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
3666 | break; | 3759 | break; |
3667 | case IR_TOBIT: asm_tobit(as, ir); break; | 3760 | case IR_TOBIT: asm_tobit(as, ir); break; |
3668 | case IR_TOI64: asm_toi64(as, ir); break; | 3761 | case IR_TOI64: asm_toi64(as, ir); break; |
3762 | case IR_CONV: asm_conv(as, ir); break; | ||
3669 | case IR_TOSTR: asm_tostr(as, ir); break; | 3763 | case IR_TOSTR: asm_tostr(as, ir); break; |
3670 | case IR_STRTO: asm_strto(as, ir); break; | 3764 | case IR_STRTO: asm_strto(as, ir); break; |
3671 | 3765 | ||
@@ -3808,7 +3902,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
3808 | } | 3902 | } |
3809 | break; | 3903 | break; |
3810 | /* Do not propagate hints across type conversions. */ | 3904 | /* Do not propagate hints across type conversions. */ |
3811 | case IR_TONUM: case IR_TOINT: case IR_TOBIT: | 3905 | case IR_CONV: case IR_TONUM: case IR_TOINT: case IR_TOBIT: |
3812 | break; | 3906 | break; |
3813 | default: | 3907 | default: |
3814 | /* Propagate hints across likely 'op reg, imm' or 'op reg'. */ | 3908 | /* Propagate hints across likely 'op reg, imm' or 'op reg'. */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h index bdedaddd..8154949c 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -118,6 +118,7 @@ | |||
118 | _(OBAR, S , ref, ref) \ | 118 | _(OBAR, S , ref, ref) \ |
119 | \ | 119 | \ |
120 | /* Type conversions. */ \ | 120 | /* Type conversions. */ \ |
121 | _(CONV, N , ref, lit) \ | ||
121 | _(TONUM, N , ref, ___) \ | 122 | _(TONUM, N , ref, ___) \ |
122 | _(TOINT, N , ref, lit) \ | 123 | _(TOINT, N , ref, lit) \ |
123 | _(TOBIT, N , ref, ref) \ | 124 | _(TOBIT, N , ref, ref) \ |
@@ -218,6 +219,16 @@ IRFLDEF(FLENUM) | |||
218 | #define IRTOINT_TRUNCI64 5 /* Truncate number to int64_t. */ | 219 | #define IRTOINT_TRUNCI64 5 /* Truncate number to int64_t. */ |
219 | #define IRTOINT_TOBIT 6 /* Cache only: TOBIT conversion. */ | 220 | #define IRTOINT_TOBIT 6 /* Cache only: TOBIT conversion. */ |
220 | 221 | ||
222 | /* CONV mode, stored in op2. The lowest 8 bits hold the IRType of the source. NOTE(review): asm_conv() extracts the source type with a 0x1f mask -- confirm the intended field width. */ | |
223 | #define IRCONV_TRUNC 0x100 /* Truncate number to integer. */ | |
224 | #define IRCONV_SEXT 0x200 /* Sign-extend integer to integer. */ | |
225 | #define IRCONV_CSH 10 /* Shift for the number to integer conversion mode below. */ | |
226 | /* Number to integer conversion mode. Ordered by strength of the checks. */ | |
227 | #define IRCONV_TOBIT (0<<IRCONV_CSH) /* None. Cache only: TOBIT conv. */ | |
228 | #define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ | |
229 | #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ | |
230 | #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ | |
231 | |||
221 | /* C call info for CALL* instructions. */ | 232 | /* C call info for CALL* instructions. */ |
222 | typedef struct CCallInfo { | 233 | typedef struct CCallInfo { |
223 | ASMFunction func; /* Function pointer. */ | 234 | ASMFunction func; /* Function pointer. */ |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index d2c20546..8f05e7c8 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -771,6 +771,28 @@ LJFOLDF(cse_toint) | |||
771 | return EMITFOLD; /* No fallthrough to regular CSE. */ | 771 | return EMITFOLD; /* No fallthrough to regular CSE. */ |
772 | } | 772 | } |
773 | 773 | ||
/* CSE for CONV instructions.
**
** When JIT_F_OPT_CSE is set, walks the chain of existing CONV instructions
** (J->chain[IR_CONV], linked through ir->prev) while ref > op1 and returns
** the first match found: same operand, same result type, same mode bits
** (op2 masked with IRCONV_MODEMASK) and a guard bit at least as strong as
** the one requested. Otherwise the instruction is emitted (EMITFOLD).
**
** NOTE(review): IRCONV_MODEMASK is not among the IRCONV_* macros added by
** the lj_ir.h hunk of this diff -- confirm it is defined elsewhere.
*/
774 | /* Special CSE rule for CONV. */ | |
775 | LJFOLD(CONV any any) | |
776 | LJFOLDF(cse_conv) | |
777 | { | |
778 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | |
779 | IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK); /* Compare mode bits only. */ | |
780 | uint8_t guard = irt_isguard(fins->t); | |
781 | IRRef ref = J->chain[IR_CONV]; | |
782 | while (ref > op1) { /* Stop once the chain reaches refs at or below the operand. */ | |
783 | IRIns *ir = IR(ref); | |
784 | /* CSE also depends on the target type! | |
785 | ** OTOH commoning with stronger checks is ok, too. | |
786 | */ | |
787 | if (ir->op1 == op1 && irt_sametype(ir->t, fins->t) && | |
788 | (ir->op2 & IRCONV_MODEMASK) == op2 && irt_isguard(ir->t) >= guard) | |
789 | return ref; | |
790 | ref = ir->prev; | |
791 | } | |
792 | } | |
793 | return EMITFOLD; /* No fallthrough to regular CSE. */ | |
794 | } | |
795 | |||
774 | /* -- Strength reduction of widening -------------------------------------- */ | 796 | /* -- Strength reduction of widening -------------------------------------- */ |
775 | 797 | ||
776 | LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ | 798 | LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ |
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index bf58d67c..67590eb3 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
@@ -251,6 +251,11 @@ typedef enum { | |||
251 | XO_CVTSI2SD = XO_f20f(2a), | 251 | XO_CVTSI2SD = XO_f20f(2a), |
252 | XO_CVTSD2SI = XO_f20f(2d), | 252 | XO_CVTSD2SI = XO_f20f(2d), |
253 | XO_CVTTSD2SI= XO_f20f(2c), | 253 | XO_CVTTSD2SI= XO_f20f(2c), |
254 | XO_CVTSI2SS = XO_f30f(2a), | ||
255 | XO_CVTSS2SI = XO_f30f(2d), | ||
256 | XO_CVTTSS2SI= XO_f30f(2c), | ||
257 | XO_CVTSS2SD = XO_f30f(5a), | ||
258 | XO_CVTSD2SS = XO_f20f(5a), | ||
254 | XO_MOVD = XO_660f(6e), | 259 | XO_MOVD = XO_660f(6e), |
255 | XO_MOVDto = XO_660f(7e), | 260 | XO_MOVDto = XO_660f(7e), |
256 | 261 | ||