author     Mike Pall <mike>    2012-11-13 19:20:52 +0100
committer  Mike Pall <mike>    2012-11-13 19:20:52 +0100
commit     1cd13f6b338ebfecd32807e07f27bab94cd5db44 (patch)
tree       0bb87e673a2d6c429de47180c8cc63bfff7b74f4
parent     87d74a8f3d8f5a53fc7ad1fd45adcc06db4bcde8 (diff)
x64: Don't fuse implicitly 32-to-64 extended operands.
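Background for the fix: on x86-64, a 32-bit register write implicitly zero-extends into the full 64-bit register, so a 32-bit IR value that has been materialized in a register is safe to use as an operand of a 64-bit (REX.W) instruction. A fused memory operand gets no such extension: the 64-bit instruction reads a whole quadword from a location that only holds a defined 32-bit value, picking up whatever bytes happen to follow. Below is a minimal standalone C sketch of that hazard, assuming a little-endian x86-64 host; the struct is hypothetical and merely stands in for a 32-bit slot with unrelated neighboring data.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
  struct { uint32_t val; uint32_t junk; } slot = { 0x12345678u, 0xdeadbeefu };

  /* Register path: a 32-bit load zero-extends into the 64-bit register,
  ** so the upper half is well-defined. */
  uint64_t via_reg = (uint64_t)slot.val;      /* 0x0000000012345678 */

  /* Fused path: a 64-bit (REX.W) instruction with a memory operand reads
  ** a full quadword and picks up the neighboring bytes. */
  uint64_t via_mem;
  memcpy(&via_mem, &slot, sizeof(via_mem));   /* 0xdeadbeef12345678 */

  printf("reg: %016llx  mem: %016llx\n",
         (unsigned long long)via_reg, (unsigned long long)via_mem);
  return 0;
}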
 src/lj_asm_x86.h | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 6d7dd5a3..1222284d 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -367,6 +367,18 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   return ra_allocref(as, ref, allow);
 }
 
+#if LJ_64
+/* Don't fuse a 32 bit load into a 64 bit operation. */
+static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
+{
+  if (is64 && !irt_is64(IR(ref)->t))
+    return ra_alloc1(as, ref, allow);
+  return asm_fuseload(as, ref, allow);
+}
+#else
+#define asm_fuseloadm(as, ref, allow, is64)  asm_fuseload(as, (ref), (allow))
+#endif
+
 /* -- Calls --------------------------------------------------------------- */
 
 /* Count the required number of stack slots for a call. */
@@ -696,7 +708,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
   } else {  /* Integer to FP conversion. */
     Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
                ra_alloc1(as, lref, RSET_GPR) :
-               asm_fuseload(as, lref, RSET_GPR);
+               asm_fuseloadm(as, lref, RSET_GPR, st64);
     if (LJ_64 && st == IRT_U64) {
       MCLabel l_end = emit_label(as);
       const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
@@ -1829,7 +1841,7 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
     if (asm_swapops(as, ir)) {
       IRRef tmp = lref; lref = rref; rref = tmp;
     }
-    right = asm_fuseload(as, rref, rset_clear(allow, dest));
+    right = asm_fuseloadm(as, rref, rset_clear(allow, dest), irt_is64(ir->t));
   }
   if (irt_isguard(ir->t))  /* For IR_ADDOV etc. */
     asm_guardcc(as, CC_O);
@@ -1842,7 +1854,7 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
       emit_mrm(as, XO_IMUL, REX_64IR(ir, dest), right);
     } else {  /* IMUL r, r, k. */
       /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */
-      Reg left = asm_fuseload(as, lref, RSET_GPR);
+      Reg left = asm_fuseloadm(as, lref, RSET_GPR, irt_is64(ir->t));
       x86Op xo;
       if (checki8(k)) { emit_i8(as, k); xo = XO_IMULi8;
       } else { emit_i32(as, k); xo = XO_IMULi; }
@@ -2109,7 +2121,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
       }
     }
     as->curins--;  /* Skip to BAND to avoid failing in noconflict(). */
-    right = asm_fuseload(as, irl->op1, allow);
+    right = asm_fuseloadm(as, irl->op1, allow, r64);
     as->curins++;  /* Undo the above. */
   test_nofuse:
     asm_guardcc(as, cc);
@@ -2146,7 +2158,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
        return;
       }  /* Otherwise handle register case as usual. */
     } else {
-      left = asm_fuseload(as, lref, RSET_GPR);
+      left = asm_fuseloadm(as, lref, RSET_GPR, r64);
     }
     asm_guardcc(as, cc);
     if (usetest && left != RID_MRM) {
@@ -2160,7 +2172,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
       }
     } else {
       Reg left = ra_alloc1(as, lref, RSET_GPR);
-      Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
+      Reg right = asm_fuseloadm(as, rref, rset_exclude(RSET_GPR, left), r64);
       asm_guardcc(as, cc);
       emit_mrm(as, XO_CMP, r64 + left, right);
     }
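The shape of the fix, restated as a standalone sketch under my reading of the patch (the enum and function here are hypothetical, not LuaJIT's): asm_fuseloadm refuses to fuse only when the operation is 64 bit but the operand's IR type is not; every other combination keeps the old fusion behavior.

#include <stdio.h>

enum Action { FUSE_MEM, ALLOC_REG };

/* Restates the decision asm_fuseloadm makes: fuse a load only when it
** cannot widen a 32-bit value into a 64-bit operation. */
static enum Action fuseloadm(int op_is64, int ref_is64)
{
  if (op_is64 && !ref_is64)
    return ALLOC_REG;  /* 32-bit value in a 64-bit op: the fixed case. */
  return FUSE_MEM;     /* Widths agree, or the op is 32 bit: safe. */
}

int main(void)
{
  printf("64-bit op, 32-bit ref -> %s\n",
         fuseloadm(1, 0) == ALLOC_REG ? "alloc reg" : "fuse");
  printf("64-bit op, 64-bit ref -> %s\n",
         fuseloadm(1, 1) == ALLOC_REG ? "alloc reg" : "fuse");
  return 0;
}

Note the design choice visible in the patch itself: on non-x64 builds the #else branch maps asm_fuseloadm straight back to asm_fuseload and discards the flag, so 32-bit targets keep their existing code paths unchanged.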