about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2011-10-25 19:50:44 +0200
committer: Mike Pall <mike> 2011-10-25 19:50:44 +0200
commit: 179fe9e5d8a512cabcd9f09d4c02029d4fc244e9 (patch)
tree: 84d3836c70f5ac53c987ff652ba4c56493312a6f
parent: ca2b923026fb6ce2cf40bc2527d8234ef7263ea4 (diff)
download: luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.tar.gz
luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.tar.bz2
luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.zip
PPC: Fuse BSWAP with XLOAD/XSTORE to lwbrx/stwbrx.
-rw-r--r-- src/lj_asm_ppc.h 57
-rw-r--r-- src/lj_target_ppc.h 3
2 files changed, 46 insertions, 14 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 72e4c956..b2cf9f65 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -205,6 +205,22 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
205 emit_fai(as, pi, rt, base, ofs); 205 emit_fai(as, pi, rt, base, ofs);
206} 206}
207 207
208/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */
209static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
210 RegSet allow)
211{
212 IRIns *ira = IR(ref);
213 Reg right, left;
214 if (mayfuse(as, ref) && ira->o == IR_ADD && ra_noreg(ira->r)) {
215 left = ra_alloc2(as, ira, allow);
216 right = (left >> 8); left &= 255;
217 } else {
218 right = ra_alloc1(as, ref, allow);
219 left = RID_R0;
220 }
221 emit_tab(as, pi, rt, left, right);
222}
223
208/* Fuse to multiply-add/sub instruction. */ 224/* Fuse to multiply-add/sub instruction. */
209static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) 225static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
210{ 226{
@@ -886,10 +902,17 @@ static void asm_xload(ASMState *as, IRIns *ir)
886 902
887static void asm_xstore(ASMState *as, IRIns *ir) 903static void asm_xstore(ASMState *as, IRIns *ir)
888{ 904{
889 // NYI: fuse with bswap to stwbrx. 905 IRIns *irb;
890 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 906 if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
891 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 907 ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
892 rset_exclude(RSET_GPR, src)); 908 /* Fuse BSWAP with XSTORE to stwbrx. */
909 Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
910 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
911 } else {
912 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
913 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
914 rset_exclude(RSET_GPR, src));
915 }
893} 916}
894 917
895static void asm_ahuvload(ASMState *as, IRIns *ir) 918static void asm_ahuvload(ASMState *as, IRIns *ir)
@@ -1410,17 +1433,23 @@ nofuse:
1410 1433
1411static void asm_bitswap(ASMState *as, IRIns *ir) 1434static void asm_bitswap(ASMState *as, IRIns *ir)
1412{ 1435{
1413 // NYI: fuse with XLOAD to lwbrx.
1414 Reg dest = ra_dest(as, ir, RSET_GPR); 1436 Reg dest = ra_dest(as, ir, RSET_GPR);
1415 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1437 IRIns *irx;
1416 Reg tmp = dest; 1438 if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD &&
1417 if (tmp == left) { 1439 ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) {
1418 tmp = RID_TMP; 1440 /* Fuse BSWAP with XLOAD to lwbrx. */
1419 emit_mr(as, dest, RID_TMP); 1441 asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR);
1420 } 1442 } else {
1421 emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); 1443 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1422 emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); 1444 Reg tmp = dest;
1423 emit_rotlwi(as, tmp, left, 8); 1445 if (tmp == left) {
1446 tmp = RID_TMP;
1447 emit_mr(as, dest, RID_TMP);
1448 }
1449 emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23);
1450 emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7);
1451 emit_rotlwi(as, tmp, left, 8);
1452 }
1424} 1453}
1425 1454
1426static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1455static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index d0b3f4d0..8abc38fd 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -240,6 +240,9 @@ typedef enum PPCIns {
240 PPCI_LHAX = 0x7c0002ae, 240 PPCI_LHAX = 0x7c0002ae,
241 PPCI_STHX = 0x7c00032e, 241 PPCI_STHX = 0x7c00032e,
242 242
243 PPCI_LWBRX = 0x7c00042c,
244 PPCI_STWBRX = 0x7c00052c,
245
243 PPCI_LFSX = 0x7c00042e, 246 PPCI_LFSX = 0x7c00042e,
244 PPCI_LFDX = 0x7c0004ae, 247 PPCI_LFDX = 0x7c0004ae,
245 PPCI_STFSX = 0x7c00052e, 248 PPCI_STFSX = 0x7c00052e,