diff options
author | Mike Pall <mike> | 2011-10-25 19:50:44 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2011-10-25 19:50:44 +0200 |
commit | 179fe9e5d8a512cabcd9f09d4c02029d4fc244e9 (patch) | |
tree | 84d3836c70f5ac53c987ff652ba4c56493312a6f /src | |
parent | ca2b923026fb6ce2cf40bc2527d8234ef7263ea4 (diff) | |
download | luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.tar.gz luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.tar.bz2 luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.zip |
PPC: Fuse BSWAP with XLOAD/XSTORE to lwbrx/stwbrx.
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm_ppc.h | 57 | ||||
-rw-r--r-- | src/lj_target_ppc.h | 3 |
2 files changed, 46 insertions, 14 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 72e4c956..b2cf9f65 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -205,6 +205,22 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
205 | emit_fai(as, pi, rt, base, ofs); | 205 | emit_fai(as, pi, rt, base, ofs); |
206 | } | 206 | } |
207 | 207 | ||
208 | /* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */ | ||
209 | static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | ||
210 | RegSet allow) | ||
211 | { | ||
212 | IRIns *ira = IR(ref); | ||
213 | Reg right, left; | ||
214 | if (mayfuse(as, ref) && ira->o == IR_ADD && ra_noreg(ira->r)) { | ||
215 | left = ra_alloc2(as, ira, allow); | ||
216 | right = (left >> 8); left &= 255; | ||
217 | } else { | ||
218 | right = ra_alloc1(as, ref, allow); | ||
219 | left = RID_R0; | ||
220 | } | ||
221 | emit_tab(as, pi, rt, left, right); | ||
222 | } | ||
223 | |||
208 | /* Fuse to multiply-add/sub instruction. */ | 224 | /* Fuse to multiply-add/sub instruction. */ |
209 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | 225 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) |
210 | { | 226 | { |
@@ -886,10 +902,17 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
886 | 902 | ||
887 | static void asm_xstore(ASMState *as, IRIns *ir) | 903 | static void asm_xstore(ASMState *as, IRIns *ir) |
888 | { | 904 | { |
889 | // NYI: fuse with bswap to stwbrx. | 905 | IRIns *irb; |
890 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 906 | if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && |
891 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 907 | ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { |
892 | rset_exclude(RSET_GPR, src)); | 908 | /* Fuse BSWAP with XSTORE to stwbrx. */ |
909 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); | ||
910 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); | ||
911 | } else { | ||
912 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
913 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
914 | rset_exclude(RSET_GPR, src)); | ||
915 | } | ||
893 | } | 916 | } |
894 | 917 | ||
895 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 918 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
@@ -1410,17 +1433,23 @@ nofuse: | |||
1410 | 1433 | ||
1411 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1434 | static void asm_bitswap(ASMState *as, IRIns *ir) |
1412 | { | 1435 | { |
1413 | // NYI: fuse with XLOAD to lwbrx. | ||
1414 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1436 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1415 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 1437 | IRIns *irx; |
1416 | Reg tmp = dest; | 1438 | if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD && |
1417 | if (tmp == left) { | 1439 | ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) { |
1418 | tmp = RID_TMP; | 1440 | /* Fuse BSWAP with XLOAD to lwbrx. */ |
1419 | emit_mr(as, dest, RID_TMP); | 1441 | asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR); |
1420 | } | 1442 | } else { |
1421 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); | 1443 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
1422 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); | 1444 | Reg tmp = dest; |
1423 | emit_rotlwi(as, tmp, left, 8); | 1445 | if (tmp == left) { |
1446 | tmp = RID_TMP; | ||
1447 | emit_mr(as, dest, RID_TMP); | ||
1448 | } | ||
1449 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); | ||
1450 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); | ||
1451 | emit_rotlwi(as, tmp, left, 8); | ||
1452 | } | ||
1424 | } | 1453 | } |
1425 | 1454 | ||
1426 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | 1455 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) |
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index d0b3f4d0..8abc38fd 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h | |||
@@ -240,6 +240,9 @@ typedef enum PPCIns { | |||
240 | PPCI_LHAX = 0x7c0002ae, | 240 | PPCI_LHAX = 0x7c0002ae, |
241 | PPCI_STHX = 0x7c00032e, | 241 | PPCI_STHX = 0x7c00032e, |
242 | 242 | ||
243 | PPCI_LWBRX = 0x7c00042c, | ||
244 | PPCI_STWBRX = 0x7c00052c, | ||
245 | |||
243 | PPCI_LFSX = 0x7c00042e, | 246 | PPCI_LFSX = 0x7c00042e, |
244 | PPCI_LFDX = 0x7c0004ae, | 247 | PPCI_LFDX = 0x7c0004ae, |
245 | PPCI_STFSX = 0x7c00052e, | 248 | PPCI_STFSX = 0x7c00052e, |