diff options
| author | Mike Pall <mike> | 2011-10-25 19:50:44 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2011-10-25 19:50:44 +0200 |
| commit | 179fe9e5d8a512cabcd9f09d4c02029d4fc244e9 (patch) | |
| tree | 84d3836c70f5ac53c987ff652ba4c56493312a6f /src | |
| parent | ca2b923026fb6ce2cf40bc2527d8234ef7263ea4 (diff) | |
| download | luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.tar.gz luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.tar.bz2 luajit-179fe9e5d8a512cabcd9f09d4c02029d4fc244e9.zip | |
PPC: Fuse BSWAP with XLOAD/XSTORE to lwbrx/stwbrx.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm_ppc.h | 57 | ||||
| -rw-r--r-- | src/lj_target_ppc.h | 3 |
2 files changed, 46 insertions, 14 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 72e4c956..b2cf9f65 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
| @@ -205,6 +205,22 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
| 205 | emit_fai(as, pi, rt, base, ofs); | 205 | emit_fai(as, pi, rt, base, ofs); |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | /* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */ | ||
| 209 | static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | ||
| 210 | RegSet allow) | ||
| 211 | { | ||
| 212 | IRIns *ira = IR(ref); | ||
| 213 | Reg right, left; | ||
| 214 | if (mayfuse(as, ref) && ira->o == IR_ADD && ra_noreg(ira->r)) { | ||
| 215 | left = ra_alloc2(as, ira, allow); | ||
| 216 | right = (left >> 8); left &= 255; | ||
| 217 | } else { | ||
| 218 | right = ra_alloc1(as, ref, allow); | ||
| 219 | left = RID_R0; | ||
| 220 | } | ||
| 221 | emit_tab(as, pi, rt, left, right); | ||
| 222 | } | ||
| 223 | |||
| 208 | /* Fuse to multiply-add/sub instruction. */ | 224 | /* Fuse to multiply-add/sub instruction. */ |
| 209 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | 225 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) |
| 210 | { | 226 | { |
| @@ -886,10 +902,17 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
| 886 | 902 | ||
| 887 | static void asm_xstore(ASMState *as, IRIns *ir) | 903 | static void asm_xstore(ASMState *as, IRIns *ir) |
| 888 | { | 904 | { |
| 889 | // NYI: fuse with bswap to stwbrx. | 905 | IRIns *irb; |
| 890 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 906 | if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && |
| 891 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 907 | ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { |
| 892 | rset_exclude(RSET_GPR, src)); | 908 | /* Fuse BSWAP with XSTORE to stwbrx. */ |
| 909 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); | ||
| 910 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); | ||
| 911 | } else { | ||
| 912 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
| 913 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
| 914 | rset_exclude(RSET_GPR, src)); | ||
| 915 | } | ||
| 893 | } | 916 | } |
| 894 | 917 | ||
| 895 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 918 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
| @@ -1410,17 +1433,23 @@ nofuse: | |||
| 1410 | 1433 | ||
| 1411 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1434 | static void asm_bitswap(ASMState *as, IRIns *ir) |
| 1412 | { | 1435 | { |
| 1413 | // NYI: fuse with XLOAD to lwbrx. | ||
| 1414 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1436 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 1415 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 1437 | IRIns *irx; |
| 1416 | Reg tmp = dest; | 1438 | if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD && |
| 1417 | if (tmp == left) { | 1439 | ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) { |
| 1418 | tmp = RID_TMP; | 1440 | /* Fuse BSWAP with XLOAD to lwbrx. */ |
| 1419 | emit_mr(as, dest, RID_TMP); | 1441 | asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR); |
| 1420 | } | 1442 | } else { |
| 1421 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); | 1443 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
| 1422 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); | 1444 | Reg tmp = dest; |
| 1423 | emit_rotlwi(as, tmp, left, 8); | 1445 | if (tmp == left) { |
| 1446 | tmp = RID_TMP; | ||
| 1447 | emit_mr(as, dest, RID_TMP); | ||
| 1448 | } | ||
| 1449 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); | ||
| 1450 | emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); | ||
| 1451 | emit_rotlwi(as, tmp, left, 8); | ||
| 1452 | } | ||
| 1424 | } | 1453 | } |
| 1425 | 1454 | ||
| 1426 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | 1455 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) |
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index d0b3f4d0..8abc38fd 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h | |||
| @@ -240,6 +240,9 @@ typedef enum PPCIns { | |||
| 240 | PPCI_LHAX = 0x7c0002ae, | 240 | PPCI_LHAX = 0x7c0002ae, |
| 241 | PPCI_STHX = 0x7c00032e, | 241 | PPCI_STHX = 0x7c00032e, |
| 242 | 242 | ||
| 243 | PPCI_LWBRX = 0x7c00042c, | ||
| 244 | PPCI_STWBRX = 0x7c00052c, | ||
| 245 | |||
| 243 | PPCI_LFSX = 0x7c00042e, | 246 | PPCI_LFSX = 0x7c00042e, |
| 244 | PPCI_LFDX = 0x7c0004ae, | 247 | PPCI_LFDX = 0x7c0004ae, |
| 245 | PPCI_STFSX = 0x7c00052e, | 248 | PPCI_STFSX = 0x7c00052e, |
