aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2011-10-25 21:29:58 +0200
committer Mike Pall <mike> 2011-10-25 21:29:58 +0200
commit 1d190c99a2547b44deb8f5e483452d9f51925fb2 (patch)
tree 5c1c0d28243573cb933ab376606a26f68dfd30d0
parent 455917fa0a0b9db21936a4f68994ea6b1134d027 (diff)
download luajit-1d190c99a2547b44deb8f5e483452d9f51925fb2.tar.gz
luajit-1d190c99a2547b44deb8f5e483452d9f51925fb2.tar.bz2
luajit-1d190c99a2547b44deb8f5e483452d9f51925fb2.zip
PPC: Fuse BAND with shift to rlwinm.
-rw-r--r-- src/lj_asm_ppc.h 50
1 files changed, 42 insertions, 8 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index b2cf9f65..166cf2e4 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -1478,9 +1478,40 @@ static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1478 emit_asb(as, pi, dest, left, right); 1478 emit_asb(as, pi, dest, left, right);
1479} 1479}
1480 1480
1481/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1482static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1483{
1484 IRIns *ir;
1485 Reg left;
1486 if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) &&
1487 irref_isk(ir->op2)) {
1488 int32_t sh = (IR(ir->op2)->i & 31);
1489 switch (ir->o) {
1490 case IR_BSHL:
1491 if ((mask & ((1u<<sh)-1))) goto nofuse;
1492 break;
1493 case IR_BSHR:
1494 if ((mask & ~((~0u)>>sh))) goto nofuse;
1495 sh = ((32-sh)&31);
1496 break;
1497 case IR_BROL:
1498 break;
1499 default:
1500 goto nofuse;
1501 }
1502 left = ra_alloc1(as, ir->op1, RSET_GPR);
1503 *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh);
1504 return;
1505 }
1506nofuse:
1507 left = ra_alloc1(as, ref, RSET_GPR);
1508 *--as->mcp = pi | PPCF_T(left);
1509}
1510
1481static void asm_bitand(ASMState *as, IRIns *ir) 1511static void asm_bitand(ASMState *as, IRIns *ir)
1482{ 1512{
1483 Reg dest, left, right; 1513 Reg dest, left, right;
1514 IRRef lref = ir->op1;
1484 PPCIns dot = 0; 1515 PPCIns dot = 0;
1485 IRRef op2; 1516 IRRef op2;
1486 if (as->flagmcp == as->mcp) { 1517 if (as->flagmcp == as->mcp) {
@@ -1489,48 +1520,51 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1489 dot = PPCF_DOT; 1520 dot = PPCF_DOT;
1490 } 1521 }
1491 dest = ra_dest(as, ir, RSET_GPR); 1522 dest = ra_dest(as, ir, RSET_GPR);
1492 left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1493 if (irref_isk(ir->op2)) { 1523 if (irref_isk(ir->op2)) {
1494 int32_t k = IR(ir->op2)->i; 1524 int32_t k = IR(ir->op2)->i;
1495 if (k) { 1525 if (k) {
1496 // NYI: fuse with shifts/rotates. 1526 /* First check for a contiguous bitmask as used by rlwinm. */
1497 uint32_t s1 = lj_ffs((uint32_t)k); 1527 uint32_t s1 = lj_ffs((uint32_t)k);
1498 uint32_t k1 = ((uint32_t)k >> s1); 1528 uint32_t k1 = ((uint32_t)k >> s1);
1499 if ((k1 & (k1+1)) == 0) { 1529 if ((k1 & (k1+1)) == 0) {
1500 emit_rot(as, PPCI_RLWINM|dot, dest, left, 0, 1530 asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
1501 31-lj_fls((uint32_t)k), 31-s1); 1531 PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1),
1532 k, lref);
1502 return; 1533 return;
1503 } 1534 }
1504 if (~(uint32_t)k) { 1535 if (~(uint32_t)k) {
1505 uint32_t s2 = lj_ffs(~(uint32_t)k); 1536 uint32_t s2 = lj_ffs(~(uint32_t)k);
1506 uint32_t k2 = (~(uint32_t)k >> s2); 1537 uint32_t k2 = (~(uint32_t)k >> s2);
1507 if ((k2 & (k2+1)) == 0) { 1538 if ((k2 & (k2+1)) == 0) {
1508 emit_rot(as, PPCI_RLWINM|dot, dest, left, 0, 1539 asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
1509 32-s2, 30-lj_fls(~(uint32_t)k)); 1540 PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)),
1541 k, lref);
1510 return; 1542 return;
1511 } 1543 }
1512 } 1544 }
1513 } 1545 }
1514 if (checku16(k)) { 1546 if (checku16(k)) {
1547 left = ra_alloc1(as, lref, RSET_GPR);
1515 emit_asi(as, PPCI_ANDIDOT, dest, left, k); 1548 emit_asi(as, PPCI_ANDIDOT, dest, left, k);
1516 return; 1549 return;
1517 } else if ((k & 0xffff) == 0) { 1550 } else if ((k & 0xffff) == 0) {
1551 left = ra_alloc1(as, lref, RSET_GPR);
1518 emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16)); 1552 emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16));
1519 return; 1553 return;
1520 } 1554 }
1521 } 1555 }
1522 op2 = ir->op2; 1556 op2 = ir->op2;
1523 if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT) { 1557 if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) {
1524 dot ^= (PPCI_AND ^ PPCI_ANDC); 1558 dot ^= (PPCI_AND ^ PPCI_ANDC);
1525 op2 = IR(op2)->op1; 1559 op2 = IR(op2)->op1;
1526 } 1560 }
1561 left = ra_hintalloc(as, lref, dest, RSET_GPR);
1527 right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left)); 1562 right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left));
1528 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1563 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1529} 1564}
1530 1565
1531static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1566static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1532{ 1567{
1533 // NYI: fuse with IR_BAND.
1534 Reg dest, left; 1568 Reg dest, left;
1535 Reg dot = 0; 1569 Reg dot = 0;
1536 if (as->flagmcp == as->mcp) { 1570 if (as->flagmcp == as->mcp) {