diff options
author | Mike Pall <mike> | 2011-10-25 21:29:58 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2011-10-25 21:29:58 +0200 |
commit | 1d190c99a2547b44deb8f5e483452d9f51925fb2 (patch) | |
tree | 5c1c0d28243573cb933ab376606a26f68dfd30d0 | |
parent | 455917fa0a0b9db21936a4f68994ea6b1134d027 (diff) | |
download | luajit-1d190c99a2547b44deb8f5e483452d9f51925fb2.tar.gz luajit-1d190c99a2547b44deb8f5e483452d9f51925fb2.tar.bz2 luajit-1d190c99a2547b44deb8f5e483452d9f51925fb2.zip |
PPC: Fuse BAND with shift to rlwinm.
-rw-r--r-- | src/lj_asm_ppc.h | 50 |
1 file changed, 42 insertions, 8 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index b2cf9f65..166cf2e4 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -1478,9 +1478,40 @@ static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | |||
1478 | emit_asb(as, pi, dest, left, right); | 1478 | emit_asb(as, pi, dest, left, right); |
1479 | } | 1479 | } |
1480 | 1480 | ||
1481 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ | ||
1482 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) | ||
1483 | { | ||
1484 | IRIns *ir; | ||
1485 | Reg left; | ||
1486 | if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) && | ||
1487 | irref_isk(ir->op2)) { | ||
1488 | int32_t sh = (IR(ir->op2)->i & 31); | ||
1489 | switch (ir->o) { | ||
1490 | case IR_BSHL: | ||
1491 | if ((mask & ((1u<<sh)-1))) goto nofuse; | ||
1492 | break; | ||
1493 | case IR_BSHR: | ||
1494 | if ((mask & ~((~0u)>>sh))) goto nofuse; | ||
1495 | sh = ((32-sh)&31); | ||
1496 | break; | ||
1497 | case IR_BROL: | ||
1498 | break; | ||
1499 | default: | ||
1500 | goto nofuse; | ||
1501 | } | ||
1502 | left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1503 | *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh); | ||
1504 | return; | ||
1505 | } | ||
1506 | nofuse: | ||
1507 | left = ra_alloc1(as, ref, RSET_GPR); | ||
1508 | *--as->mcp = pi | PPCF_T(left); | ||
1509 | } | ||
1510 | |||
1481 | static void asm_bitand(ASMState *as, IRIns *ir) | 1511 | static void asm_bitand(ASMState *as, IRIns *ir) |
1482 | { | 1512 | { |
1483 | Reg dest, left, right; | 1513 | Reg dest, left, right; |
1514 | IRRef lref = ir->op1; | ||
1484 | PPCIns dot = 0; | 1515 | PPCIns dot = 0; |
1485 | IRRef op2; | 1516 | IRRef op2; |
1486 | if (as->flagmcp == as->mcp) { | 1517 | if (as->flagmcp == as->mcp) { |
@@ -1489,48 +1520,51 @@ static void asm_bitand(ASMState *as, IRIns *ir) | |||
1489 | dot = PPCF_DOT; | 1520 | dot = PPCF_DOT; |
1490 | } | 1521 | } |
1491 | dest = ra_dest(as, ir, RSET_GPR); | 1522 | dest = ra_dest(as, ir, RSET_GPR); |
1492 | left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1493 | if (irref_isk(ir->op2)) { | 1523 | if (irref_isk(ir->op2)) { |
1494 | int32_t k = IR(ir->op2)->i; | 1524 | int32_t k = IR(ir->op2)->i; |
1495 | if (k) { | 1525 | if (k) { |
1496 | // NYI: fuse with shifts/rotates. | 1526 | /* First check for a contiguous bitmask as used by rlwinm. */ |
1497 | uint32_t s1 = lj_ffs((uint32_t)k); | 1527 | uint32_t s1 = lj_ffs((uint32_t)k); |
1498 | uint32_t k1 = ((uint32_t)k >> s1); | 1528 | uint32_t k1 = ((uint32_t)k >> s1); |
1499 | if ((k1 & (k1+1)) == 0) { | 1529 | if ((k1 & (k1+1)) == 0) { |
1500 | emit_rot(as, PPCI_RLWINM|dot, dest, left, 0, | 1530 | asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | |
1501 | 31-lj_fls((uint32_t)k), 31-s1); | 1531 | PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1), |
1532 | k, lref); | ||
1502 | return; | 1533 | return; |
1503 | } | 1534 | } |
1504 | if (~(uint32_t)k) { | 1535 | if (~(uint32_t)k) { |
1505 | uint32_t s2 = lj_ffs(~(uint32_t)k); | 1536 | uint32_t s2 = lj_ffs(~(uint32_t)k); |
1506 | uint32_t k2 = (~(uint32_t)k >> s2); | 1537 | uint32_t k2 = (~(uint32_t)k >> s2); |
1507 | if ((k2 & (k2+1)) == 0) { | 1538 | if ((k2 & (k2+1)) == 0) { |
1508 | emit_rot(as, PPCI_RLWINM|dot, dest, left, 0, | 1539 | asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | |
1509 | 32-s2, 30-lj_fls(~(uint32_t)k)); | 1540 | PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)), |
1541 | k, lref); | ||
1510 | return; | 1542 | return; |
1511 | } | 1543 | } |
1512 | } | 1544 | } |
1513 | } | 1545 | } |
1514 | if (checku16(k)) { | 1546 | if (checku16(k)) { |
1547 | left = ra_alloc1(as, lref, RSET_GPR); | ||
1515 | emit_asi(as, PPCI_ANDIDOT, dest, left, k); | 1548 | emit_asi(as, PPCI_ANDIDOT, dest, left, k); |
1516 | return; | 1549 | return; |
1517 | } else if ((k & 0xffff) == 0) { | 1550 | } else if ((k & 0xffff) == 0) { |
1551 | left = ra_alloc1(as, lref, RSET_GPR); | ||
1518 | emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16)); | 1552 | emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16)); |
1519 | return; | 1553 | return; |
1520 | } | 1554 | } |
1521 | } | 1555 | } |
1522 | op2 = ir->op2; | 1556 | op2 = ir->op2; |
1523 | if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT) { | 1557 | if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) { |
1524 | dot ^= (PPCI_AND ^ PPCI_ANDC); | 1558 | dot ^= (PPCI_AND ^ PPCI_ANDC); |
1525 | op2 = IR(op2)->op1; | 1559 | op2 = IR(op2)->op1; |
1526 | } | 1560 | } |
1561 | left = ra_hintalloc(as, lref, dest, RSET_GPR); | ||
1527 | right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left)); | 1562 | right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left)); |
1528 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); | 1563 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); |
1529 | } | 1564 | } |
1530 | 1565 | ||
1531 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | 1566 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) |
1532 | { | 1567 | { |
1533 | // NYI: fuse with IR_BAND. | ||
1534 | Reg dest, left; | 1568 | Reg dest, left; |
1535 | Reg dot = 0; | 1569 | Reg dot = 0; |
1536 | if (as->flagmcp == as->mcp) { | 1570 | if (as->flagmcp == as->mcp) { |