about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Mike Pall <mike> 2016-12-15 22:47:40 +0100
committer: Mike Pall <mike> 2016-12-15 22:47:40 +0100
commit: ebec2530befabb8777f3e46d22980112c942dba8 (patch)
tree: dacef5aeab95c157687ad59f972d04c6af5994f1 /src
parent: 3cfa9cb2bbe0dde6f701ba1d90f1e4afed5d1519 (diff)
download: luajit-ebec2530befabb8777f3e46d22980112c942dba8.tar.gz
luajit-ebec2530befabb8777f3e46d22980112c942dba8.tar.bz2
luajit-ebec2530befabb8777f3e46d22980112c942dba8.zip
ARM64: Fuse BOR/BXOR and BNOT into ORN/EON.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Diffstat (limited to 'src')
-rw-r--r-- src/lj_asm_arm64.h 52
-rw-r--r-- src/lj_target_arm64.h 1
2 files changed, 36 insertions, 17 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 372429db..28050bf8 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1464,40 +1464,58 @@ static void asm_neg(ASMState *as, IRIns *ir)
1464 asm_intneg(as, ir); 1464 asm_intneg(as, ir);
1465} 1465}
1466 1466
1467static void asm_bitop(ASMState *as, IRIns *ir, A64Ins ai) 1467static void asm_band(ASMState *as, IRIns *ir)
1468{ 1468{
1469 if (as->flagmcp == as->mcp && ai == A64I_ANDw) { 1469 A64Ins ai = A64I_ANDw;
1470 if (asm_fuseandshift(as, ir))
1471 return;
1472 if (as->flagmcp == as->mcp) {
1470 /* Try to drop cmp r, #0. */ 1473 /* Try to drop cmp r, #0. */
1471 as->flagmcp = NULL; 1474 as->flagmcp = NULL;
1472 as->mcp++; 1475 as->mcp++;
1473 ai += A64I_ANDSw - A64I_ANDw; 1476 ai = A64I_ANDSw;
1474 } 1477 }
1475 if (ir->op2 == 0) { 1478 asm_intop(as, ir, ai);
1476 Reg dest = ra_dest(as, ir, RSET_GPR); 1479}
1477 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); 1480
1481static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
1482{
1483 IRRef lref = ir->op1, rref = ir->op2;
1484 IRIns *irl = IR(lref), *irr = IR(rref);
1485 if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
1486 (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
1487 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1488 uint32_t m;
1489 if (irl->o == IR_BNOT) {
1490 IRRef tmp = lref; lref = rref; rref = tmp;
1491 }
1492 left = ra_alloc1(as, lref, RSET_GPR);
1493 ai |= A64I_ON;
1478 if (irt_is64(ir->t)) ai |= A64I_X; 1494 if (irt_is64(ir->t)) ai |= A64I_X;
1479 emit_d(as, ai^m, dest); 1495 m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
1496 emit_dn(as, ai^m, dest, left);
1480 } else { 1497 } else {
1481 asm_intop(as, ir, ai); 1498 asm_intop(as, ir, ai);
1482 } 1499 }
1483} 1500}
1484 1501
1485static void asm_band(ASMState *as, IRIns *ir)
1486{
1487 if (asm_fuseandshift(as, ir))
1488 return;
1489 asm_bitop(as, ir, A64I_ANDw);
1490}
1491
1492static void asm_bor(ASMState *as, IRIns *ir) 1502static void asm_bor(ASMState *as, IRIns *ir)
1493{ 1503{
1494 if (asm_fuseorshift(as, ir)) 1504 if (asm_fuseorshift(as, ir))
1495 return; 1505 return;
1496 asm_bitop(as, ir, A64I_ORRw); 1506 asm_borbxor(as, ir, A64I_ORRw);
1497} 1507}
1498 1508
1499#define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw) 1509#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw)
1500#define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw) 1510
1511static void asm_bnot(ASMState *as, IRIns *ir)
1512{
1513 A64Ins ai = A64I_MVNw;
1514 Reg dest = ra_dest(as, ir, RSET_GPR);
1515 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1516 if (irt_is64(ir->t)) ai |= A64I_X;
1517 emit_d(as, ai^m, dest);
1518}
1501 1519
1502static void asm_bswap(ASMState *as, IRIns *ir) 1520static void asm_bswap(ASMState *as, IRIns *ir)
1503{ 1521{
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
index f77a58a0..9e9fbd01 100644
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -142,6 +142,7 @@ typedef enum A64Ins {
142 A64I_S = 0x20000000, 142 A64I_S = 0x20000000,
143 A64I_X = 0x80000000, 143 A64I_X = 0x80000000,
144 A64I_EX = 0x00200000, 144 A64I_EX = 0x00200000,
145 A64I_ON = 0x00200000,
145 A64I_K12 = 0x1a000000, 146 A64I_K12 = 0x1a000000,
146 A64I_K13 = 0x18000000, 147 A64I_K13 = 0x18000000,
147 A64I_LS_U = 0x01000000, 148 A64I_LS_U = 0x01000000,