aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2016-11-29 19:30:40 +0100
committer: Mike Pall <mike> 2016-11-29 19:30:40 +0100
commit: 3ad2bbf58600f8ba2918b56b0a7ab305df19cfe5 (patch)
tree: 28771b8efec0975a95831ebcac212f7955166191
parent: 6538c8a18711a6eb009def36050acd5f02e42aec (diff)
downloadluajit-3ad2bbf58600f8ba2918b56b0a7ab305df19cfe5.tar.gz
luajit-3ad2bbf58600f8ba2918b56b0a7ab305df19cfe5.tar.bz2
luajit-3ad2bbf58600f8ba2918b56b0a7ab305df19cfe5.zip
ARM64: Make use of tbz/tbnz and cbz/cbnz.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-rw-r--r-- src/lj_asm_arm64.h 83
-rw-r--r-- src/lj_emit_arm64.h 19
-rw-r--r-- src/lj_target_arm64.h 6
3 files changed, 91 insertions, 17 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 19b3331d..eea957b5 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -84,6 +84,34 @@ static void asm_guardcc(ASMState *as, A64CC cc)
84 emit_cond_branch(as, cc, target); 84 emit_cond_branch(as, cc, target);
85} 85}
86 86
87/* Emit test and branch instruction to exit for guard. */
88static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
89{
90 MCode *target = asm_exitstub_addr(as, as->snapno);
91 MCode *p = as->mcp;
92 if (LJ_UNLIKELY(p == as->invmcp)) {
93 as->loopinv = 1;
94 *p = A64I_B | ((target-p) & 0x03ffffffu);
95 emit_tnb(as, ai^0x01000000u, r, bit, p-1);
96 return;
97 }
98 emit_tnb(as, ai, r, bit, target);
99}
100
101/* Emit compare and branch instruction to exit for guard. */
102static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
103{
104 MCode *target = asm_exitstub_addr(as, as->snapno);
105 MCode *p = as->mcp;
106 if (LJ_UNLIKELY(p == as->invmcp)) {
107 as->loopinv = 1;
108 *p = A64I_B | ((target-p) & 0x03ffffffu);
109 emit_cnb(as, ai^0x01000000u, r, p-1);
110 return;
111 }
112 emit_cnb(as, ai, r, target);
113}
114
87/* -- Operand fusion ------------------------------------------------------ */ 115/* -- Operand fusion ------------------------------------------------------ */
88 116
89/* Limit linear search to this distance. Avoids O(n^2) behavior. */ 117/* Limit linear search to this distance. Avoids O(n^2) behavior. */
@@ -482,10 +510,9 @@ static void asm_strto(ASMState *as, IRIns *ir)
482 dest = ra_dest(as, ir, RSET_FPR); 510 dest = ra_dest(as, ir, RSET_FPR);
483 } 511 }
484 } 512 }
485 asm_guardcc(as, CC_EQ);
486 if (destused) 513 if (destused)
487 emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); 514 emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
488 emit_n(as, (A64I_CMPw^A64I_K12)|A64F_U12(0), RID_RET); 515 asm_guardcnb(as, A64I_CBZ, RID_RET);
489 args[0] = ir->op1; /* GCstr *str */ 516 args[0] = ir->op1; /* GCstr *str */
490 args[1] = ASMREF_TMP1; /* TValue *n */ 517 args[1] = ASMREF_TMP1; /* TValue *n */
491 asm_gencall(as, ci, args); 518 asm_gencall(as, ci, args);
@@ -1465,13 +1492,13 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
1465 else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ 1492 else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
1466 } 1493 }
1467 oldcc = cc; 1494 oldcc = cc;
1468 if (irref_isk(rref) && IR(rref)->i == 0) { 1495 if (irref_isk(rref) && get_k64val(IR(rref)) == 0) {
1469 IRIns *irl = IR(lref); 1496 IRIns *irl = IR(lref);
1470 if (cc == CC_GE) cc = CC_PL; 1497 if (cc == CC_GE) cc = CC_PL;
1471 else if (cc == CC_LT) cc = CC_MI; 1498 else if (cc == CC_LT) cc = CC_MI;
1472 else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */ 1499 else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */
1473 cmpprev0 = (irl+1 == ir); 1500 cmpprev0 = (irl+1 == ir);
1474 /* Combine comp(BAND(left, right), 0) into tst left, right. */ 1501 /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
1475 if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { 1502 if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
1476 IRRef blref = irl->op1, brref = irl->op2; 1503 IRRef blref = irl->op1, brref = irl->op2;
1477 uint32_t m2 = 0; 1504 uint32_t m2 = 0;
@@ -1480,10 +1507,13 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
1480 Reg tmp = blref; blref = brref; brref = tmp; 1507 Reg tmp = blref; blref = brref; brref = tmp;
1481 } 1508 }
1482 if (irref_isk(brref)) { 1509 if (irref_isk(brref)) {
1483 /* NYI: use tbz/tbnz, if applicable. */ 1510 uint64_t k = get_k64val(IR(brref));
1484 m2 = emit_isk13(IR(brref)->i, irt_is64(irl->t)); 1511 if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
1485 if (!m2) 1512 asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
1486 goto notst; /* Not beneficial if we miss a constant operand. */ 1513 ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
1514 return;
1515 }
1516 m2 = emit_isk13(k, irt_is64(irl->t));
1487 } 1517 }
1488 bleft = ra_alloc1(as, blref, RSET_GPR); 1518 bleft = ra_alloc1(as, blref, RSET_GPR);
1489 ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); 1519 ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
@@ -1493,9 +1523,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
1493 emit_n(as, ai^m2, bleft); 1523 emit_n(as, ai^m2, bleft);
1494 return; 1524 return;
1495 } 1525 }
1496 /* NYI: use cbz/cbnz for EQ/NE 0. */ 1526 if (cc == CC_EQ || cc == CC_NE) {
1527 /* Combine cmp-bcc into cbz/cbnz. */
1528 ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
1529 if (irt_is64(ir->t)) ai |= A64I_X;
1530 asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
1531 return;
1532 }
1497 } 1533 }
1498notst: 1534nocombine:
1499 left = ra_alloc1(as, lref, RSET_GPR); 1535 left = ra_alloc1(as, lref, RSET_GPR);
1500 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); 1536 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1501 asm_guardcc(as, cc); 1537 asm_guardcc(as, cc);
@@ -1638,8 +1674,7 @@ static void asm_gc_check(ASMState *as)
1638 ra_evictset(as, RSET_SCRATCH); 1674 ra_evictset(as, RSET_SCRATCH);
1639 l_end = emit_label(as); 1675 l_end = emit_label(as);
1640 /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ 1676 /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
1641 asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ 1677 asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
1642 emit_n(as, A64I_CMPx^A64I_K12, RID_RET);
1643 args[0] = ASMREF_TMP1; /* global_State *g */ 1678 args[0] = ASMREF_TMP1; /* global_State *g */
1644 args[1] = ASMREF_TMP2; /* MSize steps */ 1679 args[1] = ASMREF_TMP2; /* MSize steps */
1645 asm_gencall(as, ci, args); 1680 asm_gencall(as, ci, args);
@@ -1666,10 +1701,10 @@ static void asm_loop_fixup(ASMState *as)
1666 MCode *p = as->mctop; 1701 MCode *p = as->mctop;
1667 MCode *target = as->mcp; 1702 MCode *target = as->mcp;
1668 if (as->loopinv) { /* Inverted loop branch? */ 1703 if (as->loopinv) { /* Inverted loop branch? */
1704 uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
1669 ptrdiff_t delta = target - (p - 2); 1705 ptrdiff_t delta = target - (p - 2);
1670 lua_assert(((delta + 0x40000) >> 19) == 0); 1706 /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
1671 /* asm_guardcc already inverted the b.cc and patched the final bl. */ 1707 p[-2] |= ((uint32_t)delta & mask) << 5;
1672 p[-2] |= ((uint32_t)delta & 0x7ffff) << 5;
1673 } else { 1708 } else {
1674 ptrdiff_t delta = target - (p - 1); 1709 ptrdiff_t delta = target - (p - 1);
1675 p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); 1710 p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu);
@@ -1795,18 +1830,32 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1795 MCode *mcarea = lj_mcode_patch(J, p, 0); 1830 MCode *mcarea = lj_mcode_patch(J, p, 0);
1796 MCode *px = exitstub_trace_addr(T, exitno); 1831 MCode *px = exitstub_trace_addr(T, exitno);
1797 for (; p < pe; p++) { 1832 for (; p < pe; p++) {
1798 /* Look for bcc/b exitstub, replace with bcc/b target. */ 1833 /* Look for exitstub branch, replace with branch to target. */
1799 uint32_t ins = *p; 1834 uint32_t ins = *p;
1800 if ((ins & 0xff000000u) == 0x54000000u && 1835 if ((ins & 0xff000000u) == 0x54000000u &&
1801 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { 1836 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
1837 /* Patch bcc exitstub. */
1802 *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); 1838 *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u);
1803 cend = p+1; 1839 cend = p+1;
1804 if (!cstart) cstart = p; 1840 if (!cstart) cstart = p;
1805 } else if ((ins & 0xfc000000u) == 0x14000000u && 1841 } else if ((ins & 0xfc000000u) == 0x14000000u &&
1806 ((ins ^ (px-p)) & 0x03ffffffu) == 0) { 1842 ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
1843 /* Patch b exitstub. */
1807 *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); 1844 *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu);
1808 cend = p+1; 1845 cend = p+1;
1809 if (!cstart) cstart = p; 1846 if (!cstart) cstart = p;
1847 } else if ((ins & 0x7e000000u) == 0x34000000u &&
1848 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
1849 /* Patch cbz/cbnz exitstub. */
1850 *p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u);
1851 cend = p+1;
1852 if (!cstart) cstart = p;
1853 } else if ((ins & 0x7e000000u) == 0x36000000u &&
1854 ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
1855 /* Patch tbz/tbnz exitstub. */
1856 *p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u);
1857 cend = p+1;
1858 if (!cstart) cstart = p;
1810 } 1859 }
1811 } 1860 }
1812 lua_assert(cstart != NULL); 1861 lua_assert(cstart != NULL);
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 52e75559..1eb14204 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -321,6 +321,25 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
321 as->mcp = p; 321 as->mcp = p;
322} 322}
323 323
324static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
325{
326 MCode *p = as->mcp;
327 ptrdiff_t delta = target - (p - 1);
328 lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0);
329 if (bit > 31) ai |= A64I_X;
330 *--p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r;
331 as->mcp = p;
332}
333
334static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
335{
336 MCode *p = as->mcp;
337 ptrdiff_t delta = target - (p - 1);
338 lua_assert(((delta + 0x40000) >> 19) == 0);
339 *--p = ai | A64F_S19((uint32_t)delta & 0x7ffff) | r;
340 as->mcp = p;
341}
342
324#define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) 343#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))
325 344
326static void emit_call(ASMState *as, void *target) 345static void emit_call(ASMState *as, void *target)
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
index 1cd02fe8..6c8771c6 100644
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -127,7 +127,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
127#define A64F_U12(x) ((x) << 10) 127#define A64F_U12(x) ((x) << 10)
128#define A64F_S26(x) (x) 128#define A64F_S26(x) (x)
129#define A64F_S19(x) ((x) << 5) 129#define A64F_S19(x) ((x) << 5)
130#define A64F_S14(x) ((x) << 5)
130#define A64F_S9(x) ((x) << 12) 131#define A64F_S9(x) ((x) << 12)
132#define A64F_BIT(x) ((x) << 19)
131#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) 133#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
132#define A64F_EX(ex) (A64I_EX | ((ex) << 13)) 134#define A64F_EX(ex) (A64I_EX | ((ex) << 13))
133#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) 135#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10))
@@ -235,6 +237,10 @@ typedef enum A64Ins {
235 A64I_BL = 0x94000000, 237 A64I_BL = 0x94000000,
236 A64I_BR = 0xd61f0000, 238 A64I_BR = 0xd61f0000,
237 A64I_BLR = 0xd63f0000, 239 A64I_BLR = 0xd63f0000,
240 A64I_TBZ = 0x36000000,
241 A64I_TBNZ = 0x37000000,
242 A64I_CBZ = 0x34000000,
243 A64I_CBNZ = 0x35000000,
238 244
239 A64I_NOP = 0xd503201f, 245 A64I_NOP = 0xd503201f,
240 246