aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2016-12-07 18:40:31 +0100
committer: Mike Pall <mike> 2016-12-07 18:40:31 +0100
commit: bfeb1167cd77194c1d49368e3c1468f134be337c (patch)
tree: 0b3014cd4ca2be2d6ffe738e1c4f6d453b5597b8
parent: 2ac2cd4699d2e3a2eaa55417eae901216204fb37 (diff)
download: luajit-bfeb1167cd77194c1d49368e3c1468f134be337c.tar.gz
luajit-bfeb1167cd77194c1d49368e3c1468f134be337c.tar.bz2
luajit-bfeb1167cd77194c1d49368e3c1468f134be337c.zip
ARM64: Fuse XLOAD/XSTORE with STRREF/ADD/BSHL/CONV.
-rw-r--r-- src/lj_asm_arm64.h 53
1 file changed, 47 insertions, 6 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index fff0b3fd..c202bc82 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -232,7 +232,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
232 irl->o == IR_CONV && 232 irl->o == IR_CONV &&
233 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && 233 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
234 shift <= 4 && 234 shift <= 4 &&
235 mayfuse(as, ir->op1)) { 235 !neverfuse(as)) {
236 Reg m = ra_alloc1(as, irl->op1, allow); 236 Reg m = ra_alloc1(as, irl->op1, allow);
237 return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); 237 return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
238 } else { 238 } else {
@@ -257,19 +257,60 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
257 int32_t ofs = 0; 257 int32_t ofs = 0;
258 if (ra_noreg(ir->r) && canfuse(as, ir)) { 258 if (ra_noreg(ir->r) && canfuse(as, ir)) {
259 if (ir->o == IR_ADD) { 259 if (ir->o == IR_ADD) {
260 if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) 260 if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
261 ref = ir->op1; 261 ref = ir->op1;
262 /* NYI: Fuse add with two registers. */ 262 } else {
263 Reg rn, rm;
264 IRRef lref = ir->op1, rref = ir->op2;
265 IRIns *irl = IR(lref);
266 if (mayfuse(as, irl->op1)) {
267 unsigned int shift = 4;
268 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
269 shift = (IR(irl->op2)->i & 63);
270 } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
271 shift = 1;
272 }
273 if ((ai >> 30) == shift) {
274 lref = irl->op1;
275 irl = IR(lref);
276 ai |= A64I_LS_SH;
277 }
278 }
279 if (irl->o == IR_CONV &&
280 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
281 !neverfuse(as)) {
282 lref = irl->op1;
283 ai |= A64I_LS_SXTWx;
284 } else {
285 ai |= A64I_LS_LSLx;
286 }
287 rm = ra_alloc1(as, lref, allow);
288 rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
289 emit_dnm(as, (ai^A64I_LS_R), rd, rn, rm);
290 return;
291 }
263 } else if (ir->o == IR_STRREF) { 292 } else if (ir->o == IR_STRREF) {
264 if (asm_isk32(as, ir->op2, &ofs)) { 293 if (asm_isk32(as, ir->op2, &ofs)) {
265 ref = ir->op1; 294 ref = ir->op1;
266 } else if (asm_isk32(as, ir->op1, &ofs)) { 295 } else if (asm_isk32(as, ir->op1, &ofs)) {
267 ref = ir->op2; 296 ref = ir->op2;
268 } else { 297 } else {
269 /* NYI: Fuse ADD with constant. */
270 Reg rn = ra_alloc1(as, ir->op1, allow); 298 Reg rn = ra_alloc1(as, ir->op1, allow);
271 uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); 299 IRIns *irr = IR(ir->op2);
272 emit_lso(as, ai, rd, rd, sizeof(GCstr)); 300 uint32_t m;
301 if (irr+1 == ir && !ra_used(irr) &&
302 irr->o == IR_ADD && irref_isk(irr->op2)) {
303 ofs = sizeof(GCstr) + IR(irr->op2)->i;
304 if (emit_checkofs(ai, ofs)) {
305 Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
306 m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
307 goto skipopm;
308 }
309 }
310 m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
311 ofs = sizeof(GCstr);
312 skipopm:
313 emit_lso(as, ai, rd, rd, ofs);
273 emit_dn(as, A64I_ADDx^m, rd, rn); 314 emit_dn(as, A64I_ADDx^m, rd, rn);
274 return; 315 return;
275 } 316 }