diff options
author | Mike Pall <mike> | 2016-12-07 18:40:31 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2016-12-07 18:40:31 +0100 |
commit | bfeb1167cd77194c1d49368e3c1468f134be337c (patch) | |
tree | 0b3014cd4ca2be2d6ffe738e1c4f6d453b5597b8 | |
parent | 2ac2cd4699d2e3a2eaa55417eae901216204fb37 (diff) | |
download | luajit-bfeb1167cd77194c1d49368e3c1468f134be337c.tar.gz luajit-bfeb1167cd77194c1d49368e3c1468f134be337c.tar.bz2 luajit-bfeb1167cd77194c1d49368e3c1468f134be337c.zip |
ARM64: Fuse XLOAD/XSTORE with STRREF/ADD/BSHL/CONV.
-rw-r--r-- | src/lj_asm_arm64.h | 53 |
1 file changed, 47 insertions, 6 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index fff0b3fd..c202bc82 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -232,7 +232,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) | |||
232 | irl->o == IR_CONV && | 232 | irl->o == IR_CONV && |
233 | irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && | 233 | irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && |
234 | shift <= 4 && | 234 | shift <= 4 && |
235 | mayfuse(as, ir->op1)) { | 235 | !neverfuse(as)) { |
236 | Reg m = ra_alloc1(as, irl->op1, allow); | 236 | Reg m = ra_alloc1(as, irl->op1, allow); |
237 | return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); | 237 | return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); |
238 | } else { | 238 | } else { |
@@ -257,19 +257,60 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, | |||
257 | int32_t ofs = 0; | 257 | int32_t ofs = 0; |
258 | if (ra_noreg(ir->r) && canfuse(as, ir)) { | 258 | if (ra_noreg(ir->r) && canfuse(as, ir)) { |
259 | if (ir->o == IR_ADD) { | 259 | if (ir->o == IR_ADD) { |
260 | if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) | 260 | if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { |
261 | ref = ir->op1; | 261 | ref = ir->op1; |
262 | /* NYI: Fuse add with two registers. */ | 262 | } else { |
263 | Reg rn, rm; | ||
264 | IRRef lref = ir->op1, rref = ir->op2; | ||
265 | IRIns *irl = IR(lref); | ||
266 | if (mayfuse(as, irl->op1)) { | ||
267 | unsigned int shift = 4; | ||
268 | if (irl->o == IR_BSHL && irref_isk(irl->op2)) { | ||
269 | shift = (IR(irl->op2)->i & 63); | ||
270 | } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { | ||
271 | shift = 1; | ||
272 | } | ||
273 | if ((ai >> 30) == shift) { | ||
274 | lref = irl->op1; | ||
275 | irl = IR(lref); | ||
276 | ai |= A64I_LS_SH; | ||
277 | } | ||
278 | } | ||
279 | if (irl->o == IR_CONV && | ||
280 | irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && | ||
281 | !neverfuse(as)) { | ||
282 | lref = irl->op1; | ||
283 | ai |= A64I_LS_SXTWx; | ||
284 | } else { | ||
285 | ai |= A64I_LS_LSLx; | ||
286 | } | ||
287 | rm = ra_alloc1(as, lref, allow); | ||
288 | rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); | ||
289 | emit_dnm(as, (ai^A64I_LS_R), rd, rn, rm); | ||
290 | return; | ||
291 | } | ||
263 | } else if (ir->o == IR_STRREF) { | 292 | } else if (ir->o == IR_STRREF) { |
264 | if (asm_isk32(as, ir->op2, &ofs)) { | 293 | if (asm_isk32(as, ir->op2, &ofs)) { |
265 | ref = ir->op1; | 294 | ref = ir->op1; |
266 | } else if (asm_isk32(as, ir->op1, &ofs)) { | 295 | } else if (asm_isk32(as, ir->op1, &ofs)) { |
267 | ref = ir->op2; | 296 | ref = ir->op2; |
268 | } else { | 297 | } else { |
269 | /* NYI: Fuse ADD with constant. */ | ||
270 | Reg rn = ra_alloc1(as, ir->op1, allow); | 298 | Reg rn = ra_alloc1(as, ir->op1, allow); |
271 | uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); | 299 | IRIns *irr = IR(ir->op2); |
272 | emit_lso(as, ai, rd, rd, sizeof(GCstr)); | 300 | uint32_t m; |
301 | if (irr+1 == ir && !ra_used(irr) && | ||
302 | irr->o == IR_ADD && irref_isk(irr->op2)) { | ||
303 | ofs = sizeof(GCstr) + IR(irr->op2)->i; | ||
304 | if (emit_checkofs(ai, ofs)) { | ||
305 | Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); | ||
306 | m = A64F_M(rm) | A64F_EX(A64EX_SXTW); | ||
307 | goto skipopm; | ||
308 | } | ||
309 | } | ||
310 | m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); | ||
311 | ofs = sizeof(GCstr); | ||
312 | skipopm: | ||
313 | emit_lso(as, ai, rd, rd, ofs); | ||
273 | emit_dn(as, A64I_ADDx^m, rd, rn); | 314 | emit_dn(as, A64I_ADDx^m, rd, rn); |
274 | return; | 315 | return; |
275 | } | 316 | } |