aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2016-12-08 01:38:09 +0100
committer Mike Pall <mike> 2016-12-08 01:38:09 +0100
commit 2772cbc36e13200d5b028585abf506a5d19daaba (patch)
tree 8067c8bcbe1efc0afe00769024e8cb390220b4ac
parent bfeb1167cd77194c1d49368e3c1468f134be337c (diff)
download luajit-2772cbc36e13200d5b028585abf506a5d19daaba.tar.gz
luajit-2772cbc36e13200d5b028585abf506a5d19daaba.tar.bz2
luajit-2772cbc36e13200d5b028585abf506a5d19daaba.zip
ARM64: Fuse FP multiply-add/sub.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-rw-r--r-- src/lj_asm_arm64.h 27
-rw-r--r-- src/lj_emit_arm64.h 5
2 files changed, 30 insertions, 2 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index c202bc82..25016f4a 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -327,6 +327,27 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
327 emit_lso(as, ai, (rd & 31), base, ofs); 327 emit_lso(as, ai, (rd & 31), base, ofs);
328} 328}
329 329
330/* Fuse FP multiply-add/sub. If exactly one side of the add/sub in ir is a fusable IR_MUL, emit one 4-operand instruction instead: ai when the MUL is op1, air when the MUL is op2. Returns 1 if the fused instruction was emitted, 0 if the caller must emit the plain op. */
331static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
332{
333 IRRef lref = ir->op1, rref = ir->op2; /* The two operand refs of ir. */
334 IRIns *irm; /* Set to the candidate IR_MUL instruction inside the condition below. */
335 if (lref != rref && /* Never fuse when both operands are the same ref. */
336 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && /* Case 1: op1 is the MUL; ai is kept and rref (op2) is the addend. */
337 ra_noreg(irm->r)) || /* NOTE(review): presumably "MUL result has no register assigned yet" -- confirm against ra_noreg. */
338 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && /* Case 2: op2 is the MUL. */
339 (rref = lref, ai = air, ra_noreg(irm->r))))) { /* Addend becomes op1 and the opcode switches to air; only locals are modified, so this is harmless if ra_noreg() then fails. */
340 Reg dest = ra_dest(as, ir, RSET_FPR); /* Destination register for ir. */
341 Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); /* Addend register, allocated with dest as the hint. */
342 Reg left = ra_alloc2(as, irm, /* Registers for both MUL operands, returned packed in one value. */
343 rset_exclude(rset_exclude(RSET_FPR, dest), add)); /* Allowed set: RSET_FPR with dest and add excluded. */
344 Reg right = (left >> 8); left &= 255; /* Unpack: low 8 bits are the left operand, the bits above are the right operand. */
345 emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); /* Each register number is masked to 5 bits before being encoded. */
346 return 1; /* Fused. */
347 }
348 return 0; /* Not fused; nothing was emitted. */
349}
350
330/* -- Calls --------------------------------------------------------------- */ 351/* -- Calls --------------------------------------------------------------- */
331 352
332/* Generate a call to a C function. */ 353/* Generate a call to a C function. */
@@ -1308,7 +1329,8 @@ static void asm_intmul(ASMState *as, IRIns *ir)
1308static void asm_add(ASMState *as, IRIns *ir) 1329static void asm_add(ASMState *as, IRIns *ir)
1309{ 1330{
1310 if (irt_isnum(ir->t)) { 1331 if (irt_isnum(ir->t)) {
1311 asm_fparith(as, ir, A64I_FADDd); 1332 if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
1333 asm_fparith(as, ir, A64I_FADDd);
1312 return; 1334 return;
1313 } 1335 }
1314 asm_intop_s(as, ir, A64I_ADDw); 1336 asm_intop_s(as, ir, A64I_ADDw);
@@ -1317,7 +1339,8 @@ static void asm_add(ASMState *as, IRIns *ir)
1317static void asm_sub(ASMState *as, IRIns *ir) 1339static void asm_sub(ASMState *as, IRIns *ir)
1318{ 1340{
1319 if (irt_isnum(ir->t)) { 1341 if (irt_isnum(ir->t)) {
1320 asm_fparith(as, ir, A64I_FSUBd); 1342 if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
1343 asm_fparith(as, ir, A64I_FSUBd);
1321 return; 1344 return;
1322 } 1345 }
1323 asm_intop_s(as, ir, A64I_SUBw); 1346 asm_intop_s(as, ir, A64I_SUBw);
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 6686802b..e0f43689 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -74,6 +74,11 @@ static uint32_t emit_isfpk64(uint64_t n)
74 74
75/* -- Emit basic instructions --------------------------------------------- */ 75/* -- Emit basic instructions --------------------------------------------- */
76 76
77static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) /* Emit a 4-register instruction: opcode ai combined with the D, N, M and A field encodings of rd, rn, rm and ra. */
78{
79 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); /* Pre-decrement as->mcp, then store the assembled word at the new position. */
80}
81
77static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) 82static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
78{ 83{
79 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); 84 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);