diff options
author | Mike Pall <mike> | 2016-12-08 01:38:09 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2016-12-08 01:38:09 +0100 |
commit | 2772cbc36e13200d5b028585abf506a5d19daaba (patch) | |
tree | 8067c8bcbe1efc0afe00769024e8cb390220b4ac | |
parent | bfeb1167cd77194c1d49368e3c1468f134be337c (diff) | |
download | luajit-2772cbc36e13200d5b028585abf506a5d19daaba.tar.gz luajit-2772cbc36e13200d5b028585abf506a5d19daaba.tar.bz2 luajit-2772cbc36e13200d5b028585abf506a5d19daaba.zip |
ARM64: Fuse FP multiply-add/sub.

Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-rw-r--r-- | src/lj_asm_arm64.h | 27 | ||||
-rw-r--r-- | src/lj_emit_arm64.h | 5 |
2 files changed, 30 insertions, 2 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index c202bc82..25016f4a 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -327,6 +327,27 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, | |||
327 | emit_lso(as, ai, (rd & 31), base, ofs); | 327 | emit_lso(as, ai, (rd & 31), base, ofs); |
328 | } | 328 | } |
329 | 329 | ||
330 | /* Fuse FP multiply-add/sub. */ | ||
331 | static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) | ||
332 | { | ||
333 | IRRef lref = ir->op1, rref = ir->op2; | ||
334 | IRIns *irm; | ||
335 | if (lref != rref && | ||
336 | ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && | ||
337 | ra_noreg(irm->r)) || | ||
338 | (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && | ||
339 | (rref = lref, ai = air, ra_noreg(irm->r))))) { | ||
340 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
341 | Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); | ||
342 | Reg left = ra_alloc2(as, irm, | ||
343 | rset_exclude(rset_exclude(RSET_FPR, dest), add)); | ||
344 | Reg right = (left >> 8); left &= 255; | ||
345 | emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); | ||
346 | return 1; | ||
347 | } | ||
348 | return 0; | ||
349 | } | ||
350 | |||
330 | /* -- Calls --------------------------------------------------------------- */ | 351 | /* -- Calls --------------------------------------------------------------- */ |
331 | 352 | ||
332 | /* Generate a call to a C function. */ | 353 | /* Generate a call to a C function. */ |
@@ -1308,7 +1329,8 @@ static void asm_intmul(ASMState *as, IRIns *ir) | |||
1308 | static void asm_add(ASMState *as, IRIns *ir) | 1329 | static void asm_add(ASMState *as, IRIns *ir) |
1309 | { | 1330 | { |
1310 | if (irt_isnum(ir->t)) { | 1331 | if (irt_isnum(ir->t)) { |
1311 | asm_fparith(as, ir, A64I_FADDd); | 1332 | if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) |
1333 | asm_fparith(as, ir, A64I_FADDd); | ||
1312 | return; | 1334 | return; |
1313 | } | 1335 | } |
1314 | asm_intop_s(as, ir, A64I_ADDw); | 1336 | asm_intop_s(as, ir, A64I_ADDw); |
@@ -1317,7 +1339,8 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
1317 | static void asm_sub(ASMState *as, IRIns *ir) | 1339 | static void asm_sub(ASMState *as, IRIns *ir) |
1318 | { | 1340 | { |
1319 | if (irt_isnum(ir->t)) { | 1341 | if (irt_isnum(ir->t)) { |
1320 | asm_fparith(as, ir, A64I_FSUBd); | 1342 | if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) |
1343 | asm_fparith(as, ir, A64I_FSUBd); | ||
1321 | return; | 1344 | return; |
1322 | } | 1345 | } |
1323 | asm_intop_s(as, ir, A64I_SUBw); | 1346 | asm_intop_s(as, ir, A64I_SUBw); |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index 6686802b..e0f43689 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
@@ -74,6 +74,11 @@ static uint32_t emit_isfpk64(uint64_t n) | |||
74 | 74 | ||
75 | /* -- Emit basic instructions --------------------------------------------- */ | 75 | /* -- Emit basic instructions --------------------------------------------- */ |
76 | 76 | ||
77 | static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) | ||
78 | { | ||
79 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); | ||
80 | } | ||
81 | |||
77 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) | 82 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) |
78 | { | 83 | { |
79 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); | 84 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); |