diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm_arm64.h | 27 | ||||
| -rw-r--r-- | src/lj_emit_arm64.h | 5 |
2 files changed, 30 insertions, 2 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index c202bc82..25016f4a 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
| @@ -327,6 +327,27 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, | |||
| 327 | emit_lso(as, ai, (rd & 31), base, ofs); | 327 | emit_lso(as, ai, (rd & 31), base, ofs); |
| 328 | } | 328 | } |
| 329 | 329 | ||
| 330 | /* Fuse FP multiply-add/sub. Tries to combine the add/sub 'ir' with an IR_MUL operand into one four-register instruction. 'ai' is the opcode used when the IR_MUL is op1, 'air' the opcode used when it is op2. Returns 1 if the fused instruction was emitted, 0 if nothing was emitted and the caller must handle 'ir' itself. NOTE(review): a fused multiply-add presumably rounds once rather than twice, so results may differ from a separate multiply and add -- confirm this is acceptable. */ | ||
| 331 | static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) | ||
| 332 | { | ||
| 333 | IRRef lref = ir->op1, rref = ir->op2; | ||
| 334 | IRIns *irm; | ||
| 335 | if (lref != rref && /* Never fuse when both operands are the same reference. */ | ||
| 336 | ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && | ||
| 337 | ra_noreg(irm->r)) || /* Case 1: op1 is a fusable IR_MUL with no register assigned; rref (op2) stays the addend. */ | ||
| 338 | (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && | ||
| 339 | (rref = lref, ai = air, ra_noreg(irm->r))))) { /* Case 2: op2 is the IR_MUL; op1 becomes the addend and the reversed opcode is selected. These assignments also happen when ra_noreg() fails, which is harmless because the function then returns 0 without using them. */ | ||
| 340 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
| 341 | Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); /* rref is the non-MUL operand at this point. */ | ||
| 342 | Reg left = ra_alloc2(as, irm, | ||
| 343 | rset_exclude(rset_exclude(RSET_FPR, dest), add)); | ||
| 344 | Reg right = (left >> 8); left &= 255; /* Split the ra_alloc2() result: low 8 bits -> left, remaining upper bits -> right. */ | ||
| 345 | emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); | ||
| 346 | return 1; | ||
| 347 | } | ||
| 348 | return 0; | ||
| 349 | } | ||
| 350 | |||
| 330 | /* -- Calls --------------------------------------------------------------- */ | 351 | /* -- Calls --------------------------------------------------------------- */ |
| 331 | 352 | ||
| 332 | /* Generate a call to a C function. */ | 353 | /* Generate a call to a C function. */ |
| @@ -1308,7 +1329,8 @@ static void asm_intmul(ASMState *as, IRIns *ir) | |||
| 1308 | static void asm_add(ASMState *as, IRIns *ir) | 1329 | static void asm_add(ASMState *as, IRIns *ir) |
| 1309 | { | 1330 | { |
| 1310 | if (irt_isnum(ir->t)) { | 1331 | if (irt_isnum(ir->t)) { |
| 1311 | asm_fparith(as, ir, A64I_FADDd); | 1332 | if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) |
| 1333 | asm_fparith(as, ir, A64I_FADDd); | ||
| 1312 | return; | 1334 | return; |
| 1313 | } | 1335 | } |
| 1314 | asm_intop_s(as, ir, A64I_ADDw); | 1336 | asm_intop_s(as, ir, A64I_ADDw); |
| @@ -1317,7 +1339,8 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
| 1317 | static void asm_sub(ASMState *as, IRIns *ir) | 1339 | static void asm_sub(ASMState *as, IRIns *ir) |
| 1318 | { | 1340 | { |
| 1319 | if (irt_isnum(ir->t)) { | 1341 | if (irt_isnum(ir->t)) { |
| 1320 | asm_fparith(as, ir, A64I_FSUBd); | 1342 | if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) |
| 1343 | asm_fparith(as, ir, A64I_FSUBd); | ||
| 1321 | return; | 1344 | return; |
| 1322 | } | 1345 | } |
| 1323 | asm_intop_s(as, ir, A64I_SUBw); | 1346 | asm_intop_s(as, ir, A64I_SUBw); |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index 6686802b..e0f43689 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
| @@ -74,6 +74,11 @@ static uint32_t emit_isfpk64(uint64_t n) | |||
| 74 | 74 | ||
| 75 | /* -- Emit basic instructions --------------------------------------------- */ | 75 | /* -- Emit basic instructions --------------------------------------------- */ |
| 76 | 76 | ||
| 77 | static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) /* Emit one instruction word built from opcode 'ai' and four register operands. */ | ||
| 78 | { | ||
| 79 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); /* ORs the A64F_D/N/M/A encodings of rd/rn/rm/ra into ai and stores the result at the pre-decremented as->mcp. */ | ||
| 80 | } | ||
| 81 | |||
| 77 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) | 82 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) |
| 78 | { | 83 | { |
| 79 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); | 84 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); |
