aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2016-12-08 04:09:29 +0100
committer Mike Pall <mike> 2016-12-08 04:09:29 +0100
commit 3975b6c9f4c59e2913e36f62a99653754fd33fe1 (patch)
tree 119aecb7d43904309b6b81023cdc92af284c7d29
parent 2772cbc36e13200d5b028585abf506a5d19daaba (diff)
download luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.tar.gz
luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.tar.bz2
luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.zip
ARM64: Fuse various BAND/BSHL/BSHR/BSAR combinations.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-rw-r--r-- src/lj_asm_arm64.h 60
1 file changed, 54 insertions, 6 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 25016f4a..d14f0224 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -348,6 +348,36 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
348 return 0; 348 return 0;
349} 349}
350 350
351/* Fuse BAND + BSHL/BSHR into UBFM.
**
** Applies when ir is a BAND whose op2 is a constant and whose op1 is a
** BSHL or BSHR with a constant shift count, and the (adjusted) mask is a
** run of contiguous 1-bits starting at bit 0.
** Returns 1 if a single UBFM instruction was emitted for the pair,
** 0 if nothing was emitted and the caller has to assemble the BAND itself.
*/
352static int asm_fuseandshift(ASMState *as, IRIns *ir)
353{
354 lua_assert(ir->o == IR_BAND);
355 if (!neverfuse(as) && irref_isk(ir->op2)) {
356 uint64_t mask = get_k64val(IR(ir->op2));
357 IRIns *irl = IR(ir->op1);
358 if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
359 int32_t shmask = irt_is64(irl->t) ? 63 : 31; /* Largest shift count for the shift's operand width. */
360 int32_t shift = (IR(irl->op2)->i & shmask); /* Constant shift count, wrapped to the operand width. */
361 int32_t imms = shift; /* BSHR case: start from the shift count; the mask width is added below. */
362 if (irl->o == IR_BSHL) {
363 mask >>= shift; /* Express the mask relative to the unshifted source value. */
364 shift = (shmask-shift+1) & shmask; /* Same IMMR form as the A64SH_LSL case in asm_bitshift. */
365 imms = 0;
366 }
367 if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */
368 Reg dest = ra_dest(as, ir, RSET_GPR);
369 Reg left = ra_alloc1(as, irl->op1, RSET_GPR); /* Source is the shift's input, not the shift result. */
370 A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
371 imms += 63 - emit_clz64(mask); /* Adds the index of the mask's top set bit (assumes emit_clz64 counts leading zeros of a 64 bit value -- TODO confirm). */
372 if (imms > shmask) imms = shmask; /* Clamp to the top bit of the operand width. */
373 emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
374 return 1;
375 }
376 }
377 }
378 return 0;
379}
380
351/* -- Calls --------------------------------------------------------------- */ 381/* -- Calls --------------------------------------------------------------- */
352 382
353/* Generate a call to a C function. */ 383/* Generate a call to a C function. */
@@ -1423,8 +1453,14 @@ static void asm_bitop(ASMState *as, IRIns *ir, A64Ins ai)
1423 } 1453 }
1424} 1454}
1425 1455
/* Assemble BAND: try the BAND + constant-shift fusion first, otherwise
** fall back to the generic bit operation with A64I_ANDw.
*/
1456static void asm_band(ASMState *as, IRIns *ir)
1457{
1458 if (asm_fuseandshift(as, ir))
1459 return; /* The fused instruction has already been emitted. */
1460 asm_bitop(as, ir, A64I_ANDw);
1461}
1462
1426#define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw) 1463#define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw)
1427#define asm_band(as, ir) asm_bitop(as, ir, A64I_ANDw)
1428#define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw) 1464#define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw)
1429#define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw) 1465#define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw)
1430 1466
@@ -1437,16 +1473,28 @@ static void asm_bswap(ASMState *as, IRIns *ir)
1437 1473
1438static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) 1474static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
1439{ 1475{
1440 int shmask = irt_is64(ir->t) ? 63 : 31; 1476 int32_t shmask = irt_is64(ir->t) ? 63 : 31;
1441 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1477 if (irref_isk(ir->op2)) { /* Constant shifts. */
1442 Reg dest = ra_dest(as, ir, RSET_GPR); 1478 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1443 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1444 int32_t shift = (IR(ir->op2)->i & shmask); 1479 int32_t shift = (IR(ir->op2)->i & shmask);
1445
1446 if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; 1480 if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;
1481
1482 /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
1483 if (!neverfuse(as) && (sh == A64SH_LSR || sh == A64SH_ASR)) {
1484 IRIns *irl = IR(ir->op1);
1485 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
1486 int32_t shift2 = (IR(irl->op2)->i & shmask);
1487 shift = ((shift - shift2) & shmask);
1488 shmask -= shift2;
1489 ir = irl;
1490 }
1491 }
1492
1493 left = ra_alloc1(as, ir->op1, RSET_GPR);
1447 switch (sh) { 1494 switch (sh) {
1448 case A64SH_LSL: 1495 case A64SH_LSL:
1449 emit_dn(as, ai | A64F_IMMS(shmask-shift) | A64F_IMMR(shmask-shift+1), dest, left); 1496 emit_dn(as, ai | A64F_IMMS(shmask-shift) |
1497 A64F_IMMR((shmask-shift+1)&shmask), dest, left);
1450 break; 1498 break;
1451 case A64SH_LSR: case A64SH_ASR: 1499 case A64SH_LSR: case A64SH_ASR:
1452 emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); 1500 emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);