diff options
| author | Mike Pall <mike> | 2016-12-08 04:09:29 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2016-12-08 04:09:29 +0100 |
| commit | 3975b6c9f4c59e2913e36f62a99653754fd33fe1 (patch) | |
| tree | 119aecb7d43904309b6b81023cdc92af284c7d29 | |
| parent | 2772cbc36e13200d5b028585abf506a5d19daaba (diff) | |
| download | luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.tar.gz luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.tar.bz2 luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.zip | |
ARM64: Fuse various BAND/BSHL/BSHR/BSAR combinations.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
| -rw-r--r-- | src/lj_asm_arm64.h | 60 |
1 file changed, 54 insertions, 6 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 25016f4a..d14f0224 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
| @@ -348,6 +348,36 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) | |||
| 348 | return 0; | 348 | return 0; |
| 349 | } | 349 | } |
| 350 | 350 | ||
| 351 | /* Fuse BAND + BSHL/BSHR into UBFM. */ | ||
| 352 | static int asm_fuseandshift(ASMState *as, IRIns *ir) | ||
| 353 | { | ||
| 354 | lua_assert(ir->o == IR_BAND); | ||
| 355 | if (!neverfuse(as) && irref_isk(ir->op2)) { | ||
| 356 | uint64_t mask = get_k64val(IR(ir->op2)); | ||
| 357 | IRIns *irl = IR(ir->op1); | ||
| 358 | if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { | ||
| 359 | int32_t shmask = irt_is64(irl->t) ? 63 : 31; | ||
| 360 | int32_t shift = (IR(irl->op2)->i & shmask); | ||
| 361 | int32_t imms = shift; | ||
| 362 | if (irl->o == IR_BSHL) { | ||
| 363 | mask >>= shift; | ||
| 364 | shift = (shmask-shift+1) & shmask; | ||
| 365 | imms = 0; | ||
| 366 | } | ||
| 367 | if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ | ||
| 368 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
| 369 | Reg left = ra_alloc1(as, irl->op1, RSET_GPR); | ||
| 370 | A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; | ||
| 371 | imms += 63 - emit_clz64(mask); | ||
| 372 | if (imms > shmask) imms = shmask; | ||
| 373 | emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); | ||
| 374 | return 1; | ||
| 375 | } | ||
| 376 | } | ||
| 377 | } | ||
| 378 | return 0; | ||
| 379 | } | ||
| 380 | |||
| 351 | /* -- Calls --------------------------------------------------------------- */ | 381 | /* -- Calls --------------------------------------------------------------- */ |
| 352 | 382 | ||
| 353 | /* Generate a call to a C function. */ | 383 | /* Generate a call to a C function. */ |
| @@ -1423,8 +1453,14 @@ static void asm_bitop(ASMState *as, IRIns *ir, A64Ins ai) | |||
| 1423 | } | 1453 | } |
| 1424 | } | 1454 | } |
| 1425 | 1455 | ||
| 1456 | static void asm_band(ASMState *as, IRIns *ir) | ||
| 1457 | { | ||
| 1458 | if (asm_fuseandshift(as, ir)) | ||
| 1459 | return; | ||
| 1460 | asm_bitop(as, ir, A64I_ANDw); | ||
| 1461 | } | ||
| 1462 | |||
| 1426 | #define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw) | 1463 | #define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw) |
| 1427 | #define asm_band(as, ir) asm_bitop(as, ir, A64I_ANDw) | ||
| 1428 | #define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw) | 1464 | #define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw) |
| 1429 | #define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw) | 1465 | #define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw) |
| 1430 | 1466 | ||
| @@ -1437,16 +1473,28 @@ static void asm_bswap(ASMState *as, IRIns *ir) | |||
| 1437 | 1473 | ||
| 1438 | static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) | 1474 | static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) |
| 1439 | { | 1475 | { |
| 1440 | int shmask = irt_is64(ir->t) ? 63 : 31; | 1476 | int32_t shmask = irt_is64(ir->t) ? 63 : 31; |
| 1441 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | 1477 | if (irref_isk(ir->op2)) { /* Constant shifts. */ |
| 1442 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1478 | Reg left, dest = ra_dest(as, ir, RSET_GPR); |
| 1443 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
| 1444 | int32_t shift = (IR(ir->op2)->i & shmask); | 1479 | int32_t shift = (IR(ir->op2)->i & shmask); |
| 1445 | |||
| 1446 | if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; | 1480 | if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; |
| 1481 | |||
| 1482 | /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ | ||
| 1483 | if (!neverfuse(as) && (sh == A64SH_LSR || sh == A64SH_ASR)) { | ||
| 1484 | IRIns *irl = IR(ir->op1); | ||
| 1485 | if (irl->o == IR_BSHL && irref_isk(irl->op2)) { | ||
| 1486 | int32_t shift2 = (IR(irl->op2)->i & shmask); | ||
| 1487 | shift = ((shift - shift2) & shmask); | ||
| 1488 | shmask -= shift2; | ||
| 1489 | ir = irl; | ||
| 1490 | } | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
| 1447 | switch (sh) { | 1494 | switch (sh) { |
| 1448 | case A64SH_LSL: | 1495 | case A64SH_LSL: |
| 1449 | emit_dn(as, ai | A64F_IMMS(shmask-shift) | A64F_IMMR(shmask-shift+1), dest, left); | 1496 | emit_dn(as, ai | A64F_IMMS(shmask-shift) | |
| 1497 | A64F_IMMR((shmask-shift+1)&shmask), dest, left); | ||
| 1450 | break; | 1498 | break; |
| 1451 | case A64SH_LSR: case A64SH_ASR: | 1499 | case A64SH_LSR: case A64SH_ASR: |
| 1452 | emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); | 1500 | emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); |
