diff options
author | Mike Pall <mike> | 2016-12-08 04:09:29 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2016-12-08 04:09:29 +0100 |
commit | 3975b6c9f4c59e2913e36f62a99653754fd33fe1 (patch) | |
tree | 119aecb7d43904309b6b81023cdc92af284c7d29 | |
parent | 2772cbc36e13200d5b028585abf506a5d19daaba (diff) | |
download | luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.tar.gz luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.tar.bz2 luajit-3975b6c9f4c59e2913e36f62a99653754fd33fe1.zip |
ARM64: Fuse various BAND/BSHL/BSHR/BSAR combinations.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-rw-r--r-- | src/lj_asm_arm64.h | 60 |
1 files changed, 54 insertions, 6 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 25016f4a..d14f0224 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -348,6 +348,36 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) | |||
348 | return 0; | 348 | return 0; |
349 | } | 349 | } |
350 | 350 | ||
/* Fuse BAND + BSHL/BSHR into UBFM. */
/*
** Pattern-match (x >> k) & m or (x << k) & m, where m is a constant whose
** set bits are contiguous from bit 0, and emit a single ARM64 UBFM
** (unsigned bitfield move, aka UBFX/UBFIZ) instead of a shift plus an AND.
** Returns 1 if the fused instruction was emitted, 0 if the caller should
** fall back to the generic AND.
*/
static int asm_fuseandshift(ASMState *as, IRIns *ir)
{
  lua_assert(ir->o == IR_BAND);
  if (!neverfuse(as) && irref_isk(ir->op2)) {
    uint64_t mask = get_k64val(IR(ir->op2));
    IRIns *irl = IR(ir->op1);
    if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
      /* Shift amounts are taken modulo the operand width (31 or 63 mask). */
      int32_t shmask = irt_is64(irl->t) ? 63 : 31;
      int32_t shift = (IR(irl->op2)->i & shmask);
      int32_t imms = shift;
      if (irl->o == IR_BSHL) {
	/*
	** For (x << shift) & mask, UBFM needs the field taken from bit 0 of
	** the source (imms base 0) and rotated left by 'shift', i.e. an
	** immr of (width - shift) mod width. The mask is normalized to the
	** pre-shift field so the contiguity test below applies uniformly.
	*/
	mask >>= shift;
	shift = (shmask-shift+1) & shmask;
	imms = 0;
      }
      /* mask+1 clears all bits iff mask is 000..0111..1 (and mask != 0). */
      if (mask && !((mask+1) & mask)) {  /* Contiguous 1-bits at the bottom. */
	Reg dest = ra_dest(as, ir, RSET_GPR);
	/* Allocate the shift's own operand: the shift itself is elided. */
	Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
	A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
	/* imms = index of the field's top bit; 63 - clz64 = log2 position. */
	imms += 63 - emit_clz64(mask);
	if (imms > shmask) imms = shmask;  /* Clamp to the operand width. */
	emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
	return 1;
      }
    }
  }
  return 0;
}
380 | |||
351 | /* -- Calls --------------------------------------------------------------- */ | 381 | /* -- Calls --------------------------------------------------------------- */ |
352 | 382 | ||
353 | /* Generate a call to a C function. */ | 383 | /* Generate a call to a C function. */ |
@@ -1423,8 +1453,14 @@ static void asm_bitop(ASMState *as, IRIns *ir, A64Ins ai) | |||
1423 | } | 1453 | } |
1424 | } | 1454 | } |
1425 | 1455 | ||
1456 | static void asm_band(ASMState *as, IRIns *ir) | ||
1457 | { | ||
1458 | if (asm_fuseandshift(as, ir)) | ||
1459 | return; | ||
1460 | asm_bitop(as, ir, A64I_ANDw); | ||
1461 | } | ||
1462 | |||
1426 | #define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw) | 1463 | #define asm_bnot(as, ir) asm_bitop(as, ir, A64I_MVNw) |
1427 | #define asm_band(as, ir) asm_bitop(as, ir, A64I_ANDw) | ||
1428 | #define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw) | 1464 | #define asm_bor(as, ir) asm_bitop(as, ir, A64I_ORRw) |
1429 | #define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw) | 1465 | #define asm_bxor(as, ir) asm_bitop(as, ir, A64I_EORw) |
1430 | 1466 | ||
@@ -1437,16 +1473,28 @@ static void asm_bswap(ASMState *as, IRIns *ir) | |||
1437 | 1473 | ||
1438 | static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) | 1474 | static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) |
1439 | { | 1475 | { |
1440 | int shmask = irt_is64(ir->t) ? 63 : 31; | 1476 | int32_t shmask = irt_is64(ir->t) ? 63 : 31; |
1441 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | 1477 | if (irref_isk(ir->op2)) { /* Constant shifts. */ |
1442 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1478 | Reg left, dest = ra_dest(as, ir, RSET_GPR); |
1443 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1444 | int32_t shift = (IR(ir->op2)->i & shmask); | 1479 | int32_t shift = (IR(ir->op2)->i & shmask); |
1445 | |||
1446 | if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; | 1480 | if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; |
1481 | |||
1482 | /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ | ||
1483 | if (!neverfuse(as) && (sh == A64SH_LSR || sh == A64SH_ASR)) { | ||
1484 | IRIns *irl = IR(ir->op1); | ||
1485 | if (irl->o == IR_BSHL && irref_isk(irl->op2)) { | ||
1486 | int32_t shift2 = (IR(irl->op2)->i & shmask); | ||
1487 | shift = ((shift - shift2) & shmask); | ||
1488 | shmask -= shift2; | ||
1489 | ir = irl; | ||
1490 | } | ||
1491 | } | ||
1492 | |||
1493 | left = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1447 | switch (sh) { | 1494 | switch (sh) { |
1448 | case A64SH_LSL: | 1495 | case A64SH_LSL: |
1449 | emit_dn(as, ai | A64F_IMMS(shmask-shift) | A64F_IMMR(shmask-shift+1), dest, left); | 1496 | emit_dn(as, ai | A64F_IMMS(shmask-shift) | |
1497 | A64F_IMMR((shmask-shift+1)&shmask), dest, left); | ||
1450 | break; | 1498 | break; |
1451 | case A64SH_LSR: case A64SH_ASR: | 1499 | case A64SH_LSR: case A64SH_ASR: |
1452 | emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); | 1500 | emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); |