diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm.c | 32 | ||||
| -rw-r--r-- | src/lj_opt_fold.c | 86 | ||||
| -rw-r--r-- | src/lj_target_x86.h | 1 |
3 files changed, 106 insertions, 13 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 5d48b6be..ce42c437 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
| @@ -2482,6 +2482,31 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) | |||
| 2482 | ra_left(as, dest, lref); | 2482 | ra_left(as, dest, lref); |
| 2483 | } | 2483 | } |
| 2484 | 2484 | ||
| 2485 | static void asm_intmul(ASMState *as, IRIns *ir) | ||
| 2486 | { | ||
| 2487 | IRRef lref = ir->op1; | ||
| 2488 | IRRef rref = ir->op2; | ||
| 2489 | int32_t k = 0; | ||
| 2490 | if (asm_isk32(as, rref, &k)) { | ||
| 2491 | /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */ | ||
| 2492 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
| 2493 | Reg left = asm_fuseload(as, lref, RSET_GPR); | ||
| 2494 | x86Op xo; | ||
| 2495 | if (checki8(k)) { | ||
| 2496 | emit_i8(as, k); | ||
| 2497 | xo = XO_IMULi8; | ||
| 2498 | } else { | ||
| 2499 | emit_i32(as, k); | ||
| 2500 | xo = XO_IMULi; | ||
| 2501 | } | ||
| 2502 | emit_rr(as, xo, REX_64IR(ir, dest), left); | ||
| 2503 | } else { | ||
| 2504 | /* NYI: integer multiply of non-constant operands. */ | ||
| 2505 | setintV(&as->J->errinfo, ir->o); | ||
| 2506 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
| 2507 | } | ||
| 2508 | } | ||
| 2509 | |||
| 2485 | /* LEA is really a 4-operand ADD with an independent destination register, | 2510 | /* LEA is really a 4-operand ADD with an independent destination register, |
| 2486 | ** up to two source registers and an immediate. One register can be scaled | 2511 | ** up to two source registers and an immediate. One register can be scaled |
| 2487 | ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several | 2512 | ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several |
| @@ -3445,7 +3470,12 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 3445 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ | 3470 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ |
| 3446 | asm_intarith(as, ir, XOg_SUB); | 3471 | asm_intarith(as, ir, XOg_SUB); |
| 3447 | break; | 3472 | break; |
| 3448 | case IR_MUL: asm_fparith(as, ir, XO_MULSD); break; | 3473 | case IR_MUL: |
| 3474 | if (irt_isnum(ir->t)) | ||
| 3475 | asm_fparith(as, ir, XO_MULSD); | ||
| 3476 | else | ||
| 3477 | asm_intmul(as, ir); | ||
| 3478 | break; | ||
| 3449 | case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break; | 3479 | case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break; |
| 3450 | 3480 | ||
| 3451 | case IR_NEG: asm_fparith(as, ir, XO_XORPS); break; | 3481 | case IR_NEG: asm_fparith(as, ir, XO_XORPS); break; |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 22d211e1..cae4e5e4 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
| @@ -197,6 +197,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | |||
| 197 | switch (op) { | 197 | switch (op) { |
| 198 | case IR_ADD: k1 += k2; break; | 198 | case IR_ADD: k1 += k2; break; |
| 199 | case IR_SUB: k1 -= k2; break; | 199 | case IR_SUB: k1 -= k2; break; |
| 200 | case IR_MUL: k1 *= k2; break; | ||
| 200 | case IR_BAND: k1 &= k2; break; | 201 | case IR_BAND: k1 &= k2; break; |
| 201 | case IR_BOR: k1 |= k2; break; | 202 | case IR_BOR: k1 |= k2; break; |
| 202 | case IR_BXOR: k1 ^= k2; break; | 203 | case IR_BXOR: k1 ^= k2; break; |
| @@ -212,6 +213,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | |||
| 212 | 213 | ||
| 213 | LJFOLD(ADD KINT KINT) | 214 | LJFOLD(ADD KINT KINT) |
| 214 | LJFOLD(SUB KINT KINT) | 215 | LJFOLD(SUB KINT KINT) |
| 216 | LJFOLD(MUL KINT KINT) | ||
| 215 | LJFOLD(BAND KINT KINT) | 217 | LJFOLD(BAND KINT KINT) |
| 216 | LJFOLD(BOR KINT KINT) | 218 | LJFOLD(BOR KINT KINT) |
| 217 | LJFOLD(BXOR KINT KINT) | 219 | LJFOLD(BXOR KINT KINT) |
| @@ -680,6 +682,43 @@ LJFOLDF(simplify_intsub_k64) | |||
| 680 | return RETRYFOLD; | 682 | return RETRYFOLD; |
| 681 | } | 683 | } |
| 682 | 684 | ||
| 685 | static TRef simplify_intmul_k(jit_State *J, int32_t k) | ||
| 686 | { | ||
| 687 | /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2. | ||
| 688 | ** But this is mainly intended for simple address arithmetic. | ||
| 689 | ** Also it's easier for the backend to optimize the original multiplies. | ||
| 690 | */ | ||
| 691 | if (k == 1) { /* i * 1 ==> i */ | ||
| 692 | return LEFTFOLD; | ||
| 693 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ | ||
| 694 | fins->o = IR_BSHL; | ||
| 695 | fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k)); | ||
| 696 | return RETRYFOLD; | ||
| 697 | } | ||
| 698 | return NEXTFOLD; | ||
| 699 | } | ||
| 700 | |||
| 701 | LJFOLD(MUL any KINT) | ||
| 702 | LJFOLDF(simplify_intmul_k32) | ||
| 703 | { | ||
| 704 | if (fright->i == 0) /* i * 0 ==> 0 */ | ||
| 705 | return INTFOLD(0); | ||
| 706 | else if (fright->i > 0) | ||
| 707 | return simplify_intmul_k(J, fright->i); | ||
| 708 | return NEXTFOLD; | ||
| 709 | } | ||
| 710 | |||
| 711 | LJFOLD(MUL any KINT64) | ||
| 712 | LJFOLDF(simplify_intmul_k64) | ||
| 713 | |||
| 714 | { | ||
| 715 | if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ | ||
| 716 | return lj_ir_kint64(J, 0); | ||
| 717 | else if (ir_kint64(fright)->u64 < 0x80000000u) | ||
| 718 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); | ||
| 719 | return NEXTFOLD; | ||
| 720 | } | ||
| 721 | |||
| 683 | LJFOLD(SUB any any) | 722 | LJFOLD(SUB any any) |
| 684 | LJFOLD(SUBOV any any) | 723 | LJFOLD(SUBOV any any) |
| 685 | LJFOLDF(simplify_intsub) | 724 | LJFOLDF(simplify_intsub) |
| @@ -816,16 +855,17 @@ LJFOLD(BROL any KINT) | |||
| 816 | LJFOLD(BROR any KINT) | 855 | LJFOLD(BROR any KINT) |
| 817 | LJFOLDF(simplify_shift_ik) | 856 | LJFOLDF(simplify_shift_ik) |
| 818 | { | 857 | { |
| 819 | int32_t k = (fright->i & 31); | 858 | int32_t mask = irt_is64(fins->t) ? 63 : 31; |
| 859 | int32_t k = (fright->i & mask); | ||
| 820 | if (k == 0) /* i o 0 ==> i */ | 860 | if (k == 0) /* i o 0 ==> i */ |
| 821 | return LEFTFOLD; | 861 | return LEFTFOLD; |
| 822 | if (k != fright->i) { /* i o k ==> i o (k & 31) */ | 862 | if (k != fright->i) { /* i o k ==> i o (k & mask) */ |
| 823 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 863 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
| 824 | return RETRYFOLD; | 864 | return RETRYFOLD; |
| 825 | } | 865 | } |
| 826 | if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&31) */ | 866 | if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&mask) */ |
| 827 | fins->o = IR_BROL; | 867 | fins->o = IR_BROL; |
| 828 | fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31); | 868 | fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask); |
| 829 | return RETRYFOLD; | 869 | return RETRYFOLD; |
| 830 | } | 870 | } |
| 831 | return NEXTFOLD; | 871 | return NEXTFOLD; |
| @@ -841,9 +881,10 @@ LJFOLDF(simplify_shift_andk) | |||
| 841 | IRIns *irk = IR(fright->op2); | 881 | IRIns *irk = IR(fright->op2); |
| 842 | PHIBARRIER(fright); | 882 | PHIBARRIER(fright); |
| 843 | if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | 883 | if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && |
| 844 | irk->o == IR_KINT) { /* i o (j & 31) ==> i o j */ | 884 | irk->o == IR_KINT) { /* i o (j & mask) ==> i o j */ |
| 845 | int32_t k = irk->i & 31; | 885 | int32_t mask = irt_is64(fins->t) ? 63 : 31; |
| 846 | if (k == 31) { | 886 | int32_t k = irk->i & mask; |
| 887 | if (k == mask) { | ||
| 847 | fins->op2 = fright->op1; | 888 | fins->op2 = fright->op1; |
| 848 | return RETRYFOLD; | 889 | return RETRYFOLD; |
| 849 | } | 890 | } |
| @@ -870,9 +911,29 @@ LJFOLDF(simplify_shift2_ki) | |||
| 870 | return NEXTFOLD; | 911 | return NEXTFOLD; |
| 871 | } | 912 | } |
| 872 | 913 | ||
| 914 | LJFOLD(BSHL KINT64 any) | ||
| 915 | LJFOLD(BSHR KINT64 any) | ||
| 916 | LJFOLDF(simplify_shift1_ki64) | ||
| 917 | { | ||
| 918 | if (ir_kint64(fleft)->u64 == 0) /* 0 o i ==> 0 */ | ||
| 919 | return LEFTFOLD; | ||
| 920 | return NEXTFOLD; | ||
| 921 | } | ||
| 922 | |||
| 923 | LJFOLD(BSAR KINT64 any) | ||
| 924 | LJFOLD(BROL KINT64 any) | ||
| 925 | LJFOLD(BROR KINT64 any) | ||
| 926 | LJFOLDF(simplify_shift2_ki64) | ||
| 927 | { | ||
| 928 | if (ir_kint64(fleft)->u64 == 0 || (int64_t)ir_kint64(fleft)->u64 == -1) | ||
| 929 | return LEFTFOLD; /* 0 o i ==> 0; -1 o i ==> -1 */ | ||
| 930 | return NEXTFOLD; | ||
| 931 | } | ||
| 932 | |||
| 873 | /* -- Reassociation ------------------------------------------------------- */ | 933 | /* -- Reassociation ------------------------------------------------------- */ |
| 874 | 934 | ||
| 875 | LJFOLD(ADD ADD KINT) | 935 | LJFOLD(ADD ADD KINT) |
| 936 | LJFOLD(MUL MUL KINT) | ||
| 876 | LJFOLD(BAND BAND KINT) | 937 | LJFOLD(BAND BAND KINT) |
| 877 | LJFOLD(BOR BOR KINT) | 938 | LJFOLD(BOR BOR KINT) |
| 878 | LJFOLD(BXOR BXOR KINT) | 939 | LJFOLD(BXOR BXOR KINT) |
| @@ -924,14 +985,15 @@ LJFOLDF(reassoc_shift) | |||
| 924 | IRIns *irk = IR(fleft->op2); | 985 | IRIns *irk = IR(fleft->op2); |
| 925 | PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */ | 986 | PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */ |
| 926 | if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */ | 987 | if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */ |
| 927 | int32_t k = (irk->i & 31) + (fright->i & 31); | 988 | int32_t mask = irt_is64(fins->t) ? 63 : 31; |
| 928 | if (k > 31) { /* Combined shift too wide? */ | 989 | int32_t k = (irk->i & mask) + (fright->i & mask); |
| 990 | if (k > mask) { /* Combined shift too wide? */ | ||
| 929 | if (fins->o == IR_BSHL || fins->o == IR_BSHR) | 991 | if (fins->o == IR_BSHL || fins->o == IR_BSHR) |
| 930 | return INTFOLD(0); | 992 | return mask == 31 ? INTFOLD(0) : lj_ir_kint64(J, 0); |
| 931 | else if (fins->o == IR_BSAR) | 993 | else if (fins->o == IR_BSAR) |
| 932 | k = 31; | 994 | k = mask; |
| 933 | else | 995 | else |
| 934 | k &= 31; | 996 | k &= mask; |
| 935 | } | 997 | } |
| 936 | fins->op1 = fleft->op1; | 998 | fins->op1 = fleft->op1; |
| 937 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 999 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 0fcee6dd..d3956a09 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
| @@ -218,6 +218,7 @@ typedef enum { | |||
| 218 | XO_SHIFTi = XO_(c1), | 218 | XO_SHIFTi = XO_(c1), |
| 219 | XO_SHIFT1 = XO_(d1), | 219 | XO_SHIFT1 = XO_(d1), |
| 220 | XO_SHIFTcl = XO_(d3), | 220 | XO_SHIFTcl = XO_(d3), |
| 221 | XO_IMULi = XO_(69), | ||
| 221 | XO_IMULi8 = XO_(6b), | 222 | XO_IMULi8 = XO_(6b), |
| 222 | XO_CMP = XO_(3b), | 223 | XO_CMP = XO_(3b), |
| 223 | XO_TEST = XO_(85), | 224 | XO_TEST = XO_(85), |
