diff options
author | Mike Pall <mike> | 2010-12-06 12:54:00 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2010-12-06 12:54:00 +0100 |
commit | 86fd2289f067536c99e677af8cae3f307810b855 (patch) | |
tree | 43462ba939c57fe695611c8070f91738c53272ee /src | |
parent | b56b83487f51d7492bab8e78c3c26c2f708d4e3c (diff) | |
download | luajit-86fd2289f067536c99e677af8cae3f307810b855.tar.gz luajit-86fd2289f067536c99e677af8cae3f307810b855.tar.bz2 luajit-86fd2289f067536c99e677af8cae3f307810b855.zip |
Add support for integer IR_MUL.
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm.c | 32 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 86 | ||||
-rw-r--r-- | src/lj_target_x86.h | 1 |
3 files changed, 106 insertions, 13 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 5d48b6be..ce42c437 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -2482,6 +2482,31 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) | |||
2482 | ra_left(as, dest, lref); | 2482 | ra_left(as, dest, lref); |
2483 | } | 2483 | } |
2484 | 2484 | ||
2485 | static void asm_intmul(ASMState *as, IRIns *ir) | ||
2486 | { | ||
2487 | IRRef lref = ir->op1; | ||
2488 | IRRef rref = ir->op2; | ||
2489 | int32_t k = 0; | ||
2490 | if (asm_isk32(as, rref, &k)) { | ||
2491 | /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */ | ||
2492 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
2493 | Reg left = asm_fuseload(as, lref, RSET_GPR); | ||
2494 | x86Op xo; | ||
2495 | if (checki8(k)) { | ||
2496 | emit_i8(as, k); | ||
2497 | xo = XO_IMULi8; | ||
2498 | } else { | ||
2499 | emit_i32(as, k); | ||
2500 | xo = XO_IMULi; | ||
2501 | } | ||
2502 | emit_rr(as, xo, REX_64IR(ir, dest), left); | ||
2503 | } else { | ||
2504 | /* NYI: integer multiply of non-constant operands. */ | ||
2505 | setintV(&as->J->errinfo, ir->o); | ||
2506 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2507 | } | ||
2508 | } | ||
2509 | |||
2485 | /* LEA is really a 4-operand ADD with an independent destination register, | 2510 | /* LEA is really a 4-operand ADD with an independent destination register, |
2486 | ** up to two source registers and an immediate. One register can be scaled | 2511 | ** up to two source registers and an immediate. One register can be scaled |
2487 | ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several | 2512 | ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several |
@@ -3445,7 +3470,12 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
3445 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ | 3470 | else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ |
3446 | asm_intarith(as, ir, XOg_SUB); | 3471 | asm_intarith(as, ir, XOg_SUB); |
3447 | break; | 3472 | break; |
3448 | case IR_MUL: asm_fparith(as, ir, XO_MULSD); break; | 3473 | case IR_MUL: |
3474 | if (irt_isnum(ir->t)) | ||
3475 | asm_fparith(as, ir, XO_MULSD); | ||
3476 | else | ||
3477 | asm_intmul(as, ir); | ||
3478 | break; | ||
3449 | case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break; | 3479 | case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break; |
3450 | 3480 | ||
3451 | case IR_NEG: asm_fparith(as, ir, XO_XORPS); break; | 3481 | case IR_NEG: asm_fparith(as, ir, XO_XORPS); break; |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 22d211e1..cae4e5e4 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -197,6 +197,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | |||
197 | switch (op) { | 197 | switch (op) { |
198 | case IR_ADD: k1 += k2; break; | 198 | case IR_ADD: k1 += k2; break; |
199 | case IR_SUB: k1 -= k2; break; | 199 | case IR_SUB: k1 -= k2; break; |
200 | case IR_MUL: k1 *= k2; break; | ||
200 | case IR_BAND: k1 &= k2; break; | 201 | case IR_BAND: k1 &= k2; break; |
201 | case IR_BOR: k1 |= k2; break; | 202 | case IR_BOR: k1 |= k2; break; |
202 | case IR_BXOR: k1 ^= k2; break; | 203 | case IR_BXOR: k1 ^= k2; break; |
@@ -212,6 +213,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | |||
212 | 213 | ||
213 | LJFOLD(ADD KINT KINT) | 214 | LJFOLD(ADD KINT KINT) |
214 | LJFOLD(SUB KINT KINT) | 215 | LJFOLD(SUB KINT KINT) |
216 | LJFOLD(MUL KINT KINT) | ||
215 | LJFOLD(BAND KINT KINT) | 217 | LJFOLD(BAND KINT KINT) |
216 | LJFOLD(BOR KINT KINT) | 218 | LJFOLD(BOR KINT KINT) |
217 | LJFOLD(BXOR KINT KINT) | 219 | LJFOLD(BXOR KINT KINT) |
@@ -680,6 +682,43 @@ LJFOLDF(simplify_intsub_k64) | |||
680 | return RETRYFOLD; | 682 | return RETRYFOLD; |
681 | } | 683 | } |
682 | 684 | ||
685 | static TRef simplify_intmul_k(jit_State *J, int32_t k) | ||
686 | { | ||
687 | /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2. | ||
688 | ** But this is mainly intended for simple address arithmetic. | ||
689 | ** Also it's easier for the backend to optimize the original multiplies. | ||
690 | */ | ||
691 | if (k == 1) { /* i * 1 ==> i */ | ||
692 | return LEFTFOLD; | ||
693 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ | ||
694 | fins->o = IR_BSHL; | ||
695 | fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k)); | ||
696 | return RETRYFOLD; | ||
697 | } | ||
698 | return NEXTFOLD; | ||
699 | } | ||
700 | |||
701 | LJFOLD(MUL any KINT) | ||
702 | LJFOLDF(simplify_intmul_k32) | ||
703 | { | ||
704 | if (fright->i == 0) /* i * 0 ==> 0 */ | ||
705 | return INTFOLD(0); | ||
706 | else if (fright->i > 0) | ||
707 | return simplify_intmul_k(J, fright->i); | ||
708 | return NEXTFOLD; | ||
709 | } | ||
710 | |||
711 | LJFOLD(MUL any KINT64) | ||
712 | LJFOLDF(simplify_intmul_k64) | ||
713 | |||
714 | { | ||
715 | if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ | ||
716 | return lj_ir_kint64(J, 0); | ||
717 | else if (ir_kint64(fright)->u64 < 0x80000000u) | ||
718 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); | ||
719 | return NEXTFOLD; | ||
720 | } | ||
721 | |||
683 | LJFOLD(SUB any any) | 722 | LJFOLD(SUB any any) |
684 | LJFOLD(SUBOV any any) | 723 | LJFOLD(SUBOV any any) |
685 | LJFOLDF(simplify_intsub) | 724 | LJFOLDF(simplify_intsub) |
@@ -816,16 +855,17 @@ LJFOLD(BROL any KINT) | |||
816 | LJFOLD(BROR any KINT) | 855 | LJFOLD(BROR any KINT) |
817 | LJFOLDF(simplify_shift_ik) | 856 | LJFOLDF(simplify_shift_ik) |
818 | { | 857 | { |
819 | int32_t k = (fright->i & 31); | 858 | int32_t mask = irt_is64(fins->t) ? 63 : 31; |
859 | int32_t k = (fright->i & mask); | ||
820 | if (k == 0) /* i o 0 ==> i */ | 860 | if (k == 0) /* i o 0 ==> i */ |
821 | return LEFTFOLD; | 861 | return LEFTFOLD; |
822 | if (k != fright->i) { /* i o k ==> i o (k & 31) */ | 862 | if (k != fright->i) { /* i o k ==> i o (k & mask) */ |
823 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 863 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
824 | return RETRYFOLD; | 864 | return RETRYFOLD; |
825 | } | 865 | } |
826 | if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&31) */ | 866 | if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&mask) */ |
827 | fins->o = IR_BROL; | 867 | fins->o = IR_BROL; |
828 | fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31); | 868 | fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask); |
829 | return RETRYFOLD; | 869 | return RETRYFOLD; |
830 | } | 870 | } |
831 | return NEXTFOLD; | 871 | return NEXTFOLD; |
@@ -841,9 +881,10 @@ LJFOLDF(simplify_shift_andk) | |||
841 | IRIns *irk = IR(fright->op2); | 881 | IRIns *irk = IR(fright->op2); |
842 | PHIBARRIER(fright); | 882 | PHIBARRIER(fright); |
843 | if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | 883 | if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && |
844 | irk->o == IR_KINT) { /* i o (j & 31) ==> i o j */ | 884 | irk->o == IR_KINT) { /* i o (j & mask) ==> i o j */ |
845 | int32_t k = irk->i & 31; | 885 | int32_t mask = irt_is64(fins->t) ? 63 : 31; |
846 | if (k == 31) { | 886 | int32_t k = irk->i & mask; |
887 | if (k == mask) { | ||
847 | fins->op2 = fright->op1; | 888 | fins->op2 = fright->op1; |
848 | return RETRYFOLD; | 889 | return RETRYFOLD; |
849 | } | 890 | } |
@@ -870,9 +911,29 @@ LJFOLDF(simplify_shift2_ki) | |||
870 | return NEXTFOLD; | 911 | return NEXTFOLD; |
871 | } | 912 | } |
872 | 913 | ||
914 | LJFOLD(BSHL KINT64 any) | ||
915 | LJFOLD(BSHR KINT64 any) | ||
916 | LJFOLDF(simplify_shift1_ki64) | ||
917 | { | ||
918 | if (ir_kint64(fleft)->u64 == 0) /* 0 o i ==> 0 */ | ||
919 | return LEFTFOLD; | ||
920 | return NEXTFOLD; | ||
921 | } | ||
922 | |||
923 | LJFOLD(BSAR KINT64 any) | ||
924 | LJFOLD(BROL KINT64 any) | ||
925 | LJFOLD(BROR KINT64 any) | ||
926 | LJFOLDF(simplify_shift2_ki64) | ||
927 | { | ||
928 | if (ir_kint64(fleft)->u64 == 0 || (int64_t)ir_kint64(fleft)->u64 == -1) | ||
929 | return LEFTFOLD; /* 0 o i ==> 0; -1 o i ==> -1 */ | ||
930 | return NEXTFOLD; | ||
931 | } | ||
932 | |||
873 | /* -- Reassociation ------------------------------------------------------- */ | 933 | /* -- Reassociation ------------------------------------------------------- */ |
874 | 934 | ||
875 | LJFOLD(ADD ADD KINT) | 935 | LJFOLD(ADD ADD KINT) |
936 | LJFOLD(MUL MUL KINT) | ||
876 | LJFOLD(BAND BAND KINT) | 937 | LJFOLD(BAND BAND KINT) |
877 | LJFOLD(BOR BOR KINT) | 938 | LJFOLD(BOR BOR KINT) |
878 | LJFOLD(BXOR BXOR KINT) | 939 | LJFOLD(BXOR BXOR KINT) |
@@ -924,14 +985,15 @@ LJFOLDF(reassoc_shift) | |||
924 | IRIns *irk = IR(fleft->op2); | 985 | IRIns *irk = IR(fleft->op2); |
925 | PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */ | 986 | PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */ |
926 | if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */ | 987 | if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */ |
927 | int32_t k = (irk->i & 31) + (fright->i & 31); | 988 | int32_t mask = irt_is64(fins->t) ? 63 : 31; |
928 | if (k > 31) { /* Combined shift too wide? */ | 989 | int32_t k = (irk->i & mask) + (fright->i & mask); |
990 | if (k > mask) { /* Combined shift too wide? */ | ||
929 | if (fins->o == IR_BSHL || fins->o == IR_BSHR) | 991 | if (fins->o == IR_BSHL || fins->o == IR_BSHR) |
930 | return INTFOLD(0); | 992 | return mask == 31 ? INTFOLD(0) : lj_ir_kint64(J, 0); |
931 | else if (fins->o == IR_BSAR) | 993 | else if (fins->o == IR_BSAR) |
932 | k = 31; | 994 | k = mask; |
933 | else | 995 | else |
934 | k &= 31; | 996 | k &= mask; |
935 | } | 997 | } |
936 | fins->op1 = fleft->op1; | 998 | fins->op1 = fleft->op1; |
937 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 999 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 0fcee6dd..d3956a09 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
@@ -218,6 +218,7 @@ typedef enum { | |||
218 | XO_SHIFTi = XO_(c1), | 218 | XO_SHIFTi = XO_(c1), |
219 | XO_SHIFT1 = XO_(d1), | 219 | XO_SHIFT1 = XO_(d1), |
220 | XO_SHIFTcl = XO_(d3), | 220 | XO_SHIFTcl = XO_(d3), |
221 | XO_IMULi = XO_(69), | ||
221 | XO_IMULi8 = XO_(6b), | 222 | XO_IMULi8 = XO_(6b), |
222 | XO_CMP = XO_(3b), | 223 | XO_CMP = XO_(3b), |
223 | XO_TEST = XO_(85), | 224 | XO_TEST = XO_(85), |