aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Mike Pall <mike> 2010-12-06 12:54:00 +0100
committer: Mike Pall <mike> 2010-12-06 12:54:00 +0100
commit: 86fd2289f067536c99e677af8cae3f307810b855 (patch)
tree: 43462ba939c57fe695611c8070f91738c53272ee /src
parent: b56b83487f51d7492bab8e78c3c26c2f708d4e3c (diff)
download: luajit-86fd2289f067536c99e677af8cae3f307810b855.tar.gz
luajit-86fd2289f067536c99e677af8cae3f307810b855.tar.bz2
luajit-86fd2289f067536c99e677af8cae3f307810b855.zip
Add support for integer IR_MUL.
Diffstat (limited to 'src')
-rw-r--r-- src/lj_asm.c 32
-rw-r--r-- src/lj_opt_fold.c 86
-rw-r--r-- src/lj_target_x86.h 1
3 files changed, 106 insertions, 13 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 5d48b6be..ce42c437 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2482,6 +2482,31 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
2482 ra_left(as, dest, lref); 2482 ra_left(as, dest, lref);
2483} 2483}
2484 2484
2485static void asm_intmul(ASMState *as, IRIns *ir)
2486{
2487 IRRef lref = ir->op1;
2488 IRRef rref = ir->op2;
2489 int32_t k = 0;
2490 if (asm_isk32(as, rref, &k)) {
2491 /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */
2492 Reg dest = ra_dest(as, ir, RSET_GPR);
2493 Reg left = asm_fuseload(as, lref, RSET_GPR);
2494 x86Op xo;
2495 if (checki8(k)) {
2496 emit_i8(as, k);
2497 xo = XO_IMULi8;
2498 } else {
2499 emit_i32(as, k);
2500 xo = XO_IMULi;
2501 }
2502 emit_rr(as, xo, REX_64IR(ir, dest), left);
2503 } else {
2504 /* NYI: integer multiply of non-constant operands. */
2505 setintV(&as->J->errinfo, ir->o);
2506 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2507 }
2508}
2509
2485/* LEA is really a 4-operand ADD with an independent destination register, 2510/* LEA is really a 4-operand ADD with an independent destination register,
2486** up to two source registers and an immediate. One register can be scaled 2511** up to two source registers and an immediate. One register can be scaled
2487** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several 2512** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
@@ -3445,7 +3470,12 @@ static void asm_ir(ASMState *as, IRIns *ir)
3445 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ 3470 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
3446 asm_intarith(as, ir, XOg_SUB); 3471 asm_intarith(as, ir, XOg_SUB);
3447 break; 3472 break;
3448 case IR_MUL: asm_fparith(as, ir, XO_MULSD); break; 3473 case IR_MUL:
3474 if (irt_isnum(ir->t))
3475 asm_fparith(as, ir, XO_MULSD);
3476 else
3477 asm_intmul(as, ir);
3478 break;
3449 case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break; 3479 case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break;
3450 3480
3451 case IR_NEG: asm_fparith(as, ir, XO_XORPS); break; 3481 case IR_NEG: asm_fparith(as, ir, XO_XORPS); break;
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 22d211e1..cae4e5e4 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -197,6 +197,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
197 switch (op) { 197 switch (op) {
198 case IR_ADD: k1 += k2; break; 198 case IR_ADD: k1 += k2; break;
199 case IR_SUB: k1 -= k2; break; 199 case IR_SUB: k1 -= k2; break;
200 case IR_MUL: k1 *= k2; break;
200 case IR_BAND: k1 &= k2; break; 201 case IR_BAND: k1 &= k2; break;
201 case IR_BOR: k1 |= k2; break; 202 case IR_BOR: k1 |= k2; break;
202 case IR_BXOR: k1 ^= k2; break; 203 case IR_BXOR: k1 ^= k2; break;
@@ -212,6 +213,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
212 213
213LJFOLD(ADD KINT KINT) 214LJFOLD(ADD KINT KINT)
214LJFOLD(SUB KINT KINT) 215LJFOLD(SUB KINT KINT)
216LJFOLD(MUL KINT KINT)
215LJFOLD(BAND KINT KINT) 217LJFOLD(BAND KINT KINT)
216LJFOLD(BOR KINT KINT) 218LJFOLD(BOR KINT KINT)
217LJFOLD(BXOR KINT KINT) 219LJFOLD(BXOR KINT KINT)
@@ -680,6 +682,43 @@ LJFOLDF(simplify_intsub_k64)
680 return RETRYFOLD; 682 return RETRYFOLD;
681} 683}
682 684
685static TRef simplify_intmul_k(jit_State *J, int32_t k)
686{
687 /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
688 ** But this is mainly intended for simple address arithmetic.
689 ** Also it's easier for the backend to optimize the original multiplies.
690 */
691 if (k == 1) { /* i * 1 ==> i */
692 return LEFTFOLD;
693 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
694 fins->o = IR_BSHL;
695 fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
696 return RETRYFOLD;
697 }
698 return NEXTFOLD;
699}
700
701LJFOLD(MUL any KINT)
702LJFOLDF(simplify_intmul_k32)
703{
704 if (fright->i == 0) /* i * 0 ==> 0 */
705 return INTFOLD(0);
706 else if (fright->i > 0)
707 return simplify_intmul_k(J, fright->i);
708 return NEXTFOLD;
709}
710
711LJFOLD(MUL any KINT64)
712LJFOLDF(simplify_intmul_k64)
713
714{
715 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */
716 return lj_ir_kint64(J, 0);
717 else if (ir_kint64(fright)->u64 < 0x80000000u)
718 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
719 return NEXTFOLD;
720}
721
683LJFOLD(SUB any any) 722LJFOLD(SUB any any)
684LJFOLD(SUBOV any any) 723LJFOLD(SUBOV any any)
685LJFOLDF(simplify_intsub) 724LJFOLDF(simplify_intsub)
@@ -816,16 +855,17 @@ LJFOLD(BROL any KINT)
816LJFOLD(BROR any KINT) 855LJFOLD(BROR any KINT)
817LJFOLDF(simplify_shift_ik) 856LJFOLDF(simplify_shift_ik)
818{ 857{
819 int32_t k = (fright->i & 31); 858 int32_t mask = irt_is64(fins->t) ? 63 : 31;
859 int32_t k = (fright->i & mask);
820 if (k == 0) /* i o 0 ==> i */ 860 if (k == 0) /* i o 0 ==> i */
821 return LEFTFOLD; 861 return LEFTFOLD;
822 if (k != fright->i) { /* i o k ==> i o (k & 31) */ 862 if (k != fright->i) { /* i o k ==> i o (k & mask) */
823 fins->op2 = (IRRef1)lj_ir_kint(J, k); 863 fins->op2 = (IRRef1)lj_ir_kint(J, k);
824 return RETRYFOLD; 864 return RETRYFOLD;
825 } 865 }
826 if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&31) */ 866 if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&mask) */
827 fins->o = IR_BROL; 867 fins->o = IR_BROL;
828 fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31); 868 fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
829 return RETRYFOLD; 869 return RETRYFOLD;
830 } 870 }
831 return NEXTFOLD; 871 return NEXTFOLD;
@@ -841,9 +881,10 @@ LJFOLDF(simplify_shift_andk)
841 IRIns *irk = IR(fright->op2); 881 IRIns *irk = IR(fright->op2);
842 PHIBARRIER(fright); 882 PHIBARRIER(fright);
843 if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 883 if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
844 irk->o == IR_KINT) { /* i o (j & 31) ==> i o j */ 884 irk->o == IR_KINT) { /* i o (j & mask) ==> i o j */
845 int32_t k = irk->i & 31; 885 int32_t mask = irt_is64(fins->t) ? 63 : 31;
846 if (k == 31) { 886 int32_t k = irk->i & mask;
887 if (k == mask) {
847 fins->op2 = fright->op1; 888 fins->op2 = fright->op1;
848 return RETRYFOLD; 889 return RETRYFOLD;
849 } 890 }
@@ -870,9 +911,29 @@ LJFOLDF(simplify_shift2_ki)
870 return NEXTFOLD; 911 return NEXTFOLD;
871} 912}
872 913
914LJFOLD(BSHL KINT64 any)
915LJFOLD(BSHR KINT64 any)
916LJFOLDF(simplify_shift1_ki64)
917{
918 if (ir_kint64(fleft)->u64 == 0) /* 0 o i ==> 0 */
919 return LEFTFOLD;
920 return NEXTFOLD;
921}
922
923LJFOLD(BSAR KINT64 any)
924LJFOLD(BROL KINT64 any)
925LJFOLD(BROR KINT64 any)
926LJFOLDF(simplify_shift2_ki64)
927{
928 if (ir_kint64(fleft)->u64 == 0 || (int64_t)ir_kint64(fleft)->u64 == -1)
929 return LEFTFOLD; /* 0 o i ==> 0; -1 o i ==> -1 */
930 return NEXTFOLD;
931}
932
873/* -- Reassociation ------------------------------------------------------- */ 933/* -- Reassociation ------------------------------------------------------- */
874 934
875LJFOLD(ADD ADD KINT) 935LJFOLD(ADD ADD KINT)
936LJFOLD(MUL MUL KINT)
876LJFOLD(BAND BAND KINT) 937LJFOLD(BAND BAND KINT)
877LJFOLD(BOR BOR KINT) 938LJFOLD(BOR BOR KINT)
878LJFOLD(BXOR BXOR KINT) 939LJFOLD(BXOR BXOR KINT)
@@ -924,14 +985,15 @@ LJFOLDF(reassoc_shift)
924 IRIns *irk = IR(fleft->op2); 985 IRIns *irk = IR(fleft->op2);
925 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */ 986 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
926 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */ 987 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
927 int32_t k = (irk->i & 31) + (fright->i & 31); 988 int32_t mask = irt_is64(fins->t) ? 63 : 31;
928 if (k > 31) { /* Combined shift too wide? */ 989 int32_t k = (irk->i & mask) + (fright->i & mask);
990 if (k > mask) { /* Combined shift too wide? */
929 if (fins->o == IR_BSHL || fins->o == IR_BSHR) 991 if (fins->o == IR_BSHL || fins->o == IR_BSHR)
930 return INTFOLD(0); 992 return mask == 31 ? INTFOLD(0) : lj_ir_kint64(J, 0);
931 else if (fins->o == IR_BSAR) 993 else if (fins->o == IR_BSAR)
932 k = 31; 994 k = mask;
933 else 995 else
934 k &= 31; 996 k &= mask;
935 } 997 }
936 fins->op1 = fleft->op1; 998 fins->op1 = fleft->op1;
937 fins->op2 = (IRRef1)lj_ir_kint(J, k); 999 fins->op2 = (IRRef1)lj_ir_kint(J, k);
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 0fcee6dd..d3956a09 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -218,6 +218,7 @@ typedef enum {
218 XO_SHIFTi = XO_(c1), 218 XO_SHIFTi = XO_(c1),
219 XO_SHIFT1 = XO_(d1), 219 XO_SHIFT1 = XO_(d1),
220 XO_SHIFTcl = XO_(d3), 220 XO_SHIFTcl = XO_(d3),
221 XO_IMULi = XO_(69),
221 XO_IMULi8 = XO_(6b), 222 XO_IMULi8 = XO_(6b),
222 XO_CMP = XO_(3b), 223 XO_CMP = XO_(3b),
223 XO_TEST = XO_(85), 224 XO_TEST = XO_(85),