diff options
author | Mike Pall <mike> | 2010-03-15 17:02:53 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2010-03-15 17:02:53 +0100 |
commit | c4727220e889dccf06427fd6e473741c0829e344 (patch) | |
tree | cb52697fe78bec674692a3d925e20edc7da956f8 /src | |
parent | 24402ede0421876b3e681b153c5355e206376799 (diff) | |
download | luajit-c4727220e889dccf06427fd6e473741c0829e344.tar.gz luajit-c4727220e889dccf06427fd6e473741c0829e344.tar.bz2 luajit-c4727220e889dccf06427fd6e473741c0829e344.zip |
Add array bounds check elimination (-Oabc, on by default).
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm.c | 2 | ||||
-rw-r--r-- | src/lj_jit.h | 10 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 67 | ||||
-rw-r--r-- | src/lj_record.c | 40 |
4 files changed, 99 insertions, 20 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index c749ada0..9e8f1fc0 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -2571,7 +2571,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
2571 | } else { | 2571 | } else { |
2572 | IRRef lref = ir->op1, rref = ir->op2; | 2572 | IRRef lref = ir->op1, rref = ir->op2; |
2573 | IROp leftop = (IROp)(IR(lref)->o); | 2573 | IROp leftop = (IROp)(IR(lref)->o); |
2574 | lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E)); | 2574 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); |
2575 | /* Swap constants (only for ABC) and fusable loads to the right. */ | 2575 | /* Swap constants (only for ABC) and fusable loads to the right. */ |
2576 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { | 2576 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { |
2577 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ | 2577 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ |
diff --git a/src/lj_jit.h b/src/lj_jit.h index bd7c7577..76d7942b 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -30,7 +30,7 @@ | |||
30 | #endif | 30 | #endif |
31 | 31 | ||
32 | /* Optimization flags. */ | 32 | /* Optimization flags. */ |
33 | #define JIT_F_OPT_MASK 0x00ff0000 | 33 | #define JIT_F_OPT_MASK 0x0fff0000 |
34 | 34 | ||
35 | #define JIT_F_OPT_FOLD 0x00010000 | 35 | #define JIT_F_OPT_FOLD 0x00010000 |
36 | #define JIT_F_OPT_CSE 0x00020000 | 36 | #define JIT_F_OPT_CSE 0x00020000 |
@@ -39,18 +39,20 @@ | |||
39 | #define JIT_F_OPT_DSE 0x00100000 | 39 | #define JIT_F_OPT_DSE 0x00100000 |
40 | #define JIT_F_OPT_NARROW 0x00200000 | 40 | #define JIT_F_OPT_NARROW 0x00200000 |
41 | #define JIT_F_OPT_LOOP 0x00400000 | 41 | #define JIT_F_OPT_LOOP 0x00400000 |
42 | #define JIT_F_OPT_FUSE 0x00800000 | 42 | #define JIT_F_OPT_ABC 0x00800000 |
43 | #define JIT_F_OPT_FUSE 0x01000000 | ||
43 | 44 | ||
44 | /* Optimizations names for -O. Must match the order above. */ | 45 | /* Optimizations names for -O. Must match the order above. */ |
45 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | 46 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD |
46 | #define JIT_F_OPTSTRING \ | 47 | #define JIT_F_OPTSTRING \ |
47 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse" | 48 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse" |
48 | 49 | ||
49 | /* Optimization levels set a fixed combination of flags. */ | 50 | /* Optimization levels set a fixed combination of flags. */ |
50 | #define JIT_F_OPT_0 0 | 51 | #define JIT_F_OPT_0 0 |
51 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) | 52 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) |
52 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) | 53 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) |
53 | #define JIT_F_OPT_3 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE) | 54 | #define JIT_F_OPT_3 \ |
55 | (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE) | ||
54 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | 56 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 |
55 | 57 | ||
56 | #if defined(LUA_USE_WIN) || LJ_64 | 58 | #if defined(LUA_USE_WIN) || LJ_64 |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index a85b49bc..5eeffae3 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. | 2 | ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. |
3 | ** ABCelim: Array Bounds Check Elimination. | ||
3 | ** CSE: Common-Subexpression Elimination. | 4 | ** CSE: Common-Subexpression Elimination. |
4 | ** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h | 5 | ** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h |
5 | */ | 6 | */ |
@@ -949,31 +950,69 @@ LJFOLDF(reassoc_minmax_right) | |||
949 | return NEXTFOLD; | 950 | return NEXTFOLD; |
950 | } | 951 | } |
951 | 952 | ||
953 | /* -- Array bounds check elimination -------------------------------------- */ | ||
954 | |||
952 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. | 955 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. |
953 | ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. | 956 | ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. |
954 | ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. | 957 | ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. |
955 | */ | 958 | */ |
956 | LJFOLD(ABC any ADD) | 959 | LJFOLD(ABC any ADD) |
957 | LJFOLDF(reassoc_abc) | 960 | LJFOLDF(abc_fwd) |
958 | { | 961 | { |
959 | if (irref_isk(fright->op2)) { | 962 | if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) { |
960 | IRIns *add2 = IR(fright->op1); | 963 | if (irref_isk(fright->op2)) { |
961 | if (add2->o == IR_ADD && irref_isk(add2->op2) && | 964 | IRIns *add2 = IR(fright->op1); |
962 | IR(fright->op2)->i == -IR(add2->op2)->i) { | 965 | if (add2->o == IR_ADD && irref_isk(add2->op2) && |
963 | IRRef ref = J->chain[IR_ABC]; | 966 | IR(fright->op2)->i == -IR(add2->op2)->i) { |
964 | IRRef lim = add2->op1; | 967 | IRRef ref = J->chain[IR_ABC]; |
965 | if (fins->op1 > lim) lim = fins->op1; | 968 | IRRef lim = add2->op1; |
966 | while (ref > lim) { | 969 | if (fins->op1 > lim) lim = fins->op1; |
967 | IRIns *ir = IR(ref); | 970 | while (ref > lim) { |
968 | if (ir->op1 == fins->op1 && ir->op2 == add2->op1) | 971 | IRIns *ir = IR(ref); |
969 | return DROPFOLD; | 972 | if (ir->op1 == fins->op1 && ir->op2 == add2->op1) |
970 | ref = ir->prev; | 973 | return DROPFOLD; |
974 | ref = ir->prev; | ||
975 | } | ||
971 | } | 976 | } |
972 | } | 977 | } |
973 | } | 978 | } |
974 | return NEXTFOLD; | 979 | return NEXTFOLD; |
975 | } | 980 | } |
976 | 981 | ||
982 | /* Eliminate ABC for constants. | ||
983 | ** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2)) | ||
984 | ** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2. | ||
985 | */ | ||
986 | LJFOLD(ABC any KINT) | ||
987 | LJFOLDF(abc_k) | ||
988 | { | ||
989 | if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) { | ||
990 | IRRef ref = J->chain[IR_ABC]; | ||
991 | IRRef asize = fins->op1; | ||
992 | while (ref > asize) { | ||
993 | IRIns *ir = IR(ref); | ||
994 | if (ir->op1 == asize && irref_isk(ir->op2)) { | ||
995 | int32_t k = IR(ir->op2)->i; | ||
996 | if (fright->i > k) | ||
997 | ir->op2 = fins->op2; | ||
998 | return DROPFOLD; | ||
999 | } | ||
1000 | ref = ir->prev; | ||
1001 | } | ||
1002 | return EMITFOLD; /* Already performed CSE. */ | ||
1003 | } | ||
1004 | return NEXTFOLD; | ||
1005 | } | ||
1006 | |||
1007 | /* Eliminate invariant ABC inside loop. */ | ||
1008 | LJFOLD(ABC any any) | ||
1009 | LJFOLDF(abc_invar) | ||
1010 | { | ||
1011 | if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */ | ||
1012 | return DROPFOLD; | ||
1013 | return NEXTFOLD; | ||
1014 | } | ||
1015 | |||
977 | /* -- Commutativity ------------------------------------------------------- */ | 1016 | /* -- Commutativity ------------------------------------------------------- */ |
978 | 1017 | ||
979 | /* The refs of commutative ops are canonicalized. Lower refs go to the right. | 1018 | /* The refs of commutative ops are canonicalized. Lower refs go to the right. |
diff --git a/src/lj_record.c b/src/lj_record.c index 0b0768d6..33272316 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -806,6 +806,44 @@ static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) | |||
806 | 806 | ||
807 | /* -- Indexed access ------------------------------------------------------ */ | 807 | /* -- Indexed access ------------------------------------------------------ */ |
808 | 808 | ||
809 | /* Record bounds-check. */ | ||
810 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | ||
811 | { | ||
812 | /* Try to emit invariant bounds checks. */ | ||
813 | if ((J->flags & (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) == | ||
814 | (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) { | ||
815 | IRRef ref = tref_ref(ikey); | ||
816 | IRIns *ir = IR(ref); | ||
817 | int32_t ofs = 0; | ||
818 | IRRef ofsref = 0; | ||
819 | /* Handle constant offsets. */ | ||
820 | if (ir->o == IR_ADD && irref_isk(ir->op2)) { | ||
821 | ofsref = ir->op2; | ||
822 | ofs = IR(ofsref)->i; | ||
823 | ref = ir->op1; | ||
824 | ir = IR(ref); | ||
825 | } | ||
826 | /* Got scalar evolution analysis results for this reference? */ | ||
827 | if (ref == J->scev.idx) { | ||
828 | int32_t stop; | ||
829 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); | ||
830 | stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); | ||
831 | /* Runtime value for stop of loop is within bounds? */ | ||
832 | if ((int64_t)stop + ofs < (int64_t)asize) { | ||
833 | /* Emit invariant bounds check for stop. */ | ||
834 | emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ofs == 0 ? J->scev.stop : | ||
835 | emitir(IRTI(IR_ADD), J->scev.stop, ofsref)); | ||
836 | /* Emit invariant bounds check for start, if not const or negative. */ | ||
837 | if (!(J->scev.dir && J->scev.start && | ||
838 | (int64_t)IR(J->scev.start)->i + ofs >= 0)) | ||
839 | emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ikey); | ||
840 | return; | ||
841 | } | ||
842 | } | ||
843 | } | ||
844 | emitir(IRTGI(IR_ABC), asizeref, ikey); /* Emit regular bounds check. */ | ||
845 | } | ||
846 | |||
809 | /* Record indexed key lookup. */ | 847 | /* Record indexed key lookup. */ |
810 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | 848 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) |
811 | { | 849 | { |
@@ -827,7 +865,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
827 | asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); | 865 | asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); |
828 | if ((MSize)k < t->asize) { /* Currently an array key? */ | 866 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
829 | TRef arrayref; | 867 | TRef arrayref; |
830 | emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */ | 868 | rec_idx_abc(J, asizeref, ikey, t->asize); |
831 | arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); | 869 | arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); |
832 | return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); | 870 | return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); |
833 | } else { /* Currently not in array (may be an array extension)? */ | 871 | } else { /* Currently not in array (may be an array extension)? */ |