diff options
| author | Mike Pall <mike> | 2010-03-15 17:02:53 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2010-03-15 17:02:53 +0100 |
| commit | c4727220e889dccf06427fd6e473741c0829e344 (patch) | |
| tree | cb52697fe78bec674692a3d925e20edc7da956f8 /src | |
| parent | 24402ede0421876b3e681b153c5355e206376799 (diff) | |
| download | luajit-c4727220e889dccf06427fd6e473741c0829e344.tar.gz luajit-c4727220e889dccf06427fd6e473741c0829e344.tar.bz2 luajit-c4727220e889dccf06427fd6e473741c0829e344.zip | |
Add array bounds check elimination (-Oabc, on by default).
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm.c | 2 | ||||
| -rw-r--r-- | src/lj_jit.h | 10 | ||||
| -rw-r--r-- | src/lj_opt_fold.c | 67 | ||||
| -rw-r--r-- | src/lj_record.c | 40 |
4 files changed, 99 insertions, 20 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index c749ada0..9e8f1fc0 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -2571,7 +2571,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
| 2571 | } else { | 2571 | } else { |
| 2572 | IRRef lref = ir->op1, rref = ir->op2; | 2572 | IRRef lref = ir->op1, rref = ir->op2; |
| 2573 | IROp leftop = (IROp)(IR(lref)->o); | 2573 | IROp leftop = (IROp)(IR(lref)->o); |
| 2574 | lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E)); | 2574 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); |
| 2575 | /* Swap constants (only for ABC) and fusable loads to the right. */ | 2575 | /* Swap constants (only for ABC) and fusable loads to the right. */ |
| 2576 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { | 2576 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { |
| 2577 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ | 2577 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ |
diff --git a/src/lj_jit.h b/src/lj_jit.h index bd7c7577..76d7942b 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | #endif | 30 | #endif |
| 31 | 31 | ||
| 32 | /* Optimization flags. */ | 32 | /* Optimization flags. */ |
| 33 | #define JIT_F_OPT_MASK 0x00ff0000 | 33 | #define JIT_F_OPT_MASK 0x0fff0000 |
| 34 | 34 | ||
| 35 | #define JIT_F_OPT_FOLD 0x00010000 | 35 | #define JIT_F_OPT_FOLD 0x00010000 |
| 36 | #define JIT_F_OPT_CSE 0x00020000 | 36 | #define JIT_F_OPT_CSE 0x00020000 |
| @@ -39,18 +39,20 @@ | |||
| 39 | #define JIT_F_OPT_DSE 0x00100000 | 39 | #define JIT_F_OPT_DSE 0x00100000 |
| 40 | #define JIT_F_OPT_NARROW 0x00200000 | 40 | #define JIT_F_OPT_NARROW 0x00200000 |
| 41 | #define JIT_F_OPT_LOOP 0x00400000 | 41 | #define JIT_F_OPT_LOOP 0x00400000 |
| 42 | #define JIT_F_OPT_FUSE 0x00800000 | 42 | #define JIT_F_OPT_ABC 0x00800000 |
| 43 | #define JIT_F_OPT_FUSE 0x01000000 | ||
| 43 | 44 | ||
| 44 | /* Optimizations names for -O. Must match the order above. */ | 45 | /* Optimizations names for -O. Must match the order above. */ |
| 45 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | 46 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD |
| 46 | #define JIT_F_OPTSTRING \ | 47 | #define JIT_F_OPTSTRING \ |
| 47 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse" | 48 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse" |
| 48 | 49 | ||
| 49 | /* Optimization levels set a fixed combination of flags. */ | 50 | /* Optimization levels set a fixed combination of flags. */ |
| 50 | #define JIT_F_OPT_0 0 | 51 | #define JIT_F_OPT_0 0 |
| 51 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) | 52 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) |
| 52 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) | 53 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) |
| 53 | #define JIT_F_OPT_3 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE) | 54 | #define JIT_F_OPT_3 \ |
| 55 | (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE) | ||
| 54 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | 56 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 |
| 55 | 57 | ||
| 56 | #if defined(LUA_USE_WIN) || LJ_64 | 58 | #if defined(LUA_USE_WIN) || LJ_64 |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index a85b49bc..5eeffae3 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. | 2 | ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. |
| 3 | ** ABCelim: Array Bounds Check Elimination. | ||
| 3 | ** CSE: Common-Subexpression Elimination. | 4 | ** CSE: Common-Subexpression Elimination. |
| 4 | ** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h | 5 | ** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h |
| 5 | */ | 6 | */ |
| @@ -949,31 +950,69 @@ LJFOLDF(reassoc_minmax_right) | |||
| 949 | return NEXTFOLD; | 950 | return NEXTFOLD; |
| 950 | } | 951 | } |
| 951 | 952 | ||
| 953 | /* -- Array bounds check elimination -------------------------------------- */ | ||
| 954 | |||
| 952 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. | 955 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. |
| 953 | ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. | 956 | ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. |
| 954 | ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. | 957 | ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. |
| 955 | */ | 958 | */ |
| 956 | LJFOLD(ABC any ADD) | 959 | LJFOLD(ABC any ADD) |
| 957 | LJFOLDF(reassoc_abc) | 960 | LJFOLDF(abc_fwd) |
| 958 | { | 961 | { |
| 959 | if (irref_isk(fright->op2)) { | 962 | if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) { |
| 960 | IRIns *add2 = IR(fright->op1); | 963 | if (irref_isk(fright->op2)) { |
| 961 | if (add2->o == IR_ADD && irref_isk(add2->op2) && | 964 | IRIns *add2 = IR(fright->op1); |
| 962 | IR(fright->op2)->i == -IR(add2->op2)->i) { | 965 | if (add2->o == IR_ADD && irref_isk(add2->op2) && |
| 963 | IRRef ref = J->chain[IR_ABC]; | 966 | IR(fright->op2)->i == -IR(add2->op2)->i) { |
| 964 | IRRef lim = add2->op1; | 967 | IRRef ref = J->chain[IR_ABC]; |
| 965 | if (fins->op1 > lim) lim = fins->op1; | 968 | IRRef lim = add2->op1; |
| 966 | while (ref > lim) { | 969 | if (fins->op1 > lim) lim = fins->op1; |
| 967 | IRIns *ir = IR(ref); | 970 | while (ref > lim) { |
| 968 | if (ir->op1 == fins->op1 && ir->op2 == add2->op1) | 971 | IRIns *ir = IR(ref); |
| 969 | return DROPFOLD; | 972 | if (ir->op1 == fins->op1 && ir->op2 == add2->op1) |
| 970 | ref = ir->prev; | 973 | return DROPFOLD; |
| 974 | ref = ir->prev; | ||
| 975 | } | ||
| 971 | } | 976 | } |
| 972 | } | 977 | } |
| 973 | } | 978 | } |
| 974 | return NEXTFOLD; | 979 | return NEXTFOLD; |
| 975 | } | 980 | } |
| 976 | 981 | ||
| 982 | /* Eliminate ABC for constants. | ||
| 983 | ** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2)) | ||
| 984 | ** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2. | ||
| 985 | */ | ||
| 986 | LJFOLD(ABC any KINT) | ||
| 987 | LJFOLDF(abc_k) | ||
| 988 | { | ||
| 989 | if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) { | ||
| 990 | IRRef ref = J->chain[IR_ABC]; | ||
| 991 | IRRef asize = fins->op1; | ||
| 992 | while (ref > asize) { | ||
| 993 | IRIns *ir = IR(ref); | ||
| 994 | if (ir->op1 == asize && irref_isk(ir->op2)) { | ||
| 995 | int32_t k = IR(ir->op2)->i; | ||
| 996 | if (fright->i > k) | ||
| 997 | ir->op2 = fins->op2; | ||
| 998 | return DROPFOLD; | ||
| 999 | } | ||
| 1000 | ref = ir->prev; | ||
| 1001 | } | ||
| 1002 | return EMITFOLD; /* Already performed CSE. */ | ||
| 1003 | } | ||
| 1004 | return NEXTFOLD; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | /* Eliminate invariant ABC inside loop. */ | ||
| 1008 | LJFOLD(ABC any any) | ||
| 1009 | LJFOLDF(abc_invar) | ||
| 1010 | { | ||
| 1011 | if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */ | ||
| 1012 | return DROPFOLD; | ||
| 1013 | return NEXTFOLD; | ||
| 1014 | } | ||
| 1015 | |||
| 977 | /* -- Commutativity ------------------------------------------------------- */ | 1016 | /* -- Commutativity ------------------------------------------------------- */ |
| 978 | 1017 | ||
| 979 | /* The refs of commutative ops are canonicalized. Lower refs go to the right. | 1018 | /* The refs of commutative ops are canonicalized. Lower refs go to the right. |
diff --git a/src/lj_record.c b/src/lj_record.c index 0b0768d6..33272316 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -806,6 +806,44 @@ static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) | |||
| 806 | 806 | ||
| 807 | /* -- Indexed access ------------------------------------------------------ */ | 807 | /* -- Indexed access ------------------------------------------------------ */ |
| 808 | 808 | ||
| 809 | /* Record bounds-check. */ | ||
| 810 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | ||
| 811 | { | ||
| 812 | /* Try to emit invariant bounds checks. */ | ||
| 813 | if ((J->flags & (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) == | ||
| 814 | (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) { | ||
| 815 | IRRef ref = tref_ref(ikey); | ||
| 816 | IRIns *ir = IR(ref); | ||
| 817 | int32_t ofs = 0; | ||
| 818 | IRRef ofsref = 0; | ||
| 819 | /* Handle constant offsets. */ | ||
| 820 | if (ir->o == IR_ADD && irref_isk(ir->op2)) { | ||
| 821 | ofsref = ir->op2; | ||
| 822 | ofs = IR(ofsref)->i; | ||
| 823 | ref = ir->op1; | ||
| 824 | ir = IR(ref); | ||
| 825 | } | ||
| 826 | /* Got scalar evolution analysis results for this reference? */ | ||
| 827 | if (ref == J->scev.idx) { | ||
| 828 | int32_t stop; | ||
| 829 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); | ||
| 830 | stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); | ||
| 831 | /* Runtime value for stop of loop is within bounds? */ | ||
| 832 | if ((int64_t)stop + ofs < (int64_t)asize) { | ||
| 833 | /* Emit invariant bounds check for stop. */ | ||
| 834 | emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ofs == 0 ? J->scev.stop : | ||
| 835 | emitir(IRTI(IR_ADD), J->scev.stop, ofsref)); | ||
| 836 | /* Emit invariant bounds check for start, if not const or negative. */ | ||
| 837 | if (!(J->scev.dir && J->scev.start && | ||
| 838 | (int64_t)IR(J->scev.start)->i + ofs >= 0)) | ||
| 839 | emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ikey); | ||
| 840 | return; | ||
| 841 | } | ||
| 842 | } | ||
| 843 | } | ||
| 844 | emitir(IRTGI(IR_ABC), asizeref, ikey); /* Emit regular bounds check. */ | ||
| 845 | } | ||
| 846 | |||
| 809 | /* Record indexed key lookup. */ | 847 | /* Record indexed key lookup. */ |
| 810 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | 848 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) |
| 811 | { | 849 | { |
| @@ -827,7 +865,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
| 827 | asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); | 865 | asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); |
| 828 | if ((MSize)k < t->asize) { /* Currently an array key? */ | 866 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
| 829 | TRef arrayref; | 867 | TRef arrayref; |
| 830 | emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */ | 868 | rec_idx_abc(J, asizeref, ikey, t->asize); |
| 831 | arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); | 869 | arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); |
| 832 | return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); | 870 | return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); |
| 833 | } else { /* Currently not in array (may be an array extension)? */ | 871 | } else { /* Currently not in array (may be an array extension)? */ |
