aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Pall <mike>2010-03-15 17:02:53 +0100
committerMike Pall <mike>2010-03-15 17:02:53 +0100
commitc4727220e889dccf06427fd6e473741c0829e344 (patch)
treecb52697fe78bec674692a3d925e20edc7da956f8 /src
parent24402ede0421876b3e681b153c5355e206376799 (diff)
downloadluajit-c4727220e889dccf06427fd6e473741c0829e344.tar.gz
luajit-c4727220e889dccf06427fd6e473741c0829e344.tar.bz2
luajit-c4727220e889dccf06427fd6e473741c0829e344.zip
Add array bounds check elimination (-Oabc, on by default).
Diffstat (limited to 'src')
-rw-r--r--src/lj_asm.c2
-rw-r--r--src/lj_jit.h10
-rw-r--r--src/lj_opt_fold.c67
-rw-r--r--src/lj_record.c40
4 files changed, 99 insertions, 20 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index c749ada0..9e8f1fc0 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2571,7 +2571,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2571 } else { 2571 } else {
2572 IRRef lref = ir->op1, rref = ir->op2; 2572 IRRef lref = ir->op1, rref = ir->op2;
2573 IROp leftop = (IROp)(IR(lref)->o); 2573 IROp leftop = (IROp)(IR(lref)->o);
2574 lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E)); 2574 lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
2575 /* Swap constants (only for ABC) and fusable loads to the right. */ 2575 /* Swap constants (only for ABC) and fusable loads to the right. */
2576 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { 2576 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
2577 if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ 2577 if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
diff --git a/src/lj_jit.h b/src/lj_jit.h
index bd7c7577..76d7942b 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -30,7 +30,7 @@
30#endif 30#endif
31 31
32/* Optimization flags. */ 32/* Optimization flags. */
33#define JIT_F_OPT_MASK 0x00ff0000 33#define JIT_F_OPT_MASK 0x0fff0000
34 34
35#define JIT_F_OPT_FOLD 0x00010000 35#define JIT_F_OPT_FOLD 0x00010000
36#define JIT_F_OPT_CSE 0x00020000 36#define JIT_F_OPT_CSE 0x00020000
@@ -39,18 +39,20 @@
39#define JIT_F_OPT_DSE 0x00100000 39#define JIT_F_OPT_DSE 0x00100000
40#define JIT_F_OPT_NARROW 0x00200000 40#define JIT_F_OPT_NARROW 0x00200000
41#define JIT_F_OPT_LOOP 0x00400000 41#define JIT_F_OPT_LOOP 0x00400000
42#define JIT_F_OPT_FUSE 0x00800000 42#define JIT_F_OPT_ABC 0x00800000
43#define JIT_F_OPT_FUSE 0x01000000
43 44
44/* Optimizations names for -O. Must match the order above. */ 45/* Optimizations names for -O. Must match the order above. */
45#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD 46#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
46#define JIT_F_OPTSTRING \ 47#define JIT_F_OPTSTRING \
47 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse" 48 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse"
48 49
49/* Optimization levels set a fixed combination of flags. */ 50/* Optimization levels set a fixed combination of flags. */
50#define JIT_F_OPT_0 0 51#define JIT_F_OPT_0 0
51#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) 52#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
52#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) 53#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
53#define JIT_F_OPT_3 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE) 54#define JIT_F_OPT_3 \
55 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE)
54#define JIT_F_OPT_DEFAULT JIT_F_OPT_3 56#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
55 57
56#if defined(LUA_USE_WIN) || LJ_64 58#if defined(LUA_USE_WIN) || LJ_64
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index a85b49bc..5eeffae3 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -1,5 +1,6 @@
1/* 1/*
2** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. 2** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
3** ABCelim: Array Bounds Check Elimination.
3** CSE: Common-Subexpression Elimination. 4** CSE: Common-Subexpression Elimination.
4** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h 5** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h
5*/ 6*/
@@ -949,31 +950,69 @@ LJFOLDF(reassoc_minmax_right)
949 return NEXTFOLD; 950 return NEXTFOLD;
950} 951}
951 952
953/* -- Array bounds check elimination -------------------------------------- */
954
952/* Eliminate ABC across PHIs to handle t[i-1] forwarding case. 955/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
953** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. 956** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
954** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. 957** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
955*/ 958*/
956LJFOLD(ABC any ADD) 959LJFOLD(ABC any ADD)
957LJFOLDF(reassoc_abc) 960LJFOLDF(abc_fwd)
958{ 961{
959 if (irref_isk(fright->op2)) { 962 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
960 IRIns *add2 = IR(fright->op1); 963 if (irref_isk(fright->op2)) {
961 if (add2->o == IR_ADD && irref_isk(add2->op2) && 964 IRIns *add2 = IR(fright->op1);
962 IR(fright->op2)->i == -IR(add2->op2)->i) { 965 if (add2->o == IR_ADD && irref_isk(add2->op2) &&
963 IRRef ref = J->chain[IR_ABC]; 966 IR(fright->op2)->i == -IR(add2->op2)->i) {
964 IRRef lim = add2->op1; 967 IRRef ref = J->chain[IR_ABC];
965 if (fins->op1 > lim) lim = fins->op1; 968 IRRef lim = add2->op1;
966 while (ref > lim) { 969 if (fins->op1 > lim) lim = fins->op1;
967 IRIns *ir = IR(ref); 970 while (ref > lim) {
968 if (ir->op1 == fins->op1 && ir->op2 == add2->op1) 971 IRIns *ir = IR(ref);
969 return DROPFOLD; 972 if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
970 ref = ir->prev; 973 return DROPFOLD;
974 ref = ir->prev;
975 }
971 } 976 }
972 } 977 }
973 } 978 }
974 return NEXTFOLD; 979 return NEXTFOLD;
975} 980}
976 981
982/* Eliminate ABC for constants.
983** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2))
984** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
985*/
986LJFOLD(ABC any KINT)
987LJFOLDF(abc_k)
988{
989 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
990 IRRef ref = J->chain[IR_ABC];
991 IRRef asize = fins->op1;
992 while (ref > asize) {
993 IRIns *ir = IR(ref);
994 if (ir->op1 == asize && irref_isk(ir->op2)) {
995 int32_t k = IR(ir->op2)->i;
996 if (fright->i > k)
997 ir->op2 = fins->op2;
998 return DROPFOLD;
999 }
1000 ref = ir->prev;
1001 }
1002 return EMITFOLD; /* Already performed CSE. */
1003 }
1004 return NEXTFOLD;
1005}
1006
1007/* Eliminate invariant ABC inside loop. */
1008LJFOLD(ABC any any)
1009LJFOLDF(abc_invar)
1010{
1011 if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */
1012 return DROPFOLD;
1013 return NEXTFOLD;
1014}
1015
977/* -- Commutativity ------------------------------------------------------- */ 1016/* -- Commutativity ------------------------------------------------------- */
978 1017
979/* The refs of commutative ops are canonicalized. Lower refs go to the right. 1018/* The refs of commutative ops are canonicalized. Lower refs go to the right.
diff --git a/src/lj_record.c b/src/lj_record.c
index 0b0768d6..33272316 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -806,6 +806,44 @@ static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op)
806 806
807/* -- Indexed access ------------------------------------------------------ */ 807/* -- Indexed access ------------------------------------------------------ */
808 808
809/* Record bounds-check. */
810static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
811{
812 /* Try to emit invariant bounds checks. */
813 if ((J->flags & (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) ==
814 (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) {
815 IRRef ref = tref_ref(ikey);
816 IRIns *ir = IR(ref);
817 int32_t ofs = 0;
818 IRRef ofsref = 0;
819 /* Handle constant offsets. */
820 if (ir->o == IR_ADD && irref_isk(ir->op2)) {
821 ofsref = ir->op2;
822 ofs = IR(ofsref)->i;
823 ref = ir->op1;
824 ir = IR(ref);
825 }
826 /* Got scalar evolution analysis results for this reference? */
827 if (ref == J->scev.idx) {
828 int32_t stop;
829 lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD);
830 stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]));
831 /* Runtime value for stop of loop is within bounds? */
832 if ((int64_t)stop + ofs < (int64_t)asize) {
833 /* Emit invariant bounds check for stop. */
834 emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ofs == 0 ? J->scev.stop :
835 emitir(IRTI(IR_ADD), J->scev.stop, ofsref));
836 /* Emit invariant bounds check for start, if not const or negative. */
837 if (!(J->scev.dir && J->scev.start &&
838 (int64_t)IR(J->scev.start)->i + ofs >= 0))
839 emitir(IRTG(IR_ABC, IRT_PTR), asizeref, ikey);
840 return;
841 }
842 }
843 }
844 emitir(IRTGI(IR_ABC), asizeref, ikey); /* Emit regular bounds check. */
845}
846
809/* Record indexed key lookup. */ 847/* Record indexed key lookup. */
810static TRef rec_idx_key(jit_State *J, RecordIndex *ix) 848static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
811{ 849{
@@ -827,7 +865,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
827 asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); 865 asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
828 if ((MSize)k < t->asize) { /* Currently an array key? */ 866 if ((MSize)k < t->asize) { /* Currently an array key? */
829 TRef arrayref; 867 TRef arrayref;
830 emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */ 868 rec_idx_abc(J, asizeref, ikey, t->asize);
831 arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); 869 arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY);
832 return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); 870 return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey);
833 } else { /* Currently not in array (may be an array extension)? */ 871 } else { /* Currently not in array (may be an array extension)? */