author     Mike Pall <mike>  2010-12-06 01:48:19 +0100
committer  Mike Pall <mike>  2010-12-06 01:48:19 +0100
commit     db3d16bfd7fb7dc468e7b86199a9958283424b6d
tree       7472ff2cf74b2336a74d209ab278e96ddfb8e2af
parent     281f426d602596f855d8fea787ad7a514365244d
Add IR_XSTORE.
Diffstat (limited to 'src')
 src/lj_asm.c      | 13 +++++++++----
 src/lj_ir.h       |  2 ++
 src/lj_iropt.h    |  1 +
 src/lj_opt_fold.c | 13 +------------
 src/lj_opt_mem.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 57 insertions(+), 16 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index df94933a..a9b87821 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1357,7 +1357,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     /* Generic fusion is only ok for 32 bit operand (but see asm_comp).
     ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
     */
-    if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
+    if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
+        noconflict(as, ref, IR_XSTORE)) {
       asm_fusexref(as, IR(ir->op1), xallow);
       return RID_MRM;
     }
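
The extra noconflict() condition matters because fusing the XLOAD into its consumer delays the actual memory read to the consumer's position, so an XSTORE emitted between the load and the use could change the value the fused operand sees. A hedged sketch of the check this implies -- field names such as as->curins are assumed from lj_asm.c, and the real helper may differ in detail:

  /* Sketch: scan the IR between the load and the instruction that wants
  ** to fuse it; refuse to fuse if any instruction with the conflicting
  ** opcode (here IR_XSTORE) might overwrite the loaded memory.
  */
  static int noconflict_sketch(ASMState *as, IRRef ref, IROp conflict)
  {
    IRIns *ir = as->ir;    /* IR instruction array. */
    IRRef i = as->curins;  /* Instruction currently being assembled. */
    while (--i > ref)
      if (ir[i].o == conflict)
        return 0;  /* Possible conflict: don't fuse the load. */
    return 1;  /* No intervening store: fusion is safe. */
  }
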
@@ -1978,7 +1979,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
-static void asm_fstore(ASMState *as, IRIns *ir)
+static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
   Reg src = RID_NONE;
@@ -1991,7 +1992,11 @@ static void asm_fstore(ASMState *as, IRIns *ir)
     src = ra_alloc1(as, ir->op2, allow8);
     rset_clear(allow, src);
   }
-  asm_fusefref(as, IR(ir->op1), allow);
+  if (ir->o == IR_FSTORE)
+    asm_fusefref(as, IR(ir->op1), allow);
+  else
+    asm_fusexref(as, IR(ir->op1), allow);
+  /* ir->op2 is ignored -- unaligned stores are ok on x86. */
   if (ra_hasreg(src)) {
     x86Op xo;
     switch (irt_type(ir->t)) {
@@ -3467,7 +3472,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_SLOAD: asm_sload(as, ir); break;
 
   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
 
   /* Allocations. */
   case IR_SNEW: asm_snew(as, ir); break;
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 232ff939..3371a8a4 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -104,6 +104,7 @@
   _(HSTORE,  S , ref, ref) \
   _(USTORE,  S , ref, ref) \
   _(FSTORE,  S , ref, ref) \
+  _(XSTORE,  S , ref, ref) \
   \
   /* Allocations. */ \
   _(SNEW,    N , ref, ref) /* CSE is ok, so not marked as A. */ \
@@ -152,6 +153,7 @@ LJ_STATIC_ASSERT(((int)IR_LT^4) == (int)IR_ULT);
 LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
 LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
 LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
+LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
 
 /* -- Named IR literals --------------------------------------------------- */
 
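
The new assert extends the invariant that every load opcode sits at the same fixed distance from its store counterpart. A minimal illustration of what this buys -- the helper name is made up for this sketch, the tree just uses the addition inline where needed:

  /* Illustrative only: with the asserts above, any load opcode converts
  ** to its matching store opcode by adding IRDELTA_L2S, now including
  ** IR_XLOAD -> IR_XSTORE.
  */
  static IROp load2store(IROp op)
  {
    return (IROp)((int)op + IRDELTA_L2S);
  }
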
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index c05040d6..ce8b564f 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -110,6 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J);
 LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
 LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
 LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
 LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 8ccfc6bd..3476235e 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -1357,19 +1357,8 @@ LJFOLDF(xload_kptr)
   return NEXTFOLD;
 }
 
-/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
 LJFOLD(XLOAD any any)
-LJFOLDF(fwd_xload)
-{
-  IRRef ref = J->chain[IR_XLOAD];
-  IRRef op1 = fins->op1;
-  while (ref > op1) {
-    if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t))
-      return ref;
-    ref = IR(ref)->prev;
-  }
-  return EMITFOLD;
-}
+LJFOLDX(lj_opt_fwd_xload)
 
 /* -- Write barriers ------------------------------------------------------ */
 
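
LJFOLDX binds an already-existing function as the handler for the preceding LJFOLD pattern, where LJFOLDF defines the handler body inline; the XLOAD CSE logic moves into lj_opt_mem.c (below) so it can share the store-chain walk and alias analysis. A hedged sketch of the convention, with the caveat that the dispatch table itself is machine-generated from the LJFOLD lines at build time:

  /* Assumed shape, for illustration only: every fold handler has this
  ** signature. LJFOLDF(fwd_xload) would have pointed the generated table
  ** entry at a local static function; LJFOLDX(lj_opt_fwd_xload) records
  ** the extern function from lj_opt_mem.c directly.
  */
  typedef TRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
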
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 9b96d66e..d47706fb 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -523,6 +523,50 @@ doemit:
   return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
 }
 
+/* -- XLOAD forwarding ---------------------------------------------------- */
+
+/* NYI: Alias analysis for XLOAD/XSTORE. */
+static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *refb)
+{
+  UNUSED(J); UNUSED(refa); UNUSED(refb);
+  return ALIAS_MAY;
+}
+
+/* XLOAD forwarding. */
+TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
+{
+  IRRef xref = fins->op1;
+  IRRef lim = xref;  /* Search limit. */
+  IRIns *xr = IR(xref);
+  IRRef ref;
+
+  if ((fins->op2 & IRXLOAD_READONLY))
+    goto cselim;
+
+  /* Search for conflicting stores. */
+  ref = J->chain[IR_XSTORE];
+  while (ref > xref) {
+    IRIns *store = IR(ref);
+    switch (aa_xref(J, xr, IR(store->op1))) {
+    case ALIAS_NO:   break;  /* Continue searching. */
+    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
+    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
+    }
+    ref = store->prev;
+  }
+
+cselim:
+  /* Try to find a matching load. Below the conflicting store, if any. */
+  ref = J->chain[IR_XLOAD];
+  while (ref > lim) {
+    /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
+    if (IR(ref)->op1 == fins->op1 && irt_sametype(IR(ref)->t, fins->t))
+      return ref;
+    ref = IR(ref)->prev;
+  }
+  return lj_ir_emit(J);
+}
+
 /* -- Forwarding of lj_tab_len -------------------------------------------- */
 
 /* This is rather simplistic right now, but better than nothing. */
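
The added forwarding pass has a reusable shape: walk the store chain from newest to oldest, classify each store with a three-way alias verdict, then fall back to CSE over older loads, bounded by the first may-aliasing store. A self-contained sketch with simplified types and the type check omitted -- Ins, aa and fwd_xload_sketch are illustrative stand-ins, not the LuaJIT API:

  typedef enum { ALIAS_NO, ALIAS_MAY, ALIAS_MUST } AliasRet;

  /* Simplified IR instruction: op1 = address ref, op2 = stored value ref,
  ** prev = previous instruction in the same opcode chain. */
  typedef struct Ins { int op1, op2, prev; } Ins;

  /* Placeholder alias analysis mirroring the NYI aa_xref above:
  ** identical address refs must alias, everything else may alias. */
  static AliasRet aa(int refa, int refb)
  {
    return refa == refb ? ALIAS_MUST : ALIAS_MAY;
  }

  /* Forward a load of address 'xref': return the value ref of a
  ** must-aliasing store, the ref of a reusable older load, or -1 when a
  ** fresh load has to be emitted. */
  static int fwd_xload_sketch(const Ins *ir, int xref,
                              int store_chain, int load_chain)
  {
    int lim = xref;  /* Search limit for the CSE loop below. */
    int ref;
    for (ref = store_chain; ref > xref; ref = ir[ref].prev) {
      switch (aa(xref, ir[ref].op1)) {
      case ALIAS_NO: break;                    /* Keep searching. */
      case ALIAS_MAY: lim = ref; goto cselim;  /* Load can't move above it. */
      case ALIAS_MUST: return ir[ref].op2;     /* Store-to-load forwarding. */
      }
    }
  cselim:
    /* Reuse an identical older load, but only below the limiting store. */
    for (ref = load_chain; ref > lim; ref = ir[ref].prev)
      if (ir[ref].op1 == xref)
        return ref;
    return -1;  /* No match: caller emits a new XLOAD. */
  }

Since the NYI aa_xref answers ALIAS_MAY for every pair, the store loop currently always stops at the newest XSTORE: stores are never forwarded yet, and CSE only reuses loads emitted after that store. The IRXLOAD_READONLY early-out skips the store scan entirely, because a read-only location cannot be clobbered.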