summaryrefslogtreecommitdiff
path: root/src/lj_opt_mem.c
diff options
context:
space:
mode:
authorMike Pall <mike>2010-12-20 19:35:57 +0100
committerMike Pall <mike>2010-12-20 22:32:01 +0100
commitdbab6cf511e232b5c2fdb11923c87473dcd84a7e (patch)
tree1f24df56032b49c25facabc1ab48e80a35a5684d /src/lj_opt_mem.c
parentc8d6f078a52adb840e370ed52138ee5d379fe945 (diff)
downloadluajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.tar.gz
luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.tar.bz2
luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.zip
Reassociate XLOAD across PHIs to handle a[i-1] forwarding case.
Improved SciMark scores: http://luajit.org/download/scimark.lua x86 SciMark LARGE | FFT SOR MC SPARSE LU -----------------------+--------------------------------------- GCC 4.4.3 623.8 | 91.0 883.5 190.4 784.7 1169.6 LuaJIT git +FFI 651.2 | 97.2 1021.9 323.4 673.7 1139.6 LuaJIT git 527.7 | 91.4 1008.5 225.6 400.0 913.2 x64 SciMark LARGE | FFT SOR MC SPARSE LU -----------------------+--------------------------------------- GCC 4.4.3 614.7 | 97.7 883.5 228.5 734.0 1129.9 JVM 1.6.0_22 707.5 | 79.2 1118.1 385.5 658.9 1295.7 LuaJIT git +FFI 632.8 | 89.1 1035.8 298.3 648.1 1092.9 LuaJIT git 516.1 | 88.4 995.4 225.6 382.1 888.9
Diffstat (limited to 'src/lj_opt_mem.c')
-rw-r--r--src/lj_opt_mem.c85
1 files changed, 80 insertions, 5 deletions
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index db0ef21e..3ed3c7ee 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -555,10 +555,10 @@ static AliasRet aa_cnew(jit_State *J, IRIns *refa, IRIns *refb)
555} 555}
556 556
557/* Alias analysis for XLOAD/XSTORE. */ 557/* Alias analysis for XLOAD/XSTORE. */
558static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb) 558static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb)
559{ 559{
560 ptrdiff_t ofsa = 0, ofsb = 0; 560 ptrdiff_t ofsa = 0, ofsb = 0;
561 IRIns *refa = IR(xa->op1), *refb = IR(xb->op1); 561 IRIns *refb = IR(xb->op1);
562 IRIns *basea = refa, *baseb = refb; 562 IRIns *basea = refa, *baseb = refb;
563 /* This implements (very) strict aliasing rules. 563 /* This implements (very) strict aliasing rules.
564 ** Different types do NOT alias, except for differences in signedness. 564 ** Different types do NOT alias, except for differences in signedness.
@@ -602,10 +602,72 @@ static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb)
602 return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ 602 return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */
603} 603}
604 604
605/* Return CSEd reference or 0. Caveat: swaps lower ref to the right! */
606static IRRef reassoc_trycse(jit_State *J, IROp op, IRRef op1, IRRef op2)
607{
608 IRRef ref = J->chain[op];
609 IRRef lim = op1;
610 if (op2 > lim) { lim = op2; op2 = op1; op1 = lim; }
611 while (ref > lim) {
612 IRIns *ir = IR(ref);
613 if (ir->op1 == op1 && ir->op2 == op2)
614 return ref;
615 ref = ir->prev;
616 }
617 return 0;
618}
619
620/* Reassociate index references. */
621static IRRef reassoc_xref(jit_State *J, IRIns *ir)
622{
623 ptrdiff_t ofs = 0;
624 if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Get constant offset. */
625 IRIns *irk = IR(ir->op2);
626 ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
627 (ptrdiff_t)irk->i;
628 ir = IR(ir->op1);
629 }
630 if (ir->o == IR_ADD) { /* Add of base + index. */
631 /* Index ref > base ref for loop-carried dependences. Only check op1. */
632 IRIns *ir2, *ir1 = IR(ir->op1);
633 int32_t shift = 0;
634 IRRef idxref;
635 /* Determine index shifts. Don't bother with IR_MUL here. */
636 if (ir1->o == IR_BSHL && irref_isk(ir1->op2))
637 shift = IR(ir1->op2)->i;
638 else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2)
639 shift = 1;
640 else
641 ir1 = ir;
642 ir2 = IR(ir1->op1);
643 /* A non-reassociated add. Must be a loop-carried dependence. */
644 if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2))
645 ofs += (ptrdiff_t)IR(ir2->op2)->i << shift;
646 else
647 return 0;
648 idxref = ir2->op1;
649 /* Try to CSE the reassociated chain. Give up if not found. */
650 if (ir1 != ir &&
651 !(idxref = reassoc_trycse(J, ir1->o, idxref,
652 ir1->o == IR_BSHL ? ir1->op2 : idxref)))
653 return 0;
654 if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2)))
655 return 0;
656 if (ofs != 0) {
657 IRRef refk = tref_ref(lj_ir_kintp(J, ofs));
658 if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk)))
659 return 0;
660 }
661 return idxref; /* Success, found a reassociated index reference. Phew. */
662 }
663 return 0; /* Failure. */
664}
665
605/* XLOAD forwarding. */ 666/* XLOAD forwarding. */
606TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) 667TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
607{ 668{
608 IRRef xref = fins->op1; 669 IRRef xref = fins->op1;
670 IRIns *xr = IR(xref);
609 IRRef lim = xref; /* Search limit. */ 671 IRRef lim = xref; /* Search limit. */
610 IRRef ref; 672 IRRef ref;
611 673
@@ -614,9 +676,10 @@ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
614 676
615 /* Search for conflicting stores. */ 677 /* Search for conflicting stores. */
616 ref = J->chain[IR_XSTORE]; 678 ref = J->chain[IR_XSTORE];
679retry:
617 while (ref > xref) { 680 while (ref > xref) {
618 IRIns *store = IR(ref); 681 IRIns *store = IR(ref);
619 switch (aa_xref(J, fins, store)) { 682 switch (aa_xref(J, xr, fins, store)) {
620 case ALIAS_NO: break; /* Continue searching. */ 683 case ALIAS_NO: break; /* Continue searching. */
621 case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ 684 case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */
622 case ALIAS_MUST: return store->op2; /* Store forwarding. */ 685 case ALIAS_MUST: return store->op2; /* Store forwarding. */
@@ -629,10 +692,21 @@ cselim:
629 ref = J->chain[IR_XLOAD]; 692 ref = J->chain[IR_XLOAD];
630 while (ref > lim) { 693 while (ref > lim) {
631 /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ 694 /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
632 if (IR(ref)->op1 == fins->op1 && irt_sametype(IR(ref)->t, fins->t)) 695 if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t))
633 return ref; 696 return ref;
634 ref = IR(ref)->prev; 697 ref = IR(ref)->prev;
635 } 698 }
699
700 /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */
701 if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] &&
702 xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) {
703 ref = J->chain[IR_XSTORE];
704 while (ref > lim) /* Skip stores that have already been checked. */
705 ref = IR(ref)->prev;
706 lim = xref;
707 xr = IR(xref);
708 goto retry; /* Retry with the reassociated reference. */
709 }
636 return lj_ir_emit(J); 710 return lj_ir_emit(J);
637} 711}
638 712
@@ -640,12 +714,13 @@ cselim:
640TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) 714TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
641{ 715{
642 IRRef xref = fins->op1; 716 IRRef xref = fins->op1;
717 IRIns *xr = IR(xref);
643 IRRef val = fins->op2; /* Stored value reference. */ 718 IRRef val = fins->op2; /* Stored value reference. */
644 IRRef1 *refp = &J->chain[IR_XSTORE]; 719 IRRef1 *refp = &J->chain[IR_XSTORE];
645 IRRef ref = *refp; 720 IRRef ref = *refp;
646 while (ref > xref) { /* Search for redundant or conflicting stores. */ 721 while (ref > xref) { /* Search for redundant or conflicting stores. */
647 IRIns *store = IR(ref); 722 IRIns *store = IR(ref);
648 switch (aa_xref(J, fins, store)) { 723 switch (aa_xref(J, xr, fins, store)) {
649 case ALIAS_NO: 724 case ALIAS_NO:
650 break; /* Continue searching. */ 725 break; /* Continue searching. */
651 case ALIAS_MAY: 726 case ALIAS_MAY: