diff options
author | Mike Pall <mike> | 2010-12-20 19:35:57 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2010-12-20 22:32:01 +0100 |
commit | dbab6cf511e232b5c2fdb11923c87473dcd84a7e (patch) | |
tree | 1f24df56032b49c25facabc1ab48e80a35a5684d /src/lj_opt_mem.c | |
parent | c8d6f078a52adb840e370ed52138ee5d379fe945 (diff) | |
download | luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.tar.gz luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.tar.bz2 luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.zip |
Reassociate XLOAD across PHIs to handle a[i-1] forwarding case.
Improved SciMark scores: http://luajit.org/download/scimark.lua
x86 SciMark LARGE | FFT SOR MC SPARSE LU
-----------------------+---------------------------------------
GCC 4.4.3 623.8 | 91.0 883.5 190.4 784.7 1169.6
LuaJIT git +FFI 651.2 | 97.2 1021.9 323.4 673.7 1139.6
LuaJIT git 527.7 | 91.4 1008.5 225.6 400.0 913.2
x64 SciMark LARGE | FFT SOR MC SPARSE LU
-----------------------+---------------------------------------
GCC 4.4.3 614.7 | 97.7 883.5 228.5 734.0 1129.9
JVM 1.6.0_22 707.5 | 79.2 1118.1 385.5 658.9 1295.7
LuaJIT git +FFI 632.8 | 89.1 1035.8 298.3 648.1 1092.9
LuaJIT git 516.1 | 88.4 995.4 225.6 382.1 888.9
Diffstat (limited to 'src/lj_opt_mem.c')
-rw-r--r-- | src/lj_opt_mem.c | 85 |
1 files changed, 80 insertions, 5 deletions
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index db0ef21e..3ed3c7ee 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
@@ -555,10 +555,10 @@ static AliasRet aa_cnew(jit_State *J, IRIns *refa, IRIns *refb) | |||
555 | } | 555 | } |
556 | 556 | ||
557 | /* Alias analysis for XLOAD/XSTORE. */ | 557 | /* Alias analysis for XLOAD/XSTORE. */ |
558 | static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb) | 558 | static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) |
559 | { | 559 | { |
560 | ptrdiff_t ofsa = 0, ofsb = 0; | 560 | ptrdiff_t ofsa = 0, ofsb = 0; |
561 | IRIns *refa = IR(xa->op1), *refb = IR(xb->op1); | 561 | IRIns *refb = IR(xb->op1); |
562 | IRIns *basea = refa, *baseb = refb; | 562 | IRIns *basea = refa, *baseb = refb; |
563 | /* This implements (very) strict aliasing rules. | 563 | /* This implements (very) strict aliasing rules. |
564 | ** Different types do NOT alias, except for differences in signedness. | 564 | ** Different types do NOT alias, except for differences in signedness. |
@@ -602,10 +602,72 @@ static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb) | |||
602 | return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ | 602 | return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ |
603 | } | 603 | } |
604 | 604 | ||
605 | /* Return CSEd reference or 0. Caveat: swaps lower ref to the right! */ | ||
606 | static IRRef reassoc_trycse(jit_State *J, IROp op, IRRef op1, IRRef op2) | ||
607 | { | ||
608 | IRRef ref = J->chain[op]; | ||
609 | IRRef lim = op1; | ||
610 | if (op2 > lim) { lim = op2; op2 = op1; op1 = lim; } | ||
611 | while (ref > lim) { | ||
612 | IRIns *ir = IR(ref); | ||
613 | if (ir->op1 == op1 && ir->op2 == op2) | ||
614 | return ref; | ||
615 | ref = ir->prev; | ||
616 | } | ||
617 | return 0; | ||
618 | } | ||
619 | |||
620 | /* Reassociate index references. */ | ||
621 | static IRRef reassoc_xref(jit_State *J, IRIns *ir) | ||
622 | { | ||
623 | ptrdiff_t ofs = 0; | ||
624 | if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Get constant offset. */ | ||
625 | IRIns *irk = IR(ir->op2); | ||
626 | ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : | ||
627 | (ptrdiff_t)irk->i; | ||
628 | ir = IR(ir->op1); | ||
629 | } | ||
630 | if (ir->o == IR_ADD) { /* Add of base + index. */ | ||
631 | /* Index ref > base ref for loop-carried dependences. Only check op1. */ | ||
632 | IRIns *ir2, *ir1 = IR(ir->op1); | ||
633 | int32_t shift = 0; | ||
634 | IRRef idxref; | ||
635 | /* Determine index shifts. Don't bother with IR_MUL here. */ | ||
636 | if (ir1->o == IR_BSHL && irref_isk(ir1->op2)) | ||
637 | shift = IR(ir1->op2)->i; | ||
638 | else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2) | ||
639 | shift = 1; | ||
640 | else | ||
641 | ir1 = ir; | ||
642 | ir2 = IR(ir1->op1); | ||
643 | /* A non-reassociated add. Must be a loop-carried dependence. */ | ||
644 | if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2)) | ||
645 | ofs += (ptrdiff_t)IR(ir2->op2)->i << shift; | ||
646 | else | ||
647 | return 0; | ||
648 | idxref = ir2->op1; | ||
649 | /* Try to CSE the reassociated chain. Give up if not found. */ | ||
650 | if (ir1 != ir && | ||
651 | !(idxref = reassoc_trycse(J, ir1->o, idxref, | ||
652 | ir1->o == IR_BSHL ? ir1->op2 : idxref))) | ||
653 | return 0; | ||
654 | if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2))) | ||
655 | return 0; | ||
656 | if (ofs != 0) { | ||
657 | IRRef refk = tref_ref(lj_ir_kintp(J, ofs)); | ||
658 | if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk))) | ||
659 | return 0; | ||
660 | } | ||
661 | return idxref; /* Success, found a reassociated index reference. Phew. */ | ||
662 | } | ||
663 | return 0; /* Failure. */ | ||
664 | } | ||
665 | |||
605 | /* XLOAD forwarding. */ | 666 | /* XLOAD forwarding. */ |
606 | TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) | 667 | TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) |
607 | { | 668 | { |
608 | IRRef xref = fins->op1; | 669 | IRRef xref = fins->op1; |
670 | IRIns *xr = IR(xref); | ||
609 | IRRef lim = xref; /* Search limit. */ | 671 | IRRef lim = xref; /* Search limit. */ |
610 | IRRef ref; | 672 | IRRef ref; |
611 | 673 | ||
@@ -614,9 +676,10 @@ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) | |||
614 | 676 | ||
615 | /* Search for conflicting stores. */ | 677 | /* Search for conflicting stores. */ |
616 | ref = J->chain[IR_XSTORE]; | 678 | ref = J->chain[IR_XSTORE]; |
679 | retry: | ||
617 | while (ref > xref) { | 680 | while (ref > xref) { |
618 | IRIns *store = IR(ref); | 681 | IRIns *store = IR(ref); |
619 | switch (aa_xref(J, fins, store)) { | 682 | switch (aa_xref(J, xr, fins, store)) { |
620 | case ALIAS_NO: break; /* Continue searching. */ | 683 | case ALIAS_NO: break; /* Continue searching. */ |
621 | case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ | 684 | case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ |
622 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ | 685 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ |
@@ -629,10 +692,21 @@ cselim: | |||
629 | ref = J->chain[IR_XLOAD]; | 692 | ref = J->chain[IR_XLOAD]; |
630 | while (ref > lim) { | 693 | while (ref > lim) { |
631 | /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ | 694 | /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ |
632 | if (IR(ref)->op1 == fins->op1 && irt_sametype(IR(ref)->t, fins->t)) | 695 | if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t)) |
633 | return ref; | 696 | return ref; |
634 | ref = IR(ref)->prev; | 697 | ref = IR(ref)->prev; |
635 | } | 698 | } |
699 | |||
700 | /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */ | ||
701 | if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] && | ||
702 | xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) { | ||
703 | ref = J->chain[IR_XSTORE]; | ||
704 | while (ref > lim) /* Skip stores that have already been checked. */ | ||
705 | ref = IR(ref)->prev; | ||
706 | lim = xref; | ||
707 | xr = IR(xref); | ||
708 | goto retry; /* Retry with the reassociated reference. */ | ||
709 | } | ||
636 | return lj_ir_emit(J); | 710 | return lj_ir_emit(J); |
637 | } | 711 | } |
638 | 712 | ||
@@ -640,12 +714,13 @@ cselim: | |||
640 | TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) | 714 | TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) |
641 | { | 715 | { |
642 | IRRef xref = fins->op1; | 716 | IRRef xref = fins->op1; |
717 | IRIns *xr = IR(xref); | ||
643 | IRRef val = fins->op2; /* Stored value reference. */ | 718 | IRRef val = fins->op2; /* Stored value reference. */ |
644 | IRRef1 *refp = &J->chain[IR_XSTORE]; | 719 | IRRef1 *refp = &J->chain[IR_XSTORE]; |
645 | IRRef ref = *refp; | 720 | IRRef ref = *refp; |
646 | while (ref > xref) { /* Search for redundant or conflicting stores. */ | 721 | while (ref > xref) { /* Search for redundant or conflicting stores. */ |
647 | IRIns *store = IR(ref); | 722 | IRIns *store = IR(ref); |
648 | switch (aa_xref(J, fins, store)) { | 723 | switch (aa_xref(J, xr, fins, store)) { |
649 | case ALIAS_NO: | 724 | case ALIAS_NO: |
650 | break; /* Continue searching. */ | 725 | break; /* Continue searching. */ |
651 | case ALIAS_MAY: | 726 | case ALIAS_MAY: |