diff options
| author | Mike Pall <mike> | 2010-12-20 19:35:57 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2010-12-20 22:32:01 +0100 |
| commit | dbab6cf511e232b5c2fdb11923c87473dcd84a7e (patch) | |
| tree | 1f24df56032b49c25facabc1ab48e80a35a5684d /src | |
| parent | c8d6f078a52adb840e370ed52138ee5d379fe945 (diff) | |
| download | luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.tar.gz luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.tar.bz2 luajit-dbab6cf511e232b5c2fdb11923c87473dcd84a7e.zip | |
Reassociate XLOAD across PHIs to handle a[i-1] forwarding case.
Improved SciMark scores: http://luajit.org/download/scimark.lua
x86 SciMark LARGE | FFT SOR MC SPARSE LU
-----------------------+---------------------------------------
GCC 4.4.3 623.8 | 91.0 883.5 190.4 784.7 1169.6
LuaJIT git +FFI 651.2 | 97.2 1021.9 323.4 673.7 1139.6
LuaJIT git 527.7 | 91.4 1008.5 225.6 400.0 913.2
x64 SciMark LARGE | FFT SOR MC SPARSE LU
-----------------------+---------------------------------------
GCC 4.4.3 614.7 | 97.7 883.5 228.5 734.0 1129.9
JVM 1.6.0_22 707.5 | 79.2 1118.1 385.5 658.9 1295.7
LuaJIT git +FFI 632.8 | 89.1 1035.8 298.3 648.1 1092.9
LuaJIT git 516.1 | 88.4 995.4 225.6 382.1 888.9
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_opt_mem.c | 85 |
1 files changed, 80 insertions, 5 deletions
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index db0ef21e..3ed3c7ee 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
| @@ -555,10 +555,10 @@ static AliasRet aa_cnew(jit_State *J, IRIns *refa, IRIns *refb) | |||
| 555 | } | 555 | } |
| 556 | 556 | ||
| 557 | /* Alias analysis for XLOAD/XSTORE. */ | 557 | /* Alias analysis for XLOAD/XSTORE. */ |
| 558 | static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb) | 558 | static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) |
| 559 | { | 559 | { |
| 560 | ptrdiff_t ofsa = 0, ofsb = 0; | 560 | ptrdiff_t ofsa = 0, ofsb = 0; |
| 561 | IRIns *refa = IR(xa->op1), *refb = IR(xb->op1); | 561 | IRIns *refb = IR(xb->op1); |
| 562 | IRIns *basea = refa, *baseb = refb; | 562 | IRIns *basea = refa, *baseb = refb; |
| 563 | /* This implements (very) strict aliasing rules. | 563 | /* This implements (very) strict aliasing rules. |
| 564 | ** Different types do NOT alias, except for differences in signedness. | 564 | ** Different types do NOT alias, except for differences in signedness. |
| @@ -602,10 +602,72 @@ static AliasRet aa_xref(jit_State *J, IRIns *xa, IRIns *xb) | |||
| 602 | return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ | 602 | return aa_cnew(J, basea, baseb); /* Try to disambiguate allocations. */ |
| 603 | } | 603 | } |
| 604 | 604 | ||
| 605 | /* Return CSEd reference or 0. Caveat: swaps lower ref to the right! */ | ||
| 606 | static IRRef reassoc_trycse(jit_State *J, IROp op, IRRef op1, IRRef op2) | ||
| 607 | { | ||
| 608 | IRRef ref = J->chain[op]; | ||
| 609 | IRRef lim = op1; | ||
| 610 | if (op2 > lim) { lim = op2; op2 = op1; op1 = lim; } | ||
| 611 | while (ref > lim) { | ||
| 612 | IRIns *ir = IR(ref); | ||
| 613 | if (ir->op1 == op1 && ir->op2 == op2) | ||
| 614 | return ref; | ||
| 615 | ref = ir->prev; | ||
| 616 | } | ||
| 617 | return 0; | ||
| 618 | } | ||
| 619 | |||
| 620 | /* Reassociate index references. */ | ||
| 621 | static IRRef reassoc_xref(jit_State *J, IRIns *ir) | ||
| 622 | { | ||
| 623 | ptrdiff_t ofs = 0; | ||
| 624 | if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Get constant offset. */ | ||
| 625 | IRIns *irk = IR(ir->op2); | ||
| 626 | ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : | ||
| 627 | (ptrdiff_t)irk->i; | ||
| 628 | ir = IR(ir->op1); | ||
| 629 | } | ||
| 630 | if (ir->o == IR_ADD) { /* Add of base + index. */ | ||
| 631 | /* Index ref > base ref for loop-carried dependences. Only check op1. */ | ||
| 632 | IRIns *ir2, *ir1 = IR(ir->op1); | ||
| 633 | int32_t shift = 0; | ||
| 634 | IRRef idxref; | ||
| 635 | /* Determine index shifts. Don't bother with IR_MUL here. */ | ||
| 636 | if (ir1->o == IR_BSHL && irref_isk(ir1->op2)) | ||
| 637 | shift = IR(ir1->op2)->i; | ||
| 638 | else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2) | ||
| 639 | shift = 1; | ||
| 640 | else | ||
| 641 | ir1 = ir; | ||
| 642 | ir2 = IR(ir1->op1); | ||
| 643 | /* A non-reassociated add. Must be a loop-carried dependence. */ | ||
| 644 | if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2)) | ||
| 645 | ofs += (ptrdiff_t)IR(ir2->op2)->i << shift; | ||
| 646 | else | ||
| 647 | return 0; | ||
| 648 | idxref = ir2->op1; | ||
| 649 | /* Try to CSE the reassociated chain. Give up if not found. */ | ||
| 650 | if (ir1 != ir && | ||
| 651 | !(idxref = reassoc_trycse(J, ir1->o, idxref, | ||
| 652 | ir1->o == IR_BSHL ? ir1->op2 : idxref))) | ||
| 653 | return 0; | ||
| 654 | if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2))) | ||
| 655 | return 0; | ||
| 656 | if (ofs != 0) { | ||
| 657 | IRRef refk = tref_ref(lj_ir_kintp(J, ofs)); | ||
| 658 | if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk))) | ||
| 659 | return 0; | ||
| 660 | } | ||
| 661 | return idxref; /* Success, found a reassociated index reference. Phew. */ | ||
| 662 | } | ||
| 663 | return 0; /* Failure. */ | ||
| 664 | } | ||
| 665 | |||
| 605 | /* XLOAD forwarding. */ | 666 | /* XLOAD forwarding. */ |
| 606 | TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) | 667 | TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) |
| 607 | { | 668 | { |
| 608 | IRRef xref = fins->op1; | 669 | IRRef xref = fins->op1; |
| 670 | IRIns *xr = IR(xref); | ||
| 609 | IRRef lim = xref; /* Search limit. */ | 671 | IRRef lim = xref; /* Search limit. */ |
| 610 | IRRef ref; | 672 | IRRef ref; |
| 611 | 673 | ||
| @@ -614,9 +676,10 @@ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) | |||
| 614 | 676 | ||
| 615 | /* Search for conflicting stores. */ | 677 | /* Search for conflicting stores. */ |
| 616 | ref = J->chain[IR_XSTORE]; | 678 | ref = J->chain[IR_XSTORE]; |
| 679 | retry: | ||
| 617 | while (ref > xref) { | 680 | while (ref > xref) { |
| 618 | IRIns *store = IR(ref); | 681 | IRIns *store = IR(ref); |
| 619 | switch (aa_xref(J, fins, store)) { | 682 | switch (aa_xref(J, xr, fins, store)) { |
| 620 | case ALIAS_NO: break; /* Continue searching. */ | 683 | case ALIAS_NO: break; /* Continue searching. */ |
| 621 | case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ | 684 | case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */ |
| 622 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ | 685 | case ALIAS_MUST: return store->op2; /* Store forwarding. */ |
| @@ -629,10 +692,21 @@ cselim: | |||
| 629 | ref = J->chain[IR_XLOAD]; | 692 | ref = J->chain[IR_XLOAD]; |
| 630 | while (ref > lim) { | 693 | while (ref > lim) { |
| 631 | /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ | 694 | /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ |
| 632 | if (IR(ref)->op1 == fins->op1 && irt_sametype(IR(ref)->t, fins->t)) | 695 | if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t)) |
| 633 | return ref; | 696 | return ref; |
| 634 | ref = IR(ref)->prev; | 697 | ref = IR(ref)->prev; |
| 635 | } | 698 | } |
| 699 | |||
| 700 | /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */ | ||
| 701 | if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] && | ||
| 702 | xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) { | ||
| 703 | ref = J->chain[IR_XSTORE]; | ||
| 704 | while (ref > lim) /* Skip stores that have already been checked. */ | ||
| 705 | ref = IR(ref)->prev; | ||
| 706 | lim = xref; | ||
| 707 | xr = IR(xref); | ||
| 708 | goto retry; /* Retry with the reassociated reference. */ | ||
| 709 | } | ||
| 636 | return lj_ir_emit(J); | 710 | return lj_ir_emit(J); |
| 637 | } | 711 | } |
| 638 | 712 | ||
| @@ -640,12 +714,13 @@ cselim: | |||
| 640 | TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) | 714 | TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) |
| 641 | { | 715 | { |
| 642 | IRRef xref = fins->op1; | 716 | IRRef xref = fins->op1; |
| 717 | IRIns *xr = IR(xref); | ||
| 643 | IRRef val = fins->op2; /* Stored value reference. */ | 718 | IRRef val = fins->op2; /* Stored value reference. */ |
| 644 | IRRef1 *refp = &J->chain[IR_XSTORE]; | 719 | IRRef1 *refp = &J->chain[IR_XSTORE]; |
| 645 | IRRef ref = *refp; | 720 | IRRef ref = *refp; |
| 646 | while (ref > xref) { /* Search for redundant or conflicting stores. */ | 721 | while (ref > xref) { /* Search for redundant or conflicting stores. */ |
| 647 | IRIns *store = IR(ref); | 722 | IRIns *store = IR(ref); |
| 648 | switch (aa_xref(J, fins, store)) { | 723 | switch (aa_xref(J, xr, fins, store)) { |
| 649 | case ALIAS_NO: | 724 | case ALIAS_NO: |
| 650 | break; /* Continue searching. */ | 725 | break; /* Continue searching. */ |
| 651 | case ALIAS_MAY: | 726 | case ALIAS_MAY: |
