diff options
| author | Mike Pall <mike> | 2013-04-26 13:47:41 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2013-04-26 13:47:41 +0200 |
| commit | 47fa9a8d8ff7123e160abfc18c88589bbb7f4b58 (patch) | |
| tree | fbd673e26887d2cc2e63b43016d04fc8ae4e8767 /src | |
| parent | 5fd659f8599507c7d9c22e31afe1731c971fe98f (diff) | |
| download | luajit-47fa9a8d8ff7123e160abfc18c88589bbb7f4b58.tar.gz luajit-47fa9a8d8ff7123e160abfc18c88589bbb7f4b58.tar.bz2 luajit-47fa9a8d8ff7123e160abfc18c88589bbb7f4b58.zip | |
Change semantics of buffer ops to simplify CSE and DCE.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm.c | 23 | ||||
| -rw-r--r-- | src/lj_ffrecord.c | 4 | ||||
| -rw-r--r-- | src/lj_ir.h | 4 | ||||
| -rw-r--r-- | src/lj_ircall.h | 14 | ||||
| -rw-r--r-- | src/lj_opt_fold.c | 67 | ||||
| -rw-r--r-- | src/lj_record.c | 2 |
6 files changed, 58 insertions, 56 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 6ff32940..73df6850 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -1039,20 +1039,18 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); | |||
| 1039 | 1039 | ||
| 1040 | static void asm_bufhdr(ASMState *as, IRIns *ir) | 1040 | static void asm_bufhdr(ASMState *as, IRIns *ir) |
| 1041 | { | 1041 | { |
| 1042 | if (ra_used(ir)) { | 1042 | Reg sb = ra_dest(as, ir, RSET_GPR); |
| 1043 | Reg sb = ra_dest(as, ir, RSET_GPR); | 1043 | if (!(ir->op2 & IRBUFHDR_APPEND)) { |
| 1044 | if (!(ir->op2 & IRBUFHDR_APPEND)) { | 1044 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); |
| 1045 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | 1045 | /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ |
| 1046 | /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ | 1046 | emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); |
| 1047 | emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); | 1047 | emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); |
| 1048 | emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); | 1048 | } |
| 1049 | } | ||
| 1050 | #if LJ_TARGET_X86ORX64 | 1049 | #if LJ_TARGET_X86ORX64 |
| 1051 | ra_left(as, sb, ir->op1); | 1050 | ra_left(as, sb, ir->op1); |
| 1052 | #else | 1051 | #else |
| 1053 | ra_leftov(as, sb, ir->op1); | 1052 | ra_leftov(as, sb, ir->op1); |
| 1054 | #endif | 1053 | #endif |
| 1055 | } | ||
| 1056 | } | 1054 | } |
| 1057 | 1055 | ||
| 1058 | static void asm_bufput(ASMState *as, IRIns *ir) | 1056 | static void asm_bufput(ASMState *as, IRIns *ir) |
| @@ -1061,7 +1059,6 @@ static void asm_bufput(ASMState *as, IRIns *ir) | |||
| 1061 | IRRef args[3]; | 1059 | IRRef args[3]; |
| 1062 | IRIns *irs; | 1060 | IRIns *irs; |
| 1063 | int kchar = -1; | 1061 | int kchar = -1; |
| 1064 | if (!ra_used(ir)) return; | ||
| 1065 | args[0] = ir->op1; /* SBuf * */ | 1062 | args[0] = ir->op1; /* SBuf * */ |
| 1066 | args[1] = ir->op2; /* GCstr * */ | 1063 | args[1] = ir->op2; /* GCstr * */ |
| 1067 | irs = IR(ir->op2); | 1064 | irs = IR(ir->op2); |
| @@ -1107,7 +1104,7 @@ static void asm_bufstr(ASMState *as, IRIns *ir) | |||
| 1107 | { | 1104 | { |
| 1108 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; | 1105 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; |
| 1109 | IRRef args[1]; | 1106 | IRRef args[1]; |
| 1110 | args[0] = ir->op2; /* SBuf *sb */ | 1107 | args[0] = ir->op1; /* SBuf *sb */ |
| 1111 | as->gcsteps++; | 1108 | as->gcsteps++; |
| 1112 | asm_setupresult(as, ir, ci); /* GCstr * */ | 1109 | asm_setupresult(as, ir, ci); /* GCstr * */ |
| 1113 | asm_gencall(as, ci, args); | 1110 | asm_gencall(as, ci, args); |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 69423760..a6ce2df7 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
| @@ -766,7 +766,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) | |||
| 766 | TRef tr = hdr; | 766 | TRef tr = hdr; |
| 767 | for (i = 0; J->base[i] != 0; i++) | 767 | for (i = 0; J->base[i] != 0; i++) |
| 768 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); | 768 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); |
| 769 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); | 769 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); |
| 770 | } | 770 | } |
| 771 | UNUSED(rd); | 771 | UNUSED(rd); |
| 772 | } | 772 | } |
| @@ -777,7 +777,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) | |||
| 777 | TRef hdr = emitir(IRT(IR_BUFHDR, IRT_P32), | 777 | TRef hdr = emitir(IRT(IR_BUFHDR, IRT_P32), |
| 778 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); | 778 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); |
| 779 | TRef tr = lj_ir_call(J, rd->data, hdr, str); | 779 | TRef tr = lj_ir_call(J, rd->data, hdr, str); |
| 780 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); | 780 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); |
| 781 | } | 781 | } |
| 782 | 782 | ||
| 783 | /* -- Table library fast functions ---------------------------------------- */ | 783 | /* -- Table library fast functions ---------------------------------------- */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 7ab8ab12..841153d8 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
| @@ -121,8 +121,8 @@ | |||
| 121 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ | 121 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ |
| 122 | \ | 122 | \ |
| 123 | /* Buffer operations. */ \ | 123 | /* Buffer operations. */ \ |
| 124 | _(BUFHDR, S , ref, lit) \ | 124 | _(BUFHDR, L , ref, lit) \ |
| 125 | _(BUFPUT, S , ref, ref) \ | 125 | _(BUFPUT, L , ref, ref) \ |
| 126 | _(BUFSTR, A , ref, ref) \ | 126 | _(BUFSTR, A , ref, ref) \ |
| 127 | \ | 127 | \ |
| 128 | /* Barriers. */ \ | 128 | /* Barriers. */ \ |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 35c063c4..3e190c80 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
| @@ -107,13 +107,13 @@ typedef struct CCallInfo { | |||
| 107 | _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ | 107 | _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ |
| 108 | _(ANY, lj_str_fromchar, 2, FN, STR, CCI_L) \ | 108 | _(ANY, lj_str_fromchar, 2, FN, STR, CCI_L) \ |
| 109 | _(ANY, lj_buf_putmem, 3, S, P32, 0) \ | 109 | _(ANY, lj_buf_putmem, 3, S, P32, 0) \ |
| 110 | _(ANY, lj_buf_putstr, 2, FS, P32, 0) \ | 110 | _(ANY, lj_buf_putstr, 2, FL, P32, 0) \ |
| 111 | _(ANY, lj_buf_putchar, 2, FS, P32, 0) \ | 111 | _(ANY, lj_buf_putchar, 2, FL, P32, 0) \ |
| 112 | _(ANY, lj_buf_putint, 2, FS, P32, 0) \ | 112 | _(ANY, lj_buf_putint, 2, FL, P32, 0) \ |
| 113 | _(ANY, lj_buf_putnum, 2, FS, P32, 0) \ | 113 | _(ANY, lj_buf_putnum, 2, FL, P32, 0) \ |
| 114 | _(ANY, lj_buf_putstr_reverse, 2, FS, P32, 0) \ | 114 | _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \ |
| 115 | _(ANY, lj_buf_putstr_lower, 2, FS, P32, 0) \ | 115 | _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \ |
| 116 | _(ANY, lj_buf_putstr_upper, 2, FS, P32, 0) \ | 116 | _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \ |
| 117 | _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ | 117 | _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ |
| 118 | _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ | 118 | _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ |
| 119 | _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ | 119 | _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 9c751d98..e9f873b7 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -520,9 +520,23 @@ LJFOLDF(kfold_strcmp) | |||
| 520 | 520 | ||
| 521 | /* -- Constant folding and forwarding for buffers ------------------------- */ | 521 | /* -- Constant folding and forwarding for buffers ------------------------- */ |
| 522 | 522 | ||
| 523 | /* Note: buffer ops are not CSEd until the BUFSTR. It's ok to modify them. */ | 523 | /* |
| 524 | ** Buffer ops perform stores, but their effect is limited to the buffer | ||
| 525 | ** itself. Also, buffer ops are chained: a use of an op implies a use of | ||
| 526 | ** all other ops up the chain. Conversely, if an op is unused, all ops | ||
| 527 | ** up the chain can go unused. This largely eliminates the need to treat | ||
| 528 | ** them as stores. | ||
| 529 | ** | ||
| 530 | ** Alas, treating them as normal (IRM_N) ops doesn't work, because they | ||
| 531 | ** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP | ||
| 532 | ** or if FOLD is disabled. | ||
| 533 | ** | ||
| 534 | ** The compromise is to declare them as loads, emit them like stores and | ||
| 535 | ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain | ||
| 536 | ** fragments left over from CSE are eliminated by DCE. | ||
| 537 | */ | ||
| 524 | 538 | ||
| 525 | /* BUFHDR is treated like a store, see below. */ | 539 | /* BUFHDR is emitted like a store, see below. */ |
| 526 | 540 | ||
| 527 | LJFOLD(BUFPUT BUFHDR BUFSTR) | 541 | LJFOLD(BUFPUT BUFHDR BUFSTR) |
| 528 | LJFOLDF(bufput_append) | 542 | LJFOLDF(bufput_append) |
| @@ -530,14 +544,14 @@ LJFOLDF(bufput_append) | |||
| 530 | /* New buffer, no other buffer op inbetween and same buffer? */ | 544 | /* New buffer, no other buffer op inbetween and same buffer? */ |
| 531 | if ((J->flags & JIT_F_OPT_FWD) && | 545 | if ((J->flags & JIT_F_OPT_FWD) && |
| 532 | !(fleft->op2 & IRBUFHDR_APPEND) && | 546 | !(fleft->op2 & IRBUFHDR_APPEND) && |
| 533 | fleft->prev == fright->op1 && | 547 | fleft->prev == fright->op2 && |
| 534 | fleft->op1 == IR(fright->op1)->op1) { | 548 | fleft->op1 == IR(fright->op2)->op1) { |
| 535 | IRRef ref = fins->op1; | 549 | IRRef ref = fins->op1; |
| 536 | IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ | 550 | IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ |
| 537 | IR(ref)->op1 = fright->op2; | 551 | IR(ref)->op1 = fright->op1; |
| 538 | return ref; | 552 | return ref; |
| 539 | } | 553 | } |
| 540 | return EMITFOLD; /* This is a store and always emitted. */ | 554 | return EMITFOLD; /* Always emit, CSE later. */ |
| 541 | } | 555 | } |
| 542 | 556 | ||
| 543 | LJFOLD(BUFPUT any any) | 557 | LJFOLD(BUFPUT any any) |
| @@ -565,45 +579,36 @@ LJFOLDF(bufput_kgc) | |||
| 565 | } | 579 | } |
| 566 | } | 580 | } |
| 567 | } | 581 | } |
| 568 | return EMITFOLD; /* This is a store and always emitted. */ | 582 | return EMITFOLD; /* Always emit, CSE later. */ |
| 569 | } | 583 | } |
| 570 | 584 | ||
| 571 | LJFOLD(BUFSTR any any) | 585 | LJFOLD(BUFSTR any any) |
| 572 | LJFOLDF(bufstr_kfold_cse) | 586 | LJFOLDF(bufstr_kfold_cse) |
| 573 | { | 587 | { |
| 574 | lua_assert(fright->o == IR_BUFHDR || fright->o == IR_BUFPUT || | 588 | lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || |
| 575 | fright->o == IR_CALLS); | 589 | fleft->o == IR_CALLL); |
| 576 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | 590 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { |
| 577 | if (fright->o == IR_BUFHDR) { /* No put operations? */ | 591 | if (fleft->o == IR_BUFHDR) { /* No put operations? */ |
| 578 | if (!(fright->op2 & IRBUFHDR_APPEND)) { /* Empty buffer? */ | 592 | if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */ |
| 579 | lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */ | ||
| 580 | return lj_ir_kstr(J, &J2G(J)->strempty); | 593 | return lj_ir_kstr(J, &J2G(J)->strempty); |
| 581 | } | 594 | fins->op1 = fleft->prev; /* Relies on checks in bufput_append. */ |
| 582 | fins->op2 = fright->prev; /* Relies on checks in bufput_append. */ | ||
| 583 | return CSEFOLD; | 595 | return CSEFOLD; |
| 584 | } else if (fright->o == IR_BUFPUT) { | 596 | } else if (fleft->o == IR_BUFPUT) { |
| 585 | IRIns *irb = IR(fright->op1); | 597 | IRIns *irb = IR(fleft->op1); |
| 586 | if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) { | 598 | if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) |
| 587 | lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */ | 599 | return fleft->op2; /* Shortcut for a single put operation. */ |
| 588 | return fright->op2; /* Shortcut for a single put operation. */ | ||
| 589 | } | ||
| 590 | } | 600 | } |
| 591 | } | 601 | } |
| 592 | /* Try to CSE the whole chain. */ | 602 | /* Try to CSE the whole chain. */ |
| 593 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | 603 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { |
| 594 | IRRef ref = J->chain[IR_BUFSTR]; | 604 | IRRef ref = J->chain[IR_BUFSTR]; |
| 595 | while (ref) { | 605 | while (ref) { |
| 596 | IRRef last = fins->op2; | 606 | IRRef last = fins->op1; |
| 597 | IRIns *irs = IR(ref), *ira = fright, *irb = IR(irs->op2); | 607 | IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); |
| 598 | while (ira->o == irb->o && ira->op2 == irb->op2) { | 608 | while (ira->o == irb->o && ira->op2 == irb->op2) { |
| 599 | if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) { | 609 | if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) { |
| 600 | IRIns *irh; | ||
| 601 | for (irh = IR(ira->prev); irh != irb; irh = IR(irh->prev)) | ||
| 602 | if (irh->op1 == irs->op2) | ||
| 603 | return ref; /* Do CSE, but avoid rollback if append follows. */ | ||
| 604 | lj_ir_rollback(J, last); /* Eliminate the current chain. */ | ||
| 605 | return ref; /* CSE succeeded. */ | 610 | return ref; /* CSE succeeded. */ |
| 606 | } else if (ira->o == IR_CALLS) { | 611 | } else if (ira->o == IR_CALLL) { |
| 607 | ira = IR(ira->op1); irb = IR(irb->op1); | 612 | ira = IR(ira->op1); irb = IR(irb->op1); |
| 608 | lua_assert(ira->o == IR_CARG && irb->o == IR_CARG); | 613 | lua_assert(ira->o == IR_CARG && irb->o == IR_CARG); |
| 609 | if (ira->op2 != irb->op2) break; | 614 | if (ira->op2 != irb->op2) break; |
| @@ -618,9 +623,9 @@ LJFOLDF(bufstr_kfold_cse) | |||
| 618 | return EMITFOLD; /* No CSE possible. */ | 623 | return EMITFOLD; /* No CSE possible. */ |
| 619 | } | 624 | } |
| 620 | 625 | ||
| 621 | LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_reverse) | 626 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) |
| 622 | LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_upper) | 627 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) |
| 623 | LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_lower) | 628 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) |
| 624 | LJFOLDF(bufput_kfold_op) | 629 | LJFOLDF(bufput_kfold_op) |
| 625 | { | 630 | { |
| 626 | if (irref_isk(fleft->op2)) { | 631 | if (irref_isk(fleft->op2)) { |
diff --git a/src/lj_record.c b/src/lj_record.c index 1beaa75f..8dc102e9 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -1622,7 +1622,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) | |||
| 1622 | do { | 1622 | do { |
| 1623 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); | 1623 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); |
| 1624 | } while (trp <= top); | 1624 | } while (trp <= top); |
| 1625 | tr = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); | 1625 | tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); |
| 1626 | J->maxslot = (BCReg)(xbase - J->base); | 1626 | J->maxslot = (BCReg)(xbase - J->base); |
| 1627 | if (xbase == base) return tr; | 1627 | if (xbase == base) return tr; |
| 1628 | } | 1628 | } |
