diff options
-rw-r--r-- | src/lj_asm.c | 23 | ||||
-rw-r--r-- | src/lj_ffrecord.c | 4 | ||||
-rw-r--r-- | src/lj_ir.h | 4 | ||||
-rw-r--r-- | src/lj_ircall.h | 14 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 67 | ||||
-rw-r--r-- | src/lj_record.c | 2 |
6 files changed, 58 insertions, 56 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 6ff32940..73df6850 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1039,20 +1039,18 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); | |||
1039 | 1039 | ||
1040 | static void asm_bufhdr(ASMState *as, IRIns *ir) | 1040 | static void asm_bufhdr(ASMState *as, IRIns *ir) |
1041 | { | 1041 | { |
1042 | if (ra_used(ir)) { | 1042 | Reg sb = ra_dest(as, ir, RSET_GPR); |
1043 | Reg sb = ra_dest(as, ir, RSET_GPR); | 1043 | if (!(ir->op2 & IRBUFHDR_APPEND)) { |
1044 | if (!(ir->op2 & IRBUFHDR_APPEND)) { | 1044 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); |
1045 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | 1045 | /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ |
1046 | /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ | 1046 | emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); |
1047 | emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); | 1047 | emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); |
1048 | emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); | 1048 | } |
1049 | } | ||
1050 | #if LJ_TARGET_X86ORX64 | 1049 | #if LJ_TARGET_X86ORX64 |
1051 | ra_left(as, sb, ir->op1); | 1050 | ra_left(as, sb, ir->op1); |
1052 | #else | 1051 | #else |
1053 | ra_leftov(as, sb, ir->op1); | 1052 | ra_leftov(as, sb, ir->op1); |
1054 | #endif | 1053 | #endif |
1055 | } | ||
1056 | } | 1054 | } |
1057 | 1055 | ||
1058 | static void asm_bufput(ASMState *as, IRIns *ir) | 1056 | static void asm_bufput(ASMState *as, IRIns *ir) |
@@ -1061,7 +1059,6 @@ static void asm_bufput(ASMState *as, IRIns *ir) | |||
1061 | IRRef args[3]; | 1059 | IRRef args[3]; |
1062 | IRIns *irs; | 1060 | IRIns *irs; |
1063 | int kchar = -1; | 1061 | int kchar = -1; |
1064 | if (!ra_used(ir)) return; | ||
1065 | args[0] = ir->op1; /* SBuf * */ | 1062 | args[0] = ir->op1; /* SBuf * */ |
1066 | args[1] = ir->op2; /* GCstr * */ | 1063 | args[1] = ir->op2; /* GCstr * */ |
1067 | irs = IR(ir->op2); | 1064 | irs = IR(ir->op2); |
@@ -1107,7 +1104,7 @@ static void asm_bufstr(ASMState *as, IRIns *ir) | |||
1107 | { | 1104 | { |
1108 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; | 1105 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; |
1109 | IRRef args[1]; | 1106 | IRRef args[1]; |
1110 | args[0] = ir->op2; /* SBuf *sb */ | 1107 | args[0] = ir->op1; /* SBuf *sb */ |
1111 | as->gcsteps++; | 1108 | as->gcsteps++; |
1112 | asm_setupresult(as, ir, ci); /* GCstr * */ | 1109 | asm_setupresult(as, ir, ci); /* GCstr * */ |
1113 | asm_gencall(as, ci, args); | 1110 | asm_gencall(as, ci, args); |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 69423760..a6ce2df7 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
@@ -766,7 +766,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) | |||
766 | TRef tr = hdr; | 766 | TRef tr = hdr; |
767 | for (i = 0; J->base[i] != 0; i++) | 767 | for (i = 0; J->base[i] != 0; i++) |
768 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); | 768 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); |
769 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); | 769 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); |
770 | } | 770 | } |
771 | UNUSED(rd); | 771 | UNUSED(rd); |
772 | } | 772 | } |
@@ -777,7 +777,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) | |||
777 | TRef hdr = emitir(IRT(IR_BUFHDR, IRT_P32), | 777 | TRef hdr = emitir(IRT(IR_BUFHDR, IRT_P32), |
778 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); | 778 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); |
779 | TRef tr = lj_ir_call(J, rd->data, hdr, str); | 779 | TRef tr = lj_ir_call(J, rd->data, hdr, str); |
780 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); | 780 | J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); |
781 | } | 781 | } |
782 | 782 | ||
783 | /* -- Table library fast functions ---------------------------------------- */ | 783 | /* -- Table library fast functions ---------------------------------------- */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 7ab8ab12..841153d8 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -121,8 +121,8 @@ | |||
121 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ | 121 | _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ |
122 | \ | 122 | \ |
123 | /* Buffer operations. */ \ | 123 | /* Buffer operations. */ \ |
124 | _(BUFHDR, S , ref, lit) \ | 124 | _(BUFHDR, L , ref, lit) \ |
125 | _(BUFPUT, S , ref, ref) \ | 125 | _(BUFPUT, L , ref, ref) \ |
126 | _(BUFSTR, A , ref, ref) \ | 126 | _(BUFSTR, A , ref, ref) \ |
127 | \ | 127 | \ |
128 | /* Barriers. */ \ | 128 | /* Barriers. */ \ |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 35c063c4..3e190c80 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -107,13 +107,13 @@ typedef struct CCallInfo { | |||
107 | _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ | 107 | _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ |
108 | _(ANY, lj_str_fromchar, 2, FN, STR, CCI_L) \ | 108 | _(ANY, lj_str_fromchar, 2, FN, STR, CCI_L) \ |
109 | _(ANY, lj_buf_putmem, 3, S, P32, 0) \ | 109 | _(ANY, lj_buf_putmem, 3, S, P32, 0) \ |
110 | _(ANY, lj_buf_putstr, 2, FS, P32, 0) \ | 110 | _(ANY, lj_buf_putstr, 2, FL, P32, 0) \ |
111 | _(ANY, lj_buf_putchar, 2, FS, P32, 0) \ | 111 | _(ANY, lj_buf_putchar, 2, FL, P32, 0) \ |
112 | _(ANY, lj_buf_putint, 2, FS, P32, 0) \ | 112 | _(ANY, lj_buf_putint, 2, FL, P32, 0) \ |
113 | _(ANY, lj_buf_putnum, 2, FS, P32, 0) \ | 113 | _(ANY, lj_buf_putnum, 2, FL, P32, 0) \ |
114 | _(ANY, lj_buf_putstr_reverse, 2, FS, P32, 0) \ | 114 | _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \ |
115 | _(ANY, lj_buf_putstr_lower, 2, FS, P32, 0) \ | 115 | _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \ |
116 | _(ANY, lj_buf_putstr_upper, 2, FS, P32, 0) \ | 116 | _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \ |
117 | _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ | 117 | _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ |
118 | _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ | 118 | _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ |
119 | _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ | 119 | _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 9c751d98..e9f873b7 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -520,9 +520,23 @@ LJFOLDF(kfold_strcmp) | |||
520 | 520 | ||
521 | /* -- Constant folding and forwarding for buffers ------------------------- */ | 521 | /* -- Constant folding and forwarding for buffers ------------------------- */ |
522 | 522 | ||
523 | /* Note: buffer ops are not CSEd until the BUFSTR. It's ok to modify them. */ | 523 | /* |
524 | ** Buffer ops perform stores, but their effect is limited to the buffer | ||
525 | ** itself. Also, buffer ops are chained: a use of an op implies a use of | ||
526 | ** all other ops up the chain. Conversely, if an op is unused, all ops | ||
527 | ** up the chain can go unused. This largely eliminates the need to treat | ||
528 | ** them as stores. | ||
529 | ** | ||
530 | ** Alas, treating them as normal (IRM_N) ops doesn't work, because they | ||
531 | ** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP | ||
532 | ** or if FOLD is disabled. | ||
533 | ** | ||
534 | ** The compromise is to declare them as loads, emit them like stores and | ||
535 | ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain | ||
536 | ** fragments left over from CSE are eliminated by DCE. | ||
537 | */ | ||
524 | 538 | ||
525 | /* BUFHDR is treated like a store, see below. */ | 539 | /* BUFHDR is emitted like a store, see below. */ |
526 | 540 | ||
527 | LJFOLD(BUFPUT BUFHDR BUFSTR) | 541 | LJFOLD(BUFPUT BUFHDR BUFSTR) |
528 | LJFOLDF(bufput_append) | 542 | LJFOLDF(bufput_append) |
@@ -530,14 +544,14 @@ LJFOLDF(bufput_append) | |||
530 | /* New buffer, no other buffer op in between and same buffer? */ | 544 | /* New buffer, no other buffer op in between and same buffer? */ |
531 | if ((J->flags & JIT_F_OPT_FWD) && | 545 | if ((J->flags & JIT_F_OPT_FWD) && |
532 | !(fleft->op2 & IRBUFHDR_APPEND) && | 546 | !(fleft->op2 & IRBUFHDR_APPEND) && |
533 | fleft->prev == fright->op1 && | 547 | fleft->prev == fright->op2 && |
534 | fleft->op1 == IR(fright->op1)->op1) { | 548 | fleft->op1 == IR(fright->op2)->op1) { |
535 | IRRef ref = fins->op1; | 549 | IRRef ref = fins->op1; |
536 | IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ | 550 | IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ |
537 | IR(ref)->op1 = fright->op2; | 551 | IR(ref)->op1 = fright->op1; |
538 | return ref; | 552 | return ref; |
539 | } | 553 | } |
540 | return EMITFOLD; /* This is a store and always emitted. */ | 554 | return EMITFOLD; /* Always emit, CSE later. */ |
541 | } | 555 | } |
542 | 556 | ||
543 | LJFOLD(BUFPUT any any) | 557 | LJFOLD(BUFPUT any any) |
@@ -565,45 +579,36 @@ LJFOLDF(bufput_kgc) | |||
565 | } | 579 | } |
566 | } | 580 | } |
567 | } | 581 | } |
568 | return EMITFOLD; /* This is a store and always emitted. */ | 582 | return EMITFOLD; /* Always emit, CSE later. */ |
569 | } | 583 | } |
570 | 584 | ||
571 | LJFOLD(BUFSTR any any) | 585 | LJFOLD(BUFSTR any any) |
572 | LJFOLDF(bufstr_kfold_cse) | 586 | LJFOLDF(bufstr_kfold_cse) |
573 | { | 587 | { |
574 | lua_assert(fright->o == IR_BUFHDR || fright->o == IR_BUFPUT || | 588 | lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || |
575 | fright->o == IR_CALLS); | 589 | fleft->o == IR_CALLL); |
576 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | 590 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { |
577 | if (fright->o == IR_BUFHDR) { /* No put operations? */ | 591 | if (fleft->o == IR_BUFHDR) { /* No put operations? */ |
578 | if (!(fright->op2 & IRBUFHDR_APPEND)) { /* Empty buffer? */ | 592 | if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */ |
579 | lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */ | ||
580 | return lj_ir_kstr(J, &J2G(J)->strempty); | 593 | return lj_ir_kstr(J, &J2G(J)->strempty); |
581 | } | 594 | fins->op1 = fleft->prev; /* Relies on checks in bufput_append. */ |
582 | fins->op2 = fright->prev; /* Relies on checks in bufput_append. */ | ||
583 | return CSEFOLD; | 595 | return CSEFOLD; |
584 | } else if (fright->o == IR_BUFPUT) { | 596 | } else if (fleft->o == IR_BUFPUT) { |
585 | IRIns *irb = IR(fright->op1); | 597 | IRIns *irb = IR(fleft->op1); |
586 | if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) { | 598 | if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) |
587 | lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */ | 599 | return fleft->op2; /* Shortcut for a single put operation. */ |
588 | return fright->op2; /* Shortcut for a single put operation. */ | ||
589 | } | ||
590 | } | 600 | } |
591 | } | 601 | } |
592 | /* Try to CSE the whole chain. */ | 602 | /* Try to CSE the whole chain. */ |
593 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | 603 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { |
594 | IRRef ref = J->chain[IR_BUFSTR]; | 604 | IRRef ref = J->chain[IR_BUFSTR]; |
595 | while (ref) { | 605 | while (ref) { |
596 | IRRef last = fins->op2; | 606 | IRRef last = fins->op1; |
597 | IRIns *irs = IR(ref), *ira = fright, *irb = IR(irs->op2); | 607 | IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); |
598 | while (ira->o == irb->o && ira->op2 == irb->op2) { | 608 | while (ira->o == irb->o && ira->op2 == irb->op2) { |
599 | if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) { | 609 | if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) { |
600 | IRIns *irh; | ||
601 | for (irh = IR(ira->prev); irh != irb; irh = IR(irh->prev)) | ||
602 | if (irh->op1 == irs->op2) | ||
603 | return ref; /* Do CSE, but avoid rollback if append follows. */ | ||
604 | lj_ir_rollback(J, last); /* Eliminate the current chain. */ | ||
605 | return ref; /* CSE succeeded. */ | 610 | return ref; /* CSE succeeded. */ |
606 | } else if (ira->o == IR_CALLS) { | 611 | } else if (ira->o == IR_CALLL) { |
607 | ira = IR(ira->op1); irb = IR(irb->op1); | 612 | ira = IR(ira->op1); irb = IR(irb->op1); |
608 | lua_assert(ira->o == IR_CARG && irb->o == IR_CARG); | 613 | lua_assert(ira->o == IR_CARG && irb->o == IR_CARG); |
609 | if (ira->op2 != irb->op2) break; | 614 | if (ira->op2 != irb->op2) break; |
@@ -618,9 +623,9 @@ LJFOLDF(bufstr_kfold_cse) | |||
618 | return EMITFOLD; /* No CSE possible. */ | 623 | return EMITFOLD; /* No CSE possible. */ |
619 | } | 624 | } |
620 | 625 | ||
621 | LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_reverse) | 626 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) |
622 | LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_upper) | 627 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) |
623 | LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_lower) | 628 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) |
624 | LJFOLDF(bufput_kfold_op) | 629 | LJFOLDF(bufput_kfold_op) |
625 | { | 630 | { |
626 | if (irref_isk(fleft->op2)) { | 631 | if (irref_isk(fleft->op2)) { |
diff --git a/src/lj_record.c b/src/lj_record.c index 1beaa75f..8dc102e9 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -1622,7 +1622,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) | |||
1622 | do { | 1622 | do { |
1623 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); | 1623 | tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); |
1624 | } while (trp <= top); | 1624 | } while (trp <= top); |
1625 | tr = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); | 1625 | tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); |
1626 | J->maxslot = (BCReg)(xbase - J->base); | 1626 | J->maxslot = (BCReg)(xbase - J->base); |
1627 | if (xbase == base) return tr; | 1627 | if (xbase == base) return tr; |
1628 | } | 1628 | } |