aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lj_asm.c23
-rw-r--r--src/lj_ffrecord.c4
-rw-r--r--src/lj_ir.h4
-rw-r--r--src/lj_ircall.h14
-rw-r--r--src/lj_opt_fold.c67
-rw-r--r--src/lj_record.c2
6 files changed, 58 insertions, 56 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 6ff32940..73df6850 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1039,20 +1039,18 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1039 1039
1040static void asm_bufhdr(ASMState *as, IRIns *ir) 1040static void asm_bufhdr(ASMState *as, IRIns *ir)
1041{ 1041{
1042 if (ra_used(ir)) { 1042 Reg sb = ra_dest(as, ir, RSET_GPR);
1043 Reg sb = ra_dest(as, ir, RSET_GPR); 1043 if (!(ir->op2 & IRBUFHDR_APPEND)) {
1044 if (!(ir->op2 & IRBUFHDR_APPEND)) { 1044 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1045 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); 1045 /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
1046 /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ 1046 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
1047 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); 1047 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
1048 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); 1048 }
1049 }
1050#if LJ_TARGET_X86ORX64 1049#if LJ_TARGET_X86ORX64
1051 ra_left(as, sb, ir->op1); 1050 ra_left(as, sb, ir->op1);
1052#else 1051#else
1053 ra_leftov(as, sb, ir->op1); 1052 ra_leftov(as, sb, ir->op1);
1054#endif 1053#endif
1055 }
1056} 1054}
1057 1055
1058static void asm_bufput(ASMState *as, IRIns *ir) 1056static void asm_bufput(ASMState *as, IRIns *ir)
@@ -1061,7 +1059,6 @@ static void asm_bufput(ASMState *as, IRIns *ir)
1061 IRRef args[3]; 1059 IRRef args[3];
1062 IRIns *irs; 1060 IRIns *irs;
1063 int kchar = -1; 1061 int kchar = -1;
1064 if (!ra_used(ir)) return;
1065 args[0] = ir->op1; /* SBuf * */ 1062 args[0] = ir->op1; /* SBuf * */
1066 args[1] = ir->op2; /* GCstr * */ 1063 args[1] = ir->op2; /* GCstr * */
1067 irs = IR(ir->op2); 1064 irs = IR(ir->op2);
@@ -1107,7 +1104,7 @@ static void asm_bufstr(ASMState *as, IRIns *ir)
1107{ 1104{
1108 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; 1105 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1109 IRRef args[1]; 1106 IRRef args[1];
1110 args[0] = ir->op2; /* SBuf *sb */ 1107 args[0] = ir->op1; /* SBuf *sb */
1111 as->gcsteps++; 1108 as->gcsteps++;
1112 asm_setupresult(as, ir, ci); /* GCstr * */ 1109 asm_setupresult(as, ir, ci); /* GCstr * */
1113 asm_gencall(as, ci, args); 1110 asm_gencall(as, ci, args);
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 69423760..a6ce2df7 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -766,7 +766,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
766 TRef tr = hdr; 766 TRef tr = hdr;
767 for (i = 0; J->base[i] != 0; i++) 767 for (i = 0; J->base[i] != 0; i++)
768 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]); 768 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, J->base[i]);
769 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); 769 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
770 } 770 }
771 UNUSED(rd); 771 UNUSED(rd);
772} 772}
@@ -777,7 +777,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
777 TRef hdr = emitir(IRT(IR_BUFHDR, IRT_P32), 777 TRef hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
778 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); 778 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
779 TRef tr = lj_ir_call(J, rd->data, hdr, str); 779 TRef tr = lj_ir_call(J, rd->data, hdr, str);
780 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); 780 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
781} 781}
782 782
783/* -- Table library fast functions ---------------------------------------- */ 783/* -- Table library fast functions ---------------------------------------- */
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 7ab8ab12..841153d8 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -121,8 +121,8 @@
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 122 \
123 /* Buffer operations. */ \ 123 /* Buffer operations. */ \
124 _(BUFHDR, S , ref, lit) \ 124 _(BUFHDR, L , ref, lit) \
125 _(BUFPUT, S , ref, ref) \ 125 _(BUFPUT, L , ref, ref) \
126 _(BUFSTR, A , ref, ref) \ 126 _(BUFSTR, A , ref, ref) \
127 \ 127 \
128 /* Barriers. */ \ 128 /* Barriers. */ \
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 35c063c4..3e190c80 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -107,13 +107,13 @@ typedef struct CCallInfo {
107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \
108 _(ANY, lj_str_fromchar, 2, FN, STR, CCI_L) \ 108 _(ANY, lj_str_fromchar, 2, FN, STR, CCI_L) \
109 _(ANY, lj_buf_putmem, 3, S, P32, 0) \ 109 _(ANY, lj_buf_putmem, 3, S, P32, 0) \
110 _(ANY, lj_buf_putstr, 2, FS, P32, 0) \ 110 _(ANY, lj_buf_putstr, 2, FL, P32, 0) \
111 _(ANY, lj_buf_putchar, 2, FS, P32, 0) \ 111 _(ANY, lj_buf_putchar, 2, FL, P32, 0) \
112 _(ANY, lj_buf_putint, 2, FS, P32, 0) \ 112 _(ANY, lj_buf_putint, 2, FL, P32, 0) \
113 _(ANY, lj_buf_putnum, 2, FS, P32, 0) \ 113 _(ANY, lj_buf_putnum, 2, FL, P32, 0) \
114 _(ANY, lj_buf_putstr_reverse, 2, FS, P32, 0) \ 114 _(ANY, lj_buf_putstr_reverse, 2, FL, P32, 0) \
115 _(ANY, lj_buf_putstr_lower, 2, FS, P32, 0) \ 115 _(ANY, lj_buf_putstr_lower, 2, FL, P32, 0) \
116 _(ANY, lj_buf_putstr_upper, 2, FS, P32, 0) \ 116 _(ANY, lj_buf_putstr_upper, 2, FL, P32, 0) \
117 _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ 117 _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
118 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 118 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
119 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 119 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 9c751d98..e9f873b7 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -520,9 +520,23 @@ LJFOLDF(kfold_strcmp)
520 520
521/* -- Constant folding and forwarding for buffers ------------------------- */ 521/* -- Constant folding and forwarding for buffers ------------------------- */
522 522
523/* Note: buffer ops are not CSEd until the BUFSTR. It's ok to modify them. */ 523/*
524** Buffer ops perform stores, but their effect is limited to the buffer
525** itself. Also, buffer ops are chained: a use of an op implies a use of
526** all other ops up the chain. Conversely, if an op is unused, all ops
527** up the chain can go unused. This largely eliminates the need to treat
528** them as stores.
529**
530** Alas, treating them as normal (IRM_N) ops doesn't work, because they
531** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
532** or if FOLD is disabled.
533**
534** The compromise is to declare them as loads, emit them like stores and
535** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
536** fragments left over from CSE are eliminated by DCE.
537*/
524 538
525/* BUFHDR is treated like a store, see below. */ 539/* BUFHDR is emitted like a store, see below. */
526 540
527LJFOLD(BUFPUT BUFHDR BUFSTR) 541LJFOLD(BUFPUT BUFHDR BUFSTR)
528LJFOLDF(bufput_append) 542LJFOLDF(bufput_append)
@@ -530,14 +544,14 @@ LJFOLDF(bufput_append)
530 /* New buffer, no other buffer op inbetween and same buffer? */ 544 /* New buffer, no other buffer op inbetween and same buffer? */
531 if ((J->flags & JIT_F_OPT_FWD) && 545 if ((J->flags & JIT_F_OPT_FWD) &&
532 !(fleft->op2 & IRBUFHDR_APPEND) && 546 !(fleft->op2 & IRBUFHDR_APPEND) &&
533 fleft->prev == fright->op1 && 547 fleft->prev == fright->op2 &&
534 fleft->op1 == IR(fright->op1)->op1) { 548 fleft->op1 == IR(fright->op2)->op1) {
535 IRRef ref = fins->op1; 549 IRRef ref = fins->op1;
536 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ 550 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
537 IR(ref)->op1 = fright->op2; 551 IR(ref)->op1 = fright->op1;
538 return ref; 552 return ref;
539 } 553 }
540 return EMITFOLD; /* This is a store and always emitted. */ 554 return EMITFOLD; /* Always emit, CSE later. */
541} 555}
542 556
543LJFOLD(BUFPUT any any) 557LJFOLD(BUFPUT any any)
@@ -565,45 +579,36 @@ LJFOLDF(bufput_kgc)
565 } 579 }
566 } 580 }
567 } 581 }
568 return EMITFOLD; /* This is a store and always emitted. */ 582 return EMITFOLD; /* Always emit, CSE later. */
569} 583}
570 584
571LJFOLD(BUFSTR any any) 585LJFOLD(BUFSTR any any)
572LJFOLDF(bufstr_kfold_cse) 586LJFOLDF(bufstr_kfold_cse)
573{ 587{
574 lua_assert(fright->o == IR_BUFHDR || fright->o == IR_BUFPUT || 588 lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
575 fright->o == IR_CALLS); 589 fleft->o == IR_CALLL);
576 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { 590 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
577 if (fright->o == IR_BUFHDR) { /* No put operations? */ 591 if (fleft->o == IR_BUFHDR) { /* No put operations? */
578 if (!(fright->op2 & IRBUFHDR_APPEND)) { /* Empty buffer? */ 592 if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
579 lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */
580 return lj_ir_kstr(J, &J2G(J)->strempty); 593 return lj_ir_kstr(J, &J2G(J)->strempty);
581 } 594 fins->op1 = fleft->prev; /* Relies on checks in bufput_append. */
582 fins->op2 = fright->prev; /* Relies on checks in bufput_append. */
583 return CSEFOLD; 595 return CSEFOLD;
584 } else if (fright->o == IR_BUFPUT) { 596 } else if (fleft->o == IR_BUFPUT) {
585 IRIns *irb = IR(fright->op1); 597 IRIns *irb = IR(fleft->op1);
586 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) { 598 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
587 lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */ 599 return fleft->op2; /* Shortcut for a single put operation. */
588 return fright->op2; /* Shortcut for a single put operation. */
589 }
590 } 600 }
591 } 601 }
592 /* Try to CSE the whole chain. */ 602 /* Try to CSE the whole chain. */
593 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { 603 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
594 IRRef ref = J->chain[IR_BUFSTR]; 604 IRRef ref = J->chain[IR_BUFSTR];
595 while (ref) { 605 while (ref) {
596 IRRef last = fins->op2; 606 IRRef last = fins->op1;
597 IRIns *irs = IR(ref), *ira = fright, *irb = IR(irs->op2); 607 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
598 while (ira->o == irb->o && ira->op2 == irb->op2) { 608 while (ira->o == irb->o && ira->op2 == irb->op2) {
599 if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) { 609 if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) {
600 IRIns *irh;
601 for (irh = IR(ira->prev); irh != irb; irh = IR(irh->prev))
602 if (irh->op1 == irs->op2)
603 return ref; /* Do CSE, but avoid rollback if append follows. */
604 lj_ir_rollback(J, last); /* Eliminate the current chain. */
605 return ref; /* CSE succeeded. */ 610 return ref; /* CSE succeeded. */
606 } else if (ira->o == IR_CALLS) { 611 } else if (ira->o == IR_CALLL) {
607 ira = IR(ira->op1); irb = IR(irb->op1); 612 ira = IR(ira->op1); irb = IR(irb->op1);
608 lua_assert(ira->o == IR_CARG && irb->o == IR_CARG); 613 lua_assert(ira->o == IR_CARG && irb->o == IR_CARG);
609 if (ira->op2 != irb->op2) break; 614 if (ira->op2 != irb->op2) break;
@@ -618,9 +623,9 @@ LJFOLDF(bufstr_kfold_cse)
618 return EMITFOLD; /* No CSE possible. */ 623 return EMITFOLD; /* No CSE possible. */
619} 624}
620 625
621LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_reverse) 626LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
622LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_upper) 627LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
623LJFOLD(CALLS CARG IRCALL_lj_buf_putstr_lower) 628LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
624LJFOLDF(bufput_kfold_op) 629LJFOLDF(bufput_kfold_op)
625{ 630{
626 if (irref_isk(fleft->op2)) { 631 if (irref_isk(fleft->op2)) {
diff --git a/src/lj_record.c b/src/lj_record.c
index 1beaa75f..8dc102e9 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1622,7 +1622,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
1622 do { 1622 do {
1623 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++); 1623 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
1624 } while (trp <= top); 1624 } while (trp <= top);
1625 tr = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr); 1625 tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1626 J->maxslot = (BCReg)(xbase - J->base); 1626 J->maxslot = (BCReg)(xbase - J->base);
1627 if (xbase == base) return tr; 1627 if (xbase == base) return tr;
1628 } 1628 }