aboutsummaryrefslogtreecommitdiff
path: root/src/lj_opt_fold.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r--src/lj_opt_fold.c268
1 files changed, 218 insertions, 50 deletions
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index fe37b98a..1d37a7fd 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -336,11 +340,9 @@ LJFOLDF(kfold_intcomp0)
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 340static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
337{ 341{
338 switch (op) { 342 switch (op) {
339#if LJ_64 || LJ_HASFFI 343#if LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 344 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 345 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 346 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 347 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 348 case IR_BOR: k1 |= k2; break;
@@ -392,20 +394,10 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 394LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 395LJFOLDF(kfold_int64shift)
394{ 396{
395#if LJ_HASFFI || LJ_64 397#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 398 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 399 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 400 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 401#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 402 UNUSED(J); lua_assert(0); return FAILFOLD;
411#endif 403#endif
@@ -527,6 +519,179 @@ LJFOLDF(kfold_strcmp)
527 return NEXTFOLD; 519 return NEXTFOLD;
528} 520}
529 521
522/* -- Constant folding and forwarding for buffers ------------------------- */
523
524/*
525** Buffer ops perform stores, but their effect is limited to the buffer
526** itself. Also, buffer ops are chained: a use of an op implies a use of
527** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
529** them as stores.
530**
531** Alas, treating them as normal (IRM_N) ops doesn't work, because they
532** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
533** or if FOLD is disabled.
534**
535** The compromise is to declare them as loads, emit them like stores and
536** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
537** fragments left over from CSE are eliminated by DCE.
538*/
539
540/* BUFHDR is emitted like a store, see below. */
541
/* Fold an append pattern: putting a just-built BUFSTR into a fresh BUFHDR.
** If the BUFHDR immediately follows the BUFSTR's chain on the same buffer,
** the new header is rewritten to continue the old chain (append mode)
** instead of starting over. Requires load/store forwarding (JIT_F_OPT_FWD).
*/
LJFOLD(BUFPUT BUFHDR BUFSTR)
LJFOLDF(bufput_append)
{
  /* New buffer, no other buffer op in between and same buffer? */
  if ((J->flags & JIT_F_OPT_FWD) &&
      !(fleft->op2 & IRBUFHDR_APPEND) &&
      fleft->prev == fright->op2 &&
      fleft->op1 == IR(fright->op2)->op1) {
    IRRef ref = fins->op1;
    IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND);  /* Modify BUFHDR. */
    IR(ref)->op1 = fright->op1;  /* Link header to the BUFSTR's last op. */
    return ref;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
557
/* Fold a BUFPUT of a constant string: an empty string put is dropped
** entirely, and two consecutive constant puts are merged into a single
** put of the concatenated constant.
*/
LJFOLD(BUFPUT any any)
LJFOLDF(bufput_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
    GCstr *s2 = ir_kstr(fright);
    if (s2->len == 0) {  /* Empty string? */
      return LEFTFOLD;  /* No-op put: forward the buffer operand. */
    } else {
      /* Merging across a PHI would break loop-carried chains, hence the
      ** !irt_isphi() check.
      */
      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
	  !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
	GCstr *s1 = ir_kstr(IR(fleft->op2));
	IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
	/* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
	IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
	return fins->op1;
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
578
/* Constant-fold or CSE a BUFSTR and its whole put chain.
** An empty chain folds to the interned empty string; a single constant put
** folds to that constant. Otherwise the entire chain is compared against
** earlier BUFSTR chains and CSEd as a unit (see the comment block above).
*/
LJFOLD(BUFSTR any any)
LJFOLDF(bufstr_kfold_cse)
{
  lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
	     fleft->o == IR_CALLL);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
      if (!(fleft->op2 & IRBUFHDR_APPEND))  /* Empty buffer? */
	return lj_ir_kstr(J, &J2G(J)->strempty);
      fins->op1 = fleft->prev;  /* Relies on checks in bufput_append. */
      return CSEFOLD;
    } else if (fleft->o == IR_BUFPUT) {
      IRIns *irb = IR(fleft->op1);
      if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
	return fleft->op2;  /* Shortcut for a single put operation. */
    }
  }
  /* Try to CSE the whole chain. */
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_BUFSTR];
    while (ref) {  /* For each earlier BUFSTR candidate... */
      /* ...walk both chains backwards in lock-step, comparing ops. */
      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
      while (ira->o == irb->o && ira->op2 == irb->op2) {
	lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
		   ira->o == IR_CALLL || ira->o == IR_CARG);
	if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
	  return ref;  /* CSE succeeded. */
	/* puttab is not pure (table contents may differ), so stop here. */
	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
	  break;
	ira = IR(ira->op1);
	irb = IR(irb->op1);
      }
      ref = irs->prev;
    }
  }
  return EMITFOLD;  /* No CSE possible. */
}
616
/* Constant-fold a string transformation put (reverse/upper/lower/quoted)
** with a constant argument: run the C helper on a temp buffer at compile
** time and rewrite the CALLL into a BUFPUT of the resulting constant.
*/
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
LJFOLDF(bufput_kfold_op)
{
  if (irref_isk(fleft->op2)) {  /* Constant string argument? */
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    SBuf *sb = lj_buf_tmp_(J->L);
    /* Invoke the same helper the call would invoke, on a temp buffer. */
    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
						       ir_kstr(IR(fleft->op2)));
    fins->o = IR_BUFPUT;  /* Rewrite in place to a constant put. */
    fins->op1 = fleft->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;  /* Re-fold the new BUFPUT (may merge further). */
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
635
/* Constant-fold a string repetition put when both the string and the
** repeat count are constants: compute the repeated string at compile time
** and rewrite the CALLL into a BUFPUT of the constant result.
*/
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
LJFOLDF(bufput_kfold_rep)
{
  if (irref_isk(fleft->op2)) {  /* Constant repeat count? */
    IRIns *irc = IR(fleft->op1);  /* Inner CARG holds the string operand. */
    if (irref_isk(irc->op2)) {  /* Constant string, too? */
      SBuf *sb = lj_buf_tmp_(J->L);
      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
      fins->o = IR_BUFPUT;  /* Rewrite in place to a constant put. */
      fins->op1 = irc->op1;
      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
      return RETRYFOLD;  /* Re-fold the new BUFPUT (may merge further). */
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
652
/* Constant-fold a formatted put (string.format element) with a constant
** argument: dispatch on the call ID to format the constant at compile time
** and rewrite the CALLL into a BUFPUT of the resulting constant string.
*/
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
LJFOLDF(bufput_kfold_fmt)
{
  IRIns *irc = IR(fleft->op1);  /* Inner CARG holds the format operand. */
  lua_assert(irref_isk(irc->op2));  /* SFormat must be const. */
  if (irref_isk(fleft->op2)) {  /* Constant argument to format? */
    SFormat sf = (SFormat)IR(irc->op2)->i;
    IRIns *ira = IR(fleft->op2);
    SBuf *sb = lj_buf_tmp_(J->L);
    /* Select the formatter matching the call ID; argument type differs. */
    switch (fins->op2) {
    case IRCALL_lj_strfmt_putfxint:
      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
      break;
    case IRCALL_lj_strfmt_putfstr:
      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
      break;
    case IRCALL_lj_strfmt_putfchar:
      sb = lj_strfmt_putfchar(sb, sf, ira->i);
      break;
    case IRCALL_lj_strfmt_putfnum_int:
    case IRCALL_lj_strfmt_putfnum_uint:
    case IRCALL_lj_strfmt_putfnum:
    default: {
      /* All three putfnum variants take a lua_Number argument. */
      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
							       ir_knum(ira)->n);
      break;
      }
    }
    fins->o = IR_BUFPUT;  /* Rewrite in place to a constant put. */
    fins->op1 = irc->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;  /* Re-fold the new BUFPUT (may merge further). */
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
694
530/* -- Constant folding of pointer arithmetic ------------------------------ */ 695/* -- Constant folding of pointer arithmetic ------------------------------ */
531 696
532LJFOLD(ADD KGC KINT) 697LJFOLD(ADD KGC KINT)
@@ -647,27 +812,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
647LJFOLDF(kfold_conv_knum_int_num) 812LJFOLDF(kfold_conv_knum_int_num)
648{ 813{
649 lua_Number n = knumleft; 814 lua_Number n = knumleft;
650 if (!(fins->op2 & IRCONV_TRUNC)) { 815 int32_t k = lj_num2int(n);
651 int32_t k = lj_num2int(n); 816 if (irt_isguard(fins->t) && n != (lua_Number)k) {
652 if (irt_isguard(fins->t) && n != (lua_Number)k) { 817 /* We're about to create a guard which always fails, like CONV +1.5.
653 /* We're about to create a guard which always fails, like CONV +1.5. 818 ** Some pathological loops cause this during LICM, e.g.:
654 ** Some pathological loops cause this during LICM, e.g.: 819 ** local x,k,t = 0,1.5,{1,[1.5]=2}
655 ** local x,k,t = 0,1.5,{1,[1.5]=2} 820 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
656 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 821 ** assert(x == 300)
657 ** assert(x == 300) 822 */
658 */ 823 return FAILFOLD;
659 return FAILFOLD;
660 }
661 return INTFOLD(k);
662 } else {
663 return INTFOLD((int32_t)n);
664 } 824 }
825 return INTFOLD(k);
665} 826}
666 827
667LJFOLD(CONV KNUM IRCONV_U32_NUM) 828LJFOLD(CONV KNUM IRCONV_U32_NUM)
668LJFOLDF(kfold_conv_knum_u32_num) 829LJFOLDF(kfold_conv_knum_u32_num)
669{ 830{
670 lua_assert((fins->op2 & IRCONV_TRUNC));
671#ifdef _MSC_VER 831#ifdef _MSC_VER
672 { /* Workaround for MSVC bug. */ 832 { /* Workaround for MSVC bug. */
673 volatile uint32_t u = (uint32_t)knumleft; 833 volatile uint32_t u = (uint32_t)knumleft;
@@ -681,27 +841,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
681LJFOLD(CONV KNUM IRCONV_I64_NUM) 841LJFOLD(CONV KNUM IRCONV_I64_NUM)
682LJFOLDF(kfold_conv_knum_i64_num) 842LJFOLDF(kfold_conv_knum_i64_num)
683{ 843{
684 lua_assert((fins->op2 & IRCONV_TRUNC));
685 return INT64FOLD((uint64_t)(int64_t)knumleft); 844 return INT64FOLD((uint64_t)(int64_t)knumleft);
686} 845}
687 846
688LJFOLD(CONV KNUM IRCONV_U64_NUM) 847LJFOLD(CONV KNUM IRCONV_U64_NUM)
689LJFOLDF(kfold_conv_knum_u64_num) 848LJFOLDF(kfold_conv_knum_u64_num)
690{ 849{
691 lua_assert((fins->op2 & IRCONV_TRUNC));
692 return INT64FOLD(lj_num2u64(knumleft)); 850 return INT64FOLD(lj_num2u64(knumleft));
693} 851}
694 852
695LJFOLD(TOSTR KNUM) 853LJFOLD(TOSTR KNUM any)
696LJFOLDF(kfold_tostr_knum) 854LJFOLDF(kfold_tostr_knum)
697{ 855{
698 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 856 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
699} 857}
700 858
701LJFOLD(TOSTR KINT) 859LJFOLD(TOSTR KINT any)
702LJFOLDF(kfold_tostr_kint) 860LJFOLDF(kfold_tostr_kint)
703{ 861{
704 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 862 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
863 lj_strfmt_int(J->L, fleft->i) :
864 lj_strfmt_char(J->L, fleft->i));
705} 865}
706 866
707LJFOLD(STRTO KGC) 867LJFOLD(STRTO KGC)
@@ -1199,7 +1359,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1199 ** But this is mainly intended for simple address arithmetic. 1359 ** But this is mainly intended for simple address arithmetic.
1200 ** Also it's easier for the backend to optimize the original multiplies. 1360 ** Also it's easier for the backend to optimize the original multiplies.
1201 */ 1361 */
1202 if (k == 1) { /* i * 1 ==> i */ 1362 if (k == 0) { /* i * 0 ==> 0 */
1363 return RIGHTFOLD;
1364 } else if (k == 1) { /* i * 1 ==> i */
1203 return LEFTFOLD; 1365 return LEFTFOLD;
1204 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1366 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1205 fins->o = IR_BSHL; 1367 fins->o = IR_BSHL;
@@ -1212,9 +1374,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1212LJFOLD(MUL any KINT) 1374LJFOLD(MUL any KINT)
1213LJFOLDF(simplify_intmul_k32) 1375LJFOLDF(simplify_intmul_k32)
1214{ 1376{
1215 if (fright->i == 0) /* i * 0 ==> 0 */ 1377 if (fright->i >= 0)
1216 return INTFOLD(0);
1217 else if (fright->i > 0)
1218 return simplify_intmul_k(J, fright->i); 1378 return simplify_intmul_k(J, fright->i);
1219 return NEXTFOLD; 1379 return NEXTFOLD;
1220} 1380}
@@ -1222,14 +1382,13 @@ LJFOLDF(simplify_intmul_k32)
1222LJFOLD(MUL any KINT64) 1382LJFOLD(MUL any KINT64)
1223LJFOLDF(simplify_intmul_k64) 1383LJFOLDF(simplify_intmul_k64)
1224{ 1384{
1225 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1385#if LJ_HASFFI
1226 return INT64FOLD(0); 1386 if (ir_kint64(fright)->u64 < 0x80000000u)
1227#if LJ_64
1228 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1229 else if (ir_kint64(fright)->u64 < 0x80000000u)
1230 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1387 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1231#endif
1232 return NEXTFOLD; 1388 return NEXTFOLD;
1389#else
1390 UNUSED(J); lua_assert(0); return FAILFOLD;
1391#endif
1233} 1392}
1234 1393
1235LJFOLD(MOD any KINT) 1394LJFOLD(MOD any KINT)
@@ -1529,7 +1688,7 @@ LJFOLD(BOR BOR KINT64)
1529LJFOLD(BXOR BXOR KINT64) 1688LJFOLD(BXOR BXOR KINT64)
1530LJFOLDF(reassoc_intarith_k64) 1689LJFOLDF(reassoc_intarith_k64)
1531{ 1690{
1532#if LJ_HASFFI || LJ_64 1691#if LJ_HASFFI
1533 IRIns *irk = IR(fleft->op2); 1692 IRIns *irk = IR(fleft->op2);
1534 if (irk->o == IR_KINT64) { 1693 if (irk->o == IR_KINT64) {
1535 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1694 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -2007,6 +2166,14 @@ LJFOLDF(fload_str_len_snew)
2007 return NEXTFOLD; 2166 return NEXTFOLD;
2008} 2167}
2009 2168
/* Fold the length load of a TOSTR result: an IRTOSTR_CHAR conversion
** always produces a one-character string, so its length is the constant 1.
*/
LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
LJFOLDF(fload_str_len_tostr)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
    return INTFOLD(1);
  return NEXTFOLD;
}
2176
2010/* The C type ID of cdata objects is immutable. */ 2177/* The C type ID of cdata objects is immutable. */
2011LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2178LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2012LJFOLDF(fload_cdata_typeid_kgc) 2179LJFOLDF(fload_cdata_typeid_kgc)
@@ -2149,6 +2316,7 @@ LJFOLD(TNEW any any)
2149LJFOLD(TDUP any) 2316LJFOLD(TDUP any)
2150LJFOLD(CNEW any any) 2317LJFOLD(CNEW any any)
2151LJFOLD(XSNEW any any) 2318LJFOLD(XSNEW any any)
2319LJFOLD(BUFHDR any any)
2152LJFOLDX(lj_ir_emit) 2320LJFOLDX(lj_ir_emit)
2153 2321
2154/* ------------------------------------------------------------------------ */ 2322/* ------------------------------------------------------------------------ */