aboutsummaryrefslogtreecommitdiff
path: root/src/lj_opt_fold.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r--src/lj_opt_fold.c369
1 files changed, 310 insertions, 59 deletions
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 928d3852..b4d05a26 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -133,8 +136,8 @@
133/* Some local macros to save typing. Undef'd at the end. */ 136/* Some local macros to save typing. Undef'd at the end. */
134#define IR(ref) (&J->cur.ir[(ref)]) 137#define IR(ref) (&J->cur.ir[(ref)])
135#define fins (&J->fold.ins) 138#define fins (&J->fold.ins)
136#define fleft (&J->fold.left) 139#define fleft (J->fold.left)
137#define fright (&J->fold.right) 140#define fright (J->fold.right)
138#define knumleft (ir_knum(fleft)->n) 141#define knumleft (ir_knum(fleft)->n)
139#define knumright (ir_knum(fright)->n) 142#define knumright (ir_knum(fright)->n)
140 143
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -169,8 +173,6 @@ LJFOLD(ADD KNUM KNUM)
169LJFOLD(SUB KNUM KNUM) 173LJFOLD(SUB KNUM KNUM)
170LJFOLD(MUL KNUM KNUM) 174LJFOLD(MUL KNUM KNUM)
171LJFOLD(DIV KNUM KNUM) 175LJFOLD(DIV KNUM KNUM)
172LJFOLD(NEG KNUM KNUM)
173LJFOLD(ABS KNUM KNUM)
174LJFOLD(ATAN2 KNUM KNUM) 176LJFOLD(ATAN2 KNUM KNUM)
175LJFOLD(LDEXP KNUM KNUM) 177LJFOLD(LDEXP KNUM KNUM)
176LJFOLD(MIN KNUM KNUM) 178LJFOLD(MIN KNUM KNUM)
@@ -183,6 +185,15 @@ LJFOLDF(kfold_numarith)
183 return lj_ir_knum(J, y); 185 return lj_ir_knum(J, y);
184} 186}
185 187
188LJFOLD(NEG KNUM FLOAD)
189LJFOLD(ABS KNUM FLOAD)
190LJFOLDF(kfold_numabsneg)
191{
192 lua_Number a = knumleft;
193 lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
194 return lj_ir_knum(J, y);
195}
196
186LJFOLD(LDEXP KNUM KINT) 197LJFOLD(LDEXP KNUM KINT)
187LJFOLDF(kfold_ldexp) 198LJFOLDF(kfold_ldexp)
188{ 199{
@@ -336,15 +347,18 @@ LJFOLDF(kfold_intcomp0)
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 347static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
337{ 348{
338 switch (op) { 349 switch (op) {
339#if LJ_64 || LJ_HASFFI 350#if LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 351 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 352 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 353 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 354 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 355 case IR_BOR: k1 |= k2; break;
347 case IR_BXOR: k1 ^= k2; break; 356 case IR_BXOR: k1 ^= k2; break;
357 case IR_BSHL: k1 <<= (k2 & 63); break;
358 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
359 case IR_BSAR: k1 >>= (k2 & 63); break;
360 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
361 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
348#endif 362#endif
349 default: UNUSED(k2); lua_assert(0); break; 363 default: UNUSED(k2); lua_assert(0); break;
350 } 364 }
@@ -392,20 +406,10 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 406LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 407LJFOLDF(kfold_int64shift)
394{ 408{
395#if LJ_HASFFI || LJ_64 409#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 410 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 411 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 412 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 413#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 414 UNUSED(J); lua_assert(0); return FAILFOLD;
411#endif 415#endif
@@ -510,7 +514,7 @@ LJFOLDF(kfold_strref_snew)
510 PHIBARRIER(ir); 514 PHIBARRIER(ir);
511 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ 515 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
512 fins->op1 = str; 516 fins->op1 = str;
513 fins->ot = IRT(IR_STRREF, IRT_P32); 517 fins->ot = IRT(IR_STRREF, IRT_PGC);
514 return RETRYFOLD; 518 return RETRYFOLD;
515 } 519 }
516 } 520 }
@@ -528,6 +532,180 @@ LJFOLDF(kfold_strcmp)
528 return NEXTFOLD; 532 return NEXTFOLD;
529} 533}
530 534
535/* -- Constant folding and forwarding for buffers ------------------------- */
536
537/*
538** Buffer ops perform stores, but their effect is limited to the buffer
539** itself. Also, buffer ops are chained: a use of an op implies a use of
540** all other ops up the chain. Conversely, if an op is unused, all ops
541** up the chain can go unsed. This largely eliminates the need to treat
542** them as stores.
543**
544** Alas, treating them as normal (IRM_N) ops doesn't work, because they
545** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
546** or if FOLD is disabled.
547**
548** The compromise is to declare them as loads, emit them like stores and
549** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
550** fragments left over from CSE are eliminated by DCE.
551*/
552
553/* BUFHDR is emitted like a store, see below. */
554
555LJFOLD(BUFPUT BUFHDR BUFSTR)
556LJFOLDF(bufput_append)
557{
558»  /* New buffer, no other buffer op in between and same buffer? */
559 if ((J->flags & JIT_F_OPT_FWD) &&
560 !(fleft->op2 & IRBUFHDR_APPEND) &&
561 fleft->prev == fright->op2 &&
562 fleft->op1 == IR(fright->op2)->op1) {
563 IRRef ref = fins->op1;
564 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
565 IR(ref)->op1 = fright->op1;
566 return ref;
567 }
568 return EMITFOLD; /* Always emit, CSE later. */
569}
570
571LJFOLD(BUFPUT any any)
572LJFOLDF(bufput_kgc)
573{
574 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
575 GCstr *s2 = ir_kstr(fright);
576 if (s2->len == 0) { /* Empty string? */
577 return LEFTFOLD;
578 } else {
579 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
580 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
581 GCstr *s1 = ir_kstr(IR(fleft->op2));
582 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
583»        /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
584 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
585 return fins->op1;
586 }
587 }
588 }
589 return EMITFOLD; /* Always emit, CSE later. */
590}
591
592LJFOLD(BUFSTR any any)
593LJFOLDF(bufstr_kfold_cse)
594{
595 lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
596 fleft->o == IR_CALLL);
597 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
598 if (fleft->o == IR_BUFHDR) { /* No put operations? */
599 if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
600 return lj_ir_kstr(J, &J2G(J)->strempty);
601 fins->op1 = fleft->op1;
602 fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
603 return CSEFOLD;
604 } else if (fleft->o == IR_BUFPUT) {
605 IRIns *irb = IR(fleft->op1);
606 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
607 return fleft->op2; /* Shortcut for a single put operation. */
608 }
609 }
610 /* Try to CSE the whole chain. */
611 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
612 IRRef ref = J->chain[IR_BUFSTR];
613 while (ref) {
614 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
615 while (ira->o == irb->o && ira->op2 == irb->op2) {
616 lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
617 ira->o == IR_CALLL || ira->o == IR_CARG);
618 if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
619 return ref; /* CSE succeeded. */
620 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
621 break;
622 ira = IR(ira->op1);
623 irb = IR(irb->op1);
624 }
625 ref = irs->prev;
626 }
627 }
628 return EMITFOLD; /* No CSE possible. */
629}
630
631LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
632LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
633LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
634LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
635LJFOLDF(bufput_kfold_op)
636{
637 if (irref_isk(fleft->op2)) {
638 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
639 SBuf *sb = lj_buf_tmp_(J->L);
640 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
641 ir_kstr(IR(fleft->op2)));
642 fins->o = IR_BUFPUT;
643 fins->op1 = fleft->op1;
644 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
645 return RETRYFOLD;
646 }
647 return EMITFOLD; /* Always emit, CSE later. */
648}
649
650LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
651LJFOLDF(bufput_kfold_rep)
652{
653 if (irref_isk(fleft->op2)) {
654 IRIns *irc = IR(fleft->op1);
655 if (irref_isk(irc->op2)) {
656 SBuf *sb = lj_buf_tmp_(J->L);
657 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
658 fins->o = IR_BUFPUT;
659 fins->op1 = irc->op1;
660 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
661 return RETRYFOLD;
662 }
663 }
664 return EMITFOLD; /* Always emit, CSE later. */
665}
666
667LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
668LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
669LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
670LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
671LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
672LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
673LJFOLDF(bufput_kfold_fmt)
674{
675 IRIns *irc = IR(fleft->op1);
676 lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */
677 if (irref_isk(fleft->op2)) {
678 SFormat sf = (SFormat)IR(irc->op2)->i;
679 IRIns *ira = IR(fleft->op2);
680 SBuf *sb = lj_buf_tmp_(J->L);
681 switch (fins->op2) {
682 case IRCALL_lj_strfmt_putfxint:
683 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
684 break;
685 case IRCALL_lj_strfmt_putfstr:
686 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
687 break;
688 case IRCALL_lj_strfmt_putfchar:
689 sb = lj_strfmt_putfchar(sb, sf, ira->i);
690 break;
691 case IRCALL_lj_strfmt_putfnum_int:
692 case IRCALL_lj_strfmt_putfnum_uint:
693 case IRCALL_lj_strfmt_putfnum:
694 default: {
695 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
696 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
697 ir_knum(ira)->n);
698 break;
699 }
700 }
701 fins->o = IR_BUFPUT;
702 fins->op1 = irc->op1;
703 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
704 return RETRYFOLD;
705 }
706 return EMITFOLD; /* Always emit, CSE later. */
707}
708
531/* -- Constant folding of pointer arithmetic ------------------------------ */ 709/* -- Constant folding of pointer arithmetic ------------------------------ */
532 710
533LJFOLD(ADD KGC KINT) 711LJFOLD(ADD KGC KINT)
@@ -648,27 +826,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
648LJFOLDF(kfold_conv_knum_int_num) 826LJFOLDF(kfold_conv_knum_int_num)
649{ 827{
650 lua_Number n = knumleft; 828 lua_Number n = knumleft;
651 if (!(fins->op2 & IRCONV_TRUNC)) { 829 int32_t k = lj_num2int(n);
652 int32_t k = lj_num2int(n); 830 if (irt_isguard(fins->t) && n != (lua_Number)k) {
653 if (irt_isguard(fins->t) && n != (lua_Number)k) { 831 /* We're about to create a guard which always fails, like CONV +1.5.
654 /* We're about to create a guard which always fails, like CONV +1.5. 832 ** Some pathological loops cause this during LICM, e.g.:
655 ** Some pathological loops cause this during LICM, e.g.: 833 ** local x,k,t = 0,1.5,{1,[1.5]=2}
656 ** local x,k,t = 0,1.5,{1,[1.5]=2} 834 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
657 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 835 ** assert(x == 300)
658 ** assert(x == 300) 836 */
659 */ 837 return FAILFOLD;
660 return FAILFOLD;
661 }
662 return INTFOLD(k);
663 } else {
664 return INTFOLD((int32_t)n);
665 } 838 }
839 return INTFOLD(k);
666} 840}
667 841
668LJFOLD(CONV KNUM IRCONV_U32_NUM) 842LJFOLD(CONV KNUM IRCONV_U32_NUM)
669LJFOLDF(kfold_conv_knum_u32_num) 843LJFOLDF(kfold_conv_knum_u32_num)
670{ 844{
671 lua_assert((fins->op2 & IRCONV_TRUNC));
672#ifdef _MSC_VER 845#ifdef _MSC_VER
673 { /* Workaround for MSVC bug. */ 846 { /* Workaround for MSVC bug. */
674 volatile uint32_t u = (uint32_t)knumleft; 847 volatile uint32_t u = (uint32_t)knumleft;
@@ -682,27 +855,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
682LJFOLD(CONV KNUM IRCONV_I64_NUM) 855LJFOLD(CONV KNUM IRCONV_I64_NUM)
683LJFOLDF(kfold_conv_knum_i64_num) 856LJFOLDF(kfold_conv_knum_i64_num)
684{ 857{
685 lua_assert((fins->op2 & IRCONV_TRUNC));
686 return INT64FOLD((uint64_t)(int64_t)knumleft); 858 return INT64FOLD((uint64_t)(int64_t)knumleft);
687} 859}
688 860
689LJFOLD(CONV KNUM IRCONV_U64_NUM) 861LJFOLD(CONV KNUM IRCONV_U64_NUM)
690LJFOLDF(kfold_conv_knum_u64_num) 862LJFOLDF(kfold_conv_knum_u64_num)
691{ 863{
692 lua_assert((fins->op2 & IRCONV_TRUNC));
693 return INT64FOLD(lj_num2u64(knumleft)); 864 return INT64FOLD(lj_num2u64(knumleft));
694} 865}
695 866
696LJFOLD(TOSTR KNUM) 867LJFOLD(TOSTR KNUM any)
697LJFOLDF(kfold_tostr_knum) 868LJFOLDF(kfold_tostr_knum)
698{ 869{
699 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 870 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
700} 871}
701 872
702LJFOLD(TOSTR KINT) 873LJFOLD(TOSTR KINT any)
703LJFOLDF(kfold_tostr_kint) 874LJFOLDF(kfold_tostr_kint)
704{ 875{
705 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 876 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
877 lj_strfmt_int(J->L, fleft->i) :
878 lj_strfmt_char(J->L, fleft->i));
706} 879}
707 880
708LJFOLD(STRTO KGC) 881LJFOLD(STRTO KGC)
@@ -750,13 +923,13 @@ LJFOLDF(shortcut_round)
750 return NEXTFOLD; 923 return NEXTFOLD;
751} 924}
752 925
753LJFOLD(ABS ABS KNUM) 926LJFOLD(ABS ABS FLOAD)
754LJFOLDF(shortcut_left) 927LJFOLDF(shortcut_left)
755{ 928{
756 return LEFTFOLD; /* f(g(x)) ==> g(x) */ 929 return LEFTFOLD; /* f(g(x)) ==> g(x) */
757} 930}
758 931
759LJFOLD(ABS NEG KNUM) 932LJFOLD(ABS NEG FLOAD)
760LJFOLDF(shortcut_dropleft) 933LJFOLDF(shortcut_dropleft)
761{ 934{
762 PHIBARRIER(fleft); 935 PHIBARRIER(fleft);
@@ -837,8 +1010,10 @@ LJFOLDF(simplify_nummuldiv_k)
837 if (n == 1.0) { /* x o 1 ==> x */ 1010 if (n == 1.0) { /* x o 1 ==> x */
838 return LEFTFOLD; 1011 return LEFTFOLD;
839 } else if (n == -1.0) { /* x o -1 ==> -x */ 1012 } else if (n == -1.0) { /* x o -1 ==> -x */
1013 IRRef op1 = fins->op1;
1014 fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
1015 fins->op1 = op1;
840 fins->o = IR_NEG; 1016 fins->o = IR_NEG;
841 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
842 return RETRYFOLD; 1017 return RETRYFOLD;
843 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ 1018 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
844 fins->o = IR_ADD; 1019 fins->o = IR_ADD;
@@ -1205,7 +1380,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1205 ** But this is mainly intended for simple address arithmetic. 1380 ** But this is mainly intended for simple address arithmetic.
1206 ** Also it's easier for the backend to optimize the original multiplies. 1381 ** Also it's easier for the backend to optimize the original multiplies.
1207 */ 1382 */
1208 if (k == 1) { /* i * 1 ==> i */ 1383 if (k == 0) { /* i * 0 ==> 0 */
1384 return RIGHTFOLD;
1385 } else if (k == 1) { /* i * 1 ==> i */
1209 return LEFTFOLD; 1386 return LEFTFOLD;
1210 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1387 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1211 fins->o = IR_BSHL; 1388 fins->o = IR_BSHL;
@@ -1218,9 +1395,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1218LJFOLD(MUL any KINT) 1395LJFOLD(MUL any KINT)
1219LJFOLDF(simplify_intmul_k32) 1396LJFOLDF(simplify_intmul_k32)
1220{ 1397{
1221 if (fright->i == 0) /* i * 0 ==> 0 */ 1398 if (fright->i >= 0)
1222 return INTFOLD(0);
1223 else if (fright->i > 0)
1224 return simplify_intmul_k(J, fright->i); 1399 return simplify_intmul_k(J, fright->i);
1225 return NEXTFOLD; 1400 return NEXTFOLD;
1226} 1401}
@@ -1228,14 +1403,13 @@ LJFOLDF(simplify_intmul_k32)
1228LJFOLD(MUL any KINT64) 1403LJFOLD(MUL any KINT64)
1229LJFOLDF(simplify_intmul_k64) 1404LJFOLDF(simplify_intmul_k64)
1230{ 1405{
1231 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1406#if LJ_HASFFI
1232 return INT64FOLD(0); 1407 if (ir_kint64(fright)->u64 < 0x80000000u)
1233#if LJ_64
1234 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1235 else if (ir_kint64(fright)->u64 < 0x80000000u)
1236 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1408 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1237#endif
1238 return NEXTFOLD; 1409 return NEXTFOLD;
1410#else
1411 UNUSED(J); lua_assert(0); return FAILFOLD;
1412#endif
1239} 1413}
1240 1414
1241LJFOLD(MOD any KINT) 1415LJFOLD(MOD any KINT)
@@ -1491,6 +1665,14 @@ LJFOLDF(simplify_shiftk_andk)
1491 fins->op2 = (IRRef1)lj_ir_kint(J, k); 1665 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1492 fins->ot = IRTI(IR_BAND); 1666 fins->ot = IRTI(IR_BAND);
1493 return RETRYFOLD; 1667 return RETRYFOLD;
1668 } else if (irk->o == IR_KINT64) {
1669 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o);
1670 IROpT ot = fleft->ot;
1671 fins->op1 = fleft->op1;
1672 fins->op1 = (IRRef1)lj_opt_fold(J);
1673 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1674 fins->ot = ot;
1675 return RETRYFOLD;
1494 } 1676 }
1495 return NEXTFOLD; 1677 return NEXTFOLD;
1496} 1678}
@@ -1506,6 +1688,47 @@ LJFOLDF(simplify_andk_shiftk)
1506 return NEXTFOLD; 1688 return NEXTFOLD;
1507} 1689}
1508 1690
1691LJFOLD(BAND BOR KINT)
1692LJFOLD(BOR BAND KINT)
1693LJFOLDF(simplify_andor_k)
1694{
1695 IRIns *irk = IR(fleft->op2);
1696 PHIBARRIER(fleft);
1697 if (irk->o == IR_KINT) {
1698 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1699 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1700 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1701 if (k == (fins->o == IR_BAND ? 0 : -1)) {
1702 fins->op1 = fleft->op1;
1703 return RETRYFOLD;
1704 }
1705 }
1706 return NEXTFOLD;
1707}
1708
1709LJFOLD(BAND BOR KINT64)
1710LJFOLD(BOR BAND KINT64)
1711LJFOLDF(simplify_andor_k64)
1712{
1713#if LJ_HASFFI
1714 IRIns *irk = IR(fleft->op2);
1715 PHIBARRIER(fleft);
1716 if (irk->o == IR_KINT64) {
1717 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
1718 ir_k64(fright)->u64, (IROp)fins->o);
1719 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1720 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1721 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
1722 fins->op1 = fleft->op1;
1723 return RETRYFOLD;
1724 }
1725 }
1726 return NEXTFOLD;
1727#else
1728 UNUSED(J); lua_assert(0); return FAILFOLD;
1729#endif
1730}
1731
1509/* -- Reassociation ------------------------------------------------------- */ 1732/* -- Reassociation ------------------------------------------------------- */
1510 1733
1511LJFOLD(ADD ADD KINT) 1734LJFOLD(ADD ADD KINT)
@@ -1535,7 +1758,7 @@ LJFOLD(BOR BOR KINT64)
1535LJFOLD(BXOR BXOR KINT64) 1758LJFOLD(BXOR BXOR KINT64)
1536LJFOLDF(reassoc_intarith_k64) 1759LJFOLDF(reassoc_intarith_k64)
1537{ 1760{
1538#if LJ_HASFFI || LJ_64 1761#if LJ_HASFFI
1539 IRIns *irk = IR(fleft->op2); 1762 IRIns *irk = IR(fleft->op2);
1540 if (irk->o == IR_KINT64) { 1763 if (irk->o == IR_KINT64) {
1541 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1764 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -1953,6 +2176,7 @@ LJFOLDF(fwd_href_tdup)
1953** an aliased table, as it may invalidate all of the pointers and fields. 2176** an aliased table, as it may invalidate all of the pointers and fields.
1954** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2177** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1955** FLOADs. And NEWREF itself is treated like a store (see below). 2178** FLOADs. And NEWREF itself is treated like a store (see below).
2179** LREF is constant (per trace) since coroutine switches are not inlined.
1956*/ 2180*/
1957LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2181LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1958LJFOLDF(fload_tab_tnew_asize) 2182LJFOLDF(fload_tab_tnew_asize)
@@ -2016,6 +2240,14 @@ LJFOLDF(fload_str_len_snew)
2016 return NEXTFOLD; 2240 return NEXTFOLD;
2017} 2241}
2018 2242
2243LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2244LJFOLDF(fload_str_len_tostr)
2245{
2246 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2247 return INTFOLD(1);
2248 return NEXTFOLD;
2249}
2250
2019/* The C type ID of cdata objects is immutable. */ 2251/* The C type ID of cdata objects is immutable. */
2020LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2252LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2021LJFOLDF(fload_cdata_typeid_kgc) 2253LJFOLDF(fload_cdata_typeid_kgc)
@@ -2062,6 +2294,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2062} 2294}
2063 2295
2064LJFOLD(FLOAD any IRFL_STR_LEN) 2296LJFOLD(FLOAD any IRFL_STR_LEN)
2297LJFOLD(FLOAD any IRFL_FUNC_ENV)
2298LJFOLD(FLOAD any IRFL_THREAD_ENV)
2065LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2299LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2066LJFOLD(FLOAD any IRFL_CDATA_PTR) 2300LJFOLD(FLOAD any IRFL_CDATA_PTR)
2067LJFOLD(FLOAD any IRFL_CDATA_INT) 2301LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2127,6 +2361,17 @@ LJFOLDF(barrier_tnew_tdup)
2127 return DROPFOLD; 2361 return DROPFOLD;
2128} 2362}
2129 2363
2364/* -- Profiling ----------------------------------------------------------- */
2365
2366LJFOLD(PROF any any)
2367LJFOLDF(prof)
2368{
2369 IRRef ref = J->chain[IR_PROF];
2370 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2371 return ref;
2372 return EMITFOLD;
2373}
2374
2130/* -- Stores and allocations ---------------------------------------------- */ 2375/* -- Stores and allocations ---------------------------------------------- */
2131 2376
2132/* Stores and allocations cannot be folded or passed on to CSE in general. 2377/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2149,8 +2394,9 @@ LJFOLD(XSTORE any any)
2149LJFOLDX(lj_opt_dse_xstore) 2394LJFOLDX(lj_opt_dse_xstore)
2150 2395
2151LJFOLD(NEWREF any any) /* Treated like a store. */ 2396LJFOLD(NEWREF any any) /* Treated like a store. */
2152LJFOLD(CALLS any any) 2397LJFOLD(CALLA any any)
2153LJFOLD(CALLL any any) /* Safeguard fallback. */ 2398LJFOLD(CALLL any any) /* Safeguard fallback. */
2399LJFOLD(CALLS any any)
2154LJFOLD(CALLXS any any) 2400LJFOLD(CALLXS any any)
2155LJFOLD(XBAR) 2401LJFOLD(XBAR)
2156LJFOLD(RETF any any) /* Modifies BASE. */ 2402LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2158,6 +2404,7 @@ LJFOLD(TNEW any any)
2158LJFOLD(TDUP any) 2404LJFOLD(TDUP any)
2159LJFOLD(CNEW any any) 2405LJFOLD(CNEW any any)
2160LJFOLD(XSNEW any any) 2406LJFOLD(XSNEW any any)
2407LJFOLD(BUFHDR any any)
2161LJFOLDX(lj_ir_emit) 2408LJFOLDX(lj_ir_emit)
2162 2409
2163/* ------------------------------------------------------------------------ */ 2410/* ------------------------------------------------------------------------ */
@@ -2209,10 +2456,14 @@ retry:
2209 if (fins->op1 >= J->cur.nk) { 2456 if (fins->op1 >= J->cur.nk) {
2210 key += (uint32_t)IR(fins->op1)->o << 10; 2457 key += (uint32_t)IR(fins->op1)->o << 10;
2211 *fleft = *IR(fins->op1); 2458 *fleft = *IR(fins->op1);
2459 if (fins->op1 < REF_TRUE)
2460 fleft[1] = IR(fins->op1)[1];
2212 } 2461 }
2213 if (fins->op2 >= J->cur.nk) { 2462 if (fins->op2 >= J->cur.nk) {
2214 key += (uint32_t)IR(fins->op2)->o; 2463 key += (uint32_t)IR(fins->op2)->o;
2215 *fright = *IR(fins->op2); 2464 *fright = *IR(fins->op2);
2465 if (fins->op2 < REF_TRUE)
2466 fright[1] = IR(fins->op2)[1];
2216 } else { 2467 } else {
2217 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ 2468 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2218 } 2469 }