Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r--  src/lj_opt_fold.c  369
1 file changed, 310 insertions, 59 deletions
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 928d3852..b4d05a26 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@

 #if LJ_HASJIT

+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
+#include "lj_ircall.h"
 #include "lj_iropt.h"
 #include "lj_trace.h"
 #if LJ_HASFFI
 #include "lj_ctype.h"
-#endif
 #include "lj_carith.h"
+#endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_strfmt.h"

 /* Here's a short description how the FOLD engine processes instructions:
 **
@@ -133,8 +136,8 @@
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)     (&J->cur.ir[(ref)])
 #define fins        (&J->fold.ins)
-#define fleft       (&J->fold.left)
-#define fright      (&J->fold.right)
+#define fleft       (J->fold.left)
+#define fright      (J->fold.right)
 #define knumleft    (ir_knum(fleft)->n)
 #define knumright   (ir_knum(fright)->n)

@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

 /* Barrier to prevent folding across a GC step.
 ** GC steps can only happen at the head of a trace and at LOOP.
-** And the GC is only driven forward if there is at least one allocation.
+** And the GC is only driven forward if there's at least one allocation.
 */
 #define gcstep_barrier(J, ref) \
   ((ref) < J->chain[IR_LOOP] && \
    (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
     J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
-    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
+    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
+    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))

 /* -- Constant folding for FP numbers ------------------------------------- */

@@ -169,8 +173,6 @@ LJFOLD(ADD KNUM KNUM)
 LJFOLD(SUB KNUM KNUM)
 LJFOLD(MUL KNUM KNUM)
 LJFOLD(DIV KNUM KNUM)
-LJFOLD(NEG KNUM KNUM)
-LJFOLD(ABS KNUM KNUM)
 LJFOLD(ATAN2 KNUM KNUM)
 LJFOLD(LDEXP KNUM KNUM)
 LJFOLD(MIN KNUM KNUM)
@@ -183,6 +185,15 @@ LJFOLDF(kfold_numarith)
   return lj_ir_knum(J, y);
 }

+LJFOLD(NEG KNUM FLOAD)
+LJFOLD(ABS KNUM FLOAD)
+LJFOLDF(kfold_numabsneg)
+{
+  lua_Number a = knumleft;
+  lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
+  return lj_ir_knum(J, y);
+}
+
 LJFOLD(LDEXP KNUM KINT)
 LJFOLDF(kfold_ldexp)
 {
@@ -336,15 +347,18 @@ LJFOLDF(kfold_intcomp0)
 static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
 {
   switch (op) {
-#if LJ_64 || LJ_HASFFI
+#if LJ_HASFFI
   case IR_ADD: k1 += k2; break;
   case IR_SUB: k1 -= k2; break;
-#endif
-#if LJ_HASFFI
   case IR_MUL: k1 *= k2; break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
+  case IR_BSHL: k1 <<= (k2 & 63); break;
+  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
+  case IR_BSAR: k1 >>= (k2 & 63); break;
+  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
+  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
 #endif
   default: UNUSED(k2); lua_assert(0); break;
   }
@@ -392,20 +406,10 @@ LJFOLD(BROL KINT64 KINT)
 LJFOLD(BROR KINT64 KINT)
 LJFOLDF(kfold_int64shift)
 {
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
   uint64_t k = ir_k64(fleft)->u64;
   int32_t sh = (fright->i & 63);
-  switch ((IROp)fins->o) {
-  case IR_BSHL: k <<= sh; break;
-#if LJ_HASFFI
-  case IR_BSHR: k >>= sh; break;
-  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
-  case IR_BROL: k = lj_rol(k, sh); break;
-  case IR_BROR: k = lj_ror(k, sh); break;
-#endif
-  default: lua_assert(0); break;
-  }
-  return INT64FOLD(k);
+  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
 #else
   UNUSED(J); lua_assert(0); return FAILFOLD;
 #endif
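Both kfold_int64arith above and the lj_carith_shift64 call fold 64-bit shifts and rotates with the count masked into the 0..63 range rather than clamped. A minimal standalone C sketch of that masking behaviour (illustrative only, not the LuaJIT helper itself):

#include <stdint.h>
#include <stdio.h>

/* Rotate left with the count reduced mod 64; a count of 0 is handled
** separately to avoid an undefined 64-bit shift. */
static uint64_t rol64(uint64_t x, int n)
{
  n &= 63;
  return n ? (x << n) | (x >> (64 - n)) : x;
}

/* Fold a constant 64-bit shift/rotate; op: 0=shl, 1=shr, 2=sar, 3=rol, 4=ror. */
static uint64_t fold_shift64(uint64_t k, int32_t sh, int op)
{
  sh &= 63;  /* The count is masked, not clamped. */
  switch (op) {
  case 0: return k << sh;
  case 1: return k >> sh;
  case 2: return (uint64_t)((int64_t)k >> sh);
  case 3: return rol64(k, sh);
  default: return rol64(k, 64 - sh);  /* ror(k, sh) == rol(k, 64 - sh) */
  }
}

int main(void)
{
  /* A count of 68 behaves like 4, since 68 & 63 == 4. */
  printf("%016llx\n",
         (unsigned long long)fold_shift64(0x8000000000000001ULL, 68, 1));
  return 0;
}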
@@ -510,7 +514,7 @@ LJFOLDF(kfold_strref_snew)
     PHIBARRIER(ir);
     fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
     fins->op1 = str;
-    fins->ot = IRT(IR_STRREF, IRT_P32);
+    fins->ot = IRT(IR_STRREF, IRT_PGC);
     return RETRYFOLD;
   }
 }
@@ -528,6 +532,180 @@ LJFOLDF(kfold_strcmp)
   return NEXTFOLD;
 }

+/* -- Constant folding and forwarding for buffers ------------------------- */
+
+/*
+** Buffer ops perform stores, but their effect is limited to the buffer
+** itself. Also, buffer ops are chained: a use of an op implies a use of
+** all other ops up the chain. Conversely, if an op is unused, all ops
+** up the chain can go unused. This largely eliminates the need to treat
+** them as stores.
+**
+** Alas, treating them as normal (IRM_N) ops doesn't work, because they
+** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
+** or if FOLD is disabled.
+**
+** The compromise is to declare them as loads, emit them like stores and
+** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
+** fragments left over from CSE are eliminated by DCE.
+*/
+
+/* BUFHDR is emitted like a store, see below. */
+
+LJFOLD(BUFPUT BUFHDR BUFSTR)
+LJFOLDF(bufput_append)
+{
+  /* New buffer, no other buffer op inbetween and same buffer? */
+  if ((J->flags & JIT_F_OPT_FWD) &&
+      !(fleft->op2 & IRBUFHDR_APPEND) &&
+      fleft->prev == fright->op2 &&
+      fleft->op1 == IR(fright->op2)->op1) {
+    IRRef ref = fins->op1;
+    IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND);  /* Modify BUFHDR. */
+    IR(ref)->op1 = fright->op1;
+    return ref;
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(BUFPUT any any)
+LJFOLDF(bufput_kgc)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
+    GCstr *s2 = ir_kstr(fright);
+    if (s2->len == 0) {  /* Empty string? */
+      return LEFTFOLD;
+    } else {
+      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
+          !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
+        GCstr *s1 = ir_kstr(IR(fleft->op2));
+        IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
+        /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
+        IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
+        return fins->op1;
+      }
+    }
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(BUFSTR any any)
+LJFOLDF(bufstr_kfold_cse)
+{
+  lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
+             fleft->o == IR_CALLL);
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
+    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
+      if (!(fleft->op2 & IRBUFHDR_APPEND))  /* Empty buffer? */
+        return lj_ir_kstr(J, &J2G(J)->strempty);
+      fins->op1 = fleft->op1;
+      fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
+      return CSEFOLD;
+    } else if (fleft->o == IR_BUFPUT) {
+      IRIns *irb = IR(fleft->op1);
+      if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
+        return fleft->op2;  /* Shortcut for a single put operation. */
+    }
+  }
+  /* Try to CSE the whole chain. */
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+    IRRef ref = J->chain[IR_BUFSTR];
+    while (ref) {
+      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
+      while (ira->o == irb->o && ira->op2 == irb->op2) {
+        lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
+                   ira->o == IR_CALLL || ira->o == IR_CARG);
+        if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
+          return ref;  /* CSE succeeded. */
+        if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
+          break;
+        ira = IR(ira->op1);
+        irb = IR(irb->op1);
+      }
+      ref = irs->prev;
+    }
+  }
+  return EMITFOLD;  /* No CSE possible. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
+LJFOLDF(bufput_kfold_op)
+{
+  if (irref_isk(fleft->op2)) {
+    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+    SBuf *sb = lj_buf_tmp_(J->L);
+    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
+                                                  ir_kstr(IR(fleft->op2)));
+    fins->o = IR_BUFPUT;
+    fins->op1 = fleft->op1;
+    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+    return RETRYFOLD;
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
+LJFOLDF(bufput_kfold_rep)
+{
+  if (irref_isk(fleft->op2)) {
+    IRIns *irc = IR(fleft->op1);
+    if (irref_isk(irc->op2)) {
+      SBuf *sb = lj_buf_tmp_(J->L);
+      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
+      fins->o = IR_BUFPUT;
+      fins->op1 = irc->op1;
+      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+      return RETRYFOLD;
+    }
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
+LJFOLDF(bufput_kfold_fmt)
+{
+  IRIns *irc = IR(fleft->op1);
+  lua_assert(irref_isk(irc->op2));  /* SFormat must be const. */
+  if (irref_isk(fleft->op2)) {
+    SFormat sf = (SFormat)IR(irc->op2)->i;
+    IRIns *ira = IR(fleft->op2);
+    SBuf *sb = lj_buf_tmp_(J->L);
+    switch (fins->op2) {
+    case IRCALL_lj_strfmt_putfxint:
+      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
+      break;
+    case IRCALL_lj_strfmt_putfstr:
+      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
+      break;
+    case IRCALL_lj_strfmt_putfchar:
+      sb = lj_strfmt_putfchar(sb, sf, ira->i);
+      break;
+    case IRCALL_lj_strfmt_putfnum_int:
+    case IRCALL_lj_strfmt_putfnum_uint:
+    case IRCALL_lj_strfmt_putfnum:
+    default: {
+      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
+                                                       ir_knum(ira)->n);
+      break;
+      }
+    }
+    fins->o = IR_BUFPUT;
+    fins->op1 = irc->op1;
+    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+    return RETRYFOLD;
+  }
+  return EMITFOLD;  /* Always emit, CSE later. */
+}
+
 /* -- Constant folding of pointer arithmetic ------------------------------ */

 LJFOLD(ADD KGC KINT)
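The bufput_kgc rule above joins two adjacent constant string puts, so a chain like BUFHDR -> BUFPUT "foo" -> BUFPUT "bar" collapses into a single constant BUFPUT "foobar" at fold time. A rough standalone sketch of that joining step, using plain C strings in place of IR references (illustrative only; the real fold works on interned GCstr constants and mutates the IR in place):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct Put { const char *k; struct Put *prev; } Put;

/* Fold the newest put into its predecessor when both are constants. */
static Put *fold_put(Put *prev, const char *k)
{
  if (prev && prev->k) {               /* Both constant: join them. */
    size_t n1 = strlen(prev->k), n2 = strlen(k);
    char *s = malloc(n1 + n2 + 1);
    memcpy(s, prev->k, n1);
    memcpy(s + n1, k, n2 + 1);
    prev->k = s;                       /* Modify the previous put in place. */
    return prev;
  } else {                             /* Otherwise keep a new chain node. */
    Put *p = malloc(sizeof(Put));
    p->k = k; p->prev = prev;
    return p;
  }
}

int main(void)
{
  Put *chain = fold_put(fold_put(NULL, "foo"), "bar");
  printf("%s\n", chain->k);  /* "foobar": only one put is left in the chain. */
  return 0;
}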
@@ -648,27 +826,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
 LJFOLDF(kfold_conv_knum_int_num)
 {
   lua_Number n = knumleft;
-  if (!(fins->op2 & IRCONV_TRUNC)) {
-    int32_t k = lj_num2int(n);
-    if (irt_isguard(fins->t) && n != (lua_Number)k) {
-      /* We're about to create a guard which always fails, like CONV +1.5.
-      ** Some pathological loops cause this during LICM, e.g.:
-      **   local x,k,t = 0,1.5,{1,[1.5]=2}
-      **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
-      **   assert(x == 300)
-      */
-      return FAILFOLD;
-    }
-    return INTFOLD(k);
-  } else {
-    return INTFOLD((int32_t)n);
+  int32_t k = lj_num2int(n);
+  if (irt_isguard(fins->t) && n != (lua_Number)k) {
+    /* We're about to create a guard which always fails, like CONV +1.5.
+    ** Some pathological loops cause this during LICM, e.g.:
+    **   local x,k,t = 0,1.5,{1,[1.5]=2}
+    **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
+    **   assert(x == 300)
+    */
+    return FAILFOLD;
   }
+  return INTFOLD(k);
 }

 LJFOLD(CONV KNUM IRCONV_U32_NUM)
 LJFOLDF(kfold_conv_knum_u32_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
 #ifdef _MSC_VER
   { /* Workaround for MSVC bug. */
     volatile uint32_t u = (uint32_t)knumleft;
@@ -682,27 +855,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
 LJFOLD(CONV KNUM IRCONV_I64_NUM)
 LJFOLDF(kfold_conv_knum_i64_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
   return INT64FOLD((uint64_t)(int64_t)knumleft);
 }

 LJFOLD(CONV KNUM IRCONV_U64_NUM)
 LJFOLDF(kfold_conv_knum_u64_num)
 {
-  lua_assert((fins->op2 & IRCONV_TRUNC));
   return INT64FOLD(lj_num2u64(knumleft));
 }

-LJFOLD(TOSTR KNUM)
+LJFOLD(TOSTR KNUM any)
 LJFOLDF(kfold_tostr_knum)
 {
-  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
+  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
 }

-LJFOLD(TOSTR KINT)
+LJFOLD(TOSTR KINT any)
 LJFOLDF(kfold_tostr_kint)
 {
-  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
+  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
+                       lj_strfmt_int(J->L, fleft->i) :
+                       lj_strfmt_char(J->L, fleft->i));
 }

 LJFOLD(STRTO KGC)
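The TOSTR folds now dispatch on the op2 mode: IRTOSTR_INT formats the integer value, while IRTOSTR_CHAR is assumed here to produce the one-character string for that character code (string.char-style). A trivial standalone illustration of the two constant results:

#include <stdio.h>

int main(void)
{
  int k = 65;
  char buf[16];
  snprintf(buf, sizeof(buf), "%d", k);  /* IRTOSTR_INT-style  -> "65" */
  printf("%s\n", buf);
  snprintf(buf, sizeof(buf), "%c", k);  /* IRTOSTR_CHAR-style -> "A"  */
  printf("%s\n", buf);
  return 0;
}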
@@ -750,13 +923,13 @@ LJFOLDF(shortcut_round)
   return NEXTFOLD;
 }

-LJFOLD(ABS ABS KNUM)
+LJFOLD(ABS ABS FLOAD)
 LJFOLDF(shortcut_left)
 {
   return LEFTFOLD;  /* f(g(x)) ==> g(x) */
 }

-LJFOLD(ABS NEG KNUM)
+LJFOLD(ABS NEG FLOAD)
 LJFOLDF(shortcut_dropleft)
 {
   PHIBARRIER(fleft);
@@ -837,8 +1010,10 @@ LJFOLDF(simplify_nummuldiv_k)
   if (n == 1.0) {  /* x o 1 ==> x */
     return LEFTFOLD;
   } else if (n == -1.0) {  /* x o -1 ==> -x */
+    IRRef op1 = fins->op1;
+    fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG);  /* Modifies fins. */
+    fins->op1 = op1;
     fins->o = IR_NEG;
-    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
     return RETRYFOLD;
   } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
     fins->o = IR_ADD;
@@ -1205,7 +1380,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
   ** But this is mainly intended for simple address arithmetic.
   ** Also it's easier for the backend to optimize the original multiplies.
   */
-  if (k == 1) {  /* i * 1 ==> i */
+  if (k == 0) {  /* i * 0 ==> 0 */
+    return RIGHTFOLD;
+  } else if (k == 1) {  /* i * 1 ==> i */
     return LEFTFOLD;
   } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
     fins->o = IR_BSHL;
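simplify_intmul_k only keeps the cheap cases: multiply by 0, by 1, or by a power of two, where (k & (k-1)) == 0 detects the power of two and the multiply becomes a shift. A standalone sketch of the same strength reduction on plain int32_t values (illustrative only; the IR version derives the shift count directly from the constant instead of searching for it):

#include <stdint.h>
#include <assert.h>

static int32_t mul_by_const(int32_t i, int32_t k)  /* Assumes k >= 0. */
{
  if (k == 0) return 0;                 /* i * 0   ==> 0      */
  if (k == 1) return i;                 /* i * 1   ==> i      */
  if ((k & (k - 1)) == 0) {             /* i * 2^n ==> i << n */
    int sh = 0;
    while ((1 << sh) != k) sh++;
    return i << sh;
  }
  return i * k;                         /* Otherwise keep the MUL. */
}

int main(void)
{
  assert(mul_by_const(7, 8) == 56);
  assert(mul_by_const(7, 0) == 0);
  return 0;
}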
@@ -1218,9 +1395,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
 LJFOLD(MUL any KINT)
 LJFOLDF(simplify_intmul_k32)
 {
-  if (fright->i == 0)  /* i * 0 ==> 0 */
-    return INTFOLD(0);
-  else if (fright->i > 0)
+  if (fright->i >= 0)
     return simplify_intmul_k(J, fright->i);
   return NEXTFOLD;
 }
@@ -1228,14 +1403,13 @@ LJFOLDF(simplify_intmul_k32)
 LJFOLD(MUL any KINT64)
 LJFOLDF(simplify_intmul_k64)
 {
-  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
-    return INT64FOLD(0);
-#if LJ_64
-  /* NYI: SPLIT for BSHL and 32 bit backend support. */
-  else if (ir_kint64(fright)->u64 < 0x80000000u)
+#if LJ_HASFFI
+  if (ir_kint64(fright)->u64 < 0x80000000u)
     return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
-#endif
   return NEXTFOLD;
+#else
+  UNUSED(J); lua_assert(0); return FAILFOLD;
+#endif
 }

 LJFOLD(MOD any KINT)
@@ -1491,6 +1665,14 @@ LJFOLDF(simplify_shiftk_andk)
     fins->op2 = (IRRef1)lj_ir_kint(J, k);
     fins->ot = IRTI(IR_BAND);
     return RETRYFOLD;
+  } else if (irk->o == IR_KINT64) {
+    uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o);
+    IROpT ot = fleft->ot;
+    fins->op1 = fleft->op1;
+    fins->op1 = (IRRef1)lj_opt_fold(J);
+    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
+    fins->ot = ot;
+    return RETRYFOLD;
   }
   return NEXTFOLD;
 }
@@ -1506,6 +1688,47 @@ LJFOLDF(simplify_andk_shiftk)
   return NEXTFOLD;
 }

+LJFOLD(BAND BOR KINT)
+LJFOLD(BOR BAND KINT)
+LJFOLDF(simplify_andor_k)
+{
+  IRIns *irk = IR(fleft->op2);
+  PHIBARRIER(fleft);
+  if (irk->o == IR_KINT) {
+    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
+    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
+    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
+    if (k == (fins->o == IR_BAND ? 0 : -1)) {
+      fins->op1 = fleft->op1;
+      return RETRYFOLD;
+    }
+  }
+  return NEXTFOLD;
+}
+
+LJFOLD(BAND BOR KINT64)
+LJFOLD(BOR BAND KINT64)
+LJFOLDF(simplify_andor_k64)
+{
+#if LJ_HASFFI
+  IRIns *irk = IR(fleft->op2);
+  PHIBARRIER(fleft);
+  if (irk->o == IR_KINT64) {
+    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
+                                  ir_k64(fright)->u64, (IROp)fins->o);
+    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
+    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
+    if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
+      fins->op1 = fleft->op1;
+      return RETRYFOLD;
+    }
+  }
+  return NEXTFOLD;
+#else
+  UNUSED(J); lua_assert(0); return FAILFOLD;
+#endif
+}
+
 /* -- Reassociation ------------------------------------------------------- */

 LJFOLD(ADD ADD KINT)
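The simplify_andor_k rules rest on the two mask identities quoted in their comments. A brute-force standalone check of both identities over 8-bit values (illustrative only):

#include <assert.h>

int main(void)
{
  unsigned i, k1, k2;
  for (i = 0; i < 256; i++)
    for (k1 = 0; k1 < 256; k1++)
      for (k2 = 0; k2 < 256; k2++) {
        if ((k1 & k2) == 0)            /* (i | k1) & k2 == i & k2 */
          assert(((i | k1) & k2) == (i & k2));
        if ((k1 | k2) == 0xff)         /* (i & k1) | k2 == i | k2 */
          assert(((i & k1) | k2) == (i | k2));
      }
  return 0;
}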
@@ -1535,7 +1758,7 @@ LJFOLD(BOR BOR KINT64)
 LJFOLD(BXOR BXOR KINT64)
 LJFOLDF(reassoc_intarith_k64)
 {
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
   IRIns *irk = IR(fleft->op2);
   if (irk->o == IR_KINT64) {
     uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -1953,6 +2176,7 @@ LJFOLDF(fwd_href_tdup)
 ** an aliased table, as it may invalidate all of the pointers and fields.
 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
 ** FLOADs. And NEWREF itself is treated like a store (see below).
+** LREF is constant (per trace) since coroutine switches are not inlined.
 */
 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
 LJFOLDF(fload_tab_tnew_asize)
@@ -2016,6 +2240,14 @@ LJFOLDF(fload_str_len_snew)
   return NEXTFOLD;
 }

+LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
+LJFOLDF(fload_str_len_tostr)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
+    return INTFOLD(1);
+  return NEXTFOLD;
+}
+
 /* The C type ID of cdata objects is immutable. */
 LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
 LJFOLDF(fload_cdata_typeid_kgc)
@@ -2062,6 +2294,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
 }

 LJFOLD(FLOAD any IRFL_STR_LEN)
+LJFOLD(FLOAD any IRFL_FUNC_ENV)
+LJFOLD(FLOAD any IRFL_THREAD_ENV)
 LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
 LJFOLD(FLOAD any IRFL_CDATA_PTR)
 LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2127,6 +2361,17 @@ LJFOLDF(barrier_tnew_tdup)
   return DROPFOLD;
 }

+/* -- Profiling ----------------------------------------------------------- */
+
+LJFOLD(PROF any any)
+LJFOLDF(prof)
+{
+  IRRef ref = J->chain[IR_PROF];
+  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
+    return ref;
+  return EMITFOLD;
+}
+
 /* -- Stores and allocations ---------------------------------------------- */

 /* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2149,8 +2394,9 @@ LJFOLD(XSTORE any any)
 LJFOLDX(lj_opt_dse_xstore)

 LJFOLD(NEWREF any any)  /* Treated like a store. */
-LJFOLD(CALLS any any)
+LJFOLD(CALLA any any)
 LJFOLD(CALLL any any)  /* Safeguard fallback. */
+LJFOLD(CALLS any any)
 LJFOLD(CALLXS any any)
 LJFOLD(XBAR)
 LJFOLD(RETF any any)  /* Modifies BASE. */
@@ -2158,6 +2404,7 @@ LJFOLD(TNEW any any)
 LJFOLD(TDUP any)
 LJFOLD(CNEW any any)
 LJFOLD(XSNEW any any)
+LJFOLD(BUFHDR any any)
 LJFOLDX(lj_ir_emit)

 /* ------------------------------------------------------------------------ */
@@ -2209,10 +2456,14 @@ retry:
   if (fins->op1 >= J->cur.nk) {
     key += (uint32_t)IR(fins->op1)->o << 10;
     *fleft = *IR(fins->op1);
+    if (fins->op1 < REF_TRUE)
+      fleft[1] = IR(fins->op1)[1];
   }
   if (fins->op2 >= J->cur.nk) {
     key += (uint32_t)IR(fins->op2)->o;
     *fright = *IR(fins->op2);
+    if (fins->op2 < REF_TRUE)
+      fright[1] = IR(fins->op2)[1];
   } else {
     key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
   }