summaryrefslogtreecommitdiff
path: root/src/lj_opt_fold.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r--src/lj_opt_fold.c626
1 files changed, 458 insertions, 168 deletions
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index ab158143..48effb8a 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -133,8 +136,8 @@
133/* Some local macros to save typing. Undef'd at the end. */ 136/* Some local macros to save typing. Undef'd at the end. */
134#define IR(ref) (&J->cur.ir[(ref)]) 137#define IR(ref) (&J->cur.ir[(ref)])
135#define fins (&J->fold.ins) 138#define fins (&J->fold.ins)
136#define fleft (&J->fold.left) 139#define fleft (J->fold.left)
137#define fright (&J->fold.right) 140#define fright (J->fold.right)
138#define knumleft (ir_knum(fleft)->n) 141#define knumleft (ir_knum(fleft)->n)
139#define knumright (ir_knum(fright)->n) 142#define knumright (ir_knum(fright)->n)
140 143
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM)
169LJFOLD(SUB KNUM KNUM) 173LJFOLD(SUB KNUM KNUM)
170LJFOLD(MUL KNUM KNUM) 174LJFOLD(MUL KNUM KNUM)
171LJFOLD(DIV KNUM KNUM) 175LJFOLD(DIV KNUM KNUM)
172LJFOLD(NEG KNUM KNUM)
173LJFOLD(ABS KNUM KNUM)
174LJFOLD(ATAN2 KNUM KNUM)
175LJFOLD(LDEXP KNUM KNUM) 176LJFOLD(LDEXP KNUM KNUM)
176LJFOLD(MIN KNUM KNUM) 177LJFOLD(MIN KNUM KNUM)
177LJFOLD(MAX KNUM KNUM) 178LJFOLD(MAX KNUM KNUM)
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith)
183 return lj_ir_knum(J, y); 184 return lj_ir_knum(J, y);
184} 185}
185 186
187LJFOLD(NEG KNUM FLOAD)
188LJFOLD(ABS KNUM FLOAD)
189LJFOLDF(kfold_numabsneg)
190{
191 lua_Number a = knumleft;
192 lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
193 return lj_ir_knum(J, y);
194}
195
186LJFOLD(LDEXP KNUM KINT) 196LJFOLD(LDEXP KNUM KINT)
187LJFOLDF(kfold_ldexp) 197LJFOLDF(kfold_ldexp)
188{ 198{
@@ -202,13 +212,34 @@ LJFOLDF(kfold_fpmath)
202 return lj_ir_knum(J, y); 212 return lj_ir_knum(J, y);
203} 213}
204 214
205LJFOLD(POW KNUM KINT) 215LJFOLD(CALLN KNUM any)
216LJFOLDF(kfold_fpcall1)
217{
218 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
219 if (CCI_TYPE(ci) == IRT_NUM) {
220 double y = ((double (*)(double))ci->func)(knumleft);
221 return lj_ir_knum(J, y);
222 }
223 return NEXTFOLD;
224}
225
226LJFOLD(CALLN CARG IRCALL_atan2)
227LJFOLDF(kfold_fpcall2)
228{
229 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
230 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
231 double a = ir_knum(IR(fleft->op1))->n;
232 double b = ir_knum(IR(fleft->op2))->n;
233 double y = ((double (*)(double, double))ci->func)(a, b);
234 return lj_ir_knum(J, y);
235 }
236 return NEXTFOLD;
237}
238
239LJFOLD(POW KNUM KNUM)
206LJFOLDF(kfold_numpow) 240LJFOLDF(kfold_numpow)
207{ 241{
208 lua_Number a = knumleft; 242 return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
209 lua_Number b = (lua_Number)fright->i;
210 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
211 return lj_ir_knum(J, y);
212} 243}
213 244
214/* Must not use kfold_kref for numbers (could be NaN). */ 245/* Must not use kfold_kref for numbers (could be NaN). */
@@ -247,7 +278,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
247 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; 278 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
248 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; 279 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
249 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; 280 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
250 default: lua_assert(0); break; 281 default: lj_assertX(0, "bad IR op %d", op); break;
251 } 282 }
252 return k1; 283 return k1;
253} 284}
@@ -319,7 +350,7 @@ LJFOLDF(kfold_intcomp)
319 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); 350 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
320 case IR_ABC: 351 case IR_ABC:
321 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); 352 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
322 default: lua_assert(0); return FAILFOLD; 353 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
323 } 354 }
324} 355}
325 356
@@ -333,21 +364,29 @@ LJFOLDF(kfold_intcomp0)
333 364
334/* -- Constant folding for 64 bit integers -------------------------------- */ 365/* -- Constant folding for 64 bit integers -------------------------------- */
335 366
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 367static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
368 IROp op)
337{ 369{
370 UNUSED(J);
371#if LJ_HASFFI
338 switch (op) { 372 switch (op) {
339#if LJ_64 || LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 373 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 374 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 375 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 376 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 377 case IR_BOR: k1 |= k2; break;
347 case IR_BXOR: k1 ^= k2; break; 378 case IR_BXOR: k1 ^= k2; break;
348#endif 379 case IR_BSHL: k1 <<= (k2 & 63); break;
349 default: UNUSED(k2); lua_assert(0); break; 380 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
381 case IR_BSAR: k1 >>= (k2 & 63); break;
382 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
383 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
384 default: lj_assertJ(0, "bad IR op %d", op); break;
350 } 385 }
386#else
387 UNUSED(k2); UNUSED(op);
388 lj_assertJ(0, "FFI IR op without FFI");
389#endif
351 return k1; 390 return k1;
352} 391}
353 392
@@ -359,7 +398,7 @@ LJFOLD(BOR KINT64 KINT64)
359LJFOLD(BXOR KINT64 KINT64) 398LJFOLD(BXOR KINT64 KINT64)
360LJFOLDF(kfold_int64arith) 399LJFOLDF(kfold_int64arith)
361{ 400{
362 return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, 401 return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
363 ir_k64(fright)->u64, (IROp)fins->o)); 402 ir_k64(fright)->u64, (IROp)fins->o));
364} 403}
365 404
@@ -381,7 +420,7 @@ LJFOLDF(kfold_int64arith2)
381 } 420 }
382 return INT64FOLD(k1); 421 return INT64FOLD(k1);
383#else 422#else
384 UNUSED(J); lua_assert(0); return FAILFOLD; 423 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
385#endif 424#endif
386} 425}
387 426
@@ -392,22 +431,12 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 431LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 432LJFOLDF(kfold_int64shift)
394{ 433{
395#if LJ_HASFFI || LJ_64 434#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 435 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 436 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 437 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 438#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 439 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
411#endif 440#endif
412} 441}
413 442
@@ -417,7 +446,7 @@ LJFOLDF(kfold_bnot64)
417#if LJ_HASFFI 446#if LJ_HASFFI
418 return INT64FOLD(~ir_k64(fleft)->u64); 447 return INT64FOLD(~ir_k64(fleft)->u64);
419#else 448#else
420 UNUSED(J); lua_assert(0); return FAILFOLD; 449 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
421#endif 450#endif
422} 451}
423 452
@@ -427,7 +456,7 @@ LJFOLDF(kfold_bswap64)
427#if LJ_HASFFI 456#if LJ_HASFFI
428 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); 457 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
429#else 458#else
430 UNUSED(J); lua_assert(0); return FAILFOLD; 459 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
431#endif 460#endif
432} 461}
433 462
@@ -452,10 +481,10 @@ LJFOLDF(kfold_int64comp)
452 case IR_UGE: return CONDFOLD(a >= b); 481 case IR_UGE: return CONDFOLD(a >= b);
453 case IR_ULE: return CONDFOLD(a <= b); 482 case IR_ULE: return CONDFOLD(a <= b);
454 case IR_UGT: return CONDFOLD(a > b); 483 case IR_UGT: return CONDFOLD(a > b);
455 default: lua_assert(0); return FAILFOLD; 484 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
456 } 485 }
457#else 486#else
458 UNUSED(J); lua_assert(0); return FAILFOLD; 487 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
459#endif 488#endif
460} 489}
461 490
@@ -467,7 +496,7 @@ LJFOLDF(kfold_int64comp0)
467 return DROPFOLD; 496 return DROPFOLD;
468 return NEXTFOLD; 497 return NEXTFOLD;
469#else 498#else
470 UNUSED(J); lua_assert(0); return FAILFOLD; 499 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
471#endif 500#endif
472} 501}
473 502
@@ -481,6 +510,7 @@ LJFOLDF(kfold_snew_kptr)
481} 510}
482 511
483LJFOLD(SNEW any KINT) 512LJFOLD(SNEW any KINT)
513LJFOLD(XSNEW any KINT)
484LJFOLDF(kfold_snew_empty) 514LJFOLDF(kfold_snew_empty)
485{ 515{
486 if (fright->i == 0) 516 if (fright->i == 0)
@@ -492,7 +522,7 @@ LJFOLD(STRREF KGC KINT)
492LJFOLDF(kfold_strref) 522LJFOLDF(kfold_strref)
493{ 523{
494 GCstr *str = ir_kstr(fleft); 524 GCstr *str = ir_kstr(fleft);
495 lua_assert((MSize)fright->i <= str->len); 525 lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
496 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); 526 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
497} 527}
498 528
@@ -510,7 +540,7 @@ LJFOLDF(kfold_strref_snew)
510 PHIBARRIER(ir); 540 PHIBARRIER(ir);
511 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ 541 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
512 fins->op1 = str; 542 fins->op1 = str;
513 fins->ot = IRT(IR_STRREF, IRT_P32); 543 fins->ot = IRT(IR_STRREF, IRT_PGC);
514 return RETRYFOLD; 544 return RETRYFOLD;
515 } 545 }
516 } 546 }
@@ -528,6 +558,211 @@ LJFOLDF(kfold_strcmp)
528 return NEXTFOLD; 558 return NEXTFOLD;
529} 559}
530 560
561/* -- Constant folding and forwarding for buffers ------------------------- */
562
563/*
564** Buffer ops perform stores, but their effect is limited to the buffer
565** itself. Also, buffer ops are chained: a use of an op implies a use of
566** all other ops up the chain. Conversely, if an op is unused, all ops
567** up the chain can go unused. This largely eliminates the need to treat
568** them as stores.
569**
570** Alas, treating them as normal (IRM_N) ops doesn't work, because they
571** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
572** or if FOLD is disabled.
573**
574** The compromise is to declare them as loads, emit them like stores and
575** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
576** fragments left over from CSE are eliminated by DCE.
577**
578** The string buffer methods emit a USE instead of a BUFSTR to keep the
579** chain alive.
580*/
581
582LJFOLD(BUFHDR any any)
583LJFOLDF(bufhdr_merge)
584{
585 return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
586}
587
588LJFOLD(BUFPUT any BUFSTR)
589LJFOLDF(bufput_bufstr)
590{
591 if ((J->flags & JIT_F_OPT_FWD)) {
592 IRRef hdr = fright->op2;
593 /* New buffer, no other buffer op in between and same buffer? */
594 if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
595 fleft->prev == hdr &&
596 fleft->op1 == IR(hdr)->op1 &&
597 !(irt_isphi(fright->t) && IR(hdr)->prev) &&
598 (!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) {
599 IRRef ref = fins->op1;
600 IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */
601 IR(ref)->op1 = fright->op1;
602 return ref;
603 }
604 /* Replay puts to global temporary buffer. */
605 if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) {
606 IRIns *ir = IR(fright->op1);
607 /* For now only handle single string.reverse .lower .upper .rep. */
608 if (ir->o == IR_CALLL &&
609 ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
610 ir->op2 <= IRCALL_lj_buf_putstr_rep) {
611 IRIns *carg1 = IR(ir->op1);
612 if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
613 IRIns *carg2 = IR(carg1->op1);
614 if (carg2->op1 == hdr) {
615 return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
616 }
617 } else if (carg1->op1 == hdr) {
618 return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
619 }
620 }
621 }
622 }
623 return EMITFOLD; /* Always emit, CSE later. */
624}
625
626LJFOLD(BUFPUT any any)
627LJFOLDF(bufput_kgc)
628{
629 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
630 GCstr *s2 = ir_kstr(fright);
631 if (s2->len == 0) { /* Empty string? */
632 return LEFTFOLD;
633 } else {
634 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
635 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
636 GCstr *s1 = ir_kstr(IR(fleft->op2));
637 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
638 /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
639 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
640 return fins->op1;
641 }
642 }
643 }
644 return EMITFOLD; /* Always emit, CSE later. */
645}
646
647LJFOLD(BUFSTR any any)
648LJFOLDF(bufstr_kfold_cse)
649{
650 lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
651 fleft->o == IR_CALLL,
652 "bad buffer constructor IR op %d", fleft->o);
653 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
654 if (fleft->o == IR_BUFHDR) { /* No put operations? */
655 if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */
656 return lj_ir_kstr(J, &J2G(J)->strempty);
657 fins->op1 = fleft->op1;
658 fins->op2 = fleft->prev; /* Relies on checks in bufput_bufstr. */
659 return CSEFOLD;
660 } else if (fleft->o == IR_BUFPUT) {
661 IRIns *irb = IR(fleft->op1);
662 if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
663 return fleft->op2; /* Shortcut for a single put operation. */
664 }
665 }
666 /* Try to CSE the whole chain. */
667 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
668 IRRef ref = J->chain[IR_BUFSTR];
669 while (ref) {
670 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
671 while (ira->o == irb->o && ira->op2 == irb->op2) {
672 lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
673 ira->o == IR_CALLL || ira->o == IR_CARG,
674 "bad buffer constructor IR op %d", ira->o);
675 if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
676 return ref; /* CSE succeeded. */
677 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
678 break;
679 ira = IR(ira->op1);
680 irb = IR(irb->op1);
681 }
682 ref = irs->prev;
683 }
684 }
685 return EMITFOLD; /* No CSE possible. */
686}
687
688LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
689LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
690LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
691LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
692LJFOLDF(bufput_kfold_op)
693{
694 if (irref_isk(fleft->op2)) {
695 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
696 SBuf *sb = lj_buf_tmp_(J->L);
697 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
698 ir_kstr(IR(fleft->op2)));
699 fins->o = IR_BUFPUT;
700 fins->op1 = fleft->op1;
701 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
702 return RETRYFOLD;
703 }
704 return EMITFOLD; /* Always emit, CSE later. */
705}
706
707LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
708LJFOLDF(bufput_kfold_rep)
709{
710 if (irref_isk(fleft->op2)) {
711 IRIns *irc = IR(fleft->op1);
712 if (irref_isk(irc->op2)) {
713 SBuf *sb = lj_buf_tmp_(J->L);
714 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
715 fins->o = IR_BUFPUT;
716 fins->op1 = irc->op1;
717 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
718 return RETRYFOLD;
719 }
720 }
721 return EMITFOLD; /* Always emit, CSE later. */
722}
723
724LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
725LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
726LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
727LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
728LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
729LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
730LJFOLDF(bufput_kfold_fmt)
731{
732 IRIns *irc = IR(fleft->op1);
733 lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
734 if (irref_isk(fleft->op2)) {
735 SFormat sf = (SFormat)IR(irc->op2)->i;
736 IRIns *ira = IR(fleft->op2);
737 SBuf *sb = lj_buf_tmp_(J->L);
738 switch (fins->op2) {
739 case IRCALL_lj_strfmt_putfxint:
740 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
741 break;
742 case IRCALL_lj_strfmt_putfstr:
743 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
744 break;
745 case IRCALL_lj_strfmt_putfchar:
746 sb = lj_strfmt_putfchar(sb, sf, ira->i);
747 break;
748 case IRCALL_lj_strfmt_putfnum_int:
749 case IRCALL_lj_strfmt_putfnum_uint:
750 case IRCALL_lj_strfmt_putfnum:
751 default: {
752 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
753 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
754 ir_knum(ira)->n);
755 break;
756 }
757 }
758 fins->o = IR_BUFPUT;
759 fins->op1 = irc->op1;
760 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
761 return RETRYFOLD;
762 }
763 return EMITFOLD; /* Always emit, CSE later. */
764}
765
531/* -- Constant folding of pointer arithmetic ------------------------------ */ 766/* -- Constant folding of pointer arithmetic ------------------------------ */
532 767
533LJFOLD(ADD KGC KINT) 768LJFOLD(ADD KGC KINT)
@@ -648,21 +883,17 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
648LJFOLDF(kfold_conv_knum_int_num) 883LJFOLDF(kfold_conv_knum_int_num)
649{ 884{
650 lua_Number n = knumleft; 885 lua_Number n = knumleft;
651 if (!(fins->op2 & IRCONV_TRUNC)) { 886 int32_t k = lj_num2int(n);
652 int32_t k = lj_num2int(n); 887 if (irt_isguard(fins->t) && n != (lua_Number)k) {
653 if (irt_isguard(fins->t) && n != (lua_Number)k) { 888 /* We're about to create a guard which always fails, like CONV +1.5.
654 /* We're about to create a guard which always fails, like CONV +1.5. 889 ** Some pathological loops cause this during LICM, e.g.:
655 ** Some pathological loops cause this during LICM, e.g.: 890 ** local x,k,t = 0,1.5,{1,[1.5]=2}
656 ** local x,k,t = 0,1.5,{1,[1.5]=2} 891 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
657 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 892 ** assert(x == 300)
658 ** assert(x == 300) 893 */
659 */ 894 return FAILFOLD;
660 return FAILFOLD;
661 }
662 return INTFOLD(k);
663 } else {
664 return INTFOLD((int32_t)n);
665 } 895 }
896 return INTFOLD(k);
666} 897}
667 898
668LJFOLD(CONV KNUM IRCONV_U32_NUM) 899LJFOLD(CONV KNUM IRCONV_U32_NUM)
@@ -690,16 +921,18 @@ LJFOLDF(kfold_conv_knum_u64_num)
690 return INT64FOLD(lj_num2u64(knumleft)); 921 return INT64FOLD(lj_num2u64(knumleft));
691} 922}
692 923
693LJFOLD(TOSTR KNUM) 924LJFOLD(TOSTR KNUM any)
694LJFOLDF(kfold_tostr_knum) 925LJFOLDF(kfold_tostr_knum)
695{ 926{
696 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 927 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
697} 928}
698 929
699LJFOLD(TOSTR KINT) 930LJFOLD(TOSTR KINT any)
700LJFOLDF(kfold_tostr_kint) 931LJFOLDF(kfold_tostr_kint)
701{ 932{
702 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 933 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
934 lj_strfmt_int(J->L, fleft->i) :
935 lj_strfmt_char(J->L, fleft->i));
703} 936}
704 937
705LJFOLD(STRTO KGC) 938LJFOLD(STRTO KGC)
@@ -747,13 +980,13 @@ LJFOLDF(shortcut_round)
747 return NEXTFOLD; 980 return NEXTFOLD;
748} 981}
749 982
750LJFOLD(ABS ABS KNUM) 983LJFOLD(ABS ABS FLOAD)
751LJFOLDF(shortcut_left) 984LJFOLDF(shortcut_left)
752{ 985{
753 return LEFTFOLD; /* f(g(x)) ==> g(x) */ 986 return LEFTFOLD; /* f(g(x)) ==> g(x) */
754} 987}
755 988
756LJFOLD(ABS NEG KNUM) 989LJFOLD(ABS NEG FLOAD)
757LJFOLDF(shortcut_dropleft) 990LJFOLDF(shortcut_dropleft)
758{ 991{
759 PHIBARRIER(fleft); 992 PHIBARRIER(fleft);
@@ -833,8 +1066,10 @@ LJFOLDF(simplify_nummuldiv_k)
833 if (n == 1.0) { /* x o 1 ==> x */ 1066 if (n == 1.0) { /* x o 1 ==> x */
834 return LEFTFOLD; 1067 return LEFTFOLD;
835 } else if (n == -1.0) { /* x o -1 ==> -x */ 1068 } else if (n == -1.0) { /* x o -1 ==> -x */
1069 IRRef op1 = fins->op1;
1070 fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
1071 fins->op1 = op1;
836 fins->o = IR_NEG; 1072 fins->o = IR_NEG;
837 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
838 return RETRYFOLD; 1073 return RETRYFOLD;
839 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ 1074 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
840 fins->o = IR_ADD; 1075 fins->o = IR_ADD;
@@ -874,52 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg)
874 return RETRYFOLD; 1109 return RETRYFOLD;
875} 1110}
876 1111
877LJFOLD(POW any KINT) 1112LJFOLD(POW any KNUM)
878LJFOLDF(simplify_numpow_xk) 1113LJFOLDF(simplify_numpow_k)
879{ 1114{
880 int32_t k = fright->i; 1115 if (knumright == 0.0) /* x ^ 0 ==> 1 */
881 TRef ref = fins->op1;
882 if (k == 0) /* x ^ 0 ==> 1 */
883 return lj_ir_knum_one(J); /* Result must be a number, not an int. */ 1116 return lj_ir_knum_one(J); /* Result must be a number, not an int. */
884 if (k == 1) /* x ^ 1 ==> x */ 1117 else if (knumright == 1.0) /* x ^ 1 ==> x */
885 return LEFTFOLD; 1118 return LEFTFOLD;
886 if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */ 1119 else if (knumright == 2.0) /* x ^ 2 ==> x * x */
1120 return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
1121 else
887 return NEXTFOLD; 1122 return NEXTFOLD;
888 if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
889 ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
890 k = -k;
891 }
892 /* Unroll x^k for 1 <= k <= 65536. */
893 for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
894 ref = emitir(IRTN(IR_MUL), ref, ref);
895 if ((k >>= 1) != 0) { /* Handle trailing bits. */
896 TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
897 for (; k != 1; k >>= 1) {
898 if (k & 1)
899 ref = emitir(IRTN(IR_MUL), ref, tmp);
900 tmp = emitir(IRTN(IR_MUL), tmp, tmp);
901 }
902 ref = emitir(IRTN(IR_MUL), ref, tmp);
903 }
904 return ref;
905}
906
907LJFOLD(POW KNUM any)
908LJFOLDF(simplify_numpow_kx)
909{
910 lua_Number n = knumleft;
911 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
912 fins->o = IR_CONV;
913#if LJ_TARGET_X86ORX64
914 fins->op1 = fins->op2;
915 fins->op2 = IRCONV_NUM_INT;
916 fins->op2 = (IRRef1)lj_opt_fold(J);
917#endif
918 fins->op1 = (IRRef1)lj_ir_knum_one(J);
919 fins->o = IR_LDEXP;
920 return RETRYFOLD;
921 }
922 return NEXTFOLD;
923} 1123}
924 1124
925/* -- Simplify conversions ------------------------------------------------ */ 1125/* -- Simplify conversions ------------------------------------------------ */
@@ -1004,10 +1204,10 @@ LJFOLDF(simplify_tobit_conv)
1004{ 1204{
1005 /* Fold even across PHI to avoid expensive num->int conversions in loop. */ 1205 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
1006 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { 1206 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
1007 lua_assert(irt_isnum(fleft->t)); 1207 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1008 return fleft->op1; 1208 return fleft->op1;
1009 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { 1209 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
1010 lua_assert(irt_isnum(fleft->t)); 1210 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1011 fins->o = IR_CONV; 1211 fins->o = IR_CONV;
1012 fins->op1 = fleft->op1; 1212 fins->op1 = fleft->op1;
1013 fins->op2 = (IRT_INT<<5)|IRT_U32; 1213 fins->op2 = (IRT_INT<<5)|IRT_U32;
@@ -1047,7 +1247,7 @@ LJFOLDF(simplify_conv_sext)
1047 /* Use scalar evolution analysis results to strength-reduce sign-extension. */ 1247 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
1048 if (ref == J->scev.idx) { 1248 if (ref == J->scev.idx) {
1049 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; 1249 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
1050 lua_assert(irt_isint(J->scev.t)); 1250 lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
1051 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { 1251 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
1052 ok_reduce: 1252 ok_reduce:
1053#if LJ_TARGET_X64 1253#if LJ_TARGET_X64
@@ -1078,6 +1278,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
1078LJFOLD(CONV MUL IRCONV_U32_U64) 1278LJFOLD(CONV MUL IRCONV_U32_U64)
1079LJFOLDF(simplify_conv_narrow) 1279LJFOLDF(simplify_conv_narrow)
1080{ 1280{
1281#if LJ_64
1282 UNUSED(J);
1283 return NEXTFOLD;
1284#else
1081 IROp op = (IROp)fleft->o; 1285 IROp op = (IROp)fleft->o;
1082 IRType t = irt_type(fins->t); 1286 IRType t = irt_type(fins->t);
1083 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; 1287 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
@@ -1088,6 +1292,7 @@ LJFOLDF(simplify_conv_narrow)
1088 fins->op1 = op1; 1292 fins->op1 = op1;
1089 fins->op2 = op2; 1293 fins->op2 = op2;
1090 return RETRYFOLD; 1294 return RETRYFOLD;
1295#endif
1091} 1296}
1092 1297
1093/* Special CSE rule for CONV. */ 1298/* Special CSE rule for CONV. */
@@ -1123,7 +1328,8 @@ LJFOLDF(narrow_convert)
1123 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ 1328 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1124 if (J->chain[IR_LOOP]) 1329 if (J->chain[IR_LOOP])
1125 return NEXTFOLD; 1330 return NEXTFOLD;
1126 lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); 1331 lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
1332 "unexpected CONV TOBIT");
1127 return lj_opt_narrow_convert(J); 1333 return lj_opt_narrow_convert(J);
1128} 1334}
1129 1335
@@ -1201,7 +1407,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1201 ** But this is mainly intended for simple address arithmetic. 1407 ** But this is mainly intended for simple address arithmetic.
1202 ** Also it's easier for the backend to optimize the original multiplies. 1408 ** Also it's easier for the backend to optimize the original multiplies.
1203 */ 1409 */
1204 if (k == 1) { /* i * 1 ==> i */ 1410 if (k == 0) { /* i * 0 ==> 0 */
1411 return RIGHTFOLD;
1412 } else if (k == 1) { /* i * 1 ==> i */
1205 return LEFTFOLD; 1413 return LEFTFOLD;
1206 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1414 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1207 fins->o = IR_BSHL; 1415 fins->o = IR_BSHL;
@@ -1214,9 +1422,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1214LJFOLD(MUL any KINT) 1422LJFOLD(MUL any KINT)
1215LJFOLDF(simplify_intmul_k32) 1423LJFOLDF(simplify_intmul_k32)
1216{ 1424{
1217 if (fright->i == 0) /* i * 0 ==> 0 */ 1425 if (fright->i >= 0)
1218 return INTFOLD(0);
1219 else if (fright->i > 0)
1220 return simplify_intmul_k(J, fright->i); 1426 return simplify_intmul_k(J, fright->i);
1221 return NEXTFOLD; 1427 return NEXTFOLD;
1222} 1428}
@@ -1224,21 +1430,20 @@ LJFOLDF(simplify_intmul_k32)
1224LJFOLD(MUL any KINT64) 1430LJFOLD(MUL any KINT64)
1225LJFOLDF(simplify_intmul_k64) 1431LJFOLDF(simplify_intmul_k64)
1226{ 1432{
1227 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1433#if LJ_HASFFI
1228 return INT64FOLD(0); 1434 if (ir_kint64(fright)->u64 < 0x80000000u)
1229#if LJ_64
1230 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1231 else if (ir_kint64(fright)->u64 < 0x80000000u)
1232 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1435 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1233#endif
1234 return NEXTFOLD; 1436 return NEXTFOLD;
1437#else
1438 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1439#endif
1235} 1440}
1236 1441
1237LJFOLD(MOD any KINT) 1442LJFOLD(MOD any KINT)
1238LJFOLDF(simplify_intmod_k) 1443LJFOLDF(simplify_intmod_k)
1239{ 1444{
1240 int32_t k = fright->i; 1445 int32_t k = fright->i;
1241 lua_assert(k != 0); 1446 lj_assertJ(k != 0, "integer mod 0");
1242 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ 1447 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1243 fins->o = IR_BAND; 1448 fins->o = IR_BAND;
1244 fins->op2 = lj_ir_kint(J, k-1); 1449 fins->op2 = lj_ir_kint(J, k-1);
@@ -1487,6 +1692,15 @@ LJFOLDF(simplify_shiftk_andk)
1487 fins->op2 = (IRRef1)lj_ir_kint(J, k); 1692 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1488 fins->ot = IRTI(IR_BAND); 1693 fins->ot = IRTI(IR_BAND);
1489 return RETRYFOLD; 1694 return RETRYFOLD;
1695 } else if (irk->o == IR_KINT64) {
1696 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
1697 (IROp)fins->o);
1698 IROpT ot = fleft->ot;
1699 fins->op1 = fleft->op1;
1700 fins->op1 = (IRRef1)lj_opt_fold(J);
1701 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1702 fins->ot = ot;
1703 return RETRYFOLD;
1490 } 1704 }
1491 return NEXTFOLD; 1705 return NEXTFOLD;
1492} 1706}
@@ -1502,6 +1716,47 @@ LJFOLDF(simplify_andk_shiftk)
1502 return NEXTFOLD; 1716 return NEXTFOLD;
1503} 1717}
1504 1718
1719LJFOLD(BAND BOR KINT)
1720LJFOLD(BOR BAND KINT)
1721LJFOLDF(simplify_andor_k)
1722{
1723 IRIns *irk = IR(fleft->op2);
1724 PHIBARRIER(fleft);
1725 if (irk->o == IR_KINT) {
1726 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1727 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1728 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1729 if (k == (fins->o == IR_BAND ? 0 : -1)) {
1730 fins->op1 = fleft->op1;
1731 return RETRYFOLD;
1732 }
1733 }
1734 return NEXTFOLD;
1735}
1736
1737LJFOLD(BAND BOR KINT64)
1738LJFOLD(BOR BAND KINT64)
1739LJFOLDF(simplify_andor_k64)
1740{
1741#if LJ_HASFFI
1742 IRIns *irk = IR(fleft->op2);
1743 PHIBARRIER(fleft);
1744 if (irk->o == IR_KINT64) {
1745 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1746 (IROp)fins->o);
1747 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1748 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1749 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
1750 fins->op1 = fleft->op1;
1751 return RETRYFOLD;
1752 }
1753 }
1754 return NEXTFOLD;
1755#else
1756 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1757#endif
1758}
1759
1505/* -- Reassociation ------------------------------------------------------- */ 1760/* -- Reassociation ------------------------------------------------------- */
1506 1761
1507LJFOLD(ADD ADD KINT) 1762LJFOLD(ADD ADD KINT)
@@ -1531,11 +1786,11 @@ LJFOLD(BOR BOR KINT64)
1531LJFOLD(BXOR BXOR KINT64) 1786LJFOLD(BXOR BXOR KINT64)
1532LJFOLDF(reassoc_intarith_k64) 1787LJFOLDF(reassoc_intarith_k64)
1533{ 1788{
1534#if LJ_HASFFI || LJ_64 1789#if LJ_HASFFI
1535 IRIns *irk = IR(fleft->op2); 1790 IRIns *irk = IR(fleft->op2);
1536 if (irk->o == IR_KINT64) { 1791 if (irk->o == IR_KINT64) {
1537 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1792 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1538 ir_k64(fright)->u64, (IROp)fins->o); 1793 (IROp)fins->o);
1539 PHIBARRIER(fleft); 1794 PHIBARRIER(fleft);
1540 fins->op1 = fleft->op1; 1795 fins->op1 = fleft->op1;
1541 fins->op2 = (IRRef1)lj_ir_kint64(J, k); 1796 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
@@ -1543,12 +1798,10 @@ LJFOLDF(reassoc_intarith_k64)
1543 } 1798 }
1544 return NEXTFOLD; 1799 return NEXTFOLD;
1545#else 1800#else
1546 UNUSED(J); lua_assert(0); return FAILFOLD; 1801 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1547#endif 1802#endif
1548} 1803}
1549 1804
1550LJFOLD(MIN MIN any)
1551LJFOLD(MAX MAX any)
1552LJFOLD(BAND BAND any) 1805LJFOLD(BAND BAND any)
1553LJFOLD(BOR BOR any) 1806LJFOLD(BOR BOR any)
1554LJFOLDF(reassoc_dup) 1807LJFOLDF(reassoc_dup)
@@ -1558,6 +1811,15 @@ LJFOLDF(reassoc_dup)
1558 return NEXTFOLD; 1811 return NEXTFOLD;
1559} 1812}
1560 1813
1814LJFOLD(MIN MIN any)
1815LJFOLD(MAX MAX any)
1816LJFOLDF(reassoc_dup_minmax)
1817{
1818 if (fins->op2 == fleft->op2)
1819 return LEFTFOLD; /* (a o b) o b ==> a o b */
1820 return NEXTFOLD;
1821}
1822
1561LJFOLD(BXOR BXOR any) 1823LJFOLD(BXOR BXOR any)
1562LJFOLDF(reassoc_bxor) 1824LJFOLDF(reassoc_bxor)
1563{ 1825{
@@ -1596,23 +1858,12 @@ LJFOLDF(reassoc_shift)
1596 return NEXTFOLD; 1858 return NEXTFOLD;
1597} 1859}
1598 1860
1599LJFOLD(MIN MIN KNUM)
1600LJFOLD(MAX MAX KNUM)
1601LJFOLD(MIN MIN KINT) 1861LJFOLD(MIN MIN KINT)
1602LJFOLD(MAX MAX KINT) 1862LJFOLD(MAX MAX KINT)
1603LJFOLDF(reassoc_minmax_k) 1863LJFOLDF(reassoc_minmax_k)
1604{ 1864{
1605 IRIns *irk = IR(fleft->op2); 1865 IRIns *irk = IR(fleft->op2);
1606 if (irk->o == IR_KNUM) { 1866 if (irk->o == IR_KINT) {
1607 lua_Number a = ir_knum(irk)->n;
1608 lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
1609 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1610 return LEFTFOLD;
1611 PHIBARRIER(fleft);
1612 fins->op1 = fleft->op1;
1613 fins->op2 = (IRRef1)lj_ir_knum(J, y);
1614 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1615 } else if (irk->o == IR_KINT) {
1616 int32_t a = irk->i; 1867 int32_t a = irk->i;
1617 int32_t y = kfold_intop(a, fright->i, fins->o); 1868 int32_t y = kfold_intop(a, fright->i, fins->o);
1618 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ 1869 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
@@ -1625,24 +1876,6 @@ LJFOLDF(reassoc_minmax_k)
1625 return NEXTFOLD; 1876 return NEXTFOLD;
1626} 1877}
1627 1878
1628LJFOLD(MIN MAX any)
1629LJFOLD(MAX MIN any)
1630LJFOLDF(reassoc_minmax_left)
1631{
1632 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1633 return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
1634 return NEXTFOLD;
1635}
1636
1637LJFOLD(MIN any MAX)
1638LJFOLD(MAX any MIN)
1639LJFOLDF(reassoc_minmax_right)
1640{
1641 if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
1642 return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
1643 return NEXTFOLD;
1644}
1645
1646/* -- Array bounds check elimination -------------------------------------- */ 1879/* -- Array bounds check elimination -------------------------------------- */
1647 1880
1648/* Eliminate ABC across PHIs to handle t[i-1] forwarding case. 1881/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
@@ -1769,8 +2002,6 @@ LJFOLDF(comm_comp)
1769 2002
1770LJFOLD(BAND any any) 2003LJFOLD(BAND any any)
1771LJFOLD(BOR any any) 2004LJFOLD(BOR any any)
1772LJFOLD(MIN any any)
1773LJFOLD(MAX any any)
1774LJFOLDF(comm_dup) 2005LJFOLDF(comm_dup)
1775{ 2006{
1776 if (fins->op1 == fins->op2) /* x o x ==> x */ 2007 if (fins->op1 == fins->op2) /* x o x ==> x */
@@ -1778,6 +2009,15 @@ LJFOLDF(comm_dup)
1778 return fold_comm_swap(J); 2009 return fold_comm_swap(J);
1779} 2010}
1780 2011
2012LJFOLD(MIN any any)
2013LJFOLD(MAX any any)
2014LJFOLDF(comm_dup_minmax)
2015{
2016 if (fins->op1 == fins->op2) /* x o x ==> x */
2017 return LEFTFOLD;
2018 return NEXTFOLD;
2019}
2020
1781LJFOLD(BXOR any any) 2021LJFOLD(BXOR any any)
1782LJFOLDF(comm_bxor) 2022LJFOLDF(comm_bxor)
1783{ 2023{
@@ -1814,7 +2054,7 @@ LJFOLDF(merge_eqne_snew_kgc)
1814{ 2054{
1815 GCstr *kstr = ir_kstr(fright); 2055 GCstr *kstr = ir_kstr(fright);
1816 int32_t len = (int32_t)kstr->len; 2056 int32_t len = (int32_t)kstr->len;
1817 lua_assert(irt_isstr(fins->t)); 2057 lj_assertJ(irt_isstr(fins->t), "bad equality IR type");
1818 2058
1819#if LJ_TARGET_UNALIGNED 2059#if LJ_TARGET_UNALIGNED
1820#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ 2060#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
@@ -1878,7 +2118,7 @@ LJFOLD(HLOAD KKPTR)
1878LJFOLDF(kfold_hload_kkptr) 2118LJFOLDF(kfold_hload_kkptr)
1879{ 2119{
1880 UNUSED(J); 2120 UNUSED(J);
1881 lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); 2121 lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
1882 return TREF_NIL; 2122 return TREF_NIL;
1883} 2123}
1884 2124
@@ -1888,8 +2128,8 @@ LJFOLDX(lj_opt_fwd_hload)
1888LJFOLD(ULOAD any) 2128LJFOLD(ULOAD any)
1889LJFOLDX(lj_opt_fwd_uload) 2129LJFOLDX(lj_opt_fwd_uload)
1890 2130
1891LJFOLD(CALLL any IRCALL_lj_tab_len) 2131LJFOLD(ALEN any any)
1892LJFOLDX(lj_opt_fwd_tab_len) 2132LJFOLDX(lj_opt_fwd_alen)
1893 2133
1894/* Upvalue refs are really loads, but there are no corresponding stores. 2134/* Upvalue refs are really loads, but there are no corresponding stores.
1895** So CSE is ok for them, except for UREFO across a GC step (see below). 2135** So CSE is ok for them, except for UREFO across a GC step (see below).
@@ -1950,6 +2190,7 @@ LJFOLDF(fwd_href_tdup)
1950** an aliased table, as it may invalidate all of the pointers and fields. 2190** an aliased table, as it may invalidate all of the pointers and fields.
1951** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2191** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1952** FLOADs. And NEWREF itself is treated like a store (see below). 2192** FLOADs. And NEWREF itself is treated like a store (see below).
2193** LREF is constant (per trace) since coroutine switches are not inlined.
1953*/ 2194*/
1954LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2195LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1955LJFOLDF(fload_tab_tnew_asize) 2196LJFOLDF(fload_tab_tnew_asize)
@@ -2013,6 +2254,35 @@ LJFOLDF(fload_str_len_snew)
2013 return NEXTFOLD; 2254 return NEXTFOLD;
2014} 2255}
2015 2256
2257LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2258LJFOLDF(fload_str_len_tostr)
2259{
2260 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2261 return INTFOLD(1);
2262 return NEXTFOLD;
2263}
2264
2265LJFOLD(FLOAD any IRFL_SBUF_W)
2266LJFOLD(FLOAD any IRFL_SBUF_E)
2267LJFOLD(FLOAD any IRFL_SBUF_B)
2268LJFOLD(FLOAD any IRFL_SBUF_L)
2269LJFOLD(FLOAD any IRFL_SBUF_REF)
2270LJFOLD(FLOAD any IRFL_SBUF_R)
2271LJFOLDF(fload_sbuf)
2272{
2273 TRef tr = lj_opt_fwd_fload(J);
2274 return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
2275}
2276
2277/* The fast function ID of function objects is immutable. */
2278LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
2279LJFOLDF(fload_func_ffid_kgc)
2280{
2281 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2282 return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
2283 return NEXTFOLD;
2284}
2285
2016/* The C type ID of cdata objects is immutable. */ 2286/* The C type ID of cdata objects is immutable. */
2017LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2287LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2018LJFOLDF(fload_cdata_typeid_kgc) 2288LJFOLDF(fload_cdata_typeid_kgc)
@@ -2059,6 +2329,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2059} 2329}
2060 2330
2061LJFOLD(FLOAD any IRFL_STR_LEN) 2331LJFOLD(FLOAD any IRFL_STR_LEN)
2332LJFOLD(FLOAD any IRFL_FUNC_ENV)
2333LJFOLD(FLOAD any IRFL_THREAD_ENV)
2062LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2334LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2063LJFOLD(FLOAD any IRFL_CDATA_PTR) 2335LJFOLD(FLOAD any IRFL_CDATA_PTR)
2064LJFOLD(FLOAD any IRFL_CDATA_INT) 2336LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2078,7 +2350,7 @@ LJFOLDF(fwd_sload)
2078 TRef tr = lj_opt_cse(J); 2350 TRef tr = lj_opt_cse(J);
2079 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; 2351 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
2080 } else { 2352 } else {
2081 lua_assert(J->slot[fins->op1] != 0); 2353 lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
2082 return J->slot[fins->op1]; 2354 return J->slot[fins->op1];
2083 } 2355 }
2084} 2356}
@@ -2135,6 +2407,17 @@ LJFOLDF(barrier_tnew_tdup)
2135 return DROPFOLD; 2407 return DROPFOLD;
2136} 2408}
2137 2409
2410/* -- Profiling ----------------------------------------------------------- */
2411
2412LJFOLD(PROF any any)
2413LJFOLDF(prof)
2414{
2415 IRRef ref = J->chain[IR_PROF];
2416 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2417 return ref;
2418 return EMITFOLD;
2419}
2420
2138/* -- Stores and allocations ---------------------------------------------- */ 2421/* -- Stores and allocations ---------------------------------------------- */
2139 2422
2140/* Stores and allocations cannot be folded or passed on to CSE in general. 2423/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2157,8 +2440,10 @@ LJFOLD(XSTORE any any)
2157LJFOLDX(lj_opt_dse_xstore) 2440LJFOLDX(lj_opt_dse_xstore)
2158 2441
2159LJFOLD(NEWREF any any) /* Treated like a store. */ 2442LJFOLD(NEWREF any any) /* Treated like a store. */
2160LJFOLD(CALLS any any) 2443LJFOLD(TMPREF any any)
2444LJFOLD(CALLA any any)
2161LJFOLD(CALLL any any) /* Safeguard fallback. */ 2445LJFOLD(CALLL any any) /* Safeguard fallback. */
2446LJFOLD(CALLS any any)
2162LJFOLD(CALLXS any any) 2447LJFOLD(CALLXS any any)
2163LJFOLD(XBAR) 2448LJFOLD(XBAR)
2164LJFOLD(RETF any any) /* Modifies BASE. */ 2449LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2191,8 +2476,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
2191 IRRef ref; 2476 IRRef ref;
2192 2477
2193 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { 2478 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
2194 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | 2479 lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
2195 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); 2480 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
2481 "bad JIT_F_OPT_DEFAULT");
2196 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ 2482 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2197 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) 2483 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
2198 return lj_opt_cse(J); 2484 return lj_opt_cse(J);
@@ -2217,10 +2503,14 @@ retry:
2217 if (fins->op1 >= J->cur.nk) { 2503 if (fins->op1 >= J->cur.nk) {
2218 key += (uint32_t)IR(fins->op1)->o << 10; 2504 key += (uint32_t)IR(fins->op1)->o << 10;
2219 *fleft = *IR(fins->op1); 2505 *fleft = *IR(fins->op1);
2506 if (fins->op1 < REF_TRUE)
2507 fleft[1] = IR(fins->op1)[1];
2220 } 2508 }
2221 if (fins->op2 >= J->cur.nk) { 2509 if (fins->op2 >= J->cur.nk) {
2222 key += (uint32_t)IR(fins->op2)->o; 2510 key += (uint32_t)IR(fins->op2)->o;
2223 *fright = *IR(fins->op2); 2511 *fright = *IR(fins->op2);
2512 if (fins->op2 < REF_TRUE)
2513 fright[1] = IR(fins->op2)[1];
2224 } else { 2514 } else {
2225 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ 2515 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2226 } 2516 }
@@ -2250,7 +2540,7 @@ retry:
2250 return lj_ir_kint(J, fins->i); 2540 return lj_ir_kint(J, fins->i);
2251 if (ref == FAILFOLD) 2541 if (ref == FAILFOLD)
2252 lj_trace_err(J, LJ_TRERR_GFAIL); 2542 lj_trace_err(J, LJ_TRERR_GFAIL);
2253 lua_assert(ref == DROPFOLD); 2543 lj_assertJ(ref == DROPFOLD, "bad fold result");
2254 return REF_DROP; 2544 return REF_DROP;
2255} 2545}
2256 2546