aboutsummaryrefslogtreecommitdiff
path: root/src/lj_opt_fold.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r--src/lj_opt_fold.c590
1 files changed, 462 insertions, 128 deletions
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 82ed2d32..e3fe8bbf 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -133,8 +136,8 @@
133/* Some local macros to save typing. Undef'd at the end. */ 136/* Some local macros to save typing. Undef'd at the end. */
134#define IR(ref) (&J->cur.ir[(ref)]) 137#define IR(ref) (&J->cur.ir[(ref)])
135#define fins (&J->fold.ins) 138#define fins (&J->fold.ins)
136#define fleft (&J->fold.left) 139#define fleft (J->fold.left)
137#define fright (&J->fold.right) 140#define fright (J->fold.right)
138#define knumleft (ir_knum(fleft)->n) 141#define knumleft (ir_knum(fleft)->n)
139#define knumright (ir_knum(fright)->n) 142#define knumright (ir_knum(fright)->n)
140 143
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM)
169LJFOLD(SUB KNUM KNUM) 173LJFOLD(SUB KNUM KNUM)
170LJFOLD(MUL KNUM KNUM) 174LJFOLD(MUL KNUM KNUM)
171LJFOLD(DIV KNUM KNUM) 175LJFOLD(DIV KNUM KNUM)
172LJFOLD(NEG KNUM KNUM)
173LJFOLD(ABS KNUM KNUM)
174LJFOLD(ATAN2 KNUM KNUM)
175LJFOLD(LDEXP KNUM KNUM) 176LJFOLD(LDEXP KNUM KNUM)
176LJFOLD(MIN KNUM KNUM) 177LJFOLD(MIN KNUM KNUM)
177LJFOLD(MAX KNUM KNUM) 178LJFOLD(MAX KNUM KNUM)
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith)
183 return lj_ir_knum(J, y); 184 return lj_ir_knum(J, y);
184} 185}
185 186
187LJFOLD(NEG KNUM FLOAD)
188LJFOLD(ABS KNUM FLOAD)
189LJFOLDF(kfold_numabsneg)
190{
191 lua_Number a = knumleft;
192 lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
193 return lj_ir_knum(J, y);
194}
195
186LJFOLD(LDEXP KNUM KINT) 196LJFOLD(LDEXP KNUM KINT)
187LJFOLDF(kfold_ldexp) 197LJFOLDF(kfold_ldexp)
188{ 198{
@@ -202,11 +212,36 @@ LJFOLDF(kfold_fpmath)
202 return lj_ir_knum(J, y); 212 return lj_ir_knum(J, y);
203} 213}
204 214
215LJFOLD(CALLN KNUM any)
216LJFOLDF(kfold_fpcall1)
217{
218 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
219 if (CCI_TYPE(ci) == IRT_NUM) {
220 double y = ((double (*)(double))ci->func)(knumleft);
221 return lj_ir_knum(J, y);
222 }
223 return NEXTFOLD;
224}
225
226LJFOLD(CALLN CARG IRCALL_atan2)
227LJFOLDF(kfold_fpcall2)
228{
229 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
230 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
231 double a = ir_knum(IR(fleft->op1))->n;
232 double b = ir_knum(IR(fleft->op2))->n;
233 double y = ((double (*)(double, double))ci->func)(a, b);
234 return lj_ir_knum(J, y);
235 }
236 return NEXTFOLD;
237}
238
205LJFOLD(POW KNUM KINT) 239LJFOLD(POW KNUM KINT)
240LJFOLD(POW KNUM KNUM)
206LJFOLDF(kfold_numpow) 241LJFOLDF(kfold_numpow)
207{ 242{
208 lua_Number a = knumleft; 243 lua_Number a = knumleft;
209 lua_Number b = (lua_Number)fright->i; 244 lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
210 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); 245 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
211 return lj_ir_knum(J, y); 246 return lj_ir_knum(J, y);
212} 247}
@@ -247,7 +282,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
247 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; 282 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
248 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; 283 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
249 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; 284 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
250 default: lua_assert(0); break; 285 default: lj_assertX(0, "bad IR op %d", op); break;
251 } 286 }
252 return k1; 287 return k1;
253} 288}
@@ -319,7 +354,7 @@ LJFOLDF(kfold_intcomp)
319 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); 354 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
320 case IR_ABC: 355 case IR_ABC:
321 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); 356 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
322 default: lua_assert(0); return FAILFOLD; 357 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
323 } 358 }
324} 359}
325 360
@@ -333,21 +368,29 @@ LJFOLDF(kfold_intcomp0)
333 368
334/* -- Constant folding for 64 bit integers -------------------------------- */ 369/* -- Constant folding for 64 bit integers -------------------------------- */
335 370
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 371static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
372 IROp op)
337{ 373{
374 UNUSED(J);
375#if LJ_HASFFI
338 switch (op) { 376 switch (op) {
339#if LJ_64 || LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 377 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 378 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 379 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 380 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 381 case IR_BOR: k1 |= k2; break;
347 case IR_BXOR: k1 ^= k2; break; 382 case IR_BXOR: k1 ^= k2; break;
348#endif 383 case IR_BSHL: k1 <<= (k2 & 63); break;
349 default: UNUSED(k2); lua_assert(0); break; 384 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
385 case IR_BSAR: k1 >>= (k2 & 63); break;
386 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
387 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
388 default: lj_assertJ(0, "bad IR op %d", op); break;
350 } 389 }
390#else
391 UNUSED(k2); UNUSED(op);
392 lj_assertJ(0, "FFI IR op without FFI");
393#endif
351 return k1; 394 return k1;
352} 395}
353 396
@@ -359,7 +402,7 @@ LJFOLD(BOR KINT64 KINT64)
359LJFOLD(BXOR KINT64 KINT64) 402LJFOLD(BXOR KINT64 KINT64)
360LJFOLDF(kfold_int64arith) 403LJFOLDF(kfold_int64arith)
361{ 404{
362 return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, 405 return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
363 ir_k64(fright)->u64, (IROp)fins->o)); 406 ir_k64(fright)->u64, (IROp)fins->o));
364} 407}
365 408
@@ -381,7 +424,7 @@ LJFOLDF(kfold_int64arith2)
381 } 424 }
382 return INT64FOLD(k1); 425 return INT64FOLD(k1);
383#else 426#else
384 UNUSED(J); lua_assert(0); return FAILFOLD; 427 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
385#endif 428#endif
386} 429}
387 430
@@ -392,22 +435,12 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 435LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 436LJFOLDF(kfold_int64shift)
394{ 437{
395#if LJ_HASFFI || LJ_64 438#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 439 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 440 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 441 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 442#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 443 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
411#endif 444#endif
412} 445}
413 446
@@ -417,7 +450,7 @@ LJFOLDF(kfold_bnot64)
417#if LJ_HASFFI 450#if LJ_HASFFI
418 return INT64FOLD(~ir_k64(fleft)->u64); 451 return INT64FOLD(~ir_k64(fleft)->u64);
419#else 452#else
420 UNUSED(J); lua_assert(0); return FAILFOLD; 453 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
421#endif 454#endif
422} 455}
423 456
@@ -427,7 +460,7 @@ LJFOLDF(kfold_bswap64)
427#if LJ_HASFFI 460#if LJ_HASFFI
428 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); 461 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
429#else 462#else
430 UNUSED(J); lua_assert(0); return FAILFOLD; 463 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
431#endif 464#endif
432} 465}
433 466
@@ -452,10 +485,10 @@ LJFOLDF(kfold_int64comp)
452 case IR_UGE: return CONDFOLD(a >= b); 485 case IR_UGE: return CONDFOLD(a >= b);
453 case IR_ULE: return CONDFOLD(a <= b); 486 case IR_ULE: return CONDFOLD(a <= b);
454 case IR_UGT: return CONDFOLD(a > b); 487 case IR_UGT: return CONDFOLD(a > b);
455 default: lua_assert(0); return FAILFOLD; 488 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
456 } 489 }
457#else 490#else
458 UNUSED(J); lua_assert(0); return FAILFOLD; 491 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
459#endif 492#endif
460} 493}
461 494
@@ -467,7 +500,7 @@ LJFOLDF(kfold_int64comp0)
467 return DROPFOLD; 500 return DROPFOLD;
468 return NEXTFOLD; 501 return NEXTFOLD;
469#else 502#else
470 UNUSED(J); lua_assert(0); return FAILFOLD; 503 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
471#endif 504#endif
472} 505}
473 506
@@ -481,6 +514,7 @@ LJFOLDF(kfold_snew_kptr)
481} 514}
482 515
483LJFOLD(SNEW any KINT) 516LJFOLD(SNEW any KINT)
517LJFOLD(XSNEW any KINT)
484LJFOLDF(kfold_snew_empty) 518LJFOLDF(kfold_snew_empty)
485{ 519{
486 if (fright->i == 0) 520 if (fright->i == 0)
@@ -492,7 +526,7 @@ LJFOLD(STRREF KGC KINT)
492LJFOLDF(kfold_strref) 526LJFOLDF(kfold_strref)
493{ 527{
494 GCstr *str = ir_kstr(fleft); 528 GCstr *str = ir_kstr(fleft);
495 lua_assert((MSize)fright->i <= str->len); 529 lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
496 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); 530 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
497} 531}
498 532
@@ -510,7 +544,7 @@ LJFOLDF(kfold_strref_snew)
510 PHIBARRIER(ir); 544 PHIBARRIER(ir);
511 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ 545 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
512 fins->op1 = str; 546 fins->op1 = str;
513 fins->ot = IRT(IR_STRREF, IRT_P32); 547 fins->ot = IRT(IR_STRREF, IRT_PGC);
514 return RETRYFOLD; 548 return RETRYFOLD;
515 } 549 }
516 } 550 }
@@ -528,6 +562,210 @@ LJFOLDF(kfold_strcmp)
528 return NEXTFOLD; 562 return NEXTFOLD;
529} 563}
530 564
565/* -- Constant folding and forwarding for buffers ------------------------- */
566
567/*
568** Buffer ops perform stores, but their effect is limited to the buffer
569** itself. Also, buffer ops are chained: a use of an op implies a use of
570** all other ops up the chain. Conversely, if an op is unused, all ops
571** up the chain can go unsed. This largely eliminates the need to treat
572** them as stores.
573**
574** Alas, treating them as normal (IRM_N) ops doesn't work, because they
575** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
576** or if FOLD is disabled.
577**
578** The compromise is to declare them as loads, emit them like stores and
579** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
580** fragments left over from CSE are eliminated by DCE.
581**
582** The string buffer methods emit a USE instead of a BUFSTR to keep the
583** chain alive.
584*/
585
586LJFOLD(BUFHDR any any)
587LJFOLDF(bufhdr_merge)
588{
589 return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
590}
591
592LJFOLD(BUFPUT any BUFSTR)
593LJFOLDF(bufput_bufstr)
594{
595 if ((J->flags & JIT_F_OPT_FWD)) {
596 IRRef hdr = fright->op2;
597 /* New buffer, no other buffer op inbetween and same buffer? */
598 if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
599 fleft->prev == hdr &&
600 fleft->op1 == IR(hdr)->op1 &&
601 !(irt_isphi(fright->t) && IR(hdr)->prev)) {
602 IRRef ref = fins->op1;
603 IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */
604 IR(ref)->op1 = fright->op1;
605 return ref;
606 }
607 /* Replay puts to global temporary buffer. */
608 if (IR(hdr)->op2 == IRBUFHDR_RESET) {
609 IRIns *ir = IR(fright->op1);
610 /* For now only handle single string.reverse .lower .upper .rep. */
611 if (ir->o == IR_CALLL &&
612 ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
613 ir->op2 <= IRCALL_lj_buf_putstr_rep) {
614 IRIns *carg1 = IR(ir->op1);
615 if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
616 IRIns *carg2 = IR(carg1->op1);
617 if (carg2->op1 == hdr) {
618 return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
619 }
620 } else if (carg1->op1 == hdr) {
621 return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
622 }
623 }
624 }
625 }
626 return EMITFOLD; /* Always emit, CSE later. */
627}
628
629LJFOLD(BUFPUT any any)
630LJFOLDF(bufput_kgc)
631{
632 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
633 GCstr *s2 = ir_kstr(fright);
634 if (s2->len == 0) { /* Empty string? */
635 return LEFTFOLD;
636 } else {
637 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
638 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
639 GCstr *s1 = ir_kstr(IR(fleft->op2));
640 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
641 /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
642 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
643 return fins->op1;
644 }
645 }
646 }
647 return EMITFOLD; /* Always emit, CSE later. */
648}
649
650LJFOLD(BUFSTR any any)
651LJFOLDF(bufstr_kfold_cse)
652{
653 lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
654 fleft->o == IR_CALLL,
655 "bad buffer constructor IR op %d", fleft->o);
656 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
657 if (fleft->o == IR_BUFHDR) { /* No put operations? */
658 if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */
659 return lj_ir_kstr(J, &J2G(J)->strempty);
660 fins->op1 = fleft->op1;
661 fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
662 return CSEFOLD;
663 } else if (fleft->o == IR_BUFPUT) {
664 IRIns *irb = IR(fleft->op1);
665 if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
666 return fleft->op2; /* Shortcut for a single put operation. */
667 }
668 }
669 /* Try to CSE the whole chain. */
670 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
671 IRRef ref = J->chain[IR_BUFSTR];
672 while (ref) {
673 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
674 while (ira->o == irb->o && ira->op2 == irb->op2) {
675 lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
676 ira->o == IR_CALLL || ira->o == IR_CARG,
677 "bad buffer constructor IR op %d", ira->o);
678 if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
679 return ref; /* CSE succeeded. */
680 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
681 break;
682 ira = IR(ira->op1);
683 irb = IR(irb->op1);
684 }
685 ref = irs->prev;
686 }
687 }
688 return EMITFOLD; /* No CSE possible. */
689}
690
691LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
692LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
693LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
694LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
695LJFOLDF(bufput_kfold_op)
696{
697 if (irref_isk(fleft->op2)) {
698 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
699 SBuf *sb = lj_buf_tmp_(J->L);
700 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
701 ir_kstr(IR(fleft->op2)));
702 fins->o = IR_BUFPUT;
703 fins->op1 = fleft->op1;
704 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
705 return RETRYFOLD;
706 }
707 return EMITFOLD; /* Always emit, CSE later. */
708}
709
710LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
711LJFOLDF(bufput_kfold_rep)
712{
713 if (irref_isk(fleft->op2)) {
714 IRIns *irc = IR(fleft->op1);
715 if (irref_isk(irc->op2)) {
716 SBuf *sb = lj_buf_tmp_(J->L);
717 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
718 fins->o = IR_BUFPUT;
719 fins->op1 = irc->op1;
720 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
721 return RETRYFOLD;
722 }
723 }
724 return EMITFOLD; /* Always emit, CSE later. */
725}
726
727LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
728LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
729LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
730LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
731LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
732LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
733LJFOLDF(bufput_kfold_fmt)
734{
735 IRIns *irc = IR(fleft->op1);
736 lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
737 if (irref_isk(fleft->op2)) {
738 SFormat sf = (SFormat)IR(irc->op2)->i;
739 IRIns *ira = IR(fleft->op2);
740 SBuf *sb = lj_buf_tmp_(J->L);
741 switch (fins->op2) {
742 case IRCALL_lj_strfmt_putfxint:
743 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
744 break;
745 case IRCALL_lj_strfmt_putfstr:
746 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
747 break;
748 case IRCALL_lj_strfmt_putfchar:
749 sb = lj_strfmt_putfchar(sb, sf, ira->i);
750 break;
751 case IRCALL_lj_strfmt_putfnum_int:
752 case IRCALL_lj_strfmt_putfnum_uint:
753 case IRCALL_lj_strfmt_putfnum:
754 default: {
755 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
756 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
757 ir_knum(ira)->n);
758 break;
759 }
760 }
761 fins->o = IR_BUFPUT;
762 fins->op1 = irc->op1;
763 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
764 return RETRYFOLD;
765 }
766 return EMITFOLD; /* Always emit, CSE later. */
767}
768
531/* -- Constant folding of pointer arithmetic ------------------------------ */ 769/* -- Constant folding of pointer arithmetic ------------------------------ */
532 770
533LJFOLD(ADD KGC KINT) 771LJFOLD(ADD KGC KINT)
@@ -648,27 +886,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
648LJFOLDF(kfold_conv_knum_int_num) 886LJFOLDF(kfold_conv_knum_int_num)
649{ 887{
650 lua_Number n = knumleft; 888 lua_Number n = knumleft;
651 if (!(fins->op2 & IRCONV_TRUNC)) { 889 int32_t k = lj_num2int(n);
652 int32_t k = lj_num2int(n); 890 if (irt_isguard(fins->t) && n != (lua_Number)k) {
653 if (irt_isguard(fins->t) && n != (lua_Number)k) { 891 /* We're about to create a guard which always fails, like CONV +1.5.
654 /* We're about to create a guard which always fails, like CONV +1.5. 892 ** Some pathological loops cause this during LICM, e.g.:
655 ** Some pathological loops cause this during LICM, e.g.: 893 ** local x,k,t = 0,1.5,{1,[1.5]=2}
656 ** local x,k,t = 0,1.5,{1,[1.5]=2} 894 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
657 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 895 ** assert(x == 300)
658 ** assert(x == 300) 896 */
659 */ 897 return FAILFOLD;
660 return FAILFOLD;
661 }
662 return INTFOLD(k);
663 } else {
664 return INTFOLD((int32_t)n);
665 } 898 }
899 return INTFOLD(k);
666} 900}
667 901
668LJFOLD(CONV KNUM IRCONV_U32_NUM) 902LJFOLD(CONV KNUM IRCONV_U32_NUM)
669LJFOLDF(kfold_conv_knum_u32_num) 903LJFOLDF(kfold_conv_knum_u32_num)
670{ 904{
671 lua_assert((fins->op2 & IRCONV_TRUNC));
672#ifdef _MSC_VER 905#ifdef _MSC_VER
673 { /* Workaround for MSVC bug. */ 906 { /* Workaround for MSVC bug. */
674 volatile uint32_t u = (uint32_t)knumleft; 907 volatile uint32_t u = (uint32_t)knumleft;
@@ -682,27 +915,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
682LJFOLD(CONV KNUM IRCONV_I64_NUM) 915LJFOLD(CONV KNUM IRCONV_I64_NUM)
683LJFOLDF(kfold_conv_knum_i64_num) 916LJFOLDF(kfold_conv_knum_i64_num)
684{ 917{
685 lua_assert((fins->op2 & IRCONV_TRUNC));
686 return INT64FOLD((uint64_t)(int64_t)knumleft); 918 return INT64FOLD((uint64_t)(int64_t)knumleft);
687} 919}
688 920
689LJFOLD(CONV KNUM IRCONV_U64_NUM) 921LJFOLD(CONV KNUM IRCONV_U64_NUM)
690LJFOLDF(kfold_conv_knum_u64_num) 922LJFOLDF(kfold_conv_knum_u64_num)
691{ 923{
692 lua_assert((fins->op2 & IRCONV_TRUNC));
693 return INT64FOLD(lj_num2u64(knumleft)); 924 return INT64FOLD(lj_num2u64(knumleft));
694} 925}
695 926
696LJFOLD(TOSTR KNUM) 927LJFOLD(TOSTR KNUM any)
697LJFOLDF(kfold_tostr_knum) 928LJFOLDF(kfold_tostr_knum)
698{ 929{
699 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 930 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
700} 931}
701 932
702LJFOLD(TOSTR KINT) 933LJFOLD(TOSTR KINT any)
703LJFOLDF(kfold_tostr_kint) 934LJFOLDF(kfold_tostr_kint)
704{ 935{
705 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 936 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
937 lj_strfmt_int(J->L, fleft->i) :
938 lj_strfmt_char(J->L, fleft->i));
706} 939}
707 940
708LJFOLD(STRTO KGC) 941LJFOLD(STRTO KGC)
@@ -750,13 +983,13 @@ LJFOLDF(shortcut_round)
750 return NEXTFOLD; 983 return NEXTFOLD;
751} 984}
752 985
753LJFOLD(ABS ABS KNUM) 986LJFOLD(ABS ABS FLOAD)
754LJFOLDF(shortcut_left) 987LJFOLDF(shortcut_left)
755{ 988{
756 return LEFTFOLD; /* f(g(x)) ==> g(x) */ 989 return LEFTFOLD; /* f(g(x)) ==> g(x) */
757} 990}
758 991
759LJFOLD(ABS NEG KNUM) 992LJFOLD(ABS NEG FLOAD)
760LJFOLDF(shortcut_dropleft) 993LJFOLDF(shortcut_dropleft)
761{ 994{
762 PHIBARRIER(fleft); 995 PHIBARRIER(fleft);
@@ -836,8 +1069,10 @@ LJFOLDF(simplify_nummuldiv_k)
836 if (n == 1.0) { /* x o 1 ==> x */ 1069 if (n == 1.0) { /* x o 1 ==> x */
837 return LEFTFOLD; 1070 return LEFTFOLD;
838 } else if (n == -1.0) { /* x o -1 ==> -x */ 1071 } else if (n == -1.0) { /* x o -1 ==> -x */
1072 IRRef op1 = fins->op1;
1073 fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
1074 fins->op1 = op1;
839 fins->o = IR_NEG; 1075 fins->o = IR_NEG;
840 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
841 return RETRYFOLD; 1076 return RETRYFOLD;
842 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ 1077 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
843 fins->o = IR_ADD; 1078 fins->o = IR_ADD;
@@ -878,7 +1113,7 @@ LJFOLDF(simplify_nummuldiv_negneg)
878} 1113}
879 1114
880LJFOLD(POW any KINT) 1115LJFOLD(POW any KINT)
881LJFOLDF(simplify_numpow_xk) 1116LJFOLDF(simplify_numpow_xkint)
882{ 1117{
883 int32_t k = fright->i; 1118 int32_t k = fright->i;
884 TRef ref = fins->op1; 1119 TRef ref = fins->op1;
@@ -907,13 +1142,22 @@ LJFOLDF(simplify_numpow_xk)
907 return ref; 1142 return ref;
908} 1143}
909 1144
1145LJFOLD(POW any KNUM)
1146LJFOLDF(simplify_numpow_xknum)
1147{
1148 if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */
1149 return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
1150 return NEXTFOLD;
1151}
1152
910LJFOLD(POW KNUM any) 1153LJFOLD(POW KNUM any)
911LJFOLDF(simplify_numpow_kx) 1154LJFOLDF(simplify_numpow_kx)
912{ 1155{
913 lua_Number n = knumleft; 1156 lua_Number n = knumleft;
914 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ 1157 if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */
915 fins->o = IR_CONV;
916#if LJ_TARGET_X86ORX64 1158#if LJ_TARGET_X86ORX64
1159 /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
1160 fins->o = IR_CONV;
917 fins->op1 = fins->op2; 1161 fins->op1 = fins->op2;
918 fins->op2 = IRCONV_NUM_INT; 1162 fins->op2 = IRCONV_NUM_INT;
919 fins->op2 = (IRRef1)lj_opt_fold(J); 1163 fins->op2 = (IRRef1)lj_opt_fold(J);
@@ -1007,10 +1251,10 @@ LJFOLDF(simplify_tobit_conv)
1007{ 1251{
1008 /* Fold even across PHI to avoid expensive num->int conversions in loop. */ 1252 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
1009 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { 1253 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
1010 lua_assert(irt_isnum(fleft->t)); 1254 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1011 return fleft->op1; 1255 return fleft->op1;
1012 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { 1256 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
1013 lua_assert(irt_isnum(fleft->t)); 1257 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1014 fins->o = IR_CONV; 1258 fins->o = IR_CONV;
1015 fins->op1 = fleft->op1; 1259 fins->op1 = fleft->op1;
1016 fins->op2 = (IRT_INT<<5)|IRT_U32; 1260 fins->op2 = (IRT_INT<<5)|IRT_U32;
@@ -1050,7 +1294,7 @@ LJFOLDF(simplify_conv_sext)
1050 /* Use scalar evolution analysis results to strength-reduce sign-extension. */ 1294 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
1051 if (ref == J->scev.idx) { 1295 if (ref == J->scev.idx) {
1052 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; 1296 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
1053 lua_assert(irt_isint(J->scev.t)); 1297 lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
1054 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { 1298 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
1055 ok_reduce: 1299 ok_reduce:
1056#if LJ_TARGET_X64 1300#if LJ_TARGET_X64
@@ -1081,6 +1325,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
1081LJFOLD(CONV MUL IRCONV_U32_U64) 1325LJFOLD(CONV MUL IRCONV_U32_U64)
1082LJFOLDF(simplify_conv_narrow) 1326LJFOLDF(simplify_conv_narrow)
1083{ 1327{
1328#if LJ_64
1329 UNUSED(J);
1330 return NEXTFOLD;
1331#else
1084 IROp op = (IROp)fleft->o; 1332 IROp op = (IROp)fleft->o;
1085 IRType t = irt_type(fins->t); 1333 IRType t = irt_type(fins->t);
1086 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; 1334 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
@@ -1091,6 +1339,7 @@ LJFOLDF(simplify_conv_narrow)
1091 fins->op1 = op1; 1339 fins->op1 = op1;
1092 fins->op2 = op2; 1340 fins->op2 = op2;
1093 return RETRYFOLD; 1341 return RETRYFOLD;
1342#endif
1094} 1343}
1095 1344
1096/* Special CSE rule for CONV. */ 1345/* Special CSE rule for CONV. */
@@ -1126,7 +1375,8 @@ LJFOLDF(narrow_convert)
1126 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ 1375 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1127 if (J->chain[IR_LOOP]) 1376 if (J->chain[IR_LOOP])
1128 return NEXTFOLD; 1377 return NEXTFOLD;
1129 lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); 1378 lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
1379 "unexpected CONV TOBIT");
1130 return lj_opt_narrow_convert(J); 1380 return lj_opt_narrow_convert(J);
1131} 1381}
1132 1382
@@ -1204,7 +1454,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1204 ** But this is mainly intended for simple address arithmetic. 1454 ** But this is mainly intended for simple address arithmetic.
1205 ** Also it's easier for the backend to optimize the original multiplies. 1455 ** Also it's easier for the backend to optimize the original multiplies.
1206 */ 1456 */
1207 if (k == 1) { /* i * 1 ==> i */ 1457 if (k == 0) { /* i * 0 ==> 0 */
1458 return RIGHTFOLD;
1459 } else if (k == 1) { /* i * 1 ==> i */
1208 return LEFTFOLD; 1460 return LEFTFOLD;
1209 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1461 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1210 fins->o = IR_BSHL; 1462 fins->o = IR_BSHL;
@@ -1217,9 +1469,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1217LJFOLD(MUL any KINT) 1469LJFOLD(MUL any KINT)
1218LJFOLDF(simplify_intmul_k32) 1470LJFOLDF(simplify_intmul_k32)
1219{ 1471{
1220 if (fright->i == 0) /* i * 0 ==> 0 */ 1472 if (fright->i >= 0)
1221 return INTFOLD(0);
1222 else if (fright->i > 0)
1223 return simplify_intmul_k(J, fright->i); 1473 return simplify_intmul_k(J, fright->i);
1224 return NEXTFOLD; 1474 return NEXTFOLD;
1225} 1475}
@@ -1227,21 +1477,20 @@ LJFOLDF(simplify_intmul_k32)
1227LJFOLD(MUL any KINT64) 1477LJFOLD(MUL any KINT64)
1228LJFOLDF(simplify_intmul_k64) 1478LJFOLDF(simplify_intmul_k64)
1229{ 1479{
1230 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1480#if LJ_HASFFI
1231 return INT64FOLD(0); 1481 if (ir_kint64(fright)->u64 < 0x80000000u)
1232#if LJ_64
1233 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1234 else if (ir_kint64(fright)->u64 < 0x80000000u)
1235 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1482 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1236#endif
1237 return NEXTFOLD; 1483 return NEXTFOLD;
1484#else
1485 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1486#endif
1238} 1487}
1239 1488
1240LJFOLD(MOD any KINT) 1489LJFOLD(MOD any KINT)
1241LJFOLDF(simplify_intmod_k) 1490LJFOLDF(simplify_intmod_k)
1242{ 1491{
1243 int32_t k = fright->i; 1492 int32_t k = fright->i;
1244 lua_assert(k != 0); 1493 lj_assertJ(k != 0, "integer mod 0");
1245 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ 1494 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1246 fins->o = IR_BAND; 1495 fins->o = IR_BAND;
1247 fins->op2 = lj_ir_kint(J, k-1); 1496 fins->op2 = lj_ir_kint(J, k-1);
@@ -1490,6 +1739,15 @@ LJFOLDF(simplify_shiftk_andk)
1490 fins->op2 = (IRRef1)lj_ir_kint(J, k); 1739 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1491 fins->ot = IRTI(IR_BAND); 1740 fins->ot = IRTI(IR_BAND);
1492 return RETRYFOLD; 1741 return RETRYFOLD;
1742 } else if (irk->o == IR_KINT64) {
1743 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
1744 (IROp)fins->o);
1745 IROpT ot = fleft->ot;
1746 fins->op1 = fleft->op1;
1747 fins->op1 = (IRRef1)lj_opt_fold(J);
1748 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1749 fins->ot = ot;
1750 return RETRYFOLD;
1493 } 1751 }
1494 return NEXTFOLD; 1752 return NEXTFOLD;
1495} 1753}
@@ -1505,6 +1763,47 @@ LJFOLDF(simplify_andk_shiftk)
1505 return NEXTFOLD; 1763 return NEXTFOLD;
1506} 1764}
1507 1765
1766LJFOLD(BAND BOR KINT)
1767LJFOLD(BOR BAND KINT)
1768LJFOLDF(simplify_andor_k)
1769{
1770 IRIns *irk = IR(fleft->op2);
1771 PHIBARRIER(fleft);
1772 if (irk->o == IR_KINT) {
1773 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1774 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1775 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1776 if (k == (fins->o == IR_BAND ? 0 : -1)) {
1777 fins->op1 = fleft->op1;
1778 return RETRYFOLD;
1779 }
1780 }
1781 return NEXTFOLD;
1782}
1783
1784LJFOLD(BAND BOR KINT64)
1785LJFOLD(BOR BAND KINT64)
1786LJFOLDF(simplify_andor_k64)
1787{
1788#if LJ_HASFFI
1789 IRIns *irk = IR(fleft->op2);
1790 PHIBARRIER(fleft);
1791 if (irk->o == IR_KINT64) {
1792 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1793 (IROp)fins->o);
1794 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1795 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1796 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
1797 fins->op1 = fleft->op1;
1798 return RETRYFOLD;
1799 }
1800 }
1801 return NEXTFOLD;
1802#else
1803 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1804#endif
1805}
1806
1508/* -- Reassociation ------------------------------------------------------- */ 1807/* -- Reassociation ------------------------------------------------------- */
1509 1808
1510LJFOLD(ADD ADD KINT) 1809LJFOLD(ADD ADD KINT)
@@ -1534,11 +1833,11 @@ LJFOLD(BOR BOR KINT64)
1534LJFOLD(BXOR BXOR KINT64) 1833LJFOLD(BXOR BXOR KINT64)
1535LJFOLDF(reassoc_intarith_k64) 1834LJFOLDF(reassoc_intarith_k64)
1536{ 1835{
1537#if LJ_HASFFI || LJ_64 1836#if LJ_HASFFI
1538 IRIns *irk = IR(fleft->op2); 1837 IRIns *irk = IR(fleft->op2);
1539 if (irk->o == IR_KINT64) { 1838 if (irk->o == IR_KINT64) {
1540 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1839 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1541 ir_k64(fright)->u64, (IROp)fins->o); 1840 (IROp)fins->o);
1542 PHIBARRIER(fleft); 1841 PHIBARRIER(fleft);
1543 fins->op1 = fleft->op1; 1842 fins->op1 = fleft->op1;
1544 fins->op2 = (IRRef1)lj_ir_kint64(J, k); 1843 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
@@ -1546,12 +1845,10 @@ LJFOLDF(reassoc_intarith_k64)
1546 } 1845 }
1547 return NEXTFOLD; 1846 return NEXTFOLD;
1548#else 1847#else
1549 UNUSED(J); lua_assert(0); return FAILFOLD; 1848 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1550#endif 1849#endif
1551} 1850}
1552 1851
1553LJFOLD(MIN MIN any)
1554LJFOLD(MAX MAX any)
1555LJFOLD(BAND BAND any) 1852LJFOLD(BAND BAND any)
1556LJFOLD(BOR BOR any) 1853LJFOLD(BOR BOR any)
1557LJFOLDF(reassoc_dup) 1854LJFOLDF(reassoc_dup)
@@ -1561,6 +1858,15 @@ LJFOLDF(reassoc_dup)
1561 return NEXTFOLD; 1858 return NEXTFOLD;
1562} 1859}
1563 1860
1861LJFOLD(MIN MIN any)
1862LJFOLD(MAX MAX any)
1863LJFOLDF(reassoc_dup_minmax)
1864{
1865 if (fins->op2 == fleft->op2)
1866 return LEFTFOLD; /* (a o b) o b ==> a o b */
1867 return NEXTFOLD;
1868}
1869
1564LJFOLD(BXOR BXOR any) 1870LJFOLD(BXOR BXOR any)
1565LJFOLDF(reassoc_bxor) 1871LJFOLDF(reassoc_bxor)
1566{ 1872{
@@ -1599,23 +1905,12 @@ LJFOLDF(reassoc_shift)
1599 return NEXTFOLD; 1905 return NEXTFOLD;
1600} 1906}
1601 1907
1602LJFOLD(MIN MIN KNUM)
1603LJFOLD(MAX MAX KNUM)
1604LJFOLD(MIN MIN KINT) 1908LJFOLD(MIN MIN KINT)
1605LJFOLD(MAX MAX KINT) 1909LJFOLD(MAX MAX KINT)
1606LJFOLDF(reassoc_minmax_k) 1910LJFOLDF(reassoc_minmax_k)
1607{ 1911{
1608 IRIns *irk = IR(fleft->op2); 1912 IRIns *irk = IR(fleft->op2);
1609 if (irk->o == IR_KNUM) { 1913 if (irk->o == IR_KINT) {
1610 lua_Number a = ir_knum(irk)->n;
1611 lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
1612 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1613 return LEFTFOLD;
1614 PHIBARRIER(fleft);
1615 fins->op1 = fleft->op1;
1616 fins->op2 = (IRRef1)lj_ir_knum(J, y);
1617 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1618 } else if (irk->o == IR_KINT) {
1619 int32_t a = irk->i; 1914 int32_t a = irk->i;
1620 int32_t y = kfold_intop(a, fright->i, fins->o); 1915 int32_t y = kfold_intop(a, fright->i, fins->o);
1621 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ 1916 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
@@ -1628,24 +1923,6 @@ LJFOLDF(reassoc_minmax_k)
1628 return NEXTFOLD; 1923 return NEXTFOLD;
1629} 1924}
1630 1925
/* NOTE(review): this rule appears only on the old-file side of the diff,
** i.e. it is deleted by this commit.  It folded (b o1 a) o2 b ==> b and
** (a o1 b) o2 b ==> b.  Presumably dropped because such absorption is
** unsafe for FP MIN/MAX (NaN and -0 semantics) -- confirm against the
** commit message.
*/
LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}
1639
/* NOTE(review): deleted by this commit (old-file side only), the mirror of
** reassoc_minmax_left above.  It folded a o2 (a o1 b) ==> a and
** a o2 (b o1 a) ==> a.  Presumably dropped for the same FP MIN/MAX
** (NaN/-0) concerns -- confirm against the commit message.
*/
LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}
1648
1649/* -- Array bounds check elimination -------------------------------------- */ 1926/* -- Array bounds check elimination -------------------------------------- */
1650 1927
1651/* Eliminate ABC across PHIs to handle t[i-1] forwarding case. 1928/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
@@ -1772,8 +2049,6 @@ LJFOLDF(comm_comp)
1772 2049
1773LJFOLD(BAND any any) 2050LJFOLD(BAND any any)
1774LJFOLD(BOR any any) 2051LJFOLD(BOR any any)
1775LJFOLD(MIN any any)
1776LJFOLD(MAX any any)
1777LJFOLDF(comm_dup) 2052LJFOLDF(comm_dup)
1778{ 2053{
1779 if (fins->op1 == fins->op2) /* x o x ==> x */ 2054 if (fins->op1 == fins->op2) /* x o x ==> x */
@@ -1781,6 +2056,15 @@ LJFOLDF(comm_dup)
1781 return fold_comm_swap(J); 2056 return fold_comm_swap(J);
1782} 2057}
1783 2058
2059LJFOLD(MIN any any)
2060LJFOLD(MAX any any)
2061LJFOLDF(comm_dup_minmax)
2062{
2063 if (fins->op1 == fins->op2) /* x o x ==> x */
2064 return LEFTFOLD;
2065 return NEXTFOLD;
2066}
2067
1784LJFOLD(BXOR any any) 2068LJFOLD(BXOR any any)
1785LJFOLDF(comm_bxor) 2069LJFOLDF(comm_bxor)
1786{ 2070{
@@ -1817,7 +2101,7 @@ LJFOLDF(merge_eqne_snew_kgc)
1817{ 2101{
1818 GCstr *kstr = ir_kstr(fright); 2102 GCstr *kstr = ir_kstr(fright);
1819 int32_t len = (int32_t)kstr->len; 2103 int32_t len = (int32_t)kstr->len;
1820 lua_assert(irt_isstr(fins->t)); 2104 lj_assertJ(irt_isstr(fins->t), "bad equality IR type");
1821 2105
1822#if LJ_TARGET_UNALIGNED 2106#if LJ_TARGET_UNALIGNED
1823#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ 2107#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  /* A hash load from a constant kernel pointer can only target the
  ** global canonical nil value, so it constant-folds to nil.
  */
  UNUSED(J);
  lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
  return TREF_NIL;
}
1886} 2170}
1887 2171
@@ -1891,8 +2175,8 @@ LJFOLDX(lj_opt_fwd_hload)
1891LJFOLD(ULOAD any) 2175LJFOLD(ULOAD any)
1892LJFOLDX(lj_opt_fwd_uload) 2176LJFOLDX(lj_opt_fwd_uload)
1893 2177
1894LJFOLD(CALLL any IRCALL_lj_tab_len) 2178LJFOLD(ALEN any any)
1895LJFOLDX(lj_opt_fwd_tab_len) 2179LJFOLDX(lj_opt_fwd_alen)
1896 2180
1897/* Upvalue refs are really loads, but there are no corresponding stores. 2181/* Upvalue refs are really loads, but there are no corresponding stores.
1898** So CSE is ok for them, except for UREFO across a GC step (see below). 2182** So CSE is ok for them, except for UREFO across a GC step (see below).
@@ -1953,6 +2237,7 @@ LJFOLDF(fwd_href_tdup)
1953** an aliased table, as it may invalidate all of the pointers and fields. 2237** an aliased table, as it may invalidate all of the pointers and fields.
1954** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2238** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1955** FLOADs. And NEWREF itself is treated like a store (see below). 2239** FLOADs. And NEWREF itself is treated like a store (see below).
2240** LREF is constant (per trace) since coroutine switches are not inlined.
1956*/ 2241*/
1957LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2242LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1958LJFOLDF(fload_tab_tnew_asize) 2243LJFOLDF(fload_tab_tnew_asize)
@@ -2016,6 +2301,35 @@ LJFOLDF(fload_str_len_snew)
2016 return NEXTFOLD; 2301 return NEXTFOLD;
2017} 2302}
2018 2303
2304LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2305LJFOLDF(fload_str_len_tostr)
2306{
2307 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2308 return INTFOLD(1);
2309 return NEXTFOLD;
2310}
2311
2312LJFOLD(FLOAD any IRFL_SBUF_W)
2313LJFOLD(FLOAD any IRFL_SBUF_E)
2314LJFOLD(FLOAD any IRFL_SBUF_B)
2315LJFOLD(FLOAD any IRFL_SBUF_L)
2316LJFOLD(FLOAD any IRFL_SBUF_REF)
2317LJFOLD(FLOAD any IRFL_SBUF_R)
2318LJFOLDF(fload_sbuf)
2319{
2320 TRef tr = lj_opt_fwd_fload(J);
2321 return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
2322}
2323
2324/* The fast function ID of function objects is immutable. */
2325LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
2326LJFOLDF(fload_func_ffid_kgc)
2327{
2328 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2329 return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
2330 return NEXTFOLD;
2331}
2332
2019/* The C type ID of cdata objects is immutable. */ 2333/* The C type ID of cdata objects is immutable. */
2020LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2334LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2021LJFOLDF(fload_cdata_typeid_kgc) 2335LJFOLDF(fload_cdata_typeid_kgc)
@@ -2062,6 +2376,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2062} 2376}
2063 2377
2064LJFOLD(FLOAD any IRFL_STR_LEN) 2378LJFOLD(FLOAD any IRFL_STR_LEN)
2379LJFOLD(FLOAD any IRFL_FUNC_ENV)
2380LJFOLD(FLOAD any IRFL_THREAD_ENV)
2065LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2381LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2066LJFOLD(FLOAD any IRFL_CDATA_PTR) 2382LJFOLD(FLOAD any IRFL_CDATA_PTR)
2067LJFOLD(FLOAD any IRFL_CDATA_INT) 2383LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2081,7 +2397,7 @@ LJFOLDF(fwd_sload)
2081 TRef tr = lj_opt_cse(J); 2397 TRef tr = lj_opt_cse(J);
2082 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; 2398 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
2083 } else { 2399 } else {
2084 lua_assert(J->slot[fins->op1] != 0); 2400 lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
2085 return J->slot[fins->op1]; 2401 return J->slot[fins->op1];
2086 } 2402 }
2087} 2403}
@@ -2138,6 +2454,17 @@ LJFOLDF(barrier_tnew_tdup)
2138 return DROPFOLD; 2454 return DROPFOLD;
2139} 2455}
2140 2456
2457/* -- Profiling ----------------------------------------------------------- */
2458
2459LJFOLD(PROF any any)
2460LJFOLDF(prof)
2461{
2462 IRRef ref = J->chain[IR_PROF];
2463 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2464 return ref;
2465 return EMITFOLD;
2466}
2467
2141/* -- Stores and allocations ---------------------------------------------- */ 2468/* -- Stores and allocations ---------------------------------------------- */
2142 2469
2143/* Stores and allocations cannot be folded or passed on to CSE in general. 2470/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2160,8 +2487,10 @@ LJFOLD(XSTORE any any)
2160LJFOLDX(lj_opt_dse_xstore) 2487LJFOLDX(lj_opt_dse_xstore)
2161 2488
2162LJFOLD(NEWREF any any) /* Treated like a store. */ 2489LJFOLD(NEWREF any any) /* Treated like a store. */
2163LJFOLD(CALLS any any) 2490LJFOLD(TMPREF any any)
2491LJFOLD(CALLA any any)
2164LJFOLD(CALLL any any) /* Safeguard fallback. */ 2492LJFOLD(CALLL any any) /* Safeguard fallback. */
2493LJFOLD(CALLS any any)
2165LJFOLD(CALLXS any any) 2494LJFOLD(CALLXS any any)
2166LJFOLD(XBAR) 2495LJFOLD(XBAR)
2167LJFOLD(RETF any any) /* Modifies BASE. */ 2496LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2194,8 +2523,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
2194 IRRef ref; 2523 IRRef ref;
2195 2524
2196 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { 2525 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
2197 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | 2526 lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
2198 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); 2527 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
2528 "bad JIT_F_OPT_DEFAULT");
2199 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ 2529 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2200 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) 2530 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
2201 return lj_opt_cse(J); 2531 return lj_opt_cse(J);
@@ -2220,10 +2550,14 @@ retry:
2220 if (fins->op1 >= J->cur.nk) { 2550 if (fins->op1 >= J->cur.nk) {
2221 key += (uint32_t)IR(fins->op1)->o << 10; 2551 key += (uint32_t)IR(fins->op1)->o << 10;
2222 *fleft = *IR(fins->op1); 2552 *fleft = *IR(fins->op1);
2553 if (fins->op1 < REF_TRUE)
2554 fleft[1] = IR(fins->op1)[1];
2223 } 2555 }
2224 if (fins->op2 >= J->cur.nk) { 2556 if (fins->op2 >= J->cur.nk) {
2225 key += (uint32_t)IR(fins->op2)->o; 2557 key += (uint32_t)IR(fins->op2)->o;
2226 *fright = *IR(fins->op2); 2558 *fright = *IR(fins->op2);
2559 if (fins->op2 < REF_TRUE)
2560 fright[1] = IR(fins->op2)[1];
2227 } else { 2561 } else {
2228 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ 2562 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2229 } 2563 }
@@ -2253,7 +2587,7 @@ retry:
2253 return lj_ir_kint(J, fins->i); 2587 return lj_ir_kint(J, fins->i);
2254 if (ref == FAILFOLD) 2588 if (ref == FAILFOLD)
2255 lj_trace_err(J, LJ_TRERR_GFAIL); 2589 lj_trace_err(J, LJ_TRERR_GFAIL);
2256 lua_assert(ref == DROPFOLD); 2590 lj_assertJ(ref == DROPFOLD, "bad fold result");
2257 return REF_DROP; 2591 return REF_DROP;
2258} 2592}
2259 2593