Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r-- | src/lj_opt_fold.c | 626
1 file changed, 458 insertions, 168 deletions
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index ab158143..48effb8a 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@ | |||
14 | 14 | ||
15 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
16 | 16 | ||
17 | #include "lj_buf.h" | ||
17 | #include "lj_str.h" | 18 | #include "lj_str.h" |
18 | #include "lj_tab.h" | 19 | #include "lj_tab.h" |
19 | #include "lj_ir.h" | 20 | #include "lj_ir.h" |
20 | #include "lj_jit.h" | 21 | #include "lj_jit.h" |
22 | #include "lj_ircall.h" | ||
21 | #include "lj_iropt.h" | 23 | #include "lj_iropt.h" |
22 | #include "lj_trace.h" | 24 | #include "lj_trace.h" |
23 | #if LJ_HASFFI | 25 | #if LJ_HASFFI |
24 | #include "lj_ctype.h" | 26 | #include "lj_ctype.h" |
25 | #endif | ||
26 | #include "lj_carith.h" | 27 | #include "lj_carith.h" |
28 | #endif | ||
27 | #include "lj_vm.h" | 29 | #include "lj_vm.h" |
28 | #include "lj_strscan.h" | 30 | #include "lj_strscan.h" |
31 | #include "lj_strfmt.h" | ||
29 | 32 | ||
30 | /* Here's a short description how the FOLD engine processes instructions: | 33 | /* Here's a short description how the FOLD engine processes instructions: |
31 | ** | 34 | ** |
@@ -133,8 +136,8 @@ | |||
133 | /* Some local macros to save typing. Undef'd at the end. */ | 136 | /* Some local macros to save typing. Undef'd at the end. */ |
134 | #define IR(ref) (&J->cur.ir[(ref)]) | 137 | #define IR(ref) (&J->cur.ir[(ref)]) |
135 | #define fins (&J->fold.ins) | 138 | #define fins (&J->fold.ins) |
136 | #define fleft (&J->fold.left) | 139 | #define fleft (J->fold.left) |
137 | #define fright (&J->fold.right) | 140 | #define fright (J->fold.right) |
138 | #define knumleft (ir_knum(fleft)->n) | 141 | #define knumleft (ir_knum(fleft)->n) |
139 | #define knumright (ir_knum(fright)->n) | 142 | #define knumright (ir_knum(fright)->n) |
140 | 143 | ||
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); | |||
155 | 158 | ||
156 | /* Barrier to prevent folding across a GC step. | 159 | /* Barrier to prevent folding across a GC step. |
157 | ** GC steps can only happen at the head of a trace and at LOOP. | 160 | ** GC steps can only happen at the head of a trace and at LOOP. |
158 | ** And the GC is only driven forward if there is at least one allocation. | 161 | ** And the GC is only driven forward if there's at least one allocation. |
159 | */ | 162 | */ |
160 | #define gcstep_barrier(J, ref) \ | 163 | #define gcstep_barrier(J, ref) \ |
161 | ((ref) < J->chain[IR_LOOP] && \ | 164 | ((ref) < J->chain[IR_LOOP] && \ |
162 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ | 165 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ |
163 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ | 166 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ |
164 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) | 167 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \ |
168 | J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA])) | ||
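As a rough sketch of how such a barrier would typically be consulted (a hypothetical fragment in this file's idiom, not part of the patch; the helper name is made up):

/* Hypothetical sketch: refuse to reuse an earlier result across a GC step. */
static IRRef sketch_fwd_across_gc(jit_State *J, IRRef ref)
{
  if (gcstep_barrier(J, ref))
    return NEXTFOLD;  /* State observed at 'ref' may no longer hold. */
  return ref;         /* No GC step in between: safe to forward 'ref'. */
}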
165 | 169 | ||
166 | /* -- Constant folding for FP numbers ------------------------------------- */ | 170 | /* -- Constant folding for FP numbers ------------------------------------- */ |
167 | 171 | ||
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM) | |||
169 | LJFOLD(SUB KNUM KNUM) | 173 | LJFOLD(SUB KNUM KNUM) |
170 | LJFOLD(MUL KNUM KNUM) | 174 | LJFOLD(MUL KNUM KNUM) |
171 | LJFOLD(DIV KNUM KNUM) | 175 | LJFOLD(DIV KNUM KNUM) |
172 | LJFOLD(NEG KNUM KNUM) | ||
173 | LJFOLD(ABS KNUM KNUM) | ||
174 | LJFOLD(ATAN2 KNUM KNUM) | ||
175 | LJFOLD(LDEXP KNUM KNUM) | 176 | LJFOLD(LDEXP KNUM KNUM) |
176 | LJFOLD(MIN KNUM KNUM) | 177 | LJFOLD(MIN KNUM KNUM) |
177 | LJFOLD(MAX KNUM KNUM) | 178 | LJFOLD(MAX KNUM KNUM) |
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith) | |||
183 | return lj_ir_knum(J, y); | 184 | return lj_ir_knum(J, y); |
184 | } | 185 | } |
185 | 186 | ||
187 | LJFOLD(NEG KNUM FLOAD) | ||
188 | LJFOLD(ABS KNUM FLOAD) | ||
189 | LJFOLDF(kfold_numabsneg) | ||
190 | { | ||
191 | lua_Number a = knumleft; | ||
192 | lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD); | ||
193 | return lj_ir_knum(J, y); | ||
194 | } | ||
195 | |||
186 | LJFOLD(LDEXP KNUM KINT) | 196 | LJFOLD(LDEXP KNUM KINT) |
187 | LJFOLDF(kfold_ldexp) | 197 | LJFOLDF(kfold_ldexp) |
188 | { | 198 | { |
@@ -202,13 +212,34 @@ LJFOLDF(kfold_fpmath) | |||
202 | return lj_ir_knum(J, y); | 212 | return lj_ir_knum(J, y); |
203 | } | 213 | } |
204 | 214 | ||
205 | LJFOLD(POW KNUM KINT) | 215 | LJFOLD(CALLN KNUM any) |
216 | LJFOLDF(kfold_fpcall1) | ||
217 | { | ||
218 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
219 | if (CCI_TYPE(ci) == IRT_NUM) { | ||
220 | double y = ((double (*)(double))ci->func)(knumleft); | ||
221 | return lj_ir_knum(J, y); | ||
222 | } | ||
223 | return NEXTFOLD; | ||
224 | } | ||
225 | |||
226 | LJFOLD(CALLN CARG IRCALL_atan2) | ||
227 | LJFOLDF(kfold_fpcall2) | ||
228 | { | ||
229 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | ||
230 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
231 | double a = ir_knum(IR(fleft->op1))->n; | ||
232 | double b = ir_knum(IR(fleft->op2))->n; | ||
233 | double y = ((double (*)(double, double))ci->func)(a, b); | ||
234 | return lj_ir_knum(J, y); | ||
235 | } | ||
236 | return NEXTFOLD; | ||
237 | } | ||
238 | |||
239 | LJFOLD(POW KNUM KNUM) | ||
206 | LJFOLDF(kfold_numpow) | 240 | LJFOLDF(kfold_numpow) |
207 | { | 241 | { |
208 | lua_Number a = knumleft; | 242 | return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD)); |
209 | lua_Number b = (lua_Number)fright->i; | ||
210 | lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); | ||
211 | return lj_ir_knum(J, y); | ||
212 | } | 243 | } |
213 | 244 | ||
214 | /* Must not use kfold_kref for numbers (could be NaN). */ | 245 | /* Must not use kfold_kref for numbers (could be NaN). */ |
@@ -247,7 +278,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | |||
247 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; | 278 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; |
248 | case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; | 279 | case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; |
249 | case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; | 280 | case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; |
250 | default: lua_assert(0); break; | 281 | default: lj_assertX(0, "bad IR op %d", op); break; |
251 | } | 282 | } |
252 | return k1; | 283 | return k1; |
253 | } | 284 | } |
@@ -319,7 +350,7 @@ LJFOLDF(kfold_intcomp) | |||
319 | case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); | 350 | case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); |
320 | case IR_ABC: | 351 | case IR_ABC: |
321 | case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); | 352 | case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); |
322 | default: lua_assert(0); return FAILFOLD; | 353 | default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; |
323 | } | 354 | } |
324 | } | 355 | } |
325 | 356 | ||
@@ -333,21 +364,29 @@ LJFOLDF(kfold_intcomp0) | |||
333 | 364 | ||
334 | /* -- Constant folding for 64 bit integers -------------------------------- */ | 365 | /* -- Constant folding for 64 bit integers -------------------------------- */ |
335 | 366 | ||
336 | static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) | 367 | static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, |
368 | IROp op) | ||
337 | { | 369 | { |
370 | UNUSED(J); | ||
371 | #if LJ_HASFFI | ||
338 | switch (op) { | 372 | switch (op) { |
339 | #if LJ_64 || LJ_HASFFI | ||
340 | case IR_ADD: k1 += k2; break; | 373 | case IR_ADD: k1 += k2; break; |
341 | case IR_SUB: k1 -= k2; break; | 374 | case IR_SUB: k1 -= k2; break; |
342 | #endif | ||
343 | #if LJ_HASFFI | ||
344 | case IR_MUL: k1 *= k2; break; | 375 | case IR_MUL: k1 *= k2; break; |
345 | case IR_BAND: k1 &= k2; break; | 376 | case IR_BAND: k1 &= k2; break; |
346 | case IR_BOR: k1 |= k2; break; | 377 | case IR_BOR: k1 |= k2; break; |
347 | case IR_BXOR: k1 ^= k2; break; | 378 | case IR_BXOR: k1 ^= k2; break; |
348 | #endif | 379 | case IR_BSHL: k1 <<= (k2 & 63); break; |
349 | default: UNUSED(k2); lua_assert(0); break; | 380 | case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; |
381 | case IR_BSAR: k1 >>= (k2 & 63); break; | ||
382 | case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; | ||
383 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; | ||
384 | default: lj_assertJ(0, "bad IR op %d", op); break; | ||
350 | } | 385 | } |
386 | #else | ||
387 | UNUSED(k2); UNUSED(op); | ||
388 | lj_assertJ(0, "FFI IR op without FFI"); | ||
389 | #endif | ||
351 | return k1; | 390 | return k1; |
352 | } | 391 | } |
353 | 392 | ||
@@ -359,7 +398,7 @@ LJFOLD(BOR KINT64 KINT64) | |||
359 | LJFOLD(BXOR KINT64 KINT64) | 398 | LJFOLD(BXOR KINT64 KINT64) |
360 | LJFOLDF(kfold_int64arith) | 399 | LJFOLDF(kfold_int64arith) |
361 | { | 400 | { |
362 | return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, | 401 | return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64, |
363 | ir_k64(fright)->u64, (IROp)fins->o)); | 402 | ir_k64(fright)->u64, (IROp)fins->o)); |
364 | } | 403 | } |
365 | 404 | ||
@@ -381,7 +420,7 @@ LJFOLDF(kfold_int64arith2) | |||
381 | } | 420 | } |
382 | return INT64FOLD(k1); | 421 | return INT64FOLD(k1); |
383 | #else | 422 | #else |
384 | UNUSED(J); lua_assert(0); return FAILFOLD; | 423 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
385 | #endif | 424 | #endif |
386 | } | 425 | } |
387 | 426 | ||
@@ -392,22 +431,12 @@ LJFOLD(BROL KINT64 KINT) | |||
392 | LJFOLD(BROR KINT64 KINT) | 431 | LJFOLD(BROR KINT64 KINT) |
393 | LJFOLDF(kfold_int64shift) | 432 | LJFOLDF(kfold_int64shift) |
394 | { | 433 | { |
395 | #if LJ_HASFFI || LJ_64 | 434 | #if LJ_HASFFI |
396 | uint64_t k = ir_k64(fleft)->u64; | 435 | uint64_t k = ir_k64(fleft)->u64; |
397 | int32_t sh = (fright->i & 63); | 436 | int32_t sh = (fright->i & 63); |
398 | switch ((IROp)fins->o) { | 437 | return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); |
399 | case IR_BSHL: k <<= sh; break; | ||
400 | #if LJ_HASFFI | ||
401 | case IR_BSHR: k >>= sh; break; | ||
402 | case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break; | ||
403 | case IR_BROL: k = lj_rol(k, sh); break; | ||
404 | case IR_BROR: k = lj_ror(k, sh); break; | ||
405 | #endif | ||
406 | default: lua_assert(0); break; | ||
407 | } | ||
408 | return INT64FOLD(k); | ||
409 | #else | 438 | #else |
410 | UNUSED(J); lua_assert(0); return FAILFOLD; | 439 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
411 | #endif | 440 | #endif |
412 | } | 441 | } |
413 | 442 | ||
@@ -417,7 +446,7 @@ LJFOLDF(kfold_bnot64) | |||
417 | #if LJ_HASFFI | 446 | #if LJ_HASFFI |
418 | return INT64FOLD(~ir_k64(fleft)->u64); | 447 | return INT64FOLD(~ir_k64(fleft)->u64); |
419 | #else | 448 | #else |
420 | UNUSED(J); lua_assert(0); return FAILFOLD; | 449 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
421 | #endif | 450 | #endif |
422 | } | 451 | } |
423 | 452 | ||
@@ -427,7 +456,7 @@ LJFOLDF(kfold_bswap64) | |||
427 | #if LJ_HASFFI | 456 | #if LJ_HASFFI |
428 | return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); | 457 | return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); |
429 | #else | 458 | #else |
430 | UNUSED(J); lua_assert(0); return FAILFOLD; | 459 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
431 | #endif | 460 | #endif |
432 | } | 461 | } |
433 | 462 | ||
@@ -452,10 +481,10 @@ LJFOLDF(kfold_int64comp) | |||
452 | case IR_UGE: return CONDFOLD(a >= b); | 481 | case IR_UGE: return CONDFOLD(a >= b); |
453 | case IR_ULE: return CONDFOLD(a <= b); | 482 | case IR_ULE: return CONDFOLD(a <= b); |
454 | case IR_UGT: return CONDFOLD(a > b); | 483 | case IR_UGT: return CONDFOLD(a > b); |
455 | default: lua_assert(0); return FAILFOLD; | 484 | default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; |
456 | } | 485 | } |
457 | #else | 486 | #else |
458 | UNUSED(J); lua_assert(0); return FAILFOLD; | 487 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
459 | #endif | 488 | #endif |
460 | } | 489 | } |
461 | 490 | ||
@@ -467,7 +496,7 @@ LJFOLDF(kfold_int64comp0) | |||
467 | return DROPFOLD; | 496 | return DROPFOLD; |
468 | return NEXTFOLD; | 497 | return NEXTFOLD; |
469 | #else | 498 | #else |
470 | UNUSED(J); lua_assert(0); return FAILFOLD; | 499 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
471 | #endif | 500 | #endif |
472 | } | 501 | } |
473 | 502 | ||
@@ -481,6 +510,7 @@ LJFOLDF(kfold_snew_kptr) | |||
481 | } | 510 | } |
482 | 511 | ||
483 | LJFOLD(SNEW any KINT) | 512 | LJFOLD(SNEW any KINT) |
513 | LJFOLD(XSNEW any KINT) | ||
484 | LJFOLDF(kfold_snew_empty) | 514 | LJFOLDF(kfold_snew_empty) |
485 | { | 515 | { |
486 | if (fright->i == 0) | 516 | if (fright->i == 0) |
@@ -492,7 +522,7 @@ LJFOLD(STRREF KGC KINT) | |||
492 | LJFOLDF(kfold_strref) | 522 | LJFOLDF(kfold_strref) |
493 | { | 523 | { |
494 | GCstr *str = ir_kstr(fleft); | 524 | GCstr *str = ir_kstr(fleft); |
495 | lua_assert((MSize)fright->i <= str->len); | 525 | lj_assertJ((MSize)fright->i <= str->len, "bad string ref"); |
496 | return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); | 526 | return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); |
497 | } | 527 | } |
498 | 528 | ||
@@ -510,7 +540,7 @@ LJFOLDF(kfold_strref_snew) | |||
510 | PHIBARRIER(ir); | 540 | PHIBARRIER(ir); |
511 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ | 541 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ |
512 | fins->op1 = str; | 542 | fins->op1 = str; |
513 | fins->ot = IRT(IR_STRREF, IRT_P32); | 543 | fins->ot = IRT(IR_STRREF, IRT_PGC); |
514 | return RETRYFOLD; | 544 | return RETRYFOLD; |
515 | } | 545 | } |
516 | } | 546 | } |
@@ -528,6 +558,211 @@ LJFOLDF(kfold_strcmp) | |||
528 | return NEXTFOLD; | 558 | return NEXTFOLD; |
529 | } | 559 | } |
530 | 560 | ||
561 | /* -- Constant folding and forwarding for buffers ------------------------- */ | ||
562 | |||
563 | /* | ||
564 | ** Buffer ops perform stores, but their effect is limited to the buffer | ||
565 | ** itself. Also, buffer ops are chained: a use of an op implies a use of | ||
566 | ** all other ops up the chain. Conversely, if an op is unused, all ops | ||
567 | ** up the chain can go unused. This largely eliminates the need to treat | ||
568 | ** them as stores. | ||
569 | ** | ||
570 | ** Alas, treating them as normal (IRM_N) ops doesn't work, because they | ||
571 | ** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP | ||
572 | ** or if FOLD is disabled. | ||
573 | ** | ||
574 | ** The compromise is to declare them as loads, emit them like stores and | ||
575 | ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain | ||
576 | ** fragments left over from CSE are eliminated by DCE. | ||
577 | ** | ||
578 | ** The string buffer methods emit a USE instead of a BUFSTR to keep the | ||
579 | ** chain alive. | ||
580 | */ | ||
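As a rough standalone illustration of the "CSE whole chains manually" idea described above (simplified op encoding and made-up field names, not LuaJIT's actual IR layout):

#include <stdio.h>

/* Simplified stand-in for a chained buffer op: op 0 = header, op 1 = put. */
typedef struct BufOp {
  int op;    /* Opcode. */
  int arg;   /* Operand, e.g. the id of a constant string being put. */
  int prev;  /* Index of the previous op in the chain, -1 for a header. */
} BufOp;

/* CSE of a whole chain: walk both chains in lockstep back towards their
** headers and succeed only if every op along the way matches. */
static int chains_equal(const BufOp *ir, int a, int b)
{
  while (ir[a].op == ir[b].op && ir[a].arg == ir[b].arg) {
    if (ir[a].op == 0) return 1;  /* Both reached a matching header. */
    a = ir[a].prev;
    b = ir[b].prev;
  }
  return 0;  /* Mismatch somewhere in the chain: no CSE possible. */
}

int main(void)
{
  /* Two chains built from the same puts: header, put #10, put #11. */
  BufOp ir[] = {
    {0, 0, -1}, {1, 10, 0}, {1, 11, 1},   /* Chain A ends at index 2. */
    {0, 0, -1}, {1, 10, 3}, {1, 11, 4},   /* Chain B ends at index 5. */
  };
  printf("CSE possible: %d\n", chains_equal(ir, 2, 5));  /* Prints 1. */
  return 0;
}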
581 | |||
582 | LJFOLD(BUFHDR any any) | ||
583 | LJFOLDF(bufhdr_merge) | ||
584 | { | ||
585 | return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD; | ||
586 | } | ||
587 | |||
588 | LJFOLD(BUFPUT any BUFSTR) | ||
589 | LJFOLDF(bufput_bufstr) | ||
590 | { | ||
591 | if ((J->flags & JIT_F_OPT_FWD)) { | ||
592 | IRRef hdr = fright->op2; | ||
593 | /* New buffer, no other buffer op inbetween and same buffer? */ | ||
594 | if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET && | ||
595 | fleft->prev == hdr && | ||
596 | fleft->op1 == IR(hdr)->op1 && | ||
597 | !(irt_isphi(fright->t) && IR(hdr)->prev) && | ||
598 | (!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) { | ||
599 | IRRef ref = fins->op1; | ||
600 | IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */ | ||
601 | IR(ref)->op1 = fright->op1; | ||
602 | return ref; | ||
603 | } | ||
604 | /* Replay puts to global temporary buffer. */ | ||
605 | if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) { | ||
606 | IRIns *ir = IR(fright->op1); | ||
607 | /* For now only handle single string.reverse .lower .upper .rep. */ | ||
608 | if (ir->o == IR_CALLL && | ||
609 | ir->op2 >= IRCALL_lj_buf_putstr_reverse && | ||
610 | ir->op2 <= IRCALL_lj_buf_putstr_rep) { | ||
611 | IRIns *carg1 = IR(ir->op1); | ||
612 | if (ir->op2 == IRCALL_lj_buf_putstr_rep) { | ||
613 | IRIns *carg2 = IR(carg1->op1); | ||
614 | if (carg2->op1 == hdr) { | ||
615 | return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2); | ||
616 | } | ||
617 | } else if (carg1->op1 == hdr) { | ||
618 | return lj_ir_call(J, ir->op2, fins->op1, carg1->op2); | ||
619 | } | ||
620 | } | ||
621 | } | ||
622 | } | ||
623 | return EMITFOLD; /* Always emit, CSE later. */ | ||
624 | } | ||
625 | |||
626 | LJFOLD(BUFPUT any any) | ||
627 | LJFOLDF(bufput_kgc) | ||
628 | { | ||
629 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) { | ||
630 | GCstr *s2 = ir_kstr(fright); | ||
631 | if (s2->len == 0) { /* Empty string? */ | ||
632 | return LEFTFOLD; | ||
633 | } else { | ||
634 | if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) && | ||
635 | !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */ | ||
636 | GCstr *s1 = ir_kstr(IR(fleft->op2)); | ||
637 | IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2)); | ||
638 | /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */ | ||
639 | IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */ | ||
640 | return fins->op1; | ||
641 | } | ||
642 | } | ||
643 | } | ||
644 | return EMITFOLD; /* Always emit, CSE later. */ | ||
645 | } | ||
646 | |||
647 | LJFOLD(BUFSTR any any) | ||
648 | LJFOLDF(bufstr_kfold_cse) | ||
649 | { | ||
650 | lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || | ||
651 | fleft->o == IR_CALLL, | ||
652 | "bad buffer constructor IR op %d", fleft->o); | ||
653 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | ||
654 | if (fleft->o == IR_BUFHDR) { /* No put operations? */ | ||
655 | if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */ | ||
656 | return lj_ir_kstr(J, &J2G(J)->strempty); | ||
657 | fins->op1 = fleft->op1; | ||
658 | fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */ | ||
659 | return CSEFOLD; | ||
660 | } else if (fleft->o == IR_BUFPUT) { | ||
661 | IRIns *irb = IR(fleft->op1); | ||
662 | if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET) | ||
663 | return fleft->op2; /* Shortcut for a single put operation. */ | ||
664 | } | ||
665 | } | ||
666 | /* Try to CSE the whole chain. */ | ||
667 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
668 | IRRef ref = J->chain[IR_BUFSTR]; | ||
669 | while (ref) { | ||
670 | IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); | ||
671 | while (ira->o == irb->o && ira->op2 == irb->op2) { | ||
672 | lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || | ||
673 | ira->o == IR_CALLL || ira->o == IR_CARG, | ||
674 | "bad buffer constructor IR op %d", ira->o); | ||
675 | if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET) | ||
676 | return ref; /* CSE succeeded. */ | ||
677 | if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) | ||
678 | break; | ||
679 | ira = IR(ira->op1); | ||
680 | irb = IR(irb->op1); | ||
681 | } | ||
682 | ref = irs->prev; | ||
683 | } | ||
684 | } | ||
685 | return EMITFOLD; /* No CSE possible. */ | ||
686 | } | ||
687 | |||
688 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) | ||
689 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) | ||
690 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) | ||
691 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted) | ||
692 | LJFOLDF(bufput_kfold_op) | ||
693 | { | ||
694 | if (irref_isk(fleft->op2)) { | ||
695 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
696 | SBuf *sb = lj_buf_tmp_(J->L); | ||
697 | sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb, | ||
698 | ir_kstr(IR(fleft->op2))); | ||
699 | fins->o = IR_BUFPUT; | ||
700 | fins->op1 = fleft->op1; | ||
701 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
702 | return RETRYFOLD; | ||
703 | } | ||
704 | return EMITFOLD; /* Always emit, CSE later. */ | ||
705 | } | ||
706 | |||
707 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep) | ||
708 | LJFOLDF(bufput_kfold_rep) | ||
709 | { | ||
710 | if (irref_isk(fleft->op2)) { | ||
711 | IRIns *irc = IR(fleft->op1); | ||
712 | if (irref_isk(irc->op2)) { | ||
713 | SBuf *sb = lj_buf_tmp_(J->L); | ||
714 | sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i); | ||
715 | fins->o = IR_BUFPUT; | ||
716 | fins->op1 = irc->op1; | ||
717 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
718 | return RETRYFOLD; | ||
719 | } | ||
720 | } | ||
721 | return EMITFOLD; /* Always emit, CSE later. */ | ||
722 | } | ||
723 | |||
724 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint) | ||
725 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int) | ||
726 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint) | ||
727 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum) | ||
728 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr) | ||
729 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) | ||
730 | LJFOLDF(bufput_kfold_fmt) | ||
731 | { | ||
732 | IRIns *irc = IR(fleft->op1); | ||
733 | lj_assertJ(irref_isk(irc->op2), "SFormat must be const"); | ||
734 | if (irref_isk(fleft->op2)) { | ||
735 | SFormat sf = (SFormat)IR(irc->op2)->i; | ||
736 | IRIns *ira = IR(fleft->op2); | ||
737 | SBuf *sb = lj_buf_tmp_(J->L); | ||
738 | switch (fins->op2) { | ||
739 | case IRCALL_lj_strfmt_putfxint: | ||
740 | sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64); | ||
741 | break; | ||
742 | case IRCALL_lj_strfmt_putfstr: | ||
743 | sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira)); | ||
744 | break; | ||
745 | case IRCALL_lj_strfmt_putfchar: | ||
746 | sb = lj_strfmt_putfchar(sb, sf, ira->i); | ||
747 | break; | ||
748 | case IRCALL_lj_strfmt_putfnum_int: | ||
749 | case IRCALL_lj_strfmt_putfnum_uint: | ||
750 | case IRCALL_lj_strfmt_putfnum: | ||
751 | default: { | ||
752 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
753 | sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf, | ||
754 | ir_knum(ira)->n); | ||
755 | break; | ||
756 | } | ||
757 | } | ||
758 | fins->o = IR_BUFPUT; | ||
759 | fins->op1 = irc->op1; | ||
760 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
761 | return RETRYFOLD; | ||
762 | } | ||
763 | return EMITFOLD; /* Always emit, CSE later. */ | ||
764 | } | ||
765 | |||
531 | /* -- Constant folding of pointer arithmetic ------------------------------ */ | 766 | /* -- Constant folding of pointer arithmetic ------------------------------ */ |
532 | 767 | ||
533 | LJFOLD(ADD KGC KINT) | 768 | LJFOLD(ADD KGC KINT) |
@@ -648,21 +883,17 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) | |||
648 | LJFOLDF(kfold_conv_knum_int_num) | 883 | LJFOLDF(kfold_conv_knum_int_num) |
649 | { | 884 | { |
650 | lua_Number n = knumleft; | 885 | lua_Number n = knumleft; |
651 | if (!(fins->op2 & IRCONV_TRUNC)) { | 886 | int32_t k = lj_num2int(n); |
652 | int32_t k = lj_num2int(n); | 887 | if (irt_isguard(fins->t) && n != (lua_Number)k) { |
653 | if (irt_isguard(fins->t) && n != (lua_Number)k) { | 888 | /* We're about to create a guard which always fails, like CONV +1.5. |
654 | /* We're about to create a guard which always fails, like CONV +1.5. | 889 | ** Some pathological loops cause this during LICM, e.g.: |
655 | ** Some pathological loops cause this during LICM, e.g.: | 890 | ** local x,k,t = 0,1.5,{1,[1.5]=2} |
656 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | 891 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end |
657 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | 892 | ** assert(x == 300) |
658 | ** assert(x == 300) | 893 | */ |
659 | */ | 894 | return FAILFOLD; |
660 | return FAILFOLD; | ||
661 | } | ||
662 | return INTFOLD(k); | ||
663 | } else { | ||
664 | return INTFOLD((int32_t)n); | ||
665 | } | 895 | } |
896 | return INTFOLD(k); | ||
666 | } | 897 | } |
667 | 898 | ||
668 | LJFOLD(CONV KNUM IRCONV_U32_NUM) | 899 | LJFOLD(CONV KNUM IRCONV_U32_NUM) |
@@ -690,16 +921,18 @@ LJFOLDF(kfold_conv_knum_u64_num) | |||
690 | return INT64FOLD(lj_num2u64(knumleft)); | 921 | return INT64FOLD(lj_num2u64(knumleft)); |
691 | } | 922 | } |
692 | 923 | ||
693 | LJFOLD(TOSTR KNUM) | 924 | LJFOLD(TOSTR KNUM any) |
694 | LJFOLDF(kfold_tostr_knum) | 925 | LJFOLDF(kfold_tostr_knum) |
695 | { | 926 | { |
696 | return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); | 927 | return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft))); |
697 | } | 928 | } |
698 | 929 | ||
699 | LJFOLD(TOSTR KINT) | 930 | LJFOLD(TOSTR KINT any) |
700 | LJFOLDF(kfold_tostr_kint) | 931 | LJFOLDF(kfold_tostr_kint) |
701 | { | 932 | { |
702 | return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); | 933 | return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ? |
934 | lj_strfmt_int(J->L, fleft->i) : | ||
935 | lj_strfmt_char(J->L, fleft->i)); | ||
703 | } | 936 | } |
704 | 937 | ||
705 | LJFOLD(STRTO KGC) | 938 | LJFOLD(STRTO KGC) |
@@ -747,13 +980,13 @@ LJFOLDF(shortcut_round) | |||
747 | return NEXTFOLD; | 980 | return NEXTFOLD; |
748 | } | 981 | } |
749 | 982 | ||
750 | LJFOLD(ABS ABS KNUM) | 983 | LJFOLD(ABS ABS FLOAD) |
751 | LJFOLDF(shortcut_left) | 984 | LJFOLDF(shortcut_left) |
752 | { | 985 | { |
753 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ | 986 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ |
754 | } | 987 | } |
755 | 988 | ||
756 | LJFOLD(ABS NEG KNUM) | 989 | LJFOLD(ABS NEG FLOAD) |
757 | LJFOLDF(shortcut_dropleft) | 990 | LJFOLDF(shortcut_dropleft) |
758 | { | 991 | { |
759 | PHIBARRIER(fleft); | 992 | PHIBARRIER(fleft); |
@@ -833,8 +1066,10 @@ LJFOLDF(simplify_nummuldiv_k) | |||
833 | if (n == 1.0) { /* x o 1 ==> x */ | 1066 | if (n == 1.0) { /* x o 1 ==> x */ |
834 | return LEFTFOLD; | 1067 | return LEFTFOLD; |
835 | } else if (n == -1.0) { /* x o -1 ==> -x */ | 1068 | } else if (n == -1.0) { /* x o -1 ==> -x */ |
1069 | IRRef op1 = fins->op1; | ||
1070 | fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */ | ||
1071 | fins->op1 = op1; | ||
836 | fins->o = IR_NEG; | 1072 | fins->o = IR_NEG; |
837 | fins->op2 = (IRRef1)lj_ir_knum_neg(J); | ||
838 | return RETRYFOLD; | 1073 | return RETRYFOLD; |
839 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ | 1074 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ |
840 | fins->o = IR_ADD; | 1075 | fins->o = IR_ADD; |
@@ -874,52 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg) | |||
874 | return RETRYFOLD; | 1109 | return RETRYFOLD; |
875 | } | 1110 | } |
876 | 1111 | ||
877 | LJFOLD(POW any KINT) | 1112 | LJFOLD(POW any KNUM) |
878 | LJFOLDF(simplify_numpow_xk) | 1113 | LJFOLDF(simplify_numpow_k) |
879 | { | 1114 | { |
880 | int32_t k = fright->i; | 1115 | if (knumright == 0.0) /* x ^ 0 ==> 1 */ |
881 | TRef ref = fins->op1; | ||
882 | if (k == 0) /* x ^ 0 ==> 1 */ | ||
883 | return lj_ir_knum_one(J); /* Result must be a number, not an int. */ | 1116 | return lj_ir_knum_one(J); /* Result must be a number, not an int. */ |
884 | if (k == 1) /* x ^ 1 ==> x */ | 1117 | else if (knumright == 1.0) /* x ^ 1 ==> x */ |
885 | return LEFTFOLD; | 1118 | return LEFTFOLD; |
886 | if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */ | 1119 | else if (knumright == 2.0) /* x ^ 2 ==> x * x */ |
1120 | return emitir(IRTN(IR_MUL), fins->op1, fins->op1); | ||
1121 | else | ||
887 | return NEXTFOLD; | 1122 | return NEXTFOLD; |
888 | if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */ | ||
889 | ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref); | ||
890 | k = -k; | ||
891 | } | ||
892 | /* Unroll x^k for 1 <= k <= 65536. */ | ||
893 | for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */ | ||
894 | ref = emitir(IRTN(IR_MUL), ref, ref); | ||
895 | if ((k >>= 1) != 0) { /* Handle trailing bits. */ | ||
896 | TRef tmp = emitir(IRTN(IR_MUL), ref, ref); | ||
897 | for (; k != 1; k >>= 1) { | ||
898 | if (k & 1) | ||
899 | ref = emitir(IRTN(IR_MUL), ref, tmp); | ||
900 | tmp = emitir(IRTN(IR_MUL), tmp, tmp); | ||
901 | } | ||
902 | ref = emitir(IRTN(IR_MUL), ref, tmp); | ||
903 | } | ||
904 | return ref; | ||
905 | } | ||
906 | |||
907 | LJFOLD(POW KNUM any) | ||
908 | LJFOLDF(simplify_numpow_kx) | ||
909 | { | ||
910 | lua_Number n = knumleft; | ||
911 | if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ | ||
912 | fins->o = IR_CONV; | ||
913 | #if LJ_TARGET_X86ORX64 | ||
914 | fins->op1 = fins->op2; | ||
915 | fins->op2 = IRCONV_NUM_INT; | ||
916 | fins->op2 = (IRRef1)lj_opt_fold(J); | ||
917 | #endif | ||
918 | fins->op1 = (IRRef1)lj_ir_knum_one(J); | ||
919 | fins->o = IR_LDEXP; | ||
920 | return RETRYFOLD; | ||
921 | } | ||
922 | return NEXTFOLD; | ||
923 | } | 1123 | } |
924 | 1124 | ||
925 | /* -- Simplify conversions ------------------------------------------------ */ | 1125 | /* -- Simplify conversions ------------------------------------------------ */ |
@@ -1004,10 +1204,10 @@ LJFOLDF(simplify_tobit_conv) | |||
1004 | { | 1204 | { |
1005 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | 1205 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ |
1006 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { | 1206 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { |
1007 | lua_assert(irt_isnum(fleft->t)); | 1207 | lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); |
1008 | return fleft->op1; | 1208 | return fleft->op1; |
1009 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | 1209 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { |
1010 | lua_assert(irt_isnum(fleft->t)); | 1210 | lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); |
1011 | fins->o = IR_CONV; | 1211 | fins->o = IR_CONV; |
1012 | fins->op1 = fleft->op1; | 1212 | fins->op1 = fleft->op1; |
1013 | fins->op2 = (IRT_INT<<5)|IRT_U32; | 1213 | fins->op2 = (IRT_INT<<5)|IRT_U32; |
@@ -1047,7 +1247,7 @@ LJFOLDF(simplify_conv_sext) | |||
1047 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ | 1247 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ |
1048 | if (ref == J->scev.idx) { | 1248 | if (ref == J->scev.idx) { |
1049 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; | 1249 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; |
1050 | lua_assert(irt_isint(J->scev.t)); | 1250 | lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported"); |
1051 | if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { | 1251 | if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { |
1052 | ok_reduce: | 1252 | ok_reduce: |
1053 | #if LJ_TARGET_X64 | 1253 | #if LJ_TARGET_X64 |
@@ -1078,6 +1278,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64) | |||
1078 | LJFOLD(CONV MUL IRCONV_U32_U64) | 1278 | LJFOLD(CONV MUL IRCONV_U32_U64) |
1079 | LJFOLDF(simplify_conv_narrow) | 1279 | LJFOLDF(simplify_conv_narrow) |
1080 | { | 1280 | { |
1281 | #if LJ_64 | ||
1282 | UNUSED(J); | ||
1283 | return NEXTFOLD; | ||
1284 | #else | ||
1081 | IROp op = (IROp)fleft->o; | 1285 | IROp op = (IROp)fleft->o; |
1082 | IRType t = irt_type(fins->t); | 1286 | IRType t = irt_type(fins->t); |
1083 | IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; | 1287 | IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; |
@@ -1088,6 +1292,7 @@ LJFOLDF(simplify_conv_narrow) | |||
1088 | fins->op1 = op1; | 1292 | fins->op1 = op1; |
1089 | fins->op2 = op2; | 1293 | fins->op2 = op2; |
1090 | return RETRYFOLD; | 1294 | return RETRYFOLD; |
1295 | #endif | ||
1091 | } | 1296 | } |
1092 | 1297 | ||
1093 | /* Special CSE rule for CONV. */ | 1298 | /* Special CSE rule for CONV. */ |
@@ -1123,7 +1328,8 @@ LJFOLDF(narrow_convert) | |||
1123 | /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ | 1328 | /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ |
1124 | if (J->chain[IR_LOOP]) | 1329 | if (J->chain[IR_LOOP]) |
1125 | return NEXTFOLD; | 1330 | return NEXTFOLD; |
1126 | lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); | 1331 | lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT, |
1332 | "unexpected CONV TOBIT"); | ||
1127 | return lj_opt_narrow_convert(J); | 1333 | return lj_opt_narrow_convert(J); |
1128 | } | 1334 | } |
1129 | 1335 | ||
@@ -1201,7 +1407,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1201 | ** But this is mainly intended for simple address arithmetic. | 1407 | ** But this is mainly intended for simple address arithmetic. |
1202 | ** Also it's easier for the backend to optimize the original multiplies. | 1408 | ** Also it's easier for the backend to optimize the original multiplies. |
1203 | */ | 1409 | */ |
1204 | if (k == 1) { /* i * 1 ==> i */ | 1410 | if (k == 0) { /* i * 0 ==> 0 */ |
1411 | return RIGHTFOLD; | ||
1412 | } else if (k == 1) { /* i * 1 ==> i */ | ||
1205 | return LEFTFOLD; | 1413 | return LEFTFOLD; |
1206 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ | 1414 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ |
1207 | fins->o = IR_BSHL; | 1415 | fins->o = IR_BSHL; |
@@ -1214,9 +1422,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1214 | LJFOLD(MUL any KINT) | 1422 | LJFOLD(MUL any KINT) |
1215 | LJFOLDF(simplify_intmul_k32) | 1423 | LJFOLDF(simplify_intmul_k32) |
1216 | { | 1424 | { |
1217 | if (fright->i == 0) /* i * 0 ==> 0 */ | 1425 | if (fright->i >= 0) |
1218 | return INTFOLD(0); | ||
1219 | else if (fright->i > 0) | ||
1220 | return simplify_intmul_k(J, fright->i); | 1426 | return simplify_intmul_k(J, fright->i); |
1221 | return NEXTFOLD; | 1427 | return NEXTFOLD; |
1222 | } | 1428 | } |
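A minimal standalone check of the strength-reduction identities used above (illustrative only; any positive power-of-two constant behaves the same way):

#include <assert.h>
#include <stdint.h>

int main(void)
{
  int32_t i = 12345, k = 8;
  assert((k & (k - 1)) == 0);   /* k is a power of two (here 2^3). */
  assert(i * k == (i << 3));    /* So i * k can become i << 3. */
  assert(i * 1 == i);           /* i * 1 ==> i */
  assert(i * 0 == 0);           /* i * 0 ==> 0 (folds to the constant). */
  return 0;
}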
@@ -1224,21 +1430,20 @@ LJFOLDF(simplify_intmul_k32) | |||
1224 | LJFOLD(MUL any KINT64) | 1430 | LJFOLD(MUL any KINT64) |
1225 | LJFOLDF(simplify_intmul_k64) | 1431 | LJFOLDF(simplify_intmul_k64) |
1226 | { | 1432 | { |
1227 | if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ | 1433 | #if LJ_HASFFI |
1228 | return INT64FOLD(0); | 1434 | if (ir_kint64(fright)->u64 < 0x80000000u) |
1229 | #if LJ_64 | ||
1230 | /* NYI: SPLIT for BSHL and 32 bit backend support. */ | ||
1231 | else if (ir_kint64(fright)->u64 < 0x80000000u) | ||
1232 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); | 1435 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); |
1233 | #endif | ||
1234 | return NEXTFOLD; | 1436 | return NEXTFOLD; |
1437 | #else | ||
1438 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; | ||
1439 | #endif | ||
1235 | } | 1440 | } |
1236 | 1441 | ||
1237 | LJFOLD(MOD any KINT) | 1442 | LJFOLD(MOD any KINT) |
1238 | LJFOLDF(simplify_intmod_k) | 1443 | LJFOLDF(simplify_intmod_k) |
1239 | { | 1444 | { |
1240 | int32_t k = fright->i; | 1445 | int32_t k = fright->i; |
1241 | lua_assert(k != 0); | 1446 | lj_assertJ(k != 0, "integer mod 0"); |
1242 | if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ | 1447 | if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ |
1243 | fins->o = IR_BAND; | 1448 | fins->o = IR_BAND; |
1244 | fins->op2 = lj_ir_kint(J, k-1); | 1449 | fins->op2 = lj_ir_kint(J, k-1); |
@@ -1487,6 +1692,15 @@ LJFOLDF(simplify_shiftk_andk) | |||
1487 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 1692 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
1488 | fins->ot = IRTI(IR_BAND); | 1693 | fins->ot = IRTI(IR_BAND); |
1489 | return RETRYFOLD; | 1694 | return RETRYFOLD; |
1695 | } else if (irk->o == IR_KINT64) { | ||
1696 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i, | ||
1697 | (IROp)fins->o); | ||
1698 | IROpT ot = fleft->ot; | ||
1699 | fins->op1 = fleft->op1; | ||
1700 | fins->op1 = (IRRef1)lj_opt_fold(J); | ||
1701 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); | ||
1702 | fins->ot = ot; | ||
1703 | return RETRYFOLD; | ||
1490 | } | 1704 | } |
1491 | return NEXTFOLD; | 1705 | return NEXTFOLD; |
1492 | } | 1706 | } |
@@ -1502,6 +1716,47 @@ LJFOLDF(simplify_andk_shiftk) | |||
1502 | return NEXTFOLD; | 1716 | return NEXTFOLD; |
1503 | } | 1717 | } |
1504 | 1718 | ||
1719 | LJFOLD(BAND BOR KINT) | ||
1720 | LJFOLD(BOR BAND KINT) | ||
1721 | LJFOLDF(simplify_andor_k) | ||
1722 | { | ||
1723 | IRIns *irk = IR(fleft->op2); | ||
1724 | PHIBARRIER(fleft); | ||
1725 | if (irk->o == IR_KINT) { | ||
1726 | int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); | ||
1727 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1728 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1729 | if (k == (fins->o == IR_BAND ? 0 : -1)) { | ||
1730 | fins->op1 = fleft->op1; | ||
1731 | return RETRYFOLD; | ||
1732 | } | ||
1733 | } | ||
1734 | return NEXTFOLD; | ||
1735 | } | ||
1736 | |||
1737 | LJFOLD(BAND BOR KINT64) | ||
1738 | LJFOLD(BOR BAND KINT64) | ||
1739 | LJFOLDF(simplify_andor_k64) | ||
1740 | { | ||
1741 | #if LJ_HASFFI | ||
1742 | IRIns *irk = IR(fleft->op2); | ||
1743 | PHIBARRIER(fleft); | ||
1744 | if (irk->o == IR_KINT64) { | ||
1745 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, | ||
1746 | (IROp)fins->o); | ||
1747 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1748 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1749 | if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) { | ||
1750 | fins->op1 = fleft->op1; | ||
1751 | return RETRYFOLD; | ||
1752 | } | ||
1753 | } | ||
1754 | return NEXTFOLD; | ||
1755 | #else | ||
1756 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; | ||
1757 | #endif | ||
1758 | } | ||
1759 | |||
1505 | /* -- Reassociation ------------------------------------------------------- */ | 1760 | /* -- Reassociation ------------------------------------------------------- */ |
1506 | 1761 | ||
1507 | LJFOLD(ADD ADD KINT) | 1762 | LJFOLD(ADD ADD KINT) |
@@ -1531,11 +1786,11 @@ LJFOLD(BOR BOR KINT64) | |||
1531 | LJFOLD(BXOR BXOR KINT64) | 1786 | LJFOLD(BXOR BXOR KINT64) |
1532 | LJFOLDF(reassoc_intarith_k64) | 1787 | LJFOLDF(reassoc_intarith_k64) |
1533 | { | 1788 | { |
1534 | #if LJ_HASFFI || LJ_64 | 1789 | #if LJ_HASFFI |
1535 | IRIns *irk = IR(fleft->op2); | 1790 | IRIns *irk = IR(fleft->op2); |
1536 | if (irk->o == IR_KINT64) { | 1791 | if (irk->o == IR_KINT64) { |
1537 | uint64_t k = kfold_int64arith(ir_k64(irk)->u64, | 1792 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, |
1538 | ir_k64(fright)->u64, (IROp)fins->o); | 1793 | (IROp)fins->o); |
1539 | PHIBARRIER(fleft); | 1794 | PHIBARRIER(fleft); |
1540 | fins->op1 = fleft->op1; | 1795 | fins->op1 = fleft->op1; |
1541 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); | 1796 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); |
@@ -1543,12 +1798,10 @@ LJFOLDF(reassoc_intarith_k64) | |||
1543 | } | 1798 | } |
1544 | return NEXTFOLD; | 1799 | return NEXTFOLD; |
1545 | #else | 1800 | #else |
1546 | UNUSED(J); lua_assert(0); return FAILFOLD; | 1801 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
1547 | #endif | 1802 | #endif |
1548 | } | 1803 | } |
1549 | 1804 | ||
1550 | LJFOLD(MIN MIN any) | ||
1551 | LJFOLD(MAX MAX any) | ||
1552 | LJFOLD(BAND BAND any) | 1805 | LJFOLD(BAND BAND any) |
1553 | LJFOLD(BOR BOR any) | 1806 | LJFOLD(BOR BOR any) |
1554 | LJFOLDF(reassoc_dup) | 1807 | LJFOLDF(reassoc_dup) |
@@ -1558,6 +1811,15 @@ LJFOLDF(reassoc_dup) | |||
1558 | return NEXTFOLD; | 1811 | return NEXTFOLD; |
1559 | } | 1812 | } |
1560 | 1813 | ||
1814 | LJFOLD(MIN MIN any) | ||
1815 | LJFOLD(MAX MAX any) | ||
1816 | LJFOLDF(reassoc_dup_minmax) | ||
1817 | { | ||
1818 | if (fins->op2 == fleft->op2) | ||
1819 | return LEFTFOLD; /* (a o b) o b ==> a o b */ | ||
1820 | return NEXTFOLD; | ||
1821 | } | ||
1822 | |||
1561 | LJFOLD(BXOR BXOR any) | 1823 | LJFOLD(BXOR BXOR any) |
1562 | LJFOLDF(reassoc_bxor) | 1824 | LJFOLDF(reassoc_bxor) |
1563 | { | 1825 | { |
@@ -1596,23 +1858,12 @@ LJFOLDF(reassoc_shift) | |||
1596 | return NEXTFOLD; | 1858 | return NEXTFOLD; |
1597 | } | 1859 | } |
1598 | 1860 | ||
1599 | LJFOLD(MIN MIN KNUM) | ||
1600 | LJFOLD(MAX MAX KNUM) | ||
1601 | LJFOLD(MIN MIN KINT) | 1861 | LJFOLD(MIN MIN KINT) |
1602 | LJFOLD(MAX MAX KINT) | 1862 | LJFOLD(MAX MAX KINT) |
1603 | LJFOLDF(reassoc_minmax_k) | 1863 | LJFOLDF(reassoc_minmax_k) |
1604 | { | 1864 | { |
1605 | IRIns *irk = IR(fleft->op2); | 1865 | IRIns *irk = IR(fleft->op2); |
1606 | if (irk->o == IR_KNUM) { | 1866 | if (irk->o == IR_KINT) { |
1607 | lua_Number a = ir_knum(irk)->n; | ||
1608 | lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); | ||
1609 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ | ||
1610 | return LEFTFOLD; | ||
1611 | PHIBARRIER(fleft); | ||
1612 | fins->op1 = fleft->op1; | ||
1613 | fins->op2 = (IRRef1)lj_ir_knum(J, y); | ||
1614 | return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ | ||
1615 | } else if (irk->o == IR_KINT) { | ||
1616 | int32_t a = irk->i; | 1867 | int32_t a = irk->i; |
1617 | int32_t y = kfold_intop(a, fright->i, fins->o); | 1868 | int32_t y = kfold_intop(a, fright->i, fins->o); |
1618 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ | 1869 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ |
@@ -1625,24 +1876,6 @@ LJFOLDF(reassoc_minmax_k) | |||
1625 | return NEXTFOLD; | 1876 | return NEXTFOLD; |
1626 | } | 1877 | } |
1627 | 1878 | ||
1628 | LJFOLD(MIN MAX any) | ||
1629 | LJFOLD(MAX MIN any) | ||
1630 | LJFOLDF(reassoc_minmax_left) | ||
1631 | { | ||
1632 | if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) | ||
1633 | return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ | ||
1634 | return NEXTFOLD; | ||
1635 | } | ||
1636 | |||
1637 | LJFOLD(MIN any MAX) | ||
1638 | LJFOLD(MAX any MIN) | ||
1639 | LJFOLDF(reassoc_minmax_right) | ||
1640 | { | ||
1641 | if (fins->op1 == fright->op1 || fins->op1 == fright->op2) | ||
1642 | return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ | ||
1643 | return NEXTFOLD; | ||
1644 | } | ||
1645 | |||
1646 | /* -- Array bounds check elimination -------------------------------------- */ | 1879 | /* -- Array bounds check elimination -------------------------------------- */ |
1647 | 1880 | ||
1648 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. | 1881 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. |
@@ -1769,8 +2002,6 @@ LJFOLDF(comm_comp) | |||
1769 | 2002 | ||
1770 | LJFOLD(BAND any any) | 2003 | LJFOLD(BAND any any) |
1771 | LJFOLD(BOR any any) | 2004 | LJFOLD(BOR any any) |
1772 | LJFOLD(MIN any any) | ||
1773 | LJFOLD(MAX any any) | ||
1774 | LJFOLDF(comm_dup) | 2005 | LJFOLDF(comm_dup) |
1775 | { | 2006 | { |
1776 | if (fins->op1 == fins->op2) /* x o x ==> x */ | 2007 | if (fins->op1 == fins->op2) /* x o x ==> x */ |
@@ -1778,6 +2009,15 @@ LJFOLDF(comm_dup) | |||
1778 | return fold_comm_swap(J); | 2009 | return fold_comm_swap(J); |
1779 | } | 2010 | } |
1780 | 2011 | ||
2012 | LJFOLD(MIN any any) | ||
2013 | LJFOLD(MAX any any) | ||
2014 | LJFOLDF(comm_dup_minmax) | ||
2015 | { | ||
2016 | if (fins->op1 == fins->op2) /* x o x ==> x */ | ||
2017 | return LEFTFOLD; | ||
2018 | return NEXTFOLD; | ||
2019 | } | ||
2020 | |||
1781 | LJFOLD(BXOR any any) | 2021 | LJFOLD(BXOR any any) |
1782 | LJFOLDF(comm_bxor) | 2022 | LJFOLDF(comm_bxor) |
1783 | { | 2023 | { |
@@ -1814,7 +2054,7 @@ LJFOLDF(merge_eqne_snew_kgc) | |||
1814 | { | 2054 | { |
1815 | GCstr *kstr = ir_kstr(fright); | 2055 | GCstr *kstr = ir_kstr(fright); |
1816 | int32_t len = (int32_t)kstr->len; | 2056 | int32_t len = (int32_t)kstr->len; |
1817 | lua_assert(irt_isstr(fins->t)); | 2057 | lj_assertJ(irt_isstr(fins->t), "bad equality IR type"); |
1818 | 2058 | ||
1819 | #if LJ_TARGET_UNALIGNED | 2059 | #if LJ_TARGET_UNALIGNED |
1820 | #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ | 2060 | #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ |
@@ -1878,7 +2118,7 @@ LJFOLD(HLOAD KKPTR) | |||
1878 | LJFOLDF(kfold_hload_kkptr) | 2118 | LJFOLDF(kfold_hload_kkptr) |
1879 | { | 2119 | { |
1880 | UNUSED(J); | 2120 | UNUSED(J); |
1881 | lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); | 2121 | lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv"); |
1882 | return TREF_NIL; | 2122 | return TREF_NIL; |
1883 | } | 2123 | } |
1884 | 2124 | ||
@@ -1888,8 +2128,8 @@ LJFOLDX(lj_opt_fwd_hload) | |||
1888 | LJFOLD(ULOAD any) | 2128 | LJFOLD(ULOAD any) |
1889 | LJFOLDX(lj_opt_fwd_uload) | 2129 | LJFOLDX(lj_opt_fwd_uload) |
1890 | 2130 | ||
1891 | LJFOLD(CALLL any IRCALL_lj_tab_len) | 2131 | LJFOLD(ALEN any any) |
1892 | LJFOLDX(lj_opt_fwd_tab_len) | 2132 | LJFOLDX(lj_opt_fwd_alen) |
1893 | 2133 | ||
1894 | /* Upvalue refs are really loads, but there are no corresponding stores. | 2134 | /* Upvalue refs are really loads, but there are no corresponding stores. |
1895 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | 2135 | ** So CSE is ok for them, except for UREFO across a GC step (see below). |
@@ -1950,6 +2190,7 @@ LJFOLDF(fwd_href_tdup) | |||
1950 | ** an aliased table, as it may invalidate all of the pointers and fields. | 2190 | ** an aliased table, as it may invalidate all of the pointers and fields. |
1951 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on | 2191 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on |
1952 | ** FLOADs. And NEWREF itself is treated like a store (see below). | 2192 | ** FLOADs. And NEWREF itself is treated like a store (see below). |
2193 | ** LREF is constant (per trace) since coroutine switches are not inlined. | ||
1953 | */ | 2194 | */ |
1954 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) | 2195 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) |
1955 | LJFOLDF(fload_tab_tnew_asize) | 2196 | LJFOLDF(fload_tab_tnew_asize) |
@@ -2013,6 +2254,35 @@ LJFOLDF(fload_str_len_snew) | |||
2013 | return NEXTFOLD; | 2254 | return NEXTFOLD; |
2014 | } | 2255 | } |
2015 | 2256 | ||
2257 | LJFOLD(FLOAD TOSTR IRFL_STR_LEN) | ||
2258 | LJFOLDF(fload_str_len_tostr) | ||
2259 | { | ||
2260 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR) | ||
2261 | return INTFOLD(1); | ||
2262 | return NEXTFOLD; | ||
2263 | } | ||
2264 | |||
2265 | LJFOLD(FLOAD any IRFL_SBUF_W) | ||
2266 | LJFOLD(FLOAD any IRFL_SBUF_E) | ||
2267 | LJFOLD(FLOAD any IRFL_SBUF_B) | ||
2268 | LJFOLD(FLOAD any IRFL_SBUF_L) | ||
2269 | LJFOLD(FLOAD any IRFL_SBUF_REF) | ||
2270 | LJFOLD(FLOAD any IRFL_SBUF_R) | ||
2271 | LJFOLDF(fload_sbuf) | ||
2272 | { | ||
2273 | TRef tr = lj_opt_fwd_fload(J); | ||
2274 | return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; | ||
2275 | } | ||
2276 | |||
2277 | /* The fast function ID of function objects is immutable. */ | ||
2278 | LJFOLD(FLOAD KGC IRFL_FUNC_FFID) | ||
2279 | LJFOLDF(fload_func_ffid_kgc) | ||
2280 | { | ||
2281 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
2282 | return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); | ||
2283 | return NEXTFOLD; | ||
2284 | } | ||
2285 | |||
2016 | /* The C type ID of cdata objects is immutable. */ | 2286 | /* The C type ID of cdata objects is immutable. */ |
2017 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) | 2287 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) |
2018 | LJFOLDF(fload_cdata_typeid_kgc) | 2288 | LJFOLDF(fload_cdata_typeid_kgc) |
@@ -2059,6 +2329,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew) | |||
2059 | } | 2329 | } |
2060 | 2330 | ||
2061 | LJFOLD(FLOAD any IRFL_STR_LEN) | 2331 | LJFOLD(FLOAD any IRFL_STR_LEN) |
2332 | LJFOLD(FLOAD any IRFL_FUNC_ENV) | ||
2333 | LJFOLD(FLOAD any IRFL_THREAD_ENV) | ||
2062 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) | 2334 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) |
2063 | LJFOLD(FLOAD any IRFL_CDATA_PTR) | 2335 | LJFOLD(FLOAD any IRFL_CDATA_PTR) |
2064 | LJFOLD(FLOAD any IRFL_CDATA_INT) | 2336 | LJFOLD(FLOAD any IRFL_CDATA_INT) |
@@ -2078,7 +2350,7 @@ LJFOLDF(fwd_sload) | |||
2078 | TRef tr = lj_opt_cse(J); | 2350 | TRef tr = lj_opt_cse(J); |
2079 | return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; | 2351 | return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; |
2080 | } else { | 2352 | } else { |
2081 | lua_assert(J->slot[fins->op1] != 0); | 2353 | lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed"); |
2082 | return J->slot[fins->op1]; | 2354 | return J->slot[fins->op1]; |
2083 | } | 2355 | } |
2084 | } | 2356 | } |
@@ -2135,6 +2407,17 @@ LJFOLDF(barrier_tnew_tdup) | |||
2135 | return DROPFOLD; | 2407 | return DROPFOLD; |
2136 | } | 2408 | } |
2137 | 2409 | ||
2410 | /* -- Profiling ----------------------------------------------------------- */ | ||
2411 | |||
2412 | LJFOLD(PROF any any) | ||
2413 | LJFOLDF(prof) | ||
2414 | { | ||
2415 | IRRef ref = J->chain[IR_PROF]; | ||
2416 | if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */ | ||
2417 | return ref; | ||
2418 | return EMITFOLD; | ||
2419 | } | ||
2420 | |||
2138 | /* -- Stores and allocations ---------------------------------------------- */ | 2421 | /* -- Stores and allocations ---------------------------------------------- */ |
2139 | 2422 | ||
2140 | /* Stores and allocations cannot be folded or passed on to CSE in general. | 2423 | /* Stores and allocations cannot be folded or passed on to CSE in general. |
@@ -2157,8 +2440,10 @@ LJFOLD(XSTORE any any) | |||
2157 | LJFOLDX(lj_opt_dse_xstore) | 2440 | LJFOLDX(lj_opt_dse_xstore) |
2158 | 2441 | ||
2159 | LJFOLD(NEWREF any any) /* Treated like a store. */ | 2442 | LJFOLD(NEWREF any any) /* Treated like a store. */ |
2160 | LJFOLD(CALLS any any) | 2443 | LJFOLD(TMPREF any any) |
2444 | LJFOLD(CALLA any any) | ||
2161 | LJFOLD(CALLL any any) /* Safeguard fallback. */ | 2445 | LJFOLD(CALLL any any) /* Safeguard fallback. */ |
2446 | LJFOLD(CALLS any any) | ||
2162 | LJFOLD(CALLXS any any) | 2447 | LJFOLD(CALLXS any any) |
2163 | LJFOLD(XBAR) | 2448 | LJFOLD(XBAR) |
2164 | LJFOLD(RETF any any) /* Modifies BASE. */ | 2449 | LJFOLD(RETF any any) /* Modifies BASE. */ |
@@ -2191,8 +2476,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) | |||
2191 | IRRef ref; | 2476 | IRRef ref; |
2192 | 2477 | ||
2193 | if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { | 2478 | if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { |
2194 | lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | | 2479 | lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | |
2195 | JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); | 2480 | JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT, |
2481 | "bad JIT_F_OPT_DEFAULT"); | ||
2196 | /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ | 2482 | /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ |
2197 | if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) | 2483 | if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) |
2198 | return lj_opt_cse(J); | 2484 | return lj_opt_cse(J); |
@@ -2217,10 +2503,14 @@ retry: | |||
2217 | if (fins->op1 >= J->cur.nk) { | 2503 | if (fins->op1 >= J->cur.nk) { |
2218 | key += (uint32_t)IR(fins->op1)->o << 10; | 2504 | key += (uint32_t)IR(fins->op1)->o << 10; |
2219 | *fleft = *IR(fins->op1); | 2505 | *fleft = *IR(fins->op1); |
2506 | if (fins->op1 < REF_TRUE) | ||
2507 | fleft[1] = IR(fins->op1)[1]; | ||
2220 | } | 2508 | } |
2221 | if (fins->op2 >= J->cur.nk) { | 2509 | if (fins->op2 >= J->cur.nk) { |
2222 | key += (uint32_t)IR(fins->op2)->o; | 2510 | key += (uint32_t)IR(fins->op2)->o; |
2223 | *fright = *IR(fins->op2); | 2511 | *fright = *IR(fins->op2); |
2512 | if (fins->op2 < REF_TRUE) | ||
2513 | fright[1] = IR(fins->op2)[1]; | ||
2224 | } else { | 2514 | } else { |
2225 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ | 2515 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ |
2226 | } | 2516 | } |
@@ -2250,7 +2540,7 @@ retry: | |||
2250 | return lj_ir_kint(J, fins->i); | 2540 | return lj_ir_kint(J, fins->i); |
2251 | if (ref == FAILFOLD) | 2541 | if (ref == FAILFOLD) |
2252 | lj_trace_err(J, LJ_TRERR_GFAIL); | 2542 | lj_trace_err(J, LJ_TRERR_GFAIL); |
2253 | lua_assert(ref == DROPFOLD); | 2543 | lj_assertJ(ref == DROPFOLD, "bad fold result"); |
2254 | return REF_DROP; | 2544 | return REF_DROP; |
2255 | } | 2545 | } |
2256 | 2546 | ||