Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r-- | src/lj_opt_fold.c | 590
1 file changed, 462 insertions(+), 128 deletions(-)
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 82ed2d32..e3fe8bbf 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@ | |||
14 | 14 | ||
15 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
16 | 16 | ||
17 | #include "lj_buf.h" | ||
17 | #include "lj_str.h" | 18 | #include "lj_str.h" |
18 | #include "lj_tab.h" | 19 | #include "lj_tab.h" |
19 | #include "lj_ir.h" | 20 | #include "lj_ir.h" |
20 | #include "lj_jit.h" | 21 | #include "lj_jit.h" |
22 | #include "lj_ircall.h" | ||
21 | #include "lj_iropt.h" | 23 | #include "lj_iropt.h" |
22 | #include "lj_trace.h" | 24 | #include "lj_trace.h" |
23 | #if LJ_HASFFI | 25 | #if LJ_HASFFI |
24 | #include "lj_ctype.h" | 26 | #include "lj_ctype.h" |
25 | #endif | ||
26 | #include "lj_carith.h" | 27 | #include "lj_carith.h" |
28 | #endif | ||
27 | #include "lj_vm.h" | 29 | #include "lj_vm.h" |
28 | #include "lj_strscan.h" | 30 | #include "lj_strscan.h" |
31 | #include "lj_strfmt.h" | ||
29 | 32 | ||
30 | /* Here's a short description how the FOLD engine processes instructions: | 33 | /* Here's a short description how the FOLD engine processes instructions: |
31 | ** | 34 | ** |
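A note on the notation used throughout this patch: the LJFOLD()/LJFOLDF() lines are build-time annotations that buildvm scans to generate the fold dispatch table, while LJFOLDF() also opens the handler function. A hedged sketch of the macro definitions from the unchanged part of this file (quoted from memory, so treat the exact wording as an assumption):

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)

Each LJFOLD(op left right) line above a handler declares one pattern that dispatches to that handler.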
@@ -133,8 +136,8 @@ | |||
133 | /* Some local macros to save typing. Undef'd at the end. */ | 136 | /* Some local macros to save typing. Undef'd at the end. */ |
134 | #define IR(ref) (&J->cur.ir[(ref)]) | 137 | #define IR(ref) (&J->cur.ir[(ref)]) |
135 | #define fins (&J->fold.ins) | 138 | #define fins (&J->fold.ins) |
136 | #define fleft (&J->fold.left) | 139 | #define fleft (J->fold.left) |
137 | #define fright (&J->fold.right) | 140 | #define fright (J->fold.right) |
138 | #define knumleft (ir_knum(fleft)->n) | 141 | #define knumleft (ir_knum(fleft)->n) |
139 | #define knumright (ir_knum(fright)->n) | 142 | #define knumright (ir_knum(fright)->n) |
140 | 143 | ||
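The change from &J->fold.left to J->fold.left implies the fold slots are now small arrays rather than single IRIns values, which is what lets the operand-copy code near the end of this patch also grab the payload word of 64 bit constants (fleft[1] = IR(fins->op1)[1]). A hedged sketch of the assumed jit_State layout (not part of this diff):

struct {
  IRIns ins;      /* Currently folded instruction. */
  IRIns left[2];  /* Instruction referenced by op1, plus 64 bit K payload. */
  IRIns right[2]; /* Instruction referenced by op2, plus 64 bit K payload. */
} fold;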
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); | |||
155 | 158 | ||
156 | /* Barrier to prevent folding across a GC step. | 159 | /* Barrier to prevent folding across a GC step. |
157 | ** GC steps can only happen at the head of a trace and at LOOP. | 160 | ** GC steps can only happen at the head of a trace and at LOOP. |
158 | ** And the GC is only driven forward if there is at least one allocation. | 161 | ** And the GC is only driven forward if there's at least one allocation. |
159 | */ | 162 | */ |
160 | #define gcstep_barrier(J, ref) \ | 163 | #define gcstep_barrier(J, ref) \ |
161 | ((ref) < J->chain[IR_LOOP] && \ | 164 | ((ref) < J->chain[IR_LOOP] && \ |
162 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ | 165 | (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ |
163 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ | 166 | J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ |
164 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) | 167 | J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \ |
168 | J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA])) | ||
165 | 169 | ||
166 | /* -- Constant folding for FP numbers ------------------------------------- */ | 170 | /* -- Constant folding for FP numbers ------------------------------------- */ |
167 | 171 | ||
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM) | |||
169 | LJFOLD(SUB KNUM KNUM) | 173 | LJFOLD(SUB KNUM KNUM) |
170 | LJFOLD(MUL KNUM KNUM) | 174 | LJFOLD(MUL KNUM KNUM) |
171 | LJFOLD(DIV KNUM KNUM) | 175 | LJFOLD(DIV KNUM KNUM) |
172 | LJFOLD(NEG KNUM KNUM) | ||
173 | LJFOLD(ABS KNUM KNUM) | ||
174 | LJFOLD(ATAN2 KNUM KNUM) | ||
175 | LJFOLD(LDEXP KNUM KNUM) | 176 | LJFOLD(LDEXP KNUM KNUM) |
176 | LJFOLD(MIN KNUM KNUM) | 177 | LJFOLD(MIN KNUM KNUM) |
177 | LJFOLD(MAX KNUM KNUM) | 178 | LJFOLD(MAX KNUM KNUM) |
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith) | |||
183 | return lj_ir_knum(J, y); | 184 | return lj_ir_knum(J, y); |
184 | } | 185 | } |
185 | 186 | ||
187 | LJFOLD(NEG KNUM FLOAD) | ||
188 | LJFOLD(ABS KNUM FLOAD) | ||
189 | LJFOLDF(kfold_numabsneg) | ||
190 | { | ||
191 | lua_Number a = knumleft; | ||
192 | lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD); | ||
193 | return lj_ir_knum(J, y); | ||
194 | } | ||
195 | |||
186 | LJFOLD(LDEXP KNUM KINT) | 196 | LJFOLD(LDEXP KNUM KINT) |
187 | LJFOLDF(kfold_ldexp) | 197 | LJFOLDF(kfold_ldexp) |
188 | { | 198 | { |
@@ -202,11 +212,36 @@ LJFOLDF(kfold_fpmath) | |||
202 | return lj_ir_knum(J, y); | 212 | return lj_ir_knum(J, y); |
203 | } | 213 | } |
204 | 214 | ||
215 | LJFOLD(CALLN KNUM any) | ||
216 | LJFOLDF(kfold_fpcall1) | ||
217 | { | ||
218 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
219 | if (CCI_TYPE(ci) == IRT_NUM) { | ||
220 | double y = ((double (*)(double))ci->func)(knumleft); | ||
221 | return lj_ir_knum(J, y); | ||
222 | } | ||
223 | return NEXTFOLD; | ||
224 | } | ||
225 | |||
226 | LJFOLD(CALLN CARG IRCALL_atan2) | ||
227 | LJFOLDF(kfold_fpcall2) | ||
228 | { | ||
229 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | ||
230 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
231 | double a = ir_knum(IR(fleft->op1))->n; | ||
232 | double b = ir_knum(IR(fleft->op2))->n; | ||
233 | double y = ((double (*)(double, double))ci->func)(a, b); | ||
234 | return lj_ir_knum(J, y); | ||
235 | } | ||
236 | return NEXTFOLD; | ||
237 | } | ||
238 | |||
205 | LJFOLD(POW KNUM KINT) | 239 | LJFOLD(POW KNUM KINT) |
240 | LJFOLD(POW KNUM KNUM) | ||
206 | LJFOLDF(kfold_numpow) | 241 | LJFOLDF(kfold_numpow) |
207 | { | 242 | { |
208 | lua_Number a = knumleft; | 243 | lua_Number a = knumleft; |
209 | lua_Number b = (lua_Number)fright->i; | 244 | lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright; |
210 | lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); | 245 | lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); |
211 | return lj_ir_knum(J, y); | 246 | return lj_ir_knum(J, y); |
212 | } | 247 | } |
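A minimal standalone illustration of what the added POW KNUM KNUM case buys; the real fold goes through lj_vm_foldarith(), so this only sketches the arithmetic:

#include <math.h>
#include <stdio.h>

int main(void)
{
  double a = 2.5;
  /* Both an integer and a number constant exponent now fold the same way. */
  printf("%g %g\n", pow(a, (double)3), pow(a, 3.0));  /* 15.625 15.625 */
  return 0;
}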
@@ -247,7 +282,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | |||
247 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; | 282 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; |
248 | case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; | 283 | case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; |
249 | case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; | 284 | case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; |
250 | default: lua_assert(0); break; | 285 | default: lj_assertX(0, "bad IR op %d", op); break; |
251 | } | 286 | } |
252 | return k1; | 287 | return k1; |
253 | } | 288 | } |
@@ -319,7 +354,7 @@ LJFOLDF(kfold_intcomp) | |||
319 | case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); | 354 | case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); |
320 | case IR_ABC: | 355 | case IR_ABC: |
321 | case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); | 356 | case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); |
322 | default: lua_assert(0); return FAILFOLD; | 357 | default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; |
323 | } | 358 | } |
324 | } | 359 | } |
325 | 360 | ||
@@ -333,21 +368,29 @@ LJFOLDF(kfold_intcomp0) | |||
333 | 368 | ||
334 | /* -- Constant folding for 64 bit integers -------------------------------- */ | 369 | /* -- Constant folding for 64 bit integers -------------------------------- */ |
335 | 370 | ||
336 | static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) | 371 | static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, |
372 | IROp op) | ||
337 | { | 373 | { |
374 | UNUSED(J); | ||
375 | #if LJ_HASFFI | ||
338 | switch (op) { | 376 | switch (op) { |
339 | #if LJ_64 || LJ_HASFFI | ||
340 | case IR_ADD: k1 += k2; break; | 377 | case IR_ADD: k1 += k2; break; |
341 | case IR_SUB: k1 -= k2; break; | 378 | case IR_SUB: k1 -= k2; break; |
342 | #endif | ||
343 | #if LJ_HASFFI | ||
344 | case IR_MUL: k1 *= k2; break; | 379 | case IR_MUL: k1 *= k2; break; |
345 | case IR_BAND: k1 &= k2; break; | 380 | case IR_BAND: k1 &= k2; break; |
346 | case IR_BOR: k1 |= k2; break; | 381 | case IR_BOR: k1 |= k2; break; |
347 | case IR_BXOR: k1 ^= k2; break; | 382 | case IR_BXOR: k1 ^= k2; break; |
348 | #endif | 383 | case IR_BSHL: k1 <<= (k2 & 63); break; |
349 | default: UNUSED(k2); lua_assert(0); break; | 384 | case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; |
385 | case IR_BSAR: k1 >>= (k2 & 63); break; | ||
386 | case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; | ||
387 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; | ||
388 | default: lj_assertJ(0, "bad IR op %d", op); break; | ||
350 | } | 389 | } |
390 | #else | ||
391 | UNUSED(k2); UNUSED(op); | ||
392 | lj_assertJ(0, "FFI IR op without FFI"); | ||
393 | #endif | ||
351 | return k1; | 394 | return k1; |
352 | } | 395 | } |
353 | 396 | ||
@@ -359,7 +402,7 @@ LJFOLD(BOR KINT64 KINT64) | |||
359 | LJFOLD(BXOR KINT64 KINT64) | 402 | LJFOLD(BXOR KINT64 KINT64) |
360 | LJFOLDF(kfold_int64arith) | 403 | LJFOLDF(kfold_int64arith) |
361 | { | 404 | { |
362 | return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, | 405 | return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64, |
363 | ir_k64(fright)->u64, (IROp)fins->o)); | 406 | ir_k64(fright)->u64, (IROp)fins->o)); |
364 | } | 407 | } |
365 | 408 | ||
@@ -381,7 +424,7 @@ LJFOLDF(kfold_int64arith2) | |||
381 | } | 424 | } |
382 | return INT64FOLD(k1); | 425 | return INT64FOLD(k1); |
383 | #else | 426 | #else |
384 | UNUSED(J); lua_assert(0); return FAILFOLD; | 427 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
385 | #endif | 428 | #endif |
386 | } | 429 | } |
387 | 430 | ||
@@ -392,22 +435,12 @@ LJFOLD(BROL KINT64 KINT) | |||
392 | LJFOLD(BROR KINT64 KINT) | 435 | LJFOLD(BROR KINT64 KINT) |
393 | LJFOLDF(kfold_int64shift) | 436 | LJFOLDF(kfold_int64shift) |
394 | { | 437 | { |
395 | #if LJ_HASFFI || LJ_64 | 438 | #if LJ_HASFFI |
396 | uint64_t k = ir_k64(fleft)->u64; | 439 | uint64_t k = ir_k64(fleft)->u64; |
397 | int32_t sh = (fright->i & 63); | 440 | int32_t sh = (fright->i & 63); |
398 | switch ((IROp)fins->o) { | 441 | return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); |
399 | case IR_BSHL: k <<= sh; break; | ||
400 | #if LJ_HASFFI | ||
401 | case IR_BSHR: k >>= sh; break; | ||
402 | case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break; | ||
403 | case IR_BROL: k = lj_rol(k, sh); break; | ||
404 | case IR_BROR: k = lj_ror(k, sh); break; | ||
405 | #endif | ||
406 | default: lua_assert(0); break; | ||
407 | } | ||
408 | return INT64FOLD(k); | ||
409 | #else | 442 | #else |
410 | UNUSED(J); lua_assert(0); return FAILFOLD; | 443 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
411 | #endif | 444 | #endif |
412 | } | 445 | } |
413 | 446 | ||
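The sh & 63 masking kept above matters because shifting a 64 bit value by 64 or more is undefined in C; the fold masks the count so the compile-time result matches the run-time shift semantics (the exact VM convention is assumed here, not shown in the diff). A standalone sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint64_t k = 0x123456789abcdef0ULL;
  int32_t sh = 68 & 63;                     /* Same masking as in the fold. */
  printf("%016llx\n", (unsigned long long)(k << sh));  /* Shifted by 4, not 68. */
  return 0;
}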
@@ -417,7 +450,7 @@ LJFOLDF(kfold_bnot64) | |||
417 | #if LJ_HASFFI | 450 | #if LJ_HASFFI |
418 | return INT64FOLD(~ir_k64(fleft)->u64); | 451 | return INT64FOLD(~ir_k64(fleft)->u64); |
419 | #else | 452 | #else |
420 | UNUSED(J); lua_assert(0); return FAILFOLD; | 453 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
421 | #endif | 454 | #endif |
422 | } | 455 | } |
423 | 456 | ||
@@ -427,7 +460,7 @@ LJFOLDF(kfold_bswap64) | |||
427 | #if LJ_HASFFI | 460 | #if LJ_HASFFI |
428 | return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); | 461 | return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); |
429 | #else | 462 | #else |
430 | UNUSED(J); lua_assert(0); return FAILFOLD; | 463 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
431 | #endif | 464 | #endif |
432 | } | 465 | } |
433 | 466 | ||
@@ -452,10 +485,10 @@ LJFOLDF(kfold_int64comp) | |||
452 | case IR_UGE: return CONDFOLD(a >= b); | 485 | case IR_UGE: return CONDFOLD(a >= b); |
453 | case IR_ULE: return CONDFOLD(a <= b); | 486 | case IR_ULE: return CONDFOLD(a <= b); |
454 | case IR_UGT: return CONDFOLD(a > b); | 487 | case IR_UGT: return CONDFOLD(a > b); |
455 | default: lua_assert(0); return FAILFOLD; | 488 | default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; |
456 | } | 489 | } |
457 | #else | 490 | #else |
458 | UNUSED(J); lua_assert(0); return FAILFOLD; | 491 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
459 | #endif | 492 | #endif |
460 | } | 493 | } |
461 | 494 | ||
@@ -467,7 +500,7 @@ LJFOLDF(kfold_int64comp0) | |||
467 | return DROPFOLD; | 500 | return DROPFOLD; |
468 | return NEXTFOLD; | 501 | return NEXTFOLD; |
469 | #else | 502 | #else |
470 | UNUSED(J); lua_assert(0); return FAILFOLD; | 503 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
471 | #endif | 504 | #endif |
472 | } | 505 | } |
473 | 506 | ||
@@ -481,6 +514,7 @@ LJFOLDF(kfold_snew_kptr) | |||
481 | } | 514 | } |
482 | 515 | ||
483 | LJFOLD(SNEW any KINT) | 516 | LJFOLD(SNEW any KINT) |
517 | LJFOLD(XSNEW any KINT) | ||
484 | LJFOLDF(kfold_snew_empty) | 518 | LJFOLDF(kfold_snew_empty) |
485 | { | 519 | { |
486 | if (fright->i == 0) | 520 | if (fright->i == 0) |
@@ -492,7 +526,7 @@ LJFOLD(STRREF KGC KINT) | |||
492 | LJFOLDF(kfold_strref) | 526 | LJFOLDF(kfold_strref) |
493 | { | 527 | { |
494 | GCstr *str = ir_kstr(fleft); | 528 | GCstr *str = ir_kstr(fleft); |
495 | lua_assert((MSize)fright->i <= str->len); | 529 | lj_assertJ((MSize)fright->i <= str->len, "bad string ref"); |
496 | return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); | 530 | return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); |
497 | } | 531 | } |
498 | 532 | ||
@@ -510,7 +544,7 @@ LJFOLDF(kfold_strref_snew) | |||
510 | PHIBARRIER(ir); | 544 | PHIBARRIER(ir); |
511 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ | 545 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ |
512 | fins->op1 = str; | 546 | fins->op1 = str; |
513 | fins->ot = IRT(IR_STRREF, IRT_P32); | 547 | fins->ot = IRT(IR_STRREF, IRT_PGC); |
514 | return RETRYFOLD; | 548 | return RETRYFOLD; |
515 | } | 549 | } |
516 | } | 550 | } |
@@ -528,6 +562,210 @@ LJFOLDF(kfold_strcmp) | |||
528 | return NEXTFOLD; | 562 | return NEXTFOLD; |
529 | } | 563 | } |
530 | 564 | ||
565 | /* -- Constant folding and forwarding for buffers ------------------------- */ | ||
566 | |||
567 | /* | ||
568 | ** Buffer ops perform stores, but their effect is limited to the buffer | ||
569 | ** itself. Also, buffer ops are chained: a use of an op implies a use of | ||
570 | ** all other ops up the chain. Conversely, if an op is unused, all ops | ||
571 | ** up the chain can go unsed. This largely eliminates the need to treat | ||
572 | ** them as stores. | ||
573 | ** | ||
574 | ** Alas, treating them as normal (IRM_N) ops doesn't work, because they | ||
575 | ** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP | ||
576 | ** or if FOLD is disabled. | ||
577 | ** | ||
578 | ** The compromise is to declare them as loads, emit them like stores and | ||
579 | ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain | ||
580 | ** fragments left over from CSE are eliminated by DCE. | ||
581 | ** | ||
582 | ** The string buffer methods emit a USE instead of a BUFSTR to keep the | ||
583 | ** chain alive. | ||
584 | */ | ||
585 | |||
586 | LJFOLD(BUFHDR any any) | ||
587 | LJFOLDF(bufhdr_merge) | ||
588 | { | ||
589 | return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD; | ||
590 | } | ||
591 | |||
592 | LJFOLD(BUFPUT any BUFSTR) | ||
593 | LJFOLDF(bufput_bufstr) | ||
594 | { | ||
595 | if ((J->flags & JIT_F_OPT_FWD)) { | ||
596 | IRRef hdr = fright->op2; | ||
597 | /* New buffer, no other buffer op in between and same buffer? */ | ||
598 | if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET && | ||
599 | fleft->prev == hdr && | ||
600 | fleft->op1 == IR(hdr)->op1 && | ||
601 | !(irt_isphi(fright->t) && IR(hdr)->prev)) { | ||
602 | IRRef ref = fins->op1; | ||
603 | IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */ | ||
604 | IR(ref)->op1 = fright->op1; | ||
605 | return ref; | ||
606 | } | ||
607 | /* Replay puts to global temporary buffer. */ | ||
608 | if (IR(hdr)->op2 == IRBUFHDR_RESET) { | ||
609 | IRIns *ir = IR(fright->op1); | ||
610 | /* For now only handle single string.reverse .lower .upper .rep. */ | ||
611 | if (ir->o == IR_CALLL && | ||
612 | ir->op2 >= IRCALL_lj_buf_putstr_reverse && | ||
613 | ir->op2 <= IRCALL_lj_buf_putstr_rep) { | ||
614 | IRIns *carg1 = IR(ir->op1); | ||
615 | if (ir->op2 == IRCALL_lj_buf_putstr_rep) { | ||
616 | IRIns *carg2 = IR(carg1->op1); | ||
617 | if (carg2->op1 == hdr) { | ||
618 | return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2); | ||
619 | } | ||
620 | } else if (carg1->op1 == hdr) { | ||
621 | return lj_ir_call(J, ir->op2, fins->op1, carg1->op2); | ||
622 | } | ||
623 | } | ||
624 | } | ||
625 | } | ||
626 | return EMITFOLD; /* Always emit, CSE later. */ | ||
627 | } | ||
628 | |||
629 | LJFOLD(BUFPUT any any) | ||
630 | LJFOLDF(bufput_kgc) | ||
631 | { | ||
632 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) { | ||
633 | GCstr *s2 = ir_kstr(fright); | ||
634 | if (s2->len == 0) { /* Empty string? */ | ||
635 | return LEFTFOLD; | ||
636 | } else { | ||
637 | if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) && | ||
638 | !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */ | ||
639 | GCstr *s1 = ir_kstr(IR(fleft->op2)); | ||
640 | IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2)); | ||
641 | /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */ | ||
642 | IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */ | ||
643 | return fins->op1; | ||
644 | } | ||
645 | } | ||
646 | } | ||
647 | return EMITFOLD; /* Always emit, CSE later. */ | ||
648 | } | ||
649 | |||
650 | LJFOLD(BUFSTR any any) | ||
651 | LJFOLDF(bufstr_kfold_cse) | ||
652 | { | ||
653 | lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || | ||
654 | fleft->o == IR_CALLL, | ||
655 | "bad buffer constructor IR op %d", fleft->o); | ||
656 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | ||
657 | if (fleft->o == IR_BUFHDR) { /* No put operations? */ | ||
658 | if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */ | ||
659 | return lj_ir_kstr(J, &J2G(J)->strempty); | ||
660 | fins->op1 = fleft->op1; | ||
661 | fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */ | ||
662 | return CSEFOLD; | ||
663 | } else if (fleft->o == IR_BUFPUT) { | ||
664 | IRIns *irb = IR(fleft->op1); | ||
665 | if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET) | ||
666 | return fleft->op2; /* Shortcut for a single put operation. */ | ||
667 | } | ||
668 | } | ||
669 | /* Try to CSE the whole chain. */ | ||
670 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
671 | IRRef ref = J->chain[IR_BUFSTR]; | ||
672 | while (ref) { | ||
673 | IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); | ||
674 | while (ira->o == irb->o && ira->op2 == irb->op2) { | ||
675 | lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || | ||
676 | ira->o == IR_CALLL || ira->o == IR_CARG, | ||
677 | "bad buffer constructor IR op %d", ira->o); | ||
678 | if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET) | ||
679 | return ref; /* CSE succeeded. */ | ||
680 | if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) | ||
681 | break; | ||
682 | ira = IR(ira->op1); | ||
683 | irb = IR(irb->op1); | ||
684 | } | ||
685 | ref = irs->prev; | ||
686 | } | ||
687 | } | ||
688 | return EMITFOLD; /* No CSE possible. */ | ||
689 | } | ||
690 | |||
691 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) | ||
692 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) | ||
693 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) | ||
694 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted) | ||
695 | LJFOLDF(bufput_kfold_op) | ||
696 | { | ||
697 | if (irref_isk(fleft->op2)) { | ||
698 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
699 | SBuf *sb = lj_buf_tmp_(J->L); | ||
700 | sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb, | ||
701 | ir_kstr(IR(fleft->op2))); | ||
702 | fins->o = IR_BUFPUT; | ||
703 | fins->op1 = fleft->op1; | ||
704 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
705 | return RETRYFOLD; | ||
706 | } | ||
707 | return EMITFOLD; /* Always emit, CSE later. */ | ||
708 | } | ||
709 | |||
710 | LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep) | ||
711 | LJFOLDF(bufput_kfold_rep) | ||
712 | { | ||
713 | if (irref_isk(fleft->op2)) { | ||
714 | IRIns *irc = IR(fleft->op1); | ||
715 | if (irref_isk(irc->op2)) { | ||
716 | SBuf *sb = lj_buf_tmp_(J->L); | ||
717 | sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i); | ||
718 | fins->o = IR_BUFPUT; | ||
719 | fins->op1 = irc->op1; | ||
720 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
721 | return RETRYFOLD; | ||
722 | } | ||
723 | } | ||
724 | return EMITFOLD; /* Always emit, CSE later. */ | ||
725 | } | ||
726 | |||
727 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint) | ||
728 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int) | ||
729 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint) | ||
730 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum) | ||
731 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr) | ||
732 | LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) | ||
733 | LJFOLDF(bufput_kfold_fmt) | ||
734 | { | ||
735 | IRIns *irc = IR(fleft->op1); | ||
736 | lj_assertJ(irref_isk(irc->op2), "SFormat must be const"); | ||
737 | if (irref_isk(fleft->op2)) { | ||
738 | SFormat sf = (SFormat)IR(irc->op2)->i; | ||
739 | IRIns *ira = IR(fleft->op2); | ||
740 | SBuf *sb = lj_buf_tmp_(J->L); | ||
741 | switch (fins->op2) { | ||
742 | case IRCALL_lj_strfmt_putfxint: | ||
743 | sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64); | ||
744 | break; | ||
745 | case IRCALL_lj_strfmt_putfstr: | ||
746 | sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira)); | ||
747 | break; | ||
748 | case IRCALL_lj_strfmt_putfchar: | ||
749 | sb = lj_strfmt_putfchar(sb, sf, ira->i); | ||
750 | break; | ||
751 | case IRCALL_lj_strfmt_putfnum_int: | ||
752 | case IRCALL_lj_strfmt_putfnum_uint: | ||
753 | case IRCALL_lj_strfmt_putfnum: | ||
754 | default: { | ||
755 | const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; | ||
756 | sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf, | ||
757 | ir_knum(ira)->n); | ||
758 | break; | ||
759 | } | ||
760 | } | ||
761 | fins->o = IR_BUFPUT; | ||
762 | fins->op1 = irc->op1; | ||
763 | fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); | ||
764 | return RETRYFOLD; | ||
765 | } | ||
766 | return EMITFOLD; /* Always emit, CSE later. */ | ||
767 | } | ||
768 | |||
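A hedged sketch of the chain shape the buffer folds above operate on (schematic IR only, not actual dump output): each BUFPUT or CALLL links back through op1 to the previous buffer op, and the final BUFSTR's op2 points at the BUFHDR, which is what bufput_bufstr() and bufstr_kfold_cse() rely on when they walk or CSE whole chains.

/* Buffer reset, two appends, then conversion to a string:
**   0001  BUFHDR  buf   RESET      -- head of the chain
**   0002  BUFPUT  0001  "x"        -- op1 links to the previous buffer op
**   0003  BUFPUT  0002  str
**   0004  BUFSTR  0003  0001       -- op2 links back to the BUFHDR
*/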
531 | /* -- Constant folding of pointer arithmetic ------------------------------ */ | 769 | /* -- Constant folding of pointer arithmetic ------------------------------ */ |
532 | 770 | ||
533 | LJFOLD(ADD KGC KINT) | 771 | LJFOLD(ADD KGC KINT) |
@@ -648,27 +886,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) | |||
648 | LJFOLDF(kfold_conv_knum_int_num) | 886 | LJFOLDF(kfold_conv_knum_int_num) |
649 | { | 887 | { |
650 | lua_Number n = knumleft; | 888 | lua_Number n = knumleft; |
651 | if (!(fins->op2 & IRCONV_TRUNC)) { | 889 | int32_t k = lj_num2int(n); |
652 | int32_t k = lj_num2int(n); | 890 | if (irt_isguard(fins->t) && n != (lua_Number)k) { |
653 | if (irt_isguard(fins->t) && n != (lua_Number)k) { | 891 | /* We're about to create a guard which always fails, like CONV +1.5. |
654 | /* We're about to create a guard which always fails, like CONV +1.5. | 892 | ** Some pathological loops cause this during LICM, e.g.: |
655 | ** Some pathological loops cause this during LICM, e.g.: | 893 | ** local x,k,t = 0,1.5,{1,[1.5]=2} |
656 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | 894 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end |
657 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | 895 | ** assert(x == 300) |
658 | ** assert(x == 300) | 896 | */ |
659 | */ | 897 | return FAILFOLD; |
660 | return FAILFOLD; | ||
661 | } | ||
662 | return INTFOLD(k); | ||
663 | } else { | ||
664 | return INTFOLD((int32_t)n); | ||
665 | } | 898 | } |
899 | return INTFOLD(k); | ||
666 | } | 900 | } |
667 | 901 | ||
668 | LJFOLD(CONV KNUM IRCONV_U32_NUM) | 902 | LJFOLD(CONV KNUM IRCONV_U32_NUM) |
669 | LJFOLDF(kfold_conv_knum_u32_num) | 903 | LJFOLDF(kfold_conv_knum_u32_num) |
670 | { | 904 | { |
671 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
672 | #ifdef _MSC_VER | 905 | #ifdef _MSC_VER |
673 | { /* Workaround for MSVC bug. */ | 906 | { /* Workaround for MSVC bug. */ |
674 | volatile uint32_t u = (uint32_t)knumleft; | 907 | volatile uint32_t u = (uint32_t)knumleft; |
@@ -682,27 +915,27 @@ LJFOLDF(kfold_conv_knum_u32_num) | |||
682 | LJFOLD(CONV KNUM IRCONV_I64_NUM) | 915 | LJFOLD(CONV KNUM IRCONV_I64_NUM) |
683 | LJFOLDF(kfold_conv_knum_i64_num) | 916 | LJFOLDF(kfold_conv_knum_i64_num) |
684 | { | 917 | { |
685 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
686 | return INT64FOLD((uint64_t)(int64_t)knumleft); | 918 | return INT64FOLD((uint64_t)(int64_t)knumleft); |
687 | } | 919 | } |
688 | 920 | ||
689 | LJFOLD(CONV KNUM IRCONV_U64_NUM) | 921 | LJFOLD(CONV KNUM IRCONV_U64_NUM) |
690 | LJFOLDF(kfold_conv_knum_u64_num) | 922 | LJFOLDF(kfold_conv_knum_u64_num) |
691 | { | 923 | { |
692 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
693 | return INT64FOLD(lj_num2u64(knumleft)); | 924 | return INT64FOLD(lj_num2u64(knumleft)); |
694 | } | 925 | } |
695 | 926 | ||
696 | LJFOLD(TOSTR KNUM) | 927 | LJFOLD(TOSTR KNUM any) |
697 | LJFOLDF(kfold_tostr_knum) | 928 | LJFOLDF(kfold_tostr_knum) |
698 | { | 929 | { |
699 | return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); | 930 | return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft))); |
700 | } | 931 | } |
701 | 932 | ||
702 | LJFOLD(TOSTR KINT) | 933 | LJFOLD(TOSTR KINT any) |
703 | LJFOLDF(kfold_tostr_kint) | 934 | LJFOLDF(kfold_tostr_kint) |
704 | { | 935 | { |
705 | return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); | 936 | return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ? |
937 | lj_strfmt_int(J->L, fleft->i) : | ||
938 | lj_strfmt_char(J->L, fleft->i)); | ||
706 | } | 939 | } |
707 | 940 | ||
708 | LJFOLD(STRTO KGC) | 941 | LJFOLD(STRTO KGC) |
@@ -750,13 +983,13 @@ LJFOLDF(shortcut_round) | |||
750 | return NEXTFOLD; | 983 | return NEXTFOLD; |
751 | } | 984 | } |
752 | 985 | ||
753 | LJFOLD(ABS ABS KNUM) | 986 | LJFOLD(ABS ABS FLOAD) |
754 | LJFOLDF(shortcut_left) | 987 | LJFOLDF(shortcut_left) |
755 | { | 988 | { |
756 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ | 989 | return LEFTFOLD; /* f(g(x)) ==> g(x) */ |
757 | } | 990 | } |
758 | 991 | ||
759 | LJFOLD(ABS NEG KNUM) | 992 | LJFOLD(ABS NEG FLOAD) |
760 | LJFOLDF(shortcut_dropleft) | 993 | LJFOLDF(shortcut_dropleft) |
761 | { | 994 | { |
762 | PHIBARRIER(fleft); | 995 | PHIBARRIER(fleft); |
@@ -836,8 +1069,10 @@ LJFOLDF(simplify_nummuldiv_k) | |||
836 | if (n == 1.0) { /* x o 1 ==> x */ | 1069 | if (n == 1.0) { /* x o 1 ==> x */ |
837 | return LEFTFOLD; | 1070 | return LEFTFOLD; |
838 | } else if (n == -1.0) { /* x o -1 ==> -x */ | 1071 | } else if (n == -1.0) { /* x o -1 ==> -x */ |
1072 | IRRef op1 = fins->op1; | ||
1073 | fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */ | ||
1074 | fins->op1 = op1; | ||
839 | fins->o = IR_NEG; | 1075 | fins->o = IR_NEG; |
840 | fins->op2 = (IRRef1)lj_ir_knum_neg(J); | ||
841 | return RETRYFOLD; | 1076 | return RETRYFOLD; |
842 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ | 1077 | } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ |
843 | fins->o = IR_ADD; | 1078 | fins->o = IR_ADD; |
@@ -878,7 +1113,7 @@ LJFOLDF(simplify_nummuldiv_negneg) | |||
878 | } | 1113 | } |
879 | 1114 | ||
880 | LJFOLD(POW any KINT) | 1115 | LJFOLD(POW any KINT) |
881 | LJFOLDF(simplify_numpow_xk) | 1116 | LJFOLDF(simplify_numpow_xkint) |
882 | { | 1117 | { |
883 | int32_t k = fright->i; | 1118 | int32_t k = fright->i; |
884 | TRef ref = fins->op1; | 1119 | TRef ref = fins->op1; |
@@ -907,13 +1142,22 @@ LJFOLDF(simplify_numpow_xk) | |||
907 | return ref; | 1142 | return ref; |
908 | } | 1143 | } |
909 | 1144 | ||
1145 | LJFOLD(POW any KNUM) | ||
1146 | LJFOLDF(simplify_numpow_xknum) | ||
1147 | { | ||
1148 | if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */ | ||
1149 | return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT); | ||
1150 | return NEXTFOLD; | ||
1151 | } | ||
1152 | |||
910 | LJFOLD(POW KNUM any) | 1153 | LJFOLD(POW KNUM any) |
911 | LJFOLDF(simplify_numpow_kx) | 1154 | LJFOLDF(simplify_numpow_kx) |
912 | { | 1155 | { |
913 | lua_Number n = knumleft; | 1156 | lua_Number n = knumleft; |
914 | if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ | 1157 | if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */ |
915 | fins->o = IR_CONV; | ||
916 | #if LJ_TARGET_X86ORX64 | 1158 | #if LJ_TARGET_X86ORX64 |
1159 | /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */ | ||
1160 | fins->o = IR_CONV; | ||
917 | fins->op1 = fins->op2; | 1161 | fins->op1 = fins->op2; |
918 | fins->op2 = IRCONV_NUM_INT; | 1162 | fins->op2 = IRCONV_NUM_INT; |
919 | fins->op2 = (IRRef1)lj_opt_fold(J); | 1163 | fins->op2 = (IRRef1)lj_opt_fold(J); |
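What the new POW any KNUM rule above rewrites, shown standalone (illustration only; the real rule emits FPMATH with IRFPM_SQRT instead of calling libm):

#include <math.h>
#include <stdio.h>

int main(void)
{
  double x = 2.0;
  printf("%.17g\n%.17g\n", pow(x, 0.5), sqrt(x));  /* Typically identical for x >= 0. */
  return 0;
}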
@@ -1007,10 +1251,10 @@ LJFOLDF(simplify_tobit_conv) | |||
1007 | { | 1251 | { |
1008 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | 1252 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ |
1009 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { | 1253 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { |
1010 | lua_assert(irt_isnum(fleft->t)); | 1254 | lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); |
1011 | return fleft->op1; | 1255 | return fleft->op1; |
1012 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | 1256 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { |
1013 | lua_assert(irt_isnum(fleft->t)); | 1257 | lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); |
1014 | fins->o = IR_CONV; | 1258 | fins->o = IR_CONV; |
1015 | fins->op1 = fleft->op1; | 1259 | fins->op1 = fleft->op1; |
1016 | fins->op2 = (IRT_INT<<5)|IRT_U32; | 1260 | fins->op2 = (IRT_INT<<5)|IRT_U32; |
@@ -1050,7 +1294,7 @@ LJFOLDF(simplify_conv_sext) | |||
1050 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ | 1294 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ |
1051 | if (ref == J->scev.idx) { | 1295 | if (ref == J->scev.idx) { |
1052 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; | 1296 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; |
1053 | lua_assert(irt_isint(J->scev.t)); | 1297 | lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported"); |
1054 | if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { | 1298 | if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { |
1055 | ok_reduce: | 1299 | ok_reduce: |
1056 | #if LJ_TARGET_X64 | 1300 | #if LJ_TARGET_X64 |
@@ -1081,6 +1325,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64) | |||
1081 | LJFOLD(CONV MUL IRCONV_U32_U64) | 1325 | LJFOLD(CONV MUL IRCONV_U32_U64) |
1082 | LJFOLDF(simplify_conv_narrow) | 1326 | LJFOLDF(simplify_conv_narrow) |
1083 | { | 1327 | { |
1328 | #if LJ_64 | ||
1329 | UNUSED(J); | ||
1330 | return NEXTFOLD; | ||
1331 | #else | ||
1084 | IROp op = (IROp)fleft->o; | 1332 | IROp op = (IROp)fleft->o; |
1085 | IRType t = irt_type(fins->t); | 1333 | IRType t = irt_type(fins->t); |
1086 | IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; | 1334 | IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; |
@@ -1091,6 +1339,7 @@ LJFOLDF(simplify_conv_narrow) | |||
1091 | fins->op1 = op1; | 1339 | fins->op1 = op1; |
1092 | fins->op2 = op2; | 1340 | fins->op2 = op2; |
1093 | return RETRYFOLD; | 1341 | return RETRYFOLD; |
1342 | #endif | ||
1094 | } | 1343 | } |
1095 | 1344 | ||
1096 | /* Special CSE rule for CONV. */ | 1345 | /* Special CSE rule for CONV. */ |
@@ -1126,7 +1375,8 @@ LJFOLDF(narrow_convert) | |||
1126 | /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ | 1375 | /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ |
1127 | if (J->chain[IR_LOOP]) | 1376 | if (J->chain[IR_LOOP]) |
1128 | return NEXTFOLD; | 1377 | return NEXTFOLD; |
1129 | lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); | 1378 | lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT, |
1379 | "unexpected CONV TOBIT"); | ||
1130 | return lj_opt_narrow_convert(J); | 1380 | return lj_opt_narrow_convert(J); |
1131 | } | 1381 | } |
1132 | 1382 | ||
@@ -1204,7 +1454,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1204 | ** But this is mainly intended for simple address arithmetic. | 1454 | ** But this is mainly intended for simple address arithmetic. |
1205 | ** Also it's easier for the backend to optimize the original multiplies. | 1455 | ** Also it's easier for the backend to optimize the original multiplies. |
1206 | */ | 1456 | */ |
1207 | if (k == 1) { /* i * 1 ==> i */ | 1457 | if (k == 0) { /* i * 0 ==> 0 */ |
1458 | return RIGHTFOLD; | ||
1459 | } else if (k == 1) { /* i * 1 ==> i */ | ||
1208 | return LEFTFOLD; | 1460 | return LEFTFOLD; |
1209 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ | 1461 | } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ |
1210 | fins->o = IR_BSHL; | 1462 | fins->o = IR_BSHL; |
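The strength reduction performed by simplify_intmul_k(), restated standalone: i * 2^n ==> i << n, gated by the usual power-of-two test (and the k == 0 case now folds straight to the right operand, as shown above):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int32_t i = 37, k = 8;                 /* 8 == 1 << 3 */
  if (k > 0 && (k & (k-1)) == 0)         /* Power-of-two test from the fold. */
    printf("%d %d\n", i * k, i << 3);    /* Both print 296. */
  return 0;
}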
@@ -1217,9 +1469,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) | |||
1217 | LJFOLD(MUL any KINT) | 1469 | LJFOLD(MUL any KINT) |
1218 | LJFOLDF(simplify_intmul_k32) | 1470 | LJFOLDF(simplify_intmul_k32) |
1219 | { | 1471 | { |
1220 | if (fright->i == 0) /* i * 0 ==> 0 */ | 1472 | if (fright->i >= 0) |
1221 | return INTFOLD(0); | ||
1222 | else if (fright->i > 0) | ||
1223 | return simplify_intmul_k(J, fright->i); | 1473 | return simplify_intmul_k(J, fright->i); |
1224 | return NEXTFOLD; | 1474 | return NEXTFOLD; |
1225 | } | 1475 | } |
@@ -1227,21 +1477,20 @@ LJFOLDF(simplify_intmul_k32) | |||
1227 | LJFOLD(MUL any KINT64) | 1477 | LJFOLD(MUL any KINT64) |
1228 | LJFOLDF(simplify_intmul_k64) | 1478 | LJFOLDF(simplify_intmul_k64) |
1229 | { | 1479 | { |
1230 | if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ | 1480 | #if LJ_HASFFI |
1231 | return INT64FOLD(0); | 1481 | if (ir_kint64(fright)->u64 < 0x80000000u) |
1232 | #if LJ_64 | ||
1233 | /* NYI: SPLIT for BSHL and 32 bit backend support. */ | ||
1234 | else if (ir_kint64(fright)->u64 < 0x80000000u) | ||
1235 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); | 1482 | return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); |
1236 | #endif | ||
1237 | return NEXTFOLD; | 1483 | return NEXTFOLD; |
1484 | #else | ||
1485 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; | ||
1486 | #endif | ||
1238 | } | 1487 | } |
1239 | 1488 | ||
1240 | LJFOLD(MOD any KINT) | 1489 | LJFOLD(MOD any KINT) |
1241 | LJFOLDF(simplify_intmod_k) | 1490 | LJFOLDF(simplify_intmod_k) |
1242 | { | 1491 | { |
1243 | int32_t k = fright->i; | 1492 | int32_t k = fright->i; |
1244 | lua_assert(k != 0); | 1493 | lj_assertJ(k != 0, "integer mod 0"); |
1245 | if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ | 1494 | if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ |
1246 | fins->o = IR_BAND; | 1495 | fins->o = IR_BAND; |
1247 | fins->op2 = lj_ir_kint(J, k-1); | 1496 | fins->op2 = lj_ir_kint(J, k-1); |
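The rewrite done by simplify_intmod_k() above, i % 2^n ==> i & (2^n-1), restated standalone. The identity matches floored modulo, so in the VM it also holds for negative i in two's complement; plain C % (truncated) only agrees for non-negative i, as used here:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int32_t i = 1234, k = 16;
  printf("%d %d\n", i % k, i & (k-1));   /* Both print 2. */
  return 0;
}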
@@ -1490,6 +1739,15 @@ LJFOLDF(simplify_shiftk_andk) | |||
1490 | fins->op2 = (IRRef1)lj_ir_kint(J, k); | 1739 | fins->op2 = (IRRef1)lj_ir_kint(J, k); |
1491 | fins->ot = IRTI(IR_BAND); | 1740 | fins->ot = IRTI(IR_BAND); |
1492 | return RETRYFOLD; | 1741 | return RETRYFOLD; |
1742 | } else if (irk->o == IR_KINT64) { | ||
1743 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i, | ||
1744 | (IROp)fins->o); | ||
1745 | IROpT ot = fleft->ot; | ||
1746 | fins->op1 = fleft->op1; | ||
1747 | fins->op1 = (IRRef1)lj_opt_fold(J); | ||
1748 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); | ||
1749 | fins->ot = ot; | ||
1750 | return RETRYFOLD; | ||
1493 | } | 1751 | } |
1494 | return NEXTFOLD; | 1752 | return NEXTFOLD; |
1495 | } | 1753 | } |
@@ -1505,6 +1763,47 @@ LJFOLDF(simplify_andk_shiftk) | |||
1505 | return NEXTFOLD; | 1763 | return NEXTFOLD; |
1506 | } | 1764 | } |
1507 | 1765 | ||
1766 | LJFOLD(BAND BOR KINT) | ||
1767 | LJFOLD(BOR BAND KINT) | ||
1768 | LJFOLDF(simplify_andor_k) | ||
1769 | { | ||
1770 | IRIns *irk = IR(fleft->op2); | ||
1771 | PHIBARRIER(fleft); | ||
1772 | if (irk->o == IR_KINT) { | ||
1773 | int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); | ||
1774 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1775 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1776 | if (k == (fins->o == IR_BAND ? 0 : -1)) { | ||
1777 | fins->op1 = fleft->op1; | ||
1778 | return RETRYFOLD; | ||
1779 | } | ||
1780 | } | ||
1781 | return NEXTFOLD; | ||
1782 | } | ||
1783 | |||
1784 | LJFOLD(BAND BOR KINT64) | ||
1785 | LJFOLD(BOR BAND KINT64) | ||
1786 | LJFOLDF(simplify_andor_k64) | ||
1787 | { | ||
1788 | #if LJ_HASFFI | ||
1789 | IRIns *irk = IR(fleft->op2); | ||
1790 | PHIBARRIER(fleft); | ||
1791 | if (irk->o == IR_KINT64) { | ||
1792 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, | ||
1793 | (IROp)fins->o); | ||
1794 | /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ | ||
1795 | /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ | ||
1796 | if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) { | ||
1797 | fins->op1 = fleft->op1; | ||
1798 | return RETRYFOLD; | ||
1799 | } | ||
1800 | } | ||
1801 | return NEXTFOLD; | ||
1802 | #else | ||
1803 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; | ||
1804 | #endif | ||
1805 | } | ||
1806 | |||
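The bit identity behind the new simplify_andor_k rules, checked standalone: (i | k1) & k2 equals i & k2 whenever (k1 & k2) == 0, and dually (i & k1) | k2 equals i | k2 whenever (k1 | k2) == -1:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int32_t i = 0x1234, k1 = 0x00ff0000, k2 = 0x0000ffff;   /* k1 & k2 == 0 */
  printf("%d\n", ((i | k1) & k2) == (i & k2));            /* Prints 1. */
  return 0;
}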
1508 | /* -- Reassociation ------------------------------------------------------- */ | 1807 | /* -- Reassociation ------------------------------------------------------- */ |
1509 | 1808 | ||
1510 | LJFOLD(ADD ADD KINT) | 1809 | LJFOLD(ADD ADD KINT) |
@@ -1534,11 +1833,11 @@ LJFOLD(BOR BOR KINT64) | |||
1534 | LJFOLD(BXOR BXOR KINT64) | 1833 | LJFOLD(BXOR BXOR KINT64) |
1535 | LJFOLDF(reassoc_intarith_k64) | 1834 | LJFOLDF(reassoc_intarith_k64) |
1536 | { | 1835 | { |
1537 | #if LJ_HASFFI || LJ_64 | 1836 | #if LJ_HASFFI |
1538 | IRIns *irk = IR(fleft->op2); | 1837 | IRIns *irk = IR(fleft->op2); |
1539 | if (irk->o == IR_KINT64) { | 1838 | if (irk->o == IR_KINT64) { |
1540 | uint64_t k = kfold_int64arith(ir_k64(irk)->u64, | 1839 | uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, |
1541 | ir_k64(fright)->u64, (IROp)fins->o); | 1840 | (IROp)fins->o); |
1542 | PHIBARRIER(fleft); | 1841 | PHIBARRIER(fleft); |
1543 | fins->op1 = fleft->op1; | 1842 | fins->op1 = fleft->op1; |
1544 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); | 1843 | fins->op2 = (IRRef1)lj_ir_kint64(J, k); |
@@ -1546,12 +1845,10 @@ LJFOLDF(reassoc_intarith_k64) | |||
1546 | } | 1845 | } |
1547 | return NEXTFOLD; | 1846 | return NEXTFOLD; |
1548 | #else | 1847 | #else |
1549 | UNUSED(J); lua_assert(0); return FAILFOLD; | 1848 | UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; |
1550 | #endif | 1849 | #endif |
1551 | } | 1850 | } |
1552 | 1851 | ||
1553 | LJFOLD(MIN MIN any) | ||
1554 | LJFOLD(MAX MAX any) | ||
1555 | LJFOLD(BAND BAND any) | 1852 | LJFOLD(BAND BAND any) |
1556 | LJFOLD(BOR BOR any) | 1853 | LJFOLD(BOR BOR any) |
1557 | LJFOLDF(reassoc_dup) | 1854 | LJFOLDF(reassoc_dup) |
@@ -1561,6 +1858,15 @@ LJFOLDF(reassoc_dup) | |||
1561 | return NEXTFOLD; | 1858 | return NEXTFOLD; |
1562 | } | 1859 | } |
1563 | 1860 | ||
1861 | LJFOLD(MIN MIN any) | ||
1862 | LJFOLD(MAX MAX any) | ||
1863 | LJFOLDF(reassoc_dup_minmax) | ||
1864 | { | ||
1865 | if (fins->op2 == fleft->op2) | ||
1866 | return LEFTFOLD; /* (a o b) o b ==> a o b */ | ||
1867 | return NEXTFOLD; | ||
1868 | } | ||
1869 | |||
1564 | LJFOLD(BXOR BXOR any) | 1870 | LJFOLD(BXOR BXOR any) |
1565 | LJFOLDF(reassoc_bxor) | 1871 | LJFOLDF(reassoc_bxor) |
1566 | { | 1872 | { |
@@ -1599,23 +1905,12 @@ LJFOLDF(reassoc_shift) | |||
1599 | return NEXTFOLD; | 1905 | return NEXTFOLD; |
1600 | } | 1906 | } |
1601 | 1907 | ||
1602 | LJFOLD(MIN MIN KNUM) | ||
1603 | LJFOLD(MAX MAX KNUM) | ||
1604 | LJFOLD(MIN MIN KINT) | 1908 | LJFOLD(MIN MIN KINT) |
1605 | LJFOLD(MAX MAX KINT) | 1909 | LJFOLD(MAX MAX KINT) |
1606 | LJFOLDF(reassoc_minmax_k) | 1910 | LJFOLDF(reassoc_minmax_k) |
1607 | { | 1911 | { |
1608 | IRIns *irk = IR(fleft->op2); | 1912 | IRIns *irk = IR(fleft->op2); |
1609 | if (irk->o == IR_KNUM) { | 1913 | if (irk->o == IR_KINT) { |
1610 | lua_Number a = ir_knum(irk)->n; | ||
1611 | lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); | ||
1612 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ | ||
1613 | return LEFTFOLD; | ||
1614 | PHIBARRIER(fleft); | ||
1615 | fins->op1 = fleft->op1; | ||
1616 | fins->op2 = (IRRef1)lj_ir_knum(J, y); | ||
1617 | return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ | ||
1618 | } else if (irk->o == IR_KINT) { | ||
1619 | int32_t a = irk->i; | 1914 | int32_t a = irk->i; |
1620 | int32_t y = kfold_intop(a, fright->i, fins->o); | 1915 | int32_t y = kfold_intop(a, fright->i, fins->o); |
1621 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ | 1916 | if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ |
@@ -1628,24 +1923,6 @@ LJFOLDF(reassoc_minmax_k) | |||
1628 | return NEXTFOLD; | 1923 | return NEXTFOLD; |
1629 | } | 1924 | } |
1630 | 1925 | ||
1631 | LJFOLD(MIN MAX any) | ||
1632 | LJFOLD(MAX MIN any) | ||
1633 | LJFOLDF(reassoc_minmax_left) | ||
1634 | { | ||
1635 | if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) | ||
1636 | return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ | ||
1637 | return NEXTFOLD; | ||
1638 | } | ||
1639 | |||
1640 | LJFOLD(MIN any MAX) | ||
1641 | LJFOLD(MAX any MIN) | ||
1642 | LJFOLDF(reassoc_minmax_right) | ||
1643 | { | ||
1644 | if (fins->op1 == fright->op1 || fins->op1 == fright->op2) | ||
1645 | return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ | ||
1646 | return NEXTFOLD; | ||
1647 | } | ||
1648 | |||
1649 | /* -- Array bounds check elimination -------------------------------------- */ | 1926 | /* -- Array bounds check elimination -------------------------------------- */ |
1650 | 1927 | ||
1651 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. | 1928 | /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. |
@@ -1772,8 +2049,6 @@ LJFOLDF(comm_comp) | |||
1772 | 2049 | ||
1773 | LJFOLD(BAND any any) | 2050 | LJFOLD(BAND any any) |
1774 | LJFOLD(BOR any any) | 2051 | LJFOLD(BOR any any) |
1775 | LJFOLD(MIN any any) | ||
1776 | LJFOLD(MAX any any) | ||
1777 | LJFOLDF(comm_dup) | 2052 | LJFOLDF(comm_dup) |
1778 | { | 2053 | { |
1779 | if (fins->op1 == fins->op2) /* x o x ==> x */ | 2054 | if (fins->op1 == fins->op2) /* x o x ==> x */ |
@@ -1781,6 +2056,15 @@ LJFOLDF(comm_dup) | |||
1781 | return fold_comm_swap(J); | 2056 | return fold_comm_swap(J); |
1782 | } | 2057 | } |
1783 | 2058 | ||
2059 | LJFOLD(MIN any any) | ||
2060 | LJFOLD(MAX any any) | ||
2061 | LJFOLDF(comm_dup_minmax) | ||
2062 | { | ||
2063 | if (fins->op1 == fins->op2) /* x o x ==> x */ | ||
2064 | return LEFTFOLD; | ||
2065 | return NEXTFOLD; | ||
2066 | } | ||
2067 | |||
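MIN/MAX are split out of the generic commutative and reassociation folds in this patch, presumably because the algebraic identities those folds assume break down once NaN (and -0) operands are possible. A standalone sketch of the order dependence; the exact VM min convention is an assumption here, the point is only that a comparison-select min is not symmetric for NaN:

#include <math.h>
#include <stdio.h>

int main(void)
{
  double a = 0.0, b = NAN;
  printf("%g %g\n", a < b ? a : b, b < a ? b : a);   /* Prints: nan 0 */
  return 0;
}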
1784 | LJFOLD(BXOR any any) | 2068 | LJFOLD(BXOR any any) |
1785 | LJFOLDF(comm_bxor) | 2069 | LJFOLDF(comm_bxor) |
1786 | { | 2070 | { |
@@ -1817,7 +2101,7 @@ LJFOLDF(merge_eqne_snew_kgc) | |||
1817 | { | 2101 | { |
1818 | GCstr *kstr = ir_kstr(fright); | 2102 | GCstr *kstr = ir_kstr(fright); |
1819 | int32_t len = (int32_t)kstr->len; | 2103 | int32_t len = (int32_t)kstr->len; |
1820 | lua_assert(irt_isstr(fins->t)); | 2104 | lj_assertJ(irt_isstr(fins->t), "bad equality IR type"); |
1821 | 2105 | ||
1822 | #if LJ_TARGET_UNALIGNED | 2106 | #if LJ_TARGET_UNALIGNED |
1823 | #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ | 2107 | #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ |
@@ -1881,7 +2165,7 @@ LJFOLD(HLOAD KKPTR) | |||
1881 | LJFOLDF(kfold_hload_kkptr) | 2165 | LJFOLDF(kfold_hload_kkptr) |
1882 | { | 2166 | { |
1883 | UNUSED(J); | 2167 | UNUSED(J); |
1884 | lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); | 2168 | lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv"); |
1885 | return TREF_NIL; | 2169 | return TREF_NIL; |
1886 | } | 2170 | } |
1887 | 2171 | ||
@@ -1891,8 +2175,8 @@ LJFOLDX(lj_opt_fwd_hload) | |||
1891 | LJFOLD(ULOAD any) | 2175 | LJFOLD(ULOAD any) |
1892 | LJFOLDX(lj_opt_fwd_uload) | 2176 | LJFOLDX(lj_opt_fwd_uload) |
1893 | 2177 | ||
1894 | LJFOLD(CALLL any IRCALL_lj_tab_len) | 2178 | LJFOLD(ALEN any any) |
1895 | LJFOLDX(lj_opt_fwd_tab_len) | 2179 | LJFOLDX(lj_opt_fwd_alen) |
1896 | 2180 | ||
1897 | /* Upvalue refs are really loads, but there are no corresponding stores. | 2181 | /* Upvalue refs are really loads, but there are no corresponding stores. |
1898 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | 2182 | ** So CSE is ok for them, except for UREFO across a GC step (see below). |
@@ -1953,6 +2237,7 @@ LJFOLDF(fwd_href_tdup) | |||
1953 | ** an aliased table, as it may invalidate all of the pointers and fields. | 2237 | ** an aliased table, as it may invalidate all of the pointers and fields. |
1954 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on | 2238 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on |
1955 | ** FLOADs. And NEWREF itself is treated like a store (see below). | 2239 | ** FLOADs. And NEWREF itself is treated like a store (see below). |
2240 | ** LREF is constant (per trace) since coroutine switches are not inlined. | ||
1956 | */ | 2241 | */ |
1957 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) | 2242 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) |
1958 | LJFOLDF(fload_tab_tnew_asize) | 2243 | LJFOLDF(fload_tab_tnew_asize) |
@@ -2016,6 +2301,35 @@ LJFOLDF(fload_str_len_snew) | |||
2016 | return NEXTFOLD; | 2301 | return NEXTFOLD; |
2017 | } | 2302 | } |
2018 | 2303 | ||
2304 | LJFOLD(FLOAD TOSTR IRFL_STR_LEN) | ||
2305 | LJFOLDF(fload_str_len_tostr) | ||
2306 | { | ||
2307 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR) | ||
2308 | return INTFOLD(1); | ||
2309 | return NEXTFOLD; | ||
2310 | } | ||
2311 | |||
2312 | LJFOLD(FLOAD any IRFL_SBUF_W) | ||
2313 | LJFOLD(FLOAD any IRFL_SBUF_E) | ||
2314 | LJFOLD(FLOAD any IRFL_SBUF_B) | ||
2315 | LJFOLD(FLOAD any IRFL_SBUF_L) | ||
2316 | LJFOLD(FLOAD any IRFL_SBUF_REF) | ||
2317 | LJFOLD(FLOAD any IRFL_SBUF_R) | ||
2318 | LJFOLDF(fload_sbuf) | ||
2319 | { | ||
2320 | TRef tr = lj_opt_fwd_fload(J); | ||
2321 | return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; | ||
2322 | } | ||
2323 | |||
2324 | /* The fast function ID of function objects is immutable. */ | ||
2325 | LJFOLD(FLOAD KGC IRFL_FUNC_FFID) | ||
2326 | LJFOLDF(fload_func_ffid_kgc) | ||
2327 | { | ||
2328 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
2329 | return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); | ||
2330 | return NEXTFOLD; | ||
2331 | } | ||
2332 | |||
2019 | /* The C type ID of cdata objects is immutable. */ | 2333 | /* The C type ID of cdata objects is immutable. */ |
2020 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) | 2334 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) |
2021 | LJFOLDF(fload_cdata_typeid_kgc) | 2335 | LJFOLDF(fload_cdata_typeid_kgc) |
@@ -2062,6 +2376,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew) | |||
2062 | } | 2376 | } |
2063 | 2377 | ||
2064 | LJFOLD(FLOAD any IRFL_STR_LEN) | 2378 | LJFOLD(FLOAD any IRFL_STR_LEN) |
2379 | LJFOLD(FLOAD any IRFL_FUNC_ENV) | ||
2380 | LJFOLD(FLOAD any IRFL_THREAD_ENV) | ||
2065 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) | 2381 | LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) |
2066 | LJFOLD(FLOAD any IRFL_CDATA_PTR) | 2382 | LJFOLD(FLOAD any IRFL_CDATA_PTR) |
2067 | LJFOLD(FLOAD any IRFL_CDATA_INT) | 2383 | LJFOLD(FLOAD any IRFL_CDATA_INT) |
@@ -2081,7 +2397,7 @@ LJFOLDF(fwd_sload) | |||
2081 | TRef tr = lj_opt_cse(J); | 2397 | TRef tr = lj_opt_cse(J); |
2082 | return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; | 2398 | return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; |
2083 | } else { | 2399 | } else { |
2084 | lua_assert(J->slot[fins->op1] != 0); | 2400 | lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed"); |
2085 | return J->slot[fins->op1]; | 2401 | return J->slot[fins->op1]; |
2086 | } | 2402 | } |
2087 | } | 2403 | } |
@@ -2138,6 +2454,17 @@ LJFOLDF(barrier_tnew_tdup) | |||
2138 | return DROPFOLD; | 2454 | return DROPFOLD; |
2139 | } | 2455 | } |
2140 | 2456 | ||
2457 | /* -- Profiling ----------------------------------------------------------- */ | ||
2458 | |||
2459 | LJFOLD(PROF any any) | ||
2460 | LJFOLDF(prof) | ||
2461 | { | ||
2462 | IRRef ref = J->chain[IR_PROF]; | ||
2463 | if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */ | ||
2464 | return ref; | ||
2465 | return EMITFOLD; | ||
2466 | } | ||
2467 | |||
2141 | /* -- Stores and allocations ---------------------------------------------- */ | 2468 | /* -- Stores and allocations ---------------------------------------------- */ |
2142 | 2469 | ||
2143 | /* Stores and allocations cannot be folded or passed on to CSE in general. | 2470 | /* Stores and allocations cannot be folded or passed on to CSE in general. |
@@ -2160,8 +2487,10 @@ LJFOLD(XSTORE any any) | |||
2160 | LJFOLDX(lj_opt_dse_xstore) | 2487 | LJFOLDX(lj_opt_dse_xstore) |
2161 | 2488 | ||
2162 | LJFOLD(NEWREF any any) /* Treated like a store. */ | 2489 | LJFOLD(NEWREF any any) /* Treated like a store. */ |
2163 | LJFOLD(CALLS any any) | 2490 | LJFOLD(TMPREF any any) |
2491 | LJFOLD(CALLA any any) | ||
2164 | LJFOLD(CALLL any any) /* Safeguard fallback. */ | 2492 | LJFOLD(CALLL any any) /* Safeguard fallback. */ |
2493 | LJFOLD(CALLS any any) | ||
2165 | LJFOLD(CALLXS any any) | 2494 | LJFOLD(CALLXS any any) |
2166 | LJFOLD(XBAR) | 2495 | LJFOLD(XBAR) |
2167 | LJFOLD(RETF any any) /* Modifies BASE. */ | 2496 | LJFOLD(RETF any any) /* Modifies BASE. */ |
@@ -2194,8 +2523,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) | |||
2194 | IRRef ref; | 2523 | IRRef ref; |
2195 | 2524 | ||
2196 | if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { | 2525 | if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { |
2197 | lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | | 2526 | lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | |
2198 | JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); | 2527 | JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT, |
2528 | "bad JIT_F_OPT_DEFAULT"); | ||
2199 | /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ | 2529 | /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ |
2200 | if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) | 2530 | if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) |
2201 | return lj_opt_cse(J); | 2531 | return lj_opt_cse(J); |
@@ -2220,10 +2550,14 @@ retry: | |||
2220 | if (fins->op1 >= J->cur.nk) { | 2550 | if (fins->op1 >= J->cur.nk) { |
2221 | key += (uint32_t)IR(fins->op1)->o << 10; | 2551 | key += (uint32_t)IR(fins->op1)->o << 10; |
2222 | *fleft = *IR(fins->op1); | 2552 | *fleft = *IR(fins->op1); |
2553 | if (fins->op1 < REF_TRUE) | ||
2554 | fleft[1] = IR(fins->op1)[1]; | ||
2223 | } | 2555 | } |
2224 | if (fins->op2 >= J->cur.nk) { | 2556 | if (fins->op2 >= J->cur.nk) { |
2225 | key += (uint32_t)IR(fins->op2)->o; | 2557 | key += (uint32_t)IR(fins->op2)->o; |
2226 | *fright = *IR(fins->op2); | 2558 | *fright = *IR(fins->op2); |
2559 | if (fins->op2 < REF_TRUE) | ||
2560 | fright[1] = IR(fins->op2)[1]; | ||
2227 | } else { | 2561 | } else { |
2228 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ | 2562 | key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ |
2229 | } | 2563 | } |
@@ -2253,7 +2587,7 @@ retry: | |||
2253 | return lj_ir_kint(J, fins->i); | 2587 | return lj_ir_kint(J, fins->i); |
2254 | if (ref == FAILFOLD) | 2588 | if (ref == FAILFOLD) |
2255 | lj_trace_err(J, LJ_TRERR_GFAIL); | 2589 | lj_trace_err(J, LJ_TRERR_GFAIL); |
2256 | lua_assert(ref == DROPFOLD); | 2590 | lj_assertJ(ref == DROPFOLD, "bad fold result"); |
2257 | return REF_DROP; | 2591 | return REF_DROP; |
2258 | } | 2592 | } |
2259 | 2593 | ||