Diffstat (limited to 'src/lj_opt_fold.c')
-rw-r--r--  src/lj_opt_fold.c  1415
1 file changed, 1415 insertions, 0 deletions
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
new file mode 100644
index 00000000..e5d98162
--- /dev/null
+++ b/src/lj_opt_fold.c
@@ -0,0 +1,1415 @@
/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_vm.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
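**
** E.g. an ADD whose left operand is a KINT and whose right operand is a
** non-constant instruction is matched as (ADD KINT <op>) first, then
** (ADD any <op>), then (ADD KINT any) and finally (ADD any any).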
**
** The masked key is used to look up a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions from the trace recorder as well as
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint(), which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)     (&J->cur.ir[(ref)])
#define fins        (&J->fold.ins)
#define fleft       (&J->fold.left)
#define fright      (&J->fold.right)
#define knumleft    (ir_knum(fleft)->n)
#define knumright   (ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)  static TRef LJ_FASTCALL name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */
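/* A rule definition reads, e.g.:
**
**   LJFOLD(ADD KNUM KNUM)
**   LJFOLDF(kfold_numarith) { ... }
**
** buildvm collects the LJFOLD specs into the semi-perfect hash table in
** lj_folddef.h and binds them to the fold function that follows.
*/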

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)  if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_SNEW] || J->chain[IR_TOSTR]))

/* -- Constant folding ---------------------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POWI KNUM KINT)
LJFOLDF(kfold_powi)
{
  lua_Number a = knumleft;
  lua_Number b = cast_num(fright->i);
  lua_Number y = lj_vm_foldarith(a, b, IR_POWI - IR_ADD);
  return lj_ir_knum(J, y);
}

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  default: lua_assert(0); break;
  }
  return k1;
}

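/* E.g. kfold_intop(6, 3, IR_BSHL) == 6 << 3 == 48; note that all shift
** and rotate counts are masked with 31 first.
*/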
LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(TONUM KINT)
LJFOLDF(kfold_tonum)
{
  return lj_ir_knum(J, cast_num(fleft->i));
}

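/* The KNUM right operand is the 2^52+2^51 bias constant attached by the
** recorder: adding it moves the integer part of the left operand into
** the low 32 bits of the double's mantissa, which are read out below.
*/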
LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  TValue tv;
  tv.n = knumleft + knumright;
  return INTFOLD((int32_t)tv.u32.lo);
}

LJFOLD(TOINT KNUM any)
LJFOLDF(kfold_toint)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != cast_num(k)) {
    /* We're about to create a guard which always fails, like TOINT +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_str_numconv(strdata(ir_kstr(fleft)), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

LJFOLD(SNEW STRREF KINT)
LJFOLDF(kfold_snew)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
  PHIBARRIER(fleft);
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    const char *s = strdata(ir_kstr(IR(fleft->op1)));
    int32_t ofs = IR(fleft->op2)->i;
    return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i));
  }
  return NEXTFOLD;
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(LT KGC KGC)
LJFOLD(GE KGC KGC)
LJFOLD(LE KGC KGC)
LJFOLD(GT KGC KGC)
LJFOLDF(kfold_strcomp)
{
  if (irt_isstr(fins->t)) {
    GCstr *a = ir_kstr(fleft);
    GCstr *b = ir_kstr(fright);
    return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o));
  }
  return NEXTFOLD;
}

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(FPMATH TONUM IRFPM_FLOOR)
LJFOLD(FPMATH TONUM IRFPM_CEIL)
LJFOLD(FPMATH TONUM IRFPM_TRUNC)
LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG KNUM)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

LJFOLD(TONUM TOINT)
LJFOLDF(shortcut_leftleft_toint)
{
  PHIBARRIER(fleft);
  if (irt_isguard(fleft->t))  /* Only safe with a guarded TOINT. */
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(TOINT TONUM any)
LJFOLD(TOBIT TONUM KNUM)  /* The inverse must NOT be shortcut! */
LJFOLDF(shortcut_leftleft_across_phi)
{
  /* Fold even across PHI to avoid expensive int->num->int conversions. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POWI any KINT)
LJFOLDF(simplify_powi_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
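  /* Worked example: for k = 5 (binary 101) the first loop is skipped,
  ** tmp becomes x^2 and then x^4, and the final multiply yields
  ** x * x^4 = x^5 -- square-and-multiply, low bits first.
  */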
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
        ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}

LJFOLD(POWI KNUM any)
LJFOLDF(simplify_powi_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_TONUM;
    fins->op1 = fins->op2;
    fins->op2 = 0;
    fins->op2 = (IRRef1)lj_opt_fold(J);
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- FP conversion narrowing --------------------------------------------- */

LJFOLD(TOINT ADD any)
LJFOLD(TOINT SUB any)
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  return lj_opt_narrow_convert(J);
}

/* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */
LJFOLD(TOINT any any)
LJFOLDF(cse_toint)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref, op1 = fins->op1;
    uint8_t guard = irt_isguard(fins->t);
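    /* A guarded TOINT can stand in for an unguarded one, but not vice
    ** versa -- hence the >= comparison on the guard flag below.
    */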
    for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev)
      if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard)
        return ref;
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op1 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLDF(simplify_band_k)
{
  if (fright->i == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLDF(simplify_bor_k)
{
  if (fright->i == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (fright->i == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLDF(simplify_bxor_k)
{
  if (fright->i == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (fright->i == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t k = (fright->i & 31);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k != fright->i) {  /* i o k ==> i o (k & 31) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&31) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
#if LJ_TARGET_MASKEDSHIFT
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if (irk->o == IR_KINT) {  /* i o (j & 31) ==> i o j */
    int32_t k = irk->i & 31;
    if (k == 31) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLDF(simplify_shift1_ki)
{
  if (fleft->i == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLDF(simplify_shift2_ki)
{
  if (fleft->i == 0 || fleft->i == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

/* -- Reassociation ------------------------------------------------------- */

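/* E.g. (i + 1) + 2 reassociates to i + 3, and (i | 0x0f) | 0xff folds
** the two constants into a single 0xff.
*/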
LJFOLD(ADD ADD KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

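/* Combine nested shifts by constants, e.g. (i << 4) << 6 ==> i << 10,
** while (i << 20) << 20 over-shifts and folds to 0 for logical shifts.
*/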
LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t k = (irk->i & 31) + (fright->i & 31);
    if (k > 31) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
        return INTFOLD(0);
      else if (fins->o == IR_BSAR)
        k = 31;
      else
        k &= 31;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number b = knumright;
    lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(reassoc_abc)
{
  if (irref_isk(fright->op2)) {
    IRIns *add2 = IR(fright->op1);
    if (add2->o == IR_ADD && irref_isk(add2->op2) &&
        IR(fright->op2)->i == -IR(add2->op2)->i) {
      IRRef ref = J->chain[IR_ABC];
      IRRef lim = add2->op1;
      if (fins->op1 > lim) lim = fins->op1;
      while (ref > lim) {
        IRIns *ir = IR(ref);
        if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
          return DROPFOLD;
        ref = ir->prev;
      }
    }
  }
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

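/* E.g. ADD with a constant as the left operand is swapped so the
** constant ends up in op2, because constant refs always sort below
** instruction refs.
*/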
LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return INTFOLD(0);
  return comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static int32_t kfold_xload(IRIns *ir, const void *p)
{
#if !LJ_TARGET_X86ORX64
#error "Missing support for unaligned loads"
#endif
  switch (irt_type(ir->t)) {
  case IRT_I8: return (int32_t)*(int8_t *)p;
  case IRT_U8: return (int32_t)*(uint8_t *)p;
  case IRT_I16: return (int32_t)*(int16_t *)p;
  case IRT_U16: return (int32_t)*(uint16_t *)p;
  default: lua_assert(irt_isint(ir->t)); return (int32_t)*(int32_t *)p;
  }
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs!
*/
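/* E.g. an EQ against a 2-byte constant string becomes a guard on the
** SNEW length plus a single unaligned U16 XLOAD compared against both
** characters at once.
*/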
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));
  if (len <= 4) {  /* Handle string lengths 0, 1, 2, 3, 4. */
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    lua_assert(IR(strref)->o == IR_STRREF);
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                               IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0);
      TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(TLEN any)
LJFOLDX(lj_opt_fwd_tlen)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[fins->op2]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
        GCfunc *fn2 = ir_kfunc(IR(ir->op1));
        if (gco2uv(gcref(fn2->l.uvptr[ir->op2])) == uv) {
          if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
            break;
          return ref;
        }
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But NEWREF may invalidate all of them.
** Lacking better disambiguation for table references, these optimizations
** are simply disabled across any NEWREF.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
*/
LJFOLD(HREF any any)
LJFOLDF(cse_href)
{
  TRef tr = lj_opt_cse(J);
  return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
}

LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  lua_assert(J->slot[fins->op1] != 0);
  return J->slot[fins->op1];
}

/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */
LJFOLD(XLOAD STRREF any)
LJFOLDF(xload_str)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *str = ir_kstr(IR(fleft->op1));
    int32_t ofs = IR(fleft->op2)->i;
    lua_assert((MSize)ofs < str->len);
    lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len);
    return INTFOLD(kfold_xload(fins, strdata(str)+ofs));
  }
  return CSEFOLD;
}
/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because the stack
** may be resized during a GC step.
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLDF(store_raw)
{
  return EMITFOLD;
}

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
**   xxxxxxxx iiiiiiii llllllll rrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**   iiiiiiii = 8 bit folded instruction opcode
**   llllllll = 8 bit left instruction opcode
**   rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field
*/

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* DSE disabled? Emit raw IR for stores. */
    if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 16);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 8;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0xffu);  /* For IRFPM_* and IRFL_*. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | any;
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
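    /* Widen the wildcard mask: 0x0000 -> 0xff00 -> 0x00ff -> 0xffff,
    ** i.e. mask out the left operand, then only the right, then both.
    */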
    any = (any | (any >> 8)) ^ 0xff00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif