diff options
Diffstat (limited to 'src/lj_record.c')
-rw-r--r-- | src/lj_record.c | 953 |
1 files changed, 757 insertions, 196 deletions
diff --git a/src/lj_record.c b/src/lj_record.c index f7552db0..44163e5b 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -20,6 +20,9 @@ | |||
20 | #endif | 20 | #endif |
21 | #include "lj_bc.h" | 21 | #include "lj_bc.h" |
22 | #include "lj_ff.h" | 22 | #include "lj_ff.h" |
23 | #if LJ_HASPROFILE | ||
24 | #include "lj_debug.h" | ||
25 | #endif | ||
23 | #include "lj_ir.h" | 26 | #include "lj_ir.h" |
24 | #include "lj_jit.h" | 27 | #include "lj_jit.h" |
25 | #include "lj_ircall.h" | 28 | #include "lj_ircall.h" |
@@ -30,6 +33,7 @@ | |||
30 | #include "lj_snap.h" | 33 | #include "lj_snap.h" |
31 | #include "lj_dispatch.h" | 34 | #include "lj_dispatch.h" |
32 | #include "lj_vm.h" | 35 | #include "lj_vm.h" |
36 | #include "lj_prng.h" | ||
33 | 37 | ||
34 | /* Some local macros to save typing. Undef'd at the end. */ | 38 | /* Some local macros to save typing. Undef'd at the end. */ |
35 | #define IR(ref) (&J->cur.ir[(ref)]) | 39 | #define IR(ref) (&J->cur.ir[(ref)]) |
@@ -47,31 +51,52 @@ | |||
47 | static void rec_check_ir(jit_State *J) | 51 | static void rec_check_ir(jit_State *J) |
48 | { | 52 | { |
49 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; | 53 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; |
50 | lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); | 54 | lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536, |
51 | for (i = nins-1; i >= nk; i--) { | 55 | "inconsistent IR layout"); |
56 | for (i = nk; i < nins; i++) { | ||
52 | IRIns *ir = IR(i); | 57 | IRIns *ir = IR(i); |
53 | uint32_t mode = lj_ir_mode[ir->o]; | 58 | uint32_t mode = lj_ir_mode[ir->o]; |
54 | IRRef op1 = ir->op1; | 59 | IRRef op1 = ir->op1; |
55 | IRRef op2 = ir->op2; | 60 | IRRef op2 = ir->op2; |
61 | const char *err = NULL; | ||
56 | switch (irm_op1(mode)) { | 62 | switch (irm_op1(mode)) { |
57 | case IRMnone: lua_assert(op1 == 0); break; | 63 | case IRMnone: |
58 | case IRMref: lua_assert(op1 >= nk); | 64 | if (op1 != 0) err = "IRMnone op1 used"; |
59 | lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; | 65 | break; |
66 | case IRMref: | ||
67 | if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i)) | ||
68 | err = "IRMref op1 out of range"; | ||
69 | break; | ||
60 | case IRMlit: break; | 70 | case IRMlit: break; |
61 | case IRMcst: lua_assert(i < REF_BIAS); continue; | 71 | case IRMcst: |
72 | if (i >= REF_BIAS) { err = "constant in IR range"; break; } | ||
73 | if (irt_is64(ir->t) && ir->o != IR_KNULL) | ||
74 | i++; | ||
75 | continue; | ||
62 | } | 76 | } |
63 | switch (irm_op2(mode)) { | 77 | switch (irm_op2(mode)) { |
64 | case IRMnone: lua_assert(op2 == 0); break; | 78 | case IRMnone: |
65 | case IRMref: lua_assert(op2 >= nk); | 79 | if (op2) err = "IRMnone op2 used"; |
66 | lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; | 80 | break; |
81 | case IRMref: | ||
82 | if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i)) | ||
83 | err = "IRMref op2 out of range"; | ||
84 | break; | ||
67 | case IRMlit: break; | 85 | case IRMlit: break; |
68 | case IRMcst: lua_assert(0); break; | 86 | case IRMcst: err = "IRMcst op2"; break; |
69 | } | 87 | } |
70 | if (ir->prev) { | 88 | if (!err && ir->prev) { |
71 | lua_assert(ir->prev >= nk); | 89 | if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i)) |
72 | lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); | 90 | err = "chain out of range"; |
73 | lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); | 91 | else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o) |
92 | err = "chain to different op"; | ||
74 | } | 93 | } |
94 | lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s", | ||
95 | i-REF_BIAS, | ||
96 | ir->o, | ||
97 | irm_op1(mode) == IRMref ? op1-REF_BIAS : op1, | ||
98 | irm_op2(mode) == IRMref ? op2-REF_BIAS : op2, | ||
99 | err); | ||
75 | } | 100 | } |
76 | } | 101 | } |
77 | 102 | ||
@@ -81,48 +106,79 @@ static void rec_check_slots(jit_State *J) | |||
81 | BCReg s, nslots = J->baseslot + J->maxslot; | 106 | BCReg s, nslots = J->baseslot + J->maxslot; |
82 | int32_t depth = 0; | 107 | int32_t depth = 0; |
83 | cTValue *base = J->L->base - J->baseslot; | 108 | cTValue *base = J->L->base - J->baseslot; |
84 | lua_assert(J->baseslot >= 1); | 109 | lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot"); |
85 | lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); | 110 | lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME), |
86 | lua_assert(nslots <= LJ_MAX_JSLOTS); | 111 | "baseslot does not point to frame"); |
112 | lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow"); | ||
87 | for (s = 0; s < nslots; s++) { | 113 | for (s = 0; s < nslots; s++) { |
88 | TRef tr = J->slot[s]; | 114 | TRef tr = J->slot[s]; |
89 | if (tr) { | 115 | if (tr) { |
90 | cTValue *tv = &base[s]; | 116 | cTValue *tv = &base[s]; |
91 | IRRef ref = tref_ref(tr); | 117 | IRRef ref = tref_ref(tr); |
92 | IRIns *ir; | 118 | IRIns *ir = NULL; /* Silence compiler. */ |
93 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); | 119 | if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { |
94 | ir = IR(ref); | 120 | lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins, |
95 | lua_assert(irt_t(ir->t) == tref_t(tr)); | 121 | "slot %d ref %04d out of range", s, ref - REF_BIAS); |
122 | ir = IR(ref); | ||
123 | lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s); | ||
124 | } | ||
96 | if (s == 0) { | 125 | if (s == 0) { |
97 | lua_assert(tref_isfunc(tr)); | 126 | lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function"); |
127 | #if LJ_FR2 | ||
128 | } else if (s == 1) { | ||
129 | lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1"); | ||
130 | #endif | ||
98 | } else if ((tr & TREF_FRAME)) { | 131 | } else if ((tr & TREF_FRAME)) { |
99 | GCfunc *fn = gco2func(frame_gc(tv)); | 132 | GCfunc *fn = gco2func(frame_gc(tv)); |
100 | BCReg delta = (BCReg)(tv - frame_prev(tv)); | 133 | BCReg delta = (BCReg)(tv - frame_prev(tv)); |
101 | lua_assert(tref_isfunc(tr)); | 134 | #if LJ_FR2 |
102 | if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); | 135 | lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, |
103 | lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); | 136 | "frame slot %d PC mismatch", s); |
137 | tr = J->slot[s-1]; | ||
138 | ir = IR(tref_ref(tr)); | ||
139 | #endif | ||
140 | lj_assertJ(tref_isfunc(tr), | ||
141 | "frame slot %d is not a function", s-LJ_FR2); | ||
142 | lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir), | ||
143 | "frame slot %d function mismatch", s-LJ_FR2); | ||
144 | lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) | ||
145 | : (s == delta + LJ_FR2), | ||
146 | "frame slot %d broken chain", s-LJ_FR2); | ||
104 | depth++; | 147 | depth++; |
105 | } else if ((tr & TREF_CONT)) { | 148 | } else if ((tr & TREF_CONT)) { |
106 | lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); | 149 | #if LJ_FR2 |
107 | lua_assert((J->slot[s+1] & TREF_FRAME)); | 150 | lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, |
151 | "cont slot %d continuation mismatch", s); | ||
152 | #else | ||
153 | lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void), | ||
154 | "cont slot %d continuation mismatch", s); | ||
155 | #endif | ||
156 | lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), | ||
157 | "cont slot %d not followed by frame", s); | ||
108 | depth++; | 158 | depth++; |
159 | } else if ((tr & TREF_KEYINDEX)) { | ||
160 | lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d", | ||
161 | s, tref_type(tr)); | ||
109 | } else { | 162 | } else { |
110 | if (tvisnumber(tv)) | 163 | /* Number repr. may differ, but other types must be the same. */ |
111 | lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ | 164 | lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : |
112 | else | 165 | itype2irt(tv) == tref_type(tr), |
113 | lua_assert(itype2irt(tv) == tref_type(tr)); | 166 | "slot %d type mismatch: stack type %d vs IR type %d", |
167 | s, itypemap(tv), tref_type(tr)); | ||
114 | if (tref_isk(tr)) { /* Compare constants. */ | 168 | if (tref_isk(tr)) { /* Compare constants. */ |
115 | TValue tvk; | 169 | TValue tvk; |
116 | lj_ir_kvalue(J->L, &tvk, ir); | 170 | lj_ir_kvalue(J->L, &tvk, ir); |
117 | if (!(tvisnum(&tvk) && tvisnan(&tvk))) | 171 | lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ? |
118 | lua_assert(lj_obj_equal(tv, &tvk)); | 172 | (tvisnum(tv) && tvisnan(tv)) : |
119 | else | 173 | lj_obj_equal(tv, &tvk), |
120 | lua_assert(tvisnum(tv) && tvisnan(tv)); | 174 | "slot %d const mismatch: stack %016llx vs IR %016llx", |
175 | s, tv->u64, tvk.u64); | ||
121 | } | 176 | } |
122 | } | 177 | } |
123 | } | 178 | } |
124 | } | 179 | } |
125 | lua_assert(J->framedepth == depth); | 180 | lj_assertJ(J->framedepth == depth, |
181 | "frame depth mismatch %d vs %d", J->framedepth, depth); | ||
126 | } | 182 | } |
127 | #endif | 183 | #endif |
128 | 184 | ||
@@ -156,10 +212,11 @@ static TRef sload(jit_State *J, int32_t slot) | |||
156 | /* Get TRef for current function. */ | 212 | /* Get TRef for current function. */ |
157 | static TRef getcurrf(jit_State *J) | 213 | static TRef getcurrf(jit_State *J) |
158 | { | 214 | { |
159 | if (J->base[-1]) | 215 | if (J->base[-1-LJ_FR2]) |
160 | return J->base[-1]; | 216 | return J->base[-1-LJ_FR2]; |
161 | lua_assert(J->baseslot == 1); | 217 | /* Non-base frame functions ought to be loaded already. */ |
162 | return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); | 218 | lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot"); |
219 | return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); | ||
163 | } | 220 | } |
164 | 221 | ||
165 | /* Compare for raw object equality. | 222 | /* Compare for raw object equality. |
@@ -205,6 +262,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o) | |||
205 | return 0; /* Can't represent lightuserdata (pointless). */ | 262 | return 0; /* Can't represent lightuserdata (pointless). */ |
206 | } | 263 | } |
207 | 264 | ||
265 | /* Emit a VLOAD with the correct type. */ | ||
266 | TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t) | ||
267 | { | ||
268 | TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx); | ||
269 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | ||
270 | return tr; | ||
271 | } | ||
272 | |||
208 | /* -- Record loop ops ----------------------------------------------------- */ | 273 | /* -- Record loop ops ----------------------------------------------------- */ |
209 | 274 | ||
210 | /* Loop event. */ | 275 | /* Loop event. */ |
@@ -221,17 +286,21 @@ static void canonicalize_slots(jit_State *J) | |||
221 | if (LJ_DUALNUM) return; | 286 | if (LJ_DUALNUM) return; |
222 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { | 287 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { |
223 | TRef tr = J->slot[s]; | 288 | TRef tr = J->slot[s]; |
224 | if (tref_isinteger(tr)) { | 289 | if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) { |
225 | IRIns *ir = IR(tref_ref(tr)); | 290 | IRIns *ir = IR(tref_ref(tr)); |
226 | if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) | 291 | if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY)))) |
227 | J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); | 292 | J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); |
228 | } | 293 | } |
229 | } | 294 | } |
230 | } | 295 | } |
231 | 296 | ||
232 | /* Stop recording. */ | 297 | /* Stop recording. */ |
233 | static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) | 298 | void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk) |
234 | { | 299 | { |
300 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
301 | if (J->retryrec) | ||
302 | lj_trace_err(J, LJ_TRERR_RETRY); | ||
303 | #endif | ||
235 | lj_trace_end(J); | 304 | lj_trace_end(J); |
236 | J->cur.linktype = (uint8_t)linktype; | 305 | J->cur.linktype = (uint8_t)linktype; |
237 | J->cur.link = (uint16_t)lnk; | 306 | J->cur.link = (uint16_t)lnk; |
@@ -399,7 +468,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, | |||
399 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); | 468 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); |
400 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); | 469 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); |
401 | int tc, dir = rec_for_direction(&tv[FORL_STEP]); | 470 | int tc, dir = rec_for_direction(&tv[FORL_STEP]); |
402 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); | 471 | lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI, |
472 | "bad bytecode %d instead of FORI/JFORI", bc_op(*fori)); | ||
403 | scev->t.irt = t; | 473 | scev->t.irt = t; |
404 | scev->dir = dir; | 474 | scev->dir = dir; |
405 | scev->stop = tref_ref(stop); | 475 | scev->stop = tref_ref(stop); |
@@ -455,7 +525,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
455 | IRT_NUM; | 525 | IRT_NUM; |
456 | for (i = FORL_IDX; i <= FORL_STEP; i++) { | 526 | for (i = FORL_IDX; i <= FORL_STEP; i++) { |
457 | if (!tr[i]) sload(J, ra+i); | 527 | if (!tr[i]) sload(J, ra+i); |
458 | lua_assert(tref_isnumber_str(tr[i])); | 528 | lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type"); |
459 | if (tref_isstr(tr[i])) | 529 | if (tref_isstr(tr[i])) |
460 | tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); | 530 | tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); |
461 | if (t == IRT_INT) { | 531 | if (t == IRT_INT) { |
@@ -499,8 +569,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
499 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) | 569 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) |
500 | { | 570 | { |
501 | BCReg ra = bc_a(iterins); | 571 | BCReg ra = bc_a(iterins); |
502 | lua_assert(J->base[ra] != 0); | 572 | if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ |
503 | if (!tref_isnil(J->base[ra])) { /* Looping back? */ | ||
504 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ | 573 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ |
505 | J->maxslot = ra-1+bc_b(J->pc[-1]); | 574 | J->maxslot = ra-1+bc_b(J->pc[-1]); |
506 | J->pc += bc_j(iterins)+1; | 575 | J->pc += bc_j(iterins)+1; |
@@ -538,12 +607,13 @@ static int innerloopleft(jit_State *J, const BCIns *pc) | |||
538 | /* Handle the case when an interpreted loop op is hit. */ | 607 | /* Handle the case when an interpreted loop op is hit. */ |
539 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | 608 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) |
540 | { | 609 | { |
541 | if (J->parent == 0) { | 610 | if (J->parent == 0 && J->exitno == 0) { |
542 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { | 611 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { |
612 | if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */ | ||
543 | /* Same loop? */ | 613 | /* Same loop? */ |
544 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ | 614 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ |
545 | lj_trace_err(J, LJ_TRERR_LLEAVE); | 615 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
546 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ | 616 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ |
547 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ | 617 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ |
548 | /* It's usually better to abort here and wait until the inner loop | 618 | /* It's usually better to abort here and wait until the inner loop |
549 | ** is traced. But if the inner loop repeatedly didn't loop back, | 619 | ** is traced. But if the inner loop repeatedly didn't loop back, |
@@ -568,18 +638,129 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | |||
568 | /* Handle the case when an already compiled loop op is hit. */ | 638 | /* Handle the case when an already compiled loop op is hit. */ |
569 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) | 639 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) |
570 | { | 640 | { |
571 | if (J->parent == 0) { /* Root trace hit an inner loop. */ | 641 | if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */ |
572 | /* Better let the inner loop spawn a side trace back here. */ | 642 | /* Better let the inner loop spawn a side trace back here. */ |
573 | lj_trace_err(J, LJ_TRERR_LINNER); | 643 | lj_trace_err(J, LJ_TRERR_LINNER); |
574 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ | 644 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ |
575 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ | 645 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ |
576 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) | 646 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
577 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ | 647 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */ |
578 | else | 648 | else |
579 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ | 649 | lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ |
580 | } /* Side trace continues across a loop that's left or not entered. */ | 650 | } /* Side trace continues across a loop that's left or not entered. */ |
581 | } | 651 | } |
582 | 652 | ||
653 | /* Record ITERN. */ | ||
654 | static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) | ||
655 | { | ||
656 | #if LJ_BE | ||
657 | /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, | ||
658 | ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. | ||
659 | */ | ||
660 | UNUSED(ra); UNUSED(rb); | ||
661 | setintV(&J->errinfo, (int32_t)BC_ITERN); | ||
662 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | ||
663 | #else | ||
664 | RecordIndex ix; | ||
665 | /* Since ITERN is recorded at the start, we need our own loop detection. */ | ||
666 | if (J->pc == J->startpc && | ||
667 | (J->cur.nins > REF_FIRST+1 || | ||
668 | (J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) && | ||
669 | J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { | ||
670 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ | ||
671 | return LOOPEV_ENTER; | ||
672 | } | ||
673 | J->maxslot = ra; | ||
674 | lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ | ||
675 | ix.tab = getslot(J, ra-2); | ||
676 | ix.key = J->base[ra-1] ? J->base[ra-1] : | ||
677 | sloadt(J, (int32_t)(ra-1), IRT_INT, IRSLOAD_KEYINDEX); | ||
678 | copyTV(J->L, &ix.tabv, &J->L->base[ra-2]); | ||
679 | copyTV(J->L, &ix.keyv, &J->L->base[ra-1]); | ||
680 | ix.idxchain = (rb < 3); /* Omit value type check, if unused. */ | ||
681 | ix.mobj = 1; /* We need the next index, too. */ | ||
682 | J->maxslot = ra + lj_record_next(J, &ix); | ||
683 | J->needsnap = 1; | ||
684 | if (!tref_isnil(ix.key)) { /* Looping back? */ | ||
685 | J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */ | ||
686 | J->base[ra] = ix.key; | ||
687 | J->base[ra+1] = ix.val; | ||
688 | J->pc += bc_j(J->pc[1])+2; | ||
689 | return LOOPEV_ENTER; | ||
690 | } else { | ||
691 | J->maxslot = ra-3; | ||
692 | J->pc += 2; | ||
693 | return LOOPEV_LEAVE; | ||
694 | } | ||
695 | #endif | ||
696 | } | ||
697 | |||
698 | /* Record ISNEXT. */ | ||
699 | static void rec_isnext(jit_State *J, BCReg ra) | ||
700 | { | ||
701 | cTValue *b = &J->L->base[ra-3]; | ||
702 | if (tvisfunc(b) && funcV(b)->c.ffid == FF_next && | ||
703 | tvistab(b+1) && tvisnil(b+2)) { | ||
704 | /* These checks are folded away for a compiled pairs(). */ | ||
705 | TRef func = getslot(J, ra-3); | ||
706 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID); | ||
707 | emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next)); | ||
708 | (void)getslot(J, ra-2); /* Type check for table. */ | ||
709 | (void)getslot(J, ra-1); /* Type check for nil key. */ | ||
710 | J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX; | ||
711 | J->maxslot = ra; | ||
712 | } else { /* Abort trace. Interpreter will despecialize bytecode. */ | ||
713 | lj_trace_err(J, LJ_TRERR_RECERR); | ||
714 | } | ||
715 | } | ||
716 | |||
717 | /* -- Record profiler hook checks ----------------------------------------- */ | ||
718 | |||
719 | #if LJ_HASPROFILE | ||
720 | |||
721 | /* Need to insert profiler hook check? */ | ||
722 | static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) | ||
723 | { | ||
724 | GCproto *ppt; | ||
725 | lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l', | ||
726 | "bad profiler mode %c", J->prof_mode); | ||
727 | if (!pt) | ||
728 | return 0; | ||
729 | ppt = J->prev_pt; | ||
730 | J->prev_pt = pt; | ||
731 | if (pt != ppt && ppt) { | ||
732 | J->prev_line = -1; | ||
733 | return 1; | ||
734 | } | ||
735 | if (J->prof_mode == 'l') { | ||
736 | BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc)); | ||
737 | BCLine pline = J->prev_line; | ||
738 | J->prev_line = line; | ||
739 | if (pline != line) | ||
740 | return 1; | ||
741 | } | ||
742 | return 0; | ||
743 | } | ||
744 | |||
745 | static void rec_profile_ins(jit_State *J, const BCIns *pc) | ||
746 | { | ||
747 | if (J->prof_mode && rec_profile_need(J, J->pt, pc)) { | ||
748 | emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); | ||
749 | lj_snap_add(J); | ||
750 | } | ||
751 | } | ||
752 | |||
753 | static void rec_profile_ret(jit_State *J) | ||
754 | { | ||
755 | if (J->prof_mode == 'f') { | ||
756 | emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); | ||
757 | J->prev_pt = NULL; | ||
758 | lj_snap_add(J); | ||
759 | } | ||
760 | } | ||
761 | |||
762 | #endif | ||
763 | |||
583 | /* -- Record calls and returns -------------------------------------------- */ | 764 | /* -- Record calls and returns -------------------------------------------- */ |
584 | 765 | ||
585 | /* Specialize to the runtime value of the called function or its prototype. */ | 766 | /* Specialize to the runtime value of the called function or its prototype. */ |
@@ -590,11 +771,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) | |||
590 | GCproto *pt = funcproto(fn); | 771 | GCproto *pt = funcproto(fn); |
591 | /* Too many closures created? Probably not a monomorphic function. */ | 772 | /* Too many closures created? Probably not a monomorphic function. */ |
592 | if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ | 773 | if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ |
593 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); | 774 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC); |
594 | emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); | 775 | emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt))); |
595 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ | 776 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ |
596 | return tr; | 777 | return tr; |
597 | } | 778 | } |
779 | } else { | ||
780 | /* Don't specialize to non-monomorphic builtins. */ | ||
781 | switch (fn->c.ffid) { | ||
782 | case FF_coroutine_wrap_aux: | ||
783 | case FF_string_gmatch_aux: | ||
784 | /* NYI: io_file_iter doesn't have an ffid, yet. */ | ||
785 | { /* Specialize to the ffid. */ | ||
786 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); | ||
787 | emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid)); | ||
788 | } | ||
789 | return tr; | ||
790 | default: | ||
791 | /* NYI: don't specialize to non-monomorphic C functions. */ | ||
792 | break; | ||
793 | } | ||
598 | } | 794 | } |
599 | /* Otherwise specialize to the function (closure) value itself. */ | 795 | /* Otherwise specialize to the function (closure) value itself. */ |
600 | kfunc = lj_ir_kfunc(J, fn); | 796 | kfunc = lj_ir_kfunc(J, fn); |
@@ -607,21 +803,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
607 | { | 803 | { |
608 | RecordIndex ix; | 804 | RecordIndex ix; |
609 | TValue *functv = &J->L->base[func]; | 805 | TValue *functv = &J->L->base[func]; |
610 | TRef *fbase = &J->base[func]; | 806 | TRef kfunc, *fbase = &J->base[func]; |
611 | ptrdiff_t i; | 807 | ptrdiff_t i; |
612 | for (i = 0; i <= nargs; i++) | 808 | (void)getslot(J, func); /* Ensure func has a reference. */ |
613 | (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ | 809 | for (i = 1; i <= nargs; i++) |
810 | (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ | ||
614 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ | 811 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ |
615 | ix.tab = fbase[0]; | 812 | ix.tab = fbase[0]; |
616 | copyTV(J->L, &ix.tabv, functv); | 813 | copyTV(J->L, &ix.tabv, functv); |
617 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) | 814 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) |
618 | lj_trace_err(J, LJ_TRERR_NOMM); | 815 | lj_trace_err(J, LJ_TRERR_NOMM); |
619 | for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ | 816 | for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ |
620 | fbase[i] = fbase[i-1]; | 817 | fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; |
818 | #if LJ_FR2 | ||
819 | fbase[2] = fbase[0]; | ||
820 | #endif | ||
621 | fbase[0] = ix.mobj; /* Replace function. */ | 821 | fbase[0] = ix.mobj; /* Replace function. */ |
622 | functv = &ix.mobjv; | 822 | functv = &ix.mobjv; |
623 | } | 823 | } |
624 | fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); | 824 | kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); |
825 | #if LJ_FR2 | ||
826 | fbase[0] = kfunc; | ||
827 | fbase[1] = TREF_FRAME; | ||
828 | #else | ||
829 | fbase[0] = kfunc | TREF_FRAME; | ||
830 | #endif | ||
625 | J->maxslot = (BCReg)nargs; | 831 | J->maxslot = (BCReg)nargs; |
626 | } | 832 | } |
627 | 833 | ||
@@ -631,8 +837,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
631 | rec_call_setup(J, func, nargs); | 837 | rec_call_setup(J, func, nargs); |
632 | /* Bump frame. */ | 838 | /* Bump frame. */ |
633 | J->framedepth++; | 839 | J->framedepth++; |
634 | J->base += func+1; | 840 | J->base += func+1+LJ_FR2; |
635 | J->baseslot += func+1; | 841 | J->baseslot += func+1+LJ_FR2; |
636 | if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) | 842 | if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) |
637 | lj_trace_err(J, LJ_TRERR_STACKOV); | 843 | lj_trace_err(J, LJ_TRERR_STACKOV); |
638 | } | 844 | } |
@@ -650,7 +856,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
650 | func += cbase; | 856 | func += cbase; |
651 | } | 857 | } |
652 | /* Move func + args down. */ | 858 | /* Move func + args down. */ |
653 | memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); | 859 | if (LJ_FR2 && J->baseslot == 2) |
860 | J->base[func+1] = TREF_FRAME; | ||
861 | memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); | ||
654 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ | 862 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ |
655 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ | 863 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ |
656 | if (++J->tailcalled > J->loopunroll) | 864 | if (++J->tailcalled > J->loopunroll) |
@@ -680,6 +888,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt) | |||
680 | return 0; | 888 | return 0; |
681 | } | 889 | } |
682 | 890 | ||
891 | static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot); | ||
892 | |||
683 | /* Record return. */ | 893 | /* Record return. */ |
684 | void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | 894 | void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) |
685 | { | 895 | { |
@@ -691,30 +901,32 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
691 | BCReg cbase = (BCReg)frame_delta(frame); | 901 | BCReg cbase = (BCReg)frame_delta(frame); |
692 | if (--J->framedepth <= 0) | 902 | if (--J->framedepth <= 0) |
693 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 903 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
694 | lua_assert(J->baseslot > 1); | 904 | lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); |
695 | gotresults++; | 905 | gotresults++; |
696 | rbase += cbase; | 906 | rbase += cbase; |
697 | J->baseslot -= (BCReg)cbase; | 907 | J->baseslot -= (BCReg)cbase; |
698 | J->base -= cbase; | 908 | J->base -= cbase; |
699 | J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ | 909 | J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ |
700 | frame = frame_prevd(frame); | 910 | frame = frame_prevd(frame); |
911 | J->needsnap = 1; /* Stop catching on-trace errors. */ | ||
701 | } | 912 | } |
702 | /* Return to lower frame via interpreter for unhandled cases. */ | 913 | /* Return to lower frame via interpreter for unhandled cases. */ |
703 | if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && | 914 | if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && |
704 | (!frame_islua(frame) || | 915 | (!frame_islua(frame) || |
705 | (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { | 916 | (J->parent == 0 && J->exitno == 0 && |
917 | !bc_isret(bc_op(J->cur.startins))))) { | ||
706 | /* NYI: specialize to frame type and return directly, not via RET*. */ | 918 | /* NYI: specialize to frame type and return directly, not via RET*. */ |
707 | for (i = 0; i < (ptrdiff_t)rbase; i++) | 919 | for (i = 0; i < (ptrdiff_t)rbase; i++) |
708 | J->base[i] = 0; /* Purge dead slots. */ | 920 | J->base[i] = 0; /* Purge dead slots. */ |
709 | J->maxslot = rbase + (BCReg)gotresults; | 921 | J->maxslot = rbase + (BCReg)gotresults; |
710 | rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ | 922 | lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ |
711 | return; | 923 | return; |
712 | } | 924 | } |
713 | if (frame_isvarg(frame)) { | 925 | if (frame_isvarg(frame)) { |
714 | BCReg cbase = (BCReg)frame_delta(frame); | 926 | BCReg cbase = (BCReg)frame_delta(frame); |
715 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ | 927 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ |
716 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 928 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
717 | lua_assert(J->baseslot > 1); | 929 | lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); |
718 | rbase += cbase; | 930 | rbase += cbase; |
719 | J->baseslot -= (BCReg)cbase; | 931 | J->baseslot -= (BCReg)cbase; |
720 | J->base -= cbase; | 932 | J->base -= cbase; |
@@ -724,27 +936,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
724 | BCIns callins = *(frame_pc(frame)-1); | 936 | BCIns callins = *(frame_pc(frame)-1); |
725 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; | 937 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; |
726 | BCReg cbase = bc_a(callins); | 938 | BCReg cbase = bc_a(callins); |
727 | GCproto *pt = funcproto(frame_func(frame - (cbase+1))); | 939 | GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); |
728 | if ((pt->flags & PROTO_NOJIT)) | 940 | if ((pt->flags & PROTO_NOJIT)) |
729 | lj_trace_err(J, LJ_TRERR_CJITOFF); | 941 | lj_trace_err(J, LJ_TRERR_CJITOFF); |
730 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { | 942 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { |
731 | if (check_downrec_unroll(J, pt)) { | 943 | if (check_downrec_unroll(J, pt)) { |
732 | J->maxslot = (BCReg)(rbase + gotresults); | 944 | J->maxslot = (BCReg)(rbase + gotresults); |
733 | lj_snap_purge(J); | 945 | lj_snap_purge(J); |
734 | rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ | 946 | lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */ |
735 | return; | 947 | return; |
736 | } | 948 | } |
737 | lj_snap_add(J); | 949 | lj_snap_add(J); |
738 | } | 950 | } |
739 | for (i = 0; i < nresults; i++) /* Adjust results. */ | 951 | for (i = 0; i < nresults; i++) /* Adjust results. */ |
740 | J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; | 952 | J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; |
741 | J->maxslot = cbase+(BCReg)nresults; | 953 | J->maxslot = cbase+(BCReg)nresults; |
742 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ | 954 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ |
743 | J->framedepth--; | 955 | J->framedepth--; |
744 | lua_assert(J->baseslot > cbase+1); | 956 | lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return"); |
745 | J->baseslot -= cbase+1; | 957 | J->baseslot -= cbase+1+LJ_FR2; |
746 | J->base -= cbase+1; | 958 | J->base -= cbase+1+LJ_FR2; |
747 | } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { | 959 | } else if (J->parent == 0 && J->exitno == 0 && |
960 | !bc_isret(bc_op(J->cur.startins))) { | ||
748 | /* Return to lower frame would leave the loop in a root trace. */ | 961 | /* Return to lower frame would leave the loop in a root trace. */ |
749 | lj_trace_err(J, LJ_TRERR_LLEAVE); | 962 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
750 | } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ | 963 | } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ |
@@ -752,13 +965,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
752 | } else { /* Return to lower frame. Guard for the target we return to. */ | 965 | } else { /* Return to lower frame. Guard for the target we return to. */ |
753 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); | 966 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); |
754 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); | 967 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); |
755 | emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); | 968 | emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); |
756 | J->retdepth++; | 969 | J->retdepth++; |
757 | J->needsnap = 1; | 970 | J->needsnap = 1; |
758 | lua_assert(J->baseslot == 1); | 971 | lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return"); |
759 | /* Shift result slots up and clear the slots of the new frame below. */ | 972 | /* Shift result slots up and clear the slots of the new frame below. */ |
760 | memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); | 973 | memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); |
761 | memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); | 974 | memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); |
762 | } | 975 | } |
763 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ | 976 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ |
764 | ASMFunction cont = frame_contf(frame); | 977 | ASMFunction cont = frame_contf(frame); |
@@ -767,24 +980,52 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
767 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 980 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
768 | J->baseslot -= (BCReg)cbase; | 981 | J->baseslot -= (BCReg)cbase; |
769 | J->base -= cbase; | 982 | J->base -= cbase; |
770 | J->maxslot = cbase-2; | 983 | J->maxslot = cbase-(2<<LJ_FR2); |
771 | if (cont == lj_cont_ra) { | 984 | if (cont == lj_cont_ra) { |
772 | /* Copy result to destination slot. */ | 985 | /* Copy result to destination slot. */ |
773 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | 986 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); |
774 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; | 987 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; |
775 | if (dst >= J->maxslot) J->maxslot = dst+1; | 988 | if (dst >= J->maxslot) { |
989 | J->maxslot = dst+1; | ||
990 | } | ||
776 | } else if (cont == lj_cont_nop) { | 991 | } else if (cont == lj_cont_nop) { |
777 | /* Nothing to do here. */ | 992 | /* Nothing to do here. */ |
778 | } else if (cont == lj_cont_cat) { | 993 | } else if (cont == lj_cont_cat) { |
779 | lua_assert(0); | 994 | BCReg bslot = bc_b(*(frame_contpc(frame)-1)); |
995 | TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; | ||
996 | if (bslot != J->maxslot) { /* Concatenate the remainder. */ | ||
997 | TValue *b = J->L->base, save; /* Simulate lower frame and result. */ | ||
998 | /* Can't handle MM_concat + CALLT + fast func side-effects. */ | ||
999 | if (J->postproc != LJ_POST_NONE) | ||
1000 | lj_trace_err(J, LJ_TRERR_NYIRETL); | ||
1001 | J->base[J->maxslot] = tr; | ||
1002 | copyTV(J->L, &save, b-(2<<LJ_FR2)); | ||
1003 | if (gotresults) | ||
1004 | copyTV(J->L, b-(2<<LJ_FR2), b+rbase); | ||
1005 | else | ||
1006 | setnilV(b-(2<<LJ_FR2)); | ||
1007 | J->L->base = b - cbase; | ||
1008 | tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); | ||
1009 | b = J->L->base + cbase; /* Undo. */ | ||
1010 | J->L->base = b; | ||
1011 | copyTV(J->L, b-(2<<LJ_FR2), &save); | ||
1012 | } | ||
1013 | if (tr) { /* Store final result. */ | ||
1014 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | ||
1015 | J->base[dst] = tr; | ||
1016 | if (dst >= J->maxslot) { | ||
1017 | J->maxslot = dst+1; | ||
1018 | } | ||
1019 | } /* Otherwise continue with another __concat call. */ | ||
780 | } else { | 1020 | } else { |
781 | /* Result type already specialized. */ | 1021 | /* Result type already specialized. */ |
782 | lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); | 1022 | lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt, |
1023 | "bad continuation type"); | ||
783 | } | 1024 | } |
784 | } else { | 1025 | } else { |
785 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ | 1026 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ |
786 | } | 1027 | } |
787 | lua_assert(J->baseslot >= 1); | 1028 | lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return"); |
788 | } | 1029 | } |
789 | 1030 | ||
790 | /* -- Metamethod handling ------------------------------------------------- */ | 1031 | /* -- Metamethod handling ------------------------------------------------- */ |
@@ -792,19 +1033,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
792 | /* Prepare to record call to metamethod. */ | 1033 | /* Prepare to record call to metamethod. */ |
793 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) | 1034 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) |
794 | { | 1035 | { |
795 | BCReg s, top = curr_proto(J->L)->framesize; | 1036 | BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; |
796 | TRef trcont; | 1037 | #if LJ_FR2 |
797 | setcont(&J->L->base[top], cont); | 1038 | J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); |
798 | #if LJ_64 | 1039 | J->base[top+1] = TREF_CONT; |
799 | trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); | ||
800 | #else | 1040 | #else |
801 | trcont = lj_ir_kptr(J, (void *)cont); | 1041 | J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; |
802 | #endif | 1042 | #endif |
803 | J->base[top] = trcont | TREF_CONT; | ||
804 | J->framedepth++; | 1043 | J->framedepth++; |
805 | for (s = J->maxslot; s < top; s++) | 1044 | for (s = J->maxslot; s < top; s++) |
806 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ | 1045 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ |
807 | return top+1; | 1046 | return top+1+LJ_FR2; |
808 | } | 1047 | } |
809 | 1048 | ||
810 | /* Record metamethod lookup. */ | 1049 | /* Record metamethod lookup. */ |
@@ -823,7 +1062,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
823 | cTValue *mo; | 1062 | cTValue *mo; |
824 | if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { | 1063 | if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { |
825 | /* Specialize to the C library namespace object. */ | 1064 | /* Specialize to the C library namespace object. */ |
826 | emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); | 1065 | emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); |
827 | } else { | 1066 | } else { |
828 | /* Specialize to the type of userdata. */ | 1067 | /* Specialize to the type of userdata. */ |
829 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); | 1068 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); |
@@ -852,7 +1091,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
852 | } | 1091 | } |
853 | /* The cdata metatable is treated as immutable. */ | 1092 | /* The cdata metatable is treated as immutable. */ |
854 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; | 1093 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; |
855 | ix->mt = mix.tab = lj_ir_ktab(J, mt); | 1094 | ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, |
1095 | GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); | ||
856 | goto nocheck; | 1096 | goto nocheck; |
857 | } | 1097 | } |
858 | ix->mt = mt ? mix.tab : TREF_NIL; | 1098 | ix->mt = mt ? mix.tab : TREF_NIL; |
@@ -879,12 +1119,12 @@ nocheck: | |||
879 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | 1119 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) |
880 | { | 1120 | { |
881 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ | 1121 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ |
882 | BCReg func = rec_mm_prep(J, lj_cont_ra); | 1122 | BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); |
883 | TRef *base = J->base + func; | 1123 | TRef *base = J->base + func; |
884 | TValue *basev = J->L->base + func; | 1124 | TValue *basev = J->L->base + func; |
885 | base[1] = ix->tab; base[2] = ix->key; | 1125 | base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; |
886 | copyTV(J->L, basev+1, &ix->tabv); | 1126 | copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); |
887 | copyTV(J->L, basev+2, &ix->keyv); | 1127 | copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); |
888 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ | 1128 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ |
889 | if (mm != MM_unm) { | 1129 | if (mm != MM_unm) { |
890 | ix->tab = ix->key; | 1130 | ix->tab = ix->key; |
@@ -896,6 +1136,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | |||
896 | } | 1136 | } |
897 | ok: | 1137 | ok: |
898 | base[0] = ix->mobj; | 1138 | base[0] = ix->mobj; |
1139 | #if LJ_FR2 | ||
1140 | base[1] = 0; | ||
1141 | #endif | ||
899 | copyTV(J->L, basev+0, &ix->mobjv); | 1142 | copyTV(J->L, basev+0, &ix->mobjv); |
900 | lj_record_call(J, func, 2); | 1143 | lj_record_call(J, func, 2); |
901 | return 0; /* No result yet. */ | 1144 | return 0; /* No result yet. */ |
@@ -912,6 +1155,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
912 | TRef *base = J->base + func; | 1155 | TRef *base = J->base + func; |
913 | TValue *basev = J->L->base + func; | 1156 | TValue *basev = J->L->base + func; |
914 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); | 1157 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); |
1158 | base += LJ_FR2; | ||
1159 | basev += LJ_FR2; | ||
915 | base[1] = tr; copyTV(J->L, basev+1, tv); | 1160 | base[1] = tr; copyTV(J->L, basev+1, tv); |
916 | #if LJ_52 | 1161 | #if LJ_52 |
917 | base[2] = tr; copyTV(J->L, basev+2, tv); | 1162 | base[2] = tr; copyTV(J->L, basev+2, tv); |
@@ -921,7 +1166,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
921 | lj_record_call(J, func, 2); | 1166 | lj_record_call(J, func, 2); |
922 | } else { | 1167 | } else { |
923 | if (LJ_52 && tref_istab(tr)) | 1168 | if (LJ_52 && tref_istab(tr)) |
924 | return lj_ir_call(J, IRCALL_lj_tab_len, tr); | 1169 | return emitir(IRTI(IR_ALEN), tr, TREF_NIL); |
925 | lj_trace_err(J, LJ_TRERR_NOMM); | 1170 | lj_trace_err(J, LJ_TRERR_NOMM); |
926 | } | 1171 | } |
927 | return 0; /* No result yet. */ | 1172 | return 0; /* No result yet. */ |
@@ -931,10 +1176,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
931 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) | 1176 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) |
932 | { | 1177 | { |
933 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); | 1178 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); |
934 | TRef *base = J->base + func; | 1179 | TRef *base = J->base + func + LJ_FR2; |
935 | TValue *tv = J->L->base + func; | 1180 | TValue *tv = J->L->base + func + LJ_FR2; |
936 | base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; | 1181 | base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; |
937 | copyTV(J->L, tv+0, &ix->mobjv); | 1182 | copyTV(J->L, tv-LJ_FR2, &ix->mobjv); |
938 | copyTV(J->L, tv+1, &ix->valv); | 1183 | copyTV(J->L, tv+1, &ix->valv); |
939 | copyTV(J->L, tv+2, &ix->keyv); | 1184 | copyTV(J->L, tv+2, &ix->keyv); |
940 | lj_record_call(J, func, 2); | 1185 | lj_record_call(J, func, 2); |
@@ -1030,7 +1275,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) | |||
1030 | ix->tab = ix->val; | 1275 | ix->tab = ix->val; |
1031 | copyTV(J->L, &ix->tabv, &ix->valv); | 1276 | copyTV(J->L, &ix->tabv, &ix->valv); |
1032 | } else { | 1277 | } else { |
1033 | lua_assert(tref_iscdata(ix->key)); | 1278 | lj_assertJ(tref_iscdata(ix->key), "cdata expected"); |
1034 | ix->tab = ix->key; | 1279 | ix->tab = ix->key; |
1035 | copyTV(J->L, &ix->tabv, &ix->keyv); | 1280 | copyTV(J->L, &ix->tabv, &ix->keyv); |
1036 | } | 1281 | } |
@@ -1041,6 +1286,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) | |||
1041 | 1286 | ||
1042 | /* -- Indexed access ------------------------------------------------------ */ | 1287 | /* -- Indexed access ------------------------------------------------------ */ |
1043 | 1288 | ||
1289 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1290 | /* Bump table allocations in bytecode when they grow during recording. */ | ||
1291 | static void rec_idx_bump(jit_State *J, RecordIndex *ix) | ||
1292 | { | ||
1293 | RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))]; | ||
1294 | if (tref_ref(ix->tab) == rbc->ref) { | ||
1295 | const BCIns *pc = mref(rbc->pc, const BCIns); | ||
1296 | GCtab *tb = tabV(&ix->tabv); | ||
1297 | uint32_t nhbits; | ||
1298 | IRIns *ir; | ||
1299 | if (!tvisnil(&ix->keyv)) | ||
1300 | (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */ | ||
1301 | nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0; | ||
1302 | ir = IR(tref_ref(ix->tab)); | ||
1303 | if (ir->o == IR_TNEW) { | ||
1304 | uint32_t ah = bc_d(*pc); | ||
1305 | uint32_t asize = ah & 0x7ff, hbits = ah >> 11; | ||
1306 | if (nhbits > hbits) hbits = nhbits; | ||
1307 | if (tb->asize > asize) { | ||
1308 | asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff; | ||
1309 | } | ||
1310 | if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */ | ||
1311 | /* Patch bytecode, but continue recording (for more patching). */ | ||
1312 | setbc_d(pc, (asize | (hbits<<11))); | ||
1313 | /* Patching TNEW operands is only safe if the trace is aborted. */ | ||
1314 | ir->op1 = asize; ir->op2 = hbits; | ||
1315 | J->retryrec = 1; /* Abort the trace at the end of recording. */ | ||
1316 | } | ||
1317 | } else if (ir->o == IR_TDUP) { | ||
1318 | GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc))); | ||
1319 | /* Grow template table, but preserve keys with nil values. */ | ||
1320 | if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) || | ||
1321 | (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) { | ||
1322 | Node *node = noderef(tpl->node); | ||
1323 | uint32_t i, hmask = tpl->hmask, asize; | ||
1324 | TValue *array; | ||
1325 | for (i = 0; i <= hmask; i++) { | ||
1326 | if (!tvisnil(&node[i].key) && tvisnil(&node[i].val)) | ||
1327 | settabV(J->L, &node[i].val, tpl); | ||
1328 | } | ||
1329 | if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) { | ||
1330 | TValue *o = lj_tab_set(J->L, tpl, &ix->keyv); | ||
1331 | if (tvisnil(o)) settabV(J->L, o, tpl); | ||
1332 | } | ||
1333 | lj_tab_resize(J->L, tpl, tb->asize, nhbits); | ||
1334 | node = noderef(tpl->node); | ||
1335 | hmask = tpl->hmask; | ||
1336 | for (i = 0; i <= hmask; i++) { | ||
1337 | /* This is safe, since template tables only hold immutable values. */ | ||
1338 | if (tvistab(&node[i].val)) | ||
1339 | setnilV(&node[i].val); | ||
1340 | } | ||
1341 | /* The shape of the table may have changed. Clean up array part, too. */ | ||
1342 | asize = tpl->asize; | ||
1343 | array = tvref(tpl->array); | ||
1344 | for (i = 0; i < asize; i++) { | ||
1345 | if (tvistab(&array[i])) | ||
1346 | setnilV(&array[i]); | ||
1347 | } | ||
1348 | J->retryrec = 1; /* Abort the trace at the end of recording. */ | ||
1349 | } | ||
1350 | } | ||
1351 | } | ||
1352 | } | ||
1353 | #endif | ||
1354 | |||
1044 | /* Record bounds-check. */ | 1355 | /* Record bounds-check. */ |
1045 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | 1356 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) |
1046 | { | 1357 | { |
@@ -1061,7 +1372,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | |||
1061 | /* Got scalar evolution analysis results for this reference? */ | 1372 | /* Got scalar evolution analysis results for this reference? */ |
1062 | if (ref == J->scev.idx) { | 1373 | if (ref == J->scev.idx) { |
1063 | int32_t stop; | 1374 | int32_t stop; |
1064 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); | 1375 | lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD, |
1376 | "only int SCEV supported"); | ||
1065 | stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); | 1377 | stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); |
1066 | /* Runtime value for stop of loop is within bounds? */ | 1378 | /* Runtime value for stop of loop is within bounds? */ |
1067 | if ((uint64_t)stop + ofs < (uint64_t)asize) { | 1379 | if ((uint64_t)stop + ofs < (uint64_t)asize) { |
@@ -1080,11 +1392,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | |||
1080 | } | 1392 | } |
1081 | 1393 | ||
1082 | /* Record indexed key lookup. */ | 1394 | /* Record indexed key lookup. */ |
1083 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | 1395 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, |
1396 | IRType1 *rbguard) | ||
1084 | { | 1397 | { |
1085 | TRef key; | 1398 | TRef key; |
1086 | GCtab *t = tabV(&ix->tabv); | 1399 | GCtab *t = tabV(&ix->tabv); |
1087 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ | 1400 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ |
1401 | *rbref = 0; | ||
1402 | rbguard->irt = 0; | ||
1088 | 1403 | ||
1089 | /* Integer keys are looked up in the array part first. */ | 1404 | /* Integer keys are looked up in the array part first. */ |
1090 | key = ix->key; | 1405 | key = ix->key; |
@@ -1098,8 +1413,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
1098 | if ((MSize)k < t->asize) { /* Currently an array key? */ | 1413 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
1099 | TRef arrayref; | 1414 | TRef arrayref; |
1100 | rec_idx_abc(J, asizeref, ikey, t->asize); | 1415 | rec_idx_abc(J, asizeref, ikey, t->asize); |
1101 | arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); | 1416 | arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY); |
1102 | return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); | 1417 | return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey); |
1103 | } else { /* Currently not in array (may be an array extension)? */ | 1418 | } else { /* Currently not in array (may be an array extension)? */ |
1104 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ | 1419 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ |
1105 | if (k == 0 && tref_isk(key)) | 1420 | if (k == 0 && tref_isk(key)) |
@@ -1134,16 +1449,18 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
1134 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); | 1449 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); |
1135 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && | 1450 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && |
1136 | hslot <= 65535*(MSize)sizeof(Node)) { | 1451 | hslot <= 65535*(MSize)sizeof(Node)) { |
1137 | TRef node, kslot; | 1452 | TRef node, kslot, hm; |
1138 | TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); | 1453 | *rbref = J->cur.nins; /* Mark possible rollback point. */ |
1454 | *rbguard = J->guardemit; | ||
1455 | hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); | ||
1139 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); | 1456 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); |
1140 | node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); | 1457 | node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE); |
1141 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); | 1458 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); |
1142 | return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); | 1459 | return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot); |
1143 | } | 1460 | } |
1144 | } | 1461 | } |
1145 | /* Fall back to a regular hash lookup. */ | 1462 | /* Fall back to a regular hash lookup. */ |
1146 | return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); | 1463 | return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key); |
1147 | } | 1464 | } |
1148 | 1465 | ||
1149 | /* Determine whether a key is NOT one of the fast metamethod names. */ | 1466 | /* Determine whether a key is NOT one of the fast metamethod names. */ |
@@ -1168,20 +1485,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1168 | { | 1485 | { |
1169 | TRef xref; | 1486 | TRef xref; |
1170 | IROp xrefop, loadop; | 1487 | IROp xrefop, loadop; |
1488 | IRRef rbref; | ||
1489 | IRType1 rbguard; | ||
1171 | cTValue *oldv; | 1490 | cTValue *oldv; |
1172 | 1491 | ||
1173 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ | 1492 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ |
1174 | /* Never call raw lj_record_idx() on non-table. */ | 1493 | /* Never call raw lj_record_idx() on non-table. */ |
1175 | lua_assert(ix->idxchain != 0); | 1494 | lj_assertJ(ix->idxchain != 0, "bad usage"); |
1176 | if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) | 1495 | if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) |
1177 | lj_trace_err(J, LJ_TRERR_NOMM); | 1496 | lj_trace_err(J, LJ_TRERR_NOMM); |
1178 | handlemm: | 1497 | handlemm: |
1179 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ | 1498 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ |
1180 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); | 1499 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); |
1181 | TRef *base = J->base + func; | 1500 | TRef *base = J->base + func + LJ_FR2; |
1182 | TValue *tv = J->L->base + func; | 1501 | TValue *tv = J->L->base + func + LJ_FR2; |
1183 | base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; | 1502 | base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; |
1184 | setfuncV(J->L, tv+0, funcV(&ix->mobjv)); | 1503 | setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); |
1185 | copyTV(J->L, tv+1, &ix->tabv); | 1504 | copyTV(J->L, tv+1, &ix->tabv); |
1186 | copyTV(J->L, tv+2, &ix->keyv); | 1505 | copyTV(J->L, tv+2, &ix->keyv); |
1187 | if (ix->val) { | 1506 | if (ix->val) { |
@@ -1194,6 +1513,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1194 | return 0; /* No result yet. */ | 1513 | return 0; /* No result yet. */ |
1195 | } | 1514 | } |
1196 | } | 1515 | } |
1516 | #if LJ_HASBUFFER | ||
1517 | /* The index table of buffer objects is treated as immutable. */ | ||
1518 | if (ix->mt == TREF_NIL && !ix->val && | ||
1519 | tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER && | ||
1520 | tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) { | ||
1521 | cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv)); | ||
1522 | TRef tr = lj_record_constify(J, val); | ||
1523 | if (tr) return tr; /* Specialize to the value, i.e. a method. */ | ||
1524 | } | ||
1525 | #endif | ||
1197 | /* Otherwise retry lookup with metaobject. */ | 1526 | /* Otherwise retry lookup with metaobject. */ |
1198 | ix->tab = ix->mobj; | 1527 | ix->tab = ix->mobj; |
1199 | copyTV(J->L, &ix->tabv, &ix->mobjv); | 1528 | copyTV(J->L, &ix->tabv, &ix->mobjv); |
@@ -1213,7 +1542,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1213 | } | 1542 | } |
1214 | 1543 | ||
1215 | /* Record the key lookup. */ | 1544 | /* Record the key lookup. */ |
1216 | xref = rec_idx_key(J, ix); | 1545 | xref = rec_idx_key(J, ix, &rbref, &rbguard); |
1217 | xrefop = IR(tref_ref(xref))->o; | 1546 | xrefop = IR(tref_ref(xref))->o; |
1218 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; | 1547 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; |
1219 | /* The lj_meta_tset() inconsistency is gone, but better play safe. */ | 1548 | /* The lj_meta_tset() inconsistency is gone, but better play safe. */ |
@@ -1223,11 +1552,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1223 | IRType t = itype2irt(oldv); | 1552 | IRType t = itype2irt(oldv); |
1224 | TRef res; | 1553 | TRef res; |
1225 | if (oldv == niltvg(J2G(J))) { | 1554 | if (oldv == niltvg(J2G(J))) { |
1226 | emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1555 | emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1227 | res = TREF_NIL; | 1556 | res = TREF_NIL; |
1228 | } else { | 1557 | } else { |
1229 | res = emitir(IRTG(loadop, t), xref, 0); | 1558 | res = emitir(IRTG(loadop, t), xref, 0); |
1230 | } | 1559 | } |
1560 | if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */ | ||
1561 | lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ | ||
1562 | J->guardemit = rbguard; | ||
1563 | } | ||
1231 | if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) | 1564 | if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) |
1232 | goto handlemm; | 1565 | goto handlemm; |
1233 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ | 1566 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ |
@@ -1235,6 +1568,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1235 | } else { /* Indexed store. */ | 1568 | } else { /* Indexed store. */ |
1236 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); | 1569 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); |
1237 | int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); | 1570 | int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); |
1571 | if (tref_ref(xref) < rbref) { /* HREFK forwarded? */ | ||
1572 | lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ | ||
1573 | J->guardemit = rbguard; | ||
1574 | } | ||
1238 | if (tvisnil(oldv)) { /* Previous value was nil? */ | 1575 | if (tvisnil(oldv)) { /* Previous value was nil? */ |
1239 | /* Need to duplicate the hasmm check for the early guards. */ | 1576 | /* Need to duplicate the hasmm check for the early guards. */ |
1240 | int hasmm = 0; | 1577 | int hasmm = 0; |
@@ -1245,24 +1582,28 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1245 | if (hasmm) | 1582 | if (hasmm) |
1246 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ | 1583 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ |
1247 | else if (xrefop == IR_HREF) | 1584 | else if (xrefop == IR_HREF) |
1248 | emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), | 1585 | emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), |
1249 | xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1586 | xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1250 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { | 1587 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { |
1251 | lua_assert(hasmm); | 1588 | lj_assertJ(hasmm, "inconsistent metamethod handling"); |
1252 | goto handlemm; | 1589 | goto handlemm; |
1253 | } | 1590 | } |
1254 | lua_assert(!hasmm); | 1591 | lj_assertJ(!hasmm, "inconsistent metamethod handling"); |
1255 | if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ | 1592 | if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ |
1256 | TRef key = ix->key; | 1593 | TRef key = ix->key; |
1257 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ | 1594 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ |
1258 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); | 1595 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); |
1259 | xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); | 1596 | xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); |
1260 | keybarrier = 0; /* NEWREF already takes care of the key barrier. */ | 1597 | keybarrier = 0; /* NEWREF already takes care of the key barrier. */ |
1598 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1599 | if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */ | ||
1600 | rec_idx_bump(J, ix); | ||
1601 | #endif | ||
1261 | } | 1602 | } |
1262 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { | 1603 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { |
1263 | /* Cannot derive that the previous value was non-nil, must do checks. */ | 1604 | /* Cannot derive that the previous value was non-nil, must do checks. */ |
1264 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ | 1605 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ |
1265 | emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); | 1606 | emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1266 | if (ix->idxchain) { /* Metamethod lookup required? */ | 1607 | if (ix->idxchain) { /* Metamethod lookup required? */ |
1267 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ | 1608 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ |
1268 | if (!mt) { | 1609 | if (!mt) { |
@@ -1284,7 +1625,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1284 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); | 1625 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); |
1285 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ | 1626 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ |
1286 | if (!nommstr(J, ix->key)) { | 1627 | if (!nommstr(J, ix->key)) { |
1287 | TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); | 1628 | TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM); |
1288 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); | 1629 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); |
1289 | } | 1630 | } |
1290 | J->needsnap = 1; | 1631 | J->needsnap = 1; |
@@ -1292,6 +1633,72 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1292 | } | 1633 | } |
1293 | } | 1634 | } |
1294 | 1635 | ||
1636 | /* Determine result type of table traversal. */ | ||
1637 | static IRType rec_next_types(GCtab *t, uint32_t idx) | ||
1638 | { | ||
1639 | for (; idx < t->asize; idx++) { | ||
1640 | cTValue *a = arrayslot(t, idx); | ||
1641 | if (LJ_LIKELY(!tvisnil(a))) | ||
1642 | return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8); | ||
1643 | } | ||
1644 | idx -= t->asize; | ||
1645 | for (; idx <= t->hmask; idx++) { | ||
1646 | Node *n = &noderef(t->node)[idx]; | ||
1647 | if (!tvisnil(&n->val)) | ||
1648 | return itype2irt(&n->key) + (itype2irt(&n->val) << 8); | ||
1649 | } | ||
1650 | return IRT_NIL + (IRT_NIL << 8); | ||
1651 | } | ||
1652 | |||
1653 | /* Record a table traversal step aka next(). */ | ||
1654 | int lj_record_next(jit_State *J, RecordIndex *ix) | ||
1655 | { | ||
1656 | IRType t, tkey, tval; | ||
1657 | TRef trvk; | ||
1658 | t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo); | ||
1659 | tkey = (t & 0xff); tval = (t >> 8); | ||
1660 | trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key); | ||
1661 | if (ix->mobj || tkey == IRT_NIL) { | ||
1662 | TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk); | ||
1663 | /* Always check for invalid key from next() for nil result. */ | ||
1664 | if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1)); | ||
1665 | ix->mobj = idx; | ||
1666 | } | ||
1667 | ix->key = lj_record_vload(J, trvk, 1, tkey); | ||
1668 | if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */ | ||
1669 | ix->val = TREF_NIL; | ||
1670 | return 1; | ||
1671 | } else { /* Need value. */ | ||
1672 | ix->val = lj_record_vload(J, trvk, 0, tval); | ||
1673 | return 2; | ||
1674 | } | ||
1675 | } | ||
1676 | |||
1677 | static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) | ||
1678 | { | ||
1679 | RecordIndex ix; | ||
1680 | cTValue *basev = J->L->base; | ||
1681 | GCtab *t = tabV(&basev[ra-1]); | ||
1682 | settabV(J->L, &ix.tabv, t); | ||
1683 | ix.tab = getslot(J, ra-1); | ||
1684 | ix.idxchain = 0; | ||
1685 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
1686 | if ((J->flags & JIT_F_OPT_SINK)) { | ||
1687 | if (t->asize < i+rn-ra) | ||
1688 | lj_tab_reasize(J->L, t, i+rn-ra); | ||
1689 | setnilV(&ix.keyv); | ||
1690 | rec_idx_bump(J, &ix); | ||
1691 | } | ||
1692 | #endif | ||
1693 | for (; ra < rn; i++, ra++) { | ||
1694 | setintV(&ix.keyv, i); | ||
1695 | ix.key = lj_ir_kint(J, i); | ||
1696 | copyTV(J->L, &ix.valv, &basev[ra]); | ||
1697 | ix.val = getslot(J, ra); | ||
1698 | lj_record_idx(J, &ix); | ||
1699 | } | ||
1700 | } | ||
1701 | |||
1295 | /* -- Upvalue access ------------------------------------------------------ */ | 1702 | /* -- Upvalue access ------------------------------------------------------ */ |
1296 | 1703 | ||
1297 | /* Check whether upvalue is immutable and ok to constify. */ | 1704 | /* Check whether upvalue is immutable and ok to constify. */ |
@@ -1328,13 +1735,17 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) | |||
1328 | int needbarrier = 0; | 1735 | int needbarrier = 0; |
1329 | if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ | 1736 | if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ |
1330 | TRef tr, kfunc; | 1737 | TRef tr, kfunc; |
1331 | lua_assert(val == 0); | 1738 | lj_assertJ(val == 0, "bad usage"); |
1332 | if (!tref_isk(fn)) { /* Late specialization of current function. */ | 1739 | if (!tref_isk(fn)) { /* Late specialization of current function. */ |
1333 | if (J->pt->flags >= PROTO_CLC_POLY) | 1740 | if (J->pt->flags >= PROTO_CLC_POLY) |
1334 | goto noconstify; | 1741 | goto noconstify; |
1335 | kfunc = lj_ir_kfunc(J, J->fn); | 1742 | kfunc = lj_ir_kfunc(J, J->fn); |
1336 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); | 1743 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); |
1337 | J->base[-1] = TREF_FRAME | kfunc; | 1744 | #if LJ_FR2 |
1745 | J->base[-2] = kfunc; | ||
1746 | #else | ||
1747 | J->base[-1] = kfunc | TREF_FRAME; | ||
1748 | #endif | ||
1338 | fn = kfunc; | 1749 | fn = kfunc; |
1339 | } | 1750 | } |
1340 | tr = lj_record_constify(J, uvval(uvp)); | 1751 | tr = lj_record_constify(J, uvval(uvp)); |
@@ -1345,16 +1756,16 @@ noconstify: | |||
1345 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ | 1756 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ |
1346 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); | 1757 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); |
1347 | if (!uvp->closed) { | 1758 | if (!uvp->closed) { |
1348 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); | 1759 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); |
1349 | /* In current stack? */ | 1760 | /* In current stack? */ |
1350 | if (uvval(uvp) >= tvref(J->L->stack) && | 1761 | if (uvval(uvp) >= tvref(J->L->stack) && |
1351 | uvval(uvp) < tvref(J->L->maxstack)) { | 1762 | uvval(uvp) < tvref(J->L->maxstack)) { |
1352 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); | 1763 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); |
1353 | if (slot >= 0) { /* Aliases an SSA slot? */ | 1764 | if (slot >= 0) { /* Aliases an SSA slot? */ |
1354 | emitir(IRTG(IR_EQ, IRT_P32), | 1765 | emitir(IRTG(IR_EQ, IRT_PGC), |
1355 | REF_BASE, | 1766 | REF_BASE, |
1356 | emitir(IRT(IR_ADD, IRT_P32), uref, | 1767 | emitir(IRT(IR_ADD, IRT_PGC), uref, |
1357 | lj_ir_kint(J, (slot - 1) * -8))); | 1768 | lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); |
1358 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ | 1769 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ |
1359 | if (val == 0) { | 1770 | if (val == 0) { |
1360 | return getslot(J, slot); | 1771 | return getslot(J, slot); |
@@ -1365,12 +1776,12 @@ noconstify: | |||
1365 | } | 1776 | } |
1366 | } | 1777 | } |
1367 | } | 1778 | } |
1368 | emitir(IRTG(IR_UGT, IRT_P32), | 1779 | emitir(IRTG(IR_UGT, IRT_PGC), |
1369 | emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE), | 1780 | emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), |
1370 | lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); | 1781 | lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); |
1371 | } else { | 1782 | } else { |
1372 | needbarrier = 1; | 1783 | needbarrier = 1; |
1373 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); | 1784 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); |
1374 | } | 1785 | } |
1375 | if (val == 0) { /* Upvalue load */ | 1786 | if (val == 0) { /* Upvalue load */ |
1376 | IRType t = itype2irt(uvval(uvp)); | 1787 | IRType t = itype2irt(uvval(uvp)); |
@@ -1409,16 +1820,16 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) | |||
1409 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) { | 1820 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) { |
1410 | J->pc++; | 1821 | J->pc++; |
1411 | if (J->framedepth + J->retdepth == 0) | 1822 | if (J->framedepth + J->retdepth == 0) |
1412 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ | 1823 | lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */ |
1413 | else | 1824 | else |
1414 | rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ | 1825 | lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ |
1415 | } | 1826 | } |
1416 | } else { | 1827 | } else { |
1417 | if (count > J->param[JIT_P_callunroll]) { | 1828 | if (count > J->param[JIT_P_callunroll]) { |
1418 | if (lnk) { /* Possible tail- or up-recursion. */ | 1829 | if (lnk) { /* Possible tail- or up-recursion. */ |
1419 | lj_trace_flush(J, lnk); /* Flush trace that only returns. */ | 1830 | lj_trace_flush(J, lnk); /* Flush trace that only returns. */ |
1420 | /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ | 1831 | /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ |
1421 | hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); | 1832 | hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); |
1422 | } | 1833 | } |
1423 | lj_trace_err(J, LJ_TRERR_CUNROLL); | 1834 | lj_trace_err(J, LJ_TRERR_CUNROLL); |
1424 | } | 1835 | } |
@@ -1445,11 +1856,14 @@ static void rec_func_setup(jit_State *J) | |||
1445 | static void rec_func_vararg(jit_State *J) | 1856 | static void rec_func_vararg(jit_State *J) |
1446 | { | 1857 | { |
1447 | GCproto *pt = J->pt; | 1858 | GCproto *pt = J->pt; |
1448 | BCReg s, fixargs, vframe = J->maxslot+1; | 1859 | BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; |
1449 | lua_assert((pt->flags & PROTO_VARARG)); | 1860 | lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function"); |
1450 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) | 1861 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) |
1451 | lj_trace_err(J, LJ_TRERR_STACKOV); | 1862 | lj_trace_err(J, LJ_TRERR_STACKOV); |
1452 | J->base[vframe-1] = J->base[-1]; /* Copy function up. */ | 1863 | J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ |
1864 | #if LJ_FR2 | ||
1865 | J->base[vframe-1] = TREF_FRAME; | ||
1866 | #endif | ||
1453 | /* Copy fixarg slots up and set their original slots to nil. */ | 1867 | /* Copy fixarg slots up and set their original slots to nil. */ |
1454 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; | 1868 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; |
1455 | for (s = 0; s < fixargs; s++) { | 1869 | for (s = 0; s < fixargs; s++) { |
@@ -1485,9 +1899,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk) | |||
1485 | } | 1899 | } |
1486 | J->instunroll = 0; /* Cannot continue across a compiled function. */ | 1900 | J->instunroll = 0; /* Cannot continue across a compiled function. */ |
1487 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) | 1901 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
1488 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ | 1902 | lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */ |
1489 | else | 1903 | else |
1490 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ | 1904 | lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ |
1491 | } | 1905 | } |
1492 | 1906 | ||
1493 | /* -- Vararg handling ----------------------------------------------------- */ | 1907 | /* -- Vararg handling ----------------------------------------------------- */ |
@@ -1511,8 +1925,10 @@ static int select_detect(jit_State *J) | |||
1511 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | 1925 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) |
1512 | { | 1926 | { |
1513 | int32_t numparams = J->pt->numparams; | 1927 | int32_t numparams = J->pt->numparams; |
1514 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; | 1928 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; |
1515 | lua_assert(frame_isvarg(J->L->base-1)); | 1929 | lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame"); |
1930 | if (LJ_FR2 && dst > J->maxslot) | ||
1931 | J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ | ||
1516 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ | 1932 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ |
1517 | ptrdiff_t i; | 1933 | ptrdiff_t i; |
1518 | if (nvararg < 0) nvararg = 0; | 1934 | if (nvararg < 0) nvararg = 0; |
@@ -1523,10 +1939,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1523 | J->maxslot = dst + (BCReg)nresults; | 1939 | J->maxslot = dst + (BCReg)nresults; |
1524 | } | 1940 | } |
1525 | for (i = 0; i < nresults; i++) | 1941 | for (i = 0; i < nresults; i++) |
1526 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; | 1942 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; |
1527 | } else { /* Unknown number of varargs passed to trace. */ | 1943 | } else { /* Unknown number of varargs passed to trace. */ |
1528 | TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); | 1944 | TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); |
1529 | int32_t frofs = 8*(1+numparams)+FRAME_VARG; | 1945 | int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; |
1530 | if (nresults >= 0) { /* Known fixed number of results. */ | 1946 | if (nresults >= 0) { /* Known fixed number of results. */ |
1531 | ptrdiff_t i; | 1947 | ptrdiff_t i; |
1532 | if (nvararg > 0) { | 1948 | if (nvararg > 0) { |
@@ -1535,16 +1951,13 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1535 | if (nvararg >= nresults) | 1951 | if (nvararg >= nresults) |
1536 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); | 1952 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); |
1537 | else | 1953 | else |
1538 | emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); | 1954 | emitir(IRTGI(IR_EQ), fr, |
1539 | vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | 1955 | lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); |
1540 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | 1956 | vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1957 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); | ||
1541 | for (i = 0; i < nload; i++) { | 1958 | for (i = 0; i < nload; i++) { |
1542 | IRType t = itype2irt(&J->L->base[i-1-nvararg]); | 1959 | IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); |
1543 | TRef aref = emitir(IRT(IR_AREF, IRT_P32), | 1960 | J->base[dst+i] = lj_record_vload(J, vbase, i, t); |
1544 | vbase, lj_ir_kint(J, (int32_t)i)); | ||
1545 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | ||
1546 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | ||
1547 | J->base[dst+i] = tr; | ||
1548 | } | 1961 | } |
1549 | } else { | 1962 | } else { |
1550 | emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); | 1963 | emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); |
@@ -1586,15 +1999,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1586 | } | 1999 | } |
1587 | if (idx != 0 && idx <= nvararg) { | 2000 | if (idx != 0 && idx <= nvararg) { |
1588 | IRType t; | 2001 | IRType t; |
1589 | TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | 2002 | TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1590 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | 2003 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, |
1591 | t = itype2irt(&J->L->base[idx-2-nvararg]); | 2004 | lj_ir_kint(J, frofs-(8<<LJ_FR2))); |
1592 | aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); | 2005 | t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); |
1593 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | 2006 | aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); |
1594 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | 2007 | tr = lj_record_vload(J, aref, 0, t); |
1595 | } | 2008 | } |
1596 | J->base[dst-2] = tr; | 2009 | J->base[dst-2-LJ_FR2] = tr; |
1597 | J->maxslot = dst-1; | 2010 | J->maxslot = dst-1-LJ_FR2; |
1598 | J->bcskip = 2; /* Skip CALLM + select. */ | 2011 | J->bcskip = 2; /* Skip CALLM + select. */ |
1599 | } else { | 2012 | } else { |
1600 | nyivarg: | 2013 | nyivarg: |
@@ -1612,8 +2025,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah) | |||
1612 | { | 2025 | { |
1613 | uint32_t asize = ah & 0x7ff; | 2026 | uint32_t asize = ah & 0x7ff; |
1614 | uint32_t hbits = ah >> 11; | 2027 | uint32_t hbits = ah >> 11; |
2028 | TRef tr; | ||
1615 | if (asize == 0x7ff) asize = 0x801; | 2029 | if (asize == 0x7ff) asize = 0x801; |
1616 | return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); | 2030 | tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); |
2031 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2032 | J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr); | ||
2033 | setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc); | ||
2034 | setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); | ||
2035 | #endif | ||
2036 | return tr; | ||
2037 | } | ||
2038 | |||
2039 | /* -- Concatenation ------------------------------------------------------- */ | ||
2040 | |||
2041 | static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) | ||
2042 | { | ||
2043 | TRef *top = &J->base[topslot]; | ||
2044 | TValue savetv[5]; | ||
2045 | BCReg s; | ||
2046 | RecordIndex ix; | ||
2047 | lj_assertJ(baseslot < topslot, "bad CAT arg"); | ||
2048 | for (s = baseslot; s <= topslot; s++) | ||
2049 | (void)getslot(J, s); /* Ensure all arguments have a reference. */ | ||
2050 | if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) { | ||
2051 | TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot]; | ||
2052 | /* First convert numbers to strings. */ | ||
2053 | for (trp = top; trp >= base; trp--) { | ||
2054 | if (tref_isnumber(*trp)) | ||
2055 | *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp, | ||
2056 | tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT); | ||
2057 | else if (!tref_isstr(*trp)) | ||
2058 | break; | ||
2059 | } | ||
2060 | xbase = ++trp; | ||
2061 | tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), | ||
2062 | lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); | ||
2063 | do { | ||
2064 | tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++); | ||
2065 | } while (trp <= top); | ||
2066 | tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); | ||
2067 | J->maxslot = (BCReg)(xbase - J->base); | ||
2068 | if (xbase == base) return tr; /* Return simple concatenation result. */ | ||
2069 | /* Pass partial result. */ | ||
2070 | topslot = J->maxslot--; | ||
2071 | *xbase = tr; | ||
2072 | top = xbase; | ||
2073 | setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */ | ||
2074 | } else { | ||
2075 | J->maxslot = topslot-1; | ||
2076 | copyTV(J->L, &ix.keyv, &J->L->base[topslot]); | ||
2077 | } | ||
2078 | copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]); | ||
2079 | ix.tab = top[-1]; | ||
2080 | ix.key = top[0]; | ||
2081 | memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */ | ||
2082 | rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */ | ||
2083 | memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */ | ||
2084 | return 0; /* No result yet. */ | ||
1617 | } | 2085 | } |
1618 | 2086 | ||
1619 | /* -- Record bytecode ops ------------------------------------------------- */ | 2087 | /* -- Record bytecode ops ------------------------------------------------- */ |
@@ -1634,7 +2102,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) | |||
1634 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); | 2102 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); |
1635 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 2103 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
1636 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ | 2104 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ |
2105 | #if LJ_FR2 | ||
2106 | SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; | ||
2107 | uint64_t pcbase; | ||
2108 | memcpy(&pcbase, flink, sizeof(uint64_t)); | ||
2109 | pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); | ||
2110 | memcpy(flink, &pcbase, sizeof(uint64_t)); | ||
2111 | #else | ||
1637 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); | 2112 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
2113 | #endif | ||
1638 | J->needsnap = 1; | 2114 | J->needsnap = 1; |
1639 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); | 2115 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); |
1640 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ | 2116 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ |
@@ -1654,7 +2130,7 @@ void lj_record_ins(jit_State *J) | |||
1654 | if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { | 2130 | if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { |
1655 | switch (J->postproc) { | 2131 | switch (J->postproc) { |
1656 | case LJ_POST_FIXCOMP: /* Fixup comparison. */ | 2132 | case LJ_POST_FIXCOMP: /* Fixup comparison. */ |
1657 | pc = frame_pc(&J2G(J)->tmptv); | 2133 | pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64; |
1658 | rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); | 2134 | rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); |
1659 | /* fallthrough */ | 2135 | /* fallthrough */ |
1660 | case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ | 2136 | case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ |
@@ -1692,7 +2168,7 @@ void lj_record_ins(jit_State *J) | |||
1692 | if (bc_op(*J->pc) >= BC__MAX) | 2168 | if (bc_op(*J->pc) >= BC__MAX) |
1693 | return; | 2169 | return; |
1694 | break; | 2170 | break; |
1695 | default: lua_assert(0); break; | 2171 | default: lj_assertJ(0, "bad post-processing mode"); break; |
1696 | } | 2172 | } |
1697 | J->postproc = LJ_POST_NONE; | 2173 | J->postproc = LJ_POST_NONE; |
1698 | } | 2174 | } |
@@ -1700,7 +2176,7 @@ void lj_record_ins(jit_State *J) | |||
1700 | /* Need snapshot before recording next bytecode (e.g. after a store). */ | 2176 | /* Need snapshot before recording next bytecode (e.g. after a store). */ |
1701 | if (J->needsnap) { | 2177 | if (J->needsnap) { |
1702 | J->needsnap = 0; | 2178 | J->needsnap = 0; |
1703 | lj_snap_purge(J); | 2179 | if (J->pt) lj_snap_purge(J); |
1704 | lj_snap_add(J); | 2180 | lj_snap_add(J); |
1705 | J->mergesnap = 1; | 2181 | J->mergesnap = 1; |
1706 | } | 2182 | } |
@@ -1722,6 +2198,10 @@ void lj_record_ins(jit_State *J) | |||
1722 | rec_check_ir(J); | 2198 | rec_check_ir(J); |
1723 | #endif | 2199 | #endif |
1724 | 2200 | ||
2201 | #if LJ_HASPROFILE | ||
2202 | rec_profile_ins(J, pc); | ||
2203 | #endif | ||
2204 | |||
1725 | /* Keep a copy of the runtime values of var/num/str operands. */ | 2205 | /* Keep a copy of the runtime values of var/num/str operands. */ |
1726 | #define rav (&ix.valv) | 2206 | #define rav (&ix.valv) |
1727 | #define rbv (&ix.tabv) | 2207 | #define rbv (&ix.tabv) |
@@ -1748,7 +2228,7 @@ void lj_record_ins(jit_State *J) | |||
1748 | switch (bcmode_c(op)) { | 2228 | switch (bcmode_c(op)) { |
1749 | case BCMvar: | 2229 | case BCMvar: |
1750 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; | 2230 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; |
1751 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; | 2231 | case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; |
1752 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); | 2232 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); |
1753 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : | 2233 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : |
1754 | lj_ir_knumint(J, numV(tv)); } break; | 2234 | lj_ir_knumint(J, numV(tv)); } break; |
@@ -1843,6 +2323,18 @@ void lj_record_ins(jit_State *J) | |||
1843 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ | 2323 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ |
1844 | break; | 2324 | break; |
1845 | 2325 | ||
2326 | case BC_ISTYPE: case BC_ISNUM: | ||
2327 | /* These coercions need to correspond with lj_meta_istype(). */ | ||
2328 | if (LJ_DUALNUM && rc == ~LJ_TNUMX+1) | ||
2329 | ra = lj_opt_narrow_toint(J, ra); | ||
2330 | else if (rc == ~LJ_TNUMX+2) | ||
2331 | ra = lj_ir_tonum(J, ra); | ||
2332 | else if (rc == ~LJ_TSTR+1) | ||
2333 | ra = lj_ir_tostr(J, ra); | ||
2334 | /* else: type specialization suffices. */ | ||
2335 | J->base[bc_a(ins)] = ra; | ||
2336 | break; | ||
2337 | |||
1846 | /* -- Unary ops --------------------------------------------------------- */ | 2338 | /* -- Unary ops --------------------------------------------------------- */ |
1847 | 2339 | ||
1848 | case BC_NOT: | 2340 | case BC_NOT: |
@@ -1854,7 +2346,7 @@ void lj_record_ins(jit_State *J) | |||
1854 | if (tref_isstr(rc)) | 2346 | if (tref_isstr(rc)) |
1855 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); | 2347 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); |
1856 | else if (!LJ_52 && tref_istab(rc)) | 2348 | else if (!LJ_52 && tref_istab(rc)) |
1857 | rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); | 2349 | rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL); |
1858 | else | 2350 | else |
1859 | rc = rec_mm_len(J, rc, rcv); | 2351 | rc = rec_mm_len(J, rc, rcv); |
1860 | break; | 2352 | break; |
@@ -1906,11 +2398,23 @@ void lj_record_ins(jit_State *J) | |||
1906 | rc = rec_mm_arith(J, &ix, MM_pow); | 2398 | rc = rec_mm_arith(J, &ix, MM_pow); |
1907 | break; | 2399 | break; |
1908 | 2400 | ||
2401 | /* -- Miscellaneous ops ------------------------------------------------- */ | ||
2402 | |||
2403 | case BC_CAT: | ||
2404 | rc = rec_cat(J, rb, rc); | ||
2405 | break; | ||
2406 | |||
1909 | /* -- Constant and move ops --------------------------------------------- */ | 2407 | /* -- Constant and move ops --------------------------------------------- */ |
1910 | 2408 | ||
1911 | case BC_MOV: | 2409 | case BC_MOV: |
1912 | /* Clear gap of method call to avoid resurrecting previous refs. */ | 2410 | /* Clear gap of method call to avoid resurrecting previous refs. */ |
1913 | if (ra > J->maxslot) J->base[ra-1] = 0; | 2411 | if (ra > J->maxslot) { |
2412 | #if LJ_FR2 | ||
2413 | memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); | ||
2414 | #else | ||
2415 | J->base[ra-1] = 0; | ||
2416 | #endif | ||
2417 | } | ||
1914 | break; | 2418 | break; |
1915 | case BC_KSTR: case BC_KNUM: case BC_KPRI: | 2419 | case BC_KSTR: case BC_KNUM: case BC_KPRI: |
1916 | break; | 2420 | break; |
@@ -1918,6 +2422,8 @@ void lj_record_ins(jit_State *J) | |||
1918 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); | 2422 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); |
1919 | break; | 2423 | break; |
1920 | case BC_KNIL: | 2424 | case BC_KNIL: |
2425 | if (LJ_FR2 && ra > J->maxslot) | ||
2426 | J->base[ra-1] = 0; | ||
1921 | while (ra <= rc) | 2427 | while (ra <= rc) |
1922 | J->base[ra++] = TREF_NIL; | 2428 | J->base[ra++] = TREF_NIL; |
1923 | if (rc >= J->maxslot) J->maxslot = rc+1; | 2429 | if (rc >= J->maxslot) J->maxslot = rc+1; |
@@ -1954,6 +2460,14 @@ void lj_record_ins(jit_State *J) | |||
1954 | ix.idxchain = LJ_MAX_IDXCHAIN; | 2460 | ix.idxchain = LJ_MAX_IDXCHAIN; |
1955 | rc = lj_record_idx(J, &ix); | 2461 | rc = lj_record_idx(J, &ix); |
1956 | break; | 2462 | break; |
2463 | case BC_TGETR: case BC_TSETR: | ||
2464 | ix.idxchain = 0; | ||
2465 | rc = lj_record_idx(J, &ix); | ||
2466 | break; | ||
2467 | |||
2468 | case BC_TSETM: | ||
2469 | rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo); | ||
2470 | break; | ||
1957 | 2471 | ||
1958 | case BC_TNEW: | 2472 | case BC_TNEW: |
1959 | rc = rec_tnew(J, rc); | 2473 | rc = rec_tnew(J, rc); |
@@ -1961,33 +2475,38 @@ void lj_record_ins(jit_State *J) | |||
1961 | case BC_TDUP: | 2475 | case BC_TDUP: |
1962 | rc = emitir(IRTG(IR_TDUP, IRT_TAB), | 2476 | rc = emitir(IRTG(IR_TDUP, IRT_TAB), |
1963 | lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); | 2477 | lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); |
2478 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2479 | J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc); | ||
2480 | setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc); | ||
2481 | setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); | ||
2482 | #endif | ||
1964 | break; | 2483 | break; |
1965 | 2484 | ||
1966 | /* -- Calls and vararg handling ----------------------------------------- */ | 2485 | /* -- Calls and vararg handling ----------------------------------------- */ |
1967 | 2486 | ||
1968 | case BC_ITERC: | 2487 | case BC_ITERC: |
1969 | J->base[ra] = getslot(J, ra-3); | 2488 | J->base[ra] = getslot(J, ra-3); |
1970 | J->base[ra+1] = getslot(J, ra-2); | 2489 | J->base[ra+1+LJ_FR2] = getslot(J, ra-2); |
1971 | J->base[ra+2] = getslot(J, ra-1); | 2490 | J->base[ra+2+LJ_FR2] = getslot(J, ra-1); |
1972 | { /* Do the actual copy now because lj_record_call needs the values. */ | 2491 | { /* Do the actual copy now because lj_record_call needs the values. */ |
1973 | TValue *b = &J->L->base[ra]; | 2492 | TValue *b = &J->L->base[ra]; |
1974 | copyTV(J->L, b, b-3); | 2493 | copyTV(J->L, b, b-3); |
1975 | copyTV(J->L, b+1, b-2); | 2494 | copyTV(J->L, b+1+LJ_FR2, b-2); |
1976 | copyTV(J->L, b+2, b-1); | 2495 | copyTV(J->L, b+2+LJ_FR2, b-1); |
1977 | } | 2496 | } |
1978 | lj_record_call(J, ra, (ptrdiff_t)rc-1); | 2497 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1979 | break; | 2498 | break; |
1980 | 2499 | ||
1981 | /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ | 2500 | /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ |
1982 | case BC_CALLM: | 2501 | case BC_CALLM: |
1983 | rc = (BCReg)(J->L->top - J->L->base) - ra; | 2502 | rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; |
1984 | /* fallthrough */ | 2503 | /* fallthrough */ |
1985 | case BC_CALL: | 2504 | case BC_CALL: |
1986 | lj_record_call(J, ra, (ptrdiff_t)rc-1); | 2505 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1987 | break; | 2506 | break; |
1988 | 2507 | ||
1989 | case BC_CALLMT: | 2508 | case BC_CALLMT: |
1990 | rc = (BCReg)(J->L->top - J->L->base) - ra; | 2509 | rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; |
1991 | /* fallthrough */ | 2510 | /* fallthrough */ |
1992 | case BC_CALLT: | 2511 | case BC_CALLT: |
1993 | lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); | 2512 | lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); |
@@ -2004,6 +2523,9 @@ void lj_record_ins(jit_State *J) | |||
2004 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; | 2523 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; |
2005 | /* fallthrough */ | 2524 | /* fallthrough */ |
2006 | case BC_RET: case BC_RET0: case BC_RET1: | 2525 | case BC_RET: case BC_RET0: case BC_RET1: |
2526 | #if LJ_HASPROFILE | ||
2527 | rec_profile_ret(J); | ||
2528 | #endif | ||
2007 | lj_record_ret(J, ra, (ptrdiff_t)rc-1); | 2529 | lj_record_ret(J, ra, (ptrdiff_t)rc-1); |
2008 | break; | 2530 | break; |
2009 | 2531 | ||
@@ -2014,9 +2536,10 @@ void lj_record_ins(jit_State *J) | |||
2014 | J->loopref = J->cur.nins; | 2536 | J->loopref = J->cur.nins; |
2015 | break; | 2537 | break; |
2016 | case BC_JFORI: | 2538 | case BC_JFORI: |
2017 | lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); | 2539 | lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL, |
2540 | "JFORI does not point to JFORL"); | ||
2018 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ | 2541 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ |
2019 | rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); | 2542 | lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); |
2020 | /* Continue tracing if the loop is not entered. */ | 2543 | /* Continue tracing if the loop is not entered. */ |
2021 | break; | 2544 | break; |
2022 | 2545 | ||
@@ -2026,6 +2549,9 @@ void lj_record_ins(jit_State *J) | |||
2026 | case BC_ITERL: | 2549 | case BC_ITERL: |
2027 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); | 2550 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); |
2028 | break; | 2551 | break; |
2552 | case BC_ITERN: | ||
2553 | rec_loop_interp(J, pc, rec_itern(J, ra, rb)); | ||
2554 | break; | ||
2029 | case BC_LOOP: | 2555 | case BC_LOOP: |
2030 | rec_loop_interp(J, pc, rec_loop(J, ra, 1)); | 2556 | rec_loop_interp(J, pc, rec_loop(J, ra, 1)); |
2031 | break; | 2557 | break; |
@@ -2054,6 +2580,10 @@ void lj_record_ins(jit_State *J) | |||
2054 | J->maxslot = ra; /* Shrink used slots. */ | 2580 | J->maxslot = ra; /* Shrink used slots. */ |
2055 | break; | 2581 | break; |
2056 | 2582 | ||
2583 | case BC_ISNEXT: | ||
2584 | rec_isnext(J, ra); | ||
2585 | break; | ||
2586 | |||
2057 | /* -- Function headers -------------------------------------------------- */ | 2587 | /* -- Function headers -------------------------------------------------- */ |
2058 | 2588 | ||
2059 | case BC_FUNCF: | 2589 | case BC_FUNCF: |
@@ -2068,7 +2598,8 @@ void lj_record_ins(jit_State *J) | |||
2068 | rec_func_lua(J); | 2598 | rec_func_lua(J); |
2069 | break; | 2599 | break; |
2070 | case BC_JFUNCV: | 2600 | case BC_JFUNCV: |
2071 | lua_assert(0); /* Cannot happen. No hotcall counting for vararg funcs. */ | 2601 | /* Cannot happen. No hotcall counting for vararg funcs. */ |
2602 | lj_assertJ(0, "unsupported vararg hotcall"); | ||
2072 | break; | 2603 | break; |
2073 | 2604 | ||
2074 | case BC_FUNCC: | 2605 | case BC_FUNCC: |
@@ -2082,12 +2613,8 @@ void lj_record_ins(jit_State *J) | |||
2082 | break; | 2613 | break; |
2083 | } | 2614 | } |
2084 | /* fallthrough */ | 2615 | /* fallthrough */ |
2085 | case BC_ITERN: | ||
2086 | case BC_ISNEXT: | ||
2087 | case BC_CAT: | ||
2088 | case BC_UCLO: | 2616 | case BC_UCLO: |
2089 | case BC_FNEW: | 2617 | case BC_FNEW: |
2090 | case BC_TSETM: | ||
2091 | setintV(&J->errinfo, (int32_t)op); | 2618 | setintV(&J->errinfo, (int32_t)op); |
2092 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | 2619 | lj_trace_err_info(J, LJ_TRERR_NYIBC); |
2093 | break; | 2620 | break; |
@@ -2096,15 +2623,21 @@ void lj_record_ins(jit_State *J) | |||
2096 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ | 2623 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ |
2097 | if (bcmode_a(op) == BCMdst && rc) { | 2624 | if (bcmode_a(op) == BCMdst && rc) { |
2098 | J->base[ra] = rc; | 2625 | J->base[ra] = rc; |
2099 | if (ra >= J->maxslot) J->maxslot = ra+1; | 2626 | if (ra >= J->maxslot) { |
2627 | #if LJ_FR2 | ||
2628 | if (ra > J->maxslot) J->base[ra-1] = 0; | ||
2629 | #endif | ||
2630 | J->maxslot = ra+1; | ||
2631 | } | ||
2100 | } | 2632 | } |
2101 | 2633 | ||
2102 | #undef rav | 2634 | #undef rav |
2103 | #undef rbv | 2635 | #undef rbv |
2104 | #undef rcv | 2636 | #undef rcv |
2105 | 2637 | ||
2106 | /* Limit the number of recorded IR instructions. */ | 2638 | /* Limit the number of recorded IR instructions and constants. */ |
2107 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) | 2639 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] || |
2640 | J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst]) | ||
2108 | lj_trace_err(J, LJ_TRERR_TRACEOV); | 2641 | lj_trace_err(J, LJ_TRERR_TRACEOV); |
2109 | } | 2642 | } |
2110 | 2643 | ||
@@ -2124,13 +2657,20 @@ static const BCIns *rec_setup_root(jit_State *J) | |||
2124 | J->bc_min = pc; | 2657 | J->bc_min = pc; |
2125 | break; | 2658 | break; |
2126 | case BC_ITERL: | 2659 | case BC_ITERL: |
2127 | lua_assert(bc_op(pc[-1]) == BC_ITERC); | 2660 | lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL"); |
2128 | J->maxslot = ra + bc_b(pc[-1]) - 1; | 2661 | J->maxslot = ra + bc_b(pc[-1]) - 1; |
2129 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | 2662 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); |
2130 | pc += 1+bc_j(ins); | 2663 | pc += 1+bc_j(ins); |
2131 | lua_assert(bc_op(pc[-1]) == BC_JMP); | 2664 | lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); |
2132 | J->bc_min = pc; | 2665 | J->bc_min = pc; |
2133 | break; | 2666 | break; |
2667 | case BC_ITERN: | ||
2668 | lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN"); | ||
2669 | J->maxslot = ra; | ||
2670 | J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns); | ||
2671 | J->bc_min = pc+2 + bc_j(pc[1]); | ||
2672 | J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */ | ||
2673 | break; | ||
2134 | case BC_LOOP: | 2674 | case BC_LOOP: |
2135 | /* Only check BC range for real loops, but not for "repeat until true". */ | 2675 | /* Only check BC range for real loops, but not for "repeat until true". */ |
2136 | pcj = pc + bc_j(ins); | 2676 | pcj = pc + bc_j(ins); |
@@ -2153,8 +2693,14 @@ static const BCIns *rec_setup_root(jit_State *J) | |||
2153 | J->maxslot = J->pt->numparams; | 2693 | J->maxslot = J->pt->numparams; |
2154 | pc++; | 2694 | pc++; |
2155 | break; | 2695 | break; |
2696 | case BC_CALLM: | ||
2697 | case BC_CALL: | ||
2698 | case BC_ITERC: | ||
2699 | /* No bytecode range check for stitched traces. */ | ||
2700 | pc++; | ||
2701 | break; | ||
2156 | default: | 2702 | default: |
2157 | lua_assert(0); | 2703 | lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins)); |
2158 | break; | 2704 | break; |
2159 | } | 2705 | } |
2160 | return pc; | 2706 | return pc; |
@@ -2168,11 +2714,14 @@ void lj_record_setup(jit_State *J) | |||
2168 | /* Initialize state related to current trace. */ | 2714 | /* Initialize state related to current trace. */ |
2169 | memset(J->slot, 0, sizeof(J->slot)); | 2715 | memset(J->slot, 0, sizeof(J->slot)); |
2170 | memset(J->chain, 0, sizeof(J->chain)); | 2716 | memset(J->chain, 0, sizeof(J->chain)); |
2717 | #ifdef LUAJIT_ENABLE_TABLE_BUMP | ||
2718 | memset(J->rbchash, 0, sizeof(J->rbchash)); | ||
2719 | #endif | ||
2171 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); | 2720 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); |
2172 | J->scev.idx = REF_NIL; | 2721 | J->scev.idx = REF_NIL; |
2173 | setmref(J->scev.pc, NULL); | 2722 | setmref(J->scev.pc, NULL); |
2174 | 2723 | ||
2175 | J->baseslot = 1; /* Invoking function is at base[-1]. */ | 2724 | J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */ |
2176 | J->base = J->slot + J->baseslot; | 2725 | J->base = J->slot + J->baseslot; |
2177 | J->maxslot = 0; | 2726 | J->maxslot = 0; |
2178 | J->framedepth = 0; | 2727 | J->framedepth = 0; |
@@ -2187,7 +2736,7 @@ void lj_record_setup(jit_State *J) | |||
2187 | J->bc_extent = ~(MSize)0; | 2736 | J->bc_extent = ~(MSize)0; |
2188 | 2737 | ||
2189 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ | 2738 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ |
2190 | emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); | 2739 | emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno); |
2191 | for (i = 0; i <= 2; i++) { | 2740 | for (i = 0; i <= 2; i++) { |
2192 | IRIns *ir = IR(REF_NIL-i); | 2741 | IRIns *ir = IR(REF_NIL-i); |
2193 | ir->i = 0; | 2742 | ir->i = 0; |
@@ -2218,10 +2767,15 @@ void lj_record_setup(jit_State *J) | |||
2218 | } | 2767 | } |
2219 | lj_snap_replay(J, T); | 2768 | lj_snap_replay(J, T); |
2220 | sidecheck: | 2769 | sidecheck: |
2221 | if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || | 2770 | if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || |
2222 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + | 2771 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + |
2223 | J->param[JIT_P_tryside]) { | 2772 | J->param[JIT_P_tryside])) { |
2224 | rec_stop(J, LJ_TRLINK_INTERP, 0); | 2773 | if (bc_op(*J->pc) == BC_JLOOP) { |
2774 | BCIns startins = traceref(J, bc_d(*J->pc))->startins; | ||
2775 | if (bc_op(startins) == BC_ITERN) | ||
2776 | rec_itern(J, bc_a(startins), bc_b(startins)); | ||
2777 | } | ||
2778 | lj_record_stop(J, LJ_TRLINK_INTERP, 0); | ||
2225 | } | 2779 | } |
2226 | } else { /* Root trace. */ | 2780 | } else { /* Root trace. */ |
2227 | J->cur.root = 0; | 2781 | J->cur.root = 0; |
@@ -2229,13 +2783,20 @@ void lj_record_setup(jit_State *J) | |||
2229 | J->pc = rec_setup_root(J); | 2783 | J->pc = rec_setup_root(J); |
2230 | /* Note: the loop instruction itself is recorded at the end and not | 2784 | /* Note: the loop instruction itself is recorded at the end and not |
2231 | ** at the start! So snapshot #0 needs to point to the *next* instruction. | 2785 | ** at the start! So snapshot #0 needs to point to the *next* instruction. |
2786 | ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST. | ||
2232 | */ | 2787 | */ |
2233 | lj_snap_add(J); | 2788 | lj_snap_add(J); |
2234 | if (bc_op(J->cur.startins) == BC_FORL) | 2789 | if (bc_op(J->cur.startins) == BC_FORL) |
2235 | rec_for_loop(J, J->pc-1, &J->scev, 1); | 2790 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
2791 | else if (bc_op(J->cur.startins) == BC_ITERC) | ||
2792 | J->startpc = NULL; | ||
2236 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) | 2793 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) |
2237 | lj_trace_err(J, LJ_TRERR_STACKOV); | 2794 | lj_trace_err(J, LJ_TRERR_STACKOV); |
2238 | } | 2795 | } |
2796 | #if LJ_HASPROFILE | ||
2797 | J->prev_pt = NULL; | ||
2798 | J->prev_line = -1; | ||
2799 | #endif | ||
2239 | #ifdef LUAJIT_ENABLE_CHECKHOOK | 2800 | #ifdef LUAJIT_ENABLE_CHECKHOOK |
2240 | /* Regularly check for instruction/line hooks from compiled code and | 2801 | /* Regularly check for instruction/line hooks from compiled code and |
2241 | ** exit to the interpreter if the hooks are set. | 2802 | ** exit to the interpreter if the hooks are set. |