diff options
Diffstat (limited to 'src/lj_record.c')
-rw-r--r-- | src/lj_record.c | 97 |
1 files changed, 49 insertions, 48 deletions
diff --git a/src/lj_record.c b/src/lj_record.c
index 6af25ccb..3f442088 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond) | |||
1696 | const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); | 1696 | const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); |
1697 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 1697 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
1698 | /* Avoid re-recording the comparison in side traces. */ | 1698 | /* Avoid re-recording the comparison in side traces. */ |
1699 | J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc); | 1699 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
1700 | J->needsnap = 1; | 1700 | J->needsnap = 1; |
1701 | /* Shrink last snapshot if possible. */ | 1701 | /* Shrink last snapshot if possible. */ |
1702 | if (bc_a(jmpins) < J->maxslot) { | 1702 | if (bc_a(jmpins) < J->maxslot) { |
@@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
2159 | { | 2159 | { |
2160 | SnapShot *snap = &T->snap[J->exitno]; | 2160 | SnapShot *snap = &T->snap[J->exitno]; |
2161 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 2161 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
2162 | BCReg s, nslots = snap->nslots; | 2162 | MSize n, nent = snap->nent; |
2163 | BloomFilter seen = 0; | 2163 | BloomFilter seen = 0; |
2164 | for (s = 0; s < nslots; s++) { | 2164 | /* Emit IR for slots inherited from parent snapshot. */ |
2165 | IRRef ref = snap_ref(map[s]); | 2165 | for (n = 0; n < nent; n++) { |
2166 | if (ref) { | 2166 | IRRef ref = snap_ref(map[n]); |
2167 | IRIns *ir = &T->ir[ref]; | 2167 | BCReg s = snap_slot(map[n]); |
2168 | TRef tr = 0; | 2168 | IRIns *ir = &T->ir[ref]; |
2169 | /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */ | 2169 | TRef tr; |
2170 | if (bloomtest(seen, ref)) { | 2170 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ |
2171 | BCReg j; | 2171 | if (bloomtest(seen, ref)) { |
2172 | for (j = 0; j < s; j++) | 2172 | MSize j; |
2173 | if (snap_ref(map[j]) == ref) { | 2173 | for (j = 0; j < n; j++) |
2174 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { | 2174 | if (snap_ref(map[j]) == ref) { |
2175 | lua_assert(s != 0); | 2175 | tr = J->slot[snap_slot(map[j])]; |
2176 | J->baseslot = s+1; | 2176 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
2177 | J->framedepth++; | 2177 | lua_assert(s != 0); |
2178 | } | ||
2179 | tr = J->slot[j]; | ||
2180 | goto dupslot; | ||
2181 | } | ||
2182 | } | ||
2183 | bloomset(seen, ref); | ||
2184 | switch ((IROp)ir->o) { | ||
2185 | case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; | ||
2186 | case IR_KINT: tr = lj_ir_kint(J, ir->i); break; | ||
2187 | case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; | ||
2188 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; | ||
2189 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ | ||
2190 | if (irt_isfunc(ir->t)) { | ||
2191 | if (s != 0) { | ||
2192 | J->baseslot = s+1; | 2178 | J->baseslot = s+1; |
2193 | J->framedepth++; | 2179 | J->framedepth++; |
2194 | } | 2180 | } |
2195 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | 2181 | goto dupslot; |
2196 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | ||
2197 | } else { | ||
2198 | tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); | ||
2199 | tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); | ||
2200 | } | 2182 | } |
2201 | break; | 2183 | } |
2202 | case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ | 2184 | bloomset(seen, ref); |
2203 | tr = emitir_raw(ir->ot & ~IRT_GUARD, s, | 2185 | switch ((IROp)ir->o) { |
2204 | (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); | 2186 | /* Only have to deal with constants that can occur in stack slots. */ |
2205 | break; | 2187 | case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; |
2206 | default: /* Parent refs are already typed and don't need a guard. */ | 2188 | case IR_KINT: tr = lj_ir_kint(J, ir->i); break; |
2207 | tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, | 2189 | case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; |
2208 | IRSLOAD_INHERIT|IRSLOAD_PARENT); | 2190 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; |
2209 | break; | 2191 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ |
2192 | if (irt_isfunc(ir->t)) { | ||
2193 | if (s != 0) { | ||
2194 | J->baseslot = s+1; | ||
2195 | J->framedepth++; | ||
2196 | } | ||
2197 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | ||
2198 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | ||
2199 | } else { | ||
2200 | tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); | ||
2201 | tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); | ||
2210 | } | 2202 | } |
2211 | dupslot: | 2203 | break; |
2212 | J->slot[s] = tr; | 2204 | case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ |
2205 | tr = emitir_raw(ir->ot & ~IRT_GUARD, s, | ||
2206 | (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
2207 | break; | ||
2208 | default: /* Parent refs are already typed and don't need a guard. */ | ||
2209 | tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, | ||
2210 | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
2211 | break; | ||
2213 | } | 2212 | } |
2213 | dupslot: | ||
2214 | J->slot[s] = tr; | ||
2214 | } | 2215 | } |
2215 | J->base = J->slot + J->baseslot; | 2216 | J->base = J->slot + J->baseslot; |
2216 | J->maxslot = nslots - J->baseslot; | 2217 | J->maxslot = snap->nslots - J->baseslot; |
2217 | lj_snap_add(J); | 2218 | lj_snap_add(J); |
2218 | } | 2219 | } |
2219 | 2220 | ||
@@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J) | |||
2259 | J->cur.root = (uint16_t)root; | 2260 | J->cur.root = (uint16_t)root; |
2260 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); | 2261 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); |
2261 | /* Check whether we could at least potentially form an extra loop. */ | 2262 | /* Check whether we could at least potentially form an extra loop. */ |
2262 | if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) { | 2263 | if (J->exitno == 0 && T->snap[0].nent == 0) { |
2263 | /* We can narrow a FORL for some side traces, too. */ | 2264 | /* We can narrow a FORL for some side traces, too. */ |
2264 | if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && | 2265 | if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && |
2265 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { | 2266 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { |