diff options
Diffstat (limited to 'src/lj_opt_loop.c')
-rw-r--r-- | src/lj_opt_loop.c | 168 |
1 files changed, 87 insertions, 81 deletions
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index f2950fe9..e5ad5b43 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
@@ -10,7 +10,6 @@ | |||
10 | 10 | ||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | 13 | #include "lj_err.h" |
15 | #include "lj_str.h" | 14 | #include "lj_str.h" |
16 | #include "lj_ir.h" | 15 | #include "lj_ir.h" |
@@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi) | |||
163 | 162 | ||
164 | /* -- Loop unrolling using copy-substitution ------------------------------ */ | 163 | /* -- Loop unrolling using copy-substitution ------------------------------ */ |
165 | 164 | ||
165 | /* Copy-substitute snapshot: rebuild osnap at the current IR position, remapping non-constant refs through subst[] and filling in slots that only the loop snapshot map provides. */ | ||
166 | static void loop_subst_snap(jit_State *J, SnapShot *osnap, /* osnap: recorded snapshot to copy from. */ | ||
167 | SnapEntry *loopmap, IRRef1 *subst) /* loopmap: loop snapshot entries used as fallback; subst: table indexed by old ref, yielding the new ref. */ | ||
168 | { | ||
169 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; /* omap: entries of osnap; nmap: destination entries (set below). */ | ||
170 | MSize nmapofs, nframelinks; | ||
171 | MSize on, ln, nn, onent = osnap->nent; /* on/ln/nn: cursors into omap, loopmap and nmap. */ | ||
172 | BCReg nslots = osnap->nslots; | ||
173 | SnapShot *snap = &J->cur.snap[J->cur.nsnap]; /* Candidate for a new snapshot. */ | ||
174 | if (irt_isguard(J->guardemit)) { /* Guard in between? */ | ||
175 | nmapofs = J->cur.nsnapmap; /* Append the entries at the current end of the map. */ | ||
176 | J->cur.nsnap++; /* Add new snapshot. */ | ||
177 | } else { /* Otherwise overwrite previous snapshot. */ | ||
178 | snap--; | ||
179 | nmapofs = snap->mapofs; /* Reuse the map space of the overwritten snapshot. */ | ||
180 | } | ||
181 | J->guardemit.irt = 0; /* Clear the guard flag tested above. */ | ||
182 | nframelinks = osnap->nframelinks; | ||
183 | /* Setup new snapshot. */ | ||
184 | snap->mapofs = (uint16_t)nmapofs; | ||
185 | snap->ref = (IRRef1)J->cur.nins; /* Anchor at the current end of the IR. */ | ||
186 | snap->nframelinks = (uint8_t)nframelinks; | ||
187 | snap->nslots = nslots; | ||
188 | snap->count = 0; | ||
189 | nmap = &J->cur.snapmap[nmapofs]; | ||
190 | /* Substitute snapshot slots: merge omap and loopmap by slot number (presumably both are ordered by ascending slot -- TODO confirm). */ | ||
191 | on = ln = nn = 0; | ||
192 | while (on < onent) { | ||
193 | SnapEntry osn = omap[on], lsn = loopmap[ln]; /* ln has no explicit bound; relies on the sentinel entry loop_unroll() stores right after the loop map. */ | ||
194 | if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */ | ||
195 | nmap[nn++] = lsn; | ||
196 | ln++; | ||
197 | } else { /* Copy substituted slot from snapshot map. */ | ||
198 | if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */ | ||
199 | if (!irref_isk(snap_ref(osn))) /* Only non-constant refs go through subst[]. */ | ||
200 | osn = snap_setref(osn, subst[snap_ref(osn)]); | ||
201 | nmap[nn++] = osn; | ||
202 | on++; | ||
203 | } | ||
204 | } | ||
205 | while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ | ||
206 | nmap[nn++] = loopmap[ln++]; | ||
207 | snap->nent = (uint8_t)nn; /* Number of slot entries actually written. */ | ||
208 | J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks); /* Map now ends after this snapshot's frame links. */ | ||
209 | omap += onent; /* Skip the slot entries: the frame links are read from directly after them. */ | ||
210 | nmap += nn; | ||
211 | for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */ | ||
212 | nmap[nn] = omap[nn]; | ||
213 | } | ||
214 | |||
166 | /* Unroll loop. */ | 215 | /* Unroll loop. */ |
167 | static void loop_unroll(jit_State *J) | 216 | static void loop_unroll(jit_State *J) |
168 | { | 217 | { |
169 | IRRef1 phi[LJ_MAX_PHI]; | 218 | IRRef1 phi[LJ_MAX_PHI]; |
170 | uint32_t nphi = 0; | 219 | uint32_t nphi = 0; |
171 | IRRef1 *subst; | 220 | IRRef1 *subst; |
172 | SnapShot *osnap, *snap; | 221 | SnapShot *osnap; |
173 | SnapEntry *loopmap; | 222 | SnapEntry *loopmap, *psentinel; |
174 | BCReg loopslots; | 223 | IRRef ins, invar; |
175 | MSize nsnap, nsnapmap; | ||
176 | IRRef ins, invar, osnapref; | ||
177 | 224 | ||
178 | /* Use temp buffer for substitution table. | 225 | /* Use temp buffer for substitution table. |
179 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. | 226 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. |
180 | ** Note: don't call into the VM or run the GC or the buffer may be gone. | 227 | ** Caveat: don't call into the VM or run the GC or the buffer may be gone. |
181 | */ | 228 | */ |
182 | invar = J->cur.nins; | 229 | invar = J->cur.nins; |
183 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, | 230 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, |
@@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J) | |||
187 | /* LOOP separates the pre-roll from the loop body. */ | 234 | /* LOOP separates the pre-roll from the loop body. */ |
188 | emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); | 235 | emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); |
189 | 236 | ||
190 | /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */ | 237 | /* Grow snapshot buffer and map for copy-substituted snapshots. |
191 | nsnap = J->cur.nsnap; | 238 | ** Need up to twice the number of snapshots minus #0 and loop snapshot. |
192 | if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) { | 239 | ** Need up to twice the number of entries plus fallback substitutions |
193 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | 240 | ** from the loop snapshot entries for each new snapshot. |
194 | if (2*nsnap-2 > maxsnap) | 241 | ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! |
195 | lj_trace_err(J, LJ_TRERR_SNAPOV); | 242 | */ |
196 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | 243 | { |
197 | J->cur.snap = J->snapbuf; | 244 | MSize nsnap = J->cur.nsnap; |
198 | } | 245 | SnapShot *loopsnap; |
199 | nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */ | 246 | lj_snap_grow_buf(J, 2*nsnap-2); |
200 | if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) { | 247 | lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent); |
201 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
202 | J->sizesnapmap*sizeof(SnapEntry), | ||
203 | 2*J->sizesnapmap*sizeof(SnapEntry)); | ||
204 | J->cur.snapmap = J->snapmapbuf; | ||
205 | J->sizesnapmap *= 2; | ||
206 | } | ||
207 | 248 | ||
208 | /* The loop snapshot is used for fallback substitutions. */ | 249 | /* The loop snapshot is used for fallback substitutions. */ |
209 | snap = &J->cur.snap[nsnap-1]; | 250 | loopsnap = &J->cur.snap[nsnap-1]; |
210 | loopmap = &J->cur.snapmap[snap->mapofs]; | 251 | loopmap = &J->cur.snapmap[loopsnap->mapofs]; |
211 | loopslots = snap->nslots; | 252 | /* The PC of snapshot #0 and the loop snapshot must match. */ |
212 | /* The PC of snapshot #0 and the loop snapshot must match. */ | 253 | psentinel = &loopmap[loopsnap->nent]; |
213 | lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]); | 254 | lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); |
255 | *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ | ||
256 | } | ||
214 | 257 | ||
215 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ | 258 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ |
216 | osnap = &J->cur.snap[1]; | 259 | osnap = &J->cur.snap[1]; |
217 | osnapref = osnap->ref; | ||
218 | 260 | ||
219 | /* Copy and substitute all recorded instructions and snapshots. */ | 261 | /* Copy and substitute all recorded instructions and snapshots. */ |
220 | for (ins = REF_FIRST; ins < invar; ins++) { | 262 | for (ins = REF_FIRST; ins < invar; ins++) { |
221 | IRIns *ir; | 263 | IRIns *ir; |
222 | IRRef op1, op2; | 264 | IRRef op1, op2; |
223 | 265 | ||
224 | /* Copy-substitute snapshot. */ | 266 | if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */ |
225 | if (ins >= osnapref) { | 267 | loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */ |
226 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; | ||
227 | BCReg s, nslots; | ||
228 | uint32_t nmapofs, nframelinks; | ||
229 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | ||
230 | nmapofs = nsnapmap; | ||
231 | snap++; /* Add new snapshot. */ | ||
232 | } else { | ||
233 | nmapofs = snap->mapofs; /* Overwrite previous snapshot. */ | ||
234 | } | ||
235 | J->guardemit.irt = 0; | ||
236 | nslots = osnap->nslots; | ||
237 | nframelinks = osnap->nframelinks; | ||
238 | snap->mapofs = (uint16_t)nmapofs; | ||
239 | snap->ref = (IRRef1)J->cur.nins; | ||
240 | snap->nslots = (uint8_t)nslots; | ||
241 | snap->nframelinks = (uint8_t)nframelinks; | ||
242 | snap->count = 0; | ||
243 | osnap++; | ||
244 | osnapref = osnap->ref; | ||
245 | nsnapmap = nmapofs + nslots + nframelinks; | ||
246 | nmap = &J->cur.snapmap[nmapofs]; | ||
247 | /* Substitute snapshot slots. */ | ||
248 | for (s = 0; s < nslots; s++) { | ||
249 | IRRef ref = snap_ref(omap[s]); | ||
250 | if (ref) { | ||
251 | if (!irref_isk(ref)) | ||
252 | ref = subst[ref]; | ||
253 | } else if (s < loopslots) { | ||
254 | ref = loopmap[s]; | ||
255 | } | ||
256 | nmap[s] = ref; | ||
257 | } | ||
258 | /* Copy frame links. */ | ||
259 | nmap += nslots; | ||
260 | omap += nslots; | ||
261 | for (s = 0; s < nframelinks; s++) | ||
262 | nmap[s] = omap[s]; | ||
263 | } | ||
264 | 268 | ||
265 | /* Substitute instruction operands. */ | 269 | /* Substitute instruction operands. */ |
266 | ir = IR(ins); | 270 | ir = IR(ins); |
@@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J) | |||
295 | } | 299 | } |
296 | } | 300 | } |
297 | } | 301 | } |
298 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | 302 | if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ |
299 | J->cur.nsnapmap = (uint16_t)nsnapmap; | 303 | J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; |
300 | snap++; | ||
301 | } else { | ||
302 | J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */ | ||
303 | } | ||
304 | J->cur.nsnap = (uint16_t)(snap - J->cur.snap); | ||
305 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); | 304 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); |
305 | *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ | ||
306 | 306 | ||
307 | loop_emit_phi(J, subst, phi, nphi); | 307 | loop_emit_phi(J, subst, phi, nphi); |
308 | } | 308 | } |
309 | 309 | ||
310 | /* Undo any partial changes made by the loop optimization. */ | 310 | /* Undo any partial changes made by the loop optimization. */ |
311 | static void loop_undo(jit_State *J, IRRef ins) | 311 | static void loop_undo(jit_State *J, IRRef ins, MSize nsnap) |
312 | { | 312 | { |
313 | ptrdiff_t i; | 313 | ptrdiff_t i; |
314 | SnapShot *snap = &J->cur.snap[nsnap-1]; | ||
315 | SnapEntry *map = J->cur.snapmap; | ||
316 | map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ | ||
317 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks); | ||
318 | J->cur.nsnap = nsnap; | ||
319 | J->guardemit.irt = 0; | ||
314 | lj_ir_rollback(J, ins); | 320 | lj_ir_rollback(J, ins); |
315 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ | 321 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ |
316 | BPropEntry *bp = &J->bpropcache[i]; | 322 | BPropEntry *bp = &J->bpropcache[i]; |
@@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) | |||
336 | int lj_opt_loop(jit_State *J) | 342 | int lj_opt_loop(jit_State *J) |
337 | { | 343 | { |
338 | IRRef nins = J->cur.nins; | 344 | IRRef nins = J->cur.nins; |
345 | MSize nsnap = J->cur.nsnap; | ||
339 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); | 346 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); |
340 | if (LJ_UNLIKELY(errcode)) { | 347 | if (LJ_UNLIKELY(errcode)) { |
341 | lua_State *L = J->L; | 348 | lua_State *L = J->L; |
@@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J) | |||
348 | if (--J->instunroll < 0) /* But do not unroll forever. */ | 355 | if (--J->instunroll < 0) /* But do not unroll forever. */ |
349 | break; | 356 | break; |
350 | L->top--; /* Remove error object. */ | 357 | L->top--; /* Remove error object. */ |
351 | J->guardemit.irt = 0; | 358 | loop_undo(J, nins, nsnap); |
352 | loop_undo(J, nins); | ||
353 | return 1; /* Loop optimization failed, continue recording. */ | 359 | return 1; /* Loop optimization failed, continue recording. */ |
354 | default: | 360 | default: |
355 | break; | 361 | break; |