summary refs log tree commit diff
path: root/src/lj_opt_loop.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_opt_loop.c')
-rw-r--r-- src/lj_opt_loop.c 168
1 files changed, 87 insertions, 81 deletions
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index f2950fe9..e5ad5b43 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -10,7 +10,6 @@
10 10
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_gc.h"
14#include "lj_err.h" 13#include "lj_err.h"
15#include "lj_str.h" 14#include "lj_str.h"
16#include "lj_ir.h" 15#include "lj_ir.h"
@@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
163 162
164/* -- Loop unrolling using copy-substitution ------------------------------ */ 163/* -- Loop unrolling using copy-substitution ------------------------------ */
165 164
165/* Copy-substitute snapshot.
**
** Builds one snapshot for the unrolled loop body from a recorded snapshot.
**   J       - JIT state; J->cur.snap, J->cur.snapmap, J->cur.nsnap,
**             J->cur.nsnapmap and J->guardemit are read and updated.
**   osnap   - recorded snapshot to copy (only read here).
**   loopmap - entries of the loop snapshot, used as fallback for slots that
**             osnap does not contain.
**   subst   - substitution table, indexed by the old (non-constant) IR ref.
**
** The new snapshot's entries are a merge of osnap's entries (with their refs
** substituted) and the loop map's entries, followed by a verbatim copy of
** osnap's frame links.
**
** Caveat: loopmap is read without an explicit bound. Termination relies on
** an entry at its end whose slot is not below any compared slot; loop_unroll
** stores SNAP(255, 0, 0) there before calling this function.
** NOTE(review): the merge presumably requires both maps to be ordered by
** ascending slot -- confirm against the snapshot builder.
** NOTE(review): no capacity check is visible here; the caller is expected to
** have grown the snapshot buffer and map beforehand.
*/
166static void loop_subst_snap(jit_State *J, SnapShot *osnap,
167 SnapEntry *loopmap, IRRef1 *subst)
168{
169 SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
170 MSize nmapofs, nframelinks;
 /* on/ln/nn: cursors into the old map, the loop map and the new map. */
171 MSize on, ln, nn, onent = osnap->nent;
172 BCReg nslots = osnap->nslots;
173 SnapShot *snap = &J->cur.snap[J->cur.nsnap];
174 if (irt_isguard(J->guardemit)) { /* Guard in between? */
 /* New entries are appended at the current end of the snapshot map. */
175 nmapofs = J->cur.nsnapmap;
176 J->cur.nsnap++; /* Add new snapshot. */
177 } else { /* Otherwise overwrite previous snapshot. */
178 snap--;
 /* Reuse the map region of the snapshot being overwritten. */
179 nmapofs = snap->mapofs;
180 }
 /* Reset the guard marker so the next call sees only guards emitted later. */
181 J->guardemit.irt = 0;
182 nframelinks = osnap->nframelinks;
183 /* Setup new snapshot. */
 /* Values are narrowed to the field widths by the casts below. */
184 snap->mapofs = (uint16_t)nmapofs;
185 snap->ref = (IRRef1)J->cur.nins; /* Snapshot refers to the next emitted instruction. */
186 snap->nframelinks = (uint8_t)nframelinks;
187 snap->nslots = nslots;
188 snap->count = 0;
189 nmap = &J->cur.snapmap[nmapofs];
190 /* Substitute snapshot slots. */
191 on = ln = nn = 0;
 /* Merge until all entries of the old snapshot map are consumed. */
192 while (on < onent) {
193 SnapEntry osn = omap[on], lsn = loopmap[ln];
194 if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */
195 nmap[nn++] = lsn;
196 ln++;
197 } else { /* Copy substituted slot from snapshot map. */
198 if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */
 /* Constant refs are kept as-is; all other refs go through subst[]. */
199 if (!irref_isk(snap_ref(osn)))
200 osn = snap_setref(osn, subst[snap_ref(osn)]);
201 nmap[nn++] = osn;
202 on++;
203 }
204 }
205 while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */
206 nmap[nn++] = loopmap[ln++];
207 snap->nent = (uint8_t)nn;
 /* The new end of the map lies after the entries plus the frame links. */
208 J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks);
 /* Advance both pointers past the slot entries to the frame link area. */
209 omap += onent;
210 nmap += nn;
211 for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */
212 nmap[nn] = omap[nn];
213}
214
166/* Unroll loop. */ 215/* Unroll loop. */
167static void loop_unroll(jit_State *J) 216static void loop_unroll(jit_State *J)
168{ 217{
169 IRRef1 phi[LJ_MAX_PHI]; 218 IRRef1 phi[LJ_MAX_PHI];
170 uint32_t nphi = 0; 219 uint32_t nphi = 0;
171 IRRef1 *subst; 220 IRRef1 *subst;
172 SnapShot *osnap, *snap; 221 SnapShot *osnap;
173 SnapEntry *loopmap; 222 SnapEntry *loopmap, *psentinel;
174 BCReg loopslots; 223 IRRef ins, invar;
175 MSize nsnap, nsnapmap;
176 IRRef ins, invar, osnapref;
177 224
178 /* Use temp buffer for substitution table. 225 /* Use temp buffer for substitution table.
179 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. 226 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
180 ** Note: don't call into the VM or run the GC or the buffer may be gone. 227 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
181 */ 228 */
182 invar = J->cur.nins; 229 invar = J->cur.nins;
183 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 230 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
@@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J)
187 /* LOOP separates the pre-roll from the loop body. */ 234 /* LOOP separates the pre-roll from the loop body. */
188 emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); 235 emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
189 236
190 /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */ 237 /* Grow snapshot buffer and map for copy-substituted snapshots.
191 nsnap = J->cur.nsnap; 238 ** Need up to twice the number of snapshots minus #0 and loop snapshot.
192 if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) { 239 ** Need up to twice the number of entries plus fallback substitutions
193 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; 240 ** from the loop snapshot entries for each new snapshot.
194 if (2*nsnap-2 > maxsnap) 241 ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
195 lj_trace_err(J, LJ_TRERR_SNAPOV); 242 */
196 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); 243 {
197 J->cur.snap = J->snapbuf; 244 MSize nsnap = J->cur.nsnap;
198 } 245 SnapShot *loopsnap;
199 nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */ 246 lj_snap_grow_buf(J, 2*nsnap-2);
200 if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) { 247 lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent);
201 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
202 J->sizesnapmap*sizeof(SnapEntry),
203 2*J->sizesnapmap*sizeof(SnapEntry));
204 J->cur.snapmap = J->snapmapbuf;
205 J->sizesnapmap *= 2;
206 }
207 248
208 /* The loop snapshot is used for fallback substitutions. */ 249 /* The loop snapshot is used for fallback substitutions. */
209 snap = &J->cur.snap[nsnap-1]; 250 loopsnap = &J->cur.snap[nsnap-1];
210 loopmap = &J->cur.snapmap[snap->mapofs]; 251 loopmap = &J->cur.snapmap[loopsnap->mapofs];
211 loopslots = snap->nslots; 252 /* The PC of snapshot #0 and the loop snapshot must match. */
212 /* The PC of snapshot #0 and the loop snapshot must match. */ 253 psentinel = &loopmap[loopsnap->nent];
213 lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]); 254 lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
255 *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
256 }
214 257
215 /* Start substitution with snapshot #1 (#0 is empty for root traces). */ 258 /* Start substitution with snapshot #1 (#0 is empty for root traces). */
216 osnap = &J->cur.snap[1]; 259 osnap = &J->cur.snap[1];
217 osnapref = osnap->ref;
218 260
219 /* Copy and substitute all recorded instructions and snapshots. */ 261 /* Copy and substitute all recorded instructions and snapshots. */
220 for (ins = REF_FIRST; ins < invar; ins++) { 262 for (ins = REF_FIRST; ins < invar; ins++) {
221 IRIns *ir; 263 IRIns *ir;
222 IRRef op1, op2; 264 IRRef op1, op2;
223 265
224 /* Copy-substitute snapshot. */ 266 if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */
225 if (ins >= osnapref) { 267 loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */
226 SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
227 BCReg s, nslots;
228 uint32_t nmapofs, nframelinks;
229 if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
230 nmapofs = nsnapmap;
231 snap++; /* Add new snapshot. */
232 } else {
233 nmapofs = snap->mapofs; /* Overwrite previous snapshot. */
234 }
235 J->guardemit.irt = 0;
236 nslots = osnap->nslots;
237 nframelinks = osnap->nframelinks;
238 snap->mapofs = (uint16_t)nmapofs;
239 snap->ref = (IRRef1)J->cur.nins;
240 snap->nslots = (uint8_t)nslots;
241 snap->nframelinks = (uint8_t)nframelinks;
242 snap->count = 0;
243 osnap++;
244 osnapref = osnap->ref;
245 nsnapmap = nmapofs + nslots + nframelinks;
246 nmap = &J->cur.snapmap[nmapofs];
247 /* Substitute snapshot slots. */
248 for (s = 0; s < nslots; s++) {
249 IRRef ref = snap_ref(omap[s]);
250 if (ref) {
251 if (!irref_isk(ref))
252 ref = subst[ref];
253 } else if (s < loopslots) {
254 ref = loopmap[s];
255 }
256 nmap[s] = ref;
257 }
258 /* Copy frame links. */
259 nmap += nslots;
260 omap += nslots;
261 for (s = 0; s < nframelinks; s++)
262 nmap[s] = omap[s];
263 }
264 268
265 /* Substitute instruction operands. */ 269 /* Substitute instruction operands. */
266 ir = IR(ins); 270 ir = IR(ins);
@@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J)
295 } 299 }
296 } 300 }
297 } 301 }
298 if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ 302 if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
299 J->cur.nsnapmap = (uint16_t)nsnapmap; 303 J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
300 snap++;
301 } else {
302 J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */
303 }
304 J->cur.nsnap = (uint16_t)(snap - J->cur.snap);
305 lua_assert(J->cur.nsnapmap <= J->sizesnapmap); 304 lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
305 *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
306 306
307 loop_emit_phi(J, subst, phi, nphi); 307 loop_emit_phi(J, subst, phi, nphi);
308} 308}
309 309
310/* Undo any partial changes made by the loop optimization. */ 310/* Undo any partial changes made by the loop optimization. */
311static void loop_undo(jit_State *J, IRRef ins) 311static void loop_undo(jit_State *J, IRRef ins, MSize nsnap)
312{ 312{
313 ptrdiff_t i; 313 ptrdiff_t i;
314 SnapShot *snap = &J->cur.snap[nsnap-1];
315 SnapEntry *map = J->cur.snapmap;
316 map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */
317 J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks);
318 J->cur.nsnap = nsnap;
319 J->guardemit.irt = 0;
314 lj_ir_rollback(J, ins); 320 lj_ir_rollback(J, ins);
315 for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ 321 for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
316 BPropEntry *bp = &J->bpropcache[i]; 322 BPropEntry *bp = &J->bpropcache[i];
@@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
336int lj_opt_loop(jit_State *J) 342int lj_opt_loop(jit_State *J)
337{ 343{
338 IRRef nins = J->cur.nins; 344 IRRef nins = J->cur.nins;
345 MSize nsnap = J->cur.nsnap;
339 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 346 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
340 if (LJ_UNLIKELY(errcode)) { 347 if (LJ_UNLIKELY(errcode)) {
341 lua_State *L = J->L; 348 lua_State *L = J->L;
@@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J)
348 if (--J->instunroll < 0) /* But do not unroll forever. */ 355 if (--J->instunroll < 0) /* But do not unroll forever. */
349 break; 356 break;
350 L->top--; /* Remove error object. */ 357 L->top--; /* Remove error object. */
351 J->guardemit.irt = 0; 358 loop_undo(J, nins, nsnap);
352 loop_undo(J, nins);
353 return 1; /* Loop optimization failed, continue recording. */ 359 return 1; /* Loop optimization failed, continue recording. */
354 default: 360 default:
355 break; 361 break;