diff options
Diffstat (limited to 'src/lj_snap.c')
-rw-r--r-- | src/lj_snap.c | 247 |
1 files changed, 131 insertions, 116 deletions
diff --git a/src/lj_snap.c b/src/lj_snap.c index f262e1c9..d22c90a4 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -23,28 +23,50 @@ | |||
23 | /* Some local macros to save typing. Undef'd at the end. */ | 23 | /* Some local macros to save typing. Undef'd at the end. */ |
24 | #define IR(ref) (&J->cur.ir[(ref)]) | 24 | #define IR(ref) (&J->cur.ir[(ref)]) |
25 | 25 | ||
26 | /* -- Snapshot buffer allocation ------------------------------------------ */ | ||
27 | |||
28 | /* Grow snapshot buffer. */ | ||
29 | void lj_snap_grow_buf_(jit_State *J, MSize need) | ||
30 | { | ||
31 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
32 | if (need > maxsnap) | ||
33 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
34 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
35 | J->cur.snap = J->snapbuf; | ||
36 | } | ||
37 | |||
38 | /* Grow snapshot map buffer. */ | ||
39 | void lj_snap_grow_map_(jit_State *J, MSize need) | ||
40 | { | ||
41 | if (need < 2*J->sizesnapmap) | ||
42 | need = 2*J->sizesnapmap; | ||
43 | else if (need < 64) | ||
44 | need = 64; | ||
45 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
46 | J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); | ||
47 | J->cur.snapmap = J->snapmapbuf; | ||
48 | J->sizesnapmap = need; | ||
49 | } | ||
50 | |||
26 | /* -- Snapshot generation ------------------------------------------------- */ | 51 | /* -- Snapshot generation ------------------------------------------------- */ |
27 | 52 | ||
28 | /* NYI: Snapshots are in need of a redesign. The current storage model for | 53 | /* NYI: IR_FRAME should be eliminated, too. */ |
29 | ** snapshot maps is too wasteful. They could be compressed (1D or 2D) and | ||
30 | ** made more flexible at the same time. Iterators should no longer need to | ||
31 | ** skip unmodified slots. IR_FRAME should be eliminated, too. | ||
32 | */ | ||
33 | 54 | ||
34 | /* Add all modified slots to the snapshot. */ | 55 | /* Add all modified slots to the snapshot. */ |
35 | static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | 56 | static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) |
36 | { | 57 | { |
37 | BCReg s; | 58 | BCReg s; |
59 | MSize n = 0; | ||
38 | for (s = 0; s < nslots; s++) { | 60 | for (s = 0; s < nslots; s++) { |
39 | IRRef ref = tref_ref(J->slot[s]); | 61 | IRRef ref = tref_ref(J->slot[s]); |
40 | if (ref) { | 62 | if (ref) { |
41 | IRIns *ir = IR(ref); | 63 | IRIns *ir = IR(ref); |
42 | if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) | 64 | if (!(ir->o == IR_SLOAD && ir->op1 == s && |
43 | ref = 0; | 65 | !(ir->op2 & IRSLOAD_INHERIT))) |
66 | map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref); | ||
44 | } | 67 | } |
45 | map[s] = (SnapEntry)ref; | ||
46 | } | 68 | } |
47 | return nslots; | 69 | return n; |
48 | } | 70 | } |
49 | 71 | ||
50 | /* Add frame links at the end of the snapshot. */ | 72 | /* Add frame links at the end of the snapshot. */ |
@@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) | |||
53 | cTValue *frame = J->L->base - 1; | 75 | cTValue *frame = J->L->base - 1; |
54 | cTValue *lim = J->L->base - J->baseslot; | 76 | cTValue *lim = J->L->base - J->baseslot; |
55 | MSize f = 0; | 77 | MSize f = 0; |
56 | map[f++] = u32ptr(J->pc); | 78 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ |
57 | while (frame > lim) { | 79 | while (frame > lim) { /* Backwards traversal of all frames above base. */ |
58 | if (frame_islua(frame)) { | 80 | if (frame_islua(frame)) { |
59 | map[f++] = u32ptr(frame_pc(frame)); | 81 | map[f++] = SNAP_MKPC(frame_pc(frame)); |
60 | frame = frame_prevl(frame); | 82 | frame = frame_prevl(frame); |
61 | } else if (frame_ispcall(frame)) { | 83 | } else if (frame_ispcall(frame)) { |
62 | map[f++] = (uint32_t)frame_ftsz(frame); | 84 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
63 | frame = frame_prevd(frame); | 85 | frame = frame_prevd(frame); |
64 | } else if (frame_iscont(frame)) { | 86 | } else if (frame_iscont(frame)) { |
65 | map[f++] = (uint32_t)frame_ftsz(frame); | 87 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
66 | map[f++] = u32ptr(frame_contpc(frame)); | 88 | map[f++] = SNAP_MKPC(frame_contpc(frame)); |
67 | frame = frame_prevd(frame); | 89 | frame = frame_prevd(frame); |
68 | } else { | 90 | } else { |
69 | lua_assert(0); | 91 | lua_assert(0); |
@@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) | |||
76 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | 98 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) |
77 | { | 99 | { |
78 | BCReg nslots = J->baseslot + J->maxslot; | 100 | BCReg nslots = J->baseslot + J->maxslot; |
79 | MSize nsm, nframelinks; | 101 | MSize nent, nframelinks; |
80 | SnapEntry *p; | 102 | SnapEntry *p; |
81 | /* Conservative estimate. Continuation frames need 2 slots. */ | 103 | /* Conservative estimate. Continuation frames need 2 slots. */ |
82 | nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; | 104 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1); |
83 | if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */ | ||
84 | if (nsm < 2*J->sizesnapmap) | ||
85 | nsm = 2*J->sizesnapmap; | ||
86 | else if (nsm < 64) | ||
87 | nsm = 64; | ||
88 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
89 | J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry)); | ||
90 | J->cur.snapmap = J->snapmapbuf; | ||
91 | J->sizesnapmap = nsm; | ||
92 | } | ||
93 | p = &J->cur.snapmap[nsnapmap]; | 105 | p = &J->cur.snapmap[nsnapmap]; |
94 | nslots = snapshot_slots(J, p, nslots); | 106 | nent = snapshot_slots(J, p, nslots); |
95 | nframelinks = snapshot_framelinks(J, p + nslots); | 107 | nframelinks = snapshot_framelinks(J, p + nent); |
96 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); | 108 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks); |
97 | snap->mapofs = (uint16_t)nsnapmap; | 109 | snap->mapofs = (uint16_t)nsnapmap; |
98 | snap->ref = (IRRef1)J->cur.nins; | 110 | snap->ref = (IRRef1)J->cur.nins; |
99 | snap->nslots = (uint8_t)nslots; | 111 | snap->nent = (uint8_t)nent; |
100 | snap->nframelinks = (uint8_t)nframelinks; | 112 | snap->nframelinks = (uint8_t)nframelinks; |
113 | snap->nslots = (uint8_t)nslots; | ||
101 | snap->count = 0; | 114 | snap->count = 0; |
102 | } | 115 | } |
103 | 116 | ||
@@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J) | |||
111 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { | 124 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { |
112 | nsnapmap = J->cur.snap[--nsnap].mapofs; | 125 | nsnapmap = J->cur.snap[--nsnap].mapofs; |
113 | } else { | 126 | } else { |
114 | /* Need to grow snapshot buffer? */ | 127 | lj_snap_grow_buf(J, nsnap+1); |
115 | if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { | ||
116 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
117 | if (nsnap >= maxsnap) | ||
118 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
119 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
120 | J->cur.snap = J->snapbuf; | ||
121 | } | ||
122 | J->cur.nsnap = (uint16_t)(nsnap+1); | 128 | J->cur.nsnap = (uint16_t)(nsnap+1); |
123 | } | 129 | } |
124 | J->mergesnap = 0; | 130 | J->mergesnap = 0; |
@@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J) | |||
131 | { | 137 | { |
132 | BCReg nslots = J->baseslot + J->maxslot; | 138 | BCReg nslots = J->baseslot + J->maxslot; |
133 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 139 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
134 | SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; | 140 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; |
135 | SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; | 141 | MSize nent = snap->nent; |
136 | uint32_t s, nframelinks = snap->nframelinks; | ||
137 | lua_assert(nslots < snap->nslots); | 142 | lua_assert(nslots < snap->nslots); |
138 | snap->nslots = (uint8_t)nslots; | 143 | snap->nslots = (uint8_t)nslots; |
139 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); | 144 | if (nent > 0 && snap_slot(map[nent-1]) >= nslots) { |
140 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | 145 | MSize s, delta, nframelinks = snap->nframelinks; |
141 | nflinks[s] = oflinks[s]; | 146 | for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--) |
147 | ; | ||
148 | delta = snap->nent - nent; | ||
149 | snap->nent = (uint8_t)nent; | ||
150 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks); | ||
151 | map += nent; | ||
152 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | ||
153 | map[s] = map[s+delta]; | ||
154 | } | ||
142 | } | 155 | } |
143 | 156 | ||
144 | /* -- Snapshot access ----------------------------------------------------- */ | 157 | /* -- Snapshot access ----------------------------------------------------- */ |
@@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs) | |||
167 | return rs; | 180 | return rs; |
168 | } | 181 | } |
169 | 182 | ||
170 | /* Convert a snapshot into a linear slot -> RegSP map. */ | 183 | /* Convert a snapshot into a linear slot -> RegSP map. |
184 | ** Note: unused slots are not initialized! | ||
185 | */ | ||
171 | void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) | 186 | void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) |
172 | { | 187 | { |
173 | SnapShot *snap = &T->snap[snapno]; | 188 | SnapShot *snap = &T->snap[snapno]; |
174 | BCReg s, nslots = snap->nslots; | 189 | MSize n, nent = snap->nent; |
175 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 190 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
176 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 191 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
177 | for (s = 0; s < nslots; s++) { | 192 | for (n = 0; n < nent; n++) { |
178 | IRRef ref = snap_ref(map[s]); | 193 | SnapEntry sn = map[n]; |
194 | IRRef ref = snap_ref(sn); | ||
179 | if (!irref_isk(ref)) { | 195 | if (!irref_isk(ref)) { |
180 | IRIns *ir = &T->ir[ref]; | 196 | IRIns *ir = &T->ir[ref]; |
181 | uint32_t rs = ir->prev; | 197 | uint32_t rs = ir->prev; |
182 | if (bloomtest(rfilt, ref)) | 198 | if (bloomtest(rfilt, ref)) |
183 | rs = snap_renameref(T, snapno, ref, rs); | 199 | rs = snap_renameref(T, snapno, ref, rs); |
184 | rsmap[s] = (uint16_t)rs; | 200 | rsmap[snap_slot(sn)] = (uint16_t)rs; |
185 | } | 201 | } |
186 | } | 202 | } |
187 | } | 203 | } |
@@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
193 | SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ | 209 | SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ |
194 | Trace *T = J->trace[J->parent]; | 210 | Trace *T = J->trace[J->parent]; |
195 | SnapShot *snap = &T->snap[snapno]; | 211 | SnapShot *snap = &T->snap[snapno]; |
196 | BCReg s, nslots = snap->nslots; | 212 | MSize n, nent = snap->nent; |
197 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 213 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
198 | SnapEntry *flinks = map + nslots + snap->nframelinks; | 214 | SnapEntry *flinks = map + nent + snap->nframelinks; |
199 | TValue *o, *newbase, *ntop; | 215 | BCReg nslots = snap->nslots; |
216 | TValue *frame; | ||
200 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 217 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
201 | lua_State *L = J->L; | 218 | lua_State *L = J->L; |
202 | 219 | ||
203 | /* Make sure the stack is big enough for the slots from the snapshot. */ | 220 | /* Make sure the stack is big enough for the slots from the snapshot. */ |
204 | if (L->base + nslots >= L->maxstack) { | 221 | if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) { |
205 | L->top = curr_topL(L); | 222 | L->top = curr_topL(L); |
206 | lj_state_growstack(L, nslots - curr_proto(L)->framesize); | 223 | lj_state_growstack(L, nslots - curr_proto(L)->framesize); |
207 | } | 224 | } |
208 | 225 | ||
209 | /* Fill stack slots with data from the registers and spill slots. */ | 226 | /* Fill stack slots with data from the registers and spill slots. */ |
210 | newbase = NULL; | 227 | frame = L->base-1; |
211 | ntop = L->base; | 228 | for (n = 0; n < nent; n++) { |
212 | for (s = 0, o = L->base-1; s < nslots; s++, o++) { | 229 | IRRef ref = snap_ref(map[n]); |
213 | IRRef ref = snap_ref(map[s]); | 230 | BCReg s = snap_slot(map[n]); |
214 | if (ref) { | 231 | TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ |
215 | IRIns *ir = &T->ir[ref]; | 232 | IRIns *ir = &T->ir[ref]; |
216 | if (irref_isk(ref)) { /* Restore constant slot. */ | 233 | if (irref_isk(ref)) { /* Restore constant slot. */ |
217 | lj_ir_kvalue(L, o, ir); | 234 | lj_ir_kvalue(L, o, ir); |
218 | } else { | 235 | } else { |
219 | IRType1 t = ir->t; | 236 | IRType1 t = ir->t; |
220 | RegSP rs = ir->prev; | 237 | RegSP rs = ir->prev; |
221 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) | 238 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) |
222 | rs = snap_renameref(T, snapno, ref, rs); | 239 | rs = snap_renameref(T, snapno, ref, rs); |
223 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ | 240 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ |
224 | int32_t *sps = &ex->spill[regsp_spill(rs)]; | 241 | int32_t *sps = &ex->spill[regsp_spill(rs)]; |
225 | if (irt_isinteger(t)) { | 242 | if (irt_isinteger(t)) { |
226 | setintV(o, *sps); | 243 | setintV(o, *sps); |
227 | } else if (irt_isnum(t)) { | 244 | } else if (irt_isnum(t)) { |
228 | o->u64 = *(uint64_t *)sps; | 245 | o->u64 = *(uint64_t *)sps; |
229 | } else { | 246 | } else { |
230 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ | 247 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ |
231 | setgcrefi(o->gcr, *sps); | 248 | setgcrefi(o->gcr, *sps); |
232 | setitype(o, irt_toitype(t)); | 249 | setitype(o, irt_toitype(t)); |
233 | } | 250 | } |
234 | } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ | 251 | } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ |
235 | Reg r = regsp_reg(rs); | 252 | Reg r = regsp_reg(rs); |
236 | if (irt_isinteger(t)) { | 253 | if (irt_isinteger(t)) { |
237 | setintV(o, ex->gpr[r-RID_MIN_GPR]); | 254 | setintV(o, ex->gpr[r-RID_MIN_GPR]); |
238 | } else if (irt_isnum(t)) { | 255 | } else if (irt_isnum(t)) { |
239 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); | 256 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); |
240 | } else { | 257 | } else { |
241 | if (!irt_ispri(t)) | 258 | if (!irt_ispri(t)) |
242 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); | 259 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); |
243 | setitype(o, irt_toitype(t)); | 260 | setitype(o, irt_toitype(t)); |
244 | } | 261 | } |
245 | } else { /* Restore frame slot. */ | 262 | } else { /* Restore frame slot. */ |
246 | lua_assert(ir->o == IR_FRAME); | 263 | lua_assert(ir->o == IR_FRAME); |
247 | /* This works for both PTR and FUNC IR_FRAME. */ | 264 | /* This works for both PTR and FUNC IR_FRAME. */ |
248 | setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); | 265 | setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); |
249 | if (s != 0) /* Do not overwrite link to previous frame. */ | 266 | if (s != 0) /* Do not overwrite link to previous frame. */ |
250 | o->fr.tp.ftsz = (int32_t)*--flinks; | 267 | o->fr.tp.ftsz = (int32_t)*--flinks; |
251 | if (irt_isfunc(ir->t)) { | 268 | if (irt_isfunc(ir->t)) { |
252 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); | 269 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); |
253 | if (isluafunc(fn)) { | 270 | if (isluafunc(fn)) { |
254 | TValue *fs; | 271 | MSize framesize = funcproto(fn)->framesize; |
255 | fs = o+1 + funcproto(fn)->framesize; | 272 | TValue *fs; |
256 | if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ | 273 | L->base = ++o; |
257 | if (s != 0) newbase = o+1; | 274 | if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */ |
275 | ptrdiff_t fsave = savestack(L, frame); | ||
276 | L->top = o; | ||
277 | lj_state_growstack(L, framesize); | ||
278 | frame = restorestack(L, fsave); | ||
279 | o = L->top; | ||
258 | } | 280 | } |
281 | fs = o + framesize; | ||
282 | if (s == 0) /* Only partially clear tail call frame at #0. */ | ||
283 | o = &frame[nslots]; | ||
284 | while (o < fs) /* Clear slots of newly added frames. */ | ||
285 | setnilV(o++); | ||
259 | } | 286 | } |
260 | } | 287 | } |
261 | } | 288 | } |
262 | } else { | ||
263 | lua_assert(!newbase); | ||
264 | } | 289 | } |
265 | } | 290 | } |
266 | if (newbase) L->base = newbase; | ||
267 | if (ntop >= L->maxstack) { /* Need to grow the stack again. */ | ||
268 | MSize need = (MSize)(ntop - o); | ||
269 | L->top = o; | ||
270 | lj_state_growstack(L, need); | ||
271 | o = L->top; | ||
272 | ntop = o + need; | ||
273 | } | ||
274 | L->top = curr_topL(L); | 291 | L->top = curr_topL(L); |
275 | for (; o < ntop; o++) /* Clear remainder of newly added frames. */ | 292 | J->pc = snap_pc(*--flinks); |
276 | setnilV(o); | 293 | lua_assert(map + nent == flinks); |
277 | lua_assert(map + nslots == flinks-1); | ||
278 | J->pc = (const BCIns *)(uintptr_t)(*--flinks); | ||
279 | } | 294 | } |
280 | 295 | ||
281 | #undef IR | 296 | #undef IR |