summary | refs | log | tree | commit | diff
path: root/src/lj_snap.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_snap.c')
-rw-r--r-- src/lj_snap.c 247
1 files changed, 131 insertions, 116 deletions
diff --git a/src/lj_snap.c b/src/lj_snap.c
index f262e1c9..d22c90a4 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -23,28 +23,50 @@
23/* Some local macros to save typing. Undef'd at the end. */ 23/* Some local macros to save typing. Undef'd at the end. */
24#define IR(ref) (&J->cur.ir[(ref)]) 24#define IR(ref) (&J->cur.ir[(ref)])
25 25
26/* -- Snapshot buffer allocation ------------------------------------------ */
27
28/* Grow snapshot buffer. */
29void lj_snap_grow_buf_(jit_State *J, MSize need)
30{
31 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
32 if (need > maxsnap)
33 lj_trace_err(J, LJ_TRERR_SNAPOV);
34 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
35 J->cur.snap = J->snapbuf;
36}
37
38/* Grow snapshot map buffer. */
39void lj_snap_grow_map_(jit_State *J, MSize need)
40{
41 if (need < 2*J->sizesnapmap)
42 need = 2*J->sizesnapmap;
43 else if (need < 64)
44 need = 64;
45 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
46 J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
47 J->cur.snapmap = J->snapmapbuf;
48 J->sizesnapmap = need;
49}
50
26/* -- Snapshot generation ------------------------------------------------- */ 51/* -- Snapshot generation ------------------------------------------------- */
27 52
28/* NYI: Snapshots are in need of a redesign. The current storage model for 53/* NYI: IR_FRAME should be eliminated, too. */
29** snapshot maps is too wasteful. They could be compressed (1D or 2D) and
30** made more flexible at the same time. Iterators should no longer need to
31** skip unmodified slots. IR_FRAME should be eliminated, too.
32*/
33 54
34/* Add all modified slots to the snapshot. */ 55/* Add all modified slots to the snapshot. */
35static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) 56static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
36{ 57{
37 BCReg s; 58 BCReg s;
59 MSize n = 0;
38 for (s = 0; s < nslots; s++) { 60 for (s = 0; s < nslots; s++) {
39 IRRef ref = tref_ref(J->slot[s]); 61 IRRef ref = tref_ref(J->slot[s]);
40 if (ref) { 62 if (ref) {
41 IRIns *ir = IR(ref); 63 IRIns *ir = IR(ref);
42 if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) 64 if (!(ir->o == IR_SLOAD && ir->op1 == s &&
43 ref = 0; 65 !(ir->op2 & IRSLOAD_INHERIT)))
66 map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref);
44 } 67 }
45 map[s] = (SnapEntry)ref;
46 } 68 }
47 return nslots; 69 return n;
48} 70}
49 71
50/* Add frame links at the end of the snapshot. */ 72/* Add frame links at the end of the snapshot. */
@@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
53 cTValue *frame = J->L->base - 1; 75 cTValue *frame = J->L->base - 1;
54 cTValue *lim = J->L->base - J->baseslot; 76 cTValue *lim = J->L->base - J->baseslot;
55 MSize f = 0; 77 MSize f = 0;
56 map[f++] = u32ptr(J->pc); 78 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
57 while (frame > lim) { 79 while (frame > lim) { /* Backwards traversal of all frames above base. */
58 if (frame_islua(frame)) { 80 if (frame_islua(frame)) {
59 map[f++] = u32ptr(frame_pc(frame)); 81 map[f++] = SNAP_MKPC(frame_pc(frame));
60 frame = frame_prevl(frame); 82 frame = frame_prevl(frame);
61 } else if (frame_ispcall(frame)) { 83 } else if (frame_ispcall(frame)) {
62 map[f++] = (uint32_t)frame_ftsz(frame); 84 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
63 frame = frame_prevd(frame); 85 frame = frame_prevd(frame);
64 } else if (frame_iscont(frame)) { 86 } else if (frame_iscont(frame)) {
65 map[f++] = (uint32_t)frame_ftsz(frame); 87 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
66 map[f++] = u32ptr(frame_contpc(frame)); 88 map[f++] = SNAP_MKPC(frame_contpc(frame));
67 frame = frame_prevd(frame); 89 frame = frame_prevd(frame);
68 } else { 90 } else {
69 lua_assert(0); 91 lua_assert(0);
@@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
76static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) 98static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
77{ 99{
78 BCReg nslots = J->baseslot + J->maxslot; 100 BCReg nslots = J->baseslot + J->maxslot;
79 MSize nsm, nframelinks; 101 MSize nent, nframelinks;
80 SnapEntry *p; 102 SnapEntry *p;
81 /* Conservative estimate. Continuation frames need 2 slots. */ 103 /* Conservative estimate. Continuation frames need 2 slots. */
82 nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; 104 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1);
83 if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */
84 if (nsm < 2*J->sizesnapmap)
85 nsm = 2*J->sizesnapmap;
86 else if (nsm < 64)
87 nsm = 64;
88 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
89 J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry));
90 J->cur.snapmap = J->snapmapbuf;
91 J->sizesnapmap = nsm;
92 }
93 p = &J->cur.snapmap[nsnapmap]; 105 p = &J->cur.snapmap[nsnapmap];
94 nslots = snapshot_slots(J, p, nslots); 106 nent = snapshot_slots(J, p, nslots);
95 nframelinks = snapshot_framelinks(J, p + nslots); 107 nframelinks = snapshot_framelinks(J, p + nent);
96 J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); 108 J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks);
97 snap->mapofs = (uint16_t)nsnapmap; 109 snap->mapofs = (uint16_t)nsnapmap;
98 snap->ref = (IRRef1)J->cur.nins; 110 snap->ref = (IRRef1)J->cur.nins;
99 snap->nslots = (uint8_t)nslots; 111 snap->nent = (uint8_t)nent;
100 snap->nframelinks = (uint8_t)nframelinks; 112 snap->nframelinks = (uint8_t)nframelinks;
113 snap->nslots = (uint8_t)nslots;
101 snap->count = 0; 114 snap->count = 0;
102} 115}
103 116
@@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J)
111 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { 124 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
112 nsnapmap = J->cur.snap[--nsnap].mapofs; 125 nsnapmap = J->cur.snap[--nsnap].mapofs;
113 } else { 126 } else {
114 /* Need to grow snapshot buffer? */ 127 lj_snap_grow_buf(J, nsnap+1);
115 if (LJ_UNLIKELY(nsnap >= J->sizesnap)) {
116 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
117 if (nsnap >= maxsnap)
118 lj_trace_err(J, LJ_TRERR_SNAPOV);
119 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
120 J->cur.snap = J->snapbuf;
121 }
122 J->cur.nsnap = (uint16_t)(nsnap+1); 128 J->cur.nsnap = (uint16_t)(nsnap+1);
123 } 129 }
124 J->mergesnap = 0; 130 J->mergesnap = 0;
@@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J)
131{ 137{
132 BCReg nslots = J->baseslot + J->maxslot; 138 BCReg nslots = J->baseslot + J->maxslot;
133 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; 139 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
134 SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; 140 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
135 SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; 141 MSize nent = snap->nent;
136 uint32_t s, nframelinks = snap->nframelinks;
137 lua_assert(nslots < snap->nslots); 142 lua_assert(nslots < snap->nslots);
138 snap->nslots = (uint8_t)nslots; 143 snap->nslots = (uint8_t)nslots;
139 J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); 144 if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
140 for (s = 0; s < nframelinks; s++) /* Move frame links down. */ 145 MSize s, delta, nframelinks = snap->nframelinks;
141 nflinks[s] = oflinks[s]; 146 for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
147 ;
148 delta = snap->nent - nent;
149 snap->nent = (uint8_t)nent;
150 J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks);
151 map += nent;
152 for (s = 0; s < nframelinks; s++) /* Move frame links down. */
153 map[s] = map[s+delta];
154 }
142} 155}
143 156
144/* -- Snapshot access ----------------------------------------------------- */ 157/* -- Snapshot access ----------------------------------------------------- */
@@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
167 return rs; 180 return rs;
168} 181}
169 182
170/* Convert a snapshot into a linear slot -> RegSP map. */ 183/* Convert a snapshot into a linear slot -> RegSP map.
184** Note: unused slots are not initialized!
185*/
171void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) 186void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
172{ 187{
173 SnapShot *snap = &T->snap[snapno]; 188 SnapShot *snap = &T->snap[snapno];
174 BCReg s, nslots = snap->nslots; 189 MSize n, nent = snap->nent;
175 SnapEntry *map = &T->snapmap[snap->mapofs]; 190 SnapEntry *map = &T->snapmap[snap->mapofs];
176 BloomFilter rfilt = snap_renamefilter(T, snapno); 191 BloomFilter rfilt = snap_renamefilter(T, snapno);
177 for (s = 0; s < nslots; s++) { 192 for (n = 0; n < nent; n++) {
178 IRRef ref = snap_ref(map[s]); 193 SnapEntry sn = map[n];
194 IRRef ref = snap_ref(sn);
179 if (!irref_isk(ref)) { 195 if (!irref_isk(ref)) {
180 IRIns *ir = &T->ir[ref]; 196 IRIns *ir = &T->ir[ref];
181 uint32_t rs = ir->prev; 197 uint32_t rs = ir->prev;
182 if (bloomtest(rfilt, ref)) 198 if (bloomtest(rfilt, ref))
183 rs = snap_renameref(T, snapno, ref, rs); 199 rs = snap_renameref(T, snapno, ref, rs);
184 rsmap[s] = (uint16_t)rs; 200 rsmap[snap_slot(sn)] = (uint16_t)rs;
185 } 201 }
186 } 202 }
187} 203}
@@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr)
193 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ 209 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
194 Trace *T = J->trace[J->parent]; 210 Trace *T = J->trace[J->parent];
195 SnapShot *snap = &T->snap[snapno]; 211 SnapShot *snap = &T->snap[snapno];
196 BCReg s, nslots = snap->nslots; 212 MSize n, nent = snap->nent;
197 SnapEntry *map = &T->snapmap[snap->mapofs]; 213 SnapEntry *map = &T->snapmap[snap->mapofs];
198 SnapEntry *flinks = map + nslots + snap->nframelinks; 214 SnapEntry *flinks = map + nent + snap->nframelinks;
199 TValue *o, *newbase, *ntop; 215 BCReg nslots = snap->nslots;
216 TValue *frame;
200 BloomFilter rfilt = snap_renamefilter(T, snapno); 217 BloomFilter rfilt = snap_renamefilter(T, snapno);
201 lua_State *L = J->L; 218 lua_State *L = J->L;
202 219
203 /* Make sure the stack is big enough for the slots from the snapshot. */ 220 /* Make sure the stack is big enough for the slots from the snapshot. */
204 if (L->base + nslots >= L->maxstack) { 221 if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) {
205 L->top = curr_topL(L); 222 L->top = curr_topL(L);
206 lj_state_growstack(L, nslots - curr_proto(L)->framesize); 223 lj_state_growstack(L, nslots - curr_proto(L)->framesize);
207 } 224 }
208 225
209 /* Fill stack slots with data from the registers and spill slots. */ 226 /* Fill stack slots with data from the registers and spill slots. */
210 newbase = NULL; 227 frame = L->base-1;
211 ntop = L->base; 228 for (n = 0; n < nent; n++) {
212 for (s = 0, o = L->base-1; s < nslots; s++, o++) { 229 IRRef ref = snap_ref(map[n]);
213 IRRef ref = snap_ref(map[s]); 230 BCReg s = snap_slot(map[n]);
214 if (ref) { 231 TValue *o = &frame[s]; /* Stack slots are relative to start frame. */
215 IRIns *ir = &T->ir[ref]; 232 IRIns *ir = &T->ir[ref];
216 if (irref_isk(ref)) { /* Restore constant slot. */ 233 if (irref_isk(ref)) { /* Restore constant slot. */
217 lj_ir_kvalue(L, o, ir); 234 lj_ir_kvalue(L, o, ir);
218 } else { 235 } else {
219 IRType1 t = ir->t; 236 IRType1 t = ir->t;
220 RegSP rs = ir->prev; 237 RegSP rs = ir->prev;
221 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) 238 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
222 rs = snap_renameref(T, snapno, ref, rs); 239 rs = snap_renameref(T, snapno, ref, rs);
223 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ 240 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
224 int32_t *sps = &ex->spill[regsp_spill(rs)]; 241 int32_t *sps = &ex->spill[regsp_spill(rs)];
225 if (irt_isinteger(t)) { 242 if (irt_isinteger(t)) {
226 setintV(o, *sps); 243 setintV(o, *sps);
227 } else if (irt_isnum(t)) { 244 } else if (irt_isnum(t)) {
228 o->u64 = *(uint64_t *)sps; 245 o->u64 = *(uint64_t *)sps;
229 } else { 246 } else {
230 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ 247 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
231 setgcrefi(o->gcr, *sps); 248 setgcrefi(o->gcr, *sps);
232 setitype(o, irt_toitype(t)); 249 setitype(o, irt_toitype(t));
233 } 250 }
234 } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ 251 } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
235 Reg r = regsp_reg(rs); 252 Reg r = regsp_reg(rs);
236 if (irt_isinteger(t)) { 253 if (irt_isinteger(t)) {
237 setintV(o, ex->gpr[r-RID_MIN_GPR]); 254 setintV(o, ex->gpr[r-RID_MIN_GPR]);
238 } else if (irt_isnum(t)) { 255 } else if (irt_isnum(t)) {
239 setnumV(o, ex->fpr[r-RID_MIN_FPR]); 256 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
240 } else { 257 } else {
241 if (!irt_ispri(t)) 258 if (!irt_ispri(t))
242 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); 259 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
243 setitype(o, irt_toitype(t)); 260 setitype(o, irt_toitype(t));
244 } 261 }
245 } else { /* Restore frame slot. */ 262 } else { /* Restore frame slot. */
246 lua_assert(ir->o == IR_FRAME); 263 lua_assert(ir->o == IR_FRAME);
247 /* This works for both PTR and FUNC IR_FRAME. */ 264 /* This works for both PTR and FUNC IR_FRAME. */
248 setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); 265 setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
249 if (s != 0) /* Do not overwrite link to previous frame. */ 266 if (s != 0) /* Do not overwrite link to previous frame. */
250 o->fr.tp.ftsz = (int32_t)*--flinks; 267 o->fr.tp.ftsz = (int32_t)*--flinks;
251 if (irt_isfunc(ir->t)) { 268 if (irt_isfunc(ir->t)) {
252 GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); 269 GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
253 if (isluafunc(fn)) { 270 if (isluafunc(fn)) {
254 TValue *fs; 271 MSize framesize = funcproto(fn)->framesize;
255 fs = o+1 + funcproto(fn)->framesize; 272 TValue *fs;
256 if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ 273 L->base = ++o;
257 if (s != 0) newbase = o+1; 274 if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */
275 ptrdiff_t fsave = savestack(L, frame);
276 L->top = o;
277 lj_state_growstack(L, framesize);
278 frame = restorestack(L, fsave);
279 o = L->top;
258 } 280 }
281 fs = o + framesize;
282 if (s == 0) /* Only partially clear tail call frame at #0. */
283 o = &frame[nslots];
284 while (o < fs) /* Clear slots of newly added frames. */
285 setnilV(o++);
259 } 286 }
260 } 287 }
261 } 288 }
262 } else {
263 lua_assert(!newbase);
264 } 289 }
265 } 290 }
266 if (newbase) L->base = newbase;
267 if (ntop >= L->maxstack) { /* Need to grow the stack again. */
268 MSize need = (MSize)(ntop - o);
269 L->top = o;
270 lj_state_growstack(L, need);
271 o = L->top;
272 ntop = o + need;
273 }
274 L->top = curr_topL(L); 291 L->top = curr_topL(L);
275 for (; o < ntop; o++) /* Clear remainder of newly added frames. */ 292 J->pc = snap_pc(*--flinks);
276 setnilV(o); 293 lua_assert(map + nent == flinks);
277 lua_assert(map + nslots == flinks-1);
278 J->pc = (const BCIns *)(uintptr_t)(*--flinks);
279} 294}
280 295
281#undef IR 296#undef IR