author     Mike Pall <mike>    2011-02-22 22:39:12 +0100
committer  Mike Pall <mike>    2011-02-22 22:39:12 +0100
commit     4c97cc773091d3a7b523283b178ab53451583ca4 (patch)
tree       e4861f7359c30a5802a7f7d32277da3af0f15e79
parent     8d0b073ff0506b94fd0586f96ae6967cf8167290 (diff)
Eliminate dead slots in snapshots using bytecode data-flow analysis.
-rw-r--r--  src/lj_bc.h       2
-rw-r--r--  src/lj_record.c   8
-rw-r--r--  src/lj_snap.c   143
-rw-r--r--  src/lj_snap.h     1
4 files changed, 133 insertions, 21 deletions
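
Background note (a sketch, not part of the patch itself): the new snap_usedef() pass below scans the bytecode following the snapshot PC and keeps one byte per stack slot in udf[]. A read clears bit 0 and pins the byte at 0 ("live"); a write to a still-unread slot multiplies the byte by 3, so the "redefined before any use" fact sticks even if a later read follows. Any slot whose byte stays non-zero can never be observed at a trace exit and may be purged from the snapshot. A minimal stand-alone illustration of that marking trick, using hypothetical mark_use()/mark_def() helpers in place of the USE_SLOT/DEF_SLOT macros:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same bookkeeping as USE_SLOT/DEF_SLOT in the patch below:
** a use clears bit 0 (slot stays live), a def of an unread slot turns 1 into 3,
** and that non-zero value survives later uses (3 & ~1 == 2), keeping it dead.
*/
static void mark_use(uint8_t *udf, unsigned s) { udf[s] &= ~1; }
static void mark_def(uint8_t *udf, unsigned s) { udf[s] *= 3; }

int main(void)
{
  uint8_t udf[3];
  unsigned s;
  memset(udf, 1, sizeof(udf));  /* No use or def seen yet for any slot. */
  mark_use(udf, 0);             /* Slot 0 is read first: must stay in the snapshot. */
  mark_def(udf, 1);             /* Slot 1 is overwritten before any read... */
  mark_use(udf, 1);             /* ...so a later read (of the new value) keeps it dead. */
  mark_def(udf, 2);             /* Slot 2 is only ever written: dead as well. */
  for (s = 0; s < 3; s++)
    printf("slot %u: %s\n", s, udf[s] == 0 ? "live" : "dead");
  return 0;
}
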
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 32c8f73c..9dffe0c0 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -156,8 +156,8 @@
   _(CALLT, base, ___, lit, call) \
   _(ITERC, base, lit, lit, call) \
   _(ITERN, base, lit, lit, call) \
-  _(ISNEXT, base, ___, jump, ___) \
   _(VARG, base, lit, lit, ___) \
+  _(ISNEXT, base, ___, jump, ___) \
   \
   /* Returns. */ \
   _(RETM, base, ___, lit, ___) \
diff --git a/src/lj_record.c b/src/lj_record.c
index 6517a1b7..cfdd3e1a 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1364,11 +1364,8 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
   /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
   J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
   J->needsnap = 1;
-  /* Shrink last snapshot if possible. */
-  if (bc_a(jmpins) < J->maxslot) {
-    J->maxslot = bc_a(jmpins);
-    lj_snap_shrink(J);
-  }
+  if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
+  lj_snap_shrink(J);  /* Shrink last snapshot if possible. */
 }
 
 /* Record the next bytecode instruction (_before_ it's executed). */
@@ -1411,6 +1408,7 @@ void lj_record_ins(jit_State *J)
   /* Need snapshot before recording next bytecode (e.g. after a store). */
   if (J->needsnap) {
     J->needsnap = 0;
+    lj_snap_purge(J);
     lj_snap_add(J);
     J->mergesnap = 1;
   }
diff --git a/src/lj_snap.c b/src/lj_snap.c
index e04da81f..59435b20 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -13,6 +13,7 @@
 #include "lj_gc.h"
 #include "lj_state.h"
 #include "lj_frame.h"
+#include "lj_bc.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_iropt.h"
@@ -138,27 +139,139 @@ void lj_snap_add(jit_State *J)
   snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
 }
 
+/* -- Snapshot modification ----------------------------------------------- */
+
+#define SNAP_USEDEF_SLOTS  (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
+
+/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
+static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+                         const BCIns *pc, BCReg maxslot)
+{
+  BCReg s;
+  GCobj *o;
+
+  if (maxslot == 0) return 0;
+#ifdef LUAJIT_USE_VALGRIND
+  /* Avoid errors for harmless reads beyond maxslot. */
+  memset(udf, 1, SNAP_USEDEF_SLOTS);
+#else
+  memset(udf, 1, maxslot);
+#endif
+
+  /* Treat open upvalues as used. */
+  o = gcref(J->L->openupval);
+  while (o) {
+    if (uvval(gco2uv(o)) < J->L->base) break;
+    udf[uvval(gco2uv(o)) - J->L->base] = 0;
+    o = gcref(o->gch.nextgc);
+  }
+
+#define USE_SLOT(s)  udf[(s)] &= ~1
+#define DEF_SLOT(s)  udf[(s)] *= 3
+
+  /* Scan through following bytecode and check for uses/defs. */
+  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
+  for (;;) {
+    BCIns ins = *pc++;
+    BCOp op = bc_op(ins);
+    switch (bcmode_b(op)) {
+    case BCMvar: USE_SLOT(bc_b(ins)); break;
+    default: break;
+    }
+    switch (bcmode_c(op)) {
+    case BCMvar: USE_SLOT(bc_c(ins)); break;
+    case BCMrbase:
+      lua_assert(op == BC_CAT);
+      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
+      for (; s < maxslot; s++) DEF_SLOT(s);
+      break;
+    case BCMjump:
+    handle_jump: {
+      BCReg minslot = bc_a(ins);
+      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
+      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-1])-1;
+      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
+      return minslot < maxslot ? minslot : maxslot;
+      }
+    case BCMlit:
+      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
+        goto handle_jump;
+      } else if (bc_isret(op)) {
+        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
+        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
+        for (; s < top; s++) USE_SLOT(s);
+        for (; s < maxslot; s++) DEF_SLOT(s);
+        return 0;
+      }
+      break;
+    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
+    default: break;
+    }
+    switch (bcmode_a(op)) {
+    case BCMvar: USE_SLOT(bc_a(ins)); break;
+    case BCMdst:
+      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
+      break;
+    case BCMbase:
+      if (op >= BC_CALLM && op <= BC_VARG) {
+        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
+                    maxslot : (bc_a(ins) + bc_c(ins));
+        for (s = bc_a(ins); s < top; s++) USE_SLOT(s);
+        for (; s < maxslot; s++) DEF_SLOT(s);
+        if (op == BC_CALLT || op == BC_CALLMT) {
+          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
+          return 0;
+        }
+      } else if (op == BC_KNIL) {
+        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
+      } else if (op == BC_TSETM) {
+        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
+      }
+      break;
+    default: break;
+    }
+    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
+  }
+
+#undef USE_SLOT
+#undef DEF_SLOT
+
+  return 0;  /* unreachable */
+}
+
+/* Purge dead slots before the next snapshot. */
+void lj_snap_purge(jit_State *J)
+{
+  uint8_t udf[SNAP_USEDEF_SLOTS];
+  BCReg maxslot = J->maxslot;
+  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
+  for (; s < maxslot; s++)
+    if (udf[s] != 0)
+      J->base[s] = 0;  /* Purge dead slots. */
+}
+
 /* Shrink last snapshot. */
 void lj_snap_shrink(jit_State *J)
 {
-  BCReg nslots = J->baseslot + J->maxslot;
   SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
   SnapEntry *map = &J->cur.snapmap[snap->mapofs];
-  MSize nent = snap->nent;
-  lua_assert(nslots < snap->nslots);
-  snap->nslots = (uint8_t)nslots;
-  if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
-    MSize s, delta, depth = snap->depth;
-    lua_assert(depth == (MSize)J->framedepth);
-    for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
-      ;
-    delta = snap->nent - nent;
-    snap->nent = (uint8_t)nent;
-    J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + 1 + depth);
-    map += nent;
-    for (s = 0; s <= depth; s++)  /* Move PC + frame links down. */
-      map[s] = map[s+delta];
+  MSize n, m, nlim, nent = snap->nent;
+  uint8_t udf[SNAP_USEDEF_SLOTS];
+  BCReg maxslot = J->maxslot;
+  BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
+  BCReg baseslot = J->baseslot;
+  maxslot += baseslot;
+  minslot += baseslot;
+  snap->nslots = (uint8_t)maxslot;
+  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
+    BCReg s = snap_slot(map[n]);
+    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
+      map[m++] = map[n];  /* Only copy used slots. */
   }
+  snap->nent = (uint8_t)m;
+  nlim = nent + snap->depth;
+  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
+  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
 }
 
 /* -- Snapshot access ----------------------------------------------------- */
diff --git a/src/lj_snap.h b/src/lj_snap.h
index e4c11a8c..031b0ac3 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -11,6 +11,7 @@
 
 #if LJ_HASJIT
 LJ_FUNC void lj_snap_add(jit_State *J);
+LJ_FUNC void lj_snap_purge(jit_State *J);
 LJ_FUNC void lj_snap_shrink(jit_State *J);
 LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno);
 LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);