aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2011-11-20 19:14:39 +0100
committer Mike Pall <mike> 2011-11-20 19:16:40 +0100
commit cecbe3c15fc62921098f3468b9de86cf0b631b9e (patch)
tree d1b18c59652a3e163ce050e58b7b647fdf312271
parent cc7a12be93660677a5128124c11c68127cfe33eb (diff)
download luajit-cecbe3c15fc62921098f3468b9de86cf0b631b9e.tar.gz
download luajit-cecbe3c15fc62921098f3468b9de86cf0b631b9e.tar.bz2
download luajit-cecbe3c15fc62921098f3468b9de86cf0b631b9e.zip
Specialize to prototype for non-monomorphic functions.
Solves the trace-explosion problem with closure-heavy programming.
-rw-r--r-- doc/status.html 8
-rw-r--r-- src/lj_func.c 4
-rw-r--r-- src/lj_ir.h 1
-rw-r--r-- src/lj_obj.h 3
-rw-r--r-- src/lj_parse.c 2
-rw-r--r-- src/lj_record.c 28
6 files changed, 31 insertions, 15 deletions
diff --git a/doc/status.html b/doc/status.html
index d386e1aa..5f8fa463 100644
--- a/doc/status.html
+++ b/doc/status.html
@@ -148,14 +148,6 @@ with me, before writing major improvements, to avoid duplication of
148effort. 148effort.
149</li> 149</li>
150<li> 150<li>
151The trace compiler currently doesn't back off specialization for
152function call dispatch. It should really fall back to specializing on
153the prototype, not the closure identity. This can lead to the so-called
154"trace explosion" problem with <b>closure-heavy programming</b>. The
155trace linking heuristics prevent this, but in the worst case this
156means the code always falls back to the interpreter.
157</li>
158<li>
159<b>Trace management</b> needs more tuning: less drastic countermeasures 151<b>Trace management</b> needs more tuning: less drastic countermeasures
160against trace explosion and better heuristics in general. 152against trace explosion and better heuristics in general.
161</li> 153</li>
diff --git a/src/lj_func.c b/src/lj_func.c
index 334ba4c8..97be0a2a 100644
--- a/src/lj_func.c
+++ b/src/lj_func.c
@@ -118,6 +118,7 @@ GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env)
118 118
119static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env) 119static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env)
120{ 120{
121 uint32_t count;
121 GCfunc *fn = (GCfunc *)lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv)); 122 GCfunc *fn = (GCfunc *)lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv));
122 fn->l.gct = ~LJ_TFUNC; 123 fn->l.gct = ~LJ_TFUNC;
123 fn->l.ffid = FF_LUA; 124 fn->l.ffid = FF_LUA;
@@ -125,6 +126,9 @@ static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env)
125 /* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */ 126 /* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */
126 setmref(fn->l.pc, proto_bc(pt)); 127 setmref(fn->l.pc, proto_bc(pt));
127 setgcref(fn->l.env, obj2gco(env)); 128 setgcref(fn->l.env, obj2gco(env));
129 /* Saturating 3 bit counter (0..7) for created closures. */
130 count = (uint32_t)pt->flags + PROTO_CLCOUNT;
131 pt->flags = (uint8_t)(count - ((count >> PROTO_CLC_BITS) & PROTO_CLCOUNT));
128 return fn; 132 return fn;
129} 133}
130 134
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 7a9f1a9c..8cf8129f 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -183,6 +183,7 @@ IRFPMDEF(FPMENUM)
183#define IRFLDEF(_) \ 183#define IRFLDEF(_) \
184 _(STR_LEN, offsetof(GCstr, len)) \ 184 _(STR_LEN, offsetof(GCstr, len)) \
185 _(FUNC_ENV, offsetof(GCfunc, l.env)) \ 185 _(FUNC_ENV, offsetof(GCfunc, l.env)) \
186 _(FUNC_PC, offsetof(GCfunc, l.pc)) \
186 _(TAB_META, offsetof(GCtab, metatable)) \ 187 _(TAB_META, offsetof(GCtab, metatable)) \
187 _(TAB_ARRAY, offsetof(GCtab, array)) \ 188 _(TAB_ARRAY, offsetof(GCtab, array)) \
188 _(TAB_NODE, offsetof(GCtab, node)) \ 189 _(TAB_NODE, offsetof(GCtab, node)) \
diff --git a/src/lj_obj.h b/src/lj_obj.h
index afb29d0f..4a360df6 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -318,6 +318,9 @@ typedef struct GCproto {
318/* Only used during parsing. */ 318/* Only used during parsing. */
319#define PROTO_HAS_RETURN 0x20 /* Already emitted a return. */ 319#define PROTO_HAS_RETURN 0x20 /* Already emitted a return. */
320#define PROTO_FIXUP_RETURN 0x40 /* Need to fixup emitted returns. */ 320#define PROTO_FIXUP_RETURN 0x40 /* Need to fixup emitted returns. */
321/* Top bits used for counting created closures. */
322#define PROTO_CLCOUNT 0x20 /* Base of saturating 3 bit counter. */
323#define PROTO_CLC_BITS 3
321 324
322#define proto_kgc(pt, idx) \ 325#define proto_kgc(pt, idx) \
323 check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ 326 check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \
diff --git a/src/lj_parse.c b/src/lj_parse.c
index f0bb4419..4b8a8e6f 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -1392,7 +1392,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1392 pt->gct = ~LJ_TPROTO; 1392 pt->gct = ~LJ_TPROTO;
1393 pt->sizept = (MSize)sizept; 1393 pt->sizept = (MSize)sizept;
1394 pt->trace = 0; 1394 pt->trace = 0;
1395 pt->flags = fs->flags; 1395 pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN));
1396 pt->numparams = fs->numparams; 1396 pt->numparams = fs->numparams;
1397 pt->framesize = fs->framesize; 1397 pt->framesize = fs->framesize;
1398 setgcref(pt->chunkname, obj2gco(ls->chunkname)); 1398 setgcref(pt->chunkname, obj2gco(ls->chunkname));
diff --git a/src/lj_record.c b/src/lj_record.c
index 61e09b6d..a76f5d94 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -563,12 +563,32 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
563 563
564/* -- Record calls and returns -------------------------------------------- */ 564/* -- Record calls and returns -------------------------------------------- */
565 565
566/* Specialize to the runtime value of the called function or its prototype. */
567static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
568{
569 TRef kfunc;
570 if (isluafunc(fn)) {
571 GCproto *pt = funcproto(fn);
572 /* 3 or more closures created? Probably not a monomorphic function. */
573 if (pt->flags >= 3*PROTO_CLCOUNT) { /* Specialize to prototype instead. */
574 TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
575 emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt)));
576 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
577 return tr;
578 }
579 }
580 /* Otherwise specialize to the function (closure) value itself. */
581 kfunc = lj_ir_kfunc(J, fn);
582 emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfunc);
583 return kfunc;
584}
585
566/* Record call setup. */ 586/* Record call setup. */
567static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) 587static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
568{ 588{
569 RecordIndex ix; 589 RecordIndex ix;
570 TValue *functv = &J->L->base[func]; 590 TValue *functv = &J->L->base[func];
571 TRef trfunc, *fbase = &J->base[func]; 591 TRef *fbase = &J->base[func];
572 ptrdiff_t i; 592 ptrdiff_t i;
573 for (i = 0; i <= nargs; i++) 593 for (i = 0; i <= nargs; i++)
574 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ 594 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */
@@ -582,11 +602,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
582 fbase[0] = ix.mobj; /* Replace function. */ 602 fbase[0] = ix.mobj; /* Replace function. */
583 functv = &ix.mobjv; 603 functv = &ix.mobjv;
584 } 604 }
585 605 fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
586 /* Specialize to the runtime value of the called function. */
587 trfunc = lj_ir_kfunc(J, funcV(functv));
588 emitir(IRTG(IR_EQ, IRT_FUNC), fbase[0], trfunc);
589 fbase[0] = trfunc | TREF_FRAME;
590 J->maxslot = (BCReg)nargs; 606 J->maxslot = (BCReg)nargs;
591} 607}
592 608