diff options
author | Mike Pall <mike> | 2011-11-20 19:14:39 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2011-11-20 19:16:40 +0100 |
commit | cecbe3c15fc62921098f3468b9de86cf0b631b9e (patch) | |
tree | d1b18c59652a3e163ce050e58b7b647fdf312271 | |
parent | cc7a12be93660677a5128124c11c68127cfe33eb (diff) | |
download | luajit-cecbe3c15fc62921098f3468b9de86cf0b631b9e.tar.gz luajit-cecbe3c15fc62921098f3468b9de86cf0b631b9e.tar.bz2 luajit-cecbe3c15fc62921098f3468b9de86cf0b631b9e.zip |
Specialize to prototype for non-monomorphic functions.
Solves the trace-explosion problem with closure-heavy programming.
-rw-r--r-- | doc/status.html | 8 | ||||
-rw-r--r-- | src/lj_func.c | 4 | ||||
-rw-r--r-- | src/lj_ir.h | 1 | ||||
-rw-r--r-- | src/lj_obj.h | 3 | ||||
-rw-r--r-- | src/lj_parse.c | 2 | ||||
-rw-r--r-- | src/lj_record.c | 28 |
6 files changed, 31 insertions, 15 deletions
diff --git a/doc/status.html b/doc/status.html index d386e1aa..5f8fa463 100644 --- a/doc/status.html +++ b/doc/status.html | |||
@@ -148,14 +148,6 @@ with me, before writing major improvements, to avoid duplication of | |||
148 | effort. | 148 | effort. |
149 | </li> | 149 | </li> |
150 | <li> | 150 | <li> |
151 | The trace compiler currently doesn't back off specialization for | ||
152 | function call dispatch. It should really fall back to specializing on | ||
153 | the prototype, not the closure identity. This can lead to the so-called | ||
154 | "trace explosion" problem with <b>closure-heavy programming</b>. The | ||
155 | trace linking heuristics prevent this, but in the worst case this | ||
156 | means the code always falls back to the interpreter. | ||
157 | </li> | ||
158 | <li> | ||
159 | <b>Trace management</b> needs more tuning: less drastic countermeasures | 151 | <b>Trace management</b> needs more tuning: less drastic countermeasures |
160 | against trace explosion and better heuristics in general. | 152 | against trace explosion and better heuristics in general. |
161 | </li> | 153 | </li> |
diff --git a/src/lj_func.c b/src/lj_func.c index 334ba4c8..97be0a2a 100644 --- a/src/lj_func.c +++ b/src/lj_func.c | |||
@@ -118,6 +118,7 @@ GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env) | |||
118 | 118 | ||
119 | static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env) | 119 | static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env) |
120 | { | 120 | { |
121 | uint32_t count; | ||
121 | GCfunc *fn = (GCfunc *)lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv)); | 122 | GCfunc *fn = (GCfunc *)lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv)); |
122 | fn->l.gct = ~LJ_TFUNC; | 123 | fn->l.gct = ~LJ_TFUNC; |
123 | fn->l.ffid = FF_LUA; | 124 | fn->l.ffid = FF_LUA; |
@@ -125,6 +126,9 @@ static GCfunc *func_newL(lua_State *L, GCproto *pt, GCtab *env) | |||
125 | /* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */ | 126 | /* NOBARRIER: Really a setgcref. But the GCfunc is new (marked white). */ |
126 | setmref(fn->l.pc, proto_bc(pt)); | 127 | setmref(fn->l.pc, proto_bc(pt)); |
127 | setgcref(fn->l.env, obj2gco(env)); | 128 | setgcref(fn->l.env, obj2gco(env)); |
129 | /* Saturating 3 bit counter (0..7) for created closures. */ | ||
130 | count = (uint32_t)pt->flags + PROTO_CLCOUNT; | ||
131 | pt->flags = (uint8_t)(count - ((count >> PROTO_CLC_BITS) & PROTO_CLCOUNT)); | ||
128 | return fn; | 132 | return fn; |
129 | } | 133 | } |
130 | 134 | ||
diff --git a/src/lj_ir.h b/src/lj_ir.h index 7a9f1a9c..8cf8129f 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -183,6 +183,7 @@ IRFPMDEF(FPMENUM) | |||
183 | #define IRFLDEF(_) \ | 183 | #define IRFLDEF(_) \ |
184 | _(STR_LEN, offsetof(GCstr, len)) \ | 184 | _(STR_LEN, offsetof(GCstr, len)) \ |
185 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ | 185 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ |
186 | _(FUNC_PC, offsetof(GCfunc, l.pc)) \ | ||
186 | _(TAB_META, offsetof(GCtab, metatable)) \ | 187 | _(TAB_META, offsetof(GCtab, metatable)) \ |
187 | _(TAB_ARRAY, offsetof(GCtab, array)) \ | 188 | _(TAB_ARRAY, offsetof(GCtab, array)) \ |
188 | _(TAB_NODE, offsetof(GCtab, node)) \ | 189 | _(TAB_NODE, offsetof(GCtab, node)) \ |
diff --git a/src/lj_obj.h b/src/lj_obj.h index afb29d0f..4a360df6 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
@@ -318,6 +318,9 @@ typedef struct GCproto { | |||
318 | /* Only used during parsing. */ | 318 | /* Only used during parsing. */ |
319 | #define PROTO_HAS_RETURN 0x20 /* Already emitted a return. */ | 319 | #define PROTO_HAS_RETURN 0x20 /* Already emitted a return. */ |
320 | #define PROTO_FIXUP_RETURN 0x40 /* Need to fixup emitted returns. */ | 320 | #define PROTO_FIXUP_RETURN 0x40 /* Need to fixup emitted returns. */ |
321 | /* Top bits used for counting created closures. */ | ||
322 | #define PROTO_CLCOUNT 0x20 /* Base of saturating 3 bit counter. */ | ||
323 | #define PROTO_CLC_BITS 3 | ||
321 | 324 | ||
322 | #define proto_kgc(pt, idx) \ | 325 | #define proto_kgc(pt, idx) \ |
323 | check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ | 326 | check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ |
diff --git a/src/lj_parse.c b/src/lj_parse.c index f0bb4419..4b8a8e6f 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c | |||
@@ -1392,7 +1392,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) | |||
1392 | pt->gct = ~LJ_TPROTO; | 1392 | pt->gct = ~LJ_TPROTO; |
1393 | pt->sizept = (MSize)sizept; | 1393 | pt->sizept = (MSize)sizept; |
1394 | pt->trace = 0; | 1394 | pt->trace = 0; |
1395 | pt->flags = fs->flags; | 1395 | pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN)); |
1396 | pt->numparams = fs->numparams; | 1396 | pt->numparams = fs->numparams; |
1397 | pt->framesize = fs->framesize; | 1397 | pt->framesize = fs->framesize; |
1398 | setgcref(pt->chunkname, obj2gco(ls->chunkname)); | 1398 | setgcref(pt->chunkname, obj2gco(ls->chunkname)); |
diff --git a/src/lj_record.c b/src/lj_record.c index 61e09b6d..a76f5d94 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -563,12 +563,32 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) | |||
563 | 563 | ||
564 | /* -- Record calls and returns -------------------------------------------- */ | 564 | /* -- Record calls and returns -------------------------------------------- */ |
565 | 565 | ||
566 | /* Specialize to the runtime value of the called function or its prototype. */ | ||
567 | static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) | ||
568 | { | ||
569 | TRef kfunc; | ||
570 | if (isluafunc(fn)) { | ||
571 | GCproto *pt = funcproto(fn); | ||
572 | /* 3 or more closures created? Probably not a monomorphic function. */ | ||
573 | if (pt->flags >= 3*PROTO_CLCOUNT) { /* Specialize to prototype instead. */ | ||
574 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); | ||
575 | emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); | ||
576 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ | ||
577 | return tr; | ||
578 | } | ||
579 | } | ||
580 | /* Otherwise specialize to the function (closure) value itself. */ | ||
581 | kfunc = lj_ir_kfunc(J, fn); | ||
582 | emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfunc); | ||
583 | return kfunc; | ||
584 | } | ||
585 | |||
566 | /* Record call setup. */ | 586 | /* Record call setup. */ |
567 | static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) | 587 | static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) |
568 | { | 588 | { |
569 | RecordIndex ix; | 589 | RecordIndex ix; |
570 | TValue *functv = &J->L->base[func]; | 590 | TValue *functv = &J->L->base[func]; |
571 | TRef trfunc, *fbase = &J->base[func]; | 591 | TRef *fbase = &J->base[func]; |
572 | ptrdiff_t i; | 592 | ptrdiff_t i; |
573 | for (i = 0; i <= nargs; i++) | 593 | for (i = 0; i <= nargs; i++) |
574 | (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ | 594 | (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ |
@@ -582,11 +602,7 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
582 | fbase[0] = ix.mobj; /* Replace function. */ | 602 | fbase[0] = ix.mobj; /* Replace function. */ |
583 | functv = &ix.mobjv; | 603 | functv = &ix.mobjv; |
584 | } | 604 | } |
585 | 605 | fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); | |
586 | /* Specialize to the runtime value of the called function. */ | ||
587 | trfunc = lj_ir_kfunc(J, funcV(functv)); | ||
588 | emitir(IRTG(IR_EQ, IRT_FUNC), fbase[0], trfunc); | ||
589 | fbase[0] = trfunc | TREF_FRAME; | ||
590 | J->maxslot = (BCReg)nargs; | 606 | J->maxslot = (BCReg)nargs; |
591 | } | 607 | } |
592 | 608 | ||