aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2015-08-29 23:22:29 +0200
committer: Mike Pall <mike> 2015-08-29 23:24:26 +0200
commit: a3a6866d4c2cc096b478c8f81b73a3b818034b89 (patch)
tree: 763f1e2db63fdbc46671e2f44e33ce71a4db2746
parent: ad29314c2c5b205d591e3a6af0a18dc0f69a7f48 (diff)
download: luajit-a3a6866d4c2cc096b478c8f81b73a3b818034b89.tar.gz
luajit-a3a6866d4c2cc096b478c8f81b73a3b818034b89.tar.bz2
luajit-a3a6866d4c2cc096b478c8f81b73a3b818034b89.zip
Re-enable trace stitching.
Thanks to Vyacheslav Egorov.
-rw-r--r-- doc/changes.html 2
-rw-r--r-- src/lj_ffrecord.c 39
-rw-r--r-- src/lj_gc.c 2
-rw-r--r-- src/lj_ir.c 44
-rw-r--r-- src/lj_iropt.h 1
-rw-r--r-- src/lj_jit.h 1
-rw-r--r-- src/lj_snap.c 4
-rw-r--r-- src/lj_trace.c 7
-rw-r--r-- src/lj_traceerr.h 2
-rw-r--r-- src/vm_arm.dasc 13
-rw-r--r-- src/vm_mips.dasc 10
-rw-r--r-- src/vm_ppc.dasc 8
-rw-r--r-- src/vm_x86.dasc 11
13 files changed, 60 insertions, 84 deletions
diff --git a/doc/changes.html b/doc/changes.html
index 64dc4c2a..febb03c7 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -90,7 +90,7 @@ Please take a look at the commit history for more details.
90</ul></li> 90</ul></li>
91<li>Improvements to the JIT compiler: 91<li>Improvements to the JIT compiler:
92<ul> 92<ul>
93<li>Add trace stitching (disabled for now).</li> 93<li>Add trace stitching.</li>
94<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li> 94<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
95<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li> 95<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
96<li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li> 96<li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index a08113ca..6cc05a24 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -96,18 +96,10 @@ static ptrdiff_t results_wanted(jit_State *J)
96 return -1; 96 return -1;
97} 97}
98 98
99#ifdef LUAJIT_TRACE_STITCHING
100/* This feature is disabled for now due to a design mistake. Sorry.
101**
102** It causes unpredictable behavior and crashes when a full trace flush
103** happens with a stitching continuation still in the stack somewhere.
104*/
105
106/* Trace stitching: add continuation below frame to start a new trace. */ 99/* Trace stitching: add continuation below frame to start a new trace. */
107static void recff_stitch(jit_State *J) 100static void recff_stitch(jit_State *J)
108{ 101{
109 ASMFunction cont = lj_cont_stitch; 102 ASMFunction cont = lj_cont_stitch;
110 TraceNo traceno = J->cur.traceno;
111 lua_State *L = J->L; 103 lua_State *L = J->L;
112 TValue *base = L->base; 104 TValue *base = L->base;
113 const BCIns *pc = frame_pc(base-1); 105 const BCIns *pc = frame_pc(base-1);
@@ -120,7 +112,7 @@ static void recff_stitch(jit_State *J)
120 setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); 112 setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT);
121 setcont(base, cont); 113 setcont(base, cont);
122 setframe_pc(base, pc); 114 setframe_pc(base, pc);
123 if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno; 115 setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */
124 L->base += 2; 116 L->base += 2;
125 L->top += 2; 117 L->top += 2;
126 118
@@ -132,7 +124,9 @@ static void recff_stitch(jit_State *J)
132 trcont = lj_ir_kptr(J, (void *)cont); 124 trcont = lj_ir_kptr(J, (void *)cont);
133#endif 125#endif
134 J->base[0] = trcont | TREF_CONT; 126 J->base[0] = trcont | TREF_CONT;
135 J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno); 127 J->ktracep = lj_ir_k64_reserve(J);
128 lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
129 J->base[-1] = emitir(IRT(IR_XLOAD, IRT_P64), lj_ir_kptr(J, &J->ktracep->gcr), 0);
136 J->base += 2; 130 J->base += 2;
137 J->baseslot += 2; 131 J->baseslot += 2;
138 J->framedepth++; 132 J->framedepth++;
@@ -181,31 +175,6 @@ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
181 175
182/* Must stop the trace for classic C functions with arbitrary side-effects. */ 176/* Must stop the trace for classic C functions with arbitrary side-effects. */
183#define recff_c recff_nyi 177#define recff_c recff_nyi
184#else
185/* Fallback handler for fast functions that are not recorded (yet). */
186static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
187{
188 setfuncV(J->L, &J->errinfo, J->fn);
189 lj_trace_err_info(J, LJ_TRERR_NYIFF);
190 UNUSED(rd);
191}
192
193/* Throw error for unsupported variant of fast function. */
194LJ_NORET static void recff_nyiu(jit_State *J, RecordFFData *rd)
195{
196 setfuncV(J->L, &J->errinfo, J->fn);
197 lj_trace_err_info(J, LJ_TRERR_NYIFFU);
198 UNUSED(rd);
199}
200
201/* Must abort the trace for classic C functions with arbitrary side-effects. */
202static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
203{
204 setfuncV(J->L, &J->errinfo, J->fn);
205 lj_trace_err_info(J, LJ_TRERR_NYICF);
206 UNUSED(rd);
207}
208#endif
209 178
210/* Emit BUFHDR for the global temporary buffer. */ 179/* Emit BUFHDR for the global temporary buffer. */
211static TRef recff_bufhdr(jit_State *J) 180static TRef recff_bufhdr(jit_State *J)
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 99d664aa..afd39972 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -69,7 +69,7 @@ static void gc_mark(global_State *g, GCobj *o)
69 gray2black(o); /* Closed upvalues are never gray. */ 69 gray2black(o); /* Closed upvalues are never gray. */
70 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { 70 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
71 lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || 71 lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
72 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO); 72 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE);
73 setgcrefr(o->gch.gclist, g->gc.gray); 73 setgcrefr(o->gch.gclist, g->gc.gray);
74 setgcref(g->gc.gray, o); 74 setgcref(g->gc.gray, o);
75 } 75 }
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 9682e05e..567aec86 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -209,24 +209,13 @@ void lj_ir_k64_freeall(jit_State *J)
209 lj_mem_free(J2G(J), k, sizeof(K64Array)); 209 lj_mem_free(J2G(J), k, sizeof(K64Array));
210 k = next; 210 k = next;
211 } 211 }
212 setmref(J->k64, NULL);
212} 213}
213 214
214/* Find 64 bit constant in chained array or add it. */ 215/* Get new 64 bit constant slot. */
215cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) 216static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64)
216{ 217{
217 K64Array *k, *kp = NULL;
218 TValue *ntv; 218 TValue *ntv;
219 MSize idx;
220 /* Search for the constant in the whole chain of arrays. */
221 for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
222 kp = k; /* Remember previous element in list. */
223 for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
224 TValue *tv = &k->k[idx];
225 if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
226 return tv;
227 }
228 }
229 /* Constant was not found, need to add it. */
230 if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */ 219 if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
231 K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array); 220 K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
232 setmref(kn->next, NULL); 221 setmref(kn->next, NULL);
@@ -242,6 +231,33 @@ cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
242 return ntv; 231 return ntv;
243} 232}
244 233
234/* Find 64 bit constant in chained array or add it. */
235cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
236{
237 K64Array *k, *kp = NULL;
238 MSize idx;
239 /* Search for the constant in the whole chain of arrays. */
240 for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
241 kp = k; /* Remember previous element in list. */
242 for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
243 TValue *tv = &k->k[idx];
244 if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
245 return tv;
246 }
247 }
248 /* Otherwise add a new constant. */
249 return ir_k64_add(J, kp, u64);
250}
251
252TValue *lj_ir_k64_reserve(jit_State *J)
253{
254 K64Array *k, *kp = NULL;
255 lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */
256 /* Find last K64Array, if any. */
257 for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k;
258 return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */
259}
260
245/* Intern 64 bit constant, given by its address. */ 261/* Intern 64 bit constant, given by its address. */
246TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv) 262TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
247{ 263{
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 4e424e70..4106ef8a 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -40,6 +40,7 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
40LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); 40LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
41LJ_FUNC void lj_ir_k64_freeall(jit_State *J); 41LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
42LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv); 42LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
43LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
43LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64); 44LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
44LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); 45LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
45LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); 46LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 1df56cae..10900bf6 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -381,6 +381,7 @@ typedef struct jit_State {
381 GCRef *trace; /* Array of traces. */ 381 GCRef *trace; /* Array of traces. */
382 TraceNo freetrace; /* Start of scan for next free trace. */ 382 TraceNo freetrace; /* Start of scan for next free trace. */
383 MSize sizetrace; /* Size of trace array. */ 383 MSize sizetrace; /* Size of trace array. */
384 TValue *ktracep; /* Pointer to K64Array slot with GCtrace pointer. */
384 385
385 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ 386 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
386 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ 387 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
diff --git a/src/lj_snap.c b/src/lj_snap.c
index d8e7987c..fa9abb74 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -631,8 +631,8 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
631 } else if (irt_isnum(t)) { 631 } else if (irt_isnum(t)) {
632 setnumV(o, ex->fpr[r-RID_MIN_FPR]); 632 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
633#endif 633#endif
634 } else if (LJ_64 && irt_islightud(t)) { 634 } else if (LJ_64 && irt_is64(t)) {
635 /* 64 bit lightuserdata which may escape already has the tag bits. */ 635 /* 64 bit values that already have the tag bits. */
636 o->u64 = ex->gpr[r-RID_MIN_GPR]; 636 o->u64 = ex->gpr[r-RID_MIN_GPR];
637 } else if (irt_ispri(t)) { 637 } else if (irt_ispri(t)) {
638 setpriV(o, irt_toitype(t)); 638 setpriV(o, irt_toitype(t));
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 42f4321d..1d0c2e5e 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -274,7 +274,7 @@ int lj_trace_flushall(lua_State *L)
274 if (T->root == 0) 274 if (T->root == 0)
275 trace_flushroot(J, T); 275 trace_flushroot(J, T);
276 lj_gdbjit_deltrace(J, T); 276 lj_gdbjit_deltrace(J, T);
277 T->traceno = 0; 277 T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */
278 setgcrefnull(J->trace[i]); 278 setgcrefnull(J->trace[i]);
279 } 279 }
280 } 280 }
@@ -284,6 +284,7 @@ int lj_trace_flushall(lua_State *L)
284 memset(J->penalty, 0, sizeof(J->penalty)); 284 memset(J->penalty, 0, sizeof(J->penalty));
285 /* Free the whole machine code and invalidate all exit stub groups. */ 285 /* Free the whole machine code and invalidate all exit stub groups. */
286 lj_mcode_free(J); 286 lj_mcode_free(J);
287 lj_ir_k64_freeall(J);
287 memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); 288 memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
288 lj_vmevent_send(L, TRACE, 289 lj_vmevent_send(L, TRACE,
289 setstrV(L, L->top++, lj_str_newlit(L, "flush")); 290 setstrV(L, L->top++, lj_str_newlit(L, "flush"));
@@ -402,6 +403,7 @@ static void trace_start(jit_State *J)
402 J->postproc = LJ_POST_NONE; 403 J->postproc = LJ_POST_NONE;
403 lj_resetsplit(J); 404 lj_resetsplit(J);
404 J->retryrec = 0; 405 J->retryrec = 0;
406 J->ktracep = NULL;
405 setgcref(J->cur.startpt, obj2gco(J->pt)); 407 setgcref(J->cur.startpt, obj2gco(J->pt));
406 408
407 L = J->L; 409 L = J->L;
@@ -477,6 +479,9 @@ static void trace_stop(jit_State *J)
477 lj_mcode_commit(J, J->cur.mcode); 479 lj_mcode_commit(J, J->cur.mcode);
478 J->postproc = LJ_POST_NONE; 480 J->postproc = LJ_POST_NONE;
479 trace_save(J, T); 481 trace_save(J, T);
482 if (J->ktracep) { /* Patch K64Array slot with the final GCtrace pointer. */
483 setgcV(J->L, J->ktracep, obj2gco(T), LJ_TTRACE);
484 }
480 485
481 L = J->L; 486 L = J->L;
482 lj_vmevent_send(L, TRACE, 487 lj_vmevent_send(L, TRACE,
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index 12e90d03..d434be15 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -25,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type")
25TREDEF(CJITOFF, "JIT compilation disabled for function") 25TREDEF(CJITOFF, "JIT compilation disabled for function")
26TREDEF(CUNROLL, "call unroll limit reached") 26TREDEF(CUNROLL, "call unroll limit reached")
27TREDEF(DOWNREC, "down-recursion, restarting") 27TREDEF(DOWNREC, "down-recursion, restarting")
28TREDEF(NYICF, "NYI: C function %s")
29TREDEF(NYIFF, "NYI: FastFunc %s")
30TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") 28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
31TREDEF(NYIRETL, "NYI: return to lower frame") 29TREDEF(NYIRETL, "NYI: return to lower frame")
32 30
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 0bd9b147..af722f9e 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2086,7 +2086,7 @@ static void build_subroutines(BuildCtx *ctx)
2086 | // RA = resultptr, CARG4 = meta base 2086 | // RA = resultptr, CARG4 = meta base
2087 | ldr RB, SAVE_MULTRES 2087 | ldr RB, SAVE_MULTRES
2088 | ldr INS, [PC, #-4] 2088 | ldr INS, [PC, #-4]
2089 | ldr CARG3, [CARG4, #-24] // Save previous trace number. 2089 | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
2090 | subs RB, RB, #8 2090 | subs RB, RB, #8
2091 | decode_RA8 RC, INS // Call base. 2091 | decode_RA8 RC, INS // Call base.
2092 | beq >2 2092 | beq >2
@@ -2101,23 +2101,20 @@ static void build_subroutines(BuildCtx *ctx)
2101 | decode_RA8 RA, INS 2101 | decode_RA8 RA, INS
2102 | decode_RB8 RB, INS 2102 | decode_RB8 RB, INS
2103 | add RA, RA, RB 2103 | add RA, RA, RB
2104 | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
2105 |3: 2104 |3:
2106 | cmp RA, RC 2105 | cmp RA, RC
2107 | mvn CARG2, #~LJ_TNIL 2106 | mvn CARG2, #~LJ_TNIL
2108 | bhi >9 // More results wanted? 2107 | bhi >9 // More results wanted?
2109 | 2108 |
2110 | ldr TRACE:RA, [CARG1, CARG3, lsl #2] 2109 | ldrh RA, TRACE:CARG3->traceno
2111 | cmp TRACE:RA, #0 2110 | ldrh RC, TRACE:CARG3->link
2112 | beq ->cont_nop 2111 | cmp RC, RA
2113 | ldrh RC, TRACE:RA->link
2114 | cmp RC, CARG3
2115 | beq ->cont_nop // Blacklisted. 2112 | beq ->cont_nop // Blacklisted.
2116 | cmp RC, #0 2113 | cmp RC, #0
2117 | bne =>BC_JLOOP // Jump to stitched trace. 2114 | bne =>BC_JLOOP // Jump to stitched trace.
2118 | 2115 |
2119 | // Stitch a new trace to the previous trace. 2116 | // Stitch a new trace to the previous trace.
2120 | str CARG3, [DISPATCH, #DISPATCH_J(exitno)] 2117 | str RA, [DISPATCH, #DISPATCH_J(exitno)]
2121 | str L, [DISPATCH, #DISPATCH_J(L)] 2118 | str L, [DISPATCH, #DISPATCH_J(L)]
2122 | str BASE, L->base 2119 | str BASE, L->base
2123 | sub CARG1, DISPATCH, #-GG_DISP2J 2120 | sub CARG1, DISPATCH, #-GG_DISP2J
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 7cfdf4b1..134ed569 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -2015,7 +2015,7 @@ static void build_subroutines(BuildCtx *ctx)
2015 |.if JIT 2015 |.if JIT
2016 | // RA = resultptr, RB = meta base 2016 | // RA = resultptr, RB = meta base
2017 | lw INS, -4(PC) 2017 | lw INS, -4(PC)
2018 | lw TMP3, -24+LO(RB) // Save previous trace number. 2018 | lw TMP2, -24+LO(RB) // Save previous trace.
2019 | decode_RA8a RC, INS 2019 | decode_RA8a RC, INS
2020 | addiu AT, MULTRES, -8 2020 | addiu AT, MULTRES, -8
2021 | decode_RA8b RC 2021 | decode_RA8b RC
@@ -2034,17 +2034,13 @@ static void build_subroutines(BuildCtx *ctx)
2034 | decode_RA8b RA 2034 | decode_RA8b RA
2035 | decode_RB8b RB 2035 | decode_RB8b RB
2036 | addu RA, RA, RB 2036 | addu RA, RA, RB
2037 | lw TMP1, DISPATCH_J(trace)(DISPATCH)
2038 | addu RA, BASE, RA 2037 | addu RA, BASE, RA
2039 |3: 2038 |3:
2040 | sltu AT, RC, RA 2039 | sltu AT, RC, RA
2041 | bnez AT, >9 // More results wanted? 2040 | bnez AT, >9 // More results wanted?
2042 |. sll TMP2, TMP3, 2 2041 |. nop
2043 | 2042 |
2044 | addu TMP2, TMP1, TMP2 2043 | lhu TMP3, TRACE:TMP2->traceno
2045 | lw TRACE:TMP2, 0(TMP2)
2046 | beqz TRACE:TMP2, ->cont_nop
2047 |. nop
2048 | lhu RD, TRACE:TMP2->link 2044 | lhu RD, TRACE:TMP2->link
2049 | beq RD, TMP3, ->cont_nop // Blacklisted. 2045 | beq RD, TMP3, ->cont_nop // Blacklisted.
2050 |. load_got lj_dispatch_stitch 2046 |. load_got lj_dispatch_stitch
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 9299c554..0d6915fd 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -2525,7 +2525,7 @@ static void build_subroutines(BuildCtx *ctx)
2525 |.if JIT 2525 |.if JIT
2526 | // RA = resultptr, RB = meta base 2526 | // RA = resultptr, RB = meta base
2527 | lwz INS, -4(PC) 2527 | lwz INS, -4(PC)
2528 | lwz TMP3, -20(RB) // Save previous trace number. 2528 | lwz TRACE:TMP2, -20(RB) // Save previous trace.
2529 | addic. TMP1, MULTRES, -8 2529 | addic. TMP1, MULTRES, -8
2530 | decode_RA8 RC, INS // Call base. 2530 | decode_RA8 RC, INS // Call base.
2531 | beq >2 2531 | beq >2
@@ -2540,15 +2540,11 @@ static void build_subroutines(BuildCtx *ctx)
2540 | decode_RA8 RA, INS 2540 | decode_RA8 RA, INS
2541 | decode_RB8 RB, INS 2541 | decode_RB8 RB, INS
2542 | add RA, RA, RB 2542 | add RA, RA, RB
2543 | lwz TMP1, DISPATCH_J(trace)(DISPATCH)
2544 |3: 2543 |3:
2545 | cmplw RA, RC 2544 | cmplw RA, RC
2546 | bgt >9 // More results wanted? 2545 | bgt >9 // More results wanted?
2547 | 2546 |
2548 | slwi TMP2, TMP3, 2 2547 | lhz TMP3, TRACE:TMP2->traceno
2549 | lwzx TRACE:TMP2, TMP1, TMP2
2550 | cmpwi TRACE:TMP2, 0
2551 | beq ->cont_nop
2552 | lhz RD, TRACE:TMP2->link 2548 | lhz RD, TRACE:TMP2->link
2553 | cmpw RD, TMP3 2549 | cmpw RD, TMP3
2554 | cmpwi cr1, RD, 0 2550 | cmpwi cr1, RD, 0
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index f31e595b..96ac1da8 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -2667,8 +2667,8 @@ static void build_subroutines(BuildCtx *ctx)
2667 |->cont_stitch: // Trace stitching. 2667 |->cont_stitch: // Trace stitching.
2668 |.if JIT 2668 |.if JIT
2669 | // BASE = base, RC = result, RB = mbase 2669 | // BASE = base, RC = result, RB = mbase
2670 | mov RA, [RB-24] // Save previous trace number. 2670 | mov TRACE:RA, [RB-24] // Save previous trace.
2671 | mov TMP1, RA 2671 | mov TMP1, TRACE:RA
2672 | mov TMP3, DISPATCH // Need one more register. 2672 | mov TMP3, DISPATCH // Need one more register.
2673 | mov DISPATCH, MULTRES 2673 | mov DISPATCH, MULTRES
2674 | movzx RA, PC_RA 2674 | movzx RA, PC_RA
@@ -2699,11 +2699,8 @@ static void build_subroutines(BuildCtx *ctx)
2699 | ja >9 // More results wanted? 2699 | ja >9 // More results wanted?
2700 | 2700 |
2701 | mov DISPATCH, TMP3 2701 | mov DISPATCH, TMP3
2702 | mov RB, TMP1 // Get previous trace number. 2702 | mov TRACE:RD, TMP1 // Get previous trace.
2703 | mov RA, [DISPATCH+DISPATCH_J(trace)] 2703 | movzx RB, word TRACE:RD->traceno
2704 | mov TRACE:RD, [RA+RB*4]
2705 | test TRACE:RD, TRACE:RD
2706 | jz ->cont_nop
2707 | movzx RD, word TRACE:RD->link 2704 | movzx RD, word TRACE:RD->link
2708 | cmp RD, RB 2705 | cmp RD, RB
2709 | je ->cont_nop // Blacklisted. 2706 | je ->cont_nop // Blacklisted.