diff options
-rw-r--r-- | src/lj_asm.c | 99 |
1 files changed, 88 insertions, 11 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 18383bcc..2f00749b 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -83,6 +83,9 @@ typedef struct ASMState { | |||
83 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ | 83 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ |
84 | MCode *realign; /* Realign loop if not NULL. */ | 84 | MCode *realign; /* Realign loop if not NULL. */ |
85 | 85 | ||
86 | #ifdef RID_NUM_KREF | ||
87 | int32_t krefk[RID_NUM_KREF]; | ||
88 | #endif | ||
86 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ | 89 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ |
87 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ | 90 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ |
88 | #if LJ_SOFTFP | 91 | #if LJ_SOFTFP |
@@ -214,6 +217,8 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
214 | } else if (e[1] == 's') { | 217 | } else if (e[1] == 's') { |
215 | uint32_t slot = va_arg(argp, uint32_t); | 218 | uint32_t slot = va_arg(argp, uint32_t); |
216 | p += sprintf(p, "[sp+0x%x]", sps_scale(slot)); | 219 | p += sprintf(p, "[sp+0x%x]", sps_scale(slot)); |
220 | } else if (e[1] == 'x') { | ||
221 | p += sprintf(p, "%08x", va_arg(argp, int32_t)); | ||
217 | } else { | 222 | } else { |
218 | lua_assert(0); | 223 | lua_assert(0); |
219 | } | 224 | } |
@@ -253,6 +258,24 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
253 | 258 | ||
254 | #define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s)) | 259 | #define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s)) |
255 | 260 | ||
261 | #ifdef RID_NUM_KREF | ||
262 | #define ra_iskref(ref) ((ref) < RID_NUM_KREF) | ||
263 | #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) | ||
264 | #define ra_krefk(as, ref) (as->krefk[(ref)]) | ||
265 | |||
266 | static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) | ||
267 | { | ||
268 | IRRef ref = (IRRef)(r - RID_MIN_KREF); | ||
269 | as->krefk[ref] = k; | ||
270 | as->cost[r] = REGCOST(ref, ref); | ||
271 | } | ||
272 | |||
273 | #else | ||
274 | #define ra_iskref(ref) 0 | ||
275 | #define ra_krefreg(ref) RID_MIN_GPR | ||
276 | #define ra_krefk(as, ref) 0 | ||
277 | #endif | ||
278 | |||
256 | /* Setup register allocator. */ | 279 | /* Setup register allocator. */ |
257 | static void ra_setup(ASMState *as) | 280 | static void ra_setup(ASMState *as) |
258 | { | 281 | { |
@@ -268,9 +291,20 @@ static void ra_setup(ASMState *as) | |||
268 | } | 291 | } |
269 | 292 | ||
270 | /* Rematerialize constants. */ | 293 | /* Rematerialize constants. */ |
271 | static Reg ra_rematk(ASMState *as, IRIns *ir) | 294 | static Reg ra_rematk(ASMState *as, IRRef ref) |
272 | { | 295 | { |
273 | Reg r = ir->r; | 296 | IRIns *ir; |
297 | Reg r; | ||
298 | if (ra_iskref(ref)) { | ||
299 | r = ra_krefreg(ref); | ||
300 | lua_assert(!rset_test(as->freeset, r)); | ||
301 | ra_free(as, r); | ||
302 | ra_modified(as, r); | ||
303 | emit_loadi(as, r, ra_krefk(as, ref)); | ||
304 | return r; | ||
305 | } | ||
306 | ir = IR(ref); | ||
307 | r = ir->r; | ||
274 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 308 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); |
275 | ra_free(as, r); | 309 | ra_free(as, r); |
276 | ra_modified(as, r); | 310 | ra_modified(as, r); |
@@ -337,10 +371,10 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) | |||
337 | /* Restore a register (marked as free). Rematerialize or force a spill. */ | 371 | /* Restore a register (marked as free). Rematerialize or force a spill. */ |
338 | static Reg ra_restore(ASMState *as, IRRef ref) | 372 | static Reg ra_restore(ASMState *as, IRRef ref) |
339 | { | 373 | { |
340 | IRIns *ir = IR(ref); | ||
341 | if (emit_canremat(ref)) { | 374 | if (emit_canremat(ref)) { |
342 | return ra_rematk(as, ir); | 375 | return ra_rematk(as, ref); |
343 | } else { | 376 | } else { |
377 | IRIns *ir = IR(ref); | ||
344 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ | 378 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ |
345 | Reg r = ir->r; | 379 | Reg r = ir->r; |
346 | lua_assert(ra_hasreg(r)); | 380 | lua_assert(ra_hasreg(r)); |
@@ -379,7 +413,7 @@ static Reg ra_evict(ASMState *as, RegSet allow) | |||
379 | FPRDEF(MINCOST) | 413 | FPRDEF(MINCOST) |
380 | } | 414 | } |
381 | ref = regcost_ref(cost); | 415 | ref = regcost_ref(cost); |
382 | lua_assert(ref >= as->T->nk && ref < as->T->nins); | 416 | lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); |
383 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ | 417 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ |
384 | if (!irref_isk(ref) && (as->weakset & allow)) { | 418 | if (!irref_isk(ref) && (as->weakset & allow)) { |
385 | IRIns *ir = IR(ref); | 419 | IRIns *ir = IR(ref); |
@@ -429,13 +463,56 @@ static void ra_evictk(ASMState *as) | |||
429 | Reg r = rset_pickbot(work); | 463 | Reg r = rset_pickbot(work); |
430 | IRRef ref = regcost_ref(as->cost[r]); | 464 | IRRef ref = regcost_ref(as->cost[r]); |
431 | if (emit_canremat(ref)) { | 465 | if (emit_canremat(ref)) { |
432 | ra_rematk(as, IR(ref)); | 466 | ra_rematk(as, ref); |
433 | checkmclim(as); | 467 | checkmclim(as); |
434 | } | 468 | } |
435 | rset_clear(work, r); | 469 | rset_clear(work, r); |
436 | } | 470 | } |
437 | } | 471 | } |
438 | 472 | ||
473 | #ifdef RID_NUM_KREF | ||
474 | /* Allocate a register for a constant. */ | ||
475 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | ||
476 | { | ||
477 | /* First try to find a register which already holds the same constant. */ | ||
478 | RegSet work = ~as->freeset & RSET_GPR; | ||
479 | Reg r; | ||
480 | while (work) { | ||
481 | IRRef ref; | ||
482 | r = rset_pickbot(work); | ||
483 | ref = regcost_ref(as->cost[r]); | ||
484 | if (emit_canremat(ref) && | ||
485 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) | ||
486 | return r; | ||
487 | rset_clear(work, r); | ||
488 | } | ||
489 | work = as->freeset & allow; | ||
490 | if (work) | ||
491 | r = rset_pickbot(work); | ||
492 | else | ||
493 | r = ra_evict(as, allow); | ||
494 | RA_DBGX((as, "allock $x $r", k, r)); | ||
495 | ra_setkref(as, r, k); | ||
496 | rset_clear(as->freeset, r); | ||
497 | ra_noweak(as, r); | ||
498 | return r; | ||
499 | } | ||
500 | |||
501 | /* Allocate a specific register for a constant. */ | ||
502 | static void ra_allockreg(ASMState *as, int32_t k, Reg r) | ||
503 | { | ||
504 | Reg kr = ra_allock(as, k, RID2RSET(r)); | ||
505 | if (kr != r) { | ||
506 | IRIns irdummy; | ||
507 | irdummy.t.irt = IRT_INT; | ||
508 | ra_scratch(as, RID2RSET(r)); | ||
509 | emit_movrr(as, &irdummy, kr, r); | ||
510 | } | ||
511 | } | ||
512 | #else | ||
513 | #define ra_allockreg(as, k, r) emit_loadi(as, (r), (k)) | ||
514 | #endif | ||
515 | |||
439 | /* Allocate a register for ref from the allowed set of registers. | 516 | /* Allocate a register for ref from the allowed set of registers. |
440 | ** Note: this function assumes the ref does NOT have a register yet! | 517 | ** Note: this function assumes the ref does NOT have a register yet! |
441 | ** Picks an optimal register, sets the cost and marks the register as non-free. | 518 | ** Picks an optimal register, sets the cost and marks the register as non-free. |
@@ -454,7 +531,7 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) | |||
454 | goto found; | 531 | goto found; |
455 | /* Rematerialization is cheaper than missing a hint. */ | 532 | /* Rematerialization is cheaper than missing a hint. */ |
456 | if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) { | 533 | if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) { |
457 | ra_rematk(as, IR(regcost_ref(as->cost[r]))); | 534 | ra_rematk(as, regcost_ref(as->cost[r])); |
458 | goto found; | 535 | goto found; |
459 | } | 536 | } |
460 | RA_DBGX((as, "hintmiss $f $r", ref, r)); | 537 | RA_DBGX((as, "hintmiss $f $r", ref, r)); |
@@ -794,7 +871,7 @@ static void asm_tnew(ASMState *as, IRIns *ir) | |||
794 | as->gcsteps++; | 871 | as->gcsteps++; |
795 | asm_setupresult(as, ir, ci); /* GCtab * */ | 872 | asm_setupresult(as, ir, ci); /* GCtab * */ |
796 | asm_gencall(as, ci, args); | 873 | asm_gencall(as, ci, args); |
797 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24)); | 874 | ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1)); |
798 | } | 875 | } |
799 | 876 | ||
800 | static void asm_tdup(ASMState *as, IRIns *ir) | 877 | static void asm_tdup(ASMState *as, IRIns *ir) |
@@ -1201,8 +1278,8 @@ static void asm_tail_link(ASMState *as) | |||
1201 | if (bc_isret(bc_op(*retpc))) | 1278 | if (bc_isret(bc_op(*retpc))) |
1202 | pc = retpc; | 1279 | pc = retpc; |
1203 | } | 1280 | } |
1204 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); | 1281 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); |
1205 | emit_loada(as, RID_LPC, pc); | 1282 | ra_allockreg(as, i32ptr(pc), RID_LPC); |
1206 | mres = (int32_t)(snap->nslots - baseslot); | 1283 | mres = (int32_t)(snap->nslots - baseslot); |
1207 | switch (bc_op(*pc)) { | 1284 | switch (bc_op(*pc)) { |
1208 | case BC_CALLM: case BC_CALLMT: | 1285 | case BC_CALLM: case BC_CALLMT: |
@@ -1211,7 +1288,7 @@ static void asm_tail_link(ASMState *as) | |||
1211 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; | 1288 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; |
1212 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; | 1289 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; |
1213 | } | 1290 | } |
1214 | emit_loadi(as, RID_RET, mres); /* Return MULTRES or 0. */ | 1291 | ra_allockreg(as, mres, RID_RET); /* Return MULTRES or 0. */ |
1215 | } else if (baseslot) { | 1292 | } else if (baseslot) { |
1216 | /* Save modified BASE for linking to trace with higher start frame. */ | 1293 | /* Save modified BASE for linking to trace with higher start frame. */ |
1217 | emit_setgl(as, RID_BASE, jit_base); | 1294 | emit_setgl(as, RID_BASE, jit_base); |