diff options
| -rw-r--r-- | src/lj_asm.c | 99 |
1 files changed, 88 insertions, 11 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 18383bcc..2f00749b 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -83,6 +83,9 @@ typedef struct ASMState { | |||
| 83 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ | 83 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ |
| 84 | MCode *realign; /* Realign loop if not NULL. */ | 84 | MCode *realign; /* Realign loop if not NULL. */ |
| 85 | 85 | ||
| 86 | #ifdef RID_NUM_KREF | ||
| 87 | int32_t krefk[RID_NUM_KREF]; | ||
| 88 | #endif | ||
| 86 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ | 89 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ |
| 87 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ | 90 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ |
| 88 | #if LJ_SOFTFP | 91 | #if LJ_SOFTFP |
| @@ -214,6 +217,8 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
| 214 | } else if (e[1] == 's') { | 217 | } else if (e[1] == 's') { |
| 215 | uint32_t slot = va_arg(argp, uint32_t); | 218 | uint32_t slot = va_arg(argp, uint32_t); |
| 216 | p += sprintf(p, "[sp+0x%x]", sps_scale(slot)); | 219 | p += sprintf(p, "[sp+0x%x]", sps_scale(slot)); |
| 220 | } else if (e[1] == 'x') { | ||
| 221 | p += sprintf(p, "%08x", va_arg(argp, int32_t)); | ||
| 217 | } else { | 222 | } else { |
| 218 | lua_assert(0); | 223 | lua_assert(0); |
| 219 | } | 224 | } |
| @@ -253,6 +258,24 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
| 253 | 258 | ||
| 254 | #define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s)) | 259 | #define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s)) |
| 255 | 260 | ||
#ifdef RID_NUM_KREF
/* Constant references ("krefs") are pseudo IR refs in the range
** [0, RID_NUM_KREF). Each one is tied to a fixed register and to one slot
** of as->krefk[], which records the constant held by that register.
*/

/* True if ref is one of the pseudo refs reserved for register constants. */
#define ra_iskref(ref)		((ref) < RID_NUM_KREF)
/* Register belonging to a kref: RID_MIN_KREF + ref. */
#define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
/* Constant value recorded for a kref. Both arguments are parenthesized so
** that arbitrary expressions can be passed safely.
*/
#define ra_krefk(as, ref)	((as)->krefk[(ref)])

/* Record that register r now holds the constant k.
**
** The kref is derived from the register number (r - RID_MIN_KREF), the
** constant is stored in as->krefk[] and the cost slot of r is set up to
** point back at that kref.
*/
static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
{
  IRRef kref = (IRRef)(r - RID_MIN_KREF);
  as->krefk[kref] = k;
  as->cost[r] = REGCOST(kref, kref);
}

#else
/* No register constants available: no ref is ever a kref and the
** remaining helpers collapse to harmless placeholders.
*/
#define ra_iskref(ref)		0
#define ra_krefreg(ref)		RID_MIN_GPR
#define ra_krefk(as, ref)	0
#endif
| 278 | |||
| 256 | /* Setup register allocator. */ | 279 | /* Setup register allocator. */ |
| 257 | static void ra_setup(ASMState *as) | 280 | static void ra_setup(ASMState *as) |
| 258 | { | 281 | { |
| @@ -268,9 +291,20 @@ static void ra_setup(ASMState *as) | |||
| 268 | } | 291 | } |
| 269 | 292 | ||
| 270 | /* Rematerialize constants. */ | 293 | /* Rematerialize constants. */ |
| 271 | static Reg ra_rematk(ASMState *as, IRIns *ir) | 294 | static Reg ra_rematk(ASMState *as, IRRef ref) |
| 272 | { | 295 | { |
| 273 | Reg r = ir->r; | 296 | IRIns *ir; |
| 297 | Reg r; | ||
| 298 | if (ra_iskref(ref)) { | ||
| 299 | r = ra_krefreg(ref); | ||
| 300 | lua_assert(!rset_test(as->freeset, r)); | ||
| 301 | ra_free(as, r); | ||
| 302 | ra_modified(as, r); | ||
| 303 | emit_loadi(as, r, ra_krefk(as, ref)); | ||
| 304 | return r; | ||
| 305 | } | ||
| 306 | ir = IR(ref); | ||
| 307 | r = ir->r; | ||
| 274 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 308 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); |
| 275 | ra_free(as, r); | 309 | ra_free(as, r); |
| 276 | ra_modified(as, r); | 310 | ra_modified(as, r); |
| @@ -337,10 +371,10 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) | |||
| 337 | /* Restore a register (marked as free). Rematerialize or force a spill. */ | 371 | /* Restore a register (marked as free). Rematerialize or force a spill. */ |
| 338 | static Reg ra_restore(ASMState *as, IRRef ref) | 372 | static Reg ra_restore(ASMState *as, IRRef ref) |
| 339 | { | 373 | { |
| 340 | IRIns *ir = IR(ref); | ||
| 341 | if (emit_canremat(ref)) { | 374 | if (emit_canremat(ref)) { |
| 342 | return ra_rematk(as, ir); | 375 | return ra_rematk(as, ref); |
| 343 | } else { | 376 | } else { |
| 377 | IRIns *ir = IR(ref); | ||
| 344 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ | 378 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ |
| 345 | Reg r = ir->r; | 379 | Reg r = ir->r; |
| 346 | lua_assert(ra_hasreg(r)); | 380 | lua_assert(ra_hasreg(r)); |
| @@ -379,7 +413,7 @@ static Reg ra_evict(ASMState *as, RegSet allow) | |||
| 379 | FPRDEF(MINCOST) | 413 | FPRDEF(MINCOST) |
| 380 | } | 414 | } |
| 381 | ref = regcost_ref(cost); | 415 | ref = regcost_ref(cost); |
| 382 | lua_assert(ref >= as->T->nk && ref < as->T->nins); | 416 | lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); |
| 383 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ | 417 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ |
| 384 | if (!irref_isk(ref) && (as->weakset & allow)) { | 418 | if (!irref_isk(ref) && (as->weakset & allow)) { |
| 385 | IRIns *ir = IR(ref); | 419 | IRIns *ir = IR(ref); |
| @@ -429,13 +463,56 @@ static void ra_evictk(ASMState *as) | |||
| 429 | Reg r = rset_pickbot(work); | 463 | Reg r = rset_pickbot(work); |
| 430 | IRRef ref = regcost_ref(as->cost[r]); | 464 | IRRef ref = regcost_ref(as->cost[r]); |
| 431 | if (emit_canremat(ref)) { | 465 | if (emit_canremat(ref)) { |
| 432 | ra_rematk(as, IR(ref)); | 466 | ra_rematk(as, ref); |
| 433 | checkmclim(as); | 467 | checkmclim(as); |
| 434 | } | 468 | } |
| 435 | rset_clear(work, r); | 469 | rset_clear(work, r); |
| 436 | } | 470 | } |
| 437 | } | 471 | } |
| 438 | 472 | ||
| 473 | #ifdef RID_NUM_KREF | ||
| 474 | /* Allocate a register for a constant. */ | ||
| 475 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | ||
| 476 | { | ||
| 477 | /* First try to find a register which already holds the same constant. */ | ||
| 478 | RegSet work = ~as->freeset & RSET_GPR; | ||
| 479 | Reg r; | ||
| 480 | while (work) { | ||
| 481 | IRRef ref; | ||
| 482 | r = rset_pickbot(work); | ||
| 483 | ref = regcost_ref(as->cost[r]); | ||
| 484 | if (emit_canremat(ref) && | ||
| 485 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) | ||
| 486 | return r; | ||
| 487 | rset_clear(work, r); | ||
| 488 | } | ||
| 489 | work = as->freeset & allow; | ||
| 490 | if (work) | ||
| 491 | r = rset_pickbot(work); | ||
| 492 | else | ||
| 493 | r = ra_evict(as, allow); | ||
| 494 | RA_DBGX((as, "allock $x $r", k, r)); | ||
| 495 | ra_setkref(as, r, k); | ||
| 496 | rset_clear(as->freeset, r); | ||
| 497 | ra_noweak(as, r); | ||
| 498 | return r; | ||
| 499 | } | ||
| 500 | |||
| 501 | /* Allocate a specific register for a constant. */ | ||
| 502 | static void ra_allockreg(ASMState *as, int32_t k, Reg r) | ||
| 503 | { | ||
| 504 | Reg kr = ra_allock(as, k, RID2RSET(r)); | ||
| 505 | if (kr != r) { | ||
| 506 | IRIns irdummy; | ||
| 507 | irdummy.t.irt = IRT_INT; | ||
| 508 | ra_scratch(as, RID2RSET(r)); | ||
| 509 | emit_movrr(as, &irdummy, kr, r); | ||
| 510 | } | ||
| 511 | } | ||
| 512 | #else | ||
| 513 | #define ra_allockreg(as, k, r) emit_loadi(as, (r), (k)) | ||
| 514 | #endif | ||
| 515 | |||
| 439 | /* Allocate a register for ref from the allowed set of registers. | 516 | /* Allocate a register for ref from the allowed set of registers. |
| 440 | ** Note: this function assumes the ref does NOT have a register yet! | 517 | ** Note: this function assumes the ref does NOT have a register yet! |
| 441 | ** Picks an optimal register, sets the cost and marks the register as non-free. | 518 | ** Picks an optimal register, sets the cost and marks the register as non-free. |
| @@ -454,7 +531,7 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) | |||
| 454 | goto found; | 531 | goto found; |
| 455 | /* Rematerialization is cheaper than missing a hint. */ | 532 | /* Rematerialization is cheaper than missing a hint. */ |
| 456 | if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) { | 533 | if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) { |
| 457 | ra_rematk(as, IR(regcost_ref(as->cost[r]))); | 534 | ra_rematk(as, regcost_ref(as->cost[r])); |
| 458 | goto found; | 535 | goto found; |
| 459 | } | 536 | } |
| 460 | RA_DBGX((as, "hintmiss $f $r", ref, r)); | 537 | RA_DBGX((as, "hintmiss $f $r", ref, r)); |
| @@ -794,7 +871,7 @@ static void asm_tnew(ASMState *as, IRIns *ir) | |||
| 794 | as->gcsteps++; | 871 | as->gcsteps++; |
| 795 | asm_setupresult(as, ir, ci); /* GCtab * */ | 872 | asm_setupresult(as, ir, ci); /* GCtab * */ |
| 796 | asm_gencall(as, ci, args); | 873 | asm_gencall(as, ci, args); |
| 797 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24)); | 874 | ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1)); |
| 798 | } | 875 | } |
| 799 | 876 | ||
| 800 | static void asm_tdup(ASMState *as, IRIns *ir) | 877 | static void asm_tdup(ASMState *as, IRIns *ir) |
| @@ -1201,8 +1278,8 @@ static void asm_tail_link(ASMState *as) | |||
| 1201 | if (bc_isret(bc_op(*retpc))) | 1278 | if (bc_isret(bc_op(*retpc))) |
| 1202 | pc = retpc; | 1279 | pc = retpc; |
| 1203 | } | 1280 | } |
| 1204 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); | 1281 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); |
| 1205 | emit_loada(as, RID_LPC, pc); | 1282 | ra_allockreg(as, i32ptr(pc), RID_LPC); |
| 1206 | mres = (int32_t)(snap->nslots - baseslot); | 1283 | mres = (int32_t)(snap->nslots - baseslot); |
| 1207 | switch (bc_op(*pc)) { | 1284 | switch (bc_op(*pc)) { |
| 1208 | case BC_CALLM: case BC_CALLMT: | 1285 | case BC_CALLM: case BC_CALLMT: |
| @@ -1211,7 +1288,7 @@ static void asm_tail_link(ASMState *as) | |||
| 1211 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; | 1288 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; |
| 1212 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; | 1289 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; |
| 1213 | } | 1290 | } |
| 1214 | emit_loadi(as, RID_RET, mres); /* Return MULTRES or 0. */ | 1291 | ra_allockreg(as, mres, RID_RET); /* Return MULTRES or 0. */ |
| 1215 | } else if (baseslot) { | 1292 | } else if (baseslot) { |
| 1216 | /* Save modified BASE for linking to trace with higher start frame. */ | 1293 | /* Save modified BASE for linking to trace with higher start frame. */ |
| 1217 | emit_setgl(as, RID_BASE, jit_base); | 1294 | emit_setgl(as, RID_BASE, jit_base); |
