Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c | 870
1 file changed, 716 insertions(+), 154 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 60be4337..aae7b5b9 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -90,12 +90,18 @@ typedef struct ASMState {
   MCode *realign;  /* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
 } ASMState;
 
+#ifdef LUA_USE_ASSERT
+#define lj_assertA(c, ...)  lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
+#else
+#define lj_assertA(c, ...)  ((void)as)
+#endif
+
 #define IR(ref)      (&as->ir[(ref)])
 
 #define ASMREF_TMP1  REF_TRUE  /* Temp. register. */
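A minimal usage sketch of the lj_assertA() macro added above (the function
below is hypothetical; only lj_assertA(), ASMState and Reg come from this
file). The ((void)as) fallback keeps 'as' referenced when assertions are
compiled out:

  static void sketch(ASMState *as, Reg r)
  {
    /* With LUA_USE_ASSERT this forwards to lj_assertG_(J2G(as->J), ...); */
    /* without it, it expands to ((void)as), avoiding an unused warning.  */
    lj_assertA(r < RID_MAX, "bad register %d", r);
  }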
@@ -127,9 +133,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #ifdef LUA_USE_ASSERT
   if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
     IRIns *ir = IR(as->curins+1);
-    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
-            as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
-    lua_assert(0);
+    lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
+               as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
   }
 #endif
   if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
@@ -143,7 +148,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)    ((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)  (as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -170,6 +175,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -178,6 +185,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
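A minimal sketch of how these spill-slot helpers are typically invoked (the
call site is hypothetical; sps_scale(), converting a spill slot to a byte
offset, is assumed from lj_target.h):

  if (ra_hasspill(ir->s))  /* Reload the spilled value from its stack slot. */
    emit_spload(as, ir, r, sps_scale(ir->s));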
@@ -235,7 +248,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
         *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
       } else {
         *p++ = '?';
-        lua_assert(0);
+        lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
       }
     } else if (e[1] == 'f' || e[1] == 'i') {
       IRRef ref;
@@ -253,7 +266,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
     } else if (e[1] == 'x') {
       p += sprintf(p, "%08x", va_arg(argp, int32_t));
     } else {
-      lua_assert(0);
+      lj_assertA(0, "bad debug format code");
     }
     fmt = e+2;
   }
@@ -312,37 +325,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   Reg r;
   if (ra_iskref(ref)) {
     r = ra_krefreg(ref);
-    lua_assert(!rset_test(as->freeset, r));
+    lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
    return r;
   }
   ir = IR(ref);
   r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
+  lj_assertA(!ra_hasspill(ir->s),
+             "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
-    lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    /* REF_NIL stores ASMREF_L register. */
+    lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
-    lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+    lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+               "rematk of bad IR op %d", ir->o);
     emit_loadi(as, r, ir->i);
   }
   return r;
@@ -352,7 +379,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
-  lua_assert(ir >= as->ir + REF_TRUE);
+  lj_assertA(ir >= as->ir + REF_TRUE,
+             "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
   if (!ra_hasspill(slot)) {
     if (irt_is64(ir->t)) {
       slot = as->evenspill;
@@ -377,7 +405,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 {
   IRIns *ir = IR(ref);
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
+  lj_assertA(!ra_hasspill(ir->s),
+             "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;
@@ -393,7 +423,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
   IRIns *ir = IR(ref);
   int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r));
+  lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
   ra_sethint(ir->r, r);  /* Keep hint. */
   ra_free(as, r);
   if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
@@ -422,14 +452,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
 {
   IRRef ref;
   RegCost cost = ~(RegCost)0;
-  lua_assert(allow != RSET_EMPTY);
+  lj_assertA(allow != RSET_EMPTY, "evict from empty set");
   if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
     GPRDEF(MINCOST)
   } else {
     FPRDEF(MINCOST)
   }
   ref = regcost_ref(cost);
-  lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
+  lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
+             "evict of out-of-range IR %04d", ref - REF_BIAS);
   /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
   if (!irref_isk(ref) && (as->weakset & allow)) {
     IRIns *ir = IR(ref);
@@ -511,7 +542,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -520,9 +551,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+        if (k == ra_krefk(as, ref))
+          return r;
+      } else {
+        IRIns *ir = IR(ref);
+        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+            (ir->o == IR_KINT && k == ir->i) ||
+            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+             k == (intptr_t)ir_kptr(ir))
+#else
+            (ir->o != IR_KINT64 && k == ir->i)
+#endif
+           )
+          return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
         k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
@@ -542,7 +595,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -565,7 +618,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
   IRIns *ir = IR(ref);
   RegSet pick = as->freeset & allow;
   Reg r;
-  lua_assert(ra_noreg(ir->r));
+  lj_assertA(ra_noreg(ir->r),
+             "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
   if (pick) {
     /* First check register hint from propagation or PHI. */
     if (ra_hashint(ir->r)) {
@@ -612,15 +666,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
-  lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
-  lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
+  lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
+             "rename between GPR/FPR %d and %d", down, up);
+  lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
+  lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
   ra_free(as, down);  /* 'down' is free ... */
   ra_modified(as, down);
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
@@ -628,11 +694,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
@@ -665,7 +727,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
 {
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
-    lua_assert(rset_test(as->freeset, r));
+    lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
     ra_modified(as, r);
     emit_movrr(as, ir, dest, r);
   }
@@ -682,20 +744,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-        cTValue *tv = ir_knum(ir);
         /* FP remat needs a load except for +0. Still better than eviction. */
-        if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-          emit_loadn(as, dest, tv);
+        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+          emit_loadk64(as, dest, ir);
           return;
         }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-        emit_loadu64(as, dest, ir_kint64(ir)->u64);
+        emit_loadk64(as, dest, ir);
+        return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+        emit_loadk64(as, dest, ir);
         return;
 #endif
-      } else {
-        lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-                   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+#endif
+      } else if (ir->o != IR_KPRI) {
+        lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
                   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+                   "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
         emit_loadi(as, dest, ir->i);
         return;
       }
@@ -837,11 +904,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 #endif
     {  /* Allocate stored values for TNEW, TDUP and CNEW. */
       IRIns *irs;
-      lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
+      lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
+                 "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
       for (irs = IR(as->snapref-1); irs > ir; irs--)
         if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
-          lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
-                     irs->o == IR_FSTORE || irs->o == IR_XSTORE);
+          lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+                     irs->o == IR_FSTORE || irs->o == IR_XSTORE,
+                     "sunk store IR %04d has bad op %d",
+                     (int)(irs - as->ir) - REF_BIAS, irs->o);
           asm_snap_alloc1(as, irs->op2);
           if (LJ_32 && (irs+1)->o == IR_HIOP)
             asm_snap_alloc1(as, (irs+1)->op2);
@@ -888,7 +958,9 @@ static void asm_snap_alloc(ASMState *as)
     if (!irref_isk(ref)) {
       asm_snap_alloc1(as, ref);
       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
-        lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
+        lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
+                   "snap %d[%d] points to bad SOFTFP IR %04d",
+                   as->snapno, n, ref - REF_BIAS);
         asm_snap_alloc1(as, ref+1);
       }
     }
@@ -934,7 +1006,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
         ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
@@ -943,44 +1015,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-                            const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -990,21 +1024,26 @@ static int32_t asm_stack_adjust(ASMState *as)
 }
 
 /* Must match with hash*() in lj_tab.c. */
-static uint32_t ir_khash(IRIns *ir)
+static uint32_t ir_khash(ASMState *as, IRIns *ir)
 {
   uint32_t lo, hi;
+  UNUSED(as);
   if (irt_isstr(ir->t)) {
-    return ir_kstr(ir)->hash;
+    return ir_kstr(ir)->sid;
   } else if (irt_isnum(ir->t)) {
     lo = ir_knum(ir)->u32.lo;
     hi = ir_knum(ir)->u32.hi << 1;
   } else if (irt_ispri(ir->t)) {
-    lua_assert(!irt_isnil(ir->t));
+    lj_assertA(!irt_isnil(ir->t), "hash of nil key");
     return irt_type(ir->t)-IRT_FALSE;
   } else {
-    lua_assert(irt_isgcv(ir->t));
+    lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
     lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
     hi = lo + HASH_BIAS;
+#endif
   }
   return hashrot(lo, hi);
 }
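A sketch of the GC64 key-hash derivation that ir_khash() now mirrors from
lj_tab.c (the helper below is hypothetical; hashrot() is assumed from
lj_tab.h):

  static uint32_t khash_gcv_sketch(uint64_t gcptr, uint32_t itype)
  {
    uint32_t lo = (uint32_t)gcptr;  /* Low half of the 64 bit GC pointer. */
    uint32_t hi = (uint32_t)(gcptr >> 32) | (itype << 15);  /* High half. */
    return hashrot(lo, hi);  /* Must match hash*() in lj_tab.c. */
  }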
@@ -1065,6 +1104,237 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+          (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+        irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+        ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -129;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lj_assertA(irt_isstr(irs->t),
+             "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = (int8_t)strdata(s)[0];  /* Signed! */
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+        args[1] = ASMREF_TMP1;  /* TValue * */
+        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+        lj_assertA(irt_isinteger(IR(irs->op1)->t),
+                   "TOSTR of non-numeric IR %04d", irs->op1);
+        args[1] = irs->op1;  /* int */
+        if (irs->op2 == IRTOSTR_INT)
+          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+        else
+          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -129)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
+             "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  /* Account for split args. */
+  lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
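For reference, a sketch of the IR shape the relocated asm_collectargs() above
expects: call arguments form a left-nested chain of IR_CARG nodes that is
walked backwards (the refs are hypothetical, for illustration only):

  /* CALLN op1 = CARG(CARG(a, b), c):
  **   0011 CARG  a     b    ; collected as args[0] = a, args[1] = b
  **   0012 CARG  0011  c    ; collected as args[2] = c
  **   0013 CALLN 0012  id
  */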
@@ -1250,12 +1520,7 @@ static void asm_phi_fixup(ASMState *as)
       irt_clearmark(ir->t);
       /* Left PHI gained a spill slot before the loop? */
       if (ra_hasspill(ir->s)) {
-        IRRef ren;
-        lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-        ren = tref_ref(lj_ir_emit(as->J));
-        as->ir = as->T->ir;  /* The IR may have been reallocated. */
-        IR(ren)->r = (uint8_t)r;
-        IR(ren)->s = SPS_NONE;
+        ra_addrename(as, r, lref, as->loopsnapno);
       }
     }
     rset_clear(work, r);
@@ -1330,6 +1595,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1338,6 +1605,203 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Common instruction helpers ------------------------------------------ */
+
+#if !LJ_SOFTFP32
+#if !LJ_TARGET_X86ORX64
+#define asm_ldexp(as, ir)   asm_callid(as, ir, IRCALL_ldexp)
+#define asm_fppowi(as, ir)  asm_callid(as, ir, IRCALL_lj_vm_powi)
+#endif
+
+static void asm_pow(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+                                          IRCALL_lj_carith_powu64);
+  else
+#endif
+  if (irt_isnum(IR(ir->op2)->t))
+    asm_callid(as, ir, IRCALL_pow);
+  else
+    asm_fppowi(as, ir);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+                                          IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fpdiv(as, ir);
+}
+#endif
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+                                          IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+static void asm_fuseequal(ASMState *as, IRIns *ir)
+{
+  /* Fuse HREF + EQ/NE. */
+  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+    as->curins--;
+    asm_href(as, ir-1, (IROp)ir->o);
+  } else {
+    asm_equal(as, ir);
+  }
+}
+
+static void asm_alen(ASMState *as, IRIns *ir)
+{
+  asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
+                                          IRCALL_lj_tab_len_hint);
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR:
+    lj_assertA(!ra_used(ir),
+               "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
+    break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    /* Unused for LJ_SOFTFP32. */
+    lj_assertA(0, "IR %04d with unused op %d",
+               (int)(ir - as->ir) - REF_BIAS, ir->o);
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+  case IR_ALEN: asm_alen(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI:
+#if LJ_HASFFI
+    asm_cnew(as, ir);
+#else
+    lj_assertA(0, "IR %04d with unused op %d",
+               (int)(ir - as->ir) - REF_BIAS, ir->o);
+#endif
+    break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
@@ -1383,8 +1847,10 @@ static void asm_head_side(ASMState *as)
   for (i = as->stopins; i > REF_BASE; i--) {
     IRIns *ir = IR(i);
     RegSP rs;
-    lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
-               (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
+    lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+               (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
+               "IR %04d has bad parent op %d",
+               (int)(ir - as->ir) - REF_BIAS, ir->o);
     rs = as->parentmap[i - REF_FIRST];
     if (ra_hasreg(ir->r)) {
       rset_clear(allow, ir->r);
@@ -1536,7 +2002,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1556,19 +2022,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
         pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
    case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1580,6 +2050,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1605,17 +2080,23 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
@@ -1628,7 +2109,7 @@ static void asm_setup_regsp(ASMState *as)
   ir = IR(REF_FIRST);
   if (as->parent) {
     uint16_t *p;
-    lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
+    lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
     if (lastir - ir > LJ_MAX_JSLOTS)
       lj_trace_err(as->J, LJ_TRERR_NYICOAL);
     as->stopins = (IRRef)((lastir-1) - as->ir);
@@ -1676,7 +2157,7 @@ static void asm_setup_regsp(ASMState *as)
         as->modset |= RSET_SCRATCH;
        continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1701,8 +2182,8 @@ static void asm_setup_regsp(ASMState *as)
        ir->prev = REGSP_HINT(RID_FPRET);
        continue;
      }
-      /* fallthrough */
 #endif
+      /* fallthrough */
     case IR_CALLN: case IR_CALLXS:
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
@@ -1721,11 +2202,23 @@ static void asm_setup_regsp(ASMState *as)
 #endif
       /* fallthrough */
       /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
         as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+        if (ir->op2 != REF_NIL && as->evenspill < 4)
+          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
       /* fallthrough */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#else
+      /* fallthrough */
+    case IR_CNEW:
+#endif
+      /* fallthrough */
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
         as->modset = RSET_SCRATCH;
@@ -1734,21 +2227,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
         as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
+      /* fallthrough */
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-        ir->prev = REGSP_HINT(RID_XMM0);
         if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+          as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+        if (irt_isnum(IR(ir->op2)->t)) {
+          if (as->evenspill < 4)  /* Leave room to call pow(). */
+            as->evenspill = 4;
+        }
+        break;
 #else
         ir->prev = REGSP_HINT(RID_FPRET);
-        if (inloop)
-          as->modset |= RSET_SCRATCH;
-#endif
         continue;
+#endif
       }
       /* fallthrough */ /* for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2259,31 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-        ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-        if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
-          as->evenspill = 4;
-#endif
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-        continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-        ir->prev = REGSP_HINT(RID_XMM0);
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-        continue;
+      if (ir->op2 <= IRFPM_TRUNC) {
+        if (!(as->flags & JIT_F_SSE4_1)) {
+          ir->prev = REGSP_HINT(RID_XMM0);
+          if (inloop)
+            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+          continue;
+        }
+        break;
       }
+#endif
+      if (inloop)
+        as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-        as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
       /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+        break;
+      /* fallthrough */
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
         IR(ir->op2)->r = REGSP_HINT(RID_ECX);
         if (inloop)
@@ -1831,14 +2329,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   ASMState *as = &as_;
   MCode *origtop;
 
+  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+  {
+    IRRef nins = T->nins;
+    IRIns *ir = &T->ir[nins-1];
+    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
+      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
+      T->nins = nins;
+    }
+  }
+
   /* Ensure an initialized instruction beyond the last one for HIOP checks. */
-  J->cur.nins = lj_ir_nextins(J);
-  lj_ir_nop(&J->cur.ir[J->cur.nins]);
+  /* This also allows one RENAME to be added without reallocating curfinal. */
+  as->orignins = lj_ir_nextins(J);
+  lj_ir_nop(&J->cur.ir[as->orignins]);
 
   /* Setup initial state. Copy some fields to reduce indirections. */
   as->J = J;
   as->T = T;
-  as->ir = T->ir;
+  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
   as->flags = J->flags;
   as->loopref = J->loopref;
   as->realign = NULL;
@@ -1851,12 +2360,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
 
-  do {
+  /*
+  ** This is a loop, because the MCode may have to be (re-)assembled
+  ** multiple times:
+  **
+  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+  **    backend wants the MCode to be aligned differently.
+  **
+  **    This is currently only the case on x86/x64, where small loops get
+  **    an aligned loop body plus a short branch. Not much effort is wasted,
+  **    because the abort happens very quickly and only once.
+  **
+  ** 2. The IR is immovable, since the MCode embeds pointers to various
+  **    constants inside the IR. But RENAMEs may need to be added to the IR
+  **    during assembly, which might grow and reallocate the IR. We check
+  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
+  **    copy (in J->curfinal.ir) and try again.
+  **
+  ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+  ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
+  ** always have one spare slot in the IR (see above), which means we
+  ** have to redo the assembly for only ~2% of all traces.
+  **
+  ** Very, very rarely, this needs to be done repeatedly, since the
+  ** location of constants inside the IR (actually, reachability from
+  ** a global pointer) may affect register allocation and thus the
+  ** number of RENAMEs.
+  */
+  for (;;) {
     as->mcp = as->mctop;
 #ifdef LUA_USE_ASSERT
     as->mcp_prev = as->mcp;
 #endif
-    as->curins = T->nins;
+    as->ir = J->curfinal->ir;  /* Use the copied IR. */
+    as->curins = J->cur.nins = as->orignins;
+
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
 
@@ -1875,7 +2413,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
     /* Assemble a trace in linear backwards order. */
     for (as->curins--; as->curins > as->stopins; as->curins--) {
       IRIns *ir = IR(as->curins);
-      lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
+      /* 64 bit types handled by SPLIT for 32 bit archs. */
+      lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
+                 "IR %04d has unsplit 64 bit type",
+                 (int)(ir - as->ir) - REF_BIAS);
       if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
         continue;  /* Dead-code elimination can be soooo easy. */
       if (irt_isguard(ir->t))
@@ -1884,22 +2425,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       checkmclim(as);
       asm_ir(as, ir);
     }
-  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
 
-  /* Emit head of trace. */
-  RA_DBG_REF();
-  checkmclim(as);
-  if (as->gcsteps > 0) {
-    as->curins = as->T->snap[0].ref;
-    asm_snap_prep(as);  /* The GC check is a guard. */
-    asm_gc_check(as);
+    if (as->realign && J->curfinal->nins >= T->nins)
+      continue;  /* Retry in case only the MCode needs to be realigned. */
+
+    /* Emit head of trace. */
+    RA_DBG_REF();
+    checkmclim(as);
+    if (as->gcsteps > 0) {
+      as->curins = as->T->snap[0].ref;
+      asm_snap_prep(as);  /* The GC check is a guard. */
+      asm_gc_check(as);
+      as->curins = as->stopins;
+    }
+    ra_evictk(as);
+    if (as->parent)
+      asm_head_side(as);
+    else
+      asm_head_root(as);
+    asm_phi_fixup(as);
+
+    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
+      lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
+      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
+      T->nins = J->curfinal->nins;
+      break;  /* Done. */
+    }
+
+    /* Otherwise try again with a bigger IR. */
+    lj_trace_free(J2G(J), J->curfinal);
+    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
+    J->curfinal = lj_trace_alloc(J->L, T);
+    as->realign = NULL;
   }
-  ra_evictk(as);
-  if (as->parent)
-    asm_head_side(as);
-  else
-    asm_head_root(as);
-  asm_phi_fixup(as);
 
   RA_DBGX((as, "===== START ===="));
   RA_DBG_FLUSH();
@@ -1912,6 +2471,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+#if LJ_TARGET_MCODE_FIXUP
+  asm_mcode_fixup(T->mcode, T->szmcode);
+#endif
   lj_mcode_sync(T->mcode, origtop);
 }
 