Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c | 1021
 1 file changed, 838 insertions(+), 183 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9ff9215f..7abafbf4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -11,6 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_gc.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
@@ -71,6 +72,7 @@ typedef struct ASMState {
   IRRef snaprename;	/* Rename highwater mark for snapshot check. */
   SnapNo snapno;	/* Current snapshot number. */
   SnapNo loopsnapno;	/* Loop snapshot number. */
+  int snapalloc;	/* Current snapshot needs allocation. */
   BloomFilter snapfilt1, snapfilt2;	/* Filled with snapshot refs. */
 
   IRRef fuseref;	/* Fusion limit (loopref, 0 or FUSE_DISABLED). */
@@ -85,18 +87,25 @@ typedef struct ASMState {
 
   MCode *mcbot;		/* Bottom of reserved MCode. */
   MCode *mctop;		/* Top of generated MCode. */
+  MCode *mctoporig;	/* Original top of generated MCode. */
   MCode *mcloop;	/* Pointer to loop MCode (or NULL). */
   MCode *invmcp;	/* Points to invertible loop branch (or NULL). */
   MCode *flagmcp;	/* Pending opportunity to merge flag setting ins. */
   MCode *realign;	/* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
 } ASMState;
 
+#ifdef LUA_USE_ASSERT
+#define lj_assertA(c, ...)	lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
+#else
+#define lj_assertA(c, ...)	((void)as)
+#endif
+
 #define IR(ref)			(&as->ir[(ref)])
 
 #define ASMREF_TMP1		REF_TRUE	/* Temp. register. */
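
The lj_assertA() macro added above threads the assembler state into assertion
failures, and its release-mode expansion still references `as` to avoid
unused-variable warnings. A minimal self-contained sketch of the same
context-carrying assert pattern, with invented names (not LuaJIT API):

    #include <stdio.h>
    #include <stdlib.h>

    #ifdef USE_ASSERT
    #define ctx_assert(ctx, c, ...) \
      do { (void)(ctx); if (!(c)) { fprintf(stderr, __VA_ARGS__); \
           fputc('\n', stderr); abort(); } } while (0)
    #else
    #define ctx_assert(ctx, c, ...) ((void)(ctx))  /* Keep ctx referenced. */
    #endif
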
@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #ifdef LUA_USE_ASSERT
   if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
     IRIns *ir = IR(as->curins+1);
-    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
-	    as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
-    lua_assert(0);
+    lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
+	       as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
   }
 #endif
   if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
@@ -144,7 +152,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)	(as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -171,6 +179,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -179,6 +189,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
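
The new emit_spload/emit_spstore macros simply pin the base register of the
generic per-target load/store emitters to the stack pointer. A toy model with
logging stand-ins for the emitters (the register id and emitters below are
illustrative, not LuaJIT's):

    #include <stdio.h>

    enum { RID_SP = 31 };  /* Illustrative stack-pointer register id. */

    static void emit_loadofs(int r, int base, int ofs)
    { printf("load  r%d <- [r%d+%d]\n", r, base, ofs); }
    static void emit_storeofs(int r, int base, int ofs)
    { printf("store r%d -> [r%d+%d]\n", r, base, ofs); }

    #define emit_spload(r, ofs)  emit_loadofs((r), RID_SP, (ofs))
    #define emit_spstore(r, ofs) emit_storeofs((r), RID_SP, (ofs))
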
@@ -236,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
 	*p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
       } else {
 	*p++ = '?';
-	lua_assert(0);
+	lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
       }
     } else if (e[1] == 'f' || e[1] == 'i') {
       IRRef ref;
@@ -254,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
     } else if (e[1] == 'x') {
       p += sprintf(p, "%08x", va_arg(argp, int32_t));
     } else {
-      lua_assert(0);
+      lj_assertA(0, "bad debug format code");
     }
     fmt = e+2;
   }
@@ -313,37 +329,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   Reg r;
   if (ra_iskref(ref)) {
     r = ra_krefreg(ref);
-    lua_assert(!rset_test(as->freeset, r));
+    lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
   r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
+  lj_assertA(!ra_hasspill(ir->s),
+	     "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
-    lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    /* REF_NIL stores ASMREF_L register. */
+    lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
-    lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-	       ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+    lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+	       ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+	       "rematk of bad IR op %d", ir->o);
     emit_loadi(as, r, ir->i);
   }
   return r;
@@ -353,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
-  lua_assert(ir >= as->ir + REF_TRUE);
+  lj_assertA(ir >= as->ir + REF_TRUE,
+	     "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
   if (!ra_hasspill(slot)) {
     if (irt_is64(ir->t)) {
       slot = as->evenspill;
@@ -378,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 {
   IRIns *ir = IR(ref);
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
+  lj_assertA(!ra_hasspill(ir->s),
+	     "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;
@@ -394,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
   IRIns *ir = IR(ref);
   int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r));
+  lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
   ra_sethint(ir->r, r);  /* Keep hint. */
   ra_free(as, r);
   if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
@@ -423,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
 {
   IRRef ref;
   RegCost cost = ~(RegCost)0;
-  lua_assert(allow != RSET_EMPTY);
+  lj_assertA(allow != RSET_EMPTY, "evict from empty set");
   if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
     GPRDEF(MINCOST)
   } else {
     FPRDEF(MINCOST)
   }
   ref = regcost_ref(cost);
-  lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
+  lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
+	     "evict of out-of-range IR %04d", ref - REF_BIAS);
   /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
   if (!irref_isk(ref) && (as->weakset & allow)) {
     IRIns *ir = IR(ref);
@@ -512,7 +546,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -521,9 +555,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+	if (k == ra_krefk(as, ref))
+	  return r;
+      } else {
+	IRIns *ir = IR(ref);
+	if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+	    (ir->o == IR_KINT && k == ir->i) ||
+	    (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+	    ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+	     k == (intptr_t)ir_kptr(ir))
+#else
+	    (ir->o != IR_KINT64 && k == ir->i)
+#endif
+	   )
+	  return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
 	k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
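
Before allocating a fresh register for a constant, ra_allock() now compares
the full intptr_t value against every flavor of 64-bit constant a live
register may already hold. A simplified stand-alone model of that reuse scan
(the types and the lookup helper are assumptions for illustration, not
LuaJIT's API):

    #include <stdint.h>

    typedef uint32_t RegSet;
    #define rset_pickbot(rs)  ((unsigned)__builtin_ctz(rs))
    #define rset_clear(rs, r) ((rs) &= ~(1u << (r)))

    /* Hypothetical helper: constant cached in register r, if any. */
    extern int reg_const_value(unsigned r, intptr_t *k);

    static int find_const_reg(RegSet inuse, intptr_t k, unsigned *out)
    {
      while (inuse) {
        unsigned r = rset_pickbot(inuse);  /* Scan from lowest register. */
        intptr_t v;
        if (reg_const_value(r, &v) && v == k) { *out = r; return 1; }
        rset_clear(inuse, r);
      }
      return 0;  /* Caller allocates (or evicts) a register instead. */
    }
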
@@ -543,7 +599,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -566,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
   IRIns *ir = IR(ref);
   RegSet pick = as->freeset & allow;
   Reg r;
-  lua_assert(ra_noreg(ir->r));
+  lj_assertA(ra_noreg(ir->r),
+	     "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
   if (pick) {
     /* First check register hint from propagation or PHI. */
     if (ra_hashint(ir->r)) {
@@ -613,15 +670,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
-  lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
-  lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
+  lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
+	     "rename between GPR/FPR %d and %d", down, up);
+  lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
+  lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
   ra_free(as, down);  /* 'down' is free ... */
   ra_modified(as, down);
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
@@ -629,11 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    /*
+    ** The rename is effective at the subsequent (already emitted) exit
+    ** branch. This is for the current snapshot (as->snapno). Except if we
+    ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
+    ** then it belongs to the next snapshot.
+    ** See also the discussion at asm_snap_checkrename().
+    */
+    ra_addrename(as, down, ref, as->snapno + as->snapalloc);
   }
 }
 
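
The snapshot a rename is attributed to now depends on whether the current
snapshot has already had its refs allocated: if not (snapalloc == 1), the
rename belongs to the next snapshot. A one-line model of that decision,
mirroring the fields in the patch:

    /* snapno: current snapshot; snapalloc: 1 until asm_snap_prep() runs. */
    static unsigned rename_snapno(unsigned snapno, int snapalloc)
    {
      return snapno + (snapalloc ? 1 : 0);
    }
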
@@ -666,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
 {
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
-    lua_assert(rset_test(as->freeset, r));
+    lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
     ra_modified(as, r);
     emit_movrr(as, ir, dest, r);
   }
@@ -683,20 +755,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-	cTValue *tv = ir_knum(ir);
 	/* FP remat needs a load except for +0. Still better than eviction. */
-	if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-	  emit_loadn(as, dest, tv);
+	if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+	  emit_loadk64(as, dest, ir);
 	  return;
 	}
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-	emit_loadu64(as, dest, ir_kint64(ir)->u64);
+	emit_loadk64(as, dest, ir);
+	return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+	emit_loadk64(as, dest, ir);
 	return;
 #endif
-      } else {
-	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+#endif
+      } else if (ir->o != IR_KPRI) {
+	lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+		   "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
 	emit_loadi(as, dest, ir->i);
 	return;
       }
@@ -741,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
 }
 #endif
 
-#if !LJ_64
 /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
 static void ra_destpair(ASMState *as, IRIns *ir)
 {
   Reg destlo = ir->r, desthi = (ir+1)->r;
+  IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
   /* First spill unrelated refs blocking the destination registers. */
   if (!rset_test(as->freeset, RID_RETLO) &&
       destlo != RID_RETLO && desthi != RID_RETLO)
@@ -769,29 +846,28 @@ static void ra_destpair(ASMState *as, IRIns *ir)
   /* Check for conflicts and shuffle the registers as needed. */
   if (destlo == RID_RETHI) {
     if (desthi == RID_RETLO) {
-#if LJ_TARGET_X86
-      *--as->mcp = XI_XCHGa + RID_RETHI;
+#if LJ_TARGET_X86ORX64
+      *--as->mcp = REX_64IR(irx, XI_XCHGa + RID_RETHI);
 #else
-      emit_movrr(as, ir, RID_RETHI, RID_TMP);
-      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-      emit_movrr(as, ir, RID_TMP, RID_RETLO);
+      emit_movrr(as, irx, RID_RETHI, RID_TMP);
+      emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+      emit_movrr(as, irx, RID_TMP, RID_RETLO);
 #endif
     } else {
-      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
-      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+      emit_movrr(as, irx, RID_RETHI, RID_RETLO);
+      if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
     }
   } else if (desthi == RID_RETLO) {
-    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   } else {
-    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   }
   /* Restore spill slots (if any). */
   if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
   if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
 }
-#endif
 
 /* -- Snapshot handling ---------------------------------------------------- */
 
@@ -841,11 +917,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 #endif
     {  /* Allocate stored values for TNEW, TDUP and CNEW. */
       IRIns *irs;
-      lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
+      lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
+		 "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
       for (irs = IR(as->snapref-1); irs > ir; irs--)
 	if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
-	  lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
-		     irs->o == IR_FSTORE || irs->o == IR_XSTORE);
+	  lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+		     irs->o == IR_FSTORE || irs->o == IR_XSTORE,
+		     "sunk store IR %04d has bad op %d",
+		     (int)(irs - as->ir) - REF_BIAS, irs->o);
 	  asm_snap_alloc1(as, irs->op2);
 	  if (LJ_32 && (irs+1)->o == IR_HIOP)
 	    asm_snap_alloc1(as, (irs+1)->op2);
@@ -881,9 +960,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 }
 
 /* Allocate refs escaping to a snapshot. */
-static void asm_snap_alloc(ASMState *as)
+static void asm_snap_alloc(ASMState *as, int snapno)
 {
-  SnapShot *snap = &as->T->snap[as->snapno];
+  SnapShot *snap = &as->T->snap[snapno];
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
   MSize n, nent = snap->nent;
   as->snapfilt1 = as->snapfilt2 = 0;
@@ -893,7 +972,9 @@ static void asm_snap_alloc(ASMState *as)
     if (!irref_isk(ref)) {
       asm_snap_alloc1(as, ref);
       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
-	lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
+	lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
+		   "snap %d[%d] points to bad SOFTFP IR %04d",
+		   snapno, n, ref - REF_BIAS);
 	asm_snap_alloc1(as, ref+1);
       }
     }
@@ -919,67 +1000,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
   return 0;  /* Not found. */
 }
 
-/* Prepare snapshot for next guard instruction. */
+/* Prepare snapshot for next guard or throwing instruction. */
 static void asm_snap_prep(ASMState *as)
 {
-  if (as->curins < as->snapref) {
-    do {
-      if (as->snapno == 0) return;  /* Called by sunk stores before snap #0. */
-      as->snapno--;
-      as->snapref = as->T->snap[as->snapno].ref;
-    } while (as->curins < as->snapref);
-    asm_snap_alloc(as);
+  if (as->snapalloc) {
+    /* Alloc on first invocation for each snapshot. */
+    as->snapalloc = 0;
+    asm_snap_alloc(as, as->snapno);
     as->snaprename = as->T->nins;
   } else {
-    /* Process any renames above the highwater mark. */
+    /* Check any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
 	ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
   }
 }
 
-/* -- Miscellaneous helpers ----------------------------------------------- */
-
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-			    const CCallInfo *ci, IRRef *args)
+/* Move to previous snapshot when we cross the current snapshot ref. */
+static void asm_snap_prev(ASMState *as)
 {
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  if (as->curins < as->snapref) {
+    uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);
+    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
+    do {
+      if (as->snapno == 0) return;
+      as->snapno--;
+      as->snapref = as->T->snap[as->snapno].ref;
+      as->T->snap[as->snapno].mcofs = (uint16_t)ofs;  /* Remember mcode ofs. */
+    } while (as->curins < as->snapref);  /* May have no ins in between. */
+    as->snapalloc = 1;
   }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
 }
 
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+/* Fixup snapshot mcode offsets. */
+static void asm_snap_fixup_mcofs(ASMState *as)
 {
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
+  SnapShot *snap = as->T->snap;
+  SnapNo i;
+  for (i = as->T->nsnap-1; i > 0; i--) {
+    /* Compute offset from mcode start and store in correct snapshot. */
+    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
   }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+  snap[0].mcofs = 0;
 }
 
+/* -- Miscellaneous helpers ----------------------------------------------- */
+
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
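
Because assembly runs backwards, asm_snap_prev() temporarily stores each
snapshot's distance from the original mcode top, and asm_snap_fixup_mcofs()
later rewrites those into offsets from the mcode start. A self-contained
model of just the fixup arithmetic (the bare array stands in for the real
SnapShot records):

    #include <stdint.h>

    static void fixup_mcofs(uint16_t *mcofs, uint32_t nsnap, uint32_t totalsz)
    {
      uint32_t i;
      /* Slot i-1 holds the from-top offset where snapshot i begins,
      ** so its from-start offset is totalsz minus that value. */
      for (i = nsnap-1; i > 0; i--)
        mcofs[i] = (uint16_t)(totalsz - mcofs[i-1]);
      mcofs[0] = 0;  /* Snapshot #0 starts with the trace mcode. */
    }
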
@@ -989,21 +1058,26 @@ static int32_t asm_stack_adjust(ASMState *as)
 }
 
 /* Must match with hash*() in lj_tab.c. */
-static uint32_t ir_khash(IRIns *ir)
+static uint32_t ir_khash(ASMState *as, IRIns *ir)
 {
   uint32_t lo, hi;
+  UNUSED(as);
   if (irt_isstr(ir->t)) {
-    return ir_kstr(ir)->hash;
+    return ir_kstr(ir)->sid;
   } else if (irt_isnum(ir->t)) {
     lo = ir_knum(ir)->u32.lo;
     hi = ir_knum(ir)->u32.hi << 1;
   } else if (irt_ispri(ir->t)) {
-    lua_assert(!irt_isnil(ir->t));
+    lj_assertA(!irt_isnil(ir->t), "hash of nil key");
     return irt_type(ir->t)-IRT_FALSE;
   } else {
-    lua_assert(irt_isgcv(ir->t));
+    lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
     lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
     hi = lo + HASH_BIAS;
+#endif
   }
   return hashrot(lo, hi);
 }
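
ir_khash() must reproduce the hash the table code computes at runtime; the
lo/hi words it derives are fed into hashrot(). A self-contained version of
that rotate/xor mix for reference (the constants follow lj_tab.h as I
understand it; treat this sketch as illustrative, not normative):

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, int n)
    { return (x << n) | (x >> (32 - n)); }

    static uint32_t hashrot_sketch(uint32_t lo, uint32_t hi)
    {
      lo ^= hi; hi = rotl32(hi, 14);
      lo -= hi; hi = rotl32(hi, 5);
      hi ^= lo; hi -= rotl32(lo, 13);
      return hi;  /* Caller masks this with the table's hash mask. */
    }
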
@@ -1017,6 +1091,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
   IRRef args[3];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const char *str */
   args[2] = ir->op2;   /* size_t len      */
@@ -1029,6 +1104,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L    */
   args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
   as->gcsteps++;
@@ -1041,6 +1117,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const GCtab *kt */
   as->gcsteps++;
@@ -1064,6 +1141,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb);
+#endif
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  switch (ir->op2) {
+  case IRBUFHDR_RESET: {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    IRIns irbp;
+    irbp.ot = IRT(0, IRT_PTR);  /* Buffer data pointer type. */
+    emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
+    emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
+    break;
+  }
+  case IRBUFHDR_APPEND: {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+	  (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
+	irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+	ir = irp;
+      }
+    }
+    break;
+  }
+#if LJ_HASBUFFER
+  case IRBUFHDR_WRITE:
+    asm_bufhdr_write(as, sb);
+    break;
+#endif
+  default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -129;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lj_assertA(irt_isstr(irs->t),
+	     "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = (int8_t)strdata(s)[0];  /* Signed! */
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+	args[1] = ASMREF_TMP1;  /* TValue * */
+	ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+	lj_assertA(irt_isinteger(IR(irs->op1)->t),
+		   "TOSTR of non-numeric IR %04d", irs->op1);
+	args[1] = irs->op1;  /* int */
+	if (irs->op2 == IRTOSTR_INT)
+	  ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+	else
+	  ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -129)
+      asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  asm_snap_prep(as);
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
+	     "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  asm_snap_prep(as);
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
+}
+
+static void asm_tmpref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+  asm_tvptr(as, r, ir->op1, ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  /* Account for split args. */
+  lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
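
The IRBUFHDR_RESET case above compiles to a plain pointer rewind rather than
a call: the buffer's write pointer is reloaded from its base. A minimal model
of the fields involved (the struct here is a stand-in, not the real SBuf,
though the b/w/e naming mirrors it):

    typedef struct Buf { char *b, *w, *e; } Buf;

    /* Reset: drop accumulated contents by rewinding the write pointer. */
    static void buf_reset(Buf *sb) { sb->w = sb->b; }

    /* Space left before the buffer must grow. */
    static long buf_left(const Buf *sb) { return (long)(sb->e - sb->w); }
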
@@ -1249,12 +1580,7 @@ static void asm_phi_fixup(ASMState *as)
       irt_clearmark(ir->t);
       /* Left PHI gained a spill slot before the loop? */
       if (ra_hasspill(ir->s)) {
-	IRRef ren;
-	lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-	ren = tref_ref(lj_ir_emit(as->J));
-	as->ir = as->T->ir;  /* The IR may have been reallocated. */
-	IR(ren)->r = (uint8_t)r;
-	IR(ren)->s = SPS_NONE;
+	ra_addrename(as, r, lref, as->loopsnapno);
       }
     }
     rset_clear(work, r);
@@ -1329,6 +1655,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1337,6 +1665,204 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Common instruction helpers ------------------------------------------ */
+
+#if !LJ_SOFTFP32
+#if !LJ_TARGET_X86ORX64
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#define asm_fppowi(as, ir)	asm_callid(as, ir, IRCALL_lj_vm_powi)
+#endif
+
+static void asm_pow(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
+#endif
+  if (irt_isnum(IR(ir->op2)->t))
+    asm_callid(as, ir, IRCALL_pow);
+  else
+    asm_fppowi(as, ir);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fpdiv(as, ir);
+}
+#endif
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+static void asm_fuseequal(ASMState *as, IRIns *ir)
+{
+  /* Fuse HREF + EQ/NE. */
+  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+    as->curins--;
+    asm_href(as, ir-1, (IROp)ir->o);
+  } else {
+    asm_equal(as, ir);
+  }
+}
+
+static void asm_alen(ASMState *as, IRIns *ir)
+{
+  asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
+					  IRCALL_lj_tab_len_hint);
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR:
+    lj_assertA(!ra_used(ir),
+	       "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
+    break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    /* Unused for LJ_SOFTFP32. */
+    lj_assertA(0, "IR %04d with unused op %d",
+	       (int)(ir - as->ir) - REF_BIAS, ir->o);
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_TMPREF: asm_tmpref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+  case IR_ALEN: asm_alen(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI:
+#if LJ_HASFFI
+    asm_cnew(as, ir);
+#else
+    lj_assertA(0, "IR %04d with unused op %d",
+	       (int)(ir - as->ir) - REF_BIAS, ir->o);
+#endif
+    break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
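
asm_fuseequal() above shows the backwards-codegen fusion idiom: when an EQ/NE
directly consumes the immediately preceding HREF, both are assembled as one
unit by decrementing curins. A toy model of the pattern (opcodes and types
are illustrative, not LuaJIT's IR):

    enum { OP_HREF, OP_EQ, OP_OTHER };
    typedef struct Ins { int op, op1; } Ins;

    static void assemble(Ins *ir, int nins)
    {
      int i;
      for (i = nins-1; i >= 0; i--) {  /* Backwards code generation. */
        if (ir[i].op == OP_EQ && i > 0 &&
            ir[i-1].op == OP_HREF && ir[i].op1 == i-1) {
          /* ... emit one fused hash-lookup-and-guard sequence ... */
          i--;  /* The HREF was consumed together with the EQ. */
        } else {
          /* ... emit ir[i] on its own ... */
        }
      }
    }
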
@@ -1373,8 +1899,7 @@ static void asm_head_side(ASMState *as)
 
   if (as->snapno && as->topslot > as->parent->topslot) {
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
-    as->snapno = 0;
-    asm_snap_alloc(as);
+    asm_snap_alloc(as, 0);
   }
   allow = asm_head_side_base(as, irp, allow);
 
@@ -1382,8 +1907,10 @@ static void asm_head_side(ASMState *as)
   for (i = as->stopins; i > REF_BASE; i--) {
     IRIns *ir = IR(i);
     RegSP rs;
-    lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
-	       (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
+    lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+	       (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
+	       "IR %04d has bad parent op %d",
+	       (int)(ir - as->ir) - REF_BIAS, ir->o);
     rs = as->parentmap[i - REF_FIRST];
     if (ra_hasreg(ir->r)) {
       rset_clear(allow, ir->r);
@@ -1535,7 +2062,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1555,19 +2082,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
 	pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
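
A worked example of the adjusted mres computation under LJ_FR2 (all values
invented for illustration): exiting at a BC_CALLM with A=1, C=2 on a GC64
build, with nslots=8 and baseslot=0, leaves 8-0-1 - (1+1+1+2) = 2 pending
multiple results.

    #include <assert.h>

    int main(void)
    {
      const int fr2 = 1;                   /* LJ_FR2 on GC64 builds. */
      int nslots = 8, baseslot = 0, A = 1, C = 2;
      int mres = nslots - baseslot - fr2;  /* 7 */
      mres -= 1 + fr2 + A + C;             /* BC_CALLM adjustment: -5. */
      assert(mres == 2);
      return 0;
    }
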
@@ -1579,6 +2110,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1602,22 +2138,32 @@ static void asm_setup_regsp(ASMState *as)
 #endif
 
   ra_setup(as);
+#if LJ_TARGET_ARM64
+  ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
+#endif
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
+  as->snapalloc = 0;
 
   as->stopins = REF_BASE;
   as->orignins = nins;
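
The constant-clearing loop above now advances an extra slot for 64-bit
constants, which occupy two adjacent IR slots. A stand-alone model of that
walk (the two-slot layout is the only assumption carried over; the struct is
a stand-in):

    typedef struct KIns { int is64; int prev; } KIns;

    static void init_consts(KIns *ir, int n)
    {
      int i;
      for (i = 0; i < n; i++) {
        ir[i].prev = -1;     /* REGSP_INIT stand-in. */
        if (ir[i].is64) i++; /* Payload of a 64-bit K uses the next slot. */
      }
    }
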
@@ -1627,7 +2173,7 @@ static void asm_setup_regsp(ASMState *as)
   ir = IR(REF_FIRST);
   if (as->parent) {
     uint16_t *p;
-    lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
+    lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
     if (lastir - ir > LJ_MAX_JSLOTS)
       lj_trace_err(as->J, LJ_TRERR_NYICOAL);
     as->stopins = (IRRef)((lastir-1) - as->ir);
1666 ir->prev = (uint16_t)REGSP_HINT((rload & 15)); 2212 ir->prev = (uint16_t)REGSP_HINT((rload & 15));
1667 rload = lj_ror(rload, 4); 2213 rload = lj_ror(rload, 4);
1668 continue; 2214 continue;
2215 case IR_TMPREF:
2216 if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
2217 as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */
2218 break;
1669#endif 2219#endif
1670 case IR_CALLXS: { 2220 case IR_CALLXS: {
1671 CCallInfo ci; 2221 CCallInfo ci;
@@ -1675,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as)
       as->modset |= RSET_SCRATCH;
       continue;
     }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLL:
+      /* lj_vm_next needs two TValues on the stack. */
+#if LJ_TARGET_X64 && LJ_ABI_WIN
+      if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
+	as->evenspill = SPS_FIRST + 4;
+#else
+      if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
+	as->evenspill = 4;
+#endif
+      /* fallthrough */
+    case IR_CALLN: case IR_CALLA: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1683,7 +2243,6 @@ static void asm_setup_regsp(ASMState *as)
 	  (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
     }
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
     case IR_HIOP:
       switch ((ir-1)->o) {
 #if LJ_SOFTFP && LJ_TARGET_ARM
@@ -1694,15 +2253,15 @@ static void asm_setup_regsp(ASMState *as)
       }
       break;
 #endif
-#if !LJ_SOFTFP && LJ_NEED_FP64
+#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
     case IR_CONV:
       if (irt_isfp((ir-1)->t)) {
 	ir->prev = REGSP_HINT(RID_FPRET);
 	continue;
       }
-      /* fallthrough */
 #endif
-    case IR_CALLN: case IR_CALLXS:
+      /* fallthrough */
+    case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
 #endif
@@ -1713,18 +2272,29 @@ static void asm_setup_regsp(ASMState *as)
1713 break; 2272 break;
1714 } 2273 }
1715 break; 2274 break;
1716#endif
1717#if LJ_SOFTFP 2275#if LJ_SOFTFP
1718 case IR_MIN: case IR_MAX: 2276 case IR_MIN: case IR_MAX:
1719 if ((ir+1)->o != IR_HIOP) break; 2277 if ((ir+1)->o != IR_HIOP) break;
1720#endif 2278#endif
1721 /* fallthrough */ 2279 /* fallthrough */
1722 /* C calls evict all scratch regs and return results in RID_RET. */ 2280 /* C calls evict all scratch regs and return results in RID_RET. */
1723 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2281 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1724 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2282 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1725 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2283 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
2284#if LJ_TARGET_X86 && LJ_HASFFI
2285 if (0) {
2286 case IR_CNEW:
2287 if (ir->op2 != REF_NIL && as->evenspill < 4)
2288 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2289 }
1726 /* fallthrough */ 2290 /* fallthrough */
1727 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2291#else
2292 /* fallthrough */
2293 case IR_CNEW:
2294#endif
2295 /* fallthrough */
2296 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2297 case IR_BUFSTR:
1728 ir->prev = REGSP_HINT(RID_RET); 2298 ir->prev = REGSP_HINT(RID_RET);
1729 if (inloop) 2299 if (inloop)
1730 as->modset = RSET_SCRATCH; 2300 as->modset = RSET_SCRATCH;
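
The "if (0) { case IR_CNEW: ... }" construct in the hunk above deserves a note: since switch cases are just labels, the case label jumps into an otherwise dead if-body. IR_CNEW thus skips the 3-argument bump but runs its own 4-argument check, and both paths then converge on the shared RID_RET tail. A standalone, compilable illustration of the control flow (opcode values invented):

#include <stdio.h>

static const char *classify(int op)
{
  switch (op) {
  case 1:
    printf("extra work only for op 1\n");
    if (0) {  /* Dead code unless entered via the case label below. */
    case 2:
      printf("extra work only for op 2\n");
    }
    /* fallthrough */
  case 3:
    return "shared tail for ops 1, 2 and 3";
  }
  return "other";
}

int main(void)
{
  puts(classify(2));  /* Prints the op-2 line, then the shared tail. */
  return 0;
}
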
@@ -1733,58 +2303,73 @@ static void asm_setup_regsp(ASMState *as)
1733 if (inloop) 2303 if (inloop)
1734 as->modset = RSET_SCRATCH; 2304 as->modset = RSET_SCRATCH;
1735 break; 2305 break;
1736#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP 2306#if !LJ_SOFTFP
1737 case IR_ATAN2: case IR_LDEXP: 2307#if !LJ_TARGET_X86ORX64
2308 case IR_LDEXP:
2309#endif
1738#endif 2310#endif
2311 /* fallthrough */
1739 case IR_POW: 2312 case IR_POW:
1740 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2313 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1741#if LJ_TARGET_X86ORX64
1742 ir->prev = REGSP_HINT(RID_XMM0);
1743 if (inloop) 2314 if (inloop)
1744 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); 2315 as->modset |= RSET_SCRATCH;
2316#if LJ_TARGET_X86
2317 if (irt_isnum(IR(ir->op2)->t)) {
2318 if (as->evenspill < 4) /* Leave room to call pow(). */
2319 as->evenspill = 4;
2320 }
2321 break;
1745#else 2322#else
1746 ir->prev = REGSP_HINT(RID_FPRET); 2323 ir->prev = REGSP_HINT(RID_FPRET);
1747 if (inloop)
1748 as->modset |= RSET_SCRATCH;
1749#endif
1750 continue; 2324 continue;
2325#endif
1751 } 2326 }
1752 /* fallthrough */ /* for integer POW */ 2327 /* fallthrough */ /* for integer POW */
1753 case IR_DIV: case IR_MOD: 2328 case IR_DIV: case IR_MOD:
1754 if (!irt_isnum(ir->t)) { 2329 if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
1755 ir->prev = REGSP_HINT(RID_RET); 2330 ir->prev = REGSP_HINT(RID_RET);
1756 if (inloop) 2331 if (inloop)
1757 as->modset |= (RSET_SCRATCH & RSET_GPR); 2332 as->modset |= (RSET_SCRATCH & RSET_GPR);
1758 continue; 2333 continue;
1759 } 2334 }
1760 break; 2335 break;
1761 case IR_FPMATH: 2336#if LJ_64 && LJ_SOFTFP
1762#if LJ_TARGET_X86ORX64 2337 case IR_ADD: case IR_SUB: case IR_MUL:
1763 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2338 if (irt_isnum(ir->t)) {
1764 ir->prev = REGSP_HINT(RID_XMM0); 2339 ir->prev = REGSP_HINT(RID_RET);
1765#if !LJ_64
1766 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
1767 as->evenspill = 4;
1768#endif
1769 if (inloop)
1770 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1771 continue;
1772 } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
1773 ir->prev = REGSP_HINT(RID_XMM0);
1774 if (inloop) 2340 if (inloop)
1775 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); 2341 as->modset |= (RSET_SCRATCH & RSET_GPR);
1776 continue; 2342 continue;
1777 } 2343 }
1778 break; 2344 break;
1779#else 2345#endif
1780 ir->prev = REGSP_HINT(RID_FPRET); 2346 case IR_FPMATH:
2347#if LJ_TARGET_X86ORX64
2348 if (ir->op2 <= IRFPM_TRUNC) {
2349 if (!(as->flags & JIT_F_SSE4_1)) {
2350 ir->prev = REGSP_HINT(RID_XMM0);
2351 if (inloop)
2352 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2353 continue;
2354 }
2355 break;
2356 }
2357#endif
1781 if (inloop) 2358 if (inloop)
1782 as->modset |= RSET_SCRATCH; 2359 as->modset |= RSET_SCRATCH;
2360#if LJ_TARGET_X86
2361 break;
2362#else
2363 ir->prev = REGSP_HINT(RID_FPRET);
1783 continue; 2364 continue;
1784#endif 2365#endif
1785#if LJ_TARGET_X86ORX64 2366#if LJ_TARGET_X86ORX64
1786 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ 2367 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
1787 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 2368 case IR_BSHL: case IR_BSHR: case IR_BSAR:
2369 if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
2370 break;
2371 /* fallthrough */
2372 case IR_BROL: case IR_BROR:
1788 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { 2373 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
1789 IR(ir->op2)->r = REGSP_HINT(RID_ECX); 2374 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
1790 if (inloop) 2375 if (inloop)
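
The reshuffled shift cases above encode an ISA detail: legacy x86 variable shifts take their count in CL, so non-constant counts get an RID_ECX hint, but BMI2's SHLX/SHRX/SARX accept the count in any GPR. Rotates have no variable-count BMI2 forms, so BROL/BROR keep the hint regardless. A tiny model of that decision (enum values invented, not the real IR opcodes):

#include <stdbool.h>
#include <stdio.h>

enum { IR_BSHL, IR_BSHR, IR_BSAR, IR_BROL, IR_BROR };

/* Does this shift/rotate need its variable count in CL? */
static bool count_needs_ecx(int op, bool have_bmi2)
{
  if (have_bmi2 && (op == IR_BSHL || op == IR_BSHR || op == IR_BSAR))
    return false;  /* SHLX/SHRX/SARX take the count in any register. */
  return true;     /* No BMI2 variable-count rotates exist. */
}

int main(void)
{
  printf("BSHL with BMI2: %d\n", count_needs_ecx(IR_BSHL, true));   /* 0 */
  printf("BROL with BMI2: %d\n", count_needs_ecx(IR_BROL, true));   /* 1 */
  printf("BSHR without:   %d\n", count_needs_ecx(IR_BSHR, false));  /* 1 */
  return 0;
}
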
@@ -1828,16 +2413,26 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1828{ 2413{
1829 ASMState as_; 2414 ASMState as_;
1830 ASMState *as = &as_; 2415 ASMState *as = &as_;
1831 MCode *origtop; 2416
2417 /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
2418 {
2419 IRRef nins = T->nins;
2420 IRIns *ir = &T->ir[nins-1];
2421 if (ir->o == IR_NOP || ir->o == IR_RENAME) {
2422 do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
2423 T->nins = nins;
2424 }
2425 }
1832 2426
1833 /* Ensure an initialized instruction beyond the last one for HIOP checks. */ 2427 /* Ensure an initialized instruction beyond the last one for HIOP checks. */
1834 J->cur.nins = lj_ir_nextins(J); 2428 /* This also allows one RENAME to be added without reallocating curfinal. */
1835 lj_ir_nop(&J->cur.ir[J->cur.nins]); 2429 as->orignins = lj_ir_nextins(J);
2430 lj_ir_nop(&J->cur.ir[as->orignins]);
1836 2431
1837 /* Setup initial state. Copy some fields to reduce indirections. */ 2432 /* Setup initial state. Copy some fields to reduce indirections. */
1838 as->J = J; 2433 as->J = J;
1839 as->T = T; 2434 as->T = T;
1840 as->ir = T->ir; 2435 J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
1841 as->flags = J->flags; 2436 as->flags = J->flags;
1842 as->loopref = J->loopref; 2437 as->loopref = J->loopref;
1843 as->realign = NULL; 2438 as->realign = NULL;
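
The trim loop at the top of the new lj_asm_trace() drops trailing NOPs and RENAMEs left over from an assembler restart (LJ_TRERR_MCODELM), so a retry starts from a clean IR tail. A reduced, runnable model where an instruction is just an opcode byte:

#include <stdio.h>

enum { OP_ADD, OP_NOP, OP_RENAME };  /* Invented stand-in opcodes. */

/* Scan backwards while the last instruction is a NOP or RENAME. */
static unsigned trim_tail(const unsigned char *ir, unsigned nins)
{
  while (nins > 0 &&
         (ir[nins-1] == OP_NOP || ir[nins-1] == OP_RENAME))
    nins--;
  return nins;
}

int main(void)
{
  unsigned char ir[] = { OP_ADD, OP_ADD, OP_RENAME, OP_NOP };
  printf("trimmed nins = %u\n", trim_tail(ir, 4));  /* -> 2 */
  return 0;
}
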
@@ -1845,17 +2440,46 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1845 as->parent = J->parent ? traceref(J, J->parent) : NULL; 2440 as->parent = J->parent ? traceref(J, J->parent) : NULL;
1846 2441
1847 /* Reserve MCode memory. */ 2442 /* Reserve MCode memory. */
1848 as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); 2443 as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
1849 as->mcp = as->mctop; 2444 as->mcp = as->mctop;
1850 as->mclim = as->mcbot + MCLIM_REDZONE; 2445 as->mclim = as->mcbot + MCLIM_REDZONE;
1851 asm_setup_target(as); 2446 asm_setup_target(as);
1852 2447
1853 do { 2448 /*
2449 ** This is a loop, because the MCode may have to be (re-)assembled
2450 ** multiple times:
2451 **
2452 ** 1. as->realign is set (and the assembly aborted), if the arch-specific
2453 ** backend wants the MCode to be aligned differently.
2454 **
2455 ** This is currently only the case on x86/x64, where small loops get
2456 ** an aligned loop body plus a short branch. Not much effort is wasted,
2457 ** because the abort happens very quickly and only once.
2458 **
2459 ** 2. The IR is immovable, since the MCode embeds pointers to various
2460 ** constants inside the IR. But RENAMEs may need to be added to the IR
2461 ** during assembly, which might grow and reallocate the IR. We check
2462 ** at the end if the IR (in J->cur.ir) has actually grown, resize the
2463 ** copy (in J->curfinal->ir) and try again.
2464 **
2465 ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
2466 ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
2467 ** always have one spare slot in the IR (see above), which means we
2468 ** have to redo the assembly for only ~2% of all traces.
2469 **
2470 ** Very, very rarely, this needs to be done repeatedly, since the
2471 ** location of constants inside the IR (actually, reachability from
2472 ** a global pointer) may affect register allocation and thus the
2473 ** number of RENAMEs.
2474 */
2475 for (;;) {
1854 as->mcp = as->mctop; 2476 as->mcp = as->mctop;
1855#ifdef LUA_USE_ASSERT 2477#ifdef LUA_USE_ASSERT
1856 as->mcp_prev = as->mcp; 2478 as->mcp_prev = as->mcp;
1857#endif 2479#endif
1858 as->curins = T->nins; 2480 as->ir = J->curfinal->ir; /* Use the copied IR. */
2481 as->curins = J->cur.nins = as->orignins;
2482
1859 RA_DBG_START(); 2483 RA_DBG_START();
1860 RA_DBGX((as, "===== STOP =====")); 2484 RA_DBGX((as, "===== STOP ====="));
1861 2485
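
A runnable toy of the retry policy described in the big comment above: the copy reserves one spare slot, and whenever "assembly" produces more RENAMEs than fit, the copy is grown and the pass redone. The per-attempt rename counts here are made up; the real trigger is the J->curfinal->nins vs. T->nins comparison below:

#include <stdio.h>

int main(void)
{
  unsigned orignins = 100, capacity = orignins + 1;  /* One spare slot. */
  unsigned renames[] = { 3, 2, 2 };  /* Invented per-attempt counts. */
  for (unsigned attempt = 0; ; attempt++) {
    unsigned nins = orignins + renames[attempt];
    if (nins <= capacity) {
      printf("attempt %u: done with %u ins\n", attempt, nins);
      break;
    }
    printf("attempt %u: IR grew to %u, retrying\n", attempt, nins);
    capacity = nins;  /* Resize the copy, like lj_trace_alloc(). */
  }
  return 0;
}
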
@@ -1874,7 +2498,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1874 /* Assemble a trace in linear backwards order. */ 2498 /* Assemble a trace in linear backwards order. */
1875 for (as->curins--; as->curins > as->stopins; as->curins--) { 2499 for (as->curins--; as->curins > as->stopins; as->curins--) {
1876 IRIns *ir = IR(as->curins); 2500 IRIns *ir = IR(as->curins);
1877 lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ 2501 /* 64 bit types handled by SPLIT for 32 bit archs. */
2502 lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
2503 "IR %04d has unsplit 64 bit type",
2504 (int)(ir - as->ir) - REF_BIAS);
2505 asm_snap_prev(as);
1878 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) 2506 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
1879 continue; /* Dead-code elimination can be soooo easy. */ 2507 continue; /* Dead-code elimination can be soooo easy. */
1880 if (irt_isguard(ir->t)) 2508 if (irt_isguard(ir->t))
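
The new assertion message above prints the ref in the usual biased form seen in IR dumps: references are offset by REF_BIAS (0x8000 in lj_ir.h), so constants come out negative and instructions positive once the bias is subtracted. A two-line demonstration:

#include <stdio.h>

#define REF_BIAS 0x8000  /* Same bias as lj_ir.h. */

int main(void)
{
  unsigned ref_const = REF_BIAS - 2, ref_ins = REF_BIAS + 4;
  /* Constants print negative, instructions positive: -002 0004 */
  printf("%04d %04d\n", (int)ref_const - REF_BIAS, (int)ref_ins - REF_BIAS);
  return 0;
}
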
@@ -1883,22 +2511,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1883 checkmclim(as); 2511 checkmclim(as);
1884 asm_ir(as, ir); 2512 asm_ir(as, ir);
1885 } 2513 }
1886 } while (as->realign); /* Retry in case the MCode needs to be realigned. */
1887 2514
1888 /* Emit head of trace. */ 2515 if (as->realign && J->curfinal->nins >= T->nins)
1889 RA_DBG_REF(); 2516 continue; /* Retry in case only the MCode needs to be realigned. */
1890 checkmclim(as); 2517
1891 if (as->gcsteps > 0) { 2518 /* Emit head of trace. */
1892 as->curins = as->T->snap[0].ref; 2519 RA_DBG_REF();
1893 asm_snap_prep(as); /* The GC check is a guard. */ 2520 checkmclim(as);
1894 asm_gc_check(as); 2521 if (as->gcsteps > 0) {
2522 as->curins = as->T->snap[0].ref;
2523 asm_snap_prep(as); /* The GC check is a guard. */
2524 asm_gc_check(as);
2525 as->curins = as->stopins;
2526 }
2527 ra_evictk(as);
2528 if (as->parent)
2529 asm_head_side(as);
2530 else
2531 asm_head_root(as);
2532 asm_phi_fixup(as);
2533
2534 if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
2535 lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
2536 memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
2537 (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
2538 T->nins = J->curfinal->nins;
2539 /* Fill mcofs of any unprocessed snapshots. */
2540 as->curins = REF_FIRST;
2541 asm_snap_prev(as);
2542 break; /* Done. */
2543 }
2544
2545 /* Otherwise try again with a bigger IR. */
2546 lj_trace_free(J2G(J), J->curfinal);
2547 J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
2548 J->curfinal = lj_trace_alloc(J->L, T);
2549 as->realign = NULL;
1895 } 2550 }
1896 ra_evictk(as);
1897 if (as->parent)
1898 asm_head_side(as);
1899 else
1900 asm_head_root(as);
1901 asm_phi_fixup(as);
1902 2551
1903 RA_DBGX((as, "===== START ====")); 2552 RA_DBGX((as, "===== START ===="));
1904 RA_DBG_FLUSH(); 2553 RA_DBG_FLUSH();
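
On the success path above, only the tail added during assembly, i.e. the RENAMEs, has to be copied from the working IR (T->ir) into the copy the MCode was emitted against (J->curfinal->ir), since everything below as->orignins is identical by construction. The same memcpy reduced to ints:

#include <stdio.h>
#include <string.h>

int main(void)
{
  int work[6]  = { 1, 2, 3, 4, 90, 91 };  /* 90, 91: RENAMEs added late. */
  int final[6] = { 1, 2, 3, 4, 0, 0 };    /* Copy made before assembly. */
  unsigned orignins = 4, nins = 6;
  memcpy(final + orignins, work + orignins,
         (nins - orignins) * sizeof(int));  /* Copy only the RENAMEs. */
  printf("%d %d\n", final[4], final[5]);    /* -> 90 91 */
  return 0;
}
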
@@ -1908,10 +2557,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1908 /* Set trace entry point before fixing up tail to allow link to self. */ 2557 /* Set trace entry point before fixing up tail to allow link to self. */
1909 T->mcode = as->mcp; 2558 T->mcode = as->mcp;
1910 T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; 2559 T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
1911 if (!as->loopref) 2560 if (as->loopref)
2561 asm_loop_tail_fixup(as);
2562 else
1912 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ 2563 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
1913 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); 2564 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
1914 lj_mcode_sync(T->mcode, origtop); 2565 asm_snap_fixup_mcofs(as);
2566#if LJ_TARGET_MCODE_FIXUP
2567 asm_mcode_fixup(T->mcode, T->szmcode);
2568#endif
2569 lj_mcode_sync(T->mcode, as->mctoporig);
1915} 2570}
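
The final lj_mcode_sync() covers targets with incoherent instruction caches, and it flushes up to as->mctoporig rather than as->mctop because asm_tail_fixup() may have moved the top (as the note above says). A stand-in using the GCC/Clang builtin, not LuaJIT's portable implementation:

#include <stdint.h>

/* Flush a freshly written code range so the CPU fetches the new
** instructions; on x86 this compiles to nothing. */
static void sync_mcode(void *start, void *end)
{
  __builtin___clear_cache((char *)start, (char *)end);
}

int main(void)
{
  static uint8_t buf[16];  /* Pretend this holds fresh MCode. */
  sync_mcode(buf, buf + sizeof(buf));
  return 0;
}
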
1916 2571
1917#undef IR 2572#undef IR