Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c  1016
1 file changed, 834 insertions(+), 182 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 7c4d8f52..71079b30 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -11,6 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_gc.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
@@ -71,6 +72,7 @@ typedef struct ASMState {
   IRRef snaprename;  /* Rename highwater mark for snapshot check. */
   SnapNo snapno;  /* Current snapshot number. */
   SnapNo loopsnapno;  /* Loop snapshot number. */
+  int snapalloc;  /* Current snapshot needs allocation. */
   BloomFilter snapfilt1, snapfilt2;  /* Filled with snapshot refs. */
 
   IRRef fuseref;  /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
@@ -85,18 +87,25 @@ typedef struct ASMState {
 
   MCode *mcbot;  /* Bottom of reserved MCode. */
   MCode *mctop;  /* Top of generated MCode. */
+  MCode *mctoporig;  /* Original top of generated MCode. */
   MCode *mcloop;  /* Pointer to loop MCode (or NULL). */
   MCode *invmcp;  /* Points to invertible loop branch (or NULL). */
   MCode *flagmcp;  /* Pending opportunity to merge flag setting ins. */
   MCode *realign;  /* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
 } ASMState;
 
+#ifdef LUA_USE_ASSERT
+#define lj_assertA(c, ...)  lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
+#else
+#define lj_assertA(c, ...)  ((void)as)
+#endif
+
 #define IR(ref)  (&as->ir[(ref)])
 
 #define ASMREF_TMP1  REF_TRUE  /* Temp. register. */
@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #ifdef LUA_USE_ASSERT
   if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
     IRIns *ir = IR(as->curins+1);
-    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
-            as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
-    lua_assert(0);
+    lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
+               as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
   }
 #endif
   if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
@@ -144,7 +152,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)  ((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)  (as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -171,6 +179,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -179,6 +189,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -236,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
           *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
       } else {
         *p++ = '?';
-        lua_assert(0);
+        lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
       }
     } else if (e[1] == 'f' || e[1] == 'i') {
       IRRef ref;
@@ -254,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
     } else if (e[1] == 'x') {
       p += sprintf(p, "%08x", va_arg(argp, int32_t));
     } else {
-      lua_assert(0);
+      lj_assertA(0, "bad debug format code");
     }
     fmt = e+2;
   }
@@ -313,37 +329,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   Reg r;
   if (ra_iskref(ref)) {
     r = ra_krefreg(ref);
-    lua_assert(!rset_test(as->freeset, r));
+    lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
   r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
+  lj_assertA(!ra_hasspill(ir->s),
+             "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
-    lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    /* REF_NIL stores ASMREF_L register. */
+    lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
-    lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+    lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+               "rematk of bad IR op %d", ir->o);
     emit_loadi(as, r, ir->i);
   }
   return r;
@@ -353,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
-  lua_assert(ir >= as->ir + REF_TRUE);
+  lj_assertA(ir >= as->ir + REF_TRUE,
+             "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
   if (!ra_hasspill(slot)) {
     if (irt_is64(ir->t)) {
       slot = as->evenspill;
@@ -378,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 {
   IRIns *ir = IR(ref);
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
+  lj_assertA(!ra_hasspill(ir->s),
+             "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;
@@ -394,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
   IRIns *ir = IR(ref);
   int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r));
+  lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
   ra_sethint(ir->r, r);  /* Keep hint. */
   ra_free(as, r);
   if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
@@ -423,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
 {
   IRRef ref;
   RegCost cost = ~(RegCost)0;
-  lua_assert(allow != RSET_EMPTY);
+  lj_assertA(allow != RSET_EMPTY, "evict from empty set");
   if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
     GPRDEF(MINCOST)
   } else {
     FPRDEF(MINCOST)
   }
   ref = regcost_ref(cost);
-  lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
+  lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
+             "evict of out-of-range IR %04d", ref - REF_BIAS);
   /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
   if (!irref_isk(ref) && (as->weakset & allow)) {
     IRIns *ir = IR(ref);
@@ -512,7 +546,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -521,9 +555,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+        if (k == ra_krefk(as, ref))
+          return r;
+      } else {
+        IRIns *ir = IR(ref);
+        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+            (ir->o == IR_KINT && k == ir->i) ||
+            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+             k == (intptr_t)ir_kptr(ir))
+#else
+            (ir->o != IR_KINT64 && k == ir->i)
+#endif
+           )
+          return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
         k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
@@ -543,7 +599,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -566,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
   IRIns *ir = IR(ref);
   RegSet pick = as->freeset & allow;
   Reg r;
-  lua_assert(ra_noreg(ir->r));
+  lj_assertA(ra_noreg(ir->r),
+             "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
   if (pick) {
     /* First check register hint from propagation or PHI. */
     if (ra_hashint(ir->r)) {
@@ -613,15 +670,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
-  lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
-  lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
+  lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
+             "rename between GPR/FPR %d and %d", down, up);
+  lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
+  lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
   ra_free(as, down);  /* 'down' is free ... */
   ra_modified(as, down);
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
@@ -629,11 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    /*
+    ** The rename is effective at the subsequent (already emitted) exit
+    ** branch. This is for the current snapshot (as->snapno). Except if we
+    ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
+    ** then it belongs to the next snapshot.
+    ** See also the discussion at asm_snap_checkrename().
+    */
+    ra_addrename(as, down, ref, as->snapno + as->snapalloc);
   }
 }
 
@@ -666,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
 {
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
-    lua_assert(rset_test(as->freeset, r));
+    lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
     ra_modified(as, r);
     emit_movrr(as, ir, dest, r);
   }
@@ -683,20 +755,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-        cTValue *tv = ir_knum(ir);
         /* FP remat needs a load except for +0. Still better than eviction. */
-        if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-          emit_loadn(as, dest, tv);
+        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+          emit_loadk64(as, dest, ir);
           return;
         }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-        emit_loadu64(as, dest, ir_kint64(ir)->u64);
+        emit_loadk64(as, dest, ir);
+        return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+        emit_loadk64(as, dest, ir);
         return;
 #endif
-      } else {
-        lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-                   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+#endif
+      } else if (ir->o != IR_KPRI) {
+        lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+                   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+                   "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
         emit_loadi(as, dest, ir->i);
         return;
       }
@@ -741,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
 }
 #endif
 
-#if !LJ_64
 /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
 static void ra_destpair(ASMState *as, IRIns *ir)
 {
   Reg destlo = ir->r, desthi = (ir+1)->r;
+  IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
   /* First spill unrelated refs blocking the destination registers. */
   if (!rset_test(as->freeset, RID_RETLO) &&
       destlo != RID_RETLO && desthi != RID_RETLO)
@@ -769,29 +846,29 @@ static void ra_destpair(ASMState *as, IRIns *ir)
   /* Check for conflicts and shuffle the registers as needed. */
   if (destlo == RID_RETHI) {
     if (desthi == RID_RETLO) {
-#if LJ_TARGET_X86
+#if LJ_TARGET_X86ORX64
       *--as->mcp = XI_XCHGa + RID_RETHI;
+      if (LJ_64 && irt_is64(irx->t)) *--as->mcp = 0x48;
 #else
-      emit_movrr(as, ir, RID_RETHI, RID_TMP);
-      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-      emit_movrr(as, ir, RID_TMP, RID_RETLO);
+      emit_movrr(as, irx, RID_RETHI, RID_TMP);
+      emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+      emit_movrr(as, irx, RID_TMP, RID_RETLO);
 #endif
     } else {
-      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
-      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+      emit_movrr(as, irx, RID_RETHI, RID_RETLO);
+      if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
     }
   } else if (desthi == RID_RETLO) {
-    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   } else {
-    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   }
   /* Restore spill slots (if any). */
   if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
   if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
 }
-#endif
 
 /* -- Snapshot handling --------- ----------------------------------------- */
 
@@ -841,11 +918,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 #endif
   {  /* Allocate stored values for TNEW, TDUP and CNEW. */
     IRIns *irs;
-    lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
+    lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
+               "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
     for (irs = IR(as->snapref-1); irs > ir; irs--)
       if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
-        lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
-                   irs->o == IR_FSTORE || irs->o == IR_XSTORE);
+        lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+                   irs->o == IR_FSTORE || irs->o == IR_XSTORE,
+                   "sunk store IR %04d has bad op %d",
+                   (int)(irs - as->ir) - REF_BIAS, irs->o);
         asm_snap_alloc1(as, irs->op2);
         if (LJ_32 && (irs+1)->o == IR_HIOP)
           asm_snap_alloc1(as, (irs+1)->op2);
@@ -881,9 +961,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 }
 
 /* Allocate refs escaping to a snapshot. */
-static void asm_snap_alloc(ASMState *as)
+static void asm_snap_alloc(ASMState *as, int snapno)
 {
-  SnapShot *snap = &as->T->snap[as->snapno];
+  SnapShot *snap = &as->T->snap[snapno];
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
   MSize n, nent = snap->nent;
   as->snapfilt1 = as->snapfilt2 = 0;
@@ -893,7 +973,9 @@ static void asm_snap_alloc(ASMState *as)
     if (!irref_isk(ref)) {
       asm_snap_alloc1(as, ref);
       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
-        lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
+        lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
+                   "snap %d[%d] points to bad SOFTFP IR %04d",
+                   snapno, n, ref - REF_BIAS);
         asm_snap_alloc1(as, ref+1);
       }
     }
@@ -919,67 +1001,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
   return 0;  /* Not found. */
 }
 
-/* Prepare snapshot for next guard instruction. */
+/* Prepare snapshot for next guard or throwing instruction. */
 static void asm_snap_prep(ASMState *as)
 {
-  if (as->curins < as->snapref) {
-    do {
-      if (as->snapno == 0) return;  /* Called by sunk stores before snap #0. */
-      as->snapno--;
-      as->snapref = as->T->snap[as->snapno].ref;
-    } while (as->curins < as->snapref);
-    asm_snap_alloc(as);
+  if (as->snapalloc) {
+    /* Alloc on first invocation for each snapshot. */
+    as->snapalloc = 0;
+    asm_snap_alloc(as, as->snapno);
     as->snaprename = as->T->nins;
   } else {
-    /* Process any renames above the highwater mark. */
+    /* Check any renames above the highwater mark. */
    for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
         ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
   }
 }
 
-/* -- Miscellaneous helpers ----------------------------------------------- */
-
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-                            const CCallInfo *ci, IRRef *args)
+/* Move to previous snapshot when we cross the current snapshot ref. */
+static void asm_snap_prev(ASMState *as)
 {
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  if (as->curins < as->snapref) {
+    uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);
+    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
+    do {
+      if (as->snapno == 0) return;
+      as->snapno--;
+      as->snapref = as->T->snap[as->snapno].ref;
+      as->T->snap[as->snapno].mcofs = (uint16_t)ofs;  /* Remember mcode ofs. */
+    } while (as->curins < as->snapref);  /* May have no ins inbetween. */
+    as->snapalloc = 1;
   }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
 }
 
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+/* Fixup snapshot mcode offsetst. */
+static void asm_snap_fixup_mcofs(ASMState *as)
 {
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
+  SnapShot *snap = as->T->snap;
+  SnapNo i;
+  for (i = as->T->nsnap-1; i > 0; i--) {
+    /* Compute offset from mcode start and store in correct snapshot. */
+    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
   }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+  snap[0].mcofs = 0;
 }
 
+/* -- Miscellaneous helpers ----------------------------------------------- */
+
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -989,21 +1059,26 @@ static int32_t asm_stack_adjust(ASMState *as)
 }
 
 /* Must match with hash*() in lj_tab.c. */
-static uint32_t ir_khash(IRIns *ir)
+static uint32_t ir_khash(ASMState *as, IRIns *ir)
 {
   uint32_t lo, hi;
+  UNUSED(as);
   if (irt_isstr(ir->t)) {
-    return ir_kstr(ir)->hash;
+    return ir_kstr(ir)->sid;
   } else if (irt_isnum(ir->t)) {
     lo = ir_knum(ir)->u32.lo;
     hi = ir_knum(ir)->u32.hi << 1;
   } else if (irt_ispri(ir->t)) {
-    lua_assert(!irt_isnil(ir->t));
+    lj_assertA(!irt_isnil(ir->t), "hash of nil key");
     return irt_type(ir->t)-IRT_FALSE;
   } else {
-    lua_assert(irt_isgcv(ir->t));
+    lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
     lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
     hi = lo + HASH_BIAS;
+#endif
   }
   return hashrot(lo, hi);
 }
@@ -1017,6 +1092,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
   IRRef args[3];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L */
   args[1] = ir->op1;   /* const char *str */
   args[2] = ir->op2;   /* size_t len */
@@ -1029,6 +1105,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L */
   args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
   as->gcsteps++;
@@ -1041,6 +1118,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L */
   args[1] = ir->op1;   /* const GCtab *kt */
   as->gcsteps++;
@@ -1064,6 +1142,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb);
+#endif
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  switch (ir->op2) {
+  case IRBUFHDR_RESET: {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    IRIns irbp;
+    irbp.ot = IRT(0, IRT_PTR);  /* Buffer data pointer type. */
+    emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
+    emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
+    break;
+  }
+  case IRBUFHDR_APPEND: {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+          (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
+        irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+        ir = irp;
+      }
+    }
+    break;
+  }
+#if LJ_HASBUFFER
+  case IRBUFHDR_WRITE:
+    asm_bufhdr_write(as, sb);
+    break;
+#endif
+  default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -129;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lj_assertA(irt_isstr(irs->t),
+             "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = (int8_t)strdata(s)[0];  /* Signed! */
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+        args[1] = ASMREF_TMP1;  /* TValue * */
+        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+        lj_assertA(irt_isinteger(IR(irs->op1)->t),
+                   "TOSTR of non-numeric IR %04d", irs->op1);
+        args[1] = irs->op1;  /* int */
+        if (irs->op2 == IRTOSTR_INT)
+          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+        else
+          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -129)
+      asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  asm_snap_prep(as);
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
+             "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  asm_snap_prep(as);
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
+}
+
+static void asm_tmpref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+  asm_tvptr(as, r, ir->op1, ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  /* Account for split args. */
+  lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1249,12 +1581,7 @@ static void asm_phi_fixup(ASMState *as)
         irt_clearmark(ir->t);
         /* Left PHI gained a spill slot before the loop? */
         if (ra_hasspill(ir->s)) {
-          IRRef ren;
-          lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-          ren = tref_ref(lj_ir_emit(as->J));
-          as->ir = as->T->ir;  /* The IR may have been reallocated. */
-          IR(ren)->r = (uint8_t)r;
-          IR(ren)->s = SPS_NONE;
+          ra_addrename(as, r, lref, as->loopsnapno);
         }
       }
       rset_clear(work, r);
@@ -1329,6 +1656,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1337,6 +1666,200 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Common instruction helpers ------------------------------------------ */
+
+#if !LJ_SOFTFP32
+#if !LJ_TARGET_X86ORX64
+#define asm_ldexp(as, ir)  asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+static void asm_pow(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+                                          IRCALL_lj_carith_powu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_pow);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+                                          IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fpdiv(as, ir);
+}
+#endif
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+                                          IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+static void asm_fuseequal(ASMState *as, IRIns *ir)
+{
+  /* Fuse HREF + EQ/NE. */
+  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+    as->curins--;
+    asm_href(as, ir-1, (IROp)ir->o);
+  } else {
+    asm_equal(as, ir);
+  }
+}
+
+static void asm_alen(ASMState *as, IRIns *ir)
+{
+  asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
+                                          IRCALL_lj_tab_len_hint);
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR:
+    lj_assertA(!ra_used(ir),
+               "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
+    break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    /* Unused for LJ_SOFTFP32. */
+    lj_assertA(0, "IR %04d with unused op %d",
+               (int)(ir - as->ir) - REF_BIAS, ir->o);
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_TMPREF: asm_tmpref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+  case IR_ALEN: asm_alen(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI:
+#if LJ_HASFFI
+    asm_cnew(as, ir);
+#else
+    lj_assertA(0, "IR %04d with unused op %d",
+               (int)(ir - as->ir) - REF_BIAS, ir->o);
+#endif
+    break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
@@ -1375,8 +1898,7 @@ static void asm_head_side(ASMState *as)
 
   if (as->snapno && as->topslot > as->parent->topslot) {
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
-    as->snapno = 0;
-    asm_snap_alloc(as);
+    asm_snap_alloc(as, 0);
   }
   pbase = asm_head_side_base(as, irp);
   if (pbase != RID_NONE) {
@@ -1388,8 +1910,10 @@ static void asm_head_side(ASMState *as)
   for (i = as->stopins; i > REF_BASE; i--) {
     IRIns *ir = IR(i);
     RegSP rs;
-    lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
-               (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
+    lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+               (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
+               "IR %04d has bad parent op %d",
+               (int)(ir - as->ir) - REF_BIAS, ir->o);
     rs = as->parentmap[i - REF_FIRST];
     if (ra_hasreg(ir->r)) {
       rset_clear(allow, ir->r);
@@ -1542,7 +2066,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1562,19 +2086,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
         pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1586,6 +2114,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1609,22 +2142,32 @@ static void asm_setup_regsp(ASMState *as)
 #endif
 
   ra_setup(as);
+#if LJ_TARGET_ARM64
+  ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
+#endif
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
+  as->snapalloc = 0;
 
   as->stopins = REF_BASE;
   as->orignins = nins;
@@ -1634,7 +2177,7 @@ static void asm_setup_regsp(ASMState *as)
   ir = IR(REF_FIRST);
   if (as->parent) {
     uint16_t *p;
-    lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
+    lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
     if (lastir - ir > LJ_MAX_JSLOTS)
       lj_trace_err(as->J, LJ_TRERR_NYICOAL);
     as->stopins = (IRRef)((lastir-1) - as->ir);
@@ -1673,6 +2216,10 @@ static void asm_setup_regsp(ASMState *as)
       ir->prev = (uint16_t)REGSP_HINT((rload & 15));
       rload = lj_ror(rload, 4);
       continue;
+    case IR_TMPREF:
+      if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
+        as->evenspill = 4;  /* TMPREF OUT2 needs two TValues on the stack. */
+      break;
 #endif
     case IR_CALLXS: {
       CCallInfo ci;
@@ -1682,7 +2229,17 @@ static void asm_setup_regsp(ASMState *as)
       as->modset |= RSET_SCRATCH;
       continue;
     }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLL:
+      /* lj_vm_next needs two TValues on the stack. */
+#if LJ_TARGET_X64 && LJ_ABI_WIN
+      if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
+        as->evenspill = SPS_FIRST + 4;
+#else
+      if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
+        as->evenspill = 4;
+#endif
+      /* fallthrough */
+    case IR_CALLN: case IR_CALLA: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1690,7 +2247,6 @@ static void asm_setup_regsp(ASMState *as)
           (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
     }
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
     case IR_HIOP:
       switch ((ir-1)->o) {
 #if LJ_SOFTFP && LJ_TARGET_ARM
@@ -1701,15 +2257,15 @@ static void asm_setup_regsp(ASMState *as)
         }
         break;
 #endif
-#if !LJ_SOFTFP && LJ_NEED_FP64
+#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
       case IR_CONV:
         if (irt_isfp((ir-1)->t)) {
           ir->prev = REGSP_HINT(RID_FPRET);
           continue;
         }
-        /* fallthrough */
 #endif
-      case IR_CALLN: case IR_CALLXS:
+        /* fallthrough */
+      case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
 #if LJ_SOFTFP
       case IR_MIN: case IR_MAX:
 #endif
@@ -1720,18 +2276,29 @@ static void asm_setup_regsp(ASMState *as)
1720 break; 2276 break;
1721 } 2277 }
1722 break; 2278 break;
1723#endif
1724#if LJ_SOFTFP 2279#if LJ_SOFTFP
1725 case IR_MIN: case IR_MAX: 2280 case IR_MIN: case IR_MAX:
1726 if ((ir+1)->o != IR_HIOP) break; 2281 if ((ir+1)->o != IR_HIOP) break;
1727#endif 2282#endif
1728 /* fallthrough */ 2283 /* fallthrough */
1729 /* C calls evict all scratch regs and return results in RID_RET. */ 2284 /* C calls evict all scratch regs and return results in RID_RET. */
1730 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2285 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1731 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2286 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1732 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2287 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
2288#if LJ_TARGET_X86 && LJ_HASFFI
2289 if (0) {
2290 case IR_CNEW:
2291 if (ir->op2 != REF_NIL && as->evenspill < 4)
2292 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2293 }
1733 /* fallthrough */ 2294 /* fallthrough */
1734 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2295#else
2296 /* fallthrough */
2297 case IR_CNEW:
2298#endif
2299 /* fallthrough */
2300 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2301 case IR_BUFSTR:
1735 ir->prev = REGSP_HINT(RID_RET); 2302 ir->prev = REGSP_HINT(RID_RET);
1736 if (inloop) 2303 if (inloop)
1737 as->modset = RSET_SCRATCH; 2304 as->modset = RSET_SCRATCH;
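The "if (0) { case IR_CNEW: ... }" construct a few lines above is the standard C trick for giving one case label some private code while still sharing the common tail below it: the case label jumps into the otherwise-dead if-body, and execution then falls through into the shared statements. A small self-contained demo of the idiom (the operation numbers and strings are made up for illustration):

#include <stdio.h>

static const char *classify(int op)
{
  switch (op) {
  case 1:
    if (0) {        /* Never entered when coming from case 1. */
  case 2:
      puts("extra work only for op == 2");
    }
    /* fallthrough: shared by op == 1 and op == 2 */
  case 3:
    return "shared tail";
  default:
    return "other";
  }
}

int main(void)
{
  printf("op=1 -> %s\n", classify(1));
  printf("op=2 -> %s\n", classify(2));  /* Also prints the extra line. */
  printf("op=4 -> %s\n", classify(4));
  return 0;
}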
@@ -1740,58 +2307,73 @@ static void asm_setup_regsp(ASMState *as)
1740 if (inloop) 2307 if (inloop)
1741 as->modset = RSET_SCRATCH; 2308 as->modset = RSET_SCRATCH;
1742 break; 2309 break;
1743#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP 2310#if !LJ_SOFTFP
1744 case IR_ATAN2: case IR_LDEXP: 2311#if !LJ_TARGET_X86ORX64
2312 case IR_LDEXP:
1745#endif 2313#endif
2314#endif
2315 /* fallthrough */
1746 case IR_POW: 2316 case IR_POW:
1747 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2317 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1748#if LJ_TARGET_X86ORX64
1749 ir->prev = REGSP_HINT(RID_XMM0);
1750 if (inloop) 2318 if (inloop)
1751 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); 2319 as->modset |= RSET_SCRATCH;
2320#if LJ_TARGET_X86
2321 if (irt_isnum(IR(ir->op2)->t)) {
2322 if (as->evenspill < 4) /* Leave room to call pow(). */
2323 as->evenspill = 4;
2324 }
2325 break;
1752#else 2326#else
1753 ir->prev = REGSP_HINT(RID_FPRET); 2327 ir->prev = REGSP_HINT(RID_FPRET);
1754 if (inloop)
1755 as->modset |= RSET_SCRATCH;
1756#endif
1757 continue; 2328 continue;
2329#endif
1758 } 2330 }
1759 /* fallthrough */ /* for integer POW */ 2331 /* fallthrough */ /* for integer POW */
1760 case IR_DIV: case IR_MOD: 2332 case IR_DIV: case IR_MOD:
1761 if (!irt_isnum(ir->t)) { 2333 if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
1762 ir->prev = REGSP_HINT(RID_RET); 2334 ir->prev = REGSP_HINT(RID_RET);
1763 if (inloop) 2335 if (inloop)
1764 as->modset |= (RSET_SCRATCH & RSET_GPR); 2336 as->modset |= (RSET_SCRATCH & RSET_GPR);
1765 continue; 2337 continue;
1766 } 2338 }
1767 break; 2339 break;
1768 case IR_FPMATH: 2340#if LJ_64 && LJ_SOFTFP
1769#if LJ_TARGET_X86ORX64 2341 case IR_ADD: case IR_SUB: case IR_MUL:
1770 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2342 if (irt_isnum(ir->t)) {
1771 ir->prev = REGSP_HINT(RID_XMM0); 2343 ir->prev = REGSP_HINT(RID_RET);
1772#if !LJ_64
1773 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
1774 as->evenspill = 4;
1775#endif
1776 if (inloop)
1777 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1778 continue;
1779 } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
1780 ir->prev = REGSP_HINT(RID_XMM0);
1781 if (inloop) 2344 if (inloop)
1782 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); 2345 as->modset |= (RSET_SCRATCH & RSET_GPR);
1783 continue; 2346 continue;
1784 } 2347 }
1785 break; 2348 break;
1786#else 2349#endif
1787 ir->prev = REGSP_HINT(RID_FPRET); 2350 case IR_FPMATH:
2351#if LJ_TARGET_X86ORX64
2352 if (ir->op2 <= IRFPM_TRUNC) {
2353 if (!(as->flags & JIT_F_SSE4_1)) {
2354 ir->prev = REGSP_HINT(RID_XMM0);
2355 if (inloop)
2356 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2357 continue;
2358 }
2359 break;
2360 }
2361#endif
1788 if (inloop) 2362 if (inloop)
1789 as->modset |= RSET_SCRATCH; 2363 as->modset |= RSET_SCRATCH;
2364#if LJ_TARGET_X86
2365 break;
2366#else
2367 ir->prev = REGSP_HINT(RID_FPRET);
1790 continue; 2368 continue;
1791#endif 2369#endif
1792#if LJ_TARGET_X86ORX64 2370#if LJ_TARGET_X86ORX64
1793 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ 2371 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
1794 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 2372 case IR_BSHL: case IR_BSHR: case IR_BSAR:
2373 if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
2374 break;
2375 /* fallthrough */
2376 case IR_BROL: case IR_BROR:
1795 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { 2377 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
1796 IR(ir->op2)->r = REGSP_HINT(RID_ECX); 2378 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
1797 if (inloop) 2379 if (inloop)
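For reference, the machine-level difference behind the RID_ECX hint above: legacy x86/x64 shifts only accept a variable count in CL, while the BMI2 forms take the count from any register; BMI2 has no variable-count rotate (RORX is immediate-only), so IR_BROL/IR_BROR keep the hint. Shown as a comment, illustration only, not code emitted by this patch:

/* Variable shift count held in ebx:
**   without BMI2:   mov  ecx, ebx        ; count must be in CL
**                   shl  eax, cl
**   with BMI2:      shlx eax, eax, ebx   ; count may come from any GPR
*/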
@@ -1835,16 +2417,26 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1835{ 2417{
1836 ASMState as_; 2418 ASMState as_;
1837 ASMState *as = &as_; 2419 ASMState *as = &as_;
1838 MCode *origtop; 2420
2421 /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
2422 {
2423 IRRef nins = T->nins;
2424 IRIns *ir = &T->ir[nins-1];
2425 if (ir->o == IR_NOP || ir->o == IR_RENAME) {
2426 do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
2427 T->nins = nins;
2428 }
2429 }
1839 2430
1840 /* Ensure an initialized instruction beyond the last one for HIOP checks. */ 2431 /* Ensure an initialized instruction beyond the last one for HIOP checks. */
1841 J->cur.nins = lj_ir_nextins(J); 2432 /* This also allows one RENAME to be added without reallocating curfinal. */
1842 lj_ir_nop(&J->cur.ir[J->cur.nins]); 2433 as->orignins = lj_ir_nextins(J);
2434 lj_ir_nop(&J->cur.ir[as->orignins]);
1843 2435
1844 /* Setup initial state. Copy some fields to reduce indirections. */ 2436 /* Setup initial state. Copy some fields to reduce indirections. */
1845 as->J = J; 2437 as->J = J;
1846 as->T = T; 2438 as->T = T;
1847 as->ir = T->ir; 2439 J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
1848 as->flags = J->flags; 2440 as->flags = J->flags;
1849 as->loopref = J->loopref; 2441 as->loopref = J->loopref;
1850 as->realign = NULL; 2442 as->realign = NULL;
@@ -1852,17 +2444,46 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1852 as->parent = J->parent ? traceref(J, J->parent) : NULL; 2444 as->parent = J->parent ? traceref(J, J->parent) : NULL;
1853 2445
1854 /* Reserve MCode memory. */ 2446 /* Reserve MCode memory. */
1855 as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); 2447 as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
1856 as->mcp = as->mctop; 2448 as->mcp = as->mctop;
1857 as->mclim = as->mcbot + MCLIM_REDZONE; 2449 as->mclim = as->mcbot + MCLIM_REDZONE;
1858 asm_setup_target(as); 2450 asm_setup_target(as);
1859 2451
1860 do { 2452 /*
2453 ** This is a loop, because the MCode may have to be (re-)assembled
2454 ** multiple times:
2455 **
2456 ** 1. as->realign is set (and the assembly aborted), if the arch-specific
2457 ** backend wants the MCode to be aligned differently.
2458 **
2459 ** This is currently only the case on x86/x64, where small loops get
2460 ** an aligned loop body plus a short branch. Not much effort is wasted,
2461 ** because the abort happens very quickly and only once.
2462 **
2463 ** 2. The IR is immovable, since the MCode embeds pointers to various
2464 ** constants inside the IR. But RENAMEs may need to be added to the IR
2465 ** during assembly, which might grow and reallocate the IR. We check
2466 ** at the end if the IR (in J->cur.ir) has actually grown, resize the
2467 ** copy (in J->curfinal.ir) and try again.
2468 **
2469 ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
2470 ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
2471 ** always have one spare slot in the IR (see above), which means we
2472 ** have to redo the assembly for only ~2% of all traces.
2473 **
2474 ** Very, very rarely, this needs to be done repeatedly, since the
2475 ** location of constants inside the IR (actually, reachability from
2476 ** a global pointer) may affect register allocation and thus the
2477 ** number of RENAMEs.
2478 */
2479 for (;;) {
1861 as->mcp = as->mctop; 2480 as->mcp = as->mctop;
1862#ifdef LUA_USE_ASSERT 2481#ifdef LUA_USE_ASSERT
1863 as->mcp_prev = as->mcp; 2482 as->mcp_prev = as->mcp;
1864#endif 2483#endif
1865 as->curins = T->nins; 2484 as->ir = J->curfinal->ir; /* Use the copied IR. */
2485 as->curins = J->cur.nins = as->orignins;
2486
1866 RA_DBG_START(); 2487 RA_DBG_START();
1867 RA_DBGX((as, "===== STOP =====")); 2488 RA_DBGX((as, "===== STOP ====="));
1868 2489
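Condensed, the retry loop described in the comment above has roughly this shape; reset_ra_state(), assemble_backwards(), emit_trace_head() and copy_renames() are stand-ins for code that is written out inline in the patch, while the fields and the lj_trace_alloc()/lj_trace_free() calls are the real ones:

/* Simplified sketch of the (re-)assembly loop, not the literal code. */
for (;;) {
  reset_ra_state(as);                  /* as->mcp = as->mctop, etc. */
  assemble_backwards(as);              /* May set as->realign and may
                                       ** append RENAMEs to J->cur.ir. */
  if (as->realign && J->curfinal->nins >= T->nins)
    continue;                          /* IR unchanged: only realign MCode. */
  emit_trace_head(as);                 /* GC check, root or side head. */
  if (J->curfinal->nins >= T->nins) {  /* The spare IR slot was enough. */
    copy_renames(J->curfinal, T, as->orignins);
    break;
  }
  /* The IR grew past the copy: reallocate it and assemble again. */
  lj_trace_free(J2G(J), J->curfinal);
  J->curfinal = NULL;                  /* In case lj_trace_alloc() OOMs. */
  J->curfinal = lj_trace_alloc(J->L, T);
  as->realign = NULL;
}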
@@ -1881,7 +2502,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1881 /* Assemble a trace in linear backwards order. */ 2502 /* Assemble a trace in linear backwards order. */
1882 for (as->curins--; as->curins > as->stopins; as->curins--) { 2503 for (as->curins--; as->curins > as->stopins; as->curins--) {
1883 IRIns *ir = IR(as->curins); 2504 IRIns *ir = IR(as->curins);
1884 lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ 2505 /* 64 bit types handled by SPLIT for 32 bit archs. */
2506 lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
2507 "IR %04d has unsplit 64 bit type",
2508 (int)(ir - as->ir) - REF_BIAS);
2509 asm_snap_prev(as);
1885 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) 2510 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
1886 continue; /* Dead-code elimination can be soooo easy. */ 2511 continue; /* Dead-code elimination can be soooo easy. */
1887 if (irt_isguard(ir->t)) 2512 if (irt_isguard(ir->t))
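The cheap DCE above works because the trace is assembled backwards: when an instruction is reached, every later use of its result has already been assembled, so "no register or spill slot was ever assigned and there is no side effect" means the result is dead and the instruction can be skipped outright. A toy stand-alone model of that reasoning (the array-based IR is invented for the demo and is not LuaJIT's representation):

#include <stdio.h>
#include <string.h>

enum { NINS = 5 };

typedef struct { int op1, op2, sideeff; } ToyIns;  /* -1 = no operand. */

int main(void)
{
  ToyIns ins[NINS] = {
    { -1, -1, 0 },  /* 0: load a                               */
    { -1, -1, 0 },  /* 1: load b                               */
    {  0,  1, 0 },  /* 2: a + b  (dead: no later use)          */
    {  0,  1, 0 },  /* 3: a * b                                */
    {  3, -1, 1 },  /* 4: store a*b (side effect, always kept) */
  };
  int used[NINS];
  memset(used, 0, sizeof(used));
  for (int i = NINS-1; i >= 0; i--) {
    if (!used[i] && !ins[i].sideeff) {
      printf("ins %d eliminated\n", i);  /* Operands stay unmarked, so   */
      continue;                          /* transitively dead code dies. */
    }
    if (ins[i].op1 >= 0) used[ins[i].op1] = 1;
    if (ins[i].op2 >= 0) used[ins[i].op2] = 1;
  }
  return 0;
}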
@@ -1890,22 +2515,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1890 checkmclim(as); 2515 checkmclim(as);
1891 asm_ir(as, ir); 2516 asm_ir(as, ir);
1892 } 2517 }
1893 } while (as->realign); /* Retry in case the MCode needs to be realigned. */
1894 2518
1895 /* Emit head of trace. */ 2519 if (as->realign && J->curfinal->nins >= T->nins)
1896 RA_DBG_REF(); 2520 continue; /* Retry in case only the MCode needs to be realigned. */
1897 checkmclim(as); 2521
1898 if (as->gcsteps > 0) { 2522 /* Emit head of trace. */
1899 as->curins = as->T->snap[0].ref; 2523 RA_DBG_REF();
1900 asm_snap_prep(as); /* The GC check is a guard. */ 2524 checkmclim(as);
1901 asm_gc_check(as); 2525 if (as->gcsteps > 0) {
2526 as->curins = as->T->snap[0].ref;
2527 asm_snap_prep(as); /* The GC check is a guard. */
2528 asm_gc_check(as);
2529 as->curins = as->stopins;
2530 }
2531 ra_evictk(as);
2532 if (as->parent)
2533 asm_head_side(as);
2534 else
2535 asm_head_root(as);
2536 asm_phi_fixup(as);
2537
2538 if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
2539 lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
2540 memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
2541 (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
2542 T->nins = J->curfinal->nins;
2543 /* Fill mcofs of any unprocessed snapshots. */
2544 as->curins = REF_FIRST;
2545 asm_snap_prev(as);
2546 break; /* Done. */
2547 }
2548
2549 /* Otherwise try again with a bigger IR. */
2550 lj_trace_free(J2G(J), J->curfinal);
2551 J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
2552 J->curfinal = lj_trace_alloc(J->L, T);
2553 as->realign = NULL;
1902 } 2554 }
1903 ra_evictk(as);
1904 if (as->parent)
1905 asm_head_side(as);
1906 else
1907 asm_head_root(as);
1908 asm_phi_fixup(as);
1909 2555
1910 RA_DBGX((as, "===== START ====")); 2556 RA_DBGX((as, "===== START ===="));
1911 RA_DBG_FLUSH(); 2557 RA_DBG_FLUSH();
@@ -1915,10 +2561,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1915 /* Set trace entry point before fixing up tail to allow link to self. */ 2561 /* Set trace entry point before fixing up tail to allow link to self. */
1916 T->mcode = as->mcp; 2562 T->mcode = as->mcp;
1917 T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; 2563 T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
1918 if (!as->loopref) 2564 if (as->loopref)
2565 asm_loop_tail_fixup(as);
2566 else
1919 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ 2567 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
1920 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); 2568 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
1921 lj_mcode_sync(T->mcode, origtop); 2569 asm_snap_fixup_mcofs(as);
2570#if LJ_TARGET_MCODE_FIXUP
2571 asm_mcode_fixup(T->mcode, T->szmcode);
2572#endif
2573 lj_mcode_sync(T->mcode, as->mctoporig);
1922} 2574}
1923 2575
1924#undef IR 2576#undef IR