Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c  870
1 file changed, 716 insertions(+), 154 deletions(-)

diff --git a/src/lj_asm.c b/src/lj_asm.c
index 60be4337..aae7b5b9 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -90,12 +90,18 @@ typedef struct ASMState {
   MCode *realign;	/* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];	/* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];	/* Parent instruction to RegSP map. */
 } ASMState;
 
+#ifdef LUA_USE_ASSERT
+#define lj_assertA(c, ...)	lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
+#else
+#define lj_assertA(c, ...)	((void)as)
+#endif
+
 #define IR(ref)			(&as->ir[(ref)])
 
 #define ASMREF_TMP1		REF_TRUE	/* Temp. register. */
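Note: the new lj_assertA macro gives the assembler printf-style assertion messages in LUA_USE_ASSERT builds and compiles to a no-op (while still consuming `as`) otherwise. A minimal standalone sketch of the same pattern; my_assert and USE_ASSERT are illustrative stand-ins, not LuaJIT names:

    #include <stdio.h>
    #include <stdlib.h>

    /* Variadic assert in the style of lj_assertA: check a condition and
    ** report a printf-style message before aborting; ctx stands in for
    ** the ASMState *as that the real macro captures from its scope. */
    #ifdef USE_ASSERT
    #define my_assert(ctx, c, ...) \
      do { if (!(c)) { fprintf(stderr, __VA_ARGS__); abort(); } } while (0)
    #else
    #define my_assert(ctx, c, ...)  ((void)(ctx))  /* Compiles to nothing. */
    #endif

    int main(void)
    {
      int reg = 5;
      my_assert(NULL, reg < 32, "bad register %d\n", reg);
      return 0;
    }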
@@ -127,9 +133,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #ifdef LUA_USE_ASSERT
   if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
     IRIns *ir = IR(as->curins+1);
-    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
+    lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
 	    as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
-    lua_assert(0);
   }
 #endif
   if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
@@ -143,7 +148,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)	(as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -170,6 +175,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -178,6 +185,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
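Note: the emit_spload/emit_spstore macros above funnel all spill-slot traffic through one generic base+offset accessor, so each backend only has to implement emit_loadofs/emit_storeofs. A standalone sketch of the wrapper pattern; sp_load, sp_store and SP_BASE are illustrative stand-ins:

    #include <stdint.h>
    #include <stdio.h>

    enum { SP_BASE = 31 };  /* Illustrative stack-pointer register id. */

    /* Generic base+offset accessors; a real backend would emit code here. */
    static void load_ofs(int r, int base, int32_t ofs)
    {
      printf("load  r%d <- [r%d+%d]\n", r, base, ofs);
    }
    static void store_ofs(int r, int base, int32_t ofs)
    {
      printf("store r%d -> [r%d+%d]\n", r, base, ofs);
    }

    /* Thin wrappers in the spirit of emit_spload/emit_spstore. */
    #define sp_load(r, ofs)   load_ofs((r), SP_BASE, (ofs))
    #define sp_store(r, ofs)  store_ofs((r), SP_BASE, (ofs))

    int main(void)
    {
      sp_store(3, 16);  /* Spill register 3 to the slot at SP+16. */
      sp_load(3, 16);   /* Restore it later. */
      return 0;
    }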
@@ -235,7 +248,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
 	*p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
       } else {
 	*p++ = '?';
-	lua_assert(0);
+	lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
       }
     } else if (e[1] == 'f' || e[1] == 'i') {
       IRRef ref;
@@ -253,7 +266,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
     } else if (e[1] == 'x') {
       p += sprintf(p, "%08x", va_arg(argp, int32_t));
     } else {
-      lua_assert(0);
+      lj_assertA(0, "bad debug format code");
     }
     fmt = e+2;
   }
@@ -312,37 +325,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   Reg r;
   if (ra_iskref(ref)) {
     r = ra_krefreg(ref);
-    lua_assert(!rset_test(as->freeset, r));
+    lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
   r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
+  lj_assertA(!ra_hasspill(ir->s),
+	     "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
-    lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    /* REF_NIL stores ASMREF_L register. */
+    lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
-    lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-	       ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+    lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+	       ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+	       "rematk of bad IR op %d", ir->o);
     emit_loadi(as, r, ir->i);
   }
   return r;
@@ -352,7 +379,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
-  lua_assert(ir >= as->ir + REF_TRUE);
+  lj_assertA(ir >= as->ir + REF_TRUE,
+	     "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
   if (!ra_hasspill(slot)) {
     if (irt_is64(ir->t)) {
       slot = as->evenspill;
@@ -377,7 +405,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 {
   IRIns *ir = IR(ref);
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
+  lj_assertA(!ra_hasspill(ir->s),
+	     "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
   ra_free(as, r);
   ra_modified(as, r);
   ir->r = RID_INIT;
@@ -393,7 +423,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
   IRIns *ir = IR(ref);
   int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r));
+  lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
   ra_sethint(ir->r, r);  /* Keep hint. */
   ra_free(as, r);
   if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
@@ -422,14 +452,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
 {
   IRRef ref;
   RegCost cost = ~(RegCost)0;
-  lua_assert(allow != RSET_EMPTY);
+  lj_assertA(allow != RSET_EMPTY, "evict from empty set");
   if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
     GPRDEF(MINCOST)
   } else {
     FPRDEF(MINCOST)
   }
   ref = regcost_ref(cost);
-  lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
+  lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
+	     "evict of out-of-range IR %04d", ref - REF_BIAS);
   /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
   if (!irref_isk(ref) && (as->weakset & allow)) {
     IRIns *ir = IR(ref);
@@ -511,7 +542,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -520,9 +551,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+	if (k == ra_krefk(as, ref))
+	  return r;
+      } else {
+	IRIns *ir = IR(ref);
+	if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+	    (ir->o == IR_KINT && k == ir->i) ||
+	    (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+	    ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+	     k == (intptr_t)ir_kptr(ir))
+#else
+	    (ir->o != IR_KINT64 && k == ir->i)
+#endif
+	   )
+	  return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
 	k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
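Note: widening ra_allock to intptr_t matters because, on a 64-bit target, matching a constant register by only its low 32 bits could alias two distinct 64-bit values to the same register. A standalone demonstration of the pitfall the full-width compare avoids:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
      int64_t a = 0x100000001LL, b = 1;
      /* Low 32 bits compare equal: a truncated match would wrongly reuse
      ** a register holding 'a' for the constant 'b'. */
      printf("low 32 bits equal: %d\n", (int32_t)a == (int32_t)b);  /* 1 */
      printf("full values equal: %d\n", a == b);                    /* 0 */
      return 0;
    }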
@@ -542,7 +595,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -565,7 +618,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
   IRIns *ir = IR(ref);
   RegSet pick = as->freeset & allow;
   Reg r;
-  lua_assert(ra_noreg(ir->r));
+  lj_assertA(ra_noreg(ir->r),
+	     "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
   if (pick) {
     /* First check register hint from propagation or PHI. */
     if (ra_hashint(ir->r)) {
@@ -612,15 +666,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
-  lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
-  lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
+  lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
+	     "rename between GPR/FPR %d and %d", down, up);
+  lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
+  lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
   ra_free(as, down);  /* 'down' is free ... */
   ra_modified(as, down);
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
@@ -628,11 +694,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
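Note: the new ra_addrename helper writes through as->J->cur.ir rather than a cached pointer, because emitting an IR instruction may reallocate the IR buffer; that is why the old inline code had to refresh as->ir after lj_ir_emit. The generic hazard it avoids, as a standalone sketch:

    #include <stdlib.h>

    int main(void)
    {
      int *buf = malloc(4 * sizeof *buf);
      if (!buf) return 1;
      int *cached = &buf[1];                          /* Pointer into the buffer. */
      int *grown = realloc(buf, 1024 * sizeof *buf);  /* The block may move... */
      if (!grown) { free(buf); return 1; }
      buf = grown;
      /* *cached = 42;  ...so writing through 'cached' is now undefined. */
      (void)cached;
      buf[1] = 42;      /* Always re-derive from the current base pointer. */
      free(buf);
      return 0;
    }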
@@ -665,7 +727,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
 {
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
-    lua_assert(rset_test(as->freeset, r));
+    lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
     ra_modified(as, r);
     emit_movrr(as, ir, dest, r);
   }
@@ -682,20 +744,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-	cTValue *tv = ir_knum(ir);
 	/* FP remat needs a load except for +0. Still better than eviction. */
-	if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-	  emit_loadn(as, dest, tv);
+	if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+	  emit_loadk64(as, dest, ir);
 	  return;
 	}
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-	emit_loadu64(as, dest, ir_kint64(ir)->u64);
+	emit_loadk64(as, dest, ir);
+	return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+	emit_loadk64(as, dest, ir);
 	return;
 #endif
-      } else {
-	lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
-		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+#endif
+      } else if (ir->o != IR_KPRI) {
+	lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+		   ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+		   "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
 	emit_loadi(as, dest, ir->i);
 	return;
       }
@@ -837,11 +904,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
 #endif
   { /* Allocate stored values for TNEW, TDUP and CNEW. */
     IRIns *irs;
-    lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
+    lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
+	       "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
     for (irs = IR(as->snapref-1); irs > ir; irs--)
       if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
-	lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
-		   irs->o == IR_FSTORE || irs->o == IR_XSTORE);
+	lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+		   irs->o == IR_FSTORE || irs->o == IR_XSTORE,
+		   "sunk store IR %04d has bad op %d",
+		   (int)(irs - as->ir) - REF_BIAS, irs->o);
 	asm_snap_alloc1(as, irs->op2);
 	if (LJ_32 && (irs+1)->o == IR_HIOP)
 	  asm_snap_alloc1(as, (irs+1)->op2);
@@ -888,7 +958,9 @@ static void asm_snap_alloc(ASMState *as)
     if (!irref_isk(ref)) {
       asm_snap_alloc1(as, ref);
       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
-	lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
+	lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
+		   "snap %d[%d] points to bad SOFTFP IR %04d",
+		   as->snapno, n, ref - REF_BIAS);
 	asm_snap_alloc1(as, ref+1);
       }
     }
@@ -934,7 +1006,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
 	ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
@@ -943,44 +1015,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-			    const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -990,21 +1024,26 @@ static int32_t asm_stack_adjust(ASMState *as)
 }
 
 /* Must match with hash*() in lj_tab.c. */
-static uint32_t ir_khash(IRIns *ir)
+static uint32_t ir_khash(ASMState *as, IRIns *ir)
 {
   uint32_t lo, hi;
+  UNUSED(as);
   if (irt_isstr(ir->t)) {
-    return ir_kstr(ir)->hash;
+    return ir_kstr(ir)->sid;
   } else if (irt_isnum(ir->t)) {
     lo = ir_knum(ir)->u32.lo;
     hi = ir_knum(ir)->u32.hi << 1;
   } else if (irt_ispri(ir->t)) {
-    lua_assert(!irt_isnil(ir->t));
+    lj_assertA(!irt_isnil(ir->t), "hash of nil key");
     return irt_type(ir->t)-IRT_FALSE;
   } else {
-    lua_assert(irt_isgcv(ir->t));
+    lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
     lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
     hi = lo + HASH_BIAS;
+#endif
   }
   return hashrot(lo, hi);
 }
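Note: in the LJ_GC64 branch of ir_khash above, a 64-bit GC pointer is folded into the (lo, hi) pair by taking the low word as-is and mixing the high word with the item type tag before hashing. A standalone sketch of that folding; mix() is an illustrative stand-in, not LuaJIT's hashrot():

    #include <stdint.h>
    #include <stdio.h>

    /* Rotate-and-xor mixing of the two 32-bit halves. */
    static uint32_t mix(uint32_t lo, uint32_t hi)
    {
      lo ^= hi; hi = (hi << 14) | (hi >> 18);
      return hi ^ (lo - ((lo << 13) | (lo >> 19)));
    }

    int main(void)
    {
      uint64_t p = 0x00007f00deadbee8ULL;  /* Hypothetical GC object address. */
      uint32_t itype = 4;                  /* Hypothetical type tag. */
      uint32_t lo = (uint32_t)p;
      uint32_t hi = (uint32_t)(p >> 32) | (itype << 15);
      printf("hash = %08x\n", mix(lo, hi));
      return 0;
    }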
@@ -1065,6 +1104,237 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+	  (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+	irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+	ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -129;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lj_assertA(irt_isstr(irs->t),
+	     "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = (int8_t)strdata(s)[0];  /* Signed! */
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+	args[1] = ASMREF_TMP1;  /* TValue * */
+	ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+	lj_assertA(irt_isinteger(IR(irs->op1)->t),
+		   "TOSTR of non-numeric IR %04d", irs->op1);
+	args[1] = irs->op1;  /* int */
+	if (irs->op2 == IRTOSTR_INT)
+	  ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+	else
+	  ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -129)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
+	     "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  /* Account for split args. */
+  lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
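Note: the kchar sentinel in asm_bufput above works because -129 lies outside the int8_t range [-128, 127], so it can never collide with a real single-character string constant. A standalone check of that invariant:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
      int kchar = -129;  /* "no single-char constant" marker */
      for (int c = -128; c <= 127; c++)
        if (c == kchar) { puts("collision!"); return 1; }
      printf("sentinel %d is distinct from every int8_t value\n", kchar);
      return 0;
    }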
@@ -1250,12 +1520,7 @@ static void asm_phi_fixup(ASMState *as)
 	irt_clearmark(ir->t);
 	/* Left PHI gained a spill slot before the loop? */
 	if (ra_hasspill(ir->s)) {
-	  IRRef ren;
-	  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-	  ren = tref_ref(lj_ir_emit(as->J));
-	  as->ir = as->T->ir;  /* The IR may have been reallocated. */
-	  IR(ren)->r = (uint8_t)r;
-	  IR(ren)->s = SPS_NONE;
+	  ra_addrename(as, r, lref, as->loopsnapno);
 	}
       }
       rset_clear(work, r);
@@ -1330,6 +1595,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1338,6 +1605,203 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Common instruction helpers ------------------------------------------ */
+
+#if !LJ_SOFTFP32
+#if !LJ_TARGET_X86ORX64
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+#define asm_fppowi(as, ir)	asm_callid(as, ir, IRCALL_lj_vm_powi)
+#endif
+
+static void asm_pow(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
+#endif
+  if (irt_isnum(IR(ir->op2)->t))
+    asm_callid(as, ir, IRCALL_pow);
+  else
+    asm_fppowi(as, ir);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fpdiv(as, ir);
+}
+#endif
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+static void asm_fuseequal(ASMState *as, IRIns *ir)
+{
+  /* Fuse HREF + EQ/NE. */
+  if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+    as->curins--;
+    asm_href(as, ir-1, (IROp)ir->o);
+  } else {
+    asm_equal(as, ir);
+  }
+}
+
+static void asm_alen(ASMState *as, IRIns *ir)
+{
+  asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
+					  IRCALL_lj_tab_len_hint);
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR:
+    lj_assertA(!ra_used(ir),
+	       "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
+    break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    /* Unused for LJ_SOFTFP32. */
+    lj_assertA(0, "IR %04d with unused op %d",
+	       (int)(ir - as->ir) - REF_BIAS, ir->o);
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+  case IR_ALEN: asm_alen(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI:
+#if LJ_HASFFI
+    asm_cnew(as, ir);
+#else
+    lj_assertA(0, "IR %04d with unused op %d",
+	       (int)(ir - as->ir) - REF_BIAS, ir->o);
+#endif
+    break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
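Note: the per-opcode asm_*() helpers called by the new central asm_ir() dispatcher are supplied by each backend header, some as functions and some as macros. A miniature standalone version of that dispatch pattern:

    #include <stdio.h>

    typedef enum { OP_ADD, OP_SUB } Op;
    typedef struct { Op o; int a, b; } Ins;

    /* Helpers may be functions or macros, as in the per-target headers. */
    static void do_add(Ins *i) { printf("%d\n", i->a + i->b); }
    #define do_sub(i)  printf("%d\n", (i)->a - (i)->b)

    static void dispatch(Ins *i)
    {
      switch (i->o) {
      case OP_ADD: do_add(i); break;
      case OP_SUB: do_sub(i); break;
      }
    }

    int main(void)
    {
      Ins i1 = { OP_ADD, 2, 3 }, i2 = { OP_SUB, 5, 1 };
      dispatch(&i1); dispatch(&i2);
      return 0;
    }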
@@ -1383,8 +1847,10 @@ static void asm_head_side(ASMState *as)
   for (i = as->stopins; i > REF_BASE; i--) {
     IRIns *ir = IR(i);
     RegSP rs;
-    lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
-	       (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
+    lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+	       (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
+	       "IR %04d has bad parent op %d",
+	       (int)(ir - as->ir) - REF_BIAS, ir->o);
     rs = as->parentmap[i - REF_FIRST];
     if (ra_hasreg(ir->r)) {
       rset_clear(allow, ir->r);
@@ -1536,7 +2002,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
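Note: LJ_FR2 is 1 when a stack frame's link info occupies two slots (the GC64 layout) and 0 otherwise, so asm_baseslot steps one extra slot back over the frame info. A standalone sketch of the slot arithmetic; frame_slot is a hypothetical SNAP_FRAME slot:

    #include <stdio.h>

    int main(void)
    {
      for (int fr2 = 0; fr2 <= 1; fr2++) {
        int frame_slot = 5;  /* Hypothetical slot carrying SNAP_FRAME. */
        printf("FR2=%d -> base slot %d\n", fr2, frame_slot - fr2);
      }
      return 0;
    }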
@@ -1556,19 +2022,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
 	pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1580,6 +2050,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
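Note: the ktrace slot above is emitted as a placeholder constant before the final GCtrace object exists and is back-patched with J->curfinal once it does. The generic back-patching pattern, as a standalone sketch:

    #include <stdio.h>

    typedef struct { const void *gcr; int op; } Ins;
    enum { OP_KNULL, OP_KGC };

    int main(void)
    {
      Ins ktrace = { NULL, OP_KNULL };  /* Placeholder emitted up front. */
      static int final_trace;           /* Stands in for the final GCtrace. */
      ktrace.gcr = &final_trace;        /* Patch once the object is known... */
      ktrace.op = OP_KGC;               /* ...and turn it into a real KGC. */
      printf("patched: op=%d ptr=%p\n", ktrace.op, ktrace.gcr);
      return 0;
    }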
@@ -1605,17 +2080,23 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
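Note: in the non-GC64 branch above, each 64-bit constant occupies the IR slot right after its instruction, and the address of that payload is stashed in ir->i so backends can reference it directly. A standalone sketch of the layout; Slot is an illustrative stand-in for IRIns:

    #include <stdint.h>
    #include <stdio.h>

    typedef union {
      struct { int32_t i; uint8_t o; } ins;  /* Instruction view. */
      uint64_t k64;                          /* 64-bit constant payload view. */
    } Slot;

    int main(void)
    {
      Slot ir[2];                               /* Instruction + payload slot. */
      ir[1].k64 = 0x123456789abcdef0ULL;        /* The 64-bit constant. */
      ir[0].ins.i = (int32_t)(intptr_t)&ir[1];  /* Truncated payload address. */
      printf("payload at %p, low bits %08x\n",
             (void *)&ir[1], (uint32_t)ir[0].ins.i);
      return 0;
    }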
@@ -1628,7 +2109,7 @@ static void asm_setup_regsp(ASMState *as)
   ir = IR(REF_FIRST);
   if (as->parent) {
     uint16_t *p;
-    lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
+    lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
     if (lastir - ir > LJ_MAX_JSLOTS)
       lj_trace_err(as->J, LJ_TRERR_NYICOAL);
     as->stopins = (IRRef)((lastir-1) - as->ir);
@@ -1676,7 +2157,7 @@ static void asm_setup_regsp(ASMState *as)
 	as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1701,8 +2182,8 @@ static void asm_setup_regsp(ASMState *as)
 	ir->prev = REGSP_HINT(RID_FPRET);
 	continue;
       }
-      /* fallthrough */
 #endif
+      /* fallthrough */
     case IR_CALLN: case IR_CALLXS:
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
@@ -1721,11 +2202,23 @@ static void asm_setup_regsp(ASMState *as)
 #endif
       /* fallthrough */
       /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
 	as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+	if (ir->op2 != REF_NIL && as->evenspill < 4)
+	  as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
       /* fallthrough */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#else
+      /* fallthrough */
+    case IR_CNEW:
+#endif
+      /* fallthrough */
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
 	as->modset = RSET_SCRATCH;
@@ -1734,21 +2227,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
 	as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
+      /* fallthrough */
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-	ir->prev = REGSP_HINT(RID_XMM0);
 	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+	  as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+	if (irt_isnum(IR(ir->op2)->t)) {
+	  if (as->evenspill < 4)  /* Leave room to call pow(). */
+	    as->evenspill = 4;
+	}
+	break;
 #else
 	ir->prev = REGSP_HINT(RID_FPRET);
-	if (inloop)
-	  as->modset |= RSET_SCRATCH;
-#endif
 	continue;
+#endif
       }
       /* fallthrough */ /* for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2259,31 @@ static void asm_setup_regsp(ASMState *as) | |||
1761 | break; | 2259 | break; |
1762 | case IR_FPMATH: | 2260 | case IR_FPMATH: |
1763 | #if LJ_TARGET_X86ORX64 | 2261 | #if LJ_TARGET_X86ORX64 |
1764 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ | 2262 | if (ir->op2 <= IRFPM_TRUNC) { |
1765 | ir->prev = REGSP_HINT(RID_XMM0); | 2263 | if (!(as->flags & JIT_F_SSE4_1)) { |
1766 | #if !LJ_64 | 2264 | ir->prev = REGSP_HINT(RID_XMM0); |
1767 | if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ | 2265 | if (inloop) |
1768 | as->evenspill = 4; | 2266 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); |
1769 | #endif | 2267 | continue; |
1770 | if (inloop) | 2268 | } |
1771 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | 2269 | break; |
1772 | continue; | ||
1773 | } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { | ||
1774 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1775 | if (inloop) | ||
1776 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
1777 | continue; | ||
1778 | } | 2270 | } |
2271 | #endif | ||
2272 | if (inloop) | ||
2273 | as->modset |= RSET_SCRATCH; | ||
2274 | #if LJ_TARGET_X86 | ||
1779 | break; | 2275 | break; |
1780 | #else | 2276 | #else |
1781 | ir->prev = REGSP_HINT(RID_FPRET); | 2277 | ir->prev = REGSP_HINT(RID_FPRET); |
1782 | if (inloop) | ||
1783 | as->modset |= RSET_SCRATCH; | ||
1784 | continue; | 2278 | continue; |
1785 | #endif | 2279 | #endif |
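For context on the IRFPM change above: with SSE4.1, floor/ceil/trunc can be emitted inline (ROUNDSD with a rounding-mode immediate), so no call-style register hints are needed; without it, a helper routine is called, hence the XMM0 hint and the explicit clobber set. A sketch of the SSE4.1 path via compiler intrinsics, not the emitter code (compile with -msse4.1):

    #include <smmintrin.h>   /* SSE4.1 */

    static double trunc_sse41(double x)
    {
      __m128d v = _mm_set_sd(x);
      /* _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC selects truncation. */
      return _mm_cvtsd_f64(_mm_round_sd(v, v,
                                        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
    }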
1786 | #if LJ_TARGET_X86ORX64 | 2280 | #if LJ_TARGET_X86ORX64 |
1787 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ | 2281 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ |
1788 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 2282 | case IR_BSHL: case IR_BSHR: case IR_BSAR: |
2283 | if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ | ||
2284 | break; | ||
2285 | /* fallthrough */ | ||
2286 | case IR_BROL: case IR_BROR: | ||
1789 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { | 2287 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { |
1790 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); | 2288 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); |
1791 | if (inloop) | 2289 | if (inloop) |
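Background for the shift cases above: the legacy x86 variable-count shifts and rotates (SHL, SHR, SAR, ROL, ROR) hard-wire the count to CL, hence the RID_ECX hint and the ECX clobber inside loops. BMI2 adds SHLX/SHRX/SARX, which read the count from any general-purpose register, so the plain shifts can break out early; the rotates keep the hint because BMI2's RORX only handles immediate counts. A sketch of that decision (the register ids are illustrative, not LuaJIT's):

    enum { RID_ANY = -1, RID_ECX = 1 };     /* illustrative ids */

    /* Which register must hold a variable shift/rotate count on x86/x64? */
    static int shift_count_reg(int is_rotate, int has_bmi2)
    {
      if (!is_rotate && has_bmi2)
        return RID_ANY;   /* SHLX/SHRX/SARX take the count in any GPR. */
      return RID_ECX;     /* SHL/SHR/SAR/ROL/ROR read the count from CL. */
    }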
@@ -1831,14 +2329,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1831 | ASMState *as = &as_; | 2329 | ASMState *as = &as_; |
1832 | MCode *origtop; | 2330 | MCode *origtop; |
1833 | 2331 | ||
2332 | /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ | ||
2333 | { | ||
2334 | IRRef nins = T->nins; | ||
2335 | IRIns *ir = &T->ir[nins-1]; | ||
2336 | if (ir->o == IR_NOP || ir->o == IR_RENAME) { | ||
2337 | do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME); | ||
2338 | T->nins = nins; | ||
2339 | } | ||
2340 | } | ||
2341 | |||
1834 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ | 2342 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ |
1835 | J->cur.nins = lj_ir_nextins(J); | 2343 | /* This also allows one RENAME to be added without reallocating curfinal. */ |
1836 | lj_ir_nop(&J->cur.ir[J->cur.nins]); | 2344 | as->orignins = lj_ir_nextins(J); |
2345 | lj_ir_nop(&J->cur.ir[as->orignins]); | ||
1837 | 2346 | ||
1838 | /* Setup initial state. Copy some fields to reduce indirections. */ | 2347 | /* Setup initial state. Copy some fields to reduce indirections. */ |
1839 | as->J = J; | 2348 | as->J = J; |
1840 | as->T = T; | 2349 | as->T = T; |
1841 | as->ir = T->ir; | 2350 | J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ |
1842 | as->flags = J->flags; | 2351 | as->flags = J->flags; |
1843 | as->loopref = J->loopref; | 2352 | as->loopref = J->loopref; |
1844 | as->realign = NULL; | 2353 | as->realign = NULL; |
@@ -1851,12 +2360,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1851 | as->mclim = as->mcbot + MCLIM_REDZONE; | 2360 | as->mclim = as->mcbot + MCLIM_REDZONE; |
1852 | asm_setup_target(as); | 2361 | asm_setup_target(as); |
1853 | 2362 | ||
1854 | do { | 2363 | /* |
2364 | ** This is a loop, because the MCode may have to be (re-)assembled | ||
2365 | ** multiple times: | ||
2366 | ** | ||
2367 | ** 1. as->realign is set (and the assembly aborted), if the arch-specific | ||
2368 | ** backend wants the MCode to be aligned differently. | ||
2369 | ** | ||
2370 | ** This is currently only the case on x86/x64, where small loops get | ||
2371 | ** an aligned loop body plus a short branch. Not much effort is wasted, | ||
2372 | ** because the abort happens very quickly and only once. | ||
2373 | ** | ||
2374 | ** 2. The IR is immovable, since the MCode embeds pointers to various | ||
2375 | ** constants inside the IR. But RENAMEs may need to be added to the IR | ||
2376 | ** during assembly, which might grow and reallocate the IR. We check | ||
2377 | ** at the end if the IR (in J->cur.ir) has actually grown, resize the | ||
2378 | ** copy (in J->curfinal.ir) and try again. | ||
2379 | ** | ||
2380 | ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have | ||
2381 | ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to | ||
2382 | ** always have one spare slot in the IR (see above), which means we | ||
2383 | ** have to redo the assembly for only ~2% of all traces. | ||
2384 | ** | ||
2385 | ** Very, very rarely, this needs to be done repeatedly, since the | ||
2386 | ** location of constants inside the IR (actually, reachability from | ||
2387 | ** a global pointer) may affect register allocation and thus the | ||
2388 | ** number of RENAMEs. | ||
2389 | */ | ||
2390 | for (;;) { | ||
1855 | as->mcp = as->mctop; | 2391 | as->mcp = as->mctop; |
1856 | #ifdef LUA_USE_ASSERT | 2392 | #ifdef LUA_USE_ASSERT |
1857 | as->mcp_prev = as->mcp; | 2393 | as->mcp_prev = as->mcp; |
1858 | #endif | 2394 | #endif |
1859 | as->curins = T->nins; | 2395 | as->ir = J->curfinal->ir; /* Use the copied IR. */ |
2396 | as->curins = J->cur.nins = as->orignins; | ||
2397 | |||
1860 | RA_DBG_START(); | 2398 | RA_DBG_START(); |
1861 | RA_DBGX((as, "===== STOP =====")); | 2399 | RA_DBGX((as, "===== STOP =====")); |
1862 | 2400 | ||
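The control flow described in the comment block, reduced to a runnable skeleton with made-up bookkeeping (the real loop body follows in the next hunks):

    #include <stdio.h>

    int main(void)
    {
      int want_realign = 1;  /* pretend the backend requests realignment once */
      int capacity = 101;    /* IR copy sized with one spare slot */
      int nins = 102;        /* pretend assembly appended two RENAMEs */
      for (;;) {
        /* ... assemble the trace backwards, possibly appending RENAMEs ... */
        if (want_realign && nins <= capacity) {
          want_realign = 0;  /* case 1: retry with different MCode alignment */
          continue;
        }
        /* ... emit head of trace ... */
        if (nins <= capacity) {          /* case 2, the ~98% fast path */
          puts("copy RENAMEs back; done");
          break;
        }
        printf("IR grew to %d: reallocate the copy and retry\n", nins);
        capacity = nins;
      }
      return 0;
    }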
@@ -1875,7 +2413,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1875 | /* Assemble a trace in linear backwards order. */ | 2413 | /* Assemble a trace in linear backwards order. */ |
1876 | for (as->curins--; as->curins > as->stopins; as->curins--) { | 2414 | for (as->curins--; as->curins > as->stopins; as->curins--) { |
1877 | IRIns *ir = IR(as->curins); | 2415 | IRIns *ir = IR(as->curins); |
1878 | lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ | 2416 | /* 64 bit types handled by SPLIT for 32 bit archs. */ |
2417 | lj_assertA(!(LJ_32 && irt_isint64(ir->t)), | ||
2418 | "IR %04d has unsplit 64 bit type", | ||
2419 | (int)(ir - as->ir) - REF_BIAS); | ||
1879 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) | 2420 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) |
1880 | continue; /* Dead-code elimination can be soooo easy. */ | 2421 | continue; /* Dead-code elimination can be soooo easy. */ |
1881 | if (irt_isguard(ir->t)) | 2422 | if (irt_isguard(ir->t)) |
@@ -1884,22 +2425,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1884 | checkmclim(as); | 2425 | checkmclim(as); |
1885 | asm_ir(as, ir); | 2426 | asm_ir(as, ir); |
1886 | } | 2427 | } |
1887 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ | ||
1888 | 2428 | ||
1889 | /* Emit head of trace. */ | 2429 | if (as->realign && J->curfinal->nins >= T->nins) |
1890 | RA_DBG_REF(); | 2430 | continue; /* Retry in case only the MCode needs to be realigned. */ |
1891 | checkmclim(as); | 2431 | |
1892 | if (as->gcsteps > 0) { | 2432 | /* Emit head of trace. */ |
1893 | as->curins = as->T->snap[0].ref; | 2433 | RA_DBG_REF(); |
1894 | asm_snap_prep(as); /* The GC check is a guard. */ | 2434 | checkmclim(as); |
1895 | asm_gc_check(as); | 2435 | if (as->gcsteps > 0) { |
2436 | as->curins = as->T->snap[0].ref; | ||
2437 | asm_snap_prep(as); /* The GC check is a guard. */ | ||
2438 | asm_gc_check(as); | ||
2439 | as->curins = as->stopins; | ||
2440 | } | ||
2441 | ra_evictk(as); | ||
2442 | if (as->parent) | ||
2443 | asm_head_side(as); | ||
2444 | else | ||
2445 | asm_head_root(as); | ||
2446 | asm_phi_fixup(as); | ||
2447 | |||
2448 | if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ | ||
2449 | lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth"); | ||
2450 | memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, | ||
2451 | (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ | ||
2452 | T->nins = J->curfinal->nins; | ||
2453 | break; /* Done. */ | ||
2454 | } | ||
2455 | |||
2456 | /* Otherwise try again with a bigger IR. */ | ||
2457 | lj_trace_free(J2G(J), J->curfinal); | ||
2458 | J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ | ||
2459 | J->curfinal = lj_trace_alloc(J->L, T); | ||
2460 | as->realign = NULL; | ||
1896 | } | 2461 | } |
1897 | ra_evictk(as); | ||
1898 | if (as->parent) | ||
1899 | asm_head_side(as); | ||
1900 | else | ||
1901 | asm_head_root(as); | ||
1902 | asm_phi_fixup(as); | ||
1903 | 2462 | ||
1904 | RA_DBGX((as, "===== START ====")); | 2463 | RA_DBGX((as, "===== START ====")); |
1905 | RA_DBG_FLUSH(); | 2464 | RA_DBG_FLUSH(); |
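The "IR didn't grow?" test above works because J->curfinal was allocated right after nins was bumped for the sentinel nop, i.e. with exactly one spare slot beyond the instructions being assembled, and every RENAME added during assembly bumps T->nins by one. With illustrative numbers:

    #include <assert.h>

    int main(void)
    {
      unsigned orignins = 100;              /* instruction count at setup */
      unsigned copy_nins = orignins + 1;    /* curfinal: one spare slot */
      assert(copy_nins >= orignins + 0);    /* no RENAME: fast path (~95%) */
      assert(copy_nins >= orignins + 1);    /* one RENAME: still fits (~3%) */
      assert(!(copy_nins >= orignins + 2)); /* two or more: reallocate */
      return 0;
    }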
@@ -1912,6 +2471,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1912 | if (!as->loopref) | 2471 | if (!as->loopref) |
1913 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ | 2472 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ |
1914 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); | 2473 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); |
2474 | #if LJ_TARGET_MCODE_FIXUP | ||
2475 | asm_mcode_fixup(T->mcode, T->szmcode); | ||
2476 | #endif | ||
1915 | lj_mcode_sync(T->mcode, origtop); | 2477 | lj_mcode_sync(T->mcode, origtop); |
1916 | } | 2478 | } |
1917 | 2479 | ||
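A note on the final lj_mcode_sync(): it flushes the instruction cache for the freshly written range, a no-op on x86 but mandatory on architectures such as ARM, whose instruction fetch is not coherent with ordinary stores. A portable sketch using the GCC/Clang builtin (not LuaJIT's actual implementation):

    /* Minimal instruction-cache sync for freshly emitted machine code. */
    static void mcode_sync(void *start, void *end)
    {
    #if defined(__GNUC__) || defined(__clang__)
      __builtin___clear_cache((char *)start, (char *)end);
    #else
      (void)start; (void)end;   /* x86: stores are coherent with ifetch. */
    #endif
    }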