Diffstat (limited to 'src/lj_asm.c')
-rw-r--r-- | src/lj_asm.c | 1021 |
1 file changed, 838 insertions, 183 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9ff9215f..7abafbf4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -11,6 +11,7 @@ | |||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_buf.h" | ||
14 | #include "lj_str.h" | 15 | #include "lj_str.h" |
15 | #include "lj_tab.h" | 16 | #include "lj_tab.h" |
16 | #include "lj_frame.h" | 17 | #include "lj_frame.h" |
@@ -71,6 +72,7 @@ typedef struct ASMState { | |||
71 | IRRef snaprename; /* Rename highwater mark for snapshot check. */ | 72 | IRRef snaprename; /* Rename highwater mark for snapshot check. */ |
72 | SnapNo snapno; /* Current snapshot number. */ | 73 | SnapNo snapno; /* Current snapshot number. */ |
73 | SnapNo loopsnapno; /* Loop snapshot number. */ | 74 | SnapNo loopsnapno; /* Loop snapshot number. */ |
75 | int snapalloc; /* Current snapshot needs allocation. */ | ||
74 | BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ | 76 | BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ |
75 | 77 | ||
76 | IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ | 78 | IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ |
@@ -85,18 +87,25 @@ typedef struct ASMState { | |||
85 | 87 | ||
86 | MCode *mcbot; /* Bottom of reserved MCode. */ | 88 | MCode *mcbot; /* Bottom of reserved MCode. */ |
87 | MCode *mctop; /* Top of generated MCode. */ | 89 | MCode *mctop; /* Top of generated MCode. */ |
90 | MCode *mctoporig; /* Original top of generated MCode. */ | ||
88 | MCode *mcloop; /* Pointer to loop MCode (or NULL). */ | 91 | MCode *mcloop; /* Pointer to loop MCode (or NULL). */ |
89 | MCode *invmcp; /* Points to invertible loop branch (or NULL). */ | 92 | MCode *invmcp; /* Points to invertible loop branch (or NULL). */ |
90 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ | 93 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ |
91 | MCode *realign; /* Realign loop if not NULL. */ | 94 | MCode *realign; /* Realign loop if not NULL. */ |
92 | 95 | ||
93 | #ifdef RID_NUM_KREF | 96 | #ifdef RID_NUM_KREF |
94 | int32_t krefk[RID_NUM_KREF]; | 97 | intptr_t krefk[RID_NUM_KREF]; |
95 | #endif | 98 | #endif |
96 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ | 99 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ |
97 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ | 100 | uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ |
98 | } ASMState; | 101 | } ASMState; |
99 | 102 | ||
103 | #ifdef LUA_USE_ASSERT | ||
104 | #define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__) | ||
105 | #else | ||
106 | #define lj_assertA(c, ...) ((void)as) | ||
107 | #endif | ||
108 | |||
100 | #define IR(ref) (&as->ir[(ref)]) | 109 | #define IR(ref) (&as->ir[(ref)]) |
101 | 110 | ||
102 | #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ | 111 | #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ |
@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as) | |||
128 | #ifdef LUA_USE_ASSERT | 137 | #ifdef LUA_USE_ASSERT |
129 | if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { | 138 | if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { |
130 | IRIns *ir = IR(as->curins+1); | 139 | IRIns *ir = IR(as->curins+1); |
131 | fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, | 140 | lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp, |
132 | as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); | 141 | as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); |
133 | lua_assert(0); | ||
134 | } | 142 | } |
135 | #endif | 143 | #endif |
136 | if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); | 144 | if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); |
@@ -144,7 +152,7 @@ static LJ_AINLINE void checkmclim(ASMState *as) | |||
144 | #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) | 152 | #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) |
145 | #define ra_krefk(as, ref) (as->krefk[(ref)]) | 153 | #define ra_krefk(as, ref) (as->krefk[(ref)]) |
146 | 154 | ||
147 | static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) | 155 | static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k) |
148 | { | 156 | { |
149 | IRRef ref = (IRRef)(r - RID_MIN_KREF); | 157 | IRRef ref = (IRRef)(r - RID_MIN_KREF); |
150 | as->krefk[ref] = k; | 158 | as->krefk[ref] = k; |
@@ -171,6 +179,8 @@ IRFLDEF(FLOFS) | |||
171 | #include "lj_emit_x86.h" | 179 | #include "lj_emit_x86.h" |
172 | #elif LJ_TARGET_ARM | 180 | #elif LJ_TARGET_ARM |
173 | #include "lj_emit_arm.h" | 181 | #include "lj_emit_arm.h" |
182 | #elif LJ_TARGET_ARM64 | ||
183 | #include "lj_emit_arm64.h" | ||
174 | #elif LJ_TARGET_PPC | 184 | #elif LJ_TARGET_PPC |
175 | #include "lj_emit_ppc.h" | 185 | #include "lj_emit_ppc.h" |
176 | #elif LJ_TARGET_MIPS | 186 | #elif LJ_TARGET_MIPS |
@@ -179,6 +189,12 @@ IRFLDEF(FLOFS) | |||
179 | #error "Missing instruction emitter for target CPU" | 189 | #error "Missing instruction emitter for target CPU" |
180 | #endif | 190 | #endif |
181 | 191 | ||
192 | /* Generic load/store of register from/to stack slot. */ | ||
193 | #define emit_spload(as, ir, r, ofs) \ | ||
194 | emit_loadofs(as, ir, (r), RID_SP, (ofs)) | ||
195 | #define emit_spstore(as, ir, r, ofs) \ | ||
196 | emit_storeofs(as, ir, (r), RID_SP, (ofs)) | ||
197 | |||
182 | /* -- Register allocator debugging ---------------------------------------- */ | 198 | /* -- Register allocator debugging ---------------------------------------- */ |
183 | 199 | ||
184 | /* #define LUAJIT_DEBUG_RA */ | 200 | /* #define LUAJIT_DEBUG_RA */ |
@@ -236,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
236 | *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; | 252 | *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; |
237 | } else { | 253 | } else { |
238 | *p++ = '?'; | 254 | *p++ = '?'; |
239 | lua_assert(0); | 255 | lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt); |
240 | } | 256 | } |
241 | } else if (e[1] == 'f' || e[1] == 'i') { | 257 | } else if (e[1] == 'f' || e[1] == 'i') { |
242 | IRRef ref; | 258 | IRRef ref; |
@@ -254,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
254 | } else if (e[1] == 'x') { | 270 | } else if (e[1] == 'x') { |
255 | p += sprintf(p, "%08x", va_arg(argp, int32_t)); | 271 | p += sprintf(p, "%08x", va_arg(argp, int32_t)); |
256 | } else { | 272 | } else { |
257 | lua_assert(0); | 273 | lj_assertA(0, "bad debug format code"); |
258 | } | 274 | } |
259 | fmt = e+2; | 275 | fmt = e+2; |
260 | } | 276 | } |
@@ -313,37 +329,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
313 | Reg r; | 329 | Reg r; |
314 | if (ra_iskref(ref)) { | 330 | if (ra_iskref(ref)) { |
315 | r = ra_krefreg(ref); | 331 | r = ra_krefreg(ref); |
316 | lua_assert(!rset_test(as->freeset, r)); | 332 | lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r); |
317 | ra_free(as, r); | 333 | ra_free(as, r); |
318 | ra_modified(as, r); | 334 | ra_modified(as, r); |
335 | #if LJ_64 | ||
336 | emit_loadu64(as, r, ra_krefk(as, ref)); | ||
337 | #else | ||
319 | emit_loadi(as, r, ra_krefk(as, ref)); | 338 | emit_loadi(as, r, ra_krefk(as, ref)); |
339 | #endif | ||
320 | return r; | 340 | return r; |
321 | } | 341 | } |
322 | ir = IR(ref); | 342 | ir = IR(ref); |
323 | r = ir->r; | 343 | r = ir->r; |
324 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 344 | lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref); |
345 | lj_assertA(!ra_hasspill(ir->s), | ||
346 | "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s); | ||
325 | ra_free(as, r); | 347 | ra_free(as, r); |
326 | ra_modified(as, r); | 348 | ra_modified(as, r); |
327 | ir->r = RID_INIT; /* Do not keep any hint. */ | 349 | ir->r = RID_INIT; /* Do not keep any hint. */ |
328 | RA_DBGX((as, "remat $i $r", ir, r)); | 350 | RA_DBGX((as, "remat $i $r", ir, r)); |
329 | #if !LJ_SOFTFP | 351 | #if !LJ_SOFTFP32 |
330 | if (ir->o == IR_KNUM) { | 352 | if (ir->o == IR_KNUM) { |
331 | emit_loadn(as, r, ir_knum(ir)); | 353 | emit_loadk64(as, r, ir); |
332 | } else | 354 | } else |
333 | #endif | 355 | #endif |
334 | if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { | 356 | if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { |
335 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ | 357 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ |
336 | emit_getgl(as, r, jit_base); | 358 | emit_getgl(as, r, jit_base); |
337 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { | 359 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { |
338 | lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ | 360 | /* REF_NIL stores ASMREF_L register. */ |
339 | emit_getgl(as, r, jit_L); | 361 | lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L"); |
362 | emit_getgl(as, r, cur_L); | ||
340 | #if LJ_64 | 363 | #if LJ_64 |
341 | } else if (ir->o == IR_KINT64) { | 364 | } else if (ir->o == IR_KINT64) { |
342 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 365 | emit_loadu64(as, r, ir_kint64(ir)->u64); |
366 | #if LJ_GC64 | ||
367 | } else if (ir->o == IR_KGC) { | ||
368 | emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); | ||
369 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
370 | emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); | ||
371 | #endif | ||
343 | #endif | 372 | #endif |
344 | } else { | 373 | } else { |
345 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 374 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || |
346 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); | 375 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, |
376 | "rematk of bad IR op %d", ir->o); | ||
347 | emit_loadi(as, r, ir->i); | 377 | emit_loadi(as, r, ir->i); |
348 | } | 378 | } |
349 | return r; | 379 | return r; |
@@ -353,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
353 | static int32_t ra_spill(ASMState *as, IRIns *ir) | 383 | static int32_t ra_spill(ASMState *as, IRIns *ir) |
354 | { | 384 | { |
355 | int32_t slot = ir->s; | 385 | int32_t slot = ir->s; |
356 | lua_assert(ir >= as->ir + REF_TRUE); | 386 | lj_assertA(ir >= as->ir + REF_TRUE, |
387 | "spill of K%03d", REF_BIAS - (int)(ir - as->ir)); | ||
357 | if (!ra_hasspill(slot)) { | 388 | if (!ra_hasspill(slot)) { |
358 | if (irt_is64(ir->t)) { | 389 | if (irt_is64(ir->t)) { |
359 | slot = as->evenspill; | 390 | slot = as->evenspill; |
@@ -378,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) | |||
378 | { | 409 | { |
379 | IRIns *ir = IR(ref); | 410 | IRIns *ir = IR(ref); |
380 | Reg r = ir->r; | 411 | Reg r = ir->r; |
381 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 412 | lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1); |
413 | lj_assertA(!ra_hasspill(ir->s), | ||
414 | "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s); | ||
382 | ra_free(as, r); | 415 | ra_free(as, r); |
383 | ra_modified(as, r); | 416 | ra_modified(as, r); |
384 | ir->r = RID_INIT; | 417 | ir->r = RID_INIT; |
@@ -394,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref) | |||
394 | IRIns *ir = IR(ref); | 427 | IRIns *ir = IR(ref); |
395 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ | 428 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ |
396 | Reg r = ir->r; | 429 | Reg r = ir->r; |
397 | lua_assert(ra_hasreg(r)); | 430 | lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS); |
398 | ra_sethint(ir->r, r); /* Keep hint. */ | 431 | ra_sethint(ir->r, r); /* Keep hint. */ |
399 | ra_free(as, r); | 432 | ra_free(as, r); |
400 | if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ | 433 | if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ |
@@ -423,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow) | |||
423 | { | 456 | { |
424 | IRRef ref; | 457 | IRRef ref; |
425 | RegCost cost = ~(RegCost)0; | 458 | RegCost cost = ~(RegCost)0; |
426 | lua_assert(allow != RSET_EMPTY); | 459 | lj_assertA(allow != RSET_EMPTY, "evict from empty set"); |
427 | if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { | 460 | if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { |
428 | GPRDEF(MINCOST) | 461 | GPRDEF(MINCOST) |
429 | } else { | 462 | } else { |
430 | FPRDEF(MINCOST) | 463 | FPRDEF(MINCOST) |
431 | } | 464 | } |
432 | ref = regcost_ref(cost); | 465 | ref = regcost_ref(cost); |
433 | lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); | 466 | lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins), |
467 | "evict of out-of-range IR %04d", ref - REF_BIAS); | ||
434 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ | 468 | /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ |
435 | if (!irref_isk(ref) && (as->weakset & allow)) { | 469 | if (!irref_isk(ref) && (as->weakset & allow)) { |
436 | IRIns *ir = IR(ref); | 470 | IRIns *ir = IR(ref); |
@@ -512,7 +546,7 @@ static void ra_evictk(ASMState *as) | |||
512 | 546 | ||
513 | #ifdef RID_NUM_KREF | 547 | #ifdef RID_NUM_KREF |
514 | /* Allocate a register for a constant. */ | 548 | /* Allocate a register for a constant. */ |
515 | static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | 549 | static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) |
516 | { | 550 | { |
517 | /* First try to find a register which already holds the same constant. */ | 551 | /* First try to find a register which already holds the same constant. */ |
518 | RegSet pick, work = ~as->freeset & RSET_GPR; | 552 | RegSet pick, work = ~as->freeset & RSET_GPR; |
@@ -521,9 +555,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | |||
521 | IRRef ref; | 555 | IRRef ref; |
522 | r = rset_pickbot(work); | 556 | r = rset_pickbot(work); |
523 | ref = regcost_ref(as->cost[r]); | 557 | ref = regcost_ref(as->cost[r]); |
558 | #if LJ_64 | ||
559 | if (ref < ASMREF_L) { | ||
560 | if (ra_iskref(ref)) { | ||
561 | if (k == ra_krefk(as, ref)) | ||
562 | return r; | ||
563 | } else { | ||
564 | IRIns *ir = IR(ref); | ||
565 | if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || | ||
566 | #if LJ_GC64 | ||
567 | (ir->o == IR_KINT && k == ir->i) || | ||
568 | (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || | ||
569 | ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && | ||
570 | k == (intptr_t)ir_kptr(ir)) | ||
571 | #else | ||
572 | (ir->o != IR_KINT64 && k == ir->i) | ||
573 | #endif | ||
574 | ) | ||
575 | return r; | ||
576 | } | ||
577 | } | ||
578 | #else | ||
524 | if (ref < ASMREF_L && | 579 | if (ref < ASMREF_L && |
525 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) | 580 | k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) |
526 | return r; | 581 | return r; |
582 | #endif | ||
527 | rset_clear(work, r); | 583 | rset_clear(work, r); |
528 | } | 584 | } |
529 | pick = as->freeset & allow; | 585 | pick = as->freeset & allow; |
@@ -543,7 +599,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) | |||
543 | } | 599 | } |
544 | 600 | ||
545 | /* Allocate a specific register for a constant. */ | 601 | /* Allocate a specific register for a constant. */ |
546 | static void ra_allockreg(ASMState *as, int32_t k, Reg r) | 602 | static void ra_allockreg(ASMState *as, intptr_t k, Reg r) |
547 | { | 603 | { |
548 | Reg kr = ra_allock(as, k, RID2RSET(r)); | 604 | Reg kr = ra_allock(as, k, RID2RSET(r)); |
549 | if (kr != r) { | 605 | if (kr != r) { |
@@ -566,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) | |||
566 | IRIns *ir = IR(ref); | 622 | IRIns *ir = IR(ref); |
567 | RegSet pick = as->freeset & allow; | 623 | RegSet pick = as->freeset & allow; |
568 | Reg r; | 624 | Reg r; |
569 | lua_assert(ra_noreg(ir->r)); | 625 | lj_assertA(ra_noreg(ir->r), |
626 | "IR %04d already has reg %d", ref - REF_BIAS, ir->r); | ||
570 | if (pick) { | 627 | if (pick) { |
571 | /* First check register hint from propagation or PHI. */ | 628 | /* First check register hint from propagation or PHI. */ |
572 | if (ra_hashint(ir->r)) { | 629 | if (ra_hashint(ir->r)) { |
@@ -613,15 +670,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) | |||
613 | return r; | 670 | return r; |
614 | } | 671 | } |
615 | 672 | ||
673 | /* Add a register rename to the IR. */ | ||
674 | static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno) | ||
675 | { | ||
676 | IRRef ren; | ||
677 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno); | ||
678 | ren = tref_ref(lj_ir_emit(as->J)); | ||
679 | as->J->cur.ir[ren].r = (uint8_t)down; | ||
680 | as->J->cur.ir[ren].s = SPS_NONE; | ||
681 | } | ||
682 | |||
616 | /* Rename register allocation and emit move. */ | 683 | /* Rename register allocation and emit move. */ |
617 | static void ra_rename(ASMState *as, Reg down, Reg up) | 684 | static void ra_rename(ASMState *as, Reg down, Reg up) |
618 | { | 685 | { |
619 | IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); | 686 | IRRef ref = regcost_ref(as->cost[up] = as->cost[down]); |
620 | IRIns *ir = IR(ref); | 687 | IRIns *ir = IR(ref); |
621 | ir->r = (uint8_t)up; | 688 | ir->r = (uint8_t)up; |
622 | as->cost[down] = 0; | 689 | as->cost[down] = 0; |
623 | lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); | 690 | lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR), |
624 | lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); | 691 | "rename between GPR/FPR %d and %d", down, up); |
692 | lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down); | ||
693 | lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up); | ||
625 | ra_free(as, down); /* 'down' is free ... */ | 694 | ra_free(as, down); /* 'down' is free ... */ |
626 | ra_modified(as, down); | 695 | ra_modified(as, down); |
627 | rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ | 696 | rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ |
@@ -629,11 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up) | |||
629 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); | 698 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); |
630 | emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ | 699 | emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ |
631 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ | 700 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ |
632 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); | 701 | /* |
633 | ren = tref_ref(lj_ir_emit(as->J)); | 702 | ** The rename is effective at the subsequent (already emitted) exit |
634 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | 703 | ** branch. This is for the current snapshot (as->snapno). Except if we |
635 | IR(ren)->r = (uint8_t)down; | 704 | ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1), |
636 | IR(ren)->s = SPS_NONE; | 705 | ** then it belongs to the next snapshot. |
706 | ** See also the discussion at asm_snap_checkrename(). | ||
707 | */ | ||
708 | ra_addrename(as, down, ref, as->snapno + as->snapalloc); | ||
637 | } | 709 | } |
638 | } | 710 | } |
639 | 711 | ||
@@ -666,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) | |||
666 | { | 738 | { |
667 | Reg dest = ra_dest(as, ir, RID2RSET(r)); | 739 | Reg dest = ra_dest(as, ir, RID2RSET(r)); |
668 | if (dest != r) { | 740 | if (dest != r) { |
669 | lua_assert(rset_test(as->freeset, r)); | 741 | lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r); |
670 | ra_modified(as, r); | 742 | ra_modified(as, r); |
671 | emit_movrr(as, ir, dest, r); | 743 | emit_movrr(as, ir, dest, r); |
672 | } | 744 | } |
@@ -683,20 +755,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) | |||
683 | if (ra_noreg(left)) { | 755 | if (ra_noreg(left)) { |
684 | if (irref_isk(lref)) { | 756 | if (irref_isk(lref)) { |
685 | if (ir->o == IR_KNUM) { | 757 | if (ir->o == IR_KNUM) { |
686 | cTValue *tv = ir_knum(ir); | ||
687 | /* FP remat needs a load except for +0. Still better than eviction. */ | 758 | /* FP remat needs a load except for +0. Still better than eviction. */ |
688 | if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { | 759 | if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { |
689 | emit_loadn(as, dest, tv); | 760 | emit_loadk64(as, dest, ir); |
690 | return; | 761 | return; |
691 | } | 762 | } |
692 | #if LJ_64 | 763 | #if LJ_64 |
693 | } else if (ir->o == IR_KINT64) { | 764 | } else if (ir->o == IR_KINT64) { |
694 | emit_loadu64(as, dest, ir_kint64(ir)->u64); | 765 | emit_loadk64(as, dest, ir); |
766 | return; | ||
767 | #if LJ_GC64 | ||
768 | } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
769 | emit_loadk64(as, dest, ir); | ||
695 | return; | 770 | return; |
696 | #endif | 771 | #endif |
697 | } else { | 772 | #endif |
698 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 773 | } else if (ir->o != IR_KPRI) { |
699 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); | 774 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || |
775 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, | ||
776 | "K%03d has bad IR op %d", REF_BIAS - lref, ir->o); | ||
700 | emit_loadi(as, dest, ir->i); | 777 | emit_loadi(as, dest, ir->i); |
701 | return; | 778 | return; |
702 | } | 779 | } |
@@ -741,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref) | |||
741 | } | 818 | } |
742 | #endif | 819 | #endif |
743 | 820 | ||
744 | #if !LJ_64 | ||
745 | /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ | 821 | /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ |
746 | static void ra_destpair(ASMState *as, IRIns *ir) | 822 | static void ra_destpair(ASMState *as, IRIns *ir) |
747 | { | 823 | { |
748 | Reg destlo = ir->r, desthi = (ir+1)->r; | 824 | Reg destlo = ir->r, desthi = (ir+1)->r; |
825 | IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir; | ||
749 | /* First spill unrelated refs blocking the destination registers. */ | 826 | /* First spill unrelated refs blocking the destination registers. */ |
750 | if (!rset_test(as->freeset, RID_RETLO) && | 827 | if (!rset_test(as->freeset, RID_RETLO) && |
751 | destlo != RID_RETLO && desthi != RID_RETLO) | 828 | destlo != RID_RETLO && desthi != RID_RETLO) |
@@ -769,29 +846,28 @@ static void ra_destpair(ASMState *as, IRIns *ir) | |||
769 | /* Check for conflicts and shuffle the registers as needed. */ | 846 | /* Check for conflicts and shuffle the registers as needed. */ |
770 | if (destlo == RID_RETHI) { | 847 | if (destlo == RID_RETHI) { |
771 | if (desthi == RID_RETLO) { | 848 | if (desthi == RID_RETLO) { |
772 | #if LJ_TARGET_X86 | 849 | #if LJ_TARGET_X86ORX64 |
773 | *--as->mcp = XI_XCHGa + RID_RETHI; | 850 | *--as->mcp = REX_64IR(irx, XI_XCHGa + RID_RETHI); |
774 | #else | 851 | #else |
775 | emit_movrr(as, ir, RID_RETHI, RID_TMP); | 852 | emit_movrr(as, irx, RID_RETHI, RID_TMP); |
776 | emit_movrr(as, ir, RID_RETLO, RID_RETHI); | 853 | emit_movrr(as, irx, RID_RETLO, RID_RETHI); |
777 | emit_movrr(as, ir, RID_TMP, RID_RETLO); | 854 | emit_movrr(as, irx, RID_TMP, RID_RETLO); |
778 | #endif | 855 | #endif |
779 | } else { | 856 | } else { |
780 | emit_movrr(as, ir, RID_RETHI, RID_RETLO); | 857 | emit_movrr(as, irx, RID_RETHI, RID_RETLO); |
781 | if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); | 858 | if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); |
782 | } | 859 | } |
783 | } else if (desthi == RID_RETLO) { | 860 | } else if (desthi == RID_RETLO) { |
784 | emit_movrr(as, ir, RID_RETLO, RID_RETHI); | 861 | emit_movrr(as, irx, RID_RETLO, RID_RETHI); |
785 | if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); | 862 | if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); |
786 | } else { | 863 | } else { |
787 | if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); | 864 | if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); |
788 | if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); | 865 | if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); |
789 | } | 866 | } |
790 | /* Restore spill slots (if any). */ | 867 | /* Restore spill slots (if any). */ |
791 | if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); | 868 | if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); |
792 | if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); | 869 | if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); |
793 | } | 870 | } |
794 | #endif | ||
795 | 871 | ||
796 | /* -- Snapshot handling --------- ----------------------------------------- */ | 872 | /* -- Snapshot handling --------- ----------------------------------------- */ |
797 | 873 | ||
@@ -841,11 +917,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) | |||
841 | #endif | 917 | #endif |
842 | { /* Allocate stored values for TNEW, TDUP and CNEW. */ | 918 | { /* Allocate stored values for TNEW, TDUP and CNEW. */ |
843 | IRIns *irs; | 919 | IRIns *irs; |
844 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); | 920 | lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW, |
921 | "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o); | ||
845 | for (irs = IR(as->snapref-1); irs > ir; irs--) | 922 | for (irs = IR(as->snapref-1); irs > ir; irs--) |
846 | if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { | 923 | if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { |
847 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | 924 | lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE || |
848 | irs->o == IR_FSTORE || irs->o == IR_XSTORE); | 925 | irs->o == IR_FSTORE || irs->o == IR_XSTORE, |
926 | "sunk store IR %04d has bad op %d", | ||
927 | (int)(irs - as->ir) - REF_BIAS, irs->o); | ||
849 | asm_snap_alloc1(as, irs->op2); | 928 | asm_snap_alloc1(as, irs->op2); |
850 | if (LJ_32 && (irs+1)->o == IR_HIOP) | 929 | if (LJ_32 && (irs+1)->o == IR_HIOP) |
851 | asm_snap_alloc1(as, (irs+1)->op2); | 930 | asm_snap_alloc1(as, (irs+1)->op2); |
@@ -881,9 +960,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) | |||
881 | } | 960 | } |
882 | 961 | ||
883 | /* Allocate refs escaping to a snapshot. */ | 962 | /* Allocate refs escaping to a snapshot. */ |
884 | static void asm_snap_alloc(ASMState *as) | 963 | static void asm_snap_alloc(ASMState *as, int snapno) |
885 | { | 964 | { |
886 | SnapShot *snap = &as->T->snap[as->snapno]; | 965 | SnapShot *snap = &as->T->snap[snapno]; |
887 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 966 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
888 | MSize n, nent = snap->nent; | 967 | MSize n, nent = snap->nent; |
889 | as->snapfilt1 = as->snapfilt2 = 0; | 968 | as->snapfilt1 = as->snapfilt2 = 0; |
@@ -893,7 +972,9 @@ static void asm_snap_alloc(ASMState *as) | |||
893 | if (!irref_isk(ref)) { | 972 | if (!irref_isk(ref)) { |
894 | asm_snap_alloc1(as, ref); | 973 | asm_snap_alloc1(as, ref); |
895 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { | 974 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { |
896 | lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); | 975 | lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP, |
976 | "snap %d[%d] points to bad SOFTFP IR %04d", | ||
977 | snapno, n, ref - REF_BIAS); | ||
897 | asm_snap_alloc1(as, ref+1); | 978 | asm_snap_alloc1(as, ref+1); |
898 | } | 979 | } |
899 | } | 980 | } |
@@ -919,67 +1000,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren) | |||
919 | return 0; /* Not found. */ | 1000 | return 0; /* Not found. */ |
920 | } | 1001 | } |
921 | 1002 | ||
922 | /* Prepare snapshot for next guard instruction. */ | 1003 | /* Prepare snapshot for next guard or throwing instruction. */ |
923 | static void asm_snap_prep(ASMState *as) | 1004 | static void asm_snap_prep(ASMState *as) |
924 | { | 1005 | { |
925 | if (as->curins < as->snapref) { | 1006 | if (as->snapalloc) { |
926 | do { | 1007 | /* Alloc on first invocation for each snapshot. */ |
927 | if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ | 1008 | as->snapalloc = 0; |
928 | as->snapno--; | 1009 | asm_snap_alloc(as, as->snapno); |
929 | as->snapref = as->T->snap[as->snapno].ref; | ||
930 | } while (as->curins < as->snapref); | ||
931 | asm_snap_alloc(as); | ||
932 | as->snaprename = as->T->nins; | 1010 | as->snaprename = as->T->nins; |
933 | } else { | 1011 | } else { |
934 | /* Process any renames above the highwater mark. */ | 1012 | /* Check any renames above the highwater mark. */ |
935 | for (; as->snaprename < as->T->nins; as->snaprename++) { | 1013 | for (; as->snaprename < as->T->nins; as->snaprename++) { |
936 | IRIns *ir = IR(as->snaprename); | 1014 | IRIns *ir = &as->T->ir[as->snaprename]; |
937 | if (asm_snap_checkrename(as, ir->op1)) | 1015 | if (asm_snap_checkrename(as, ir->op1)) |
938 | ir->op2 = REF_BIAS-1; /* Kill rename. */ | 1016 | ir->op2 = REF_BIAS-1; /* Kill rename. */ |
939 | } | 1017 | } |
940 | } | 1018 | } |
941 | } | 1019 | } |
942 | 1020 | ||
943 | /* -- Miscellaneous helpers ----------------------------------------------- */ | 1021 | /* Move to previous snapshot when we cross the current snapshot ref. */ |
944 | 1022 | static void asm_snap_prev(ASMState *as) | |
945 | /* Collect arguments from CALL* and CARG instructions. */ | ||
946 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
947 | const CCallInfo *ci, IRRef *args) | ||
948 | { | 1023 | { |
949 | uint32_t n = CCI_NARGS(ci); | 1024 | if (as->curins < as->snapref) { |
950 | lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ | 1025 | uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp); |
951 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | 1026 | if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV); |
952 | while (n-- > 1) { | 1027 | do { |
953 | ir = IR(ir->op1); | 1028 | if (as->snapno == 0) return; |
954 | lua_assert(ir->o == IR_CARG); | 1029 | as->snapno--; |
955 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | 1030 | as->snapref = as->T->snap[as->snapno].ref; |
1031 | as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */ | ||
1032 | } while (as->curins < as->snapref); /* May have no ins inbetween. */ | ||
1033 | as->snapalloc = 1; | ||
956 | } | 1034 | } |
957 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
958 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
959 | } | 1035 | } |
960 | 1036 | ||
961 | /* Reconstruct CCallInfo flags for CALLX*. */ | 1037 | /* Fixup snapshot mcode offsets. */ |
962 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | 1038 | static void asm_snap_fixup_mcofs(ASMState *as) |
963 | { | 1039 | { |
964 | uint32_t nargs = 0; | 1040 | uint32_t sz = (uint32_t)(as->mctoporig - as->mcp); |
965 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | 1041 | SnapShot *snap = as->T->snap; |
966 | IRIns *ira = IR(ir->op1); | 1042 | SnapNo i; |
967 | nargs++; | 1043 | for (i = as->T->nsnap-1; i > 0; i--) { |
968 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | 1044 | /* Compute offset from mcode start and store in correct snapshot. */ |
1045 | snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs); | ||
969 | } | 1046 | } |
970 | #if LJ_HASFFI | 1047 | snap[0].mcofs = 0; |
971 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
972 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
973 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
974 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
975 | #if LJ_TARGET_X86 | ||
976 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
977 | #endif | ||
978 | } | ||
979 | #endif | ||
980 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
981 | } | 1048 | } |
982 | 1049 | ||
1050 | /* -- Miscellaneous helpers ----------------------------------------------- */ | ||
1051 | |||
983 | /* Calculate stack adjustment. */ | 1052 | /* Calculate stack adjustment. */ |
984 | static int32_t asm_stack_adjust(ASMState *as) | 1053 | static int32_t asm_stack_adjust(ASMState *as) |
985 | { | 1054 | { |
@@ -989,21 +1058,26 @@ static int32_t asm_stack_adjust(ASMState *as) | |||
989 | } | 1058 | } |
990 | 1059 | ||
991 | /* Must match with hash*() in lj_tab.c. */ | 1060 | /* Must match with hash*() in lj_tab.c. */ |
992 | static uint32_t ir_khash(IRIns *ir) | 1061 | static uint32_t ir_khash(ASMState *as, IRIns *ir) |
993 | { | 1062 | { |
994 | uint32_t lo, hi; | 1063 | uint32_t lo, hi; |
1064 | UNUSED(as); | ||
995 | if (irt_isstr(ir->t)) { | 1065 | if (irt_isstr(ir->t)) { |
996 | return ir_kstr(ir)->hash; | 1066 | return ir_kstr(ir)->sid; |
997 | } else if (irt_isnum(ir->t)) { | 1067 | } else if (irt_isnum(ir->t)) { |
998 | lo = ir_knum(ir)->u32.lo; | 1068 | lo = ir_knum(ir)->u32.lo; |
999 | hi = ir_knum(ir)->u32.hi << 1; | 1069 | hi = ir_knum(ir)->u32.hi << 1; |
1000 | } else if (irt_ispri(ir->t)) { | 1070 | } else if (irt_ispri(ir->t)) { |
1001 | lua_assert(!irt_isnil(ir->t)); | 1071 | lj_assertA(!irt_isnil(ir->t), "hash of nil key"); |
1002 | return irt_type(ir->t)-IRT_FALSE; | 1072 | return irt_type(ir->t)-IRT_FALSE; |
1003 | } else { | 1073 | } else { |
1004 | lua_assert(irt_isgcv(ir->t)); | 1074 | lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t)); |
1005 | lo = u32ptr(ir_kgc(ir)); | 1075 | lo = u32ptr(ir_kgc(ir)); |
1076 | #if LJ_GC64 | ||
1077 | hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); | ||
1078 | #else | ||
1006 | hi = lo + HASH_BIAS; | 1079 | hi = lo + HASH_BIAS; |
1080 | #endif | ||
1007 | } | 1081 | } |
1008 | return hashrot(lo, hi); | 1082 | return hashrot(lo, hi); |
1009 | } | 1083 | } |
@@ -1017,6 +1091,7 @@ static void asm_snew(ASMState *as, IRIns *ir) | |||
1017 | { | 1091 | { |
1018 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; | 1092 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; |
1019 | IRRef args[3]; | 1093 | IRRef args[3]; |
1094 | asm_snap_prep(as); | ||
1020 | args[0] = ASMREF_L; /* lua_State *L */ | 1095 | args[0] = ASMREF_L; /* lua_State *L */ |
1021 | args[1] = ir->op1; /* const char *str */ | 1096 | args[1] = ir->op1; /* const char *str */ |
1022 | args[2] = ir->op2; /* size_t len */ | 1097 | args[2] = ir->op2; /* size_t len */ |
@@ -1029,6 +1104,7 @@ static void asm_tnew(ASMState *as, IRIns *ir) | |||
1029 | { | 1104 | { |
1030 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; | 1105 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; |
1031 | IRRef args[2]; | 1106 | IRRef args[2]; |
1107 | asm_snap_prep(as); | ||
1032 | args[0] = ASMREF_L; /* lua_State *L */ | 1108 | args[0] = ASMREF_L; /* lua_State *L */ |
1033 | args[1] = ASMREF_TMP1; /* uint32_t ahsize */ | 1109 | args[1] = ASMREF_TMP1; /* uint32_t ahsize */ |
1034 | as->gcsteps++; | 1110 | as->gcsteps++; |
@@ -1041,6 +1117,7 @@ static void asm_tdup(ASMState *as, IRIns *ir) | |||
1041 | { | 1117 | { |
1042 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; | 1118 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; |
1043 | IRRef args[2]; | 1119 | IRRef args[2]; |
1120 | asm_snap_prep(as); | ||
1044 | args[0] = ASMREF_L; /* lua_State *L */ | 1121 | args[0] = ASMREF_L; /* lua_State *L */ |
1045 | args[1] = ir->op1; /* const GCtab *kt */ | 1122 | args[1] = ir->op1; /* const GCtab *kt */ |
1046 | as->gcsteps++; | 1123 | as->gcsteps++; |
@@ -1064,6 +1141,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir) | |||
1064 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ | 1141 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ |
1065 | } | 1142 | } |
1066 | 1143 | ||
1144 | /* -- Buffer operations --------------------------------------------------- */ | ||
1145 | |||
1146 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode); | ||
1147 | #if LJ_HASBUFFER | ||
1148 | static void asm_bufhdr_write(ASMState *as, Reg sb); | ||
1149 | #endif | ||
1150 | |||
1151 | static void asm_bufhdr(ASMState *as, IRIns *ir) | ||
1152 | { | ||
1153 | Reg sb = ra_dest(as, ir, RSET_GPR); | ||
1154 | switch (ir->op2) { | ||
1155 | case IRBUFHDR_RESET: { | ||
1156 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
1157 | IRIns irbp; | ||
1158 | irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */ | ||
1159 | emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w)); | ||
1160 | emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b)); | ||
1161 | break; | ||
1162 | } | ||
1163 | case IRBUFHDR_APPEND: { | ||
1164 | /* Rematerialize const buffer pointer instead of likely spill. */ | ||
1165 | IRIns *irp = IR(ir->op1); | ||
1166 | if (!(ra_hasreg(irp->r) || irp == ir-1 || | ||
1167 | (irp == ir-2 && !ra_used(ir-1)))) { | ||
1168 | while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET)) | ||
1169 | irp = IR(irp->op1); | ||
1170 | if (irref_isk(irp->op1)) { | ||
1171 | ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); | ||
1172 | ir = irp; | ||
1173 | } | ||
1174 | } | ||
1175 | break; | ||
1176 | } | ||
1177 | #if LJ_HASBUFFER | ||
1178 | case IRBUFHDR_WRITE: | ||
1179 | asm_bufhdr_write(as, sb); | ||
1180 | break; | ||
1181 | #endif | ||
1182 | default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break; | ||
1183 | } | ||
1184 | #if LJ_TARGET_X86ORX64 | ||
1185 | ra_left(as, sb, ir->op1); | ||
1186 | #else | ||
1187 | ra_leftov(as, sb, ir->op1); | ||
1188 | #endif | ||
1189 | } | ||
1190 | |||
1191 | static void asm_bufput(ASMState *as, IRIns *ir) | ||
1192 | { | ||
1193 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; | ||
1194 | IRRef args[3]; | ||
1195 | IRIns *irs; | ||
1196 | int kchar = -129; | ||
1197 | args[0] = ir->op1; /* SBuf * */ | ||
1198 | args[1] = ir->op2; /* GCstr * */ | ||
1199 | irs = IR(ir->op2); | ||
1200 | lj_assertA(irt_isstr(irs->t), | ||
1201 | "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS); | ||
1202 | if (irs->o == IR_KGC) { | ||
1203 | GCstr *s = ir_kstr(irs); | ||
1204 | if (s->len == 1) { /* Optimize put of single-char string constant. */ | ||
1205 | kchar = (int8_t)strdata(s)[0]; /* Signed! */ | ||
1206 | args[1] = ASMREF_TMP1; /* int, truncated to char */ | ||
1207 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1208 | } | ||
1209 | } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) { | ||
1210 | if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */ | ||
1211 | if (irs->op2 == IRTOSTR_NUM) { | ||
1212 | args[1] = ASMREF_TMP1; /* TValue * */ | ||
1213 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; | ||
1214 | } else { | ||
1215 | lj_assertA(irt_isinteger(IR(irs->op1)->t), | ||
1216 | "TOSTR of non-numeric IR %04d", irs->op1); | ||
1217 | args[1] = irs->op1; /* int */ | ||
1218 | if (irs->op2 == IRTOSTR_INT) | ||
1219 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; | ||
1220 | else | ||
1221 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1222 | } | ||
1223 | } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */ | ||
1224 | args[1] = irs->op1; /* const void * */ | ||
1225 | args[2] = irs->op2; /* MSize */ | ||
1226 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem]; | ||
1227 | } | ||
1228 | } | ||
1229 | asm_setupresult(as, ir, ci); /* SBuf * */ | ||
1230 | asm_gencall(as, ci, args); | ||
1231 | if (args[1] == ASMREF_TMP1) { | ||
1232 | Reg tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
1233 | if (kchar == -129) | ||
1234 | asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1); | ||
1235 | else | ||
1236 | ra_allockreg(as, kchar, tmp); | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | static void asm_bufstr(ASMState *as, IRIns *ir) | ||
1241 | { | ||
1242 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; | ||
1243 | IRRef args[1]; | ||
1244 | args[0] = ir->op1; /* SBuf *sb */ | ||
1245 | as->gcsteps++; | ||
1246 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1247 | asm_gencall(as, ci, args); | ||
1248 | } | ||
1249 | |||
1250 | /* -- Type conversions ---------------------------------------------------- */ | ||
1251 | |||
1252 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
1253 | { | ||
1254 | const CCallInfo *ci; | ||
1255 | IRRef args[2]; | ||
1256 | asm_snap_prep(as); | ||
1257 | args[0] = ASMREF_L; | ||
1258 | as->gcsteps++; | ||
1259 | if (ir->op2 == IRTOSTR_NUM) { | ||
1260 | args[1] = ASMREF_TMP1; /* cTValue * */ | ||
1261 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num]; | ||
1262 | } else { | ||
1263 | args[1] = ir->op1; /* int32_t k */ | ||
1264 | if (ir->op2 == IRTOSTR_INT) | ||
1265 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int]; | ||
1266 | else | ||
1267 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char]; | ||
1268 | } | ||
1269 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1270 | asm_gencall(as, ci, args); | ||
1271 | if (ir->op2 == IRTOSTR_NUM) | ||
1272 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1); | ||
1273 | } | ||
1274 | |||
1275 | #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 | ||
1276 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
1277 | { | ||
1278 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
1279 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
1280 | IRCallID id; | ||
1281 | IRRef args[2]; | ||
1282 | lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, | ||
1283 | "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); | ||
1284 | args[LJ_BE] = (ir-1)->op1; | ||
1285 | args[LJ_LE] = ir->op1; | ||
1286 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
1287 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
1288 | ir--; | ||
1289 | } else { | ||
1290 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
1291 | } | ||
1292 | { | ||
1293 | #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP | ||
1294 | CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; | ||
1295 | cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
1296 | #else | ||
1297 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1298 | #endif | ||
1299 | asm_setupresult(as, ir, ci); | ||
1300 | asm_gencall(as, ci, args); | ||
1301 | } | ||
1302 | } | ||
1303 | #endif | ||
1304 | |||
1305 | /* -- Memory references --------------------------------------------------- */ | ||
1306 | |||
1307 | static void asm_newref(ASMState *as, IRIns *ir) | ||
1308 | { | ||
1309 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
1310 | IRRef args[3]; | ||
1311 | if (ir->r == RID_SINK) | ||
1312 | return; | ||
1313 | asm_snap_prep(as); | ||
1314 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1315 | args[1] = ir->op1; /* GCtab *t */ | ||
1316 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
1317 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
1318 | asm_gencall(as, ci, args); | ||
1319 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1); | ||
1320 | } | ||
1321 | |||
1322 | static void asm_tmpref(ASMState *as, IRIns *ir) | ||
1323 | { | ||
1324 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1325 | asm_tvptr(as, r, ir->op1, ir->op2); | ||
1326 | } | ||
1327 | |||
1328 | static void asm_lref(ASMState *as, IRIns *ir) | ||
1329 | { | ||
1330 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1331 | #if LJ_TARGET_X86ORX64 | ||
1332 | ra_left(as, r, ASMREF_L); | ||
1333 | #else | ||
1334 | ra_leftov(as, r, ASMREF_L); | ||
1335 | #endif | ||
1336 | } | ||
1337 | |||
1338 | /* -- Calls --------------------------------------------------------------- */ | ||
1339 | |||
1340 | /* Collect arguments from CALL* and CARG instructions. */ | ||
1341 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
1342 | const CCallInfo *ci, IRRef *args) | ||
1343 | { | ||
1344 | uint32_t n = CCI_XNARGS(ci); | ||
1345 | /* Account for split args. */ | ||
1346 | lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n); | ||
1347 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
1348 | while (n-- > 1) { | ||
1349 | ir = IR(ir->op1); | ||
1350 | lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree"); | ||
1351 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | ||
1352 | } | ||
1353 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
1354 | lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree"); | ||
1355 | } | ||
1356 | |||
1357 | /* Reconstruct CCallInfo flags for CALLX*. */ | ||
1358 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | ||
1359 | { | ||
1360 | uint32_t nargs = 0; | ||
1361 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | ||
1362 | IRIns *ira = IR(ir->op1); | ||
1363 | nargs++; | ||
1364 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | ||
1365 | } | ||
1366 | #if LJ_HASFFI | ||
1367 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
1368 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
1369 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
1370 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
1371 | #if LJ_TARGET_X86 | ||
1372 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
1373 | #endif | ||
1374 | } | ||
1375 | #endif | ||
1376 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
1377 | } | ||
1378 | |||
1379 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
1380 | { | ||
1381 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1382 | IRRef args[2]; | ||
1383 | args[0] = ir->op1; | ||
1384 | args[1] = ir->op2; | ||
1385 | asm_setupresult(as, ir, ci); | ||
1386 | asm_gencall(as, ci, args); | ||
1387 | } | ||
1388 | |||
1389 | static void asm_call(ASMState *as, IRIns *ir) | ||
1390 | { | ||
1391 | IRRef args[CCI_NARGS_MAX]; | ||
1392 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
1393 | asm_collectargs(as, ir, ci, args); | ||
1394 | asm_setupresult(as, ir, ci); | ||
1395 | asm_gencall(as, ci, args); | ||
1396 | } | ||
1397 | |||
1067 | /* -- PHI and loop handling ----------------------------------------------- */ | 1398 | /* -- PHI and loop handling ----------------------------------------------- */ |
1068 | 1399 | ||
1069 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ | 1400 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ |
@@ -1249,12 +1580,7 @@ static void asm_phi_fixup(ASMState *as) | |||
1249 | irt_clearmark(ir->t); | 1580 | irt_clearmark(ir->t); |
1250 | /* Left PHI gained a spill slot before the loop? */ | 1581 | /* Left PHI gained a spill slot before the loop? */ |
1251 | if (ra_hasspill(ir->s)) { | 1582 | if (ra_hasspill(ir->s)) { |
1252 | IRRef ren; | 1583 | ra_addrename(as, r, lref, as->loopsnapno); |
1253 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); | ||
1254 | ren = tref_ref(lj_ir_emit(as->J)); | ||
1255 | as->ir = as->T->ir; /* The IR may have been reallocated. */ | ||
1256 | IR(ren)->r = (uint8_t)r; | ||
1257 | IR(ren)->s = SPS_NONE; | ||
1258 | } | 1584 | } |
1259 | } | 1585 | } |
1260 | rset_clear(work, r); | 1586 | rset_clear(work, r); |
@@ -1329,6 +1655,8 @@ static void asm_loop(ASMState *as) | |||
1329 | #include "lj_asm_x86.h" | 1655 | #include "lj_asm_x86.h" |
1330 | #elif LJ_TARGET_ARM | 1656 | #elif LJ_TARGET_ARM |
1331 | #include "lj_asm_arm.h" | 1657 | #include "lj_asm_arm.h" |
1658 | #elif LJ_TARGET_ARM64 | ||
1659 | #include "lj_asm_arm64.h" | ||
1332 | #elif LJ_TARGET_PPC | 1660 | #elif LJ_TARGET_PPC |
1333 | #include "lj_asm_ppc.h" | 1661 | #include "lj_asm_ppc.h" |
1334 | #elif LJ_TARGET_MIPS | 1662 | #elif LJ_TARGET_MIPS |
@@ -1337,6 +1665,204 @@ static void asm_loop(ASMState *as) | |||
1337 | #error "Missing assembler for target CPU" | 1665 | #error "Missing assembler for target CPU" |
1338 | #endif | 1666 | #endif |
1339 | 1667 | ||
1668 | /* -- Common instruction helpers ------------------------------------------ */ | ||
1669 | |||
1670 | #if !LJ_SOFTFP32 | ||
1671 | #if !LJ_TARGET_X86ORX64 | ||
1672 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1673 | #define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
1674 | #endif | ||
1675 | |||
1676 | static void asm_pow(ASMState *as, IRIns *ir) | ||
1677 | { | ||
1678 | #if LJ_64 && LJ_HASFFI | ||
1679 | if (!irt_isnum(ir->t)) | ||
1680 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
1681 | IRCALL_lj_carith_powu64); | ||
1682 | else | ||
1683 | #endif | ||
1684 | if (irt_isnum(IR(ir->op2)->t)) | ||
1685 | asm_callid(as, ir, IRCALL_pow); | ||
1686 | else | ||
1687 | asm_fppowi(as, ir); | ||
1688 | } | ||
1689 | |||
1690 | static void asm_div(ASMState *as, IRIns *ir) | ||
1691 | { | ||
1692 | #if LJ_64 && LJ_HASFFI | ||
1693 | if (!irt_isnum(ir->t)) | ||
1694 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
1695 | IRCALL_lj_carith_divu64); | ||
1696 | else | ||
1697 | #endif | ||
1698 | asm_fpdiv(as, ir); | ||
1699 | } | ||
1700 | #endif | ||
1701 | |||
1702 | static void asm_mod(ASMState *as, IRIns *ir) | ||
1703 | { | ||
1704 | #if LJ_64 && LJ_HASFFI | ||
1705 | if (!irt_isint(ir->t)) | ||
1706 | asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
1707 | IRCALL_lj_carith_modu64); | ||
1708 | else | ||
1709 | #endif | ||
1710 | asm_callid(as, ir, IRCALL_lj_vm_modi); | ||
1711 | } | ||
1712 | |||
1713 | static void asm_fuseequal(ASMState *as, IRIns *ir) | ||
1714 | { | ||
1715 | /* Fuse HREF + EQ/NE. */ | ||
1716 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1717 | as->curins--; | ||
1718 | asm_href(as, ir-1, (IROp)ir->o); | ||
1719 | } else { | ||
1720 | asm_equal(as, ir); | ||
1721 | } | ||
1722 | } | ||
1723 | |||
1724 | static void asm_alen(ASMState *as, IRIns *ir) | ||
1725 | { | ||
1726 | asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len : | ||
1727 | IRCALL_lj_tab_len_hint); | ||
1728 | } | ||
1729 | |||
1730 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1731 | |||
1732 | /* Assemble a single instruction. */ | ||
1733 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1734 | { | ||
1735 | switch ((IROp)ir->o) { | ||
1736 | /* Miscellaneous ops. */ | ||
1737 | case IR_LOOP: asm_loop(as); break; | ||
1738 | case IR_NOP: case IR_XBAR: | ||
1739 | lj_assertA(!ra_used(ir), | ||
1740 | "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS); | ||
1741 | break; | ||
1742 | case IR_USE: | ||
1743 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1744 | case IR_PHI: asm_phi(as, ir); break; | ||
1745 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1746 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1747 | case IR_PROF: asm_prof(as, ir); break; | ||
1748 | |||
1749 | /* Guarded assertions. */ | ||
1750 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1751 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1752 | case IR_ABC: | ||
1753 | asm_comp(as, ir); | ||
1754 | break; | ||
1755 | case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break; | ||
1756 | |||
1757 | case IR_RETF: asm_retf(as, ir); break; | ||
1758 | |||
1759 | /* Bit ops. */ | ||
1760 | case IR_BNOT: asm_bnot(as, ir); break; | ||
1761 | case IR_BSWAP: asm_bswap(as, ir); break; | ||
1762 | case IR_BAND: asm_band(as, ir); break; | ||
1763 | case IR_BOR: asm_bor(as, ir); break; | ||
1764 | case IR_BXOR: asm_bxor(as, ir); break; | ||
1765 | case IR_BSHL: asm_bshl(as, ir); break; | ||
1766 | case IR_BSHR: asm_bshr(as, ir); break; | ||
1767 | case IR_BSAR: asm_bsar(as, ir); break; | ||
1768 | case IR_BROL: asm_brol(as, ir); break; | ||
1769 | case IR_BROR: asm_bror(as, ir); break; | ||
1770 | |||
1771 | /* Arithmetic ops. */ | ||
1772 | case IR_ADD: asm_add(as, ir); break; | ||
1773 | case IR_SUB: asm_sub(as, ir); break; | ||
1774 | case IR_MUL: asm_mul(as, ir); break; | ||
1775 | case IR_MOD: asm_mod(as, ir); break; | ||
1776 | case IR_NEG: asm_neg(as, ir); break; | ||
1777 | #if LJ_SOFTFP32 | ||
1778 | case IR_DIV: case IR_POW: case IR_ABS: | ||
1779 | case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
1780 | /* Unused for LJ_SOFTFP32. */ | ||
1781 | lj_assertA(0, "IR %04d with unused op %d", | ||
1782 | (int)(ir - as->ir) - REF_BIAS, ir->o); | ||
1783 | break; | ||
1784 | #else | ||
1785 | case IR_DIV: asm_div(as, ir); break; | ||
1786 | case IR_POW: asm_pow(as, ir); break; | ||
1787 | case IR_ABS: asm_abs(as, ir); break; | ||
1788 | case IR_LDEXP: asm_ldexp(as, ir); break; | ||
1789 | case IR_FPMATH: asm_fpmath(as, ir); break; | ||
1790 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
1791 | #endif | ||
1792 | case IR_MIN: asm_min(as, ir); break; | ||
1793 | case IR_MAX: asm_max(as, ir); break; | ||
1794 | |||
1795 | /* Overflow-checking arithmetic ops. */ | ||
1796 | case IR_ADDOV: asm_addov(as, ir); break; | ||
1797 | case IR_SUBOV: asm_subov(as, ir); break; | ||
1798 | case IR_MULOV: asm_mulov(as, ir); break; | ||
1799 | |||
1800 | /* Memory references. */ | ||
1801 | case IR_AREF: asm_aref(as, ir); break; | ||
1802 | case IR_HREF: asm_href(as, ir, 0); break; | ||
1803 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
1804 | case IR_NEWREF: asm_newref(as, ir); break; | ||
1805 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
1806 | case IR_FREF: asm_fref(as, ir); break; | ||
1807 | case IR_TMPREF: asm_tmpref(as, ir); break; | ||
1808 | case IR_STRREF: asm_strref(as, ir); break; | ||
1809 | case IR_LREF: asm_lref(as, ir); break; | ||
1810 | |||
1811 | /* Loads and stores. */ | ||
1812 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1813 | asm_ahuvload(as, ir); | ||
1814 | break; | ||
1815 | case IR_FLOAD: asm_fload(as, ir); break; | ||
1816 | case IR_XLOAD: asm_xload(as, ir); break; | ||
1817 | case IR_SLOAD: asm_sload(as, ir); break; | ||
1818 | case IR_ALEN: asm_alen(as, ir); break; | ||
1819 | |||
1820 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
1821 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
1822 | case IR_XSTORE: asm_xstore(as, ir); break; | ||
1823 | |||
1824 | /* Allocations. */ | ||
1825 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
1826 | case IR_TNEW: asm_tnew(as, ir); break; | ||
1827 | case IR_TDUP: asm_tdup(as, ir); break; | ||
1828 | case IR_CNEW: case IR_CNEWI: | ||
1829 | #if LJ_HASFFI | ||
1830 | asm_cnew(as, ir); | ||
1831 | #else | ||
1832 | lj_assertA(0, "IR %04d with unused op %d", | ||
1833 | (int)(ir - as->ir) - REF_BIAS, ir->o); | ||
1834 | #endif | ||
1835 | break; | ||
1836 | |||
1837 | /* Buffer operations. */ | ||
1838 | case IR_BUFHDR: asm_bufhdr(as, ir); break; | ||
1839 | case IR_BUFPUT: asm_bufput(as, ir); break; | ||
1840 | case IR_BUFSTR: asm_bufstr(as, ir); break; | ||
1841 | |||
1842 | /* Write barriers. */ | ||
1843 | case IR_TBAR: asm_tbar(as, ir); break; | ||
1844 | case IR_OBAR: asm_obar(as, ir); break; | ||
1845 | |||
1846 | /* Type conversions. */ | ||
1847 | case IR_CONV: asm_conv(as, ir); break; | ||
1848 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
1849 | case IR_STRTO: asm_strto(as, ir); break; | ||
1850 | |||
1851 | /* Calls. */ | ||
1852 | case IR_CALLA: | ||
1853 | as->gcsteps++; | ||
1854 | /* fallthrough */ | ||
1855 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
1856 | case IR_CALLXS: asm_callx(as, ir); break; | ||
1857 | case IR_CARG: break; | ||
1858 | |||
1859 | default: | ||
1860 | setintV(&as->J->errinfo, ir->o); | ||
1861 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1862 | break; | ||
1863 | } | ||
1864 | } | ||
1865 | |||
1340 | /* -- Head of trace ------------------------------------------------------- */ | 1866 | /* -- Head of trace ------------------------------------------------------- */ |
1341 | 1867 | ||
1342 | /* Head of a root trace. */ | 1868 | /* Head of a root trace. */ |
@@ -1373,8 +1899,7 @@ static void asm_head_side(ASMState *as) | |||
1373 | 1899 | ||
1374 | if (as->snapno && as->topslot > as->parent->topslot) { | 1900 | if (as->snapno && as->topslot > as->parent->topslot) { |
1375 | /* Force snap #0 alloc to prevent register overwrite in stack check. */ | 1901 | /* Force snap #0 alloc to prevent register overwrite in stack check. */ |
1376 | as->snapno = 0; | 1902 | asm_snap_alloc(as, 0); |
1377 | asm_snap_alloc(as); | ||
1378 | } | 1903 | } |
1379 | allow = asm_head_side_base(as, irp, allow); | 1904 | allow = asm_head_side_base(as, irp, allow); |
1380 | 1905 | ||
@@ -1382,8 +1907,10 @@ static void asm_head_side(ASMState *as) | |||
1382 | for (i = as->stopins; i > REF_BASE; i--) { | 1907 | for (i = as->stopins; i > REF_BASE; i--) { |
1383 | IRIns *ir = IR(i); | 1908 | IRIns *ir = IR(i); |
1384 | RegSP rs; | 1909 | RegSP rs; |
1385 | lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || | 1910 | lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || |
1386 | (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); | 1911 | (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL, |
1912 | "IR %04d has bad parent op %d", | ||
1913 | (int)(ir - as->ir) - REF_BIAS, ir->o); | ||
1387 | rs = as->parentmap[i - REF_FIRST]; | 1914 | rs = as->parentmap[i - REF_FIRST]; |
1388 | if (ra_hasreg(ir->r)) { | 1915 | if (ra_hasreg(ir->r)) { |
1389 | rset_clear(allow, ir->r); | 1916 | rset_clear(allow, ir->r); |
@@ -1535,7 +2062,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) | |||
1535 | SnapEntry sn = map[n-1]; | 2062 | SnapEntry sn = map[n-1]; |
1536 | if ((sn & SNAP_FRAME)) { | 2063 | if ((sn & SNAP_FRAME)) { |
1537 | *gotframe = 1; | 2064 | *gotframe = 1; |
1538 | return snap_slot(sn); | 2065 | return snap_slot(sn) - LJ_FR2; |
1539 | } | 2066 | } |
1540 | } | 2067 | } |
1541 | return 0; | 2068 | return 0; |
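
asm_baseslot() scans the first snapshot backwards for an entry flagged SNAP_FRAME; the change subtracts LJ_FR2 so that on two-slot-frame (GC64) builds the extra frame-link slot is not counted into the base. A simplified model of that backwards scan, using a made-up entry layout instead of the packed SnapEntry encoding:

#include <stdio.h>

#define FR2 1   /* 1 on two-slot-frame (GC64) builds, 0 otherwise */

typedef struct { unsigned slot; int is_frame; } ToyEntry;

/* Return the base slot implied by the last frame entry, or 0 if none. */
static unsigned toy_baseslot(const ToyEntry *map, unsigned nent, int *gotframe)
{
  unsigned n;
  for (n = nent; n > 0; n--) {        /* scan backwards, like asm_baseslot() */
    if (map[n-1].is_frame) {
      *gotframe = 1;
      return map[n-1].slot - FR2;     /* drop the extra frame-link slot */
    }
  }
  return 0;
}

int main(void)
{
  ToyEntry map[] = { {0, 0}, {3, 1}, {5, 0} };
  int gotframe = 0;
  printf("base = %u, gotframe = %d\n", toy_baseslot(map, 3, &gotframe), gotframe);
  return 0;
}
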
@@ -1555,19 +2082,23 @@ static void asm_tail_link(ASMState *as) | |||
1555 | 2082 | ||
1556 | if (as->T->link == 0) { | 2083 | if (as->T->link == 0) { |
1557 | /* Setup fixed registers for exit to interpreter. */ | 2084 | /* Setup fixed registers for exit to interpreter. */ |
1558 | const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); | 2085 | const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); |
1559 | int32_t mres; | 2086 | int32_t mres; |
1560 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ | 2087 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ |
1561 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; | 2088 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; |
1562 | if (bc_isret(bc_op(*retpc))) | 2089 | if (bc_isret(bc_op(*retpc))) |
1563 | pc = retpc; | 2090 | pc = retpc; |
1564 | } | 2091 | } |
2092 | #if LJ_GC64 | ||
2093 | emit_loadu64(as, RID_LPC, u64ptr(pc)); | ||
2094 | #else | ||
1565 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); | 2095 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); |
1566 | ra_allockreg(as, i32ptr(pc), RID_LPC); | 2096 | ra_allockreg(as, i32ptr(pc), RID_LPC); |
1567 | mres = (int32_t)(snap->nslots - baseslot); | 2097 | #endif |
2098 | mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); | ||
1568 | switch (bc_op(*pc)) { | 2099 | switch (bc_op(*pc)) { |
1569 | case BC_CALLM: case BC_CALLMT: | 2100 | case BC_CALLM: case BC_CALLMT: |
1570 | mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; | 2101 | mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; |
1571 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; | 2102 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; |
1572 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; | 2103 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; |
1573 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; | 2104 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; |
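
For an exit to the interpreter (T->link == 0) the tail has to tell it how many results/arguments are pending: mres starts as the live snapshot slots above the base, minus the LJ_FR2 frame-link slot, and is then corrected for the bytecode that resumes, so CALLM/CALLMT drop the callee, frame link and fixed arguments, RETM/TSETM drop their fixed operands, and anything below BC_FUNCF has no pending multiple results. A toy version of just that arithmetic, taking the bytecode operands as plain ints instead of decoding a BCIns (the enum values are contrived so that only BC_FUNCF counts as a function header):

#include <stdio.h>

#define FR2 1   /* extra frame-link slot on two-slot-frame (GC64) builds */

enum ToyBC { BC_CALLM, BC_CALLMT, BC_RETM, BC_TSETM, BC_ISLT, BC_FUNCF };

/* nslots/baseslot come from the final snapshot; a/c/d are bytecode operands. */
static int tail_mres(int nslots, int baseslot, enum ToyBC op, int a, int c, int d)
{
  int mres = nslots - baseslot - FR2;
  switch (op) {
  case BC_CALLM: case BC_CALLMT: mres -= 1 + FR2 + a + c; break;
  case BC_RETM:  mres -= a + d; break;
  case BC_TSETM: mres -= a; break;
  default: if (op < BC_FUNCF) mres = 0; break;
  }
  return mres;
}

int main(void)
{
  printf("CALLM: %d\n", tail_mres(10, 2, BC_CALLM, 1, 2, 0));  /* 7 - 5 = 2 */
  printf("RETM:  %d\n", tail_mres(10, 2, BC_RETM, 1, 0, 3));   /* 7 - 4 = 3 */
  return 0;
}
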
@@ -1579,6 +2110,11 @@ static void asm_tail_link(ASMState *as) | |||
1579 | } | 2110 | } |
1580 | emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); | 2111 | emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); |
1581 | 2112 | ||
2113 | if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */ | ||
2114 | setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); | ||
2115 | IR(as->J->ktrace)->o = IR_KGC; | ||
2116 | } | ||
2117 | |||
1582 | /* Sync the interpreter state with the on-trace state. */ | 2118 | /* Sync the interpreter state with the on-trace state. */ |
1583 | asm_stack_restore(as, snap); | 2119 | asm_stack_restore(as, snap); |
1584 | 2120 | ||
@@ -1602,22 +2138,32 @@ static void asm_setup_regsp(ASMState *as) | |||
1602 | #endif | 2138 | #endif |
1603 | 2139 | ||
1604 | ra_setup(as); | 2140 | ra_setup(as); |
2141 | #if LJ_TARGET_ARM64 | ||
2142 | ra_setkref(as, RID_GL, (intptr_t)J2G(as->J)); | ||
2143 | #endif | ||
1605 | 2144 | ||
1606 | /* Clear reg/sp for constants. */ | 2145 | /* Clear reg/sp for constants. */ |
1607 | for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) | 2146 | for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { |
1608 | ir->prev = REGSP_INIT; | 2147 | ir->prev = REGSP_INIT; |
2148 | if (irt_is64(ir->t) && ir->o != IR_KNULL) { | ||
2149 | #if LJ_GC64 | ||
2150 | /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ | ||
2151 | ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ | ||
2152 | #else | ||
2153 | /* Make life easier for backends by putting address of constant in i. */ | ||
2154 | ir->i = (int32_t)(intptr_t)(ir+1); | ||
2155 | #endif | ||
2156 | ir++; | ||
2157 | } | ||
2158 | } | ||
1609 | 2159 | ||
1610 | /* REF_BASE is used for implicit references to the BASE register. */ | 2160 | /* REF_BASE is used for implicit references to the BASE register. */ |
1611 | lastir->prev = REGSP_HINT(RID_BASE); | 2161 | lastir->prev = REGSP_HINT(RID_BASE); |
1612 | 2162 | ||
1613 | ir = IR(nins-1); | ||
1614 | if (ir->o == IR_RENAME) { | ||
1615 | do { ir--; nins--; } while (ir->o == IR_RENAME); | ||
1616 | T->nins = nins; /* Remove any renames left over from ASM restart. */ | ||
1617 | } | ||
1618 | as->snaprename = nins; | 2163 | as->snaprename = nins; |
1619 | as->snapref = nins; | 2164 | as->snapref = nins; |
1620 | as->snapno = T->nsnap; | 2165 | as->snapno = T->nsnap; |
2166 | as->snapalloc = 0; | ||
1621 | 2167 | ||
1622 | as->stopins = REF_BASE; | 2168 | as->stopins = REF_BASE; |
1623 | as->orignins = nins; | 2169 | as->orignins = nins; |
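
The new loop over the constant part of the IR has to skip the payload slot of 64-bit constants, which occupy two IRIns each. On non-GC64 builds it also caches the address of that payload in ir->i so the backends can address the constant directly; on GC64 builds i stays 0 until a RIP-relative reference fills it in. A self-contained toy of the two-slots-per-wide-constant walk (the slot layout is invented for the sketch and only mirrors the non-GC64 behaviour):

#include <stdint.h>
#include <stdio.h>

/* Toy IR slot: either a constant header or the 64-bit payload following it. */
typedef union {
  struct { int is64; int32_t i; } k;    /* constant "instruction" */
  int64_t payload;                      /* second slot of a 64-bit constant */
} ToySlot;

static void setup_consts(ToySlot *s, int n)
{
  int idx;
  for (idx = 0; idx < n; idx++) {
    s[idx].k.i = 0;
    if (s[idx].k.is64) {
      /* Cache where the payload lives (the next slot).  Truncating to
      ** 32 bits is only valid when pointers fit, as on the non-GC64
      ** targets this path serves. */
      s[idx].k.i = (int32_t)(intptr_t)&s[idx+1].payload;
      idx++;                            /* 64-bit constants take two slots */
    }
  }
}

int main(void)
{
  ToySlot s[3] = {{{0, 0}}};
  s[0].k.is64 = 1;            /* one 64-bit constant, payload in s[1] */
  setup_consts(s, 3);
  printf("cached: %s\n",
         s[0].k.i == (int32_t)(intptr_t)&s[1].payload ? "payload slot" : "?");
  return 0;
}
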
@@ -1627,7 +2173,7 @@ static void asm_setup_regsp(ASMState *as) | |||
1627 | ir = IR(REF_FIRST); | 2173 | ir = IR(REF_FIRST); |
1628 | if (as->parent) { | 2174 | if (as->parent) { |
1629 | uint16_t *p; | 2175 | uint16_t *p; |
1630 | lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); | 2176 | lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir); |
1631 | if (lastir - ir > LJ_MAX_JSLOTS) | 2177 | if (lastir - ir > LJ_MAX_JSLOTS) |
1632 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | 2178 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); |
1633 | as->stopins = (IRRef)((lastir-1) - as->ir); | 2179 | as->stopins = (IRRef)((lastir-1) - as->ir); |
@@ -1666,6 +2212,10 @@ static void asm_setup_regsp(ASMState *as) | |||
1666 | ir->prev = (uint16_t)REGSP_HINT((rload & 15)); | 2212 | ir->prev = (uint16_t)REGSP_HINT((rload & 15)); |
1667 | rload = lj_ror(rload, 4); | 2213 | rload = lj_ror(rload, 4); |
1668 | continue; | 2214 | continue; |
2215 | case IR_TMPREF: | ||
2216 | if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4) | ||
2217 | as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */ | ||
2218 | break; | ||
1669 | #endif | 2219 | #endif |
1670 | case IR_CALLXS: { | 2220 | case IR_CALLXS: { |
1671 | CCallInfo ci; | 2221 | CCallInfo ci; |
@@ -1675,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as) | |||
1675 | as->modset |= RSET_SCRATCH; | 2225 | as->modset |= RSET_SCRATCH; |
1676 | continue; | 2226 | continue; |
1677 | } | 2227 | } |
1678 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { | 2228 | case IR_CALLL: |
2229 | /* lj_vm_next needs two TValues on the stack. */ | ||
2230 | #if LJ_TARGET_X64 && LJ_ABI_WIN | ||
2231 | if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4) | ||
2232 | as->evenspill = SPS_FIRST + 4; | ||
2233 | #else | ||
2234 | if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4) | ||
2235 | as->evenspill = 4; | ||
2236 | #endif | ||
2237 | /* fallthrough */ | ||
2238 | case IR_CALLN: case IR_CALLA: case IR_CALLS: { | ||
1679 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | 2239 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; |
1680 | ir->prev = asm_setup_call_slots(as, ir, ci); | 2240 | ir->prev = asm_setup_call_slots(as, ir, ci); |
1681 | if (inloop) | 2241 | if (inloop) |
@@ -1683,7 +2243,6 @@ static void asm_setup_regsp(ASMState *as) | |||
1683 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; | 2243 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; |
1684 | continue; | 2244 | continue; |
1685 | } | 2245 | } |
1686 | #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) | ||
1687 | case IR_HIOP: | 2246 | case IR_HIOP: |
1688 | switch ((ir-1)->o) { | 2247 | switch ((ir-1)->o) { |
1689 | #if LJ_SOFTFP && LJ_TARGET_ARM | 2248 | #if LJ_SOFTFP && LJ_TARGET_ARM |
@@ -1694,15 +2253,15 @@ static void asm_setup_regsp(ASMState *as) | |||
1694 | } | 2253 | } |
1695 | break; | 2254 | break; |
1696 | #endif | 2255 | #endif |
1697 | #if !LJ_SOFTFP && LJ_NEED_FP64 | 2256 | #if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI |
1698 | case IR_CONV: | 2257 | case IR_CONV: |
1699 | if (irt_isfp((ir-1)->t)) { | 2258 | if (irt_isfp((ir-1)->t)) { |
1700 | ir->prev = REGSP_HINT(RID_FPRET); | 2259 | ir->prev = REGSP_HINT(RID_FPRET); |
1701 | continue; | 2260 | continue; |
1702 | } | 2261 | } |
1703 | /* fallthrough */ | ||
1704 | #endif | 2262 | #endif |
1705 | case IR_CALLN: case IR_CALLXS: | 2263 | /* fallthrough */ |
2264 | case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: | ||
1706 | #if LJ_SOFTFP | 2265 | #if LJ_SOFTFP |
1707 | case IR_MIN: case IR_MAX: | 2266 | case IR_MIN: case IR_MAX: |
1708 | #endif | 2267 | #endif |
@@ -1713,18 +2272,29 @@ static void asm_setup_regsp(ASMState *as) | |||
1713 | break; | 2272 | break; |
1714 | } | 2273 | } |
1715 | break; | 2274 | break; |
1716 | #endif | ||
1717 | #if LJ_SOFTFP | 2275 | #if LJ_SOFTFP |
1718 | case IR_MIN: case IR_MAX: | 2276 | case IR_MIN: case IR_MAX: |
1719 | if ((ir+1)->o != IR_HIOP) break; | 2277 | if ((ir+1)->o != IR_HIOP) break; |
1720 | #endif | 2278 | #endif |
1721 | /* fallthrough */ | 2279 | /* fallthrough */ |
1722 | /* C calls evict all scratch regs and return results in RID_RET. */ | 2280 | /* C calls evict all scratch regs and return results in RID_RET. */ |
1723 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: | 2281 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: |
1724 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) | 2282 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) |
1725 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ | 2283 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ |
2284 | #if LJ_TARGET_X86 && LJ_HASFFI | ||
2285 | if (0) { | ||
2286 | case IR_CNEW: | ||
2287 | if (ir->op2 != REF_NIL && as->evenspill < 4) | ||
2288 | as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ | ||
2289 | } | ||
1726 | /* fallthrough */ | 2290 | /* fallthrough */ |
1727 | case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: | 2291 | #else |
2292 | /* fallthrough */ | ||
2293 | case IR_CNEW: | ||
2294 | #endif | ||
2295 | /* fallthrough */ | ||
2296 | case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: | ||
2297 | case IR_BUFSTR: | ||
1728 | ir->prev = REGSP_HINT(RID_RET); | 2298 | ir->prev = REGSP_HINT(RID_RET); |
1729 | if (inloop) | 2299 | if (inloop) |
1730 | as->modset = RSET_SCRATCH; | 2300 | as->modset = RSET_SCRATCH; |
@@ -1733,58 +2303,73 @@ static void asm_setup_regsp(ASMState *as) | |||
1733 | if (inloop) | 2303 | if (inloop) |
1734 | as->modset = RSET_SCRATCH; | 2304 | as->modset = RSET_SCRATCH; |
1735 | break; | 2305 | break; |
1736 | #if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP | 2306 | #if !LJ_SOFTFP |
1737 | case IR_ATAN2: case IR_LDEXP: | 2307 | #if !LJ_TARGET_X86ORX64 |
2308 | case IR_LDEXP: | ||
2309 | #endif | ||
1738 | #endif | 2310 | #endif |
2311 | /* fallthrough */ | ||
1739 | case IR_POW: | 2312 | case IR_POW: |
1740 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { | 2313 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1741 | #if LJ_TARGET_X86ORX64 | ||
1742 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1743 | if (inloop) | 2314 | if (inloop) |
1744 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | 2315 | as->modset |= RSET_SCRATCH; |
2316 | #if LJ_TARGET_X86 | ||
2317 | if (irt_isnum(IR(ir->op2)->t)) { | ||
2318 | if (as->evenspill < 4) /* Leave room to call pow(). */ | ||
2319 | as->evenspill = 4; | ||
2320 | } | ||
2321 | break; | ||
1745 | #else | 2322 | #else |
1746 | ir->prev = REGSP_HINT(RID_FPRET); | 2323 | ir->prev = REGSP_HINT(RID_FPRET); |
1747 | if (inloop) | ||
1748 | as->modset |= RSET_SCRATCH; | ||
1749 | #endif | ||
1750 | continue; | 2324 | continue; |
2325 | #endif | ||
1751 | } | 2326 | } |
1752 | /* fallthrough */ /* for integer POW */ | 2327 | /* fallthrough */ /* for integer POW */ |
1753 | case IR_DIV: case IR_MOD: | 2328 | case IR_DIV: case IR_MOD: |
1754 | if (!irt_isnum(ir->t)) { | 2329 | if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) { |
1755 | ir->prev = REGSP_HINT(RID_RET); | 2330 | ir->prev = REGSP_HINT(RID_RET); |
1756 | if (inloop) | 2331 | if (inloop) |
1757 | as->modset |= (RSET_SCRATCH & RSET_GPR); | 2332 | as->modset |= (RSET_SCRATCH & RSET_GPR); |
1758 | continue; | 2333 | continue; |
1759 | } | 2334 | } |
1760 | break; | 2335 | break; |
1761 | case IR_FPMATH: | 2336 | #if LJ_64 && LJ_SOFTFP |
1762 | #if LJ_TARGET_X86ORX64 | 2337 | case IR_ADD: case IR_SUB: case IR_MUL: |
1763 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ | 2338 | if (irt_isnum(ir->t)) { |
1764 | ir->prev = REGSP_HINT(RID_XMM0); | 2339 | ir->prev = REGSP_HINT(RID_RET); |
1765 | #if !LJ_64 | ||
1766 | if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ | ||
1767 | as->evenspill = 4; | ||
1768 | #endif | ||
1769 | if (inloop) | ||
1770 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
1771 | continue; | ||
1772 | } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { | ||
1773 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1774 | if (inloop) | 2340 | if (inloop) |
1775 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | 2341 | as->modset |= (RSET_SCRATCH & RSET_GPR); |
1776 | continue; | 2342 | continue; |
1777 | } | 2343 | } |
1778 | break; | 2344 | break; |
1779 | #else | 2345 | #endif |
1780 | ir->prev = REGSP_HINT(RID_FPRET); | 2346 | case IR_FPMATH: |
2347 | #if LJ_TARGET_X86ORX64 | ||
2348 | if (ir->op2 <= IRFPM_TRUNC) { | ||
2349 | if (!(as->flags & JIT_F_SSE4_1)) { | ||
2350 | ir->prev = REGSP_HINT(RID_XMM0); | ||
2351 | if (inloop) | ||
2352 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
2353 | continue; | ||
2354 | } | ||
2355 | break; | ||
2356 | } | ||
2357 | #endif | ||
1781 | if (inloop) | 2358 | if (inloop) |
1782 | as->modset |= RSET_SCRATCH; | 2359 | as->modset |= RSET_SCRATCH; |
2360 | #if LJ_TARGET_X86 | ||
2361 | break; | ||
2362 | #else | ||
2363 | ir->prev = REGSP_HINT(RID_FPRET); | ||
1783 | continue; | 2364 | continue; |
1784 | #endif | 2365 | #endif |
1785 | #if LJ_TARGET_X86ORX64 | 2366 | #if LJ_TARGET_X86ORX64 |
1786 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ | 2367 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ |
1787 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 2368 | case IR_BSHL: case IR_BSHR: case IR_BSAR: |
2369 | if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ | ||
2370 | break; | ||
2371 | /* fallthrough */ | ||
2372 | case IR_BROL: case IR_BROR: | ||
1788 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { | 2373 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { |
1789 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); | 2374 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); |
1790 | if (inloop) | 2375 | if (inloop) |
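
The shift handling above reflects the x86/x64 ISA: the legacy variable-count SHL/SHR/SAR and all variable rotates take their count in CL, so a non-constant count operand gets a RID_ECX hint, but when BMI2 is available SHLX/SHRX/SARX accept the count in any register and the hint is skipped for the plain shifts (rotates still need CL). Reduced to a predicate, the decision is roughly:

#include <stdbool.h>
#include <stdio.h>

enum ToyShift { SH_SHL, SH_SHR, SH_SAR, SH_ROL, SH_ROR };

/* Should a non-constant shift count be hinted into ECX? */
static bool count_needs_ecx(enum ToyShift op, bool have_bmi2)
{
  switch (op) {
  case SH_SHL: case SH_SHR: case SH_SAR:
    return !have_bmi2;     /* SHLX/SHRX/SARX take the count anywhere */
  default:
    return true;           /* rotates always use CL */
  }
}

int main(void)
{
  printf("SHL w/ BMI2: %d, ROL w/ BMI2: %d\n",
         count_needs_ecx(SH_SHL, true), count_needs_ecx(SH_ROL, true));
  return 0;
}
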
@@ -1828,16 +2413,26 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1828 | { | 2413 | { |
1829 | ASMState as_; | 2414 | ASMState as_; |
1830 | ASMState *as = &as_; | 2415 | ASMState *as = &as_; |
1831 | MCode *origtop; | 2416 | |
2417 | /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ | ||
2418 | { | ||
2419 | IRRef nins = T->nins; | ||
2420 | IRIns *ir = &T->ir[nins-1]; | ||
2421 | if (ir->o == IR_NOP || ir->o == IR_RENAME) { | ||
2422 | do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME); | ||
2423 | T->nins = nins; | ||
2424 | } | ||
2425 | } | ||
1832 | 2426 | ||
1833 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ | 2427 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ |
1834 | J->cur.nins = lj_ir_nextins(J); | 2428 | /* This also allows one RENAME to be added without reallocating curfinal. */ |
1835 | lj_ir_nop(&J->cur.ir[J->cur.nins]); | 2429 | as->orignins = lj_ir_nextins(J); |
2430 | lj_ir_nop(&J->cur.ir[as->orignins]); | ||
1836 | 2431 | ||
1837 | /* Setup initial state. Copy some fields to reduce indirections. */ | 2432 | /* Setup initial state. Copy some fields to reduce indirections. */ |
1838 | as->J = J; | 2433 | as->J = J; |
1839 | as->T = T; | 2434 | as->T = T; |
1840 | as->ir = T->ir; | 2435 | J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ |
1841 | as->flags = J->flags; | 2436 | as->flags = J->flags; |
1842 | as->loopref = J->loopref; | 2437 | as->loopref = J->loopref; |
1843 | as->realign = NULL; | 2438 | as->realign = NULL; |
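
Two setup details are worth noting here: any IR_NOP/IR_RENAME instructions left at the tail of the IR by a previous aborted attempt (LJ_TRERR_MCODELM) are trimmed off before assembly starts, and one initialized instruction past the end is reserved so a single RENAME can be appended without reallocating the copied IR. The trim itself is just "drop trailing sentinels from an array":

#include <stdio.h>

/* Drop trailing elements equal to either sentinel value; return new length. */
static int trim_trailing(const int *a, int n, int nop, int rename)
{
  while (n > 0 && (a[n-1] == nop || a[n-1] == rename))
    n--;
  return n;
}

int main(void)
{
  int ops[] = { 1, 2, 3, 9, 8, 9 };   /* pretend 9 = NOP, 8 = RENAME */
  printf("trimmed length: %d\n", trim_trailing(ops, 6, 9, 8));  /* 3 */
  return 0;
}
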
@@ -1845,17 +2440,46 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1845 | as->parent = J->parent ? traceref(J, J->parent) : NULL; | 2440 | as->parent = J->parent ? traceref(J, J->parent) : NULL; |
1846 | 2441 | ||
1847 | /* Reserve MCode memory. */ | 2442 | /* Reserve MCode memory. */ |
1848 | as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); | 2443 | as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot); |
1849 | as->mcp = as->mctop; | 2444 | as->mcp = as->mctop; |
1850 | as->mclim = as->mcbot + MCLIM_REDZONE; | 2445 | as->mclim = as->mcbot + MCLIM_REDZONE; |
1851 | asm_setup_target(as); | 2446 | asm_setup_target(as); |
1852 | 2447 | ||
1853 | do { | 2448 | /* |
2449 | ** This is a loop, because the MCode may have to be (re-)assembled | ||
2450 | ** multiple times: | ||
2451 | ** | ||
2452 | ** 1. as->realign is set (and the assembly aborted), if the arch-specific | ||
2453 | ** backend wants the MCode to be aligned differently. | ||
2454 | ** | ||
2455 | ** This is currently only the case on x86/x64, where small loops get | ||
2456 | ** an aligned loop body plus a short branch. Not much effort is wasted, | ||
2457 | ** because the abort happens very quickly and only once. | ||
2458 | ** | ||
2459 | ** 2. The IR is immovable, since the MCode embeds pointers to various | ||
2460 | ** constants inside the IR. But RENAMEs may need to be added to the IR | ||
2461 | ** during assembly, which might grow and reallocate the IR. We check | ||
2462 | ** at the end if the IR (in J->cur.ir) has actually grown, resize the | ||
2463 | ** copy (in J->curfinal.ir) and try again. | ||
2464 | ** | ||
2465 | ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have | ||
2466 | ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to | ||
2467 | ** always have one spare slot in the IR (see above), which means we | ||
2468 | ** have to redo the assembly for only ~2% of all traces. | ||
2469 | ** | ||
2470 | ** Very, very rarely, this needs to be done repeatedly, since the | ||
2471 | ** location of constants inside the IR (actually, reachability from | ||
2472 | ** a global pointer) may affect register allocation and thus the | ||
2473 | ** number of RENAMEs. | ||
2474 | */ | ||
2475 | for (;;) { | ||
1854 | as->mcp = as->mctop; | 2476 | as->mcp = as->mctop; |
1855 | #ifdef LUA_USE_ASSERT | 2477 | #ifdef LUA_USE_ASSERT |
1856 | as->mcp_prev = as->mcp; | 2478 | as->mcp_prev = as->mcp; |
1857 | #endif | 2479 | #endif |
1858 | as->curins = T->nins; | 2480 | as->ir = J->curfinal->ir; /* Use the copied IR. */ |
2481 | as->curins = J->cur.nins = as->orignins; | ||
2482 | |||
1859 | RA_DBG_START(); | 2483 | RA_DBG_START(); |
1860 | RA_DBGX((as, "===== STOP =====")); | 2484 | RA_DBGX((as, "===== STOP =====")); |
1861 | 2485 | ||
@@ -1874,7 +2498,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1874 | /* Assemble a trace in linear backwards order. */ | 2498 | /* Assemble a trace in linear backwards order. */ |
1875 | for (as->curins--; as->curins > as->stopins; as->curins--) { | 2499 | for (as->curins--; as->curins > as->stopins; as->curins--) { |
1876 | IRIns *ir = IR(as->curins); | 2500 | IRIns *ir = IR(as->curins); |
1877 | lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ | 2501 | /* 64 bit types handled by SPLIT for 32 bit archs. */ |
2502 | lj_assertA(!(LJ_32 && irt_isint64(ir->t)), | ||
2503 | "IR %04d has unsplit 64 bit type", | ||
2504 | (int)(ir - as->ir) - REF_BIAS); | ||
2505 | asm_snap_prev(as); | ||
1878 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) | 2506 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) |
1879 | continue; /* Dead-code elimination can be soooo easy. */ | 2507 | continue; /* Dead-code elimination can be soooo easy. */ |
1880 | if (irt_isguard(ir->t)) | 2508 | if (irt_isguard(ir->t)) |
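
The "soooo easy" dead-code elimination works because the trace is assembled in linear backwards order: by the time an instruction is visited, every use of its result has already been emitted and would have requested a register or spill slot for it, so an instruction with no register/spill assigned and no side effects can simply be skipped. In sketch form:

#include <stdbool.h>
#include <stdio.h>

typedef struct { bool used; bool sideeff; const char *name; } ToyIns;

/* Walk instructions backwards; emit only what is live or has side effects. */
static void assemble_backwards(const ToyIns *ir, int n)
{
  int i;
  for (i = n - 1; i >= 0; i--) {
    if (!ir[i].used && !ir[i].sideeff)
      continue;                    /* dead: nothing downstream asked for it */
    printf("emit %s\n", ir[i].name);
  }
}

int main(void)
{
  ToyIns ir[] = {
    { true,  false, "ADD"   },
    { false, false, "MUL"   },     /* result never used -> skipped */
    { false, true,  "STORE" },     /* side effect -> always emitted */
  };
  assemble_backwards(ir, 3);       /* prints STORE, then ADD */
  return 0;
}
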
@@ -1883,22 +2511,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1883 | checkmclim(as); | 2511 | checkmclim(as); |
1884 | asm_ir(as, ir); | 2512 | asm_ir(as, ir); |
1885 | } | 2513 | } |
1886 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ | ||
1887 | 2514 | ||
1888 | /* Emit head of trace. */ | 2515 | if (as->realign && J->curfinal->nins >= T->nins) |
1889 | RA_DBG_REF(); | 2516 | continue; /* Retry in case only the MCode needs to be realigned. */ |
1890 | checkmclim(as); | 2517 | |
1891 | if (as->gcsteps > 0) { | 2518 | /* Emit head of trace. */ |
1892 | as->curins = as->T->snap[0].ref; | 2519 | RA_DBG_REF(); |
1893 | asm_snap_prep(as); /* The GC check is a guard. */ | 2520 | checkmclim(as); |
1894 | asm_gc_check(as); | 2521 | if (as->gcsteps > 0) { |
2522 | as->curins = as->T->snap[0].ref; | ||
2523 | asm_snap_prep(as); /* The GC check is a guard. */ | ||
2524 | asm_gc_check(as); | ||
2525 | as->curins = as->stopins; | ||
2526 | } | ||
2527 | ra_evictk(as); | ||
2528 | if (as->parent) | ||
2529 | asm_head_side(as); | ||
2530 | else | ||
2531 | asm_head_root(as); | ||
2532 | asm_phi_fixup(as); | ||
2533 | |||
2534 | if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ | ||
2535 | lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth"); | ||
2536 | memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, | ||
2537 | (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ | ||
2538 | T->nins = J->curfinal->nins; | ||
2539 | /* Fill mcofs of any unprocessed snapshots. */ | ||
2540 | as->curins = REF_FIRST; | ||
2541 | asm_snap_prev(as); | ||
2542 | break; /* Done. */ | ||
2543 | } | ||
2544 | |||
2545 | /* Otherwise try again with a bigger IR. */ | ||
2546 | lj_trace_free(J2G(J), J->curfinal); | ||
2547 | J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ | ||
2548 | J->curfinal = lj_trace_alloc(J->L, T); | ||
2549 | as->realign = NULL; | ||
1895 | } | 2550 | } |
1896 | ra_evictk(as); | ||
1897 | if (as->parent) | ||
1898 | asm_head_side(as); | ||
1899 | else | ||
1900 | asm_head_root(as); | ||
1901 | asm_phi_fixup(as); | ||
1902 | 2551 | ||
1903 | RA_DBGX((as, "===== START ====")); | 2552 | RA_DBGX((as, "===== START ====")); |
1904 | RA_DBG_FLUSH(); | 2553 | RA_DBG_FLUSH(); |
@@ -1908,10 +2557,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1908 | /* Set trace entry point before fixing up tail to allow link to self. */ | 2557 | /* Set trace entry point before fixing up tail to allow link to self. */ |
1909 | T->mcode = as->mcp; | 2558 | T->mcode = as->mcp; |
1910 | T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; | 2559 | T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; |
1911 | if (!as->loopref) | 2560 | if (as->loopref) |
2561 | asm_loop_tail_fixup(as); | ||
2562 | else | ||
1912 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ | 2563 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ |
1913 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); | 2564 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); |
1914 | lj_mcode_sync(T->mcode, origtop); | 2565 | asm_snap_fixup_mcofs(as); |
2566 | #if LJ_TARGET_MCODE_FIXUP | ||
2567 | asm_mcode_fixup(T->mcode, T->szmcode); | ||
2568 | #endif | ||
2569 | lj_mcode_sync(T->mcode, as->mctoporig); | ||
1915 | } | 2570 | } |
1916 | 2571 | ||
1917 | #undef IR | 2572 | #undef IR |