author     Mike Pall <mike>    2011-05-12 01:27:20 +0200
committer  Mike Pall <mike>    2011-05-12 01:27:20 +0200
commit     800349387a25b73e1c98f4dcf39a65b7559894fe (patch)
tree       1d78d45553bff8354603662dc06bc8acdce2085c /src
parent     b79cdba3a06cbabb772535b5bce18d6460135845 (diff)
Disentangle target-specific parts of JIT assembler backend.
Diffstat (limited to 'src')
 -rw-r--r--  src/lj_asm.c         | 341
 -rw-r--r--  src/lj_target_x86.h  |   6
 2 files changed, 193 insertions, 154 deletions
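The change below replaces direct x86 emission inside the register allocator with a small set of generic emitter hooks (emit_movrr, emit_spload, emit_spstore, emit_spsub, emit_canremat) that a target backend supplies. The stand-alone program here is only an illustration of the dispatch idea those hooks rely on -- a single register number space with GPRs before FPRs, so one comparison against RID_MAX_GPR picks the instruction class. None of the names or values in it are LuaJIT definitions; in the real backend (see the emit_movrr hunk below) the same comparison chooses between XO_MOV and an SSE move selected by XMM_MOVRR().

/* illustration.c -- not part of the commit; all names are stand-ins. */
#include <stdio.h>

enum { RID_MIN_GPR = 0, RID_MAX_GPR = 16, RID_MAX = 32 };  /* GPRs first, then FPRs. */
typedef unsigned int Reg;

static void emit_gpr_move(Reg dst, Reg src) { printf("mov    r%u, r%u\n", dst, src); }
static void emit_fpr_move(Reg dst, Reg src)
{
  printf("movaps xmm%u, xmm%u\n", dst - RID_MAX_GPR, src - RID_MAX_GPR);
}

/* Generic move: the register allocator calls this without knowing the ISA. */
static void emit_movrr(Reg dst, Reg src)
{
  if (dst < RID_MAX_GPR)
    emit_gpr_move(dst, src);   /* Integer register class. */
  else
    emit_fpr_move(dst, src);   /* Floating-point register class. */
}

int main(void)
{
  emit_movrr(3, 5);                              /* GPR <- GPR */
  emit_movrr(RID_MAX_GPR + 1, RID_MAX_GPR + 2);  /* FPR <- FPR */
  return 0;
}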
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 46142f5c..be6c359e 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -41,7 +41,9 @@ typedef struct ASMState {
   IRIns *ir;        /* Copy of pointer to IR instructions/constants. */
   jit_State *J;     /* JIT compiler state. */

+#if LJ_TARGET_X86ORX64
   x86ModRM mrm;     /* Fused x86 address operand. */
+#endif

   RegSet freeset;   /* Set of free registers. */
   RegSet modset;    /* Set of registers modified inside the loop. */
@@ -77,7 +79,7 @@ typedef struct ASMState {
   MCode *mctop;     /* Top of generated MCode. */
   MCode *mcloop;    /* Pointer to loop MCode (or NULL). */
   MCode *invmcp;    /* Points to invertible loop branch (or NULL). */
-  MCode *testmcp;   /* Pending opportunity to remove test r,r. */
+  MCode *flagmcp;   /* Pending opportunity to merge flag setting ins. */
   MCode *realign;   /* Realign loop if not NULL. */

   IRRef1 phireg[RID_MAX];  /* PHI register references. */
@@ -102,10 +104,6 @@ typedef struct ASMState {
   ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
    (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD)

-/* Instruction selection for XMM moves. */
-#define XMM_MOVRR(as)  ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
-#define XMM_MOVRM(as)  ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
-
 /* Sparse limit checks using a red zone before the actual limit. */
 #define MCLIM_REDZONE  64
 #define checkmclim(as) \
@@ -116,7 +114,23 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
   lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
 }

-/* -- Emit x86 instructions ----------------------------------------------- */
+/* Arch-specific field offsets. */
+static const uint8_t field_ofs[IRFL__MAX+1] = {
+#define FLOFS(name, ofs)  (uint8_t)(ofs),
+IRFLDEF(FLOFS)
+#undef FLOFS
+  0
+};
+
+/* Define this if you want to run LuaJIT with Valgrind. */
+#ifdef LUAJIT_USE_VALGRIND
+#include <valgrind/valgrind.h>
+#define VG_INVALIDATE(p, sz)  VALGRIND_DISCARD_TRANSLATIONS(p, sz)
+#else
+#define VG_INVALIDATE(p, sz)  ((void)0)
+#endif
+
+/* -- Emit basic instructions --------------------------------------------- */

 #define MODRM(mode, r1, r2)  ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))

@@ -338,16 +352,6 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
   as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
 }

-static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
-{
-  if (ofs) {
-    if ((as->flags & JIT_F_LEA_AGU))
-      emit_rmro(as, XO_LEA, r, r, ofs);
-    else
-      emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
-  }
-}
-
 /* op rm/mrm, i */
 static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
 {
@@ -362,7 +366,11 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
   emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
 }

-/* -- Emit moves ---------------------------------------------------------- */
+/* -- Emit loads/stores --------------------------------------------------- */
+
+/* Instruction selection for XMM moves. */
+#define XMM_MOVRR(as)  ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
+#define XMM_MOVRM(as)  ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)

 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
@@ -435,7 +443,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
   emit_rma(as, XMM_MOVRM(as), r, &tv->n);
 }

-/* -- Emit branches ------------------------------------------------------- */
+/* -- Emit control-flow instructions -------------------------------------- */

 /* Label for short jumps. */
 typedef MCode *MCLabel;
@@ -520,6 +528,59 @@ static void emit_call_(ASMState *as, MCode *target)

 #define emit_call(as, f)  emit_call_(as, (MCode *)(void *)(f))

+/* -- Emit generic operations --------------------------------------------- */
+
+/* Use 64 bit operations to handle 64 bit IR types. */
+#if LJ_64
+#define REX_64IR(ir, r)  ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
+#else
+#define REX_64IR(ir, r)  (r)
+#endif
+
+/* Generic move between two regs. */
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+{
+  UNUSED(ir);
+  if (dst < RID_MAX_GPR)
+    emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
+  else
+    emit_rr(as, XMM_MOVRR(as), dst, src);
+}
+
+/* Generic load of register from stack slot. */
+static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+{
+  if (r < RID_MAX_GPR)
+    emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
+  else
+    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs);
+}
+
+/* Generic store of register to stack slot. */
+static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+{
+  if (r < RID_MAX_GPR)
+    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs);
+  else
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs);
+}
+
+/* Add offset to pointer. */
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+  if (ofs) {
+    if ((as->flags & JIT_F_LEA_AGU))
+      emit_rmro(as, XO_LEA, r, r, ofs);
+    else
+      emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
+  }
+}
+
+#define emit_spsub(as, ofs)  emit_addptr(as, RID_ESP|REX_64, -(ofs))
+
+/* Prefer rematerialization of BASE/L from global_State over spills. */
+#define emit_canremat(ref)  ((ref) <= REF_BASE)
+
 /* -- Register allocator debugging ---------------------------------------- */

 /* #define LUAJIT_DEBUG_RA */
@@ -533,7 +594,7 @@ static void emit_call_(ASMState *as, MCode *target)
 static const char *const ra_regname[] = {
   GPRDEF(RIDNAME)
   FPRDEF(RIDNAME)
-  "mrm",
+  VRIDDEF(RIDNAME)
   NULL
 };
 #undef RIDNAME
@@ -591,7 +652,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
        p += sprintf(p, "K%03d", REF_BIAS - ref);
     } else if (e[1] == 's') {
       uint32_t slot = va_arg(argp, uint32_t);
-      p += sprintf(p, "[esp+0x%x]", sps_scale(slot));
+      p += sprintf(p, "[sp+0x%x]", sps_scale(slot));
     } else {
       lua_assert(0);
     }
@@ -634,14 +695,17 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
 /* Setup register allocator. */
 static void ra_setup(ASMState *as)
 {
+  Reg r;
   /* Initially all regs (except the stack pointer) are free for use. */
-  as->freeset = RSET_ALL;
+  as->freeset = RSET_INIT;
   as->modset = RSET_EMPTY;
   as->weakset = RSET_EMPTY;
   as->phiset = RSET_EMPTY;
   memset(as->phireg, 0, sizeof(as->phireg));
   memset(as->cost, 0, sizeof(as->cost));
-  as->cost[RID_ESP] = REGCOST(~0u, 0u);
+  for (r = RID_MIN_GPR; r < RID_MAX; r++)
+    if (!rset_test(RSET_INIT, r))
+      as->cost[r] = REGCOST(~0u, 0u);
 }

 /* Rematerialize constants. */
@@ -655,11 +719,11 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
   RA_DBGX((as, "remat $i $r", ir, r));
   if (ir->o == IR_KNUM) {
     emit_loadn(as, r, ir_knum(ir));
-  } else if (ir->o == IR_BASE) {
+  } else if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
-  } else if (ir->o == IR_KPRI) {  /* REF_NIL stores ASMREF_L register. */
-    lua_assert(irt_isnil(ir->t));
+  } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
+    lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
     emit_getgl(as, r, jit_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
@@ -708,28 +772,11 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
   return r;
 }

-/* Use 64 bit operations to handle 64 bit IR types. */
-#if LJ_64
-#define REX_64IR(ir, r)  ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
-#else
-#define REX_64IR(ir, r)  (r)
-#endif
-
-/* Generic move between two regs. */
-static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2)
-{
-  UNUSED(ir);
-  if (r1 < RID_MAX_GPR)
-    emit_rr(as, XO_MOV, REX_64IR(ir, r1), r2);
-  else
-    emit_rr(as, XMM_MOVRR(as), r1, r2);
-}
-
 /* Restore a register (marked as free). Rematerialize or force a spill. */
 static Reg ra_restore(ASMState *as, IRRef ref)
 {
   IRIns *ir = IR(ref);
-  if (irref_isk(ref) || ref == REF_BASE) {
+  if (emit_canremat(ref)) {
     return ra_rematk(as, ir);
   } else {
     int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
@@ -740,11 +787,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
     if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
       ra_modified(as, r);
       RA_DBGX((as, "restore $i $r", ir, r));
-      if (r < RID_MAX_GPR)
-        emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
-      else
-        emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-                  r, RID_ESP, ofs);
+      emit_spload(as, ir, r, ofs);
     }
     return r;
   }
@@ -754,16 +797,13 @@ static Reg ra_restore(ASMState *as, IRRef ref)
 static void ra_save(ASMState *as, IRIns *ir, Reg r)
 {
   RA_DBGX((as, "save $i $r", ir, r));
-  if (r < RID_MAX_GPR)
-    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, sps_scale(ir->s));
-  else
-    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto,
-              r, RID_ESP, sps_scale(ir->s));
+  emit_spstore(as, ir, r, sps_scale(ir->s));
 }

-#define MINCOST(r) \
-  if (LJ_LIKELY(allow&RID2RSET(r)) && as->cost[r] < cost) \
-    cost = as->cost[r]
+#define MINCOST(name) \
+  if (rset_test(RSET_ALL, RID_##name) && \
+      LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \
+    cost = as->cost[RID_##name];

 /* Evict the register with the lowest cost, forcing a restore. */
 static Reg ra_evict(ASMState *as, RegSet allow)
@@ -772,19 +812,9 @@ static Reg ra_evict(ASMState *as, RegSet allow)
   RegCost cost = ~(RegCost)0;
   lua_assert(allow != RSET_EMPTY);
   if (allow < RID2RSET(RID_MAX_GPR)) {
-    MINCOST(RID_EAX);MINCOST(RID_ECX);MINCOST(RID_EDX);MINCOST(RID_EBX);
-    MINCOST(RID_EBP);MINCOST(RID_ESI);MINCOST(RID_EDI);
-#if LJ_64
-    MINCOST(RID_R8D);MINCOST(RID_R9D);MINCOST(RID_R10D);MINCOST(RID_R11D);
-    MINCOST(RID_R12D);MINCOST(RID_R13D);MINCOST(RID_R14D);MINCOST(RID_R15D);
-#endif
+    GPRDEF(MINCOST)
   } else {
-    MINCOST(RID_XMM0);MINCOST(RID_XMM1);MINCOST(RID_XMM2);MINCOST(RID_XMM3);
-    MINCOST(RID_XMM4);MINCOST(RID_XMM5);MINCOST(RID_XMM6);MINCOST(RID_XMM7);
-#if LJ_64
-    MINCOST(RID_XMM8);MINCOST(RID_XMM9);MINCOST(RID_XMM10);MINCOST(RID_XMM11);
-    MINCOST(RID_XMM12);MINCOST(RID_XMM13);MINCOST(RID_XMM14);MINCOST(RID_XMM15);
-#endif
+    FPRDEF(MINCOST)
   }
   ref = regcost_ref(cost);
   lua_assert(ref >= as->T->nk && ref < as->T->nins);
@@ -836,7 +866,7 @@ static void ra_evictk(ASMState *as)
   while (work) {
     Reg r = rset_pickbot(work);
     IRRef ref = regcost_ref(as->cost[r]);
-    if (irref_isk(ref)) {
+    if (emit_canremat(ref)) {
       ra_rematk(as, IR(ref));
       checkmclim(as);
     }
@@ -861,7 +891,7 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
       if (rset_test(pick, r))  /* Use hint register if possible. */
        goto found;
       /* Rematerialization is cheaper than missing a hint. */
-      if (rset_test(allow, r) && irref_isk(regcost_ref(as->cost[r]))) {
+      if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) {
        ra_rematk(as, IR(regcost_ref(as->cost[r])));
        goto found;
       }
@@ -873,11 +903,9 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
       pick &= ~as->modset;
       r = rset_pickbot(pick);  /* Reduce conflicts with inverse allocation. */
     } else {
-#if LJ_64
       /* We've got plenty of regs, so get callee-save regs if possible. */
-      if ((pick & ~RSET_SCRATCH))
+      if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH))
        pick &= ~RSET_SCRATCH;
-#endif
       r = rset_picktop(pick);
     }
   } else {
@@ -916,7 +944,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
   ra_noweak(as, up);
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
-  ra_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
+  emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
     lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
     ren = tref_ref(lj_ir_emit(as->J));
@@ -949,7 +977,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
     ra_scratch(as, RID2RSET(r));
-    ra_movrr(as, ir, dest, r);
+    emit_movrr(as, ir, dest, r);
   }
 }

@@ -993,7 +1021,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
       ra_modified(as, left);
       ra_rename(as, left, dest);
     } else {
-      ra_movrr(as, ir, dest, left);
+      emit_movrr(as, ir, dest, left);
     }
   }
 }
@@ -1152,14 +1180,6 @@ static void asm_guardcc(ASMState *as, int cc)

 /* -- Memory operand fusion ----------------------------------------------- */

-/* Arch-specific field offsets. */
-static const uint8_t field_ofs[IRFL__MAX+1] = {
-#define FLOFS(name, ofs)  (uint8_t)(ofs),
-IRFLDEF(FLOFS)
-#undef FLOFS
-  0
-};
-
 /* Limit linear search to this distance. Avoids O(n^2) behavior. */
 #define CONFLICT_SEARCH_LIM  31

@@ -1503,7 +1523,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
       if (ra_hasreg(ir->r)) {
        ra_noweak(as, ir->r);
-       ra_movrr(as, ir, r, ir->r);
+       emit_movrr(as, ir, r, ir->r);
       } else {
        ra_allocref(as, ref, RID2RSET(r));
       }
@@ -2880,8 +2900,8 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   RegSet allow = RSET_GPR;
   Reg dest, right;
   int32_t k = 0;
-  if (as->testmcp == as->mcp) {  /* Drop test r,r instruction. */
-    as->testmcp = NULL;
+  if (as->flagmcp == as->mcp) {  /* Drop test r,r instruction. */
+    as->flagmcp = NULL;
     as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2;
   }
   right = IR(rref)->r;
@@ -2996,7 +3016,7 @@ static void asm_add(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t))
     asm_fparith(as, ir, XO_ADDSD);
-  else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp ||
+  else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
           irt_is64(ir->t) || !asm_lea(as, ir))
     asm_intarith(as, ir, XOg_ADD);
 }
@@ -3215,7 +3235,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
       /* Use test r,r instead of cmp r,0. */
       emit_rr(as, XO_TEST, r64 + left, left);
       if (irl+1 == ir)  /* Referencing previous ins? */
-       as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
+       as->flagmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
     } else {
       emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm);
     }
@@ -3273,7 +3293,7 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)

   /* All register allocations must be performed _before_ this point. */
   l_around = emit_label(as);
-  as->invmcp = as->testmcp = NULL;  /* Cannot use these optimizations. */
+  as->invmcp = as->flagmcp = NULL;  /* Cannot use these optimizations. */

   /* Loword comparison and branch. */
   asm_guardcc(as, cc >> 4);  /* Always use unsigned compare for loword. */
@@ -3620,7 +3640,7 @@ static void asm_phi(ASMState *as, IRIns *ir)
       r = ra_allocref(as, ir->op2, allow);
     } else {  /* Duplicate right PHI, need a copy (rare). */
       r = ra_scratch(as, allow);
-      ra_movrr(as, irr, r, irr->r);
+      emit_movrr(as, irr, r, irr->r);
     }
     ir->r = (uint8_t)r;
     rset_set(as->phiset, r);
@@ -3690,7 +3710,7 @@ static void asm_loop(ASMState *as)
   if (as->gcsteps)
     asm_gc_check(as);
   /* LOOP marks the transition from the variant to the invariant part. */
-  as->testmcp = as->invmcp = NULL;
+  as->flagmcp = as->invmcp = NULL;
   as->sectref = 0;
   if (!neverfuse(as)) as->fuseref = 0;
   asm_phi_shuffle(as);
@@ -3732,7 +3752,7 @@ static void asm_head_root(ASMState *as)
   emit_setgli(as, vmstate, (int32_t)as->T->traceno);
   spadj = asm_stack_adjust(as);
   as->T->spadjust = (uint16_t)spadj;
-  emit_addptr(as, RID_ESP|REX_64, -spadj);
+  emit_spsub(as, spadj);
   /* Root traces assume a checked stack for the starting proto. */
   as->T->topslot = gcref(as->T->startpt)->pt.framesize;
 }
@@ -3846,7 +3866,7 @@ static void asm_head_side(ASMState *as)

   /* Store trace number and adjust stack frame relative to the parent. */
   emit_setgli(as, vmstate, (int32_t)as->T->traceno);
-  emit_addptr(as, RID_ESP|REX_64, -spdelta);
+  emit_spsub(as, spdelta);

   /* Restore target registers from parent spill slots. */
   if (pass3) {
@@ -3859,10 +3879,7 @@ static void asm_head_side(ASMState *as)
       if (ra_hasspill(regsp_spill(rs))) {
        int32_t ofs = sps_scale(regsp_spill(rs));
        ra_free(as, r);
-       if (r < RID_MAX_GPR)
-         emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
-       else
-         emit_rmro(as, XMM_MOVRM(as), r, RID_ESP, ofs);
+       emit_spload(as, ir, r, ofs);
        checkmclim(as);
       }
     }
@@ -3879,7 +3896,7 @@ static void asm_head_side(ASMState *as)
     rset_clear(live, rp);
     rset_clear(allow, rp);
     ra_free(as, ir->r);
-    ra_movrr(as, ir, ir->r, rp);
+    emit_movrr(as, ir, ir->r, rp);
     checkmclim(as);
   }

@@ -4005,6 +4022,30 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   as->mctop = p;
 }

+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop;
+  /* Realign and leave room for backwards loop branch or exit branch. */
+  if (as->realign) {
+    int i = ((int)(intptr_t)as->realign) & 15;
+    /* Fill unused mcode tail with NOPs to make the prefetcher happy. */
+    while (i-- > 0)
+      *--p = XI_NOP;
+    as->mctop = p;
+    p -= (as->loopinv ? 5 : 2);  /* Space for short/near jmp. */
+  } else {
+    p -= 5;  /* Space for exit branch (near jmp). */
+  }
+  if (as->loopref) {
+    as->invmcp = as->mcp = p;
+  } else {
+    /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
+    as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0));
+    as->invmcp = NULL;
+  }
+}
+
 /* -- Instruction dispatch ------------------------------------------------ */

 /* Assemble a single instruction. */
@@ -4160,22 +4201,6 @@ static void asm_ir(ASMState *as, IRIns *ir)
   }
 }

-/* Assemble a trace in linear backwards order. */
-static void asm_trace(ASMState *as)
-{
-  for (as->curins--; as->curins > as->stopins; as->curins--) {
-    IRIns *ir = IR(as->curins);
-    lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
-    if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
-      continue;  /* Dead-code elimination can be soooo easy. */
-    if (irt_isguard(ir->t))
-      asm_snap_prep(as);
-    RA_DBG_REF();
-    checkmclim(as);
-    asm_ir(as, ir);
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */

 /* Ensure there are enough stack slots for call arguments. */
@@ -4215,9 +4240,16 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 #endif
 }

+/* Target-specific setup. */
+static void asm_setup_target(ASMState *as)
+{
+  asm_exitstub_setup(as, as->T->nsnap);
+}
+
 /* Clear reg/sp for all instructions and add register hints. */
-static void asm_setup_regsp(ASMState *as, GCtrace *T)
+static void asm_setup_regsp(ASMState *as)
 {
+  GCtrace *T = as->T;
   IRRef i, nins;
   int inloop;

@@ -4289,10 +4321,8 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
     case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
-#if !LJ_64
-      if (as->evenspill < 3)  /* lj_str_new and lj_tab_newkey need 3 args. */
-        as->evenspill = 3;
-#endif
+      if (REGARG_NUMGPR < 3 && as->evenspill < 3)
+        as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
     case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
@@ -4304,12 +4334,18 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
       break;
     case IR_POW:
       if (irt_isnum(ir->t)) {
+#if LJ_TARGET_X86ORX64
        ir->prev = REGSP_HINT(RID_XMM0);
        if (inloop)
          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+#else
+       ir->prev = REGSP_HINT(RID_FPRET);
+       if (inloop)
+         as->modset |= RSET_SCRATCH;
+#endif
        continue;
       }
-      /* fallthrough */
+      /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
 #if LJ_64 && LJ_HASFFI
       if (!irt_isnum(ir->t)) {
@@ -4321,6 +4357,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
 #endif
       break;
     case IR_FPMATH:
+#if LJ_TARGET_X86ORX64
       if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
        ir->prev = REGSP_HINT(RID_XMM0);
 #if !LJ_64
@@ -4337,7 +4374,14 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
        continue;
       }
       break;
-      /* Non-constant shift counts need to be in RID_ECX. */
+#else
+      ir->prev = REGSP_HINT(RID_FPRET);
+      if (inloop)
+        as->modset |= RSET_SCRATCH;
+      continue;
+#endif
+#if LJ_TARGET_X86ORX64
+      /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
     case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
        IR(ir->op2)->r = REGSP_HINT(RID_ECX);
@@ -4345,6 +4389,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
        rset_set(as->modset, RID_ECX);
       }
       break;
+#endif
     /* Do not propagate hints across type conversions. */
     case IR_CONV: case IR_TOBIT:
       break;
@@ -4366,14 +4411,6 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)

 /* -- Assembler core ------------------------------------------------------ */

-/* Define this if you want to run LuaJIT with Valgrind. */
-#ifdef LUAJIT_USE_VALGRIND
-#include <valgrind/valgrind.h>
-#define VG_INVALIDATE(p, sz)  VALGRIND_DISCARD_TRANSLATIONS(p, sz)
-#else
-#define VG_INVALIDATE(p, sz)  ((void)0)
-#endif
-
 /* Assemble a trace. */
 void lj_asm_trace(jit_State *J, GCtrace *T)
 {
@@ -4397,45 +4434,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mctop = lj_mcode_reserve(J, &as->mcbot);  /* Reserve MCode memory. */
   as->mcp = as->mctop;
   as->mclim = as->mcbot + MCLIM_REDZONE;
-  asm_exitstub_setup(as, T->nsnap);
+  asm_setup_target(as);

   do {
     as->mcp = as->mctop;
     as->curins = T->nins;
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
-    /* Realign and leave room for backwards loop branch or exit branch. */
-    if (as->realign) {
-      int i = ((int)(intptr_t)as->realign) & 15;
-      MCode *p = as->mctop;
-      /* Fill unused mcode tail with NOPs to make the prefetcher happy. */
-      while (i-- > 0)
-        *--p = XI_NOP;
-      as->mctop = p;
-      as->mcp = p - (as->loopinv ? 5 : 2);  /* Space for short/near jmp. */
-    } else {
-      as->mcp = as->mctop - 5;  /* Space for exit branch (near jmp). */
-    }
-    as->invmcp = as->mcp;
+
+    /* General trace setup. Emit tail of trace. */
+    asm_tail_prep(as);
     as->mcloop = NULL;
-    as->testmcp = NULL;
+    as->flagmcp = NULL;
     as->topslot = 0;
     as->gcsteps = 0;
     as->sectref = as->loopref;
     as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
-
-    /* Setup register allocation. */
-    asm_setup_regsp(as, T);
-
-    if (!as->loopref) {
-      /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
-      as->mcp -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
-      as->invmcp = NULL;
+    asm_setup_regsp(as);
+    if (!as->loopref)
       asm_tail_link(as);
+
+    /* Assemble a trace in linear backwards order. */
+    for (as->curins--; as->curins > as->stopins; as->curins--) {
+      IRIns *ir = IR(as->curins);
+      lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
+      if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
+        continue;  /* Dead-code elimination can be soooo easy. */
+      if (irt_isguard(ir->t))
+        asm_snap_prep(as);
+      RA_DBG_REF();
+      checkmclim(as);
+      asm_ir(as, ir);
     }
-    asm_trace(as);
   } while (as->realign);  /* Retry in case the MCode needs to be realigned. */

+  /* Emit head of trace. */
   RA_DBG_REF();
   checkmclim(as);
   if (as->gcsteps) {
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 564ffc63..48b53b6d 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -21,6 +21,8 @@
 #define FPRDEF(_) \
   _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
 #endif
+#define VRIDDEF(_) \
+  _(MRM)

 #define RIDENUM(name)  RID_##name,

@@ -63,6 +65,7 @@ enum {
 #define RSET_GPR  (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
 #define RSET_FPR  (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
 #define RSET_ALL  (RSET_GPR|RSET_FPR)
+#define RSET_INIT  RSET_ALL

 #if LJ_64
 /* Note: this requires the use of FORCE_REX! */
@@ -80,6 +83,7 @@ enum {
   (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
 #define REGARG_GPRS \
   (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
+#define REGARG_NUMGPR  4
 #define REGARG_FIRSTFPR  RID_XMM0
 #define REGARG_LASTFPR  RID_XMM3
 #define STACKARG_OFS  (4*8)
@@ -90,6 +94,7 @@ enum {
 #define REGARG_GPRS \
   (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
     <<5))<<5))<<5))<<5))<<5))
+#define REGARG_NUMGPR  6
 #define REGARG_FIRSTFPR  RID_XMM0
 #define REGARG_LASTFPR  RID_XMM7
 #define STACKARG_OFS  0
@@ -98,6 +103,7 @@ enum {
 /* Common x86 ABI. */
 #define RSET_SCRATCH  (RSET_ACD|RSET_FPR)
 #define REGARG_GPRS  (RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
+#define REGARG_NUMGPR  2  /* Fastcall only. */
 #define STACKARG_OFS  0
 #endif

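With REGARG_NUMGPR now exported by the target header (4 GPR argument registers on x64/Windows, 6 on x64/POSIX, 2 for x86 fastcall), generic code can reason about the calling convention numerically instead of through #if blocks, as the IR_SNEW/IR_NEWREF hunk above does with its REGARG_NUMGPR < 3 test. The snippet below is only a sketch of that pattern with stand-in values and variables; it is not LuaJIT code.

/* sketch.c -- hypothetical usage of REGARG_NUMGPR, not part of the commit. */
#include <stdio.h>

#define REGARG_NUMGPR 2   /* e.g. the x86 fastcall value from the header above */

int main(void)
{
  int evenspill = 0;  /* stack slots reserved for outgoing call arguments */
  int nargs = 3;      /* lj_str_new() and lj_tab_newkey() take three arguments */
  /* Arguments that do not fit in argument registers need stack slots. */
  if (REGARG_NUMGPR < nargs && evenspill < nargs)
    evenspill = nargs;
  printf("evenspill = %d\n", evenspill);
  return 0;
}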