| author | Mike Pall <mike> | 2011-05-12 01:27:20 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2011-05-12 01:27:20 +0200 |
| commit | 800349387a25b73e1c98f4dcf39a65b7559894fe | |
| tree | 1d78d45553bff8354603662dc06bc8acdce2085c /src | |
| parent | b79cdba3a06cbabb772535b5bce18d6460135845 | |
Disentangle target-specific parts of JIT assembler backend.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm.c | 341 |
| -rw-r--r-- | src/lj_target_x86.h | 6 |
2 files changed, 193 insertions, 154 deletions
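
The patch moves x86-only knowledge behind a small set of generic hooks: register moves and spill loads/stores go through emit_movrr/emit_spload/emit_spstore, stack adjustment through emit_spsub, tail and target setup through asm_tail_prep/asm_setup_target, and the register allocator now walks the target's GPRDEF/FPRDEF lists instead of naming x86 registers one by one. The X-macro expansion that makes the new MINCOST/ra_evict code target-independent is shown in this minimal standalone sketch; the register subset, costs, and main() are illustrative only, and the real MINCOST also honors the allow set and RSET_ALL:

```c
#include <stdint.h>
#include <stdio.h>

/* Per-target GPR list as an X-macro (a subset of the x86 GPRDEF). */
#define GPRDEF(_)	_(EAX) _(ECX) _(EDX)

/* Register ids generated from the same list. */
#define RIDENUM(name)	RID_##name,
enum { GPRDEF(RIDENUM) RID_MAX_GPR };

/* Target-independent "track the cheapest register" step, as in ra_evict. */
#define MINCOST(name) \
  if (cost[RID_##name] < best) { best = cost[RID_##name]; pick = RID_##name; }

int main(void)
{
  uint32_t cost[RID_MAX_GPR] = { 7, 2, 5 };  /* Example eviction costs. */
  uint32_t best = ~0u;
  int pick = -1;
  GPRDEF(MINCOST)  /* Expands to one comparison per register in the list. */
  printf("evict register id %d (cost %u)\n", pick, best);  /* -> id 1, cost 2 */
  return 0;
}
```

A new backend can then supply its own GPRDEF/FPRDEF lists in its lj_target_*.h without touching the allocator loop.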
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 46142f5c..be6c359e 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
| @@ -41,7 +41,9 @@ typedef struct ASMState { | |||
| 41 | IRIns *ir; /* Copy of pointer to IR instructions/constants. */ | 41 | IRIns *ir; /* Copy of pointer to IR instructions/constants. */ |
| 42 | jit_State *J; /* JIT compiler state. */ | 42 | jit_State *J; /* JIT compiler state. */ |
| 43 | 43 | ||
| 44 | #if LJ_TARGET_X86ORX64 | ||
| 44 | x86ModRM mrm; /* Fused x86 address operand. */ | 45 | x86ModRM mrm; /* Fused x86 address operand. */ |
| 46 | #endif | ||
| 45 | 47 | ||
| 46 | RegSet freeset; /* Set of free registers. */ | 48 | RegSet freeset; /* Set of free registers. */ |
| 47 | RegSet modset; /* Set of registers modified inside the loop. */ | 49 | RegSet modset; /* Set of registers modified inside the loop. */ |
| @@ -77,7 +79,7 @@ typedef struct ASMState { | |||
| 77 | MCode *mctop; /* Top of generated MCode. */ | 79 | MCode *mctop; /* Top of generated MCode. */ |
| 78 | MCode *mcloop; /* Pointer to loop MCode (or NULL). */ | 80 | MCode *mcloop; /* Pointer to loop MCode (or NULL). */ |
| 79 | MCode *invmcp; /* Points to invertible loop branch (or NULL). */ | 81 | MCode *invmcp; /* Points to invertible loop branch (or NULL). */ |
| 80 | MCode *testmcp; /* Pending opportunity to remove test r,r. */ | 82 | MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ |
| 81 | MCode *realign; /* Realign loop if not NULL. */ | 83 | MCode *realign; /* Realign loop if not NULL. */ |
| 82 | 84 | ||
| 83 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ | 85 | IRRef1 phireg[RID_MAX]; /* PHI register references. */ |
| @@ -102,10 +104,6 @@ typedef struct ASMState { | |||
| 102 | ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \ | 104 | ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \ |
| 103 | (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD) | 105 | (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD) |
| 104 | 106 | ||
| 105 | /* Instruction selection for XMM moves. */ | ||
| 106 | #define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS) | ||
| 107 | #define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD) | ||
| 108 | |||
| 109 | /* Sparse limit checks using a red zone before the actual limit. */ | 107 | /* Sparse limit checks using a red zone before the actual limit. */ |
| 110 | #define MCLIM_REDZONE 64 | 108 | #define MCLIM_REDZONE 64 |
| 111 | #define checkmclim(as) \ | 109 | #define checkmclim(as) \ |
| @@ -116,7 +114,23 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as) | |||
| 116 | lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE)); | 114 | lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE)); |
| 117 | } | 115 | } |
| 118 | 116 | ||
| 119 | /* -- Emit x86 instructions ----------------------------------------------- */ | 117 | /* Arch-specific field offsets. */ |
| 118 | static const uint8_t field_ofs[IRFL__MAX+1] = { | ||
| 119 | #define FLOFS(name, ofs) (uint8_t)(ofs), | ||
| 120 | IRFLDEF(FLOFS) | ||
| 121 | #undef FLOFS | ||
| 122 | 0 | ||
| 123 | }; | ||
| 124 | |||
| 125 | /* Define this if you want to run LuaJIT with Valgrind. */ | ||
| 126 | #ifdef LUAJIT_USE_VALGRIND | ||
| 127 | #include <valgrind/valgrind.h> | ||
| 128 | #define VG_INVALIDATE(p, sz) VALGRIND_DISCARD_TRANSLATIONS(p, sz) | ||
| 129 | #else | ||
| 130 | #define VG_INVALIDATE(p, sz) ((void)0) | ||
| 131 | #endif | ||
| 132 | |||
| 133 | /* -- Emit basic instructions --------------------------------------------- */ | ||
| 120 | 134 | ||
| 121 | #define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7))) | 135 | #define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7))) |
| 122 | 136 | ||
| @@ -338,16 +352,6 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) | |||
| 338 | as->mcp = emit_opm(xo, mode, rr, rb, p, 0); | 352 | as->mcp = emit_opm(xo, mode, rr, rb, p, 0); |
| 339 | } | 353 | } |
| 340 | 354 | ||
| 341 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
| 342 | { | ||
| 343 | if (ofs) { | ||
| 344 | if ((as->flags & JIT_F_LEA_AGU)) | ||
| 345 | emit_rmro(as, XO_LEA, r, r, ofs); | ||
| 346 | else | ||
| 347 | emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 351 | /* op rm/mrm, i */ | 355 | /* op rm/mrm, i */ |
| 352 | static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) | 356 | static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) |
| 353 | { | 357 | { |
| @@ -362,7 +366,11 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) | |||
| 362 | emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64)); | 366 | emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64)); |
| 363 | } | 367 | } |
| 364 | 368 | ||
| 365 | /* -- Emit moves ---------------------------------------------------------- */ | 369 | /* -- Emit loads/stores --------------------------------------------------- */ |
| 370 | |||
| 371 | /* Instruction selection for XMM moves. */ | ||
| 372 | #define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS) | ||
| 373 | #define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD) | ||
| 366 | 374 | ||
| 367 | /* mov [base+ofs], i */ | 375 | /* mov [base+ofs], i */ |
| 368 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | 376 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) |
| @@ -435,7 +443,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | |||
| 435 | emit_rma(as, XMM_MOVRM(as), r, &tv->n); | 443 | emit_rma(as, XMM_MOVRM(as), r, &tv->n); |
| 436 | } | 444 | } |
| 437 | 445 | ||
| 438 | /* -- Emit branches ------------------------------------------------------- */ | 446 | /* -- Emit control-flow instructions -------------------------------------- */ |
| 439 | 447 | ||
| 440 | /* Label for short jumps. */ | 448 | /* Label for short jumps. */ |
| 441 | typedef MCode *MCLabel; | 449 | typedef MCode *MCLabel; |
| @@ -520,6 +528,59 @@ static void emit_call_(ASMState *as, MCode *target) | |||
| 520 | 528 | ||
| 521 | #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) | 529 | #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) |
| 522 | 530 | ||
| 531 | /* -- Emit generic operations --------------------------------------------- */ | ||
| 532 | |||
| 533 | /* Use 64 bit operations to handle 64 bit IR types. */ | ||
| 534 | #if LJ_64 | ||
| 535 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) | ||
| 536 | #else | ||
| 537 | #define REX_64IR(ir, r) (r) | ||
| 538 | #endif | ||
| 539 | |||
| 540 | /* Generic move between two regs. */ | ||
| 541 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | ||
| 542 | { | ||
| 543 | UNUSED(ir); | ||
| 544 | if (dst < RID_MAX_GPR) | ||
| 545 | emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); | ||
| 546 | else | ||
| 547 | emit_rr(as, XMM_MOVRR(as), dst, src); | ||
| 548 | } | ||
| 549 | |||
| 550 | /* Generic load of register from stack slot. */ | ||
| 551 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | ||
| 552 | { | ||
| 553 | if (r < RID_MAX_GPR) | ||
| 554 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); | ||
| 555 | else | ||
| 556 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); | ||
| 557 | } | ||
| 558 | |||
| 559 | /* Generic store of register to stack slot. */ | ||
| 560 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | ||
| 561 | { | ||
| 562 | if (r < RID_MAX_GPR) | ||
| 563 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); | ||
| 564 | else | ||
| 565 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); | ||
| 566 | } | ||
| 567 | |||
| 568 | /* Add offset to pointer. */ | ||
| 569 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
| 570 | { | ||
| 571 | if (ofs) { | ||
| 572 | if ((as->flags & JIT_F_LEA_AGU)) | ||
| 573 | emit_rmro(as, XO_LEA, r, r, ofs); | ||
| 574 | else | ||
| 575 | emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); | ||
| 576 | } | ||
| 577 | } | ||
| 578 | |||
| 579 | #define emit_spsub(as, ofs) emit_addptr(as, RID_ESP|REX_64, -(ofs)) | ||
| 580 | |||
| 581 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | ||
| 582 | #define emit_canremat(ref) ((ref) <= REF_BASE) | ||
| 583 | |||
| 523 | /* -- Register allocator debugging ---------------------------------------- */ | 584 | /* -- Register allocator debugging ---------------------------------------- */ |
| 524 | 585 | ||
| 525 | /* #define LUAJIT_DEBUG_RA */ | 586 | /* #define LUAJIT_DEBUG_RA */ |
| @@ -533,7 +594,7 @@ static void emit_call_(ASMState *as, MCode *target) | |||
| 533 | static const char *const ra_regname[] = { | 594 | static const char *const ra_regname[] = { |
| 534 | GPRDEF(RIDNAME) | 595 | GPRDEF(RIDNAME) |
| 535 | FPRDEF(RIDNAME) | 596 | FPRDEF(RIDNAME) |
| 536 | "mrm", | 597 | VRIDDEF(RIDNAME) |
| 537 | NULL | 598 | NULL |
| 538 | }; | 599 | }; |
| 539 | #undef RIDNAME | 600 | #undef RIDNAME |
| @@ -591,7 +652,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
| 591 | p += sprintf(p, "K%03d", REF_BIAS - ref); | 652 | p += sprintf(p, "K%03d", REF_BIAS - ref); |
| 592 | } else if (e[1] == 's') { | 653 | } else if (e[1] == 's') { |
| 593 | uint32_t slot = va_arg(argp, uint32_t); | 654 | uint32_t slot = va_arg(argp, uint32_t); |
| 594 | p += sprintf(p, "[esp+0x%x]", sps_scale(slot)); | 655 | p += sprintf(p, "[sp+0x%x]", sps_scale(slot)); |
| 595 | } else { | 656 | } else { |
| 596 | lua_assert(0); | 657 | lua_assert(0); |
| 597 | } | 658 | } |
| @@ -634,14 +695,17 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) | |||
| 634 | /* Setup register allocator. */ | 695 | /* Setup register allocator. */ |
| 635 | static void ra_setup(ASMState *as) | 696 | static void ra_setup(ASMState *as) |
| 636 | { | 697 | { |
| 698 | Reg r; | ||
| 637 | /* Initially all regs (except the stack pointer) are free for use. */ | 699 | /* Initially all regs (except the stack pointer) are free for use. */ |
| 638 | as->freeset = RSET_ALL; | 700 | as->freeset = RSET_INIT; |
| 639 | as->modset = RSET_EMPTY; | 701 | as->modset = RSET_EMPTY; |
| 640 | as->weakset = RSET_EMPTY; | 702 | as->weakset = RSET_EMPTY; |
| 641 | as->phiset = RSET_EMPTY; | 703 | as->phiset = RSET_EMPTY; |
| 642 | memset(as->phireg, 0, sizeof(as->phireg)); | 704 | memset(as->phireg, 0, sizeof(as->phireg)); |
| 643 | memset(as->cost, 0, sizeof(as->cost)); | 705 | memset(as->cost, 0, sizeof(as->cost)); |
| 644 | as->cost[RID_ESP] = REGCOST(~0u, 0u); | 706 | for (r = RID_MIN_GPR; r < RID_MAX; r++) |
| 707 | if (!rset_test(RSET_INIT, r)) | ||
| 708 | as->cost[r] = REGCOST(~0u, 0u); | ||
| 645 | } | 709 | } |
| 646 | 710 | ||
| 647 | /* Rematerialize constants. */ | 711 | /* Rematerialize constants. */ |
| @@ -655,11 +719,11 @@ static Reg ra_rematk(ASMState *as, IRIns *ir) | |||
| 655 | RA_DBGX((as, "remat $i $r", ir, r)); | 719 | RA_DBGX((as, "remat $i $r", ir, r)); |
| 656 | if (ir->o == IR_KNUM) { | 720 | if (ir->o == IR_KNUM) { |
| 657 | emit_loadn(as, r, ir_knum(ir)); | 721 | emit_loadn(as, r, ir_knum(ir)); |
| 658 | } else if (ir->o == IR_BASE) { | 722 | } else if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { |
| 659 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ | 723 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ |
| 660 | emit_getgl(as, r, jit_base); | 724 | emit_getgl(as, r, jit_base); |
| 661 | } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */ | 725 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { |
| 662 | lua_assert(irt_isnil(ir->t)); | 726 | lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ |
| 663 | emit_getgl(as, r, jit_L); | 727 | emit_getgl(as, r, jit_L); |
| 664 | #if LJ_64 | 728 | #if LJ_64 |
| 665 | } else if (ir->o == IR_KINT64) { | 729 | } else if (ir->o == IR_KINT64) { |
| @@ -708,28 +772,11 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) | |||
| 708 | return r; | 772 | return r; |
| 709 | } | 773 | } |
| 710 | 774 | ||
| 711 | /* Use 64 bit operations to handle 64 bit IR types. */ | ||
| 712 | #if LJ_64 | ||
| 713 | #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) | ||
| 714 | #else | ||
| 715 | #define REX_64IR(ir, r) (r) | ||
| 716 | #endif | ||
| 717 | |||
| 718 | /* Generic move between two regs. */ | ||
| 719 | static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2) | ||
| 720 | { | ||
| 721 | UNUSED(ir); | ||
| 722 | if (r1 < RID_MAX_GPR) | ||
| 723 | emit_rr(as, XO_MOV, REX_64IR(ir, r1), r2); | ||
| 724 | else | ||
| 725 | emit_rr(as, XMM_MOVRR(as), r1, r2); | ||
| 726 | } | ||
| 727 | |||
| 728 | /* Restore a register (marked as free). Rematerialize or force a spill. */ | 775 | /* Restore a register (marked as free). Rematerialize or force a spill. */ |
| 729 | static Reg ra_restore(ASMState *as, IRRef ref) | 776 | static Reg ra_restore(ASMState *as, IRRef ref) |
| 730 | { | 777 | { |
| 731 | IRIns *ir = IR(ref); | 778 | IRIns *ir = IR(ref); |
| 732 | if (irref_isk(ref) || ref == REF_BASE) { | 779 | if (emit_canremat(ref)) { |
| 733 | return ra_rematk(as, ir); | 780 | return ra_rematk(as, ir); |
| 734 | } else { | 781 | } else { |
| 735 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ | 782 | int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ |
| @@ -740,11 +787,7 @@ static Reg ra_restore(ASMState *as, IRRef ref) | |||
| 740 | if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ | 787 | if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ |
| 741 | ra_modified(as, r); | 788 | ra_modified(as, r); |
| 742 | RA_DBGX((as, "restore $i $r", ir, r)); | 789 | RA_DBGX((as, "restore $i $r", ir, r)); |
| 743 | if (r < RID_MAX_GPR) | 790 | emit_spload(as, ir, r, ofs); |
| 744 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); | ||
| 745 | else | ||
| 746 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | ||
| 747 | r, RID_ESP, ofs); | ||
| 748 | } | 791 | } |
| 749 | return r; | 792 | return r; |
| 750 | } | 793 | } |
| @@ -754,16 +797,13 @@ static Reg ra_restore(ASMState *as, IRRef ref) | |||
| 754 | static void ra_save(ASMState *as, IRIns *ir, Reg r) | 797 | static void ra_save(ASMState *as, IRIns *ir, Reg r) |
| 755 | { | 798 | { |
| 756 | RA_DBGX((as, "save $i $r", ir, r)); | 799 | RA_DBGX((as, "save $i $r", ir, r)); |
| 757 | if (r < RID_MAX_GPR) | 800 | emit_spstore(as, ir, r, sps_scale(ir->s)); |
| 758 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, sps_scale(ir->s)); | ||
| 759 | else | ||
| 760 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, | ||
| 761 | r, RID_ESP, sps_scale(ir->s)); | ||
| 762 | } | 801 | } |
| 763 | 802 | ||
| 764 | #define MINCOST(r) \ | 803 | #define MINCOST(name) \ |
| 765 | if (LJ_LIKELY(allow&RID2RSET(r)) && as->cost[r] < cost) \ | 804 | if (rset_test(RSET_ALL, RID_##name) && \ |
| 766 | cost = as->cost[r] | 805 | LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \ |
| 806 | cost = as->cost[RID_##name]; | ||
| 767 | 807 | ||
| 768 | /* Evict the register with the lowest cost, forcing a restore. */ | 808 | /* Evict the register with the lowest cost, forcing a restore. */ |
| 769 | static Reg ra_evict(ASMState *as, RegSet allow) | 809 | static Reg ra_evict(ASMState *as, RegSet allow) |
| @@ -772,19 +812,9 @@ static Reg ra_evict(ASMState *as, RegSet allow) | |||
| 772 | RegCost cost = ~(RegCost)0; | 812 | RegCost cost = ~(RegCost)0; |
| 773 | lua_assert(allow != RSET_EMPTY); | 813 | lua_assert(allow != RSET_EMPTY); |
| 774 | if (allow < RID2RSET(RID_MAX_GPR)) { | 814 | if (allow < RID2RSET(RID_MAX_GPR)) { |
| 775 | MINCOST(RID_EAX);MINCOST(RID_ECX);MINCOST(RID_EDX);MINCOST(RID_EBX); | 815 | GPRDEF(MINCOST) |
| 776 | MINCOST(RID_EBP);MINCOST(RID_ESI);MINCOST(RID_EDI); | ||
| 777 | #if LJ_64 | ||
| 778 | MINCOST(RID_R8D);MINCOST(RID_R9D);MINCOST(RID_R10D);MINCOST(RID_R11D); | ||
| 779 | MINCOST(RID_R12D);MINCOST(RID_R13D);MINCOST(RID_R14D);MINCOST(RID_R15D); | ||
| 780 | #endif | ||
| 781 | } else { | 816 | } else { |
| 782 | MINCOST(RID_XMM0);MINCOST(RID_XMM1);MINCOST(RID_XMM2);MINCOST(RID_XMM3); | 817 | FPRDEF(MINCOST) |
| 783 | MINCOST(RID_XMM4);MINCOST(RID_XMM5);MINCOST(RID_XMM6);MINCOST(RID_XMM7); | ||
| 784 | #if LJ_64 | ||
| 785 | MINCOST(RID_XMM8);MINCOST(RID_XMM9);MINCOST(RID_XMM10);MINCOST(RID_XMM11); | ||
| 786 | MINCOST(RID_XMM12);MINCOST(RID_XMM13);MINCOST(RID_XMM14);MINCOST(RID_XMM15); | ||
| 787 | #endif | ||
| 788 | } | 818 | } |
| 789 | ref = regcost_ref(cost); | 819 | ref = regcost_ref(cost); |
| 790 | lua_assert(ref >= as->T->nk && ref < as->T->nins); | 820 | lua_assert(ref >= as->T->nk && ref < as->T->nins); |
| @@ -836,7 +866,7 @@ static void ra_evictk(ASMState *as) | |||
| 836 | while (work) { | 866 | while (work) { |
| 837 | Reg r = rset_pickbot(work); | 867 | Reg r = rset_pickbot(work); |
| 838 | IRRef ref = regcost_ref(as->cost[r]); | 868 | IRRef ref = regcost_ref(as->cost[r]); |
| 839 | if (irref_isk(ref)) { | 869 | if (emit_canremat(ref)) { |
| 840 | ra_rematk(as, IR(ref)); | 870 | ra_rematk(as, IR(ref)); |
| 841 | checkmclim(as); | 871 | checkmclim(as); |
| 842 | } | 872 | } |
| @@ -861,7 +891,7 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) | |||
| 861 | if (rset_test(pick, r)) /* Use hint register if possible. */ | 891 | if (rset_test(pick, r)) /* Use hint register if possible. */ |
| 862 | goto found; | 892 | goto found; |
| 863 | /* Rematerialization is cheaper than missing a hint. */ | 893 | /* Rematerialization is cheaper than missing a hint. */ |
| 864 | if (rset_test(allow, r) && irref_isk(regcost_ref(as->cost[r]))) { | 894 | if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) { |
| 865 | ra_rematk(as, IR(regcost_ref(as->cost[r]))); | 895 | ra_rematk(as, IR(regcost_ref(as->cost[r]))); |
| 866 | goto found; | 896 | goto found; |
| 867 | } | 897 | } |
| @@ -873,11 +903,9 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) | |||
| 873 | pick &= ~as->modset; | 903 | pick &= ~as->modset; |
| 874 | r = rset_pickbot(pick); /* Reduce conflicts with inverse allocation. */ | 904 | r = rset_pickbot(pick); /* Reduce conflicts with inverse allocation. */ |
| 875 | } else { | 905 | } else { |
| 876 | #if LJ_64 | ||
| 877 | /* We've got plenty of regs, so get callee-save regs if possible. */ | 906 | /* We've got plenty of regs, so get callee-save regs if possible. */ |
| 878 | if ((pick & ~RSET_SCRATCH)) | 907 | if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH)) |
| 879 | pick &= ~RSET_SCRATCH; | 908 | pick &= ~RSET_SCRATCH; |
| 880 | #endif | ||
| 881 | r = rset_picktop(pick); | 909 | r = rset_picktop(pick); |
| 882 | } | 910 | } |
| 883 | } else { | 911 | } else { |
| @@ -916,7 +944,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up) | |||
| 916 | rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ | 944 | rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ |
| 917 | ra_noweak(as, up); | 945 | ra_noweak(as, up); |
| 918 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); | 946 | RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); |
| 919 | ra_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ | 947 | emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ |
| 920 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ | 948 | if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ |
| 921 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); | 949 | lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); |
| 922 | ren = tref_ref(lj_ir_emit(as->J)); | 950 | ren = tref_ref(lj_ir_emit(as->J)); |
| @@ -949,7 +977,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) | |||
| 949 | Reg dest = ra_dest(as, ir, RID2RSET(r)); | 977 | Reg dest = ra_dest(as, ir, RID2RSET(r)); |
| 950 | if (dest != r) { | 978 | if (dest != r) { |
| 951 | ra_scratch(as, RID2RSET(r)); | 979 | ra_scratch(as, RID2RSET(r)); |
| 952 | ra_movrr(as, ir, dest, r); | 980 | emit_movrr(as, ir, dest, r); |
| 953 | } | 981 | } |
| 954 | } | 982 | } |
| 955 | 983 | ||
| @@ -993,7 +1021,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) | |||
| 993 | ra_modified(as, left); | 1021 | ra_modified(as, left); |
| 994 | ra_rename(as, left, dest); | 1022 | ra_rename(as, left, dest); |
| 995 | } else { | 1023 | } else { |
| 996 | ra_movrr(as, ir, dest, left); | 1024 | emit_movrr(as, ir, dest, left); |
| 997 | } | 1025 | } |
| 998 | } | 1026 | } |
| 999 | } | 1027 | } |
| @@ -1152,14 +1180,6 @@ static void asm_guardcc(ASMState *as, int cc) | |||
| 1152 | 1180 | ||
| 1153 | /* -- Memory operand fusion ----------------------------------------------- */ | 1181 | /* -- Memory operand fusion ----------------------------------------------- */ |
| 1154 | 1182 | ||
| 1155 | /* Arch-specific field offsets. */ | ||
| 1156 | static const uint8_t field_ofs[IRFL__MAX+1] = { | ||
| 1157 | #define FLOFS(name, ofs) (uint8_t)(ofs), | ||
| 1158 | IRFLDEF(FLOFS) | ||
| 1159 | #undef FLOFS | ||
| 1160 | 0 | ||
| 1161 | }; | ||
| 1162 | |||
| 1163 | /* Limit linear search to this distance. Avoids O(n^2) behavior. */ | 1183 | /* Limit linear search to this distance. Avoids O(n^2) behavior. */ |
| 1164 | #define CONFLICT_SEARCH_LIM 31 | 1184 | #define CONFLICT_SEARCH_LIM 31 |
| 1165 | 1185 | ||
| @@ -1503,7 +1523,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
| 1503 | lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ | 1523 | lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ |
| 1504 | if (ra_hasreg(ir->r)) { | 1524 | if (ra_hasreg(ir->r)) { |
| 1505 | ra_noweak(as, ir->r); | 1525 | ra_noweak(as, ir->r); |
| 1506 | ra_movrr(as, ir, r, ir->r); | 1526 | emit_movrr(as, ir, r, ir->r); |
| 1507 | } else { | 1527 | } else { |
| 1508 | ra_allocref(as, ref, RID2RSET(r)); | 1528 | ra_allocref(as, ref, RID2RSET(r)); |
| 1509 | } | 1529 | } |
| @@ -2880,8 +2900,8 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) | |||
| 2880 | RegSet allow = RSET_GPR; | 2900 | RegSet allow = RSET_GPR; |
| 2881 | Reg dest, right; | 2901 | Reg dest, right; |
| 2882 | int32_t k = 0; | 2902 | int32_t k = 0; |
| 2883 | if (as->testmcp == as->mcp) { /* Drop test r,r instruction. */ | 2903 | if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */ |
| 2884 | as->testmcp = NULL; | 2904 | as->flagmcp = NULL; |
| 2885 | as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2; | 2905 | as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2; |
| 2886 | } | 2906 | } |
| 2887 | right = IR(rref)->r; | 2907 | right = IR(rref)->r; |
| @@ -2996,7 +3016,7 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
| 2996 | { | 3016 | { |
| 2997 | if (irt_isnum(ir->t)) | 3017 | if (irt_isnum(ir->t)) |
| 2998 | asm_fparith(as, ir, XO_ADDSD); | 3018 | asm_fparith(as, ir, XO_ADDSD); |
| 2999 | else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp || | 3019 | else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || |
| 3000 | irt_is64(ir->t) || !asm_lea(as, ir)) | 3020 | irt_is64(ir->t) || !asm_lea(as, ir)) |
| 3001 | asm_intarith(as, ir, XOg_ADD); | 3021 | asm_intarith(as, ir, XOg_ADD); |
| 3002 | } | 3022 | } |
| @@ -3215,7 +3235,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) | |||
| 3215 | /* Use test r,r instead of cmp r,0. */ | 3235 | /* Use test r,r instead of cmp r,0. */ |
| 3216 | emit_rr(as, XO_TEST, r64 + left, left); | 3236 | emit_rr(as, XO_TEST, r64 + left, left); |
| 3217 | if (irl+1 == ir) /* Referencing previous ins? */ | 3237 | if (irl+1 == ir) /* Referencing previous ins? */ |
| 3218 | as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */ | 3238 | as->flagmcp = as->mcp; /* Set flag to drop test r,r if possible. */ |
| 3219 | } else { | 3239 | } else { |
| 3220 | emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm); | 3240 | emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm); |
| 3221 | } | 3241 | } |
| @@ -3273,7 +3293,7 @@ static void asm_comp_int64(ASMState *as, IRIns *ir) | |||
| 3273 | 3293 | ||
| 3274 | /* All register allocations must be performed _before_ this point. */ | 3294 | /* All register allocations must be performed _before_ this point. */ |
| 3275 | l_around = emit_label(as); | 3295 | l_around = emit_label(as); |
| 3276 | as->invmcp = as->testmcp = NULL; /* Cannot use these optimizations. */ | 3296 | as->invmcp = as->flagmcp = NULL; /* Cannot use these optimizations. */ |
| 3277 | 3297 | ||
| 3278 | /* Loword comparison and branch. */ | 3298 | /* Loword comparison and branch. */ |
| 3279 | asm_guardcc(as, cc >> 4); /* Always use unsigned compare for loword. */ | 3299 | asm_guardcc(as, cc >> 4); /* Always use unsigned compare for loword. */ |
| @@ -3620,7 +3640,7 @@ static void asm_phi(ASMState *as, IRIns *ir) | |||
| 3620 | r = ra_allocref(as, ir->op2, allow); | 3640 | r = ra_allocref(as, ir->op2, allow); |
| 3621 | } else { /* Duplicate right PHI, need a copy (rare). */ | 3641 | } else { /* Duplicate right PHI, need a copy (rare). */ |
| 3622 | r = ra_scratch(as, allow); | 3642 | r = ra_scratch(as, allow); |
| 3623 | ra_movrr(as, irr, r, irr->r); | 3643 | emit_movrr(as, irr, r, irr->r); |
| 3624 | } | 3644 | } |
| 3625 | ir->r = (uint8_t)r; | 3645 | ir->r = (uint8_t)r; |
| 3626 | rset_set(as->phiset, r); | 3646 | rset_set(as->phiset, r); |
| @@ -3690,7 +3710,7 @@ static void asm_loop(ASMState *as) | |||
| 3690 | if (as->gcsteps) | 3710 | if (as->gcsteps) |
| 3691 | asm_gc_check(as); | 3711 | asm_gc_check(as); |
| 3692 | /* LOOP marks the transition from the variant to the invariant part. */ | 3712 | /* LOOP marks the transition from the variant to the invariant part. */ |
| 3693 | as->testmcp = as->invmcp = NULL; | 3713 | as->flagmcp = as->invmcp = NULL; |
| 3694 | as->sectref = 0; | 3714 | as->sectref = 0; |
| 3695 | if (!neverfuse(as)) as->fuseref = 0; | 3715 | if (!neverfuse(as)) as->fuseref = 0; |
| 3696 | asm_phi_shuffle(as); | 3716 | asm_phi_shuffle(as); |
| @@ -3732,7 +3752,7 @@ static void asm_head_root(ASMState *as) | |||
| 3732 | emit_setgli(as, vmstate, (int32_t)as->T->traceno); | 3752 | emit_setgli(as, vmstate, (int32_t)as->T->traceno); |
| 3733 | spadj = asm_stack_adjust(as); | 3753 | spadj = asm_stack_adjust(as); |
| 3734 | as->T->spadjust = (uint16_t)spadj; | 3754 | as->T->spadjust = (uint16_t)spadj; |
| 3735 | emit_addptr(as, RID_ESP|REX_64, -spadj); | 3755 | emit_spsub(as, spadj); |
| 3736 | /* Root traces assume a checked stack for the starting proto. */ | 3756 | /* Root traces assume a checked stack for the starting proto. */ |
| 3737 | as->T->topslot = gcref(as->T->startpt)->pt.framesize; | 3757 | as->T->topslot = gcref(as->T->startpt)->pt.framesize; |
| 3738 | } | 3758 | } |
| @@ -3846,7 +3866,7 @@ static void asm_head_side(ASMState *as) | |||
| 3846 | 3866 | ||
| 3847 | /* Store trace number and adjust stack frame relative to the parent. */ | 3867 | /* Store trace number and adjust stack frame relative to the parent. */ |
| 3848 | emit_setgli(as, vmstate, (int32_t)as->T->traceno); | 3868 | emit_setgli(as, vmstate, (int32_t)as->T->traceno); |
| 3849 | emit_addptr(as, RID_ESP|REX_64, -spdelta); | 3869 | emit_spsub(as, spdelta); |
| 3850 | 3870 | ||
| 3851 | /* Restore target registers from parent spill slots. */ | 3871 | /* Restore target registers from parent spill slots. */ |
| 3852 | if (pass3) { | 3872 | if (pass3) { |
| @@ -3859,10 +3879,7 @@ static void asm_head_side(ASMState *as) | |||
| 3859 | if (ra_hasspill(regsp_spill(rs))) { | 3879 | if (ra_hasspill(regsp_spill(rs))) { |
| 3860 | int32_t ofs = sps_scale(regsp_spill(rs)); | 3880 | int32_t ofs = sps_scale(regsp_spill(rs)); |
| 3861 | ra_free(as, r); | 3881 | ra_free(as, r); |
| 3862 | if (r < RID_MAX_GPR) | 3882 | emit_spload(as, ir, r, ofs); |
| 3863 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); | ||
| 3864 | else | ||
| 3865 | emit_rmro(as, XMM_MOVRM(as), r, RID_ESP, ofs); | ||
| 3866 | checkmclim(as); | 3883 | checkmclim(as); |
| 3867 | } | 3884 | } |
| 3868 | } | 3885 | } |
| @@ -3879,7 +3896,7 @@ static void asm_head_side(ASMState *as) | |||
| 3879 | rset_clear(live, rp); | 3896 | rset_clear(live, rp); |
| 3880 | rset_clear(allow, rp); | 3897 | rset_clear(allow, rp); |
| 3881 | ra_free(as, ir->r); | 3898 | ra_free(as, ir->r); |
| 3882 | ra_movrr(as, ir, ir->r, rp); | 3899 | emit_movrr(as, ir, ir->r, rp); |
| 3883 | checkmclim(as); | 3900 | checkmclim(as); |
| 3884 | } | 3901 | } |
| 3885 | 3902 | ||
| @@ -4005,6 +4022,30 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
| 4005 | as->mctop = p; | 4022 | as->mctop = p; |
| 4006 | } | 4023 | } |
| 4007 | 4024 | ||
| 4025 | /* Prepare tail of code. */ | ||
| 4026 | static void asm_tail_prep(ASMState *as) | ||
| 4027 | { | ||
| 4028 | MCode *p = as->mctop; | ||
| 4029 | /* Realign and leave room for backwards loop branch or exit branch. */ | ||
| 4030 | if (as->realign) { | ||
| 4031 | int i = ((int)(intptr_t)as->realign) & 15; | ||
| 4032 | /* Fill unused mcode tail with NOPs to make the prefetcher happy. */ | ||
| 4033 | while (i-- > 0) | ||
| 4034 | *--p = XI_NOP; | ||
| 4035 | as->mctop = p; | ||
| 4036 | p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ | ||
| 4037 | } else { | ||
| 4038 | p -= 5; /* Space for exit branch (near jmp). */ | ||
| 4039 | } | ||
| 4040 | if (as->loopref) { | ||
| 4041 | as->invmcp = as->mcp = p; | ||
| 4042 | } else { | ||
| 4043 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ | ||
| 4044 | as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); | ||
| 4045 | as->invmcp = NULL; | ||
| 4046 | } | ||
| 4047 | } | ||
| 4048 | |||
| 4008 | /* -- Instruction dispatch ------------------------------------------------ */ | 4049 | /* -- Instruction dispatch ------------------------------------------------ */ |
| 4009 | 4050 | ||
| 4010 | /* Assemble a single instruction. */ | 4051 | /* Assemble a single instruction. */ |
| @@ -4160,22 +4201,6 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 4160 | } | 4201 | } |
| 4161 | } | 4202 | } |
| 4162 | 4203 | ||
| 4163 | /* Assemble a trace in linear backwards order. */ | ||
| 4164 | static void asm_trace(ASMState *as) | ||
| 4165 | { | ||
| 4166 | for (as->curins--; as->curins > as->stopins; as->curins--) { | ||
| 4167 | IRIns *ir = IR(as->curins); | ||
| 4168 | lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ | ||
| 4169 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) | ||
| 4170 | continue; /* Dead-code elimination can be soooo easy. */ | ||
| 4171 | if (irt_isguard(ir->t)) | ||
| 4172 | asm_snap_prep(as); | ||
| 4173 | RA_DBG_REF(); | ||
| 4174 | checkmclim(as); | ||
| 4175 | asm_ir(as, ir); | ||
| 4176 | } | ||
| 4177 | } | ||
| 4178 | |||
| 4179 | /* -- Trace setup --------------------------------------------------------- */ | 4204 | /* -- Trace setup --------------------------------------------------------- */ |
| 4180 | 4205 | ||
| 4181 | /* Ensure there are enough stack slots for call arguments. */ | 4206 | /* Ensure there are enough stack slots for call arguments. */ |
| @@ -4215,9 +4240,16 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
| 4215 | #endif | 4240 | #endif |
| 4216 | } | 4241 | } |
| 4217 | 4242 | ||
| 4243 | /* Target-specific setup. */ | ||
| 4244 | static void asm_setup_target(ASMState *as) | ||
| 4245 | { | ||
| 4246 | asm_exitstub_setup(as, as->T->nsnap); | ||
| 4247 | } | ||
| 4248 | |||
| 4218 | /* Clear reg/sp for all instructions and add register hints. */ | 4249 | /* Clear reg/sp for all instructions and add register hints. */ |
| 4219 | static void asm_setup_regsp(ASMState *as, GCtrace *T) | 4250 | static void asm_setup_regsp(ASMState *as) |
| 4220 | { | 4251 | { |
| 4252 | GCtrace *T = as->T; | ||
| 4221 | IRRef i, nins; | 4253 | IRRef i, nins; |
| 4222 | int inloop; | 4254 | int inloop; |
| 4223 | 4255 | ||
| @@ -4289,10 +4321,8 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
| 4289 | #endif | 4321 | #endif |
| 4290 | /* C calls evict all scratch regs and return results in RID_RET. */ | 4322 | /* C calls evict all scratch regs and return results in RID_RET. */ |
| 4291 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: | 4323 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: |
| 4292 | #if !LJ_64 | 4324 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) |
| 4293 | if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */ | 4325 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ |
| 4294 | as->evenspill = 3; | ||
| 4295 | #endif | ||
| 4296 | case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: | 4326 | case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: |
| 4297 | ir->prev = REGSP_HINT(RID_RET); | 4327 | ir->prev = REGSP_HINT(RID_RET); |
| 4298 | if (inloop) | 4328 | if (inloop) |
| @@ -4304,12 +4334,18 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
| 4304 | break; | 4334 | break; |
| 4305 | case IR_POW: | 4335 | case IR_POW: |
| 4306 | if (irt_isnum(ir->t)) { | 4336 | if (irt_isnum(ir->t)) { |
| 4337 | #if LJ_TARGET_X86ORX64 | ||
| 4307 | ir->prev = REGSP_HINT(RID_XMM0); | 4338 | ir->prev = REGSP_HINT(RID_XMM0); |
| 4308 | if (inloop) | 4339 | if (inloop) |
| 4309 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | 4340 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); |
| 4341 | #else | ||
| 4342 | ir->prev = REGSP_HINT(RID_FPRET); | ||
| 4343 | if (inloop) | ||
| 4344 | as->modset |= RSET_SCRATCH; | ||
| 4345 | #endif | ||
| 4310 | continue; | 4346 | continue; |
| 4311 | } | 4347 | } |
| 4312 | /* fallthrough */ | 4348 | /* fallthrough for integer POW */ |
| 4313 | case IR_DIV: case IR_MOD: | 4349 | case IR_DIV: case IR_MOD: |
| 4314 | #if LJ_64 && LJ_HASFFI | 4350 | #if LJ_64 && LJ_HASFFI |
| 4315 | if (!irt_isnum(ir->t)) { | 4351 | if (!irt_isnum(ir->t)) { |
| @@ -4321,6 +4357,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
| 4321 | #endif | 4357 | #endif |
| 4322 | break; | 4358 | break; |
| 4323 | case IR_FPMATH: | 4359 | case IR_FPMATH: |
| 4360 | #if LJ_TARGET_X86ORX64 | ||
| 4324 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ | 4361 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ |
| 4325 | ir->prev = REGSP_HINT(RID_XMM0); | 4362 | ir->prev = REGSP_HINT(RID_XMM0); |
| 4326 | #if !LJ_64 | 4363 | #if !LJ_64 |
| @@ -4337,7 +4374,14 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
| 4337 | continue; | 4374 | continue; |
| 4338 | } | 4375 | } |
| 4339 | break; | 4376 | break; |
| 4340 | /* Non-constant shift counts need to be in RID_ECX. */ | 4377 | #else |
| 4378 | ir->prev = REGSP_HINT(RID_FPRET); | ||
| 4379 | if (inloop) | ||
| 4380 | as->modset |= RSET_SCRATCH; | ||
| 4381 | continue; | ||
| 4382 | #endif | ||
| 4383 | #if LJ_TARGET_X86ORX64 | ||
| 4384 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ | ||
| 4341 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 4385 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: |
| 4342 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { | 4386 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { |
| 4343 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); | 4387 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); |
| @@ -4345,6 +4389,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
| 4345 | rset_set(as->modset, RID_ECX); | 4389 | rset_set(as->modset, RID_ECX); |
| 4346 | } | 4390 | } |
| 4347 | break; | 4391 | break; |
| 4392 | #endif | ||
| 4348 | /* Do not propagate hints across type conversions. */ | 4393 | /* Do not propagate hints across type conversions. */ |
| 4349 | case IR_CONV: case IR_TOBIT: | 4394 | case IR_CONV: case IR_TOBIT: |
| 4350 | break; | 4395 | break; |
| @@ -4366,14 +4411,6 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
| 4366 | 4411 | ||
| 4367 | /* -- Assembler core ------------------------------------------------------ */ | 4412 | /* -- Assembler core ------------------------------------------------------ */ |
| 4368 | 4413 | ||
| 4369 | /* Define this if you want to run LuaJIT with Valgrind. */ | ||
| 4370 | #ifdef LUAJIT_USE_VALGRIND | ||
| 4371 | #include <valgrind/valgrind.h> | ||
| 4372 | #define VG_INVALIDATE(p, sz) VALGRIND_DISCARD_TRANSLATIONS(p, sz) | ||
| 4373 | #else | ||
| 4374 | #define VG_INVALIDATE(p, sz) ((void)0) | ||
| 4375 | #endif | ||
| 4376 | |||
| 4377 | /* Assemble a trace. */ | 4414 | /* Assemble a trace. */ |
| 4378 | void lj_asm_trace(jit_State *J, GCtrace *T) | 4415 | void lj_asm_trace(jit_State *J, GCtrace *T) |
| 4379 | { | 4416 | { |
| @@ -4397,45 +4434,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
| 4397 | as->mctop = lj_mcode_reserve(J, &as->mcbot); /* Reserve MCode memory. */ | 4434 | as->mctop = lj_mcode_reserve(J, &as->mcbot); /* Reserve MCode memory. */ |
| 4398 | as->mcp = as->mctop; | 4435 | as->mcp = as->mctop; |
| 4399 | as->mclim = as->mcbot + MCLIM_REDZONE; | 4436 | as->mclim = as->mcbot + MCLIM_REDZONE; |
| 4400 | asm_exitstub_setup(as, T->nsnap); | 4437 | asm_setup_target(as); |
| 4401 | 4438 | ||
| 4402 | do { | 4439 | do { |
| 4403 | as->mcp = as->mctop; | 4440 | as->mcp = as->mctop; |
| 4404 | as->curins = T->nins; | 4441 | as->curins = T->nins; |
| 4405 | RA_DBG_START(); | 4442 | RA_DBG_START(); |
| 4406 | RA_DBGX((as, "===== STOP =====")); | 4443 | RA_DBGX((as, "===== STOP =====")); |
| 4407 | /* Realign and leave room for backwards loop branch or exit branch. */ | 4444 | |
| 4408 | if (as->realign) { | 4445 | /* General trace setup. Emit tail of trace. */ |
| 4409 | int i = ((int)(intptr_t)as->realign) & 15; | 4446 | asm_tail_prep(as); |
| 4410 | MCode *p = as->mctop; | ||
| 4411 | /* Fill unused mcode tail with NOPs to make the prefetcher happy. */ | ||
| 4412 | while (i-- > 0) | ||
| 4413 | *--p = XI_NOP; | ||
| 4414 | as->mctop = p; | ||
| 4415 | as->mcp = p - (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ | ||
| 4416 | } else { | ||
| 4417 | as->mcp = as->mctop - 5; /* Space for exit branch (near jmp). */ | ||
| 4418 | } | ||
| 4419 | as->invmcp = as->mcp; | ||
| 4420 | as->mcloop = NULL; | 4447 | as->mcloop = NULL; |
| 4421 | as->testmcp = NULL; | 4448 | as->flagmcp = NULL; |
| 4422 | as->topslot = 0; | 4449 | as->topslot = 0; |
| 4423 | as->gcsteps = 0; | 4450 | as->gcsteps = 0; |
| 4424 | as->sectref = as->loopref; | 4451 | as->sectref = as->loopref; |
| 4425 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; | 4452 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; |
| 4426 | 4453 | asm_setup_regsp(as); | |
| 4427 | /* Setup register allocation. */ | 4454 | if (!as->loopref) |
| 4428 | asm_setup_regsp(as, T); | ||
| 4429 | |||
| 4430 | if (!as->loopref) { | ||
| 4431 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ | ||
| 4432 | as->mcp -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); | ||
| 4433 | as->invmcp = NULL; | ||
| 4434 | asm_tail_link(as); | 4455 | asm_tail_link(as); |
| 4456 | |||
| 4457 | /* Assemble a trace in linear backwards order. */ | ||
| 4458 | for (as->curins--; as->curins > as->stopins; as->curins--) { | ||
| 4459 | IRIns *ir = IR(as->curins); | ||
| 4460 | lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ | ||
| 4461 | if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) | ||
| 4462 | continue; /* Dead-code elimination can be soooo easy. */ | ||
| 4463 | if (irt_isguard(ir->t)) | ||
| 4464 | asm_snap_prep(as); | ||
| 4465 | RA_DBG_REF(); | ||
| 4466 | checkmclim(as); | ||
| 4467 | asm_ir(as, ir); | ||
| 4435 | } | 4468 | } |
| 4436 | asm_trace(as); | ||
| 4437 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ | 4469 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ |
| 4438 | 4470 | ||
| 4471 | /* Emit head of trace. */ | ||
| 4439 | RA_DBG_REF(); | 4472 | RA_DBG_REF(); |
| 4440 | checkmclim(as); | 4473 | checkmclim(as); |
| 4441 | if (as->gcsteps) { | 4474 | if (as->gcsteps) { |
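
The tail of the lj_asm.c diff above restructures lj_asm_trace: the x86-specific tail layout moves into asm_tail_prep, exit-stub setup into asm_setup_target, and the former asm_trace loop is inlined, still assembling IR instructions in reverse order while as->mcp walks downward from as->mctop. A toy, self-contained sketch of that emit-backwards pattern follows; the buffer layout, byte values, and emit_byte helper are illustrative only, not LuaJIT's emitters:

```c
#include <stdint.h>
#include <stdio.h>

typedef uint8_t MCode;

/* Toy emitter in the style of the backwards assembler: machine code grows
** downward from the top of the buffer, so the instruction emitted last is
** the one that executes first. */
static MCode *emit_byte(MCode *p, MCode b) { *--p = b; return p; }

int main(void)
{
  MCode buf[16];
  MCode *mctop = buf + sizeof(buf);   /* Top of the MCode area. */
  MCode *mcp = mctop;                 /* Current emit position. */
  mcp = emit_byte(mcp, 0xC3);         /* ret -- emitted first, runs last. */
  mcp = emit_byte(mcp, 0x90);         /* nop -- emitted second, runs first. */
  printf("code at offset %d: %02X %02X\n",
         (int)(mcp - buf), mcp[0], mcp[1]);  /* -> 90 C3 */
  return 0;
}
```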
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 564ffc63..48b53b6d 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
| @@ -21,6 +21,8 @@ | |||
| 21 | #define FPRDEF(_) \ | 21 | #define FPRDEF(_) \ |
| 22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) | 22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) |
| 23 | #endif | 23 | #endif |
| 24 | #define VRIDDEF(_) \ | ||
| 25 | _(MRM) | ||
| 24 | 26 | ||
| 25 | #define RIDENUM(name) RID_##name, | 27 | #define RIDENUM(name) RID_##name, |
| 26 | 28 | ||
| @@ -63,6 +65,7 @@ enum { | |||
| 63 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) | 65 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) |
| 64 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) | 66 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) |
| 65 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 67 | #define RSET_ALL (RSET_GPR|RSET_FPR) |
| 68 | #define RSET_INIT RSET_ALL | ||
| 66 | 69 | ||
| 67 | #if LJ_64 | 70 | #if LJ_64 |
| 68 | /* Note: this requires the use of FORCE_REX! */ | 71 | /* Note: this requires the use of FORCE_REX! */ |
| @@ -80,6 +83,7 @@ enum { | |||
| 80 | (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) | 83 | (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) |
| 81 | #define REGARG_GPRS \ | 84 | #define REGARG_GPRS \ |
| 82 | (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5)) | 85 | (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5)) |
| 86 | #define REGARG_NUMGPR 4 | ||
| 83 | #define REGARG_FIRSTFPR RID_XMM0 | 87 | #define REGARG_FIRSTFPR RID_XMM0 |
| 84 | #define REGARG_LASTFPR RID_XMM3 | 88 | #define REGARG_LASTFPR RID_XMM3 |
| 85 | #define STACKARG_OFS (4*8) | 89 | #define STACKARG_OFS (4*8) |
| @@ -90,6 +94,7 @@ enum { | |||
| 90 | #define REGARG_GPRS \ | 94 | #define REGARG_GPRS \ |
| 91 | (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \ | 95 | (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \ |
| 92 | <<5))<<5))<<5))<<5))<<5)) | 96 | <<5))<<5))<<5))<<5))<<5)) |
| 97 | #define REGARG_NUMGPR 6 | ||
| 93 | #define REGARG_FIRSTFPR RID_XMM0 | 98 | #define REGARG_FIRSTFPR RID_XMM0 |
| 94 | #define REGARG_LASTFPR RID_XMM7 | 99 | #define REGARG_LASTFPR RID_XMM7 |
| 95 | #define STACKARG_OFS 0 | 100 | #define STACKARG_OFS 0 |
| @@ -98,6 +103,7 @@ enum { | |||
| 98 | /* Common x86 ABI. */ | 103 | /* Common x86 ABI. */ |
| 99 | #define RSET_SCRATCH (RSET_ACD|RSET_FPR) | 104 | #define RSET_SCRATCH (RSET_ACD|RSET_FPR) |
| 100 | #define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */ | 105 | #define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */ |
| 106 | #define REGARG_NUMGPR 2 /* Fastcall only. */ | ||
| 101 | #define STACKARG_OFS 0 | 107 | #define STACKARG_OFS 0 |
| 102 | #endif | 108 | #endif |
| 103 | 109 | ||
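
In lj_target_x86.h above, each calling convention now also exposes REGARG_NUMGPR next to the packed REGARG_GPRS list, which lets shared code such as the lj_str_new/lj_tab_newkey spill check in asm_setup_regsp ask how many GPR arguments the ABI passes in registers instead of testing LJ_64. REGARG_GPRS packs one 5-bit register id per argument slot; below is a minimal sketch of decoding it for the Windows/x64 case, where the RID_* enum values are assumed to follow the header's GPRDEF order (not shown in full in this diff):

```c
#include <stdio.h>

/* Register ids, assumed to follow the header's GPRDEF order on x64. */
enum { RID_EAX, RID_ECX, RID_EDX, RID_EBX, RID_ESP, RID_EBP, RID_ESI, RID_EDI,
       RID_R8D, RID_R9D };

/* Windows/x64 argument GPRs, packed 5 bits per slot (from the header). */
#define REGARG_GPRS	(RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
#define REGARG_NUMGPR	4

int main(void)
{
  unsigned int gprs = REGARG_GPRS;
  int i;
  for (i = 0; i < REGARG_NUMGPR; i++) {
    printf("GPR arg %d -> register id %u\n", i, gprs & 31);  /* 1, 2, 8, 9 */
    gprs >>= 5;  /* Advance to the next argument register. */
  }
  return 0;
}
```

On plain x86 with fastcall, the same loop would stop after RID_ECX and RID_EDX, matching the REGARG_NUMGPR of 2 defined in the common-ABI section.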
