diff options
author | Mike Pall <mike> | 2010-02-21 17:26:21 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2010-02-21 17:26:21 +0100 |
commit | e76bcd2914a2a56ed5db44822d141bd7820d3f4e (patch) | |
tree | 94564717aa375ebe9b722837c2a5a3bb25627092 /src | |
parent | c1658ddcf1a2e6b6713419451cfe4ed96424e6e6 (diff) | |
download | luajit-e76bcd2914a2a56ed5db44822d141bd7820d3f4e.tar.gz luajit-e76bcd2914a2a56ed5db44822d141bd7820d3f4e.tar.bz2 luajit-e76bcd2914a2a56ed5db44822d141bd7820d3f4e.zip |
Refactor Lua stack handling in lj_asm.c.
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm.c | 219 |
1 file changed, 115 insertions, 104 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 8d852ac0..db71aebc 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -741,6 +741,21 @@ static void ra_evictset(ASMState *as, RegSet drop) | |||
741 | } | 741 | } |
742 | } | 742 | } |
743 | 743 | ||
744 | /* Evict (rematerialize) all registers allocated to constants. */ | ||
745 | static void ra_evictk(ASMState *as) | ||
746 | { | ||
747 | RegSet work = ~as->freeset & RSET_ALL; | ||
748 | while (work) { | ||
749 | Reg r = rset_pickbot(work); | ||
750 | IRRef ref = regcost_ref(as->cost[r]); | ||
751 | if (irref_isk(ref)) { | ||
752 | ra_rematk(as, IR(ref)); | ||
753 | checkmclim(as); | ||
754 | } | ||
755 | rset_clear(work, r); | ||
756 | } | ||
757 | } | ||
758 | |||
744 | /* Allocate a register for ref from the allowed set of registers. | 759 | /* Allocate a register for ref from the allowed set of registers. |
745 | ** Note: this function assumes the ref does NOT have a register yet! | 760 | ** Note: this function assumes the ref does NOT have a register yet! |
746 | ** Picks an optimal register, sets the cost and marks the register as non-free. | 761 | ** Picks an optimal register, sets the cost and marks the register as non-free. |
@@ -2504,6 +2519,93 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
2504 | #define asm_comp(as, ir, ci, cf, cu) \ | 2519 | #define asm_comp(as, ir, ci, cf, cu) \ |
2505 | asm_comp_(as, ir, (ci)+((cf)<<4)+(cu)) | 2520 | asm_comp_(as, ir, (ci)+((cf)<<4)+(cu)) |
2506 | 2521 | ||
2522 | /* -- Stack handling ------------------------------------------------------ */ | ||
2523 | |||
2524 | /* Get extent of the stack for a snapshot. */ | ||
2525 | static BCReg asm_stack_extent(ASMState *as, SnapShot *snap, BCReg *ptopslot) | ||
2526 | { | ||
2527 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | ||
2528 | MSize n, nent = snap->nent; | ||
2529 | BCReg baseslot = 0, topslot = 0; | ||
2530 | /* Must check all frames to find topslot (outer can be larger than inner). */ | ||
2531 | for (n = 0; n < nent; n++) { | ||
2532 | SnapEntry sn = map[n]; | ||
2533 | if ((sn & SNAP_FRAME)) { | ||
2534 | IRIns *ir = IR(snap_ref(sn)); | ||
2535 | GCfunc *fn = ir_kfunc(ir); | ||
2536 | if (isluafunc(fn)) { | ||
2537 | BCReg s = snap_slot(sn); | ||
2538 | BCReg fs = s + funcproto(fn)->framesize; | ||
2539 | if (fs > topslot) topslot = fs; | ||
2540 | baseslot = s; | ||
2541 | } | ||
2542 | } | ||
2543 | } | ||
2544 | *ptopslot = topslot; | ||
2545 | return baseslot; | ||
2546 | } | ||
2547 | |||
2548 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | ||
2549 | static void asm_stack_check(ASMState *as, BCReg topslot, | ||
2550 | Reg pbase, RegSet allow, ExitNo exitno) | ||
2551 | { | ||
2552 | /* Try to get an unused temp. register, otherwise spill/restore eax. */ | ||
2553 | Reg r = allow ? rset_pickbot(allow) : RID_EAX; | ||
2554 | emit_jcc(as, CC_B, exitstub_addr(as->J, exitno)); | ||
2555 | if (allow == RSET_EMPTY) /* Restore temp. register. */ | ||
2556 | emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1)); | ||
2557 | else | ||
2558 | ra_modified(as, r); | ||
2559 | emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); | ||
2560 | if (ra_hasreg(pbase) && pbase != r) | ||
2561 | emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); | ||
2562 | else | ||
2563 | emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, | ||
2564 | ptr2addr(&J2G(as->J)->jit_base)); | ||
2565 | emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); | ||
2566 | emit_getgl(as, r, jit_L); | ||
2567 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | ||
2568 | emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1)); | ||
2569 | } | ||
2570 | |||
2571 | /* Restore Lua stack from on-trace state. */ | ||
2572 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | ||
2573 | { | ||
2574 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | ||
2575 | MSize n, nent = snap->nent; | ||
2576 | SnapEntry *flinks = map + nent + snap->depth; | ||
2577 | /* Store the value of all modified slots to the Lua stack. */ | ||
2578 | for (n = 0; n < nent; n++) { | ||
2579 | SnapEntry sn = map[n]; | ||
2580 | BCReg s = snap_slot(sn); | ||
2581 | int32_t ofs = 8*((int32_t)s-1); | ||
2582 | IRRef ref = snap_ref(sn); | ||
2583 | IRIns *ir = IR(ref); | ||
2584 | /* No need to restore readonly slots and unmodified non-parent slots. */ | ||
2585 | if (ir->o == IR_SLOAD && ir->op1 == s && | ||
2586 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | ||
2587 | continue; | ||
2588 | if (irt_isnum(ir->t)) { | ||
2589 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
2590 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | ||
2591 | } else { | ||
2592 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | ||
2593 | if (!irref_isk(ref)) { | ||
2594 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
2595 | emit_movtomro(as, src, RID_BASE, ofs); | ||
2596 | } else if (!irt_ispri(ir->t)) { | ||
2597 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
2598 | } | ||
2599 | if (!(sn & (SNAP_CONT|SNAP_FRAME))) | ||
2600 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
2601 | else if (s != 0) /* Do not overwrite link to previous frame. */ | ||
2602 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); | ||
2603 | } | ||
2604 | checkmclim(as); | ||
2605 | } | ||
2606 | lua_assert(map + nent == flinks); | ||
2607 | } | ||
2608 | |||
2507 | /* -- GC handling --------------------------------------------------------- */ | 2609 | /* -- GC handling --------------------------------------------------------- */ |
2508 | 2610 | ||
2509 | /* Sync all live GC values to Lua stack slots. */ | 2611 | /* Sync all live GC values to Lua stack slots. */ |
@@ -2802,21 +2904,6 @@ static void asm_loop(ASMState *as) | |||
2802 | 2904 | ||
2803 | /* -- Head of trace ------------------------------------------------------- */ | 2905 | /* -- Head of trace ------------------------------------------------------- */ |
2804 | 2906 | ||
2805 | /* Rematerialize all remaining constants in registers. */ | ||
2806 | static void asm_const_remat(ASMState *as) | ||
2807 | { | ||
2808 | RegSet work = ~as->freeset & RSET_ALL; | ||
2809 | while (work) { | ||
2810 | Reg r = rset_pickbot(work); | ||
2811 | IRRef ref = regcost_ref(as->cost[r]); | ||
2812 | if (irref_isk(ref)) { | ||
2813 | ra_rematk(as, IR(ref)); | ||
2814 | checkmclim(as); | ||
2815 | } | ||
2816 | rset_clear(work, r); | ||
2817 | } | ||
2818 | } | ||
2819 | |||
2820 | /* Coalesce BASE register for a root trace. */ | 2907 | /* Coalesce BASE register for a root trace. */ |
2821 | static void asm_head_root_base(ASMState *as) | 2908 | static void asm_head_root_base(ASMState *as) |
2822 | { | 2909 | { |
@@ -2844,32 +2931,6 @@ static void asm_head_root(ASMState *as) | |||
2844 | as->T->topslot = gcref(as->T->startpt)->pt.framesize; | 2931 | as->T->topslot = gcref(as->T->startpt)->pt.framesize; |
2845 | } | 2932 | } |
2846 | 2933 | ||
2847 | /* Check Lua stack size for overflow at the start of a side trace. | ||
2848 | ** Stack overflow is rare, so let the regular exit handling fix this up. | ||
2849 | ** This is done in the context of the *parent* trace and parent exitno! | ||
2850 | */ | ||
2851 | static void asm_checkstack(ASMState *as, BCReg topslot, | ||
2852 | Reg pbase, RegSet allow, ExitNo exitno) | ||
2853 | { | ||
2854 | /* Try to get an unused temp. register, otherwise spill/restore eax. */ | ||
2855 | Reg r = allow ? rset_pickbot(allow) : RID_EAX; | ||
2856 | emit_jcc(as, CC_B, exitstub_addr(as->J, exitno)); | ||
2857 | if (allow == RSET_EMPTY) /* Restore temp. register. */ | ||
2858 | emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1)); | ||
2859 | else | ||
2860 | ra_modified(as, r); | ||
2861 | emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); | ||
2862 | if (ra_hasreg(pbase) && pbase != r) | ||
2863 | emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); | ||
2864 | else | ||
2865 | emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, | ||
2866 | ptr2addr(&J2G(as->J)->jit_base)); | ||
2867 | emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); | ||
2868 | emit_getgl(as, r, jit_L); | ||
2869 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | ||
2870 | emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1)); | ||
2871 | } | ||
2872 | |||
2873 | /* Coalesce or reload BASE register for a side trace. */ | 2934 | /* Coalesce or reload BASE register for a side trace. */ |
2874 | static RegSet asm_head_side_base(ASMState *as, Reg pbase, RegSet allow) | 2935 | static RegSet asm_head_side_base(ASMState *as, Reg pbase, RegSet allow) |
2875 | { | 2936 | { |
@@ -3039,88 +3100,38 @@ static void asm_head_side(ASMState *as) | |||
3039 | if (as->topslot > as->T->topslot) { /* Need to check for higher slot? */ | 3100 | if (as->topslot > as->T->topslot) { /* Need to check for higher slot? */ |
3040 | as->T->topslot = (uint8_t)as->topslot; /* Remember for child traces. */ | 3101 | as->T->topslot = (uint8_t)as->topslot; /* Remember for child traces. */ |
3041 | /* Reuse the parent exit in the context of the parent trace. */ | 3102 | /* Reuse the parent exit in the context of the parent trace. */ |
3042 | asm_checkstack(as, as->topslot, pbase, allow & RSET_GPR, as->J->exitno); | 3103 | asm_stack_check(as, as->topslot, pbase, allow & RSET_GPR, as->J->exitno); |
3043 | } | 3104 | } |
3044 | } | 3105 | } |
3045 | 3106 | ||
3046 | /* -- Tail of trace ------------------------------------------------------- */ | 3107 | /* -- Tail of trace ------------------------------------------------------- */ |
3047 | 3108 | ||
3048 | /* Sync Lua stack slots to match the last snapshot. | 3109 | /* Link to another trace. */ |
3049 | ** Note: code generation is backwards, so this is best read bottom-up. | 3110 | static void asm_tail_link(ASMState *as) |
3050 | */ | ||
3051 | static void asm_tail_sync(ASMState *as) | ||
3052 | { | 3111 | { |
3053 | SnapNo snapno = as->T->nsnap-1; /* Last snapshot. */ | 3112 | SnapNo snapno = as->T->nsnap-1; /* Last snapshot. */ |
3054 | SnapShot *snap = &as->T->snap[snapno]; | 3113 | SnapShot *snap = &as->T->snap[snapno]; |
3055 | MSize n, nent = snap->nent; | 3114 | BCReg baseslot = asm_stack_extent(as, snap, &as->topslot); |
3056 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | ||
3057 | SnapEntry *flinks = map + nent + snap->depth; | ||
3058 | BCReg newbase = 0, topslot = 0; | ||
3059 | 3115 | ||
3060 | checkmclim(as); | 3116 | checkmclim(as); |
3061 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); | 3117 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); |
3062 | 3118 | ||
3063 | /* Must check all frames to find topslot (outer can be larger than inner). */ | ||
3064 | for (n = 0; n < nent; n++) { | ||
3065 | SnapEntry sn = map[n]; | ||
3066 | if ((sn & SNAP_FRAME)) { | ||
3067 | IRIns *ir = IR(snap_ref(sn)); | ||
3068 | GCfunc *fn = ir_kfunc(ir); | ||
3069 | if (isluafunc(fn)) { | ||
3070 | BCReg s = snap_slot(sn); | ||
3071 | BCReg fs = s + funcproto(fn)->framesize; | ||
3072 | if (fs > topslot) topslot = fs; | ||
3073 | newbase = s; | ||
3074 | } | ||
3075 | } | ||
3076 | } | ||
3077 | as->topslot = topslot; /* Used in asm_head_side(). */ | ||
3078 | |||
3079 | if (as->T->link == TRACE_INTERP) { | 3119 | if (as->T->link == TRACE_INTERP) { |
3080 | /* Setup fixed registers for exit to interpreter. */ | 3120 | /* Setup fixed registers for exit to interpreter. */ |
3081 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); | 3121 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); |
3082 | emit_loadi(as, RID_PC, (int32_t)map[nent]); | 3122 | emit_loada(as, RID_PC, snap_pc(as->T->snapmap[snap->mapofs + snap->nent])); |
3083 | } else if (newbase) { | 3123 | } else if (baseslot) { |
3084 | /* Save modified BASE for linking to trace with higher start frame. */ | 3124 | /* Save modified BASE for linking to trace with higher start frame. */ |
3085 | emit_setgl(as, RID_BASE, jit_base); | 3125 | emit_setgl(as, RID_BASE, jit_base); |
3086 | } | 3126 | } |
3127 | emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); | ||
3087 | 3128 | ||
3088 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); | 3129 | /* Sync the interpreter state with the on-trace state. */ |
3089 | 3130 | asm_stack_restore(as, snap); | |
3090 | /* Store the value of all modified slots to the Lua stack. */ | ||
3091 | for (n = 0; n < nent; n++) { | ||
3092 | SnapEntry sn = map[n]; | ||
3093 | BCReg s = snap_slot(sn); | ||
3094 | int32_t ofs = 8*((int32_t)s-1); | ||
3095 | IRRef ref = snap_ref(sn); | ||
3096 | IRIns *ir = IR(ref); | ||
3097 | /* No need to restore readonly slots and unmodified non-parent slots. */ | ||
3098 | if (ir->o == IR_SLOAD && ir->op1 == s && | ||
3099 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | ||
3100 | continue; | ||
3101 | if (irt_isnum(ir->t)) { | ||
3102 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
3103 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | ||
3104 | } else { | ||
3105 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | ||
3106 | if (!irref_isk(ref)) { | ||
3107 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
3108 | emit_movtomro(as, src, RID_BASE, ofs); | ||
3109 | } else if (!irt_ispri(ir->t)) { | ||
3110 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
3111 | } | ||
3112 | if (!(sn & (SNAP_CONT|SNAP_FRAME))) | ||
3113 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
3114 | else if (s != 0) /* Do not overwrite link to previous frame. */ | ||
3115 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); | ||
3116 | } | ||
3117 | checkmclim(as); | ||
3118 | } | ||
3119 | lua_assert(map + nent == flinks); | ||
3120 | 3131 | ||
3121 | /* Root traces that grow the stack need to check the stack at the end. */ | 3132 | /* Root traces that grow the stack need to check the stack at the end. */ |
3122 | if (!as->parent && topslot) | 3133 | if (!as->parent && as->topslot) |
3123 | asm_checkstack(as, topslot, RID_BASE, as->freeset & RSET_GPR, snapno); | 3134 | asm_stack_check(as, as->topslot, RID_BASE, as->freeset & RSET_GPR, snapno); |
3124 | } | 3135 | } |
3125 | 3136 | ||
3126 | /* Fixup the tail code. */ | 3137 | /* Fixup the tail code. */ |
@@ -3483,7 +3494,7 @@ void lj_asm_trace(jit_State *J, Trace *T) | |||
3483 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ | 3494 | /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ |
3484 | as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; | 3495 | as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; |
3485 | as->invmcp = NULL; | 3496 | as->invmcp = NULL; |
3486 | asm_tail_sync(as); | 3497 | asm_tail_link(as); |
3487 | } | 3498 | } |
3488 | asm_trace(as); | 3499 | asm_trace(as); |
3489 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ | 3500 | } while (as->realign); /* Retry in case the MCode needs to be realigned. */ |
@@ -3492,7 +3503,7 @@ void lj_asm_trace(jit_State *J, Trace *T) | |||
3492 | checkmclim(as); | 3503 | checkmclim(as); |
3493 | if (as->gcsteps) | 3504 | if (as->gcsteps) |
3494 | asm_gc_check(as, &as->T->snap[0]); | 3505 | asm_gc_check(as, &as->T->snap[0]); |
3495 | asm_const_remat(as); | 3506 | ra_evictk(as); |
3496 | if (as->parent) | 3507 | if (as->parent) |
3497 | asm_head_side(as); | 3508 | asm_head_side(as); |
3498 | else | 3509 | else |