aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2010-02-21 17:26:21 +0100
committer Mike Pall <mike> 2010-02-21 17:26:21 +0100
commit e76bcd2914a2a56ed5db44822d141bd7820d3f4e (patch)
tree 94564717aa375ebe9b722837c2a5a3bb25627092
parent c1658ddcf1a2e6b6713419451cfe4ed96424e6e6 (diff)
download luajit-e76bcd2914a2a56ed5db44822d141bd7820d3f4e.tar.gz
luajit-e76bcd2914a2a56ed5db44822d141bd7820d3f4e.tar.bz2
luajit-e76bcd2914a2a56ed5db44822d141bd7820d3f4e.zip
Refactor Lua stack handling in lj_asm.c.
-rw-r--r-- src/lj_asm.c 219
1 files changed, 115 insertions, 104 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 8d852ac0..db71aebc 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -741,6 +741,21 @@ static void ra_evictset(ASMState *as, RegSet drop)
741 } 741 }
742} 742}
743 743
744/* Evict (rematerialize) all registers allocated to constants.
** Visits every register of RSET_ALL that is not in as->freeset. If the cost
** entry of a register refers to a constant IR ref, ra_rematk() is called for
** that ref, followed by checkmclim(). Registers whose cost entry refers to a
** non-constant ref are left as they are.
*/
745static void ra_evictk(ASMState *as)
746{
747 RegSet work = ~as->freeset & RSET_ALL;  /* All registers not marked free. */
748 while (work) {
749 Reg r = rset_pickbot(work);
750 IRRef ref = regcost_ref(as->cost[r]);  /* IR ref recorded for this register. */
751 if (irref_isk(ref)) {  /* Only constants are handled here. */
752 ra_rematk(as, IR(ref));
753 checkmclim(as);
754 }
755 rset_clear(work, r);  /* Done with r, whether or not it held a constant. */
756 }
757}
758
744/* Allocate a register for ref from the allowed set of registers. 759/* Allocate a register for ref from the allowed set of registers.
745** Note: this function assumes the ref does NOT have a register yet! 760** Note: this function assumes the ref does NOT have a register yet!
746** Picks an optimal register, sets the cost and marks the register as non-free. 761** Picks an optimal register, sets the cost and marks the register as non-free.
@@ -2504,6 +2519,93 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2504#define asm_comp(as, ir, ci, cf, cu) \ 2519#define asm_comp(as, ir, ci, cf, cu) \
2505 asm_comp_(as, ir, (ci)+((cf)<<4)+(cu)) 2520 asm_comp_(as, ir, (ci)+((cf)<<4)+(cu))
2506 2521
2522/* -- Stack handling ------------------------------------------------------ */
2523
2524/* Get extent of the stack for a snapshot.
** Scans all snapshot entries flagged SNAP_FRAME whose function is a Lua
** function.
** Returns the slot of the last such entry in the snapshot map (0 if there
** is none).
** Stores into *ptopslot the maximum of slot + framesize over all such entries
** (0 if there is none).
*/
2525static BCReg asm_stack_extent(ASMState *as, SnapShot *snap, BCReg *ptopslot)
2526{
2527 SnapEntry *map = &as->T->snapmap[snap->mapofs];  /* Entries of this snapshot. */
2528 MSize n, nent = snap->nent;
2529 BCReg baseslot = 0, topslot = 0;
2530 /* Must check all frames to find topslot (outer can be larger than inner). */
2531 for (n = 0; n < nent; n++) {
2532 SnapEntry sn = map[n];
2533 if ((sn & SNAP_FRAME)) {
2534 IRIns *ir = IR(snap_ref(sn));
2535 GCfunc *fn = ir_kfunc(ir);
2536 if (isluafunc(fn)) {  /* Non-Lua functions do not contribute. */
2537 BCReg s = snap_slot(sn);
2538 BCReg fs = s + funcproto(fn)->framesize;
2539 if (fs > topslot) topslot = fs;  /* Track the maximum over all frames. */
2540 baseslot = s;  /* Overwritten each time: the last Lua frame entry wins. */
2541 }
2542 }
2543 }
2544 *ptopslot = topslot;
2545 return baseslot;
2546}
2547
2548/* Check Lua stack size for overflow. Use exit handler as fallback.
**
** topslot: slot count to check for; the emitted compare uses 8*topslot.
** pbase:   register holding the base, if it has one. Otherwise (or if it is
**          the same register as the temp.) the jit_base field of the global
**          state is used as a memory operand instead.
** allow:   set of registers usable as the temp. register. If it is empty,
**          eax is used and saved/restored via the SPS_TEMP1 spill slot.
** exitno:  exit number whose exit stub is the target of the emitted branch.
**
** Note: code generation in this file is backwards, so the emit calls below
** are best read bottom-up to follow the emitted instruction order.
*/
2549static void asm_stack_check(ASMState *as, BCReg topslot,
2550 Reg pbase, RegSet allow, ExitNo exitno)
2551{
2552 /* Try to get an unused temp. register, otherwise spill/restore eax. */
2553 Reg r = allow ? rset_pickbot(allow) : RID_EAX;
/* Conditional branch (CC_B) to the exit stub for exitno. */
2554 emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
2555 if (allow == RSET_EMPTY) /* Restore temp. register. */
2556 emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
2557 else
2558 ra_modified(as, r);
/* Compare the computed difference in r against 8*topslot. */
2559 emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
/* Subtract the base: from pbase if usable, else from jit_base in memory. */
2560 if (ra_hasreg(pbase) && pbase != r)
2561 emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
2562 else
2563 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2564 ptr2addr(&J2G(as->J)->jit_base));
/* Load the maxstack field of the lua_State that r points to into r. */
2565 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
/* Load the jit_L field of the global state into r. */
2566 emit_getgl(as, r, jit_L);
2567 if (allow == RSET_EMPTY) /* Spill temp. register. */
2568 emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
2569}
2570
2571/* Restore Lua stack from on-trace state.
** Emits stores, relative to RID_BASE, for the snapshot entries of snap.
** Slot s is addressed at offset 8*(s-1); the second store, where emitted,
** goes to that offset + 4.
** Entries flagged SNAP_CONT or SNAP_FRAME take the value for the second
** store from the frame link words located after the snapshot entries
** (walked downwards via flinks), except for slot 0.
*/
2572static void asm_stack_restore(ASMState *as, SnapShot *snap)
2573{
2574 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2575 MSize n, nent = snap->nent;
2576 SnapEntry *flinks = map + nent + snap->depth;  /* Starts snap->depth past the entries. */
2577 /* Store the value of all modified slots to the Lua stack. */
2578 for (n = 0; n < nent; n++) {
2579 SnapEntry sn = map[n];
2580 BCReg s = snap_slot(sn);
2581 int32_t ofs = 8*((int32_t)s-1);
2582 IRRef ref = snap_ref(sn);
2583 IRIns *ir = IR(ref);
2584 /* No need to restore readonly slots and unmodified non-parent slots. */
/* I.e. an SLOAD of the same slot is only stored back if it has the
** PARENT flag set and the READONLY flag clear.
*/
2585 if (ir->o == IR_SLOAD && ir->op1 == s &&
2586 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
2587 continue;
2588 if (irt_isnum(ir->t)) {
/* Numbers: a single MOVSD store from an FPR. */
2589 Reg src = ra_alloc1(as, ref, RSET_FPR);
2590 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
2591 } else {
2592 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
2593 if (!irref_isk(ref)) {
/* Non-constant: store from a GPR other than RID_BASE. */
2594 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2595 emit_movtomro(as, src, RID_BASE, ofs);
2596 } else if (!irt_ispri(ir->t)) {
/* Constant of non-primitive type: store ir->i as an immediate. */
2597 emit_movmroi(as, RID_BASE, ofs, ir->i);
2598 }
/* Constants of primitive type get no store at ofs, only the one at ofs+4. */
2599 if (!(sn & (SNAP_CONT|SNAP_FRAME)))
2600 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
2601 else if (s != 0) /* Do not overwrite link to previous frame. */
2602 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
2603 }
2604 checkmclim(as);
2605 }
/* All snap->depth frame link words must have been consumed by now. */
2606 lua_assert(map + nent == flinks);
2607}
2608
2507/* -- GC handling --------------------------------------------------------- */ 2609/* -- GC handling --------------------------------------------------------- */
2508 2610
2509/* Sync all live GC values to Lua stack slots. */ 2611/* Sync all live GC values to Lua stack slots. */
@@ -2802,21 +2904,6 @@ static void asm_loop(ASMState *as)
2802 2904
2803/* -- Head of trace ------------------------------------------------------- */ 2905/* -- Head of trace ------------------------------------------------------- */
2804 2906
2805/* Rematerialize all remaining constants in registers.
** Visits every register of RSET_ALL that is not in as->freeset. If the cost
** entry of a register refers to a constant IR ref, ra_rematk() is called for
** that ref, followed by checkmclim(). Other registers are left as they are.
*/
2806static void asm_const_remat(ASMState *as)
2807{
2808 RegSet work = ~as->freeset & RSET_ALL;  /* All registers not marked free. */
2809 while (work) {
2810 Reg r = rset_pickbot(work);
2811 IRRef ref = regcost_ref(as->cost[r]);  /* IR ref recorded for this register. */
2812 if (irref_isk(ref)) {  /* Only constants are handled here. */
2813 ra_rematk(as, IR(ref));
2814 checkmclim(as);
2815 }
2816 rset_clear(work, r);  /* Done with r, whether or not it held a constant. */
2817 }
2818}
2819
2820/* Coalesce BASE register for a root trace. */ 2907/* Coalesce BASE register for a root trace. */
2821static void asm_head_root_base(ASMState *as) 2908static void asm_head_root_base(ASMState *as)
2822{ 2909{
@@ -2844,32 +2931,6 @@ static void asm_head_root(ASMState *as)
2844 as->T->topslot = gcref(as->T->startpt)->pt.framesize; 2931 as->T->topslot = gcref(as->T->startpt)->pt.framesize;
2845} 2932}
2846 2933
2847/* Check Lua stack size for overflow at the start of a side trace.
2848** Stack overflow is rare, so let the regular exit handling fix this up.
2849** This is done in the context of the *parent* trace and parent exitno!
**
** topslot: slot count to check for; the emitted compare uses 8*topslot.
** pbase:   register holding the base, if it has one. Otherwise (or if it is
**          the same register as the temp.) the jit_base field of the global
**          state is used as a memory operand instead.
** allow:   set of registers usable as the temp. register. If it is empty,
**          eax is used and saved/restored via the SPS_TEMP1 spill slot.
** exitno:  exit number whose exit stub is the target of the emitted branch.
**
** Note: code generation in this file is backwards, so the emit calls below
** are best read bottom-up to follow the emitted instruction order.
2850*/
2851static void asm_checkstack(ASMState *as, BCReg topslot,
2852 Reg pbase, RegSet allow, ExitNo exitno)
2853{
2854 /* Try to get an unused temp. register, otherwise spill/restore eax. */
2855 Reg r = allow ? rset_pickbot(allow) : RID_EAX;
/* Conditional branch (CC_B) to the exit stub for exitno. */
2856 emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
2857 if (allow == RSET_EMPTY) /* Restore temp. register. */
2858 emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
2859 else
2860 ra_modified(as, r);
/* Compare the computed difference in r against 8*topslot. */
2861 emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
/* Subtract the base: from pbase if usable, else from jit_base in memory. */
2862 if (ra_hasreg(pbase) && pbase != r)
2863 emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
2864 else
2865 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2866 ptr2addr(&J2G(as->J)->jit_base));
/* Load the maxstack field of the lua_State that r points to into r. */
2867 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
/* Load the jit_L field of the global state into r. */
2868 emit_getgl(as, r, jit_L);
2869 if (allow == RSET_EMPTY) /* Spill temp. register. */
2870 emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
2871}
2872
2873/* Coalesce or reload BASE register for a side trace. */ 2934/* Coalesce or reload BASE register for a side trace. */
2874static RegSet asm_head_side_base(ASMState *as, Reg pbase, RegSet allow) 2935static RegSet asm_head_side_base(ASMState *as, Reg pbase, RegSet allow)
2875{ 2936{
@@ -3039,88 +3100,38 @@ static void asm_head_side(ASMState *as)
3039 if (as->topslot > as->T->topslot) { /* Need to check for higher slot? */ 3100 if (as->topslot > as->T->topslot) { /* Need to check for higher slot? */
3040 as->T->topslot = (uint8_t)as->topslot; /* Remember for child traces. */ 3101 as->T->topslot = (uint8_t)as->topslot; /* Remember for child traces. */
3041 /* Reuse the parent exit in the context of the parent trace. */ 3102 /* Reuse the parent exit in the context of the parent trace. */
3042 asm_checkstack(as, as->topslot, pbase, allow & RSET_GPR, as->J->exitno); 3103 asm_stack_check(as, as->topslot, pbase, allow & RSET_GPR, as->J->exitno);
3043 } 3104 }
3044} 3105}
3045 3106
3046/* -- Tail of trace ------------------------------------------------------- */ 3107/* -- Tail of trace ------------------------------------------------------- */
3047 3108
3048/* Sync Lua stack slots to match the last snapshot. 3109/* Link to another trace. */
3049** Note: code generation is backwards, so this is best read bottom-up. 3110static void asm_tail_link(ASMState *as)
3050*/
3051static void asm_tail_sync(ASMState *as)
3052{ 3111{
3053 SnapNo snapno = as->T->nsnap-1; /* Last snapshot. */ 3112 SnapNo snapno = as->T->nsnap-1; /* Last snapshot. */
3054 SnapShot *snap = &as->T->snap[snapno]; 3113 SnapShot *snap = &as->T->snap[snapno];
3055 MSize n, nent = snap->nent; 3114 BCReg baseslot = asm_stack_extent(as, snap, &as->topslot);
3056 SnapEntry *map = &as->T->snapmap[snap->mapofs];
3057 SnapEntry *flinks = map + nent + snap->depth;
3058 BCReg newbase = 0, topslot = 0;
3059 3115
3060 checkmclim(as); 3116 checkmclim(as);
3061 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); 3117 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
3062 3118
3063 /* Must check all frames to find topslot (outer can be larger than inner). */
3064 for (n = 0; n < nent; n++) {
3065 SnapEntry sn = map[n];
3066 if ((sn & SNAP_FRAME)) {
3067 IRIns *ir = IR(snap_ref(sn));
3068 GCfunc *fn = ir_kfunc(ir);
3069 if (isluafunc(fn)) {
3070 BCReg s = snap_slot(sn);
3071 BCReg fs = s + funcproto(fn)->framesize;
3072 if (fs > topslot) topslot = fs;
3073 newbase = s;
3074 }
3075 }
3076 }
3077 as->topslot = topslot; /* Used in asm_head_side(). */
3078
3079 if (as->T->link == TRACE_INTERP) { 3119 if (as->T->link == TRACE_INTERP) {
3080 /* Setup fixed registers for exit to interpreter. */ 3120 /* Setup fixed registers for exit to interpreter. */
3081 emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); 3121 emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
3082 emit_loadi(as, RID_PC, (int32_t)map[nent]); 3122 emit_loada(as, RID_PC, snap_pc(as->T->snapmap[snap->mapofs + snap->nent]));
3083 } else if (newbase) { 3123 } else if (baseslot) {
3084 /* Save modified BASE for linking to trace with higher start frame. */ 3124 /* Save modified BASE for linking to trace with higher start frame. */
3085 emit_setgl(as, RID_BASE, jit_base); 3125 emit_setgl(as, RID_BASE, jit_base);
3086 } 3126 }
3127 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
3087 3128
3088 emit_addptr(as, RID_BASE, 8*(int32_t)newbase); 3129 /* Sync the interpreter state with the on-trace state. */
3089 3130 asm_stack_restore(as, snap);
3090 /* Store the value of all modified slots to the Lua stack. */
3091 for (n = 0; n < nent; n++) {
3092 SnapEntry sn = map[n];
3093 BCReg s = snap_slot(sn);
3094 int32_t ofs = 8*((int32_t)s-1);
3095 IRRef ref = snap_ref(sn);
3096 IRIns *ir = IR(ref);
3097 /* No need to restore readonly slots and unmodified non-parent slots. */
3098 if (ir->o == IR_SLOAD && ir->op1 == s &&
3099 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
3100 continue;
3101 if (irt_isnum(ir->t)) {
3102 Reg src = ra_alloc1(as, ref, RSET_FPR);
3103 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
3104 } else {
3105 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
3106 if (!irref_isk(ref)) {
3107 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
3108 emit_movtomro(as, src, RID_BASE, ofs);
3109 } else if (!irt_ispri(ir->t)) {
3110 emit_movmroi(as, RID_BASE, ofs, ir->i);
3111 }
3112 if (!(sn & (SNAP_CONT|SNAP_FRAME)))
3113 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
3114 else if (s != 0) /* Do not overwrite link to previous frame. */
3115 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
3116 }
3117 checkmclim(as);
3118 }
3119 lua_assert(map + nent == flinks);
3120 3131
3121 /* Root traces that grow the stack need to check the stack at the end. */ 3132 /* Root traces that grow the stack need to check the stack at the end. */
3122 if (!as->parent && topslot) 3133 if (!as->parent && as->topslot)
3123 asm_checkstack(as, topslot, RID_BASE, as->freeset & RSET_GPR, snapno); 3134 asm_stack_check(as, as->topslot, RID_BASE, as->freeset & RSET_GPR, snapno);
3124} 3135}
3125 3136
3126/* Fixup the tail code. */ 3137/* Fixup the tail code. */
@@ -3483,7 +3494,7 @@ void lj_asm_trace(jit_State *J, Trace *T)
3483 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ 3494 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
3484 as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; 3495 as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
3485 as->invmcp = NULL; 3496 as->invmcp = NULL;
3486 asm_tail_sync(as); 3497 asm_tail_link(as);
3487 } 3498 }
3488 asm_trace(as); 3499 asm_trace(as);
3489 } while (as->realign); /* Retry in case the MCode needs to be realigned. */ 3500 } while (as->realign); /* Retry in case the MCode needs to be realigned. */
@@ -3492,7 +3503,7 @@ void lj_asm_trace(jit_State *J, Trace *T)
3492 checkmclim(as); 3503 checkmclim(as);
3493 if (as->gcsteps) 3504 if (as->gcsteps)
3494 asm_gc_check(as, &as->T->snap[0]); 3505 asm_gc_check(as, &as->T->snap[0]);
3495 asm_const_remat(as); 3506 ra_evictk(as);
3496 if (as->parent) 3507 if (as->parent)
3497 asm_head_side(as); 3508 asm_head_side(as);
3498 else 3509 else