diff options
Diffstat (limited to 'src/lj_asm.c')
-rw-r--r-- | src/lj_asm.c | 101 |
1 files changed, 48 insertions, 53 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9f5ce030..b3656e00 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as) | |||
926 | { | 926 | { |
927 | SnapShot *snap = &as->T->snap[as->snapno]; | 927 | SnapShot *snap = &as->T->snap[as->snapno]; |
928 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 928 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
929 | BCReg s, nslots = snap->nslots; | 929 | MSize n, nent = snap->nent; |
930 | for (s = 0; s < nslots; s++) { | 930 | for (n = 0; n < nent; n++) { |
931 | IRRef ref = snap_ref(map[s]); | 931 | IRRef ref = snap_ref(map[n]); |
932 | if (!irref_isk(ref)) { | 932 | if (!irref_isk(ref)) { |
933 | IRIns *ir = IR(ref); | 933 | IRIns *ir = IR(ref); |
934 | if (!ra_used(ir) && ir->o != IR_FRAME) { | 934 | if (!ra_used(ir) && ir->o != IR_FRAME) { |
@@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren) | |||
960 | { | 960 | { |
961 | SnapShot *snap = &as->T->snap[as->snapno]; | 961 | SnapShot *snap = &as->T->snap[as->snapno]; |
962 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 962 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
963 | BCReg s, nslots = snap->nslots; | 963 | MSize n, nent = snap->nent; |
964 | for (s = 0; s < nslots; s++) { | 964 | for (n = 0; n < nent; n++) { |
965 | IRRef ref = snap_ref(map[s]); | 965 | IRRef ref = snap_ref(map[n]); |
966 | if (ref == ren) { | 966 | if (ref == ren) { |
967 | IRIns *ir = IR(ref); | 967 | IRIns *ir = IR(ref); |
968 | ra_spill(as, ir); /* Register renamed, so force a spill slot. */ | 968 | ra_spill(as, ir); /* Register renamed, so force a spill slot. */ |
@@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) | |||
2465 | */ | 2465 | */ |
2466 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); | 2466 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); |
2467 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2467 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
2468 | BCReg s, nslots = snap->nslots; | 2468 | MSize n, nent = snap->nent; |
2469 | for (s = 0; s < nslots; s++) { | 2469 | for (n = 0; n < nent; n++) { |
2470 | IRRef ref = snap_ref(map[s]); | 2470 | IRRef ref = snap_ref(map[n]); |
2471 | if (!irref_isk(ref)) { | 2471 | if (!irref_isk(ref)) { |
2472 | int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1); | ||
2472 | IRIns *ir = IR(ref); | 2473 | IRIns *ir = IR(ref); |
2473 | if (ir->o == IR_FRAME) { | 2474 | if (ir->o == IR_FRAME) { |
2474 | /* NYI: sync the frame, bump base, set topslot, clear new slots. */ | 2475 | /* NYI: sync the frame, bump base, set topslot, clear new slots. */ |
2475 | lj_trace_err(as->J, LJ_TRERR_NYIGCF); | 2476 | lj_trace_err(as->J, LJ_TRERR_NYIGCF); |
2476 | } else if (irt_isgcv(ir->t) && | 2477 | } else if (irt_isgcv(ir->t)) { |
2477 | !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) { | ||
2478 | Reg src = ra_alloc1(as, ref, allow); | 2478 | Reg src = ra_alloc1(as, ref, allow); |
2479 | int32_t ofs = 8*(int32_t)(s-1); | ||
2480 | emit_movtomro(as, src, base, ofs); | 2479 | emit_movtomro(as, src, base, ofs); |
2481 | emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); | 2480 | emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); |
2482 | checkmclim(as); | 2481 | checkmclim(as); |
@@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap) | |||
2504 | emit_loadi(as, tmp, (int32_t)as->gcsteps); | 2503 | emit_loadi(as, tmp, (int32_t)as->gcsteps); |
2505 | /* We don't know spadj yet, so get the C frame from L->cframe. */ | 2504 | /* We don't know spadj yet, so get the C frame from L->cframe. */ |
2506 | emit_movmroi(as, tmp, CFRAME_OFS_PC, | 2505 | emit_movmroi(as, tmp, CFRAME_OFS_PC, |
2507 | (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); | 2506 | (int32_t)as->T->snapmap[snap->mapofs+snap->nent]); |
2508 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); | 2507 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); |
2509 | lstate = IR(ASMREF_L)->r; | 2508 | lstate = IR(ASMREF_L)->r; |
2510 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); | 2509 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); |
@@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as) | |||
2965 | static void asm_tail_sync(ASMState *as) | 2964 | static void asm_tail_sync(ASMState *as) |
2966 | { | 2965 | { |
2967 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ | 2966 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ |
2968 | BCReg s, nslots = snap->nslots; | 2967 | MSize n, nent = snap->nent; |
2969 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2968 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
2970 | SnapEntry *flinks = map + nslots + snap->nframelinks; | 2969 | SnapEntry *flinks = map + nent + snap->nframelinks; |
2971 | BCReg newbase = 0; | 2970 | BCReg newbase = 0; |
2972 | BCReg secondbase = ~(BCReg)0; | 2971 | BCReg nslots, topslot = 0; |
2973 | BCReg topslot = 0; | ||
2974 | 2972 | ||
2975 | checkmclim(as); | 2973 | checkmclim(as); |
2976 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); | 2974 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); |
2977 | 2975 | ||
2978 | /* Must check all frames to find topslot (outer can be larger than inner). */ | 2976 | /* Must check all frames to find topslot (outer can be larger than inner). */ |
2979 | for (s = 0; s < nslots; s++) { | 2977 | for (n = 0; n < nent; n++) { |
2980 | IRRef ref = snap_ref(map[s]); | 2978 | IRRef ref = snap_ref(map[n]); |
2979 | BCReg s = snap_slot(map[n]); | ||
2981 | if (!irref_isk(ref)) { | 2980 | if (!irref_isk(ref)) { |
2982 | IRIns *ir = IR(ref); | 2981 | IRIns *ir = IR(ref); |
2983 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { | 2982 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
@@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as) | |||
2985 | if (isluafunc(fn)) { | 2984 | if (isluafunc(fn)) { |
2986 | BCReg fs = s + funcproto(fn)->framesize; | 2985 | BCReg fs = s + funcproto(fn)->framesize; |
2987 | if (fs > topslot) topslot = fs; | 2986 | if (fs > topslot) topslot = fs; |
2988 | if (s != 0) { | 2987 | newbase = s; |
2989 | newbase = s; | ||
2990 | if (secondbase == ~(BCReg)0) secondbase = s; | ||
2991 | } | ||
2992 | } | 2988 | } |
2993 | } | 2989 | } |
2994 | } | 2990 | } |
@@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as) | |||
2998 | if (as->T->link == TRACE_INTERP) { | 2994 | if (as->T->link == TRACE_INTERP) { |
2999 | /* Setup fixed registers for exit to interpreter. */ | 2995 | /* Setup fixed registers for exit to interpreter. */ |
3000 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); | 2996 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); |
3001 | emit_loadi(as, RID_PC, (int32_t)map[nslots]); | 2997 | emit_loadi(as, RID_PC, (int32_t)map[nent]); |
3002 | } else if (newbase) { | 2998 | } else if (newbase) { |
3003 | /* Save modified BASE for linking to trace with higher start frame. */ | 2999 | /* Save modified BASE for linking to trace with higher start frame. */ |
3004 | emit_setgl(as, RID_BASE, jit_base); | 3000 | emit_setgl(as, RID_BASE, jit_base); |
@@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as) | |||
3007 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); | 3003 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); |
3008 | 3004 | ||
3009 | /* Clear stack slots of newly added frames. */ | 3005 | /* Clear stack slots of newly added frames. */ |
3006 | nslots = snap->nslots; | ||
3010 | if (nslots <= topslot) { | 3007 | if (nslots <= topslot) { |
3011 | if (nslots < topslot) { | 3008 | if (nslots < topslot) { |
3009 | BCReg s; | ||
3012 | for (s = nslots; s <= topslot; s++) { | 3010 | for (s = nslots; s <= topslot; s++) { |
3013 | emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); | 3011 | emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4); |
3014 | checkmclim(as); | 3012 | checkmclim(as); |
3015 | } | 3013 | } |
3016 | emit_loadi(as, RID_EAX, LJ_TNIL); | 3014 | emit_loadi(as, RID_EAX, LJ_TNIL); |
3017 | } else { | 3015 | } else { |
3018 | emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); | 3016 | emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL); |
3019 | } | 3017 | } |
3020 | } | 3018 | } |
3021 | 3019 | ||
3022 | /* Store the value of all modified slots to the Lua stack. */ | 3020 | /* Store the value of all modified slots to the Lua stack. */ |
3023 | for (s = 0; s < nslots; s++) { | 3021 | for (n = 0; n < nent; n++) { |
3022 | BCReg s = snap_slot(map[n]); | ||
3024 | int32_t ofs = 8*((int32_t)s-1); | 3023 | int32_t ofs = 8*((int32_t)s-1); |
3025 | IRRef ref = snap_ref(map[s]); | 3024 | IRRef ref = snap_ref(map[n]); |
3026 | if (ref) { | 3025 | IRIns *ir = IR(ref); |
3027 | IRIns *ir = IR(ref); | 3026 | /* No need to restore readonly slots and unmodified non-parent slots. */ |
3028 | /* No need to restore readonly slots and unmodified non-parent slots. */ | 3027 | if (ir->o == IR_SLOAD && ir->op1 == s && |
3029 | if (ir->o == IR_SLOAD && ir->op1 == s && | 3028 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) |
3030 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 3029 | continue; |
3031 | continue; | 3030 | if (irt_isnum(ir->t)) { |
3032 | if (irt_isnum(ir->t)) { | 3031 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
3033 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 3032 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); |
3034 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | 3033 | } else if (ir->o == IR_FRAME) { |
3035 | } else if (ir->o == IR_FRAME) { | 3034 | emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); |
3036 | emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); | 3035 | if (s != 0) /* Do not overwrite link to previous frame. */ |
3037 | if (s != 0) /* Do not overwrite link to previous frame. */ | 3036 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); |
3038 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); | ||
3039 | } else { | ||
3040 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | ||
3041 | if (!irref_isk(ref)) { | ||
3042 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
3043 | emit_movtomro(as, src, RID_BASE, ofs); | ||
3044 | } else if (!irt_ispri(ir->t)) { | ||
3045 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
3046 | } | ||
3047 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
3048 | } | ||
3049 | } else { | 3037 | } else { |
3050 | lua_assert(!(s > secondbase)); | 3038 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); |
3039 | if (!irref_isk(ref)) { | ||
3040 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
3041 | emit_movtomro(as, src, RID_BASE, ofs); | ||
3042 | } else if (!irt_ispri(ir->t)) { | ||
3043 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
3044 | } | ||
3045 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
3051 | } | 3046 | } |
3052 | checkmclim(as); | 3047 | checkmclim(as); |
3053 | } | 3048 | } |
3054 | lua_assert(map + nslots == flinks-1); | 3049 | lua_assert(map + nent == flinks-1); |
3055 | } | 3050 | } |
3056 | 3051 | ||
3057 | /* Fixup the tail code. */ | 3052 | /* Fixup the tail code. */ |