about | summary | refs | log | tree | commit | diff
path: root/src/lj_asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_asm.c')
-rw-r--r-- src/lj_asm.c 101
1 files changed, 48 insertions, 53 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9f5ce030..b3656e00 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as)
926{ 926{
927 SnapShot *snap = &as->T->snap[as->snapno]; 927 SnapShot *snap = &as->T->snap[as->snapno];
928 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 928 SnapEntry *map = &as->T->snapmap[snap->mapofs];
929 BCReg s, nslots = snap->nslots; 929 MSize n, nent = snap->nent;
930 for (s = 0; s < nslots; s++) { 930 for (n = 0; n < nent; n++) {
931 IRRef ref = snap_ref(map[s]); 931 IRRef ref = snap_ref(map[n]);
932 if (!irref_isk(ref)) { 932 if (!irref_isk(ref)) {
933 IRIns *ir = IR(ref); 933 IRIns *ir = IR(ref);
934 if (!ra_used(ir) && ir->o != IR_FRAME) { 934 if (!ra_used(ir) && ir->o != IR_FRAME) {
@@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
960{ 960{
961 SnapShot *snap = &as->T->snap[as->snapno]; 961 SnapShot *snap = &as->T->snap[as->snapno];
962 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 962 SnapEntry *map = &as->T->snapmap[snap->mapofs];
963 BCReg s, nslots = snap->nslots; 963 MSize n, nent = snap->nent;
964 for (s = 0; s < nslots; s++) { 964 for (n = 0; n < nent; n++) {
965 IRRef ref = snap_ref(map[s]); 965 IRRef ref = snap_ref(map[n]);
966 if (ref == ren) { 966 if (ref == ren) {
967 IRIns *ir = IR(ref); 967 IRIns *ir = IR(ref);
968 ra_spill(as, ir); /* Register renamed, so force a spill slot. */ 968 ra_spill(as, ir); /* Register renamed, so force a spill slot. */
@@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
2465 */ 2465 */
2466 RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); 2466 RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
2467 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2467 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2468 BCReg s, nslots = snap->nslots; 2468 MSize n, nent = snap->nent;
2469 for (s = 0; s < nslots; s++) { 2469 for (n = 0; n < nent; n++) {
2470 IRRef ref = snap_ref(map[s]); 2470 IRRef ref = snap_ref(map[n]);
2471 if (!irref_isk(ref)) { 2471 if (!irref_isk(ref)) {
2472 int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1);
2472 IRIns *ir = IR(ref); 2473 IRIns *ir = IR(ref);
2473 if (ir->o == IR_FRAME) { 2474 if (ir->o == IR_FRAME) {
2474 /* NYI: sync the frame, bump base, set topslot, clear new slots. */ 2475 /* NYI: sync the frame, bump base, set topslot, clear new slots. */
2475 lj_trace_err(as->J, LJ_TRERR_NYIGCF); 2476 lj_trace_err(as->J, LJ_TRERR_NYIGCF);
2476 } else if (irt_isgcv(ir->t) && 2477 } else if (irt_isgcv(ir->t)) {
2477 !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) {
2478 Reg src = ra_alloc1(as, ref, allow); 2478 Reg src = ra_alloc1(as, ref, allow);
2479 int32_t ofs = 8*(int32_t)(s-1);
2480 emit_movtomro(as, src, base, ofs); 2479 emit_movtomro(as, src, base, ofs);
2481 emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); 2480 emit_movmroi(as, base, ofs+4, irt_toitype(ir->t));
2482 checkmclim(as); 2481 checkmclim(as);
@@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
2504 emit_loadi(as, tmp, (int32_t)as->gcsteps); 2503 emit_loadi(as, tmp, (int32_t)as->gcsteps);
2505 /* We don't know spadj yet, so get the C frame from L->cframe. */ 2504 /* We don't know spadj yet, so get the C frame from L->cframe. */
2506 emit_movmroi(as, tmp, CFRAME_OFS_PC, 2505 emit_movmroi(as, tmp, CFRAME_OFS_PC,
2507 (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); 2506 (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
2508 emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); 2507 emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
2509 lstate = IR(ASMREF_L)->r; 2508 lstate = IR(ASMREF_L)->r;
2510 emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); 2509 emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
@@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as)
2965static void asm_tail_sync(ASMState *as) 2964static void asm_tail_sync(ASMState *as)
2966{ 2965{
2967 SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ 2966 SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */
2968 BCReg s, nslots = snap->nslots; 2967 MSize n, nent = snap->nent;
2969 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2968 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2970 SnapEntry *flinks = map + nslots + snap->nframelinks; 2969 SnapEntry *flinks = map + nent + snap->nframelinks;
2971 BCReg newbase = 0; 2970 BCReg newbase = 0;
2972 BCReg secondbase = ~(BCReg)0; 2971 BCReg nslots, topslot = 0;
2973 BCReg topslot = 0;
2974 2972
2975 checkmclim(as); 2973 checkmclim(as);
2976 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); 2974 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
2977 2975
2978 /* Must check all frames to find topslot (outer can be larger than inner). */ 2976 /* Must check all frames to find topslot (outer can be larger than inner). */
2979 for (s = 0; s < nslots; s++) { 2977 for (n = 0; n < nent; n++) {
2980 IRRef ref = snap_ref(map[s]); 2978 IRRef ref = snap_ref(map[n]);
2979 BCReg s = snap_slot(map[n]);
2981 if (!irref_isk(ref)) { 2980 if (!irref_isk(ref)) {
2982 IRIns *ir = IR(ref); 2981 IRIns *ir = IR(ref);
2983 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { 2982 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
@@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as)
2985 if (isluafunc(fn)) { 2984 if (isluafunc(fn)) {
2986 BCReg fs = s + funcproto(fn)->framesize; 2985 BCReg fs = s + funcproto(fn)->framesize;
2987 if (fs > topslot) topslot = fs; 2986 if (fs > topslot) topslot = fs;
2988 if (s != 0) { 2987 newbase = s;
2989 newbase = s;
2990 if (secondbase == ~(BCReg)0) secondbase = s;
2991 }
2992 } 2988 }
2993 } 2989 }
2994 } 2990 }
@@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as)
2998 if (as->T->link == TRACE_INTERP) { 2994 if (as->T->link == TRACE_INTERP) {
2999 /* Setup fixed registers for exit to interpreter. */ 2995 /* Setup fixed registers for exit to interpreter. */
3000 emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); 2996 emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
3001 emit_loadi(as, RID_PC, (int32_t)map[nslots]); 2997 emit_loadi(as, RID_PC, (int32_t)map[nent]);
3002 } else if (newbase) { 2998 } else if (newbase) {
3003 /* Save modified BASE for linking to trace with higher start frame. */ 2999 /* Save modified BASE for linking to trace with higher start frame. */
3004 emit_setgl(as, RID_BASE, jit_base); 3000 emit_setgl(as, RID_BASE, jit_base);
@@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as)
3007 emit_addptr(as, RID_BASE, 8*(int32_t)newbase); 3003 emit_addptr(as, RID_BASE, 8*(int32_t)newbase);
3008 3004
3009 /* Clear stack slots of newly added frames. */ 3005 /* Clear stack slots of newly added frames. */
3006 nslots = snap->nslots;
3010 if (nslots <= topslot) { 3007 if (nslots <= topslot) {
3011 if (nslots < topslot) { 3008 if (nslots < topslot) {
3009 BCReg s;
3012 for (s = nslots; s <= topslot; s++) { 3010 for (s = nslots; s <= topslot; s++) {
3013 emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); 3011 emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4);
3014 checkmclim(as); 3012 checkmclim(as);
3015 } 3013 }
3016 emit_loadi(as, RID_EAX, LJ_TNIL); 3014 emit_loadi(as, RID_EAX, LJ_TNIL);
3017 } else { 3015 } else {
3018 emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); 3016 emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL);
3019 } 3017 }
3020 } 3018 }
3021 3019
3022 /* Store the value of all modified slots to the Lua stack. */ 3020 /* Store the value of all modified slots to the Lua stack. */
3023 for (s = 0; s < nslots; s++) { 3021 for (n = 0; n < nent; n++) {
3022 BCReg s = snap_slot(map[n]);
3024 int32_t ofs = 8*((int32_t)s-1); 3023 int32_t ofs = 8*((int32_t)s-1);
3025 IRRef ref = snap_ref(map[s]); 3024 IRRef ref = snap_ref(map[n]);
3026 if (ref) { 3025 IRIns *ir = IR(ref);
3027 IRIns *ir = IR(ref); 3026 /* No need to restore readonly slots and unmodified non-parent slots. */
3028 /* No need to restore readonly slots and unmodified non-parent slots. */ 3027 if (ir->o == IR_SLOAD && ir->op1 == s &&
3029 if (ir->o == IR_SLOAD && ir->op1 == s && 3028 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
3030 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) 3029 continue;
3031 continue; 3030 if (irt_isnum(ir->t)) {
3032 if (irt_isnum(ir->t)) { 3031 Reg src = ra_alloc1(as, ref, RSET_FPR);
3033 Reg src = ra_alloc1(as, ref, RSET_FPR); 3032 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
3034 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); 3033 } else if (ir->o == IR_FRAME) {
3035 } else if (ir->o == IR_FRAME) { 3034 emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
3036 emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); 3035 if (s != 0) /* Do not overwrite link to previous frame. */
3037 if (s != 0) /* Do not overwrite link to previous frame. */ 3036 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
3038 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
3039 } else {
3040 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
3041 if (!irref_isk(ref)) {
3042 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
3043 emit_movtomro(as, src, RID_BASE, ofs);
3044 } else if (!irt_ispri(ir->t)) {
3045 emit_movmroi(as, RID_BASE, ofs, ir->i);
3046 }
3047 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
3048 }
3049 } else { 3037 } else {
3050 lua_assert(!(s > secondbase)); 3038 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
3039 if (!irref_isk(ref)) {
3040 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
3041 emit_movtomro(as, src, RID_BASE, ofs);
3042 } else if (!irt_ispri(ir->t)) {
3043 emit_movmroi(as, RID_BASE, ofs, ir->i);
3044 }
3045 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
3051 } 3046 }
3052 checkmclim(as); 3047 checkmclim(as);
3053 } 3048 }
3054 lua_assert(map + nslots == flinks-1); 3049 lua_assert(map + nent == flinks-1);
3055} 3050}
3056 3051
3057/* Fixup the tail code. */ 3052/* Fixup the tail code. */