From c4e9dc00129057ddee23943f5e1ddd1eefcb8610 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 22 Dec 2009 06:16:29 +0100 Subject: Miscellaneous cleanups for x64 interpreter. --- src/buildvm_x86.dasc | 59 ++++++++++++++++++++++++++++++++++++++-------------- src/buildvm_x86.h | 22 ++++++++++---------- 2 files changed, 54 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 58767c1e..65e9be1d 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc @@ -19,10 +19,14 @@ |// Fixed register assignments for the interpreter. |// This is very fragile and has many dependencies. Caveat emptor. |.define BASE, edx // Not C callee-save, refetched anyway. -|.if not X64 or X64WIN +|.if not X64 |.define KBASE, edi // Must be C callee-save. |.define KBASEa, KBASE |.define PC, esi // Must be C callee-save. +|.elif X64WIN +|.define KBASE, edi // Must be C callee-save. +|.define KBASEa, rdi +|.define PC, esi // Must be C callee-save. |.else |.define KBASE, r15d // Must be C callee-save. |.define KBASEa, r15 @@ -136,7 +140,9 @@ |.define FPARG1, qword [esp] |// TMPQ overlaps TMP1/TMP2. ARG5/NRESULTS overlap TMP1/TMP2 (and TMPQ). |.define TMPQ, qword [esp+aword*4] +|.define TMP3, ARG4 |.define ARG5, TMP1 +|.define TMPa, TMP1 |.define NRESULTS, TMP2 | |// Arguments for vm_call and vm_pcall. @@ -183,7 +189,9 @@ |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). |.define TMPQ, qword [rsp+aword*10] |.define NRESULTS, TMP2 +|.define TMPa, ARG5 |.define ARG5d, dword [rsp+aword*4] +|.define TMP3, ARG5d | |//----------------------------------------------------------------------- |.else // x64/POSIX stack layout @@ -205,7 +213,7 @@ |.define SAVE_R2, aword [rsp+aword*6] |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. |.define SAVE_CFRAME, aword [rsp+aword*4] -|.define UNUSED1, aword [rsp+aword*3] +|.define TMPa, aword [rsp+aword*3] |//----- ^^^ awords above, vvv dwords below |.define SAVE_PC, dword [rsp+dword*5] |.define SAVE_L, dword [rsp+dword*4] @@ -217,6 +225,7 @@ | |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). |.define TMPQ, qword [rsp] +|.define TMP3, dword [rsp+aword*3] |.define NRESULTS, TMP2 | |.endif @@ -900,9 +909,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | cvtsi2sd xmm0, RC | movsd TMPQ, xmm0 } else { + |.if not X64 | mov ARG4, RC | fild ARG4 | fstp TMPQ + |.endif } | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. | jmp >1 @@ -971,9 +982,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | cvtsi2sd xmm0, RC | movsd TMPQ, xmm0 } else { + |.if not X64 | mov ARG4, RC | fild ARG4 | fstp TMPQ + |.endif } | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. | jmp >1 @@ -2182,8 +2195,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | cvtsd2si RC, qword [RA+8] | mov ARG3, RC } else { + |.if not X64 | fld qword [RA+8] | fistp ARG3 + |.endif } | mov RC, TMP2 | cmp RB, RC // len < end? (unsigned compare) @@ -2324,7 +2339,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) if (sse) { | cvtsi2sd xmm0, RC; jmp ->fff_resxmm0 } else { + |.if not X64 | mov ARG1, RC; fild ARG1; jmp ->fff_resn + |.endif } | |//-- Bit library -------------------------------------------------------- @@ -2339,12 +2356,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | cvtsi2sd xmm0, RB | jmp ->fff_resxmm0 } else { + |.if not X64 |.ffunc_n bit_tobit | mov TMP1, TOBIT_BIAS | fadd TMP1 | fstp FPARG1 // 64 bit FP store. | fild ARG1 // 32 bit integer load (s2lfwd ok). | jmp ->fff_resn + |.endif } | |.macro .ffunc_bit, name @@ -2354,11 +2373,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | addsd xmm0, xmm1 | movd RB, xmm0 ||} else { + |.if not X64 | .ffunc_n name | mov TMP1, TOBIT_BIAS | fadd TMP1 | fstp FPARG1 | mov RB, ARG1 + |.endif ||} |.endmacro | @@ -2409,11 +2430,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | mov BASE, TMP1 | jmp ->fff_resxmm0 } else { + |.if not X64 |->fff_resbit: |->fff_resbit_op: | mov ARG1, RB | fild ARG1 | jmp ->fff_resn + |.endif } | |->fff_fallback_bit_op: @@ -2433,6 +2456,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | movd RB, xmm0 | movd RA, xmm1 ||} else { + |.if not X64 | .ffunc_nn name | mov TMP1, TOBIT_BIAS | fadd TMP1 @@ -2442,6 +2466,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | mov RC, RA // Assumes RA is ecx. | mov RA, ARG3 | mov RB, ARG1 + |.endif ||} | ins RB, cl | mov RA, RC @@ -2467,7 +2492,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | sub BASE, RA | mov [RA-4], PC | mov SAVE_PC, PC // Redundant (but a defined value). - | mov ARG3, BASE // Save old BASE (relative). + | mov TMP1, BASE // Save old BASE (relative). | mov L:RB->base, RA | lea RC, [RA+NARGS:RC*8-8] | mov ARG1, L:RB @@ -2486,7 +2511,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | shr RC, 3 | add NARGS:RC, 1 | mov LFUNC:RB, [RA-8] - | mov BASE, ARG3 // Restore old BASE. + | mov BASE, TMP1 // Restore old BASE. | add BASE, RA | cmp [RA-4], PC; jne >2 // Callable modified by handler? | jmp aword LFUNC:RB->gate // Retry the call. @@ -2509,11 +2534,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |->fff_gcstep: // Call GC step function. | // RA = new base, RC = nargs+1 - | pop RB // Must keep stack at same level. - | mov ARG3, RB // Save return address + | pop RBa // Must keep stack at same level. + | mov TMPa, RBa // Save return address | mov L:RB, SAVE_L | sub BASE, RA - | mov ARG2, BASE // Save old BASE (relative). + | mov TMP2, BASE // Save old BASE (relative). | mov [RA-4], PC | mov SAVE_PC, PC // Redundant (but a defined value). | mov L:RB->base, RA @@ -2531,10 +2556,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | shr RC, 3 | add NARGS:RC, 1 | mov PC, [RA-4] - | mov BASE, ARG2 // Restore old BASE. + | mov BASE, TMP2 // Restore old BASE. | add BASE, RA - | mov RB, ARG3 - | push RB // Restore return address. + | mov RBa, TMPa + | push RBa // Restore return address. | mov LFUNC:RB, [RA-8] | ret | @@ -3369,9 +3394,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | cvtsi2sd xmm0, RC | mov BASE, RB // Restore BASE. } else { + |.if not X64 | mov ARG1, RC | mov BASE, RB // Restore BASE. | fild ARG1 + |.endif } | movzx RA, PC_RA | jmp <1 @@ -3994,7 +4021,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | mov TMP1, STR:RC | mov TMP2, LJ_TSTR | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. - | mov ARG4, TAB:RB // Save TAB:RB for us. + | mov TMP3, TAB:RB // Save TAB:RB for us. | mov ARG2, TAB:RB | mov L:RB, SAVE_L | mov ARG3, RC @@ -4004,7 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | // Handles write barrier for the new key. TValue * returned in eax (RC). | mov BASE, L:RB->base - | mov TAB:RB, ARG4 // Need TAB:RB for barrier. + | mov TAB:RB, TMP3 // Need TAB:RB for barrier. | mov RA, eax | jmp <2 // Must check write barrier for value. | @@ -4211,7 +4238,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | lea RA, [BASE+RA*8] | mov PROTO:RC, LFUNC:RC->pt | movzx RC, byte PROTO:RC->numparams - | mov ARG3, KBASE // Need one more free register. + | mov TMP1, KBASE // Need one more free register. | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] | sub KBASE, [BASE-4] | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. @@ -4237,7 +4264,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | cmp RA, RB | jb <2 |3: - | mov KBASE, ARG3 + | mov KBASE, TMP1 | ins_next | |5: // Copy all varargs. @@ -4496,11 +4523,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | ins_AD // RA = base (ignored), RD = traceno | mov RA, [DISPATCH+DISPATCH_J(trace)] | mov TRACE:RD, [RA+RD*4] - | mov RD, TRACE:RD->mcode + | mov RDa, TRACE:RD->mcode | mov L:RB, SAVE_L | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB - | jmp RD + | jmp RDa #endif break; diff --git a/src/buildvm_x86.h b/src/buildvm_x86.h index feb94924..4f3e6c93 100644 --- a/src/buildvm_x86.h +++ b/src/buildvm_x86.h @@ -407,17 +407,17 @@ static const unsigned char build_actionlist[14716] = { 216,68,36,16,221,92,36,8,216,68,36,16,221,28,36,137,200,139,76,36,8,139,44, 36,255,211,205,137,193,252,233,244,129,248,116,184,237,252,233,244,54,248, 118,184,237,248,54,139,108,36,48,41,202,137,113,252,252,137,116,36,24,137, - 84,36,8,137,141,233,141,68,193,252,248,137,44,36,141,144,233,137,133,233, + 84,36,16,137,141,233,141,68,193,252,248,137,44,36,141,144,233,137,133,233, 139,73,252,248,59,149,233,15,135,244,251,252,255,145,233,133,192,15,133,244, 249,248,1,139,141,233,255,139,133,233,41,200,193,232,3,131,192,1,139,105, - 252,248,139,84,36,8,1,202,57,113,252,252,15,133,244,248,252,255,165,233,248, - 2,129,121,253,252,252,239,15,133,244,29,252,255,165,233,248,3,139,141,233, - 139,84,36,8,1,202,252,233,244,68,248,5,199,68,36,4,237,232,251,1,0,252,233, - 244,1,248,65,93,137,108,36,8,139,108,36,48,41,202,137,84,36,4,137,113,252, - 252,137,116,36,24,137,141,233,141,68,193,252,248,137,44,36,137,133,233,255, - 232,251,1,19,139,141,233,139,133,233,41,200,193,232,3,131,192,1,139,113,252, - 252,139,84,36,4,1,202,139,108,36,8,85,139,105,252,248,195,248,136,255,15, - 182,131,233,168,235,15,133,244,251,168,235,15,133,244,247,168,235,15,132, + 252,248,139,84,36,16,1,202,57,113,252,252,15,133,244,248,252,255,165,233, + 248,2,129,121,253,252,252,239,15,133,244,29,252,255,165,233,248,3,139,141, + 233,139,84,36,8,1,202,252,233,244,68,248,5,199,68,36,4,237,232,251,1,0,252, + 233,244,1,248,65,93,137,108,36,16,139,108,36,48,41,202,137,84,36,20,137,113, + 252,252,137,116,36,24,137,141,233,141,68,193,252,248,137,44,36,137,133,233, + 255,232,251,1,19,139,141,233,139,133,233,41,200,193,232,3,131,192,1,139,113, + 252,252,139,84,36,20,1,202,139,108,36,16,85,139,105,252,248,195,248,136,255, + 15,182,131,233,168,235,15,133,244,251,168,235,15,133,244,247,168,235,15,132, 244,247,252,255,139,233,252,233,244,247,255,248,137,15,182,131,233,168,235, 15,133,244,251,168,235,15,132,244,251,252,255,139,233,15,132,244,247,168, 235,15,132,244,251,248,1,139,108,36,48,139,68,36,20,137,68,36,8,137,149,233, @@ -685,11 +685,11 @@ static const unsigned char build_actionlist[14716] = { 236,137,41,137,65,4,139,105,252,240,139,65,252,244,137,105,8,137,65,12,139, 105,224,139,65,228,137,105,252,248,137,65,252,252,129,252,248,239,184,3,0, 0,0,15,133,244,29,252,255,165,233,255,15,182,252,236,139,66,252,248,141,12, - 202,139,128,233,15,182,128,233,137,124,36,8,141,188,253,194,233,43,122,252, + 202,139,128,233,15,182,128,233,137,124,36,16,141,188,253,194,233,43,122,252, 252,133,252,237,15,132,244,251,141,108,252,233,252,248,57,215,15,131,244, 248,248,1,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,4,131,193, 8,57,252,233,15,131,244,249,57,215,15,130,244,1,248,2,199,65,4,237,131,193, - 8,57,252,233,15,130,244,2,248,3,139,124,36,8,139,6,15,182,204,15,182,232, + 8,57,252,233,15,130,244,2,248,3,139,124,36,16,139,6,15,182,204,15,182,232, 131,198,4,193,232,16,252,255,36,171,248,5,199,68,36,20,1,0,0,0,137,208,41, 252,248,15,134,244,3,255,137,197,193,252,237,3,137,108,36,4,131,197,1,137, 108,36,20,139,108,36,48,1,200,59,133,233,15,135,244,253,248,6,139,71,252, -- cgit v1.2.3-55-g6feb