diff options
author | Mike Pall <mike> | 2009-12-22 06:16:29 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2009-12-22 06:16:29 +0100 |
commit | c4e9dc00129057ddee23943f5e1ddd1eefcb8610 (patch) | |
tree | 35afb0ab8f099a1a29b9a736a4c14d136496ad3a /src/buildvm_x86.dasc | |
parent | 44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1 (diff) | |
download | luajit-c4e9dc00129057ddee23943f5e1ddd1eefcb8610.tar.gz luajit-c4e9dc00129057ddee23943f5e1ddd1eefcb8610.tar.bz2 luajit-c4e9dc00129057ddee23943f5e1ddd1eefcb8610.zip |
Miscellaneous cleanups for x64 interpreter.
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r-- | src/buildvm_x86.dasc | 59 |
1 file changed, 43 insertions, 16 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 58767c1e..65e9be1d 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc | |||
@@ -19,10 +19,14 @@ | |||
19 | |// Fixed register assignments for the interpreter. | 19 | |// Fixed register assignments for the interpreter. |
20 | |// This is very fragile and has many dependencies. Caveat emptor. | 20 | |// This is very fragile and has many dependencies. Caveat emptor. |
21 | |.define BASE, edx // Not C callee-save, refetched anyway. | 21 | |.define BASE, edx // Not C callee-save, refetched anyway. |
22 | |.if not X64 or X64WIN | 22 | |.if not X64 |
23 | |.define KBASE, edi // Must be C callee-save. | 23 | |.define KBASE, edi // Must be C callee-save. |
24 | |.define KBASEa, KBASE | 24 | |.define KBASEa, KBASE |
25 | |.define PC, esi // Must be C callee-save. | 25 | |.define PC, esi // Must be C callee-save. |
26 | |.elif X64WIN | ||
27 | |.define KBASE, edi // Must be C callee-save. | ||
28 | |.define KBASEa, rdi | ||
29 | |.define PC, esi // Must be C callee-save. | ||
26 | |.else | 30 | |.else |
27 | |.define KBASE, r15d // Must be C callee-save. | 31 | |.define KBASE, r15d // Must be C callee-save. |
28 | |.define KBASEa, r15 | 32 | |.define KBASEa, r15 |
@@ -136,7 +140,9 @@ | |||
136 | |.define FPARG1, qword [esp] | 140 | |.define FPARG1, qword [esp] |
137 | |// TMPQ overlaps TMP1/TMP2. ARG5/NRESULTS overlap TMP1/TMP2 (and TMPQ). | 141 | |// TMPQ overlaps TMP1/TMP2. ARG5/NRESULTS overlap TMP1/TMP2 (and TMPQ). |
138 | |.define TMPQ, qword [esp+aword*4] | 142 | |.define TMPQ, qword [esp+aword*4] |
143 | |.define TMP3, ARG4 | ||
139 | |.define ARG5, TMP1 | 144 | |.define ARG5, TMP1 |
145 | |.define TMPa, TMP1 | ||
140 | |.define NRESULTS, TMP2 | 146 | |.define NRESULTS, TMP2 |
141 | | | 147 | | |
142 | |// Arguments for vm_call and vm_pcall. | 148 | |// Arguments for vm_call and vm_pcall. |
@@ -183,7 +189,9 @@ | |||
183 | |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). | 189 | |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). |
184 | |.define TMPQ, qword [rsp+aword*10] | 190 | |.define TMPQ, qword [rsp+aword*10] |
185 | |.define NRESULTS, TMP2 | 191 | |.define NRESULTS, TMP2 |
192 | |.define TMPa, ARG5 | ||
186 | |.define ARG5d, dword [rsp+aword*4] | 193 | |.define ARG5d, dword [rsp+aword*4] |
194 | |.define TMP3, ARG5d | ||
187 | | | 195 | | |
188 | |//----------------------------------------------------------------------- | 196 | |//----------------------------------------------------------------------- |
189 | |.else // x64/POSIX stack layout | 197 | |.else // x64/POSIX stack layout |
@@ -205,7 +213,7 @@ | |||
205 | |.define SAVE_R2, aword [rsp+aword*6] | 213 | |.define SAVE_R2, aword [rsp+aword*6] |
206 | |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. | 214 | |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. |
207 | |.define SAVE_CFRAME, aword [rsp+aword*4] | 215 | |.define SAVE_CFRAME, aword [rsp+aword*4] |
208 | |.define UNUSED1, aword [rsp+aword*3] | 216 | |.define TMPa, aword [rsp+aword*3] |
209 | |//----- ^^^ awords above, vvv dwords below | 217 | |//----- ^^^ awords above, vvv dwords below |
210 | |.define SAVE_PC, dword [rsp+dword*5] | 218 | |.define SAVE_PC, dword [rsp+dword*5] |
211 | |.define SAVE_L, dword [rsp+dword*4] | 219 | |.define SAVE_L, dword [rsp+dword*4] |
@@ -217,6 +225,7 @@ | |||
217 | | | 225 | | |
218 | |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). | 226 | |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). |
219 | |.define TMPQ, qword [rsp] | 227 | |.define TMPQ, qword [rsp] |
228 | |.define TMP3, dword [rsp+aword*3] | ||
220 | |.define NRESULTS, TMP2 | 229 | |.define NRESULTS, TMP2 |
221 | | | 230 | | |
222 | |.endif | 231 | |.endif |
@@ -900,9 +909,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
900 | | cvtsi2sd xmm0, RC | 909 | | cvtsi2sd xmm0, RC |
901 | | movsd TMPQ, xmm0 | 910 | | movsd TMPQ, xmm0 |
902 | } else { | 911 | } else { |
912 | |.if not X64 | ||
903 | | mov ARG4, RC | 913 | | mov ARG4, RC |
904 | | fild ARG4 | 914 | | fild ARG4 |
905 | | fstp TMPQ | 915 | | fstp TMPQ |
916 | |.endif | ||
906 | } | 917 | } |
907 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. | 918 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
908 | | jmp >1 | 919 | | jmp >1 |
@@ -971,9 +982,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
971 | | cvtsi2sd xmm0, RC | 982 | | cvtsi2sd xmm0, RC |
972 | | movsd TMPQ, xmm0 | 983 | | movsd TMPQ, xmm0 |
973 | } else { | 984 | } else { |
985 | |.if not X64 | ||
974 | | mov ARG4, RC | 986 | | mov ARG4, RC |
975 | | fild ARG4 | 987 | | fild ARG4 |
976 | | fstp TMPQ | 988 | | fstp TMPQ |
989 | |.endif | ||
977 | } | 990 | } |
978 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. | 991 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
979 | | jmp >1 | 992 | | jmp >1 |
@@ -2182,8 +2195,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2182 | | cvtsd2si RC, qword [RA+8] | 2195 | | cvtsd2si RC, qword [RA+8] |
2183 | | mov ARG3, RC | 2196 | | mov ARG3, RC |
2184 | } else { | 2197 | } else { |
2198 | |.if not X64 | ||
2185 | | fld qword [RA+8] | 2199 | | fld qword [RA+8] |
2186 | | fistp ARG3 | 2200 | | fistp ARG3 |
2201 | |.endif | ||
2187 | } | 2202 | } |
2188 | | mov RC, TMP2 | 2203 | | mov RC, TMP2 |
2189 | | cmp RB, RC // len < end? (unsigned compare) | 2204 | | cmp RB, RC // len < end? (unsigned compare) |
@@ -2324,7 +2339,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2324 | if (sse) { | 2339 | if (sse) { |
2325 | | cvtsi2sd xmm0, RC; jmp ->fff_resxmm0 | 2340 | | cvtsi2sd xmm0, RC; jmp ->fff_resxmm0 |
2326 | } else { | 2341 | } else { |
2342 | |.if not X64 | ||
2327 | | mov ARG1, RC; fild ARG1; jmp ->fff_resn | 2343 | | mov ARG1, RC; fild ARG1; jmp ->fff_resn |
2344 | |.endif | ||
2328 | } | 2345 | } |
2329 | | | 2346 | | |
2330 | |//-- Bit library -------------------------------------------------------- | 2347 | |//-- Bit library -------------------------------------------------------- |
@@ -2339,12 +2356,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2339 | | cvtsi2sd xmm0, RB | 2356 | | cvtsi2sd xmm0, RB |
2340 | | jmp ->fff_resxmm0 | 2357 | | jmp ->fff_resxmm0 |
2341 | } else { | 2358 | } else { |
2359 | |.if not X64 | ||
2342 | |.ffunc_n bit_tobit | 2360 | |.ffunc_n bit_tobit |
2343 | | mov TMP1, TOBIT_BIAS | 2361 | | mov TMP1, TOBIT_BIAS |
2344 | | fadd TMP1 | 2362 | | fadd TMP1 |
2345 | | fstp FPARG1 // 64 bit FP store. | 2363 | | fstp FPARG1 // 64 bit FP store. |
2346 | | fild ARG1 // 32 bit integer load (s2lfwd ok). | 2364 | | fild ARG1 // 32 bit integer load (s2lfwd ok). |
2347 | | jmp ->fff_resn | 2365 | | jmp ->fff_resn |
2366 | |.endif | ||
2348 | } | 2367 | } |
2349 | | | 2368 | | |
2350 | |.macro .ffunc_bit, name | 2369 | |.macro .ffunc_bit, name |
@@ -2354,11 +2373,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2354 | | addsd xmm0, xmm1 | 2373 | | addsd xmm0, xmm1 |
2355 | | movd RB, xmm0 | 2374 | | movd RB, xmm0 |
2356 | ||} else { | 2375 | ||} else { |
2376 | |.if not X64 | ||
2357 | | .ffunc_n name | 2377 | | .ffunc_n name |
2358 | | mov TMP1, TOBIT_BIAS | 2378 | | mov TMP1, TOBIT_BIAS |
2359 | | fadd TMP1 | 2379 | | fadd TMP1 |
2360 | | fstp FPARG1 | 2380 | | fstp FPARG1 |
2361 | | mov RB, ARG1 | 2381 | | mov RB, ARG1 |
2382 | |.endif | ||
2362 | ||} | 2383 | ||} |
2363 | |.endmacro | 2384 | |.endmacro |
2364 | | | 2385 | | |
@@ -2409,11 +2430,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2409 | | mov BASE, TMP1 | 2430 | | mov BASE, TMP1 |
2410 | | jmp ->fff_resxmm0 | 2431 | | jmp ->fff_resxmm0 |
2411 | } else { | 2432 | } else { |
2433 | |.if not X64 | ||
2412 | |->fff_resbit: | 2434 | |->fff_resbit: |
2413 | |->fff_resbit_op: | 2435 | |->fff_resbit_op: |
2414 | | mov ARG1, RB | 2436 | | mov ARG1, RB |
2415 | | fild ARG1 | 2437 | | fild ARG1 |
2416 | | jmp ->fff_resn | 2438 | | jmp ->fff_resn |
2439 | |.endif | ||
2417 | } | 2440 | } |
2418 | | | 2441 | | |
2419 | |->fff_fallback_bit_op: | 2442 | |->fff_fallback_bit_op: |
@@ -2433,6 +2456,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2433 | | movd RB, xmm0 | 2456 | | movd RB, xmm0 |
2434 | | movd RA, xmm1 | 2457 | | movd RA, xmm1 |
2435 | ||} else { | 2458 | ||} else { |
2459 | |.if not X64 | ||
2436 | | .ffunc_nn name | 2460 | | .ffunc_nn name |
2437 | | mov TMP1, TOBIT_BIAS | 2461 | | mov TMP1, TOBIT_BIAS |
2438 | | fadd TMP1 | 2462 | | fadd TMP1 |
@@ -2442,6 +2466,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2442 | | mov RC, RA // Assumes RA is ecx. | 2466 | | mov RC, RA // Assumes RA is ecx. |
2443 | | mov RA, ARG3 | 2467 | | mov RA, ARG3 |
2444 | | mov RB, ARG1 | 2468 | | mov RB, ARG1 |
2469 | |.endif | ||
2445 | ||} | 2470 | ||} |
2446 | | ins RB, cl | 2471 | | ins RB, cl |
2447 | | mov RA, RC | 2472 | | mov RA, RC |
@@ -2467,7 +2492,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2467 | | sub BASE, RA | 2492 | | sub BASE, RA |
2468 | | mov [RA-4], PC | 2493 | | mov [RA-4], PC |
2469 | | mov SAVE_PC, PC // Redundant (but a defined value). | 2494 | | mov SAVE_PC, PC // Redundant (but a defined value). |
2470 | | mov ARG3, BASE // Save old BASE (relative). | 2495 | | mov TMP1, BASE // Save old BASE (relative). |
2471 | | mov L:RB->base, RA | 2496 | | mov L:RB->base, RA |
2472 | | lea RC, [RA+NARGS:RC*8-8] | 2497 | | lea RC, [RA+NARGS:RC*8-8] |
2473 | | mov ARG1, L:RB | 2498 | | mov ARG1, L:RB |
@@ -2486,7 +2511,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2486 | | shr RC, 3 | 2511 | | shr RC, 3 |
2487 | | add NARGS:RC, 1 | 2512 | | add NARGS:RC, 1 |
2488 | | mov LFUNC:RB, [RA-8] | 2513 | | mov LFUNC:RB, [RA-8] |
2489 | | mov BASE, ARG3 // Restore old BASE. | 2514 | | mov BASE, TMP1 // Restore old BASE. |
2490 | | add BASE, RA | 2515 | | add BASE, RA |
2491 | | cmp [RA-4], PC; jne >2 // Callable modified by handler? | 2516 | | cmp [RA-4], PC; jne >2 // Callable modified by handler? |
2492 | | jmp aword LFUNC:RB->gate // Retry the call. | 2517 | | jmp aword LFUNC:RB->gate // Retry the call. |
@@ -2509,11 +2534,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2509 | | | 2534 | | |
2510 | |->fff_gcstep: // Call GC step function. | 2535 | |->fff_gcstep: // Call GC step function. |
2511 | | // RA = new base, RC = nargs+1 | 2536 | | // RA = new base, RC = nargs+1 |
2512 | | pop RB // Must keep stack at same level. | 2537 | | pop RBa // Must keep stack at same level. |
2513 | | mov ARG3, RB // Save return address | 2538 | | mov TMPa, RBa // Save return address |
2514 | | mov L:RB, SAVE_L | 2539 | | mov L:RB, SAVE_L |
2515 | | sub BASE, RA | 2540 | | sub BASE, RA |
2516 | | mov ARG2, BASE // Save old BASE (relative). | 2541 | | mov TMP2, BASE // Save old BASE (relative). |
2517 | | mov [RA-4], PC | 2542 | | mov [RA-4], PC |
2518 | | mov SAVE_PC, PC // Redundant (but a defined value). | 2543 | | mov SAVE_PC, PC // Redundant (but a defined value). |
2519 | | mov L:RB->base, RA | 2544 | | mov L:RB->base, RA |
@@ -2531,10 +2556,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2531 | | shr RC, 3 | 2556 | | shr RC, 3 |
2532 | | add NARGS:RC, 1 | 2557 | | add NARGS:RC, 1 |
2533 | | mov PC, [RA-4] | 2558 | | mov PC, [RA-4] |
2534 | | mov BASE, ARG2 // Restore old BASE. | 2559 | | mov BASE, TMP2 // Restore old BASE. |
2535 | | add BASE, RA | 2560 | | add BASE, RA |
2536 | | mov RB, ARG3 | 2561 | | mov RBa, TMPa |
2537 | | push RB // Restore return address. | 2562 | | push RBa // Restore return address. |
2538 | | mov LFUNC:RB, [RA-8] | 2563 | | mov LFUNC:RB, [RA-8] |
2539 | | ret | 2564 | | ret |
2540 | | | 2565 | | |
@@ -3369,9 +3394,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3369 | | cvtsi2sd xmm0, RC | 3394 | | cvtsi2sd xmm0, RC |
3370 | | mov BASE, RB // Restore BASE. | 3395 | | mov BASE, RB // Restore BASE. |
3371 | } else { | 3396 | } else { |
3397 | |.if not X64 | ||
3372 | | mov ARG1, RC | 3398 | | mov ARG1, RC |
3373 | | mov BASE, RB // Restore BASE. | 3399 | | mov BASE, RB // Restore BASE. |
3374 | | fild ARG1 | 3400 | | fild ARG1 |
3401 | |.endif | ||
3375 | } | 3402 | } |
3376 | | movzx RA, PC_RA | 3403 | | movzx RA, PC_RA |
3377 | | jmp <1 | 3404 | | jmp <1 |
@@ -3994,7 +4021,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3994 | | mov TMP1, STR:RC | 4021 | | mov TMP1, STR:RC |
3995 | | mov TMP2, LJ_TSTR | 4022 | | mov TMP2, LJ_TSTR |
3996 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. | 4023 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
3997 | | mov ARG4, TAB:RB // Save TAB:RB for us. | 4024 | | mov TMP3, TAB:RB // Save TAB:RB for us. |
3998 | | mov ARG2, TAB:RB | 4025 | | mov ARG2, TAB:RB |
3999 | | mov L:RB, SAVE_L | 4026 | | mov L:RB, SAVE_L |
4000 | | mov ARG3, RC | 4027 | | mov ARG3, RC |
@@ -4004,7 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4004 | | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | 4031 | | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) |
4005 | | // Handles write barrier for the new key. TValue * returned in eax (RC). | 4032 | | // Handles write barrier for the new key. TValue * returned in eax (RC). |
4006 | | mov BASE, L:RB->base | 4033 | | mov BASE, L:RB->base |
4007 | | mov TAB:RB, ARG4 // Need TAB:RB for barrier. | 4034 | | mov TAB:RB, TMP3 // Need TAB:RB for barrier. |
4008 | | mov RA, eax | 4035 | | mov RA, eax |
4009 | | jmp <2 // Must check write barrier for value. | 4036 | | jmp <2 // Must check write barrier for value. |
4010 | | | 4037 | | |
@@ -4211,7 +4238,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4211 | | lea RA, [BASE+RA*8] | 4238 | | lea RA, [BASE+RA*8] |
4212 | | mov PROTO:RC, LFUNC:RC->pt | 4239 | | mov PROTO:RC, LFUNC:RC->pt |
4213 | | movzx RC, byte PROTO:RC->numparams | 4240 | | movzx RC, byte PROTO:RC->numparams |
4214 | | mov ARG3, KBASE // Need one more free register. | 4241 | | mov TMP1, KBASE // Need one more free register. |
4215 | | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] | 4242 | | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] |
4216 | | sub KBASE, [BASE-4] | 4243 | | sub KBASE, [BASE-4] |
4217 | | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. | 4244 | | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. |
@@ -4237,7 +4264,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4237 | | cmp RA, RB | 4264 | | cmp RA, RB |
4238 | | jb <2 | 4265 | | jb <2 |
4239 | |3: | 4266 | |3: |
4240 | | mov KBASE, ARG3 | 4267 | | mov KBASE, TMP1 |
4241 | | ins_next | 4268 | | ins_next |
4242 | | | 4269 | | |
4243 | |5: // Copy all varargs. | 4270 | |5: // Copy all varargs. |
@@ -4496,11 +4523,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4496 | | ins_AD // RA = base (ignored), RD = traceno | 4523 | | ins_AD // RA = base (ignored), RD = traceno |
4497 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | 4524 | | mov RA, [DISPATCH+DISPATCH_J(trace)] |
4498 | | mov TRACE:RD, [RA+RD*4] | 4525 | | mov TRACE:RD, [RA+RD*4] |
4499 | | mov RD, TRACE:RD->mcode | 4526 | | mov RDa, TRACE:RD->mcode |
4500 | | mov L:RB, SAVE_L | 4527 | | mov L:RB, SAVE_L |
4501 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | 4528 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE |
4502 | | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB | 4529 | | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB |
4503 | | jmp RD | 4530 | | jmp RDa |
4504 | #endif | 4531 | #endif |
4505 | break; | 4532 | break; |
4506 | 4533 | ||