about | summary | refs | log | tree | commit | diff
path: root/src/buildvm_x86.dasc
diff options
context:
space:
mode:
author Mike Pall <mike> 2009-12-22 06:16:29 +0100
committer Mike Pall <mike> 2009-12-22 06:16:29 +0100
commit c4e9dc00129057ddee23943f5e1ddd1eefcb8610 (patch)
tree 35afb0ab8f099a1a29b9a736a4c14d136496ad3a /src/buildvm_x86.dasc
parent 44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1 (diff)
download luajit-c4e9dc00129057ddee23943f5e1ddd1eefcb8610.tar.gz
luajit-c4e9dc00129057ddee23943f5e1ddd1eefcb8610.tar.bz2
luajit-c4e9dc00129057ddee23943f5e1ddd1eefcb8610.zip
Miscellaneous cleanups for x64 interpreter.
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r-- src/buildvm_x86.dasc 59
1 files changed, 43 insertions, 16 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 58767c1e..65e9be1d 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -19,10 +19,14 @@
19|// Fixed register assignments for the interpreter. 19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor. 20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, edx // Not C callee-save, refetched anyway. 21|.define BASE, edx // Not C callee-save, refetched anyway.
22|.if not X64 or X64WIN 22|.if not X64
23|.define KBASE, edi // Must be C callee-save. 23|.define KBASE, edi // Must be C callee-save.
24|.define KBASEa, KBASE 24|.define KBASEa, KBASE
25|.define PC, esi // Must be C callee-save. 25|.define PC, esi // Must be C callee-save.
26|.elif X64WIN
27|.define KBASE, edi // Must be C callee-save.
28|.define KBASEa, rdi
29|.define PC, esi // Must be C callee-save.
26|.else 30|.else
27|.define KBASE, r15d // Must be C callee-save. 31|.define KBASE, r15d // Must be C callee-save.
28|.define KBASEa, r15 32|.define KBASEa, r15
@@ -136,7 +140,9 @@
136|.define FPARG1, qword [esp] 140|.define FPARG1, qword [esp]
137|// TMPQ overlaps TMP1/TMP2. ARG5/NRESULTS overlap TMP1/TMP2 (and TMPQ). 141|// TMPQ overlaps TMP1/TMP2. ARG5/NRESULTS overlap TMP1/TMP2 (and TMPQ).
138|.define TMPQ, qword [esp+aword*4] 142|.define TMPQ, qword [esp+aword*4]
143|.define TMP3, ARG4
139|.define ARG5, TMP1 144|.define ARG5, TMP1
145|.define TMPa, TMP1
140|.define NRESULTS, TMP2 146|.define NRESULTS, TMP2
141| 147|
142|// Arguments for vm_call and vm_pcall. 148|// Arguments for vm_call and vm_pcall.
@@ -183,7 +189,9 @@
183|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). 189|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ).
184|.define TMPQ, qword [rsp+aword*10] 190|.define TMPQ, qword [rsp+aword*10]
185|.define NRESULTS, TMP2 191|.define NRESULTS, TMP2
192|.define TMPa, ARG5
186|.define ARG5d, dword [rsp+aword*4] 193|.define ARG5d, dword [rsp+aword*4]
194|.define TMP3, ARG5d
187| 195|
188|//----------------------------------------------------------------------- 196|//-----------------------------------------------------------------------
189|.else // x64/POSIX stack layout 197|.else // x64/POSIX stack layout
@@ -205,7 +213,7 @@
205|.define SAVE_R2, aword [rsp+aword*6] 213|.define SAVE_R2, aword [rsp+aword*6]
206|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. 214|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
207|.define SAVE_CFRAME, aword [rsp+aword*4] 215|.define SAVE_CFRAME, aword [rsp+aword*4]
208|.define UNUSED1, aword [rsp+aword*3] 216|.define TMPa, aword [rsp+aword*3]
209|//----- ^^^ awords above, vvv dwords below 217|//----- ^^^ awords above, vvv dwords below
210|.define SAVE_PC, dword [rsp+dword*5] 218|.define SAVE_PC, dword [rsp+dword*5]
211|.define SAVE_L, dword [rsp+dword*4] 219|.define SAVE_L, dword [rsp+dword*4]
@@ -217,6 +225,7 @@
217| 225|
218|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). 226|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ).
219|.define TMPQ, qword [rsp] 227|.define TMPQ, qword [rsp]
228|.define TMP3, dword [rsp+aword*3]
220|.define NRESULTS, TMP2 229|.define NRESULTS, TMP2
221| 230|
222|.endif 231|.endif
@@ -900,9 +909,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
900 | cvtsi2sd xmm0, RC 909 | cvtsi2sd xmm0, RC
901 | movsd TMPQ, xmm0 910 | movsd TMPQ, xmm0
902 } else { 911 } else {
912 |.if not X64
903 | mov ARG4, RC 913 | mov ARG4, RC
904 | fild ARG4 914 | fild ARG4
905 | fstp TMPQ 915 | fstp TMPQ
916 |.endif
906 } 917 }
907 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. 918 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
908 | jmp >1 919 | jmp >1
@@ -971,9 +982,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
971 | cvtsi2sd xmm0, RC 982 | cvtsi2sd xmm0, RC
972 | movsd TMPQ, xmm0 983 | movsd TMPQ, xmm0
973 } else { 984 } else {
985 |.if not X64
974 | mov ARG4, RC 986 | mov ARG4, RC
975 | fild ARG4 987 | fild ARG4
976 | fstp TMPQ 988 | fstp TMPQ
989 |.endif
977 } 990 }
978 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. 991 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
979 | jmp >1 992 | jmp >1
@@ -2182,8 +2195,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2182 | cvtsd2si RC, qword [RA+8] 2195 | cvtsd2si RC, qword [RA+8]
2183 | mov ARG3, RC 2196 | mov ARG3, RC
2184 } else { 2197 } else {
2198 |.if not X64
2185 | fld qword [RA+8] 2199 | fld qword [RA+8]
2186 | fistp ARG3 2200 | fistp ARG3
2201 |.endif
2187 } 2202 }
2188 | mov RC, TMP2 2203 | mov RC, TMP2
2189 | cmp RB, RC // len < end? (unsigned compare) 2204 | cmp RB, RC // len < end? (unsigned compare)
@@ -2324,7 +2339,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2324 if (sse) { 2339 if (sse) {
2325 | cvtsi2sd xmm0, RC; jmp ->fff_resxmm0 2340 | cvtsi2sd xmm0, RC; jmp ->fff_resxmm0
2326 } else { 2341 } else {
2342 |.if not X64
2327 | mov ARG1, RC; fild ARG1; jmp ->fff_resn 2343 | mov ARG1, RC; fild ARG1; jmp ->fff_resn
2344 |.endif
2328 } 2345 }
2329 | 2346 |
2330 |//-- Bit library -------------------------------------------------------- 2347 |//-- Bit library --------------------------------------------------------
@@ -2339,12 +2356,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2339 | cvtsi2sd xmm0, RB 2356 | cvtsi2sd xmm0, RB
2340 | jmp ->fff_resxmm0 2357 | jmp ->fff_resxmm0
2341 } else { 2358 } else {
2359 |.if not X64
2342 |.ffunc_n bit_tobit 2360 |.ffunc_n bit_tobit
2343 | mov TMP1, TOBIT_BIAS 2361 | mov TMP1, TOBIT_BIAS
2344 | fadd TMP1 2362 | fadd TMP1
2345 | fstp FPARG1 // 64 bit FP store. 2363 | fstp FPARG1 // 64 bit FP store.
2346 | fild ARG1 // 32 bit integer load (s2lfwd ok). 2364 | fild ARG1 // 32 bit integer load (s2lfwd ok).
2347 | jmp ->fff_resn 2365 | jmp ->fff_resn
2366 |.endif
2348 } 2367 }
2349 | 2368 |
2350 |.macro .ffunc_bit, name 2369 |.macro .ffunc_bit, name
@@ -2354,11 +2373,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2354 | addsd xmm0, xmm1 2373 | addsd xmm0, xmm1
2355 | movd RB, xmm0 2374 | movd RB, xmm0
2356 ||} else { 2375 ||} else {
2376 |.if not X64
2357 | .ffunc_n name 2377 | .ffunc_n name
2358 | mov TMP1, TOBIT_BIAS 2378 | mov TMP1, TOBIT_BIAS
2359 | fadd TMP1 2379 | fadd TMP1
2360 | fstp FPARG1 2380 | fstp FPARG1
2361 | mov RB, ARG1 2381 | mov RB, ARG1
2382 |.endif
2362 ||} 2383 ||}
2363 |.endmacro 2384 |.endmacro
2364 | 2385 |
@@ -2409,11 +2430,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2409 | mov BASE, TMP1 2430 | mov BASE, TMP1
2410 | jmp ->fff_resxmm0 2431 | jmp ->fff_resxmm0
2411 } else { 2432 } else {
2433 |.if not X64
2412 |->fff_resbit: 2434 |->fff_resbit:
2413 |->fff_resbit_op: 2435 |->fff_resbit_op:
2414 | mov ARG1, RB 2436 | mov ARG1, RB
2415 | fild ARG1 2437 | fild ARG1
2416 | jmp ->fff_resn 2438 | jmp ->fff_resn
2439 |.endif
2417 } 2440 }
2418 | 2441 |
2419 |->fff_fallback_bit_op: 2442 |->fff_fallback_bit_op:
@@ -2433,6 +2456,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2433 | movd RB, xmm0 2456 | movd RB, xmm0
2434 | movd RA, xmm1 2457 | movd RA, xmm1
2435 ||} else { 2458 ||} else {
2459 |.if not X64
2436 | .ffunc_nn name 2460 | .ffunc_nn name
2437 | mov TMP1, TOBIT_BIAS 2461 | mov TMP1, TOBIT_BIAS
2438 | fadd TMP1 2462 | fadd TMP1
@@ -2442,6 +2466,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2442 | mov RC, RA // Assumes RA is ecx. 2466 | mov RC, RA // Assumes RA is ecx.
2443 | mov RA, ARG3 2467 | mov RA, ARG3
2444 | mov RB, ARG1 2468 | mov RB, ARG1
2469 |.endif
2445 ||} 2470 ||}
2446 | ins RB, cl 2471 | ins RB, cl
2447 | mov RA, RC 2472 | mov RA, RC
@@ -2467,7 +2492,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2467 | sub BASE, RA 2492 | sub BASE, RA
2468 | mov [RA-4], PC 2493 | mov [RA-4], PC
2469 | mov SAVE_PC, PC // Redundant (but a defined value). 2494 | mov SAVE_PC, PC // Redundant (but a defined value).
2470 | mov ARG3, BASE // Save old BASE (relative). 2495 | mov TMP1, BASE // Save old BASE (relative).
2471 | mov L:RB->base, RA 2496 | mov L:RB->base, RA
2472 | lea RC, [RA+NARGS:RC*8-8] 2497 | lea RC, [RA+NARGS:RC*8-8]
2473 | mov ARG1, L:RB 2498 | mov ARG1, L:RB
@@ -2486,7 +2511,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2486 | shr RC, 3 2511 | shr RC, 3
2487 | add NARGS:RC, 1 2512 | add NARGS:RC, 1
2488 | mov LFUNC:RB, [RA-8] 2513 | mov LFUNC:RB, [RA-8]
2489 | mov BASE, ARG3 // Restore old BASE. 2514 | mov BASE, TMP1 // Restore old BASE.
2490 | add BASE, RA 2515 | add BASE, RA
2491 | cmp [RA-4], PC; jne >2 // Callable modified by handler? 2516 | cmp [RA-4], PC; jne >2 // Callable modified by handler?
2492 | jmp aword LFUNC:RB->gate // Retry the call. 2517 | jmp aword LFUNC:RB->gate // Retry the call.
@@ -2509,11 +2534,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2509 | 2534 |
2510 |->fff_gcstep: // Call GC step function. 2535 |->fff_gcstep: // Call GC step function.
2511 | // RA = new base, RC = nargs+1 2536 | // RA = new base, RC = nargs+1
2512 | pop RB // Must keep stack at same level. 2537 | pop RBa // Must keep stack at same level.
2513 | mov ARG3, RB // Save return address 2538 | mov TMPa, RBa // Save return address
2514 | mov L:RB, SAVE_L 2539 | mov L:RB, SAVE_L
2515 | sub BASE, RA 2540 | sub BASE, RA
2516 | mov ARG2, BASE // Save old BASE (relative). 2541 | mov TMP2, BASE // Save old BASE (relative).
2517 | mov [RA-4], PC 2542 | mov [RA-4], PC
2518 | mov SAVE_PC, PC // Redundant (but a defined value). 2543 | mov SAVE_PC, PC // Redundant (but a defined value).
2519 | mov L:RB->base, RA 2544 | mov L:RB->base, RA
@@ -2531,10 +2556,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2531 | shr RC, 3 2556 | shr RC, 3
2532 | add NARGS:RC, 1 2557 | add NARGS:RC, 1
2533 | mov PC, [RA-4] 2558 | mov PC, [RA-4]
2534 | mov BASE, ARG2 // Restore old BASE. 2559 | mov BASE, TMP2 // Restore old BASE.
2535 | add BASE, RA 2560 | add BASE, RA
2536 | mov RB, ARG3 2561 | mov RBa, TMPa
2537 | push RB // Restore return address. 2562 | push RBa // Restore return address.
2538 | mov LFUNC:RB, [RA-8] 2563 | mov LFUNC:RB, [RA-8]
2539 | ret 2564 | ret
2540 | 2565 |
@@ -3369,9 +3394,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3369 | cvtsi2sd xmm0, RC 3394 | cvtsi2sd xmm0, RC
3370 | mov BASE, RB // Restore BASE. 3395 | mov BASE, RB // Restore BASE.
3371 } else { 3396 } else {
3397 |.if not X64
3372 | mov ARG1, RC 3398 | mov ARG1, RC
3373 | mov BASE, RB // Restore BASE. 3399 | mov BASE, RB // Restore BASE.
3374 | fild ARG1 3400 | fild ARG1
3401 |.endif
3375 } 3402 }
3376 | movzx RA, PC_RA 3403 | movzx RA, PC_RA
3377 | jmp <1 3404 | jmp <1
@@ -3994,7 +4021,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3994 | mov TMP1, STR:RC 4021 | mov TMP1, STR:RC
3995 | mov TMP2, LJ_TSTR 4022 | mov TMP2, LJ_TSTR
3996 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. 4023 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
3997 | mov ARG4, TAB:RB // Save TAB:RB for us. 4024 | mov TMP3, TAB:RB // Save TAB:RB for us.
3998 | mov ARG2, TAB:RB 4025 | mov ARG2, TAB:RB
3999 | mov L:RB, SAVE_L 4026 | mov L:RB, SAVE_L
4000 | mov ARG3, RC 4027 | mov ARG3, RC
@@ -4004,7 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4004 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4031 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4005 | // Handles write barrier for the new key. TValue * returned in eax (RC). 4032 | // Handles write barrier for the new key. TValue * returned in eax (RC).
4006 | mov BASE, L:RB->base 4033 | mov BASE, L:RB->base
4007 | mov TAB:RB, ARG4 // Need TAB:RB for barrier. 4034 | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
4008 | mov RA, eax 4035 | mov RA, eax
4009 | jmp <2 // Must check write barrier for value. 4036 | jmp <2 // Must check write barrier for value.
4010 | 4037 |
@@ -4211,7 +4238,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4211 | lea RA, [BASE+RA*8] 4238 | lea RA, [BASE+RA*8]
4212 | mov PROTO:RC, LFUNC:RC->pt 4239 | mov PROTO:RC, LFUNC:RC->pt
4213 | movzx RC, byte PROTO:RC->numparams 4240 | movzx RC, byte PROTO:RC->numparams
4214 | mov ARG3, KBASE // Need one more free register. 4241 | mov TMP1, KBASE // Need one more free register.
4215 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] 4242 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
4216 | sub KBASE, [BASE-4] 4243 | sub KBASE, [BASE-4]
4217 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. 4244 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
@@ -4237,7 +4264,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4237 | cmp RA, RB 4264 | cmp RA, RB
4238 | jb <2 4265 | jb <2
4239 |3: 4266 |3:
4240 | mov KBASE, ARG3 4267 | mov KBASE, TMP1
4241 | ins_next 4268 | ins_next
4242 | 4269 |
4243 |5: // Copy all varargs. 4270 |5: // Copy all varargs.
@@ -4496,11 +4523,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4496 | ins_AD // RA = base (ignored), RD = traceno 4523 | ins_AD // RA = base (ignored), RD = traceno
4497 | mov RA, [DISPATCH+DISPATCH_J(trace)] 4524 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4498 | mov TRACE:RD, [RA+RD*4] 4525 | mov TRACE:RD, [RA+RD*4]
4499 | mov RD, TRACE:RD->mcode 4526 | mov RDa, TRACE:RD->mcode
4500 | mov L:RB, SAVE_L 4527 | mov L:RB, SAVE_L
4501 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 4528 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4502 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 4529 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
4503 | jmp RD 4530 | jmp RDa
4504#endif 4531#endif
4505 break; 4532 break;
4506 4533