aboutsummaryrefslogtreecommitdiff
path: root/src/buildvm_x86.dasc
diff options
context:
space:
mode:
author Mike Pall <mike> 2009-12-27 17:42:41 +0100
committer Mike Pall <mike> 2009-12-27 17:42:41 +0100
commit bc470637081979939698413ca108372e672d3b48 (patch)
tree 6559f76ea898aba5c263c8ab56f63eaa2f6480ff /src/buildvm_x86.dasc
parent 690760aa3853e63331f46e40c8276d9f5939261d (diff)
download luajit-bc470637081979939698413ca108372e672d3b48.tar.gz
luajit-bc470637081979939698413ca108372e672d3b48.tar.bz2
luajit-bc470637081979939698413ca108372e672d3b48.zip
Use fastcall for remaining 1-arg/2-arg calls from interpreter.
Simplifies conversion to x64 calling conventions.
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r-- src/buildvm_x86.dasc 175
1 files changed, 59 insertions, 116 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 9ce8ef16..eadd8d57 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -588,14 +588,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
588 | // - A return back from a lua_call() with (high) nresults adjustment. 588 | // - A return back from a lua_call() with (high) nresults adjustment.
589 | mov L:RB->top, BASE // Save current top held in BASE (yes). 589 | mov L:RB->top, BASE // Save current top held in BASE (yes).
590 | mov NRESULTS, RD // Need to fill only remainder with nil. 590 | mov NRESULTS, RD // Need to fill only remainder with nil.
591 |.if X64 591 | mov FCARG2, RA
592 | mov CARG2d, RA // Caveat: CARG1d may be RA. 592 | mov FCARG1, L:RB
593 | mov CARG1d, L:RB 593 | call extern lj_state_growstack@8 // (lua_State *L, int n)
594 |.else
595 | mov ARG2, RA // Grow by wanted nresults+1.
596 | mov ARG1, L:RB
597 |.endif
598 | call extern lj_state_growstack // (lua_State *L, int n)
599 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. 594 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
600 | jmp <3 595 | jmp <3
601 | 596 |
@@ -653,11 +648,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
653 |//-- Grow stack on-demand ----------------------------------------------- 648 |//-- Grow stack on-demand -----------------------------------------------
654 | 649 |
655 |->gate_c_growstack: // Grow stack for C function. 650 |->gate_c_growstack: // Grow stack for C function.
656 |.if X64 651 | mov FCARG2, LUA_MINSTACK
657 | mov CARG2d, LUA_MINSTACK
658 |.else
659 | mov ARG2, LUA_MINSTACK
660 |.endif
661 | jmp >1 652 | jmp >1
662 | 653 |
663 |->gate_lv_growstack: // Grow stack for vararg Lua function. 654 |->gate_lv_growstack: // Grow stack for vararg Lua function.
@@ -677,17 +668,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
677 | mov L:RB->base, BASE 668 | mov L:RB->base, BASE
678 | mov L:RB->top, RC 669 | mov L:RB->top, RC
679 | mov SAVE_PC, PC 670 | mov SAVE_PC, PC
680 |.if X64 671 | mov FCARG2, RA
681 | mov CARG2d, RA
682 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
683 |.else
684 | mov ARG2, RA
685 | mov ARG1, L:RB
686 |.endif
687 |1: 672 |1:
673 | mov FCARG1, L:RB
688 | // L:RB = L, L->base = new base, L->top = top 674 | // L:RB = L, L->base = new base, L->top = top
689 | // SAVE_PC = initial PC+1 (undefined for C functions) 675 | // SAVE_PC = initial PC+1 (undefined for C functions)
690 | call extern lj_state_growstack // (lua_State *L, int n) 676 | call extern lj_state_growstack@8 // (lua_State *L, int n)
691 | mov RA, L:RB->base 677 | mov RA, L:RB->base
692 | mov RC, L:RB->top 678 | mov RC, L:RB->top
693 | mov LFUNC:RB, [RA-8] 679 | mov LFUNC:RB, [RA-8]
@@ -1189,20 +1175,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1189 | jmp aword LFUNC:RB->gate 1175 | jmp aword LFUNC:RB->gate
1190 | 1176 |
1191 |->vmeta_len: 1177 |->vmeta_len:
1192 |.if X64
1193 | mov L:RB, SAVE_L 1178 | mov L:RB, SAVE_L
1194 | mov L:RB->base, BASE // Caveat: CARG2d may be BASE.
1195 | lea CARG2d, [BASE+RD*8]
1196 | mov CARG1d, L:RB
1197 |.else
1198 | lea RD, [BASE+RD*8]
1199 | mov L:RB, SAVE_L
1200 | mov ARG2, RD
1201 | mov ARG1, L:RB
1202 | mov L:RB->base, BASE 1179 | mov L:RB->base, BASE
1203 |.endif 1180 | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
1181 | mov L:FCARG1, L:RB
1204 | mov SAVE_PC, PC 1182 | mov SAVE_PC, PC
1205 | call extern lj_meta_len // (lua_State *L, TValue *o) 1183 | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
1206 | // TValue * (metamethod) returned in eax (RC). 1184 | // TValue * (metamethod) returned in eax (RC).
1207 | mov BASE, L:RB->base 1185 | mov BASE, L:RB->base
1208 | jmp ->vmeta_binop // Binop call for compatibility. 1186 | jmp ->vmeta_binop // Binop call for compatibility.
@@ -1243,19 +1221,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1243 |//-- Argument coercion for 'for' statement ------------------------------ 1221 |//-- Argument coercion for 'for' statement ------------------------------
1244 | 1222 |
1245 |->vmeta_for: 1223 |->vmeta_for:
1246 |.if X64
1247 | mov L:RB, SAVE_L
1248 | mov L:RB->base, BASE // Caveat: CARG2d may be BASE.
1249 | mov CARG2d, RA
1250 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
1251 |.else
1252 | mov L:RB, SAVE_L 1224 | mov L:RB, SAVE_L
1253 | mov ARG2, RA
1254 | mov ARG1, L:RB
1255 | mov L:RB->base, BASE 1225 | mov L:RB->base, BASE
1256 |.endif 1226 | mov FCARG2, RA // Caveat: FCARG2 == BASE
1227 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
1257 | mov SAVE_PC, PC 1228 | mov SAVE_PC, PC
1258 | call extern lj_meta_for // (lua_State *L, TValue *base) 1229 | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
1259 | mov BASE, L:RB->base 1230 | mov BASE, L:RB->base
1260 | mov RC, [PC-4] 1231 | mov RC, [PC-4]
1261 | movzx RA, RCH 1232 | movzx RA, RCH
@@ -1572,30 +1543,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1572 |.ffunc_1 ipairs_aux 1543 |.ffunc_1 ipairs_aux
1573 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1544 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1574 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 1545 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1575 | // Caveat: xmm0/xmm1/ARG2 used in getinth call, too.
1576 if (sse) { 1546 if (sse) {
1577 | movsd xmm0, qword [RA+8] 1547 | movsd xmm0, qword [RA+8]
1578 | sseconst_1 xmm1, RBa 1548 | sseconst_1 xmm1, RBa
1579 |.if X64WIN
1580 | addsd xmm1, xmm0
1581 | cvtsd2si RC, xmm1
1582 | movsd qword [RA-8], xmm1
1583 |.else
1584 | addsd xmm0, xmm1 1549 | addsd xmm0, xmm1
1585 | cvtsd2si RC, xmm0 1550 | cvtsd2si RC, xmm0
1586 | movsd qword [RA-8], xmm0 1551 | movsd qword [RA-8], xmm0
1587 | .if not X64
1588 | mov ARG2, RC
1589 | .endif
1590 |.endif
1591 } else { 1552 } else {
1592 |.if not X64 1553 |.if not X64
1593 | fld qword [RA+8] 1554 | fld qword [RA+8]
1594 | fld1 1555 | fld1
1595 | faddp st1 1556 | faddp st1
1596 | fist ARG2 1557 | fist ARG1
1597 | fstp qword [RA-8] 1558 | fstp qword [RA-8]
1598 | mov RC, ARG2 1559 | mov RC, ARG1
1599 |.endif 1560 |.endif
1600 } 1561 }
1601 | mov TAB:RB, [RA] 1562 | mov TAB:RB, [RA]
@@ -1611,14 +1572,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1611 | jmp ->fff_res2 1572 | jmp ->fff_res2
1612 |2: // Check for empty hash part first. Otherwise call C function. 1573 |2: // Check for empty hash part first. Otherwise call C function.
1613 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 1574 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1614 |.if X64
1615 | mov CARG1d, TAB:RB
1616 |.else
1617 | mov ARG1, TAB:RB
1618 |.endif
1619 | mov TMP1, BASE // Save BASE and RA. 1575 | mov TMP1, BASE // Save BASE and RA.
1576 |.if X64 and not X64WIN
1577 | mov FCARG1, TAB:RB
1620 | mov RB, RA 1578 | mov RB, RA
1621 | call extern lj_tab_getinth // (GCtab *t, int32_t key) 1579 |.else
1580 | xchg FCARG1, TAB:RB // Caveat: FCARG1 == RA
1581 |.endif
1582 | mov FCARG2, RC
1583 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1622 | // cTValue * or NULL returned in eax (RC). 1584 | // cTValue * or NULL returned in eax (RC).
1623 | mov RA, RB 1585 | mov RA, RB
1624 | mov BASE, TMP1 1586 | mov BASE, TMP1
@@ -1825,28 +1787,22 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1825 | mov RD, 1+2 // nresults+1 = 1 + false + error. 1787 | mov RD, 1+2 // nresults+1 = 1 + false + error.
1826 | jmp <7 1788 | jmp <7
1827 |.else 1789 |.else
1828 |.if X64 1790 | mov FCARG2, L:PC
1829 | mov CARG2d, L:PC 1791 | mov FCARG1, L:RB
1830 | mov CARG1d, L:RB 1792 | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
1831 |.else
1832 | mov ARG2, L:PC
1833 | mov ARG1, L:RB
1834 |.endif
1835 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1836 | // Error function does not return. 1793 | // Error function does not return.
1837 |.endif 1794 |.endif
1838 | 1795 |
1839 |9: // Handle stack expansion on return from yield. 1796 |9: // Handle stack expansion on return from yield.
1840 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
1841 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1842 |.if X64 1797 |.if X64
1843 | mov CARG2d, PC 1798 | mov L:RA, TMP1
1844 | mov CARG1d, L:RB
1845 |.else 1799 |.else
1846 | mov ARG2, PC 1800 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
1847 | mov ARG1, L:RB
1848 |.endif 1801 |.endif
1849 | call extern lj_state_growstack // (lua_State *L, int n) 1802 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1803 | mov FCARG2, PC
1804 | mov FCARG1, L:RB
1805 | call extern lj_state_growstack@8 // (lua_State *L, int n)
1850 | mov BASE, L:RB->base 1806 | mov BASE, L:RB->base
1851 | jmp <4 // Retry the stack move. 1807 | jmp <4 // Retry the stack move.
1852 |.endmacro 1808 |.endmacro
@@ -2493,13 +2449,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2493 | mov TMP1, BASE // Save old BASE (relative). 2449 | mov TMP1, BASE // Save old BASE (relative).
2494 | mov L:RB->base, RA 2450 | mov L:RB->base, RA
2495 | lea RC, [RA+NARGS:RC*8-8] 2451 | lea RC, [RA+NARGS:RC*8-8]
2496 | mov ARG1, L:RB
2497 | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler. 2452 | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler.
2498 | mov L:RB->top, RC 2453 | mov L:RB->top, RC
2499 | mov CFUNC:RA, [RA-8] 2454 | mov CFUNC:RC, [RA-8]
2500 | cmp BASE, L:RB->maxstack 2455 | cmp BASE, L:RB->maxstack
2501 | ja >5 // Need to grow stack. 2456 | ja >5 // Need to grow stack.
2502 | call aword CFUNC:RA->f // (lua_State *L) 2457 |.if X64
2458 | mov CARG1d, L:RB
2459 |.else
2460 | mov ARG1, L:RB
2461 |.endif
2462 | call aword CFUNC:RC->f // (lua_State *L)
2503 | // Either throws an error or recovers and returns 0 or NRESULTS (+1). 2463 | // Either throws an error or recovers and returns 0 or NRESULTS (+1).
2504 | test RC, RC; jnz >3 2464 | test RC, RC; jnz >3
2505 |1: // Returned 0: retry fast path. 2465 |1: // Returned 0: retry fast path.
@@ -2526,8 +2486,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2526 | jmp ->fff_res 2486 | jmp ->fff_res
2527 | 2487 |
2528 |5: // Grow stack for fallback handler. 2488 |5: // Grow stack for fallback handler.
2529 | mov ARG2, LUA_MINSTACK 2489 | mov FCARG2, LUA_MINSTACK
2530 | call extern lj_state_growstack // (lua_State *L, int n) 2490 | mov FCARG1, L:RB
2491 | call extern lj_state_growstack@8 // (lua_State *L, int n)
2531 | jmp <1 // Dumb retry (goes through ff first). 2492 | jmp <1 // Dumb retry (goes through ff first).
2532 | 2493 |
2533 |->fff_gcstep: // Call GC step function. 2494 |->fff_gcstep: // Call GC step function.
@@ -2541,13 +2502,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2541 | mov SAVE_PC, PC // Redundant (but a defined value). 2502 | mov SAVE_PC, PC // Redundant (but a defined value).
2542 | mov L:RB->base, RA 2503 | mov L:RB->base, RA
2543 | lea RC, [RA+NARGS:RC*8-8] 2504 | lea RC, [RA+NARGS:RC*8-8]
2544 |.if X64 2505 | mov FCARG1, L:RB
2545 | mov CARG1d, L:RB
2546 |.else
2547 | mov ARG1, L:RB
2548 |.endif
2549 | mov L:RB->top, RC 2506 | mov L:RB->top, RC
2550 | call extern lj_gc_step // (lua_State *L) 2507 | call extern lj_gc_step@4 // (lua_State *L)
2551 | mov RA, L:RB->base 2508 | mov RA, L:RB->base
2552 | mov RC, L:RB->top 2509 | mov RC, L:RB->top
2553 | sub RC, RA 2510 | sub RC, RA
@@ -2619,17 +2576,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2619#if LJ_HASJIT 2576#if LJ_HASJIT
2620 | mov L:RB, SAVE_L 2577 | mov L:RB, SAVE_L
2621 | mov L:RB->base, BASE 2578 | mov L:RB->base, BASE
2622 |.if X64 2579 | mov FCARG2, PC
2623 | mov CARG2d, PC 2580 | lea FCARG1, [DISPATCH+GG_DISP2J]
2624 | lea CARG1d, [DISPATCH+GG_DISP2J]
2625 |.else
2626 | lea RA, [DISPATCH+GG_DISP2J]
2627 | mov ARG2, PC
2628 | mov ARG1, RA
2629 |.endif
2630 | mov [DISPATCH+DISPATCH_J(L)], L:RB 2581 | mov [DISPATCH+DISPATCH_J(L)], L:RB
2631 | mov SAVE_PC, PC 2582 | mov SAVE_PC, PC
2632 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) 2583 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
2633 | jmp <4 2584 | jmp <4
2634#endif 2585#endif
2635 | 2586 |
@@ -2637,17 +2588,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2637#if LJ_HASJIT 2588#if LJ_HASJIT
2638 | mov L:RB, SAVE_L 2589 | mov L:RB, SAVE_L
2639 | mov L:RB->base, BASE 2590 | mov L:RB->base, BASE
2640 |.if X64 2591 | mov FCARG2, PC
2641 | mov CARG2d, PC 2592 | lea FCARG1, [DISPATCH+GG_DISP2J]
2642 | lea CARG1d, [DISPATCH+GG_DISP2J]
2643 |.else
2644 | lea RA, [DISPATCH+GG_DISP2J]
2645 | mov ARG2, PC
2646 | mov ARG1, RA
2647 |.endif
2648 | mov [DISPATCH+DISPATCH_J(L)], L:RB 2593 | mov [DISPATCH+DISPATCH_J(L)], L:RB
2649 | mov SAVE_PC, PC 2594 | mov SAVE_PC, PC
2650 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) 2595 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
2651 | mov BASE, L:RB->base 2596 | mov BASE, L:RB->base
2652 | // Dispatch the first instruction and optionally record it. 2597 | // Dispatch the first instruction and optionally record it.
2653 | ins_next 2598 | ins_next
@@ -2689,12 +2634,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2689 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2634 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
2690 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2635 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2691 | mov [DISPATCH+DISPATCH_J(L)], L:RB 2636 | mov [DISPATCH+DISPATCH_J(L)], L:RB
2692 | lea RC, [esp+16]
2693 | mov L:RB->base, BASE 2637 | mov L:RB->base, BASE
2694 | lea RA, [DISPATCH+GG_DISP2J] 2638 | lea FCARG2, [esp+16]
2695 | mov ARG2, RC 2639 | lea FCARG1, [DISPATCH+GG_DISP2J]
2696 | mov ARG1, RA 2640 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2697 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2698 | // Interpreter C frame returned in eax. 2641 | // Interpreter C frame returned in eax.
2699 | mov esp, eax // Reposition stack to C frame. 2642 | mov esp, eax // Reposition stack to C frame.
2700 | mov BASE, L:RB->base 2643 | mov BASE, L:RB->base
@@ -3863,11 +3806,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3863 | mov L:RB, SAVE_L 3806 | mov L:RB, SAVE_L
3864 | cmp dword L:RB->openupval, 0 3807 | cmp dword L:RB->openupval, 0
3865 | je >1 3808 | je >1
3866 | lea RA, [BASE+RA*8]
3867 | mov ARG2, RA
3868 | mov ARG1, L:RB
3869 | mov L:RB->base, BASE 3809 | mov L:RB->base, BASE
3870 | call extern lj_func_closeuv // (lua_State *L, TValue *level) 3810 | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
3811 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
3812 | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
3871 | mov BASE, L:RB->base 3813 | mov BASE, L:RB->base
3872 |1: 3814 |1:
3873 | ins_next 3815 | ins_next
@@ -4456,7 +4398,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4456 | jbe <3 // No vararg slots? 4398 | jbe <3 // No vararg slots?
4457 | mov RB, RC 4399 | mov RB, RC
4458 | shr RB, 3 4400 | shr RB, 3
4459 | mov ARG2, RB // Store this for stack growth below.
4460 | add RB, 1 4401 | add RB, 1
4461 | mov NRESULTS, RB // NRESULTS = #varargs+1 4402 | mov NRESULTS, RB // NRESULTS = #varargs+1
4462 | mov L:RB, SAVE_L 4403 | mov L:RB, SAVE_L
@@ -4479,8 +4420,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4479 | mov L:RB->top, RA 4420 | mov L:RB->top, RA
4480 | mov SAVE_PC, PC 4421 | mov SAVE_PC, PC
4481 | sub KBASE, BASE // Need delta, because BASE may change. 4422 | sub KBASE, BASE // Need delta, because BASE may change.
4482 | mov ARG1, L:RB 4423 | mov FCARG2, NRESULTS
4483 | call extern lj_state_growstack // (lua_State *L, int n) 4424 | sub FCARG2, 1
4425 | mov FCARG1, L:RB
4426 | call extern lj_state_growstack@8 // (lua_State *L, int n)
4484 | mov BASE, L:RB->base 4427 | mov BASE, L:RB->base
4485 | mov RA, L:RB->top 4428 | mov RA, L:RB->top
4486 | add KBASE, BASE 4429 | add KBASE, BASE