aboutsummaryrefslogtreecommitdiff
path: root/src/vm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/vm_x86.dasc1014
1 files changed, 271 insertions, 743 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index f25dfd30..eaa99740 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,6 +115,7 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
@@ -630,17 +630,18 @@ static void build_subroutines(BuildCtx *ctx)
630 | lea KBASEa, [esp+CFRAME_RESUME] 630 | lea KBASEa, [esp+CFRAME_RESUME]
631 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 631 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
632 | add DISPATCH, GG_G2DISP 632 | add DISPATCH, GG_G2DISP
633 | mov L:RB->cframe, KBASEa
634 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 633 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
635 | mov SAVE_CFRAME, RDa 634 | mov SAVE_CFRAME, RDa
636 |.if X64 635 |.if X64
637 | mov SAVE_NRES, RD 636 | mov SAVE_NRES, RD
638 | mov SAVE_ERRF, RD 637 | mov SAVE_ERRF, RD
639 |.endif 638 |.endif
639 | mov L:RB->cframe, KBASEa
640 | cmp byte L:RB->status, RDL 640 | cmp byte L:RB->status, RDL
641 | je >3 // Initial resume (like a call). 641 | je >2 // Initial resume (like a call).
642 | 642 |
643 | // Resume after yield (like a return). 643 | // Resume after yield (like a return).
644 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
644 | set_vmstate INTERP 645 | set_vmstate INTERP
645 | mov byte L:RB->status, RDL 646 | mov byte L:RB->status, RDL
646 | mov BASE, L:RB->base 647 | mov BASE, L:RB->base
@@ -680,20 +681,19 @@ static void build_subroutines(BuildCtx *ctx)
680 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 681 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
681 |.endif 682 |.endif
682 | 683 |
684 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
683 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 685 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
684 | mov SAVE_CFRAME, KBASEa 686 | mov SAVE_CFRAME, KBASEa
685 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 687 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
688 | add DISPATCH, GG_G2DISP
686 |.if X64 689 |.if X64
687 | mov L:RB->cframe, rsp 690 | mov L:RB->cframe, rsp
688 |.else 691 |.else
689 | mov L:RB->cframe, esp 692 | mov L:RB->cframe, esp
690 |.endif 693 |.endif
691 | 694 |
692 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 695 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
693 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 696 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
694 | add DISPATCH, GG_G2DISP
695 |
696 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
697 | set_vmstate INTERP 697 | set_vmstate INTERP
698 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 698 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
699 | add PC, RA 699 | add PC, RA
@@ -731,14 +731,17 @@ static void build_subroutines(BuildCtx *ctx)
731 | 731 |
732 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 732 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
733 | sub KBASE, L:RB->top 733 | sub KBASE, L:RB->top
734 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
734 | mov SAVE_ERRF, 0 // No error function. 735 | mov SAVE_ERRF, 0 // No error function.
735 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 736 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
737 | add DISPATCH, GG_G2DISP
736 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 738 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
737 | 739 |
738 |.if X64 740 |.if X64
739 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 741 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
740 | mov SAVE_CFRAME, KBASEa 742 | mov SAVE_CFRAME, KBASEa
741 | mov L:RB->cframe, rsp 743 | mov L:RB->cframe, rsp
744 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
742 | 745 |
743 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 746 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
744 |.else 747 |.else
@@ -749,6 +752,7 @@ static void build_subroutines(BuildCtx *ctx)
749 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 752 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
750 | mov SAVE_CFRAME, KBASE 753 | mov SAVE_CFRAME, KBASE
751 | mov L:RB->cframe, esp 754 | mov L:RB->cframe, esp
755 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
752 | 756 |
753 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 757 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
754 |.endif 758 |.endif
@@ -856,13 +860,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 860 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 861 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 862 | mov TMP1, RC
859 |.elif SSE 863 |.else
860 | cvtsi2sd xmm0, RC 864 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 865 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 866 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 867 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 868 | jmp >1
@@ -916,6 +916,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 916 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 917 | jmp ->vm_call_dispatch_f
918 | 918 |
919 |->vmeta_tgetr:
920 | mov FCARG1, TAB:RB
921 | mov RB, BASE // Save BASE.
922 | mov FCARG2, RC // Caveat: FCARG2 == BASE
923 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
924 | // cTValue * or NULL returned in eax (RC).
925 | movzx RA, PC_RA
926 | mov BASE, RB // Restore BASE.
927 | test RC, RC
928 | jnz ->BC_TGETR_Z
929 | mov dword [BASE+RA*8+4], LJ_TNIL
930 | jmp ->BC_TGETR2_Z
931 |
919 |//----------------------------------------------------------------------- 932 |//-----------------------------------------------------------------------
920 | 933 |
921 |->vmeta_tsets: 934 |->vmeta_tsets:
@@ -935,13 +948,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 948 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 949 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 950 | mov TMP1, RC
938 |.elif SSE 951 |.else
939 | cvtsi2sd xmm0, RC 952 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 953 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 954 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 955 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 956 | jmp >1
@@ -1007,6 +1016,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1016 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1017 | jmp ->vm_call_dispatch_f
1009 | 1018 |
1019 |->vmeta_tsetr:
1020 |.if X64WIN
1021 | mov L:CARG1d, SAVE_L
1022 | mov CARG3d, RC
1023 | mov L:CARG1d->base, BASE
1024 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1025 |.elif X64
1026 | mov L:CARG1d, SAVE_L
1027 | mov CARG2d, TAB:RB
1028 | mov L:CARG1d->base, BASE
1029 | mov RB, BASE // Save BASE.
1030 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1031 |.else
1032 | mov L:RA, SAVE_L
1033 | mov ARG2, TAB:RB
1034 | mov RB, BASE // Save BASE.
1035 | mov ARG3, RC
1036 | mov ARG1, L:RA
1037 | mov L:RA->base, BASE
1038 |.endif
1039 | mov SAVE_PC, PC
1040 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1041 | // TValue * returned in eax (RC).
1042 | movzx RA, PC_RA
1043 | mov BASE, RB // Restore BASE.
1044 | jmp ->BC_TSETR_Z
1045 |
1010 |//-- Comparison metamethods --------------------------------------------- 1046 |//-- Comparison metamethods ---------------------------------------------
1011 | 1047 |
1012 |->vmeta_comp: 1048 |->vmeta_comp:
@@ -1101,6 +1137,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1137 | jmp <3
1102 |.endif 1138 |.endif
1103 | 1139 |
1140 |->vmeta_istype:
1141 |.if X64
1142 | mov L:RB, SAVE_L
1143 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1144 | mov CARG2d, RA
1145 | movzx CARG3d, PC_RD
1146 | mov L:CARG1d, L:RB
1147 |.else
1148 | movzx RD, PC_RD
1149 | mov ARG2, RA
1150 | mov L:RB, SAVE_L
1151 | mov ARG3, RD
1152 | mov ARG1, L:RB
1153 | mov L:RB->base, BASE
1154 |.endif
1155 | mov SAVE_PC, PC
1156 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1157 | mov BASE, L:RB->base
1158 | jmp <6
1159 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1160 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1161 |
1106 |->vmeta_arith_vno: 1162 |->vmeta_arith_vno:
@@ -1509,11 +1565,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1565 |.else
1510 | jae ->fff_fallback 1566 | jae ->fff_fallback
1511 |.endif 1567 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1568 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1569 |
1518 |.ffunc_1 tostring 1570 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1571 | // Only handles the string or number case inline.
@@ -1538,9 +1590,9 @@ static void build_subroutines(BuildCtx *ctx)
1538 |.endif 1590 |.endif
1539 | mov L:FCARG1, L:RB 1591 | mov L:FCARG1, L:RB
1540 |.if DUALNUM 1592 |.if DUALNUM
1541 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1593 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1542 |.else 1594 |.else
1543 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1595 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1544 |.endif 1596 |.endif
1545 | // GCstr returned in eax (RD). 1597 | // GCstr returned in eax (RD).
1546 | mov BASE, L:RB->base 1598 | mov BASE, L:RB->base
@@ -1631,19 +1683,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1683 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1684 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1685 | mov dword [BASE-8], RD
1634 |.elif SSE 1686 |.else
1635 | movsd xmm0, qword [BASE+8] 1687 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1688 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1689 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1690 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1691 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1692 |.endif
1648 | mov TAB:RB, [BASE] 1693 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1694 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1735,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1735 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1736 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1737 | mov dword [BASE+8], 0
1693 |.elif SSE 1738 |.else
1694 | xorps xmm0, xmm0 1739 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1740 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1741 |.endif
1700 | mov RD, 1+3 1742 | mov RD, 1+3
1701 | jmp ->fff_res 1743 | jmp ->fff_res
@@ -1802,7 +1844,6 @@ static void build_subroutines(BuildCtx *ctx)
1802 | mov ARG3, RA 1844 | mov ARG3, RA
1803 |.endif 1845 |.endif
1804 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1846 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1805 | set_vmstate INTERP
1806 | 1847 |
1807 | mov L:RB, SAVE_L 1848 | mov L:RB, SAVE_L
1808 |.if X64 1849 |.if X64
@@ -1811,6 +1852,9 @@ static void build_subroutines(BuildCtx *ctx)
1811 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1852 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1812 |.endif 1853 |.endif
1813 | mov BASE, L:RB->base 1854 | mov BASE, L:RB->base
1855 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1856 | set_vmstate INTERP
1857 |
1814 | cmp eax, LUA_YIELD 1858 | cmp eax, LUA_YIELD
1815 | ja >8 1859 | ja >8
1816 |4: 1860 |4:
@@ -1925,12 +1969,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1969 |->fff_resi: // Dummy.
1926 |.endif 1970 |.endif
1927 | 1971 |
1928 |.if SSE
1929 |->fff_resn: 1972 |->fff_resn:
1930 | mov PC, [BASE-4] 1973 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1974 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1975 | jmp ->fff_res1
1933 |.endif
1934 | 1976 |
1935 | .ffunc_1 math_abs 1977 | .ffunc_1 math_abs
1936 |.if DUALNUM 1978 |.if DUALNUM
@@ -1954,8 +1996,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1996 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1997 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1998 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1999 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 2000 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 2001 | andps xmm0, xmm1
@@ -1963,15 +2003,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 2003 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 2004 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 2005 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 2006 |
1976 |->fff_res1: 2007 |->fff_res1:
1977 | mov RD, 1+1 2008 | mov RD, 1+1
@@ -2008,48 +2039,24 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2039 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2040 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2041 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2042 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2043 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2044 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2045 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2046 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2047 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2048 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2049 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2050 | jp ->fff_resxmm0
2021 | je ->fff_resi 2051 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2052 |.endif
2053 | jmp ->fff_resxmm0
2043 |.endmacro 2054 |.endmacro
2044 | 2055 |
2045 | math_round floor 2056 | math_round floor
2046 | math_round ceil 2057 | math_round ceil
2047 | 2058 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2059 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2060 |
2054 |.ffunc math_log 2061 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2062 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
@@ -2072,42 +2079,24 @@ static void build_subroutines(BuildCtx *ctx)
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2079 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2080 |
2074 |.macro math_extern, func 2081 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2082 | .ffunc_nsse math_ .. func
2077 | .if not X64 2083 |.if not X64
2078 | movsd FPARG1, xmm0 2084 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2085 |.endif
2084 | mov RB, BASE 2086 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2087 | call extern lj_vm_ .. func
2086 | mov BASE, RB 2088 | mov BASE, RB
2087 | .if X64 2089 |.if X64
2088 | jmp ->fff_resxmm0 2090 | jmp ->fff_resxmm0
2089 | .else 2091 |.else
2090 | jmp ->fff_resn 2092 | jmp ->fff_resn
2091 | .endif 2093 |.endif
2092 |.endmacro 2094 |.endmacro
2093 | 2095 |
2094 | math_extern sinh 2096 | math_extern sinh
2095 | math_extern cosh 2097 | math_extern cosh
2096 | math_extern tanh 2098 | math_extern tanh
2097 | 2099 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2100 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2101 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2102 |
@@ -2123,65 +2112,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2112 | cmp RB, 0x00200000; jb >4
2124 |1: 2113 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2114 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2115 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2116 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2117 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2118 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2119 | mov [BASE-4], RB
2135 |2: 2120 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2121 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2122 | mov RD, 1+2
2142 | jmp ->fff_res 2123 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2124 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2125 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2126 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2127 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2128 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2129 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2130 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2131 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2132 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2133 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2134 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2135 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2136 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2137 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2138 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2139 | subsd xmm4, xmm0
2174 |1: 2140 |1:
2175 | movsd qword [BASE-8], xmm0 2141 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2142 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2143 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2144 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2145 |2:
@@ -2191,24 +2149,16 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2149 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2150 | jmp <2
2193 |4: 2151 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2152 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 | 2153 |
2200 |.ffunc_nnr math_fmod 2154 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; sahf; jp <1 2155 |1: ; fprem; fnstsw ax; sahf; jp <1
2202 | fpop1 2156 | fpop1
2203 | jmp ->fff_resn 2157 | jmp ->fff_resn
2204 | 2158 |
2205 |.if SSE 2159 |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 | 2160 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop 2161 |.macro math_minmax, name, cmovop, sseop
2212 | .ffunc name 2162 | .ffunc name
2213 | mov RA, 2 2163 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2164 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2175,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2175 |3:
2226 | ja ->fff_fallback 2176 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2177 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2178 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2179 | jmp >6
2235 |4: 2180 |4:
2236 | ja ->fff_fallback 2181 | ja ->fff_fallback
@@ -2238,7 +2183,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2183 | jae ->fff_fallback
2239 |.endif 2184 |.endif
2240 | 2185 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2186 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2187 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2188 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,48 +2201,13 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2201 | sseop xmm0, xmm1
2258 | add RA, 1 2202 | add RA, 1
2259 | jmp <5 2203 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2204 |.endmacro
2281 | 2205 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2206 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2207 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2208 |
2289 |//-- String library ----------------------------------------------------- 2209 |//-- String library -----------------------------------------------------
2290 | 2210 |
2291 |.ffunc_1 string_len
2292 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2293 | mov STR:RB, [BASE]
2294 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn
2300 |.endif
2301 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2211 |.ffunc string_byte // Only handle the 1-arg case here.
2303 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2212 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2304 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2213 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2309,10 +2218,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2218 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2219 |.if DUALNUM
2311 | jmp ->fff_resi 2220 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2221 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2222 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2223 |.endif
2317 | 2224 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2225 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2231,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2231 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2232 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2233 | mov TMP2, RB
2327 |.elif SSE 2234 |.else
2328 | jae ->fff_fallback 2235 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2236 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2237 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2238 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2239 |.endif
2338 |.if X64 2240 |.if X64
2339 | mov TMP3, 1 2241 | mov TMP3, 1
@@ -2354,6 +2256,7 @@ static void build_subroutines(BuildCtx *ctx)
2354 |.endif 2256 |.endif
2355 | mov SAVE_PC, PC 2257 | mov SAVE_PC, PC
2356 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2258 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2259 |->fff_resstr:
2357 | // GCstr * returned in eax (RD). 2260 | // GCstr * returned in eax (RD).
2358 | mov BASE, L:RB->base 2261 | mov BASE, L:RB->base
2359 | mov PC, [BASE-4] 2262 | mov PC, [BASE-4]
@@ -2371,14 +2274,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2274 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2275 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2276 | mov TMP2, RB
2374 |.elif SSE 2277 |.else
2375 | jae ->fff_fallback 2278 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2279 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2280 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2281 |.endif
2383 |1: 2282 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2283 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2292,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2292 | mov RB, STR:RB->len
2394 |.if DUALNUM 2293 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2294 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2295 |.else
2399 | fld qword [BASE+8] 2296 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2297 |.endif
2403 | mov RC, TMP2 2298 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2299 | cmp RB, RC // len < end? (unsigned compare)
@@ -2442,123 +2337,27 @@ static void build_subroutines(BuildCtx *ctx)
2442 | xor RC, RC // Zero length. Any ptr in RB is ok. 2337 | xor RC, RC // Zero length. Any ptr in RB is ok.
2443 | jmp <4 2338 | jmp <4
2444 | 2339 |
2445 |.ffunc string_rep // Only handle the 1-char case inline. 2340 |.macro ffstring_op, name
2341 | .ffunc_1 string_ .. name
2446 | ffgccheck 2342 | ffgccheck
2447 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2448 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2343 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2449 | cmp dword [BASE+12], LJ_TISNUM 2344 | mov L:RB, SAVE_L
2450 | mov STR:RB, [BASE] 2345 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2451 |.if DUALNUM 2346 | mov L:RB->base, BASE
2452 | jne ->fff_fallback 2347 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2453 | mov RC, dword [BASE+8] 2348 | mov RC, SBUF:FCARG1->b
2454 |.elif SSE 2349 | mov SBUF:FCARG1->L, L:RB
2455 | jae ->fff_fallback 2350 | mov SBUF:FCARG1->p, RC
2456 | cvttsd2si RC, qword [BASE+8] 2351 | mov SAVE_PC, PC
2457 |.else 2352 | call extern lj_buf_putstr_ .. name .. @8
2458 | jae ->fff_fallback 2353 | mov FCARG1, eax
2459 | fld qword [BASE+8] 2354 | call extern lj_buf_tostr@4
2460 | fistp TMP2 2355 | jmp ->fff_resstr
2461 | mov RC, TMP2
2462 |.endif
2463 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int)
2465 | cmp dword STR:RB->len, 1
2466 | jb ->fff_emptystr // Zero length string?
2467 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2468 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2469 | movzx RA, byte STR:RB[1]
2470 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2471 |.if X64
2472 | mov TMP3, RC
2473 |.else
2474 | mov ARG3, RC
2475 |.endif
2476 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2477 | mov [RB], RAL
2478 | add RB, 1
2479 | sub RC, 1
2480 | jnz <1
2481 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2482 | jmp ->fff_newstr
2483 |
2484 |.ffunc_1 string_reverse
2485 | ffgccheck
2486 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2487 | mov STR:RB, [BASE]
2488 | mov RC, STR:RB->len
2489 | test RC, RC
2490 | jz ->fff_emptystr // Zero length string?
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2492 | add RB, #STR
2493 | mov TMP2, PC // Need another temp register.
2494 |.if X64
2495 | mov TMP3, RC
2496 |.else
2497 | mov ARG3, RC
2498 |.endif
2499 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2500 |1:
2501 | movzx RA, byte [RB]
2502 | add RB, 1
2503 | sub RC, 1
2504 | mov [PC+RC], RAL
2505 | jnz <1
2506 | mov RD, PC
2507 | mov PC, TMP2
2508 | jmp ->fff_newstr
2509 |
2510 |.macro ffstring_case, name, lo, hi
2511 | .ffunc_1 name
2512 | ffgccheck
2513 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2514 | mov STR:RB, [BASE]
2515 | mov RC, STR:RB->len
2516 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2517 | add RB, #STR
2518 | mov TMP2, PC // Need another temp register.
2519 |.if X64
2520 | mov TMP3, RC
2521 |.else
2522 | mov ARG3, RC
2523 |.endif
2524 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2525 | jmp >3
2526 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2527 | movzx RA, byte [RB+RC]
2528 | cmp RA, lo
2529 | jb >2
2530 | cmp RA, hi
2531 | ja >2
2532 | xor RA, 0x20
2533 |2:
2534 | mov [PC+RC], RAL
2535 |3:
2536 | sub RC, 1
2537 | jns <1
2538 | mov RD, PC
2539 | mov PC, TMP2
2540 | jmp ->fff_newstr
2541 |.endmacro 2356 |.endmacro
2542 | 2357 |
2543 |ffstring_case string_lower, 0x41, 0x5a 2358 |ffstring_op reverse
2544 |ffstring_case string_upper, 0x61, 0x7a 2359 |ffstring_op lower
2545 | 2360 |ffstring_op upper
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 | 2361 |
2563 |//-- Bit library -------------------------------------------------------- 2362 |//-- Bit library --------------------------------------------------------
2564 | 2363 |
@@ -2567,11 +2366,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2366 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2367 | .ffunc_1 name
2569 |.if kind == 2 2368 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2369 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2370 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2371 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2372 |.if DUALNUM
@@ -2587,37 +2382,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2382 |.else
2588 | jae ->fff_fallback 2383 | jae ->fff_fallback
2589 |.endif 2384 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2385 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2386 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2387 | sseconst_tobit xmm1, RBa
2594 |.endif 2388 |.endif
2595 | addsd xmm0, xmm1 2389 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2390 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2391 |2:
2609 |.endmacro 2392 |.endmacro
2610 | 2393 |
2611 |.ffunc_bit bit_tobit, 0 2394 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2395 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2396 |
2622 |.macro .ffunc_bit_op, name, ins 2397 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2398 | .ffunc_bit name, 2
@@ -2637,17 +2412,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2412 |.else
2638 | jae ->fff_fallback_bit_op 2413 | jae ->fff_fallback_bit_op
2639 |.endif 2414 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2415 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2416 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2417 | movd RA, xmm0
2644 | ins RB, RA 2418 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2419 | sub RD, 8
2652 | jmp <1 2420 | jmp <1
2653 |.endmacro 2421 |.endmacro
@@ -2664,15 +2432,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2432 | not RB
2665 |.if DUALNUM 2433 |.if DUALNUM
2666 | jmp ->fff_resbit 2434 | jmp ->fff_resbit
2667 |.elif SSE 2435 |.else
2668 |->fff_resbit: 2436 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2437 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2438 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2439 |.endif
2677 | 2440 |
2678 |->fff_fallback_bit_op: 2441 |->fff_fallback_bit_op:
@@ -2685,22 +2448,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2448 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2449 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2450 | mov RA, dword [BASE+8]
2688 |.elif SSE 2451 |.else
2689 | .ffunc_nnsse name 2452 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2453 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2454 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2455 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2456 | movd RB, xmm0
2694 | movd RA, xmm1 2457 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2458 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2459 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2460 | jmp ->fff_resbit
@@ -2834,7 +2588,7 @@ static void build_subroutines(BuildCtx *ctx)
2834 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2588 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2835 | mov FCARG1, L:RB 2589 | mov FCARG1, L:RB
2836 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2590 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2837 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2591 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2838 |3: 2592 |3:
2839 | mov BASE, L:RB->base 2593 | mov BASE, L:RB->base
2840 |4: 2594 |4:
@@ -2905,6 +2659,19 @@ static void build_subroutines(BuildCtx *ctx)
2905 | add NARGS:RD, 1 2659 | add NARGS:RD, 1
2906 | jmp RBa 2660 | jmp RBa
2907 | 2661 |
2662 |->vm_profhook: // Dispatch target for profiler hook.
2663#if LJ_HASPROFILE
2664 | mov L:RB, SAVE_L
2665 | mov L:RB->base, BASE
2666 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2667 | mov FCARG1, L:RB
2668 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2669 | mov BASE, L:RB->base
2670 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2671 | sub PC, 4
2672 | jmp ->cont_nop
2673#endif
2674 |
2908 |//----------------------------------------------------------------------- 2675 |//-----------------------------------------------------------------------
2909 |//-- Trace exit handler ------------------------------------------------- 2676 |//-- Trace exit handler -------------------------------------------------
2910 |//----------------------------------------------------------------------- 2677 |//-----------------------------------------------------------------------
@@ -2957,10 +2724,9 @@ static void build_subroutines(BuildCtx *ctx)
2957 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2724 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2958 |.endif 2725 |.endif
2959 | // Caveat: RB is ebp. 2726 | // Caveat: RB is ebp.
2960 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2727 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2961 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2728 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2962 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2729 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2963 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2964 | mov L:RB->base, BASE 2730 | mov L:RB->base, BASE
2965 |.if X64WIN 2731 |.if X64WIN
2966 | lea CARG2, [rsp+4*8] 2732 | lea CARG2, [rsp+4*8]
@@ -2970,6 +2736,7 @@ static void build_subroutines(BuildCtx *ctx)
2970 | lea FCARG2, [esp+16] 2736 | lea FCARG2, [esp+16]
2971 |.endif 2737 |.endif
2972 | lea FCARG1, [DISPATCH+GG_DISP2J] 2738 | lea FCARG1, [DISPATCH+GG_DISP2J]
2739 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2973 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2740 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2974 | // MULTRES or negated error code returned in eax (RD). 2741 | // MULTRES or negated error code returned in eax (RD).
2975 | mov RAa, L:RB->cframe 2742 | mov RAa, L:RB->cframe
@@ -3017,11 +2784,13 @@ static void build_subroutines(BuildCtx *ctx)
3017 | mov r12, TMPQ 2784 | mov r12, TMPQ
3018 |.endif 2785 |.endif
3019 | test RD, RD; js >3 // Check for error from exit. 2786 | test RD, RD; js >3 // Check for error from exit.
2787 | mov L:RB, SAVE_L
3020 | mov MULTRES, RD 2788 | mov MULTRES, RD
3021 | mov LFUNC:KBASE, [BASE-8] 2789 | mov LFUNC:KBASE, [BASE-8]
3022 | mov KBASE, LFUNC:KBASE->pc 2790 | mov KBASE, LFUNC:KBASE->pc
3023 | mov KBASE, [KBASE+PC2PROTO(k)] 2791 | mov KBASE, [KBASE+PC2PROTO(k)]
3024 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2792 | mov L:RB->base, BASE
2793 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3025 | set_vmstate INTERP 2794 | set_vmstate INTERP
3026 | // Modified copy of ins_next which handles function header dispatch, too. 2795 | // Modified copy of ins_next which handles function header dispatch, too.
3027 | mov RC, [PC] 2796 | mov RC, [PC]
@@ -3051,27 +2820,9 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2820 |//-----------------------------------------------------------------------
3052 | 2821 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2822 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2823 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2824 |.macro vm_round, name, mode
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2825 |->name .. _sse:
3057 |.macro vm_round_x87, mode1, mode2
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3059 | mov [esp+8], eax
3060 | mov ax, mode1
3061 | or ax, [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret
3071 |.endmacro
3072 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2826 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2827 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2828 | movaps xmm1, xmm0
@@ -3107,22 +2858,21 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2858 | ret
3108 |.endmacro 2859 |.endmacro
3109 | 2860 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2861 |->vm_floor:
3111 |->name: 2862 |.if not X64
3112 |.if not SSE 2863 | movsd xmm0, qword [esp+4]
3113 | vm_round_x87 mode1, mode2 2864 | call ->vm_floor_sse
2865 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2866 | fld qword [esp+4]
2867 | ret
3114 |.endif 2868 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 | 2869 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff 2870 | vm_round vm_floor, 0
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff 2871 | vm_round vm_ceil, 1
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff 2872 | vm_round vm_trunc, 2
3122 | 2873 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2874 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2875 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2876 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2877 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2878 | movaps xmm5, xmm0
@@ -3150,23 +2900,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2900 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2901 | subsd xmm0, xmm1
3152 | ret 2902 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 | 2903 |
3171 |// FP log2(x). Called by math.log(x, base). 2904 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2: 2905 |->vm_log2:
@@ -3217,105 +2950,15 @@ static void build_subroutines(BuildCtx *ctx)
3217 | 2950 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 2951 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith. 2952 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 2953 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code. 2954 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse: 2955 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1 2956 | cvttsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax 2957 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2 2958 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents. 2959 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent. 2960 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse. 2961 | // Fallthrough.
3319 | 2962 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 2963 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 2964 |->vm_powi_sse:
@@ -3437,8 +3080,8 @@ static void build_subroutines(BuildCtx *ctx)
3437 | .else 3080 | .else
3438 | .define fpmop, CARG1d 3081 | .define fpmop, CARG1d
3439 | .endif 3082 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3083 | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3084 | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
3442 | sqrtsd xmm0, xmm0; ret 3085 | sqrtsd xmm0, xmm0; ret
3443 |2: 3086 |2:
3444 | .if X64WIN 3087 | .if X64WIN
@@ -3478,14 +3121,13 @@ static void build_subroutines(BuildCtx *ctx)
3478 | ret 3121 | ret
3479 |.else // x86 calling convention. 3122 |.else // x86 calling convention.
3480 | .define fpmop, eax 3123 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12] 3124 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4] 3125 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2 3126 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7 3127 | call ->vm_floor_sse; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7 3128 |1: ; call ->vm_ceil_sse; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2 3129 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7 3130 | call ->vm_trunc_sse; jmp >7
3489 |1: 3131 |1:
3490 | sqrtsd xmm0, xmm0 3132 | sqrtsd xmm0, xmm0
3491 |7: 3133 |7:
@@ -3503,23 +3145,6 @@ static void build_subroutines(BuildCtx *ctx)
3503 |2: ; cmp fpmop, 11; je >1; ja >9 3145 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret 3146 | fcos; ret
3505 |1: ; fptan; fpop; ret 3147 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif 3148 |.endif
3524 |9: ; int3 // Bad fpm. 3149 |9: ; int3 // Bad fpm.
3525 |.endif 3150 |.endif
@@ -3541,7 +3166,7 @@ static void build_subroutines(BuildCtx *ctx)
3541 |2: ; cmp foldop, 3; je >1; ja >2 3166 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret 3167 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret 3168 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3169 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
3545 | cmp foldop, 7; je >1; ja >2 3170 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3171 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3172 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
@@ -3574,7 +3199,7 @@ static void build_subroutines(BuildCtx *ctx)
3574 |1: ; maxsd xmm0, xmm1; ret 3199 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op. 3200 |9: ; int3 // Bad op.
3576 | 3201 |
3577 |.elif SSE // x86 calling convention with SSE ops. 3202 |.else // x86 calling convention.
3578 | 3203 |
3579 | .define foldop, eax 3204 | .define foldop, eax
3580 | mov foldop, [esp+20] 3205 | mov foldop, [esp+20]
@@ -3593,7 +3218,7 @@ static void build_subroutines(BuildCtx *ctx)
3593 |2: ; cmp foldop, 5 3218 |2: ; cmp foldop, 5
3594 | je >1; ja >2 3219 | je >1; ja >2
3595 | call ->vm_mod; jmp <7 3220 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3221 |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2 3222 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3223 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3224 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
@@ -3608,29 +3233,6 @@ static void build_subroutines(BuildCtx *ctx)
3608 |1: ; maxsd xmm0, xmm1; jmp <7 3233 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op. 3234 |9: ; int3 // Bad op.
3610 | 3235 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif 3236 |.endif
3635 | 3237 |
3636 |//----------------------------------------------------------------------- 3238 |//-----------------------------------------------------------------------
@@ -3943,19 +3545,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3545 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3546 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3547 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3548 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3549 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3550 |
3955 |8: // RA is an integer, RD is not an integer. 3551 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3552 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3553 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3554 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3555 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3556 | add PC, 4
@@ -3963,29 +3558,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3558 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3559 | jmp <6
3965 |.else 3560 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3561 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3562 | checknum RD, ->vmeta_comp
3972 |.endif 3563 |.endif
3973 |.if SSE
3974 |1: 3564 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3565 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3566 |2:
3977 | add PC, 4 3567 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3568 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3569 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3570 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3571 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3572 |.if DUALNUM
@@ -4025,43 +3606,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3606 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3607 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3608 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3609 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3610 | jmp >2
4034 | 3611 |
4035 |8: // RD is an integer, RA is not an integer. 3612 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3613 | ja >5
4037 | // RD is an integer, RA is a number. 3614 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3615 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3616 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3617 | jmp >4
4046 | 3618 |
4047 |.else 3619 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3620 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3621 | checknum RA, >5
4050 |.endif 3622 |.endif
4051 |.if SSE
4052 |1: 3623 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3624 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3625 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3626 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3627 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3628 iseqne_fp:
4066 if (vk) { 3629 if (vk) {
4067 | jp >2 // Unordered means not equal. 3630 | jp >2 // Unordered means not equal.
@@ -4184,39 +3747,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3747 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3748 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3749 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3750 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3751 | jmp >2
4193 | 3752 |
4194 |8: // RA is an integer, RD is a number. 3753 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3754 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3755 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3756 | jmp >4
4203 |.else 3757 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3758 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3759 |.endif
4206 |.if SSE
4207 |1: 3760 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3761 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3762 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3763 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3764 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3765 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3766 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3767 vk = op == BC_ISEQP;
@@ -4267,6 +3812,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3812 | ins_next
4268 break; 3813 break;
4269 3814
3815 case BC_ISTYPE:
3816 | ins_AD // RA = src, RD = -type
3817 | add RD, [BASE+RA*8+4]
3818 | jne ->vmeta_istype
3819 | ins_next
3820 break;
3821 case BC_ISNUM:
3822 | ins_AD // RA = src, RD = -(TISNUM-1)
3823 | checknum RA, ->vmeta_istype
3824 | ins_next
3825 break;
3826
4270 /* -- Unary ops --------------------------------------------------------- */ 3827 /* -- Unary ops --------------------------------------------------------- */
4271 3828
4272 case BC_MOV: 3829 case BC_MOV:
@@ -4310,16 +3867,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3867 |.else
4311 | checknum RD, ->vmeta_unm 3868 | checknum RD, ->vmeta_unm
4312 |.endif 3869 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3870 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3871 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3872 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3873 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3874 |.if DUALNUM
4324 | jmp <9 3875 | jmp <9
4325 |.else 3876 |.else
@@ -4335,15 +3886,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3886 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3887 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3888 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3889 |.else
4339 | xorps xmm0, xmm0 3890 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3891 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3892 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3893 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3894 |.endif
4348 | ins_next 3895 | ins_next
4349 |2: 3896 |2:
@@ -4361,11 +3908,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3908 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3909 |.if DUALNUM
4363 | // Nothing to do. 3910 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3911 |.else
4367 | mov ARG1, RD 3912 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3913 |.endif
4370 | mov BASE, RB // Restore BASE. 3914 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3915 | movzx RA, PC_RA
@@ -4380,7 +3924,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 3924
4381 /* -- Binary ops -------------------------------------------------------- */ 3925 /* -- Binary ops -------------------------------------------------------- */
4382 3926
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 3927 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 3928 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3929 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 3930 ||switch (vk) {
@@ -4389,37 +3933,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 3933 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3934 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 3935 | .endif
4392 | .if SSE 3936 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 3937 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 3938 || break;
4400 ||case 1: 3939 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 3940 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 3941 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3942 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 3943 | .endif
4405 | .if SSE 3944 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 3945 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 3946 || break;
4413 ||default: 3947 ||default:
4414 | checknum RB, ->vmeta_arith_vv 3948 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 3949 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 3950 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 3951 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 3952 || break;
4424 ||} 3953 ||}
4425 |.endmacro 3954 |.endmacro
@@ -4457,54 +3986,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 3986 |.endmacro
4458 | 3987 |
4459 |.macro ins_arithpost 3988 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 3989 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 3990 |.endmacro
4466 | 3991 |
4467 |.macro ins_arith, x87ins, sseins 3992 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 3993 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 3994 | ins_arithpost
4470 | ins_next 3995 | ins_next
4471 |.endmacro 3996 |.endmacro
4472 | 3997 |
4473 |.macro ins_arith, intins, x87ins, sseins 3998 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 3999 |.if DUALNUM
4475 | ins_arithdn intins 4000 | ins_arithdn intins
4476 |.else 4001 |.else
4477 | ins_arith, x87ins, sseins 4002 | ins_arith, sseins
4478 |.endif 4003 |.endif
4479 |.endmacro 4004 |.endmacro
4480 4005
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 4006 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 4007 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 4008 | ins_arith add, addsd
4484 break; 4009 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4010 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 4011 | ins_arith sub, subsd
4487 break; 4012 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4013 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 4014 | ins_arith imul, mulsd
4490 break; 4015 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4016 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 4017 | ins_arith divsd
4493 break; 4018 break;
4494 case BC_MODVN: 4019 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 4020 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 4021 |->BC_MODVN_Z:
4497 | call ->vm_mod 4022 | call ->vm_mod
4498 | ins_arithpost 4023 | ins_arithpost
4499 | ins_next 4024 | ins_next
4500 break; 4025 break;
4501 case BC_MODNV: case BC_MODVV: 4026 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 4027 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4028 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 4029 break;
4505 case BC_POW: 4030 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 4031 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 4032 | call ->vm_pow_sse
4508 | ins_arithpost 4033 | ins_arithpost
4509 | ins_next 4034 | ins_next
4510 break; 4035 break;
@@ -4573,25 +4098,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 4098 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 4099 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 4100 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 4101 |.else
4577 | movsx RD, RDW // Sign-extend literal. 4102 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 4103 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 4104 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 4105 |.endif
4584 | ins_next 4106 | ins_next
4585 break; 4107 break;
4586 case BC_KNUM: 4108 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 4109 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 4110 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 4111 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 4112 | ins_next
4596 break; 4113 break;
4597 case BC_KPRI: 4114 case BC_KPRI:
@@ -4698,18 +4215,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 4215 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 4216 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 4217 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 4218 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4219 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 4220 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 4221 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 4222 | ins_next
4714 break; 4223 break;
4715 case BC_USETP: 4224 case BC_USETP:
@@ -4863,18 +4372,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4372 |.else
4864 | // Convert number to int and back and compare. 4373 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4374 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4375 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4376 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4377 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4378 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4379 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4380 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4381 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4499,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4499 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4500 | jmp <1
5000 break; 4501 break;
4502 case BC_TGETR:
4503 | ins_ABC // RA = dst, RB = table, RC = key
4504 | mov TAB:RB, [BASE+RB*8]
4505 |.if DUALNUM
4506 | mov RC, dword [BASE+RC*8]
4507 |.else
4508 | cvttsd2si RC, qword [BASE+RC*8]
4509 |.endif
4510 | cmp RC, TAB:RB->asize
4511 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4512 | shl RC, 3
4513 | add RC, TAB:RB->array
4514 | // Get array slot.
4515 |->BC_TGETR_Z:
4516 |.if X64
4517 | mov RBa, [RC]
4518 | mov [BASE+RA*8], RBa
4519 |.else
4520 | mov RB, [RC]
4521 | mov RC, [RC+4]
4522 | mov [BASE+RA*8], RB
4523 | mov [BASE+RA*8+4], RC
4524 |.endif
4525 |->BC_TGETR2_Z:
4526 | ins_next
4527 break;
5001 4528
5002 case BC_TSETV: 4529 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4530 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4538,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4538 |.else
5012 | // Convert number to int and back and compare. 4539 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4540 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4541 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4542 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4543 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4544 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4545 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4546 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4547 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4711,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4711 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4712 | jmp <2
5194 break; 4713 break;
4714 case BC_TSETR:
4715 | ins_ABC // RA = src, RB = table, RC = key
4716 | mov TAB:RB, [BASE+RB*8]
4717 |.if DUALNUM
4718 | mov RC, dword [BASE+RC*8]
4719 |.else
4720 | cvttsd2si RC, qword [BASE+RC*8]
4721 |.endif
4722 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4723 | jnz >7
4724 |2:
4725 | cmp RC, TAB:RB->asize
4726 | jae ->vmeta_tsetr
4727 | shl RC, 3
4728 | add RC, TAB:RB->array
4729 | // Set array slot.
4730 |->BC_TSETR_Z:
4731 |.if X64
4732 | mov RBa, [BASE+RA*8]
4733 | mov [RC], RBa
4734 |.else
4735 | mov RB, [BASE+RA*8+4]
4736 | mov RA, [BASE+RA*8]
4737 | mov [RC+4], RB
4738 | mov [RC], RA
4739 |.endif
4740 | ins_next
4741 |
4742 |7: // Possible table write barrier for the value. Skip valiswhite check.
4743 | barrierback TAB:RB, RA
4744 | movzx RA, PC_RA // Restore RA.
4745 | jmp <2
4746 break;
5195 4747
5196 case BC_TSETM: 4748 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4749 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5386,10 +4938,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5386 |.if DUALNUM 4938 |.if DUALNUM
5387 | mov dword [BASE+RA*8+4], LJ_TISNUM 4939 | mov dword [BASE+RA*8+4], LJ_TISNUM
5388 | mov dword [BASE+RA*8], RC 4940 | mov dword [BASE+RA*8], RC
5389 |.elif SSE
5390 | cvtsi2sd xmm0, RC
5391 |.else 4941 |.else
5392 | fild dword [BASE+RA*8-8] 4942 | cvtsi2sd xmm0, RC
5393 |.endif 4943 |.endif
5394 | // Copy array slot to returned value. 4944 | // Copy array slot to returned value.
5395 |.if X64 4945 |.if X64
@@ -5405,10 +4955,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 | // Return array index as a numeric key. 4955 | // Return array index as a numeric key.
5406 |.if DUALNUM 4956 |.if DUALNUM
5407 | // See above. 4957 | // See above.
5408 |.elif SSE
5409 | movsd qword [BASE+RA*8], xmm0
5410 |.else 4958 |.else
5411 | fstp qword [BASE+RA*8] 4959 | movsd qword [BASE+RA*8], xmm0
5412 |.endif 4960 |.endif
5413 | mov [BASE+RA*8-8], RC // Update control var. 4961 | mov [BASE+RA*8-8], RC // Update control var.
5414 |2: 4962 |2:
@@ -5421,9 +4969,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5421 | 4969 |
5422 |4: // Skip holes in array part. 4970 |4: // Skip holes in array part.
5423 | add RC, 1 4971 | add RC, 1
5424 |.if not (DUALNUM or SSE)
5425 | mov [BASE+RA*8-8], RC
5426 |.endif
5427 | jmp <1 4972 | jmp <1
5428 | 4973 |
5429 |5: // Traverse hash part. 4974 |5: // Traverse hash part.
@@ -5757,7 +5302,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5757 if (!vk) { 5302 if (!vk) {
5758 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5303 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5759 } 5304 }
5760 |.if SSE
5761 | movsd xmm0, qword FOR_IDX 5305 | movsd xmm0, qword FOR_IDX
5762 | movsd xmm1, qword FOR_STOP 5306 | movsd xmm1, qword FOR_STOP
5763 if (vk) { 5307 if (vk) {
@@ -5770,22 +5314,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5770 | ucomisd xmm1, xmm0 5314 | ucomisd xmm1, xmm0
5771 |1: 5315 |1:
5772 | movsd qword FOR_EXT, xmm0 5316 | movsd qword FOR_EXT, xmm0
5773 |.else
5774 | fld qword FOR_STOP
5775 | fld qword FOR_IDX
5776 if (vk) {
5777 | fadd qword FOR_STEP // nidx = idx + step
5778 | fst qword FOR_IDX
5779 | fst qword FOR_EXT
5780 | test RB, RB; js >1
5781 } else {
5782 | fst qword FOR_EXT
5783 | jl >1
5784 }
5785 | fxch // Swap lim/(n)idx if step non-negative.
5786 |1:
5787 | fcomparepp
5788 |.endif
5789 if (op == BC_FORI) { 5317 if (op == BC_FORI) {
5790 |.if DUALNUM 5318 |.if DUALNUM
5791 | jnb <7 5319 | jnb <7
@@ -5813,11 +5341,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5813 |2: 5341 |2:
5814 | ins_next 5342 | ins_next
5815 |.endif 5343 |.endif
5816 |.if SSE 5344 |
5817 |3: // Invert comparison if step is negative. 5345 |3: // Invert comparison if step is negative.
5818 | ucomisd xmm0, xmm1 5346 | ucomisd xmm0, xmm1
5819 | jmp <1 5347 | jmp <1
5820 |.endif
5821 break; 5348 break;
5822 5349
5823 case BC_ITERL: 5350 case BC_ITERL:
@@ -5874,7 +5401,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5874 | mov RDa, TRACE:RD->mcode 5401 | mov RDa, TRACE:RD->mcode
5875 | mov L:RB, SAVE_L 5402 | mov L:RB, SAVE_L
5876 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5403 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5877 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5404 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5878 | // Save additional callee-save registers only used in compiled code. 5405 | // Save additional callee-save registers only used in compiled code.
5879 |.if X64WIN 5406 |.if X64WIN
5880 | mov TMPQ, r12 5407 | mov TMPQ, r12
@@ -6041,9 +5568,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6041 | // (lua_State *L, lua_CFunction f) 5568 | // (lua_State *L, lua_CFunction f)
6042 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5569 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6043 } 5570 }
6044 | set_vmstate INTERP
6045 | // nresults returned in eax (RD). 5571 | // nresults returned in eax (RD).
6046 | mov BASE, L:RB->base 5572 | mov BASE, L:RB->base
5573 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5574 | set_vmstate INTERP
6047 | lea RA, [BASE+RD*8] 5575 | lea RA, [BASE+RD*8]
6048 | neg RA 5576 | neg RA
6049 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5577 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8