about summary refs log tree commit diff
path: root/src/vm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x86.dasc')
-rw-r--r-- src/vm_x86.dasc 965
1 files changed, 236 insertions, 729 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index f25dfd30..62a5e139 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,6 +115,7 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
@@ -856,13 +856,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 856 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 857 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 858 | mov TMP1, RC
859 |.elif SSE 859 |.else
860 | cvtsi2sd xmm0, RC 860 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 861 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 862 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 863 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 864 | jmp >1
@@ -916,6 +912,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 912 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 913 | jmp ->vm_call_dispatch_f
918 | 914 |
915 |->vmeta_tgetr:
916 | mov FCARG1, TAB:RB
917 | mov RB, BASE // Save BASE.
918 | mov FCARG2, RC // Caveat: FCARG2 == BASE
919 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
920 | // cTValue * or NULL returned in eax (RC).
921 | movzx RA, PC_RA
922 | mov BASE, RB // Restore BASE.
923 | test RC, RC
924 | jnz ->BC_TGETR_Z
925 | mov dword [BASE+RA*8+4], LJ_TNIL
926 | jmp ->BC_TGETR2_Z
927 |
919 |//----------------------------------------------------------------------- 928 |//-----------------------------------------------------------------------
920 | 929 |
921 |->vmeta_tsets: 930 |->vmeta_tsets:
@@ -935,13 +944,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 944 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 945 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 946 | mov TMP1, RC
938 |.elif SSE 947 |.else
939 | cvtsi2sd xmm0, RC 948 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 949 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 950 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 951 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 952 | jmp >1
@@ -1007,6 +1012,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1012 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1013 | jmp ->vm_call_dispatch_f
1009 | 1014 |
1015 |->vmeta_tsetr:
1016 |.if X64WIN
1017 | mov L:CARG1d, SAVE_L
1018 | mov CARG3d, RC
1019 | mov L:CARG1d->base, BASE
1020 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1021 |.elif X64
1022 | mov L:CARG1d, SAVE_L
1023 | mov CARG2d, TAB:RB
1024 | mov L:CARG1d->base, BASE
1025 | mov RB, BASE // Save BASE.
1026 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1027 |.else
1028 | mov L:RA, SAVE_L
1029 | mov ARG2, TAB:RB
1030 | mov RB, BASE // Save BASE.
1031 | mov ARG3, RC
1032 | mov ARG1, L:RA
1033 | mov L:RA->base, BASE
1034 |.endif
1035 | mov SAVE_PC, PC
1036 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1037 | // TValue * returned in eax (RC).
1038 | movzx RA, PC_RA
1039 | mov BASE, RB // Restore BASE.
1040 | jmp ->BC_TSETR_Z
1041 |
1010 |//-- Comparison metamethods --------------------------------------------- 1042 |//-- Comparison metamethods ---------------------------------------------
1011 | 1043 |
1012 |->vmeta_comp: 1044 |->vmeta_comp:
@@ -1101,6 +1133,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1133 | jmp <3
1102 |.endif 1134 |.endif
1103 | 1135 |
1136 |->vmeta_istype:
1137 |.if X64
1138 | mov L:RB, SAVE_L
1139 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1140 | mov CARG2d, RA
1141 | movzx CARG3d, PC_RD
1142 | mov L:CARG1d, L:RB
1143 |.else
1144 | movzx RD, PC_RD
1145 | mov ARG2, RA
1146 | mov L:RB, SAVE_L
1147 | mov ARG3, RD
1148 | mov ARG1, L:RB
1149 | mov L:RB->base, BASE
1150 |.endif
1151 | mov SAVE_PC, PC
1152 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1153 | mov BASE, L:RB->base
1154 | jmp <6
1155 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1156 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1157 |
1106 |->vmeta_arith_vno: 1158 |->vmeta_arith_vno:
@@ -1509,11 +1561,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1561 |.else
1510 | jae ->fff_fallback 1562 | jae ->fff_fallback
1511 |.endif 1563 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1564 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1565 |
1518 |.ffunc_1 tostring 1566 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1567 | // Only handles the string or number case inline.
@@ -1538,9 +1586,9 @@ static void build_subroutines(BuildCtx *ctx)
1538 |.endif 1586 |.endif
1539 | mov L:FCARG1, L:RB 1587 | mov L:FCARG1, L:RB
1540 |.if DUALNUM 1588 |.if DUALNUM
1541 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1589 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1542 |.else 1590 |.else
1543 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1591 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1544 |.endif 1592 |.endif
1545 | // GCstr returned in eax (RD). 1593 | // GCstr returned in eax (RD).
1546 | mov BASE, L:RB->base 1594 | mov BASE, L:RB->base
@@ -1631,19 +1679,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1679 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1680 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1681 | mov dword [BASE-8], RD
1634 |.elif SSE 1682 |.else
1635 | movsd xmm0, qword [BASE+8] 1683 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1684 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1685 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1686 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1687 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1688 |.endif
1648 | mov TAB:RB, [BASE] 1689 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1690 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1731,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1731 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1732 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1733 | mov dword [BASE+8], 0
1693 |.elif SSE 1734 |.else
1694 | xorps xmm0, xmm0 1735 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1736 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1737 |.endif
1700 | mov RD, 1+3 1738 | mov RD, 1+3
1701 | jmp ->fff_res 1739 | jmp ->fff_res
@@ -1925,12 +1963,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1963 |->fff_resi: // Dummy.
1926 |.endif 1964 |.endif
1927 | 1965 |
1928 |.if SSE
1929 |->fff_resn: 1966 |->fff_resn:
1930 | mov PC, [BASE-4] 1967 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1968 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1969 | jmp ->fff_res1
1933 |.endif
1934 | 1970 |
1935 | .ffunc_1 math_abs 1971 | .ffunc_1 math_abs
1936 |.if DUALNUM 1972 |.if DUALNUM
@@ -1954,8 +1990,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1990 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1991 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1992 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1993 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 1994 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 1995 | andps xmm0, xmm1
@@ -1963,15 +1997,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 1997 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 1998 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 1999 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 2000 |
1976 |->fff_res1: 2001 |->fff_res1:
1977 | mov RD, 1+1 2002 | mov RD, 1+1
@@ -2008,48 +2033,24 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2033 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2034 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2035 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2036 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2037 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2038 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2039 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2040 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2041 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2042 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2043 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2044 | jp ->fff_resxmm0
2021 | je ->fff_resi 2045 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2046 |.endif
2047 | jmp ->fff_resxmm0
2043 |.endmacro 2048 |.endmacro
2044 | 2049 |
2045 | math_round floor 2050 | math_round floor
2046 | math_round ceil 2051 | math_round ceil
2047 | 2052 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2053 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2054 |
2054 |.ffunc math_log 2055 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2056 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
@@ -2072,42 +2073,24 @@ static void build_subroutines(BuildCtx *ctx)
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2073 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2074 |
2074 |.macro math_extern, func 2075 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2076 | .ffunc_nsse math_ .. func
2077 | .if not X64 2077 |.if not X64
2078 | movsd FPARG1, xmm0 2078 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2079 |.endif
2084 | mov RB, BASE 2080 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2081 | call extern lj_vm_ .. func
2086 | mov BASE, RB 2082 | mov BASE, RB
2087 | .if X64 2083 |.if X64
2088 | jmp ->fff_resxmm0 2084 | jmp ->fff_resxmm0
2089 | .else 2085 |.else
2090 | jmp ->fff_resn 2086 | jmp ->fff_resn
2091 | .endif 2087 |.endif
2092 |.endmacro 2088 |.endmacro
2093 | 2089 |
2094 | math_extern sinh 2090 | math_extern sinh
2095 | math_extern cosh 2091 | math_extern cosh
2096 | math_extern tanh 2092 | math_extern tanh
2097 | 2093 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2094 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2095 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2096 |
@@ -2123,65 +2106,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2106 | cmp RB, 0x00200000; jb >4
2124 |1: 2107 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2108 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2109 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2110 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2111 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2112 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2113 | mov [BASE-4], RB
2135 |2: 2114 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2115 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2116 | mov RD, 1+2
2142 | jmp ->fff_res 2117 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2118 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2119 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2120 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2121 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2122 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2123 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2124 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2125 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2126 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2127 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2128 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2129 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2130 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2131 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2132 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2133 | subsd xmm4, xmm0
2174 |1: 2134 |1:
2175 | movsd qword [BASE-8], xmm0 2135 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2136 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2137 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2138 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2139 |2:
@@ -2191,24 +2143,16 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2143 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2144 | jmp <2
2193 |4: 2145 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2146 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 | 2147 |
2200 |.ffunc_nnr math_fmod 2148 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; sahf; jp <1 2149 |1: ; fprem; fnstsw ax; sahf; jp <1
2202 | fpop1 2150 | fpop1
2203 | jmp ->fff_resn 2151 | jmp ->fff_resn
2204 | 2152 |
2205 |.if SSE 2153 |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 | 2154 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop 2155 |.macro math_minmax, name, cmovop, sseop
2212 | .ffunc name 2156 | .ffunc name
2213 | mov RA, 2 2157 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2158 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2169,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2169 |3:
2226 | ja ->fff_fallback 2170 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2171 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2172 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2173 | jmp >6
2235 |4: 2174 |4:
2236 | ja ->fff_fallback 2175 | ja ->fff_fallback
@@ -2238,7 +2177,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2177 | jae ->fff_fallback
2239 |.endif 2178 |.endif
2240 | 2179 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2180 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2181 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2182 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,48 +2195,13 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2195 | sseop xmm0, xmm1
2258 | add RA, 1 2196 | add RA, 1
2259 | jmp <5 2197 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2198 |.endmacro
2281 | 2199 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2200 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2201 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2202 |
2289 |//-- String library ----------------------------------------------------- 2203 |//-- String library -----------------------------------------------------
2290 | 2204 |
2291 |.ffunc_1 string_len
2292 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2293 | mov STR:RB, [BASE]
2294 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn
2300 |.endif
2301 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2205 |.ffunc string_byte // Only handle the 1-arg case here.
2303 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2206 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2304 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2207 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2309,10 +2212,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2212 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2213 |.if DUALNUM
2311 | jmp ->fff_resi 2214 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2215 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2216 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2217 |.endif
2317 | 2218 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2219 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2225,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2225 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2226 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2227 | mov TMP2, RB
2327 |.elif SSE 2228 |.else
2328 | jae ->fff_fallback 2229 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2230 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2231 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2232 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2233 |.endif
2338 |.if X64 2234 |.if X64
2339 | mov TMP3, 1 2235 | mov TMP3, 1
@@ -2354,6 +2250,7 @@ static void build_subroutines(BuildCtx *ctx)
2354 |.endif 2250 |.endif
2355 | mov SAVE_PC, PC 2251 | mov SAVE_PC, PC
2356 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2252 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2253 |->fff_resstr:
2357 | // GCstr * returned in eax (RD). 2254 | // GCstr * returned in eax (RD).
2358 | mov BASE, L:RB->base 2255 | mov BASE, L:RB->base
2359 | mov PC, [BASE-4] 2256 | mov PC, [BASE-4]
@@ -2371,14 +2268,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2268 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2269 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2270 | mov TMP2, RB
2374 |.elif SSE 2271 |.else
2375 | jae ->fff_fallback 2272 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2273 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2274 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2275 |.endif
2383 |1: 2276 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2277 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2286,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2286 | mov RB, STR:RB->len
2394 |.if DUALNUM 2287 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2288 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2289 |.else
2399 | fld qword [BASE+8] 2290 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2291 |.endif
2403 | mov RC, TMP2 2292 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2293 | cmp RB, RC // len < end? (unsigned compare)
@@ -2442,123 +2331,27 @@ static void build_subroutines(BuildCtx *ctx)
2442 | xor RC, RC // Zero length. Any ptr in RB is ok. 2331 | xor RC, RC // Zero length. Any ptr in RB is ok.
2443 | jmp <4 2332 | jmp <4
2444 | 2333 |
2445 |.ffunc string_rep // Only handle the 1-char case inline. 2334 |.macro ffstring_op, name
2335 | .ffunc_1 string_ .. name
2446 | ffgccheck 2336 | ffgccheck
2447 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2448 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2337 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2449 | cmp dword [BASE+12], LJ_TISNUM 2338 | mov L:RB, SAVE_L
2450 | mov STR:RB, [BASE] 2339 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2451 |.if DUALNUM 2340 | mov L:RB->base, BASE
2452 | jne ->fff_fallback 2341 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2453 | mov RC, dword [BASE+8] 2342 | mov RC, SBUF:FCARG1->b
2454 |.elif SSE 2343 | mov SBUF:FCARG1->L, L:RB
2455 | jae ->fff_fallback 2344 | mov SBUF:FCARG1->p, RC
2456 | cvttsd2si RC, qword [BASE+8] 2345 | mov SAVE_PC, PC
2457 |.else 2346 | call extern lj_buf_putstr_ .. name .. @8
2458 | jae ->fff_fallback 2347 | mov FCARG1, eax
2459 | fld qword [BASE+8] 2348 | call extern lj_buf_tostr@4
2460 | fistp TMP2 2349 | jmp ->fff_resstr
2461 | mov RC, TMP2
2462 |.endif
2463 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int)
2465 | cmp dword STR:RB->len, 1
2466 | jb ->fff_emptystr // Zero length string?
2467 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2468 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2469 | movzx RA, byte STR:RB[1]
2470 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2471 |.if X64
2472 | mov TMP3, RC
2473 |.else
2474 | mov ARG3, RC
2475 |.endif
2476 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2477 | mov [RB], RAL
2478 | add RB, 1
2479 | sub RC, 1
2480 | jnz <1
2481 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2482 | jmp ->fff_newstr
2483 |
2484 |.ffunc_1 string_reverse
2485 | ffgccheck
2486 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2487 | mov STR:RB, [BASE]
2488 | mov RC, STR:RB->len
2489 | test RC, RC
2490 | jz ->fff_emptystr // Zero length string?
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2492 | add RB, #STR
2493 | mov TMP2, PC // Need another temp register.
2494 |.if X64
2495 | mov TMP3, RC
2496 |.else
2497 | mov ARG3, RC
2498 |.endif
2499 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2500 |1:
2501 | movzx RA, byte [RB]
2502 | add RB, 1
2503 | sub RC, 1
2504 | mov [PC+RC], RAL
2505 | jnz <1
2506 | mov RD, PC
2507 | mov PC, TMP2
2508 | jmp ->fff_newstr
2509 |
2510 |.macro ffstring_case, name, lo, hi
2511 | .ffunc_1 name
2512 | ffgccheck
2513 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2514 | mov STR:RB, [BASE]
2515 | mov RC, STR:RB->len
2516 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2517 | add RB, #STR
2518 | mov TMP2, PC // Need another temp register.
2519 |.if X64
2520 | mov TMP3, RC
2521 |.else
2522 | mov ARG3, RC
2523 |.endif
2524 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2525 | jmp >3
2526 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2527 | movzx RA, byte [RB+RC]
2528 | cmp RA, lo
2529 | jb >2
2530 | cmp RA, hi
2531 | ja >2
2532 | xor RA, 0x20
2533 |2:
2534 | mov [PC+RC], RAL
2535 |3:
2536 | sub RC, 1
2537 | jns <1
2538 | mov RD, PC
2539 | mov PC, TMP2
2540 | jmp ->fff_newstr
2541 |.endmacro 2350 |.endmacro
2542 | 2351 |
2543 |ffstring_case string_lower, 0x41, 0x5a 2352 |ffstring_op reverse
2544 |ffstring_case string_upper, 0x61, 0x7a 2353 |ffstring_op lower
2545 | 2354 |ffstring_op upper
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 | 2355 |
2563 |//-- Bit library -------------------------------------------------------- 2356 |//-- Bit library --------------------------------------------------------
2564 | 2357 |
@@ -2567,11 +2360,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2360 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2361 | .ffunc_1 name
2569 |.if kind == 2 2362 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2363 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2364 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2365 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2366 |.if DUALNUM
@@ -2587,37 +2376,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2376 |.else
2588 | jae ->fff_fallback 2377 | jae ->fff_fallback
2589 |.endif 2378 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2379 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2380 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2381 | sseconst_tobit xmm1, RBa
2594 |.endif 2382 |.endif
2595 | addsd xmm0, xmm1 2383 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2384 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2385 |2:
2609 |.endmacro 2386 |.endmacro
2610 | 2387 |
2611 |.ffunc_bit bit_tobit, 0 2388 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2389 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2390 |
2622 |.macro .ffunc_bit_op, name, ins 2391 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2392 | .ffunc_bit name, 2
@@ -2637,17 +2406,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2406 |.else
2638 | jae ->fff_fallback_bit_op 2407 | jae ->fff_fallback_bit_op
2639 |.endif 2408 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2409 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2410 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2411 | movd RA, xmm0
2644 | ins RB, RA 2412 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2413 | sub RD, 8
2652 | jmp <1 2414 | jmp <1
2653 |.endmacro 2415 |.endmacro
@@ -2664,15 +2426,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2426 | not RB
2665 |.if DUALNUM 2427 |.if DUALNUM
2666 | jmp ->fff_resbit 2428 | jmp ->fff_resbit
2667 |.elif SSE 2429 |.else
2668 |->fff_resbit: 2430 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2431 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2432 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2433 |.endif
2677 | 2434 |
2678 |->fff_fallback_bit_op: 2435 |->fff_fallback_bit_op:
@@ -2685,22 +2442,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2442 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2443 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2444 | mov RA, dword [BASE+8]
2688 |.elif SSE 2445 |.else
2689 | .ffunc_nnsse name 2446 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2447 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2448 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2449 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2450 | movd RB, xmm0
2694 | movd RA, xmm1 2451 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2452 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2453 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2454 | jmp ->fff_resbit
@@ -3051,27 +2799,9 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2799 |//-----------------------------------------------------------------------
3052 | 2800 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2801 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2802 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2803 |.macro vm_round, name, mode
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2804 |->name .. _sse:
3057 |.macro vm_round_x87, mode1, mode2
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3059 | mov [esp+8], eax
3060 | mov ax, mode1
3061 | or ax, [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret
3071 |.endmacro
3072 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2805 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2806 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2807 | movaps xmm1, xmm0
@@ -3107,22 +2837,21 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2837 | ret
3108 |.endmacro 2838 |.endmacro
3109 | 2839 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2840 |->vm_floor:
3111 |->name: 2841 |.if not X64
3112 |.if not SSE 2842 | movsd xmm0, qword [esp+4]
3113 | vm_round_x87 mode1, mode2 2843 | call ->vm_floor_sse
2844 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2845 | fld qword [esp+4]
2846 | ret
3114 |.endif 2847 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 | 2848 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff 2849 | vm_round vm_floor, 0
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff 2850 | vm_round vm_ceil, 1
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff 2851 | vm_round vm_trunc, 2
3122 | 2852 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2853 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2854 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2855 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2856 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2857 | movaps xmm5, xmm0
@@ -3150,23 +2879,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2879 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2880 | subsd xmm0, xmm1
3152 | ret 2881 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 | 2882 |
3171 |// FP log2(x). Called by math.log(x, base). 2883 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2: 2884 |->vm_log2:
@@ -3217,105 +2929,15 @@ static void build_subroutines(BuildCtx *ctx)
3217 | 2929 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 2930 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith. 2931 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 2932 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code. 2933 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse: 2934 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1 2935 | cvttsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax 2936 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2 2937 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents. 2938 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent. 2939 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse. 2940 | // Fallthrough.
3319 | 2941 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 2942 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 2943 |->vm_powi_sse:
@@ -3437,8 +3059,8 @@ static void build_subroutines(BuildCtx *ctx)
3437 | .else 3059 | .else
3438 | .define fpmop, CARG1d 3060 | .define fpmop, CARG1d
3439 | .endif 3061 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3062 | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3063 | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
3442 | sqrtsd xmm0, xmm0; ret 3064 | sqrtsd xmm0, xmm0; ret
3443 |2: 3065 |2:
3444 | .if X64WIN 3066 | .if X64WIN
@@ -3478,14 +3100,13 @@ static void build_subroutines(BuildCtx *ctx)
3478 | ret 3100 | ret
3479 |.else // x86 calling convention. 3101 |.else // x86 calling convention.
3480 | .define fpmop, eax 3102 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12] 3103 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4] 3104 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2 3105 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7 3106 | call ->vm_floor_sse; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7 3107 |1: ; call ->vm_ceil_sse; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2 3108 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7 3109 | call ->vm_trunc_sse; jmp >7
3489 |1: 3110 |1:
3490 | sqrtsd xmm0, xmm0 3111 | sqrtsd xmm0, xmm0
3491 |7: 3112 |7:
@@ -3503,23 +3124,6 @@ static void build_subroutines(BuildCtx *ctx)
3503 |2: ; cmp fpmop, 11; je >1; ja >9 3124 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret 3125 | fcos; ret
3505 |1: ; fptan; fpop; ret 3126 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif 3127 |.endif
3524 |9: ; int3 // Bad fpm. 3128 |9: ; int3 // Bad fpm.
3525 |.endif 3129 |.endif
@@ -3541,7 +3145,7 @@ static void build_subroutines(BuildCtx *ctx)
3541 |2: ; cmp foldop, 3; je >1; ja >2 3145 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret 3146 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret 3147 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3148 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
3545 | cmp foldop, 7; je >1; ja >2 3149 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3150 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3151 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
@@ -3574,7 +3178,7 @@ static void build_subroutines(BuildCtx *ctx)
3574 |1: ; maxsd xmm0, xmm1; ret 3178 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op. 3179 |9: ; int3 // Bad op.
3576 | 3180 |
3577 |.elif SSE // x86 calling convention with SSE ops. 3181 |.else // x86 calling convention.
3578 | 3182 |
3579 | .define foldop, eax 3183 | .define foldop, eax
3580 | mov foldop, [esp+20] 3184 | mov foldop, [esp+20]
@@ -3593,7 +3197,7 @@ static void build_subroutines(BuildCtx *ctx)
3593 |2: ; cmp foldop, 5 3197 |2: ; cmp foldop, 5
3594 | je >1; ja >2 3198 | je >1; ja >2
3595 | call ->vm_mod; jmp <7 3199 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3200 |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2 3201 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3202 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3203 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
@@ -3608,29 +3212,6 @@ static void build_subroutines(BuildCtx *ctx)
3608 |1: ; maxsd xmm0, xmm1; jmp <7 3212 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op. 3213 |9: ; int3 // Bad op.
3610 | 3214 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif 3215 |.endif
3635 | 3216 |
3636 |//----------------------------------------------------------------------- 3217 |//-----------------------------------------------------------------------
@@ -3943,19 +3524,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3524 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3525 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3526 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3527 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3528 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3529 |
3955 |8: // RA is an integer, RD is not an integer. 3530 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3531 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3532 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3533 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3534 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3535 | add PC, 4
@@ -3963,29 +3537,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3537 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3538 | jmp <6
3965 |.else 3539 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3540 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3541 | checknum RD, ->vmeta_comp
3972 |.endif 3542 |.endif
3973 |.if SSE
3974 |1: 3543 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3544 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3545 |2:
3977 | add PC, 4 3546 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3547 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3548 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3549 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3550 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3551 |.if DUALNUM
@@ -4025,43 +3585,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3585 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3586 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3587 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3588 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3589 | jmp >2
4034 | 3590 |
4035 |8: // RD is an integer, RA is not an integer. 3591 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3592 | ja >5
4037 | // RD is an integer, RA is a number. 3593 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3594 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3595 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3596 | jmp >4
4046 | 3597 |
4047 |.else 3598 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3599 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3600 | checknum RA, >5
4050 |.endif 3601 |.endif
4051 |.if SSE
4052 |1: 3602 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3603 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3604 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3605 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3606 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3607 iseqne_fp:
4066 if (vk) { 3608 if (vk) {
4067 | jp >2 // Unordered means not equal. 3609 | jp >2 // Unordered means not equal.
@@ -4184,39 +3726,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3726 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3727 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3728 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3729 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3730 | jmp >2
4193 | 3731 |
4194 |8: // RA is an integer, RD is a number. 3732 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3733 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3734 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3735 | jmp >4
4203 |.else 3736 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3737 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3738 |.endif
4206 |.if SSE
4207 |1: 3739 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3740 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3741 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3742 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3743 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3744 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3745 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3746 vk = op == BC_ISEQP;
@@ -4267,6 +3791,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3791 | ins_next
4268 break; 3792 break;
4269 3793
3794 case BC_ISTYPE:
3795 | ins_AD // RA = src, RD = -type
3796 | add RD, [BASE+RA*8+4]
3797 | jne ->vmeta_istype
3798 | ins_next
3799 break;
3800 case BC_ISNUM:
3801 | ins_AD // RA = src, RD = -(TISNUM-1)
3802 | checknum RA, ->vmeta_istype
3803 | ins_next
3804 break;
3805
4270 /* -- Unary ops --------------------------------------------------------- */ 3806 /* -- Unary ops --------------------------------------------------------- */
4271 3807
4272 case BC_MOV: 3808 case BC_MOV:
@@ -4310,16 +3846,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3846 |.else
4311 | checknum RD, ->vmeta_unm 3847 | checknum RD, ->vmeta_unm
4312 |.endif 3848 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3849 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3850 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3851 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3852 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3853 |.if DUALNUM
4324 | jmp <9 3854 | jmp <9
4325 |.else 3855 |.else
@@ -4335,15 +3865,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3865 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3866 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3867 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3868 |.else
4339 | xorps xmm0, xmm0 3869 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3870 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3871 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3872 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3873 |.endif
4348 | ins_next 3874 | ins_next
4349 |2: 3875 |2:
@@ -4361,11 +3887,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3887 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3888 |.if DUALNUM
4363 | // Nothing to do. 3889 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3890 |.else
4367 | mov ARG1, RD 3891 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3892 |.endif
4370 | mov BASE, RB // Restore BASE. 3893 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3894 | movzx RA, PC_RA
@@ -4380,7 +3903,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 3903
4381 /* -- Binary ops -------------------------------------------------------- */ 3904 /* -- Binary ops -------------------------------------------------------- */
4382 3905
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 3906 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 3907 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3908 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 3909 ||switch (vk) {
@@ -4389,37 +3912,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 3912 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3913 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 3914 | .endif
4392 | .if SSE 3915 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 3916 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 3917 || break;
4400 ||case 1: 3918 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 3919 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 3920 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3921 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 3922 | .endif
4405 | .if SSE 3923 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 3924 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 3925 || break;
4413 ||default: 3926 ||default:
4414 | checknum RB, ->vmeta_arith_vv 3927 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 3928 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 3929 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 3930 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 3931 || break;
4424 ||} 3932 ||}
4425 |.endmacro 3933 |.endmacro
@@ -4457,54 +3965,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 3965 |.endmacro
4458 | 3966 |
4459 |.macro ins_arithpost 3967 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 3968 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 3969 |.endmacro
4466 | 3970 |
4467 |.macro ins_arith, x87ins, sseins 3971 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 3972 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 3973 | ins_arithpost
4470 | ins_next 3974 | ins_next
4471 |.endmacro 3975 |.endmacro
4472 | 3976 |
4473 |.macro ins_arith, intins, x87ins, sseins 3977 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 3978 |.if DUALNUM
4475 | ins_arithdn intins 3979 | ins_arithdn intins
4476 |.else 3980 |.else
4477 | ins_arith, x87ins, sseins 3981 | ins_arith, sseins
4478 |.endif 3982 |.endif
4479 |.endmacro 3983 |.endmacro
4480 3984
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3985 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3986 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 3987 | ins_arith add, addsd
4484 break; 3988 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3989 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 3990 | ins_arith sub, subsd
4487 break; 3991 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3992 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 3993 | ins_arith imul, mulsd
4490 break; 3994 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3995 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 3996 | ins_arith divsd
4493 break; 3997 break;
4494 case BC_MODVN: 3998 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 3999 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 4000 |->BC_MODVN_Z:
4497 | call ->vm_mod 4001 | call ->vm_mod
4498 | ins_arithpost 4002 | ins_arithpost
4499 | ins_next 4003 | ins_next
4500 break; 4004 break;
4501 case BC_MODNV: case BC_MODVV: 4005 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 4006 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4007 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 4008 break;
4505 case BC_POW: 4009 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 4010 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 4011 | call ->vm_pow_sse
4508 | ins_arithpost 4012 | ins_arithpost
4509 | ins_next 4013 | ins_next
4510 break; 4014 break;
@@ -4573,25 +4077,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 4077 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 4078 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 4079 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 4080 |.else
4577 | movsx RD, RDW // Sign-extend literal. 4081 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 4082 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 4083 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 4084 |.endif
4584 | ins_next 4085 | ins_next
4585 break; 4086 break;
4586 case BC_KNUM: 4087 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 4088 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 4089 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 4090 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 4091 | ins_next
4596 break; 4092 break;
4597 case BC_KPRI: 4093 case BC_KPRI:
@@ -4698,18 +4194,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 4194 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 4195 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 4196 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 4197 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4198 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 4199 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 4200 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 4201 | ins_next
4714 break; 4202 break;
4715 case BC_USETP: 4203 case BC_USETP:
@@ -4863,18 +4351,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4351 |.else
4864 | // Convert number to int and back and compare. 4352 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4353 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4354 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4355 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4356 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4357 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4358 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4359 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4360 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4478,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4478 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4479 | jmp <1
5000 break; 4480 break;
4481 case BC_TGETR:
4482 | ins_ABC // RA = dst, RB = table, RC = key
4483 | mov TAB:RB, [BASE+RB*8]
4484 |.if DUALNUM
4485 | mov RC, dword [BASE+RC*8]
4486 |.else
4487 | cvttsd2si RC, qword [BASE+RC*8]
4488 |.endif
4489 | cmp RC, TAB:RB->asize
4490 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4491 | shl RC, 3
4492 | add RC, TAB:RB->array
4493 | // Get array slot.
4494 |->BC_TGETR_Z:
4495 |.if X64
4496 | mov RBa, [RC]
4497 | mov [BASE+RA*8], RBa
4498 |.else
4499 | mov RB, [RC]
4500 | mov RC, [RC+4]
4501 | mov [BASE+RA*8], RB
4502 | mov [BASE+RA*8+4], RC
4503 |.endif
4504 |->BC_TGETR2_Z:
4505 | ins_next
4506 break;
5001 4507
5002 case BC_TSETV: 4508 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4509 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4517,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4517 |.else
5012 | // Convert number to int and back and compare. 4518 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4519 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4520 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4521 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4522 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4523 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4524 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4525 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4526 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4690,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4690 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4691 | jmp <2
5194 break; 4692 break;
4693 case BC_TSETR:
4694 | ins_ABC // RA = src, RB = table, RC = key
4695 | mov TAB:RB, [BASE+RB*8]
4696 |.if DUALNUM
4697 | mov RC, dword [BASE+RC*8]
4698 |.else
4699 | cvttsd2si RC, qword [BASE+RC*8]
4700 |.endif
4701 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4702 | jnz >7
4703 |2:
4704 | cmp RC, TAB:RB->asize
4705 | jae ->vmeta_tsetr
4706 | shl RC, 3
4707 | add RC, TAB:RB->array
4708 | // Set array slot.
4709 |->BC_TSETR_Z:
4710 |.if X64
4711 | mov RBa, [BASE+RA*8]
4712 | mov [RC], RBa
4713 |.else
4714 | mov RB, [BASE+RA*8+4]
4715 | mov RA, [BASE+RA*8]
4716 | mov [RC+4], RB
4717 | mov [RC], RA
4718 |.endif
4719 | ins_next
4720 |
4721 |7: // Possible table write barrier for the value. Skip valiswhite check.
4722 | barrierback TAB:RB, RA
4723 | movzx RA, PC_RA // Restore RA.
4724 | jmp <2
4725 break;
5195 4726
5196 case BC_TSETM: 4727 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4728 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5386,10 +4917,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5386 |.if DUALNUM 4917 |.if DUALNUM
5387 | mov dword [BASE+RA*8+4], LJ_TISNUM 4918 | mov dword [BASE+RA*8+4], LJ_TISNUM
5388 | mov dword [BASE+RA*8], RC 4919 | mov dword [BASE+RA*8], RC
5389 |.elif SSE
5390 | cvtsi2sd xmm0, RC
5391 |.else 4920 |.else
5392 | fild dword [BASE+RA*8-8] 4921 | cvtsi2sd xmm0, RC
5393 |.endif 4922 |.endif
5394 | // Copy array slot to returned value. 4923 | // Copy array slot to returned value.
5395 |.if X64 4924 |.if X64
@@ -5405,10 +4934,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 | // Return array index as a numeric key. 4934 | // Return array index as a numeric key.
5406 |.if DUALNUM 4935 |.if DUALNUM
5407 | // See above. 4936 | // See above.
5408 |.elif SSE
5409 | movsd qword [BASE+RA*8], xmm0
5410 |.else 4937 |.else
5411 | fstp qword [BASE+RA*8] 4938 | movsd qword [BASE+RA*8], xmm0
5412 |.endif 4939 |.endif
5413 | mov [BASE+RA*8-8], RC // Update control var. 4940 | mov [BASE+RA*8-8], RC // Update control var.
5414 |2: 4941 |2:
@@ -5421,9 +4948,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5421 | 4948 |
5422 |4: // Skip holes in array part. 4949 |4: // Skip holes in array part.
5423 | add RC, 1 4950 | add RC, 1
5424 |.if not (DUALNUM or SSE)
5425 | mov [BASE+RA*8-8], RC
5426 |.endif
5427 | jmp <1 4951 | jmp <1
5428 | 4952 |
5429 |5: // Traverse hash part. 4953 |5: // Traverse hash part.
@@ -5757,7 +5281,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5757 if (!vk) { 5281 if (!vk) {
5758 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5282 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5759 } 5283 }
5760 |.if SSE
5761 | movsd xmm0, qword FOR_IDX 5284 | movsd xmm0, qword FOR_IDX
5762 | movsd xmm1, qword FOR_STOP 5285 | movsd xmm1, qword FOR_STOP
5763 if (vk) { 5286 if (vk) {
@@ -5770,22 +5293,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5770 | ucomisd xmm1, xmm0 5293 | ucomisd xmm1, xmm0
5771 |1: 5294 |1:
5772 | movsd qword FOR_EXT, xmm0 5295 | movsd qword FOR_EXT, xmm0
5773 |.else
5774 | fld qword FOR_STOP
5775 | fld qword FOR_IDX
5776 if (vk) {
5777 | fadd qword FOR_STEP // nidx = idx + step
5778 | fst qword FOR_IDX
5779 | fst qword FOR_EXT
5780 | test RB, RB; js >1
5781 } else {
5782 | fst qword FOR_EXT
5783 | jl >1
5784 }
5785 | fxch // Swap lim/(n)idx if step non-negative.
5786 |1:
5787 | fcomparepp
5788 |.endif
5789 if (op == BC_FORI) { 5296 if (op == BC_FORI) {
5790 |.if DUALNUM 5297 |.if DUALNUM
5791 | jnb <7 5298 | jnb <7
@@ -5813,11 +5320,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5813 |2: 5320 |2:
5814 | ins_next 5321 | ins_next
5815 |.endif 5322 |.endif
5816 |.if SSE 5323 |
5817 |3: // Invert comparison if step is negative. 5324 |3: // Invert comparison if step is negative.
5818 | ucomisd xmm0, xmm1 5325 | ucomisd xmm0, xmm1
5819 | jmp <1 5326 | jmp <1
5820 |.endif
5821 break; 5327 break;
5822 5328
5823 case BC_ITERL: 5329 case BC_ITERL:
@@ -5875,6 +5381,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5875 | mov L:RB, SAVE_L 5381 | mov L:RB, SAVE_L
5876 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5382 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5877 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5383 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
5384 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5878 | // Save additional callee-save registers only used in compiled code. 5385 | // Save additional callee-save registers only used in compiled code.
5879 |.if X64WIN 5386 |.if X64WIN
5880 | mov TMPQ, r12 5387 | mov TMPQ, r12