aboutsummaryrefslogtreecommitdiff
path: root/src/vm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x86.dasc')
-rw-r--r--src/vm_x86.dasc872
1 files changed, 231 insertions, 641 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index b4674e2b..3fd897ec 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -856,13 +855,9 @@ static void build_subroutines(BuildCtx *ctx)
856 |.if DUALNUM 855 |.if DUALNUM
857 | mov TMP2, LJ_TISNUM 856 | mov TMP2, LJ_TISNUM
858 | mov TMP1, RC 857 | mov TMP1, RC
859 |.elif SSE 858 |.else
860 | cvtsi2sd xmm0, RC 859 | cvtsi2sd xmm0, RC
861 | movsd TMPQ, xmm0 860 | movsd TMPQ, xmm0
862 |.else
863 | mov ARG4, RC
864 | fild ARG4
865 | fstp TMPQ
866 |.endif 861 |.endif
867 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 862 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
868 | jmp >1 863 | jmp >1
@@ -916,6 +911,19 @@ static void build_subroutines(BuildCtx *ctx)
916 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 911 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
917 | jmp ->vm_call_dispatch_f 912 | jmp ->vm_call_dispatch_f
918 | 913 |
914 |->vmeta_tgetr:
915 | mov FCARG1, TAB:RB
916 | mov RB, BASE // Save BASE.
917 | mov FCARG2, RC // Caveat: FCARG2 == BASE
918 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
919 | // cTValue * or NULL returned in eax (RC).
920 | movzx RA, PC_RA
921 | mov BASE, RB // Restore BASE.
922 | test RC, RC
923 | jnz ->BC_TGETR_Z
924 | mov dword [BASE+RA*8+4], LJ_TNIL
925 | jmp ->BC_TGETR2_Z
926 |
919 |//----------------------------------------------------------------------- 927 |//-----------------------------------------------------------------------
920 | 928 |
921 |->vmeta_tsets: 929 |->vmeta_tsets:
@@ -935,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx)
935 |.if DUALNUM 943 |.if DUALNUM
936 | mov TMP2, LJ_TISNUM 944 | mov TMP2, LJ_TISNUM
937 | mov TMP1, RC 945 | mov TMP1, RC
938 |.elif SSE 946 |.else
939 | cvtsi2sd xmm0, RC 947 | cvtsi2sd xmm0, RC
940 | movsd TMPQ, xmm0 948 | movsd TMPQ, xmm0
941 |.else
942 | mov ARG4, RC
943 | fild ARG4
944 | fstp TMPQ
945 |.endif 949 |.endif
946 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 950 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
947 | jmp >1 951 | jmp >1
@@ -1007,6 +1011,33 @@ static void build_subroutines(BuildCtx *ctx)
1007 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1011 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1008 | jmp ->vm_call_dispatch_f 1012 | jmp ->vm_call_dispatch_f
1009 | 1013 |
1014 |->vmeta_tsetr:
1015 |.if X64WIN
1016 | mov L:CARG1d, SAVE_L
1017 | mov CARG3d, RC
1018 | mov L:CARG1d->base, BASE
1019 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1020 |.elif X64
1021 | mov L:CARG1d, SAVE_L
1022 | mov CARG2d, TAB:RB
1023 | mov L:CARG1d->base, BASE
1024 | mov RB, BASE // Save BASE.
1025 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1026 |.else
1027 | mov L:RA, SAVE_L
1028 | mov ARG2, TAB:RB
1029 | mov RB, BASE // Save BASE.
1030 | mov ARG3, RC
1031 | mov ARG1, L:RA
1032 | mov L:RA->base, BASE
1033 |.endif
1034 | mov SAVE_PC, PC
1035 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1036 | // TValue * returned in eax (RC).
1037 | movzx RA, PC_RA
1038 | mov BASE, RB // Restore BASE.
1039 | jmp ->BC_TSETR_Z
1040 |
1010 |//-- Comparison metamethods --------------------------------------------- 1041 |//-- Comparison metamethods ---------------------------------------------
1011 | 1042 |
1012 |->vmeta_comp: 1043 |->vmeta_comp:
@@ -1101,6 +1132,26 @@ static void build_subroutines(BuildCtx *ctx)
1101 | jmp <3 1132 | jmp <3
1102 |.endif 1133 |.endif
1103 | 1134 |
1135 |->vmeta_istype:
1136 |.if X64
1137 | mov L:CARG1d, SAVE_L
1138 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1139 | mov CARG2d, RA
1140 | movzx CARG3d, PC_RD
1141 | mov L:RB, L:CARG1d
1142 |.else
1143 | movzx RD, PC_RD
1144 | mov ARG2, RA
1145 | mov L:RB, SAVE_L
1146 | mov ARG3, RD
1147 | mov ARG1, L:RB
1148 | mov L:RB->base, BASE
1149 |.endif
1150 | mov SAVE_PC, PC
1151 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1152 | mov BASE, L:RB->base
1153 | jmp <6
1154 |
1104 |//-- Arithmetic metamethods --------------------------------------------- 1155 |//-- Arithmetic metamethods ---------------------------------------------
1105 | 1156 |
1106 |->vmeta_arith_vno: 1157 |->vmeta_arith_vno:
@@ -1509,11 +1560,7 @@ static void build_subroutines(BuildCtx *ctx)
1509 |.else 1560 |.else
1510 | jae ->fff_fallback 1561 | jae ->fff_fallback
1511 |.endif 1562 |.endif
1512 |.if SSE
1513 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1563 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1514 |.else
1515 | fld qword [BASE]; jmp ->fff_resn
1516 |.endif
1517 | 1564 |
1518 |.ffunc_1 tostring 1565 |.ffunc_1 tostring
1519 | // Only handles the string or number case inline. 1566 | // Only handles the string or number case inline.
@@ -1631,19 +1678,12 @@ static void build_subroutines(BuildCtx *ctx)
1631 | add RD, 1 1678 | add RD, 1
1632 | mov dword [BASE-4], LJ_TISNUM 1679 | mov dword [BASE-4], LJ_TISNUM
1633 | mov dword [BASE-8], RD 1680 | mov dword [BASE-8], RD
1634 |.elif SSE 1681 |.else
1635 | movsd xmm0, qword [BASE+8] 1682 | movsd xmm0, qword [BASE+8]
1636 | sseconst_1 xmm1, RBa 1683 | sseconst_1 xmm1, RBa
1637 | addsd xmm0, xmm1 1684 | addsd xmm0, xmm1
1638 | cvtsd2si RD, xmm0 1685 | cvttsd2si RD, xmm0
1639 | movsd qword [BASE-8], xmm0 1686 | movsd qword [BASE-8], xmm0
1640 |.else
1641 | fld qword [BASE+8]
1642 | fld1
1643 | faddp st1
1644 | fist ARG1
1645 | fstp qword [BASE-8]
1646 | mov RD, ARG1
1647 |.endif 1687 |.endif
1648 | mov TAB:RB, [BASE] 1688 | mov TAB:RB, [BASE]
1649 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1689 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1690,12 +1730,9 @@ static void build_subroutines(BuildCtx *ctx)
1690 |.if DUALNUM 1730 |.if DUALNUM
1691 | mov dword [BASE+12], LJ_TISNUM 1731 | mov dword [BASE+12], LJ_TISNUM
1692 | mov dword [BASE+8], 0 1732 | mov dword [BASE+8], 0
1693 |.elif SSE 1733 |.else
1694 | xorps xmm0, xmm0 1734 | xorps xmm0, xmm0
1695 | movsd qword [BASE+8], xmm0 1735 | movsd qword [BASE+8], xmm0
1696 |.else
1697 | fldz
1698 | fstp qword [BASE+8]
1699 |.endif 1736 |.endif
1700 | mov RD, 1+3 1737 | mov RD, 1+3
1701 | jmp ->fff_res 1738 | jmp ->fff_res
@@ -1925,12 +1962,10 @@ static void build_subroutines(BuildCtx *ctx)
1925 |->fff_resi: // Dummy. 1962 |->fff_resi: // Dummy.
1926 |.endif 1963 |.endif
1927 | 1964 |
1928 |.if SSE
1929 |->fff_resn: 1965 |->fff_resn:
1930 | mov PC, [BASE-4] 1966 | mov PC, [BASE-4]
1931 | fstp qword [BASE-8] 1967 | fstp qword [BASE-8]
1932 | jmp ->fff_res1 1968 | jmp ->fff_res1
1933 |.endif
1934 | 1969 |
1935 | .ffunc_1 math_abs 1970 | .ffunc_1 math_abs
1936 |.if DUALNUM 1971 |.if DUALNUM
@@ -1954,8 +1989,6 @@ static void build_subroutines(BuildCtx *ctx)
1954 |.else 1989 |.else
1955 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1990 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1956 |.endif 1991 |.endif
1957 |
1958 |.if SSE
1959 | movsd xmm0, qword [BASE] 1992 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa 1993 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1 1994 | andps xmm0, xmm1
@@ -1963,15 +1996,6 @@ static void build_subroutines(BuildCtx *ctx)
1963 | mov PC, [BASE-4] 1996 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0 1997 | movsd qword [BASE-8], xmm0
1965 | // fallthrough 1998 | // fallthrough
1966 |.else
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn:
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 |.endif
1975 | 1999 |
1976 |->fff_res1: 2000 |->fff_res1:
1977 | mov RD, 1+1 2001 | mov RD, 1+1
@@ -2008,48 +2032,24 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.else 2032 |.else
2009 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2033 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2010 |.endif 2034 |.endif
2011 |.if SSE
2012 | movsd xmm0, qword [BASE] 2035 | movsd xmm0, qword [BASE]
2013 | call ->vm_ .. func 2036 | call ->vm_ .. func .. _sse
2014 | .if DUALNUM 2037 |.if DUALNUM
2015 | cvtsd2si RB, xmm0 2038 | cvttsd2si RB, xmm0
2016 | cmp RB, 0x80000000 2039 | cmp RB, 0x80000000
2017 | jne ->fff_resi 2040 | jne ->fff_resi
2018 | cvtsi2sd xmm1, RB 2041 | cvtsi2sd xmm1, RB
2019 | ucomisd xmm0, xmm1 2042 | ucomisd xmm0, xmm1
2020 | jp ->fff_resxmm0 2043 | jp ->fff_resxmm0
2021 | je ->fff_resi 2044 | je ->fff_resi
2022 | .endif
2023 | jmp ->fff_resxmm0
2024 |.else
2025 | fld qword [BASE]
2026 | call ->vm_ .. func
2027 | .if DUALNUM
2028 | fist ARG1
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop
2038 | jmp ->fff_resi
2039 | .else
2040 | jmp ->fff_resn
2041 | .endif
2042 |.endif 2045 |.endif
2046 | jmp ->fff_resxmm0
2043 |.endmacro 2047 |.endmacro
2044 | 2048 |
2045 | math_round floor 2049 | math_round floor
2046 | math_round ceil 2050 | math_round ceil
2047 | 2051 |
2048 |.if SSE
2049 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2052 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2050 |.else
2051 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2052 |.endif
2053 | 2053 |
2054 |.ffunc math_log 2054 |.ffunc math_log
2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2055 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
@@ -2072,42 +2072,24 @@ static void build_subroutines(BuildCtx *ctx)
2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn 2072 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2073 | 2073 |
2074 |.macro math_extern, func 2074 |.macro math_extern, func
2075 |.if SSE
2076 | .ffunc_nsse math_ .. func 2075 | .ffunc_nsse math_ .. func
2077 | .if not X64 2076 |.if not X64
2078 | movsd FPARG1, xmm0 2077 | movsd FPARG1, xmm0
2079 | .endif
2080 |.else
2081 | .ffunc_n math_ .. func
2082 | fstp FPARG1
2083 |.endif 2078 |.endif
2084 | mov RB, BASE 2079 | mov RB, BASE
2085 | call extern lj_vm_ .. func 2080 | call extern lj_vm_ .. func
2086 | mov BASE, RB 2081 | mov BASE, RB
2087 | .if X64 2082 |.if X64
2088 | jmp ->fff_resxmm0 2083 | jmp ->fff_resxmm0
2089 | .else 2084 |.else
2090 | jmp ->fff_resn 2085 | jmp ->fff_resn
2091 | .endif 2086 |.endif
2092 |.endmacro 2087 |.endmacro
2093 | 2088 |
2094 | math_extern sinh 2089 | math_extern sinh
2095 | math_extern cosh 2090 | math_extern cosh
2096 | math_extern tanh 2091 | math_extern tanh
2097 | 2092 |
2098 |->ff_math_deg:
2099 |.if SSE
2100 |.ffunc_nsse math_rad
2101 | mov CFUNC:RB, [BASE-8]
2102 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2103 | jmp ->fff_resxmm0
2104 |.else
2105 |.ffunc_n math_rad
2106 | mov CFUNC:RB, [BASE-8]
2107 | fmul qword CFUNC:RB->upvalue[0]
2108 | jmp ->fff_resn
2109 |.endif
2110 |
2111 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn 2093 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2112 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2094 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2113 | 2095 |
@@ -2123,65 +2105,34 @@ static void build_subroutines(BuildCtx *ctx)
2123 | cmp RB, 0x00200000; jb >4 2105 | cmp RB, 0x00200000; jb >4
2124 |1: 2106 |1:
2125 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2107 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2126 |.if SSE
2127 | cvtsi2sd xmm0, RB 2108 | cvtsi2sd xmm0, RB
2128 |.else
2129 | mov TMP1, RB; fild TMP1
2130 |.endif
2131 | mov RB, [BASE-4] 2109 | mov RB, [BASE-4]
2132 | and RB, 0x800fffff // Mask off exponent. 2110 | and RB, 0x800fffff // Mask off exponent.
2133 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2111 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2134 | mov [BASE-4], RB 2112 | mov [BASE-4], RB
2135 |2: 2113 |2:
2136 |.if SSE
2137 | movsd qword [BASE], xmm0 2114 | movsd qword [BASE], xmm0
2138 |.else
2139 | fstp qword [BASE]
2140 |.endif
2141 | mov RD, 1+2 2115 | mov RD, 1+2
2142 | jmp ->fff_res 2116 | jmp ->fff_res
2143 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2117 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2144 |.if SSE
2145 | xorps xmm0, xmm0; jmp <2 2118 | xorps xmm0, xmm0; jmp <2
2146 |.else
2147 | fldz; jmp <2
2148 |.endif
2149 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2119 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2150 |.if SSE
2151 | movsd xmm0, qword [BASE] 2120 | movsd xmm0, qword [BASE]
2152 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2121 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2153 | mulsd xmm0, xmm1 2122 | mulsd xmm0, xmm1
2154 | movsd qword [BASE-8], xmm0 2123 | movsd qword [BASE-8], xmm0
2155 |.else
2156 | fld qword [BASE]
2157 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2158 | fstp qword [BASE-8]
2159 |.endif
2160 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2124 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2161 | 2125 |
2162 |.if SSE
2163 |.ffunc_nsse math_modf 2126 |.ffunc_nsse math_modf
2164 |.else
2165 |.ffunc_n math_modf
2166 |.endif
2167 | mov RB, [BASE+4] 2127 | mov RB, [BASE+4]
2168 | mov PC, [BASE-4] 2128 | mov PC, [BASE-4]
2169 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2129 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2170 |.if SSE
2171 | movaps xmm4, xmm0 2130 | movaps xmm4, xmm0
2172 | call ->vm_trunc 2131 | call ->vm_trunc_sse
2173 | subsd xmm4, xmm0 2132 | subsd xmm4, xmm0
2174 |1: 2133 |1:
2175 | movsd qword [BASE-8], xmm0 2134 | movsd qword [BASE-8], xmm0
2176 | movsd qword [BASE], xmm4 2135 | movsd qword [BASE], xmm4
2177 |.else
2178 | fdup
2179 | call ->vm_trunc
2180 | fsub st1, st0
2181 |1:
2182 | fstp qword [BASE-8]
2183 | fstp qword [BASE]
2184 |.endif
2185 | mov RC, [BASE-4]; mov RB, [BASE+4] 2136 | mov RC, [BASE-4]; mov RB, [BASE+4]
2186 | xor RC, RB; js >3 // Need to adjust sign? 2137 | xor RC, RB; js >3 // Need to adjust sign?
2187 |2: 2138 |2:
@@ -2191,24 +2142,16 @@ static void build_subroutines(BuildCtx *ctx)
2191 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2142 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2192 | jmp <2 2143 | jmp <2
2193 |4: 2144 |4:
2194 |.if SSE
2195 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2145 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2196 |.else
2197 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2198 |.endif
2199 | 2146 |
2200 |.ffunc_nnr math_fmod 2147 |.ffunc_nnr math_fmod
2201 |1: ; fprem; fnstsw ax; sahf; jp <1 2148 |1: ; fprem; fnstsw ax; sahf; jp <1
2202 | fpop1 2149 | fpop1
2203 | jmp ->fff_resn 2150 | jmp ->fff_resn
2204 | 2151 |
2205 |.if SSE 2152 |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0
2206 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2207 |.else
2208 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2209 |.endif
2210 | 2153 |
2211 |.macro math_minmax, name, cmovop, fcmovop, sseop 2154 |.macro math_minmax, name, cmovop, sseop
2212 | .ffunc name 2155 | .ffunc name
2213 | mov RA, 2 2156 | mov RA, 2
2214 | cmp dword [BASE+4], LJ_TISNUM 2157 | cmp dword [BASE+4], LJ_TISNUM
@@ -2225,12 +2168,7 @@ static void build_subroutines(BuildCtx *ctx)
2225 |3: 2168 |3:
2226 | ja ->fff_fallback 2169 | ja ->fff_fallback
2227 | // Convert intermediate result to number and continue below. 2170 | // Convert intermediate result to number and continue below.
2228 |.if SSE
2229 | cvtsi2sd xmm0, RB 2171 | cvtsi2sd xmm0, RB
2230 |.else
2231 | mov TMP1, RB
2232 | fild TMP1
2233 |.endif
2234 | jmp >6 2172 | jmp >6
2235 |4: 2173 |4:
2236 | ja ->fff_fallback 2174 | ja ->fff_fallback
@@ -2238,7 +2176,6 @@ static void build_subroutines(BuildCtx *ctx)
2238 | jae ->fff_fallback 2176 | jae ->fff_fallback
2239 |.endif 2177 |.endif
2240 | 2178 |
2241 |.if SSE
2242 | movsd xmm0, qword [BASE] 2179 | movsd xmm0, qword [BASE]
2243 |5: // Handle numbers or integers. 2180 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0 2181 | cmp RA, RD; jae ->fff_resxmm0
@@ -2257,34 +2194,10 @@ static void build_subroutines(BuildCtx *ctx)
2257 | sseop xmm0, xmm1 2194 | sseop xmm0, xmm1
2258 | add RA, 1 2195 | add RA, 1
2259 | jmp <5 2196 | jmp <5
2260 |.else
2261 | fld qword [BASE]
2262 |5: // Handle numbers or integers.
2263 | cmp RA, RD; jae ->fff_resn
2264 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2265 |.if DUALNUM
2266 | jb >6
2267 | ja >9
2268 | fild dword [BASE+RA*8-8]
2269 | jmp >7
2270 |.else
2271 | jae >9
2272 |.endif
2273 |6:
2274 | fld qword [BASE+RA*8-8]
2275 |7:
2276 | fucomi st1; fcmovop st1; fpop1
2277 | add RA, 1
2278 | jmp <5
2279 |.endif
2280 |.endmacro 2197 |.endmacro
2281 | 2198 |
2282 | math_minmax math_min, cmovg, fcmovnbe, minsd 2199 | math_minmax math_min, cmovg, minsd
2283 | math_minmax math_max, cmovl, fcmovbe, maxsd 2200 | math_minmax math_max, cmovl, maxsd
2284 |.if not SSE
2285 |9:
2286 | fpop; jmp ->fff_fallback
2287 |.endif
2288 | 2201 |
2289 |//-- String library ----------------------------------------------------- 2202 |//-- String library -----------------------------------------------------
2290 | 2203 |
@@ -2293,10 +2206,8 @@ static void build_subroutines(BuildCtx *ctx)
2293 | mov STR:RB, [BASE] 2206 | mov STR:RB, [BASE]
2294 |.if DUALNUM 2207 |.if DUALNUM
2295 | mov RB, dword STR:RB->len; jmp ->fff_resi 2208 | mov RB, dword STR:RB->len; jmp ->fff_resi
2296 |.elif SSE
2297 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2298 |.else 2209 |.else
2299 | fild dword STR:RB->len; jmp ->fff_resn 2210 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2300 |.endif 2211 |.endif
2301 | 2212 |
2302 |.ffunc string_byte // Only handle the 1-arg case here. 2213 |.ffunc string_byte // Only handle the 1-arg case here.
@@ -2309,10 +2220,8 @@ static void build_subroutines(BuildCtx *ctx)
2309 | movzx RB, byte STR:RB[1] 2220 | movzx RB, byte STR:RB[1]
2310 |.if DUALNUM 2221 |.if DUALNUM
2311 | jmp ->fff_resi 2222 | jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2314 |.else 2223 |.else
2315 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2224 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2316 |.endif 2225 |.endif
2317 | 2226 |
2318 |.ffunc string_char // Only handle the 1-arg case here. 2227 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2324,16 +2233,11 @@ static void build_subroutines(BuildCtx *ctx)
2324 | mov RB, dword [BASE] 2233 | mov RB, dword [BASE]
2325 | cmp RB, 255; ja ->fff_fallback 2234 | cmp RB, 255; ja ->fff_fallback
2326 | mov TMP2, RB 2235 | mov TMP2, RB
2327 |.elif SSE 2236 |.else
2328 | jae ->fff_fallback 2237 | jae ->fff_fallback
2329 | cvttsd2si RB, qword [BASE] 2238 | cvttsd2si RB, qword [BASE]
2330 | cmp RB, 255; ja ->fff_fallback 2239 | cmp RB, 255; ja ->fff_fallback
2331 | mov TMP2, RB 2240 | mov TMP2, RB
2332 |.else
2333 | jae ->fff_fallback
2334 | fld qword [BASE]
2335 | fistp TMP2
2336 | cmp TMP2, 255; ja ->fff_fallback
2337 |.endif 2241 |.endif
2338 |.if X64 2242 |.if X64
2339 | mov TMP3, 1 2243 | mov TMP3, 1
@@ -2371,14 +2275,10 @@ static void build_subroutines(BuildCtx *ctx)
2371 | jne ->fff_fallback 2275 | jne ->fff_fallback
2372 | mov RB, dword [BASE+16] 2276 | mov RB, dword [BASE+16]
2373 | mov TMP2, RB 2277 | mov TMP2, RB
2374 |.elif SSE 2278 |.else
2375 | jae ->fff_fallback 2279 | jae ->fff_fallback
2376 | cvttsd2si RB, qword [BASE+16] 2280 | cvttsd2si RB, qword [BASE+16]
2377 | mov TMP2, RB 2281 | mov TMP2, RB
2378 |.else
2379 | jae ->fff_fallback
2380 | fld qword [BASE+16]
2381 | fistp TMP2
2382 |.endif 2282 |.endif
2383 |1: 2283 |1:
2384 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2284 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2393,12 +2293,8 @@ static void build_subroutines(BuildCtx *ctx)
2393 | mov RB, STR:RB->len 2293 | mov RB, STR:RB->len
2394 |.if DUALNUM 2294 |.if DUALNUM
2395 | mov RA, dword [BASE+8] 2295 | mov RA, dword [BASE+8]
2396 |.elif SSE
2397 | cvttsd2si RA, qword [BASE+8]
2398 |.else 2296 |.else
2399 | fld qword [BASE+8] 2297 | cvttsd2si RA, qword [BASE+8]
2400 | fistp ARG3
2401 | mov RA, ARG3
2402 |.endif 2298 |.endif
2403 | mov RC, TMP2 2299 | mov RC, TMP2
2404 | cmp RB, RC // len < end? (unsigned compare) 2300 | cmp RB, RC // len < end? (unsigned compare)
@@ -2451,34 +2347,30 @@ static void build_subroutines(BuildCtx *ctx)
2451 |.if DUALNUM 2347 |.if DUALNUM
2452 | jne ->fff_fallback 2348 | jne ->fff_fallback
2453 | mov RC, dword [BASE+8] 2349 | mov RC, dword [BASE+8]
2454 |.elif SSE
2455 | jae ->fff_fallback
2456 | cvttsd2si RC, qword [BASE+8]
2457 |.else 2350 |.else
2458 | jae ->fff_fallback 2351 | jae ->fff_fallback
2459 | fld qword [BASE+8] 2352 | cvttsd2si RC, qword [BASE+8]
2460 | fistp TMP2
2461 | mov RC, TMP2
2462 |.endif 2353 |.endif
2463 | test RC, RC 2354 | test RC, RC
2464 | jle ->fff_emptystr // Count <= 0? (or non-int) 2355 | jle ->fff_emptystr // Count <= 0? (or non-int)
2465 | cmp dword STR:RB->len, 1 2356 | cmp dword STR:RB->len, 1
2466 | jb ->fff_emptystr // Zero length string? 2357 | jb ->fff_emptystr // Zero length string?
2467 | jne ->fff_fallback_2 // Fallback for > 1-char strings. 2358 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2468 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2469 | movzx RA, byte STR:RB[1] 2359 | movzx RA, byte STR:RB[1]
2470 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2360 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2361 | add RB, RC
2362 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.e)], RB; jb ->fff_fallback_2
2471 |.if X64 2363 |.if X64
2472 | mov TMP3, RC 2364 | mov TMP3, RC
2473 |.else 2365 |.else
2474 | mov ARG3, RC 2366 | mov ARG3, RC
2475 |.endif 2367 |.endif
2476 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). 2368 |1: // Fill buffer with char.
2477 | mov [RB], RAL 2369 | sub RB, 1
2478 | add RB, 1
2479 | sub RC, 1 2370 | sub RC, 1
2371 | mov [RB], RAL
2480 | jnz <1 2372 | jnz <1
2481 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2373 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2482 | jmp ->fff_newstr 2374 | jmp ->fff_newstr
2483 | 2375 |
2484 |.ffunc_1 string_reverse 2376 |.ffunc_1 string_reverse
@@ -2488,15 +2380,16 @@ static void build_subroutines(BuildCtx *ctx)
2488 | mov RC, STR:RB->len 2380 | mov RC, STR:RB->len
2489 | test RC, RC 2381 | test RC, RC
2490 | jz ->fff_emptystr // Zero length string? 2382 | jz ->fff_emptystr // Zero length string?
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2492 | add RB, #STR
2493 | mov TMP2, PC // Need another temp register. 2383 | mov TMP2, PC // Need another temp register.
2384 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2385 | lea RA, [PC+RC]
2386 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.e)], RA; jb ->fff_fallback_1
2387 | add RB, #STR
2494 |.if X64 2388 |.if X64
2495 | mov TMP3, RC 2389 | mov TMP3, RC
2496 |.else 2390 |.else
2497 | mov ARG3, RC 2391 | mov ARG3, RC
2498 |.endif 2392 |.endif
2499 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2500 |1: 2393 |1:
2501 | movzx RA, byte [RB] 2394 | movzx RA, byte [RB]
2502 | add RB, 1 2395 | add RB, 1
@@ -2511,17 +2404,18 @@ static void build_subroutines(BuildCtx *ctx)
2511 | .ffunc_1 name 2404 | .ffunc_1 name
2512 | ffgccheck 2405 | ffgccheck
2513 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2406 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2407 | mov TMP2, PC // Need another temp register.
2514 | mov STR:RB, [BASE] 2408 | mov STR:RB, [BASE]
2515 | mov RC, STR:RB->len 2409 | mov RC, STR:RB->len
2516 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2410 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.b)]
2411 | lea RA, [PC+RC]
2412 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.e)], RA; jb ->fff_fallback_1
2517 | add RB, #STR 2413 | add RB, #STR
2518 | mov TMP2, PC // Need another temp register.
2519 |.if X64 2414 |.if X64
2520 | mov TMP3, RC 2415 | mov TMP3, RC
2521 |.else 2416 |.else
2522 | mov ARG3, RC 2417 | mov ARG3, RC
2523 |.endif 2418 |.endif
2524 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2525 | jmp >3 2419 | jmp >3
2526 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). 2420 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2527 | movzx RA, byte [RB+RC] 2421 | movzx RA, byte [RB+RC]
@@ -2543,23 +2437,6 @@ static void build_subroutines(BuildCtx *ctx)
2543 |ffstring_case string_lower, 0x41, 0x5a 2437 |ffstring_case string_lower, 0x41, 0x5a
2544 |ffstring_case string_upper, 0x61, 0x7a 2438 |ffstring_case string_upper, 0x61, 0x7a
2545 | 2439 |
2546 |//-- Table library ------------------------------------------------------
2547 |
2548 |.ffunc_1 table_getn
2549 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2550 | mov RB, BASE // Save BASE.
2551 | mov TAB:FCARG1, [BASE]
2552 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2553 | // Length of table returned in eax (RD).
2554 | mov BASE, RB // Restore BASE.
2555 |.if DUALNUM
2556 | mov RB, RD; jmp ->fff_resi
2557 |.elif SSE
2558 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2559 |.else
2560 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2561 |.endif
2562 |
2563 |//-- Bit library -------------------------------------------------------- 2440 |//-- Bit library --------------------------------------------------------
2564 | 2441 |
2565 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). 2442 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
@@ -2567,11 +2444,7 @@ static void build_subroutines(BuildCtx *ctx)
2567 |.macro .ffunc_bit, name, kind 2444 |.macro .ffunc_bit, name, kind
2568 | .ffunc_1 name 2445 | .ffunc_1 name
2569 |.if kind == 2 2446 |.if kind == 2
2570 |.if SSE
2571 | sseconst_tobit xmm1, RBa 2447 | sseconst_tobit xmm1, RBa
2572 |.else
2573 | mov TMP1, TOBIT_BIAS
2574 |.endif
2575 |.endif 2448 |.endif
2576 | cmp dword [BASE+4], LJ_TISNUM 2449 | cmp dword [BASE+4], LJ_TISNUM
2577 |.if DUALNUM 2450 |.if DUALNUM
@@ -2587,37 +2460,17 @@ static void build_subroutines(BuildCtx *ctx)
2587 |.else 2460 |.else
2588 | jae ->fff_fallback 2461 | jae ->fff_fallback
2589 |.endif 2462 |.endif
2590 |.if SSE
2591 | movsd xmm0, qword [BASE] 2463 | movsd xmm0, qword [BASE]
2592 |.if kind < 2 2464 |.if kind < 2
2593 | sseconst_tobit xmm1, RBa 2465 | sseconst_tobit xmm1, RBa
2594 |.endif 2466 |.endif
2595 | addsd xmm0, xmm1 2467 | addsd xmm0, xmm1
2596 | movd RB, xmm0 2468 | movd RB, xmm0
2597 |.else
2598 | fld qword [BASE]
2599 |.if kind < 2
2600 | mov TMP1, TOBIT_BIAS
2601 |.endif
2602 | fadd TMP1
2603 | fstp FPARG1
2604 |.if kind > 0
2605 | mov RB, ARG1
2606 |.endif
2607 |.endif
2608 |2: 2469 |2:
2609 |.endmacro 2470 |.endmacro
2610 | 2471 |
2611 |.ffunc_bit bit_tobit, 0 2472 |.ffunc_bit bit_tobit, 0
2612 |.if DUALNUM or SSE
2613 |.if not SSE
2614 | mov RB, ARG1
2615 |.endif
2616 | jmp ->fff_resbit 2473 | jmp ->fff_resbit
2617 |.else
2618 | fild ARG1
2619 | jmp ->fff_resn
2620 |.endif
2621 | 2474 |
2622 |.macro .ffunc_bit_op, name, ins 2475 |.macro .ffunc_bit_op, name, ins
2623 | .ffunc_bit name, 2 2476 | .ffunc_bit name, 2
@@ -2637,17 +2490,10 @@ static void build_subroutines(BuildCtx *ctx)
2637 |.else 2490 |.else
2638 | jae ->fff_fallback_bit_op 2491 | jae ->fff_fallback_bit_op
2639 |.endif 2492 |.endif
2640 |.if SSE
2641 | movsd xmm0, qword [RD] 2493 | movsd xmm0, qword [RD]
2642 | addsd xmm0, xmm1 2494 | addsd xmm0, xmm1
2643 | movd RA, xmm0 2495 | movd RA, xmm0
2644 | ins RB, RA 2496 | ins RB, RA
2645 |.else
2646 | fld qword [RD]
2647 | fadd TMP1
2648 | fstp FPARG1
2649 | ins RB, ARG1
2650 |.endif
2651 | sub RD, 8 2497 | sub RD, 8
2652 | jmp <1 2498 | jmp <1
2653 |.endmacro 2499 |.endmacro
@@ -2664,15 +2510,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 | not RB 2510 | not RB
2665 |.if DUALNUM 2511 |.if DUALNUM
2666 | jmp ->fff_resbit 2512 | jmp ->fff_resbit
2667 |.elif SSE 2513 |.else
2668 |->fff_resbit: 2514 |->fff_resbit:
2669 | cvtsi2sd xmm0, RB 2515 | cvtsi2sd xmm0, RB
2670 | jmp ->fff_resxmm0 2516 | jmp ->fff_resxmm0
2671 |.else
2672 |->fff_resbit:
2673 | mov ARG1, RB
2674 | fild ARG1
2675 | jmp ->fff_resn
2676 |.endif 2517 |.endif
2677 | 2518 |
2678 |->fff_fallback_bit_op: 2519 |->fff_fallback_bit_op:
@@ -2685,22 +2526,13 @@ static void build_subroutines(BuildCtx *ctx)
2685 | // Note: no inline conversion from number for 2nd argument! 2526 | // Note: no inline conversion from number for 2nd argument!
2686 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2527 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2687 | mov RA, dword [BASE+8] 2528 | mov RA, dword [BASE+8]
2688 |.elif SSE 2529 |.else
2689 | .ffunc_nnsse name 2530 | .ffunc_nnsse name
2690 | sseconst_tobit xmm2, RBa 2531 | sseconst_tobit xmm2, RBa
2691 | addsd xmm0, xmm2 2532 | addsd xmm0, xmm2
2692 | addsd xmm1, xmm2 2533 | addsd xmm1, xmm2
2693 | movd RB, xmm0 2534 | movd RB, xmm0
2694 | movd RA, xmm1 2535 | movd RA, xmm1
2695 |.else
2696 | .ffunc_nn name
2697 | mov TMP1, TOBIT_BIAS
2698 | fadd TMP1
2699 | fstp FPARG3
2700 | fadd TMP1
2701 | fstp FPARG1
2702 | mov RA, ARG3
2703 | mov RB, ARG1
2704 |.endif 2536 |.endif
2705 | ins RB, cl // Assumes RA is ecx. 2537 | ins RB, cl // Assumes RA is ecx.
2706 | jmp ->fff_resbit 2538 | jmp ->fff_resbit
@@ -3051,27 +2883,9 @@ static void build_subroutines(BuildCtx *ctx)
3051 |//----------------------------------------------------------------------- 2883 |//-----------------------------------------------------------------------
3052 | 2884 |
3053 |// FP value rounding. Called by math.floor/math.ceil fast functions 2885 |// FP value rounding. Called by math.floor/math.ceil fast functions
3054 |// and from JIT code. 2886 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3055 | 2887 |.macro vm_round, name, mode
3056 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2888 |->name .. _sse:
3057 |.macro vm_round_x87, mode1, mode2
3058 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3059 | mov [esp+8], eax
3060 | mov ax, mode1
3061 | or ax, [esp+4]
3062 |.if mode2 ~= 0xffff
3063 | and ax, mode2
3064 |.endif
3065 | mov [esp+6], ax
3066 | fldcw word [esp+6]
3067 | frndint
3068 | fldcw word [esp+4]
3069 | mov eax, [esp+8]
3070 | ret
3071 |.endmacro
3072 |
3073 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 |.macro vm_round_sse, mode
3075 | sseconst_abs xmm2, RDa 2889 | sseconst_abs xmm2, RDa
3076 | sseconst_2p52 xmm3, RDa 2890 | sseconst_2p52 xmm3, RDa
3077 | movaps xmm1, xmm0 2891 | movaps xmm1, xmm0
@@ -3107,22 +2921,21 @@ static void build_subroutines(BuildCtx *ctx)
3107 | ret 2921 | ret
3108 |.endmacro 2922 |.endmacro
3109 | 2923 |
3110 |.macro vm_round, name, ssemode, mode1, mode2 2924 |->vm_floor:
3111 |->name: 2925 |.if not X64
3112 |.if not SSE 2926 | movsd xmm0, qword [esp+4]
3113 | vm_round_x87 mode1, mode2 2927 | call ->vm_floor_sse
2928 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2929 | fld qword [esp+4]
2930 | ret
3114 |.endif 2931 |.endif
3115 |->name .. _sse:
3116 | vm_round_sse ssemode
3117 |.endmacro
3118 | 2932 |
3119 | vm_round vm_floor, 0, 0x0400, 0xf7ff 2933 | vm_round vm_floor, 0
3120 | vm_round vm_ceil, 1, 0x0800, 0xfbff 2934 | vm_round vm_ceil, 1
3121 | vm_round vm_trunc, 2, 0x0c00, 0xffff 2935 | vm_round vm_trunc, 2
3122 | 2936 |
3123 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 2937 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3124 |->vm_mod: 2938 |->vm_mod:
3125 |.if SSE
3126 |// Args in xmm0/xmm1, return value in xmm0. 2939 |// Args in xmm0/xmm1, return value in xmm0.
3127 |// Caveat: xmm0-xmm5 and RC (eax) modified! 2940 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3128 | movaps xmm5, xmm0 2941 | movaps xmm5, xmm0
@@ -3150,23 +2963,6 @@ static void build_subroutines(BuildCtx *ctx)
3150 | movaps xmm0, xmm5 2963 | movaps xmm0, xmm5
3151 | subsd xmm0, xmm1 2964 | subsd xmm0, xmm1
3152 | ret 2965 | ret
3153 |.else
3154 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3155 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3156 | fld st1
3157 | fdiv st1
3158 | fnstcw word [esp+4]
3159 | mov ax, 0x0400
3160 | or ax, [esp+4]
3161 | and ax, 0xf7ff
3162 | mov [esp+6], ax
3163 | fldcw word [esp+6]
3164 | frndint
3165 | fldcw word [esp+4]
3166 | fmulp st1
3167 | fsubp st1
3168 | ret
3169 |.endif
3170 | 2966 |
3171 |// FP log2(x). Called by math.log(x, base). 2967 |// FP log2(x). Called by math.log(x, base).
3172 |->vm_log2: 2968 |->vm_log2:
@@ -3217,105 +3013,15 @@ static void build_subroutines(BuildCtx *ctx)
3217 | 3013 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function, 3014 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith. 3015 |// and vm_arith.
3220 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3221 |// Caveat: needs 3 slots on x87 stack!
3222 |->vm_pow:
3223 |.if not SSE
3224 | fist dword [esp+4] // Store/reload int before comparison.
3225 | fild dword [esp+4] // Integral exponent used in vm_powi.
3226 | fucomip st1
3227 | jnz >8 // Branch for FP exponents.
3228 | jp >9 // Branch for NaN exponent.
3229 | fpop // Pop y and fallthrough to vm_powi.
3230 |
3231 |// FP/int power function x^i. Arg1/ret on x87 stack.
3232 |// Arg2 (int) on C stack. RC (eax) modified.
3233 |// Caveat: needs 2 slots on x87 stack!
3234 | mov eax, [esp+4]
3235 | cmp eax, 1; jle >6 // i<=1?
3236 | // Now 1 < (unsigned)i <= 0x80000000.
3237 |1: // Handle leading zeros.
3238 | test eax, 1; jnz >2
3239 | fmul st0
3240 | shr eax, 1
3241 | jmp <1
3242 |2:
3243 | shr eax, 1; jz >5
3244 | fdup
3245 |3: // Handle trailing bits.
3246 | fmul st0
3247 | shr eax, 1; jz >4
3248 | jnc <3
3249 | fmul st1, st0
3250 | jmp <3
3251 |4:
3252 | fmulp st1
3253 |5:
3254 | ret
3255 |6:
3256 | je <5 // x^1 ==> x
3257 | jb >7
3258 | fld1; fdivrp st1
3259 | neg eax
3260 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3261 | jmp <1 // x^-i ==> (1/x)^i
3262 |7:
3263 | fpop; fld1 // x^0 ==> 1
3264 | ret
3265 |
3266 |8: // FP/FP power function x^y.
3267 | fst dword [esp+4]
3268 | fxch
3269 | fst dword [esp+8]
3270 | mov eax, [esp+4]; shl eax, 1
3271 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3272 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3273 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3274 | fyl2x
3275 | jmp ->vm_exp2raw
3276 |
3277 |9: // Handle x^NaN.
3278 | fld1
3279 | fucomip st2
3280 | je >1 // 1^NaN ==> 1
3281 | fxch // x^NaN ==> NaN
3282 |1:
3283 | fpop
3284 | ret
3285 |
3286 |2: // Handle x^+-Inf.
3287 | fabs
3288 | fld1
3289 | fucomip st1
3290 | je >3 // +-1^+-Inf ==> 1
3291 | fpop; fabs; fldz; mov eax, 0; setc al
3292 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3293 | fxch
3294 |3:
3295 | fpop1; fabs
3296 | ret
3297 |
3298 |4: // Handle +-0^y or +-Inf^y.
3299 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3300 | fpop; fpop
3301 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3302 | fldz // y < 0, +-Inf^y ==> 0
3303 | ret
3304 |5:
3305 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3306 | fld dword [esp+4]
3307 | ret
3308 |.endif
3309 |
3310 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. 3016 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3311 |// Needs 16 byte scratch area for x86. Also called from JIT code. 3017 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3312 |->vm_pow_sse: 3018 |->vm_pow_sse:
3313 | cvtsd2si eax, xmm1 3019 | cvttsd2si eax, xmm1
3314 | cvtsi2sd xmm2, eax 3020 | cvtsi2sd xmm2, eax
3315 | ucomisd xmm1, xmm2 3021 | ucomisd xmm1, xmm2
3316 | jnz >8 // Branch for FP exponents. 3022 | jnz >8 // Branch for FP exponents.
3317 | jp >9 // Branch for NaN exponent. 3023 | jp >9 // Branch for NaN exponent.
3318 | // Fallthrough to vm_powi_sse. 3024 | // Fallthrough.
3319 | 3025 |
3320 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3026 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3321 |->vm_powi_sse: 3027 |->vm_powi_sse:
@@ -3437,8 +3143,8 @@ static void build_subroutines(BuildCtx *ctx)
3437 | .else 3143 | .else
3438 | .define fpmop, CARG1d 3144 | .define fpmop, CARG1d
3439 | .endif 3145 | .endif
3440 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3146 | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse
3441 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3147 | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2
3442 | sqrtsd xmm0, xmm0; ret 3148 | sqrtsd xmm0, xmm0; ret
3443 |2: 3149 |2:
3444 | .if X64WIN 3150 | .if X64WIN
@@ -3478,14 +3184,13 @@ static void build_subroutines(BuildCtx *ctx)
3478 | ret 3184 | ret
3479 |.else // x86 calling convention. 3185 |.else // x86 calling convention.
3480 | .define fpmop, eax 3186 | .define fpmop, eax
3481 |.if SSE
3482 | mov fpmop, [esp+12] 3187 | mov fpmop, [esp+12]
3483 | movsd xmm0, qword [esp+4] 3188 | movsd xmm0, qword [esp+4]
3484 | cmp fpmop, 1; je >1; ja >2 3189 | cmp fpmop, 1; je >1; ja >2
3485 | call ->vm_floor; jmp >7 3190 | call ->vm_floor_sse; jmp >7
3486 |1: ; call ->vm_ceil; jmp >7 3191 |1: ; call ->vm_ceil_sse; jmp >7
3487 |2: ; cmp fpmop, 3; je >1; ja >2 3192 |2: ; cmp fpmop, 3; je >1; ja >2
3488 | call ->vm_trunc; jmp >7 3193 | call ->vm_trunc_sse; jmp >7
3489 |1: 3194 |1:
3490 | sqrtsd xmm0, xmm0 3195 | sqrtsd xmm0, xmm0
3491 |7: 3196 |7:
@@ -3503,23 +3208,6 @@ static void build_subroutines(BuildCtx *ctx)
3503 |2: ; cmp fpmop, 11; je >1; ja >9 3208 |2: ; cmp fpmop, 11; je >1; ja >9
3504 | fcos; ret 3209 | fcos; ret
3505 |1: ; fptan; fpop; ret 3210 |1: ; fptan; fpop; ret
3506 |.else
3507 | mov fpmop, [esp+12]
3508 | fld qword [esp+4]
3509 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3510 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3511 | fsqrt; ret
3512 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3513 | cmp fpmop, 7; je >1; ja >2
3514 | fldln2; fxch; fyl2x; ret
3515 |1: ; fld1; fxch; fyl2x; ret
3516 |2: ; cmp fpmop, 9; je >1; ja >2
3517 | fldlg2; fxch; fyl2x; ret
3518 |1: ; fsin; ret
3519 |2: ; cmp fpmop, 11; je >1; ja >9
3520 | fcos; ret
3521 |1: ; fptan; fpop; ret
3522 |.endif
3523 |.endif 3211 |.endif
3524 |9: ; int3 // Bad fpm. 3212 |9: ; int3 // Bad fpm.
3525 |.endif 3213 |.endif
@@ -3541,7 +3229,7 @@ static void build_subroutines(BuildCtx *ctx)
3541 |2: ; cmp foldop, 3; je >1; ja >2 3229 |2: ; cmp foldop, 3; je >1; ja >2
3542 | mulsd xmm0, xmm1; ret 3230 | mulsd xmm0, xmm1; ret
3543 |1: ; divsd xmm0, xmm1; ret 3231 |1: ; divsd xmm0, xmm1; ret
3544 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow 3232 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse
3545 | cmp foldop, 7; je >1; ja >2 3233 | cmp foldop, 7; je >1; ja >2
3546 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret 3234 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3547 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret 3235 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
@@ -3574,7 +3262,7 @@ static void build_subroutines(BuildCtx *ctx)
3574 |1: ; maxsd xmm0, xmm1; ret 3262 |1: ; maxsd xmm0, xmm1; ret
3575 |9: ; int3 // Bad op. 3263 |9: ; int3 // Bad op.
3576 | 3264 |
3577 |.elif SSE // x86 calling convention with SSE ops. 3265 |.else // x86 calling convention.
3578 | 3266 |
3579 | .define foldop, eax 3267 | .define foldop, eax
3580 | mov foldop, [esp+20] 3268 | mov foldop, [esp+20]
@@ -3593,7 +3281,7 @@ static void build_subroutines(BuildCtx *ctx)
3593 |2: ; cmp foldop, 5 3281 |2: ; cmp foldop, 5
3594 | je >1; ja >2 3282 | je >1; ja >2
3595 | call ->vm_mod; jmp <7 3283 | call ->vm_mod; jmp <7
3596 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. 3284 |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area.
3597 |2: ; cmp foldop, 7; je >1; ja >2 3285 |2: ; cmp foldop, 7; je >1; ja >2
3598 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 3286 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3599 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 3287 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
@@ -3608,29 +3296,6 @@ static void build_subroutines(BuildCtx *ctx)
3608 |1: ; maxsd xmm0, xmm1; jmp <7 3296 |1: ; maxsd xmm0, xmm1; jmp <7
3609 |9: ; int3 // Bad op. 3297 |9: ; int3 // Bad op.
3610 | 3298 |
3611 |.else // x86 calling convention with x87 ops.
3612 |
3613 | mov eax, [esp+20]
3614 | fld qword [esp+4]
3615 | fld qword [esp+12]
3616 | cmp eax, 1; je >1; ja >2
3617 | faddp st1; ret
3618 |1: ; fsubp st1; ret
3619 |2: ; cmp eax, 3; je >1; ja >2
3620 | fmulp st1; ret
3621 |1: ; fdivp st1; ret
3622 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3623 | cmp eax, 7; je >1; ja >2
3624 | fpop; fchs; ret
3625 |1: ; fpop; fabs; ret
3626 |2: ; cmp eax, 9; je >1; ja >2
3627 | fpatan; ret
3628 |1: ; fxch; fscale; fpop1; ret
3629 |2: ; cmp eax, 11; je >1; ja >9
3630 | fucomi st1; fcmovnbe st1; fpop1; ret
3631 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3632 |9: ; int3 // Bad op.
3633 |
3634 |.endif 3299 |.endif
3635 | 3300 |
3636 |//----------------------------------------------------------------------- 3301 |//-----------------------------------------------------------------------
@@ -3943,19 +3608,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3943 | // RA is a number. 3608 | // RA is a number.
3944 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3609 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3945 | // RA is a number, RD is an integer. 3610 | // RA is a number, RD is an integer.
3946 |.if SSE
3947 | cvtsi2sd xmm0, dword [BASE+RD*8] 3611 | cvtsi2sd xmm0, dword [BASE+RD*8]
3948 | jmp >2 3612 | jmp >2
3949 |.else
3950 | fld qword [BASE+RA*8]
3951 | fild dword [BASE+RD*8]
3952 | jmp >3
3953 |.endif
3954 | 3613 |
3955 |8: // RA is an integer, RD is not an integer. 3614 |8: // RA is an integer, RD is not an integer.
3956 | ja ->vmeta_comp 3615 | ja ->vmeta_comp
3957 | // RA is an integer, RD is a number. 3616 | // RA is an integer, RD is a number.
3958 |.if SSE
3959 | cvtsi2sd xmm1, dword [BASE+RA*8] 3617 | cvtsi2sd xmm1, dword [BASE+RA*8]
3960 | movsd xmm0, qword [BASE+RD*8] 3618 | movsd xmm0, qword [BASE+RD*8]
3961 | add PC, 4 3619 | add PC, 4
@@ -3963,29 +3621,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | jmp_comp jbe, ja, jb, jae, <9 3621 | jmp_comp jbe, ja, jb, jae, <9
3964 | jmp <6 3622 | jmp <6
3965 |.else 3623 |.else
3966 | fild dword [BASE+RA*8]
3967 | jmp >2
3968 |.endif
3969 |.else
3970 | checknum RA, ->vmeta_comp 3624 | checknum RA, ->vmeta_comp
3971 | checknum RD, ->vmeta_comp 3625 | checknum RD, ->vmeta_comp
3972 |.endif 3626 |.endif
3973 |.if SSE
3974 |1: 3627 |1:
3975 | movsd xmm0, qword [BASE+RD*8] 3628 | movsd xmm0, qword [BASE+RD*8]
3976 |2: 3629 |2:
3977 | add PC, 4 3630 | add PC, 4
3978 | ucomisd xmm0, qword [BASE+RA*8] 3631 | ucomisd xmm0, qword [BASE+RA*8]
3979 |3: 3632 |3:
3980 |.else
3981 |1:
3982 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3983 |2:
3984 | fld qword [BASE+RD*8]
3985 |3:
3986 | add PC, 4
3987 | fcomparepp
3988 |.endif
3989 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3633 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3990 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3634 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3991 |.if DUALNUM 3635 |.if DUALNUM
@@ -4025,43 +3669,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4025 | // RD is a number. 3669 | // RD is a number.
4026 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3670 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4027 | // RD is a number, RA is an integer. 3671 | // RD is a number, RA is an integer.
4028 |.if SSE
4029 | cvtsi2sd xmm0, dword [BASE+RA*8] 3672 | cvtsi2sd xmm0, dword [BASE+RA*8]
4030 |.else
4031 | fild dword [BASE+RA*8]
4032 |.endif
4033 | jmp >2 3673 | jmp >2
4034 | 3674 |
4035 |8: // RD is an integer, RA is not an integer. 3675 |8: // RD is an integer, RA is not an integer.
4036 | ja >5 3676 | ja >5
4037 | // RD is an integer, RA is a number. 3677 | // RD is an integer, RA is a number.
4038 |.if SSE
4039 | cvtsi2sd xmm0, dword [BASE+RD*8] 3678 | cvtsi2sd xmm0, dword [BASE+RD*8]
4040 | ucomisd xmm0, qword [BASE+RA*8] 3679 | ucomisd xmm0, qword [BASE+RA*8]
4041 |.else
4042 | fild dword [BASE+RD*8]
4043 | fld qword [BASE+RA*8]
4044 |.endif
4045 | jmp >4 3680 | jmp >4
4046 | 3681 |
4047 |.else 3682 |.else
4048 | cmp RB, LJ_TISNUM; jae >5 3683 | cmp RB, LJ_TISNUM; jae >5
4049 | checknum RA, >5 3684 | checknum RA, >5
4050 |.endif 3685 |.endif
4051 |.if SSE
4052 |1: 3686 |1:
4053 | movsd xmm0, qword [BASE+RA*8] 3687 | movsd xmm0, qword [BASE+RA*8]
4054 |2: 3688 |2:
4055 | ucomisd xmm0, qword [BASE+RD*8] 3689 | ucomisd xmm0, qword [BASE+RD*8]
4056 |4: 3690 |4:
4057 |.else
4058 |1:
4059 | fld qword [BASE+RA*8]
4060 |2:
4061 | fld qword [BASE+RD*8]
4062 |4:
4063 | fcomparepp
4064 |.endif
4065 iseqne_fp: 3691 iseqne_fp:
4066 if (vk) { 3692 if (vk) {
4067 | jp >2 // Unordered means not equal. 3693 | jp >2 // Unordered means not equal.
@@ -4184,39 +3810,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | // RA is a number. 3810 | // RA is a number.
4185 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3811 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4186 | // RA is a number, RD is an integer. 3812 | // RA is a number, RD is an integer.
4187 |.if SSE
4188 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3813 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4189 |.else
4190 | fild dword [KBASE+RD*8]
4191 |.endif
4192 | jmp >2 3814 | jmp >2
4193 | 3815 |
4194 |8: // RA is an integer, RD is a number. 3816 |8: // RA is an integer, RD is a number.
4195 |.if SSE
4196 | cvtsi2sd xmm0, dword [BASE+RA*8] 3817 | cvtsi2sd xmm0, dword [BASE+RA*8]
4197 | ucomisd xmm0, qword [KBASE+RD*8] 3818 | ucomisd xmm0, qword [KBASE+RD*8]
4198 |.else
4199 | fild dword [BASE+RA*8]
4200 | fld qword [KBASE+RD*8]
4201 |.endif
4202 | jmp >4 3819 | jmp >4
4203 |.else 3820 |.else
4204 | cmp RB, LJ_TISNUM; jae >3 3821 | cmp RB, LJ_TISNUM; jae >3
4205 |.endif 3822 |.endif
4206 |.if SSE
4207 |1: 3823 |1:
4208 | movsd xmm0, qword [KBASE+RD*8] 3824 | movsd xmm0, qword [KBASE+RD*8]
4209 |2: 3825 |2:
4210 | ucomisd xmm0, qword [BASE+RA*8] 3826 | ucomisd xmm0, qword [BASE+RA*8]
4211 |4: 3827 |4:
4212 |.else
4213 |1:
4214 | fld qword [KBASE+RD*8]
4215 |2:
4216 | fld qword [BASE+RA*8]
4217 |4:
4218 | fcomparepp
4219 |.endif
4220 goto iseqne_fp; 3828 goto iseqne_fp;
4221 case BC_ISEQP: case BC_ISNEP: 3829 case BC_ISEQP: case BC_ISNEP:
4222 vk = op == BC_ISEQP; 3830 vk = op == BC_ISEQP;
@@ -4267,6 +3875,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4267 | ins_next 3875 | ins_next
4268 break; 3876 break;
4269 3877
3878 case BC_ISTYPE:
3879 | ins_AD // RA = src, RD = -type
3880 | add RD, [BASE+RA*8+4]
3881 | jne ->vmeta_istype
3882 | ins_next
3883 break;
3884 case BC_ISNUM:
3885 | ins_AD // RA = src, RD = -(TISNUM-1)
3886 | checknum RA, ->vmeta_istype
3887 | ins_next
3888 break;
3889
4270 /* -- Unary ops --------------------------------------------------------- */ 3890 /* -- Unary ops --------------------------------------------------------- */
4271 3891
4272 case BC_MOV: 3892 case BC_MOV:
@@ -4310,16 +3930,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 |.else 3930 |.else
4311 | checknum RD, ->vmeta_unm 3931 | checknum RD, ->vmeta_unm
4312 |.endif 3932 |.endif
4313 |.if SSE
4314 | movsd xmm0, qword [BASE+RD*8] 3933 | movsd xmm0, qword [BASE+RD*8]
4315 | sseconst_sign xmm1, RDa 3934 | sseconst_sign xmm1, RDa
4316 | xorps xmm0, xmm1 3935 | xorps xmm0, xmm1
4317 | movsd qword [BASE+RA*8], xmm0 3936 | movsd qword [BASE+RA*8], xmm0
4318 |.else
4319 | fld qword [BASE+RD*8]
4320 | fchs
4321 | fstp qword [BASE+RA*8]
4322 |.endif
4323 |.if DUALNUM 3937 |.if DUALNUM
4324 | jmp <9 3938 | jmp <9
4325 |.else 3939 |.else
@@ -4335,15 +3949,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4335 |1: 3949 |1:
4336 | mov dword [BASE+RA*8+4], LJ_TISNUM 3950 | mov dword [BASE+RA*8+4], LJ_TISNUM
4337 | mov dword [BASE+RA*8], RD 3951 | mov dword [BASE+RA*8], RD
4338 |.elif SSE 3952 |.else
4339 | xorps xmm0, xmm0 3953 | xorps xmm0, xmm0
4340 | cvtsi2sd xmm0, dword STR:RD->len 3954 | cvtsi2sd xmm0, dword STR:RD->len
4341 |1: 3955 |1:
4342 | movsd qword [BASE+RA*8], xmm0 3956 | movsd qword [BASE+RA*8], xmm0
4343 |.else
4344 | fild dword STR:RD->len
4345 |1:
4346 | fstp qword [BASE+RA*8]
4347 |.endif 3957 |.endif
4348 | ins_next 3958 | ins_next
4349 |2: 3959 |2:
@@ -4361,11 +3971,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4361 | // Length of table returned in eax (RD). 3971 | // Length of table returned in eax (RD).
4362 |.if DUALNUM 3972 |.if DUALNUM
4363 | // Nothing to do. 3973 | // Nothing to do.
4364 |.elif SSE
4365 | cvtsi2sd xmm0, RD
4366 |.else 3974 |.else
4367 | mov ARG1, RD 3975 | cvtsi2sd xmm0, RD
4368 | fild ARG1
4369 |.endif 3976 |.endif
4370 | mov BASE, RB // Restore BASE. 3977 | mov BASE, RB // Restore BASE.
4371 | movzx RA, PC_RA 3978 | movzx RA, PC_RA
@@ -4380,7 +3987,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 3987
4381 /* -- Binary ops -------------------------------------------------------- */ 3988 /* -- Binary ops -------------------------------------------------------- */
4382 3989
4383 |.macro ins_arithpre, x87ins, sseins, ssereg 3990 |.macro ins_arithpre, sseins, ssereg
4384 | ins_ABC 3991 | ins_ABC
4385 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3992 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4386 ||switch (vk) { 3993 ||switch (vk) {
@@ -4389,37 +3996,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4389 | .if DUALNUM 3996 | .if DUALNUM
4390 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3997 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4391 | .endif 3998 | .endif
4392 | .if SSE 3999 | movsd xmm0, qword [BASE+RB*8]
4393 | movsd xmm0, qword [BASE+RB*8] 4000 | sseins ssereg, qword [KBASE+RC*8]
4394 | sseins ssereg, qword [KBASE+RC*8]
4395 | .else
4396 | fld qword [BASE+RB*8]
4397 | x87ins qword [KBASE+RC*8]
4398 | .endif
4399 || break; 4001 || break;
4400 ||case 1: 4002 ||case 1:
4401 | checknum RB, ->vmeta_arith_nv 4003 | checknum RB, ->vmeta_arith_nv
4402 | .if DUALNUM 4004 | .if DUALNUM
4403 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 4005 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4404 | .endif 4006 | .endif
4405 | .if SSE 4007 | movsd xmm0, qword [KBASE+RC*8]
4406 | movsd xmm0, qword [KBASE+RC*8] 4008 | sseins ssereg, qword [BASE+RB*8]
4407 | sseins ssereg, qword [BASE+RB*8]
4408 | .else
4409 | fld qword [KBASE+RC*8]
4410 | x87ins qword [BASE+RB*8]
4411 | .endif
4412 || break; 4009 || break;
4413 ||default: 4010 ||default:
4414 | checknum RB, ->vmeta_arith_vv 4011 | checknum RB, ->vmeta_arith_vv
4415 | checknum RC, ->vmeta_arith_vv 4012 | checknum RC, ->vmeta_arith_vv
4416 | .if SSE 4013 | movsd xmm0, qword [BASE+RB*8]
4417 | movsd xmm0, qword [BASE+RB*8] 4014 | sseins ssereg, qword [BASE+RC*8]
4418 | sseins ssereg, qword [BASE+RC*8]
4419 | .else
4420 | fld qword [BASE+RB*8]
4421 | x87ins qword [BASE+RC*8]
4422 | .endif
4423 || break; 4015 || break;
4424 ||} 4016 ||}
4425 |.endmacro 4017 |.endmacro
@@ -4457,54 +4049,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4457 |.endmacro 4049 |.endmacro
4458 | 4050 |
4459 |.macro ins_arithpost 4051 |.macro ins_arithpost
4460 |.if SSE
4461 | movsd qword [BASE+RA*8], xmm0 4052 | movsd qword [BASE+RA*8], xmm0
4462 |.else
4463 | fstp qword [BASE+RA*8]
4464 |.endif
4465 |.endmacro 4053 |.endmacro
4466 | 4054 |
4467 |.macro ins_arith, x87ins, sseins 4055 |.macro ins_arith, sseins
4468 | ins_arithpre x87ins, sseins, xmm0 4056 | ins_arithpre sseins, xmm0
4469 | ins_arithpost 4057 | ins_arithpost
4470 | ins_next 4058 | ins_next
4471 |.endmacro 4059 |.endmacro
4472 | 4060 |
4473 |.macro ins_arith, intins, x87ins, sseins 4061 |.macro ins_arith, intins, sseins
4474 |.if DUALNUM 4062 |.if DUALNUM
4475 | ins_arithdn intins 4063 | ins_arithdn intins
4476 |.else 4064 |.else
4477 | ins_arith, x87ins, sseins 4065 | ins_arith, sseins
4478 |.endif 4066 |.endif
4479 |.endmacro 4067 |.endmacro
4480 4068
4481 | // RA = dst, RB = src1 or num const, RC = src2 or num const 4069 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4482 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 4070 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4483 | ins_arith add, fadd, addsd 4071 | ins_arith add, addsd
4484 break; 4072 break;
4485 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4073 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4486 | ins_arith sub, fsub, subsd 4074 | ins_arith sub, subsd
4487 break; 4075 break;
4488 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4076 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4489 | ins_arith imul, fmul, mulsd 4077 | ins_arith imul, mulsd
4490 break; 4078 break;
4491 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4079 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4492 | ins_arith fdiv, divsd 4080 | ins_arith divsd
4493 break; 4081 break;
4494 case BC_MODVN: 4082 case BC_MODVN:
4495 | ins_arithpre fld, movsd, xmm1 4083 | ins_arithpre movsd, xmm1
4496 |->BC_MODVN_Z: 4084 |->BC_MODVN_Z:
4497 | call ->vm_mod 4085 | call ->vm_mod
4498 | ins_arithpost 4086 | ins_arithpost
4499 | ins_next 4087 | ins_next
4500 break; 4088 break;
4501 case BC_MODNV: case BC_MODVV: 4089 case BC_MODNV: case BC_MODVV:
4502 | ins_arithpre fld, movsd, xmm1 4090 | ins_arithpre movsd, xmm1
4503 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4091 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4504 break; 4092 break;
4505 case BC_POW: 4093 case BC_POW:
4506 | ins_arithpre fld, movsd, xmm1 4094 | ins_arithpre movsd, xmm1
4507 | call ->vm_pow 4095 | call ->vm_pow_sse
4508 | ins_arithpost 4096 | ins_arithpost
4509 | ins_next 4097 | ins_next
4510 break; 4098 break;
@@ -4573,25 +4161,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | movsx RD, RDW 4161 | movsx RD, RDW
4574 | mov dword [BASE+RA*8+4], LJ_TISNUM 4162 | mov dword [BASE+RA*8+4], LJ_TISNUM
4575 | mov dword [BASE+RA*8], RD 4163 | mov dword [BASE+RA*8], RD
4576 |.elif SSE 4164 |.else
4577 | movsx RD, RDW // Sign-extend literal. 4165 | movsx RD, RDW // Sign-extend literal.
4578 | cvtsi2sd xmm0, RD 4166 | cvtsi2sd xmm0, RD
4579 | movsd qword [BASE+RA*8], xmm0 4167 | movsd qword [BASE+RA*8], xmm0
4580 |.else
4581 | fild PC_RD // Refetch signed RD from instruction.
4582 | fstp qword [BASE+RA*8]
4583 |.endif 4168 |.endif
4584 | ins_next 4169 | ins_next
4585 break; 4170 break;
4586 case BC_KNUM: 4171 case BC_KNUM:
4587 | ins_AD // RA = dst, RD = num const 4172 | ins_AD // RA = dst, RD = num const
4588 |.if SSE
4589 | movsd xmm0, qword [KBASE+RD*8] 4173 | movsd xmm0, qword [KBASE+RD*8]
4590 | movsd qword [BASE+RA*8], xmm0 4174 | movsd qword [BASE+RA*8], xmm0
4591 |.else
4592 | fld qword [KBASE+RD*8]
4593 | fstp qword [BASE+RA*8]
4594 |.endif
4595 | ins_next 4175 | ins_next
4596 break; 4176 break;
4597 case BC_KPRI: 4177 case BC_KPRI:
@@ -4698,18 +4278,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4698 case BC_USETN: 4278 case BC_USETN:
4699 | ins_AD // RA = upvalue #, RD = num const 4279 | ins_AD // RA = upvalue #, RD = num const
4700 | mov LFUNC:RB, [BASE-8] 4280 | mov LFUNC:RB, [BASE-8]
4701 |.if SSE
4702 | movsd xmm0, qword [KBASE+RD*8] 4281 | movsd xmm0, qword [KBASE+RD*8]
4703 |.else
4704 | fld qword [KBASE+RD*8]
4705 |.endif
4706 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4282 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4707 | mov RA, UPVAL:RB->v 4283 | mov RA, UPVAL:RB->v
4708 |.if SSE
4709 | movsd qword [RA], xmm0 4284 | movsd qword [RA], xmm0
4710 |.else
4711 | fstp qword [RA]
4712 |.endif
4713 | ins_next 4285 | ins_next
4714 break; 4286 break;
4715 case BC_USETP: 4287 case BC_USETP:
@@ -4863,18 +4435,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4863 |.else 4435 |.else
4864 | // Convert number to int and back and compare. 4436 | // Convert number to int and back and compare.
4865 | checknum RC, >5 4437 | checknum RC, >5
4866 |.if SSE
4867 | movsd xmm0, qword [BASE+RC*8] 4438 | movsd xmm0, qword [BASE+RC*8]
4868 | cvtsd2si RC, xmm0 4439 | cvttsd2si RC, xmm0
4869 | cvtsi2sd xmm1, RC 4440 | cvtsi2sd xmm1, RC
4870 | ucomisd xmm0, xmm1 4441 | ucomisd xmm0, xmm1
4871 |.else
4872 | fld qword [BASE+RC*8]
4873 | fist ARG1
4874 | fild ARG1
4875 | fcomparepp
4876 | mov RC, ARG1
4877 |.endif
4878 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4442 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4879 |.endif 4443 |.endif
4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4444 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4998,6 +4562,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4998 | mov dword [BASE+RA*8+4], LJ_TNIL 4562 | mov dword [BASE+RA*8+4], LJ_TNIL
4999 | jmp <1 4563 | jmp <1
5000 break; 4564 break;
4565 case BC_TGETR:
4566 | ins_ABC // RA = dst, RB = table, RC = key
4567 | mov TAB:RB, [BASE+RB*8]
4568 |.if DUALNUM
4569 | mov RC, dword [BASE+RC*8]
4570 |.else
4571 | cvttsd2si RC, qword [BASE+RC*8]
4572 |.endif
4573 | cmp RC, TAB:RB->asize
4574 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4575 | shl RC, 3
4576 | add RC, TAB:RB->array
4577 | // Get array slot.
4578 |->BC_TGETR_Z:
4579 |.if X64
4580 | mov RBa, [RC]
4581 | mov [BASE+RA*8], RBa
4582 |.else
4583 | mov RB, [RC]
4584 | mov RC, [RC+4]
4585 | mov [BASE+RA*8], RB
4586 | mov [BASE+RA*8+4], RC
4587 |.endif
4588 |->BC_TGETR2_Z:
4589 | ins_next
4590 break;
5001 4591
5002 case BC_TSETV: 4592 case BC_TSETV:
5003 | ins_ABC // RA = src, RB = table, RC = key 4593 | ins_ABC // RA = src, RB = table, RC = key
@@ -5011,18 +4601,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5011 |.else 4601 |.else
5012 | // Convert number to int and back and compare. 4602 | // Convert number to int and back and compare.
5013 | checknum RC, >5 4603 | checknum RC, >5
5014 |.if SSE
5015 | movsd xmm0, qword [BASE+RC*8] 4604 | movsd xmm0, qword [BASE+RC*8]
5016 | cvtsd2si RC, xmm0 4605 | cvttsd2si RC, xmm0
5017 | cvtsi2sd xmm1, RC 4606 | cvtsi2sd xmm1, RC
5018 | ucomisd xmm0, xmm1 4607 | ucomisd xmm0, xmm1
5019 |.else
5020 | fld qword [BASE+RC*8]
5021 | fist ARG1
5022 | fild ARG1
5023 | fcomparepp
5024 | mov RC, ARG1
5025 |.endif
5026 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4608 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5027 |.endif 4609 |.endif
5028 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4610 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5192,6 +4774,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5192 | movzx RA, PC_RA // Restore RA. 4774 | movzx RA, PC_RA // Restore RA.
5193 | jmp <2 4775 | jmp <2
5194 break; 4776 break;
4777 case BC_TSETR:
4778 | ins_ABC // RA = src, RB = table, RC = key
4779 | mov TAB:RB, [BASE+RB*8]
4780 |.if DUALNUM
4781 | mov RC, dword [BASE+RC*8]
4782 |.else
4783 | cvttsd2si RC, qword [BASE+RC*8]
4784 |.endif
4785 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4786 | jnz >7
4787 |2:
4788 | cmp RC, TAB:RB->asize
4789 | jae ->vmeta_tsetr
4790 | shl RC, 3
4791 | add RC, TAB:RB->array
4792 | // Set array slot.
4793 |->BC_TSETR_Z:
4794 |.if X64
4795 | mov RBa, [BASE+RA*8]
4796 | mov [RC], RBa
4797 |.else
4798 | mov RB, [BASE+RA*8+4]
4799 | mov RA, [BASE+RA*8]
4800 | mov [RC+4], RB
4801 | mov [RC], RA
4802 |.endif
4803 | ins_next
4804 |
4805 |7: // Possible table write barrier for the value. Skip valiswhite check.
4806 | barrierback TAB:RB, RA
4807 | movzx RA, PC_RA // Restore RA.
4808 | jmp <2
4809 break;
5195 4810
5196 case BC_TSETM: 4811 case BC_TSETM:
5197 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4812 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5386,10 +5001,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5386 |.if DUALNUM 5001 |.if DUALNUM
5387 | mov dword [BASE+RA*8+4], LJ_TISNUM 5002 | mov dword [BASE+RA*8+4], LJ_TISNUM
5388 | mov dword [BASE+RA*8], RC 5003 | mov dword [BASE+RA*8], RC
5389 |.elif SSE
5390 | cvtsi2sd xmm0, RC
5391 |.else 5004 |.else
5392 | fild dword [BASE+RA*8-8] 5005 | cvtsi2sd xmm0, RC
5393 |.endif 5006 |.endif
5394 | // Copy array slot to returned value. 5007 | // Copy array slot to returned value.
5395 |.if X64 5008 |.if X64
@@ -5405,10 +5018,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 | // Return array index as a numeric key. 5018 | // Return array index as a numeric key.
5406 |.if DUALNUM 5019 |.if DUALNUM
5407 | // See above. 5020 | // See above.
5408 |.elif SSE
5409 | movsd qword [BASE+RA*8], xmm0
5410 |.else 5021 |.else
5411 | fstp qword [BASE+RA*8] 5022 | movsd qword [BASE+RA*8], xmm0
5412 |.endif 5023 |.endif
5413 | mov [BASE+RA*8-8], RC // Update control var. 5024 | mov [BASE+RA*8-8], RC // Update control var.
5414 |2: 5025 |2:
@@ -5421,9 +5032,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5421 | 5032 |
5422 |4: // Skip holes in array part. 5033 |4: // Skip holes in array part.
5423 | add RC, 1 5034 | add RC, 1
5424 |.if not (DUALNUM or SSE)
5425 | mov [BASE+RA*8-8], RC
5426 |.endif
5427 | jmp <1 5035 | jmp <1
5428 | 5036 |
5429 |5: // Traverse hash part. 5037 |5: // Traverse hash part.
@@ -5757,7 +5365,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5757 if (!vk) { 5365 if (!vk) {
5758 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5366 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5759 } 5367 }
5760 |.if SSE
5761 | movsd xmm0, qword FOR_IDX 5368 | movsd xmm0, qword FOR_IDX
5762 | movsd xmm1, qword FOR_STOP 5369 | movsd xmm1, qword FOR_STOP
5763 if (vk) { 5370 if (vk) {
@@ -5770,22 +5377,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5770 | ucomisd xmm1, xmm0 5377 | ucomisd xmm1, xmm0
5771 |1: 5378 |1:
5772 | movsd qword FOR_EXT, xmm0 5379 | movsd qword FOR_EXT, xmm0
5773 |.else
5774 | fld qword FOR_STOP
5775 | fld qword FOR_IDX
5776 if (vk) {
5777 | fadd qword FOR_STEP // nidx = idx + step
5778 | fst qword FOR_IDX
5779 | fst qword FOR_EXT
5780 | test RB, RB; js >1
5781 } else {
5782 | fst qword FOR_EXT
5783 | jl >1
5784 }
5785 | fxch // Swap lim/(n)idx if step non-negative.
5786 |1:
5787 | fcomparepp
5788 |.endif
5789 if (op == BC_FORI) { 5380 if (op == BC_FORI) {
5790 |.if DUALNUM 5381 |.if DUALNUM
5791 | jnb <7 5382 | jnb <7
@@ -5813,11 +5404,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5813 |2: 5404 |2:
5814 | ins_next 5405 | ins_next
5815 |.endif 5406 |.endif
5816 |.if SSE 5407 |
5817 |3: // Invert comparison if step is negative. 5408 |3: // Invert comparison if step is negative.
5818 | ucomisd xmm0, xmm1 5409 | ucomisd xmm0, xmm1
5819 | jmp <1 5410 | jmp <1
5820 |.endif
5821 break; 5411 break;
5822 5412
5823 case BC_ITERL: 5413 case BC_ITERL: