summaryrefslogtreecommitdiff
path: root/src/buildvm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r--src/buildvm_x86.dasc963
1 files changed, 762 insertions, 201 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 4a0bbeab..4d96423c 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -306,7 +306,8 @@
306| 306|
307|// Macros to test operand types. 307|// Macros to test operand types.
308|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro 308|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
309|.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro 309|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
310|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
310|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro 311|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
311|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro 312|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
312| 313|
@@ -810,7 +811,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
810 | 811 |
811 |->vmeta_tgetb: 812 |->vmeta_tgetb:
812 | movzx RC, PC_RC 813 | movzx RC, PC_RC
813 if (sse) { 814 if (LJ_DUALNUM) {
815 | mov TMP2, LJ_TISNUM
816 | mov TMP1, RC
817 } else if (sse) {
814 | cvtsi2sd xmm0, RC 818 | cvtsi2sd xmm0, RC
815 | movsd TMPQ, xmm0 819 | movsd TMPQ, xmm0
816 } else { 820 } else {
@@ -888,7 +892,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
888 | 892 |
889 |->vmeta_tsetb: 893 |->vmeta_tsetb:
890 | movzx RC, PC_RC 894 | movzx RC, PC_RC
891 if (sse) { 895 if (LJ_DUALNUM) {
896 | mov TMP2, LJ_TISNUM
897 | mov TMP1, RC
898 } else if (sse) {
892 | cvtsi2sd xmm0, RC 899 | cvtsi2sd xmm0, RC
893 | movsd TMPQ, xmm0 900 | movsd TMPQ, xmm0
894 } else { 901 } else {
@@ -1051,17 +1058,25 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1051 | mov FCARG1, L:RB 1058 | mov FCARG1, L:RB
1052 | mov FCARG2, dword [PC-4] 1059 | mov FCARG2, dword [PC-4]
1053 | mov SAVE_PC, PC 1060 | mov SAVE_PC, PC
1054 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns op) 1061 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
1055 | // 0/1 or TValue * (metamethod) returned in eax (RC). 1062 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1056 | jmp <3 1063 | jmp <3
1057#endif 1064#endif
1058 | 1065 |
1059 |//-- Arithmetic metamethods --------------------------------------------- 1066 |//-- Arithmetic metamethods ---------------------------------------------
1060 | 1067 |
1068 |->vmeta_arith_vno:
1069#if LJ_DUALNUM
1070 | movzx RB, PC_RB
1071#endif
1061 |->vmeta_arith_vn: 1072 |->vmeta_arith_vn:
1062 | lea RC, [KBASE+RC*8] 1073 | lea RC, [KBASE+RC*8]
1063 | jmp >1 1074 | jmp >1
1064 | 1075 |
1076 |->vmeta_arith_nvo:
1077#if LJ_DUALNUM
1078 | movzx RC, PC_RC
1079#endif
1065 |->vmeta_arith_nv: 1080 |->vmeta_arith_nv:
1066 | lea RC, [KBASE+RC*8] 1081 | lea RC, [KBASE+RC*8]
1067 | lea RB, [BASE+RB*8] 1082 | lea RB, [BASE+RB*8]
@@ -1073,6 +1088,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1073 | mov RB, RC 1088 | mov RB, RC
1074 | jmp >2 1089 | jmp >2
1075 | 1090 |
1091 |->vmeta_arith_vvo:
1092#if LJ_DUALNUM
1093 | movzx RB, PC_RB
1094#endif
1076 |->vmeta_arith_vv: 1095 |->vmeta_arith_vv:
1077 | lea RC, [BASE+RC*8] 1096 | lea RC, [BASE+RC*8]
1078 |1: 1097 |1:
@@ -1210,20 +1229,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1210 | 1229 |
1211 |.macro .ffunc_n, name 1230 |.macro .ffunc_n, name
1212 | .ffunc_1 name 1231 | .ffunc_1 name
1213 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1232 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1214 | fld qword [BASE] 1233 | fld qword [BASE]
1215 |.endmacro 1234 |.endmacro
1216 | 1235 |
1217 |.macro .ffunc_n, name, op 1236 |.macro .ffunc_n, name, op
1218 | .ffunc_1 name 1237 | .ffunc_1 name
1219 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1238 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1220 | op 1239 | op
1221 | fld qword [BASE] 1240 | fld qword [BASE]
1222 |.endmacro 1241 |.endmacro
1223 | 1242 |
1224 |.macro .ffunc_nsse, name, op 1243 |.macro .ffunc_nsse, name, op
1225 | .ffunc_1 name 1244 | .ffunc_1 name
1226 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1245 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1227 | op xmm0, qword [BASE] 1246 | op xmm0, qword [BASE]
1228 |.endmacro 1247 |.endmacro
1229 | 1248 |
@@ -1233,24 +1252,24 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1233 | 1252 |
1234 |.macro .ffunc_nn, name 1253 |.macro .ffunc_nn, name
1235 | .ffunc_2 name 1254 | .ffunc_2 name
1236 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1255 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1237 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback 1256 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1238 | fld qword [BASE] 1257 | fld qword [BASE]
1239 | fld qword [BASE+8] 1258 | fld qword [BASE+8]
1240 |.endmacro 1259 |.endmacro
1241 | 1260 |
1242 |.macro .ffunc_nnsse, name 1261 |.macro .ffunc_nnsse, name
1243 | .ffunc_2 name 1262 | .ffunc_2 name
1244 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1263 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1245 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback 1264 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1246 | movsd xmm0, qword [BASE] 1265 | movsd xmm0, qword [BASE]
1247 | movsd xmm1, qword [BASE+8] 1266 | movsd xmm1, qword [BASE+8]
1248 |.endmacro 1267 |.endmacro
1249 | 1268 |
1250 |.macro .ffunc_nnr, name 1269 |.macro .ffunc_nnr, name
1251 | .ffunc_2 name 1270 | .ffunc_2 name
1252 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1271 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1253 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback 1272 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1254 | fld qword [BASE+8] 1273 | fld qword [BASE+8]
1255 | fld qword [BASE] 1274 | fld qword [BASE]
1256 |.endmacro 1275 |.endmacro
@@ -1431,7 +1450,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1431 |.ffunc tonumber 1450 |.ffunc tonumber
1432 | // Only handles the number case inline (without a base argument). 1451 | // Only handles the number case inline (without a base argument).
1433 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 1452 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
1434 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 1453 | cmp dword [BASE+4], LJ_TISNUM
1454 if (LJ_DUALNUM) {
1455 | jne >1
1456 | mov RB, dword [BASE]; jmp ->fff_resi
1457 |1:
1458 | ja ->fff_fallback
1459 } else {
1460 | jae ->fff_fallback
1461 }
1435 if (sse) { 1462 if (sse) {
1436 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1463 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1437 } else { 1464 } else {
@@ -1460,7 +1487,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1460 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE 1487 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
1461 |.endif 1488 |.endif
1462 | mov L:FCARG1, L:RB 1489 | mov L:FCARG1, L:RB
1463 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1490 if (LJ_DUALNUM) {
1491 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o)
1492 } else {
1493 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
1494 }
1464 | // GCstr returned in eax (RD). 1495 | // GCstr returned in eax (RD).
1465 | mov BASE, L:RB->base 1496 | mov BASE, L:RB->base
1466 | jmp <2 1497 | jmp <2
@@ -1538,9 +1569,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1538 | 1569 |
1539 |.ffunc_1 ipairs_aux 1570 |.ffunc_1 ipairs_aux
1540 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1571 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1541 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback 1572 | cmp dword [BASE+12], LJ_TISNUM
1573 if (LJ_DUALNUM) {
1574 | jne ->fff_fallback
1575 } else {
1576 | jae ->fff_fallback
1577 }
1542 | mov PC, [BASE-4] 1578 | mov PC, [BASE-4]
1543 if (sse) { 1579 if (LJ_DUALNUM) {
1580 | mov RD, dword [BASE+8]
1581 | add RD, 1
1582 | mov dword [BASE-4], LJ_TISNUM
1583 | mov dword [BASE-8], RD
1584 } else if (sse) {
1544 | movsd xmm0, qword [BASE+8] 1585 | movsd xmm0, qword [BASE+8]
1545 | sseconst_1 xmm1, RBa 1586 | sseconst_1 xmm1, RBa
1546 | addsd xmm0, xmm1 1587 | addsd xmm0, xmm1
@@ -1598,7 +1639,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1598 | mov PC, [BASE-4] 1639 | mov PC, [BASE-4]
1599 | mov dword [BASE-4], LJ_TFUNC 1640 | mov dword [BASE-4], LJ_TFUNC
1600 | mov [BASE-8], CFUNC:RD 1641 | mov [BASE-8], CFUNC:RD
1601 if (sse) { 1642 if (LJ_DUALNUM) {
1643 | mov dword [BASE+12], LJ_TISNUM
1644 | mov dword [BASE+8], 0
1645 } else if (sse) {
1602 | xorps xmm0, xmm0 1646 | xorps xmm0, xmm0
1603 | movsd qword [BASE+8], xmm0 1647 | movsd qword [BASE+8], xmm0
1604 } else { 1648 } else {
@@ -1829,13 +1873,39 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1829 | 1873 |
1830 |//-- Math library ------------------------------------------------------- 1874 |//-- Math library -------------------------------------------------------
1831 | 1875 |
1876 if (!LJ_DUALNUM) {
1877 |->fff_resi: // Dummy.
1878 }
1832 if (sse) { 1879 if (sse) {
1833 |->fff_resn: 1880 |->fff_resn:
1834 | mov PC, [BASE-4] 1881 | mov PC, [BASE-4]
1835 | fstp qword [BASE-8] 1882 | fstp qword [BASE-8]
1836 | jmp ->fff_res1 1883 | jmp ->fff_res1
1837 | 1884 }
1838 |.ffunc_nsse math_abs 1885 | .ffunc_1 math_abs
1886 if (LJ_DUALNUM) {
1887 | cmp dword [BASE+4], LJ_TISNUM; jne >2
1888 | mov RB, dword [BASE]
1889 | cmp RB, 0; jns ->fff_resi
1890 | neg RB; js >1
1891 |->fff_resbit:
1892 |->fff_resi:
1893 | mov PC, [BASE-4]
1894 | mov dword [BASE-4], LJ_TISNUM
1895 | mov dword [BASE-8], RB
1896 | jmp ->fff_res1
1897 |1:
1898 | mov PC, [BASE-4]
1899 | mov dword [BASE-4], 0x41e00000 // 2^31.
1900 | mov dword [BASE-8], 0
1901 | jmp ->fff_res1
1902 |2:
1903 | ja ->fff_fallback
1904 } else {
1905 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1906 }
1907 if (sse) {
1908 | movsd xmm0, qword [BASE]
1839 | sseconst_abs xmm1, RDa 1909 | sseconst_abs xmm1, RDa
1840 | andps xmm0, xmm1 1910 | andps xmm0, xmm1
1841 |->fff_resxmm0: 1911 |->fff_resxmm0:
@@ -1843,7 +1913,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1843 | movsd qword [BASE-8], xmm0 1913 | movsd qword [BASE-8], xmm0
1844 | // fallthrough 1914 | // fallthrough
1845 } else { 1915 } else {
1846 |.ffunc_n math_abs 1916 | fld qword [BASE]
1847 | fabs 1917 | fabs
1848 | // fallthrough 1918 | // fallthrough
1849 |->fff_resxmm0: // Dummy. 1919 |->fff_resxmm0: // Dummy.
@@ -1876,16 +1946,60 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1876 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 1946 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
1877 | jmp ->vm_return 1947 | jmp ->vm_return
1878 | 1948 |
1949 |.macro math_round, func
1950 | .ffunc math_ .. func
1951 ||if (LJ_DUALNUM) {
1952 | cmp dword [BASE+4], LJ_TISNUM; jne >1
1953 | mov RB, dword [BASE]; jmp ->fff_resi
1954 |1:
1955 | ja ->fff_fallback
1956 ||} else {
1957 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1958 ||}
1959 ||if (sse) {
1960 | movsd xmm0, qword [BASE]
1961 | call ->vm_ .. func
1962 || if (LJ_DUALNUM) {
1963 | cvtsd2si RB, xmm0
1964 | cmp RB, 0x80000000
1965 | jne ->fff_resi
1966 | cvtsi2sd xmm1, RB
1967 | ucomisd xmm0, xmm1
1968 | jp ->fff_resxmm0
1969 | je ->fff_resi
1970 || }
1971 | jmp ->fff_resxmm0
1972 ||} else {
1973 | fld qword [BASE]
1974 | call ->vm_ .. func
1975 || if (LJ_DUALNUM) {
1976 |.if not X64
1977 | fist ARG1
1978 | mov RB, ARG1
1979 | cmp RB, 0x80000000; jne >2
1980 | fdup
1981 | fild ARG1
1982 | fcomparepp
1983 | jp ->fff_resn
1984 | jne ->fff_resn
1985 |2:
1986 | fpop
1987 | jmp ->fff_resi
1988 |.endif
1989 || } else {
1990 | jmp ->fff_resn
1991 || }
1992 ||}
1993 |.endmacro
1994 |
1995 | math_round floor
1996 | math_round ceil
1997 |
1879 if (sse) { 1998 if (sse) {
1880 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 1999 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
1881 |.ffunc_nsse math_floor; call ->vm_floor; jmp ->fff_resxmm0
1882 |.ffunc_nsse math_ceil; call ->vm_ceil; jmp ->fff_resxmm0
1883 } else { 2000 } else {
1884 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn 2001 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
1885 |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn
1886 |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn
1887 } 2002 }
1888 |
1889 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn 2003 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn
1890 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2004 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
1891 |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn 2005 |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn
@@ -1946,7 +2060,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1946 | 2060 |
1947 |.ffunc_1 math_frexp 2061 |.ffunc_1 math_frexp
1948 | mov RB, [BASE+4] 2062 | mov RB, [BASE+4]
1949 | cmp RB, LJ_TISNUM; ja ->fff_fallback 2063 | cmp RB, LJ_TISNUM; jae ->fff_fallback
1950 | mov PC, [BASE-4] 2064 | mov PC, [BASE-4]
1951 | mov RC, [BASE] 2065 | mov RC, [BASE]
1952 | mov [BASE-4], RB; mov [BASE-8], RC 2066 | mov [BASE-4], RB; mov [BASE-8], RC
@@ -2041,44 +2155,91 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2041 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn 2155 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2042 } 2156 }
2043 | 2157 |
2044 |.macro math_minmax, name, cmovop, nocmovop, sseop 2158 |.macro math_minmax, name, cmovop, fcmovop, nofcmovop, sseop
2159 | .ffunc name
2160 | mov RA, 2
2161 | cmp dword [BASE+4], LJ_TISNUM
2162 ||if (LJ_DUALNUM) {
2163 | jne >4
2164 | mov RB, dword [BASE]
2165 |1: // Handle integers.
2166 | cmp RA, RD; jae ->fff_resi
2167 | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
2168 | cmp RB, dword [BASE+RA*8-8]
2169 | cmovop RB, dword [BASE+RA*8-8]
2170 | add RA, 1
2171 | jmp <1
2172 |3:
2173 | ja ->fff_fallback
2174 | // Convert intermediate result to number and continue below.
2045 ||if (sse) { 2175 ||if (sse) {
2046 |.ffunc_nsse name 2176 | cvtsi2sd xmm0, RB
2047 | mov RB, 2 2177 ||} else {
2048 |1: 2178 |.if not X64
2049 | cmp RB, RD 2179 | mov TMP1, RB
2050 | jae ->fff_resxmm0 2180 | fild TMP1
2051 | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja ->fff_fallback 2181 |.endif
2052 | movsd xmm1, qword [BASE+RB*8-8] 2182 ||}
2183 | jmp >6
2184 |4:
2185 | ja ->fff_fallback
2186 ||} else {
2187 | jae ->fff_fallback
2188 ||}
2189 |
2190 ||if (sse) {
2191 | movsd xmm0, qword [BASE]
2192 |5: // Handle numbers or integers.
2193 | cmp RA, RD; jae ->fff_resxmm0
2194 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2195 ||if (LJ_DUALNUM) {
2196 | jb >6
2197 | ja ->fff_fallback
2198 | cvtsi2sd xmm1, dword [BASE+RA*8-8]
2199 | jmp >7
2200 ||} else {
2201 | jae ->fff_fallback
2202 ||}
2203 |6:
2204 | movsd xmm1, qword [BASE+RA*8-8]
2205 |7:
2053 | sseop xmm0, xmm1 2206 | sseop xmm0, xmm1
2054 | add RB, 1 2207 | add RA, 1
2055 | jmp <1 2208 | jmp <5
2056 ||} else { 2209 ||} else {
2057 |.if not X64 2210 |.if not X64
2058 |.ffunc_n name 2211 | fld qword [BASE]
2059 | mov RB, 2 2212 |5: // Handle numbers or integers.
2060 |1: 2213 | cmp RA, RD; jae ->fff_resn
2061 | cmp RB, RD 2214 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2062 | jae ->fff_resn 2215 ||if (LJ_DUALNUM) {
2063 | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja >5 2216 | jb >6
2064 | fld qword [BASE+RB*8-8] 2217 | ja >9
2218 | fild dword [BASE+RA*8-8]
2219 | jmp >7
2220 ||} else {
2221 | jae >9
2222 ||}
2223 |6:
2224 | fld qword [BASE+RA*8-8]
2225 |7:
2065 ||if (cmov) { 2226 ||if (cmov) {
2066 | fucomi st1; cmovop st1; fpop1 2227 | fucomi st1; fcmovop st1; fpop1
2067 ||} else { 2228 ||} else {
2068 | push eax 2229 | push eax
2069 | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop 2230 | fucom st1; fnstsw ax; test ah, 1; nofcmovop >2; fxch; 2: ; fpop
2070 | pop eax 2231 | pop eax
2071 ||} 2232 ||}
2072 | add RB, 1 2233 | add RA, 1
2073 | jmp <1 2234 | jmp <5
2074 |.endif 2235 |.endif
2075 ||} 2236 ||}
2076 |.endmacro 2237 |.endmacro
2077 | 2238 |
2078 | math_minmax math_min, fcmovnbe, jz, minsd 2239 | math_minmax math_min, cmovg, fcmovnbe, jz, minsd
2079 | math_minmax math_max, fcmovbe, jnz, maxsd 2240 | math_minmax math_max, cmovl, fcmovbe, jnz, maxsd
2080 if (!sse) { 2241 if (!sse) {
2081 |5: 2242 |9:
2082 | fpop; jmp ->fff_fallback 2243 | fpop; jmp ->fff_fallback
2083 } 2244 }
2084 | 2245 |
@@ -2087,7 +2248,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2087 |.ffunc_1 string_len 2248 |.ffunc_1 string_len
2088 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2249 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2089 | mov STR:RB, [BASE] 2250 | mov STR:RB, [BASE]
2090 if (sse) { 2251 if (LJ_DUALNUM) {
2252 | mov RB, dword STR:RB->len; jmp ->fff_resi
2253 } else if (sse) {
2091 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 2254 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2092 } else { 2255 } else {
2093 | fild dword STR:RB->len; jmp ->fff_resn 2256 | fild dword STR:RB->len; jmp ->fff_resn
@@ -2101,7 +2264,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2101 | cmp dword STR:RB->len, 1 2264 | cmp dword STR:RB->len, 1
2102 | jb ->fff_res0 // Return no results for empty string. 2265 | jb ->fff_res0 // Return no results for empty string.
2103 | movzx RB, byte STR:RB[1] 2266 | movzx RB, byte STR:RB[1]
2104 if (sse) { 2267 if (LJ_DUALNUM) {
2268 | jmp ->fff_resi
2269 } else if (sse) {
2105 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 2270 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2106 } else { 2271 } else {
2107 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2272 | mov TMP1, RB; fild TMP1; jmp ->fff_resn
@@ -2110,12 +2275,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2110 |.ffunc string_char // Only handle the 1-arg case here. 2275 |.ffunc string_char // Only handle the 1-arg case here.
2111 | ffgccheck 2276 | ffgccheck
2112 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. 2277 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
2113 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback 2278 | cmp dword [BASE+4], LJ_TISNUM
2114 if (sse) { 2279 if (LJ_DUALNUM) {
2280 | jne ->fff_fallback
2281 | mov RB, dword [BASE]
2282 | cmp RB, 255; ja ->fff_fallback
2283 | mov TMP2, RB
2284 } else if (sse) {
2285 | jae ->fff_fallback
2115 | cvttsd2si RB, qword [BASE] 2286 | cvttsd2si RB, qword [BASE]
2116 | cmp RB, 255; ja ->fff_fallback 2287 | cmp RB, 255; ja ->fff_fallback
2117 | mov TMP2, RB 2288 | mov TMP2, RB
2118 } else { 2289 } else {
2290 | jae ->fff_fallback
2119 | fld qword [BASE] 2291 | fld qword [BASE]
2120 | fistp TMP2 2292 | fistp TMP2
2121 | cmp TMP2, 255; ja ->fff_fallback 2293 | cmp TMP2, 255; ja ->fff_fallback
@@ -2151,21 +2323,34 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2151 | mov TMP2, -1 2323 | mov TMP2, -1
2152 | cmp NARGS:RD, 1+2; jb ->fff_fallback 2324 | cmp NARGS:RD, 1+2; jb ->fff_fallback
2153 | jna >1 2325 | jna >1
2154 | cmp dword [BASE+20], LJ_TISNUM; ja ->fff_fallback 2326 | cmp dword [BASE+20], LJ_TISNUM
2155 if (sse) { 2327 if (LJ_DUALNUM) {
2328 | jne ->fff_fallback
2329 | mov RB, dword [BASE+16]
2330 | mov TMP2, RB
2331 } else if (sse) {
2332 | jae ->fff_fallback
2156 | cvttsd2si RB, qword [BASE+16] 2333 | cvttsd2si RB, qword [BASE+16]
2157 | mov TMP2, RB 2334 | mov TMP2, RB
2158 } else { 2335 } else {
2336 | jae ->fff_fallback
2159 | fld qword [BASE+16] 2337 | fld qword [BASE+16]
2160 | fistp TMP2 2338 | fistp TMP2
2161 } 2339 }
2162 |1: 2340 |1:
2163 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2341 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2164 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback 2342 | cmp dword [BASE+12], LJ_TISNUM
2343 if (LJ_DUALNUM) {
2344 | jne ->fff_fallback
2345 } else {
2346 | jae ->fff_fallback
2347 }
2165 | mov STR:RB, [BASE] 2348 | mov STR:RB, [BASE]
2166 | mov TMP3, STR:RB 2349 | mov TMP3, STR:RB
2167 | mov RB, STR:RB->len 2350 | mov RB, STR:RB->len
2168 if (sse) { 2351 if (LJ_DUALNUM) {
2352 | mov RA, dword [BASE+8]
2353 } else if (sse) {
2169 | cvttsd2si RA, qword [BASE+8] 2354 | cvttsd2si RA, qword [BASE+8]
2170 } else { 2355 } else {
2171 |.if not X64 2356 |.if not X64
@@ -2219,11 +2404,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2219 |.ffunc_2 string_rep // Only handle the 1-char case inline. 2404 |.ffunc_2 string_rep // Only handle the 1-char case inline.
2220 | ffgccheck 2405 | ffgccheck
2221 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2406 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2222 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback 2407 | cmp dword [BASE+12], LJ_TISNUM
2223 | mov STR:RB, [BASE] 2408 | mov STR:RB, [BASE]
2224 if (sse) { 2409 if (LJ_DUALNUM) {
2410 | jne ->fff_fallback
2411 | mov RC, dword [BASE+8]
2412 } else if (sse) {
2413 | jae ->fff_fallback
2225 | cvttsd2si RC, qword [BASE+8] 2414 | cvttsd2si RC, qword [BASE+8]
2226 } else { 2415 } else {
2416 | jae ->fff_fallback
2227 | fld qword [BASE+8] 2417 | fld qword [BASE+8]
2228 | fistp TMP2 2418 | fistp TMP2
2229 | mov RC, TMP2 2419 | mov RC, TMP2
@@ -2320,7 +2510,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2320 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) 2510 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2321 | // Length of table returned in eax (RD). 2511 | // Length of table returned in eax (RD).
2322 | mov BASE, RB // Restore BASE. 2512 | mov BASE, RB // Restore BASE.
2323 if (sse) { 2513 if (LJ_DUALNUM) {
2514 | mov RB, RD; jmp ->fff_resi
2515 } else if (sse) {
2324 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 2516 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2325 } else { 2517 } else {
2326 |.if not X64 2518 |.if not X64
@@ -2332,49 +2524,85 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2332 | 2524 |
2333 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). 2525 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2334 | 2526 |
2335 if (sse) { 2527 |.macro .ffunc_bit, name, kind
2336 |.ffunc_nsse bit_tobit 2528 | .ffunc name
2337 | sseconst_tobit xmm1, RBa 2529 |.if kind == 2
2338 | addsd xmm0, xmm1 2530 ||if (sse) {
2339 | movd RB, xmm0 2531 | sseconst_tobit xmm1, RBa
2340 | cvtsi2sd xmm0, RB 2532 ||} else {
2341 | jmp ->fff_resxmm0 2533 | mov TMP1, TOBIT_BIAS
2342 } else { 2534 ||}
2343 |.if not X64 2535 |.endif
2344 |.ffunc_n bit_tobit 2536 | cmp dword [BASE+4], LJ_TISNUM
2345 | mov TMP1, TOBIT_BIAS 2537 ||if (LJ_DUALNUM) {
2346 | fadd TMP1 2538 | jne >1
2347 | fstp FPARG1 // 64 bit FP store. 2539 | mov RB, dword [BASE]
2348 | fild ARG1 // 32 bit integer load (s2lfwd ok). 2540 |.if kind > 0
2349 | jmp ->fff_resn 2541 | jmp >2
2350 |.endif 2542 |.else
2351 } 2543 | jmp ->fff_resbit
2352 | 2544 |.endif
2353 |.macro .ffunc_bit, name 2545 |1:
2546 | ja ->fff_fallback
2547 ||} else {
2548 | jae ->fff_fallback
2549 ||}
2354 ||if (sse) { 2550 ||if (sse) {
2355 | .ffunc_nsse name 2551 | movsd xmm0, qword [BASE]
2552 |.if kind < 2
2356 | sseconst_tobit xmm1, RBa 2553 | sseconst_tobit xmm1, RBa
2554 |.endif
2357 | addsd xmm0, xmm1 2555 | addsd xmm0, xmm1
2358 | movd RB, xmm0 2556 | movd RB, xmm0
2359 ||} else { 2557 ||} else {
2360 |.if not X64 2558 |.if not X64
2361 | .ffunc_n name 2559 | fld qword [BASE]
2560 |.if kind < 2
2362 | mov TMP1, TOBIT_BIAS 2561 | mov TMP1, TOBIT_BIAS
2562 |.endif
2363 | fadd TMP1 2563 | fadd TMP1
2364 | fstp FPARG1 2564 | fstp FPARG1
2565 |.if kind > 0
2365 | mov RB, ARG1 2566 | mov RB, ARG1
2366 |.endif 2567 |.endif
2568 |.endif
2367 ||} 2569 ||}
2570 |2:
2368 |.endmacro 2571 |.endmacro
2369 | 2572 |
2573 |.ffunc_bit bit_tobit, 0
2574 if (LJ_DUALNUM || sse) {
2575 if (!sse) {
2576 |.if not X64
2577 | mov RB, ARG1
2578 |.endif
2579 }
2580 | jmp ->fff_resbit
2581 } else {
2582 |.if not X64
2583 | fild ARG1
2584 | jmp ->fff_resn
2585 |.endif
2586 }
2587 |
2370 |.macro .ffunc_bit_op, name, ins 2588 |.macro .ffunc_bit_op, name, ins
2371 | .ffunc_bit name 2589 | .ffunc_bit name, 2
2372 | mov TMP2, NARGS:RD // Save for fallback. 2590 | mov TMP2, NARGS:RD // Save for fallback.
2373 | lea RD, [BASE+NARGS:RD*8-16] 2591 | lea RD, [BASE+NARGS:RD*8-16]
2374 |1: 2592 |1:
2375 | cmp RD, BASE 2593 | cmp RD, BASE
2376 | jbe ->fff_resbit 2594 | jbe ->fff_resbit
2377 | cmp dword [RD+4], LJ_TISNUM; ja ->fff_fallback_bit_op 2595 | cmp dword [RD+4], LJ_TISNUM
2596 ||if (LJ_DUALNUM) {
2597 | jne >2
2598 | ins RB, dword [RD]
2599 | sub RD, 8
2600 | jmp <1
2601 |2:
2602 | ja ->fff_fallback_bit_op
2603 ||} else {
2604 | jae ->fff_fallback_bit_op
2605 ||}
2378 ||if (sse) { 2606 ||if (sse) {
2379 | movsd xmm0, qword [RD] 2607 | movsd xmm0, qword [RD]
2380 | addsd xmm0, xmm1 2608 | addsd xmm0, xmm1
@@ -2396,13 +2624,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2396 |.ffunc_bit_op bit_bor, or 2624 |.ffunc_bit_op bit_bor, or
2397 |.ffunc_bit_op bit_bxor, xor 2625 |.ffunc_bit_op bit_bxor, xor
2398 | 2626 |
2399 |.ffunc_bit bit_bswap 2627 |.ffunc_bit bit_bswap, 1
2400 | bswap RB 2628 | bswap RB
2401 | jmp ->fff_resbit 2629 | jmp ->fff_resbit
2402 | 2630 |
2403 |.ffunc_bit bit_bnot 2631 |.ffunc_bit bit_bnot, 1
2404 | not RB 2632 | not RB
2405 if (sse) { 2633 if (LJ_DUALNUM) {
2634 | jmp ->fff_resbit
2635 } else if (sse) {
2406 |->fff_resbit: 2636 |->fff_resbit:
2407 | cvtsi2sd xmm0, RB 2637 | cvtsi2sd xmm0, RB
2408 | jmp ->fff_resxmm0 2638 | jmp ->fff_resxmm0
@@ -2420,12 +2650,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2420 | jmp ->fff_fallback 2650 | jmp ->fff_fallback
2421 | 2651 |
2422 |.macro .ffunc_bit_sh, name, ins 2652 |.macro .ffunc_bit_sh, name, ins
2423 ||if (sse) { 2653 ||if (LJ_DUALNUM) {
2654 | .ffunc_bit name, 1
2655 | // Note: no inline conversion from number for 2nd argument!
2656 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2657 | mov RA, dword [BASE+8]
2658 ||} else if (sse) {
2424 | .ffunc_nnsse name 2659 | .ffunc_nnsse name
2425 | sseconst_tobit xmm2, RBa 2660 | sseconst_tobit xmm2, RBa
2426 | addsd xmm0, xmm2 2661 | addsd xmm0, xmm2
2427 | addsd xmm1, xmm2 2662 | addsd xmm1, xmm2
2428 | mov RC, RA // Assumes RA is ecx.
2429 | movd RB, xmm0 2663 | movd RB, xmm0
2430 | movd RA, xmm1 2664 | movd RA, xmm1
2431 ||} else { 2665 ||} else {
@@ -2436,13 +2670,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2436 | fstp FPARG3 2670 | fstp FPARG3
2437 | fadd TMP1 2671 | fadd TMP1
2438 | fstp FPARG1 2672 | fstp FPARG1
2439 | mov RC, RA // Assumes RA is ecx.
2440 | mov RA, ARG3 2673 | mov RA, ARG3
2441 | mov RB, ARG1 2674 | mov RB, ARG1
2442 |.endif 2675 |.endif
2443 ||} 2676 ||}
2444 | ins RB, cl 2677 | ins RB, cl // Assumes RA is ecx.
2445 | mov RA, RC
2446 | jmp ->fff_resbit 2678 | jmp ->fff_resbit
2447 |.endmacro 2679 |.endmacro
2448 | 2680 |
@@ -3073,7 +3305,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3073 | ret 3305 | ret
3074 |6: 3306 |6:
3075 | je <5 // x^1 ==> x 3307 | je <5 // x^1 ==> x
3076 | jb >7 3308 | jb >7 // x^0 ==> 1
3077 | neg eax 3309 | neg eax
3078 | call <1 3310 | call <1
3079 | sseconst_1 xmm1, RDa 3311 | sseconst_1 xmm1, RDa
@@ -3536,43 +3768,100 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3536 3768
3537 /* Remember: all ops branch for a true comparison, fall through otherwise. */ 3769 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3538 3770
3771 |.macro jmp_comp, lt, ge, le, gt, target
3772 ||switch (op) {
3773 ||case BC_ISLT:
3774 | lt target
3775 ||break;
3776 ||case BC_ISGE:
3777 | ge target
3778 ||break;
3779 ||case BC_ISLE:
3780 | le target
3781 ||break;
3782 ||case BC_ISGT:
3783 | gt target
3784 ||break;
3785 ||default: break; /* Shut up GCC. */
3786 ||}
3787 |.endmacro
3788
3539 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3789 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3540 | // RA = src1, RD = src2, JMP with RD = target 3790 | // RA = src1, RD = src2, JMP with RD = target
3541 | ins_AD 3791 | ins_AD
3542 | checknum RA, ->vmeta_comp 3792 if (LJ_DUALNUM) {
3543 | checknum RD, ->vmeta_comp 3793 | checkint RA, >7
3794 | checkint RD, >8
3795 | mov RB, dword [BASE+RA*8]
3796 | add PC, 4
3797 | cmp RB, dword [BASE+RD*8]
3798 | jmp_comp jge, jl, jg, jle, >9
3799 |6:
3800 | movzx RD, PC_RD
3801 | branchPC RD
3802 |9:
3803 | ins_next
3804 |
3805 |7: // RA is not an integer.
3806 | ja ->vmeta_comp
3807 | // RA is a number.
3808 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3809 | // RA is a number, RD is an integer.
3810 if (sse) {
3811 | cvtsi2sd xmm0, dword [BASE+RD*8]
3812 | jmp >2
3813 } else {
3814 | fld qword [BASE+RA*8]
3815 | fild dword [BASE+RD*8]
3816 | jmp >3
3817 }
3818 |
3819 |8: // RA is an integer, RD is not an integer.
3820 | ja ->vmeta_comp
3821 | // RA is an integer, RD is a number.
3822 if (sse) {
3823 | cvtsi2sd xmm1, dword [BASE+RA*8]
3824 | movsd xmm0, qword [BASE+RD*8]
3825 | add PC, 4
3826 | ucomisd xmm0, xmm1
3827 | jmp_comp jbe, ja, jb, jae, <9
3828 | jmp <6
3829 } else {
3830 | fild dword [BASE+RA*8]
3831 | jmp >2
3832 }
3833 } else {
3834 | checknum RA, ->vmeta_comp
3835 | checknum RD, ->vmeta_comp
3836 }
3544 if (sse) { 3837 if (sse) {
3838 |1:
3545 | movsd xmm0, qword [BASE+RD*8] 3839 | movsd xmm0, qword [BASE+RD*8]
3840 |2:
3546 | add PC, 4 3841 | add PC, 4
3547 | ucomisd xmm0, qword [BASE+RA*8] 3842 | ucomisd xmm0, qword [BASE+RA*8]
3843 |3:
3548 } else { 3844 } else {
3845 |1:
3549 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. 3846 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
3847 |2:
3550 | fld qword [BASE+RD*8] 3848 | fld qword [BASE+RD*8]
3849 |3:
3551 | add PC, 4 3850 | add PC, 4
3552 | fcomparepp // eax (RD) modified! 3851 | fcomparepp // eax (RD) modified!
3553 } 3852 }
3554 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3853 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3555 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3854 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3556 switch (op) { 3855 if (LJ_DUALNUM) {
3557 case BC_ISLT: 3856 | jmp_comp jbe, ja, jb, jae, <9
3558 | jbe >2 3857 | jmp <6
3559 break; 3858 } else {
3560 case BC_ISGE: 3859 | jmp_comp jbe, ja, jb, jae, >1
3561 | ja >2 3860 | movzx RD, PC_RD
3562 break; 3861 | branchPC RD
3563 case BC_ISLE: 3862 |1:
3564 | jb >2 3863 | ins_next
3565 break;
3566 case BC_ISGT:
3567 | jae >2
3568 break;
3569 default: break; /* Shut up GCC. */
3570 } 3864 }
3571 |1:
3572 | movzx RD, PC_RD
3573 | branchPC RD
3574 |2:
3575 | ins_next
3576 break; 3865 break;
3577 3866
3578 case BC_ISEQV: case BC_ISNEV: 3867 case BC_ISEQV: case BC_ISNEV:
@@ -3580,14 +3869,61 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3580 | ins_AD // RA = src1, RD = src2, JMP with RD = target 3869 | ins_AD // RA = src1, RD = src2, JMP with RD = target
3581 | mov RB, [BASE+RD*8+4] 3870 | mov RB, [BASE+RD*8+4]
3582 | add PC, 4 3871 | add PC, 4
3583 | cmp RB, LJ_TISNUM; ja >5 3872 if (LJ_DUALNUM) {
3584 | checknum RA, >5 3873 | cmp RB, LJ_TISNUM; jne >7
3874 | checkint RA, >8
3875 | mov RB, dword [BASE+RD*8]
3876 | cmp RB, dword [BASE+RA*8]
3877 if (vk) {
3878 | jne >9
3879 } else {
3880 | je >9
3881 }
3882 | movzx RD, PC_RD
3883 | branchPC RD
3884 |9:
3885 | ins_next
3886 |
3887 |7: // RD is not an integer.
3888 | ja >5
3889 | // RD is a number.
3890 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
3891 | // RD is a number, RA is an integer.
3892 if (sse) {
3893 | cvtsi2sd xmm0, dword [BASE+RA*8]
3894 } else {
3895 | fild dword [BASE+RA*8]
3896 }
3897 | jmp >2
3898 |
3899 |8: // RD is an integer, RA is not an integer.
3900 | ja >5
3901 | // RD is an integer, RA is a number.
3902 if (sse) {
3903 | cvtsi2sd xmm0, dword [BASE+RD*8]
3904 | ucomisd xmm0, qword [BASE+RA*8]
3905 } else {
3906 | fild dword [BASE+RD*8]
3907 | fld qword [BASE+RA*8]
3908 }
3909 | jmp >4
3910 |
3911 } else {
3912 | cmp RB, LJ_TISNUM; jae >5
3913 | checknum RA, >5
3914 }
3585 if (sse) { 3915 if (sse) {
3586 | movsd xmm0, qword [BASE+RD*8] 3916 |1:
3587 | ucomisd xmm0, qword [BASE+RA*8] 3917 | movsd xmm0, qword [BASE+RA*8]
3918 |2:
3919 | ucomisd xmm0, qword [BASE+RD*8]
3920 |4:
3588 } else { 3921 } else {
3922 |1:
3589 | fld qword [BASE+RA*8] 3923 | fld qword [BASE+RA*8]
3924 |2:
3590 | fld qword [BASE+RD*8] 3925 | fld qword [BASE+RD*8]
3926 |4:
3591 | fcomparepp // eax (RD) modified! 3927 | fcomparepp // eax (RD) modified!
3592 } 3928 }
3593 iseqne_fp: 3929 iseqne_fp:
@@ -3616,7 +3952,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3616 | branchPC RD 3952 | branchPC RD
3617 |1: // EQ: Fallthrough to next instruction. 3953 |1: // EQ: Fallthrough to next instruction.
3618 } 3954 }
3619 | ins_next 3955 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
3956 op == BC_ISEQN || op == BC_ISNEN)) {
3957 | jmp <9
3958 } else {
3959 | ins_next
3960 }
3620 | 3961 |
3621 if (op == BC_ISEQV || op == BC_ISNEV) { 3962 if (op == BC_ISEQV || op == BC_ISNEV) {
3622 |5: // Either or both types are not numbers. 3963 |5: // Either or both types are not numbers.
@@ -3652,7 +3993,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3652 | jmp ->vmeta_equal // Handle __eq metamethod. 3993 | jmp ->vmeta_equal // Handle __eq metamethod.
3653 } else if (LJ_HASFFI) { 3994 } else if (LJ_HASFFI) {
3654 |3: 3995 |3:
3655 | cmp RB, LJ_TCDATA; jne <2 3996 | cmp RB, LJ_TCDATA
3997 if (LJ_DUALNUM && vk) {
3998 | jne <9
3999 } else {
4000 | jne <2
4001 }
3656 | jmp ->vmeta_equal_cd 4002 | jmp ->vmeta_equal_cd
3657 } 4003 }
3658 break; 4004 break;
@@ -3676,14 +4022,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
|  // Equality test of stack slot RA against numeric constant RD.
|  // Falls into the shared iseqne_fp tail once the FP compare flags are set.
|  ins_AD  // RA = src, RD = num const, JMP with RD = target
|  mov RB, [BASE+RA*8+4]  // RB = type word of the RA slot.
|  add PC, 4
if (LJ_DUALNUM) {
  |  // Fast path: both operands are integers, compare their low words.
  |  cmp RB, LJ_TISNUM; jne >7
  |  cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
  |  mov RB, dword [KBASE+RD*8]
  |  cmp RB, dword [BASE+RA*8]
  if (vk) {
    |  jne >9
  } else {
    |  je >9
  }
  |  movzx RD, PC_RD
  |  branchPC RD
  |9:
  |  ins_next
  |
  |7:  // RA is not an integer (flags still from cmp RB, LJ_TISNUM).
  |  ja >3
  |  // RA is a number.
  |  cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
  |  // RA is a number, RD is an integer: convert RD, then join at 2.
  if (sse) {
    |  cvtsi2sd xmm0, dword [KBASE+RD*8]
  } else {
    |  fild dword [KBASE+RD*8]
  }
  |  jmp >2
  |
  |8:  // RA is an integer, RD is a number.
  if (sse) {
    |  cvtsi2sd xmm0, dword [BASE+RA*8]
    |  ucomisd xmm0, qword [KBASE+RD*8]
  } else {
    |  fild dword [BASE+RA*8]
    |  // RD indexes the constant table (KBASE), not the stack (BASE).
    |  fld qword [KBASE+RD*8]
  }
  |  jmp >4
  |
} else {
  |  cmp RB, LJ_TISNUM; jae >3
}
if (sse) {
  |1:
  |  movsd xmm0, qword [KBASE+RD*8]
  |2:
  |  ucomisd xmm0, qword [BASE+RA*8]
  |4:  // SSE entry from 8: flags are already set by its own ucomisd.
} else {
  |1:
  |  fld qword [KBASE+RD*8]
  |2:
  |  fld qword [BASE+RA*8]
  |4:  // x87 entry from 8: both operands are pushed but not yet compared.
  |  fcomparepp  // eax (RD) modified!
}
goto iseqne_fp;
3689 case BC_ISEQP: case BC_ISNEP: 4080 case BC_ISEQP: case BC_ISNEP:
@@ -3760,7 +4151,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3760 break; 4151 break;
3761 case BC_UNM: 4152 case BC_UNM:
3762 | ins_AD // RA = dst, RD = src 4153 | ins_AD // RA = dst, RD = src
3763 | checknum RD, ->vmeta_unm 4154 if (LJ_DUALNUM) {
4155 | checkint RD, >5
4156 | mov RB, [BASE+RD*8]
4157 | neg RB
4158 | jo >4
4159 | mov dword [BASE+RA*8+4], LJ_TISNUM
4160 | mov dword [BASE+RA*8], RB
4161 |9:
4162 | ins_next
4163 |4:
4164 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
4165 | mov dword [BASE+RA*8], 0
4166 | jmp <9
4167 |5:
4168 | ja ->vmeta_unm
4169 } else {
4170 | checknum RD, ->vmeta_unm
4171 }
3764 if (sse) { 4172 if (sse) {
3765 | movsd xmm0, qword [BASE+RD*8] 4173 | movsd xmm0, qword [BASE+RD*8]
3766 | sseconst_sign xmm1, RDa 4174 | sseconst_sign xmm1, RDa
@@ -3771,13 +4179,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3771 | fchs 4179 | fchs
3772 | fstp qword [BASE+RA*8] 4180 | fstp qword [BASE+RA*8]
3773 } 4181 }
3774 | ins_next 4182 if (LJ_DUALNUM) {
4183 | jmp <9
4184 } else {
4185 | ins_next
4186 }
3775 break; 4187 break;
3776 case BC_LEN: 4188 case BC_LEN:
3777 | ins_AD // RA = dst, RD = src 4189 | ins_AD // RA = dst, RD = src
3778 | checkstr RD, >2 4190 | checkstr RD, >2
3779 | mov STR:RD, [BASE+RD*8] 4191 | mov STR:RD, [BASE+RD*8]
3780 if (sse) { 4192 if (LJ_DUALNUM) {
4193 | mov RD, dword STR:RD->len
4194 |1:
4195 | mov dword [BASE+RA*8+4], LJ_TISNUM
4196 | mov dword [BASE+RA*8], RD
4197 } else if (sse) {
3781 | xorps xmm0, xmm0 4198 | xorps xmm0, xmm0
3782 | cvtsi2sd xmm0, dword STR:RD->len 4199 | cvtsi2sd xmm0, dword STR:RD->len
3783 |1: 4200 |1:
@@ -3793,45 +4210,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3793 | mov TAB:FCARG1, [BASE+RD*8] 4210 | mov TAB:FCARG1, [BASE+RD*8]
3794 | mov RB, BASE // Save BASE. 4211 | mov RB, BASE // Save BASE.
3795 | call extern lj_tab_len@4 // (GCtab *t) 4212 | call extern lj_tab_len@4 // (GCtab *t)
3796 | // Length of table returned in eax (RC). 4213 | // Length of table returned in eax (RD).
3797 if (sse) { 4214 if (LJ_DUALNUM) {
3798 | cvtsi2sd xmm0, RC 4215 | // Nothing to do.
3799 | mov BASE, RB // Restore BASE. 4216 } else if (sse) {
4217 | cvtsi2sd xmm0, RD
3800 } else { 4218 } else {
3801 |.if not X64 4219 |.if not X64
3802 | mov ARG1, RC 4220 | mov ARG1, RD
3803 | mov BASE, RB // Restore BASE.
3804 | fild ARG1 4221 | fild ARG1
3805 |.endif 4222 |.endif
3806 } 4223 }
4224 | mov BASE, RB // Restore BASE.
3807 | movzx RA, PC_RA 4225 | movzx RA, PC_RA
3808 | jmp <1 4226 | jmp <1
3809 break; 4227 break;
3810 4228
3811 /* -- Binary ops -------------------------------------------------------- */ 4229 /* -- Binary ops -------------------------------------------------------- */
3812 4230
3813 |.macro ins_arithpre, ins, sseins, ssereg 4231 |.macro ins_arithpre, x87ins, sseins, ssereg
3814 | ins_ABC 4232 | ins_ABC
3815 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4233 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3816 ||switch (vk) { 4234 ||switch (vk) {
3817 ||case 0: 4235 ||case 0:
3818 | checknum RB, ->vmeta_arith_vn 4236 | checknum RB, ->vmeta_arith_vn
4237 ||if (LJ_DUALNUM) {
4238 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4239 ||}
3819 ||if (sse) { 4240 ||if (sse) {
3820 | movsd xmm0, qword [BASE+RB*8] 4241 | movsd xmm0, qword [BASE+RB*8]
3821 | sseins ssereg, qword [KBASE+RC*8] 4242 | sseins ssereg, qword [KBASE+RC*8]
3822 ||} else { 4243 ||} else {
3823 | fld qword [BASE+RB*8] 4244 | fld qword [BASE+RB*8]
3824 | ins qword [KBASE+RC*8] 4245 | x87ins qword [KBASE+RC*8]
3825 ||} 4246 ||}
3826 || break; 4247 || break;
3827 ||case 1: 4248 ||case 1:
3828 | checknum RB, ->vmeta_arith_nv 4249 | checknum RB, ->vmeta_arith_nv
4250 ||if (LJ_DUALNUM) {
4251 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4252 ||}
3829 ||if (sse) { 4253 ||if (sse) {
3830 | movsd xmm0, qword [KBASE+RC*8] 4254 | movsd xmm0, qword [KBASE+RC*8]
3831 | sseins ssereg, qword [BASE+RB*8] 4255 | sseins ssereg, qword [BASE+RB*8]
3832 ||} else { 4256 ||} else {
3833 | fld qword [KBASE+RC*8] 4257 | fld qword [KBASE+RC*8]
3834 | ins qword [BASE+RB*8] 4258 | x87ins qword [BASE+RB*8]
3835 ||} 4259 ||}
3836 || break; 4260 || break;
3837 ||default: 4261 ||default:
@@ -3842,12 +4266,44 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3842 | sseins ssereg, qword [BASE+RC*8] 4266 | sseins ssereg, qword [BASE+RC*8]
3843 ||} else { 4267 ||} else {
3844 | fld qword [BASE+RB*8] 4268 | fld qword [BASE+RB*8]
3845 | ins qword [BASE+RC*8] 4269 | x87ins qword [BASE+RC*8]
3846 ||} 4270 ||}
3847 || break; 4271 || break;
3848 ||} 4272 ||}
3849 |.endmacro 4273 |.endmacro
3850 | 4274 |
|  // Integer arithmetic on two integer-typed operands (used when LJ_DUALNUM).
|  // intins is the two-operand integer instruction to apply.
|  // vk picks the operand form from the opcode:
|  //   0: stack slot RB op constant RC, 1: constant RC op stack slot RB,
|  //   otherwise: stack slot RB op stack slot RC.
|  // A non-integer operand jumps to ->vmeta_arith_{vn,nv,vv}; an overflowing
|  // result jumps to the matching ->vmeta_arith_{vn,nv,vv}o target.
|.macro ins_arithdn, intins
|  ins_ABC
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||if (vk == 0) {
|  checkint RB, ->vmeta_arith_vn
|  cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
|  mov RB, [BASE+RB*8]
|  intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
||} else if (vk == 1) {
|  checkint RB, ->vmeta_arith_nv
|  cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
|  mov RC, [KBASE+RC*8]
|  intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
||} else {
|  checkint RB, ->vmeta_arith_vv
|  checkint RC, ->vmeta_arith_vv
|  mov RB, [BASE+RB*8]
|  intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
||}
|  // Store the integer result: type tag first, then the value. The value
|  // is in RC for the constant-first form and in RB for the other forms.
|  mov dword [BASE+RA*8+4], LJ_TISNUM
||if (vk == 1) {
|  mov dword [BASE+RA*8], RC
||} else {
|  mov dword [BASE+RA*8], RB
||}
|  ins_next
|.endmacro
|
4306 |
3851 |.macro ins_arithpost 4307 |.macro ins_arithpost
3852 ||if (sse) { 4308 ||if (sse) {
3853 | movsd qword [BASE+RA*8], xmm0 4309 | movsd qword [BASE+RA*8], xmm0
@@ -3856,21 +4312,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3856 ||} 4312 ||}
3857 |.endmacro 4313 |.endmacro
3858 | 4314 |
|  // Floating-point arithmetic: load and combine the operands with the
|  // x87 or SSE instruction (ins_arithpre), store the result
|  // (ins_arithpost) and dispatch the next instruction.
|.macro ins_arith, x87ins, sseins
|  ins_arithpre x87ins, sseins, xmm0
|  ins_arithpost
|  ins_next
|.endmacro
|
|  // Three-operand form: use the integer variant when LJ_DUALNUM is set,
|  // otherwise the floating-point form above.
|  // NOTE(review): both macros share one name and are presumably told apart
|  // by their argument count -- confirm against the DynASM preprocessor.
|.macro ins_arith, intins, x87ins, sseins
||if (LJ_DUALNUM) {
|  ins_arithdn intins
||} else {
|  ins_arith x87ins, sseins
||}
|.endmacro
3864 4328
3865 | // RA = dst, RB = src1 or num const, RC = src2 or num const 4329 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3866 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 4330 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3867 | ins_arith fadd, addsd 4331 | ins_arith add, fadd, addsd
3868 break; 4332 break;
3869 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4333 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3870 | ins_arith fsub, subsd 4334 | ins_arith sub, fsub, subsd
3871 break; 4335 break;
3872 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4336 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3873 | ins_arith fmul, mulsd 4337 | ins_arith imul, fmul, mulsd
3874 break; 4338 break;
3875 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4339 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3876 | ins_arith fdiv, divsd 4340 | ins_arith fdiv, divsd
@@ -3953,7 +4417,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3953 break; 4417 break;
3954 case BC_KSHORT: 4418 case BC_KSHORT:
3955 | ins_AD // RA = dst, RD = signed int16 literal 4419 | ins_AD // RA = dst, RD = signed int16 literal
3956 if (sse) { 4420 if (LJ_DUALNUM) {
4421 | movsx RD, RDW
4422 | mov dword [BASE+RA*8+4], LJ_TISNUM
4423 | mov dword [BASE+RA*8], RD
4424 } else if (sse) {
3957 | movsx RD, RDW // Sign-extend literal. 4425 | movsx RD, RDW // Sign-extend literal.
3958 | cvtsi2sd xmm0, RD 4426 | cvtsi2sd xmm0, RD
3959 | movsd qword [BASE+RA*8], xmm0 4427 | movsd qword [BASE+RA*8], xmm0
@@ -4236,23 +4704,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4236 | checktab RB, ->vmeta_tgetv 4704 | checktab RB, ->vmeta_tgetv
4237 | mov TAB:RB, [BASE+RB*8] 4705 | mov TAB:RB, [BASE+RB*8]
4238 | 4706 |
4239 | // Integer key? Convert number to int and back and compare. 4707 | // Integer key?
4240 | checknum RC, >5 4708 if (LJ_DUALNUM) {
4241 if (sse) { 4709 | checkint RC, >5
4242 | movsd xmm0, qword [BASE+RC*8] 4710 | mov RC, dword [BASE+RC*8]
4243 | cvtsd2si RC, xmm0
4244 | cvtsi2sd xmm1, RC
4245 | ucomisd xmm0, xmm1
4246 } else { 4711 } else {
4247 |.if not X64 4712 | // Convert number to int and back and compare.
4248 | fld qword [BASE+RC*8] 4713 | checknum RC, >5
4249 | fist ARG1 4714 if (sse) {
4250 | fild ARG1 4715 | movsd xmm0, qword [BASE+RC*8]
4251 | fcomparepp // eax (RC) modified! 4716 | cvtsd2si RC, xmm0
4252 | mov RC, ARG1 4717 | cvtsi2sd xmm1, RC
4253 |.endif 4718 | ucomisd xmm0, xmm1
4719 } else {
4720 |.if not X64
4721 | fld qword [BASE+RC*8]
4722 | fist ARG1
4723 | fild ARG1
4724 | fcomparepp // eax (RC) modified!
4725 | mov RC, ARG1
4726 |.endif
4727 }
4728 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4254 } 4729 }
4255 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4256 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4730 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4257 | jae ->vmeta_tgetv // Not in array part? Use fallback. 4731 | jae ->vmeta_tgetv // Not in array part? Use fallback.
4258 | shl RC, 3 4732 | shl RC, 3
@@ -4380,23 +4854,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4380 | checktab RB, ->vmeta_tsetv 4854 | checktab RB, ->vmeta_tsetv
4381 | mov TAB:RB, [BASE+RB*8] 4855 | mov TAB:RB, [BASE+RB*8]
4382 | 4856 |
4383 | // Integer key? Convert number to int and back and compare. 4857 | // Integer key?
4384 | checknum RC, >5 4858 if (LJ_DUALNUM) {
4385 if (sse) { 4859 | checkint RC, >5
4386 | movsd xmm0, qword [BASE+RC*8] 4860 | mov RC, dword [BASE+RC*8]
4387 | cvtsd2si RC, xmm0
4388 | cvtsi2sd xmm1, RC
4389 | ucomisd xmm0, xmm1
4390 } else { 4861 } else {
4391 |.if not X64 4862 | // Convert number to int and back and compare.
4392 | fld qword [BASE+RC*8] 4863 | checknum RC, >5
4393 | fist ARG1 4864 if (sse) {
4394 | fild ARG1 4865 | movsd xmm0, qword [BASE+RC*8]
4395 | fcomparepp // eax (RC) modified! 4866 | cvtsd2si RC, xmm0
4396 | mov RC, ARG1 4867 | cvtsi2sd xmm1, RC
4397 |.endif 4868 | ucomisd xmm0, xmm1
4869 } else {
4870 |.if not X64
4871 | fld qword [BASE+RC*8]
4872 | fist ARG1
4873 | fild ARG1
4874 | fcomparepp // eax (RC) modified!
4875 | mov RC, ARG1
4876 |.endif
4877 }
4878 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
4398 } 4879 }
4399 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
4400 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4880 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4401 | jae ->vmeta_tsetv 4881 | jae ->vmeta_tsetv
4402 | shl RC, 3 4882 | shl RC, 3
@@ -4755,7 +5235,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4755 |1: // Traverse array part. 5235 |1: // Traverse array part.
4756 | cmp RC, DISPATCH; jae >5 // Index points after array part? 5236 | cmp RC, DISPATCH; jae >5 // Index points after array part?
4757 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 5237 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
4758 if (sse) { 5238 if (LJ_DUALNUM) {
5239 | mov dword [BASE+RA*8+4], LJ_TISNUM
5240 | mov dword [BASE+RA*8], RC
5241 } else if (sse) {
4759 | cvtsi2sd xmm0, RC 5242 | cvtsi2sd xmm0, RC
4760 } else { 5243 } else {
4761 | fild dword [BASE+RA*8-8] 5244 | fild dword [BASE+RA*8-8]
@@ -4772,7 +5255,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4772 |.endif 5255 |.endif
4773 | add RC, 1 5256 | add RC, 1
4774 | // Return array index as a numeric key. 5257 | // Return array index as a numeric key.
4775 if (sse) { 5258 if (LJ_DUALNUM) {
5259 | // See above.
5260 } else if (sse) {
4776 | movsd qword [BASE+RA*8], xmm0 5261 | movsd qword [BASE+RA*8], xmm0
4777 } else { 5262 } else {
4778 | fstp qword [BASE+RA*8] 5263 | fstp qword [BASE+RA*8]
@@ -4788,7 +5273,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4788 | 5273 |
4789 |4: // Skip holes in array part. 5274 |4: // Skip holes in array part.
4790 | add RC, 1 5275 | add RC, 1
4791 if (!sse) { 5276 if (!LJ_DUALNUM && !sse) {
4792 | mov [BASE+RA*8-8], RC 5277 | mov [BASE+RA*8-8], RC
4793 } 5278 }
4794 | jmp <1 5279 | jmp <1
@@ -5020,10 +5505,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5020 5505
5021 /* -- Loops and branches ------------------------------------------------ */ 5506 /* -- Loops and branches ------------------------------------------------ */
5022 5507
|  // Slots of a numeric for loop, addressed relative to RA.
|  // The value operands carry no size qualifier: each use site supplies
|  // dword (integer access) or qword (FP access) itself.
|  // The FOR_T* operands address the dword at offset +4 of each slot,
|  // which the loop code compares against LJ_TISNUM.
|.define FOR_IDX,  [RA]
|.define FOR_TIDX,  dword [RA+4]
|.define FOR_STOP,  [RA+8]
|.define FOR_TSTOP,  dword [RA+12]
|.define FOR_STEP,  [RA+16]
|.define FOR_TSTEP,  dword [RA+20]
|.define FOR_EXT,  [RA+24]
|.define FOR_TEXT,  dword [RA+28]
5027 5512
5028 case BC_FORL: 5513 case BC_FORL:
5029#if LJ_HASJIT 5514#if LJ_HASJIT
@@ -5042,37 +5527,101 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5042 vk = (op == BC_IFORL || op == BC_JFORL); 5527 vk = (op == BC_IFORL || op == BC_JFORL);
5043 | ins_AJ // RA = base, RD = target (after end of loop or start of loop) 5528 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
5044 | lea RA, [BASE+RA*8] 5529 | lea RA, [BASE+RA*8]
5530 if (LJ_DUALNUM) {
5531 | cmp FOR_TIDX, LJ_TISNUM; jne >9
5532 if (!vk) {
5533 | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
5534 | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
5535 | mov RB, dword FOR_IDX
5536 | cmp dword FOR_STEP, 0; jl >5
5537 } else {
5538 | mov RB, dword FOR_STEP
5539 | test RB, RB; js >5
5540 | add RB, dword FOR_IDX; jo >1
5541 | mov dword FOR_IDX, RB
5542 }
5543 | cmp RB, dword FOR_STOP
5544 | mov FOR_TEXT, LJ_TISNUM
5545 | mov dword FOR_EXT, RB
5546 if (op == BC_FORI) {
5547 | jle >7
5548 |1:
5549 |6:
5550 | branchPC RD
5551 } else if (op == BC_JFORI) {
5552 | branchPC RD
5553 | movzx RD, PC_RD
5554 | jle =>BC_JLOOP
5555 |1:
5556 |6:
5557 } else if (op == BC_IFORL) {
5558 | jg >7
5559 |6:
5560 | branchPC RD
5561 |1:
5562 } else {
5563 | jle =>BC_JLOOP
5564 |1:
5565 |6:
5566 }
5567 |7:
5568 | ins_next
5569 |
5570 |5: // Invert check for negative step.
5571 if (vk) {
5572 | add RB, dword FOR_IDX; jo <1
5573 | mov dword FOR_IDX, RB
5574 }
5575 | cmp RB, dword FOR_STOP
5576 | mov FOR_TEXT, LJ_TISNUM
5577 | mov dword FOR_EXT, RB
5578 if (op == BC_FORI) {
5579 | jge <7
5580 } else if (op == BC_JFORI) {
5581 | branchPC RD
5582 | movzx RD, PC_RD
5583 | jge =>BC_JLOOP
5584 } else if (op == BC_IFORL) {
5585 | jl <7
5586 } else {
5587 | jge =>BC_JLOOP
5588 }
5589 | jmp <6
5590 |9: // Fallback to FP variant.
5591 } else if (!vk) {
5592 | cmp FOR_TIDX, LJ_TISNUM
5593 }
5045 if (!vk) { 5594 if (!vk) {
5046 | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks 5595 | jae ->vmeta_for
5047 | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for 5596 | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
5048 } 5597 }
5049 | mov RB, FOR_TSTEP // Load type/hiword of for step. 5598 | mov RB, FOR_TSTEP // Load type/hiword of for step.
5050 if (!vk) { 5599 if (!vk) {
5051 | cmp RB, LJ_TISNUM; ja ->vmeta_for 5600 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5052 } 5601 }
5053 if (sse) { 5602 if (sse) {
5054 | movsd xmm0, FOR_IDX 5603 | movsd xmm0, qword FOR_IDX
5055 | movsd xmm1, FOR_STOP 5604 | movsd xmm1, qword FOR_STOP
5056 if (vk) { 5605 if (vk) {
5057 | addsd xmm0, FOR_STEP 5606 | addsd xmm0, qword FOR_STEP
5058 | movsd FOR_IDX, xmm0 5607 | movsd qword FOR_IDX, xmm0
5059 | test RB, RB; js >3 5608 | test RB, RB; js >3
5060 } else { 5609 } else {
5061 | jl >3 5610 | jl >3
5062 } 5611 }
5063 | ucomisd xmm1, xmm0 5612 | ucomisd xmm1, xmm0
5064 |1: 5613 |1:
5065 | movsd FOR_EXT, xmm0 5614 | movsd qword FOR_EXT, xmm0
5066 } else { 5615 } else {
5067 | fld FOR_STOP 5616 | fld qword FOR_STOP
5068 | fld FOR_IDX 5617 | fld qword FOR_IDX
5069 if (vk) { 5618 if (vk) {
5070 | fadd FOR_STEP // nidx = idx + step 5619 | fadd qword FOR_STEP // nidx = idx + step
5071 | fst FOR_IDX 5620 | fst qword FOR_IDX
5072 | fst FOR_EXT 5621 | fst qword FOR_EXT
5073 | test RB, RB; js >1 5622 | test RB, RB; js >1
5074 } else { 5623 } else {
5075 | fst FOR_EXT 5624 | fst qword FOR_EXT
5076 | jl >1 5625 | jl >1
5077 } 5626 }
5078 | fxch // Swap lim/(n)idx if step non-negative. 5627 | fxch // Swap lim/(n)idx if step non-negative.
@@ -5083,20 +5632,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
5083 } 5632 }
5084 } 5633 }
5085 if (op == BC_FORI) { 5634 if (op == BC_FORI) {
5086 | jnb >2 5635 if (LJ_DUALNUM) {
5087 | branchPC RD 5636 | jnb <7
5637 } else {
5638 | jnb >2
5639 | branchPC RD
5640 }
5088 } else if (op == BC_JFORI) { 5641 } else if (op == BC_JFORI) {
5089 | branchPC RD 5642 | branchPC RD
5090 | movzx RD, PC_RD 5643 | movzx RD, PC_RD
5091 | jnb =>BC_JLOOP 5644 | jnb =>BC_JLOOP
5092 } else if (op == BC_IFORL) { 5645 } else if (op == BC_IFORL) {
5093 | jb >2 5646 if (LJ_DUALNUM) {
5094 | branchPC RD 5647 | jb <7
5648 } else {
5649 | jb >2
5650 | branchPC RD
5651 }
5095 } else { 5652 } else {
5096 | jnb =>BC_JLOOP 5653 | jnb =>BC_JLOOP
5097 } 5654 }
5098 |2: 5655 if (LJ_DUALNUM) {
5099 | ins_next 5656 | jmp <6
5657 } else {
5658 |2:
5659 | ins_next
5660 }
5100 if (sse) { 5661 if (sse) {
5101 |3: // Invert comparison if step is negative. 5662 |3: // Invert comparison if step is negative.
5102 | ucomisd xmm0, xmm1 5663 | ucomisd xmm0, xmm1