diff options
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r-- | src/buildvm_x86.dasc | 963 |
1 files changed, 762 insertions, 201 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 4a0bbeab..4d96423c 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc | |||
@@ -306,7 +306,8 @@ | |||
306 | | | 306 | | |
307 | |// Macros to test operand types. | 307 | |// Macros to test operand types. |
308 | |.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro | 308 | |.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro |
309 | |.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro | 309 | |.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro |
310 | |.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro | ||
310 | |.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro | 311 | |.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro |
311 | |.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro | 312 | |.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro |
312 | | | 313 | | |
@@ -810,7 +811,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
810 | | | 811 | | |
811 | |->vmeta_tgetb: | 812 | |->vmeta_tgetb: |
812 | | movzx RC, PC_RC | 813 | | movzx RC, PC_RC |
813 | if (sse) { | 814 | if (LJ_DUALNUM) { |
815 | | mov TMP2, LJ_TISNUM | ||
816 | | mov TMP1, RC | ||
817 | } else if (sse) { | ||
814 | | cvtsi2sd xmm0, RC | 818 | | cvtsi2sd xmm0, RC |
815 | | movsd TMPQ, xmm0 | 819 | | movsd TMPQ, xmm0 |
816 | } else { | 820 | } else { |
@@ -888,7 +892,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
888 | | | 892 | | |
889 | |->vmeta_tsetb: | 893 | |->vmeta_tsetb: |
890 | | movzx RC, PC_RC | 894 | | movzx RC, PC_RC |
891 | if (sse) { | 895 | if (LJ_DUALNUM) { |
896 | | mov TMP2, LJ_TISNUM | ||
897 | | mov TMP1, RC | ||
898 | } else if (sse) { | ||
892 | | cvtsi2sd xmm0, RC | 899 | | cvtsi2sd xmm0, RC |
893 | | movsd TMPQ, xmm0 | 900 | | movsd TMPQ, xmm0 |
894 | } else { | 901 | } else { |
@@ -1051,17 +1058,25 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1051 | | mov FCARG1, L:RB | 1058 | | mov FCARG1, L:RB |
1052 | | mov FCARG2, dword [PC-4] | 1059 | | mov FCARG2, dword [PC-4] |
1053 | | mov SAVE_PC, PC | 1060 | | mov SAVE_PC, PC |
1054 | | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns op) | 1061 | | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) |
1055 | | // 0/1 or TValue * (metamethod) returned in eax (RC). | 1062 | | // 0/1 or TValue * (metamethod) returned in eax (RC). |
1056 | | jmp <3 | 1063 | | jmp <3 |
1057 | #endif | 1064 | #endif |
1058 | | | 1065 | | |
1059 | |//-- Arithmetic metamethods --------------------------------------------- | 1066 | |//-- Arithmetic metamethods --------------------------------------------- |
1060 | | | 1067 | | |
1068 | |->vmeta_arith_vno: | ||
1069 | #if LJ_DUALNUM | ||
1070 | | movzx RB, PC_RB | ||
1071 | #endif | ||
1061 | |->vmeta_arith_vn: | 1072 | |->vmeta_arith_vn: |
1062 | | lea RC, [KBASE+RC*8] | 1073 | | lea RC, [KBASE+RC*8] |
1063 | | jmp >1 | 1074 | | jmp >1 |
1064 | | | 1075 | | |
1076 | |->vmeta_arith_nvo: | ||
1077 | #if LJ_DUALNUM | ||
1078 | | movzx RC, PC_RC | ||
1079 | #endif | ||
1065 | |->vmeta_arith_nv: | 1080 | |->vmeta_arith_nv: |
1066 | | lea RC, [KBASE+RC*8] | 1081 | | lea RC, [KBASE+RC*8] |
1067 | | lea RB, [BASE+RB*8] | 1082 | | lea RB, [BASE+RB*8] |
@@ -1073,6 +1088,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1073 | | mov RB, RC | 1088 | | mov RB, RC |
1074 | | jmp >2 | 1089 | | jmp >2 |
1075 | | | 1090 | | |
1091 | |->vmeta_arith_vvo: | ||
1092 | #if LJ_DUALNUM | ||
1093 | | movzx RB, PC_RB | ||
1094 | #endif | ||
1076 | |->vmeta_arith_vv: | 1095 | |->vmeta_arith_vv: |
1077 | | lea RC, [BASE+RC*8] | 1096 | | lea RC, [BASE+RC*8] |
1078 | |1: | 1097 | |1: |
@@ -1210,20 +1229,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1210 | | | 1229 | | |
1211 | |.macro .ffunc_n, name | 1230 | |.macro .ffunc_n, name |
1212 | | .ffunc_1 name | 1231 | | .ffunc_1 name |
1213 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 1232 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1214 | | fld qword [BASE] | 1233 | | fld qword [BASE] |
1215 | |.endmacro | 1234 | |.endmacro |
1216 | | | 1235 | | |
1217 | |.macro .ffunc_n, name, op | 1236 | |.macro .ffunc_n, name, op |
1218 | | .ffunc_1 name | 1237 | | .ffunc_1 name |
1219 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 1238 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1220 | | op | 1239 | | op |
1221 | | fld qword [BASE] | 1240 | | fld qword [BASE] |
1222 | |.endmacro | 1241 | |.endmacro |
1223 | | | 1242 | | |
1224 | |.macro .ffunc_nsse, name, op | 1243 | |.macro .ffunc_nsse, name, op |
1225 | | .ffunc_1 name | 1244 | | .ffunc_1 name |
1226 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 1245 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1227 | | op xmm0, qword [BASE] | 1246 | | op xmm0, qword [BASE] |
1228 | |.endmacro | 1247 | |.endmacro |
1229 | | | 1248 | | |
@@ -1233,24 +1252,24 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1233 | | | 1252 | | |
1234 | |.macro .ffunc_nn, name | 1253 | |.macro .ffunc_nn, name |
1235 | | .ffunc_2 name | 1254 | | .ffunc_2 name |
1236 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 1255 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1237 | | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback | 1256 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback |
1238 | | fld qword [BASE] | 1257 | | fld qword [BASE] |
1239 | | fld qword [BASE+8] | 1258 | | fld qword [BASE+8] |
1240 | |.endmacro | 1259 | |.endmacro |
1241 | | | 1260 | | |
1242 | |.macro .ffunc_nnsse, name | 1261 | |.macro .ffunc_nnsse, name |
1243 | | .ffunc_2 name | 1262 | | .ffunc_2 name |
1244 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 1263 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1245 | | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback | 1264 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback |
1246 | | movsd xmm0, qword [BASE] | 1265 | | movsd xmm0, qword [BASE] |
1247 | | movsd xmm1, qword [BASE+8] | 1266 | | movsd xmm1, qword [BASE+8] |
1248 | |.endmacro | 1267 | |.endmacro |
1249 | | | 1268 | | |
1250 | |.macro .ffunc_nnr, name | 1269 | |.macro .ffunc_nnr, name |
1251 | | .ffunc_2 name | 1270 | | .ffunc_2 name |
1252 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 1271 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1253 | | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback | 1272 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback |
1254 | | fld qword [BASE+8] | 1273 | | fld qword [BASE+8] |
1255 | | fld qword [BASE] | 1274 | | fld qword [BASE] |
1256 | |.endmacro | 1275 | |.endmacro |
@@ -1431,7 +1450,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1431 | |.ffunc tonumber | 1450 | |.ffunc tonumber |
1432 | | // Only handles the number case inline (without a base argument). | 1451 | | // Only handles the number case inline (without a base argument). |
1433 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 1452 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
1434 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 1453 | | cmp dword [BASE+4], LJ_TISNUM |
1454 | if (LJ_DUALNUM) { | ||
1455 | | jne >1 | ||
1456 | | mov RB, dword [BASE]; jmp ->fff_resi | ||
1457 | |1: | ||
1458 | | ja ->fff_fallback | ||
1459 | } else { | ||
1460 | | jae ->fff_fallback | ||
1461 | } | ||
1435 | if (sse) { | 1462 | if (sse) { |
1436 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1463 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
1437 | } else { | 1464 | } else { |
@@ -1460,7 +1487,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1460 | | mov FCARG2, BASE // Otherwise: FCARG2 == BASE | 1487 | | mov FCARG2, BASE // Otherwise: FCARG2 == BASE |
1461 | |.endif | 1488 | |.endif |
1462 | | mov L:FCARG1, L:RB | 1489 | | mov L:FCARG1, L:RB |
1463 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | 1490 | if (LJ_DUALNUM) { |
1491 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) | ||
1492 | } else { | ||
1493 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | ||
1494 | } | ||
1464 | | // GCstr returned in eax (RD). | 1495 | | // GCstr returned in eax (RD). |
1465 | | mov BASE, L:RB->base | 1496 | | mov BASE, L:RB->base |
1466 | | jmp <2 | 1497 | | jmp <2 |
@@ -1538,9 +1569,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1538 | | | 1569 | | |
1539 | |.ffunc_1 ipairs_aux | 1570 | |.ffunc_1 ipairs_aux |
1540 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | 1571 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback |
1541 | | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback | 1572 | | cmp dword [BASE+12], LJ_TISNUM |
1573 | if (LJ_DUALNUM) { | ||
1574 | | jne ->fff_fallback | ||
1575 | } else { | ||
1576 | | jae ->fff_fallback | ||
1577 | } | ||
1542 | | mov PC, [BASE-4] | 1578 | | mov PC, [BASE-4] |
1543 | if (sse) { | 1579 | if (LJ_DUALNUM) { |
1580 | | mov RD, dword [BASE+8] | ||
1581 | | add RD, 1 | ||
1582 | | mov dword [BASE-4], LJ_TISNUM | ||
1583 | | mov dword [BASE-8], RD | ||
1584 | } else if (sse) { | ||
1544 | | movsd xmm0, qword [BASE+8] | 1585 | | movsd xmm0, qword [BASE+8] |
1545 | | sseconst_1 xmm1, RBa | 1586 | | sseconst_1 xmm1, RBa |
1546 | | addsd xmm0, xmm1 | 1587 | | addsd xmm0, xmm1 |
@@ -1598,7 +1639,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1598 | | mov PC, [BASE-4] | 1639 | | mov PC, [BASE-4] |
1599 | | mov dword [BASE-4], LJ_TFUNC | 1640 | | mov dword [BASE-4], LJ_TFUNC |
1600 | | mov [BASE-8], CFUNC:RD | 1641 | | mov [BASE-8], CFUNC:RD |
1601 | if (sse) { | 1642 | if (LJ_DUALNUM) { |
1643 | | mov dword [BASE+12], LJ_TISNUM | ||
1644 | | mov dword [BASE+8], 0 | ||
1645 | } else if (sse) { | ||
1602 | | xorps xmm0, xmm0 | 1646 | | xorps xmm0, xmm0 |
1603 | | movsd qword [BASE+8], xmm0 | 1647 | | movsd qword [BASE+8], xmm0 |
1604 | } else { | 1648 | } else { |
@@ -1829,13 +1873,39 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1829 | | | 1873 | | |
1830 | |//-- Math library ------------------------------------------------------- | 1874 | |//-- Math library ------------------------------------------------------- |
1831 | | | 1875 | | |
1876 | if (!LJ_DUALNUM) { | ||
1877 | |->fff_resi: // Dummy. | ||
1878 | } | ||
1832 | if (sse) { | 1879 | if (sse) { |
1833 | |->fff_resn: | 1880 | |->fff_resn: |
1834 | | mov PC, [BASE-4] | 1881 | | mov PC, [BASE-4] |
1835 | | fstp qword [BASE-8] | 1882 | | fstp qword [BASE-8] |
1836 | | jmp ->fff_res1 | 1883 | | jmp ->fff_res1 |
1837 | | | 1884 | } |
1838 | |.ffunc_nsse math_abs | 1885 | | .ffunc_1 math_abs |
1886 | if (LJ_DUALNUM) { | ||
1887 | | cmp dword [BASE+4], LJ_TISNUM; jne >2 | ||
1888 | | mov RB, dword [BASE] | ||
1889 | | cmp RB, 0; jns ->fff_resi | ||
1890 | | neg RB; js >1 | ||
1891 | |->fff_resbit: | ||
1892 | |->fff_resi: | ||
1893 | | mov PC, [BASE-4] | ||
1894 | | mov dword [BASE-4], LJ_TISNUM | ||
1895 | | mov dword [BASE-8], RB | ||
1896 | | jmp ->fff_res1 | ||
1897 | |1: | ||
1898 | | mov PC, [BASE-4] | ||
1899 | | mov dword [BASE-4], 0x41e00000 // 2^31. | ||
1900 | | mov dword [BASE-8], 0 | ||
1901 | | jmp ->fff_res1 | ||
1902 | |2: | ||
1903 | | ja ->fff_fallback | ||
1904 | } else { | ||
1905 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1906 | } | ||
1907 | if (sse) { | ||
1908 | | movsd xmm0, qword [BASE] | ||
1839 | | sseconst_abs xmm1, RDa | 1909 | | sseconst_abs xmm1, RDa |
1840 | | andps xmm0, xmm1 | 1910 | | andps xmm0, xmm1 |
1841 | |->fff_resxmm0: | 1911 | |->fff_resxmm0: |
@@ -1843,7 +1913,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1843 | | movsd qword [BASE-8], xmm0 | 1913 | | movsd qword [BASE-8], xmm0 |
1844 | | // fallthrough | 1914 | | // fallthrough |
1845 | } else { | 1915 | } else { |
1846 | |.ffunc_n math_abs | 1916 | | fld qword [BASE] |
1847 | | fabs | 1917 | | fabs |
1848 | | // fallthrough | 1918 | | // fallthrough |
1849 | |->fff_resxmm0: // Dummy. | 1919 | |->fff_resxmm0: // Dummy. |
@@ -1876,16 +1946,60 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1876 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. | 1946 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. |
1877 | | jmp ->vm_return | 1947 | | jmp ->vm_return |
1878 | | | 1948 | | |
1949 | |.macro math_round, func | ||
1950 | | .ffunc math_ .. func | ||
1951 | ||if (LJ_DUALNUM) { | ||
1952 | | cmp dword [BASE+4], LJ_TISNUM; jne >1 | ||
1953 | | mov RB, dword [BASE]; jmp ->fff_resi | ||
1954 | |1: | ||
1955 | | ja ->fff_fallback | ||
1956 | ||} else { | ||
1957 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1958 | ||} | ||
1959 | ||if (sse) { | ||
1960 | | movsd xmm0, qword [BASE] | ||
1961 | | call ->vm_ .. func | ||
1962 | || if (LJ_DUALNUM) { | ||
1963 | | cvtsd2si RB, xmm0 | ||
1964 | | cmp RB, 0x80000000 | ||
1965 | | jne ->fff_resi | ||
1966 | | cvtsi2sd xmm1, RB | ||
1967 | | ucomisd xmm0, xmm1 | ||
1968 | | jp ->fff_resxmm0 | ||
1969 | | je ->fff_resi | ||
1970 | || } | ||
1971 | | jmp ->fff_resxmm0 | ||
1972 | ||} else { | ||
1973 | | fld qword [BASE] | ||
1974 | | call ->vm_ .. func | ||
1975 | || if (LJ_DUALNUM) { | ||
1976 | |.if not X64 | ||
1977 | | fist ARG1 | ||
1978 | | mov RB, ARG1 | ||
1979 | | cmp RB, 0x80000000; jne >2 | ||
1980 | | fdup | ||
1981 | | fild ARG1 | ||
1982 | | fcomparepp | ||
1983 | | jp ->fff_resn | ||
1984 | | jne ->fff_resn | ||
1985 | |2: | ||
1986 | | fpop | ||
1987 | | jmp ->fff_resi | ||
1988 | |.endif | ||
1989 | || } else { | ||
1990 | | jmp ->fff_resn | ||
1991 | || } | ||
1992 | ||} | ||
1993 | |.endmacro | ||
1994 | | | ||
1995 | | math_round floor | ||
1996 | | math_round ceil | ||
1997 | | | ||
1879 | if (sse) { | 1998 | if (sse) { |
1880 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 1999 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
1881 | |.ffunc_nsse math_floor; call ->vm_floor; jmp ->fff_resxmm0 | ||
1882 | |.ffunc_nsse math_ceil; call ->vm_ceil; jmp ->fff_resxmm0 | ||
1883 | } else { | 2000 | } else { |
1884 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | 2001 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn |
1885 | |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn | ||
1886 | |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn | ||
1887 | } | 2002 | } |
1888 | | | ||
1889 | |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn | 2003 | |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn |
1890 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2004 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn |
1891 | |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn | 2005 | |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn |
@@ -1946,7 +2060,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
1946 | | | 2060 | | |
1947 | |.ffunc_1 math_frexp | 2061 | |.ffunc_1 math_frexp |
1948 | | mov RB, [BASE+4] | 2062 | | mov RB, [BASE+4] |
1949 | | cmp RB, LJ_TISNUM; ja ->fff_fallback | 2063 | | cmp RB, LJ_TISNUM; jae ->fff_fallback |
1950 | | mov PC, [BASE-4] | 2064 | | mov PC, [BASE-4] |
1951 | | mov RC, [BASE] | 2065 | | mov RC, [BASE] |
1952 | | mov [BASE-4], RB; mov [BASE-8], RC | 2066 | | mov [BASE-4], RB; mov [BASE-8], RC |
@@ -2041,44 +2155,91 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2041 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | 2155 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn |
2042 | } | 2156 | } |
2043 | | | 2157 | | |
2044 | |.macro math_minmax, name, cmovop, nocmovop, sseop | 2158 | |.macro math_minmax, name, cmovop, fcmovop, nofcmovop, sseop |
2159 | | .ffunc name | ||
2160 | | mov RA, 2 | ||
2161 | | cmp dword [BASE+4], LJ_TISNUM | ||
2162 | ||if (LJ_DUALNUM) { | ||
2163 | | jne >4 | ||
2164 | | mov RB, dword [BASE] | ||
2165 | |1: // Handle integers. | ||
2166 | | cmp RA, RD; jae ->fff_resi | ||
2167 | | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 | ||
2168 | | cmp RB, dword [BASE+RA*8-8] | ||
2169 | | cmovop RB, dword [BASE+RA*8-8] | ||
2170 | | add RA, 1 | ||
2171 | | jmp <1 | ||
2172 | |3: | ||
2173 | | ja ->fff_fallback | ||
2174 | | // Convert intermediate result to number and continue below. | ||
2045 | ||if (sse) { | 2175 | ||if (sse) { |
2046 | |.ffunc_nsse name | 2176 | | cvtsi2sd xmm0, RB |
2047 | | mov RB, 2 | 2177 | ||} else { |
2048 | |1: | 2178 | |.if not X64 |
2049 | | cmp RB, RD | 2179 | | mov TMP1, RB |
2050 | | jae ->fff_resxmm0 | 2180 | | fild TMP1 |
2051 | | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja ->fff_fallback | 2181 | |.endif |
2052 | | movsd xmm1, qword [BASE+RB*8-8] | 2182 | ||} |
2183 | | jmp >6 | ||
2184 | |4: | ||
2185 | | ja ->fff_fallback | ||
2186 | ||} else { | ||
2187 | | jae ->fff_fallback | ||
2188 | ||} | ||
2189 | | | ||
2190 | ||if (sse) { | ||
2191 | | movsd xmm0, qword [BASE] | ||
2192 | |5: // Handle numbers or integers. | ||
2193 | | cmp RA, RD; jae ->fff_resxmm0 | ||
2194 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | ||
2195 | ||if (LJ_DUALNUM) { | ||
2196 | | jb >6 | ||
2197 | | ja ->fff_fallback | ||
2198 | | cvtsi2sd xmm1, dword [BASE+RA*8-8] | ||
2199 | | jmp >7 | ||
2200 | ||} else { | ||
2201 | | jae ->fff_fallback | ||
2202 | ||} | ||
2203 | |6: | ||
2204 | | movsd xmm1, qword [BASE+RA*8-8] | ||
2205 | |7: | ||
2053 | | sseop xmm0, xmm1 | 2206 | | sseop xmm0, xmm1 |
2054 | | add RB, 1 | 2207 | | add RA, 1 |
2055 | | jmp <1 | 2208 | | jmp <5 |
2056 | ||} else { | 2209 | ||} else { |
2057 | |.if not X64 | 2210 | |.if not X64 |
2058 | |.ffunc_n name | 2211 | | fld qword [BASE] |
2059 | | mov RB, 2 | 2212 | |5: // Handle numbers or integers. |
2060 | |1: | 2213 | | cmp RA, RD; jae ->fff_resn |
2061 | | cmp RB, RD | 2214 | | cmp dword [BASE+RA*8-4], LJ_TISNUM |
2062 | | jae ->fff_resn | 2215 | ||if (LJ_DUALNUM) { |
2063 | | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja >5 | 2216 | | jb >6 |
2064 | | fld qword [BASE+RB*8-8] | 2217 | | ja >9 |
2218 | | fild dword [BASE+RA*8-8] | ||
2219 | | jmp >7 | ||
2220 | ||} else { | ||
2221 | | jae >9 | ||
2222 | ||} | ||
2223 | |6: | ||
2224 | | fld qword [BASE+RA*8-8] | ||
2225 | |7: | ||
2065 | ||if (cmov) { | 2226 | ||if (cmov) { |
2066 | | fucomi st1; cmovop st1; fpop1 | 2227 | | fucomi st1; fcmovop st1; fpop1 |
2067 | ||} else { | 2228 | ||} else { |
2068 | | push eax | 2229 | | push eax |
2069 | | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop | 2230 | | fucom st1; fnstsw ax; test ah, 1; nofcmovop >2; fxch; 2: ; fpop |
2070 | | pop eax | 2231 | | pop eax |
2071 | ||} | 2232 | ||} |
2072 | | add RB, 1 | 2233 | | add RA, 1 |
2073 | | jmp <1 | 2234 | | jmp <5 |
2074 | |.endif | 2235 | |.endif |
2075 | ||} | 2236 | ||} |
2076 | |.endmacro | 2237 | |.endmacro |
2077 | | | 2238 | | |
2078 | | math_minmax math_min, fcmovnbe, jz, minsd | 2239 | | math_minmax math_min, cmovg, fcmovnbe, jz, minsd |
2079 | | math_minmax math_max, fcmovbe, jnz, maxsd | 2240 | | math_minmax math_max, cmovl, fcmovbe, jnz, maxsd |
2080 | if (!sse) { | 2241 | if (!sse) { |
2081 | |5: | 2242 | |9: |
2082 | | fpop; jmp ->fff_fallback | 2243 | | fpop; jmp ->fff_fallback |
2083 | } | 2244 | } |
2084 | | | 2245 | | |
@@ -2087,7 +2248,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2087 | |.ffunc_1 string_len | 2248 | |.ffunc_1 string_len |
2088 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2249 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2089 | | mov STR:RB, [BASE] | 2250 | | mov STR:RB, [BASE] |
2090 | if (sse) { | 2251 | if (LJ_DUALNUM) { |
2252 | | mov RB, dword STR:RB->len; jmp ->fff_resi | ||
2253 | } else if (sse) { | ||
2091 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | 2254 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 |
2092 | } else { | 2255 | } else { |
2093 | | fild dword STR:RB->len; jmp ->fff_resn | 2256 | | fild dword STR:RB->len; jmp ->fff_resn |
@@ -2101,7 +2264,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2101 | | cmp dword STR:RB->len, 1 | 2264 | | cmp dword STR:RB->len, 1 |
2102 | | jb ->fff_res0 // Return no results for empty string. | 2265 | | jb ->fff_res0 // Return no results for empty string. |
2103 | | movzx RB, byte STR:RB[1] | 2266 | | movzx RB, byte STR:RB[1] |
2104 | if (sse) { | 2267 | if (LJ_DUALNUM) { |
2268 | | jmp ->fff_resi | ||
2269 | } else if (sse) { | ||
2105 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | 2270 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
2106 | } else { | 2271 | } else { |
2107 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2272 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn |
@@ -2110,12 +2275,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2110 | |.ffunc string_char // Only handle the 1-arg case here. | 2275 | |.ffunc string_char // Only handle the 1-arg case here. |
2111 | | ffgccheck | 2276 | | ffgccheck |
2112 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | 2277 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. |
2113 | | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | 2278 | | cmp dword [BASE+4], LJ_TISNUM |
2114 | if (sse) { | 2279 | if (LJ_DUALNUM) { |
2280 | | jne ->fff_fallback | ||
2281 | | mov RB, dword [BASE] | ||
2282 | | cmp RB, 255; ja ->fff_fallback | ||
2283 | | mov TMP2, RB | ||
2284 | } else if (sse) { | ||
2285 | | jae ->fff_fallback | ||
2115 | | cvttsd2si RB, qword [BASE] | 2286 | | cvttsd2si RB, qword [BASE] |
2116 | | cmp RB, 255; ja ->fff_fallback | 2287 | | cmp RB, 255; ja ->fff_fallback |
2117 | | mov TMP2, RB | 2288 | | mov TMP2, RB |
2118 | } else { | 2289 | } else { |
2290 | | jae ->fff_fallback | ||
2119 | | fld qword [BASE] | 2291 | | fld qword [BASE] |
2120 | | fistp TMP2 | 2292 | | fistp TMP2 |
2121 | | cmp TMP2, 255; ja ->fff_fallback | 2293 | | cmp TMP2, 255; ja ->fff_fallback |
@@ -2151,21 +2323,34 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2151 | | mov TMP2, -1 | 2323 | | mov TMP2, -1 |
2152 | | cmp NARGS:RD, 1+2; jb ->fff_fallback | 2324 | | cmp NARGS:RD, 1+2; jb ->fff_fallback |
2153 | | jna >1 | 2325 | | jna >1 |
2154 | | cmp dword [BASE+20], LJ_TISNUM; ja ->fff_fallback | 2326 | | cmp dword [BASE+20], LJ_TISNUM |
2155 | if (sse) { | 2327 | if (LJ_DUALNUM) { |
2328 | | jne ->fff_fallback | ||
2329 | | mov RB, dword [BASE+16] | ||
2330 | | mov TMP2, RB | ||
2331 | } else if (sse) { | ||
2332 | | jae ->fff_fallback | ||
2156 | | cvttsd2si RB, qword [BASE+16] | 2333 | | cvttsd2si RB, qword [BASE+16] |
2157 | | mov TMP2, RB | 2334 | | mov TMP2, RB |
2158 | } else { | 2335 | } else { |
2336 | | jae ->fff_fallback | ||
2159 | | fld qword [BASE+16] | 2337 | | fld qword [BASE+16] |
2160 | | fistp TMP2 | 2338 | | fistp TMP2 |
2161 | } | 2339 | } |
2162 | |1: | 2340 | |1: |
2163 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2341 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2164 | | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback | 2342 | | cmp dword [BASE+12], LJ_TISNUM |
2343 | if (LJ_DUALNUM) { | ||
2344 | | jne ->fff_fallback | ||
2345 | } else { | ||
2346 | | jae ->fff_fallback | ||
2347 | } | ||
2165 | | mov STR:RB, [BASE] | 2348 | | mov STR:RB, [BASE] |
2166 | | mov TMP3, STR:RB | 2349 | | mov TMP3, STR:RB |
2167 | | mov RB, STR:RB->len | 2350 | | mov RB, STR:RB->len |
2168 | if (sse) { | 2351 | if (LJ_DUALNUM) { |
2352 | | mov RA, dword [BASE+8] | ||
2353 | } else if (sse) { | ||
2169 | | cvttsd2si RA, qword [BASE+8] | 2354 | | cvttsd2si RA, qword [BASE+8] |
2170 | } else { | 2355 | } else { |
2171 | |.if not X64 | 2356 | |.if not X64 |
@@ -2219,11 +2404,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2219 | |.ffunc_2 string_rep // Only handle the 1-char case inline. | 2404 | |.ffunc_2 string_rep // Only handle the 1-char case inline. |
2220 | | ffgccheck | 2405 | | ffgccheck |
2221 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2406 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2222 | | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback | 2407 | | cmp dword [BASE+12], LJ_TISNUM |
2223 | | mov STR:RB, [BASE] | 2408 | | mov STR:RB, [BASE] |
2224 | if (sse) { | 2409 | if (LJ_DUALNUM) { |
2410 | | jne ->fff_fallback | ||
2411 | | mov RC, dword [BASE+8] | ||
2412 | } else if (sse) { | ||
2413 | | jae ->fff_fallback | ||
2225 | | cvttsd2si RC, qword [BASE+8] | 2414 | | cvttsd2si RC, qword [BASE+8] |
2226 | } else { | 2415 | } else { |
2416 | | jae ->fff_fallback | ||
2227 | | fld qword [BASE+8] | 2417 | | fld qword [BASE+8] |
2228 | | fistp TMP2 | 2418 | | fistp TMP2 |
2229 | | mov RC, TMP2 | 2419 | | mov RC, TMP2 |
@@ -2320,7 +2510,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2320 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | 2510 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) |
2321 | | // Length of table returned in eax (RD). | 2511 | | // Length of table returned in eax (RD). |
2322 | | mov BASE, RB // Restore BASE. | 2512 | | mov BASE, RB // Restore BASE. |
2323 | if (sse) { | 2513 | if (LJ_DUALNUM) { |
2514 | | mov RB, RD; jmp ->fff_resi | ||
2515 | } else if (sse) { | ||
2324 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | 2516 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 |
2325 | } else { | 2517 | } else { |
2326 | |.if not X64 | 2518 | |.if not X64 |
@@ -2332,49 +2524,85 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2332 | | | 2524 | | |
2333 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). | 2525 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). |
2334 | | | 2526 | | |
2335 | if (sse) { | 2527 | |.macro .ffunc_bit, name, kind |
2336 | |.ffunc_nsse bit_tobit | 2528 | | .ffunc name |
2337 | | sseconst_tobit xmm1, RBa | 2529 | |.if kind == 2 |
2338 | | addsd xmm0, xmm1 | 2530 | ||if (sse) { |
2339 | | movd RB, xmm0 | 2531 | | sseconst_tobit xmm1, RBa |
2340 | | cvtsi2sd xmm0, RB | 2532 | ||} else { |
2341 | | jmp ->fff_resxmm0 | 2533 | | mov TMP1, TOBIT_BIAS |
2342 | } else { | 2534 | ||} |
2343 | |.if not X64 | 2535 | |.endif |
2344 | |.ffunc_n bit_tobit | 2536 | | cmp dword [BASE+4], LJ_TISNUM |
2345 | | mov TMP1, TOBIT_BIAS | 2537 | ||if (LJ_DUALNUM) { |
2346 | | fadd TMP1 | 2538 | | jne >1 |
2347 | | fstp FPARG1 // 64 bit FP store. | 2539 | | mov RB, dword [BASE] |
2348 | | fild ARG1 // 32 bit integer load (s2lfwd ok). | 2540 | |.if kind > 0 |
2349 | | jmp ->fff_resn | 2541 | | jmp >2 |
2350 | |.endif | 2542 | |.else |
2351 | } | 2543 | | jmp ->fff_resbit |
2352 | | | 2544 | |.endif |
2353 | |.macro .ffunc_bit, name | 2545 | |1: |
2546 | | ja ->fff_fallback | ||
2547 | ||} else { | ||
2548 | | jae ->fff_fallback | ||
2549 | ||} | ||
2354 | ||if (sse) { | 2550 | ||if (sse) { |
2355 | | .ffunc_nsse name | 2551 | | movsd xmm0, qword [BASE] |
2552 | |.if kind < 2 | ||
2356 | | sseconst_tobit xmm1, RBa | 2553 | | sseconst_tobit xmm1, RBa |
2554 | |.endif | ||
2357 | | addsd xmm0, xmm1 | 2555 | | addsd xmm0, xmm1 |
2358 | | movd RB, xmm0 | 2556 | | movd RB, xmm0 |
2359 | ||} else { | 2557 | ||} else { |
2360 | |.if not X64 | 2558 | |.if not X64 |
2361 | | .ffunc_n name | 2559 | | fld qword [BASE] |
2560 | |.if kind < 2 | ||
2362 | | mov TMP1, TOBIT_BIAS | 2561 | | mov TMP1, TOBIT_BIAS |
2562 | |.endif | ||
2363 | | fadd TMP1 | 2563 | | fadd TMP1 |
2364 | | fstp FPARG1 | 2564 | | fstp FPARG1 |
2565 | |.if kind > 0 | ||
2365 | | mov RB, ARG1 | 2566 | | mov RB, ARG1 |
2366 | |.endif | 2567 | |.endif |
2568 | |.endif | ||
2367 | ||} | 2569 | ||} |
2570 | |2: | ||
2368 | |.endmacro | 2571 | |.endmacro |
2369 | | | 2572 | | |
2573 | |.ffunc_bit bit_tobit, 0 | ||
2574 | if (LJ_DUALNUM || sse) { | ||
2575 | if (!sse) { | ||
2576 | |.if not X64 | ||
2577 | | mov RB, ARG1 | ||
2578 | |.endif | ||
2579 | } | ||
2580 | | jmp ->fff_resbit | ||
2581 | } else { | ||
2582 | |.if not X64 | ||
2583 | | fild ARG1 | ||
2584 | | jmp ->fff_resn | ||
2585 | |.endif | ||
2586 | } | ||
2587 | | | ||
2370 | |.macro .ffunc_bit_op, name, ins | 2588 | |.macro .ffunc_bit_op, name, ins |
2371 | | .ffunc_bit name | 2589 | | .ffunc_bit name, 2 |
2372 | | mov TMP2, NARGS:RD // Save for fallback. | 2590 | | mov TMP2, NARGS:RD // Save for fallback. |
2373 | | lea RD, [BASE+NARGS:RD*8-16] | 2591 | | lea RD, [BASE+NARGS:RD*8-16] |
2374 | |1: | 2592 | |1: |
2375 | | cmp RD, BASE | 2593 | | cmp RD, BASE |
2376 | | jbe ->fff_resbit | 2594 | | jbe ->fff_resbit |
2377 | | cmp dword [RD+4], LJ_TISNUM; ja ->fff_fallback_bit_op | 2595 | | cmp dword [RD+4], LJ_TISNUM |
2596 | ||if (LJ_DUALNUM) { | ||
2597 | | jne >2 | ||
2598 | | ins RB, dword [RD] | ||
2599 | | sub RD, 8 | ||
2600 | | jmp <1 | ||
2601 | |2: | ||
2602 | | ja ->fff_fallback_bit_op | ||
2603 | ||} else { | ||
2604 | | jae ->fff_fallback_bit_op | ||
2605 | ||} | ||
2378 | ||if (sse) { | 2606 | ||if (sse) { |
2379 | | movsd xmm0, qword [RD] | 2607 | | movsd xmm0, qword [RD] |
2380 | | addsd xmm0, xmm1 | 2608 | | addsd xmm0, xmm1 |
@@ -2396,13 +2624,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2396 | |.ffunc_bit_op bit_bor, or | 2624 | |.ffunc_bit_op bit_bor, or |
2397 | |.ffunc_bit_op bit_bxor, xor | 2625 | |.ffunc_bit_op bit_bxor, xor |
2398 | | | 2626 | | |
2399 | |.ffunc_bit bit_bswap | 2627 | |.ffunc_bit bit_bswap, 1 |
2400 | | bswap RB | 2628 | | bswap RB |
2401 | | jmp ->fff_resbit | 2629 | | jmp ->fff_resbit |
2402 | | | 2630 | | |
2403 | |.ffunc_bit bit_bnot | 2631 | |.ffunc_bit bit_bnot, 1 |
2404 | | not RB | 2632 | | not RB |
2405 | if (sse) { | 2633 | if (LJ_DUALNUM) { |
2634 | | jmp ->fff_resbit | ||
2635 | } else if (sse) { | ||
2406 | |->fff_resbit: | 2636 | |->fff_resbit: |
2407 | | cvtsi2sd xmm0, RB | 2637 | | cvtsi2sd xmm0, RB |
2408 | | jmp ->fff_resxmm0 | 2638 | | jmp ->fff_resxmm0 |
@@ -2420,12 +2650,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2420 | | jmp ->fff_fallback | 2650 | | jmp ->fff_fallback |
2421 | | | 2651 | | |
2422 | |.macro .ffunc_bit_sh, name, ins | 2652 | |.macro .ffunc_bit_sh, name, ins |
2423 | ||if (sse) { | 2653 | ||if (LJ_DUALNUM) { |
2654 | | .ffunc_bit name, 1 | ||
2655 | | // Note: no inline conversion from number for 2nd argument! | ||
2656 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | ||
2657 | | mov RA, dword [BASE+8] | ||
2658 | ||} else if (sse) { | ||
2424 | | .ffunc_nnsse name | 2659 | | .ffunc_nnsse name |
2425 | | sseconst_tobit xmm2, RBa | 2660 | | sseconst_tobit xmm2, RBa |
2426 | | addsd xmm0, xmm2 | 2661 | | addsd xmm0, xmm2 |
2427 | | addsd xmm1, xmm2 | 2662 | | addsd xmm1, xmm2 |
2428 | | mov RC, RA // Assumes RA is ecx. | ||
2429 | | movd RB, xmm0 | 2663 | | movd RB, xmm0 |
2430 | | movd RA, xmm1 | 2664 | | movd RA, xmm1 |
2431 | ||} else { | 2665 | ||} else { |
@@ -2436,13 +2670,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2436 | | fstp FPARG3 | 2670 | | fstp FPARG3 |
2437 | | fadd TMP1 | 2671 | | fadd TMP1 |
2438 | | fstp FPARG1 | 2672 | | fstp FPARG1 |
2439 | | mov RC, RA // Assumes RA is ecx. | ||
2440 | | mov RA, ARG3 | 2673 | | mov RA, ARG3 |
2441 | | mov RB, ARG1 | 2674 | | mov RB, ARG1 |
2442 | |.endif | 2675 | |.endif |
2443 | ||} | 2676 | ||} |
2444 | | ins RB, cl | 2677 | | ins RB, cl // Assumes RA is ecx. |
2445 | | mov RA, RC | ||
2446 | | jmp ->fff_resbit | 2678 | | jmp ->fff_resbit |
2447 | |.endmacro | 2679 | |.endmacro |
2448 | | | 2680 | | |
@@ -3073,7 +3305,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
3073 | | ret | 3305 | | ret |
3074 | |6: | 3306 | |6: |
3075 | | je <5 // x^1 ==> x | 3307 | | je <5 // x^1 ==> x |
3076 | | jb >7 | 3308 | | jb >7 // x^0 ==> 1 |
3077 | | neg eax | 3309 | | neg eax |
3078 | | call <1 | 3310 | | call <1 |
3079 | | sseconst_1 xmm1, RDa | 3311 | | sseconst_1 xmm1, RDa |
@@ -3536,43 +3768,100 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3536 | 3768 | ||
3537 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ | 3769 | /* Remember: all ops branch for a true comparison, fall through otherwise. */ |
3538 | 3770 | ||
3771 | |.macro jmp_comp, lt, ge, le, gt, target | ||
3772 | ||switch (op) { /* Build-time dispatch on the opcode: emits exactly one jump instruction to target. */ | ||
3773 | ||case BC_ISLT: /* Uses the 1st instruction argument (lt). */ | ||
3774 | | lt target | ||
3775 | ||break; | ||
3776 | ||case BC_ISGE: /* Uses the 2nd instruction argument (ge). */ | ||
3777 | | ge target | ||
3778 | ||break; | ||
3779 | ||case BC_ISLE: /* Uses the 3rd instruction argument (le). */ | ||
3780 | | le target | ||
3781 | ||break; | ||
3782 | ||case BC_ISGT: /* Uses the 4th instruction argument (gt). */ | ||
3783 | | gt target | ||
3784 | ||break; | ||
3785 | ||default: break; /* Emits nothing for any other op; present only to shut up GCC. */ | ||
3786 | ||} | ||
3787 | |.endmacro | ||
3788 | |||
3539 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 3789 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
3540 | | // RA = src1, RD = src2, JMP with RD = target | 3790 | | // RA = src1, RD = src2, JMP with RD = target |
3541 | | ins_AD | 3791 | | ins_AD |
3542 | | checknum RA, ->vmeta_comp | 3792 | if (LJ_DUALNUM) { |
3543 | | checknum RD, ->vmeta_comp | 3793 | | checkint RA, >7 |
3794 | | checkint RD, >8 | ||
3795 | | mov RB, dword [BASE+RA*8] | ||
3796 | | add PC, 4 | ||
3797 | | cmp RB, dword [BASE+RD*8] | ||
3798 | | jmp_comp jge, jl, jg, jle, >9 | ||
3799 | |6: | ||
3800 | | movzx RD, PC_RD | ||
3801 | | branchPC RD | ||
3802 | |9: | ||
3803 | | ins_next | ||
3804 | | | ||
3805 | |7: // RA is not an integer. | ||
3806 | | ja ->vmeta_comp | ||
3807 | | // RA is a number. | ||
3808 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | ||
3809 | | // RA is a number, RD is an integer. | ||
3810 | if (sse) { | ||
3811 | | cvtsi2sd xmm0, dword [BASE+RD*8] | ||
3812 | | jmp >2 | ||
3813 | } else { | ||
3814 | | fld qword [BASE+RA*8] | ||
3815 | | fild dword [BASE+RD*8] | ||
3816 | | jmp >3 | ||
3817 | } | ||
3818 | | | ||
3819 | |8: // RA is an integer, RD is not an integer. | ||
3820 | | ja ->vmeta_comp | ||
3821 | | // RA is an integer, RD is a number. | ||
3822 | if (sse) { | ||
3823 | | cvtsi2sd xmm1, dword [BASE+RA*8] | ||
3824 | | movsd xmm0, qword [BASE+RD*8] | ||
3825 | | add PC, 4 | ||
3826 | | ucomisd xmm0, xmm1 | ||
3827 | | jmp_comp jbe, ja, jb, jae, <9 | ||
3828 | | jmp <6 | ||
3829 | } else { | ||
3830 | | fild dword [BASE+RA*8] | ||
3831 | | jmp >2 | ||
3832 | } | ||
3833 | } else { | ||
3834 | | checknum RA, ->vmeta_comp | ||
3835 | | checknum RD, ->vmeta_comp | ||
3836 | } | ||
3544 | if (sse) { | 3837 | if (sse) { |
3838 | |1: | ||
3545 | | movsd xmm0, qword [BASE+RD*8] | 3839 | | movsd xmm0, qword [BASE+RD*8] |
3840 | |2: | ||
3546 | | add PC, 4 | 3841 | | add PC, 4 |
3547 | | ucomisd xmm0, qword [BASE+RA*8] | 3842 | | ucomisd xmm0, qword [BASE+RA*8] |
3843 | |3: | ||
3548 | } else { | 3844 | } else { |
3845 | |1: | ||
3549 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | 3846 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. |
3847 | |2: | ||
3550 | | fld qword [BASE+RD*8] | 3848 | | fld qword [BASE+RD*8] |
3849 | |3: | ||
3551 | | add PC, 4 | 3850 | | add PC, 4 |
3552 | | fcomparepp // eax (RD) modified! | 3851 | | fcomparepp // eax (RD) modified! |
3553 | } | 3852 | } |
3554 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3853 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
3555 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3854 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
3556 | switch (op) { | 3855 | if (LJ_DUALNUM) { |
3557 | case BC_ISLT: | 3856 | | jmp_comp jbe, ja, jb, jae, <9 |
3558 | | jbe >2 | 3857 | | jmp <6 |
3559 | break; | 3858 | } else { |
3560 | case BC_ISGE: | 3859 | | jmp_comp jbe, ja, jb, jae, >1 |
3561 | | ja >2 | 3860 | | movzx RD, PC_RD |
3562 | break; | 3861 | | branchPC RD |
3563 | case BC_ISLE: | 3862 | |1: |
3564 | | jb >2 | 3863 | | ins_next |
3565 | break; | ||
3566 | case BC_ISGT: | ||
3567 | | jae >2 | ||
3568 | break; | ||
3569 | default: break; /* Shut up GCC. */ | ||
3570 | } | 3864 | } |
3571 | |1: | ||
3572 | | movzx RD, PC_RD | ||
3573 | | branchPC RD | ||
3574 | |2: | ||
3575 | | ins_next | ||
3576 | break; | 3865 | break; |
3577 | 3866 | ||
3578 | case BC_ISEQV: case BC_ISNEV: | 3867 | case BC_ISEQV: case BC_ISNEV: |
@@ -3580,14 +3869,61 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3580 | | ins_AD // RA = src1, RD = src2, JMP with RD = target | 3869 | | ins_AD // RA = src1, RD = src2, JMP with RD = target |
3581 | | mov RB, [BASE+RD*8+4] | 3870 | | mov RB, [BASE+RD*8+4] |
3582 | | add PC, 4 | 3871 | | add PC, 4 |
3583 | | cmp RB, LJ_TISNUM; ja >5 | 3872 | if (LJ_DUALNUM) { |
3584 | | checknum RA, >5 | 3873 | | cmp RB, LJ_TISNUM; jne >7 |
3874 | | checkint RA, >8 | ||
3875 | | mov RB, dword [BASE+RD*8] | ||
3876 | | cmp RB, dword [BASE+RA*8] | ||
3877 | if (vk) { | ||
3878 | | jne >9 | ||
3879 | } else { | ||
3880 | | je >9 | ||
3881 | } | ||
3882 | | movzx RD, PC_RD | ||
3883 | | branchPC RD | ||
3884 | |9: | ||
3885 | | ins_next | ||
3886 | | | ||
3887 | |7: // RD is not an integer. | ||
3888 | | ja >5 | ||
3889 | | // RD is a number. | ||
3890 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | ||
3891 | | // RD is a number, RA is an integer. | ||
3892 | if (sse) { | ||
3893 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
3894 | } else { | ||
3895 | | fild dword [BASE+RA*8] | ||
3896 | } | ||
3897 | | jmp >2 | ||
3898 | | | ||
3899 | |8: // RD is an integer, RA is not an integer. | ||
3900 | | ja >5 | ||
3901 | | // RD is an integer, RA is a number. | ||
3902 | if (sse) { | ||
3903 | | cvtsi2sd xmm0, dword [BASE+RD*8] | ||
3904 | | ucomisd xmm0, qword [BASE+RA*8] | ||
3905 | } else { | ||
3906 | | fild dword [BASE+RD*8] | ||
3907 | | fld qword [BASE+RA*8] | ||
3908 | } | ||
3909 | | jmp >4 | ||
3910 | | | ||
3911 | } else { | ||
3912 | | cmp RB, LJ_TISNUM; jae >5 | ||
3913 | | checknum RA, >5 | ||
3914 | } | ||
3585 | if (sse) { | 3915 | if (sse) { |
3586 | | movsd xmm0, qword [BASE+RD*8] | 3916 | |1: |
3587 | | ucomisd xmm0, qword [BASE+RA*8] | 3917 | | movsd xmm0, qword [BASE+RA*8] |
3918 | |2: | ||
3919 | | ucomisd xmm0, qword [BASE+RD*8] | ||
3920 | |4: | ||
3588 | } else { | 3921 | } else { |
3922 | |1: | ||
3589 | | fld qword [BASE+RA*8] | 3923 | | fld qword [BASE+RA*8] |
3924 | |2: | ||
3590 | | fld qword [BASE+RD*8] | 3925 | | fld qword [BASE+RD*8] |
3926 | |4: | ||
3591 | | fcomparepp // eax (RD) modified! | 3927 | | fcomparepp // eax (RD) modified! |
3592 | } | 3928 | } |
3593 | iseqne_fp: | 3929 | iseqne_fp: |
@@ -3616,7 +3952,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3616 | | branchPC RD | 3952 | | branchPC RD |
3617 | |1: // EQ: Fallthrough to next instruction. | 3953 | |1: // EQ: Fallthrough to next instruction. |
3618 | } | 3954 | } |
3619 | | ins_next | 3955 | if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || |
3956 | op == BC_ISEQN || op == BC_ISNEN)) { | ||
3957 | | jmp <9 | ||
3958 | } else { | ||
3959 | | ins_next | ||
3960 | } | ||
3620 | | | 3961 | | |
3621 | if (op == BC_ISEQV || op == BC_ISNEV) { | 3962 | if (op == BC_ISEQV || op == BC_ISNEV) { |
3622 | |5: // Either or both types are not numbers. | 3963 | |5: // Either or both types are not numbers. |
@@ -3652,7 +3993,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3652 | | jmp ->vmeta_equal // Handle __eq metamethod. | 3993 | | jmp ->vmeta_equal // Handle __eq metamethod. |
3653 | } else if (LJ_HASFFI) { | 3994 | } else if (LJ_HASFFI) { |
3654 | |3: | 3995 | |3: |
3655 | | cmp RB, LJ_TCDATA; jne <2 | 3996 | | cmp RB, LJ_TCDATA |
3997 | if (LJ_DUALNUM && vk) { | ||
3998 | | jne <9 | ||
3999 | } else { | ||
4000 | | jne <2 | ||
4001 | } | ||
3656 | | jmp ->vmeta_equal_cd | 4002 | | jmp ->vmeta_equal_cd |
3657 | } | 4003 | } |
3658 | break; | 4004 | break; |
@@ -3676,14 +4022,59 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3676 | | ins_AD // RA = src, RD = num const, JMP with RD = target | 4022 | | ins_AD // RA = src, RD = num const, JMP with RD = target |
3677 | | mov RB, [BASE+RA*8+4] | 4023 | | mov RB, [BASE+RA*8+4] |
3678 | | add PC, 4 | 4024 | | add PC, 4 |
3679 | | cmp RB, LJ_TISNUM; ja >3 | 4025 | if (LJ_DUALNUM) { |
4026 | | cmp RB, LJ_TISNUM; jne >7 | ||
4027 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 | ||
4028 | | mov RB, dword [KBASE+RD*8] // Both are integers: compare the 32 bit payloads. | ||
4029 | | cmp RB, dword [BASE+RA*8] | ||
4030 | if (vk) { | ||
4031 | | jne >9 | ||
4032 | } else { | ||
4033 | | je >9 | ||
4034 | } | ||
4035 | | movzx RD, PC_RD | ||
4036 | | branchPC RD | ||
4037 | |9: | ||
4038 | | ins_next | ||
4039 | | | ||
4040 | |7: // RA is not an integer (flags still hold the compare against LJ_TISNUM). | ||
4041 | | ja >3 | ||
4042 | | // RA is a number. | ||
4043 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | ||
4044 | | // RA is a number, RD is an integer constant: convert the constant. | ||
4045 | if (sse) { | ||
4046 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | ||
4047 | } else { | ||
4048 | | fild dword [KBASE+RD*8] | ||
4049 | } | ||
4050 | | jmp >2 | ||
4051 | | | ||
4052 | |8: // RA is an integer, RD is a number constant (always addressed via KBASE). | ||
4053 | if (sse) { | ||
4054 | | cvtsi2sd xmm0, dword [BASE+RA*8] | ||
4055 | | ucomisd xmm0, qword [KBASE+RD*8] | ||
4056 | } else { | ||
4057 | | fild dword [BASE+RA*8] | ||
4058 | | fld qword [KBASE+RD*8] | ||
4059 | } | ||
4060 | | jmp >4 | ||
4061 | | | ||
4062 | } else { | ||
4063 | | cmp RB, LJ_TISNUM; jae >3 | ||
4064 | } | ||
3680 | if (sse) { | 4065 | if (sse) { |
4066 | |1: | ||
3681 | | movsd xmm0, qword [KBASE+RD*8] | 4067 | | movsd xmm0, qword [KBASE+RD*8] |
4068 | |2: | ||
3682 | | ucomisd xmm0, qword [BASE+RA*8] | 4069 | | ucomisd xmm0, qword [BASE+RA*8] |
4070 | |4: | ||
3683 | } else { | 4071 | } else { |
3684 | | fld qword [BASE+RA*8] | 4072 | |1: |
3685 | | fld qword [KBASE+RD*8] | 4073 | | fld qword [KBASE+RD*8] |
4074 | |2: | ||
4075 | | fld qword [BASE+RA*8] | ||
4076 | |4: // Entered from 8: with both operands already on the x87 stack; they must still be compared and popped. | ||
3686 | | fcomparepp // eax (RD) modified! | 4077 | | fcomparepp // eax (RD) modified! |
3687 | } | 4078 | } |
3688 | goto iseqne_fp; | 4079 | goto iseqne_fp; |
3689 | case BC_ISEQP: case BC_ISNEP: | 4080 | case BC_ISEQP: case BC_ISNEP: |
@@ -3760,7 +4151,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3760 | break; | 4151 | break; |
3761 | case BC_UNM: | 4152 | case BC_UNM: |
3762 | | ins_AD // RA = dst, RD = src | 4153 | | ins_AD // RA = dst, RD = src |
3763 | | checknum RD, ->vmeta_unm | 4154 | if (LJ_DUALNUM) { |
4155 | | checkint RD, >5 | ||
4156 | | mov RB, [BASE+RD*8] | ||
4157 | | neg RB | ||
4158 | | jo >4 | ||
4159 | | mov dword [BASE+RA*8+4], LJ_TISNUM | ||
4160 | | mov dword [BASE+RA*8], RB | ||
4161 | |9: | ||
4162 | | ins_next | ||
4163 | |4: | ||
4164 | | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. | ||
4165 | | mov dword [BASE+RA*8], 0 | ||
4166 | | jmp <9 | ||
4167 | |5: | ||
4168 | | ja ->vmeta_unm | ||
4169 | } else { | ||
4170 | | checknum RD, ->vmeta_unm | ||
4171 | } | ||
3764 | if (sse) { | 4172 | if (sse) { |
3765 | | movsd xmm0, qword [BASE+RD*8] | 4173 | | movsd xmm0, qword [BASE+RD*8] |
3766 | | sseconst_sign xmm1, RDa | 4174 | | sseconst_sign xmm1, RDa |
@@ -3771,13 +4179,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3771 | | fchs | 4179 | | fchs |
3772 | | fstp qword [BASE+RA*8] | 4180 | | fstp qword [BASE+RA*8] |
3773 | } | 4181 | } |
3774 | | ins_next | 4182 | if (LJ_DUALNUM) { |
4183 | | jmp <9 | ||
4184 | } else { | ||
4185 | | ins_next | ||
4186 | } | ||
3775 | break; | 4187 | break; |
3776 | case BC_LEN: | 4188 | case BC_LEN: |
3777 | | ins_AD // RA = dst, RD = src | 4189 | | ins_AD // RA = dst, RD = src |
3778 | | checkstr RD, >2 | 4190 | | checkstr RD, >2 |
3779 | | mov STR:RD, [BASE+RD*8] | 4191 | | mov STR:RD, [BASE+RD*8] |
3780 | if (sse) { | 4192 | if (LJ_DUALNUM) { |
4193 | | mov RD, dword STR:RD->len | ||
4194 | |1: | ||
4195 | | mov dword [BASE+RA*8+4], LJ_TISNUM | ||
4196 | | mov dword [BASE+RA*8], RD | ||
4197 | } else if (sse) { | ||
3781 | | xorps xmm0, xmm0 | 4198 | | xorps xmm0, xmm0 |
3782 | | cvtsi2sd xmm0, dword STR:RD->len | 4199 | | cvtsi2sd xmm0, dword STR:RD->len |
3783 | |1: | 4200 | |1: |
@@ -3793,45 +4210,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3793 | | mov TAB:FCARG1, [BASE+RD*8] | 4210 | | mov TAB:FCARG1, [BASE+RD*8] |
3794 | | mov RB, BASE // Save BASE. | 4211 | | mov RB, BASE // Save BASE. |
3795 | | call extern lj_tab_len@4 // (GCtab *t) | 4212 | | call extern lj_tab_len@4 // (GCtab *t) |
3796 | | // Length of table returned in eax (RC). | 4213 | | // Length of table returned in eax (RD). |
3797 | if (sse) { | 4214 | if (LJ_DUALNUM) { |
3798 | | cvtsi2sd xmm0, RC | 4215 | | // Nothing to do. |
3799 | | mov BASE, RB // Restore BASE. | 4216 | } else if (sse) { |
4217 | | cvtsi2sd xmm0, RD | ||
3800 | } else { | 4218 | } else { |
3801 | |.if not X64 | 4219 | |.if not X64 |
3802 | | mov ARG1, RC | 4220 | | mov ARG1, RD |
3803 | | mov BASE, RB // Restore BASE. | ||
3804 | | fild ARG1 | 4221 | | fild ARG1 |
3805 | |.endif | 4222 | |.endif |
3806 | } | 4223 | } |
4224 | | mov BASE, RB // Restore BASE. | ||
3807 | | movzx RA, PC_RA | 4225 | | movzx RA, PC_RA |
3808 | | jmp <1 | 4226 | | jmp <1 |
3809 | break; | 4227 | break; |
3810 | 4228 | ||
3811 | /* -- Binary ops -------------------------------------------------------- */ | 4229 | /* -- Binary ops -------------------------------------------------------- */ |
3812 | 4230 | ||
3813 | |.macro ins_arithpre, ins, sseins, ssereg | 4231 | |.macro ins_arithpre, x87ins, sseins, ssereg |
3814 | | ins_ABC | 4232 | | ins_ABC |
3815 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 4233 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
3816 | ||switch (vk) { | 4234 | ||switch (vk) { |
3817 | ||case 0: | 4235 | ||case 0: |
3818 | | checknum RB, ->vmeta_arith_vn | 4236 | | checknum RB, ->vmeta_arith_vn |
4237 | ||if (LJ_DUALNUM) { | ||
4238 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | ||
4239 | ||} | ||
3819 | ||if (sse) { | 4240 | ||if (sse) { |
3820 | | movsd xmm0, qword [BASE+RB*8] | 4241 | | movsd xmm0, qword [BASE+RB*8] |
3821 | | sseins ssereg, qword [KBASE+RC*8] | 4242 | | sseins ssereg, qword [KBASE+RC*8] |
3822 | ||} else { | 4243 | ||} else { |
3823 | | fld qword [BASE+RB*8] | 4244 | | fld qword [BASE+RB*8] |
3824 | | ins qword [KBASE+RC*8] | 4245 | | x87ins qword [KBASE+RC*8] |
3825 | ||} | 4246 | ||} |
3826 | || break; | 4247 | || break; |
3827 | ||case 1: | 4248 | ||case 1: |
3828 | | checknum RB, ->vmeta_arith_nv | 4249 | | checknum RB, ->vmeta_arith_nv |
4250 | ||if (LJ_DUALNUM) { | ||
4251 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | ||
4252 | ||} | ||
3829 | ||if (sse) { | 4253 | ||if (sse) { |
3830 | | movsd xmm0, qword [KBASE+RC*8] | 4254 | | movsd xmm0, qword [KBASE+RC*8] |
3831 | | sseins ssereg, qword [BASE+RB*8] | 4255 | | sseins ssereg, qword [BASE+RB*8] |
3832 | ||} else { | 4256 | ||} else { |
3833 | | fld qword [KBASE+RC*8] | 4257 | | fld qword [KBASE+RC*8] |
3834 | | ins qword [BASE+RB*8] | 4258 | | x87ins qword [BASE+RB*8] |
3835 | ||} | 4259 | ||} |
3836 | || break; | 4260 | || break; |
3837 | ||default: | 4261 | ||default: |
@@ -3842,12 +4266,44 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3842 | | sseins ssereg, qword [BASE+RC*8] | 4266 | | sseins ssereg, qword [BASE+RC*8] |
3843 | ||} else { | 4267 | ||} else { |
3844 | | fld qword [BASE+RB*8] | 4268 | | fld qword [BASE+RB*8] |
3845 | | ins qword [BASE+RC*8] | 4269 | | x87ins qword [BASE+RC*8] |
3846 | ||} | 4270 | ||} |
3847 | || break; | 4271 | || break; |
3848 | ||} | 4272 | ||} |
3849 | |.endmacro | 4273 | |.endmacro |
3850 | | | 4274 | | |
4275 | |.macro ins_arithdn, intins | ||
4276 | | ins_ABC | ||
4277 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); /* Integer-only arithmetic path: vk picks the operand form (0 = VN, 1 = NV, else VV). */ | ||
4278 | ||switch (vk) { | ||
4279 | ||case 0: /* Slot RB (op) constant RC; any non-integer operand goes to vmeta_arith_vn. */ | ||
4280 | | checkint RB, ->vmeta_arith_vn | ||
4281 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn | ||
4282 | | mov RB, [BASE+RB*8] | ||
4283 | | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno | ||
4284 | || break; | ||
4285 | ||case 1: /* Constant RC (op) slot RB; the result is built in RC, not RB. */ | ||
4286 | | checkint RB, ->vmeta_arith_nv | ||
4287 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv | ||
4288 | | mov RC, [KBASE+RC*8] | ||
4289 | | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo | ||
4290 | || break; | ||
4291 | ||default: /* Slot RB (op) slot RC; any non-integer operand goes to vmeta_arith_vv. */ | ||
4292 | | checkint RB, ->vmeta_arith_vv | ||
4293 | | checkint RC, ->vmeta_arith_vv | ||
4294 | | mov RB, [BASE+RB*8] | ||
4295 | | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo | ||
4296 | || break; | ||
4297 | ||} /* Signed overflow (jo) leaves through the matching vmeta_arith_*o label in every case. */ | ||
4298 | | mov dword [BASE+RA*8+4], LJ_TISNUM | ||
4299 | ||if (vk == 1) { /* Store the payload from whichever register the selected case computed into. */ | ||
4300 | | mov dword [BASE+RA*8], RC | ||
4301 | ||} else { | ||
4302 | | mov dword [BASE+RA*8], RB | ||
4303 | ||} | ||
4304 | | ins_next | ||
4305 | |.endmacro | ||
4306 | | | ||
3851 | |.macro ins_arithpost | 4307 | |.macro ins_arithpost |
3852 | ||if (sse) { | 4308 | ||if (sse) { |
3853 | | movsd qword [BASE+RA*8], xmm0 | 4309 | | movsd qword [BASE+RA*8], xmm0 |
@@ -3856,21 +4312,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3856 | ||} | 4312 | ||} |
3857 | |.endmacro | 4313 | |.endmacro |
3858 | | | 4314 | | |
3859 | |.macro ins_arith, ins, sseins | 4315 | |.macro ins_arith, x87ins, sseins |
3860 | | ins_arithpre ins, sseins, xmm0 | 4316 | | ins_arithpre x87ins, sseins, xmm0 |
3861 | | ins_arithpost | 4317 | | ins_arithpost |
3862 | | ins_next | 4318 | | ins_next |
3863 | |.endmacro | 4319 | |.endmacro |
4320 | | | ||
4321 | |.macro ins_arith, intins, x87ins, sseins | ||
4322 | ||if (LJ_DUALNUM) { /* Dual-number builds expand only the integer path (ins_arithdn). */ | ||
4323 | | ins_arithdn intins | ||
4324 | ||} else { /* Otherwise defer to the two-argument ins_arith with the x87/SSE instructions. */ | ||
4325 | | ins_arith, x87ins, sseins | ||
4326 | ||} | ||
4327 | |.endmacro | ||
3864 | 4328 | ||
3865 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 4329 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
3866 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 4330 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
3867 | | ins_arith fadd, addsd | 4331 | | ins_arith add, fadd, addsd |
3868 | break; | 4332 | break; |
3869 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 4333 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
3870 | | ins_arith fsub, subsd | 4334 | | ins_arith sub, fsub, subsd |
3871 | break; | 4335 | break; |
3872 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 4336 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
3873 | | ins_arith fmul, mulsd | 4337 | | ins_arith imul, fmul, mulsd |
3874 | break; | 4338 | break; |
3875 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 4339 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
3876 | | ins_arith fdiv, divsd | 4340 | | ins_arith fdiv, divsd |
@@ -3953,7 +4417,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
3953 | break; | 4417 | break; |
3954 | case BC_KSHORT: | 4418 | case BC_KSHORT: |
3955 | | ins_AD // RA = dst, RD = signed int16 literal | 4419 | | ins_AD // RA = dst, RD = signed int16 literal |
3956 | if (sse) { | 4420 | if (LJ_DUALNUM) { |
4421 | | movsx RD, RDW | ||
4422 | | mov dword [BASE+RA*8+4], LJ_TISNUM | ||
4423 | | mov dword [BASE+RA*8], RD | ||
4424 | } else if (sse) { | ||
3957 | | movsx RD, RDW // Sign-extend literal. | 4425 | | movsx RD, RDW // Sign-extend literal. |
3958 | | cvtsi2sd xmm0, RD | 4426 | | cvtsi2sd xmm0, RD |
3959 | | movsd qword [BASE+RA*8], xmm0 | 4427 | | movsd qword [BASE+RA*8], xmm0 |
@@ -4236,23 +4704,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4236 | | checktab RB, ->vmeta_tgetv | 4704 | | checktab RB, ->vmeta_tgetv |
4237 | | mov TAB:RB, [BASE+RB*8] | 4705 | | mov TAB:RB, [BASE+RB*8] |
4238 | | | 4706 | | |
4239 | | // Integer key? Convert number to int and back and compare. | 4707 | | // Integer key? |
4240 | | checknum RC, >5 | 4708 | if (LJ_DUALNUM) { |
4241 | if (sse) { | 4709 | | checkint RC, >5 |
4242 | | movsd xmm0, qword [BASE+RC*8] | 4710 | | mov RC, dword [BASE+RC*8] |
4243 | | cvtsd2si RC, xmm0 | ||
4244 | | cvtsi2sd xmm1, RC | ||
4245 | | ucomisd xmm0, xmm1 | ||
4246 | } else { | 4711 | } else { |
4247 | |.if not X64 | 4712 | | // Convert number to int and back and compare. |
4248 | | fld qword [BASE+RC*8] | 4713 | | checknum RC, >5 |
4249 | | fist ARG1 | 4714 | if (sse) { |
4250 | | fild ARG1 | 4715 | | movsd xmm0, qword [BASE+RC*8] |
4251 | | fcomparepp // eax (RC) modified! | 4716 | | cvtsd2si RC, xmm0 |
4252 | | mov RC, ARG1 | 4717 | | cvtsi2sd xmm1, RC |
4253 | |.endif | 4718 | | ucomisd xmm0, xmm1 |
4719 | } else { | ||
4720 | |.if not X64 | ||
4721 | | fld qword [BASE+RC*8] | ||
4722 | | fist ARG1 | ||
4723 | | fild ARG1 | ||
4724 | | fcomparepp // eax (RC) modified! | ||
4725 | | mov RC, ARG1 | ||
4726 | |.endif | ||
4727 | } | ||
4728 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | ||
4254 | } | 4729 | } |
4255 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | ||
4256 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4730 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
4257 | | jae ->vmeta_tgetv // Not in array part? Use fallback. | 4731 | | jae ->vmeta_tgetv // Not in array part? Use fallback. |
4258 | | shl RC, 3 | 4732 | | shl RC, 3 |
@@ -4380,23 +4854,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4380 | | checktab RB, ->vmeta_tsetv | 4854 | | checktab RB, ->vmeta_tsetv |
4381 | | mov TAB:RB, [BASE+RB*8] | 4855 | | mov TAB:RB, [BASE+RB*8] |
4382 | | | 4856 | | |
4383 | | // Integer key? Convert number to int and back and compare. | 4857 | | // Integer key? |
4384 | | checknum RC, >5 | 4858 | if (LJ_DUALNUM) { |
4385 | if (sse) { | 4859 | | checkint RC, >5 |
4386 | | movsd xmm0, qword [BASE+RC*8] | 4860 | | mov RC, dword [BASE+RC*8] |
4387 | | cvtsd2si RC, xmm0 | ||
4388 | | cvtsi2sd xmm1, RC | ||
4389 | | ucomisd xmm0, xmm1 | ||
4390 | } else { | 4861 | } else { |
4391 | |.if not X64 | 4862 | | // Convert number to int and back and compare. |
4392 | | fld qword [BASE+RC*8] | 4863 | | checknum RC, >5 |
4393 | | fist ARG1 | 4864 | if (sse) { |
4394 | | fild ARG1 | 4865 | | movsd xmm0, qword [BASE+RC*8] |
4395 | | fcomparepp // eax (RC) modified! | 4866 | | cvtsd2si RC, xmm0 |
4396 | | mov RC, ARG1 | 4867 | | cvtsi2sd xmm1, RC |
4397 | |.endif | 4868 | | ucomisd xmm0, xmm1 |
4869 | } else { | ||
4870 | |.if not X64 | ||
4871 | | fld qword [BASE+RC*8] | ||
4872 | | fist ARG1 | ||
4873 | | fild ARG1 | ||
4874 | | fcomparepp // eax (RC) modified! | ||
4875 | | mov RC, ARG1 | ||
4876 | |.endif | ||
4877 | } | ||
4878 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | ||
4398 | } | 4879 | } |
4399 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | ||
4400 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4880 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
4401 | | jae ->vmeta_tsetv | 4881 | | jae ->vmeta_tsetv |
4402 | | shl RC, 3 | 4882 | | shl RC, 3 |
@@ -4755,7 +5235,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4755 | |1: // Traverse array part. | 5235 | |1: // Traverse array part. |
4756 | | cmp RC, DISPATCH; jae >5 // Index points after array part? | 5236 | | cmp RC, DISPATCH; jae >5 // Index points after array part? |
4757 | | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 | 5237 | | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 |
4758 | if (sse) { | 5238 | if (LJ_DUALNUM) { |
5239 | | mov dword [BASE+RA*8+4], LJ_TISNUM | ||
5240 | | mov dword [BASE+RA*8], RC | ||
5241 | } else if (sse) { | ||
4759 | | cvtsi2sd xmm0, RC | 5242 | | cvtsi2sd xmm0, RC |
4760 | } else { | 5243 | } else { |
4761 | | fild dword [BASE+RA*8-8] | 5244 | | fild dword [BASE+RA*8-8] |
@@ -4772,7 +5255,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4772 | |.endif | 5255 | |.endif |
4773 | | add RC, 1 | 5256 | | add RC, 1 |
4774 | | // Return array index as a numeric key. | 5257 | | // Return array index as a numeric key. |
4775 | if (sse) { | 5258 | if (LJ_DUALNUM) { |
5259 | | // See above. | ||
5260 | } else if (sse) { | ||
4776 | | movsd qword [BASE+RA*8], xmm0 | 5261 | | movsd qword [BASE+RA*8], xmm0 |
4777 | } else { | 5262 | } else { |
4778 | | fstp qword [BASE+RA*8] | 5263 | | fstp qword [BASE+RA*8] |
@@ -4788,7 +5273,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
4788 | | | 5273 | | |
4789 | |4: // Skip holes in array part. | 5274 | |4: // Skip holes in array part. |
4790 | | add RC, 1 | 5275 | | add RC, 1 |
4791 | if (!sse) { | 5276 | if (!LJ_DUALNUM && !sse) { |
4792 | | mov [BASE+RA*8-8], RC | 5277 | | mov [BASE+RA*8-8], RC |
4793 | } | 5278 | } |
4794 | | jmp <1 | 5279 | | jmp <1 |
@@ -5020,10 +5505,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5020 | 5505 | ||
5021 | /* -- Loops and branches ------------------------------------------------ */ | 5506 | /* -- Loops and branches ------------------------------------------------ */ |
5022 | 5507 | ||
5023 | |.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4] | 5508 | |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] |
5024 | |.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12] | 5509 | |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] |
5025 | |.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20] | 5510 | |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] |
5026 | |.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28] | 5511 | |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] |
5027 | 5512 | ||
5028 | case BC_FORL: | 5513 | case BC_FORL: |
5029 | #if LJ_HASJIT | 5514 | #if LJ_HASJIT |
@@ -5042,37 +5527,101 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5042 | vk = (op == BC_IFORL || op == BC_JFORL); | 5527 | vk = (op == BC_IFORL || op == BC_JFORL); |
5043 | | ins_AJ // RA = base, RD = target (after end of loop or start of loop) | 5528 | | ins_AJ // RA = base, RD = target (after end of loop or start of loop) |
5044 | | lea RA, [BASE+RA*8] | 5529 | | lea RA, [BASE+RA*8] |
5530 | if (LJ_DUALNUM) { | ||
5531 | | cmp FOR_TIDX, LJ_TISNUM; jne >9 | ||
5532 | if (!vk) { | ||
5533 | | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for | ||
5534 | | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for | ||
5535 | | mov RB, dword FOR_IDX | ||
5536 | | cmp dword FOR_STEP, 0; jl >5 | ||
5537 | } else { | ||
5538 | | mov RB, dword FOR_STEP | ||
5539 | | test RB, RB; js >5 | ||
5540 | | add RB, dword FOR_IDX; jo >1 | ||
5541 | | mov dword FOR_IDX, RB | ||
5542 | } | ||
5543 | | cmp RB, dword FOR_STOP | ||
5544 | | mov FOR_TEXT, LJ_TISNUM | ||
5545 | | mov dword FOR_EXT, RB | ||
5546 | if (op == BC_FORI) { | ||
5547 | | jle >7 | ||
5548 | |1: | ||
5549 | |6: | ||
5550 | | branchPC RD | ||
5551 | } else if (op == BC_JFORI) { | ||
5552 | | branchPC RD | ||
5553 | | movzx RD, PC_RD | ||
5554 | | jle =>BC_JLOOP | ||
5555 | |1: | ||
5556 | |6: | ||
5557 | } else if (op == BC_IFORL) { | ||
5558 | | jg >7 | ||
5559 | |6: | ||
5560 | | branchPC RD | ||
5561 | |1: | ||
5562 | } else { | ||
5563 | | jle =>BC_JLOOP | ||
5564 | |1: | ||
5565 | |6: | ||
5566 | } | ||
5567 | |7: | ||
5568 | | ins_next | ||
5569 | | | ||
5570 | |5: // Invert check for negative step. | ||
5571 | if (vk) { | ||
5572 | | add RB, dword FOR_IDX; jo <1 | ||
5573 | | mov dword FOR_IDX, RB | ||
5574 | } | ||
5575 | | cmp RB, dword FOR_STOP | ||
5576 | | mov FOR_TEXT, LJ_TISNUM | ||
5577 | | mov dword FOR_EXT, RB | ||
5578 | if (op == BC_FORI) { | ||
5579 | | jge <7 | ||
5580 | } else if (op == BC_JFORI) { | ||
5581 | | branchPC RD | ||
5582 | | movzx RD, PC_RD | ||
5583 | | jge =>BC_JLOOP | ||
5584 | } else if (op == BC_IFORL) { | ||
5585 | | jl <7 | ||
5586 | } else { | ||
5587 | | jge =>BC_JLOOP | ||
5588 | } | ||
5589 | | jmp <6 | ||
5590 | |9: // Fallback to FP variant. | ||
5591 | } else if (!vk) { | ||
5592 | | cmp FOR_TIDX, LJ_TISNUM | ||
5593 | } | ||
5045 | if (!vk) { | 5594 | if (!vk) { |
5046 | | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks | 5595 | | jae ->vmeta_for |
5047 | | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for | 5596 | | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for |
5048 | } | 5597 | } |
5049 | | mov RB, FOR_TSTEP // Load type/hiword of for step. | 5598 | | mov RB, FOR_TSTEP // Load type/hiword of for step. |
5050 | if (!vk) { | 5599 | if (!vk) { |
5051 | | cmp RB, LJ_TISNUM; ja ->vmeta_for | 5600 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
5052 | } | 5601 | } |
5053 | if (sse) { | 5602 | if (sse) { |
5054 | | movsd xmm0, FOR_IDX | 5603 | | movsd xmm0, qword FOR_IDX |
5055 | | movsd xmm1, FOR_STOP | 5604 | | movsd xmm1, qword FOR_STOP |
5056 | if (vk) { | 5605 | if (vk) { |
5057 | | addsd xmm0, FOR_STEP | 5606 | | addsd xmm0, qword FOR_STEP |
5058 | | movsd FOR_IDX, xmm0 | 5607 | | movsd qword FOR_IDX, xmm0 |
5059 | | test RB, RB; js >3 | 5608 | | test RB, RB; js >3 |
5060 | } else { | 5609 | } else { |
5061 | | jl >3 | 5610 | | jl >3 |
5062 | } | 5611 | } |
5063 | | ucomisd xmm1, xmm0 | 5612 | | ucomisd xmm1, xmm0 |
5064 | |1: | 5613 | |1: |
5065 | | movsd FOR_EXT, xmm0 | 5614 | | movsd qword FOR_EXT, xmm0 |
5066 | } else { | 5615 | } else { |
5067 | | fld FOR_STOP | 5616 | | fld qword FOR_STOP |
5068 | | fld FOR_IDX | 5617 | | fld qword FOR_IDX |
5069 | if (vk) { | 5618 | if (vk) { |
5070 | | fadd FOR_STEP // nidx = idx + step | 5619 | | fadd qword FOR_STEP // nidx = idx + step |
5071 | | fst FOR_IDX | 5620 | | fst qword FOR_IDX |
5072 | | fst FOR_EXT | 5621 | | fst qword FOR_EXT |
5073 | | test RB, RB; js >1 | 5622 | | test RB, RB; js >1 |
5074 | } else { | 5623 | } else { |
5075 | | fst FOR_EXT | 5624 | | fst qword FOR_EXT |
5076 | | jl >1 | 5625 | | jl >1 |
5077 | } | 5626 | } |
5078 | | fxch // Swap lim/(n)idx if step non-negative. | 5627 | | fxch // Swap lim/(n)idx if step non-negative. |
@@ -5083,20 +5632,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) | |||
5083 | } | 5632 | } |
5084 | } | 5633 | } |
5085 | if (op == BC_FORI) { | 5634 | if (op == BC_FORI) { |
5086 | | jnb >2 | 5635 | if (LJ_DUALNUM) { |
5087 | | branchPC RD | 5636 | | jnb <7 |
5637 | } else { | ||
5638 | | jnb >2 | ||
5639 | | branchPC RD | ||
5640 | } | ||
5088 | } else if (op == BC_JFORI) { | 5641 | } else if (op == BC_JFORI) { |
5089 | | branchPC RD | 5642 | | branchPC RD |
5090 | | movzx RD, PC_RD | 5643 | | movzx RD, PC_RD |
5091 | | jnb =>BC_JLOOP | 5644 | | jnb =>BC_JLOOP |
5092 | } else if (op == BC_IFORL) { | 5645 | } else if (op == BC_IFORL) { |
5093 | | jb >2 | 5646 | if (LJ_DUALNUM) { |
5094 | | branchPC RD | 5647 | | jb <7 |
5648 | } else { | ||
5649 | | jb >2 | ||
5650 | | branchPC RD | ||
5651 | } | ||
5095 | } else { | 5652 | } else { |
5096 | | jnb =>BC_JLOOP | 5653 | | jnb =>BC_JLOOP |
5097 | } | 5654 | } |
5098 | |2: | 5655 | if (LJ_DUALNUM) { |
5099 | | ins_next | 5656 | | jmp <6 |
5657 | } else { | ||
5658 | |2: | ||
5659 | | ins_next | ||
5660 | } | ||
5100 | if (sse) { | 5661 | if (sse) { |
5101 | |3: // Invert comparison if step is negative. | 5662 | |3: // Invert comparison if step is negative. |
5102 | | ucomisd xmm0, xmm1 | 5663 | | ucomisd xmm0, xmm1 |