about | summary | refs | log | tree | commit | diff
path: root/src/vm_mips64.dasc
diff options
context:
space:
mode:
author: Mike Pall <mike> 2020-01-20 22:15:45 +0100
committer: Mike Pall <mike> 2020-01-20 22:15:45 +0100
commit: 94d0b53004a5fa368defa4307a17edcdb87fe727 (patch)
tree: 2468fb7d60f39ccadcd696d333c83ef49f3dfc02 /src/vm_mips64.dasc
parent: dfa692b746c9de067857d5fc992a41730be3d99a (diff)
download: luajit-94d0b53004a5fa368defa4307a17edcdb87fe727.tar.gz
luajit-94d0b53004a5fa368defa4307a17edcdb87fe727.tar.bz2
luajit-94d0b53004a5fa368defa4307a17edcdb87fe727.zip
MIPS: Add MIPS64 R6 port.
Contributed by Hua Zhang, YunQiang Su from Wave Computing, and Radovan Birdic from RT-RK. Sponsored by Wave Computing.
Diffstat (limited to '')
-rw-r--r-- src/vm_mips64.dasc 370
1 file changed, 341 insertions, 29 deletions
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 1682c81e..91c12216 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -83,6 +83,10 @@
83| 83|
84|.define FRET1, f0 84|.define FRET1, f0
85|.define FRET2, f2 85|.define FRET2, f2
86|
87|.define FTMP0, f20
88|.define FTMP1, f21
89|.define FTMP2, f22
86|.endif 90|.endif
87| 91|
88|// Stack layout while in interpreter. Must match with lj_frame.h. 92|// Stack layout while in interpreter. Must match with lj_frame.h.
@@ -310,10 +314,10 @@
310|.endmacro 314|.endmacro
311| 315|
312|// Assumes DISPATCH is relative to GL. 316|// Assumes DISPATCH is relative to GL.
313#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) 317#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
314#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) 318#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
315#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) 319#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch))
316#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) 320#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
317| 321|
318#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) 322#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
319| 323|
@@ -492,8 +496,15 @@ static void build_subroutines(BuildCtx *ctx)
492 |7: // Less results wanted. 496 |7: // Less results wanted.
493 | subu TMP0, RD, TMP2 497 | subu TMP0, RD, TMP2
494 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. 498 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it.
499 |.if MIPSR6
500 | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
501 | seleqz BASE, BASE, TMP2
502 | b <3
503 |. or BASE, BASE, TMP0
504 |.else
495 | b <3 505 | b <3
496 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? 506 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case?
507 |.endif
497 | 508 |
498 |8: // Corner case: need to grow stack for filling up results. 509 |8: // Corner case: need to grow stack for filling up results.
499 | // This can happen if: 510 | // This can happen if:
@@ -1121,11 +1132,16 @@ static void build_subroutines(BuildCtx *ctx)
1121 |.endmacro 1132 |.endmacro
1122 | 1133 |
1123 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! 1134 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
1135 |// MIPSR6: no delay slot, but a forbidden slot.
1124 |.macro ffgccheck 1136 |.macro ffgccheck
1125 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) 1137 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
1126 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 1138 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
1127 | dsubu AT, TMP0, TMP1 1139 | dsubu AT, TMP0, TMP1
1140 |.if MIPSR6
1141 | bgezalc AT, ->fff_gcstep
1142 |.else
1128 | bgezal AT, ->fff_gcstep 1143 | bgezal AT, ->fff_gcstep
1144 |.endif
1129 |.endmacro 1145 |.endmacro
1130 | 1146 |
1131 |//-- Base library: checks ----------------------------------------------- 1147 |//-- Base library: checks -----------------------------------------------
@@ -1153,7 +1169,13 @@ static void build_subroutines(BuildCtx *ctx)
1153 | sltu TMP1, TISNUM, TMP0 1169 | sltu TMP1, TISNUM, TMP0
1154 | not TMP2, TMP0 1170 | not TMP2, TMP0
1155 | li TMP3, ~LJ_TISNUM 1171 | li TMP3, ~LJ_TISNUM
1172 |.if MIPSR6
1173 | selnez TMP2, TMP2, TMP1
1174 | seleqz TMP3, TMP3, TMP1
1175 | or TMP2, TMP2, TMP3
1176 |.else
1156 | movz TMP2, TMP3, TMP1 1177 | movz TMP2, TMP3, TMP1
1178 |.endif
1157 | dsll TMP2, TMP2, 3 1179 | dsll TMP2, TMP2, 3
1158 | daddu TMP2, CFUNC:RB, TMP2 1180 | daddu TMP2, CFUNC:RB, TMP2
1159 | b ->fff_restv 1181 | b ->fff_restv
@@ -1165,7 +1187,11 @@ static void build_subroutines(BuildCtx *ctx)
1165 | gettp TMP2, CARG1 1187 | gettp TMP2, CARG1
1166 | daddiu TMP0, TMP2, -LJ_TTAB 1188 | daddiu TMP0, TMP2, -LJ_TTAB
1167 | daddiu TMP1, TMP2, -LJ_TUDATA 1189 | daddiu TMP1, TMP2, -LJ_TUDATA
1190 |.if MIPSR6
1191 | selnez TMP0, TMP1, TMP0
1192 |.else
1168 | movn TMP0, TMP1, TMP0 1193 | movn TMP0, TMP1, TMP0
1194 |.endif
1169 | bnez TMP0, >6 1195 | bnez TMP0, >6
1170 |. cleartp TAB:CARG1 1196 |. cleartp TAB:CARG1
1171 |1: // Field metatable must be at same offset for GCtab and GCudata! 1197 |1: // Field metatable must be at same offset for GCtab and GCudata!
@@ -1204,7 +1230,13 @@ static void build_subroutines(BuildCtx *ctx)
1204 | 1230 |
1205 |6: 1231 |6:
1206 | sltiu AT, TMP2, LJ_TISNUM 1232 | sltiu AT, TMP2, LJ_TISNUM
1233 |.if MIPSR6
1234 | selnez TMP0, TISNUM, AT
1235 | seleqz AT, TMP2, AT
1236 | or TMP2, TMP0, AT
1237 |.else
1207 | movn TMP2, TISNUM, AT 1238 | movn TMP2, TISNUM, AT
1239 |.endif
1208 | dsll TMP2, TMP2, 3 1240 | dsll TMP2, TMP2, 3
1209 | dsubu TMP0, DISPATCH, TMP2 1241 | dsubu TMP0, DISPATCH, TMP2
1210 | b <2 1242 | b <2
@@ -1266,8 +1298,13 @@ static void build_subroutines(BuildCtx *ctx)
1266 | or TMP0, TMP0, TMP1 1298 | or TMP0, TMP0, TMP1
1267 | bnez TMP0, ->fff_fallback 1299 | bnez TMP0, ->fff_fallback
1268 |. sd BASE, L->base // Add frame since C call can throw. 1300 |. sd BASE, L->base // Add frame since C call can throw.
1301 |.if MIPSR6
1302 | sd PC, SAVE_PC // Redundant (but a defined value).
1303 | ffgccheck
1304 |.else
1269 | ffgccheck 1305 | ffgccheck
1270 |. sd PC, SAVE_PC // Redundant (but a defined value). 1306 |. sd PC, SAVE_PC // Redundant (but a defined value).
1307 |.endif
1271 | load_got lj_strfmt_number 1308 | load_got lj_strfmt_number
1272 | move CARG1, L 1309 | move CARG1, L
1273 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) 1310 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
@@ -1438,8 +1475,15 @@ static void build_subroutines(BuildCtx *ctx)
1438 | addiu AT, TMP0, -LUA_YIELD 1475 | addiu AT, TMP0, -LUA_YIELD
1439 | daddu CARG3, CARG2, TMP0 1476 | daddu CARG3, CARG2, TMP0
1440 | daddiu TMP3, CARG2, 8 1477 | daddiu TMP3, CARG2, 8
1478 |.if MIPSR6
1479 | seleqz CARG2, CARG2, AT
1480 | selnez TMP3, TMP3, AT
1481 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1482 |. or CARG2, TMP3, CARG2
1483 |.else
1441 | bgtz AT, ->fff_fallback // st > LUA_YIELD? 1484 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1442 |. movn CARG2, TMP3, AT 1485 |. movn CARG2, TMP3, AT
1486 |.endif
1443 | xor TMP2, TMP2, CARG3 1487 | xor TMP2, TMP2, CARG3
1444 | bnez TMP1, ->fff_fallback // cframe != 0? 1488 | bnez TMP1, ->fff_fallback // cframe != 0?
1445 |. or AT, TMP2, TMP0 1489 |. or AT, TMP2, TMP0
@@ -1751,7 +1795,7 @@ static void build_subroutines(BuildCtx *ctx)
1751 | b ->fff_res 1795 | b ->fff_res
1752 |. li RD, (2+1)*8 1796 |. li RD, (2+1)*8
1753 | 1797 |
1754 |.macro math_minmax, name, intins, fpins 1798 |.macro math_minmax, name, intins, intinsc, fpins
1755 | .ffunc_1 name 1799 | .ffunc_1 name
1756 | daddu TMP3, BASE, NARGS8:RC 1800 | daddu TMP3, BASE, NARGS8:RC
1757 | checkint CARG1, >5 1801 | checkint CARG1, >5
@@ -1763,7 +1807,13 @@ static void build_subroutines(BuildCtx *ctx)
1763 |. sextw CARG1, CARG1 1807 |. sextw CARG1, CARG1
1764 | lw CARG2, LO(TMP2) 1808 | lw CARG2, LO(TMP2)
1765 |. slt AT, CARG1, CARG2 1809 |. slt AT, CARG1, CARG2
1810 |.if MIPSR6
1811 | intins TMP1, CARG2, AT
1812 | intinsc CARG1, CARG1, AT
1813 | or CARG1, CARG1, TMP1
1814 |.else
1766 | intins CARG1, CARG2, AT 1815 | intins CARG1, CARG2, AT
1816 |.endif
1767 | daddiu TMP2, TMP2, 8 1817 | daddiu TMP2, TMP2, 8
1768 | zextw CARG1, CARG1 1818 | zextw CARG1, CARG1
1769 | b <1 1819 | b <1
@@ -1799,13 +1849,23 @@ static void build_subroutines(BuildCtx *ctx)
1799 |. nop 1849 |. nop
1800 |7: 1850 |7:
1801 |.if FPU 1851 |.if FPU
1852 |.if MIPSR6
1853 | fpins FRET1, FRET1, FARG1
1854 |.else
1802 | c.olt.d FRET1, FARG1 1855 | c.olt.d FRET1, FARG1
1803 | fpins FRET1, FARG1 1856 | fpins FRET1, FARG1
1857 |.endif
1804 |.else 1858 |.else
1805 | bal ->vm_sfcmpolt 1859 | bal ->vm_sfcmpolt
1806 |. nop 1860 |. nop
1861 |.if MIPSR6
1862 | intins AT, CARG2, CRET1
1863 | intinsc CARG1, CARG1, CRET1
1864 | or CARG1, CARG1, AT
1865 |.else
1807 | intins CARG1, CARG2, CRET1 1866 | intins CARG1, CARG2, CRET1
1808 |.endif 1867 |.endif
1868 |.endif
1809 | b <6 1869 | b <6
1810 |. daddiu TMP2, TMP2, 8 1870 |. daddiu TMP2, TMP2, 8
1811 | 1871 |
@@ -1825,8 +1885,13 @@ static void build_subroutines(BuildCtx *ctx)
1825 | 1885 |
1826 |.endmacro 1886 |.endmacro
1827 | 1887 |
1828 | math_minmax math_min, movz, movf.d 1888 |.if MIPSR6
1829 | math_minmax math_max, movn, movt.d 1889 | math_minmax math_min, seleqz, selnez, min.d
1890 | math_minmax math_max, selnez, seleqz, max.d
1891 |.else
1892 | math_minmax math_min, movz, _, movf.d
1893 | math_minmax math_max, movn, _, movt.d
1894 |.endif
1830 | 1895 |
1831 |//-- String library ----------------------------------------------------- 1896 |//-- String library -----------------------------------------------------
1832 | 1897 |
@@ -1851,7 +1916,9 @@ static void build_subroutines(BuildCtx *ctx)
1851 | 1916 |
1852 |.ffunc string_char // Only handle the 1-arg case here. 1917 |.ffunc string_char // Only handle the 1-arg case here.
1853 | ffgccheck 1918 | ffgccheck
1919 |.if not MIPSR6
1854 |. nop 1920 |. nop
1921 |.endif
1855 | ld CARG1, 0(BASE) 1922 | ld CARG1, 0(BASE)
1856 | gettp TMP0, CARG1 1923 | gettp TMP0, CARG1
1857 | xori AT, NARGS8:RC, 8 // Exactly 1 argument. 1924 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
@@ -1881,7 +1948,9 @@ static void build_subroutines(BuildCtx *ctx)
1881 | 1948 |
1882 |.ffunc string_sub 1949 |.ffunc string_sub
1883 | ffgccheck 1950 | ffgccheck
1951 |.if not MIPSR6
1884 |. nop 1952 |. nop
1953 |.endif
1885 | addiu AT, NARGS8:RC, -16 1954 | addiu AT, NARGS8:RC, -16
1886 | ld TMP0, 0(BASE) 1955 | ld TMP0, 0(BASE)
1887 | bltz AT, ->fff_fallback 1956 | bltz AT, ->fff_fallback
@@ -1904,8 +1973,30 @@ static void build_subroutines(BuildCtx *ctx)
1904 | addiu TMP0, CARG2, 1 1973 | addiu TMP0, CARG2, 1
1905 | addu TMP1, CARG4, TMP0 1974 | addu TMP1, CARG4, TMP0
1906 | slt TMP3, CARG3, r0 1975 | slt TMP3, CARG3, r0
1976 |.if MIPSR6
1977 | seleqz CARG4, CARG4, AT
1978 | selnez TMP1, TMP1, AT
1979 | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1
1980 |.else
1907 | movn CARG4, TMP1, AT // if (end < 0) end += len+1 1981 | movn CARG4, TMP1, AT // if (end < 0) end += len+1
1982 |.endif
1908 | addu TMP1, CARG3, TMP0 1983 | addu TMP1, CARG3, TMP0
1984 |.if MIPSR6
1985 | selnez TMP1, TMP1, TMP3
1986 | seleqz CARG3, CARG3, TMP3
1987 | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1
1988 | li TMP2, 1
1989 | slt AT, CARG4, r0
1990 | slt TMP3, r0, CARG3
1991 | seleqz CARG4, CARG4, AT // if (end < 0) end = 0
1992 | selnez CARG3, CARG3, TMP3
1993 | seleqz TMP2, TMP2, TMP3
1994 | or CARG3, TMP2, CARG3 // if (start < 1) start = 1
1995 | slt AT, CARG2, CARG4
1996 | seleqz CARG4, CARG4, AT
1997 | selnez CARG2, CARG2, AT
1998 | or CARG4, CARG2, CARG4 // if (end > len) end = len
1999 |.else
1909 | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 2000 | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1
1910 | li TMP2, 1 2001 | li TMP2, 1
1911 | slt AT, CARG4, r0 2002 | slt AT, CARG4, r0
@@ -1914,6 +2005,7 @@ static void build_subroutines(BuildCtx *ctx)
1914 | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 2005 | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1
1915 | slt AT, CARG2, CARG4 2006 | slt AT, CARG2, CARG4
1916 | movn CARG4, CARG2, AT // if (end > len) end = len 2007 | movn CARG4, CARG2, AT // if (end > len) end = len
2008 |.endif
1917 | daddu CARG2, STR:CARG1, CARG3 2009 | daddu CARG2, STR:CARG1, CARG3
1918 | subu CARG3, CARG4, CARG3 // len = end - start 2010 | subu CARG3, CARG4, CARG3 // len = end - start
1919 | daddiu CARG2, CARG2, sizeof(GCstr)-1 2011 | daddiu CARG2, CARG2, sizeof(GCstr)-1
@@ -1975,7 +2067,13 @@ static void build_subroutines(BuildCtx *ctx)
1975 | slt AT, CARG1, r0 2067 | slt AT, CARG1, r0
1976 | dsrlv CRET1, TMP0, CARG3 2068 | dsrlv CRET1, TMP0, CARG3
1977 | dsubu TMP0, r0, CRET1 2069 | dsubu TMP0, r0, CRET1
2070 |.if MIPSR6
2071 | selnez TMP0, TMP0, AT
2072 | seleqz CRET1, CRET1, AT
2073 | or CRET1, CRET1, TMP0
2074 |.else
1978 | movn CRET1, TMP0, AT 2075 | movn CRET1, TMP0, AT
2076 |.endif
1979 | jr ra 2077 | jr ra
1980 |. zextw CRET1, CRET1 2078 |. zextw CRET1, CRET1
1981 |1: 2079 |1:
@@ -1998,14 +2096,28 @@ static void build_subroutines(BuildCtx *ctx)
1998 | slt AT, CARG1, r0 2096 | slt AT, CARG1, r0
1999 | dsrlv CRET1, CRET2, TMP0 2097 | dsrlv CRET1, CRET2, TMP0
2000 | dsubu CARG1, r0, CRET1 2098 | dsubu CARG1, r0, CRET1
2099 |.if MIPSR6
2100 | seleqz CRET1, CRET1, AT
2101 | selnez CARG1, CARG1, AT
2102 | or CRET1, CRET1, CARG1
2103 |.else
2001 | movn CRET1, CARG1, AT 2104 | movn CRET1, CARG1, AT
2105 |.endif
2002 | li CARG1, 64 2106 | li CARG1, 64
2003 | subu TMP0, CARG1, TMP0 2107 | subu TMP0, CARG1, TMP0
2004 | dsllv CRET2, CRET2, TMP0 // Integer check. 2108 | dsllv CRET2, CRET2, TMP0 // Integer check.
2005 | sextw AT, CRET1 2109 | sextw AT, CRET1
2006 | xor AT, CRET1, AT // Range check. 2110 | xor AT, CRET1, AT // Range check.
2007 | jr ra 2111 | jr ra
2112 |.if MIPSR6
2113 | seleqz AT, AT, CRET2
2114 | selnez CRET2, CRET2, CRET2
2115 | jr ra
2116 |. or CRET2, AT, CRET2
2117 |.else
2118 | jr ra
2008 |. movz CRET2, AT, CRET2 2119 |. movz CRET2, AT, CRET2
2120 |.endif
2009 |1: 2121 |1:
2010 | jr ra 2122 | jr ra
2011 |. li CRET2, 1 2123 |. li CRET2, 1
@@ -2515,15 +2627,22 @@ static void build_subroutines(BuildCtx *ctx)
2515 | 2627 |
2516 |// Hard-float round to integer. 2628 |// Hard-float round to integer.
2517 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2629 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2630 |// MIPSR6: Modifies FTMP1, too.
2518 |.macro vm_round_hf, func 2631 |.macro vm_round_hf, func
2519 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2632 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2520 | dsll TMP0, TMP0, 32 2633 | dsll TMP0, TMP0, 32
2521 | dmtc1 TMP0, f4 2634 | dmtc1 TMP0, f4
2522 | abs.d FRET2, FARG1 // |x| 2635 | abs.d FRET2, FARG1 // |x|
2523 | dmfc1 AT, FARG1 2636 | dmfc1 AT, FARG1
2637 |.if MIPSR6
2638 | cmp.lt.d FTMP1, FRET2, f4
2639 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2640 | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52.
2641 |.else
2524 | c.olt.d 0, FRET2, f4 2642 | c.olt.d 0, FRET2, f4
2525 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 2643 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2526 | bc1f 0, >1 // Truncate only if |x| < 2^52. 2644 | bc1f 0, >1 // Truncate only if |x| < 2^52.
2645 |.endif
2527 |. sub.d FRET1, FRET1, f4 2646 |. sub.d FRET1, FRET1, f4
2528 | slt AT, AT, r0 2647 | slt AT, AT, r0
2529 |.if "func" == "ceil" 2648 |.if "func" == "ceil"
@@ -2534,16 +2653,38 @@ static void build_subroutines(BuildCtx *ctx)
2534 |.if "func" == "trunc" 2653 |.if "func" == "trunc"
2535 | dsll TMP0, TMP0, 32 2654 | dsll TMP0, TMP0, 32
2536 | dmtc1 TMP0, f4 2655 | dmtc1 TMP0, f4
2656 |.if MIPSR6
2657 | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result?
2658 | sub.d FRET2, FRET1, f4
2659 | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1.
2660 | dmtc1 AT, FRET1
2661 | neg.d FRET2, FTMP1
2662 | jr ra
2663 |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in.
2664 |.else
2537 | c.olt.d 0, FRET2, FRET1 // |x| < result? 2665 | c.olt.d 0, FRET2, FRET1 // |x| < result?
2538 | sub.d FRET2, FRET1, f4 2666 | sub.d FRET2, FRET1, f4
2539 | movt.d FRET1, FRET2, 0 // If yes, subtract +1. 2667 | movt.d FRET1, FRET2, 0 // If yes, subtract +1.
2540 | neg.d FRET2, FRET1 2668 | neg.d FRET2, FRET1
2541 | jr ra 2669 | jr ra
2542 |. movn.d FRET1, FRET2, AT // Merge sign bit back in. 2670 |. movn.d FRET1, FRET2, AT // Merge sign bit back in.
2671 |.endif
2543 |.else 2672 |.else
2544 | neg.d FRET2, FRET1 2673 | neg.d FRET2, FRET1
2545 | dsll TMP0, TMP0, 32 2674 | dsll TMP0, TMP0, 32
2546 | dmtc1 TMP0, f4 2675 | dmtc1 TMP0, f4
2676 |.if MIPSR6
2677 | dmtc1 AT, FTMP1
2678 | sel.d FTMP1, FRET1, FRET2
2679 |.if "func" == "ceil"
2680 | cmp.lt.d FRET1, FTMP1, FARG1 // x > result?
2681 |.else
2682 | cmp.lt.d FRET1, FARG1, FTMP1 // x < result?
2683 |.endif
2684 | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1.
2685 | jr ra
2686 |. sel.d FRET1, FTMP1, FRET2
2687 |.else
2547 | movn.d FRET1, FRET2, AT // Merge sign bit back in. 2688 | movn.d FRET1, FRET2, AT // Merge sign bit back in.
2548 |.if "func" == "ceil" 2689 |.if "func" == "ceil"
2549 | c.olt.d 0, FRET1, FARG1 // x > result? 2690 | c.olt.d 0, FRET1, FARG1 // x > result?
@@ -2554,6 +2695,7 @@ static void build_subroutines(BuildCtx *ctx)
2554 | jr ra 2695 | jr ra
2555 |. movt.d FRET1, FRET2, 0 2696 |. movt.d FRET1, FRET2, 0
2556 |.endif 2697 |.endif
2698 |.endif
2557 |1: 2699 |1:
2558 | jr ra 2700 | jr ra
2559 |. mov.d FRET1, FARG1 2701 |. mov.d FRET1, FARG1
@@ -2698,7 +2840,7 @@ static void build_subroutines(BuildCtx *ctx)
2698 |. li CRET1, 0 2840 |. li CRET1, 0
2699 |.endif 2841 |.endif
2700 | 2842 |
2701 |.macro sfmin_max, name, intins 2843 |.macro sfmin_max, name, intins, intinsc
2702 |->vm_sf .. name: 2844 |->vm_sf .. name:
2703 |.if JIT and not FPU 2845 |.if JIT and not FPU
2704 | move TMP2, ra 2846 | move TMP2, ra
@@ -2707,13 +2849,25 @@ static void build_subroutines(BuildCtx *ctx)
2707 | move ra, TMP2 2849 | move ra, TMP2
2708 | move TMP0, CRET1 2850 | move TMP0, CRET1
2709 | move CRET1, CARG1 2851 | move CRET1, CARG1
2852 |.if MIPSR6
2853 | intins CRET1, CRET1, TMP0
2854 | intinsc TMP0, CARG2, TMP0
2855 | jr ra
2856 |. or CRET1, CRET1, TMP0
2857 |.else
2710 | jr ra 2858 | jr ra
2711 |. intins CRET1, CARG2, TMP0 2859 |. intins CRET1, CARG2, TMP0
2712 |.endif 2860 |.endif
2861 |.endif
2713 |.endmacro 2862 |.endmacro
2714 | 2863 |
2715 | sfmin_max min, movz 2864 |.if MIPSR6
2716 | sfmin_max max, movn 2865 | sfmin_max min, selnez, seleqz
2866 | sfmin_max max, seleqz, selnez
2867 |.else
2868 | sfmin_max min, movz, _
2869 | sfmin_max max, movn, _
2870 |.endif
2717 | 2871 |
2718 |//----------------------------------------------------------------------- 2872 |//-----------------------------------------------------------------------
2719 |//-- Miscellaneous functions -------------------------------------------- 2873 |//-- Miscellaneous functions --------------------------------------------
@@ -2882,7 +3036,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2882 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3036 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2883 | slt AT, CARG1, CARG2 3037 | slt AT, CARG1, CARG2
2884 | addu TMP2, TMP2, TMP3 3038 | addu TMP2, TMP2, TMP3
3039 |.if MIPSR6
3040 | movop TMP2, TMP2, AT
3041 |.else
2885 | movop TMP2, r0, AT 3042 | movop TMP2, r0, AT
3043 |.endif
2886 |1: 3044 |1:
2887 | daddu PC, PC, TMP2 3045 | daddu PC, PC, TMP2
2888 | ins_next 3046 | ins_next
@@ -2900,16 +3058,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2900 |.endif 3058 |.endif
2901 |3: // RA and RD are both numbers. 3059 |3: // RA and RD are both numbers.
2902 |.if FPU 3060 |.if FPU
2903 | fcomp f20, f22 3061 |.if MIPSR6
3062 | fcomp FTMP0, FTMP0, FTMP2
3063 | addu TMP2, TMP2, TMP3
3064 | mfc1 TMP3, FTMP0
3065 | b <1
3066 |. fmovop TMP2, TMP2, TMP3
3067 |.else
3068 | fcomp FTMP0, FTMP2
2904 | addu TMP2, TMP2, TMP3 3069 | addu TMP2, TMP2, TMP3
2905 | b <1 3070 | b <1
2906 |. fmovop TMP2, r0 3071 |. fmovop TMP2, r0
3072 |.endif
2907 |.else 3073 |.else
2908 | bal sfcomp 3074 | bal sfcomp
2909 |. addu TMP2, TMP2, TMP3 3075 |. addu TMP2, TMP2, TMP3
2910 | b <1 3076 | b <1
3077 |.if MIPSR6
3078 |. movop TMP2, TMP2, CRET1
3079 |.else
2911 |. movop TMP2, r0, CRET1 3080 |. movop TMP2, r0, CRET1
2912 |.endif 3081 |.endif
3082 |.endif
2913 | 3083 |
2914 |4: // RA is a number, RD is not a number. 3084 |4: // RA is a number, RD is not a number.
2915 | bne CARG4, TISNUM, ->vmeta_comp 3085 | bne CARG4, TISNUM, ->vmeta_comp
@@ -2956,15 +3126,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2956 |.endif 3126 |.endif
2957 |.endmacro 3127 |.endmacro
2958 | 3128 |
3129 |.if MIPSR6
3130 if (op == BC_ISLT) {
3131 | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
3132 } else if (op == BC_ISGE) {
3133 | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
3134 } else if (op == BC_ISLE) {
3135 | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
3136 } else {
3137 | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
3138 }
3139 |.else
2959 if (op == BC_ISLT) { 3140 if (op == BC_ISLT) {
2960 | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt 3141 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
2961 } else if (op == BC_ISGE) { 3142 } else if (op == BC_ISGE) {
2962 | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt 3143 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
2963 } else if (op == BC_ISLE) { 3144 } else if (op == BC_ISLE) {
2964 | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult 3145 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
2965 } else { 3146 } else {
2966 | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult 3147 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
2967 } 3148 }
3149 |.endif
2968 break; 3150 break;
2969 3151
2970 case BC_ISEQV: case BC_ISNEV: 3152 case BC_ISEQV: case BC_ISNEV:
@@ -3010,7 +3192,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3010 |2: // Check if the tags are the same and it's a table or userdata. 3192 |2: // Check if the tags are the same and it's a table or userdata.
3011 | xor AT, CARG3, CARG4 // Same type? 3193 | xor AT, CARG3, CARG4 // Same type?
3012 | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? 3194 | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata?
3195 |.if MIPSR6
3196 | seleqz TMP0, TMP0, AT
3197 |.else
3013 | movn TMP0, r0, AT 3198 | movn TMP0, r0, AT
3199 |.endif
3014 if (vk) { 3200 if (vk) {
3015 | beqz TMP0, <1 3201 | beqz TMP0, <1
3016 } else { 3202 } else {
@@ -3060,11 +3246,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3060 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3246 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3061 | xor TMP1, CARG1, CARG2 3247 | xor TMP1, CARG1, CARG2
3062 | addu TMP2, TMP2, TMP3 3248 | addu TMP2, TMP2, TMP3
3249 |.if MIPSR6
3250 if (vk) {
3251 | seleqz TMP2, TMP2, TMP1
3252 } else {
3253 | selnez TMP2, TMP2, TMP1
3254 }
3255 |.else
3063 if (vk) { 3256 if (vk) {
3064 | movn TMP2, r0, TMP1 3257 | movn TMP2, r0, TMP1
3065 } else { 3258 } else {
3066 | movz TMP2, r0, TMP1 3259 | movz TMP2, r0, TMP1
3067 } 3260 }
3261 |.endif
3068 | daddu PC, PC, TMP2 3262 | daddu PC, PC, TMP2
3069 | ins_next 3263 | ins_next
3070 break; 3264 break;
@@ -3091,6 +3285,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3091 | bne CARG4, TISNUM, >6 3285 | bne CARG4, TISNUM, >6
3092 |. addu TMP2, TMP2, TMP3 3286 |. addu TMP2, TMP2, TMP3
3093 | xor AT, CARG1, CARG2 3287 | xor AT, CARG1, CARG2
3288 |.if MIPSR6
3289 if (vk) {
3290 | seleqz TMP2, TMP2, AT
3291 |1:
3292 | daddu PC, PC, TMP2
3293 |2:
3294 } else {
3295 | selnez TMP2, TMP2, AT
3296 |1:
3297 |2:
3298 | daddu PC, PC, TMP2
3299 }
3300 |.else
3094 if (vk) { 3301 if (vk) {
3095 | movn TMP2, r0, AT 3302 | movn TMP2, r0, AT
3096 |1: 3303 |1:
@@ -3102,6 +3309,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3102 |2: 3309 |2:
3103 | daddu PC, PC, TMP2 3310 | daddu PC, PC, TMP2
3104 } 3311 }
3312 |.endif
3105 | ins_next 3313 | ins_next
3106 | 3314 |
3107 |3: // RA is not an integer. 3315 |3: // RA is not an integer.
@@ -3114,30 +3322,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3114 |. addu TMP2, TMP2, TMP3 3322 |. addu TMP2, TMP2, TMP3
3115 | sltu AT, CARG4, TISNUM 3323 | sltu AT, CARG4, TISNUM
3116 |.if FPU 3324 |.if FPU
3117 | ldc1 f20, 0(RA) 3325 | ldc1 FTMP0, 0(RA)
3118 | ldc1 f22, 0(RD) 3326 | ldc1 FTMP2, 0(RD)
3119 |.endif 3327 |.endif
3120 | beqz AT, >5 3328 | beqz AT, >5
3121 |. nop 3329 |. nop
3122 |4: // RA and RD are both numbers. 3330 |4: // RA and RD are both numbers.
3123 |.if FPU 3331 |.if FPU
3124 | c.eq.d f20, f22 3332 |.if MIPSR6
3333 | cmp.eq.d FTMP0, FTMP0, FTMP2
3334 | dmfc1 TMP1, FTMP0
3335 | b <1
3336 if (vk) {
3337 |. selnez TMP2, TMP2, TMP1
3338 } else {
3339 |. seleqz TMP2, TMP2, TMP1
3340 }
3341 |.else
3342 | c.eq.d FTMP0, FTMP2
3125 | b <1 3343 | b <1
3126 if (vk) { 3344 if (vk) {
3127 |. movf TMP2, r0 3345 |. movf TMP2, r0
3128 } else { 3346 } else {
3129 |. movt TMP2, r0 3347 |. movt TMP2, r0
3130 } 3348 }
3349 |.endif
3131 |.else 3350 |.else
3132 | bal ->vm_sfcmpeq 3351 | bal ->vm_sfcmpeq
3133 |. nop 3352 |. nop
3134 | b <1 3353 | b <1
3354 |.if MIPSR6
3355 if (vk) {
3356 |. selnez TMP2, TMP2, CRET1
3357 } else {
3358 |. seleqz TMP2, TMP2, CRET1
3359 }
3360 |.else
3135 if (vk) { 3361 if (vk) {
3136 |. movz TMP2, r0, CRET1 3362 |. movz TMP2, r0, CRET1
3137 } else { 3363 } else {
3138 |. movn TMP2, r0, CRET1 3364 |. movn TMP2, r0, CRET1
3139 } 3365 }
3140 |.endif 3366 |.endif
3367 |.endif
3141 | 3368 |
3142 |5: // RA is a number, RD is not a number. 3369 |5: // RA is a number, RD is not a number.
3143 |.if FFI 3370 |.if FFI
@@ -3147,9 +3374,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3147 |.endif 3374 |.endif
3148 | // RA is a number, RD is an integer. Convert RD to a number. 3375 | // RA is a number, RD is an integer. Convert RD to a number.
3149 |.if FPU 3376 |.if FPU
3150 |. lwc1 f22, LO(RD) 3377 |. lwc1 FTMP2, LO(RD)
3151 | b <4 3378 | b <4
3152 |. cvt.d.w f22, f22 3379 |. cvt.d.w FTMP2, FTMP2
3153 |.else 3380 |.else
3154 |. sextw CARG2, CARG2 3381 |. sextw CARG2, CARG2
3155 | bal ->vm_sfi2d_2 3382 | bal ->vm_sfi2d_2
@@ -3167,10 +3394,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3167 |.endif 3394 |.endif
3168 | // RA is an integer, RD is a number. Convert RA to a number. 3395 | // RA is an integer, RD is a number. Convert RA to a number.
3169 |.if FPU 3396 |.if FPU
3170 |. lwc1 f20, LO(RA) 3397 |. lwc1 FTMP0, LO(RA)
3171 | ldc1 f22, 0(RD) 3398 | ldc1 FTMP2, 0(RD)
3172 | b <4 3399 | b <4
3173 | cvt.d.w f20, f20 3400 | cvt.d.w FTMP0, FTMP0
3174 |.else 3401 |.else
3175 |. sextw CARG1, CARG1 3402 |. sextw CARG1, CARG1
3176 | bal ->vm_sfi2d_1 3403 | bal ->vm_sfi2d_1
@@ -3213,11 +3440,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3213 | decode_RD4b TMP2 3440 | decode_RD4b TMP2
3214 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3441 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3215 | addu TMP2, TMP2, TMP3 3442 | addu TMP2, TMP2, TMP3
3443 |.if MIPSR6
3444 if (vk) {
3445 | seleqz TMP2, TMP2, TMP0
3446 } else {
3447 | selnez TMP2, TMP2, TMP0
3448 }
3449 |.else
3216 if (vk) { 3450 if (vk) {
3217 | movn TMP2, r0, TMP0 3451 | movn TMP2, r0, TMP0
3218 } else { 3452 } else {
3219 | movz TMP2, r0, TMP0 3453 | movz TMP2, r0, TMP0
3220 } 3454 }
3455 |.endif
3221 | daddu PC, PC, TMP2 3456 | daddu PC, PC, TMP2
3222 | ins_next 3457 | ins_next
3223 break; 3458 break;
@@ -3236,11 +3471,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3236 | decode_RD4b TMP2 3471 | decode_RD4b TMP2
3237 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3472 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3238 | addu TMP2, TMP2, TMP3 3473 | addu TMP2, TMP2, TMP3
3474 |.if MIPSR6
3475 if (op == BC_IST) {
3476 | selnez TMP2, TMP2, TMP0;
3477 } else {
3478 | seleqz TMP2, TMP2, TMP0;
3479 }
3480 |.else
3239 if (op == BC_IST) { 3481 if (op == BC_IST) {
3240 | movz TMP2, r0, TMP0 3482 | movz TMP2, r0, TMP0
3241 } else { 3483 } else {
3242 | movn TMP2, r0, TMP0 3484 | movn TMP2, r0, TMP0
3243 } 3485 }
3486 |.endif
3244 | daddu PC, PC, TMP2 3487 | daddu PC, PC, TMP2
3245 } else { 3488 } else {
3246 | ld CRET1, 0(RD) 3489 | ld CRET1, 0(RD)
@@ -3483,9 +3726,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3483 | bltz TMP1, ->vmeta_arith 3726 | bltz TMP1, ->vmeta_arith
3484 |. daddu RA, BASE, RA 3727 |. daddu RA, BASE, RA
3485 |.elif "intins" == "mult" 3728 |.elif "intins" == "mult"
3729 |.if MIPSR6
3730 |. nop
3731 | mul CRET1, CARG3, CARG4
3732 | muh TMP2, CARG3, CARG4
3733 |.else
3486 |. intins CARG3, CARG4 3734 |. intins CARG3, CARG4
3487 | mflo CRET1 3735 | mflo CRET1
3488 | mfhi TMP2 3736 | mfhi TMP2
3737 |.endif
3489 | sra TMP1, CRET1, 31 3738 | sra TMP1, CRET1, 31
3490 | bne TMP1, TMP2, ->vmeta_arith 3739 | bne TMP1, TMP2, ->vmeta_arith
3491 |. daddu RA, BASE, RA 3740 |. daddu RA, BASE, RA
@@ -3508,16 +3757,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3508 |.endif 3757 |.endif
3509 | 3758 |
3510 |5: // Check for two numbers. 3759 |5: // Check for two numbers.
3511 | .FPU ldc1 f20, 0(RB) 3760 | .FPU ldc1 FTMP0, 0(RB)
3512 | sltu AT, TMP0, TISNUM 3761 | sltu AT, TMP0, TISNUM
3513 | sltu TMP0, TMP1, TISNUM 3762 | sltu TMP0, TMP1, TISNUM
3514 | .FPU ldc1 f22, 0(RC) 3763 | .FPU ldc1 FTMP2, 0(RC)
3515 | and AT, AT, TMP0 3764 | and AT, AT, TMP0
3516 | beqz AT, ->vmeta_arith 3765 | beqz AT, ->vmeta_arith
3517 |. daddu RA, BASE, RA 3766 |. daddu RA, BASE, RA
3518 | 3767 |
3519 |.if FPU 3768 |.if FPU
3520 | fpins FRET1, f20, f22 3769 | fpins FRET1, FTMP0, FTMP2
3521 |.elif "fpcall" == "sfpmod" 3770 |.elif "fpcall" == "sfpmod"
3522 | sfpmod 3771 | sfpmod
3523 |.else 3772 |.else
@@ -3847,7 +4096,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3847 | li TMP0, 0x801 4096 | li TMP0, 0x801
3848 | addiu AT, CARG2, -0x7ff 4097 | addiu AT, CARG2, -0x7ff
3849 | srl CARG3, RD, 14 4098 | srl CARG3, RD, 14
4099 |.if MIPSR6
4100 | seleqz TMP0, TMP0, AT
4101 | selnez CARG2, CARG2, AT
4102 | or CARG2, CARG2, TMP0
4103 |.else
3850 | movz CARG2, TMP0, AT 4104 | movz CARG2, TMP0, AT
4105 |.endif
3851 | // (lua_State *L, int32_t asize, uint32_t hbits) 4106 | // (lua_State *L, int32_t asize, uint32_t hbits)
3852 | call_intern lj_tab_new 4107 | call_intern lj_tab_new
3853 |. move CARG1, L 4108 |. move CARG1, L
@@ -4128,7 +4383,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4128 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4383 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4129 | settp STR:RC, TMP3 // Tagged key to look for. 4384 | settp STR:RC, TMP3 // Tagged key to look for.
4130 |.if FPU 4385 |.if FPU
4131 | ldc1 f20, 0(RA) 4386 | ldc1 FTMP0, 0(RA)
4132 |.else 4387 |.else
4133 | ld CRET1, 0(RA) 4388 | ld CRET1, 0(RA)
4134 |.endif 4389 |.endif
@@ -4144,7 +4399,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4144 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4399 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4145 | bnez AT, >7 4400 | bnez AT, >7
4146 |.if FPU 4401 |.if FPU
4147 |. sdc1 f20, NODE:TMP2->val 4402 |. sdc1 FTMP0, NODE:TMP2->val
4148 |.else 4403 |.else
4149 |. sd CRET1, NODE:TMP2->val 4404 |. sd CRET1, NODE:TMP2->val
4150 |.endif 4405 |.endif
@@ -4185,7 +4440,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4185 | ld BASE, L->base 4440 | ld BASE, L->base
4186 |.if FPU 4441 |.if FPU
4187 | b <3 // No 2nd write barrier needed. 4442 | b <3 // No 2nd write barrier needed.
4188 |. sdc1 f20, 0(CRET1) 4443 |. sdc1 FTMP0, 0(CRET1)
4189 |.else 4444 |.else
4190 | ld CARG1, 0(RA) 4445 | ld CARG1, 0(RA)
4191 | b <3 // No 2nd write barrier needed. 4446 | b <3 // No 2nd write barrier needed.
@@ -4528,7 +4783,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4528 | ld CARG1, 0(RC) 4783 | ld CARG1, 0(RC)
4529 | sltu AT, RC, TMP3 4784 | sltu AT, RC, TMP3
4530 | daddiu RC, RC, 8 4785 | daddiu RC, RC, 8
4786 |.if MIPSR6
4787 | selnez CARG1, CARG1, AT
4788 | seleqz AT, TISNIL, AT
4789 | or CARG1, CARG1, AT
4790 |.else
4531 | movz CARG1, TISNIL, AT 4791 | movz CARG1, TISNIL, AT
4792 |.endif
4532 | sd CARG1, 0(RA) 4793 | sd CARG1, 0(RA)
4533 | sltu AT, RA, TMP2 4794 | sltu AT, RA, TMP2
4534 | bnez AT, <1 4795 | bnez AT, <1
@@ -4717,7 +4978,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4717 | dext AT, CRET1, 31, 0 4978 | dext AT, CRET1, 31, 0
4718 | slt CRET1, CARG2, CARG3 4979 | slt CRET1, CARG2, CARG3
4719 | slt TMP1, CARG3, CARG2 4980 | slt TMP1, CARG3, CARG2
4981 |.if MIPSR6
4982 | selnez TMP1, TMP1, AT
4983 | seleqz CRET1, CRET1, AT
4984 | or CRET1, CRET1, TMP1
4985 |.else
4720 | movn CRET1, TMP1, AT 4986 | movn CRET1, TMP1, AT
4987 |.endif
4721 } else { 4988 } else {
4722 | bne CARG3, TISNUM, >5 4989 | bne CARG3, TISNUM, >5
4723 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type 4990 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type
@@ -4733,20 +5000,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4733 | slt CRET1, CRET1, CARG1 5000 | slt CRET1, CRET1, CARG1
4734 | slt AT, CARG2, r0 5001 | slt AT, CARG2, r0
4735 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. 5002 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
5003 |.if MIPSR6
5004 | selnez TMP1, TMP1, AT
5005 | seleqz CRET1, CRET1, AT
5006 | or CRET1, CRET1, TMP1
5007 |.else
4736 | movn CRET1, TMP1, AT 5008 | movn CRET1, TMP1, AT
5009 |.endif
4737 | or CRET1, CRET1, TMP0 5010 | or CRET1, CRET1, TMP0
4738 | zextw CARG1, CARG1 5011 | zextw CARG1, CARG1
4739 | settp CARG1, TISNUM 5012 | settp CARG1, TISNUM
4740 } 5013 }
4741 |1: 5014 |1:
4742 if (op == BC_FORI) { 5015 if (op == BC_FORI) {
5016 |.if MIPSR6
5017 | selnez TMP2, TMP2, CRET1
5018 |.else
4743 | movz TMP2, r0, CRET1 5019 | movz TMP2, r0, CRET1
5020 |.endif
4744 | daddu PC, PC, TMP2 5021 | daddu PC, PC, TMP2
4745 } else if (op == BC_JFORI) { 5022 } else if (op == BC_JFORI) {
4746 | daddu PC, PC, TMP2 5023 | daddu PC, PC, TMP2
4747 | lhu RD, -4+OFS_RD(PC) 5024 | lhu RD, -4+OFS_RD(PC)
4748 } else if (op == BC_IFORL) { 5025 } else if (op == BC_IFORL) {
5026 |.if MIPSR6
5027 | seleqz TMP2, TMP2, CRET1
5028 |.else
4749 | movn TMP2, r0, CRET1 5029 | movn TMP2, r0, CRET1
5030 |.endif
4750 | daddu PC, PC, TMP2 5031 | daddu PC, PC, TMP2
4751 } 5032 }
4752 if (vk) { 5033 if (vk) {
@@ -4776,6 +5057,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4776 | and AT, AT, TMP0 5057 | and AT, AT, TMP0
4777 | beqz AT, ->vmeta_for 5058 | beqz AT, ->vmeta_for
4778 |. slt TMP3, TMP3, r0 5059 |. slt TMP3, TMP3, r0
5060 |.if MIPSR6
5061 | dmtc1 TMP3, FTMP2
5062 | cmp.lt.d FTMP0, f0, f2
5063 | cmp.lt.d FTMP1, f2, f0
5064 | sel.d FTMP2, FTMP1, FTMP0
5065 | b <1
5066 |. dmfc1 CRET1, FTMP2
5067 |.else
4779 | c.ole.d 0, f0, f2 5068 | c.ole.d 0, f0, f2
4780 | c.ole.d 1, f2, f0 5069 | c.ole.d 1, f2, f0
4781 | li CRET1, 1 5070 | li CRET1, 1
@@ -4783,12 +5072,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4783 | movt AT, r0, 1 5072 | movt AT, r0, 1
4784 | b <1 5073 | b <1
4785 |. movn CRET1, AT, TMP3 5074 |. movn CRET1, AT, TMP3
5075 |.endif
4786 } else { 5076 } else {
4787 | ldc1 f0, FORL_IDX*8(RA) 5077 | ldc1 f0, FORL_IDX*8(RA)
4788 | ldc1 f4, FORL_STEP*8(RA) 5078 | ldc1 f4, FORL_STEP*8(RA)
4789 | ldc1 f2, FORL_STOP*8(RA) 5079 | ldc1 f2, FORL_STOP*8(RA)
4790 | ld TMP3, FORL_STEP*8(RA) 5080 | ld TMP3, FORL_STEP*8(RA)
4791 | add.d f0, f0, f4 5081 | add.d f0, f0, f4
5082 |.if MIPSR6
5083 | slt TMP3, TMP3, r0
5084 | dmtc1 TMP3, FTMP2
5085 | cmp.lt.d FTMP0, f0, f2
5086 | cmp.lt.d FTMP1, f2, f0
5087 | sel.d FTMP2, FTMP1, FTMP0
5088 | dmfc1 CRET1, FTMP2
5089 if (op == BC_IFORL) {
5090 | seleqz TMP2, TMP2, CRET1
5091 | daddu PC, PC, TMP2
5092 }
5093 |.else
4792 | c.ole.d 0, f0, f2 5094 | c.ole.d 0, f0, f2
4793 | c.ole.d 1, f2, f0 5095 | c.ole.d 1, f2, f0
4794 | slt TMP3, TMP3, r0 5096 | slt TMP3, TMP3, r0
@@ -4801,6 +5103,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4801 | movn TMP2, r0, CRET1 5103 | movn TMP2, r0, CRET1
4802 | daddu PC, PC, TMP2 5104 | daddu PC, PC, TMP2
4803 } 5105 }
5106 |.endif
4804 | sdc1 f0, FORL_IDX*8(RA) 5107 | sdc1 f0, FORL_IDX*8(RA)
4805 | ins_next1 5108 | ins_next1
4806 | b <2 5109 | b <2
@@ -4976,8 +5279,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4976 | ld TMP0, 0(RA) 5279 | ld TMP0, 0(RA)
4977 | sltu AT, RA, RC // Less args than parameters? 5280 | sltu AT, RA, RC // Less args than parameters?
4978 | move CARG1, TMP0 5281 | move CARG1, TMP0
5282 |.if MIPSR6
5283 | selnez TMP0, TMP0, AT
5284 | seleqz TMP3, TISNIL, AT
5285 | or TMP0, TMP0, TMP3
5286 | seleqz TMP3, CARG1, AT
5287 | selnez CARG1, TISNIL, AT
5288 | or CARG1, CARG1, TMP3
5289 |.else
4979 | movz TMP0, TISNIL, AT // Clear missing parameters. 5290 | movz TMP0, TISNIL, AT // Clear missing parameters.
4980 | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). 5291 | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC).
5292 |.endif
4981 | addiu TMP2, TMP2, -1 5293 | addiu TMP2, TMP2, -1
4982 | sd TMP0, 16(TMP1) 5294 | sd TMP0, 16(TMP1)
4983 | daddiu TMP1, TMP1, 8 5295 | daddiu TMP1, TMP1, 8