diff options
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r-- | src/buildvm_x86.dasc | 51 |
1 files changed, 38 insertions, 13 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 7167ffb1..fa9bf9fb 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc | |||
@@ -2128,15 +2128,25 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2128 | | fistp TMP2 | 2128 | | fistp TMP2 |
2129 | | cmp TMP2, 255; ja ->fff_fallback | 2129 | | cmp TMP2, 255; ja ->fff_fallback |
2130 | } | 2130 | } |
2131 | | lea RC, TMP2 // Little-endian. | 2131 | |.if X64 |
2132 | | mov TMP1, RA // Save RA. | 2132 | | mov TMP3, 1 |
2133 | |.else | ||
2133 | | mov ARG3, 1 | 2134 | | mov ARG3, 1 |
2134 | | mov ARG2, RC | 2135 | |.endif |
2136 | | lea RDa, TMP2 // Points to stack. Little-endian. | ||
2137 | | mov TMP1, RA // Save RA. | ||
2135 | |->fff_newstr: | 2138 | |->fff_newstr: |
2136 | | mov L:RB, SAVE_L | 2139 | | mov L:RB, SAVE_L |
2140 | | mov L:RB->base, BASE | ||
2141 | |.if X64 | ||
2142 | | mov CARG3d, TMP3 // Zero-extended to size_t. | ||
2143 | | mov CARG2, RDa // May be 64 bit ptr to stack. | ||
2144 | | mov CARG1d, L:RB | ||
2145 | |.else | ||
2146 | | mov ARG2, RD | ||
2137 | | mov ARG1, L:RB | 2147 | | mov ARG1, L:RB |
2148 | |.endif | ||
2138 | | mov SAVE_PC, PC | 2149 | | mov SAVE_PC, PC |
2139 | | mov L:RB->base, BASE | ||
2140 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | 2150 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) |
2141 | | // GCstr * returned in eax (RC). | 2151 | | // GCstr * returned in eax (RC). |
2142 | | mov RA, TMP1 | 2152 | | mov RA, TMP1 |
@@ -2163,33 +2173,36 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2163 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | 2173 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback |
2164 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | 2174 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback |
2165 | | mov STR:RB, [RA] | 2175 | | mov STR:RB, [RA] |
2166 | | mov ARG2, STR:RB | 2176 | | mov TMP3, STR:RB |
2167 | | mov RB, STR:RB->len | 2177 | | mov RB, STR:RB->len |
2168 | if (sse) { | 2178 | if (sse) { |
2169 | | cvtsd2si RC, qword [RA+8] | 2179 | | cvtsd2si RA, qword [RA+8] |
2170 | | mov ARG3, RC | ||
2171 | } else { | 2180 | } else { |
2172 | |.if not X64 | 2181 | |.if not X64 |
2173 | | fld qword [RA+8] | 2182 | | fld qword [RA+8] |
2174 | | fistp ARG3 | 2183 | | fistp ARG3 |
2184 | | mov RA, ARG3 | ||
2175 | |.endif | 2185 | |.endif |
2176 | } | 2186 | } |
2177 | | mov RC, TMP2 | 2187 | | mov RC, TMP2 |
2178 | | cmp RB, RC // len < end? (unsigned compare) | 2188 | | cmp RB, RC // len < end? (unsigned compare) |
2179 | | jb >5 | 2189 | | jb >5 |
2180 | |2: | 2190 | |2: |
2181 | | mov RA, ARG3 | ||
2182 | | test RA, RA // start <= 0? | 2191 | | test RA, RA // start <= 0? |
2183 | | jle >7 | 2192 | | jle >7 |
2184 | |3: | 2193 | |3: |
2185 | | mov STR:RB, ARG2 | 2194 | | mov STR:RB, TMP3 |
2186 | | sub RC, RA // start > end? | 2195 | | sub RC, RA // start > end? |
2187 | | jl ->fff_emptystr | 2196 | | jl ->fff_emptystr |
2188 | | lea RB, [STR:RB+RA+#STR-1] | 2197 | | lea RB, [STR:RB+RA+#STR-1] |
2189 | | add RC, 1 | 2198 | | add RC, 1 |
2190 | |4: | 2199 | |4: |
2191 | | mov ARG2, RB | 2200 | |.if X64 |
2201 | | mov TMP3, RC | ||
2202 | |.else | ||
2192 | | mov ARG3, RC | 2203 | | mov ARG3, RC |
2204 | |.endif | ||
2205 | | mov RD, RB | ||
2193 | | jmp ->fff_newstr | 2206 | | jmp ->fff_newstr |
2194 | | | 2207 | | |
2195 | |5: // Negative end or overflow. | 2208 | |5: // Negative end or overflow. |
@@ -2234,13 +2247,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2234 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 | 2247 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 |
2235 | | movzx RA, byte STR:RB[1] | 2248 | | movzx RA, byte STR:RB[1] |
2236 | | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | 2249 | | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] |
2250 | |.if X64 | ||
2251 | | mov TMP3, RC | ||
2252 | |.else | ||
2237 | | mov ARG3, RC | 2253 | | mov ARG3, RC |
2238 | | mov ARG2, RB | 2254 | |.endif |
2239 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | 2255 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). |
2240 | | mov [RB], RAL | 2256 | | mov [RB], RAL |
2241 | | add RB, 1 | 2257 | | add RB, 1 |
2242 | | sub RC, 1 | 2258 | | sub RC, 1 |
2243 | | jnz <1 | 2259 | | jnz <1 |
2260 | | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2244 | | jmp ->fff_newstr | 2261 | | jmp ->fff_newstr |
2245 | | | 2262 | | |
2246 | |.ffunc_1 string_reverse | 2263 | |.ffunc_1 string_reverse |
@@ -2254,15 +2271,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2254 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | 2271 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 |
2255 | | add RB, #STR | 2272 | | add RB, #STR |
2256 | | mov TMP2, PC // Need another temp register. | 2273 | | mov TMP2, PC // Need another temp register. |
2274 | |.if X64 | ||
2275 | | mov TMP3, RC | ||
2276 | |.else | ||
2257 | | mov ARG3, RC | 2277 | | mov ARG3, RC |
2278 | |.endif | ||
2258 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | 2279 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] |
2259 | | mov ARG2, PC | ||
2260 | |1: | 2280 | |1: |
2261 | | movzx RA, byte [RB] | 2281 | | movzx RA, byte [RB] |
2262 | | add RB, 1 | 2282 | | add RB, 1 |
2263 | | sub RC, 1 | 2283 | | sub RC, 1 |
2264 | | mov [PC+RC], RAL | 2284 | | mov [PC+RC], RAL |
2265 | | jnz <1 | 2285 | | jnz <1 |
2286 | | mov RD, PC | ||
2266 | | mov PC, TMP2 | 2287 | | mov PC, TMP2 |
2267 | | jmp ->fff_newstr | 2288 | | jmp ->fff_newstr |
2268 | | | 2289 | | |
@@ -2276,9 +2297,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2276 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | 2297 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 |
2277 | | add RB, #STR | 2298 | | add RB, #STR |
2278 | | mov TMP2, PC // Need another temp register. | 2299 | | mov TMP2, PC // Need another temp register. |
2300 | |.if X64 | ||
2301 | | mov TMP3, RC | ||
2302 | |.else | ||
2279 | | mov ARG3, RC | 2303 | | mov ARG3, RC |
2304 | |.endif | ||
2280 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | 2305 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] |
2281 | | mov ARG2, PC | ||
2282 | | jmp >3 | 2306 | | jmp >3 |
2283 | |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). | 2307 | |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). |
2284 | | movzx RA, byte [RB+RC] | 2308 | | movzx RA, byte [RB+RC] |
@@ -2292,6 +2316,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | |||
2292 | |3: | 2316 | |3: |
2293 | | sub RC, 1 | 2317 | | sub RC, 1 |
2294 | | jns <1 | 2318 | | jns <1 |
2319 | | mov RD, PC | ||
2295 | | mov PC, TMP2 | 2320 | | mov PC, TMP2 |
2296 | | jmp ->fff_newstr | 2321 | | jmp ->fff_newstr |
2297 | |.endmacro | 2322 | |.endmacro |