summary | refs | log | tree | commit | diff
path: root/src/buildvm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r-- src/buildvm_x86.dasc 51
1 files changed, 38 insertions, 13 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 7167ffb1..fa9bf9fb 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -2128,15 +2128,25 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2128 | fistp TMP2 2128 | fistp TMP2
2129 | cmp TMP2, 255; ja ->fff_fallback 2129 | cmp TMP2, 255; ja ->fff_fallback
2130 } 2130 }
2131 | lea RC, TMP2 // Little-endian. 2131 |.if X64
2132 | mov TMP1, RA // Save RA. 2132 | mov TMP3, 1
2133 |.else
2133 | mov ARG3, 1 2134 | mov ARG3, 1
2134 | mov ARG2, RC 2135 |.endif
2136 | lea RDa, TMP2 // Points to stack. Little-endian.
2137 | mov TMP1, RA // Save RA.
2135 |->fff_newstr: 2138 |->fff_newstr:
2136 | mov L:RB, SAVE_L 2139 | mov L:RB, SAVE_L
2140 | mov L:RB->base, BASE
2141 |.if X64
2142 | mov CARG3d, TMP3 // Zero-extended to size_t.
2143 | mov CARG2, RDa // May be 64 bit ptr to stack.
2144 | mov CARG1d, L:RB
2145 |.else
2146 | mov ARG2, RD
2137 | mov ARG1, L:RB 2147 | mov ARG1, L:RB
2148 |.endif
2138 | mov SAVE_PC, PC 2149 | mov SAVE_PC, PC
2139 | mov L:RB->base, BASE
2140 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2150 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2141 | // GCstr * returned in eax (RC). 2151 | // GCstr * returned in eax (RC).
2142 | mov RA, TMP1 2152 | mov RA, TMP1
@@ -2163,33 +2173,36 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2163 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 2173 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
2164 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 2174 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
2165 | mov STR:RB, [RA] 2175 | mov STR:RB, [RA]
2166 | mov ARG2, STR:RB 2176 | mov TMP3, STR:RB
2167 | mov RB, STR:RB->len 2177 | mov RB, STR:RB->len
2168 if (sse) { 2178 if (sse) {
2169 | cvtsd2si RC, qword [RA+8] 2179 | cvtsd2si RA, qword [RA+8]
2170 | mov ARG3, RC
2171 } else { 2180 } else {
2172 |.if not X64 2181 |.if not X64
2173 | fld qword [RA+8] 2182 | fld qword [RA+8]
2174 | fistp ARG3 2183 | fistp ARG3
2184 | mov RA, ARG3
2175 |.endif 2185 |.endif
2176 } 2186 }
2177 | mov RC, TMP2 2187 | mov RC, TMP2
2178 | cmp RB, RC // len < end? (unsigned compare) 2188 | cmp RB, RC // len < end? (unsigned compare)
2179 | jb >5 2189 | jb >5
2180 |2: 2190 |2:
2181 | mov RA, ARG3
2182 | test RA, RA // start <= 0? 2191 | test RA, RA // start <= 0?
2183 | jle >7 2192 | jle >7
2184 |3: 2193 |3:
2185 | mov STR:RB, ARG2 2194 | mov STR:RB, TMP3
2186 | sub RC, RA // start > end? 2195 | sub RC, RA // start > end?
2187 | jl ->fff_emptystr 2196 | jl ->fff_emptystr
2188 | lea RB, [STR:RB+RA+#STR-1] 2197 | lea RB, [STR:RB+RA+#STR-1]
2189 | add RC, 1 2198 | add RC, 1
2190 |4: 2199 |4:
2191 | mov ARG2, RB 2200 |.if X64
2201 | mov TMP3, RC
2202 |.else
2192 | mov ARG3, RC 2203 | mov ARG3, RC
2204 |.endif
2205 | mov RD, RB
2193 | jmp ->fff_newstr 2206 | jmp ->fff_newstr
2194 | 2207 |
2195 |5: // Negative end or overflow. 2208 |5: // Negative end or overflow.
@@ -2234,13 +2247,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2234 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 2247 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2235 | movzx RA, byte STR:RB[1] 2248 | movzx RA, byte STR:RB[1]
2236 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2249 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2250 |.if X64
2251 | mov TMP3, RC
2252 |.else
2237 | mov ARG3, RC 2253 | mov ARG3, RC
2238 | mov ARG2, RB 2254 |.endif
2239 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). 2255 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2240 | mov [RB], RAL 2256 | mov [RB], RAL
2241 | add RB, 1 2257 | add RB, 1
2242 | sub RC, 1 2258 | sub RC, 1
2243 | jnz <1 2259 | jnz <1
2260 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2244 | jmp ->fff_newstr 2261 | jmp ->fff_newstr
2245 | 2262 |
2246 |.ffunc_1 string_reverse 2263 |.ffunc_1 string_reverse
@@ -2254,15 +2271,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2254 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2271 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2255 | add RB, #STR 2272 | add RB, #STR
2256 | mov TMP2, PC // Need another temp register. 2273 | mov TMP2, PC // Need another temp register.
2274 |.if X64
2275 | mov TMP3, RC
2276 |.else
2257 | mov ARG3, RC 2277 | mov ARG3, RC
2278 |.endif
2258 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2279 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2259 | mov ARG2, PC
2260 |1: 2280 |1:
2261 | movzx RA, byte [RB] 2281 | movzx RA, byte [RB]
2262 | add RB, 1 2282 | add RB, 1
2263 | sub RC, 1 2283 | sub RC, 1
2264 | mov [PC+RC], RAL 2284 | mov [PC+RC], RAL
2265 | jnz <1 2285 | jnz <1
2286 | mov RD, PC
2266 | mov PC, TMP2 2287 | mov PC, TMP2
2267 | jmp ->fff_newstr 2288 | jmp ->fff_newstr
2268 | 2289 |
@@ -2276,9 +2297,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2276 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2297 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2277 | add RB, #STR 2298 | add RB, #STR
2278 | mov TMP2, PC // Need another temp register. 2299 | mov TMP2, PC // Need another temp register.
2300 |.if X64
2301 | mov TMP3, RC
2302 |.else
2279 | mov ARG3, RC 2303 | mov ARG3, RC
2304 |.endif
2280 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2305 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2281 | mov ARG2, PC
2282 | jmp >3 2306 | jmp >3
2283 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). 2307 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2284 | movzx RA, byte [RB+RC] 2308 | movzx RA, byte [RB+RC]
@@ -2292,6 +2316,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2292 |3: 2316 |3:
2293 | sub RC, 1 2317 | sub RC, 1
2294 | jns <1 2318 | jns <1
2319 | mov RD, PC
2295 | mov PC, TMP2 2320 | mov PC, TMP2
2296 | jmp ->fff_newstr 2321 | jmp ->fff_newstr
2297 |.endmacro 2322 |.endmacro