about | summary | refs | log | tree | commit | diff
path: root/src/vm_arm.dasc
diff options
context:
space:
mode:
author: Mike Pall <mike> 2012-10-15 16:53:03 +0200
committer: Mike Pall <mike> 2012-10-15 16:53:03 +0200
commit: 2621617a9292ea821a0630339e20e83e11858a5e (patch)
tree: b9fc47ee02e0afeb124466455ee07d27da32fc58 /src/vm_arm.dasc
parent: 894d2d6ef4bf50a7c355e49e4508de5d07edad2d (diff)
download: luajit-2621617a9292ea821a0630339e20e83e11858a5e.tar.gz
luajit-2621617a9292ea821a0630339e20e83e11858a5e.tar.bz2
luajit-2621617a9292ea821a0630339e20e83e11858a5e.zip
ARM: Drop hard-fp variants of floor/ceil/trunc.
Soft-fp variants are faster on a Cortex-A9. Duh.
Diffstat (limited to '')
-rw-r--r-- src/vm_arm.dasc 115
1 files changed, 38 insertions, 77 deletions
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index f00b3028..fb9363e4 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -1368,14 +1368,8 @@ static void build_subroutines(BuildCtx *ctx)
1368 | movmi CARG1, #0x80000000 1368 | movmi CARG1, #0x80000000
1369 | bmi <1 1369 | bmi <1
1370 |4: 1370 |4:
1371 |.if HFABI 1371 | bl ->vm_..func.._sf
1372 | vmov d0, CARG1, CARG2
1373 | bl ->vm_..func.._hf
1374 | b ->fff_resd
1375 |.else
1376 | bl ->vm_..func
1377 | b ->fff_restv 1372 | b ->fff_restv
1378 |.endif
1379 |.endmacro 1373 |.endmacro
1380 | 1374 |
1381 | math_round floor 1375 | math_round floor
@@ -2221,52 +2215,9 @@ static void build_subroutines(BuildCtx *ctx)
2221 |// 2215 |//
2222 |// double lj_vm_floor/ceil/trunc(double x); 2216 |// double lj_vm_floor/ceil/trunc(double x);
2223 |.macro vm_round, func, hf 2217 |.macro vm_round, func, hf
2224 |.if FPU 2218 |.if hf == 1
2225 |.if hf == 0
2226 | vmov d0, CARG1, CARG2
2227 | vldr d2, <8 // 2^52
2228 |.else
2229 | vldr d2, <8 // 2^52
2230 | vmov CARG1, CARG2, d0 2219 | vmov CARG1, CARG2, d0
2231 |.endif 2220 |.endif
2232 | vabs.f64 d1, d0
2233 | vcmp.f64 d1, d2 // |x| >= 2^52 or NaN?
2234 | vmrs
2235 |.if "func" == "trunc"
2236 | bxpl lr // Return argument unchanged.
2237 | vadd.f64 d0, d1, d2
2238 | vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52
2239 | vldr d2, <9 // +1.0
2240 | vcmp.f64 d1, d0 // |x| < result: subtract +1.0
2241 | vmrs
2242 | vsubmi.f64 d0, d0, d2
2243 | cmp CARG2, #0
2244 | vnegmi.f64 d0, d0 // Merge sign bit back in.
2245 |.else
2246 | vadd.f64 d1, d1, d2
2247 | bxpl lr // Return argument unchanged.
2248 | cmp CARG2, #0
2249 | vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52
2250 | vldr d2, <9 // +1.0
2251 | vnegmi.f64 d1, d1 // Merge sign bit back in.
2252 |.if "func" == "floor"
2253 | vcmp.f64 d0, d1 // x < result: subtract +1.0.
2254 | vmrs
2255 | vsubmi.f64 d0, d1, d2
2256 |.else
2257 | vcmp.f64 d1, d0 // x > result: add +1.0.
2258 | vmrs
2259 | vaddmi.f64 d0, d1, d2
2260 |.endif
2261 | vmovpl.f64 d0, d1
2262 |.endif
2263 |.if hf == 0
2264 | vmov CARG1, CARG2, d0
2265 |.endif
2266 | bx lr
2267 |
2268 |.else
2269 |
2270 | lsl CARG3, CARG2, #1 2221 | lsl CARG3, CARG2, #1
2271 | adds RB, CARG3, #0x00200000 2222 | adds RB, CARG3, #0x00200000
2272 | bpl >2 // |x| < 1? 2223 | bpl >2 // |x| < 1?
@@ -2286,6 +2237,9 @@ static void build_subroutines(BuildCtx *ctx)
2286 |.else 2237 |.else
2287 | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) 2238 | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
2288 |.endif 2239 |.endif
2240 |.if hf == 1
2241 | vmoveq d0, CARG1, CARG2
2242 |.endif
2289 | bxeq lr // iszero: done. 2243 | bxeq lr // iszero: done.
2290 | mvn CARG4, #1 2244 | mvn CARG4, #1
2291 | cmp RB, #0 2245 | cmp RB, #0
@@ -2294,6 +2248,9 @@ static void build_subroutines(BuildCtx *ctx)
2294 | add RB, RB, #32 2248 | add RB, RB, #32
2295 | subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask 2249 | subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask
2296 | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry 2250 | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry
2251 |.if hf == 1
2252 | vmov d0, CARG1, CARG2
2253 |.endif
2297 | bx lr 2254 | bx lr
2298 | 2255 |
2299 |2: // |x| < 1: 2256 |2: // |x| < 1:
@@ -2308,45 +2265,41 @@ static void build_subroutines(BuildCtx *ctx)
2308 | and CARG2, CARG2, #0x80000000 2265 | and CARG2, CARG2, #0x80000000
2309 | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) 2266 | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
2310 | orrne CARG2, CARG2, CARG4 2267 | orrne CARG2, CARG2, CARG4
2311 | bx lr 2268 |.if hf == 1
2269 | vmov d0, CARG1, CARG2
2312 |.endif 2270 |.endif
2271 | bx lr
2313 |.endmacro 2272 |.endmacro
2314 | 2273 |
2315 |.if FPU
2316 |.align 8
2317 |9:
2318 | .long 0, 0x3ff00000 // +1.0
2319 |8:
2320 | .long 0, 0x43300000 // 2^52
2321 |.else
2322 |9: 2274 |9:
2323 | .long 0x3ff00000 // hiword(+1.0) 2275 | .long 0x3ff00000 // hiword(+1.0)
2324 |.endif
2325 | 2276 |
2326 |->vm_floor: 2277 |->vm_floor:
2327 |.if not HFABI 2278 |.if HFABI
2328 | vm_round floor, 0
2329 |.endif
2330 |->vm_floor_hf:
2331 |.if FPU
2332 | vm_round floor, 1 2279 | vm_round floor, 1
2333 |.endif 2280 |.endif
2281 |->vm_floor_sf:
2282 | vm_round floor, 0
2334 | 2283 |
2335 |->vm_ceil: 2284 |->vm_ceil:
2336 |.if not HFABI 2285 |.if HFABI
2337 | vm_round ceil, 0
2338 |.endif
2339 |->vm_ceil_hf:
2340 |.if FPU
2341 | vm_round ceil, 1 2286 | vm_round ceil, 1
2342 |.endif 2287 |.endif
2288 |->vm_ceil_sf:
2289 | vm_round ceil, 0
2343 | 2290 |
2344 |->vm_trunc: 2291 |.macro vm_trunc, hf
2345 |.if JIT and not HFABI 2292 |.if JIT
2293 |.if hf == 1
2294 | vmov CARG1, CARG2, d0
2295 |.endif
2346 | lsl CARG3, CARG2, #1 2296 | lsl CARG3, CARG2, #1
2347 | adds RB, CARG3, #0x00200000 2297 | adds RB, CARG3, #0x00200000
2348 | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. 2298 | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
2349 | movpl CARG1, #0 2299 | movpl CARG1, #0
2300 |.if hf == 1
2301 | vmovpl d0, CARG1, CARG2
2302 |.endif
2350 | bxpl lr 2303 | bxpl lr
2351 | mvn CARG4, #0x3cc 2304 | mvn CARG4, #0x3cc
2352 | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. 2305 | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
@@ -2355,13 +2308,19 @@ static void build_subroutines(BuildCtx *ctx)
2355 | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask 2308 | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask
2356 | subs RB, RB, #32 2309 | subs RB, RB, #32
2357 | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask 2310 | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask
2311 |.if hf == 1
2312 | vmov d0, CARG1, CARG2
2313 |.endif
2358 | bx lr 2314 | bx lr
2359 |.endif 2315 |.endif
2316 |.endmacro
2360 | 2317 |
2361 |->vm_trunc_hf: 2318 |->vm_trunc:
2362 |.if JIT and FPU 2319 |.if HFABI
2363 | vm_round trunc, 1 2320 | vm_trunc 1
2364 |.endif 2321 |.endif
2322 |->vm_trunc_sf:
2323 | vm_trunc 0
2365 | 2324 |
2366 | // double lj_vm_mod(double dividend, double divisor); 2325 | // double lj_vm_mod(double dividend, double divisor);
2367 |->vm_mod: 2326 |->vm_mod:
@@ -2369,7 +2328,9 @@ static void build_subroutines(BuildCtx *ctx)
2369 | // Special calling convention. Also, RC (r11) is not preserved. 2328 | // Special calling convention. Also, RC (r11) is not preserved.
2370 | vdiv.f64 d0, d6, d7 2329 | vdiv.f64 d0, d6, d7
2371 | mov RC, lr 2330 | mov RC, lr
2372 | bl ->vm_floor_hf 2331 | vmov CARG1, CARG2, d0
2332 | bl ->vm_floor_sf
2333 | vmov d0, CARG1, CARG2
2373 | vmul.f64 d0, d0, d7 2334 | vmul.f64 d0, d0, d7
2374 | mov lr, RC 2335 | mov lr, RC
2375 | vsub.f64 d6, d6, d0 2336 | vsub.f64 d6, d6, d0
@@ -2377,7 +2338,7 @@ static void build_subroutines(BuildCtx *ctx)
2377 |.else 2338 |.else
2378 | push {r0, r1, r2, r3, r4, lr} 2339 | push {r0, r1, r2, r3, r4, lr}
2379 | bl extern __aeabi_ddiv 2340 | bl extern __aeabi_ddiv
2380 | bl ->vm_floor 2341 | bl ->vm_floor_sf
2381 | ldrd CARG34, [sp, #8] 2342 | ldrd CARG34, [sp, #8]
2382 | bl extern __aeabi_dmul 2343 | bl extern __aeabi_dmul
2383 | ldrd CARG34, [sp] 2344 | ldrd CARG34, [sp]