aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Pall <mike>2012-07-30 18:59:13 +0200
committerMike Pall <mike>2012-07-30 18:59:13 +0200
commita373fddbd3b129f3f95474533e74f0a52744ff8c (patch)
tree9dc1e4ee3eae94a289278b246ff659d8b63cae6d /src
parent23abbd9ef344289d1dae6d8fcf9d3c0ab8e1e6e1 (diff)
downloadluajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.tar.gz
luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.tar.bz2
luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.zip
ARM: Add VFP and hard-float ABI variants to interpreter.
Diffstat (limited to 'src')
-rw-r--r--src/lj_frame.h4
-rw-r--r--src/lj_target_arm.h11
-rw-r--r--src/vm_arm.dasc447
3 files changed, 434 insertions, 28 deletions
diff --git a/src/lj_frame.h b/src/lj_frame.h
index b8429c2a..b8af2349 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -97,7 +97,11 @@ enum {
97#define CFRAME_OFS_L 12 97#define CFRAME_OFS_L 12
98#define CFRAME_OFS_PC 8 98#define CFRAME_OFS_PC 8
99#define CFRAME_OFS_MULTRES 4 99#define CFRAME_OFS_MULTRES 4
100#if LJ_ARCH_HASFPU
101#define CFRAME_SIZE 128
102#else
100#define CFRAME_SIZE 64 103#define CFRAME_SIZE 64
104#endif
101#define CFRAME_SHIFT_MULTRES 3 105#define CFRAME_SHIFT_MULTRES 3
102#elif LJ_TARGET_PPC 106#elif LJ_TARGET_PPC
103#if LJ_ARCH_PPC64 107#if LJ_ARCH_PPC64
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index a24fc819..20e8ad36 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -14,7 +14,9 @@
14#if LJ_SOFTFP 14#if LJ_SOFTFP
15#define FPRDEF(_) 15#define FPRDEF(_)
16#else 16#else
17#error "NYI: hard-float support for ARM" 17#define FPRDEF(_) \
18 _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
19 _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15)
18#endif 20#endif
19#define VRIDDEF(_) 21#define VRIDDEF(_)
20 22
@@ -45,7 +47,7 @@ enum {
45#if LJ_SOFTFP 47#if LJ_SOFTFP
46 RID_MAX_FPR = RID_MIN_FPR, 48 RID_MAX_FPR = RID_MIN_FPR,
47#else 49#else
48#error "NYI: VFP support for ARM" 50 RID_MAX_FPR = RID_D15+1,
49#endif 51#endif
50 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, 52 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
51 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR 53 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
@@ -68,7 +70,8 @@ enum {
68#define RSET_FPR 0 70#define RSET_FPR 0
69#define RSET_ALL RSET_GPR 71#define RSET_ALL RSET_GPR
70#else 72#else
71#error "NYI: VFP support for ARM" 73#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
74#define RSET_ALL (RSET_GPR|RSET_FPR)
72#endif 75#endif
73#define RSET_INIT RSET_ALL 76#define RSET_INIT RSET_ALL
74 77
@@ -82,7 +85,7 @@ enum {
82#if LJ_SOFTFP 85#if LJ_SOFTFP
83#define RSET_SCRATCH_FPR 0 86#define RSET_SCRATCH_FPR 0
84#else 87#else
85#error "NYI: VFP support for ARM" 88#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
86#endif 89#endif
87#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) 90#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
88#define REGARG_FIRSTGPR RID_R0 91#define REGARG_FIRSTGPR RID_R0
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 8ddce49e..26f97aa3 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -46,6 +46,7 @@
46|.define CRET2, r1 46|.define CRET2, r1
47| 47|
48|// Stack layout while in interpreter. Must match with lj_frame.h. 48|// Stack layout while in interpreter. Must match with lj_frame.h.
49|.define SAVE_R4, [sp, #28]
49|.define CFRAME_SPACE, #28 50|.define CFRAME_SPACE, #28
50|.define SAVE_ERRF, [sp, #24] 51|.define SAVE_ERRF, [sp, #24]
51|.define SAVE_NRES, [sp, #20] 52|.define SAVE_NRES, [sp, #20]
@@ -60,6 +61,20 @@
60|.define TMPD, [sp] 61|.define TMPD, [sp]
61|.define TMPDp, sp 62|.define TMPDp, sp
62| 63|
64|.if FPU
65|.macro saveregs
66| push {r5, r6, r7, r8, r9, r10, r11, lr}
67| vpush {d8-d15}
68| sub sp, sp, CFRAME_SPACE+4
69| str r4, SAVE_R4
70|.endmacro
71|.macro restoreregs_ret
72| ldr r4, SAVE_R4
73| add sp, sp, CFRAME_SPACE+4
74| vpop {d8-d15}
75| pop {r5, r6, r7, r8, r9, r10, r11, pc}
76|.endmacro
77|.else
63|.macro saveregs 78|.macro saveregs
64| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} 79| push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
65| sub sp, sp, CFRAME_SPACE 80| sub sp, sp, CFRAME_SPACE
@@ -68,6 +83,7 @@
68| add sp, sp, CFRAME_SPACE 83| add sp, sp, CFRAME_SPACE
69| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 84| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
70|.endmacro 85|.endmacro
86|.endif
71| 87|
72|// Type definitions. Some of these are only used for documentation. 88|// Type definitions. Some of these are only used for documentation.
73|.type L, lua_State, LREG 89|.type L, lua_State, LREG
@@ -875,6 +891,29 @@ static void build_subroutines(BuildCtx *ctx)
875 | bhs ->fff_fallback 891 | bhs ->fff_fallback
876 |.endmacro 892 |.endmacro
877 | 893 |
894 |.macro .ffunc_d, name
895 | .ffunc name
896 | ldr CARG2, [BASE, #4]
897 | cmp NARGS8:RC, #8
898 | vldr d0, [BASE]
899 | blo ->fff_fallback
900 | checktp CARG2, LJ_TISNUM
901 | bhs ->fff_fallback
902 |.endmacro
903 |
904 |.macro .ffunc_dd, name
905 | .ffunc name
906 | ldr CARG2, [BASE, #4]
907 | ldr CARG4, [BASE, #12]
908 | cmp NARGS8:RC, #16
909 | vldr d0, [BASE]
910 | vldr d1, [BASE, #8]
911 | blo ->fff_fallback
912 | checktp CARG2, LJ_TISNUM
913 | cmnlo CARG4, #-LJ_TISNUM
914 | bhs ->fff_fallback
915 |.endmacro
916 |
878 |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. 917 |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
879 |.macro ffgccheck 918 |.macro ffgccheck
880 | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] 919 | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)]
@@ -1327,8 +1366,14 @@ static void build_subroutines(BuildCtx *ctx)
1327 | movmi CARG1, #0x80000000 1366 | movmi CARG1, #0x80000000
1328 | bmi <1 1367 | bmi <1
1329 |4: 1368 |4:
1369 |.if HFABI
1370 | vmov d0, CARG1, CARG2
1371 | bl ->vm_..func.._hf
1372 | b ->fff_resd
1373 |.else
1330 | bl ->vm_..func 1374 | bl ->vm_..func
1331 | b ->fff_restv 1375 | b ->fff_restv
1376 |.endif
1332 |.endmacro 1377 |.endmacro
1333 | 1378 |
1334 | math_round floor 1379 | math_round floor
@@ -1381,22 +1426,48 @@ static void build_subroutines(BuildCtx *ctx)
1381 | b <5 1426 | b <5
1382 | 1427 |
1383 |.macro math_extern, func 1428 |.macro math_extern, func
1429 |.if HFABI
1430 | .ffunc_d math_ .. func
1431 |.else
1384 | .ffunc_n math_ .. func 1432 | .ffunc_n math_ .. func
1433 |.endif
1385 | .IOS mov RA, BASE 1434 | .IOS mov RA, BASE
1386 | bl extern func 1435 | bl extern func
1387 | .IOS mov BASE, RA 1436 | .IOS mov BASE, RA
1437 |.if HFABI
1438 | b ->fff_resd
1439 |.else
1388 | b ->fff_restv 1440 | b ->fff_restv
1441 |.endif
1389 |.endmacro 1442 |.endmacro
1390 | 1443 |
1391 |.macro math_extern2, func 1444 |.macro math_extern2, func
1445 |.if HFABI
1446 | .ffunc_dd math_ .. func
1447 |.else
1392 | .ffunc_nn math_ .. func 1448 | .ffunc_nn math_ .. func
1449 |.endif
1393 | .IOS mov RA, BASE 1450 | .IOS mov RA, BASE
1394 | bl extern func 1451 | bl extern func
1395 | .IOS mov BASE, RA 1452 | .IOS mov BASE, RA
1453 |.if HFABI
1454 | b ->fff_resd
1455 |.else
1396 | b ->fff_restv 1456 | b ->fff_restv
1457 |.endif
1397 |.endmacro 1458 |.endmacro
1398 | 1459 |
1460 |.if FPU
1461 | .ffunc_d math_sqrt
1462 | vsqrt.f64 d0, d0
1463 |->fff_resd:
1464 | ldr PC, [BASE, FRAME_PC]
1465 | vstr d0, [BASE, #-8]
1466 | b ->fff_res1
1467 |.else
1399 | math_extern sqrt 1468 | math_extern sqrt
1469 |.endif
1470 |
1400 | math_extern log 1471 | math_extern log
1401 | math_extern log10 1472 | math_extern log10
1402 | math_extern exp 1473 | math_extern exp
@@ -1414,11 +1485,34 @@ static void build_subroutines(BuildCtx *ctx)
1414 | math_extern2 fmod 1485 | math_extern2 fmod
1415 | 1486 |
1416 |->ff_math_deg: 1487 |->ff_math_deg:
1417 |.ffunc_n math_rad 1488 |.if FPU
1489 | .ffunc_d math_rad
1490 | vldr d1, CFUNC:CARG3->upvalue[0]
1491 | vmul.f64 d0, d0, d1
1492 | b ->fff_resd
1493 |.else
1494 | .ffunc_n math_rad
1418 | ldrd CARG34, CFUNC:CARG3->upvalue[0] 1495 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1419 | bl extern __aeabi_dmul 1496 | bl extern __aeabi_dmul
1420 | b ->fff_restv 1497 | b ->fff_restv
1498 |.endif
1421 | 1499 |
1500 |.if HFABI
1501 | .ffunc math_ldexp
1502 | ldr CARG4, [BASE, #4]
1503 | ldrd CARG12, [BASE, #8]
1504 | cmp NARGS8:RC, #16
1505 | blo ->fff_fallback
1506 | vldr d0, [BASE]
1507 | checktp CARG4, LJ_TISNUM
1508 | bhs ->fff_fallback
1509 | checktp CARG2, LJ_TISNUM
1510 | bne ->fff_fallback
1511 | .IOS mov RA, BASE
1512 | bl extern ldexp // (double x, int exp)
1513 | .IOS mov BASE, RA
1514 | b ->fff_resd
1515 |.else
1422 |.ffunc_2 math_ldexp 1516 |.ffunc_2 math_ldexp
1423 | checktp CARG2, LJ_TISNUM 1517 | checktp CARG2, LJ_TISNUM
1424 | bhs ->fff_fallback 1518 | bhs ->fff_fallback
@@ -1428,7 +1522,22 @@ static void build_subroutines(BuildCtx *ctx)
1428 | bl extern ldexp // (double x, int exp) 1522 | bl extern ldexp // (double x, int exp)
1429 | .IOS mov BASE, RA 1523 | .IOS mov BASE, RA
1430 | b ->fff_restv 1524 | b ->fff_restv
1525 |.endif
1431 | 1526 |
1527 |.if HFABI
1528 |.ffunc_d math_frexp
1529 | mov CARG1, sp
1530 | .IOS mov RA, BASE
1531 | bl extern frexp
1532 | .IOS mov BASE, RA
1533 | ldr CARG3, [sp]
1534 | mvn CARG4, #~LJ_TISNUM
1535 | ldr PC, [BASE, FRAME_PC]
1536 | vstr d0, [BASE, #-8]
1537 | mov RC, #(2+1)*8
1538 | strd CARG34, [BASE]
1539 | b ->fff_res
1540 |.else
1432 |.ffunc_n math_frexp 1541 |.ffunc_n math_frexp
1433 | mov CARG3, sp 1542 | mov CARG3, sp
1434 | .IOS mov RA, BASE 1543 | .IOS mov RA, BASE
@@ -1441,7 +1550,19 @@ static void build_subroutines(BuildCtx *ctx)
1441 | mov RC, #(2+1)*8 1550 | mov RC, #(2+1)*8
1442 | strd CARG34, [BASE] 1551 | strd CARG34, [BASE]
1443 | b ->fff_res 1552 | b ->fff_res
1553 |.endif
1444 | 1554 |
1555 |.if HFABI
1556 |.ffunc_d math_modf
1557 | sub CARG1, BASE, #8
1558 | ldr PC, [BASE, FRAME_PC]
1559 | .IOS mov RA, BASE
1560 | bl extern modf
1561 | .IOS mov BASE, RA
1562 | mov RC, #(2+1)*8
1563 | vstr d0, [BASE]
1564 | b ->fff_res
1565 |.else
1445 |.ffunc_n math_modf 1566 |.ffunc_n math_modf
1446 | sub CARG3, BASE, #8 1567 | sub CARG3, BASE, #8
1447 | ldr PC, [BASE, FRAME_PC] 1568 | ldr PC, [BASE, FRAME_PC]
@@ -1451,8 +1572,56 @@ static void build_subroutines(BuildCtx *ctx)
1451 | mov RC, #(2+1)*8 1572 | mov RC, #(2+1)*8
1452 | strd CARG12, [BASE] 1573 | strd CARG12, [BASE]
1453 | b ->fff_res 1574 | b ->fff_res
1575 |.endif
1454 | 1576 |
1455 |.macro math_minmax, name, cond, fcond 1577 |.macro math_minmax, name, cond, fcond
1578 |.if FPU
1579 | .ffunc_1 name
1580 | add RB, BASE, RC
1581 | checktp CARG2, LJ_TISNUM
1582 | add RA, BASE, #8
1583 | bne >4
1584 |1: // Handle integers.
1585 | ldrd CARG34, [RA]
1586 | cmp RA, RB
1587 | bhs ->fff_restv
1588 | checktp CARG4, LJ_TISNUM
1589 | bne >3
1590 | cmp CARG1, CARG3
1591 | add RA, RA, #8
1592 | mov..cond CARG1, CARG3
1593 | b <1
1594 |3: // Convert intermediate result to number and continue below.
1595 | vmov s4, CARG1
1596 | bhi ->fff_fallback
1597 | vldr d1, [RA]
1598 | vcvt.f64.s32 d0, s4
1599 | b >6
1600 |
1601 |4:
1602 | vldr d0, [BASE]
1603 | bhi ->fff_fallback
1604 |5: // Handle numbers.
1605 | ldrd CARG34, [RA]
1606 | vldr d1, [RA]
1607 | cmp RA, RB
1608 | bhs ->fff_resd
1609 | checktp CARG4, LJ_TISNUM
1610 | bhs >7
1611 |6:
1612 | vcmp.f64 d0, d1
1613 | vmrs
1614 | add RA, RA, #8
1615 | vmov..fcond.f64 d0, d1
1616 | b <5
1617 |7: // Convert integer to number and continue above.
1618 | vmov s4, CARG3
1619 | bhi ->fff_fallback
1620 | vcvt.f64.s32 d1, s4
1621 | b <6
1622 |
1623 |.else
1624 |
1456 | .ffunc_1 name 1625 | .ffunc_1 name
1457 | checktp CARG2, LJ_TISNUM 1626 | checktp CARG2, LJ_TISNUM
1458 | mov RA, #8 1627 | mov RA, #8
@@ -1467,9 +1636,8 @@ static void build_subroutines(BuildCtx *ctx)
1467 | add RA, RA, #8 1636 | add RA, RA, #8
1468 | mov..cond CARG1, CARG3 1637 | mov..cond CARG1, CARG3
1469 | b <1 1638 | b <1
1470 |3: 1639 |3: // Convert intermediate result to number and continue below.
1471 | bhi ->fff_fallback 1640 | bhi ->fff_fallback
1472 | // Convert intermediate result to number and continue below.
1473 | bl extern __aeabi_i2d 1641 | bl extern __aeabi_i2d
1474 | ldrd CARG34, [BASE, RA] 1642 | ldrd CARG34, [BASE, RA]
1475 | b >6 1643 | b >6
@@ -1495,6 +1663,7 @@ static void build_subroutines(BuildCtx *ctx)
1495 | bl extern __aeabi_i2d 1663 | bl extern __aeabi_i2d
1496 | ldrd CARG34, TMPD 1664 | ldrd CARG34, TMPD
1497 | b <6 1665 | b <6
1666 |.endif
1498 |.endmacro 1667 |.endmacro
1499 | 1668 |
1500 | math_minmax math_min, gt, hi 1669 | math_minmax math_min, gt, hi
@@ -1959,6 +2128,9 @@ static void build_subroutines(BuildCtx *ctx)
1959 | ldr CARG2, [CARG1, #-4]! // Get exit instruction. 2128 | ldr CARG2, [CARG1, #-4]! // Get exit instruction.
1960 | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. 2129 | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC.
1961 | str CARG1, [sp, #60] 2130 | str CARG1, [sp, #60]
2131 |.if FPU
2132 | vpush {d0-d15}
2133 |.endif
1962 | lsl CARG2, CARG2, #8 2134 | lsl CARG2, CARG2, #8
1963 | add CARG1, CARG1, CARG2, asr #6 2135 | add CARG1, CARG1, CARG2, asr #6
1964 | ldr CARG2, [lr, #4] // Load exit stub group offset. 2136 | ldr CARG2, [lr, #4] // Load exit stub group offset.
@@ -2025,8 +2197,53 @@ static void build_subroutines(BuildCtx *ctx)
2025 |// FP value rounding. Called from JIT code. 2197 |// FP value rounding. Called from JIT code.
2026 |// 2198 |//
2027 |// double lj_vm_floor/ceil/trunc(double x); 2199 |// double lj_vm_floor/ceil/trunc(double x);
2028 |.macro vm_round, func 2200 |.macro vm_round, func, hf
2029 |->vm_ .. func: 2201 |.if FPU
2202 |.if hf == 0
2203 | vmov d0, CARG1, CARG2
2204 | vldr d2, <8 // 2^52
2205 |.else
2206 | vldr d2, <8 // 2^52
2207 | vmov CARG1, CARG2, d0
2208 |.endif
2209 | vabs.f64 d1, d0
2210 | vcmp.f64 d1, d2 // |x| >= 2^52 or NaN?
2211 | vmrs
2212 |.if "func" == "trunc"
2213 | vadd.f64 d0, d1, d2
2214 | bxpl lr // Return argument unchanged.
2215 | vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52
2216 | vldr d2, <9 // +1.0
2217 | vcmp.f64 d1, d0 // |x| < result: subtract +1.0
2218 | vmrs
2219 | vsubmi.f64 d0, d1, d2
2220 | cmp CARG2, #0
2221 | vnegmi.f64 d0, d0 // Merge sign bit back in.
2222 |.else
2223 | vadd.f64 d1, d1, d2
2224 | bxpl lr // Return argument unchanged.
2225 | cmp CARG2, #0
2226 | vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52
2227 | vldr d2, <9 // +1.0
2228 | vnegmi.f64 d1, d1 // Merge sign bit back in.
2229 |.if "func" == "floor"
2230 | vcmp.f64 d0, d1 // x < result: subtract +1.0.
2231 | vmrs
2232 | vsubmi.f64 d0, d1, d2
2233 |.else
2234 | vcmp.f64 d1, d0 // x > result: add +1.0.
2235 | vmrs
2236 | vaddmi.f64 d0, d1, d2
2237 |.endif
2238 | vmovpl.f64 d0, d1
2239 |.endif
2240 |.if hf == 0
2241 | vmov CARG1, CARG2, d0
2242 |.endif
2243 | bx lr
2244 |
2245 |.else
2246 |
2030 | lsl CARG3, CARG2, #1 2247 | lsl CARG3, CARG2, #1
2031 | adds RB, CARG3, #0x00200000 2248 | adds RB, CARG3, #0x00200000
2032 | bpl >2 // |x| < 1? 2249 | bpl >2 // |x| < 1?
@@ -2069,15 +2286,40 @@ static void build_subroutines(BuildCtx *ctx)
2069 | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) 2286 | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
2070 | orrne CARG2, CARG2, CARG4 2287 | orrne CARG2, CARG2, CARG4
2071 | bx lr 2288 | bx lr
2289 |.endif
2072 |.endmacro 2290 |.endmacro
2073 | 2291 |
2292 |.if FPU
2293 |.align 8
2294 |9:
2295 | .long 0, 0x3ff00000 // +1.0
2296 |8:
2297 | .long 0, 0x43300000 // 2^52
2298 |.else
2074 |9: 2299 |9:
2075 | .long 0x3ff00000 // hiword(1.0) 2300 | .long 0x3ff00000 // hiword(+1.0)
2076 | vm_round floor 2301 |.endif
2077 | vm_round ceil 2302 |
2303 |->vm_floor:
2304 |.if not HFABI
2305 | vm_round floor, 0
2306 |.endif
2307 |->vm_floor_hf:
2308 |.if FPU
2309 | vm_round floor, 1
2310 |.endif
2311 |
2312 |->vm_ceil:
2313 |.if not HFABI
2314 | vm_round ceil, 0
2315 |.endif
2316 |->vm_ceil_hf:
2317 |.if FPU
2318 | vm_round ceil, 1
2319 |.endif
2078 | 2320 |
2079 |->vm_trunc: 2321 |->vm_trunc:
2080 |.if JIT 2322 |.if JIT and not HFABI
2081 | lsl CARG3, CARG2, #1 2323 | lsl CARG3, CARG2, #1
2082 | adds RB, CARG3, #0x00200000 2324 | adds RB, CARG3, #0x00200000
2083 | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. 2325 | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
@@ -2093,8 +2335,23 @@ static void build_subroutines(BuildCtx *ctx)
2093 | bx lr 2335 | bx lr
2094 |.endif 2336 |.endif
2095 | 2337 |
2338 |->vm_trunc_hf:
2339 |.if JIT and FPU
2340 | vm_round trunc, 1
2341 |.endif
2342 |
2096 | // double lj_vm_mod(double dividend, double divisor); 2343 | // double lj_vm_mod(double dividend, double divisor);
2097 |->vm_mod: 2344 |->vm_mod:
2345 |.if FPU
2346 | // Special calling convention. Also, RC (r11) is not preserved.
2347 | vdiv.f64 d0, d6, d7
2348 | mov RC, lr
2349 | bl ->vm_floor_hf
2350 | vmul.f64 d0, d0, d7
2351 | mov lr, RC
2352 | vsub.f64 d6, d6, d0
2353 | bx lr
2354 |.else
2098 | push {r0, r1, r2, r3, r4, lr} 2355 | push {r0, r1, r2, r3, r4, lr}
2099 | bl extern __aeabi_ddiv 2356 | bl extern __aeabi_ddiv
2100 | bl ->vm_floor 2357 | bl ->vm_floor
@@ -2105,6 +2362,7 @@ static void build_subroutines(BuildCtx *ctx)
2105 | bl extern __aeabi_dadd 2362 | bl extern __aeabi_dadd
2106 | add sp, sp, #20 2363 | add sp, sp, #20
2107 | pop {pc} 2364 | pop {pc}
2365 |.endif
2108 | 2366 |
2109 | // int lj_vm_modi(int dividend, int divisor); 2367 | // int lj_vm_modi(int dividend, int divisor);
2110 |->vm_modi: 2368 |->vm_modi:
@@ -2266,6 +2524,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2266 | ins_next 2524 | ins_next
2267 | 2525 |
2268 |3: // CARG12 is not an integer. 2526 |3: // CARG12 is not an integer.
2527 |.if FPU
2528 | vldr d0, [RA]
2529 | bhi ->vmeta_comp
2530 | // d0 is a number.
2531 | checktp CARG4, LJ_TISNUM
2532 | vldr d1, [RC]
2533 | blo >5
2534 | // d0 is a number, CARG3 is an integer.
2535 | vmov s4, CARG3
2536 | vcvt.f64.s32 d1, s4
2537 | b >5
2538 |4: // CARG1 is an integer, CARG34 is not an integer.
2539 | vldr d1, [RC]
2540 | bhi ->vmeta_comp
2541 | // CARG1 is an integer, d1 is a number.
2542 | vmov s4, CARG1
2543 | vcvt.f64.s32 d0, s4
2544 |5: // d0 and d1 are numbers.
2545 | vcmp.f64 d0, d1
2546 | vmrs
2547 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2548 if (op == BC_ISLT) {
2549 | sublo PC, RB, #0x20000
2550 } else if (op == BC_ISGE) {
2551 | subhs PC, RB, #0x20000
2552 } else if (op == BC_ISLE) {
2553 | subls PC, RB, #0x20000
2554 } else {
2555 | subhi PC, RB, #0x20000
2556 }
2557 | b <1
2558 |.else
2269 | bhi ->vmeta_comp 2559 | bhi ->vmeta_comp
2270 | // CARG12 is a number. 2560 | // CARG12 is a number.
2271 | checktp CARG4, LJ_TISNUM 2561 | checktp CARG4, LJ_TISNUM
@@ -2282,7 +2572,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2282 | b >5 2572 | b >5
2283 |4: // CARG1 is an integer, CARG34 is not an integer. 2573 |4: // CARG1 is an integer, CARG34 is not an integer.
2284 | bhi ->vmeta_comp 2574 | bhi ->vmeta_comp
2285 | // CARG1 is an integer, CARG34 is a number 2575 | // CARG1 is an integer, CARG34 is a number.
2286 | mov RA, RB // Save RB. 2576 | mov RA, RB // Save RB.
2287 | bl extern __aeabi_i2d 2577 | bl extern __aeabi_i2d
2288 | ldrd CARG34, [RC] // Restore second operand. 2578 | ldrd CARG34, [RC] // Restore second operand.
@@ -2299,6 +2589,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2299 | subhi PC, RA, #0x20000 2589 | subhi PC, RA, #0x20000
2300 } 2590 }
2301 | b <1 2591 | b <1
2592 |.endif
2302 break; 2593 break;
2303 2594
2304 case BC_ISEQV: case BC_ISNEV: 2595 case BC_ISEQV: case BC_ISNEV:
@@ -2439,6 +2730,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2439 } 2730 }
2440 | bhi <2 2731 | bhi <2
2441 |.endif 2732 |.endif
2733 |.if FPU
2734 | checktp CARG4, LJ_TISNUM
2735 | vmov s4, CARG3
2736 | vldr d0, [RA]
2737 | vldrlo d1, [RC]
2738 | vcvths.f64.s32 d1, s4
2739 | b >5
2740 |4: // CARG1 is an integer, d1 is a number.
2741 | vmov s4, CARG1
2742 | vldr d1, [RC]
2743 | vcvt.f64.s32 d0, s4
2744 |5: // d0 and d1 are numbers.
2745 | vcmp.f64 d0, d1
2746 | vmrs
2747 if (vk) {
2748 | subeq PC, RB, #0x20000
2749 } else {
2750 | subne PC, RB, #0x20000
2751 }
2752 | b <2
2753 |.else
2442 | // CARG12 is a number. 2754 | // CARG12 is a number.
2443 | checktp CARG4, LJ_TISNUM 2755 | checktp CARG4, LJ_TISNUM
2444 | movlo RA, RB // Save RB. 2756 | movlo RA, RB // Save RB.
@@ -2458,6 +2770,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2458 | subne PC, RA, #0x20000 2770 | subne PC, RA, #0x20000
2459 } 2771 }
2460 | b <2 2772 | b <2
2773 |.endif
2461 | 2774 |
2462 |.if FFI 2775 |.if FFI
2463 |7: 2776 |7:
@@ -2617,20 +2930,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2617 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 2930 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2618 ||switch (vk) { 2931 ||switch (vk) {
2619 ||case 0: 2932 ||case 0:
2933 | .if FPU
2934 | ldrd CARG12, [RB, BASE]!
2935 | ldrd CARG34, [RC, KBASE]!
2936 | .else
2620 | ldrd CARG12, [BASE, RB] 2937 | ldrd CARG12, [BASE, RB]
2621 | ldrd CARG34, [KBASE, RC] 2938 | ldrd CARG34, [KBASE, RC]
2939 | .endif
2622 || break; 2940 || break;
2623 ||case 1: 2941 ||case 1:
2942 | .if FPU
2943 | ldrd CARG34, [RB, BASE]!
2944 | ldrd CARG12, [RC, KBASE]!
2945 | .else
2624 | ldrd CARG34, [BASE, RB] 2946 | ldrd CARG34, [BASE, RB]
2625 | ldrd CARG12, [KBASE, RC] 2947 | ldrd CARG12, [KBASE, RC]
2948 | .endif
2626 || break; 2949 || break;
2627 ||default: 2950 ||default:
2951 | .if FPU
2952 | ldrd CARG12, [RB, BASE]!
2953 | ldrd CARG34, [RC, BASE]!
2954 | .else
2628 | ldrd CARG12, [BASE, RB] 2955 | ldrd CARG12, [BASE, RB]
2629 | ldrd CARG34, [BASE, RC] 2956 | ldrd CARG34, [BASE, RC]
2957 | .endif
2630 || break; 2958 || break;
2631 ||} 2959 ||}
2632 |.endmacro 2960 |.endmacro
2633 | 2961 |
2962 |.macro ins_arithpre_fpu, reg1, reg2
2963 |.if FPU
2964 ||if (vk == 1) {
2965 | vldr reg2, [RB]
2966 | vldr reg1, [RC]
2967 ||} else {
2968 | vldr reg1, [RB]
2969 | vldr reg2, [RC]
2970 ||}
2971 |.endif
2972 |.endmacro
2973 |
2974 |.macro ins_arithpost_fpu, reg
2975 | ins_next1
2976 | add RA, BASE, RA
2977 | ins_next2
2978 | vstr reg, [RA]
2979 | ins_next3
2980 |.endmacro
2981 |
2634 |.macro ins_arithfallback, ins 2982 |.macro ins_arithfallback, ins
2635 ||switch (vk) { 2983 ||switch (vk) {
2636 ||case 0: 2984 ||case 0:
@@ -2645,9 +2993,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2645 ||} 2993 ||}
2646 |.endmacro 2994 |.endmacro
2647 | 2995 |
2648 |.macro ins_arithdn, intins, fpcall 2996 |.macro ins_arithdn, intins, fpins, fpcall
2649 | ins_arithpre 2997 | ins_arithpre
2650 |.if "intins" ~= "vm_modi" 2998 |.if "intins" ~= "vm_modi" and not FPU
2651 | ins_next1 2999 | ins_next1
2652 |.endif 3000 |.endif
2653 | ins_arithcheck_int >5 3001 | ins_arithcheck_int >5
@@ -2665,57 +3013,74 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2665 | ins_arithfallback bvs 3013 | ins_arithfallback bvs
2666 |.endif 3014 |.endif
2667 |4: 3015 |4:
2668 |.if "intins" == "vm_modi" 3016 |.if "intins" == "vm_modi" or FPU
2669 | ins_next1 3017 | ins_next1
2670 |.endif 3018 |.endif
2671 | ins_next2 3019 | ins_next2
2672 | strd CARG12, [BASE, RA] 3020 | strd CARG12, [BASE, RA]
2673 | ins_next3 3021 | ins_next3
2674 |5: // FP variant. 3022 |5: // FP variant.
3023 | ins_arithpre_fpu d6, d7
2675 | ins_arithfallback ins_arithcheck_num 3024 | ins_arithfallback ins_arithcheck_num
3025 |.if FPU
2676 |.if "intins" == "vm_modi" 3026 |.if "intins" == "vm_modi"
2677 | bl fpcall 3027 | bl fpcall
2678 |.else 3028 |.else
3029 | fpins d6, d6, d7
3030 |.endif
3031 | ins_arithpost_fpu d6
3032 |.else
2679 | bl fpcall 3033 | bl fpcall
2680 | ins_next1 3034 |.if "intins" ~= "vm_modi"
3035 | ins_next1
2681 |.endif 3036 |.endif
2682 | b <4 3037 | b <4
3038 |.endif
2683 |.endmacro 3039 |.endmacro
2684 | 3040 |
2685 |.macro ins_arithfp, fpcall 3041 |.macro ins_arithfp, fpins, fpcall
2686 | ins_arithpre 3042 | ins_arithpre
3043 |.if "fpins" ~= "extern" or HFABI
3044 | ins_arithpre_fpu d0, d1
3045 |.endif
2687 | ins_arithfallback ins_arithcheck_num 3046 | ins_arithfallback ins_arithcheck_num
2688 |.if "fpcall" == "extern pow" 3047 |.if "fpins" == "extern"
2689 | .IOS mov RC, BASE 3048 | .IOS mov RC, BASE
2690 | bl fpcall 3049 | bl fpcall
2691 | .IOS mov BASE, RC 3050 | .IOS mov BASE, RC
3051 |.elif FPU
3052 | fpins d0, d0, d1
2692 |.else 3053 |.else
2693 | bl fpcall 3054 | bl fpcall
2694 |.endif 3055 |.endif
3056 |.if ("fpins" ~= "extern" or HFABI) and FPU
3057 | ins_arithpost_fpu d0
3058 |.else
2695 | ins_next1 3059 | ins_next1
2696 | ins_next2 3060 | ins_next2
2697 | strd CARG12, [BASE, RA] 3061 | strd CARG12, [BASE, RA]
2698 | ins_next3 3062 | ins_next3
3063 |.endif
2699 |.endmacro 3064 |.endmacro
2700 3065
2701 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3066 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2702 | ins_arithdn adds, extern __aeabi_dadd 3067 | ins_arithdn adds, vadd.f64, extern __aeabi_dadd
2703 break; 3068 break;
2704 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3069 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2705 | ins_arithdn subs, extern __aeabi_dsub 3070 | ins_arithdn subs, vsub.f64, extern __aeabi_dsub
2706 break; 3071 break;
2707 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3072 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2708 | ins_arithdn smull, extern __aeabi_dmul 3073 | ins_arithdn smull, vmul.f64, extern __aeabi_dmul
2709 break; 3074 break;
2710 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3075 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2711 | ins_arithfp extern __aeabi_ddiv 3076 | ins_arithfp vdiv.f64, extern __aeabi_ddiv
2712 break; 3077 break;
2713 case BC_MODVN: case BC_MODNV: case BC_MODVV: 3078 case BC_MODVN: case BC_MODNV: case BC_MODVV:
2714 | ins_arithdn vm_modi, ->vm_mod 3079 | ins_arithdn vm_modi, vm_mod, ->vm_mod
2715 break; 3080 break;
2716 case BC_POW: 3081 case BC_POW:
2717 | // NYI: (partial) integer arithmetic. 3082 | // NYI: (partial) integer arithmetic.
2718 | ins_arithfp extern pow 3083 | ins_arithfp extern, extern pow
2719 break; 3084 break;
2720 3085
2721 case BC_CAT: 3086 case BC_CAT:
@@ -3775,20 +4140,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3775 | cmnlo CARG4, #-LJ_TISNUM 4140 | cmnlo CARG4, #-LJ_TISNUM
3776 | cmnlo RB, #-LJ_TISNUM 4141 | cmnlo RB, #-LJ_TISNUM
3777 | bhs ->vmeta_for 4142 | bhs ->vmeta_for
4143 |.if FPU
4144 | vldr d0, FOR_IDX
4145 | vldr d1, FOR_STOP
4146 | cmp RB, #0
4147 | vstr d0, FOR_EXT
4148 |.else
3778 | cmp RB, #0 4149 | cmp RB, #0
3779 | strd CARG12, FOR_IDX
3780 | strd CARG12, FOR_EXT 4150 | strd CARG12, FOR_EXT
3781 | blt >8 4151 | blt >8
4152 |.endif
3782 } else { 4153 } else {
4154 |.if FPU
4155 | vldr d0, FOR_IDX
4156 | vldr d2, FOR_STEP
4157 | vldr d1, FOR_STOP
4158 | cmp CARG4, #0
4159 | vadd.f64 d0, d0, d2
4160 |.else
3783 | cmp CARG4, #0 4161 | cmp CARG4, #0
3784 | blt >8 4162 | blt >8
3785 | bl extern __aeabi_dadd 4163 | bl extern __aeabi_dadd
3786 | strd CARG12, FOR_IDX 4164 | strd CARG12, FOR_IDX
3787 | ldrd CARG34, FOR_STOP 4165 | ldrd CARG34, FOR_STOP
3788 | strd CARG12, FOR_EXT 4166 | strd CARG12, FOR_EXT
4167 |.endif
3789 } 4168 }
3790 |6: 4169 |6:
4170 |.if FPU
4171 | vcmpge.f64 d0, d1
4172 | vcmplt.f64 d1, d0
4173 | vmrs
4174 |.else
3791 | bl extern __aeabi_cdcmple 4175 | bl extern __aeabi_cdcmple
4176 |.endif
4177 if (vk) {
4178 |.if FPU
4179 | vstr d0, FOR_IDX
4180 | vstr d0, FOR_EXT
4181 |.endif
4182 }
3792 if (op == BC_FORI) { 4183 if (op == BC_FORI) {
3793 | subhi PC, RC, #0x20000 4184 | subhi PC, RC, #0x20000
3794 } else if (op == BC_JFORI) { 4185 } else if (op == BC_JFORI) {
@@ -3804,6 +4195,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3804 | ins_next2 4195 | ins_next2
3805 | b <3 4196 | b <3
3806 | 4197 |
4198 |.if not FPU
3807 |8: // Invert check for negative step. 4199 |8: // Invert check for negative step.
3808 if (vk) { 4200 if (vk) {
3809 | bl extern __aeabi_dadd 4201 | bl extern __aeabi_dadd
@@ -3814,6 +4206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3814 | mov CARG4, CARG2 4206 | mov CARG4, CARG2
3815 | ldrd CARG12, FOR_STOP 4207 | ldrd CARG12, FOR_STOP
3816 | b <6 4208 | b <6
4209 |.endif
3817 break; 4210 break;
3818 4211
3819 case BC_ITERL: 4212 case BC_ITERL:
@@ -4048,8 +4441,14 @@ static void emit_asm_debug(BuildCtx *ctx)
4048 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ 4441 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4049 "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ 4442 "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */
4050 fcofs, CFRAME_SIZE); 4443 fcofs, CFRAME_SIZE);
4051 for (i = 11; i >= 4; i--) /* offset r4-r11 */ 4444 for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */
4052 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); 4445 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
4446#if LJ_ARCH_HASFPU
4447 for (i = 15; i >= 8; i--) /* offset d8-d15 */
4448 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n",
4449 64+2*i, 10+2*(15-i));
4450 fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */
4451#endif
4053 fprintf(ctx->fp, 4452 fprintf(ctx->fp,
4054 "\t.align 2\n" 4453 "\t.align 2\n"
4055 ".LEFDE0:\n\n"); 4454 ".LEFDE0:\n\n");