diff options
author | Mike Pall <mike> | 2012-07-30 18:59:13 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2012-07-30 18:59:13 +0200 |
commit | a373fddbd3b129f3f95474533e74f0a52744ff8c (patch) | |
tree | 9dc1e4ee3eae94a289278b246ff659d8b63cae6d /src | |
parent | 23abbd9ef344289d1dae6d8fcf9d3c0ab8e1e6e1 (diff) | |
download | luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.tar.gz luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.tar.bz2 luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.zip |
ARM: Add VFP and hard-float ABI variants to interpreter.
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_frame.h | 4 | ||||
-rw-r--r-- | src/lj_target_arm.h | 11 | ||||
-rw-r--r-- | src/vm_arm.dasc | 447 |
3 files changed, 434 insertions, 28 deletions
diff --git a/src/lj_frame.h b/src/lj_frame.h index b8429c2a..b8af2349 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h | |||
@@ -97,7 +97,11 @@ enum { | |||
97 | #define CFRAME_OFS_L 12 | 97 | #define CFRAME_OFS_L 12 |
98 | #define CFRAME_OFS_PC 8 | 98 | #define CFRAME_OFS_PC 8 |
99 | #define CFRAME_OFS_MULTRES 4 | 99 | #define CFRAME_OFS_MULTRES 4 |
100 | #if LJ_ARCH_HASFPU | ||
101 | #define CFRAME_SIZE 128 | ||
102 | #else | ||
100 | #define CFRAME_SIZE 64 | 103 | #define CFRAME_SIZE 64 |
104 | #endif | ||
101 | #define CFRAME_SHIFT_MULTRES 3 | 105 | #define CFRAME_SHIFT_MULTRES 3 |
102 | #elif LJ_TARGET_PPC | 106 | #elif LJ_TARGET_PPC |
103 | #if LJ_ARCH_PPC64 | 107 | #if LJ_ARCH_PPC64 |
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index a24fc819..20e8ad36 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h | |||
@@ -14,7 +14,9 @@ | |||
14 | #if LJ_SOFTFP | 14 | #if LJ_SOFTFP |
15 | #define FPRDEF(_) | 15 | #define FPRDEF(_) |
16 | #else | 16 | #else |
17 | #error "NYI: hard-float support for ARM" | 17 | #define FPRDEF(_) \ |
18 | _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ | ||
19 | _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) | ||
18 | #endif | 20 | #endif |
19 | #define VRIDDEF(_) | 21 | #define VRIDDEF(_) |
20 | 22 | ||
@@ -45,7 +47,7 @@ enum { | |||
45 | #if LJ_SOFTFP | 47 | #if LJ_SOFTFP |
46 | RID_MAX_FPR = RID_MIN_FPR, | 48 | RID_MAX_FPR = RID_MIN_FPR, |
47 | #else | 49 | #else |
48 | #error "NYI: VFP support for ARM" | 50 | RID_MAX_FPR = RID_D15+1, |
49 | #endif | 51 | #endif |
50 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | 52 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, |
51 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR | 53 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR |
@@ -68,7 +70,8 @@ enum { | |||
68 | #define RSET_FPR 0 | 70 | #define RSET_FPR 0 |
69 | #define RSET_ALL RSET_GPR | 71 | #define RSET_ALL RSET_GPR |
70 | #else | 72 | #else |
71 | #error "NYI: VFP support for ARM" | 73 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) |
74 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
72 | #endif | 75 | #endif |
73 | #define RSET_INIT RSET_ALL | 76 | #define RSET_INIT RSET_ALL |
74 | 77 | ||
@@ -82,7 +85,7 @@ enum { | |||
82 | #if LJ_SOFTFP | 85 | #if LJ_SOFTFP |
83 | #define RSET_SCRATCH_FPR 0 | 86 | #define RSET_SCRATCH_FPR 0 |
84 | #else | 87 | #else |
85 | #error "NYI: VFP support for ARM" | 88 | #define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) |
86 | #endif | 89 | #endif |
87 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) | 90 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) |
88 | #define REGARG_FIRSTGPR RID_R0 | 91 | #define REGARG_FIRSTGPR RID_R0 |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 8ddce49e..26f97aa3 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
@@ -46,6 +46,7 @@ | |||
46 | |.define CRET2, r1 | 46 | |.define CRET2, r1 |
47 | | | 47 | | |
48 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 48 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
49 | |.define SAVE_R4, [sp, #28] | ||
49 | |.define CFRAME_SPACE, #28 | 50 | |.define CFRAME_SPACE, #28 |
50 | |.define SAVE_ERRF, [sp, #24] | 51 | |.define SAVE_ERRF, [sp, #24] |
51 | |.define SAVE_NRES, [sp, #20] | 52 | |.define SAVE_NRES, [sp, #20] |
@@ -60,6 +61,20 @@ | |||
60 | |.define TMPD, [sp] | 61 | |.define TMPD, [sp] |
61 | |.define TMPDp, sp | 62 | |.define TMPDp, sp |
62 | | | 63 | | |
64 | |.if FPU | ||
65 | |.macro saveregs | ||
66 | | push {r5, r6, r7, r8, r9, r10, r11, lr} | ||
67 | | vpush {d8-d15} | ||
68 | | sub sp, sp, CFRAME_SPACE+4 | ||
69 | | str r4, SAVE_R4 | ||
70 | |.endmacro | ||
71 | |.macro restoreregs_ret | ||
72 | | ldr r4, SAVE_R4 | ||
73 | | add sp, sp, CFRAME_SPACE+4 | ||
74 | | vpop {d8-d15} | ||
75 | | pop {r5, r6, r7, r8, r9, r10, r11, pc} | ||
76 | |.endmacro | ||
77 | |.else | ||
63 | |.macro saveregs | 78 | |.macro saveregs |
64 | | push {r4, r5, r6, r7, r8, r9, r10, r11, lr} | 79 | | push {r4, r5, r6, r7, r8, r9, r10, r11, lr} |
65 | | sub sp, sp, CFRAME_SPACE | 80 | | sub sp, sp, CFRAME_SPACE |
@@ -68,6 +83,7 @@ | |||
68 | | add sp, sp, CFRAME_SPACE | 83 | | add sp, sp, CFRAME_SPACE |
69 | | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} | 84 | | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} |
70 | |.endmacro | 85 | |.endmacro |
86 | |.endif | ||
71 | | | 87 | | |
72 | |// Type definitions. Some of these are only used for documentation. | 88 | |// Type definitions. Some of these are only used for documentation. |
73 | |.type L, lua_State, LREG | 89 | |.type L, lua_State, LREG |
@@ -875,6 +891,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
875 | | bhs ->fff_fallback | 891 | | bhs ->fff_fallback |
876 | |.endmacro | 892 | |.endmacro |
877 | | | 893 | | |
894 | |.macro .ffunc_d, name | ||
895 | | .ffunc name | ||
896 | | ldr CARG2, [BASE, #4] | ||
897 | | cmp NARGS8:RC, #8 | ||
898 | | vldr d0, [BASE] | ||
899 | | blo ->fff_fallback | ||
900 | | checktp CARG2, LJ_TISNUM | ||
901 | | bhs ->fff_fallback | ||
902 | |.endmacro | ||
903 | | | ||
904 | |.macro .ffunc_dd, name | ||
905 | | .ffunc name | ||
906 | | ldr CARG2, [BASE, #4] | ||
907 | | ldr CARG4, [BASE, #12] | ||
908 | | cmp NARGS8:RC, #16 | ||
909 | | vldr d0, [BASE] | ||
910 | | vldr d1, [BASE, #8] | ||
911 | | blo ->fff_fallback | ||
912 | | checktp CARG2, LJ_TISNUM | ||
913 | | cmnlo CARG4, #-LJ_TISNUM | ||
914 | | bhs ->fff_fallback | ||
915 | |.endmacro | ||
916 | | | ||
878 | |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. | 917 | |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. |
879 | |.macro ffgccheck | 918 | |.macro ffgccheck |
880 | | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] | 919 | | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] |
@@ -1327,8 +1366,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1327 | | movmi CARG1, #0x80000000 | 1366 | | movmi CARG1, #0x80000000 |
1328 | | bmi <1 | 1367 | | bmi <1 |
1329 | |4: | 1368 | |4: |
1369 | |.if HFABI | ||
1370 | | vmov d0, CARG1, CARG2 | ||
1371 | | bl ->vm_..func.._hf | ||
1372 | | b ->fff_resd | ||
1373 | |.else | ||
1330 | | bl ->vm_..func | 1374 | | bl ->vm_..func |
1331 | | b ->fff_restv | 1375 | | b ->fff_restv |
1376 | |.endif | ||
1332 | |.endmacro | 1377 | |.endmacro |
1333 | | | 1378 | | |
1334 | | math_round floor | 1379 | | math_round floor |
@@ -1381,22 +1426,48 @@ static void build_subroutines(BuildCtx *ctx) | |||
1381 | | b <5 | 1426 | | b <5 |
1382 | | | 1427 | | |
1383 | |.macro math_extern, func | 1428 | |.macro math_extern, func |
1429 | |.if HFABI | ||
1430 | | .ffunc_d math_ .. func | ||
1431 | |.else | ||
1384 | | .ffunc_n math_ .. func | 1432 | | .ffunc_n math_ .. func |
1433 | |.endif | ||
1385 | | .IOS mov RA, BASE | 1434 | | .IOS mov RA, BASE |
1386 | | bl extern func | 1435 | | bl extern func |
1387 | | .IOS mov BASE, RA | 1436 | | .IOS mov BASE, RA |
1437 | |.if HFABI | ||
1438 | | b ->fff_resd | ||
1439 | |.else | ||
1388 | | b ->fff_restv | 1440 | | b ->fff_restv |
1441 | |.endif | ||
1389 | |.endmacro | 1442 | |.endmacro |
1390 | | | 1443 | | |
1391 | |.macro math_extern2, func | 1444 | |.macro math_extern2, func |
1445 | |.if HFABI | ||
1446 | | .ffunc_dd math_ .. func | ||
1447 | |.else | ||
1392 | | .ffunc_nn math_ .. func | 1448 | | .ffunc_nn math_ .. func |
1449 | |.endif | ||
1393 | | .IOS mov RA, BASE | 1450 | | .IOS mov RA, BASE |
1394 | | bl extern func | 1451 | | bl extern func |
1395 | | .IOS mov BASE, RA | 1452 | | .IOS mov BASE, RA |
1453 | |.if HFABI | ||
1454 | | b ->fff_resd | ||
1455 | |.else | ||
1396 | | b ->fff_restv | 1456 | | b ->fff_restv |
1457 | |.endif | ||
1397 | |.endmacro | 1458 | |.endmacro |
1398 | | | 1459 | | |
1460 | |.if FPU | ||
1461 | | .ffunc_d math_sqrt | ||
1462 | | vsqrt.f64 d0, d0 | ||
1463 | |->fff_resd: | ||
1464 | | ldr PC, [BASE, FRAME_PC] | ||
1465 | | vstr d0, [BASE, #-8] | ||
1466 | | b ->fff_res1 | ||
1467 | |.else | ||
1399 | | math_extern sqrt | 1468 | | math_extern sqrt |
1469 | |.endif | ||
1470 | | | ||
1400 | | math_extern log | 1471 | | math_extern log |
1401 | | math_extern log10 | 1472 | | math_extern log10 |
1402 | | math_extern exp | 1473 | | math_extern exp |
@@ -1414,11 +1485,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
1414 | | math_extern2 fmod | 1485 | | math_extern2 fmod |
1415 | | | 1486 | | |
1416 | |->ff_math_deg: | 1487 | |->ff_math_deg: |
1417 | |.ffunc_n math_rad | 1488 | |.if FPU |
1489 | | .ffunc_d math_rad | ||
1490 | | vldr d1, CFUNC:CARG3->upvalue[0] | ||
1491 | | vmul.f64 d0, d0, d1 | ||
1492 | | b ->fff_resd | ||
1493 | |.else | ||
1494 | | .ffunc_n math_rad | ||
1418 | | ldrd CARG34, CFUNC:CARG3->upvalue[0] | 1495 | | ldrd CARG34, CFUNC:CARG3->upvalue[0] |
1419 | | bl extern __aeabi_dmul | 1496 | | bl extern __aeabi_dmul |
1420 | | b ->fff_restv | 1497 | | b ->fff_restv |
1498 | |.endif | ||
1421 | | | 1499 | | |
1500 | |.if HFABI | ||
1501 | | .ffunc math_ldexp | ||
1502 | | ldr CARG4, [BASE, #4] | ||
1503 | | ldrd CARG12, [BASE, #8] | ||
1504 | | cmp NARGS8:RC, #16 | ||
1505 | | blo ->fff_fallback | ||
1506 | | vldr d0, [BASE] | ||
1507 | | checktp CARG4, LJ_TISNUM | ||
1508 | | bhs ->fff_fallback | ||
1509 | | checktp CARG2, LJ_TISNUM | ||
1510 | | bne ->fff_fallback | ||
1511 | | .IOS mov RA, BASE | ||
1512 | | bl extern ldexp // (double x, int exp) | ||
1513 | | .IOS mov BASE, RA | ||
1514 | | b ->fff_resd | ||
1515 | |.else | ||
1422 | |.ffunc_2 math_ldexp | 1516 | |.ffunc_2 math_ldexp |
1423 | | checktp CARG2, LJ_TISNUM | 1517 | | checktp CARG2, LJ_TISNUM |
1424 | | bhs ->fff_fallback | 1518 | | bhs ->fff_fallback |
@@ -1428,7 +1522,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1428 | | bl extern ldexp // (double x, int exp) | 1522 | | bl extern ldexp // (double x, int exp) |
1429 | | .IOS mov BASE, RA | 1523 | | .IOS mov BASE, RA |
1430 | | b ->fff_restv | 1524 | | b ->fff_restv |
1525 | |.endif | ||
1431 | | | 1526 | | |
1527 | |.if HFABI | ||
1528 | |.ffunc_d math_frexp | ||
1529 | | mov CARG1, sp | ||
1530 | | .IOS mov RA, BASE | ||
1531 | | bl extern frexp | ||
1532 | | .IOS mov BASE, RA | ||
1533 | | ldr CARG3, [sp] | ||
1534 | | mvn CARG4, #~LJ_TISNUM | ||
1535 | | ldr PC, [BASE, FRAME_PC] | ||
1536 | | vstr d0, [BASE, #-8] | ||
1537 | | mov RC, #(2+1)*8 | ||
1538 | | strd CARG34, [BASE] | ||
1539 | | b ->fff_res | ||
1540 | |.else | ||
1432 | |.ffunc_n math_frexp | 1541 | |.ffunc_n math_frexp |
1433 | | mov CARG3, sp | 1542 | | mov CARG3, sp |
1434 | | .IOS mov RA, BASE | 1543 | | .IOS mov RA, BASE |
@@ -1441,7 +1550,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1441 | | mov RC, #(2+1)*8 | 1550 | | mov RC, #(2+1)*8 |
1442 | | strd CARG34, [BASE] | 1551 | | strd CARG34, [BASE] |
1443 | | b ->fff_res | 1552 | | b ->fff_res |
1553 | |.endif | ||
1444 | | | 1554 | | |
1555 | |.if HFABI | ||
1556 | |.ffunc_d math_modf | ||
1557 | | sub CARG1, BASE, #8 | ||
1558 | | ldr PC, [BASE, FRAME_PC] | ||
1559 | | .IOS mov RA, BASE | ||
1560 | | bl extern modf | ||
1561 | | .IOS mov BASE, RA | ||
1562 | | mov RC, #(2+1)*8 | ||
1563 | | vstr d0, [BASE] | ||
1564 | | b ->fff_res | ||
1565 | |.else | ||
1445 | |.ffunc_n math_modf | 1566 | |.ffunc_n math_modf |
1446 | | sub CARG3, BASE, #8 | 1567 | | sub CARG3, BASE, #8 |
1447 | | ldr PC, [BASE, FRAME_PC] | 1568 | | ldr PC, [BASE, FRAME_PC] |
@@ -1451,8 +1572,56 @@ static void build_subroutines(BuildCtx *ctx) | |||
1451 | | mov RC, #(2+1)*8 | 1572 | | mov RC, #(2+1)*8 |
1452 | | strd CARG12, [BASE] | 1573 | | strd CARG12, [BASE] |
1453 | | b ->fff_res | 1574 | | b ->fff_res |
1575 | |.endif | ||
1454 | | | 1576 | | |
1455 | |.macro math_minmax, name, cond, fcond | 1577 | |.macro math_minmax, name, cond, fcond |
1578 | |.if FPU | ||
1579 | | .ffunc_1 name | ||
1580 | | add RB, BASE, RC | ||
1581 | | checktp CARG2, LJ_TISNUM | ||
1582 | | add RA, BASE, #8 | ||
1583 | | bne >4 | ||
1584 | |1: // Handle integers. | ||
1585 | | ldrd CARG34, [RA] | ||
1586 | | cmp RA, RB | ||
1587 | | bhs ->fff_restv | ||
1588 | | checktp CARG4, LJ_TISNUM | ||
1589 | | bne >3 | ||
1590 | | cmp CARG1, CARG3 | ||
1591 | | add RA, RA, #8 | ||
1592 | | mov..cond CARG1, CARG3 | ||
1593 | | b <1 | ||
1594 | |3: // Convert intermediate result to number and continue below. | ||
1595 | | vmov s4, CARG1 | ||
1596 | | bhi ->fff_fallback | ||
1597 | | vldr d1, [RA] | ||
1598 | | vcvt.f64.s32 d0, s4 | ||
1599 | | b >6 | ||
1600 | | | ||
1601 | |4: | ||
1602 | | vldr d0, [BASE] | ||
1603 | | bhi ->fff_fallback | ||
1604 | |5: // Handle numbers. | ||
1605 | | ldrd CARG34, [RA] | ||
1606 | | vldr d1, [RA] | ||
1607 | | cmp RA, RB | ||
1608 | | bhs ->fff_resd | ||
1609 | | checktp CARG4, LJ_TISNUM | ||
1610 | | bhs >7 | ||
1611 | |6: | ||
1612 | | vcmp.f64 d0, d1 | ||
1613 | | vmrs | ||
1614 | | add RA, RA, #8 | ||
1615 | | vmov..fcond.f64 d0, d1 | ||
1616 | | b <5 | ||
1617 | |7: // Convert integer to number and continue above. | ||
1618 | | vmov s4, CARG3 | ||
1619 | | bhi ->fff_fallback | ||
1620 | | vcvt.f64.s32 d1, s4 | ||
1621 | | b <6 | ||
1622 | | | ||
1623 | |.else | ||
1624 | | | ||
1456 | | .ffunc_1 name | 1625 | | .ffunc_1 name |
1457 | | checktp CARG2, LJ_TISNUM | 1626 | | checktp CARG2, LJ_TISNUM |
1458 | | mov RA, #8 | 1627 | | mov RA, #8 |
@@ -1467,9 +1636,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
1467 | | add RA, RA, #8 | 1636 | | add RA, RA, #8 |
1468 | | mov..cond CARG1, CARG3 | 1637 | | mov..cond CARG1, CARG3 |
1469 | | b <1 | 1638 | | b <1 |
1470 | |3: | 1639 | |3: // Convert intermediate result to number and continue below. |
1471 | | bhi ->fff_fallback | 1640 | | bhi ->fff_fallback |
1472 | | // Convert intermediate result to number and continue below. | ||
1473 | | bl extern __aeabi_i2d | 1641 | | bl extern __aeabi_i2d |
1474 | | ldrd CARG34, [BASE, RA] | 1642 | | ldrd CARG34, [BASE, RA] |
1475 | | b >6 | 1643 | | b >6 |
@@ -1495,6 +1663,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1495 | | bl extern __aeabi_i2d | 1663 | | bl extern __aeabi_i2d |
1496 | | ldrd CARG34, TMPD | 1664 | | ldrd CARG34, TMPD |
1497 | | b <6 | 1665 | | b <6 |
1666 | |.endif | ||
1498 | |.endmacro | 1667 | |.endmacro |
1499 | | | 1668 | | |
1500 | | math_minmax math_min, gt, hi | 1669 | | math_minmax math_min, gt, hi |
@@ -1959,6 +2128,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1959 | | ldr CARG2, [CARG1, #-4]! // Get exit instruction. | 2128 | | ldr CARG2, [CARG1, #-4]! // Get exit instruction. |
1960 | | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. | 2129 | | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. |
1961 | | str CARG1, [sp, #60] | 2130 | | str CARG1, [sp, #60] |
2131 | |.if FPU | ||
2132 | | vpush {d0-d15} | ||
2133 | |.endif | ||
1962 | | lsl CARG2, CARG2, #8 | 2134 | | lsl CARG2, CARG2, #8 |
1963 | | add CARG1, CARG1, CARG2, asr #6 | 2135 | | add CARG1, CARG1, CARG2, asr #6 |
1964 | | ldr CARG2, [lr, #4] // Load exit stub group offset. | 2136 | | ldr CARG2, [lr, #4] // Load exit stub group offset. |
@@ -2025,8 +2197,53 @@ static void build_subroutines(BuildCtx *ctx) | |||
2025 | |// FP value rounding. Called from JIT code. | 2197 | |// FP value rounding. Called from JIT code. |
2026 | |// | 2198 | |// |
2027 | |// double lj_vm_floor/ceil/trunc(double x); | 2199 | |// double lj_vm_floor/ceil/trunc(double x); |
2028 | |.macro vm_round, func | 2200 | |.macro vm_round, func, hf |
2029 | |->vm_ .. func: | 2201 | |.if FPU |
2202 | |.if hf == 0 | ||
2203 | | vmov d0, CARG1, CARG2 | ||
2204 | | vldr d2, <8 // 2^52 | ||
2205 | |.else | ||
2206 | | vldr d2, <8 // 2^52 | ||
2207 | | vmov CARG1, CARG2, d0 | ||
2208 | |.endif | ||
2209 | | vabs.f64 d1, d0 | ||
2210 | | vcmp.f64 d1, d2 // |x| >= 2^52 or NaN? | ||
2211 | | vmrs | ||
2212 | |.if "func" == "trunc" | ||
2213 | | vadd.f64 d0, d1, d2 | ||
2214 | | bxpl lr // Return argument unchanged. | ||
2215 | | vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52 | ||
2216 | | vldr d2, <9 // +1.0 | ||
2217 | | vcmp.f64 d1, d0 // |x| < result: subtract +1.0 | ||
2218 | | vmrs | ||
2219 | | vsubmi.f64 d0, d1, d2 | ||
2220 | | cmp CARG2, #0 | ||
2221 | | vnegmi.f64 d0, d0 // Merge sign bit back in. | ||
2222 | |.else | ||
2223 | | vadd.f64 d1, d1, d2 | ||
2224 | | bxpl lr // Return argument unchanged. | ||
2225 | | cmp CARG2, #0 | ||
2226 | | vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52 | ||
2227 | | vldr d2, <9 // +1.0 | ||
2228 | | vnegmi.f64 d1, d1 // Merge sign bit back in. | ||
2229 | |.if "func" == "floor" | ||
2230 | | vcmp.f64 d0, d1 // x < result: subtract +1.0. | ||
2231 | | vmrs | ||
2232 | | vsubmi.f64 d0, d1, d2 | ||
2233 | |.else | ||
2234 | | vcmp.f64 d1, d0 // x > result: add +1.0. | ||
2235 | | vmrs | ||
2236 | | vaddmi.f64 d0, d1, d2 | ||
2237 | |.endif | ||
2238 | | vmovpl.f64 d0, d1 | ||
2239 | |.endif | ||
2240 | |.if hf == 0 | ||
2241 | | vmov CARG1, CARG2, d0 | ||
2242 | |.endif | ||
2243 | | bx lr | ||
2244 | | | ||
2245 | |.else | ||
2246 | | | ||
2030 | | lsl CARG3, CARG2, #1 | 2247 | | lsl CARG3, CARG2, #1 |
2031 | | adds RB, CARG3, #0x00200000 | 2248 | | adds RB, CARG3, #0x00200000 |
2032 | | bpl >2 // |x| < 1? | 2249 | | bpl >2 // |x| < 1? |
@@ -2069,15 +2286,40 @@ static void build_subroutines(BuildCtx *ctx) | |||
2069 | | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) | 2286 | | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) |
2070 | | orrne CARG2, CARG2, CARG4 | 2287 | | orrne CARG2, CARG2, CARG4 |
2071 | | bx lr | 2288 | | bx lr |
2289 | |.endif | ||
2072 | |.endmacro | 2290 | |.endmacro |
2073 | | | 2291 | | |
2292 | |.if FPU | ||
2293 | |.align 8 | ||
2294 | |9: | ||
2295 | | .long 0, 0x3ff00000 // +1.0 | ||
2296 | |8: | ||
2297 | | .long 0, 0x43300000 // 2^52 | ||
2298 | |.else | ||
2074 | |9: | 2299 | |9: |
2075 | | .long 0x3ff00000 // hiword(1.0) | 2300 | | .long 0x3ff00000 // hiword(+1.0) |
2076 | | vm_round floor | 2301 | |.endif |
2077 | | vm_round ceil | 2302 | | |
2303 | |->vm_floor: | ||
2304 | |.if not HFABI | ||
2305 | | vm_round floor, 0 | ||
2306 | |.endif | ||
2307 | |->vm_floor_hf: | ||
2308 | |.if FPU | ||
2309 | | vm_round floor, 1 | ||
2310 | |.endif | ||
2311 | | | ||
2312 | |->vm_ceil: | ||
2313 | |.if not HFABI | ||
2314 | | vm_round ceil, 0 | ||
2315 | |.endif | ||
2316 | |->vm_ceil_hf: | ||
2317 | |.if FPU | ||
2318 | | vm_round ceil, 1 | ||
2319 | |.endif | ||
2078 | | | 2320 | | |
2079 | |->vm_trunc: | 2321 | |->vm_trunc: |
2080 | |.if JIT | 2322 | |.if JIT and not HFABI |
2081 | | lsl CARG3, CARG2, #1 | 2323 | | lsl CARG3, CARG2, #1 |
2082 | | adds RB, CARG3, #0x00200000 | 2324 | | adds RB, CARG3, #0x00200000 |
2083 | | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. | 2325 | | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. |
@@ -2093,8 +2335,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
2093 | | bx lr | 2335 | | bx lr |
2094 | |.endif | 2336 | |.endif |
2095 | | | 2337 | | |
2338 | |->vm_trunc_hf: | ||
2339 | |.if JIT and FPU | ||
2340 | | vm_round trunc, 1 | ||
2341 | |.endif | ||
2342 | | | ||
2096 | | // double lj_vm_mod(double dividend, double divisor); | 2343 | | // double lj_vm_mod(double dividend, double divisor); |
2097 | |->vm_mod: | 2344 | |->vm_mod: |
2345 | |.if FPU | ||
2346 | | // Special calling convention. Also, RC (r11) is not preserved. | ||
2347 | | vdiv.f64 d0, d6, d7 | ||
2348 | | mov RC, lr | ||
2349 | | bl ->vm_floor_hf | ||
2350 | | vmul.f64 d0, d0, d7 | ||
2351 | | mov lr, RC | ||
2352 | | vsub.f64 d6, d6, d0 | ||
2353 | | bx lr | ||
2354 | |.else | ||
2098 | | push {r0, r1, r2, r3, r4, lr} | 2355 | | push {r0, r1, r2, r3, r4, lr} |
2099 | | bl extern __aeabi_ddiv | 2356 | | bl extern __aeabi_ddiv |
2100 | | bl ->vm_floor | 2357 | | bl ->vm_floor |
@@ -2105,6 +2362,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2105 | | bl extern __aeabi_dadd | 2362 | | bl extern __aeabi_dadd |
2106 | | add sp, sp, #20 | 2363 | | add sp, sp, #20 |
2107 | | pop {pc} | 2364 | | pop {pc} |
2365 | |.endif | ||
2108 | | | 2366 | | |
2109 | | // int lj_vm_modi(int dividend, int divisor); | 2367 | | // int lj_vm_modi(int dividend, int divisor); |
2110 | |->vm_modi: | 2368 | |->vm_modi: |
@@ -2266,6 +2524,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2266 | | ins_next | 2524 | | ins_next |
2267 | | | 2525 | | |
2268 | |3: // CARG12 is not an integer. | 2526 | |3: // CARG12 is not an integer. |
2527 | |.if FPU | ||
2528 | | vldr d0, [RA] | ||
2529 | | bhi ->vmeta_comp | ||
2530 | | // d0 is a number. | ||
2531 | | checktp CARG4, LJ_TISNUM | ||
2532 | | vldr d1, [RC] | ||
2533 | | blo >5 | ||
2534 | | // d0 is a number, CARG3 is an integer. | ||
2535 | | vmov s4, CARG3 | ||
2536 | | vcvt.f64.s32 d1, s4 | ||
2537 | | b >5 | ||
2538 | |4: // CARG1 is an integer, CARG34 is not an integer. | ||
2539 | | vldr d1, [RC] | ||
2540 | | bhi ->vmeta_comp | ||
2541 | | // CARG1 is an integer, d1 is a number. | ||
2542 | | vmov s4, CARG1 | ||
2543 | | vcvt.f64.s32 d0, s4 | ||
2544 | |5: // d0 and d1 are numbers. | ||
2545 | | vcmp.f64 d0, d1 | ||
2546 | | vmrs | ||
2547 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | ||
2548 | if (op == BC_ISLT) { | ||
2549 | | sublo PC, RB, #0x20000 | ||
2550 | } else if (op == BC_ISGE) { | ||
2551 | | subhs PC, RB, #0x20000 | ||
2552 | } else if (op == BC_ISLE) { | ||
2553 | | subls PC, RB, #0x20000 | ||
2554 | } else { | ||
2555 | | subhi PC, RB, #0x20000 | ||
2556 | } | ||
2557 | | b <1 | ||
2558 | |.else | ||
2269 | | bhi ->vmeta_comp | 2559 | | bhi ->vmeta_comp |
2270 | | // CARG12 is a number. | 2560 | | // CARG12 is a number. |
2271 | | checktp CARG4, LJ_TISNUM | 2561 | | checktp CARG4, LJ_TISNUM |
@@ -2282,7 +2572,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2282 | | b >5 | 2572 | | b >5 |
2283 | |4: // CARG1 is an integer, CARG34 is not an integer. | 2573 | |4: // CARG1 is an integer, CARG34 is not an integer. |
2284 | | bhi ->vmeta_comp | 2574 | | bhi ->vmeta_comp |
2285 | | // CARG1 is an integer, CARG34 is a number | 2575 | | // CARG1 is an integer, CARG34 is a number. |
2286 | | mov RA, RB // Save RB. | 2576 | | mov RA, RB // Save RB. |
2287 | | bl extern __aeabi_i2d | 2577 | | bl extern __aeabi_i2d |
2288 | | ldrd CARG34, [RC] // Restore second operand. | 2578 | | ldrd CARG34, [RC] // Restore second operand. |
@@ -2299,6 +2589,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2299 | | subhi PC, RA, #0x20000 | 2589 | | subhi PC, RA, #0x20000 |
2300 | } | 2590 | } |
2301 | | b <1 | 2591 | | b <1 |
2592 | |.endif | ||
2302 | break; | 2593 | break; |
2303 | 2594 | ||
2304 | case BC_ISEQV: case BC_ISNEV: | 2595 | case BC_ISEQV: case BC_ISNEV: |
@@ -2439,6 +2730,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2439 | } | 2730 | } |
2440 | | bhi <2 | 2731 | | bhi <2 |
2441 | |.endif | 2732 | |.endif |
2733 | |.if FPU | ||
2734 | | checktp CARG4, LJ_TISNUM | ||
2735 | | vmov s4, CARG3 | ||
2736 | | vldr d0, [RA] | ||
2737 | | vldrlo d1, [RC] | ||
2738 | | vcvths.f64.s32 d1, s4 | ||
2739 | | b >5 | ||
2740 | |4: // CARG1 is an integer, d1 is a number. | ||
2741 | | vmov s4, CARG1 | ||
2742 | | vldr d1, [RC] | ||
2743 | | vcvt.f64.s32 d0, s4 | ||
2744 | |5: // d0 and d1 are numbers. | ||
2745 | | vcmp.f64 d0, d1 | ||
2746 | | vmrs | ||
2747 | if (vk) { | ||
2748 | | subeq PC, RB, #0x20000 | ||
2749 | } else { | ||
2750 | | subne PC, RB, #0x20000 | ||
2751 | } | ||
2752 | | b <2 | ||
2753 | |.else | ||
2442 | | // CARG12 is a number. | 2754 | | // CARG12 is a number. |
2443 | | checktp CARG4, LJ_TISNUM | 2755 | | checktp CARG4, LJ_TISNUM |
2444 | | movlo RA, RB // Save RB. | 2756 | | movlo RA, RB // Save RB. |
@@ -2458,6 +2770,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2458 | | subne PC, RA, #0x20000 | 2770 | | subne PC, RA, #0x20000 |
2459 | } | 2771 | } |
2460 | | b <2 | 2772 | | b <2 |
2773 | |.endif | ||
2461 | | | 2774 | | |
2462 | |.if FFI | 2775 | |.if FFI |
2463 | |7: | 2776 | |7: |
@@ -2617,20 +2930,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2617 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 2930 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
2618 | ||switch (vk) { | 2931 | ||switch (vk) { |
2619 | ||case 0: | 2932 | ||case 0: |
2933 | | .if FPU | ||
2934 | | ldrd CARG12, [RB, BASE]! | ||
2935 | | ldrd CARG34, [RC, KBASE]! | ||
2936 | | .else | ||
2620 | | ldrd CARG12, [BASE, RB] | 2937 | | ldrd CARG12, [BASE, RB] |
2621 | | ldrd CARG34, [KBASE, RC] | 2938 | | ldrd CARG34, [KBASE, RC] |
2939 | | .endif | ||
2622 | || break; | 2940 | || break; |
2623 | ||case 1: | 2941 | ||case 1: |
2942 | | .if FPU | ||
2943 | | ldrd CARG34, [RB, BASE]! | ||
2944 | | ldrd CARG12, [RC, KBASE]! | ||
2945 | | .else | ||
2624 | | ldrd CARG34, [BASE, RB] | 2946 | | ldrd CARG34, [BASE, RB] |
2625 | | ldrd CARG12, [KBASE, RC] | 2947 | | ldrd CARG12, [KBASE, RC] |
2948 | | .endif | ||
2626 | || break; | 2949 | || break; |
2627 | ||default: | 2950 | ||default: |
2951 | | .if FPU | ||
2952 | | ldrd CARG12, [RB, BASE]! | ||
2953 | | ldrd CARG34, [RC, BASE]! | ||
2954 | | .else | ||
2628 | | ldrd CARG12, [BASE, RB] | 2955 | | ldrd CARG12, [BASE, RB] |
2629 | | ldrd CARG34, [BASE, RC] | 2956 | | ldrd CARG34, [BASE, RC] |
2957 | | .endif | ||
2630 | || break; | 2958 | || break; |
2631 | ||} | 2959 | ||} |
2632 | |.endmacro | 2960 | |.endmacro |
2633 | | | 2961 | | |
2962 | |.macro ins_arithpre_fpu, reg1, reg2 | ||
2963 | |.if FPU | ||
2964 | ||if (vk == 1) { | ||
2965 | | vldr reg2, [RB] | ||
2966 | | vldr reg1, [RC] | ||
2967 | ||} else { | ||
2968 | | vldr reg1, [RB] | ||
2969 | | vldr reg2, [RC] | ||
2970 | ||} | ||
2971 | |.endif | ||
2972 | |.endmacro | ||
2973 | | | ||
2974 | |.macro ins_arithpost_fpu, reg | ||
2975 | | ins_next1 | ||
2976 | | add RA, BASE, RA | ||
2977 | | ins_next2 | ||
2978 | | vstr reg, [RA] | ||
2979 | | ins_next3 | ||
2980 | |.endmacro | ||
2981 | | | ||
2634 | |.macro ins_arithfallback, ins | 2982 | |.macro ins_arithfallback, ins |
2635 | ||switch (vk) { | 2983 | ||switch (vk) { |
2636 | ||case 0: | 2984 | ||case 0: |
@@ -2645,9 +2993,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2645 | ||} | 2993 | ||} |
2646 | |.endmacro | 2994 | |.endmacro |
2647 | | | 2995 | | |
2648 | |.macro ins_arithdn, intins, fpcall | 2996 | |.macro ins_arithdn, intins, fpins, fpcall |
2649 | | ins_arithpre | 2997 | | ins_arithpre |
2650 | |.if "intins" ~= "vm_modi" | 2998 | |.if "intins" ~= "vm_modi" and not FPU |
2651 | | ins_next1 | 2999 | | ins_next1 |
2652 | |.endif | 3000 | |.endif |
2653 | | ins_arithcheck_int >5 | 3001 | | ins_arithcheck_int >5 |
@@ -2665,57 +3013,74 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2665 | | ins_arithfallback bvs | 3013 | | ins_arithfallback bvs |
2666 | |.endif | 3014 | |.endif |
2667 | |4: | 3015 | |4: |
2668 | |.if "intins" == "vm_modi" | 3016 | |.if "intins" == "vm_modi" or FPU |
2669 | | ins_next1 | 3017 | | ins_next1 |
2670 | |.endif | 3018 | |.endif |
2671 | | ins_next2 | 3019 | | ins_next2 |
2672 | | strd CARG12, [BASE, RA] | 3020 | | strd CARG12, [BASE, RA] |
2673 | | ins_next3 | 3021 | | ins_next3 |
2674 | |5: // FP variant. | 3022 | |5: // FP variant. |
3023 | | ins_arithpre_fpu d6, d7 | ||
2675 | | ins_arithfallback ins_arithcheck_num | 3024 | | ins_arithfallback ins_arithcheck_num |
3025 | |.if FPU | ||
2676 | |.if "intins" == "vm_modi" | 3026 | |.if "intins" == "vm_modi" |
2677 | | bl fpcall | 3027 | | bl fpcall |
2678 | |.else | 3028 | |.else |
3029 | | fpins d6, d6, d7 | ||
3030 | |.endif | ||
3031 | | ins_arithpost_fpu d6 | ||
3032 | |.else | ||
2679 | | bl fpcall | 3033 | | bl fpcall |
2680 | | ins_next1 | 3034 | |.if "intins" ~= "vm_modi" |
3035 | | ins_next1 | ||
2681 | |.endif | 3036 | |.endif |
2682 | | b <4 | 3037 | | b <4 |
3038 | |.endif | ||
2683 | |.endmacro | 3039 | |.endmacro |
2684 | | | 3040 | | |
2685 | |.macro ins_arithfp, fpcall | 3041 | |.macro ins_arithfp, fpins, fpcall |
2686 | | ins_arithpre | 3042 | | ins_arithpre |
3043 | |.if "fpins" ~= "extern" or HFABI | ||
3044 | | ins_arithpre_fpu d0, d1 | ||
3045 | |.endif | ||
2687 | | ins_arithfallback ins_arithcheck_num | 3046 | | ins_arithfallback ins_arithcheck_num |
2688 | |.if "fpcall" == "extern pow" | 3047 | |.if "fpins" == "extern" |
2689 | | .IOS mov RC, BASE | 3048 | | .IOS mov RC, BASE |
2690 | | bl fpcall | 3049 | | bl fpcall |
2691 | | .IOS mov BASE, RC | 3050 | | .IOS mov BASE, RC |
3051 | |.elif FPU | ||
3052 | | fpins d0, d0, d1 | ||
2692 | |.else | 3053 | |.else |
2693 | | bl fpcall | 3054 | | bl fpcall |
2694 | |.endif | 3055 | |.endif |
3056 | |.if ("fpins" ~= "extern" or HFABI) and FPU | ||
3057 | | ins_arithpost_fpu d0 | ||
3058 | |.else | ||
2695 | | ins_next1 | 3059 | | ins_next1 |
2696 | | ins_next2 | 3060 | | ins_next2 |
2697 | | strd CARG12, [BASE, RA] | 3061 | | strd CARG12, [BASE, RA] |
2698 | | ins_next3 | 3062 | | ins_next3 |
3063 | |.endif | ||
2699 | |.endmacro | 3064 | |.endmacro |
2700 | 3065 | ||
2701 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3066 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
2702 | | ins_arithdn adds, extern __aeabi_dadd | 3067 | | ins_arithdn adds, vadd.f64, extern __aeabi_dadd |
2703 | break; | 3068 | break; |
2704 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3069 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
2705 | | ins_arithdn subs, extern __aeabi_dsub | 3070 | | ins_arithdn subs, vsub.f64, extern __aeabi_dsub |
2706 | break; | 3071 | break; |
2707 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3072 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
2708 | | ins_arithdn smull, extern __aeabi_dmul | 3073 | | ins_arithdn smull, vmul.f64, extern __aeabi_dmul |
2709 | break; | 3074 | break; |
2710 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3075 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
2711 | | ins_arithfp extern __aeabi_ddiv | 3076 | | ins_arithfp vdiv.f64, extern __aeabi_ddiv |
2712 | break; | 3077 | break; |
2713 | case BC_MODVN: case BC_MODNV: case BC_MODVV: | 3078 | case BC_MODVN: case BC_MODNV: case BC_MODVV: |
2714 | | ins_arithdn vm_modi, ->vm_mod | 3079 | | ins_arithdn vm_modi, vm_mod, ->vm_mod |
2715 | break; | 3080 | break; |
2716 | case BC_POW: | 3081 | case BC_POW: |
2717 | | // NYI: (partial) integer arithmetic. | 3082 | | // NYI: (partial) integer arithmetic. |
2718 | | ins_arithfp extern pow | 3083 | | ins_arithfp extern, extern pow |
2719 | break; | 3084 | break; |
2720 | 3085 | ||
2721 | case BC_CAT: | 3086 | case BC_CAT: |
@@ -3775,20 +4140,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3775 | | cmnlo CARG4, #-LJ_TISNUM | 4140 | | cmnlo CARG4, #-LJ_TISNUM |
3776 | | cmnlo RB, #-LJ_TISNUM | 4141 | | cmnlo RB, #-LJ_TISNUM |
3777 | | bhs ->vmeta_for | 4142 | | bhs ->vmeta_for |
4143 | |.if FPU | ||
4144 | | vldr d0, FOR_IDX | ||
4145 | | vldr d1, FOR_STOP | ||
4146 | | cmp RB, #0 | ||
4147 | | vstr d0, FOR_EXT | ||
4148 | |.else | ||
3778 | | cmp RB, #0 | 4149 | | cmp RB, #0 |
3779 | | strd CARG12, FOR_IDX | ||
3780 | | strd CARG12, FOR_EXT | 4150 | | strd CARG12, FOR_EXT |
3781 | | blt >8 | 4151 | | blt >8 |
4152 | |.endif | ||
3782 | } else { | 4153 | } else { |
4154 | |.if FPU | ||
4155 | | vldr d0, FOR_IDX | ||
4156 | | vldr d2, FOR_STEP | ||
4157 | | vldr d1, FOR_STOP | ||
4158 | | cmp CARG4, #0 | ||
4159 | | vadd.f64 d0, d0, d2 | ||
4160 | |.else | ||
3783 | | cmp CARG4, #0 | 4161 | | cmp CARG4, #0 |
3784 | | blt >8 | 4162 | | blt >8 |
3785 | | bl extern __aeabi_dadd | 4163 | | bl extern __aeabi_dadd |
3786 | | strd CARG12, FOR_IDX | 4164 | | strd CARG12, FOR_IDX |
3787 | | ldrd CARG34, FOR_STOP | 4165 | | ldrd CARG34, FOR_STOP |
3788 | | strd CARG12, FOR_EXT | 4166 | | strd CARG12, FOR_EXT |
4167 | |.endif | ||
3789 | } | 4168 | } |
3790 | |6: | 4169 | |6: |
4170 | |.if FPU | ||
4171 | | vcmpge.f64 d0, d1 | ||
4172 | | vcmplt.f64 d1, d0 | ||
4173 | | vmrs | ||
4174 | |.else | ||
3791 | | bl extern __aeabi_cdcmple | 4175 | | bl extern __aeabi_cdcmple |
4176 | |.endif | ||
4177 | if (vk) { | ||
4178 | |.if FPU | ||
4179 | | vstr d0, FOR_IDX | ||
4180 | | vstr d0, FOR_EXT | ||
4181 | |.endif | ||
4182 | } | ||
3792 | if (op == BC_FORI) { | 4183 | if (op == BC_FORI) { |
3793 | | subhi PC, RC, #0x20000 | 4184 | | subhi PC, RC, #0x20000 |
3794 | } else if (op == BC_JFORI) { | 4185 | } else if (op == BC_JFORI) { |
@@ -3804,6 +4195,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3804 | | ins_next2 | 4195 | | ins_next2 |
3805 | | b <3 | 4196 | | b <3 |
3806 | | | 4197 | | |
4198 | |.if not FPU | ||
3807 | |8: // Invert check for negative step. | 4199 | |8: // Invert check for negative step. |
3808 | if (vk) { | 4200 | if (vk) { |
3809 | | bl extern __aeabi_dadd | 4201 | | bl extern __aeabi_dadd |
@@ -3814,6 +4206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3814 | | mov CARG4, CARG2 | 4206 | | mov CARG4, CARG2 |
3815 | | ldrd CARG12, FOR_STOP | 4207 | | ldrd CARG12, FOR_STOP |
3816 | | b <6 | 4208 | | b <6 |
4209 | |.endif | ||
3817 | break; | 4210 | break; |
3818 | 4211 | ||
3819 | case BC_ITERL: | 4212 | case BC_ITERL: |
@@ -4048,8 +4441,14 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4048 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | 4441 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ |
4049 | "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ | 4442 | "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ |
4050 | fcofs, CFRAME_SIZE); | 4443 | fcofs, CFRAME_SIZE); |
4051 | for (i = 11; i >= 4; i--) /* offset r4-r11 */ | 4444 | for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */ |
4052 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); | 4445 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); |
4446 | #if LJ_ARCH_HASFPU | ||
4447 | for (i = 15; i >= 8; i--) /* offset d8-d15 */ | ||
4448 | fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", | ||
4449 | 64+2*i, 10+2*(15-i)); | ||
4450 | fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ | ||
4451 | #endif | ||
4053 | fprintf(ctx->fp, | 4452 | fprintf(ctx->fp, |
4054 | "\t.align 2\n" | 4453 | "\t.align 2\n" |
4055 | ".LEFDE0:\n\n"); | 4454 | ".LEFDE0:\n\n"); |