diff options
| author | Mike Pall <mike> | 2012-07-30 18:59:13 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2012-07-30 18:59:13 +0200 |
| commit | a373fddbd3b129f3f95474533e74f0a52744ff8c (patch) | |
| tree | 9dc1e4ee3eae94a289278b246ff659d8b63cae6d /src | |
| parent | 23abbd9ef344289d1dae6d8fcf9d3c0ab8e1e6e1 (diff) | |
| download | luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.tar.gz luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.tar.bz2 luajit-a373fddbd3b129f3f95474533e74f0a52744ff8c.zip | |
ARM: Add VFP and hard-float ABI variants to interpreter.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_frame.h | 4 | ||||
| -rw-r--r-- | src/lj_target_arm.h | 11 | ||||
| -rw-r--r-- | src/vm_arm.dasc | 447 |
3 files changed, 434 insertions, 28 deletions
diff --git a/src/lj_frame.h b/src/lj_frame.h index b8429c2a..b8af2349 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h | |||
| @@ -97,7 +97,11 @@ enum { | |||
| 97 | #define CFRAME_OFS_L 12 | 97 | #define CFRAME_OFS_L 12 |
| 98 | #define CFRAME_OFS_PC 8 | 98 | #define CFRAME_OFS_PC 8 |
| 99 | #define CFRAME_OFS_MULTRES 4 | 99 | #define CFRAME_OFS_MULTRES 4 |
| 100 | #if LJ_ARCH_HASFPU | ||
| 101 | #define CFRAME_SIZE 128 | ||
| 102 | #else | ||
| 100 | #define CFRAME_SIZE 64 | 103 | #define CFRAME_SIZE 64 |
| 104 | #endif | ||
| 101 | #define CFRAME_SHIFT_MULTRES 3 | 105 | #define CFRAME_SHIFT_MULTRES 3 |
| 102 | #elif LJ_TARGET_PPC | 106 | #elif LJ_TARGET_PPC |
| 103 | #if LJ_ARCH_PPC64 | 107 | #if LJ_ARCH_PPC64 |
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index a24fc819..20e8ad36 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h | |||
| @@ -14,7 +14,9 @@ | |||
| 14 | #if LJ_SOFTFP | 14 | #if LJ_SOFTFP |
| 15 | #define FPRDEF(_) | 15 | #define FPRDEF(_) |
| 16 | #else | 16 | #else |
| 17 | #error "NYI: hard-float support for ARM" | 17 | #define FPRDEF(_) \ |
| 18 | _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ | ||
| 19 | _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) | ||
| 18 | #endif | 20 | #endif |
| 19 | #define VRIDDEF(_) | 21 | #define VRIDDEF(_) |
| 20 | 22 | ||
| @@ -45,7 +47,7 @@ enum { | |||
| 45 | #if LJ_SOFTFP | 47 | #if LJ_SOFTFP |
| 46 | RID_MAX_FPR = RID_MIN_FPR, | 48 | RID_MAX_FPR = RID_MIN_FPR, |
| 47 | #else | 49 | #else |
| 48 | #error "NYI: VFP support for ARM" | 50 | RID_MAX_FPR = RID_D15+1, |
| 49 | #endif | 51 | #endif |
| 50 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | 52 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, |
| 51 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR | 53 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR |
| @@ -68,7 +70,8 @@ enum { | |||
| 68 | #define RSET_FPR 0 | 70 | #define RSET_FPR 0 |
| 69 | #define RSET_ALL RSET_GPR | 71 | #define RSET_ALL RSET_GPR |
| 70 | #else | 72 | #else |
| 71 | #error "NYI: VFP support for ARM" | 73 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) |
| 74 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
| 72 | #endif | 75 | #endif |
| 73 | #define RSET_INIT RSET_ALL | 76 | #define RSET_INIT RSET_ALL |
| 74 | 77 | ||
| @@ -82,7 +85,7 @@ enum { | |||
| 82 | #if LJ_SOFTFP | 85 | #if LJ_SOFTFP |
| 83 | #define RSET_SCRATCH_FPR 0 | 86 | #define RSET_SCRATCH_FPR 0 |
| 84 | #else | 87 | #else |
| 85 | #error "NYI: VFP support for ARM" | 88 | #define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) |
| 86 | #endif | 89 | #endif |
| 87 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) | 90 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) |
| 88 | #define REGARG_FIRSTGPR RID_R0 | 91 | #define REGARG_FIRSTGPR RID_R0 |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 8ddce49e..26f97aa3 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | |.define CRET2, r1 | 46 | |.define CRET2, r1 |
| 47 | | | 47 | | |
| 48 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 48 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
| 49 | |.define SAVE_R4, [sp, #28] | ||
| 49 | |.define CFRAME_SPACE, #28 | 50 | |.define CFRAME_SPACE, #28 |
| 50 | |.define SAVE_ERRF, [sp, #24] | 51 | |.define SAVE_ERRF, [sp, #24] |
| 51 | |.define SAVE_NRES, [sp, #20] | 52 | |.define SAVE_NRES, [sp, #20] |
| @@ -60,6 +61,20 @@ | |||
| 60 | |.define TMPD, [sp] | 61 | |.define TMPD, [sp] |
| 61 | |.define TMPDp, sp | 62 | |.define TMPDp, sp |
| 62 | | | 63 | | |
| 64 | |.if FPU | ||
| 65 | |.macro saveregs | ||
| 66 | | push {r5, r6, r7, r8, r9, r10, r11, lr} | ||
| 67 | | vpush {d8-d15} | ||
| 68 | | sub sp, sp, CFRAME_SPACE+4 | ||
| 69 | | str r4, SAVE_R4 | ||
| 70 | |.endmacro | ||
| 71 | |.macro restoreregs_ret | ||
| 72 | | ldr r4, SAVE_R4 | ||
| 73 | | add sp, sp, CFRAME_SPACE+4 | ||
| 74 | | vpop {d8-d15} | ||
| 75 | | pop {r5, r6, r7, r8, r9, r10, r11, pc} | ||
| 76 | |.endmacro | ||
| 77 | |.else | ||
| 63 | |.macro saveregs | 78 | |.macro saveregs |
| 64 | | push {r4, r5, r6, r7, r8, r9, r10, r11, lr} | 79 | | push {r4, r5, r6, r7, r8, r9, r10, r11, lr} |
| 65 | | sub sp, sp, CFRAME_SPACE | 80 | | sub sp, sp, CFRAME_SPACE |
| @@ -68,6 +83,7 @@ | |||
| 68 | | add sp, sp, CFRAME_SPACE | 83 | | add sp, sp, CFRAME_SPACE |
| 69 | | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} | 84 | | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} |
| 70 | |.endmacro | 85 | |.endmacro |
| 86 | |.endif | ||
| 71 | | | 87 | | |
| 72 | |// Type definitions. Some of these are only used for documentation. | 88 | |// Type definitions. Some of these are only used for documentation. |
| 73 | |.type L, lua_State, LREG | 89 | |.type L, lua_State, LREG |
| @@ -875,6 +891,29 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 875 | | bhs ->fff_fallback | 891 | | bhs ->fff_fallback |
| 876 | |.endmacro | 892 | |.endmacro |
| 877 | | | 893 | | |
| 894 | |.macro .ffunc_d, name | ||
| 895 | | .ffunc name | ||
| 896 | | ldr CARG2, [BASE, #4] | ||
| 897 | | cmp NARGS8:RC, #8 | ||
| 898 | | vldr d0, [BASE] | ||
| 899 | | blo ->fff_fallback | ||
| 900 | | checktp CARG2, LJ_TISNUM | ||
| 901 | | bhs ->fff_fallback | ||
| 902 | |.endmacro | ||
| 903 | | | ||
| 904 | |.macro .ffunc_dd, name | ||
| 905 | | .ffunc name | ||
| 906 | | ldr CARG2, [BASE, #4] | ||
| 907 | | ldr CARG4, [BASE, #12] | ||
| 908 | | cmp NARGS8:RC, #16 | ||
| 909 | | vldr d0, [BASE] | ||
| 910 | | vldr d1, [BASE, #8] | ||
| 911 | | blo ->fff_fallback | ||
| 912 | | checktp CARG2, LJ_TISNUM | ||
| 913 | | cmnlo CARG4, #-LJ_TISNUM | ||
| 914 | | bhs ->fff_fallback | ||
| 915 | |.endmacro | ||
| 916 | | | ||
| 878 | |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. | 917 | |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. |
| 879 | |.macro ffgccheck | 918 | |.macro ffgccheck |
| 880 | | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] | 919 | | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] |
| @@ -1327,8 +1366,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1327 | | movmi CARG1, #0x80000000 | 1366 | | movmi CARG1, #0x80000000 |
| 1328 | | bmi <1 | 1367 | | bmi <1 |
| 1329 | |4: | 1368 | |4: |
| 1369 | |.if HFABI | ||
| 1370 | | vmov d0, CARG1, CARG2 | ||
| 1371 | | bl ->vm_..func.._hf | ||
| 1372 | | b ->fff_resd | ||
| 1373 | |.else | ||
| 1330 | | bl ->vm_..func | 1374 | | bl ->vm_..func |
| 1331 | | b ->fff_restv | 1375 | | b ->fff_restv |
| 1376 | |.endif | ||
| 1332 | |.endmacro | 1377 | |.endmacro |
| 1333 | | | 1378 | | |
| 1334 | | math_round floor | 1379 | | math_round floor |
| @@ -1381,22 +1426,48 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1381 | | b <5 | 1426 | | b <5 |
| 1382 | | | 1427 | | |
| 1383 | |.macro math_extern, func | 1428 | |.macro math_extern, func |
| 1429 | |.if HFABI | ||
| 1430 | | .ffunc_d math_ .. func | ||
| 1431 | |.else | ||
| 1384 | | .ffunc_n math_ .. func | 1432 | | .ffunc_n math_ .. func |
| 1433 | |.endif | ||
| 1385 | | .IOS mov RA, BASE | 1434 | | .IOS mov RA, BASE |
| 1386 | | bl extern func | 1435 | | bl extern func |
| 1387 | | .IOS mov BASE, RA | 1436 | | .IOS mov BASE, RA |
| 1437 | |.if HFABI | ||
| 1438 | | b ->fff_resd | ||
| 1439 | |.else | ||
| 1388 | | b ->fff_restv | 1440 | | b ->fff_restv |
| 1441 | |.endif | ||
| 1389 | |.endmacro | 1442 | |.endmacro |
| 1390 | | | 1443 | | |
| 1391 | |.macro math_extern2, func | 1444 | |.macro math_extern2, func |
| 1445 | |.if HFABI | ||
| 1446 | | .ffunc_dd math_ .. func | ||
| 1447 | |.else | ||
| 1392 | | .ffunc_nn math_ .. func | 1448 | | .ffunc_nn math_ .. func |
| 1449 | |.endif | ||
| 1393 | | .IOS mov RA, BASE | 1450 | | .IOS mov RA, BASE |
| 1394 | | bl extern func | 1451 | | bl extern func |
| 1395 | | .IOS mov BASE, RA | 1452 | | .IOS mov BASE, RA |
| 1453 | |.if HFABI | ||
| 1454 | | b ->fff_resd | ||
| 1455 | |.else | ||
| 1396 | | b ->fff_restv | 1456 | | b ->fff_restv |
| 1457 | |.endif | ||
| 1397 | |.endmacro | 1458 | |.endmacro |
| 1398 | | | 1459 | | |
| 1460 | |.if FPU | ||
| 1461 | | .ffunc_d math_sqrt | ||
| 1462 | | vsqrt.f64 d0, d0 | ||
| 1463 | |->fff_resd: | ||
| 1464 | | ldr PC, [BASE, FRAME_PC] | ||
| 1465 | | vstr d0, [BASE, #-8] | ||
| 1466 | | b ->fff_res1 | ||
| 1467 | |.else | ||
| 1399 | | math_extern sqrt | 1468 | | math_extern sqrt |
| 1469 | |.endif | ||
| 1470 | | | ||
| 1400 | | math_extern log | 1471 | | math_extern log |
| 1401 | | math_extern log10 | 1472 | | math_extern log10 |
| 1402 | | math_extern exp | 1473 | | math_extern exp |
| @@ -1414,11 +1485,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1414 | | math_extern2 fmod | 1485 | | math_extern2 fmod |
| 1415 | | | 1486 | | |
| 1416 | |->ff_math_deg: | 1487 | |->ff_math_deg: |
| 1417 | |.ffunc_n math_rad | 1488 | |.if FPU |
| 1489 | | .ffunc_d math_rad | ||
| 1490 | | vldr d1, CFUNC:CARG3->upvalue[0] | ||
| 1491 | | vmul.f64 d0, d0, d1 | ||
| 1492 | | b ->fff_resd | ||
| 1493 | |.else | ||
| 1494 | | .ffunc_n math_rad | ||
| 1418 | | ldrd CARG34, CFUNC:CARG3->upvalue[0] | 1495 | | ldrd CARG34, CFUNC:CARG3->upvalue[0] |
| 1419 | | bl extern __aeabi_dmul | 1496 | | bl extern __aeabi_dmul |
| 1420 | | b ->fff_restv | 1497 | | b ->fff_restv |
| 1498 | |.endif | ||
| 1421 | | | 1499 | | |
| 1500 | |.if HFABI | ||
| 1501 | | .ffunc math_ldexp | ||
| 1502 | | ldr CARG4, [BASE, #4] | ||
| 1503 | | ldrd CARG12, [BASE, #8] | ||
| 1504 | | cmp NARGS8:RC, #16 | ||
| 1505 | | blo ->fff_fallback | ||
| 1506 | | vldr d0, [BASE] | ||
| 1507 | | checktp CARG4, LJ_TISNUM | ||
| 1508 | | bhs ->fff_fallback | ||
| 1509 | | checktp CARG2, LJ_TISNUM | ||
| 1510 | | bne ->fff_fallback | ||
| 1511 | | .IOS mov RA, BASE | ||
| 1512 | | bl extern ldexp // (double x, int exp) | ||
| 1513 | | .IOS mov BASE, RA | ||
| 1514 | | b ->fff_resd | ||
| 1515 | |.else | ||
| 1422 | |.ffunc_2 math_ldexp | 1516 | |.ffunc_2 math_ldexp |
| 1423 | | checktp CARG2, LJ_TISNUM | 1517 | | checktp CARG2, LJ_TISNUM |
| 1424 | | bhs ->fff_fallback | 1518 | | bhs ->fff_fallback |
| @@ -1428,7 +1522,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1428 | | bl extern ldexp // (double x, int exp) | 1522 | | bl extern ldexp // (double x, int exp) |
| 1429 | | .IOS mov BASE, RA | 1523 | | .IOS mov BASE, RA |
| 1430 | | b ->fff_restv | 1524 | | b ->fff_restv |
| 1525 | |.endif | ||
| 1431 | | | 1526 | | |
| 1527 | |.if HFABI | ||
| 1528 | |.ffunc_d math_frexp | ||
| 1529 | | mov CARG1, sp | ||
| 1530 | | .IOS mov RA, BASE | ||
| 1531 | | bl extern frexp | ||
| 1532 | | .IOS mov BASE, RA | ||
| 1533 | | ldr CARG3, [sp] | ||
| 1534 | | mvn CARG4, #~LJ_TISNUM | ||
| 1535 | | ldr PC, [BASE, FRAME_PC] | ||
| 1536 | | vstr d0, [BASE, #-8] | ||
| 1537 | | mov RC, #(2+1)*8 | ||
| 1538 | | strd CARG34, [BASE] | ||
| 1539 | | b ->fff_res | ||
| 1540 | |.else | ||
| 1432 | |.ffunc_n math_frexp | 1541 | |.ffunc_n math_frexp |
| 1433 | | mov CARG3, sp | 1542 | | mov CARG3, sp |
| 1434 | | .IOS mov RA, BASE | 1543 | | .IOS mov RA, BASE |
| @@ -1441,7 +1550,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1441 | | mov RC, #(2+1)*8 | 1550 | | mov RC, #(2+1)*8 |
| 1442 | | strd CARG34, [BASE] | 1551 | | strd CARG34, [BASE] |
| 1443 | | b ->fff_res | 1552 | | b ->fff_res |
| 1553 | |.endif | ||
| 1444 | | | 1554 | | |
| 1555 | |.if HFABI | ||
| 1556 | |.ffunc_d math_modf | ||
| 1557 | | sub CARG1, BASE, #8 | ||
| 1558 | | ldr PC, [BASE, FRAME_PC] | ||
| 1559 | | .IOS mov RA, BASE | ||
| 1560 | | bl extern modf | ||
| 1561 | | .IOS mov BASE, RA | ||
| 1562 | | mov RC, #(2+1)*8 | ||
| 1563 | | vstr d0, [BASE] | ||
| 1564 | | b ->fff_res | ||
| 1565 | |.else | ||
| 1445 | |.ffunc_n math_modf | 1566 | |.ffunc_n math_modf |
| 1446 | | sub CARG3, BASE, #8 | 1567 | | sub CARG3, BASE, #8 |
| 1447 | | ldr PC, [BASE, FRAME_PC] | 1568 | | ldr PC, [BASE, FRAME_PC] |
| @@ -1451,8 +1572,56 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1451 | | mov RC, #(2+1)*8 | 1572 | | mov RC, #(2+1)*8 |
| 1452 | | strd CARG12, [BASE] | 1573 | | strd CARG12, [BASE] |
| 1453 | | b ->fff_res | 1574 | | b ->fff_res |
| 1575 | |.endif | ||
| 1454 | | | 1576 | | |
| 1455 | |.macro math_minmax, name, cond, fcond | 1577 | |.macro math_minmax, name, cond, fcond |
| 1578 | |.if FPU | ||
| 1579 | | .ffunc_1 name | ||
| 1580 | | add RB, BASE, RC | ||
| 1581 | | checktp CARG2, LJ_TISNUM | ||
| 1582 | | add RA, BASE, #8 | ||
| 1583 | | bne >4 | ||
| 1584 | |1: // Handle integers. | ||
| 1585 | | ldrd CARG34, [RA] | ||
| 1586 | | cmp RA, RB | ||
| 1587 | | bhs ->fff_restv | ||
| 1588 | | checktp CARG4, LJ_TISNUM | ||
| 1589 | | bne >3 | ||
| 1590 | | cmp CARG1, CARG3 | ||
| 1591 | | add RA, RA, #8 | ||
| 1592 | | mov..cond CARG1, CARG3 | ||
| 1593 | | b <1 | ||
| 1594 | |3: // Convert intermediate result to number and continue below. | ||
| 1595 | | vmov s4, CARG1 | ||
| 1596 | | bhi ->fff_fallback | ||
| 1597 | | vldr d1, [RA] | ||
| 1598 | | vcvt.f64.s32 d0, s4 | ||
| 1599 | | b >6 | ||
| 1600 | | | ||
| 1601 | |4: | ||
| 1602 | | vldr d0, [BASE] | ||
| 1603 | | bhi ->fff_fallback | ||
| 1604 | |5: // Handle numbers. | ||
| 1605 | | ldrd CARG34, [RA] | ||
| 1606 | | vldr d1, [RA] | ||
| 1607 | | cmp RA, RB | ||
| 1608 | | bhs ->fff_resd | ||
| 1609 | | checktp CARG4, LJ_TISNUM | ||
| 1610 | | bhs >7 | ||
| 1611 | |6: | ||
| 1612 | | vcmp.f64 d0, d1 | ||
| 1613 | | vmrs | ||
| 1614 | | add RA, RA, #8 | ||
| 1615 | | vmov..fcond.f64 d0, d1 | ||
| 1616 | | b <5 | ||
| 1617 | |7: // Convert integer to number and continue above. | ||
| 1618 | | vmov s4, CARG3 | ||
| 1619 | | bhi ->fff_fallback | ||
| 1620 | | vcvt.f64.s32 d1, s4 | ||
| 1621 | | b <6 | ||
| 1622 | | | ||
| 1623 | |.else | ||
| 1624 | | | ||
| 1456 | | .ffunc_1 name | 1625 | | .ffunc_1 name |
| 1457 | | checktp CARG2, LJ_TISNUM | 1626 | | checktp CARG2, LJ_TISNUM |
| 1458 | | mov RA, #8 | 1627 | | mov RA, #8 |
| @@ -1467,9 +1636,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1467 | | add RA, RA, #8 | 1636 | | add RA, RA, #8 |
| 1468 | | mov..cond CARG1, CARG3 | 1637 | | mov..cond CARG1, CARG3 |
| 1469 | | b <1 | 1638 | | b <1 |
| 1470 | |3: | 1639 | |3: // Convert intermediate result to number and continue below. |
| 1471 | | bhi ->fff_fallback | 1640 | | bhi ->fff_fallback |
| 1472 | | // Convert intermediate result to number and continue below. | ||
| 1473 | | bl extern __aeabi_i2d | 1641 | | bl extern __aeabi_i2d |
| 1474 | | ldrd CARG34, [BASE, RA] | 1642 | | ldrd CARG34, [BASE, RA] |
| 1475 | | b >6 | 1643 | | b >6 |
| @@ -1495,6 +1663,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1495 | | bl extern __aeabi_i2d | 1663 | | bl extern __aeabi_i2d |
| 1496 | | ldrd CARG34, TMPD | 1664 | | ldrd CARG34, TMPD |
| 1497 | | b <6 | 1665 | | b <6 |
| 1666 | |.endif | ||
| 1498 | |.endmacro | 1667 | |.endmacro |
| 1499 | | | 1668 | | |
| 1500 | | math_minmax math_min, gt, hi | 1669 | | math_minmax math_min, gt, hi |
| @@ -1959,6 +2128,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1959 | | ldr CARG2, [CARG1, #-4]! // Get exit instruction. | 2128 | | ldr CARG2, [CARG1, #-4]! // Get exit instruction. |
| 1960 | | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. | 2129 | | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. |
| 1961 | | str CARG1, [sp, #60] | 2130 | | str CARG1, [sp, #60] |
| 2131 | |.if FPU | ||
| 2132 | | vpush {d0-d15} | ||
| 2133 | |.endif | ||
| 1962 | | lsl CARG2, CARG2, #8 | 2134 | | lsl CARG2, CARG2, #8 |
| 1963 | | add CARG1, CARG1, CARG2, asr #6 | 2135 | | add CARG1, CARG1, CARG2, asr #6 |
| 1964 | | ldr CARG2, [lr, #4] // Load exit stub group offset. | 2136 | | ldr CARG2, [lr, #4] // Load exit stub group offset. |
| @@ -2025,8 +2197,53 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2025 | |// FP value rounding. Called from JIT code. | 2197 | |// FP value rounding. Called from JIT code. |
| 2026 | |// | 2198 | |// |
| 2027 | |// double lj_vm_floor/ceil/trunc(double x); | 2199 | |// double lj_vm_floor/ceil/trunc(double x); |
| 2028 | |.macro vm_round, func | 2200 | |.macro vm_round, func, hf |
| 2029 | |->vm_ .. func: | 2201 | |.if FPU |
| 2202 | |.if hf == 0 | ||
| 2203 | | vmov d0, CARG1, CARG2 | ||
| 2204 | | vldr d2, <8 // 2^52 | ||
| 2205 | |.else | ||
| 2206 | | vldr d2, <8 // 2^52 | ||
| 2207 | | vmov CARG1, CARG2, d0 | ||
| 2208 | |.endif | ||
| 2209 | | vabs.f64 d1, d0 | ||
| 2210 | | vcmp.f64 d1, d2 // |x| >= 2^52 or NaN? | ||
| 2211 | | vmrs | ||
| 2212 | |.if "func" == "trunc" | ||
| 2213 | | vadd.f64 d0, d1, d2 | ||
| 2214 | | bxpl lr // Return argument unchanged. | ||
| 2215 | | vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52 | ||
| 2216 | | vldr d2, <9 // +1.0 | ||
| 2217 | | vcmp.f64 d1, d0 // |x| < result: subtract +1.0 | ||
| 2218 | | vmrs | ||
| 2219 | | vsubmi.f64 d0, d1, d2 | ||
| 2220 | | cmp CARG2, #0 | ||
| 2221 | | vnegmi.f64 d0, d0 // Merge sign bit back in. | ||
| 2222 | |.else | ||
| 2223 | | vadd.f64 d1, d1, d2 | ||
| 2224 | | bxpl lr // Return argument unchanged. | ||
| 2225 | | cmp CARG2, #0 | ||
| 2226 | | vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52 | ||
| 2227 | | vldr d2, <9 // +1.0 | ||
| 2228 | | vnegmi.f64 d1, d1 // Merge sign bit back in. | ||
| 2229 | |.if "func" == "floor" | ||
| 2230 | | vcmp.f64 d0, d1 // x < result: subtract +1.0. | ||
| 2231 | | vmrs | ||
| 2232 | | vsubmi.f64 d0, d1, d2 | ||
| 2233 | |.else | ||
| 2234 | | vcmp.f64 d1, d0 // x > result: add +1.0. | ||
| 2235 | | vmrs | ||
| 2236 | | vaddmi.f64 d0, d1, d2 | ||
| 2237 | |.endif | ||
| 2238 | | vmovpl.f64 d0, d1 | ||
| 2239 | |.endif | ||
| 2240 | |.if hf == 0 | ||
| 2241 | | vmov CARG1, CARG2, d0 | ||
| 2242 | |.endif | ||
| 2243 | | bx lr | ||
| 2244 | | | ||
| 2245 | |.else | ||
| 2246 | | | ||
| 2030 | | lsl CARG3, CARG2, #1 | 2247 | | lsl CARG3, CARG2, #1 |
| 2031 | | adds RB, CARG3, #0x00200000 | 2248 | | adds RB, CARG3, #0x00200000 |
| 2032 | | bpl >2 // |x| < 1? | 2249 | | bpl >2 // |x| < 1? |
| @@ -2069,15 +2286,40 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2069 | | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) | 2286 | | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) |
| 2070 | | orrne CARG2, CARG2, CARG4 | 2287 | | orrne CARG2, CARG2, CARG4 |
| 2071 | | bx lr | 2288 | | bx lr |
| 2289 | |.endif | ||
| 2072 | |.endmacro | 2290 | |.endmacro |
| 2073 | | | 2291 | | |
| 2292 | |.if FPU | ||
| 2293 | |.align 8 | ||
| 2294 | |9: | ||
| 2295 | | .long 0, 0x3ff00000 // +1.0 | ||
| 2296 | |8: | ||
| 2297 | | .long 0, 0x43300000 // 2^52 | ||
| 2298 | |.else | ||
| 2074 | |9: | 2299 | |9: |
| 2075 | | .long 0x3ff00000 // hiword(1.0) | 2300 | | .long 0x3ff00000 // hiword(+1.0) |
| 2076 | | vm_round floor | 2301 | |.endif |
| 2077 | | vm_round ceil | 2302 | | |
| 2303 | |->vm_floor: | ||
| 2304 | |.if not HFABI | ||
| 2305 | | vm_round floor, 0 | ||
| 2306 | |.endif | ||
| 2307 | |->vm_floor_hf: | ||
| 2308 | |.if FPU | ||
| 2309 | | vm_round floor, 1 | ||
| 2310 | |.endif | ||
| 2311 | | | ||
| 2312 | |->vm_ceil: | ||
| 2313 | |.if not HFABI | ||
| 2314 | | vm_round ceil, 0 | ||
| 2315 | |.endif | ||
| 2316 | |->vm_ceil_hf: | ||
| 2317 | |.if FPU | ||
| 2318 | | vm_round ceil, 1 | ||
| 2319 | |.endif | ||
| 2078 | | | 2320 | | |
| 2079 | |->vm_trunc: | 2321 | |->vm_trunc: |
| 2080 | |.if JIT | 2322 | |.if JIT and not HFABI |
| 2081 | | lsl CARG3, CARG2, #1 | 2323 | | lsl CARG3, CARG2, #1 |
| 2082 | | adds RB, CARG3, #0x00200000 | 2324 | | adds RB, CARG3, #0x00200000 |
| 2083 | | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. | 2325 | | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. |
| @@ -2093,8 +2335,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2093 | | bx lr | 2335 | | bx lr |
| 2094 | |.endif | 2336 | |.endif |
| 2095 | | | 2337 | | |
| 2338 | |->vm_trunc_hf: | ||
| 2339 | |.if JIT and FPU | ||
| 2340 | | vm_round trunc, 1 | ||
| 2341 | |.endif | ||
| 2342 | | | ||
| 2096 | | // double lj_vm_mod(double dividend, double divisor); | 2343 | | // double lj_vm_mod(double dividend, double divisor); |
| 2097 | |->vm_mod: | 2344 | |->vm_mod: |
| 2345 | |.if FPU | ||
| 2346 | | // Special calling convention. Also, RC (r11) is not preserved. | ||
| 2347 | | vdiv.f64 d0, d6, d7 | ||
| 2348 | | mov RC, lr | ||
| 2349 | | bl ->vm_floor_hf | ||
| 2350 | | vmul.f64 d0, d0, d7 | ||
| 2351 | | mov lr, RC | ||
| 2352 | | vsub.f64 d6, d6, d0 | ||
| 2353 | | bx lr | ||
| 2354 | |.else | ||
| 2098 | | push {r0, r1, r2, r3, r4, lr} | 2355 | | push {r0, r1, r2, r3, r4, lr} |
| 2099 | | bl extern __aeabi_ddiv | 2356 | | bl extern __aeabi_ddiv |
| 2100 | | bl ->vm_floor | 2357 | | bl ->vm_floor |
| @@ -2105,6 +2362,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2105 | | bl extern __aeabi_dadd | 2362 | | bl extern __aeabi_dadd |
| 2106 | | add sp, sp, #20 | 2363 | | add sp, sp, #20 |
| 2107 | | pop {pc} | 2364 | | pop {pc} |
| 2365 | |.endif | ||
| 2108 | | | 2366 | | |
| 2109 | | // int lj_vm_modi(int dividend, int divisor); | 2367 | | // int lj_vm_modi(int dividend, int divisor); |
| 2110 | |->vm_modi: | 2368 | |->vm_modi: |
| @@ -2266,6 +2524,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2266 | | ins_next | 2524 | | ins_next |
| 2267 | | | 2525 | | |
| 2268 | |3: // CARG12 is not an integer. | 2526 | |3: // CARG12 is not an integer. |
| 2527 | |.if FPU | ||
| 2528 | | vldr d0, [RA] | ||
| 2529 | | bhi ->vmeta_comp | ||
| 2530 | | // d0 is a number. | ||
| 2531 | | checktp CARG4, LJ_TISNUM | ||
| 2532 | | vldr d1, [RC] | ||
| 2533 | | blo >5 | ||
| 2534 | | // d0 is a number, CARG3 is an integer. | ||
| 2535 | | vmov s4, CARG3 | ||
| 2536 | | vcvt.f64.s32 d1, s4 | ||
| 2537 | | b >5 | ||
| 2538 | |4: // CARG1 is an integer, CARG34 is not an integer. | ||
| 2539 | | vldr d1, [RC] | ||
| 2540 | | bhi ->vmeta_comp | ||
| 2541 | | // CARG1 is an integer, d1 is a number. | ||
| 2542 | | vmov s4, CARG1 | ||
| 2543 | | vcvt.f64.s32 d0, s4 | ||
| 2544 | |5: // d0 and d1 are numbers. | ||
| 2545 | | vcmp.f64 d0, d1 | ||
| 2546 | | vmrs | ||
| 2547 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | ||
| 2548 | if (op == BC_ISLT) { | ||
| 2549 | | sublo PC, RB, #0x20000 | ||
| 2550 | } else if (op == BC_ISGE) { | ||
| 2551 | | subhs PC, RB, #0x20000 | ||
| 2552 | } else if (op == BC_ISLE) { | ||
| 2553 | | subls PC, RB, #0x20000 | ||
| 2554 | } else { | ||
| 2555 | | subhi PC, RB, #0x20000 | ||
| 2556 | } | ||
| 2557 | | b <1 | ||
| 2558 | |.else | ||
| 2269 | | bhi ->vmeta_comp | 2559 | | bhi ->vmeta_comp |
| 2270 | | // CARG12 is a number. | 2560 | | // CARG12 is a number. |
| 2271 | | checktp CARG4, LJ_TISNUM | 2561 | | checktp CARG4, LJ_TISNUM |
| @@ -2282,7 +2572,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2282 | | b >5 | 2572 | | b >5 |
| 2283 | |4: // CARG1 is an integer, CARG34 is not an integer. | 2573 | |4: // CARG1 is an integer, CARG34 is not an integer. |
| 2284 | | bhi ->vmeta_comp | 2574 | | bhi ->vmeta_comp |
| 2285 | | // CARG1 is an integer, CARG34 is a number | 2575 | | // CARG1 is an integer, CARG34 is a number. |
| 2286 | | mov RA, RB // Save RB. | 2576 | | mov RA, RB // Save RB. |
| 2287 | | bl extern __aeabi_i2d | 2577 | | bl extern __aeabi_i2d |
| 2288 | | ldrd CARG34, [RC] // Restore second operand. | 2578 | | ldrd CARG34, [RC] // Restore second operand. |
| @@ -2299,6 +2589,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2299 | | subhi PC, RA, #0x20000 | 2589 | | subhi PC, RA, #0x20000 |
| 2300 | } | 2590 | } |
| 2301 | | b <1 | 2591 | | b <1 |
| 2592 | |.endif | ||
| 2302 | break; | 2593 | break; |
| 2303 | 2594 | ||
| 2304 | case BC_ISEQV: case BC_ISNEV: | 2595 | case BC_ISEQV: case BC_ISNEV: |
| @@ -2439,6 +2730,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2439 | } | 2730 | } |
| 2440 | | bhi <2 | 2731 | | bhi <2 |
| 2441 | |.endif | 2732 | |.endif |
| 2733 | |.if FPU | ||
| 2734 | | checktp CARG4, LJ_TISNUM | ||
| 2735 | | vmov s4, CARG3 | ||
| 2736 | | vldr d0, [RA] | ||
| 2737 | | vldrlo d1, [RC] | ||
| 2738 | | vcvths.f64.s32 d1, s4 | ||
| 2739 | | b >5 | ||
| 2740 | |4: // CARG1 is an integer, d1 is a number. | ||
| 2741 | | vmov s4, CARG1 | ||
| 2742 | | vldr d1, [RC] | ||
| 2743 | | vcvt.f64.s32 d0, s4 | ||
| 2744 | |5: // d0 and d1 are numbers. | ||
| 2745 | | vcmp.f64 d0, d1 | ||
| 2746 | | vmrs | ||
| 2747 | if (vk) { | ||
| 2748 | | subeq PC, RB, #0x20000 | ||
| 2749 | } else { | ||
| 2750 | | subne PC, RB, #0x20000 | ||
| 2751 | } | ||
| 2752 | | b <2 | ||
| 2753 | |.else | ||
| 2442 | | // CARG12 is a number. | 2754 | | // CARG12 is a number. |
| 2443 | | checktp CARG4, LJ_TISNUM | 2755 | | checktp CARG4, LJ_TISNUM |
| 2444 | | movlo RA, RB // Save RB. | 2756 | | movlo RA, RB // Save RB. |
| @@ -2458,6 +2770,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2458 | | subne PC, RA, #0x20000 | 2770 | | subne PC, RA, #0x20000 |
| 2459 | } | 2771 | } |
| 2460 | | b <2 | 2772 | | b <2 |
| 2773 | |.endif | ||
| 2461 | | | 2774 | | |
| 2462 | |.if FFI | 2775 | |.if FFI |
| 2463 | |7: | 2776 | |7: |
| @@ -2617,20 +2930,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2617 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 2930 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
| 2618 | ||switch (vk) { | 2931 | ||switch (vk) { |
| 2619 | ||case 0: | 2932 | ||case 0: |
| 2933 | | .if FPU | ||
| 2934 | | ldrd CARG12, [RB, BASE]! | ||
| 2935 | | ldrd CARG34, [RC, KBASE]! | ||
| 2936 | | .else | ||
| 2620 | | ldrd CARG12, [BASE, RB] | 2937 | | ldrd CARG12, [BASE, RB] |
| 2621 | | ldrd CARG34, [KBASE, RC] | 2938 | | ldrd CARG34, [KBASE, RC] |
| 2939 | | .endif | ||
| 2622 | || break; | 2940 | || break; |
| 2623 | ||case 1: | 2941 | ||case 1: |
| 2942 | | .if FPU | ||
| 2943 | | ldrd CARG34, [RB, BASE]! | ||
| 2944 | | ldrd CARG12, [RC, KBASE]! | ||
| 2945 | | .else | ||
| 2624 | | ldrd CARG34, [BASE, RB] | 2946 | | ldrd CARG34, [BASE, RB] |
| 2625 | | ldrd CARG12, [KBASE, RC] | 2947 | | ldrd CARG12, [KBASE, RC] |
| 2948 | | .endif | ||
| 2626 | || break; | 2949 | || break; |
| 2627 | ||default: | 2950 | ||default: |
| 2951 | | .if FPU | ||
| 2952 | | ldrd CARG12, [RB, BASE]! | ||
| 2953 | | ldrd CARG34, [RC, BASE]! | ||
| 2954 | | .else | ||
| 2628 | | ldrd CARG12, [BASE, RB] | 2955 | | ldrd CARG12, [BASE, RB] |
| 2629 | | ldrd CARG34, [BASE, RC] | 2956 | | ldrd CARG34, [BASE, RC] |
| 2957 | | .endif | ||
| 2630 | || break; | 2958 | || break; |
| 2631 | ||} | 2959 | ||} |
| 2632 | |.endmacro | 2960 | |.endmacro |
| 2633 | | | 2961 | | |
| 2962 | |.macro ins_arithpre_fpu, reg1, reg2 | ||
| 2963 | |.if FPU | ||
| 2964 | ||if (vk == 1) { | ||
| 2965 | | vldr reg2, [RB] | ||
| 2966 | | vldr reg1, [RC] | ||
| 2967 | ||} else { | ||
| 2968 | | vldr reg1, [RB] | ||
| 2969 | | vldr reg2, [RC] | ||
| 2970 | ||} | ||
| 2971 | |.endif | ||
| 2972 | |.endmacro | ||
| 2973 | | | ||
| 2974 | |.macro ins_arithpost_fpu, reg | ||
| 2975 | | ins_next1 | ||
| 2976 | | add RA, BASE, RA | ||
| 2977 | | ins_next2 | ||
| 2978 | | vstr reg, [RA] | ||
| 2979 | | ins_next3 | ||
| 2980 | |.endmacro | ||
| 2981 | | | ||
| 2634 | |.macro ins_arithfallback, ins | 2982 | |.macro ins_arithfallback, ins |
| 2635 | ||switch (vk) { | 2983 | ||switch (vk) { |
| 2636 | ||case 0: | 2984 | ||case 0: |
| @@ -2645,9 +2993,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2645 | ||} | 2993 | ||} |
| 2646 | |.endmacro | 2994 | |.endmacro |
| 2647 | | | 2995 | | |
| 2648 | |.macro ins_arithdn, intins, fpcall | 2996 | |.macro ins_arithdn, intins, fpins, fpcall |
| 2649 | | ins_arithpre | 2997 | | ins_arithpre |
| 2650 | |.if "intins" ~= "vm_modi" | 2998 | |.if "intins" ~= "vm_modi" and not FPU |
| 2651 | | ins_next1 | 2999 | | ins_next1 |
| 2652 | |.endif | 3000 | |.endif |
| 2653 | | ins_arithcheck_int >5 | 3001 | | ins_arithcheck_int >5 |
| @@ -2665,57 +3013,74 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2665 | | ins_arithfallback bvs | 3013 | | ins_arithfallback bvs |
| 2666 | |.endif | 3014 | |.endif |
| 2667 | |4: | 3015 | |4: |
| 2668 | |.if "intins" == "vm_modi" | 3016 | |.if "intins" == "vm_modi" or FPU |
| 2669 | | ins_next1 | 3017 | | ins_next1 |
| 2670 | |.endif | 3018 | |.endif |
| 2671 | | ins_next2 | 3019 | | ins_next2 |
| 2672 | | strd CARG12, [BASE, RA] | 3020 | | strd CARG12, [BASE, RA] |
| 2673 | | ins_next3 | 3021 | | ins_next3 |
| 2674 | |5: // FP variant. | 3022 | |5: // FP variant. |
| 3023 | | ins_arithpre_fpu d6, d7 | ||
| 2675 | | ins_arithfallback ins_arithcheck_num | 3024 | | ins_arithfallback ins_arithcheck_num |
| 3025 | |.if FPU | ||
| 2676 | |.if "intins" == "vm_modi" | 3026 | |.if "intins" == "vm_modi" |
| 2677 | | bl fpcall | 3027 | | bl fpcall |
| 2678 | |.else | 3028 | |.else |
| 3029 | | fpins d6, d6, d7 | ||
| 3030 | |.endif | ||
| 3031 | | ins_arithpost_fpu d6 | ||
| 3032 | |.else | ||
| 2679 | | bl fpcall | 3033 | | bl fpcall |
| 2680 | | ins_next1 | 3034 | |.if "intins" ~= "vm_modi" |
| 3035 | | ins_next1 | ||
| 2681 | |.endif | 3036 | |.endif |
| 2682 | | b <4 | 3037 | | b <4 |
| 3038 | |.endif | ||
| 2683 | |.endmacro | 3039 | |.endmacro |
| 2684 | | | 3040 | | |
| 2685 | |.macro ins_arithfp, fpcall | 3041 | |.macro ins_arithfp, fpins, fpcall |
| 2686 | | ins_arithpre | 3042 | | ins_arithpre |
| 3043 | |.if "fpins" ~= "extern" or HFABI | ||
| 3044 | | ins_arithpre_fpu d0, d1 | ||
| 3045 | |.endif | ||
| 2687 | | ins_arithfallback ins_arithcheck_num | 3046 | | ins_arithfallback ins_arithcheck_num |
| 2688 | |.if "fpcall" == "extern pow" | 3047 | |.if "fpins" == "extern" |
| 2689 | | .IOS mov RC, BASE | 3048 | | .IOS mov RC, BASE |
| 2690 | | bl fpcall | 3049 | | bl fpcall |
| 2691 | | .IOS mov BASE, RC | 3050 | | .IOS mov BASE, RC |
| 3051 | |.elif FPU | ||
| 3052 | | fpins d0, d0, d1 | ||
| 2692 | |.else | 3053 | |.else |
| 2693 | | bl fpcall | 3054 | | bl fpcall |
| 2694 | |.endif | 3055 | |.endif |
| 3056 | |.if ("fpins" ~= "extern" or HFABI) and FPU | ||
| 3057 | | ins_arithpost_fpu d0 | ||
| 3058 | |.else | ||
| 2695 | | ins_next1 | 3059 | | ins_next1 |
| 2696 | | ins_next2 | 3060 | | ins_next2 |
| 2697 | | strd CARG12, [BASE, RA] | 3061 | | strd CARG12, [BASE, RA] |
| 2698 | | ins_next3 | 3062 | | ins_next3 |
| 3063 | |.endif | ||
| 2699 | |.endmacro | 3064 | |.endmacro |
| 2700 | 3065 | ||
| 2701 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3066 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
| 2702 | | ins_arithdn adds, extern __aeabi_dadd | 3067 | | ins_arithdn adds, vadd.f64, extern __aeabi_dadd |
| 2703 | break; | 3068 | break; |
| 2704 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3069 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
| 2705 | | ins_arithdn subs, extern __aeabi_dsub | 3070 | | ins_arithdn subs, vsub.f64, extern __aeabi_dsub |
| 2706 | break; | 3071 | break; |
| 2707 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3072 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
| 2708 | | ins_arithdn smull, extern __aeabi_dmul | 3073 | | ins_arithdn smull, vmul.f64, extern __aeabi_dmul |
| 2709 | break; | 3074 | break; |
| 2710 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3075 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
| 2711 | | ins_arithfp extern __aeabi_ddiv | 3076 | | ins_arithfp vdiv.f64, extern __aeabi_ddiv |
| 2712 | break; | 3077 | break; |
| 2713 | case BC_MODVN: case BC_MODNV: case BC_MODVV: | 3078 | case BC_MODVN: case BC_MODNV: case BC_MODVV: |
| 2714 | | ins_arithdn vm_modi, ->vm_mod | 3079 | | ins_arithdn vm_modi, vm_mod, ->vm_mod |
| 2715 | break; | 3080 | break; |
| 2716 | case BC_POW: | 3081 | case BC_POW: |
| 2717 | | // NYI: (partial) integer arithmetic. | 3082 | | // NYI: (partial) integer arithmetic. |
| 2718 | | ins_arithfp extern pow | 3083 | | ins_arithfp extern, extern pow |
| 2719 | break; | 3084 | break; |
| 2720 | 3085 | ||
| 2721 | case BC_CAT: | 3086 | case BC_CAT: |
| @@ -3775,20 +4140,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3775 | | cmnlo CARG4, #-LJ_TISNUM | 4140 | | cmnlo CARG4, #-LJ_TISNUM |
| 3776 | | cmnlo RB, #-LJ_TISNUM | 4141 | | cmnlo RB, #-LJ_TISNUM |
| 3777 | | bhs ->vmeta_for | 4142 | | bhs ->vmeta_for |
| 4143 | |.if FPU | ||
| 4144 | | vldr d0, FOR_IDX | ||
| 4145 | | vldr d1, FOR_STOP | ||
| 4146 | | cmp RB, #0 | ||
| 4147 | | vstr d0, FOR_EXT | ||
| 4148 | |.else | ||
| 3778 | | cmp RB, #0 | 4149 | | cmp RB, #0 |
| 3779 | | strd CARG12, FOR_IDX | ||
| 3780 | | strd CARG12, FOR_EXT | 4150 | | strd CARG12, FOR_EXT |
| 3781 | | blt >8 | 4151 | | blt >8 |
| 4152 | |.endif | ||
| 3782 | } else { | 4153 | } else { |
| 4154 | |.if FPU | ||
| 4155 | | vldr d0, FOR_IDX | ||
| 4156 | | vldr d2, FOR_STEP | ||
| 4157 | | vldr d1, FOR_STOP | ||
| 4158 | | cmp CARG4, #0 | ||
| 4159 | | vadd.f64 d0, d0, d2 | ||
| 4160 | |.else | ||
| 3783 | | cmp CARG4, #0 | 4161 | | cmp CARG4, #0 |
| 3784 | | blt >8 | 4162 | | blt >8 |
| 3785 | | bl extern __aeabi_dadd | 4163 | | bl extern __aeabi_dadd |
| 3786 | | strd CARG12, FOR_IDX | 4164 | | strd CARG12, FOR_IDX |
| 3787 | | ldrd CARG34, FOR_STOP | 4165 | | ldrd CARG34, FOR_STOP |
| 3788 | | strd CARG12, FOR_EXT | 4166 | | strd CARG12, FOR_EXT |
| 4167 | |.endif | ||
| 3789 | } | 4168 | } |
| 3790 | |6: | 4169 | |6: |
| 4170 | |.if FPU | ||
| 4171 | | vcmpge.f64 d0, d1 | ||
| 4172 | | vcmplt.f64 d1, d0 | ||
| 4173 | | vmrs | ||
| 4174 | |.else | ||
| 3791 | | bl extern __aeabi_cdcmple | 4175 | | bl extern __aeabi_cdcmple |
| 4176 | |.endif | ||
| 4177 | if (vk) { | ||
| 4178 | |.if FPU | ||
| 4179 | | vstr d0, FOR_IDX | ||
| 4180 | | vstr d0, FOR_EXT | ||
| 4181 | |.endif | ||
| 4182 | } | ||
| 3792 | if (op == BC_FORI) { | 4183 | if (op == BC_FORI) { |
| 3793 | | subhi PC, RC, #0x20000 | 4184 | | subhi PC, RC, #0x20000 |
| 3794 | } else if (op == BC_JFORI) { | 4185 | } else if (op == BC_JFORI) { |
| @@ -3804,6 +4195,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3804 | | ins_next2 | 4195 | | ins_next2 |
| 3805 | | b <3 | 4196 | | b <3 |
| 3806 | | | 4197 | | |
| 4198 | |.if not FPU | ||
| 3807 | |8: // Invert check for negative step. | 4199 | |8: // Invert check for negative step. |
| 3808 | if (vk) { | 4200 | if (vk) { |
| 3809 | | bl extern __aeabi_dadd | 4201 | | bl extern __aeabi_dadd |
| @@ -3814,6 +4206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3814 | | mov CARG4, CARG2 | 4206 | | mov CARG4, CARG2 |
| 3815 | | ldrd CARG12, FOR_STOP | 4207 | | ldrd CARG12, FOR_STOP |
| 3816 | | b <6 | 4208 | | b <6 |
| 4209 | |.endif | ||
| 3817 | break; | 4210 | break; |
| 3818 | 4211 | ||
| 3819 | case BC_ITERL: | 4212 | case BC_ITERL: |
| @@ -4048,8 +4441,14 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
| 4048 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | 4441 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ |
| 4049 | "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ | 4442 | "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ |
| 4050 | fcofs, CFRAME_SIZE); | 4443 | fcofs, CFRAME_SIZE); |
| 4051 | for (i = 11; i >= 4; i--) /* offset r4-r11 */ | 4444 | for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */ |
| 4052 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); | 4445 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); |
| 4446 | #if LJ_ARCH_HASFPU | ||
| 4447 | for (i = 15; i >= 8; i--) /* offset d8-d15 */ | ||
| 4448 | fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", | ||
| 4449 | 64+2*i, 10+2*(15-i)); | ||
| 4450 | fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ | ||
| 4451 | #endif | ||
| 4053 | fprintf(ctx->fp, | 4452 | fprintf(ctx->fp, |
| 4054 | "\t.align 2\n" | 4453 | "\t.align 2\n" |
| 4055 | ".LEFDE0:\n\n"); | 4454 | ".LEFDE0:\n\n"); |
