diff options
author | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
commit | 9512d5c1aced61e13e7be2d3208ec7ae3516b458 (patch) | |
tree | c31882578a670847adb37475362b7d21ae9bc099 /src/vm_x64.dasc | |
parent | c18acfe7565b9b20be0a73563f535766233ad78a (diff) | |
download | luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.gz luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.bz2 luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.zip |
Fix pow() optimization inconsistencies.
Diffstat (limited to 'src/vm_x64.dasc')
-rw-r--r-- | src/vm_x64.dasc | 44 |
1 file changed, 6 insertions, 38 deletions
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index b222190a..4aa8589c 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc | |||
@@ -1755,13 +1755,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
1755 | | jmp ->fff_resxmm0 | 1755 | | jmp ->fff_resxmm0 |
1756 | |.endmacro | 1756 | |.endmacro |
1757 | | | 1757 | | |
1758 | |.macro math_extern2, func | 1758 | |.macro math_extern2, name, func |
1759 | | .ffunc_nn math_ .. func | 1759 | | .ffunc_nn math_ .. name |
1760 | | mov RB, BASE | 1760 | | mov RB, BASE |
1761 | | call extern func | 1761 | | call extern func |
1762 | | mov BASE, RB | 1762 | | mov BASE, RB |
1763 | | jmp ->fff_resxmm0 | 1763 | | jmp ->fff_resxmm0 |
1764 | |.endmacro | 1764 | |.endmacro |
| | 1765 | |.macro math_extern2, func |
| | 1766 | | math_extern2 func, func |
| | 1767 | |.endmacro |
1765 | | | 1768 | | |
1766 | | math_extern log10 | 1769 | | math_extern log10 |
1767 | | math_extern exp | 1770 | | math_extern exp |
@@ -1774,7 +1777,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1774 | | math_extern sinh | 1777 | | math_extern sinh |
1775 | | math_extern cosh | 1778 | | math_extern cosh |
1776 | | math_extern tanh | 1779 | | math_extern tanh |
1777 | | math_extern2 pow | 1780 | | math_extern2 pow, lj_vm_pow |
1778 | | math_extern2 atan2 | 1781 | | math_extern2 atan2 |
1779 | | math_extern2 fmod | 1782 | | math_extern2 fmod |
1780 | | | 1783 | | |
@@ -2579,41 +2582,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2579 | | subsd xmm0, xmm1 | 2582 | | subsd xmm0, xmm1 |
2580 | | ret | 2583 | | ret |
2581 | | | 2584 | | |
2582 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
2583 | |->vm_powi_sse: | ||
2584 | | cmp eax, 1; jle >6 // i<=1? | ||
2585 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
2586 | |1: // Handle leading zeros. | ||
2587 | | test eax, 1; jnz >2 | ||
2588 | | mulsd xmm0, xmm0 | ||
2589 | | shr eax, 1 | ||
2590 | | jmp <1 | ||
2591 | |2: | ||
2592 | | shr eax, 1; jz >5 | ||
2593 | | movaps xmm1, xmm0 | ||
2594 | |3: // Handle trailing bits. | ||
2595 | | mulsd xmm0, xmm0 | ||
2596 | | shr eax, 1; jz >4 | ||
2597 | | jnc <3 | ||
2598 | | mulsd xmm1, xmm0 | ||
2599 | | jmp <3 | ||
2600 | |4: | ||
2601 | | mulsd xmm0, xmm1 | ||
2602 | |5: | ||
2603 | | ret | ||
2604 | |6: | ||
2605 | | je <5 // x^1 ==> x | ||
2606 | | jb >7 // x^0 ==> 1 | ||
2607 | | neg eax | ||
2608 | | call <1 | ||
2609 | | sseconst_1 xmm1, RD | ||
2610 | | divsd xmm1, xmm0 | ||
2611 | | movaps xmm0, xmm1 | ||
2612 | | ret | ||
2613 | |7: | ||
2614 | | sseconst_1 xmm0, RD | ||
2615 | | ret | ||
2616 | | | ||
2617 | |//----------------------------------------------------------------------- | 2585 | |//----------------------------------------------------------------------- |
2618 | |//-- Miscellaneous functions -------------------------------------------- | 2586 | |//-- Miscellaneous functions -------------------------------------------- |
2619 | |//----------------------------------------------------------------------- | 2587 | |//----------------------------------------------------------------------- |