aboutsummaryrefslogtreecommitdiff
path: root/src/vm_x64.dasc
diff options
context:
space:
mode:
author Mike Pall <mike> 2022-01-24 14:37:50 +0100
committer Mike Pall <mike> 2022-01-24 14:37:50 +0100
commit 9512d5c1aced61e13e7be2d3208ec7ae3516b458 (patch)
tree c31882578a670847adb37475362b7d21ae9bc099 /src/vm_x64.dasc
parent c18acfe7565b9b20be0a73563f535766233ad78a (diff)
download luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.gz
luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.bz2
luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.zip
Fix pow() optimization inconsistencies.
Diffstat (limited to 'src/vm_x64.dasc')
-rw-r--r-- src/vm_x64.dasc 44
1 files changed, 6 insertions, 38 deletions
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index b222190a..4aa8589c 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -1755,13 +1755,16 @@ static void build_subroutines(BuildCtx *ctx)
1755 | jmp ->fff_resxmm0 1755 | jmp ->fff_resxmm0
1756 |.endmacro 1756 |.endmacro
1757 | 1757 |
1758 |.macro math_extern2, func 1758 |.macro math_extern2, name, func
1759 | .ffunc_nn math_ .. func 1759 | .ffunc_nn math_ .. name
1760 | mov RB, BASE 1760 | mov RB, BASE
1761 | call extern func 1761 | call extern func
1762 | mov BASE, RB 1762 | mov BASE, RB
1763 | jmp ->fff_resxmm0 1763 | jmp ->fff_resxmm0
1764 |.endmacro 1764 |.endmacro
1765 |.macro math_extern2, func
1766 | math_extern2 func, func
1767 |.endmacro
1765 | 1768 |
1766 | math_extern log10 1769 | math_extern log10
1767 | math_extern exp 1770 | math_extern exp
@@ -1774,7 +1777,7 @@ static void build_subroutines(BuildCtx *ctx)
1774 | math_extern sinh 1777 | math_extern sinh
1775 | math_extern cosh 1778 | math_extern cosh
1776 | math_extern tanh 1779 | math_extern tanh
1777 | math_extern2 pow 1780 | math_extern2 pow, lj_vm_pow
1778 | math_extern2 atan2 1781 | math_extern2 atan2
1779 | math_extern2 fmod 1782 | math_extern2 fmod
1780 | 1783 |
@@ -2579,41 +2582,6 @@ static void build_subroutines(BuildCtx *ctx)
2579 | subsd xmm0, xmm1 2582 | subsd xmm0, xmm1
2580 | ret 2583 | ret
2581 | 2584 |
2582 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2583 |->vm_powi_sse:
2584 | cmp eax, 1; jle >6 // i<=1?
2585 | // Now 1 < (unsigned)i <= 0x80000000.
2586 |1: // Handle leading zeros.
2587 | test eax, 1; jnz >2
2588 | mulsd xmm0, xmm0
2589 | shr eax, 1
2590 | jmp <1
2591 |2:
2592 | shr eax, 1; jz >5
2593 | movaps xmm1, xmm0
2594 |3: // Handle trailing bits.
2595 | mulsd xmm0, xmm0
2596 | shr eax, 1; jz >4
2597 | jnc <3
2598 | mulsd xmm1, xmm0
2599 | jmp <3
2600 |4:
2601 | mulsd xmm0, xmm1
2602 |5:
2603 | ret
2604 |6:
2605 | je <5 // x^1 ==> x
2606 | jb >7 // x^0 ==> 1
2607 | neg eax
2608 | call <1
2609 | sseconst_1 xmm1, RD
2610 | divsd xmm1, xmm0
2611 | movaps xmm0, xmm1
2612 | ret
2613 |7:
2614 | sseconst_1 xmm0, RD
2615 | ret
2616 |
2617 |//----------------------------------------------------------------------- 2585 |//-----------------------------------------------------------------------
2618 |//-- Miscellaneous functions -------------------------------------------- 2586 |//-- Miscellaneous functions --------------------------------------------
2619 |//----------------------------------------------------------------------- 2587 |//-----------------------------------------------------------------------