diff options
author | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
commit | 9512d5c1aced61e13e7be2d3208ec7ae3516b458 (patch) | |
tree | c31882578a670847adb37475362b7d21ae9bc099 /src/vm_x86.dasc | |
parent | c18acfe7565b9b20be0a73563f535766233ad78a (diff) | |
download | luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.gz luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.bz2 luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.zip |
Fix pow() optimization inconsistencies.
Diffstat (limited to 'src/vm_x86.dasc')
-rw-r--r-- | src/vm_x86.dasc | 46 |
1 file changed, 7 insertions, 39 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc | |||
index eb56840a..36af852d 100644 | |||
--- a/src/vm_x86.dasc | |||
+++ b/src/vm_x86.dasc | |||
@@ -2138,8 +2138,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2138 | | jmp ->fff_resfp | 2138 | | jmp ->fff_resfp |
2139 | |.endmacro | 2139 | |.endmacro |
2140 | | | 2140 | | |
2141 | |.macro math_extern2, func | 2141 | |.macro math_extern2, name, func |
2142 | | .ffunc_nnsse math_ .. func | 2142 | | .ffunc_nnsse math_ .. name |
2143 | |.if not X64 | 2143 | |.if not X64 |
2144 | | movsd FPARG1, xmm0 | 2144 | | movsd FPARG1, xmm0 |
2145 | | movsd FPARG3, xmm1 | 2145 | | movsd FPARG3, xmm1 |
@@ -2149,6 +2149,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2149 | | mov BASE, RB | 2149 | | mov BASE, RB |
2150 | | jmp ->fff_resfp | 2150 | | jmp ->fff_resfp |
2151 | |.endmacro | 2151 | |.endmacro |
2152 | |.macro math_extern2, func | ||
2153 | | math_extern2 func, func | ||
2154 | |.endmacro | ||
2152 | | | 2155 | | |
2153 | | math_extern log10 | 2156 | | math_extern log10 |
2154 | | math_extern exp | 2157 | | math_extern exp |
@@ -2161,7 +2164,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2161 | | math_extern sinh | 2164 | | math_extern sinh |
2162 | | math_extern cosh | 2165 | | math_extern cosh |
2163 | | math_extern tanh | 2166 | | math_extern tanh |
2164 | | math_extern2 pow | 2167 | | math_extern2 pow, lj_vm_pow |
2165 | | math_extern2 atan2 | 2168 | | math_extern2 atan2 |
2166 | | math_extern2 fmod | 2169 | | math_extern2 fmod |
2167 | | | 2170 | | |
@@ -3038,41 +3041,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3038 | | subsd xmm0, xmm1 | 3041 | | subsd xmm0, xmm1 |
3039 | | ret | 3042 | | ret |
3040 | | | 3043 | | |
3041 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
3042 | |->vm_powi_sse: | ||
3043 | | cmp eax, 1; jle >6 // i<=1? | ||
3044 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
3045 | |1: // Handle leading zeros. | ||
3046 | | test eax, 1; jnz >2 | ||
3047 | | mulsd xmm0, xmm0 | ||
3048 | | shr eax, 1 | ||
3049 | | jmp <1 | ||
3050 | |2: | ||
3051 | | shr eax, 1; jz >5 | ||
3052 | | movaps xmm1, xmm0 | ||
3053 | |3: // Handle trailing bits. | ||
3054 | | mulsd xmm0, xmm0 | ||
3055 | | shr eax, 1; jz >4 | ||
3056 | | jnc <3 | ||
3057 | | mulsd xmm1, xmm0 | ||
3058 | | jmp <3 | ||
3059 | |4: | ||
3060 | | mulsd xmm0, xmm1 | ||
3061 | |5: | ||
3062 | | ret | ||
3063 | |6: | ||
3064 | | je <5 // x^1 ==> x | ||
3065 | | jb >7 // x^0 ==> 1 | ||
3066 | | neg eax | ||
3067 | | call <1 | ||
3068 | | sseconst_1 xmm1, RDa | ||
3069 | | divsd xmm1, xmm0 | ||
3070 | | movaps xmm0, xmm1 | ||
3071 | | ret | ||
3072 | |7: | ||
3073 | | sseconst_1 xmm0, RDa | ||
3074 | | ret | ||
3075 | | | ||
3076 | |//----------------------------------------------------------------------- | 3044 | |//----------------------------------------------------------------------- |
3077 | |//-- Miscellaneous functions -------------------------------------------- | 3045 | |//-- Miscellaneous functions -------------------------------------------- |
3078 | |//----------------------------------------------------------------------- | 3046 | |//----------------------------------------------------------------------- |
@@ -3954,7 +3922,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3954 | | movsd FPARG1, xmm0 | 3922 | | movsd FPARG1, xmm0 |
3955 | | movsd FPARG3, xmm1 | 3923 | | movsd FPARG3, xmm1 |
3956 | |.endif | 3924 | |.endif |
3957 | | call extern pow | 3925 | | call extern lj_vm_pow |
3958 | | movzx RA, PC_RA | 3926 | | movzx RA, PC_RA |
3959 | | mov BASE, RB | 3927 | | mov BASE, RB |
3960 | |.if X64 | 3928 | |.if X64 |