diff options
author | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
commit | 9512d5c1aced61e13e7be2d3208ec7ae3516b458 (patch) | |
tree | c31882578a670847adb37475362b7d21ae9bc099 /src/lj_vmmath.c | |
parent | c18acfe7565b9b20be0a73563f535766233ad78a (diff) | |
download | luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.gz luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.bz2 luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.zip |
Fix pow() optimization inconsistencies.
Diffstat (limited to 'src/lj_vmmath.c')
-rw-r--r-- | src/lj_vmmath.c | 82 |
1 file changed, 45 insertions, 37 deletions
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 536199d8..fa0de922 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
@@ -30,11 +30,51 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | |||
30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | 30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } |
31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | 31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } |
32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | 32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } |
33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | 33 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } |
35 | #endif | 34 | #endif |
36 | 35 | ||
37 | /* -- Helper functions for generated machine code ------------------------- */ | 36 | /* -- Helper functions ---------------------------------------------------- */ |
37 | |||
38 | /* Unsigned x^k. */ | ||
39 | static double lj_vm_powui(double x, uint32_t k) | ||
40 | { | ||
41 | double y; | ||
42 | lj_assertX(k != 0, "pow with zero exponent"); | ||
43 | for (; (k & 1) == 0; k >>= 1) x *= x; | ||
44 | y = x; | ||
45 | if ((k >>= 1) != 0) { | ||
46 | for (;;) { | ||
47 | x *= x; | ||
48 | if (k == 1) break; | ||
49 | if (k & 1) y *= x; | ||
50 | k >>= 1; | ||
51 | } | ||
52 | y *= x; | ||
53 | } | ||
54 | return y; | ||
55 | } | ||
56 | |||
57 | /* Signed x^k. */ | ||
58 | double lj_vm_powi(double x, int32_t k) | ||
59 | { | ||
60 | if (k > 1) | ||
61 | return lj_vm_powui(x, (uint32_t)k); | ||
62 | else if (k == 1) | ||
63 | return x; | ||
64 | else if (k == 0) | ||
65 | return 1.0; | ||
66 | else | ||
67 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | ||
68 | } | ||
69 | |||
70 | double lj_vm_pow(double x, double y) | ||
71 | { | ||
72 | int32_t k = lj_num2int(y); | ||
73 | if ((k >= -65536 && k <= 65536) && y == (double)k) | ||
74 | return lj_vm_powi(x, k); | ||
75 | else | ||
76 | return pow(x, y); | ||
77 | } | ||
38 | 78 | ||
39 | double lj_vm_foldarith(double x, double y, int op) | 79 | double lj_vm_foldarith(double x, double y, int op) |
40 | { | 80 | { |
@@ -44,7 +84,7 @@ double lj_vm_foldarith(double x, double y, int op) | |||
44 | case IR_MUL - IR_ADD: return x*y; break; | 84 | case IR_MUL - IR_ADD: return x*y; break; |
45 | case IR_DIV - IR_ADD: return x/y; break; | 85 | case IR_DIV - IR_ADD: return x/y; break; |
46 | case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; | 86 | case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; |
47 | case IR_POW - IR_ADD: return pow(x, y); break; | 87 | case IR_POW - IR_ADD: return lj_vm_pow(x, y); break; |
48 | case IR_NEG - IR_ADD: return -x; break; | 88 | case IR_NEG - IR_ADD: return -x; break; |
49 | case IR_ABS - IR_ADD: return fabs(x); break; | 89 | case IR_ABS - IR_ADD: return fabs(x); break; |
50 | #if LJ_HASJIT | 90 | #if LJ_HASJIT |
@@ -56,6 +96,8 @@ double lj_vm_foldarith(double x, double y, int op) | |||
56 | } | 96 | } |
57 | } | 97 | } |
58 | 98 | ||
99 | /* -- Helper functions for generated machine code ------------------------- */ | ||
100 | |||
59 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS | 101 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS |
60 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | 102 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) |
61 | { | 103 | { |
@@ -80,40 +122,6 @@ double lj_vm_log2(double a) | |||
80 | } | 122 | } |
81 | #endif | 123 | #endif |
82 | 124 | ||
83 | #if !LJ_TARGET_X86ORX64 | ||
84 | /* Unsigned x^k. */ | ||
85 | static double lj_vm_powui(double x, uint32_t k) | ||
86 | { | ||
87 | double y; | ||
88 | lj_assertX(k != 0, "pow with zero exponent"); | ||
89 | for (; (k & 1) == 0; k >>= 1) x *= x; | ||
90 | y = x; | ||
91 | if ((k >>= 1) != 0) { | ||
92 | for (;;) { | ||
93 | x *= x; | ||
94 | if (k == 1) break; | ||
95 | if (k & 1) y *= x; | ||
96 | k >>= 1; | ||
97 | } | ||
98 | y *= x; | ||
99 | } | ||
100 | return y; | ||
101 | } | ||
102 | |||
103 | /* Signed x^k. */ | ||
104 | double lj_vm_powi(double x, int32_t k) | ||
105 | { | ||
106 | if (k > 1) | ||
107 | return lj_vm_powui(x, (uint32_t)k); | ||
108 | else if (k == 1) | ||
109 | return x; | ||
110 | else if (k == 0) | ||
111 | return 1.0; | ||
112 | else | ||
113 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | ||
114 | } | ||
115 | #endif | ||
116 | |||
117 | /* Computes fpm(x) for extended math functions. */ | 125 | /* Computes fpm(x) for extended math functions. */ |
118 | double lj_vm_foldfpm(double x, int fpm) | 126 | double lj_vm_foldfpm(double x, int fpm) |
119 | { | 127 | { |