aboutsummaryrefslogtreecommitdiff
path: root/src/lj_vmmath.c
diff options
context:
space:
mode:
authorMike Pall <mike>2022-01-24 14:37:50 +0100
committerMike Pall <mike>2022-01-24 14:37:50 +0100
commit9512d5c1aced61e13e7be2d3208ec7ae3516b458 (patch)
treec31882578a670847adb37475362b7d21ae9bc099 /src/lj_vmmath.c
parentc18acfe7565b9b20be0a73563f535766233ad78a (diff)
downloadluajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.gz
luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.bz2
luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.zip
Fix pow() optimization inconsistencies.
Diffstat (limited to 'src/lj_vmmath.c')
-rw-r--r--src/lj_vmmath.c82
1 files changed, 45 insertions, 37 deletions
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 536199d8..fa0de922 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -30,11 +30,51 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } 30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } 31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } 32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
33LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
34LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } 33LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
35#endif 34#endif
36 35
37/* -- Helper functions for generated machine code ------------------------- */ 36/* -- Helper functions ---------------------------------------------------- */
37
38/* Unsigned x^k. */
39static double lj_vm_powui(double x, uint32_t k)
40{
41 double y;
42 lj_assertX(k != 0, "pow with zero exponent");
43 for (; (k & 1) == 0; k >>= 1) x *= x;
44 y = x;
45 if ((k >>= 1) != 0) {
46 for (;;) {
47 x *= x;
48 if (k == 1) break;
49 if (k & 1) y *= x;
50 k >>= 1;
51 }
52 y *= x;
53 }
54 return y;
55}
56
57/* Signed x^k. */
58double lj_vm_powi(double x, int32_t k)
59{
60 if (k > 1)
61 return lj_vm_powui(x, (uint32_t)k);
62 else if (k == 1)
63 return x;
64 else if (k == 0)
65 return 1.0;
66 else
67 return 1.0 / lj_vm_powui(x, (uint32_t)-k);
68}
69
70double lj_vm_pow(double x, double y)
71{
72 int32_t k = lj_num2int(y);
73 if ((k >= -65536 && k <= 65536) && y == (double)k)
74 return lj_vm_powi(x, k);
75 else
76 return pow(x, y);
77}
38 78
39double lj_vm_foldarith(double x, double y, int op) 79double lj_vm_foldarith(double x, double y, int op)
40{ 80{
@@ -44,7 +84,7 @@ double lj_vm_foldarith(double x, double y, int op)
44 case IR_MUL - IR_ADD: return x*y; break; 84 case IR_MUL - IR_ADD: return x*y; break;
45 case IR_DIV - IR_ADD: return x/y; break; 85 case IR_DIV - IR_ADD: return x/y; break;
46 case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; 86 case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
47 case IR_POW - IR_ADD: return pow(x, y); break; 87 case IR_POW - IR_ADD: return lj_vm_pow(x, y); break;
48 case IR_NEG - IR_ADD: return -x; break; 88 case IR_NEG - IR_ADD: return -x; break;
49 case IR_ABS - IR_ADD: return fabs(x); break; 89 case IR_ABS - IR_ADD: return fabs(x); break;
50#if LJ_HASJIT 90#if LJ_HASJIT
@@ -56,6 +96,8 @@ double lj_vm_foldarith(double x, double y, int op)
56 } 96 }
57} 97}
58 98
99/* -- Helper functions for generated machine code ------------------------- */
100
59#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS 101#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
60int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) 102int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
61{ 103{
@@ -80,40 +122,6 @@ double lj_vm_log2(double a)
80} 122}
81#endif 123#endif
82 124
83#if !LJ_TARGET_X86ORX64
84/* Unsigned x^k. */
85static double lj_vm_powui(double x, uint32_t k)
86{
87 double y;
88 lj_assertX(k != 0, "pow with zero exponent");
89 for (; (k & 1) == 0; k >>= 1) x *= x;
90 y = x;
91 if ((k >>= 1) != 0) {
92 for (;;) {
93 x *= x;
94 if (k == 1) break;
95 if (k & 1) y *= x;
96 k >>= 1;
97 }
98 y *= x;
99 }
100 return y;
101}
102
103/* Signed x^k. */
104double lj_vm_powi(double x, int32_t k)
105{
106 if (k > 1)
107 return lj_vm_powui(x, (uint32_t)k);
108 else if (k == 1)
109 return x;
110 else if (k == 0)
111 return 1.0;
112 else
113 return 1.0 / lj_vm_powui(x, (uint32_t)-k);
114}
115#endif
116
117/* Computes fpm(x) for extended math functions. */ 125/* Computes fpm(x) for extended math functions. */
118double lj_vm_foldfpm(double x, int fpm) 126double lj_vm_foldfpm(double x, int fpm)
119{ 127{