diff options
Diffstat (limited to '')
-rw-r--r-- | src/lj_vmmath.c | 110 |
1 files changed, 44 insertions, 66 deletions
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 2da5f6b7..3351e72b 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
@@ -13,16 +13,40 @@ | |||
13 | #include "lj_ir.h" | 13 | #include "lj_ir.h" |
14 | #include "lj_vm.h" | 14 | #include "lj_vm.h" |
15 | 15 | ||
16 | /* -- Helper functions for generated machine code ------------------------- */ | 16 | /* -- Wrapper functions --------------------------------------------------- */ |
17 | 17 | ||
18 | #if LJ_TARGET_X86ORX64 | 18 | #if LJ_TARGET_X86 && __ELF__ && __PIC__ |
19 | /* Wrapper functions to avoid linker issues on OSX. */ | 19 | /* Wrapper functions to deal with the ELF/x86 PIC disaster. */ |
20 | LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } | 20 | LJ_FUNCA double lj_wrap_log(double x) { return log(x); } |
21 | LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } | 21 | LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } |
22 | LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } | 22 | LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } |
23 | LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } | ||
24 | LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } | ||
25 | LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } | ||
26 | LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } | ||
27 | LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } | ||
28 | LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } | ||
29 | LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | ||
30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | ||
31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | ||
32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | ||
33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | ||
23 | #endif | 35 | #endif |
24 | 36 | ||
25 | #if !LJ_TARGET_X86ORX64 | 37 | /* -- Helper functions ---------------------------------------------------- */ |
38 | |||
39 | /* Required to prevent the C compiler from applying FMA optimizations. | ||
40 | ** | ||
41 | ** Yes, there's -ffp-contract and the FP_CONTRACT pragma ... in theory. | ||
42 | ** But the current state of C compilers is a mess in this regard. | ||
43 | ** Also, this function is not performance sensitive at all. | ||
44 | */ | ||
45 | LJ_NOINLINE static double lj_vm_floormul(double x, double y) | ||
46 | { | ||
47 | return lj_vm_floor(x / y) * y; | ||
48 | } | ||
49 | |||
26 | double lj_vm_foldarith(double x, double y, int op) | 50 | double lj_vm_foldarith(double x, double y, int op) |
27 | { | 51 | { |
28 | switch (op) { | 52 | switch (op) { |
@@ -30,42 +54,27 @@ double lj_vm_foldarith(double x, double y, int op) | |||
30 | case IR_SUB - IR_ADD: return x-y; break; | 54 | case IR_SUB - IR_ADD: return x-y; break; |
31 | case IR_MUL - IR_ADD: return x*y; break; | 55 | case IR_MUL - IR_ADD: return x*y; break; |
32 | case IR_DIV - IR_ADD: return x/y; break; | 56 | case IR_DIV - IR_ADD: return x/y; break; |
33 | case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; | 57 | case IR_MOD - IR_ADD: return x-lj_vm_floormul(x, y); break; |
34 | case IR_POW - IR_ADD: return pow(x, y); break; | 58 | case IR_POW - IR_ADD: return pow(x, y); break; |
35 | case IR_NEG - IR_ADD: return -x; break; | 59 | case IR_NEG - IR_ADD: return -x; break; |
36 | case IR_ABS - IR_ADD: return fabs(x); break; | 60 | case IR_ABS - IR_ADD: return fabs(x); break; |
37 | #if LJ_HASJIT | 61 | #if LJ_HASJIT |
38 | case IR_ATAN2 - IR_ADD: return atan2(x, y); break; | ||
39 | case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; | 62 | case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; |
40 | case IR_MIN - IR_ADD: return x > y ? y : x; break; | 63 | case IR_MIN - IR_ADD: return x < y ? x : y; break; |
41 | case IR_MAX - IR_ADD: return x < y ? y : x; break; | 64 | case IR_MAX - IR_ADD: return x > y ? x : y; break; |
42 | #endif | 65 | #endif |
43 | default: return x; | 66 | default: return x; |
44 | } | 67 | } |
45 | } | 68 | } |
46 | #endif | ||
47 | 69 | ||
48 | #if LJ_HASJIT | 70 | /* -- Helper functions for generated machine code ------------------------- */ |
49 | |||
50 | #ifdef LUAJIT_NO_LOG2 | ||
51 | double lj_vm_log2(double a) | ||
52 | { | ||
53 | return log(a) * 1.4426950408889634074; | ||
54 | } | ||
55 | #endif | ||
56 | |||
57 | #ifdef LUAJIT_NO_EXP2 | ||
58 | double lj_vm_exp2(double a) | ||
59 | { | ||
60 | return exp(a * 0.6931471805599453); | ||
61 | } | ||
62 | #endif | ||
63 | 71 | ||
64 | #if !(LJ_TARGET_ARM || LJ_TARGET_PPC) | 72 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS |
65 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | 73 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) |
66 | { | 74 | { |
67 | uint32_t y, ua, ub; | 75 | uint32_t y, ua, ub; |
68 | lua_assert(b != 0); /* This must be checked before using this function. */ | 76 | /* This must be checked before using this function. */ |
77 | lj_assertX(b != 0, "modulo with zero divisor"); | ||
69 | ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a; | 78 | ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a; |
70 | ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b; | 79 | ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b; |
71 | y = ua % ub; | 80 | y = ua % ub; |
@@ -75,38 +84,14 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | |||
75 | } | 84 | } |
76 | #endif | 85 | #endif |
77 | 86 | ||
78 | #if !LJ_TARGET_X86ORX64 | 87 | #if LJ_HASJIT |
79 | /* Unsigned x^k. */ | ||
80 | static double lj_vm_powui(double x, uint32_t k) | ||
81 | { | ||
82 | double y; | ||
83 | lua_assert(k != 0); | ||
84 | for (; (k & 1) == 0; k >>= 1) x *= x; | ||
85 | y = x; | ||
86 | if ((k >>= 1) != 0) { | ||
87 | for (;;) { | ||
88 | x *= x; | ||
89 | if (k == 1) break; | ||
90 | if (k & 1) y *= x; | ||
91 | k >>= 1; | ||
92 | } | ||
93 | y *= x; | ||
94 | } | ||
95 | return y; | ||
96 | } | ||
97 | 88 | ||
98 | /* Signed x^k. */ | 89 | #ifdef LUAJIT_NO_LOG2 |
99 | double lj_vm_powi(double x, int32_t k) | 90 | double lj_vm_log2(double a) |
100 | { | 91 | { |
101 | if (k > 1) | 92 | return log(a) * 1.4426950408889634074; |
102 | return lj_vm_powui(x, (uint32_t)k); | ||
103 | else if (k == 1) | ||
104 | return x; | ||
105 | else if (k == 0) | ||
106 | return 1.0; | ||
107 | else | ||
108 | return 1.0 / lj_vm_powui(x, ~(uint32_t)k+1u); | ||
109 | } | 93 | } |
94 | #endif | ||
110 | 95 | ||
111 | /* Computes fpm(x) for extended math functions. */ | 96 | /* Computes fpm(x) for extended math functions. */ |
112 | double lj_vm_foldfpm(double x, int fpm) | 97 | double lj_vm_foldfpm(double x, int fpm) |
@@ -116,19 +101,12 @@ double lj_vm_foldfpm(double x, int fpm) | |||
116 | case IRFPM_CEIL: return lj_vm_ceil(x); | 101 | case IRFPM_CEIL: return lj_vm_ceil(x); |
117 | case IRFPM_TRUNC: return lj_vm_trunc(x); | 102 | case IRFPM_TRUNC: return lj_vm_trunc(x); |
118 | case IRFPM_SQRT: return sqrt(x); | 103 | case IRFPM_SQRT: return sqrt(x); |
119 | case IRFPM_EXP: return exp(x); | ||
120 | case IRFPM_EXP2: return lj_vm_exp2(x); | ||
121 | case IRFPM_LOG: return log(x); | 104 | case IRFPM_LOG: return log(x); |
122 | case IRFPM_LOG2: return lj_vm_log2(x); | 105 | case IRFPM_LOG2: return lj_vm_log2(x); |
123 | case IRFPM_LOG10: return log10(x); | 106 | default: lj_assertX(0, "bad fpm %d", fpm); |
124 | case IRFPM_SIN: return sin(x); | ||
125 | case IRFPM_COS: return cos(x); | ||
126 | case IRFPM_TAN: return tan(x); | ||
127 | default: lua_assert(0); | ||
128 | } | 107 | } |
129 | return 0; | 108 | return 0; |
130 | } | 109 | } |
131 | #endif | ||
132 | 110 | ||
133 | #if LJ_HASFFI | 111 | #if LJ_HASFFI |
134 | int lj_vm_errno(void) | 112 | int lj_vm_errno(void) |