diff options
| author | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2022-01-24 14:37:50 +0100 |
| commit | 9512d5c1aced61e13e7be2d3208ec7ae3516b458 (patch) | |
| tree | c31882578a670847adb37475362b7d21ae9bc099 /src | |
| parent | c18acfe7565b9b20be0a73563f535766233ad78a (diff) | |
| download | luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.gz luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.tar.bz2 luajit-9512d5c1aced61e13e7be2d3208ec7ae3516b458.zip | |
Fix pow() optimization inconsistencies.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm.c | 7 | ||||
| -rw-r--r-- | src/lj_asm_x86.h | 13 | ||||
| -rw-r--r-- | src/lj_dispatch.h | 2 | ||||
| -rw-r--r-- | src/lj_ircall.h | 2 | ||||
| -rw-r--r-- | src/lj_opt_fold.c | 27 | ||||
| -rw-r--r-- | src/lj_opt_narrow.c | 12 | ||||
| -rw-r--r-- | src/lj_vm.h | 7 | ||||
| -rw-r--r-- | src/lj_vmmath.c | 82 | ||||
| -rw-r--r-- | src/vm_arm.dasc | 13 | ||||
| -rw-r--r-- | src/vm_arm64.dasc | 11 | ||||
| -rw-r--r-- | src/vm_mips.dasc | 11 | ||||
| -rw-r--r-- | src/vm_mips64.dasc | 11 | ||||
| -rw-r--r-- | src/vm_ppc.dasc | 11 | ||||
| -rw-r--r-- | src/vm_x64.dasc | 44 | ||||
| -rw-r--r-- | src/vm_x86.dasc | 46 |
15 files changed, 104 insertions, 195 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 7abafbf4..eaee5547 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -1670,7 +1670,6 @@ static void asm_loop(ASMState *as) | |||
| 1670 | #if !LJ_SOFTFP32 | 1670 | #if !LJ_SOFTFP32 |
| 1671 | #if !LJ_TARGET_X86ORX64 | 1671 | #if !LJ_TARGET_X86ORX64 |
| 1672 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | 1672 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) |
| 1673 | #define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
| 1674 | #endif | 1673 | #endif |
| 1675 | 1674 | ||
| 1676 | static void asm_pow(ASMState *as, IRIns *ir) | 1675 | static void asm_pow(ASMState *as, IRIns *ir) |
| @@ -1681,10 +1680,8 @@ static void asm_pow(ASMState *as, IRIns *ir) | |||
| 1681 | IRCALL_lj_carith_powu64); | 1680 | IRCALL_lj_carith_powu64); |
| 1682 | else | 1681 | else |
| 1683 | #endif | 1682 | #endif |
| 1684 | if (irt_isnum(IR(ir->op2)->t)) | 1683 | asm_callid(as, ir, irt_isnum(IR(ir->op2)->t) ? IRCALL_lj_vm_pow : |
| 1685 | asm_callid(as, ir, IRCALL_pow); | 1684 | IRCALL_lj_vm_powi); |
| 1686 | else | ||
| 1687 | asm_fppowi(as, ir); | ||
| 1688 | } | 1685 | } |
| 1689 | 1686 | ||
| 1690 | static void asm_div(ASMState *as, IRIns *ir) | 1687 | static void asm_div(ASMState *as, IRIns *ir) |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 1ef7c38f..38069e1d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
| @@ -2017,19 +2017,6 @@ static void asm_ldexp(ASMState *as, IRIns *ir) | |||
| 2017 | asm_x87load(as, ir->op2); | 2017 | asm_x87load(as, ir->op2); |
| 2018 | } | 2018 | } |
| 2019 | 2019 | ||
| 2020 | static void asm_fppowi(ASMState *as, IRIns *ir) | ||
| 2021 | { | ||
| 2022 | /* The modified regs must match with the *.dasc implementation. */ | ||
| 2023 | RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | ||
| 2024 | if (ra_hasreg(ir->r)) | ||
| 2025 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
| 2026 | ra_evictset(as, drop); | ||
| 2027 | ra_destreg(as, ir, RID_XMM0); | ||
| 2028 | emit_call(as, lj_vm_powi_sse); | ||
| 2029 | ra_left(as, RID_XMM0, ir->op1); | ||
| 2030 | ra_left(as, RID_EAX, ir->op2); | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | static int asm_swapops(ASMState *as, IRIns *ir) | 2020 | static int asm_swapops(ASMState *as, IRIns *ir) |
| 2034 | { | 2021 | { |
| 2035 | IRIns *irl = IR(ir->op1); | 2022 | IRIns *irl = IR(ir->op1); |
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 0594af51..d0f86fab 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h | |||
| @@ -44,7 +44,7 @@ extern double __divdf3(double a, double b); | |||
| 44 | #define GOTDEF(_) \ | 44 | #define GOTDEF(_) \ |
| 45 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ | 45 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ |
| 46 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ | 46 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ |
| 47 | _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ | 47 | _(lj_vm_pow) _(fmod) _(ldexp) _(lj_vm_modi) \ |
| 48 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ | 48 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ |
| 49 | _(lj_dispatch_profile) _(lj_err_throw) \ | 49 | _(lj_dispatch_profile) _(lj_err_throw) \ |
| 50 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ | 50 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 7fc3d1fd..c4d4a7b8 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
| @@ -218,7 +218,7 @@ typedef struct CCallInfo { | |||
| 218 | _(ANY, log, 1, N, NUM, XA_FP) \ | 218 | _(ANY, log, 1, N, NUM, XA_FP) \ |
| 219 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ | 219 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ |
| 220 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ | 220 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ |
| 221 | _(ANY, pow, 2, N, NUM, XA2_FP) \ | 221 | _(ANY, lj_vm_pow, 2, N, NUM, XA2_FP) \ |
| 222 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ | 222 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ |
| 223 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ | 223 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ |
| 224 | _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ | 224 | _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 8200b240..34f70e27 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -1143,33 +1143,6 @@ LJFOLDF(simplify_numpow_xkint) | |||
| 1143 | return ref; | 1143 | return ref; |
| 1144 | } | 1144 | } |
| 1145 | 1145 | ||
| 1146 | LJFOLD(POW any KNUM) | ||
| 1147 | LJFOLDF(simplify_numpow_xknum) | ||
| 1148 | { | ||
| 1149 | if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */ | ||
| 1150 | return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT); | ||
| 1151 | return NEXTFOLD; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | LJFOLD(POW KNUM any) | ||
| 1155 | LJFOLDF(simplify_numpow_kx) | ||
| 1156 | { | ||
| 1157 | lua_Number n = knumleft; | ||
| 1158 | if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */ | ||
| 1159 | #if LJ_TARGET_X86ORX64 | ||
| 1160 | /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */ | ||
| 1161 | fins->o = IR_CONV; | ||
| 1162 | fins->op1 = fins->op2; | ||
| 1163 | fins->op2 = IRCONV_NUM_INT; | ||
| 1164 | fins->op2 = (IRRef1)lj_opt_fold(J); | ||
| 1165 | #endif | ||
| 1166 | fins->op1 = (IRRef1)lj_ir_knum_one(J); | ||
| 1167 | fins->o = IR_LDEXP; | ||
| 1168 | return RETRYFOLD; | ||
| 1169 | } | ||
| 1170 | return NEXTFOLD; | ||
| 1171 | } | ||
| 1172 | |||
| 1173 | /* -- Simplify conversions ------------------------------------------------ */ | 1146 | /* -- Simplify conversions ------------------------------------------------ */ |
| 1174 | 1147 | ||
| 1175 | LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ | 1148 | LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 57b19613..fe92468e 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
| @@ -590,20 +590,14 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
| 590 | rb = conv_str_tonum(J, rb, vb); | 590 | rb = conv_str_tonum(J, rb, vb); |
| 591 | rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ | 591 | rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ |
| 592 | rc = conv_str_tonum(J, rc, vc); | 592 | rc = conv_str_tonum(J, rc, vc); |
| 593 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ | ||
| 594 | if (tvisint(vc) || numisint(numV(vc))) { | 593 | if (tvisint(vc) || numisint(numV(vc))) { |
| 595 | int checkrange = 0; | 594 | int32_t k = numberVint(vc); |
| 596 | /* pow() is faster for bigger exponents. But do this only for (+k)^i. */ | 595 | if (!(k >= -65536 && k <= 65536)) goto force_pow_num; |
| 597 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { | ||
| 598 | int32_t k = numberVint(vc); | ||
| 599 | if (!(k >= -65536 && k <= 65536)) goto force_pow_num; | ||
| 600 | checkrange = 1; | ||
| 601 | } | ||
| 602 | if (!tref_isinteger(rc)) { | 596 | if (!tref_isinteger(rc)) { |
| 603 | /* Guarded conversion to integer! */ | 597 | /* Guarded conversion to integer! */ |
| 604 | rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); | 598 | rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); |
| 605 | } | 599 | } |
| 606 | if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ | 600 | if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ |
| 607 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); | 601 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); |
| 608 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); | 602 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); |
| 609 | } | 603 | } |
diff --git a/src/lj_vm.h b/src/lj_vm.h index 7713d16b..bfa7e0fd 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
| @@ -83,10 +83,6 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | |||
| 83 | LJ_ASMF void lj_vm_floor_sse(void); | 83 | LJ_ASMF void lj_vm_floor_sse(void); |
| 84 | LJ_ASMF void lj_vm_ceil_sse(void); | 84 | LJ_ASMF void lj_vm_ceil_sse(void); |
| 85 | LJ_ASMF void lj_vm_trunc_sse(void); | 85 | LJ_ASMF void lj_vm_trunc_sse(void); |
| 86 | LJ_ASMF void lj_vm_powi_sse(void); | ||
| 87 | #define lj_vm_powi NULL | ||
| 88 | #else | ||
| 89 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
| 90 | #endif | 86 | #endif |
| 91 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 | 87 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 |
| 92 | #define lj_vm_trunc trunc | 88 | #define lj_vm_trunc trunc |
| @@ -102,6 +98,9 @@ LJ_ASMF int lj_vm_errno(void); | |||
| 102 | LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); | 98 | LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); |
| 103 | #endif | 99 | #endif |
| 104 | 100 | ||
| 101 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
| 102 | LJ_ASMF double lj_vm_pow(double, double); | ||
| 103 | |||
| 105 | /* Continuations for metamethods. */ | 104 | /* Continuations for metamethods. */ |
| 106 | LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ | 105 | LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ |
| 107 | LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */ | 106 | LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */ |
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 536199d8..fa0de922 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
| @@ -30,11 +30,51 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | |||
| 30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | 30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } |
| 31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | 31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } |
| 32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | 32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } |
| 33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
| 34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | 33 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } |
| 35 | #endif | 34 | #endif |
| 36 | 35 | ||
| 37 | /* -- Helper functions for generated machine code ------------------------- */ | 36 | /* -- Helper functions ---------------------------------------------------- */ |
| 37 | |||
| 38 | /* Unsigned x^k. */ | ||
| 39 | static double lj_vm_powui(double x, uint32_t k) | ||
| 40 | { | ||
| 41 | double y; | ||
| 42 | lj_assertX(k != 0, "pow with zero exponent"); | ||
| 43 | for (; (k & 1) == 0; k >>= 1) x *= x; | ||
| 44 | y = x; | ||
| 45 | if ((k >>= 1) != 0) { | ||
| 46 | for (;;) { | ||
| 47 | x *= x; | ||
| 48 | if (k == 1) break; | ||
| 49 | if (k & 1) y *= x; | ||
| 50 | k >>= 1; | ||
| 51 | } | ||
| 52 | y *= x; | ||
| 53 | } | ||
| 54 | return y; | ||
| 55 | } | ||
| 56 | |||
| 57 | /* Signed x^k. */ | ||
| 58 | double lj_vm_powi(double x, int32_t k) | ||
| 59 | { | ||
| 60 | if (k > 1) | ||
| 61 | return lj_vm_powui(x, (uint32_t)k); | ||
| 62 | else if (k == 1) | ||
| 63 | return x; | ||
| 64 | else if (k == 0) | ||
| 65 | return 1.0; | ||
| 66 | else | ||
| 67 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | ||
| 68 | } | ||
| 69 | |||
| 70 | double lj_vm_pow(double x, double y) | ||
| 71 | { | ||
| 72 | int32_t k = lj_num2int(y); | ||
| 73 | if ((k >= -65536 && k <= 65536) && y == (double)k) | ||
| 74 | return lj_vm_powi(x, k); | ||
| 75 | else | ||
| 76 | return pow(x, y); | ||
| 77 | } | ||
| 38 | 78 | ||
| 39 | double lj_vm_foldarith(double x, double y, int op) | 79 | double lj_vm_foldarith(double x, double y, int op) |
| 40 | { | 80 | { |
| @@ -44,7 +84,7 @@ double lj_vm_foldarith(double x, double y, int op) | |||
| 44 | case IR_MUL - IR_ADD: return x*y; break; | 84 | case IR_MUL - IR_ADD: return x*y; break; |
| 45 | case IR_DIV - IR_ADD: return x/y; break; | 85 | case IR_DIV - IR_ADD: return x/y; break; |
| 46 | case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; | 86 | case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; |
| 47 | case IR_POW - IR_ADD: return pow(x, y); break; | 87 | case IR_POW - IR_ADD: return lj_vm_pow(x, y); break; |
| 48 | case IR_NEG - IR_ADD: return -x; break; | 88 | case IR_NEG - IR_ADD: return -x; break; |
| 49 | case IR_ABS - IR_ADD: return fabs(x); break; | 89 | case IR_ABS - IR_ADD: return fabs(x); break; |
| 50 | #if LJ_HASJIT | 90 | #if LJ_HASJIT |
| @@ -56,6 +96,8 @@ double lj_vm_foldarith(double x, double y, int op) | |||
| 56 | } | 96 | } |
| 57 | } | 97 | } |
| 58 | 98 | ||
| 99 | /* -- Helper functions for generated machine code ------------------------- */ | ||
| 100 | |||
| 59 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS | 101 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS |
| 60 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | 102 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) |
| 61 | { | 103 | { |
| @@ -80,40 +122,6 @@ double lj_vm_log2(double a) | |||
| 80 | } | 122 | } |
| 81 | #endif | 123 | #endif |
| 82 | 124 | ||
| 83 | #if !LJ_TARGET_X86ORX64 | ||
| 84 | /* Unsigned x^k. */ | ||
| 85 | static double lj_vm_powui(double x, uint32_t k) | ||
| 86 | { | ||
| 87 | double y; | ||
| 88 | lj_assertX(k != 0, "pow with zero exponent"); | ||
| 89 | for (; (k & 1) == 0; k >>= 1) x *= x; | ||
| 90 | y = x; | ||
| 91 | if ((k >>= 1) != 0) { | ||
| 92 | for (;;) { | ||
| 93 | x *= x; | ||
| 94 | if (k == 1) break; | ||
| 95 | if (k & 1) y *= x; | ||
| 96 | k >>= 1; | ||
| 97 | } | ||
| 98 | y *= x; | ||
| 99 | } | ||
| 100 | return y; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* Signed x^k. */ | ||
| 104 | double lj_vm_powi(double x, int32_t k) | ||
| 105 | { | ||
| 106 | if (k > 1) | ||
| 107 | return lj_vm_powui(x, (uint32_t)k); | ||
| 108 | else if (k == 1) | ||
| 109 | return x; | ||
| 110 | else if (k == 0) | ||
| 111 | return 1.0; | ||
| 112 | else | ||
| 113 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | ||
| 114 | } | ||
| 115 | #endif | ||
| 116 | |||
| 117 | /* Computes fpm(x) for extended math functions. */ | 125 | /* Computes fpm(x) for extended math functions. */ |
| 118 | double lj_vm_foldfpm(double x, int fpm) | 126 | double lj_vm_foldfpm(double x, int fpm) |
| 119 | { | 127 | { |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 770c1602..636619fd 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
| @@ -1477,11 +1477,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1477 | |.endif | 1477 | |.endif |
| 1478 | |.endmacro | 1478 | |.endmacro |
| 1479 | | | 1479 | | |
| 1480 | |.macro math_extern2, func | 1480 | |.macro math_extern2, name, func |
| 1481 | |.if HFABI | 1481 | |.if HFABI |
| 1482 | | .ffunc_dd math_ .. func | 1482 | | .ffunc_dd math_ .. name |
| 1483 | |.else | 1483 | |.else |
| 1484 | | .ffunc_nn math_ .. func | 1484 | | .ffunc_nn math_ .. name |
| 1485 | |.endif | 1485 | |.endif |
| 1486 | | .IOS mov RA, BASE | 1486 | | .IOS mov RA, BASE |
| 1487 | | bl extern func | 1487 | | bl extern func |
| @@ -1492,6 +1492,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1492 | | b ->fff_restv | 1492 | | b ->fff_restv |
| 1493 | |.endif | 1493 | |.endif |
| 1494 | |.endmacro | 1494 | |.endmacro |
| 1495 | |.macro math_extern2, func | ||
| 1496 | | math_extern2 func, func | ||
| 1497 | |.endmacro | ||
| 1495 | | | 1498 | | |
| 1496 | |.if FPU | 1499 | |.if FPU |
| 1497 | | .ffunc_d math_sqrt | 1500 | | .ffunc_d math_sqrt |
| @@ -1537,7 +1540,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1537 | | math_extern sinh | 1540 | | math_extern sinh |
| 1538 | | math_extern cosh | 1541 | | math_extern cosh |
| 1539 | | math_extern tanh | 1542 | | math_extern tanh |
| 1540 | | math_extern2 pow | 1543 | | math_extern2 pow, lj_vm_pow |
| 1541 | | math_extern2 atan2 | 1544 | | math_extern2 atan2 |
| 1542 | | math_extern2 fmod | 1545 | | math_extern2 fmod |
| 1543 | | | 1546 | | |
| @@ -3203,7 +3206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3203 | break; | 3206 | break; |
| 3204 | case BC_POW: | 3207 | case BC_POW: |
| 3205 | | // NYI: (partial) integer arithmetic. | 3208 | | // NYI: (partial) integer arithmetic. |
| 3206 | | ins_arithfp extern, extern pow | 3209 | | ins_arithfp extern, extern lj_vm_pow |
| 3207 | break; | 3210 | break; |
| 3208 | 3211 | ||
| 3209 | case BC_CAT: | 3212 | case BC_CAT: |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index f5f1b5f1..7ef9ffba 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
| @@ -1387,11 +1387,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1387 | | b ->fff_resn | 1387 | | b ->fff_resn |
| 1388 | |.endmacro | 1388 | |.endmacro |
| 1389 | | | 1389 | | |
| 1390 | |.macro math_extern2, func | 1390 | |.macro math_extern2, name, func |
| 1391 | | .ffunc_nn math_ .. func | 1391 | | .ffunc_nn math_ .. name |
| 1392 | | bl extern func | 1392 | | bl extern func |
| 1393 | | b ->fff_resn | 1393 | | b ->fff_resn |
| 1394 | |.endmacro | 1394 | |.endmacro |
| 1395 | |.macro math_extern2, func | ||
| 1396 | | math_extern2 func, func | ||
| 1397 | |.endmacro | ||
| 1395 | | | 1398 | | |
| 1396 | |.ffunc_n math_sqrt | 1399 | |.ffunc_n math_sqrt |
| 1397 | | fsqrt d0, d0 | 1400 | | fsqrt d0, d0 |
| @@ -1420,7 +1423,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1420 | | math_extern sinh | 1423 | | math_extern sinh |
| 1421 | | math_extern cosh | 1424 | | math_extern cosh |
| 1422 | | math_extern tanh | 1425 | | math_extern tanh |
| 1423 | | math_extern2 pow | 1426 | | math_extern2 pow, lj_vm_pow |
| 1424 | | math_extern2 atan2 | 1427 | | math_extern2 atan2 |
| 1425 | | math_extern2 fmod | 1428 | | math_extern2 fmod |
| 1426 | | | 1429 | | |
| @@ -2674,7 +2677,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2674 | | ins_arithload FARG1, FARG2 | 2677 | | ins_arithload FARG1, FARG2 |
| 2675 | | ins_arithfallback ins_arithcheck_num | 2678 | | ins_arithfallback ins_arithcheck_num |
| 2676 | |.if "fpins" == "fpow" | 2679 | |.if "fpins" == "fpow" |
| 2677 | | bl extern pow | 2680 | | bl extern lj_vm_pow |
| 2678 | |.else | 2681 | |.else |
| 2679 | | fpins FARG1, FARG1, FARG2 | 2682 | | fpins FARG1, FARG1, FARG2 |
| 2680 | |.endif | 2683 | |.endif |
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 34645bf1..cf791f74 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc | |||
| @@ -1623,14 +1623,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1623 | |. nop | 1623 | |. nop |
| 1624 | |.endmacro | 1624 | |.endmacro |
| 1625 | | | 1625 | | |
| 1626 | |.macro math_extern2, func | 1626 | |.macro math_extern2, name, func |
| 1627 | | .ffunc_nn math_ .. func | 1627 | | .ffunc_nn math_ .. name |
| 1628 | |. load_got func | 1628 | |. load_got func |
| 1629 | | call_extern | 1629 | | call_extern |
| 1630 | |. nop | 1630 | |. nop |
| 1631 | | b ->fff_resn | 1631 | | b ->fff_resn |
| 1632 | |. nop | 1632 | |. nop |
| 1633 | |.endmacro | 1633 | |.endmacro |
| 1634 | |.macro math_extern2, func | ||
| 1635 | | math_extern2 func, func | ||
| 1636 | |.endmacro | ||
| 1634 | | | 1637 | | |
| 1635 | |// TODO: Return integer type if result is integer (own sf implementation). | 1638 | |// TODO: Return integer type if result is integer (own sf implementation). |
| 1636 | |.macro math_round, func | 1639 | |.macro math_round, func |
| @@ -1684,7 +1687,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1684 | | math_extern sinh | 1687 | | math_extern sinh |
| 1685 | | math_extern cosh | 1688 | | math_extern cosh |
| 1686 | | math_extern tanh | 1689 | | math_extern tanh |
| 1687 | | math_extern2 pow | 1690 | | math_extern2 pow, lj_vm_pow |
| 1688 | | math_extern2 atan2 | 1691 | | math_extern2 atan2 |
| 1689 | | math_extern2 fmod | 1692 | | math_extern2 fmod |
| 1690 | | | 1693 | | |
| @@ -3689,7 +3692,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3689 | | sltiu AT, SFARG1HI, LJ_TISNUM | 3692 | | sltiu AT, SFARG1HI, LJ_TISNUM |
| 3690 | | sltiu TMP0, SFARG2HI, LJ_TISNUM | 3693 | | sltiu TMP0, SFARG2HI, LJ_TISNUM |
| 3691 | | and AT, AT, TMP0 | 3694 | | and AT, AT, TMP0 |
| 3692 | | load_got pow | 3695 | | load_got lj_vm_pow |
| 3693 | | beqz AT, ->vmeta_arith | 3696 | | beqz AT, ->vmeta_arith |
| 3694 | |. addu RA, BASE, RA | 3697 | |. addu RA, BASE, RA |
| 3695 | |.if FPU | 3698 | |.if FPU |
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 651bc42e..3b916379 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc | |||
| @@ -1667,14 +1667,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1667 | |. nop | 1667 | |. nop |
| 1668 | |.endmacro | 1668 | |.endmacro |
| 1669 | | | 1669 | | |
| 1670 | |.macro math_extern2, func | 1670 | |.macro math_extern2, name, func |
| 1671 | | .ffunc_nn math_ .. func | 1671 | | .ffunc_nn math_ .. name |
| 1672 | |. load_got func | 1672 | |. load_got func |
| 1673 | | call_extern | 1673 | | call_extern |
| 1674 | |. nop | 1674 | |. nop |
| 1675 | | b ->fff_resn | 1675 | | b ->fff_resn |
| 1676 | |. nop | 1676 | |. nop |
| 1677 | |.endmacro | 1677 | |.endmacro |
| 1678 | |.macro math_extern2, func | ||
| 1679 | | math_extern2 func, func | ||
| 1680 | |.endmacro | ||
| 1678 | | | 1681 | | |
| 1679 | |// TODO: Return integer type if result is integer (own sf implementation). | 1682 | |// TODO: Return integer type if result is integer (own sf implementation). |
| 1680 | |.macro math_round, func | 1683 | |.macro math_round, func |
| @@ -1728,7 +1731,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1728 | | math_extern sinh | 1731 | | math_extern sinh |
| 1729 | | math_extern cosh | 1732 | | math_extern cosh |
| 1730 | | math_extern tanh | 1733 | | math_extern tanh |
| 1731 | | math_extern2 pow | 1734 | | math_extern2 pow, lj_vm_pow |
| 1732 | | math_extern2 atan2 | 1735 | | math_extern2 atan2 |
| 1733 | | math_extern2 fmod | 1736 | | math_extern2 fmod |
| 1734 | | | 1737 | | |
| @@ -3915,7 +3918,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3915 | | sltiu TMP0, TMP0, LJ_TISNUM | 3918 | | sltiu TMP0, TMP0, LJ_TISNUM |
| 3916 | | sltiu TMP1, TMP1, LJ_TISNUM | 3919 | | sltiu TMP1, TMP1, LJ_TISNUM |
| 3917 | | and AT, TMP0, TMP1 | 3920 | | and AT, TMP0, TMP1 |
| 3918 | | load_got pow | 3921 | | load_got lj_vm_pow |
| 3919 | | beqz AT, ->vmeta_arith | 3922 | | beqz AT, ->vmeta_arith |
| 3920 | |. daddu RA, BASE, RA | 3923 | |. daddu RA, BASE, RA |
| 3921 | |.if FPU | 3924 | |.if FPU |
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 3cad37d2..cc4d56d7 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc | |||
| @@ -2012,11 +2012,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2012 | | b ->fff_resn | 2012 | | b ->fff_resn |
| 2013 | |.endmacro | 2013 | |.endmacro |
| 2014 | | | 2014 | | |
| 2015 | |.macro math_extern2, func | 2015 | |.macro math_extern2, name, func |
| 2016 | | .ffunc_nn math_ .. func | 2016 | | .ffunc_nn math_ .. name |
| 2017 | | blex func | 2017 | | blex func |
| 2018 | | b ->fff_resn | 2018 | | b ->fff_resn |
| 2019 | |.endmacro | 2019 | |.endmacro |
| 2020 | |.macro math_extern2, func | ||
| 2021 | | math_extern2 func, func | ||
| 2022 | |.endmacro | ||
| 2020 | | | 2023 | | |
| 2021 | |.macro math_round, func | 2024 | |.macro math_round, func |
| 2022 | | .ffunc_1 math_ .. func | 2025 | | .ffunc_1 math_ .. func |
| @@ -2141,7 +2144,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2141 | | math_extern sinh | 2144 | | math_extern sinh |
| 2142 | | math_extern cosh | 2145 | | math_extern cosh |
| 2143 | | math_extern tanh | 2146 | | math_extern tanh |
| 2144 | | math_extern2 pow | 2147 | | math_extern2 pow, lj_vm_pow |
| 2145 | | math_extern2 atan2 | 2148 | | math_extern2 atan2 |
| 2146 | | math_extern2 fmod | 2149 | | math_extern2 fmod |
| 2147 | | | 2150 | | |
| @@ -4139,7 +4142,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4139 | | checknum cr1, CARG3 | 4142 | | checknum cr1, CARG3 |
| 4140 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 4143 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
| 4141 | | bge ->vmeta_arith_vv | 4144 | | bge ->vmeta_arith_vv |
| 4142 | | blex pow | 4145 | | blex lj_vm_pow |
| 4143 | | ins_next1 | 4146 | | ins_next1 |
| 4144 | |.if FPU | 4147 | |.if FPU |
| 4145 | | stfdx FARG1, BASE, RA | 4148 | | stfdx FARG1, BASE, RA |
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index b222190a..4aa8589c 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc | |||
| @@ -1755,13 +1755,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1755 | | jmp ->fff_resxmm0 | 1755 | | jmp ->fff_resxmm0 |
| 1756 | |.endmacro | 1756 | |.endmacro |
| 1757 | | | 1757 | | |
| 1758 | |.macro math_extern2, func | 1758 | |.macro math_extern2, name, func |
| 1759 | | .ffunc_nn math_ .. func | 1759 | | .ffunc_nn math_ .. name |
| 1760 | | mov RB, BASE | 1760 | | mov RB, BASE |
| 1761 | | call extern func | 1761 | | call extern func |
| 1762 | | mov BASE, RB | 1762 | | mov BASE, RB |
| 1763 | | jmp ->fff_resxmm0 | 1763 | | jmp ->fff_resxmm0 |
| 1764 | |.endmacro | 1764 | |.endmacro |
| 1765 | |.macro math_extern2, func | ||
| 1766 | | math_extern2 func, func | ||
| 1767 | |.endmacro | ||
| 1765 | | | 1768 | | |
| 1766 | | math_extern log10 | 1769 | | math_extern log10 |
| 1767 | | math_extern exp | 1770 | | math_extern exp |
| @@ -1774,7 +1777,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1774 | | math_extern sinh | 1777 | | math_extern sinh |
| 1775 | | math_extern cosh | 1778 | | math_extern cosh |
| 1776 | | math_extern tanh | 1779 | | math_extern tanh |
| 1777 | | math_extern2 pow | 1780 | | math_extern2 pow, lj_vm_pow |
| 1778 | | math_extern2 atan2 | 1781 | | math_extern2 atan2 |
| 1779 | | math_extern2 fmod | 1782 | | math_extern2 fmod |
| 1780 | | | 1783 | | |
| @@ -2579,41 +2582,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2579 | | subsd xmm0, xmm1 | 2582 | | subsd xmm0, xmm1 |
| 2580 | | ret | 2583 | | ret |
| 2581 | | | 2584 | | |
| 2582 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
| 2583 | |->vm_powi_sse: | ||
| 2584 | | cmp eax, 1; jle >6 // i<=1? | ||
| 2585 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
| 2586 | |1: // Handle leading zeros. | ||
| 2587 | | test eax, 1; jnz >2 | ||
| 2588 | | mulsd xmm0, xmm0 | ||
| 2589 | | shr eax, 1 | ||
| 2590 | | jmp <1 | ||
| 2591 | |2: | ||
| 2592 | | shr eax, 1; jz >5 | ||
| 2593 | | movaps xmm1, xmm0 | ||
| 2594 | |3: // Handle trailing bits. | ||
| 2595 | | mulsd xmm0, xmm0 | ||
| 2596 | | shr eax, 1; jz >4 | ||
| 2597 | | jnc <3 | ||
| 2598 | | mulsd xmm1, xmm0 | ||
| 2599 | | jmp <3 | ||
| 2600 | |4: | ||
| 2601 | | mulsd xmm0, xmm1 | ||
| 2602 | |5: | ||
| 2603 | | ret | ||
| 2604 | |6: | ||
| 2605 | | je <5 // x^1 ==> x | ||
| 2606 | | jb >7 // x^0 ==> 1 | ||
| 2607 | | neg eax | ||
| 2608 | | call <1 | ||
| 2609 | | sseconst_1 xmm1, RD | ||
| 2610 | | divsd xmm1, xmm0 | ||
| 2611 | | movaps xmm0, xmm1 | ||
| 2612 | | ret | ||
| 2613 | |7: | ||
| 2614 | | sseconst_1 xmm0, RD | ||
| 2615 | | ret | ||
| 2616 | | | ||
| 2617 | |//----------------------------------------------------------------------- | 2585 | |//----------------------------------------------------------------------- |
| 2618 | |//-- Miscellaneous functions -------------------------------------------- | 2586 | |//-- Miscellaneous functions -------------------------------------------- |
| 2619 | |//----------------------------------------------------------------------- | 2587 | |//----------------------------------------------------------------------- |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index eb56840a..36af852d 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
| @@ -2138,8 +2138,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2138 | | jmp ->fff_resfp | 2138 | | jmp ->fff_resfp |
| 2139 | |.endmacro | 2139 | |.endmacro |
| 2140 | | | 2140 | | |
| 2141 | |.macro math_extern2, func | 2141 | |.macro math_extern2, name, func |
| 2142 | | .ffunc_nnsse math_ .. func | 2142 | | .ffunc_nnsse math_ .. name |
| 2143 | |.if not X64 | 2143 | |.if not X64 |
| 2144 | | movsd FPARG1, xmm0 | 2144 | | movsd FPARG1, xmm0 |
| 2145 | | movsd FPARG3, xmm1 | 2145 | | movsd FPARG3, xmm1 |
| @@ -2149,6 +2149,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2149 | | mov BASE, RB | 2149 | | mov BASE, RB |
| 2150 | | jmp ->fff_resfp | 2150 | | jmp ->fff_resfp |
| 2151 | |.endmacro | 2151 | |.endmacro |
| 2152 | |.macro math_extern2, func | ||
| 2153 | | math_extern2 func, func | ||
| 2154 | |.endmacro | ||
| 2152 | | | 2155 | | |
| 2153 | | math_extern log10 | 2156 | | math_extern log10 |
| 2154 | | math_extern exp | 2157 | | math_extern exp |
| @@ -2161,7 +2164,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2161 | | math_extern sinh | 2164 | | math_extern sinh |
| 2162 | | math_extern cosh | 2165 | | math_extern cosh |
| 2163 | | math_extern tanh | 2166 | | math_extern tanh |
| 2164 | | math_extern2 pow | 2167 | | math_extern2 pow, lj_vm_pow |
| 2165 | | math_extern2 atan2 | 2168 | | math_extern2 atan2 |
| 2166 | | math_extern2 fmod | 2169 | | math_extern2 fmod |
| 2167 | | | 2170 | | |
| @@ -3038,41 +3041,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3038 | | subsd xmm0, xmm1 | 3041 | | subsd xmm0, xmm1 |
| 3039 | | ret | 3042 | | ret |
| 3040 | | | 3043 | | |
| 3041 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
| 3042 | |->vm_powi_sse: | ||
| 3043 | | cmp eax, 1; jle >6 // i<=1? | ||
| 3044 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
| 3045 | |1: // Handle leading zeros. | ||
| 3046 | | test eax, 1; jnz >2 | ||
| 3047 | | mulsd xmm0, xmm0 | ||
| 3048 | | shr eax, 1 | ||
| 3049 | | jmp <1 | ||
| 3050 | |2: | ||
| 3051 | | shr eax, 1; jz >5 | ||
| 3052 | | movaps xmm1, xmm0 | ||
| 3053 | |3: // Handle trailing bits. | ||
| 3054 | | mulsd xmm0, xmm0 | ||
| 3055 | | shr eax, 1; jz >4 | ||
| 3056 | | jnc <3 | ||
| 3057 | | mulsd xmm1, xmm0 | ||
| 3058 | | jmp <3 | ||
| 3059 | |4: | ||
| 3060 | | mulsd xmm0, xmm1 | ||
| 3061 | |5: | ||
| 3062 | | ret | ||
| 3063 | |6: | ||
| 3064 | | je <5 // x^1 ==> x | ||
| 3065 | | jb >7 // x^0 ==> 1 | ||
| 3066 | | neg eax | ||
| 3067 | | call <1 | ||
| 3068 | | sseconst_1 xmm1, RDa | ||
| 3069 | | divsd xmm1, xmm0 | ||
| 3070 | | movaps xmm0, xmm1 | ||
| 3071 | | ret | ||
| 3072 | |7: | ||
| 3073 | | sseconst_1 xmm0, RDa | ||
| 3074 | | ret | ||
| 3075 | | | ||
| 3076 | |//----------------------------------------------------------------------- | 3044 | |//----------------------------------------------------------------------- |
| 3077 | |//-- Miscellaneous functions -------------------------------------------- | 3045 | |//-- Miscellaneous functions -------------------------------------------- |
| 3078 | |//----------------------------------------------------------------------- | 3046 | |//----------------------------------------------------------------------- |
| @@ -3954,7 +3922,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3954 | | movsd FPARG1, xmm0 | 3922 | | movsd FPARG1, xmm0 |
| 3955 | | movsd FPARG3, xmm1 | 3923 | | movsd FPARG3, xmm1 |
| 3956 | |.endif | 3924 | |.endif |
| 3957 | | call extern pow | 3925 | | call extern lj_vm_pow |
| 3958 | | movzx RA, PC_RA | 3926 | | movzx RA, PC_RA |
| 3959 | | mov BASE, RB | 3927 | | mov BASE, RB |
| 3960 | |.if X64 | 3928 | |.if X64 |
