diff options
-rw-r--r-- | src/lj_asm.c | 7 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 13 | ||||
-rw-r--r-- | src/lj_dispatch.h | 2 | ||||
-rw-r--r-- | src/lj_ircall.h | 2 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 27 | ||||
-rw-r--r-- | src/lj_opt_narrow.c | 12 | ||||
-rw-r--r-- | src/lj_vm.h | 7 | ||||
-rw-r--r-- | src/lj_vmmath.c | 82 | ||||
-rw-r--r-- | src/vm_arm.dasc | 13 | ||||
-rw-r--r-- | src/vm_arm64.dasc | 11 | ||||
-rw-r--r-- | src/vm_mips.dasc | 11 | ||||
-rw-r--r-- | src/vm_mips64.dasc | 11 | ||||
-rw-r--r-- | src/vm_ppc.dasc | 11 | ||||
-rw-r--r-- | src/vm_x64.dasc | 44 | ||||
-rw-r--r-- | src/vm_x86.dasc | 46 |
15 files changed, 104 insertions, 195 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 7abafbf4..eaee5547 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1670,7 +1670,6 @@ static void asm_loop(ASMState *as) | |||
1670 | #if !LJ_SOFTFP32 | 1670 | #if !LJ_SOFTFP32 |
1671 | #if !LJ_TARGET_X86ORX64 | 1671 | #if !LJ_TARGET_X86ORX64 |
1672 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | 1672 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) |
1673 | #define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
1674 | #endif | 1673 | #endif |
1675 | 1674 | ||
1676 | static void asm_pow(ASMState *as, IRIns *ir) | 1675 | static void asm_pow(ASMState *as, IRIns *ir) |
@@ -1681,10 +1680,8 @@ static void asm_pow(ASMState *as, IRIns *ir) | |||
1681 | IRCALL_lj_carith_powu64); | 1680 | IRCALL_lj_carith_powu64); |
1682 | else | 1681 | else |
1683 | #endif | 1682 | #endif |
1684 | if (irt_isnum(IR(ir->op2)->t)) | 1683 | asm_callid(as, ir, irt_isnum(IR(ir->op2)->t) ? IRCALL_lj_vm_pow : |
1685 | asm_callid(as, ir, IRCALL_pow); | 1684 | IRCALL_lj_vm_powi); |
1686 | else | ||
1687 | asm_fppowi(as, ir); | ||
1688 | } | 1685 | } |
1689 | 1686 | ||
1690 | static void asm_div(ASMState *as, IRIns *ir) | 1687 | static void asm_div(ASMState *as, IRIns *ir) |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 1ef7c38f..38069e1d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -2017,19 +2017,6 @@ static void asm_ldexp(ASMState *as, IRIns *ir) | |||
2017 | asm_x87load(as, ir->op2); | 2017 | asm_x87load(as, ir->op2); |
2018 | } | 2018 | } |
2019 | 2019 | ||
2020 | static void asm_fppowi(ASMState *as, IRIns *ir) | ||
2021 | { | ||
2022 | /* The modified regs must match with the *.dasc implementation. */ | ||
2023 | RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | ||
2024 | if (ra_hasreg(ir->r)) | ||
2025 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
2026 | ra_evictset(as, drop); | ||
2027 | ra_destreg(as, ir, RID_XMM0); | ||
2028 | emit_call(as, lj_vm_powi_sse); | ||
2029 | ra_left(as, RID_XMM0, ir->op1); | ||
2030 | ra_left(as, RID_EAX, ir->op2); | ||
2031 | } | ||
2032 | |||
2033 | static int asm_swapops(ASMState *as, IRIns *ir) | 2020 | static int asm_swapops(ASMState *as, IRIns *ir) |
2034 | { | 2021 | { |
2035 | IRIns *irl = IR(ir->op1); | 2022 | IRIns *irl = IR(ir->op1); |
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 0594af51..d0f86fab 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h | |||
@@ -44,7 +44,7 @@ extern double __divdf3(double a, double b); | |||
44 | #define GOTDEF(_) \ | 44 | #define GOTDEF(_) \ |
45 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ | 45 | _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ |
46 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ | 46 | _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ |
47 | _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ | 47 | _(lj_vm_pow) _(fmod) _(ldexp) _(lj_vm_modi) \ |
48 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ | 48 | _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ |
49 | _(lj_dispatch_profile) _(lj_err_throw) \ | 49 | _(lj_dispatch_profile) _(lj_err_throw) \ |
50 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ | 50 | _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 7fc3d1fd..c4d4a7b8 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -218,7 +218,7 @@ typedef struct CCallInfo { | |||
218 | _(ANY, log, 1, N, NUM, XA_FP) \ | 218 | _(ANY, log, 1, N, NUM, XA_FP) \ |
219 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ | 219 | _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ |
220 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ | 220 | _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ |
221 | _(ANY, pow, 2, N, NUM, XA2_FP) \ | 221 | _(ANY, lj_vm_pow, 2, N, NUM, XA2_FP) \ |
222 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ | 222 | _(ANY, atan2, 2, N, NUM, XA2_FP) \ |
223 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ | 223 | _(ANY, ldexp, 2, N, NUM, XA_FP) \ |
224 | _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ | 224 | _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 8200b240..34f70e27 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -1143,33 +1143,6 @@ LJFOLDF(simplify_numpow_xkint) | |||
1143 | return ref; | 1143 | return ref; |
1144 | } | 1144 | } |
1145 | 1145 | ||
1146 | LJFOLD(POW any KNUM) | ||
1147 | LJFOLDF(simplify_numpow_xknum) | ||
1148 | { | ||
1149 | if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */ | ||
1150 | return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT); | ||
1151 | return NEXTFOLD; | ||
1152 | } | ||
1153 | |||
1154 | LJFOLD(POW KNUM any) | ||
1155 | LJFOLDF(simplify_numpow_kx) | ||
1156 | { | ||
1157 | lua_Number n = knumleft; | ||
1158 | if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */ | ||
1159 | #if LJ_TARGET_X86ORX64 | ||
1160 | /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */ | ||
1161 | fins->o = IR_CONV; | ||
1162 | fins->op1 = fins->op2; | ||
1163 | fins->op2 = IRCONV_NUM_INT; | ||
1164 | fins->op2 = (IRRef1)lj_opt_fold(J); | ||
1165 | #endif | ||
1166 | fins->op1 = (IRRef1)lj_ir_knum_one(J); | ||
1167 | fins->o = IR_LDEXP; | ||
1168 | return RETRYFOLD; | ||
1169 | } | ||
1170 | return NEXTFOLD; | ||
1171 | } | ||
1172 | |||
1173 | /* -- Simplify conversions ------------------------------------------------ */ | 1146 | /* -- Simplify conversions ------------------------------------------------ */ |
1174 | 1147 | ||
1175 | LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ | 1148 | LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 57b19613..fe92468e 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
@@ -590,20 +590,14 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
590 | rb = conv_str_tonum(J, rb, vb); | 590 | rb = conv_str_tonum(J, rb, vb); |
591 | rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ | 591 | rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ |
592 | rc = conv_str_tonum(J, rc, vc); | 592 | rc = conv_str_tonum(J, rc, vc); |
593 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ | ||
594 | if (tvisint(vc) || numisint(numV(vc))) { | 593 | if (tvisint(vc) || numisint(numV(vc))) { |
595 | int checkrange = 0; | 594 | int32_t k = numberVint(vc); |
596 | /* pow() is faster for bigger exponents. But do this only for (+k)^i. */ | 595 | if (!(k >= -65536 && k <= 65536)) goto force_pow_num; |
597 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { | ||
598 | int32_t k = numberVint(vc); | ||
599 | if (!(k >= -65536 && k <= 65536)) goto force_pow_num; | ||
600 | checkrange = 1; | ||
601 | } | ||
602 | if (!tref_isinteger(rc)) { | 596 | if (!tref_isinteger(rc)) { |
603 | /* Guarded conversion to integer! */ | 597 | /* Guarded conversion to integer! */ |
604 | rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); | 598 | rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); |
605 | } | 599 | } |
606 | if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ | 600 | if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ |
607 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); | 601 | TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); |
608 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); | 602 | emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); |
609 | } | 603 | } |
diff --git a/src/lj_vm.h b/src/lj_vm.h index 7713d16b..bfa7e0fd 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
@@ -83,10 +83,6 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | |||
83 | LJ_ASMF void lj_vm_floor_sse(void); | 83 | LJ_ASMF void lj_vm_floor_sse(void); |
84 | LJ_ASMF void lj_vm_ceil_sse(void); | 84 | LJ_ASMF void lj_vm_ceil_sse(void); |
85 | LJ_ASMF void lj_vm_trunc_sse(void); | 85 | LJ_ASMF void lj_vm_trunc_sse(void); |
86 | LJ_ASMF void lj_vm_powi_sse(void); | ||
87 | #define lj_vm_powi NULL | ||
88 | #else | ||
89 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
90 | #endif | 86 | #endif |
91 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 | 87 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 |
92 | #define lj_vm_trunc trunc | 88 | #define lj_vm_trunc trunc |
@@ -102,6 +98,9 @@ LJ_ASMF int lj_vm_errno(void); | |||
102 | LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); | 98 | LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); |
103 | #endif | 99 | #endif |
104 | 100 | ||
101 | LJ_ASMF double lj_vm_powi(double, int32_t); | ||
102 | LJ_ASMF double lj_vm_pow(double, double); | ||
103 | |||
105 | /* Continuations for metamethods. */ | 104 | /* Continuations for metamethods. */ |
106 | LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ | 105 | LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ |
107 | LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */ | 106 | LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */ |
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 536199d8..fa0de922 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
@@ -30,11 +30,51 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } | |||
30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } | 30 | LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } |
31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } | 31 | LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } |
32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } | 32 | LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } |
33 | LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } | ||
34 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } | 33 | LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } |
35 | #endif | 34 | #endif |
36 | 35 | ||
37 | /* -- Helper functions for generated machine code ------------------------- */ | 36 | /* -- Helper functions ---------------------------------------------------- */ |
37 | |||
38 | /* Unsigned x^k. */ | ||
39 | static double lj_vm_powui(double x, uint32_t k) | ||
40 | { | ||
41 | double y; | ||
42 | lj_assertX(k != 0, "pow with zero exponent"); | ||
43 | for (; (k & 1) == 0; k >>= 1) x *= x; | ||
44 | y = x; | ||
45 | if ((k >>= 1) != 0) { | ||
46 | for (;;) { | ||
47 | x *= x; | ||
48 | if (k == 1) break; | ||
49 | if (k & 1) y *= x; | ||
50 | k >>= 1; | ||
51 | } | ||
52 | y *= x; | ||
53 | } | ||
54 | return y; | ||
55 | } | ||
56 | |||
57 | /* Signed x^k. */ | ||
58 | double lj_vm_powi(double x, int32_t k) | ||
59 | { | ||
60 | if (k > 1) | ||
61 | return lj_vm_powui(x, (uint32_t)k); | ||
62 | else if (k == 1) | ||
63 | return x; | ||
64 | else if (k == 0) | ||
65 | return 1.0; | ||
66 | else | ||
67 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | ||
68 | } | ||
69 | |||
70 | double lj_vm_pow(double x, double y) | ||
71 | { | ||
72 | int32_t k = lj_num2int(y); | ||
73 | if ((k >= -65536 && k <= 65536) && y == (double)k) | ||
74 | return lj_vm_powi(x, k); | ||
75 | else | ||
76 | return pow(x, y); | ||
77 | } | ||
38 | 78 | ||
39 | double lj_vm_foldarith(double x, double y, int op) | 79 | double lj_vm_foldarith(double x, double y, int op) |
40 | { | 80 | { |
@@ -44,7 +84,7 @@ double lj_vm_foldarith(double x, double y, int op) | |||
44 | case IR_MUL - IR_ADD: return x*y; break; | 84 | case IR_MUL - IR_ADD: return x*y; break; |
45 | case IR_DIV - IR_ADD: return x/y; break; | 85 | case IR_DIV - IR_ADD: return x/y; break; |
46 | case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; | 86 | case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; |
47 | case IR_POW - IR_ADD: return pow(x, y); break; | 87 | case IR_POW - IR_ADD: return lj_vm_pow(x, y); break; |
48 | case IR_NEG - IR_ADD: return -x; break; | 88 | case IR_NEG - IR_ADD: return -x; break; |
49 | case IR_ABS - IR_ADD: return fabs(x); break; | 89 | case IR_ABS - IR_ADD: return fabs(x); break; |
50 | #if LJ_HASJIT | 90 | #if LJ_HASJIT |
@@ -56,6 +96,8 @@ double lj_vm_foldarith(double x, double y, int op) | |||
56 | } | 96 | } |
57 | } | 97 | } |
58 | 98 | ||
99 | /* -- Helper functions for generated machine code ------------------------- */ | ||
100 | |||
59 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS | 101 | #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS |
60 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) | 102 | int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) |
61 | { | 103 | { |
@@ -80,40 +122,6 @@ double lj_vm_log2(double a) | |||
80 | } | 122 | } |
81 | #endif | 123 | #endif |
82 | 124 | ||
83 | #if !LJ_TARGET_X86ORX64 | ||
84 | /* Unsigned x^k. */ | ||
85 | static double lj_vm_powui(double x, uint32_t k) | ||
86 | { | ||
87 | double y; | ||
88 | lj_assertX(k != 0, "pow with zero exponent"); | ||
89 | for (; (k & 1) == 0; k >>= 1) x *= x; | ||
90 | y = x; | ||
91 | if ((k >>= 1) != 0) { | ||
92 | for (;;) { | ||
93 | x *= x; | ||
94 | if (k == 1) break; | ||
95 | if (k & 1) y *= x; | ||
96 | k >>= 1; | ||
97 | } | ||
98 | y *= x; | ||
99 | } | ||
100 | return y; | ||
101 | } | ||
102 | |||
103 | /* Signed x^k. */ | ||
104 | double lj_vm_powi(double x, int32_t k) | ||
105 | { | ||
106 | if (k > 1) | ||
107 | return lj_vm_powui(x, (uint32_t)k); | ||
108 | else if (k == 1) | ||
109 | return x; | ||
110 | else if (k == 0) | ||
111 | return 1.0; | ||
112 | else | ||
113 | return 1.0 / lj_vm_powui(x, (uint32_t)-k); | ||
114 | } | ||
115 | #endif | ||
116 | |||
117 | /* Computes fpm(x) for extended math functions. */ | 125 | /* Computes fpm(x) for extended math functions. */ |
118 | double lj_vm_foldfpm(double x, int fpm) | 126 | double lj_vm_foldfpm(double x, int fpm) |
119 | { | 127 | { |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 770c1602..636619fd 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
@@ -1477,11 +1477,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1477 | |.endif | 1477 | |.endif |
1478 | |.endmacro | 1478 | |.endmacro |
1479 | | | 1479 | | |
1480 | |.macro math_extern2, func | 1480 | |.macro math_extern2, name, func |
1481 | |.if HFABI | 1481 | |.if HFABI |
1482 | | .ffunc_dd math_ .. func | 1482 | | .ffunc_dd math_ .. name |
1483 | |.else | 1483 | |.else |
1484 | | .ffunc_nn math_ .. func | 1484 | | .ffunc_nn math_ .. name |
1485 | |.endif | 1485 | |.endif |
1486 | | .IOS mov RA, BASE | 1486 | | .IOS mov RA, BASE |
1487 | | bl extern func | 1487 | | bl extern func |
@@ -1492,6 +1492,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1492 | | b ->fff_restv | 1492 | | b ->fff_restv |
1493 | |.endif | 1493 | |.endif |
1494 | |.endmacro | 1494 | |.endmacro |
1495 | |.macro math_extern2, func | ||
1496 | | math_extern2 func, func | ||
1497 | |.endmacro | ||
1495 | | | 1498 | | |
1496 | |.if FPU | 1499 | |.if FPU |
1497 | | .ffunc_d math_sqrt | 1500 | | .ffunc_d math_sqrt |
@@ -1537,7 +1540,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1537 | | math_extern sinh | 1540 | | math_extern sinh |
1538 | | math_extern cosh | 1541 | | math_extern cosh |
1539 | | math_extern tanh | 1542 | | math_extern tanh |
1540 | | math_extern2 pow | 1543 | | math_extern2 pow, lj_vm_pow |
1541 | | math_extern2 atan2 | 1544 | | math_extern2 atan2 |
1542 | | math_extern2 fmod | 1545 | | math_extern2 fmod |
1543 | | | 1546 | | |
@@ -3203,7 +3206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3203 | break; | 3206 | break; |
3204 | case BC_POW: | 3207 | case BC_POW: |
3205 | | // NYI: (partial) integer arithmetic. | 3208 | | // NYI: (partial) integer arithmetic. |
3206 | | ins_arithfp extern, extern pow | 3209 | | ins_arithfp extern, extern lj_vm_pow |
3207 | break; | 3210 | break; |
3208 | 3211 | ||
3209 | case BC_CAT: | 3212 | case BC_CAT: |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index f5f1b5f1..7ef9ffba 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
@@ -1387,11 +1387,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1387 | | b ->fff_resn | 1387 | | b ->fff_resn |
1388 | |.endmacro | 1388 | |.endmacro |
1389 | | | 1389 | | |
1390 | |.macro math_extern2, func | 1390 | |.macro math_extern2, name, func |
1391 | | .ffunc_nn math_ .. func | 1391 | | .ffunc_nn math_ .. name |
1392 | | bl extern func | 1392 | | bl extern func |
1393 | | b ->fff_resn | 1393 | | b ->fff_resn |
1394 | |.endmacro | 1394 | |.endmacro |
1395 | |.macro math_extern2, func | ||
1396 | | math_extern2 func, func | ||
1397 | |.endmacro | ||
1395 | | | 1398 | | |
1396 | |.ffunc_n math_sqrt | 1399 | |.ffunc_n math_sqrt |
1397 | | fsqrt d0, d0 | 1400 | | fsqrt d0, d0 |
@@ -1420,7 +1423,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1420 | | math_extern sinh | 1423 | | math_extern sinh |
1421 | | math_extern cosh | 1424 | | math_extern cosh |
1422 | | math_extern tanh | 1425 | | math_extern tanh |
1423 | | math_extern2 pow | 1426 | | math_extern2 pow, lj_vm_pow |
1424 | | math_extern2 atan2 | 1427 | | math_extern2 atan2 |
1425 | | math_extern2 fmod | 1428 | | math_extern2 fmod |
1426 | | | 1429 | | |
@@ -2674,7 +2677,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2674 | | ins_arithload FARG1, FARG2 | 2677 | | ins_arithload FARG1, FARG2 |
2675 | | ins_arithfallback ins_arithcheck_num | 2678 | | ins_arithfallback ins_arithcheck_num |
2676 | |.if "fpins" == "fpow" | 2679 | |.if "fpins" == "fpow" |
2677 | | bl extern pow | 2680 | | bl extern lj_vm_pow |
2678 | |.else | 2681 | |.else |
2679 | | fpins FARG1, FARG1, FARG2 | 2682 | | fpins FARG1, FARG1, FARG2 |
2680 | |.endif | 2683 | |.endif |
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 34645bf1..cf791f74 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc | |||
@@ -1623,14 +1623,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1623 | |. nop | 1623 | |. nop |
1624 | |.endmacro | 1624 | |.endmacro |
1625 | | | 1625 | | |
1626 | |.macro math_extern2, func | 1626 | |.macro math_extern2, name, func |
1627 | | .ffunc_nn math_ .. func | 1627 | | .ffunc_nn math_ .. name |
1628 | |. load_got func | 1628 | |. load_got func |
1629 | | call_extern | 1629 | | call_extern |
1630 | |. nop | 1630 | |. nop |
1631 | | b ->fff_resn | 1631 | | b ->fff_resn |
1632 | |. nop | 1632 | |. nop |
1633 | |.endmacro | 1633 | |.endmacro |
1634 | |.macro math_extern2, func | ||
1635 | | math_extern2 func, func | ||
1636 | |.endmacro | ||
1634 | | | 1637 | | |
1635 | |// TODO: Return integer type if result is integer (own sf implementation). | 1638 | |// TODO: Return integer type if result is integer (own sf implementation). |
1636 | |.macro math_round, func | 1639 | |.macro math_round, func |
@@ -1684,7 +1687,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1684 | | math_extern sinh | 1687 | | math_extern sinh |
1685 | | math_extern cosh | 1688 | | math_extern cosh |
1686 | | math_extern tanh | 1689 | | math_extern tanh |
1687 | | math_extern2 pow | 1690 | | math_extern2 pow, lj_vm_pow |
1688 | | math_extern2 atan2 | 1691 | | math_extern2 atan2 |
1689 | | math_extern2 fmod | 1692 | | math_extern2 fmod |
1690 | | | 1693 | | |
@@ -3689,7 +3692,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3689 | | sltiu AT, SFARG1HI, LJ_TISNUM | 3692 | | sltiu AT, SFARG1HI, LJ_TISNUM |
3690 | | sltiu TMP0, SFARG2HI, LJ_TISNUM | 3693 | | sltiu TMP0, SFARG2HI, LJ_TISNUM |
3691 | | and AT, AT, TMP0 | 3694 | | and AT, AT, TMP0 |
3692 | | load_got pow | 3695 | | load_got lj_vm_pow |
3693 | | beqz AT, ->vmeta_arith | 3696 | | beqz AT, ->vmeta_arith |
3694 | |. addu RA, BASE, RA | 3697 | |. addu RA, BASE, RA |
3695 | |.if FPU | 3698 | |.if FPU |
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 651bc42e..3b916379 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc | |||
@@ -1667,14 +1667,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1667 | |. nop | 1667 | |. nop |
1668 | |.endmacro | 1668 | |.endmacro |
1669 | | | 1669 | | |
1670 | |.macro math_extern2, func | 1670 | |.macro math_extern2, name, func |
1671 | | .ffunc_nn math_ .. func | 1671 | | .ffunc_nn math_ .. name |
1672 | |. load_got func | 1672 | |. load_got func |
1673 | | call_extern | 1673 | | call_extern |
1674 | |. nop | 1674 | |. nop |
1675 | | b ->fff_resn | 1675 | | b ->fff_resn |
1676 | |. nop | 1676 | |. nop |
1677 | |.endmacro | 1677 | |.endmacro |
1678 | |.macro math_extern2, func | ||
1679 | | math_extern2 func, func | ||
1680 | |.endmacro | ||
1678 | | | 1681 | | |
1679 | |// TODO: Return integer type if result is integer (own sf implementation). | 1682 | |// TODO: Return integer type if result is integer (own sf implementation). |
1680 | |.macro math_round, func | 1683 | |.macro math_round, func |
@@ -1728,7 +1731,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1728 | | math_extern sinh | 1731 | | math_extern sinh |
1729 | | math_extern cosh | 1732 | | math_extern cosh |
1730 | | math_extern tanh | 1733 | | math_extern tanh |
1731 | | math_extern2 pow | 1734 | | math_extern2 pow, lj_vm_pow |
1732 | | math_extern2 atan2 | 1735 | | math_extern2 atan2 |
1733 | | math_extern2 fmod | 1736 | | math_extern2 fmod |
1734 | | | 1737 | | |
@@ -3915,7 +3918,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3915 | | sltiu TMP0, TMP0, LJ_TISNUM | 3918 | | sltiu TMP0, TMP0, LJ_TISNUM |
3916 | | sltiu TMP1, TMP1, LJ_TISNUM | 3919 | | sltiu TMP1, TMP1, LJ_TISNUM |
3917 | | and AT, TMP0, TMP1 | 3920 | | and AT, TMP0, TMP1 |
3918 | | load_got pow | 3921 | | load_got lj_vm_pow |
3919 | | beqz AT, ->vmeta_arith | 3922 | | beqz AT, ->vmeta_arith |
3920 | |. daddu RA, BASE, RA | 3923 | |. daddu RA, BASE, RA |
3921 | |.if FPU | 3924 | |.if FPU |
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 3cad37d2..cc4d56d7 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc | |||
@@ -2012,11 +2012,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2012 | | b ->fff_resn | 2012 | | b ->fff_resn |
2013 | |.endmacro | 2013 | |.endmacro |
2014 | | | 2014 | | |
2015 | |.macro math_extern2, func | 2015 | |.macro math_extern2, name, func |
2016 | | .ffunc_nn math_ .. func | 2016 | | .ffunc_nn math_ .. name |
2017 | | blex func | 2017 | | blex func |
2018 | | b ->fff_resn | 2018 | | b ->fff_resn |
2019 | |.endmacro | 2019 | |.endmacro |
2020 | |.macro math_extern2, func | ||
2021 | | math_extern2 func, func | ||
2022 | |.endmacro | ||
2020 | | | 2023 | | |
2021 | |.macro math_round, func | 2024 | |.macro math_round, func |
2022 | | .ffunc_1 math_ .. func | 2025 | | .ffunc_1 math_ .. func |
@@ -2141,7 +2144,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2141 | | math_extern sinh | 2144 | | math_extern sinh |
2142 | | math_extern cosh | 2145 | | math_extern cosh |
2143 | | math_extern tanh | 2146 | | math_extern tanh |
2144 | | math_extern2 pow | 2147 | | math_extern2 pow, lj_vm_pow |
2145 | | math_extern2 atan2 | 2148 | | math_extern2 atan2 |
2146 | | math_extern2 fmod | 2149 | | math_extern2 fmod |
2147 | | | 2150 | | |
@@ -4139,7 +4142,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4139 | | checknum cr1, CARG3 | 4142 | | checknum cr1, CARG3 |
4140 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 4143 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
4141 | | bge ->vmeta_arith_vv | 4144 | | bge ->vmeta_arith_vv |
4142 | | blex pow | 4145 | | blex lj_vm_pow |
4143 | | ins_next1 | 4146 | | ins_next1 |
4144 | |.if FPU | 4147 | |.if FPU |
4145 | | stfdx FARG1, BASE, RA | 4148 | | stfdx FARG1, BASE, RA |
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index b222190a..4aa8589c 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc | |||
@@ -1755,13 +1755,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
1755 | | jmp ->fff_resxmm0 | 1755 | | jmp ->fff_resxmm0 |
1756 | |.endmacro | 1756 | |.endmacro |
1757 | | | 1757 | | |
1758 | |.macro math_extern2, func | 1758 | |.macro math_extern2, name, func |
1759 | | .ffunc_nn math_ .. func | 1759 | | .ffunc_nn math_ .. name |
1760 | | mov RB, BASE | 1760 | | mov RB, BASE |
1761 | | call extern func | 1761 | | call extern func |
1762 | | mov BASE, RB | 1762 | | mov BASE, RB |
1763 | | jmp ->fff_resxmm0 | 1763 | | jmp ->fff_resxmm0 |
1764 | |.endmacro | 1764 | |.endmacro |
1765 | |.macro math_extern2, func | ||
1766 | | math_extern2 func, func | ||
1767 | |.endmacro | ||
1765 | | | 1768 | | |
1766 | | math_extern log10 | 1769 | | math_extern log10 |
1767 | | math_extern exp | 1770 | | math_extern exp |
@@ -1774,7 +1777,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1774 | | math_extern sinh | 1777 | | math_extern sinh |
1775 | | math_extern cosh | 1778 | | math_extern cosh |
1776 | | math_extern tanh | 1779 | | math_extern tanh |
1777 | | math_extern2 pow | 1780 | | math_extern2 pow, lj_vm_pow |
1778 | | math_extern2 atan2 | 1781 | | math_extern2 atan2 |
1779 | | math_extern2 fmod | 1782 | | math_extern2 fmod |
1780 | | | 1783 | | |
@@ -2579,41 +2582,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2579 | | subsd xmm0, xmm1 | 2582 | | subsd xmm0, xmm1 |
2580 | | ret | 2583 | | ret |
2581 | | | 2584 | | |
2582 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
2583 | |->vm_powi_sse: | ||
2584 | | cmp eax, 1; jle >6 // i<=1? | ||
2585 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
2586 | |1: // Handle leading zeros. | ||
2587 | | test eax, 1; jnz >2 | ||
2588 | | mulsd xmm0, xmm0 | ||
2589 | | shr eax, 1 | ||
2590 | | jmp <1 | ||
2591 | |2: | ||
2592 | | shr eax, 1; jz >5 | ||
2593 | | movaps xmm1, xmm0 | ||
2594 | |3: // Handle trailing bits. | ||
2595 | | mulsd xmm0, xmm0 | ||
2596 | | shr eax, 1; jz >4 | ||
2597 | | jnc <3 | ||
2598 | | mulsd xmm1, xmm0 | ||
2599 | | jmp <3 | ||
2600 | |4: | ||
2601 | | mulsd xmm0, xmm1 | ||
2602 | |5: | ||
2603 | | ret | ||
2604 | |6: | ||
2605 | | je <5 // x^1 ==> x | ||
2606 | | jb >7 // x^0 ==> 1 | ||
2607 | | neg eax | ||
2608 | | call <1 | ||
2609 | | sseconst_1 xmm1, RD | ||
2610 | | divsd xmm1, xmm0 | ||
2611 | | movaps xmm0, xmm1 | ||
2612 | | ret | ||
2613 | |7: | ||
2614 | | sseconst_1 xmm0, RD | ||
2615 | | ret | ||
2616 | | | ||
2617 | |//----------------------------------------------------------------------- | 2585 | |//----------------------------------------------------------------------- |
2618 | |//-- Miscellaneous functions -------------------------------------------- | 2586 | |//-- Miscellaneous functions -------------------------------------------- |
2619 | |//----------------------------------------------------------------------- | 2587 | |//----------------------------------------------------------------------- |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index eb56840a..36af852d 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -2138,8 +2138,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2138 | | jmp ->fff_resfp | 2138 | | jmp ->fff_resfp |
2139 | |.endmacro | 2139 | |.endmacro |
2140 | | | 2140 | | |
2141 | |.macro math_extern2, func | 2141 | |.macro math_extern2, name, func |
2142 | | .ffunc_nnsse math_ .. func | 2142 | | .ffunc_nnsse math_ .. name |
2143 | |.if not X64 | 2143 | |.if not X64 |
2144 | | movsd FPARG1, xmm0 | 2144 | | movsd FPARG1, xmm0 |
2145 | | movsd FPARG3, xmm1 | 2145 | | movsd FPARG3, xmm1 |
@@ -2149,6 +2149,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2149 | | mov BASE, RB | 2149 | | mov BASE, RB |
2150 | | jmp ->fff_resfp | 2150 | | jmp ->fff_resfp |
2151 | |.endmacro | 2151 | |.endmacro |
2152 | |.macro math_extern2, func | ||
2153 | | math_extern2 func, func | ||
2154 | |.endmacro | ||
2152 | | | 2155 | | |
2153 | | math_extern log10 | 2156 | | math_extern log10 |
2154 | | math_extern exp | 2157 | | math_extern exp |
@@ -2161,7 +2164,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2161 | | math_extern sinh | 2164 | | math_extern sinh |
2162 | | math_extern cosh | 2165 | | math_extern cosh |
2163 | | math_extern tanh | 2166 | | math_extern tanh |
2164 | | math_extern2 pow | 2167 | | math_extern2 pow, lj_vm_pow |
2165 | | math_extern2 atan2 | 2168 | | math_extern2 atan2 |
2166 | | math_extern2 fmod | 2169 | | math_extern2 fmod |
2167 | | | 2170 | | |
@@ -3038,41 +3041,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3038 | | subsd xmm0, xmm1 | 3041 | | subsd xmm0, xmm1 |
3039 | | ret | 3042 | | ret |
3040 | | | 3043 | | |
3041 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
3042 | |->vm_powi_sse: | ||
3043 | | cmp eax, 1; jle >6 // i<=1? | ||
3044 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
3045 | |1: // Handle leading zeros. | ||
3046 | | test eax, 1; jnz >2 | ||
3047 | | mulsd xmm0, xmm0 | ||
3048 | | shr eax, 1 | ||
3049 | | jmp <1 | ||
3050 | |2: | ||
3051 | | shr eax, 1; jz >5 | ||
3052 | | movaps xmm1, xmm0 | ||
3053 | |3: // Handle trailing bits. | ||
3054 | | mulsd xmm0, xmm0 | ||
3055 | | shr eax, 1; jz >4 | ||
3056 | | jnc <3 | ||
3057 | | mulsd xmm1, xmm0 | ||
3058 | | jmp <3 | ||
3059 | |4: | ||
3060 | | mulsd xmm0, xmm1 | ||
3061 | |5: | ||
3062 | | ret | ||
3063 | |6: | ||
3064 | | je <5 // x^1 ==> x | ||
3065 | | jb >7 // x^0 ==> 1 | ||
3066 | | neg eax | ||
3067 | | call <1 | ||
3068 | | sseconst_1 xmm1, RDa | ||
3069 | | divsd xmm1, xmm0 | ||
3070 | | movaps xmm0, xmm1 | ||
3071 | | ret | ||
3072 | |7: | ||
3073 | | sseconst_1 xmm0, RDa | ||
3074 | | ret | ||
3075 | | | ||
3076 | |//----------------------------------------------------------------------- | 3044 | |//----------------------------------------------------------------------- |
3077 | |//-- Miscellaneous functions -------------------------------------------- | 3045 | |//-- Miscellaneous functions -------------------------------------------- |
3078 | |//----------------------------------------------------------------------- | 3046 | |//----------------------------------------------------------------------- |
@@ -3954,7 +3922,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3954 | | movsd FPARG1, xmm0 | 3922 | | movsd FPARG1, xmm0 |
3955 | | movsd FPARG3, xmm1 | 3923 | | movsd FPARG3, xmm1 |
3956 | |.endif | 3924 | |.endif |
3957 | | call extern pow | 3925 | | call extern lj_vm_pow |
3958 | | movzx RA, PC_RA | 3926 | | movzx RA, PC_RA |
3959 | | mov BASE, RB | 3927 | | mov BASE, RB |
3960 | |.if X64 | 3928 | |.if X64 |