aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lj_asm.c7
-rw-r--r--src/lj_asm_x86.h13
-rw-r--r--src/lj_dispatch.h2
-rw-r--r--src/lj_ircall.h2
-rw-r--r--src/lj_opt_fold.c27
-rw-r--r--src/lj_opt_narrow.c12
-rw-r--r--src/lj_vm.h7
-rw-r--r--src/lj_vmmath.c82
-rw-r--r--src/vm_arm.dasc13
-rw-r--r--src/vm_arm64.dasc11
-rw-r--r--src/vm_mips.dasc11
-rw-r--r--src/vm_mips64.dasc11
-rw-r--r--src/vm_ppc.dasc11
-rw-r--r--src/vm_x64.dasc44
-rw-r--r--src/vm_x86.dasc46
15 files changed, 104 insertions, 195 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 7abafbf4..eaee5547 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1670,7 +1670,6 @@ static void asm_loop(ASMState *as)
1670#if !LJ_SOFTFP32 1670#if !LJ_SOFTFP32
1671#if !LJ_TARGET_X86ORX64 1671#if !LJ_TARGET_X86ORX64
1672#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) 1672#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1673#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1674#endif 1673#endif
1675 1674
1676static void asm_pow(ASMState *as, IRIns *ir) 1675static void asm_pow(ASMState *as, IRIns *ir)
@@ -1681,10 +1680,8 @@ static void asm_pow(ASMState *as, IRIns *ir)
1681 IRCALL_lj_carith_powu64); 1680 IRCALL_lj_carith_powu64);
1682 else 1681 else
1683#endif 1682#endif
1684 if (irt_isnum(IR(ir->op2)->t)) 1683 asm_callid(as, ir, irt_isnum(IR(ir->op2)->t) ? IRCALL_lj_vm_pow :
1685 asm_callid(as, ir, IRCALL_pow); 1684 IRCALL_lj_vm_powi);
1686 else
1687 asm_fppowi(as, ir);
1688} 1685}
1689 1686
1690static void asm_div(ASMState *as, IRIns *ir) 1687static void asm_div(ASMState *as, IRIns *ir)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 1ef7c38f..38069e1d 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -2017,19 +2017,6 @@ static void asm_ldexp(ASMState *as, IRIns *ir)
2017 asm_x87load(as, ir->op2); 2017 asm_x87load(as, ir->op2);
2018} 2018}
2019 2019
2020static void asm_fppowi(ASMState *as, IRIns *ir)
2021{
2022 /* The modified regs must match with the *.dasc implementation. */
2023 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
2024 if (ra_hasreg(ir->r))
2025 rset_clear(drop, ir->r); /* Dest reg handled below. */
2026 ra_evictset(as, drop);
2027 ra_destreg(as, ir, RID_XMM0);
2028 emit_call(as, lj_vm_powi_sse);
2029 ra_left(as, RID_XMM0, ir->op1);
2030 ra_left(as, RID_EAX, ir->op2);
2031}
2032
2033static int asm_swapops(ASMState *as, IRIns *ir) 2020static int asm_swapops(ASMState *as, IRIns *ir)
2034{ 2021{
2035 IRIns *irl = IR(ir->op1); 2022 IRIns *irl = IR(ir->op1);
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 0594af51..d0f86fab 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -44,7 +44,7 @@ extern double __divdf3(double a, double b);
44#define GOTDEF(_) \ 44#define GOTDEF(_) \
45 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 45 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
46 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 46 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
47 _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ 47 _(lj_vm_pow) _(fmod) _(ldexp) _(lj_vm_modi) \
48 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ 48 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
49 _(lj_dispatch_profile) _(lj_err_throw) \ 49 _(lj_dispatch_profile) _(lj_err_throw) \
50 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 50 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 7fc3d1fd..c4d4a7b8 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -218,7 +218,7 @@ typedef struct CCallInfo {
218 _(ANY, log, 1, N, NUM, XA_FP) \ 218 _(ANY, log, 1, N, NUM, XA_FP) \
219 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ 219 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
220 _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ 220 _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
221 _(ANY, pow, 2, N, NUM, XA2_FP) \ 221 _(ANY, lj_vm_pow, 2, N, NUM, XA2_FP) \
222 _(ANY, atan2, 2, N, NUM, XA2_FP) \ 222 _(ANY, atan2, 2, N, NUM, XA2_FP) \
223 _(ANY, ldexp, 2, N, NUM, XA_FP) \ 223 _(ANY, ldexp, 2, N, NUM, XA_FP) \
224 _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ 224 _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 8200b240..34f70e27 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -1143,33 +1143,6 @@ LJFOLDF(simplify_numpow_xkint)
1143 return ref; 1143 return ref;
1144} 1144}
1145 1145
1146LJFOLD(POW any KNUM)
1147LJFOLDF(simplify_numpow_xknum)
1148{
1149 if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */
1150 return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
1151 return NEXTFOLD;
1152}
1153
1154LJFOLD(POW KNUM any)
1155LJFOLDF(simplify_numpow_kx)
1156{
1157 lua_Number n = knumleft;
1158 if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */
1159#if LJ_TARGET_X86ORX64
1160 /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
1161 fins->o = IR_CONV;
1162 fins->op1 = fins->op2;
1163 fins->op2 = IRCONV_NUM_INT;
1164 fins->op2 = (IRRef1)lj_opt_fold(J);
1165#endif
1166 fins->op1 = (IRRef1)lj_ir_knum_one(J);
1167 fins->o = IR_LDEXP;
1168 return RETRYFOLD;
1169 }
1170 return NEXTFOLD;
1171}
1172
1173/* -- Simplify conversions ------------------------------------------------ */ 1146/* -- Simplify conversions ------------------------------------------------ */
1174 1147
1175LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ 1148LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 57b19613..fe92468e 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -590,20 +590,14 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
590 rb = conv_str_tonum(J, rb, vb); 590 rb = conv_str_tonum(J, rb, vb);
591 rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ 591 rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
592 rc = conv_str_tonum(J, rc, vc); 592 rc = conv_str_tonum(J, rc, vc);
593 /* Narrowing must be unconditional to preserve (-x)^i semantics. */
594 if (tvisint(vc) || numisint(numV(vc))) { 593 if (tvisint(vc) || numisint(numV(vc))) {
595 int checkrange = 0; 594 int32_t k = numberVint(vc);
596 /* pow() is faster for bigger exponents. But do this only for (+k)^i. */ 595 if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
597 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
598 int32_t k = numberVint(vc);
599 if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
600 checkrange = 1;
601 }
602 if (!tref_isinteger(rc)) { 596 if (!tref_isinteger(rc)) {
603 /* Guarded conversion to integer! */ 597 /* Guarded conversion to integer! */
604 rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); 598 rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
605 } 599 }
606 if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ 600 if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
607 TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); 601 TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
608 emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); 602 emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
609 } 603 }
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 7713d16b..bfa7e0fd 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -83,10 +83,6 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
83LJ_ASMF void lj_vm_floor_sse(void); 83LJ_ASMF void lj_vm_floor_sse(void);
84LJ_ASMF void lj_vm_ceil_sse(void); 84LJ_ASMF void lj_vm_ceil_sse(void);
85LJ_ASMF void lj_vm_trunc_sse(void); 85LJ_ASMF void lj_vm_trunc_sse(void);
86LJ_ASMF void lj_vm_powi_sse(void);
87#define lj_vm_powi NULL
88#else
89LJ_ASMF double lj_vm_powi(double, int32_t);
90#endif 86#endif
91#if LJ_TARGET_PPC || LJ_TARGET_ARM64 87#if LJ_TARGET_PPC || LJ_TARGET_ARM64
92#define lj_vm_trunc trunc 88#define lj_vm_trunc trunc
@@ -102,6 +98,9 @@ LJ_ASMF int lj_vm_errno(void);
102LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); 98LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx);
103#endif 99#endif
104 100
101LJ_ASMF double lj_vm_powi(double, int32_t);
102LJ_ASMF double lj_vm_pow(double, double);
103
105/* Continuations for metamethods. */ 104/* Continuations for metamethods. */
106LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ 105LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */
107LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */ 106LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 536199d8..fa0de922 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -30,11 +30,51 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } 30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } 31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } 32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
33LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
34LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } 33LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
35#endif 34#endif
36 35
37/* -- Helper functions for generated machine code ------------------------- */ 36/* -- Helper functions ---------------------------------------------------- */
37
38/* Unsigned x^k. */
39static double lj_vm_powui(double x, uint32_t k)
40{
41 double y;
42 lj_assertX(k != 0, "pow with zero exponent");
43 for (; (k & 1) == 0; k >>= 1) x *= x;
44 y = x;
45 if ((k >>= 1) != 0) {
46 for (;;) {
47 x *= x;
48 if (k == 1) break;
49 if (k & 1) y *= x;
50 k >>= 1;
51 }
52 y *= x;
53 }
54 return y;
55}
56
57/* Signed x^k. */
58double lj_vm_powi(double x, int32_t k)
59{
60 if (k > 1)
61 return lj_vm_powui(x, (uint32_t)k);
62 else if (k == 1)
63 return x;
64 else if (k == 0)
65 return 1.0;
66 else
67 return 1.0 / lj_vm_powui(x, (uint32_t)-k);
68}
69
70double lj_vm_pow(double x, double y)
71{
72 int32_t k = lj_num2int(y);
73 if ((k >= -65536 && k <= 65536) && y == (double)k)
74 return lj_vm_powi(x, k);
75 else
76 return pow(x, y);
77}
38 78
39double lj_vm_foldarith(double x, double y, int op) 79double lj_vm_foldarith(double x, double y, int op)
40{ 80{
@@ -44,7 +84,7 @@ double lj_vm_foldarith(double x, double y, int op)
44 case IR_MUL - IR_ADD: return x*y; break; 84 case IR_MUL - IR_ADD: return x*y; break;
45 case IR_DIV - IR_ADD: return x/y; break; 85 case IR_DIV - IR_ADD: return x/y; break;
46 case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; 86 case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
47 case IR_POW - IR_ADD: return pow(x, y); break; 87 case IR_POW - IR_ADD: return lj_vm_pow(x, y); break;
48 case IR_NEG - IR_ADD: return -x; break; 88 case IR_NEG - IR_ADD: return -x; break;
49 case IR_ABS - IR_ADD: return fabs(x); break; 89 case IR_ABS - IR_ADD: return fabs(x); break;
50#if LJ_HASJIT 90#if LJ_HASJIT
@@ -56,6 +96,8 @@ double lj_vm_foldarith(double x, double y, int op)
56 } 96 }
57} 97}
58 98
99/* -- Helper functions for generated machine code ------------------------- */
100
59#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS 101#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
60int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) 102int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
61{ 103{
@@ -80,40 +122,6 @@ double lj_vm_log2(double a)
80} 122}
81#endif 123#endif
82 124
83#if !LJ_TARGET_X86ORX64
84/* Unsigned x^k. */
85static double lj_vm_powui(double x, uint32_t k)
86{
87 double y;
88 lj_assertX(k != 0, "pow with zero exponent");
89 for (; (k & 1) == 0; k >>= 1) x *= x;
90 y = x;
91 if ((k >>= 1) != 0) {
92 for (;;) {
93 x *= x;
94 if (k == 1) break;
95 if (k & 1) y *= x;
96 k >>= 1;
97 }
98 y *= x;
99 }
100 return y;
101}
102
103/* Signed x^k. */
104double lj_vm_powi(double x, int32_t k)
105{
106 if (k > 1)
107 return lj_vm_powui(x, (uint32_t)k);
108 else if (k == 1)
109 return x;
110 else if (k == 0)
111 return 1.0;
112 else
113 return 1.0 / lj_vm_powui(x, (uint32_t)-k);
114}
115#endif
116
117/* Computes fpm(x) for extended math functions. */ 125/* Computes fpm(x) for extended math functions. */
118double lj_vm_foldfpm(double x, int fpm) 126double lj_vm_foldfpm(double x, int fpm)
119{ 127{
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 770c1602..636619fd 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -1477,11 +1477,11 @@ static void build_subroutines(BuildCtx *ctx)
1477 |.endif 1477 |.endif
1478 |.endmacro 1478 |.endmacro
1479 | 1479 |
1480 |.macro math_extern2, func 1480 |.macro math_extern2, name, func
1481 |.if HFABI 1481 |.if HFABI
1482 | .ffunc_dd math_ .. func 1482 | .ffunc_dd math_ .. name
1483 |.else 1483 |.else
1484 | .ffunc_nn math_ .. func 1484 | .ffunc_nn math_ .. name
1485 |.endif 1485 |.endif
1486 | .IOS mov RA, BASE 1486 | .IOS mov RA, BASE
1487 | bl extern func 1487 | bl extern func
@@ -1492,6 +1492,9 @@ static void build_subroutines(BuildCtx *ctx)
1492 | b ->fff_restv 1492 | b ->fff_restv
1493 |.endif 1493 |.endif
1494 |.endmacro 1494 |.endmacro
1495 |.macro math_extern2, func
1496 | math_extern2 func, func
1497 |.endmacro
1495 | 1498 |
1496 |.if FPU 1499 |.if FPU
1497 | .ffunc_d math_sqrt 1500 | .ffunc_d math_sqrt
@@ -1537,7 +1540,7 @@ static void build_subroutines(BuildCtx *ctx)
1537 | math_extern sinh 1540 | math_extern sinh
1538 | math_extern cosh 1541 | math_extern cosh
1539 | math_extern tanh 1542 | math_extern tanh
1540 | math_extern2 pow 1543 | math_extern2 pow, lj_vm_pow
1541 | math_extern2 atan2 1544 | math_extern2 atan2
1542 | math_extern2 fmod 1545 | math_extern2 fmod
1543 | 1546 |
@@ -3203,7 +3206,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3203 break; 3206 break;
3204 case BC_POW: 3207 case BC_POW:
3205 | // NYI: (partial) integer arithmetic. 3208 | // NYI: (partial) integer arithmetic.
3206 | ins_arithfp extern, extern pow 3209 | ins_arithfp extern, extern lj_vm_pow
3207 break; 3210 break;
3208 3211
3209 case BC_CAT: 3212 case BC_CAT:
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index f5f1b5f1..7ef9ffba 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -1387,11 +1387,14 @@ static void build_subroutines(BuildCtx *ctx)
1387 | b ->fff_resn 1387 | b ->fff_resn
1388 |.endmacro 1388 |.endmacro
1389 | 1389 |
1390 |.macro math_extern2, func 1390 |.macro math_extern2, name, func
1391 | .ffunc_nn math_ .. func 1391 | .ffunc_nn math_ .. name
1392 | bl extern func 1392 | bl extern func
1393 | b ->fff_resn 1393 | b ->fff_resn
1394 |.endmacro 1394 |.endmacro
1395 |.macro math_extern2, func
1396 | math_extern2 func, func
1397 |.endmacro
1395 | 1398 |
1396 |.ffunc_n math_sqrt 1399 |.ffunc_n math_sqrt
1397 | fsqrt d0, d0 1400 | fsqrt d0, d0
@@ -1420,7 +1423,7 @@ static void build_subroutines(BuildCtx *ctx)
1420 | math_extern sinh 1423 | math_extern sinh
1421 | math_extern cosh 1424 | math_extern cosh
1422 | math_extern tanh 1425 | math_extern tanh
1423 | math_extern2 pow 1426 | math_extern2 pow, lj_vm_pow
1424 | math_extern2 atan2 1427 | math_extern2 atan2
1425 | math_extern2 fmod 1428 | math_extern2 fmod
1426 | 1429 |
@@ -2674,7 +2677,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2674 | ins_arithload FARG1, FARG2 2677 | ins_arithload FARG1, FARG2
2675 | ins_arithfallback ins_arithcheck_num 2678 | ins_arithfallback ins_arithcheck_num
2676 |.if "fpins" == "fpow" 2679 |.if "fpins" == "fpow"
2677 | bl extern pow 2680 | bl extern lj_vm_pow
2678 |.else 2681 |.else
2679 | fpins FARG1, FARG1, FARG2 2682 | fpins FARG1, FARG1, FARG2
2680 |.endif 2683 |.endif
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 34645bf1..cf791f74 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1623,14 +1623,17 @@ static void build_subroutines(BuildCtx *ctx)
1623 |. nop 1623 |. nop
1624 |.endmacro 1624 |.endmacro
1625 | 1625 |
1626 |.macro math_extern2, func 1626 |.macro math_extern2, name, func
1627 | .ffunc_nn math_ .. func 1627 | .ffunc_nn math_ .. name
1628 |. load_got func 1628 |. load_got func
1629 | call_extern 1629 | call_extern
1630 |. nop 1630 |. nop
1631 | b ->fff_resn 1631 | b ->fff_resn
1632 |. nop 1632 |. nop
1633 |.endmacro 1633 |.endmacro
1634 |.macro math_extern2, func
1635 | math_extern2 func, func
1636 |.endmacro
1634 | 1637 |
1635 |// TODO: Return integer type if result is integer (own sf implementation). 1638 |// TODO: Return integer type if result is integer (own sf implementation).
1636 |.macro math_round, func 1639 |.macro math_round, func
@@ -1684,7 +1687,7 @@ static void build_subroutines(BuildCtx *ctx)
1684 | math_extern sinh 1687 | math_extern sinh
1685 | math_extern cosh 1688 | math_extern cosh
1686 | math_extern tanh 1689 | math_extern tanh
1687 | math_extern2 pow 1690 | math_extern2 pow, lj_vm_pow
1688 | math_extern2 atan2 1691 | math_extern2 atan2
1689 | math_extern2 fmod 1692 | math_extern2 fmod
1690 | 1693 |
@@ -3689,7 +3692,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3689 | sltiu AT, SFARG1HI, LJ_TISNUM 3692 | sltiu AT, SFARG1HI, LJ_TISNUM
3690 | sltiu TMP0, SFARG2HI, LJ_TISNUM 3693 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3691 | and AT, AT, TMP0 3694 | and AT, AT, TMP0
3692 | load_got pow 3695 | load_got lj_vm_pow
3693 | beqz AT, ->vmeta_arith 3696 | beqz AT, ->vmeta_arith
3694 |. addu RA, BASE, RA 3697 |. addu RA, BASE, RA
3695 |.if FPU 3698 |.if FPU
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 651bc42e..3b916379 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -1667,14 +1667,17 @@ static void build_subroutines(BuildCtx *ctx)
1667 |. nop 1667 |. nop
1668 |.endmacro 1668 |.endmacro
1669 | 1669 |
1670 |.macro math_extern2, func 1670 |.macro math_extern2, name, func
1671 | .ffunc_nn math_ .. func 1671 | .ffunc_nn math_ .. name
1672 |. load_got func 1672 |. load_got func
1673 | call_extern 1673 | call_extern
1674 |. nop 1674 |. nop
1675 | b ->fff_resn 1675 | b ->fff_resn
1676 |. nop 1676 |. nop
1677 |.endmacro 1677 |.endmacro
1678 |.macro math_extern2, func
1679 | math_extern2 func, func
1680 |.endmacro
1678 | 1681 |
1679 |// TODO: Return integer type if result is integer (own sf implementation). 1682 |// TODO: Return integer type if result is integer (own sf implementation).
1680 |.macro math_round, func 1683 |.macro math_round, func
@@ -1728,7 +1731,7 @@ static void build_subroutines(BuildCtx *ctx)
1728 | math_extern sinh 1731 | math_extern sinh
1729 | math_extern cosh 1732 | math_extern cosh
1730 | math_extern tanh 1733 | math_extern tanh
1731 | math_extern2 pow 1734 | math_extern2 pow, lj_vm_pow
1732 | math_extern2 atan2 1735 | math_extern2 atan2
1733 | math_extern2 fmod 1736 | math_extern2 fmod
1734 | 1737 |
@@ -3915,7 +3918,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3915 | sltiu TMP0, TMP0, LJ_TISNUM 3918 | sltiu TMP0, TMP0, LJ_TISNUM
3916 | sltiu TMP1, TMP1, LJ_TISNUM 3919 | sltiu TMP1, TMP1, LJ_TISNUM
3917 | and AT, TMP0, TMP1 3920 | and AT, TMP0, TMP1
3918 | load_got pow 3921 | load_got lj_vm_pow
3919 | beqz AT, ->vmeta_arith 3922 | beqz AT, ->vmeta_arith
3920 |. daddu RA, BASE, RA 3923 |. daddu RA, BASE, RA
3921 |.if FPU 3924 |.if FPU
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 3cad37d2..cc4d56d7 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -2012,11 +2012,14 @@ static void build_subroutines(BuildCtx *ctx)
2012 | b ->fff_resn 2012 | b ->fff_resn
2013 |.endmacro 2013 |.endmacro
2014 | 2014 |
2015 |.macro math_extern2, func 2015 |.macro math_extern2, name, func
2016 | .ffunc_nn math_ .. func 2016 | .ffunc_nn math_ .. name
2017 | blex func 2017 | blex func
2018 | b ->fff_resn 2018 | b ->fff_resn
2019 |.endmacro 2019 |.endmacro
2020 |.macro math_extern2, func
2021 | math_extern2 func, func
2022 |.endmacro
2020 | 2023 |
2021 |.macro math_round, func 2024 |.macro math_round, func
2022 | .ffunc_1 math_ .. func 2025 | .ffunc_1 math_ .. func
@@ -2141,7 +2144,7 @@ static void build_subroutines(BuildCtx *ctx)
2141 | math_extern sinh 2144 | math_extern sinh
2142 | math_extern cosh 2145 | math_extern cosh
2143 | math_extern tanh 2146 | math_extern tanh
2144 | math_extern2 pow 2147 | math_extern2 pow, lj_vm_pow
2145 | math_extern2 atan2 2148 | math_extern2 atan2
2146 | math_extern2 fmod 2149 | math_extern2 fmod
2147 | 2150 |
@@ -4139,7 +4142,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4139 | checknum cr1, CARG3 4142 | checknum cr1, CARG3
4140 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4143 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
4141 | bge ->vmeta_arith_vv 4144 | bge ->vmeta_arith_vv
4142 | blex pow 4145 | blex lj_vm_pow
4143 | ins_next1 4146 | ins_next1
4144 |.if FPU 4147 |.if FPU
4145 | stfdx FARG1, BASE, RA 4148 | stfdx FARG1, BASE, RA
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index b222190a..4aa8589c 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -1755,13 +1755,16 @@ static void build_subroutines(BuildCtx *ctx)
1755 | jmp ->fff_resxmm0 1755 | jmp ->fff_resxmm0
1756 |.endmacro 1756 |.endmacro
1757 | 1757 |
1758 |.macro math_extern2, func 1758 |.macro math_extern2, name, func
1759 | .ffunc_nn math_ .. func 1759 | .ffunc_nn math_ .. name
1760 | mov RB, BASE 1760 | mov RB, BASE
1761 | call extern func 1761 | call extern func
1762 | mov BASE, RB 1762 | mov BASE, RB
1763 | jmp ->fff_resxmm0 1763 | jmp ->fff_resxmm0
1764 |.endmacro 1764 |.endmacro
1765 |.macro math_extern2, func
1766 | math_extern2 func, func
1767 |.endmacro
1765 | 1768 |
1766 | math_extern log10 1769 | math_extern log10
1767 | math_extern exp 1770 | math_extern exp
@@ -1774,7 +1777,7 @@ static void build_subroutines(BuildCtx *ctx)
1774 | math_extern sinh 1777 | math_extern sinh
1775 | math_extern cosh 1778 | math_extern cosh
1776 | math_extern tanh 1779 | math_extern tanh
1777 | math_extern2 pow 1780 | math_extern2 pow, lj_vm_pow
1778 | math_extern2 atan2 1781 | math_extern2 atan2
1779 | math_extern2 fmod 1782 | math_extern2 fmod
1780 | 1783 |
@@ -2579,41 +2582,6 @@ static void build_subroutines(BuildCtx *ctx)
2579 | subsd xmm0, xmm1 2582 | subsd xmm0, xmm1
2580 | ret 2583 | ret
2581 | 2584 |
2582 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2583 |->vm_powi_sse:
2584 | cmp eax, 1; jle >6 // i<=1?
2585 | // Now 1 < (unsigned)i <= 0x80000000.
2586 |1: // Handle leading zeros.
2587 | test eax, 1; jnz >2
2588 | mulsd xmm0, xmm0
2589 | shr eax, 1
2590 | jmp <1
2591 |2:
2592 | shr eax, 1; jz >5
2593 | movaps xmm1, xmm0
2594 |3: // Handle trailing bits.
2595 | mulsd xmm0, xmm0
2596 | shr eax, 1; jz >4
2597 | jnc <3
2598 | mulsd xmm1, xmm0
2599 | jmp <3
2600 |4:
2601 | mulsd xmm0, xmm1
2602 |5:
2603 | ret
2604 |6:
2605 | je <5 // x^1 ==> x
2606 | jb >7 // x^0 ==> 1
2607 | neg eax
2608 | call <1
2609 | sseconst_1 xmm1, RD
2610 | divsd xmm1, xmm0
2611 | movaps xmm0, xmm1
2612 | ret
2613 |7:
2614 | sseconst_1 xmm0, RD
2615 | ret
2616 |
2617 |//----------------------------------------------------------------------- 2585 |//-----------------------------------------------------------------------
2618 |//-- Miscellaneous functions -------------------------------------------- 2586 |//-- Miscellaneous functions --------------------------------------------
2619 |//----------------------------------------------------------------------- 2587 |//-----------------------------------------------------------------------
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index eb56840a..36af852d 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -2138,8 +2138,8 @@ static void build_subroutines(BuildCtx *ctx)
2138 | jmp ->fff_resfp 2138 | jmp ->fff_resfp
2139 |.endmacro 2139 |.endmacro
2140 | 2140 |
2141 |.macro math_extern2, func 2141 |.macro math_extern2, name, func
2142 | .ffunc_nnsse math_ .. func 2142 | .ffunc_nnsse math_ .. name
2143 |.if not X64 2143 |.if not X64
2144 | movsd FPARG1, xmm0 2144 | movsd FPARG1, xmm0
2145 | movsd FPARG3, xmm1 2145 | movsd FPARG3, xmm1
@@ -2149,6 +2149,9 @@ static void build_subroutines(BuildCtx *ctx)
2149 | mov BASE, RB 2149 | mov BASE, RB
2150 | jmp ->fff_resfp 2150 | jmp ->fff_resfp
2151 |.endmacro 2151 |.endmacro
2152 |.macro math_extern2, func
2153 | math_extern2 func, func
2154 |.endmacro
2152 | 2155 |
2153 | math_extern log10 2156 | math_extern log10
2154 | math_extern exp 2157 | math_extern exp
@@ -2161,7 +2164,7 @@ static void build_subroutines(BuildCtx *ctx)
2161 | math_extern sinh 2164 | math_extern sinh
2162 | math_extern cosh 2165 | math_extern cosh
2163 | math_extern tanh 2166 | math_extern tanh
2164 | math_extern2 pow 2167 | math_extern2 pow, lj_vm_pow
2165 | math_extern2 atan2 2168 | math_extern2 atan2
2166 | math_extern2 fmod 2169 | math_extern2 fmod
2167 | 2170 |
@@ -3038,41 +3041,6 @@ static void build_subroutines(BuildCtx *ctx)
3038 | subsd xmm0, xmm1 3041 | subsd xmm0, xmm1
3039 | ret 3042 | ret
3040 | 3043 |
3041 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3042 |->vm_powi_sse:
3043 | cmp eax, 1; jle >6 // i<=1?
3044 | // Now 1 < (unsigned)i <= 0x80000000.
3045 |1: // Handle leading zeros.
3046 | test eax, 1; jnz >2
3047 | mulsd xmm0, xmm0
3048 | shr eax, 1
3049 | jmp <1
3050 |2:
3051 | shr eax, 1; jz >5
3052 | movaps xmm1, xmm0
3053 |3: // Handle trailing bits.
3054 | mulsd xmm0, xmm0
3055 | shr eax, 1; jz >4
3056 | jnc <3
3057 | mulsd xmm1, xmm0
3058 | jmp <3
3059 |4:
3060 | mulsd xmm0, xmm1
3061 |5:
3062 | ret
3063 |6:
3064 | je <5 // x^1 ==> x
3065 | jb >7 // x^0 ==> 1
3066 | neg eax
3067 | call <1
3068 | sseconst_1 xmm1, RDa
3069 | divsd xmm1, xmm0
3070 | movaps xmm0, xmm1
3071 | ret
3072 |7:
3073 | sseconst_1 xmm0, RDa
3074 | ret
3075 |
3076 |//----------------------------------------------------------------------- 3044 |//-----------------------------------------------------------------------
3077 |//-- Miscellaneous functions -------------------------------------------- 3045 |//-- Miscellaneous functions --------------------------------------------
3078 |//----------------------------------------------------------------------- 3046 |//-----------------------------------------------------------------------
@@ -3954,7 +3922,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3954 | movsd FPARG1, xmm0 3922 | movsd FPARG1, xmm0
3955 | movsd FPARG3, xmm1 3923 | movsd FPARG3, xmm1
3956 |.endif 3924 |.endif
3957 | call extern pow 3925 | call extern lj_vm_pow
3958 | movzx RA, PC_RA 3926 | movzx RA, PC_RA
3959 | mov BASE, RB 3927 | mov BASE, RB
3960 |.if X64 3928 |.if X64