diff options
author | Mike Pall <mike> | 2014-12-08 02:02:34 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2014-12-08 02:02:34 +0100 |
commit | ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626 (patch) | |
tree | 3404e1b148e08f2320a9937ca4849dc794b36bad /src/vm_x86.dasc | |
parent | e03df1e3395bc719d43bd9196d0290757f992b2f (diff) | |
download | luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.tar.gz luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.tar.bz2 luajit-ad03eba715e5e0d0bd0f3c0ddef4b8f5bbb0c626.zip |
x86/x64: Drop internal x87 math functions. Use libm functions.
Diffstat (limited to '')
-rw-r--r-- | src/vm_x86.dasc | 425 |
1 files changed, 62 insertions, 363 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index cd43afbd..290054dc 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -373,7 +373,6 @@ | |||
373 | | fpop | 373 | | fpop |
374 | |.endmacro | 374 | |.endmacro |
375 | | | 375 | | |
376 | |.macro fdup; fld st0; .endmacro | ||
377 | |.macro fpop1; fstp st1; .endmacro | 376 | |.macro fpop1; fstp st1; .endmacro |
378 | | | 377 | | |
379 | |// Synthesize SSE FP constants. | 378 | |// Synthesize SSE FP constants. |
@@ -1329,19 +1328,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1329 | | cmp NARGS:RD, 2+1; jb ->fff_fallback | 1328 | | cmp NARGS:RD, 2+1; jb ->fff_fallback |
1330 | |.endmacro | 1329 | |.endmacro |
1331 | | | 1330 | | |
1332 | |.macro .ffunc_n, name | ||
1333 | | .ffunc_1 name | ||
1334 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1335 | | fld qword [BASE] | ||
1336 | |.endmacro | ||
1337 | | | ||
1338 | |.macro .ffunc_n, name, op | ||
1339 | | .ffunc_1 name | ||
1340 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1341 | | op | ||
1342 | | fld qword [BASE] | ||
1343 | |.endmacro | ||
1344 | | | ||
1345 | |.macro .ffunc_nsse, name, op | 1331 | |.macro .ffunc_nsse, name, op |
1346 | | .ffunc_1 name | 1332 | | .ffunc_1 name |
1347 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1333 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1352,14 +1338,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1352 | | .ffunc_nsse name, movsd | 1338 | | .ffunc_nsse name, movsd |
1353 | |.endmacro | 1339 | |.endmacro |
1354 | | | 1340 | | |
1355 | |.macro .ffunc_nn, name | ||
1356 | | .ffunc_2 name | ||
1357 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1358 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | ||
1359 | | fld qword [BASE] | ||
1360 | | fld qword [BASE+8] | ||
1361 | |.endmacro | ||
1362 | | | ||
1363 | |.macro .ffunc_nnsse, name | 1341 | |.macro .ffunc_nnsse, name |
1364 | | .ffunc_2 name | 1342 | | .ffunc_2 name |
1365 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1343 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -2029,6 +2007,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2029 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. | 2007 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. |
2030 | | jmp ->vm_return | 2008 | | jmp ->vm_return |
2031 | | | 2009 | | |
2010 | |.if X64 | ||
2011 | |.define fff_resfp, fff_resxmm0 | ||
2012 | |.else | ||
2013 | |.define fff_resfp, fff_resn | ||
2014 | |.endif | ||
2015 | | | ||
2032 | |.macro math_round, func | 2016 | |.macro math_round, func |
2033 | | .ffunc math_ .. func | 2017 | | .ffunc math_ .. func |
2034 | |.if DUALNUM | 2018 | |.if DUALNUM |
@@ -2061,22 +2045,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2061 | |.ffunc math_log | 2045 | |.ffunc math_log |
2062 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 2046 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
2063 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2047 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2064 | | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn | 2048 | | movsd xmm0, qword [BASE] |
2065 | | | 2049 | |.if not X64 |
2066 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2050 | | movsd FPARG1, xmm0 |
2067 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2051 | |.endif |
2068 | | | 2052 | | mov RB, BASE |
2069 | |.ffunc_n math_sin; fsin; jmp ->fff_resn | 2053 | | call extern log |
2070 | |.ffunc_n math_cos; fcos; jmp ->fff_resn | 2054 | | mov BASE, RB |
2071 | |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn | 2055 | | jmp ->fff_resfp |
2072 | | | ||
2073 | |.ffunc_n math_asin | ||
2074 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan | ||
2075 | | jmp ->fff_resn | ||
2076 | |.ffunc_n math_acos | ||
2077 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan | ||
2078 | | jmp ->fff_resn | ||
2079 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | ||
2080 | | | 2056 | | |
2081 | |.macro math_extern, func | 2057 | |.macro math_extern, func |
2082 | | .ffunc_nsse math_ .. func | 2058 | | .ffunc_nsse math_ .. func |
@@ -2086,18 +2062,36 @@ static void build_subroutines(BuildCtx *ctx) | |||
2086 | | mov RB, BASE | 2062 | | mov RB, BASE |
2087 | | call extern func | 2063 | | call extern func |
2088 | | mov BASE, RB | 2064 | | mov BASE, RB |
2089 | |.if X64 | 2065 | | jmp ->fff_resfp |
2090 | | jmp ->fff_resxmm0 | 2066 | |.endmacro |
2091 | |.else | 2067 | | |
2092 | | jmp ->fff_resn | 2068 | |.macro math_extern2, func |
2069 | | .ffunc_nnsse math_ .. func | ||
2070 | |.if not X64 | ||
2071 | | movsd FPARG1, xmm0 | ||
2072 | | movsd FPARG3, xmm1 | ||
2093 | |.endif | 2073 | |.endif |
2074 | | mov RB, BASE | ||
2075 | | call extern func | ||
2076 | | mov BASE, RB | ||
2077 | | jmp ->fff_resfp | ||
2094 | |.endmacro | 2078 | |.endmacro |
2095 | | | 2079 | | |
2080 | | math_extern log10 | ||
2081 | | math_extern exp | ||
2082 | | math_extern sin | ||
2083 | | math_extern cos | ||
2084 | | math_extern tan | ||
2085 | | math_extern asin | ||
2086 | | math_extern acos | ||
2087 | | math_extern atan | ||
2096 | | math_extern sinh | 2088 | | math_extern sinh |
2097 | | math_extern cosh | 2089 | | math_extern cosh |
2098 | | math_extern tanh | 2090 | | math_extern tanh |
2091 | | math_extern2 pow | ||
2092 | | math_extern2 atan2 | ||
2093 | | math_extern2 fmod | ||
2099 | | | 2094 | | |
2100 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | ||
2101 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2095 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
2102 | | | 2096 | | |
2103 | |.ffunc_1 math_frexp | 2097 | |.ffunc_1 math_frexp |
@@ -2151,13 +2145,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2151 | |4: | 2145 | |4: |
2152 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2146 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
2153 | | | 2147 | | |
2154 | |.ffunc_nnr math_fmod | ||
2155 | |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 | ||
2156 | | fpop1 | ||
2157 | | jmp ->fff_resn | ||
2158 | | | ||
2159 | |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0 | ||
2160 | | | ||
2161 | |.macro math_minmax, name, cmovop, sseop | 2148 | |.macro math_minmax, name, cmovop, sseop |
2162 | | .ffunc name | 2149 | | .ffunc name |
2163 | | mov RA, 2 | 2150 | | mov RA, 2 |
@@ -2899,7 +2886,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
2899 | | | 2886 | | |
2900 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2887 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
2901 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | 2888 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
2902 | |.macro vm_round, name, mode | 2889 | |.macro vm_round, name, mode, cond |
2890 | |->name: | ||
2891 | |.if not X64 and cond | ||
2892 | | movsd xmm0, qword [esp+4] | ||
2893 | | call ->name .. _sse | ||
2894 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
2895 | | fld qword [esp+4] | ||
2896 | | ret | ||
2897 | |.endif | ||
2898 | | | ||
2903 | |->name .. _sse: | 2899 | |->name .. _sse: |
2904 | | sseconst_abs xmm2, RDa | 2900 | | sseconst_abs xmm2, RDa |
2905 | | sseconst_2p52 xmm3, RDa | 2901 | | sseconst_2p52 xmm3, RDa |
@@ -2936,18 +2932,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2936 | | ret | 2932 | | ret |
2937 | |.endmacro | 2933 | |.endmacro |
2938 | | | 2934 | | |
2939 | |->vm_floor: | 2935 | | vm_round vm_floor, 0, 1 |
2940 | |.if not X64 | 2936 | | vm_round vm_ceil, 1, JIT |
2941 | | movsd xmm0, qword [esp+4] | 2937 | | vm_round vm_trunc, 2, JIT |
2942 | | call ->vm_floor_sse | ||
2943 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
2944 | | fld qword [esp+4] | ||
2945 | | ret | ||
2946 | |.endif | ||
2947 | | | ||
2948 | | vm_round vm_floor, 0 | ||
2949 | | vm_round vm_ceil, 1 | ||
2950 | | vm_round vm_trunc, 2 | ||
2951 | | | 2938 | | |
2952 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 2939 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
2953 | |->vm_mod: | 2940 | |->vm_mod: |
@@ -2979,65 +2966,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2979 | | subsd xmm0, xmm1 | 2966 | | subsd xmm0, xmm1 |
2980 | | ret | 2967 | | ret |
2981 | | | 2968 | | |
2982 | |// FP log2(x). Called by math.log(x, base). | ||
2983 | |->vm_log2: | ||
2984 | |.if X64WIN | ||
2985 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
2986 | | fld1 | ||
2987 | | fld qword [rsp+8] | ||
2988 | | fyl2x | ||
2989 | | fstp qword [rsp+8] | ||
2990 | | movsd xmm0, qword [rsp+8] | ||
2991 | |.elif X64 | ||
2992 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
2993 | | fld1 | ||
2994 | | fld qword [rsp-8] | ||
2995 | | fyl2x | ||
2996 | | fstp qword [rsp-8] | ||
2997 | | movsd xmm0, qword [rsp-8] | ||
2998 | |.else | ||
2999 | | fld1 | ||
3000 | | fld qword [esp+4] | ||
3001 | | fyl2x | ||
3002 | |.endif | ||
3003 | | ret | ||
3004 | | | ||
3005 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | ||
3006 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | ||
3007 | |// Caveat: needs 3 slots on x87 stack! | ||
3008 | |->vm_exp_x87: | ||
3009 | | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) | ||
3010 | |->vm_exp2_x87: | ||
3011 | | .if X64WIN | ||
3012 | | .define expscratch, dword [rsp+8] // Use scratch area. | ||
3013 | | .elif X64 | ||
3014 | | .define expscratch, dword [rsp-8] // Use red zone. | ||
3015 | | .else | ||
3016 | | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. | ||
3017 | | .endif | ||
3018 | | fst expscratch // Caveat: overwrites ARG1. | ||
3019 | | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf | ||
3020 | | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 | ||
3021 | |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. | ||
3022 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3023 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3024 | |1: | ||
3025 | | ret | ||
3026 | |2: | ||
3027 | | fpop; fldz; ret | ||
3028 | | | ||
3029 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | ||
3030 | |// and vm_arith. | ||
3031 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | ||
3032 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | ||
3033 | |->vm_pow_sse: | ||
3034 | | cvttsd2si eax, xmm1 | ||
3035 | | cvtsi2sd xmm2, eax | ||
3036 | | ucomisd xmm1, xmm2 | ||
3037 | | jnz >8 // Branch for FP exponents. | ||
3038 | | jp >9 // Branch for NaN exponent. | ||
3039 | | // Fallthrough. | ||
3040 | | | ||
3041 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | 2969 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |
3042 | |->vm_powi_sse: | 2970 | |->vm_powi_sse: |
3043 | | cmp eax, 1; jle >6 // i<=1? | 2971 | | cmp eax, 1; jle >6 // i<=1? |
@@ -3073,246 +3001,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3073 | | sseconst_1 xmm0, RDa | 3001 | | sseconst_1 xmm0, RDa |
3074 | | ret | 3002 | | ret |
3075 | | | 3003 | | |
3076 | |8: // FP/FP power function x^y. | ||
3077 | |.if X64 | ||
3078 | | movd rax, xmm1; shl rax, 1 | ||
3079 | | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf? | ||
3080 | | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? | ||
3081 | | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y? | ||
3082 | | .if X64WIN | ||
3083 | | movsd qword [rsp+16], xmm1 // Use scratch area. | ||
3084 | | movsd qword [rsp+8], xmm0 | ||
3085 | | fld qword [rsp+16] | ||
3086 | | fld qword [rsp+8] | ||
3087 | | .else | ||
3088 | | movsd qword [rsp-16], xmm1 // Use red zone. | ||
3089 | | movsd qword [rsp-8], xmm0 | ||
3090 | | fld qword [rsp-16] | ||
3091 | | fld qword [rsp-8] | ||
3092 | | .endif | ||
3093 | |.else | ||
3094 | | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. | ||
3095 | | movsd qword [esp+4], xmm0 | ||
3096 | | cmp dword [esp+12], 0; jne >1 | ||
3097 | | mov eax, [esp+16]; shl eax, 1 | ||
3098 | | cmp eax, 0xffe00000; je >2 // x^+-Inf? | ||
3099 | |1: | ||
3100 | | cmp dword [esp+4], 0; jne >1 | ||
3101 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3102 | | cmp eax, 0xffe00000; je >5 // +-Inf^y? | ||
3103 | |1: | ||
3104 | | fld qword [esp+12] | ||
3105 | | fld qword [esp+4] | ||
3106 | |.endif | ||
3107 | | fyl2x // y*log2(x) | ||
3108 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3109 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3110 | |.if X64WIN | ||
3111 | | fstp qword [rsp+8] // Use scratch area. | ||
3112 | | movsd xmm0, qword [rsp+8] | ||
3113 | |.elif X64 | ||
3114 | | fstp qword [rsp-8] // Use red zone. | ||
3115 | | movsd xmm0, qword [rsp-8] | ||
3116 | |.else | ||
3117 | | fstp qword [esp+4] // Needs 8 byte scratch area. | ||
3118 | | movsd xmm0, qword [esp+4] | ||
3119 | |.endif | ||
3120 | | ret | ||
3121 | | | ||
3122 | |9: // Handle x^NaN. | ||
3123 | | sseconst_1 xmm2, RDa | ||
3124 | | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 | ||
3125 | | movaps xmm0, xmm1 // x^NaN ==> NaN | ||
3126 | |1: | ||
3127 | | ret | ||
3128 | | | ||
3129 | |2: // Handle x^+-Inf. | ||
3130 | | sseconst_abs xmm2, RDa | ||
3131 | | andpd xmm0, xmm2 // |x| | ||
3132 | | sseconst_1 xmm2, RDa | ||
3133 | | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 | ||
3134 | | movmskpd eax, xmm1 | ||
3135 | | xorps xmm0, xmm0 | ||
3136 | | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3137 | |3: | ||
3138 | | sseconst_hi xmm0, RDa, 7ff00000 // +Inf | ||
3139 | | ret | ||
3140 | | | ||
3141 | |4: // Handle +-0^y. | ||
3142 | | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf | ||
3143 | | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 | ||
3144 | | ret | ||
3145 | | | ||
3146 | |5: // Handle +-Inf^y. | ||
3147 | | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf | ||
3148 | | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 | ||
3149 | | ret | ||
3150 | | | ||
3151 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | ||
3152 | |// Computes fpm(x) for extended math functions. ORDER FPM. | ||
3153 | |->vm_foldfpm: | ||
3154 | |.if JIT | ||
3155 | |.if X64 | ||
3156 | | .if X64WIN | ||
3157 | | .define fpmop, CARG2d | ||
3158 | | .else | ||
3159 | | .define fpmop, CARG1d | ||
3160 | | .endif | ||
3161 | | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse | ||
3162 | | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2 | ||
3163 | | sqrtsd xmm0, xmm0; ret | ||
3164 | |2: | ||
3165 | | .if X64WIN | ||
3166 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3167 | | fld qword [rsp+8] | ||
3168 | | .else | ||
3169 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3170 | | fld qword [rsp-8] | ||
3171 | | .endif | ||
3172 | | cmp fpmop, 5; ja >2 | ||
3173 | | .if X64WIN; pop rax; .endif | ||
3174 | | je >1 | ||
3175 | | call ->vm_exp_x87 | ||
3176 | | .if X64WIN; push rax; .endif | ||
3177 | | jmp >7 | ||
3178 | |1: | ||
3179 | | call ->vm_exp2_x87 | ||
3180 | | .if X64WIN; push rax; .endif | ||
3181 | | jmp >7 | ||
3182 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3183 | | fldln2; fxch; fyl2x; jmp >7 | ||
3184 | |1: ; fld1; fxch; fyl2x; jmp >7 | ||
3185 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3186 | | fldlg2; fxch; fyl2x; jmp >7 | ||
3187 | |1: ; fsin; jmp >7 | ||
3188 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3189 | | fcos; jmp >7 | ||
3190 | |1: ; fptan; fpop | ||
3191 | |7: | ||
3192 | | .if X64WIN | ||
3193 | | fstp qword [rsp+8] // Use scratch area. | ||
3194 | | movsd xmm0, qword [rsp+8] | ||
3195 | | .else | ||
3196 | | fstp qword [rsp-8] // Use red zone. | ||
3197 | | movsd xmm0, qword [rsp-8] | ||
3198 | | .endif | ||
3199 | | ret | ||
3200 | |.else // x86 calling convention. | ||
3201 | | .define fpmop, eax | ||
3202 | | mov fpmop, [esp+12] | ||
3203 | | movsd xmm0, qword [esp+4] | ||
3204 | | cmp fpmop, 1; je >1; ja >2 | ||
3205 | | call ->vm_floor_sse; jmp >7 | ||
3206 | |1: ; call ->vm_ceil_sse; jmp >7 | ||
3207 | |2: ; cmp fpmop, 3; je >1; ja >2 | ||
3208 | | call ->vm_trunc_sse; jmp >7 | ||
3209 | |1: | ||
3210 | | sqrtsd xmm0, xmm0 | ||
3211 | |7: | ||
3212 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3213 | | fld qword [esp+4] | ||
3214 | | ret | ||
3215 | |2: ; fld qword [esp+4] | ||
3216 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3217 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3218 | | fldln2; fxch; fyl2x; ret | ||
3219 | |1: ; fld1; fxch; fyl2x; ret | ||
3220 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3221 | | fldlg2; fxch; fyl2x; ret | ||
3222 | |1: ; fsin; ret | ||
3223 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3224 | | fcos; ret | ||
3225 | |1: ; fptan; fpop; ret | ||
3226 | |.endif | ||
3227 | |9: ; int3 // Bad fpm. | ||
3228 | |.endif | ||
3229 | | | ||
3230 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | ||
3231 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | ||
3232 | |// and basic math functions. ORDER ARITH | ||
3233 | |->vm_foldarith: | ||
3234 | |.if X64 | ||
3235 | | | ||
3236 | | .if X64WIN | ||
3237 | | .define foldop, CARG3d | ||
3238 | | .else | ||
3239 | | .define foldop, CARG1d | ||
3240 | | .endif | ||
3241 | | cmp foldop, 1; je >1; ja >2 | ||
3242 | | addsd xmm0, xmm1; ret | ||
3243 | |1: ; subsd xmm0, xmm1; ret | ||
3244 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3245 | | mulsd xmm0, xmm1; ret | ||
3246 | |1: ; divsd xmm0, xmm1; ret | ||
3247 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse | ||
3248 | | cmp foldop, 7; je >1; ja >2 | ||
3249 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | ||
3250 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | ||
3251 | |2: ; cmp foldop, 9; ja >2 | ||
3252 | |.if X64WIN | ||
3253 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3254 | | movsd qword [rsp+16], xmm1 | ||
3255 | | fld qword [rsp+8] | ||
3256 | | fld qword [rsp+16] | ||
3257 | |.else | ||
3258 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3259 | | movsd qword [rsp-16], xmm1 | ||
3260 | | fld qword [rsp-8] | ||
3261 | | fld qword [rsp-16] | ||
3262 | |.endif | ||
3263 | | je >1 | ||
3264 | | fpatan | ||
3265 | |7: | ||
3266 | |.if X64WIN | ||
3267 | | fstp qword [rsp+8] // Use scratch area. | ||
3268 | | movsd xmm0, qword [rsp+8] | ||
3269 | |.else | ||
3270 | | fstp qword [rsp-8] // Use red zone. | ||
3271 | | movsd xmm0, qword [rsp-8] | ||
3272 | |.endif | ||
3273 | | ret | ||
3274 | |1: ; fxch; fscale; fpop1; jmp <7 | ||
3275 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3276 | | minsd xmm0, xmm1; ret | ||
3277 | |1: ; maxsd xmm0, xmm1; ret | ||
3278 | |9: ; int3 // Bad op. | ||
3279 | | | ||
3280 | |.else // x86 calling convention. | ||
3281 | | | ||
3282 | | .define foldop, eax | ||
3283 | | mov foldop, [esp+20] | ||
3284 | | movsd xmm0, qword [esp+4] | ||
3285 | | movsd xmm1, qword [esp+12] | ||
3286 | | cmp foldop, 1; je >1; ja >2 | ||
3287 | | addsd xmm0, xmm1 | ||
3288 | |7: | ||
3289 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3290 | | fld qword [esp+4] | ||
3291 | | ret | ||
3292 | |1: ; subsd xmm0, xmm1; jmp <7 | ||
3293 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3294 | | mulsd xmm0, xmm1; jmp <7 | ||
3295 | |1: ; divsd xmm0, xmm1; jmp <7 | ||
3296 | |2: ; cmp foldop, 5 | ||
3297 | | je >1; ja >2 | ||
3298 | | call ->vm_mod; jmp <7 | ||
3299 | |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area. | ||
3300 | |2: ; cmp foldop, 7; je >1; ja >2 | ||
3301 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | ||
3302 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | ||
3303 | |2: ; cmp foldop, 9; ja >2 | ||
3304 | | fld qword [esp+4] // Reload from stack | ||
3305 | | fld qword [esp+12] | ||
3306 | | je >1 | ||
3307 | | fpatan; ret | ||
3308 | |1: ; fxch; fscale; fpop1; ret | ||
3309 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3310 | | minsd xmm0, xmm1; jmp <7 | ||
3311 | |1: ; maxsd xmm0, xmm1; jmp <7 | ||
3312 | |9: ; int3 // Bad op. | ||
3313 | | | ||
3314 | |.endif | ||
3315 | | | ||
3316 | |//----------------------------------------------------------------------- | 3004 | |//----------------------------------------------------------------------- |
3317 | |//-- Miscellaneous functions -------------------------------------------- | 3005 | |//-- Miscellaneous functions -------------------------------------------- |
3318 | |//----------------------------------------------------------------------- | 3006 | |//----------------------------------------------------------------------- |
@@ -4107,8 +3795,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4107 | break; | 3795 | break; |
4108 | case BC_POW: | 3796 | case BC_POW: |
4109 | | ins_arithpre movsd, xmm1 | 3797 | | ins_arithpre movsd, xmm1 |
4110 | | call ->vm_pow_sse | 3798 | | mov RB, BASE |
3799 | |.if not X64 | ||
3800 | | movsd FPARG1, xmm0 | ||
3801 | | movsd FPARG3, xmm1 | ||
3802 | |.endif | ||
3803 | | call extern pow | ||
3804 | | movzx RA, PC_RA | ||
3805 | | mov BASE, RB | ||
3806 | |.if X64 | ||
4111 | | ins_arithpost | 3807 | | ins_arithpost |
3808 | |.else | ||
3809 | | fstp qword [BASE+RA*8] | ||
3810 | |.endif | ||
4112 | | ins_next | 3811 | | ins_next |
4113 | break; | 3812 | break; |
4114 | 3813 | ||