about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mike Pall <mike> 2009-12-25 23:12:30 +0100
committer Mike Pall <mike> 2009-12-25 23:12:30 +0100
commit 690760aa3853e63331f46e40c8276d9f5939261d (patch)
tree b68fb518d22c3a08d8886bc532de91fdfdcc9360
parent 6ce0c90ed642157f019b50ad1eb06246471a47b1 (diff)
download luajit-690760aa3853e63331f46e40c8276d9f5939261d.tar.gz
luajit-690760aa3853e63331f46e40c8276d9f5939261d.tar.bz2
luajit-690760aa3853e63331f46e40c8276d9f5939261d.zip
Add SSE variant of pow/powi to interpreter.
Use SSE pow/powi helper functions from compiled code. Cleanup use of helper functions. Related cleanups of folding functions in x64 interpreter.
-rw-r--r-- src/buildvm_x86.dasc 418
-rw-r--r-- src/buildvm_x86.h 1139
-rw-r--r-- src/lj_asm.c 111
-rw-r--r-- src/lj_vm.h 7
4 files changed, 942 insertions, 733 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 99842d08..9ce8ef16 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -96,10 +96,6 @@
96|.type TRACE, Trace 96|.type TRACE, Trace
97|.type EXITINFO, ExitInfo 97|.type EXITINFO, ExitInfo
98| 98|
99|// x86/x64 portability macros
100|.macro push_eax; .if X64; push rax; .else; push eax; .endif; .endmacro
101|.macro pop_eax; .if X64; pop rax; .else; pop eax; .endif; .endmacro
102|
103|// Stack layout while in interpreter. Must match with lj_frame.h. 99|// Stack layout while in interpreter. Must match with lj_frame.h.
104|//----------------------------------------------------------------------- 100|//-----------------------------------------------------------------------
105|.if not X64 // x86 stack layout. 101|.if not X64 // x86 stack layout.
@@ -2072,10 +2068,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2072 | fpop1 2068 | fpop1
2073 | jmp ->fff_resn 2069 | jmp ->fff_resn
2074 | 2070 |
2075 if (0 && sse) { // NYI 2071 if (sse) {
2076 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 2072 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2077 } else { 2073 } else {
2078 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn 2074 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2079 } 2075 }
2080 | 2076 |
2081 |.macro math_minmax, name, cmovop, nocmovop, sseop 2077 |.macro math_minmax, name, cmovop, nocmovop, sseop
@@ -2091,6 +2087,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2091 | add RB, 1 2087 | add RB, 1
2092 | jmp <1 2088 | jmp <1
2093 ||} else { 2089 ||} else {
2090 |.if not X64
2094 |.ffunc_n name 2091 |.ffunc_n name
2095 | mov RB, 2 2092 | mov RB, 2
2096 |1: 2093 |1:
@@ -2101,12 +2098,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2101 ||if (cmov) { 2098 ||if (cmov) {
2102 | fucomi st1; cmovop st1; fpop1 2099 | fucomi st1; cmovop st1; fpop1
2103 ||} else { 2100 ||} else {
2104 | push_eax 2101 | push eax
2105 | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop 2102 | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop
2106 | pop_eax 2103 | pop eax
2107 ||} 2104 ||}
2108 | add RB, 1 2105 | add RB, 1
2109 | jmp <1 2106 | jmp <1
2107 |.endif
2110 ||} 2108 ||}
2111 |.endmacro 2109 |.endmacro
2112 | 2110 |
@@ -2842,19 +2840,29 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2842 |->vm_exp: 2840 |->vm_exp:
2843 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) 2841 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
2844 |->vm_exp2: 2842 |->vm_exp2:
2845 | fst dword [esp+4] // Caveat: overwrites ARG1. 2843 | .if X64WIN
2846 | cmp dword [esp+4], 0x7f800000; je >1 // Special case: e^+Inf = +Inf 2844 | .define expscratch, dword [rsp+8] // Use scratch area.
2847 | cmp dword [esp+4], 0xff800000; je >2 // Special case: e^-Inf = 0 2845 | .elif X64
2846 | .define expscratch, dword [rsp-8] // Use red zone.
2847 | .else
2848 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
2849 | .endif
2850 | fst expscratch // Caveat: overwrites ARG1.
2851 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
2852 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
2848 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. 2853 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
2849 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. 2854 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
2850 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int 2855 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
2851 |1: 2856 |1:
2852 | ret 2857 | ret
2853 |2: 2858 |2:
2854 | fpop; fldz; ret 2859 | fpop; fldz; ret
2855 | 2860 |
2856 |// Generic power function x^y. Called by BC_POW, math.pow fast function 2861 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
2857 |// and vm_arith. Args/ret on x87 stack (y on top). No int/xmm regs modified. 2862 |// and vm_arith.
2863 if (!sse) {
2864 |.if not X64
2865 |// Args/ret on x87 stack (y on top). RC (eax) modified.
2858 |// Caveat: needs 3 slots on x87 stack! 2866 |// Caveat: needs 3 slots on x87 stack!
2859 |->vm_pow: 2867 |->vm_pow:
2860 | fist dword [esp+4] // Store/reload int before comparison. 2868 | fist dword [esp+4] // Store/reload int before comparison.
@@ -2862,18 +2870,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2862 ||if (cmov) { 2870 ||if (cmov) {
2863 | fucomip st1 2871 | fucomip st1
2864 ||} else { 2872 ||} else {
2865 | push_eax; fucomp st1; fnstsw ax; sahf; pop_eax 2873 | fucomp st1; fnstsw ax; sahf
2866 ||} 2874 ||}
2867 | jnz >8 // Branch for FP exponents. 2875 | jnz >8 // Branch for FP exponents.
2868 | jp >9 // Branch for NaN exponent. 2876 | jp >9 // Branch for NaN exponent.
2869 | fpop // Pop y and fallthrough to vm_powi. 2877 | fpop // Pop y and fallthrough to vm_powi.
2870 | 2878 |
2871 |// FP/int power function x^i. Called from JIT code. Arg1/ret on x87 stack. 2879 |// FP/int power function x^i. Arg1/ret on x87 stack.
2872 |// Arg2 (int) on C stack. No int/xmm regs modified. 2880 |// Arg2 (int) on C stack. RC (eax) modified.
2873 |// Caveat: needs 2 slots on x87 stack! 2881 |// Caveat: needs 2 slots on x87 stack!
2874 |->vm_powi: 2882 | mov eax, [esp+4]
2875 | push_eax
2876 | mov eax, [esp+8]
2877 | cmp eax, 1; jle >6 // i<=1? 2883 | cmp eax, 1; jle >6 // i<=1?
2878 | // Now 1 < (unsigned)i <= 0x80000000. 2884 | // Now 1 < (unsigned)i <= 0x80000000.
2879 |1: // Handle leading zeros. 2885 |1: // Handle leading zeros.
@@ -2893,7 +2899,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2893 |4: 2899 |4:
2894 | fmulp st1 2900 | fmulp st1
2895 |5: 2901 |5:
2896 | pop_eax
2897 | ret 2902 | ret
2898 |6: 2903 |6:
2899 | je <5 // x^1 ==> x 2904 | je <5 // x^1 ==> x
@@ -2904,19 +2909,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2904 | jmp <1 // x^-i ==> (1/x)^i 2909 | jmp <1 // x^-i ==> (1/x)^i
2905 |7: 2910 |7:
2906 | fpop; fld1 // x^0 ==> 1 2911 | fpop; fld1 // x^0 ==> 1
2907 | pop_eax
2908 | ret 2912 | ret
2909 | 2913 |
2910 |8: // FP/FP power function x^y. 2914 |8: // FP/FP power function x^y.
2911 | push_eax 2915 | fst dword [esp+4]
2912 | fst dword [esp+8]
2913 | fxch 2916 | fxch
2914 | fst dword [esp+12] 2917 | fst dword [esp+8]
2915 | mov eax, [esp+8]; shl eax, 1 2918 | mov eax, [esp+4]; shl eax, 1
2916 | cmp eax, 0xff000000; je >2 // x^+-Inf? 2919 | cmp eax, 0xff000000; je >2 // x^+-Inf?
2917 | mov eax, [esp+12]; shl eax, 1; je >4 // +-0^y? 2920 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
2918 | cmp eax, 0xff000000; je >4 // +-Inf^y? 2921 | cmp eax, 0xff000000; je >4 // +-Inf^y?
2919 | pop_eax
2920 | fyl2x 2922 | fyl2x
2921 | jmp ->vm_exp2raw 2923 | jmp ->vm_exp2raw
2922 | 2924 |
@@ -2925,7 +2927,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2925 ||if (cmov) { 2927 ||if (cmov) {
2926 | fucomip st2 2928 | fucomip st2
2927 ||} else { 2929 ||} else {
2928 | push_eax; fucomp st2; fnstsw ax; sahf; pop_eax 2930 | fucomp st2; fnstsw ax; sahf
2929 ||} 2931 ||}
2930 | je >1 // 1^NaN ==> 1 2932 | je >1 // 1^NaN ==> 1
2931 | fxch // x^NaN ==> NaN 2933 | fxch // x^NaN ==> NaN
@@ -2943,41 +2945,205 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2943 ||} 2945 ||}
2944 | je >3 // +-1^+-Inf ==> 1 2946 | je >3 // +-1^+-Inf ==> 1
2945 | fpop; fabs; fldz; mov eax, 0; setc al 2947 | fpop; fabs; fldz; mov eax, 0; setc al
2946 | ror eax, 1; xor eax, [esp+8]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 2948 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
2947 | fxch 2949 | fxch
2948 |3: 2950 |3:
2949 | fpop1; fabs; pop_eax 2951 | fpop1; fabs
2950 | ret 2952 | ret
2951 | 2953 |
2952 |4: // Handle +-0^y or +-Inf^y. 2954 |4: // Handle +-0^y or +-Inf^y.
2953 | cmp dword [esp+8], 0; jge <3 // y >= 0, x^y ==> |x| 2955 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
2954 | fpop; fpop 2956 | fpop; fpop
2955 | test eax, eax; pop_eax; jz >5 // y < 0, +-0^y ==> +Inf 2957 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
2956 | fldz // y < 0, +-Inf^y ==> 0 2958 | fldz // y < 0, +-Inf^y ==> 0
2957 | ret 2959 | ret
2958 |5: 2960 |5:
2959 | mov dword [esp+8], 0x7f800000 // Return +Inf. 2961 | mov dword [esp+4], 0x7f800000 // Return +Inf.
2960 | fld dword [esp+8] 2962 | fld dword [esp+4]
2963 | ret
2964 |.endif
2965 } else {
2966 |->vm_pow:
2967 }
2968 |
2969 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
2970 |// Needs 16 byte scratch area for x86. Also called from JIT code.
2971 |->vm_pow_sse:
2972 | cvtsd2si eax, xmm1
2973 | cvtsi2sd xmm2, eax
2974 | ucomisd xmm1, xmm2
2975 | jnz >8 // Branch for FP exponents.
2976 | jp >9 // Branch for NaN exponent.
2977 | // Fallthrough to vm_powi_sse.
2978 |
2979 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2980 |->vm_powi_sse:
2981 | cmp eax, 1; jle >6 // i<=1?
2982 | // Now 1 < (unsigned)i <= 0x80000000.
2983 |1: // Handle leading zeros.
2984 | test eax, 1; jnz >2
2985 | mulsd xmm0, xmm0
2986 | shr eax, 1
2987 | jmp <1
2988 |2:
2989 | shr eax, 1; jz >5
2990 | movaps xmm1, xmm0
2991 |3: // Handle trailing bits.
2992 | mulsd xmm0, xmm0
2993 | shr eax, 1; jz >4
2994 | jnc <3
2995 | mulsd xmm1, xmm0
2996 | jmp <3
2997 |4:
2998 | mulsd xmm0, xmm1
2999 |5:
3000 | ret
3001 |6:
3002 | je <5 // x^1 ==> x
3003 | jb >7
3004 | push RDa
3005 | sseconst_1 xmm1, RDa
3006 | divsd xmm1, xmm0
3007 | pop RDa
3008 | movaps xmm0, xmm1
3009 | neg eax
3010 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3011 | jmp <1 // x^-i ==> (1/x)^i
3012 |7:
3013 | sseconst_1 xmm0, RDa
3014 | ret
3015 |
3016 |8: // FP/FP power function x^y.
3017 |.if X64
3018 | movd rax, xmm1; shl rax, 1
3019 | ror rax, 32; cmp rax, 0xffe00000; je >2 // x^+-Inf?
3020 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3021 | ror rax, 32; cmp rax, 0xffe00000; je >5 // +-Inf^y?
3022 | .if X64WIN
3023 | movsd qword [rsp+16], xmm1 // Use scratch area.
3024 | movsd qword [rsp+8], xmm0
3025 | fld qword [rsp+16]
3026 | fld qword [rsp+8]
3027 | .else
3028 | movsd qword [rsp-16], xmm1 // Use red zone.
3029 | movsd qword [rsp-8], xmm0
3030 | fld qword [rsp-16]
3031 | fld qword [rsp-8]
3032 | .endif
3033 |.else
3034 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3035 | movsd qword [esp+4], xmm0
3036 | cmp dword [esp+12], 0; jne >1
3037 | mov eax, [esp+16]; shl eax, 1
3038 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3039 |1:
3040 | cmp dword [esp+4], 0; jne >1
3041 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3042 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3043 |1:
3044 | fld qword [esp+12]
3045 | fld qword [esp+4]
3046 |.endif
3047 | fyl2x // y*log2(x)
3048 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3049 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3050 |.if X64WIN
3051 | fstp qword [rsp+8] // Use scratch area.
3052 | movsd xmm0, qword [rsp+8]
3053 |.elif X64
3054 | fstp qword [rsp-8] // Use red zone.
3055 | movsd xmm0, qword [rsp-8]
3056 |.else
3057 | fstp qword [esp+4] // Needs 8 byte scratch area.
3058 | movsd xmm0, qword [esp+4]
3059 |.endif
3060 | ret
3061 |
3062 |9: // Handle x^NaN.
3063 | sseconst_1 xmm2, RDa
3064 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3065 | movaps xmm0, xmm1 // x^NaN ==> NaN
3066 |1:
3067 | ret
3068 |
3069 |2: // Handle x^+-Inf.
3070 | sseconst_abs xmm2, RDa
3071 | andpd xmm0, xmm2 // |x|
3072 | sseconst_1 xmm2, RDa
3073 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3074 | movmskpd eax, xmm1
3075 | xorps xmm0, xmm0
3076 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3077 |3:
3078 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3079 | ret
3080 |
3081 |4: // Handle +-0^y.
3082 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3083 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3084 | ret
3085 |
3086 |5: // Handle +-Inf^y.
3087 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3088 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
2961 | ret 3089 | ret
2962 | 3090 |
2963 |// Callable from C: double lj_vm_foldfpm(double x, int fpm) 3091 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
2964 |// Computes fpm(x) for extended math functions. ORDER FPM. 3092 |// Computes fpm(x) for extended math functions. ORDER FPM.
2965 |->vm_foldfpm: 3093 |->vm_foldfpm:
2966 if (sse) { 3094 if (sse) {
2967 |.if X64WIN
2968 | .define fpmop, CARG2d
2969 |.elif X64
2970 | .define fpmop, CARG1d
2971 |.else
2972 | .define fpmop, eax
2973 | mov fpmop, [esp+12]
2974 | movsd xmm0, qword [esp+4]
2975 |.endif
2976 |.if X64 3095 |.if X64
3096 |
3097 | .if X64WIN
3098 | .define fpmop, CARG2d
3099 | .else
3100 | .define fpmop, CARG1d
3101 | .endif
2977 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3102 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
2978 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3103 | cmp fpmop, 3; jb ->vm_trunc; ja >2
2979 | sqrtsd xmm0, xmm0; ret 3104 | sqrtsd xmm0, xmm0; ret
2980 |.else 3105 |2:
3106 | .if X64WIN
3107 | movsd qword [rsp+8], xmm0 // Use scratch area.
3108 | fld qword [rsp+8]
3109 | .else
3110 | movsd qword [rsp-8], xmm0 // Use red zone.
3111 | fld qword [rsp-8]
3112 | .endif
3113 | cmp fpmop, 5; ja >2
3114 | .if X64WIN; pop rax; .endif
3115 | je >1
3116 | call ->vm_exp
3117 | .if X64WIN; push rax; .endif
3118 | jmp >7
3119 |1:
3120 | call ->vm_exp2
3121 | .if X64WIN; push rax; .endif
3122 | jmp >7
3123 |2: ; cmp fpmop, 7; je >1; ja >2
3124 | fldln2; fxch; fyl2x; jmp >7
3125 |1: ; fld1; fxch; fyl2x; jmp >7
3126 |2: ; cmp fpmop, 9; je >1; ja >2
3127 | fldlg2; fxch; fyl2x; jmp >7
3128 |1: ; fsin; jmp >7
3129 |2: ; cmp fpmop, 11; je >1; ja >9
3130 | fcos; jmp >7
3131 |1: ; fptan; fpop
3132 |7:
3133 | .if X64WIN
3134 | fstp qword [rsp+8] // Use scratch area.
3135 | movsd xmm0, qword [rsp+8]
3136 | .else
3137 | fstp qword [rsp-8] // Use red zone.
3138 | movsd xmm0, qword [rsp-8]
3139 | .endif
3140 | ret
3141 |
3142 |.else // x86 calling convention.
3143 |
3144 | .define fpmop, eax
3145 | mov fpmop, [esp+12]
3146 | movsd xmm0, qword [esp+4]
2981 | cmp fpmop, 1; je >1; ja >2 3147 | cmp fpmop, 1; je >1; ja >2
2982 | call ->vm_floor; jmp >7 3148 | call ->vm_floor; jmp >7
2983 |1: ; call ->vm_ceil; jmp >7 3149 |1: ; call ->vm_ceil; jmp >7
@@ -2989,27 +3155,36 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2989 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. 3155 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
2990 | fld qword [esp+4] 3156 | fld qword [esp+4]
2991 | ret 3157 | ret
3158 |2: ; fld qword [esp+4]
3159 | cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2
3160 |2: ; cmp fpmop, 7; je >1; ja >2
3161 | fldln2; fxch; fyl2x; ret
3162 |1: ; fld1; fxch; fyl2x; ret
3163 |2: ; cmp fpmop, 9; je >1; ja >2
3164 | fldlg2; fxch; fyl2x; ret
3165 |1: ; fsin; ret
3166 |2: ; cmp fpmop, 11; je >1; ja >9
3167 | fcos; ret
3168 |1: ; fptan; fpop; ret
3169 |
2992 |.endif 3170 |.endif
2993 |2:
2994 | fld qword [esp+4]
2995 } else { 3171 } else {
2996 | mov fpmop, [esp+12] 3172 | mov fpmop, [esp+12]
2997 | fld qword [esp+4] 3173 | fld qword [esp+4]
2998 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil 3174 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
2999 | cmp fpmop, 3; jb ->vm_trunc; ja >2 3175 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3000 | fsqrt; ret 3176 | fsqrt; ret
3001 |2: 3177 |2: ; cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2
3178 | cmp fpmop, 7; je >1; ja >2
3179 | fldln2; fxch; fyl2x; ret
3180 |1: ; fld1; fxch; fyl2x; ret
3181 |2: ; cmp fpmop, 9; je >1; ja >2
3182 | fldlg2; fxch; fyl2x; ret
3183 |1: ; fsin; ret
3184 |2: ; cmp fpmop, 11; je >1; ja >9
3185 | fcos; ret
3186 |1: ; fptan; fpop; ret
3002 } 3187 }
3003 | cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2
3004 | cmp fpmop, 7; je >1; ja >2
3005 | fldln2; fxch; fyl2x; ret
3006 |1: ; fld1; fxch; fyl2x; ret
3007 |2: ; cmp fpmop, 9; je >1; ja >2
3008 | fldlg2; fxch; fyl2x; ret
3009 |1: ; fsin; ret
3010 |2: ; cmp fpmop, 11; je >1; ja >9
3011 | fcos; ret
3012 |1: ; fptan; fpop; ret
3013 |9: ; int3 // Bad fpm. 3188 |9: ; int3 // Bad fpm.
3014 | 3189 |
3015 |// Callable from C: double lj_vm_foldarith(double x, double y, int op) 3190 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
@@ -3017,72 +3192,87 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
3017 |// and basic math functions. ORDER ARITH 3192 |// and basic math functions. ORDER ARITH
3018 |->vm_foldarith: 3193 |->vm_foldarith:
3019 if (sse) { 3194 if (sse) {
3020 |.macro retxmm0; .if X64; ret; .else; jmp >7; .endif; .endmacro 3195 |.if X64
3021 |.macro retst0; .if X64; jmp >7; .else; ret; .endif; .endmacro
3022 | 3196 |
3197 | .if X64WIN
3198 | .define foldop, CARG3d
3199 | .else
3200 | .define foldop, CARG1d
3201 | .endif
3202 | cmp foldop, 1; je >1; ja >2
3203 | addsd xmm0, xmm1; ret
3204 |1: ; subsd xmm0, xmm1; ret
3205 |2: ; cmp foldop, 3; je >1; ja >2
3206 | mulsd xmm0, xmm1; ret
3207 |1: ; divsd xmm0, xmm1; ret
3208 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3209 | cmp foldop, 7; je >1; ja >2
3210 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3211 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3212 |2: ; cmp foldop, 9; ja >2
3023 |.if X64WIN 3213 |.if X64WIN
3024 | .define foldop, CARG3d 3214 | movsd qword [rsp+8], xmm0 // Use scratch area.
3025 |.elif X64 3215 | movsd qword [rsp+16], xmm1
3026 | .define foldop, CARG1d 3216 | fld qword [rsp+8]
3217 | fld qword [rsp+16]
3027 |.else 3218 |.else
3219 | movsd qword [rsp-8], xmm0 // Use red zone.
3220 | movsd qword [rsp-16], xmm1
3221 | fld qword [rsp-8]
3222 | fld qword [rsp-16]
3223 |.endif
3224 | je >1
3225 | fpatan
3226 |7:
3227 |.if X64WIN
3228 | fstp qword [rsp+8] // Use scratch area.
3229 | movsd xmm0, qword [rsp+8]
3230 |.else
3231 | fstp qword [rsp-8] // Use red zone.
3232 | movsd xmm0, qword [rsp-8]
3233 |.endif
3234 | ret
3235 |1: ; fxch; fscale; fpop1; jmp <7
3236 |2: ; cmp foldop, 11; je >1; ja >9
3237 | minsd xmm0, xmm1; ret
3238 |1: ; maxsd xmm0, xmm1; ret
3239 |9: ; int3 // Bad op.
3240 |
3241 |.else // x86 calling convention.
3242 |
3028 | .define foldop, eax 3243 | .define foldop, eax
3029 | mov foldop, [esp+20] 3244 | mov foldop, [esp+20]
3030 | movsd xmm0, qword [esp+4] 3245 | movsd xmm0, qword [esp+4]
3031 | movsd xmm1, qword [esp+12] 3246 | movsd xmm1, qword [esp+12]
3032 |.endif
3033 | cmp foldop, 1; je >1; ja >2 3247 | cmp foldop, 1; je >1; ja >2
3034 | addsd xmm0, xmm1; retxmm0 3248 | addsd xmm0, xmm1
3035 |1: ; subsd xmm0, xmm1; retxmm0 3249 |7:
3250 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3251 | fld qword [esp+4]
3252 | ret
3253 |1: ; subsd xmm0, xmm1; jmp <7
3036 |2: ; cmp foldop, 3; je >1; ja >2 3254 |2: ; cmp foldop, 3; je >1; ja >2
3037 | mulsd xmm0, xmm1; retxmm0 3255 | mulsd xmm0, xmm1; jmp <7
3038 |1: ; divsd xmm0, xmm1; retxmm0 3256 |1: ; divsd xmm0, xmm1; jmp <7
3039 |2: ; cmp foldop, 5 3257 |2: ; cmp foldop, 5
3040 |.if X64
3041 | jb ->vm_mod; je ->vm_pow // NYI: broken without SSE vm_pow.
3042 |.else
3043 | je >1; ja >2 3258 | je >1; ja >2
3044 | call ->vm_mod; retxmm0 3259 | call ->vm_mod; jmp <7
3045 |1: ; fld qword [esp+4]; fld qword [esp+12]; jmp ->vm_pow // NYI 3260 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3046 |2: 3261 |2: ; cmp foldop, 7; je >1; ja >2
3047 |.endif 3262 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3048 | cmp foldop, 7; je >1; ja >2 3263 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3049 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; retxmm0
3050 |1:
3051 | sseconst_abs xmm1, RDa; andps xmm0, xmm1; retxmm0
3052 |2: ; cmp foldop, 9; ja >2 3264 |2: ; cmp foldop, 9; ja >2
3053 |.if X64WIN
3054 | movsd qword [esp+8], xmm0 // Use scratch area.
3055 | movsd qword [esp+16], xmm1
3056 | fld qword [esp+8]
3057 | fld qword [esp+16]
3058 |.elif X64
3059 | movsd qword [esp-8], xmm0 // Use red zone.
3060 | movsd qword [esp-16], xmm1
3061 | fld qword [esp-8]
3062 | fld qword [esp-16]
3063 |.else
3064 | fld qword [esp+4] // Reload from stack 3265 | fld qword [esp+4] // Reload from stack
3065 | fld qword [esp+12] 3266 | fld qword [esp+12]
3066 |.endif
3067 | je >1 3267 | je >1
3068 | fpatan; retst0 3268 | fpatan; ret
3069 |1: ; fxch; fscale; fpop1; retst0 3269 |1: ; fxch; fscale; fpop1; ret
3070 |2: ; cmp foldop, 11; je >1; ja >9 3270 |2: ; cmp foldop, 11; je >1; ja >9
3071 | minsd xmm0, xmm1; retxmm0 3271 | minsd xmm0, xmm1; jmp <7
3072 |1: ; maxsd xmm0, xmm1; retxmm0 3272 |1: ; maxsd xmm0, xmm1; jmp <7
3073 |9: ; int3 // Bad op. 3273 |9: ; int3 // Bad op.
3074 |7: // Move return value depending on calling convention. 3274 |
3075 |.if X64WIN
3076 | fstp qword [esp+8] // Use scratch area.
3077 | movsd xmm0, qword [esp+8]
3078 |.elif X64
3079 | fstp qword [esp-8] // Use red zone.
3080 | movsd xmm0, qword [esp-8]
3081 |.else
3082 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3083 | fld qword [esp+4]
3084 |.endif 3275 |.endif
3085 | ret
3086 } else { 3276 } else {
3087 | mov eax, [esp+20] 3277 | mov eax, [esp+20]
3088 | fld qword [esp+4] 3278 | fld qword [esp+4]
@@ -3483,17 +3673,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3483 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3673 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3484 break; 3674 break;
3485 case BC_POW: 3675 case BC_POW:
3486 if (sse) { 3676 | ins_arithpre fld, movsd, xmm1
3487 sse = 0; /* NYI: temporary workaround. */ 3677 | call ->vm_pow
3488 | ins_arithpre fld, movsd, xmm1 3678 | ins_arithpost
3489 | call ->vm_pow
3490 | ins_arithpost
3491 sse = 1;
3492 } else {
3493 | ins_arithpre fld, movsd, xmm1
3494 | call ->vm_pow
3495 | ins_arithpost
3496 }
3497 | ins_next 3679 | ins_next
3498 break; 3680 break;
3499 3681
diff --git a/src/buildvm_x86.h b/src/buildvm_x86.h
index e2ba7c1e..f1f14e70 100644
--- a/src/buildvm_x86.h
+++ b/src/buildvm_x86.h
@@ -12,7 +12,7 @@
12#define DASM_SECTION_CODE_OP 0 12#define DASM_SECTION_CODE_OP 0
13#define DASM_SECTION_CODE_SUB 1 13#define DASM_SECTION_CODE_SUB 1
14#define DASM_MAXSECTION 2 14#define DASM_MAXSECTION 2
15static const unsigned char build_actionlist[14716] = { 15static const unsigned char build_actionlist[15226] = {
16 254,1,248,10,137,202,139,173,233,137,114,252,252,15,182,141,233,139,181,233, 16 254,1,248,10,137,202,139,173,233,137,114,252,252,15,182,141,233,139,181,233,
17 139,189,233,139,108,36,48,141,12,202,141,68,194,252,252,59,141,233,15,135, 17 139,189,233,139,108,36,48,141,12,202,141,68,194,252,252,59,141,233,15,135,
18 244,11,248,9,189,237,248,1,137,40,137,104,8,131,192,16,57,200,15,130,244, 18 244,11,248,9,189,237,248,1,137,40,137,104,8,131,192,16,57,200,15,130,244,
@@ -469,48 +469,72 @@ static const unsigned char build_actionlist[14716] = {
469 222,201,248,146,217,84,36,4,129,124,36,4,0,0,128,127,15,132,244,247,129,124, 469 222,201,248,146,217,84,36,4,129,124,36,4,0,0,128,127,15,132,244,247,129,124,
470 36,4,0,0,128,252,255,15,132,244,248,248,147,217,192,217,252,252,220,252,233, 470 36,4,0,0,128,252,255,15,132,244,248,248,147,217,192,217,252,252,220,252,233,
471 217,201,217,252,240,217,232,222,193,217,252,253,221,217,248,1,195,248,2,221, 471 217,201,217,252,240,217,232,222,193,217,252,253,221,217,248,1,195,248,2,221,
472 216,217,252,238,195,248,106,219,84,36,4,219,68,36,4,255,223,252,233,255,80, 472 216,217,252,238,195,255,248,106,219,84,36,4,219,68,36,4,255,223,252,233,255,
473 221,252,233,223,224,158,88,255,15,133,244,254,15,138,244,255,221,216,248, 473 221,252,233,223,224,158,255,15,133,244,254,15,138,244,255,221,216,139,68,
474 148,80,139,68,36,8,131,252,248,1,15,142,244,252,248,1,169,1,0,0,0,15,133, 474 36,4,131,252,248,1,15,142,244,252,248,1,169,1,0,0,0,15,133,244,248,216,200,
475 244,248,216,200,209,232,252,233,244,1,248,2,209,232,15,132,244,251,217,192, 475 209,232,252,233,244,1,248,2,209,232,15,132,244,251,217,192,248,3,216,200,
476 248,3,216,200,209,232,15,132,244,250,15,131,244,3,255,220,201,252,233,244, 476 209,232,15,132,244,250,15,131,244,3,220,201,252,233,244,3,248,4,255,222,201,
477 3,248,4,222,201,248,5,88,195,248,6,15,132,244,5,15,130,244,253,217,232,222, 477 248,5,195,248,6,15,132,244,5,15,130,244,253,217,232,222,252,241,252,247,216,
478 252,241,252,247,216,131,252,248,1,15,132,244,5,252,233,244,1,248,7,221,216, 478 131,252,248,1,15,132,244,5,252,233,244,1,248,7,221,216,217,232,195,248,8,
479 217,232,88,195,248,8,80,217,84,36,8,217,201,217,84,36,12,139,68,36,8,209, 479 217,84,36,4,217,201,217,84,36,8,139,68,36,4,209,224,61,0,0,0,252,255,15,132,
480 224,61,0,0,0,252,255,15,132,244,248,139,68,36,12,209,224,15,132,244,250,61, 480 244,248,139,68,36,8,209,224,15,132,244,250,61,0,0,0,252,255,15,132,244,250,
481 0,0,0,252,255,15,132,244,250,255,88,217,252,241,252,233,244,147,248,9,217, 481 217,252,241,252,233,244,147,248,9,255,217,232,255,223,252,234,255,221,252,
482 232,255,223,252,234,255,80,221,252,234,223,224,158,88,255,15,132,244,247, 482 234,223,224,158,255,15,132,244,247,217,201,248,1,221,216,195,248,2,217,225,
483 217,201,248,1,221,216,195,248,2,217,225,217,232,255,221,252,233,223,224,158, 483 217,232,255,15,132,244,249,221,216,217,225,217,252,238,184,0,0,0,0,15,146,
484 255,15,132,244,249,221,216,217,225,217,252,238,184,0,0,0,0,15,146,208,209, 484 208,209,200,51,68,36,4,15,137,244,249,217,201,248,3,221,217,217,225,195,248,
485 200,51,68,36,8,15,137,244,249,217,201,248,3,221,217,217,225,88,195,248,4, 485 4,131,124,36,4,0,15,141,244,3,221,216,221,216,133,192,15,132,244,251,217,
486 131,124,36,8,0,15,141,244,3,221,216,221,216,133,192,88,15,132,244,251,217, 486 252,238,195,248,5,199,68,36,4,0,0,128,127,217,68,36,4,195,255,248,106,255,
487 252,238,195,248,5,199,68,36,8,0,0,128,127,217,68,36,8,195,248,149,255,139, 487 248,148,252,242,15,45,193,252,242,15,42,208,102,15,46,202,15,133,244,254,
488 68,36,12,252,242,15,16,68,36,4,131,252,248,1,15,132,244,247,15,135,244,248, 488 15,138,244,255,248,149,131,252,248,1,15,142,244,252,248,1,169,1,0,0,0,15,
489 232,244,81,252,233,244,253,248,1,232,244,83,252,233,244,253,248,2,131,252, 489 133,244,248,252,242,15,89,192,209,232,252,233,244,1,248,2,209,232,15,132,
490 248,3,15,132,244,247,15,135,244,248,232,244,103,255,252,233,244,253,248,1, 490 244,251,15,40,200,248,3,252,242,15,89,192,209,232,15,132,244,250,15,131,244,
491 252,242,15,81,192,248,7,252,242,15,17,68,36,4,221,68,36,4,195,248,2,221,68, 491 3,255,252,242,15,89,200,252,233,244,3,248,4,252,242,15,89,193,248,5,195,248,
492 36,4,255,139,68,36,12,221,68,36,4,131,252,248,1,15,130,244,81,15,132,244, 492 6,15,132,244,5,15,130,244,253,80,184,0,0,252,240,63,102,15,110,200,102,15,
493 83,131,252,248,3,15,130,244,103,15,135,244,248,217,252,250,195,248,2,255, 493 112,201,81,252,242,15,94,200,88,15,40,193,252,247,216,131,252,248,1,15,132,
494 131,252,248,5,15,130,244,87,15,132,244,146,131,252,248,7,15,132,244,247,15, 494 244,5,252,233,244,1,248,7,184,0,0,252,240,63,102,15,110,192,102,15,112,192,
495 135,244,248,217,252,237,217,201,217,252,241,195,248,1,217,232,217,201,217, 495 81,195,248,8,252,242,15,17,76,36,12,252,242,15,17,68,36,4,131,124,36,12,0,
496 252,241,195,248,2,131,252,248,9,15,132,244,247,15,135,244,248,217,252,236, 496 15,133,244,247,139,68,36,16,209,224,61,0,0,224,252,255,15,132,244,248,248,
497 217,201,217,252,241,195,248,1,217,252,254,195,248,2,131,252,248,11,15,132, 497 1,255,131,124,36,4,0,15,133,244,247,139,68,36,8,209,224,15,132,244,250,61,
498 244,247,15,135,244,255,255,217,252,255,195,248,1,217,252,242,221,216,195, 498 0,0,224,252,255,15,132,244,251,248,1,221,68,36,12,221,68,36,4,217,252,241,
499 248,9,204,248,150,255,139,68,36,20,252,242,15,16,68,36,4,252,242,15,16,76, 499 217,192,217,252,252,220,252,233,217,201,217,252,240,217,232,222,193,217,252,
500 36,12,131,252,248,1,15,132,244,247,15,135,244,248,252,242,15,88,193,252,233, 500 253,221,217,221,92,36,4,252,242,15,16,68,36,4,195,248,9,184,0,0,252,240,63,
501 244,253,248,1,252,242,15,92,193,252,233,244,253,248,2,131,252,248,3,15,132, 501 102,15,110,208,102,15,112,210,81,102,15,46,194,15,132,244,247,15,40,193,248,
502 244,247,15,135,244,248,252,242,15,89,193,252,233,244,253,248,1,252,242,15, 502 1,195,248,2,102,15,252,239,210,102,15,118,210,102,15,115,210,1,102,15,84,
503 94,193,252,233,244,253,248,2,255,131,252,248,5,15,132,244,247,15,135,244, 503 194,184,0,0,252,240,63,102,15,110,208,102,15,112,210,81,102,15,46,194,15,
504 248,232,244,145,252,233,244,253,248,1,221,68,36,4,221,68,36,12,252,233,244, 504 132,244,1,102,15,80,193,15,87,192,136,196,15,146,208,48,224,15,133,244,1,
505 106,248,2,131,252,248,7,15,132,244,247,15,135,244,248,184,0,0,0,128,102,15, 505 248,3,184,0,0,252,240,127,102,15,110,192,102,15,112,192,81,195,248,4,102,
506 110,200,102,15,112,201,81,15,87,193,252,233,244,253,248,1,102,15,252,239, 506 15,80,193,133,192,15,133,244,3,255,15,87,192,195,248,5,102,15,80,193,133,
507 201,102,15,118,201,102,15,115,209,1,15,84,193,252,233,244,253,248,2,255,131, 507 192,15,132,244,3,15,87,192,195,248,150,255,139,68,36,12,252,242,15,16,68,
508 252,248,9,15,135,244,248,221,68,36,4,221,68,36,12,15,132,244,247,217,252, 508 36,4,131,252,248,1,15,132,244,247,15,135,244,248,232,244,81,252,233,244,253,
509 243,195,248,1,217,201,217,252,253,221,217,195,248,2,131,252,248,11,15,132, 509 248,1,232,244,83,252,233,244,253,248,2,131,252,248,3,15,132,244,247,15,135,
510 244,247,15,135,244,255,252,242,15,93,193,252,233,244,253,248,1,252,242,15, 510 244,248,232,244,103,255,252,233,244,253,248,1,252,242,15,81,192,248,7,252,
511 95,193,252,233,244,253,248,9,204,248,7,252,242,15,17,68,36,4,221,68,36,4, 511 242,15,17,68,36,4,221,68,36,4,195,248,2,221,68,36,4,131,252,248,5,15,130,
512 195,255,139,68,36,20,221,68,36,4,221,68,36,12,131,252,248,1,15,132,244,247, 512 244,87,15,132,244,146,248,2,131,252,248,7,15,132,244,247,15,135,244,248,217,
513 15,135,244,248,222,193,195,248,1,222,252,233,195,248,2,131,252,248,3,15,132, 513 252,237,217,201,217,252,241,195,248,1,217,232,217,201,217,252,241,195,248,
514 2,131,252,248,9,15,132,244,247,15,135,244,248,255,217,252,236,217,201,217,
515 252,241,195,248,1,217,252,254,195,248,2,131,252,248,11,15,132,244,247,15,
516 135,244,255,217,252,255,195,248,1,217,252,242,221,216,195,255,139,68,36,12,
517 221,68,36,4,131,252,248,1,15,130,244,81,15,132,244,83,131,252,248,3,15,130,
518 244,103,15,135,244,248,217,252,250,195,248,2,131,252,248,5,15,130,244,87,
519 15,132,244,146,131,252,248,7,15,132,244,247,15,135,244,248,217,252,237,217,
520 201,217,252,241,195,248,1,217,232,217,201,217,252,241,195,248,2,131,252,248,
521 9,15,132,244,247,255,15,135,244,248,217,252,236,217,201,217,252,241,195,248,
522 1,217,252,254,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,217,
523 252,255,195,248,1,217,252,242,221,216,195,255,248,9,204,248,151,255,139,68,
524 36,20,252,242,15,16,68,36,4,252,242,15,16,76,36,12,131,252,248,1,15,132,244,
525 247,15,135,244,248,252,242,15,88,193,248,7,252,242,15,17,68,36,4,221,68,36,
526 4,195,248,1,252,242,15,92,193,252,233,244,7,248,2,131,252,248,3,15,132,244,
527 247,15,135,244,248,252,242,15,89,193,252,233,244,7,248,1,252,242,15,94,193,
528 252,233,244,7,248,2,131,252,248,5,15,132,244,247,255,15,135,244,248,232,244,
529 145,252,233,244,7,248,1,90,232,244,106,82,252,233,244,7,248,2,131,252,248,
530 7,15,132,244,247,15,135,244,248,184,0,0,0,128,102,15,110,200,102,15,112,201,
531 81,15,87,193,252,233,244,7,248,1,102,15,252,239,201,102,15,118,201,102,15,
532 115,209,1,15,84,193,252,233,244,7,248,2,255,131,252,248,9,15,135,244,248,
533 221,68,36,4,221,68,36,12,15,132,244,247,217,252,243,195,248,1,217,201,217,
534 252,253,221,217,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,252,
535 242,15,93,193,252,233,244,7,248,1,252,242,15,95,193,252,233,244,7,248,9,204,
536 255,139,68,36,20,221,68,36,4,221,68,36,12,131,252,248,1,15,132,244,247,15,
537 135,244,248,222,193,195,248,1,222,252,233,195,248,2,131,252,248,3,15,132,
514 244,247,15,135,244,248,222,201,195,248,1,222,252,249,195,248,2,131,252,248, 538 244,247,15,135,244,248,222,201,195,248,1,222,252,249,195,248,2,131,252,248,
515 5,15,130,244,145,15,132,244,106,131,252,248,7,15,132,244,247,15,135,244,248, 539 5,15,130,244,145,15,132,244,106,131,252,248,7,15,132,244,247,15,135,244,248,
516 255,221,216,217,224,195,248,1,221,216,217,225,195,248,2,131,252,248,9,15, 540 255,221,216,217,224,195,248,1,221,216,217,225,195,248,2,131,252,248,9,15,
@@ -518,208 +542,208 @@ static const unsigned char build_actionlist[14716] = {
518 217,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,255,219,252,233, 542 217,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,255,219,252,233,
519 219,209,221,217,195,248,1,219,252,233,218,209,221,217,195,255,221,225,223, 543 219,209,221,217,195,248,1,219,252,233,218,209,221,217,195,255,221,225,223,
520 224,252,246,196,1,15,132,244,248,217,201,248,2,221,216,195,248,1,221,225, 544 224,252,246,196,1,15,132,244,248,217,201,248,2,221,216,195,248,1,221,225,
521 223,224,252,246,196,1,15,133,244,248,217,201,248,2,221,216,195,255,248,9, 545 223,224,252,246,196,1,15,133,244,248,217,201,248,2,221,216,195,255,248,152,
522 204,255,248,151,156,90,137,209,129,252,242,0,0,32,0,82,157,156,90,49,192, 546 156,90,137,209,129,252,242,0,0,32,0,82,157,156,90,49,192,57,209,15,132,244,
523 57,209,15,132,244,247,139,68,36,4,87,83,15,162,139,124,36,16,137,7,137,95, 547 247,139,68,36,4,87,83,15,162,139,124,36,16,137,7,137,95,4,137,79,8,137,87,
524 4,137,79,8,137,87,12,91,95,248,1,195,255,249,255,129,124,253,202,4,239,15, 548 12,91,95,248,1,195,255,249,255,129,124,253,202,4,239,15,135,244,41,129,124,
525 135,244,41,129,124,253,194,4,239,15,135,244,41,255,252,242,15,16,4,194,131, 549 253,194,4,239,15,135,244,41,255,252,242,15,16,4,194,131,198,4,102,15,46,4,
526 198,4,102,15,46,4,202,255,221,4,202,221,4,194,131,198,4,255,223,252,233,221, 550 202,255,221,4,202,221,4,194,131,198,4,255,223,252,233,221,216,255,218,252,
527 216,255,218,252,233,223,224,158,255,15,134,244,248,255,15,131,244,248,255, 551 233,223,224,158,255,15,134,244,248,255,15,131,244,248,255,248,1,15,183,70,
528 248,1,15,183,70,252,254,141,180,253,134,233,248,2,139,6,15,182,204,15,182, 552 252,254,141,180,253,134,233,248,2,139,6,15,182,204,15,182,232,131,198,4,193,
529 232,131,198,4,193,232,16,252,255,36,171,255,139,108,194,4,131,198,4,129,252, 553 232,16,252,255,36,171,255,139,108,194,4,131,198,4,129,252,253,239,15,135,
530 253,239,15,135,244,251,129,124,253,202,4,239,15,135,244,251,255,252,242,15, 554 244,251,129,124,253,202,4,239,15,135,244,251,255,252,242,15,16,4,194,102,
531 16,4,194,102,15,46,4,202,255,221,4,202,221,4,194,255,15,138,244,248,15,133, 555 15,46,4,202,255,221,4,202,221,4,194,255,15,138,244,248,15,133,244,248,255,
532 244,248,255,15,138,244,248,15,132,244,247,255,248,1,15,183,70,252,254,141, 556 15,138,244,248,15,132,244,247,255,248,1,15,183,70,252,254,141,180,253,134,
533 180,253,134,233,248,2,255,248,2,15,183,70,252,254,141,180,253,134,233,248, 557 233,248,2,255,248,2,15,183,70,252,254,141,180,253,134,233,248,1,255,248,5,
534 1,255,248,5,57,108,202,4,15,133,244,2,129,252,253,239,15,131,244,1,139,12, 558 57,108,202,4,15,133,244,2,129,252,253,239,15,131,244,1,139,12,202,139,4,194,
535 202,139,4,194,57,193,15,132,244,1,129,252,253,239,15,135,244,2,139,169,233, 559 57,193,15,132,244,1,129,252,253,239,15,135,244,2,139,169,233,133,252,237,
536 133,252,237,15,132,244,2,252,246,133,233,235,15,133,244,2,255,49,252,237, 560 15,132,244,2,252,246,133,233,235,15,133,244,2,255,49,252,237,255,189,1,0,
537 255,189,1,0,0,0,255,252,233,244,45,255,252,247,208,131,198,4,129,124,253, 561 0,0,255,252,233,244,45,255,252,247,208,131,198,4,129,124,253,202,4,239,15,
538 202,4,239,15,133,244,248,139,12,202,59,12,135,255,131,198,4,129,124,253,202, 562 133,244,248,139,12,202,59,12,135,255,131,198,4,129,124,253,202,4,239,15,135,
539 4,239,15,135,244,248,255,252,242,15,16,4,199,102,15,46,4,202,255,221,4,202, 563 244,248,255,252,242,15,16,4,199,102,15,46,4,202,255,221,4,202,221,4,199,255,
540 221,4,199,255,252,247,208,131,198,4,57,68,202,4,255,139,108,194,4,131,198, 564 252,247,208,131,198,4,57,68,202,4,255,139,108,194,4,131,198,4,129,252,253,
541 4,129,252,253,239,255,15,131,244,247,255,15,130,244,247,255,137,108,202,4, 565 239,255,15,131,244,247,255,15,130,244,247,255,137,108,202,4,139,44,194,137,
542 139,44,194,137,44,202,255,15,183,70,252,254,141,180,253,134,233,248,1,139, 566 44,202,255,15,183,70,252,254,141,180,253,134,233,248,1,139,6,15,182,204,15,
543 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,139,108,194, 567 182,232,131,198,4,193,232,16,252,255,36,171,255,139,108,194,4,139,4,194,137,
544 4,139,4,194,137,108,202,4,137,4,202,139,6,15,182,204,15,182,232,131,198,4, 568 108,202,4,137,4,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,
545 193,232,16,252,255,36,171,255,49,252,237,129,124,253,194,4,239,129,213,239, 569 255,36,171,255,49,252,237,129,124,253,194,4,239,129,213,239,137,108,202,4,
546 137,108,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36, 570 139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,129,124,
547 171,255,129,124,253,194,4,239,15,135,244,48,255,252,242,15,16,4,194,184,0, 571 253,194,4,239,15,135,244,48,255,252,242,15,16,4,194,184,0,0,0,128,102,15,
548 0,0,128,102,15,110,200,102,15,112,201,81,15,87,193,252,242,15,17,4,202,255, 572 110,200,102,15,112,201,81,15,87,193,252,242,15,17,4,202,255,221,4,194,217,
549 221,4,194,217,224,221,28,202,255,129,124,253,194,4,239,15,133,244,248,139, 573 224,221,28,202,255,129,124,253,194,4,239,15,133,244,248,139,4,194,255,15,
550 4,194,255,15,87,192,252,242,15,42,128,233,248,1,252,242,15,17,4,202,255,219, 574 87,192,252,242,15,42,128,233,248,1,252,242,15,17,4,202,255,219,128,233,248,
551 128,233,248,1,221,28,202,255,139,6,15,182,204,15,182,232,131,198,4,193,232, 575 1,221,28,202,255,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,
552 16,252,255,36,171,248,2,129,124,253,194,4,239,15,133,244,50,139,12,194,137, 576 36,171,248,2,129,124,253,194,4,239,15,133,244,50,139,12,194,137,213,232,251,
553 213,232,251,1,18,255,252,242,15,42,192,137,252,234,255,137,4,36,137,252,234, 577 1,18,255,252,242,15,42,192,137,252,234,255,137,4,36,137,252,234,219,4,36,
554 219,4,36,255,15,182,78,252,253,252,233,244,1,255,15,182,252,236,15,182,192, 578 255,15,182,78,252,253,252,233,244,1,255,15,182,252,236,15,182,192,255,129,
555 255,129,124,253,252,234,4,239,15,135,244,46,255,252,242,15,16,4,252,234,252, 579 124,253,252,234,4,239,15,135,244,46,255,252,242,15,16,4,252,234,252,242,15,
556 242,15,88,4,199,255,221,4,252,234,220,4,199,255,129,124,253,252,234,4,239, 580 88,4,199,255,221,4,252,234,220,4,199,255,129,124,253,252,234,4,239,15,135,
557 15,135,244,47,255,252,242,15,16,4,199,252,242,15,88,4,252,234,255,221,4,199, 581 244,47,255,252,242,15,16,4,199,252,242,15,88,4,252,234,255,221,4,199,220,
558 220,4,252,234,255,129,124,253,252,234,4,239,15,135,244,49,129,124,253,194, 582 4,252,234,255,129,124,253,252,234,4,239,15,135,244,49,129,124,253,194,4,239,
559 4,239,15,135,244,49,255,252,242,15,16,4,252,234,252,242,15,88,4,194,255,221, 583 15,135,244,49,255,252,242,15,16,4,252,234,252,242,15,88,4,194,255,221,4,252,
560 4,252,234,220,4,194,255,252,242,15,16,4,252,234,252,242,15,92,4,199,255,221, 584 234,220,4,194,255,252,242,15,16,4,252,234,252,242,15,92,4,199,255,221,4,252,
561 4,252,234,220,36,199,255,252,242,15,16,4,199,252,242,15,92,4,252,234,255, 585 234,220,36,199,255,252,242,15,16,4,199,252,242,15,92,4,252,234,255,221,4,
562 221,4,199,220,36,252,234,255,252,242,15,16,4,252,234,252,242,15,92,4,194, 586 199,220,36,252,234,255,252,242,15,16,4,252,234,252,242,15,92,4,194,255,221,
563 255,221,4,252,234,220,36,194,255,252,242,15,16,4,252,234,252,242,15,89,4, 587 4,252,234,220,36,194,255,252,242,15,16,4,252,234,252,242,15,89,4,199,255,
564 199,255,221,4,252,234,220,12,199,255,252,242,15,16,4,199,252,242,15,89,4, 588 221,4,252,234,220,12,199,255,252,242,15,16,4,199,252,242,15,89,4,252,234,
565 252,234,255,221,4,199,220,12,252,234,255,252,242,15,16,4,252,234,252,242, 589 255,221,4,199,220,12,252,234,255,252,242,15,16,4,252,234,252,242,15,89,4,
566 15,89,4,194,255,221,4,252,234,220,12,194,255,252,242,15,16,4,252,234,252, 590 194,255,221,4,252,234,220,12,194,255,252,242,15,16,4,252,234,252,242,15,94,
567 242,15,94,4,199,255,221,4,252,234,220,52,199,255,252,242,15,16,4,199,252, 591 4,199,255,221,4,252,234,220,52,199,255,252,242,15,16,4,199,252,242,15,94,
568 242,15,94,4,252,234,255,221,4,199,220,52,252,234,255,252,242,15,16,4,252, 592 4,252,234,255,221,4,199,220,52,252,234,255,252,242,15,16,4,252,234,252,242,
569 234,252,242,15,94,4,194,255,221,4,252,234,220,52,194,255,252,242,15,16,4, 593 15,94,4,194,255,221,4,252,234,220,52,194,255,252,242,15,16,4,252,234,252,
570 252,234,252,242,15,16,12,199,255,221,4,252,234,221,4,199,255,252,242,15,16, 594 242,15,16,12,199,255,221,4,252,234,221,4,199,255,252,242,15,16,4,199,252,
571 4,199,252,242,15,16,12,252,234,255,221,4,199,221,4,252,234,255,252,242,15, 595 242,15,16,12,252,234,255,221,4,199,221,4,252,234,255,252,242,15,16,4,252,
572 16,4,252,234,252,242,15,16,12,194,255,221,4,252,234,221,4,194,255,248,152, 596 234,252,242,15,16,12,194,255,221,4,252,234,221,4,194,255,248,153,232,244,
573 232,244,145,255,252,233,244,152,255,232,244,106,255,15,182,252,236,15,182, 597 145,255,252,233,244,153,255,232,244,106,255,15,182,252,236,15,182,192,141,
574 192,141,12,194,41,232,137,76,36,4,137,68,36,8,248,33,139,108,36,48,137,44, 598 12,194,41,232,137,76,36,4,137,68,36,8,248,33,139,108,36,48,137,44,36,137,
575 36,137,116,36,24,137,149,233,232,251,1,23,139,149,233,133,192,15,133,244, 599 116,36,24,137,149,233,232,251,1,23,139,149,233,133,192,15,133,244,42,15,182,
576 42,15,182,110,252,255,15,182,78,252,253,139,68,252,234,4,139,44,252,234,137, 600 110,252,255,15,182,78,252,253,139,68,252,234,4,139,44,252,234,137,68,202,
577 68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252, 601 4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,
578 255,36,171,255,252,247,208,139,4,135,199,68,202,4,237,137,4,202,139,6,15, 602 171,255,252,247,208,139,4,135,199,68,202,4,237,137,4,202,139,6,15,182,204,
579 182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,15,191,192,252, 603 15,182,232,131,198,4,193,232,16,252,255,36,171,255,15,191,192,252,242,15,
580 242,15,42,192,252,242,15,17,4,202,255,223,70,252,254,221,28,202,255,252,242, 604 42,192,252,242,15,17,4,202,255,223,70,252,254,221,28,202,255,252,242,15,16,
581 15,16,4,199,252,242,15,17,4,202,255,221,4,199,221,28,202,255,252,247,208, 605 4,199,252,242,15,17,4,202,255,221,4,199,221,28,202,255,252,247,208,137,68,
582 137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36, 606 202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,
583 171,255,141,76,202,12,141,68,194,4,189,237,137,105,252,248,248,1,137,41,131, 607 141,76,202,12,141,68,194,4,189,237,137,105,252,248,248,1,137,41,131,193,8,
584 193,8,57,193,15,134,244,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16, 608 57,193,15,134,244,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,
585 252,255,36,171,255,139,106,252,248,139,172,253,133,233,139,173,233,139,69, 609 255,36,171,255,139,106,252,248,139,172,253,133,233,139,173,233,139,69,4,139,
586 4,139,109,0,137,68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4, 610 109,0,137,68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,193,232,
587 193,232,16,252,255,36,171,255,139,106,252,248,139,172,253,141,233,128,189, 611 16,252,255,36,171,255,139,106,252,248,139,172,253,141,233,128,189,233,0,139,
588 233,0,139,173,233,139,12,194,139,68,194,4,137,77,0,137,69,4,15,132,244,247, 612 173,233,139,12,194,139,68,194,4,137,77,0,137,69,4,15,132,244,247,252,246,
589 252,246,133,233,235,15,133,244,248,248,1,139,6,15,182,204,15,182,232,131, 613 133,233,235,15,133,244,248,248,1,139,6,15,182,204,15,182,232,131,198,4,193,
590 198,4,193,232,16,252,255,36,171,248,2,129,232,239,129,252,248,239,15,134, 614 232,16,252,255,36,171,248,2,129,232,239,129,252,248,239,15,134,244,1,252,
591 244,1,252,246,129,233,235,15,132,244,1,135,213,141,139,233,255,232,251,1, 615 246,129,233,235,15,132,244,1,135,213,141,139,233,255,232,251,1,24,137,252,
592 24,137,252,234,252,233,244,1,255,252,247,208,139,106,252,248,139,172,253, 616 234,252,233,244,1,255,252,247,208,139,106,252,248,139,172,253,141,233,139,
593 141,233,139,12,135,139,133,233,137,8,199,64,4,237,252,246,133,233,235,15, 617 12,135,139,133,233,137,8,199,64,4,237,252,246,133,233,235,15,133,244,248,
594 133,244,248,248,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, 618 248,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,
595 36,171,248,2,252,246,129,233,235,15,132,244,1,128,189,233,0,15,132,244,1, 619 2,252,246,129,233,235,15,132,244,1,128,189,233,0,15,132,244,1,137,213,137,
596 137,213,137,194,141,139,233,232,251,1,24,137,252,234,252,233,244,1,255,139, 620 194,141,139,233,232,251,1,24,137,252,234,252,233,244,1,255,139,106,252,248,
597 106,252,248,255,252,242,15,16,4,199,255,139,172,253,141,233,139,141,233,255, 621 255,252,242,15,16,4,199,255,139,172,253,141,233,139,141,233,255,252,247,208,
598 252,247,208,139,106,252,248,139,172,253,141,233,139,141,233,137,65,4,139, 622 139,106,252,248,139,172,253,141,233,139,141,233,137,65,4,139,6,15,182,204,
599 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,141,180,253, 623 15,182,232,131,198,4,193,232,16,252,255,36,171,255,141,180,253,134,233,139,
600 134,233,139,108,36,48,131,189,233,0,15,132,244,247,141,12,202,137,76,36,4, 624 108,36,48,131,189,233,0,15,132,244,247,141,12,202,137,76,36,4,137,44,36,137,
601 137,44,36,137,149,233,232,251,1,25,139,149,233,248,1,139,6,15,182,204,15, 625 149,233,232,251,1,25,139,149,233,248,1,139,6,15,182,204,15,182,232,131,198,
602 182,232,131,198,4,193,232,16,252,255,36,171,255,252,247,208,139,74,252,248, 626 4,193,232,16,252,255,36,171,255,252,247,208,139,74,252,248,139,4,135,139,
603 139,4,135,139,108,36,48,137,76,36,8,137,68,36,4,137,116,36,24,137,44,36,137, 627 108,36,48,137,76,36,8,137,68,36,4,137,116,36,24,137,44,36,137,149,233,232,
604 149,233,232,251,1,26,139,149,233,15,182,78,252,253,137,4,202,199,68,202,4, 628 251,1,26,139,149,233,15,182,78,252,253,137,4,202,199,68,202,4,237,139,6,15,
605 237,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,137, 629 182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,137,197,37,252,
606 197,37,252,255,7,0,0,193,252,237,11,61,252,255,7,0,0,15,148,209,137,108,36, 630 255,7,0,0,193,252,237,11,61,252,255,7,0,0,15,148,209,137,108,36,8,1,200,139,
607 8,1,200,139,108,36,48,1,200,137,68,36,4,137,116,36,24,139,139,233,137,44, 631 108,36,48,1,200,137,68,36,4,137,116,36,24,139,139,233,137,44,36,59,139,233,
608 36,59,139,233,137,149,233,15,131,244,248,248,1,232,251,1,27,139,149,233,15, 632 137,149,233,15,131,244,248,248,1,232,251,1,27,139,149,233,15,182,78,252,253,
609 182,78,252,253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131, 633 137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131,198,4,193,232,
610 198,4,193,232,16,252,255,36,171,248,2,137,252,233,232,251,1,28,252,233,244, 634 16,252,255,36,171,248,2,137,252,233,232,251,1,28,252,233,244,1,255,252,247,
611 1,255,252,247,208,139,108,36,48,139,139,233,137,116,36,24,59,139,233,137, 635 208,139,108,36,48,139,139,233,137,116,36,24,59,139,233,137,149,233,15,131,
612 149,233,15,131,244,249,248,2,139,20,135,137,252,233,232,251,1,29,139,149, 636 244,249,248,2,139,20,135,137,252,233,232,251,1,29,139,149,233,15,182,78,252,
613 233,15,182,78,252,253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182, 637 253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131,198,4,193,
614 232,131,198,4,193,232,16,252,255,36,171,248,3,137,252,233,232,251,1,28,15, 638 232,16,252,255,36,171,248,3,137,252,233,232,251,1,28,15,183,70,252,254,252,
615 183,70,252,254,252,247,208,252,233,244,2,255,252,247,208,139,106,252,248, 639 247,208,252,233,244,2,255,252,247,208,139,106,252,248,139,173,233,139,4,135,
616 139,173,233,139,4,135,252,233,244,153,255,252,247,208,139,106,252,248,139, 640 252,233,244,154,255,252,247,208,139,106,252,248,139,173,233,139,4,135,252,
617 173,233,139,4,135,252,233,244,154,255,15,182,252,236,15,182,192,129,124,253, 641 233,244,155,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,
618 252,234,4,239,15,133,244,36,139,44,252,234,129,124,253,194,4,239,15,135,244, 642 244,36,139,44,252,234,129,124,253,194,4,239,15,135,244,251,255,252,242,15,
619 251,255,252,242,15,16,4,194,252,242,15,45,192,252,242,15,42,200,102,15,46, 643 16,4,194,252,242,15,45,192,252,242,15,42,200,102,15,46,193,255,221,4,194,
620 193,255,221,4,194,219,20,36,219,4,36,255,139,4,36,255,15,133,244,36,59,133, 644 219,20,36,219,4,36,255,139,4,36,255,15,133,244,36,59,133,233,15,131,244,36,
621 233,15,131,244,36,193,224,3,3,133,233,129,120,253,4,239,15,132,244,248,248, 645 193,224,3,3,133,233,129,120,253,4,239,15,132,244,248,248,1,139,40,139,64,
622 1,139,40,139,64,4,137,44,202,137,68,202,4,139,6,15,182,204,15,182,232,131, 646 4,137,44,202,137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,
623 198,4,193,232,16,252,255,36,171,248,2,131,189,233,0,15,132,244,1,139,141, 647 252,255,36,171,248,2,131,189,233,0,15,132,244,1,139,141,233,252,246,129,233,
624 233,252,246,129,233,235,15,132,244,36,15,182,78,252,253,252,233,244,1,248, 648 235,15,132,244,36,15,182,78,252,253,252,233,244,1,248,5,255,129,124,253,194,
625 5,255,129,124,253,194,4,239,15,133,244,36,139,4,194,252,233,244,153,255,15, 649 4,239,15,133,244,36,139,4,194,252,233,244,154,255,15,182,252,236,15,182,192,
626 182,252,236,15,182,192,252,247,208,139,4,135,129,124,253,252,234,4,239,15, 650 252,247,208,139,4,135,129,124,253,252,234,4,239,15,133,244,34,139,44,252,
627 133,244,34,139,44,252,234,248,153,139,141,233,35,136,233,105,201,239,3,141, 651 234,248,154,139,141,233,35,136,233,105,201,239,3,141,233,248,1,129,185,233,
628 233,248,1,129,185,233,239,15,133,244,250,57,129,233,15,133,244,250,129,121, 652 239,15,133,244,250,57,129,233,15,133,244,250,129,121,253,4,239,15,132,244,
629 253,4,239,15,132,244,251,15,182,70,252,253,139,41,139,73,4,137,44,194,248, 653 251,15,182,70,252,253,139,41,139,73,4,137,44,194,248,2,255,137,76,194,4,139,
630 2,255,137,76,194,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, 654 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,15,182,
631 36,171,248,3,15,182,70,252,253,185,237,252,233,244,2,248,4,139,137,233,133, 655 70,252,253,185,237,252,233,244,2,248,4,139,137,233,133,201,15,133,244,1,248,
632 201,15,133,244,1,248,5,139,141,233,133,201,15,132,244,3,252,246,129,233,235, 656 5,139,141,233,133,201,15,132,244,3,252,246,129,233,235,15,133,244,3,252,233,
633 15,133,244,3,252,233,244,34,255,15,182,252,236,15,182,192,129,124,253,252, 657 244,34,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,244,
634 234,4,239,15,133,244,35,139,44,252,234,59,133,233,15,131,244,35,193,224,3, 658 35,139,44,252,234,59,133,233,15,131,244,35,193,224,3,3,133,233,129,120,253,
635 3,133,233,129,120,253,4,239,15,132,244,248,248,1,139,40,139,64,4,137,44,202, 659 4,239,15,132,244,248,248,1,139,40,139,64,4,137,44,202,137,68,202,4,139,6,
636 137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36, 660 15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,2,131,189,233,
637 171,248,2,131,189,233,0,15,132,244,1,139,141,233,252,246,129,233,235,15,132, 661 0,15,132,244,1,139,141,233,252,246,129,233,235,15,132,244,35,255,15,182,252,
638 244,35,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,244, 662 236,15,182,192,129,124,253,252,234,4,239,15,133,244,39,139,44,252,234,129,
639 39,139,44,252,234,129,124,253,194,4,239,15,135,244,251,255,15,133,244,39, 663 124,253,194,4,239,15,135,244,251,255,15,133,244,39,59,133,233,15,131,244,
640 59,133,233,15,131,244,39,193,224,3,3,133,233,129,120,253,4,239,15,132,244, 664 39,193,224,3,3,133,233,129,120,253,4,239,15,132,244,249,248,1,252,246,133,
641 249,248,1,252,246,133,233,235,15,133,244,253,248,2,139,108,202,4,139,12,202, 665 233,235,15,133,244,253,248,2,139,108,202,4,139,12,202,137,104,4,137,8,139,
642 137,104,4,137,8,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, 666 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,131,189,
643 36,171,248,3,131,189,233,0,15,132,244,1,139,141,233,255,252,246,129,233,235, 667 233,0,15,132,244,1,139,141,233,255,252,246,129,233,235,15,132,244,39,15,182,
644 15,132,244,39,15,182,78,252,253,252,233,244,1,248,5,129,124,253,194,4,239, 668 78,252,253,252,233,244,1,248,5,129,124,253,194,4,239,15,133,244,39,139,4,
645 15,133,244,39,139,4,194,252,233,244,154,248,7,128,165,233,235,139,139,233, 669 194,252,233,244,155,248,7,128,165,233,235,139,139,233,137,171,233,137,141,
646 137,171,233,137,141,233,15,182,78,252,253,252,233,244,2,255,15,182,252,236, 670 233,15,182,78,252,253,252,233,244,2,255,15,182,252,236,15,182,192,252,247,
647 15,182,192,252,247,208,139,4,135,129,124,253,252,234,4,239,15,133,244,37, 671 208,139,4,135,129,124,253,252,234,4,239,15,133,244,37,139,44,252,234,248,
648 139,44,252,234,248,154,139,141,233,35,136,233,105,201,239,198,133,233,0,3, 672 155,139,141,233,35,136,233,105,201,239,198,133,233,0,3,141,233,248,1,129,
649 141,233,248,1,129,185,233,239,15,133,244,251,57,129,233,15,133,244,251,129, 673 185,233,239,15,133,244,251,57,129,233,15,133,244,251,129,121,253,4,239,15,
650 121,253,4,239,15,132,244,250,248,2,255,252,246,133,233,235,15,133,244,253, 674 132,244,250,248,2,255,252,246,133,233,235,15,133,244,253,248,3,15,182,70,
651 248,3,15,182,70,252,253,139,108,194,4,139,4,194,137,105,4,137,1,139,6,15, 675 252,253,139,108,194,4,139,4,194,137,105,4,137,1,139,6,15,182,204,15,182,232,
652 182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,4,131,189,233, 676 131,198,4,193,232,16,252,255,36,171,248,4,131,189,233,0,15,132,244,2,137,
653 0,15,132,244,2,137,76,36,16,139,141,233,252,246,129,233,235,15,132,244,37, 677 76,36,16,139,141,233,252,246,129,233,235,15,132,244,37,139,76,36,16,252,233,
654 139,76,36,16,252,233,244,2,248,5,139,137,233,133,201,15,133,244,1,255,139, 678 244,2,248,5,139,137,233,133,201,15,133,244,1,255,139,141,233,133,201,15,132,
655 141,233,133,201,15,132,244,252,252,246,129,233,235,15,132,244,37,248,6,137, 679 244,252,252,246,129,233,235,15,132,244,37,248,6,137,68,36,16,199,68,36,20,
656 68,36,16,199,68,36,20,237,141,68,36,16,137,108,36,12,137,108,36,4,139,108, 680 237,141,68,36,16,137,108,36,12,137,108,36,4,139,108,36,48,137,68,36,8,137,
657 36,48,137,68,36,8,137,44,36,137,116,36,24,137,149,233,232,251,1,30,139,149, 681 44,36,137,116,36,24,137,149,233,232,251,1,30,139,149,233,139,108,36,12,137,
658 233,139,108,36,12,137,193,252,233,244,2,248,7,128,165,233,235,139,131,233, 682 193,252,233,244,2,248,7,128,165,233,235,139,131,233,137,171,233,137,133,233,
659 137,171,233,137,133,233,252,233,244,3,255,15,182,252,236,15,182,192,129,124, 683 252,233,244,3,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,
660 253,252,234,4,239,15,133,244,38,139,44,252,234,59,133,233,15,131,244,38,193, 684 133,244,38,139,44,252,234,59,133,233,15,131,244,38,193,224,3,3,133,233,129,
661 224,3,3,133,233,129,120,253,4,239,15,132,244,249,248,1,252,246,133,233,235, 685 120,253,4,239,15,132,244,249,248,1,252,246,133,233,235,15,133,244,253,248,
662 15,133,244,253,248,2,139,108,202,4,139,12,202,137,104,4,137,8,139,6,15,182, 686 2,139,108,202,4,139,12,202,137,104,4,137,8,139,6,15,182,204,15,182,232,131,
663 204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,131,189,233,0,15, 687 198,4,193,232,16,252,255,36,171,248,3,131,189,233,0,15,132,244,1,255,139,
664 132,244,1,255,139,141,233,252,246,129,233,235,15,132,244,38,15,182,78,252, 688 141,233,252,246,129,233,235,15,132,244,38,15,182,78,252,253,252,233,244,1,
665 253,252,233,244,1,248,7,128,165,233,235,139,139,233,137,171,233,137,141,233, 689 248,7,128,165,233,235,139,139,233,137,171,233,137,141,233,15,182,78,252,253,
666 15,182,78,252,253,252,233,244,2,255,137,124,36,16,255,221,4,199,219,92,36, 690 252,233,244,2,255,137,124,36,16,255,221,4,199,219,92,36,12,255,248,1,141,
667 12,255,248,1,141,12,202,139,105,252,248,252,246,133,233,235,15,133,244,253, 691 12,202,139,105,252,248,252,246,133,233,235,15,133,244,253,248,2,139,68,36,
668 248,2,139,68,36,20,255,252,242,15,45,252,248,255,139,124,36,12,255,131,232, 692 20,255,252,242,15,45,252,248,255,139,124,36,12,255,131,232,1,15,132,244,250,
669 1,15,132,244,250,1,252,248,59,133,233,15,131,244,251,41,252,248,193,231,3, 693 1,252,248,59,133,233,15,131,244,251,41,252,248,193,231,3,3,189,233,248,3,
670 3,189,233,248,3,139,41,137,47,139,105,4,131,193,8,137,111,4,131,199,8,131, 694 139,41,137,47,139,105,4,131,193,8,137,111,4,131,199,8,131,232,1,15,133,244,
671 232,1,15,133,244,3,248,4,139,124,36,16,139,6,15,182,204,15,182,232,131,198, 695 3,248,4,139,124,36,16,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,
672 4,193,232,16,252,255,36,171,248,5,137,108,36,4,139,108,36,48,137,68,36,8, 696 255,36,171,248,5,137,108,36,4,139,108,36,48,137,68,36,8,137,44,36,137,116,
673 137,44,36,137,116,36,24,137,149,233,232,251,1,31,139,149,233,15,182,78,252, 697 36,24,137,149,233,232,251,1,31,139,149,233,15,182,78,252,253,252,233,244,
674 253,252,233,244,1,248,7,128,165,233,235,139,131,233,137,171,233,255,137,133, 698 1,248,7,128,165,233,235,139,131,233,137,171,233,255,137,133,233,252,233,244,
675 233,252,233,244,2,255,3,68,36,20,255,141,76,202,8,139,105,252,248,129,121, 699 2,255,3,68,36,20,255,141,76,202,8,139,105,252,248,129,121,253,252,252,239,
676 253,252,252,239,15,133,244,29,252,255,165,233,255,141,76,202,8,137,215,139, 700 15,133,244,29,252,255,165,233,255,141,76,202,8,137,215,139,105,252,248,129,
677 105,252,248,129,121,253,252,252,239,15,133,244,29,248,51,139,114,252,252, 701 121,253,252,252,239,15,133,244,29,248,51,139,114,252,252,252,247,198,237,
678 252,247,198,237,15,133,244,253,248,1,137,106,252,248,137,68,36,20,131,232, 702 15,133,244,253,248,1,137,106,252,248,137,68,36,20,131,232,1,15,132,244,249,
679 1,15,132,244,249,248,2,139,41,137,47,139,105,4,137,111,4,131,199,8,131,193, 703 248,2,139,41,137,47,139,105,4,137,111,4,131,199,8,131,193,8,131,232,1,15,
680 8,131,232,1,15,133,244,2,139,106,252,248,248,3,137,209,128,189,233,1,15,135, 704 133,244,2,139,106,252,248,248,3,137,209,128,189,233,1,15,135,244,251,248,
681 244,251,248,4,139,68,36,20,252,255,165,233,248,5,255,252,247,198,237,15,133, 705 4,139,68,36,20,252,255,165,233,248,5,255,252,247,198,237,15,133,244,4,15,
682 244,4,15,182,70,252,253,252,247,208,141,20,194,139,122,252,248,139,191,233, 706 182,70,252,253,252,247,208,141,20,194,139,122,252,248,139,191,233,139,191,
683 139,191,233,252,233,244,4,248,7,15,139,244,1,131,230,252,248,41,252,242,137, 707 233,252,233,244,4,248,7,15,139,244,1,131,230,252,248,41,252,242,137,215,139,
684 215,139,114,252,252,252,233,244,1,255,141,76,202,8,139,105,232,139,65,252, 708 114,252,252,252,233,244,1,255,141,76,202,8,139,105,232,139,65,252,236,137,
685 236,137,41,137,65,4,139,105,252,240,139,65,252,244,137,105,8,137,65,12,139, 709 41,137,65,4,139,105,252,240,139,65,252,244,137,105,8,137,65,12,139,105,224,
686 105,224,139,65,228,137,105,252,248,137,65,252,252,129,252,248,239,184,3,0, 710 139,65,228,137,105,252,248,137,65,252,252,129,252,248,239,184,3,0,0,0,15,
687 0,0,15,133,244,29,252,255,165,233,255,15,182,252,236,139,66,252,248,141,12, 711 133,244,29,252,255,165,233,255,15,182,252,236,139,66,252,248,141,12,202,139,
688 202,139,128,233,15,182,128,233,137,124,36,16,141,188,253,194,233,43,122,252, 712 128,233,15,182,128,233,137,124,36,16,141,188,253,194,233,43,122,252,252,133,
689 252,133,252,237,15,132,244,251,141,108,252,233,252,248,57,215,15,131,244, 713 252,237,15,132,244,251,141,108,252,233,252,248,57,215,15,131,244,248,248,
690 248,248,1,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,4,131,193, 714 1,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,4,131,193,8,57,252,
691 8,57,252,233,15,131,244,249,57,215,15,130,244,1,248,2,199,65,4,237,131,193, 715 233,15,131,244,249,57,215,15,130,244,1,248,2,199,65,4,237,131,193,8,57,252,
692 8,57,252,233,15,130,244,2,248,3,139,124,36,16,139,6,15,182,204,15,182,232, 716 233,15,130,244,2,248,3,139,124,36,16,139,6,15,182,204,15,182,232,131,198,
693 131,198,4,193,232,16,252,255,36,171,248,5,199,68,36,20,1,0,0,0,137,208,41, 717 4,193,232,16,252,255,36,171,248,5,199,68,36,20,1,0,0,0,137,208,41,252,248,
694 252,248,15,134,244,3,255,137,197,193,252,237,3,137,108,36,4,131,197,1,137, 718 15,134,244,3,255,137,197,193,252,237,3,137,108,36,4,131,197,1,137,108,36,
695 108,36,20,139,108,36,48,1,200,59,133,233,15,135,244,253,248,6,139,71,252, 719 20,139,108,36,48,1,200,59,133,233,15,135,244,253,248,6,139,71,252,248,137,
696 248,137,1,139,71,252,252,131,199,8,137,65,4,131,193,8,57,215,15,130,244,6, 720 1,139,71,252,252,131,199,8,137,65,4,131,193,8,57,215,15,130,244,6,252,233,
697 252,233,244,3,248,7,137,149,233,137,141,233,137,116,36,24,41,215,137,44,36, 721 244,3,248,7,137,149,233,137,141,233,137,116,36,24,41,215,137,44,36,232,251,
698 232,251,1,0,139,149,233,139,141,233,1,215,252,233,244,6,255,193,225,3,255, 722 1,0,139,149,233,139,141,233,1,215,252,233,244,6,255,193,225,3,255,248,1,139,
699 248,1,139,114,252,252,137,68,36,20,252,247,198,237,15,133,244,253,255,248, 723 114,252,252,137,68,36,20,252,247,198,237,15,133,244,253,255,248,17,137,215,
700 17,137,215,131,232,1,15,132,244,249,248,2,139,44,15,137,111,252,248,139,108, 724 131,232,1,15,132,244,249,248,2,139,44,15,137,111,252,248,139,108,15,4,137,
701 15,4,137,111,252,252,131,199,8,131,232,1,15,133,244,2,248,3,139,68,36,20, 725 111,252,252,131,199,8,131,232,1,15,133,244,2,248,3,139,68,36,20,15,182,110,
702 15,182,110,252,255,248,5,57,197,15,135,244,252,255,139,108,10,4,137,106,252, 726 252,255,248,5,57,197,15,135,244,252,255,139,108,10,4,137,106,252,252,139,
703 252,139,44,10,137,106,252,248,255,248,5,56,70,252,255,15,135,244,252,255, 727 44,10,137,106,252,248,255,248,5,56,70,252,255,15,135,244,252,255,15,182,78,
704 15,182,78,252,253,252,247,209,141,20,202,139,122,252,248,139,191,233,139, 728 252,253,252,247,209,141,20,202,139,122,252,248,139,191,233,139,191,233,139,
705 191,233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248, 729 6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,6,255,199,
706 6,255,199,71,252,252,237,131,199,8,255,199,68,194,252,244,237,255,131,192, 730 71,252,252,237,131,199,8,255,199,68,194,252,244,237,255,131,192,1,252,233,
707 1,252,233,244,5,248,7,15,139,244,18,131,230,252,248,41,252,242,255,1,252, 731 244,5,248,7,15,139,244,18,131,230,252,248,41,252,242,255,1,252,241,255,137,
708 241,255,137,252,245,209,252,237,129,229,239,102,131,172,253,43,233,1,15,132, 732 252,245,209,252,237,129,229,239,102,131,172,253,43,233,1,15,132,244,138,255,
709 244,138,255,141,12,202,255,129,121,253,4,239,15,135,244,52,129,121,253,12, 733 141,12,202,255,129,121,253,4,239,15,135,244,52,129,121,253,12,239,15,135,
710 239,15,135,244,52,255,139,105,20,255,129,252,253,239,15,135,244,52,255,252, 734 244,52,255,139,105,20,255,129,252,253,239,15,135,244,52,255,252,242,15,16,
711 242,15,16,1,252,242,15,16,73,8,255,252,242,15,88,65,16,252,242,15,17,1,133, 735 1,252,242,15,16,73,8,255,252,242,15,88,65,16,252,242,15,17,1,133,252,237,
712 252,237,15,136,244,249,255,15,140,244,249,255,102,15,46,200,248,1,252,242, 736 15,136,244,249,255,15,140,244,249,255,102,15,46,200,248,1,252,242,15,17,65,
713 15,17,65,24,255,221,65,8,221,1,255,220,65,16,221,17,221,81,24,133,252,237, 737 24,255,221,65,8,221,1,255,220,65,16,221,17,221,81,24,133,252,237,15,136,244,
714 15,136,244,247,255,221,81,24,15,140,244,247,255,217,201,248,1,255,15,183, 738 247,255,221,81,24,15,140,244,247,255,217,201,248,1,255,15,183,70,252,254,
715 70,252,254,255,15,131,244,248,141,180,253,134,233,255,141,180,253,134,233, 739 255,15,131,244,248,141,180,253,134,233,255,141,180,253,134,233,15,183,70,
716 15,183,70,252,254,15,131,245,255,15,130,244,248,141,180,253,134,233,255,248, 740 252,254,15,131,245,255,15,130,244,248,141,180,253,134,233,255,248,3,102,15,
717 3,102,15,46,193,252,233,244,1,255,141,12,202,139,105,4,129,252,253,239,15, 741 46,193,252,233,244,1,255,141,12,202,139,105,4,129,252,253,239,15,132,244,
718 132,244,247,255,137,105,252,252,139,41,137,105,252,248,252,233,245,255,141, 742 247,255,137,105,252,252,139,41,137,105,252,248,252,233,245,255,141,180,253,
719 180,253,134,233,139,1,137,105,252,252,137,65,252,248,255,139,139,233,139, 743 134,233,139,1,137,105,252,252,137,65,252,248,255,139,139,233,139,4,129,139,
720 4,129,139,128,233,139,108,36,48,137,147,233,137,171,233,252,255,224,255,141, 744 128,233,139,108,36,48,137,147,233,137,171,233,252,255,224,255,141,180,253,
721 180,253,134,233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255, 745 134,233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,
722 36,171,255,254,0 746 254,0
723}; 747};
724 748
725enum { 749enum {
@@ -861,7 +885,8 @@ enum {
861 GLOB_vm_mod, 885 GLOB_vm_mod,
862 GLOB_vm_exp2, 886 GLOB_vm_exp2,
863 GLOB_vm_exp2raw, 887 GLOB_vm_exp2raw,
864 GLOB_vm_powi, 888 GLOB_vm_pow_sse,
889 GLOB_vm_powi_sse,
865 GLOB_vm_foldfpm, 890 GLOB_vm_foldfpm,
866 GLOB_vm_foldarith, 891 GLOB_vm_foldarith,
867 GLOB_vm_cpuid, 892 GLOB_vm_cpuid,
@@ -1009,7 +1034,8 @@ static const char *const globnames[] = {
1009 "vm_mod", 1034 "vm_mod",
1010 "vm_exp2", 1035 "vm_exp2",
1011 "vm_exp2raw", 1036 "vm_exp2raw",
1012 "vm_powi", 1037 "vm_pow_sse",
1038 "vm_powi_sse",
1013 "vm_foldfpm", 1039 "vm_foldfpm",
1014 "vm_foldarith", 1040 "vm_foldarith",
1015 "vm_cpuid", 1041 "vm_cpuid",
@@ -1231,7 +1257,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1231 dasm_put(Dst, 5245); 1257 dasm_put(Dst, 5245);
1232 } 1258 }
1233 dasm_put(Dst, 5255, 2+1, LJ_TISNUM, LJ_TISNUM); 1259 dasm_put(Dst, 5255, 2+1, LJ_TISNUM, LJ_TISNUM);
1234 if (0 && sse) { // NYI 1260 if (sse) {
1235 dasm_put(Dst, 5307, 1+1, LJ_TISNUM, LJ_TISNUM); 1261 dasm_put(Dst, 5307, 1+1, LJ_TISNUM, LJ_TISNUM);
1236 } else { 1262 } else {
1237 dasm_put(Dst, 5354, 2+1, LJ_TISNUM, LJ_TISNUM); 1263 dasm_put(Dst, 5354, 2+1, LJ_TISNUM, LJ_TISNUM);
@@ -1456,56 +1482,66 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1456 dasm_put(Dst, 9407); 1482 dasm_put(Dst, 9407);
1457 } 1483 }
1458 dasm_put(Dst, 9454); 1484 dasm_put(Dst, 9454);
1485 if (!sse) {
1486 dasm_put(Dst, 9528);
1459 if (cmov) { 1487 if (cmov) {
1460 dasm_put(Dst, 9538); 1488 dasm_put(Dst, 9539);
1461 } else { 1489 } else {
1462 dasm_put(Dst, 9542); 1490 dasm_put(Dst, 9543);
1463 } 1491 }
1464 dasm_put(Dst, 9551); 1492 dasm_put(Dst, 9550);
1465 dasm_put(Dst, 9620); 1493 dasm_put(Dst, 9624);
1466 dasm_put(Dst, 9722); 1494 dasm_put(Dst, 9724);
1467 if (cmov) { 1495 if (cmov) {
1468 dasm_put(Dst, 9735); 1496 dasm_put(Dst, 9727);
1469 } else { 1497 } else {
1470 dasm_put(Dst, 9739); 1498 dasm_put(Dst, 9731);
1471 } 1499 }
1472 dasm_put(Dst, 9748); 1500 dasm_put(Dst, 9738);
1473 if (cmov) { 1501 if (cmov) {
1474 dasm_put(Dst, 9538); 1502 dasm_put(Dst, 9539);
1503 } else {
1504 dasm_put(Dst, 9543);
1505 }
1506 dasm_put(Dst, 9756);
1475 } else { 1507 } else {
1476 dasm_put(Dst, 9766); 1508 dasm_put(Dst, 9835);
1477 } 1509 }
1478 dasm_put(Dst, 9773); 1510 dasm_put(Dst, 9838);
1511 dasm_put(Dst, 9923);
1512 dasm_put(Dst, 10054);
1513 dasm_put(Dst, 10253);
1479 if (sse) { 1514 if (sse) {
1480 dasm_put(Dst, 9856); 1515 dasm_put(Dst, 10276);
1481 dasm_put(Dst, 9913); 1516 dasm_put(Dst, 10333);
1517 dasm_put(Dst, 10424);
1482 } else { 1518 } else {
1483 dasm_put(Dst, 9945); 1519 dasm_put(Dst, 10466);
1520 dasm_put(Dst, 10558);
1484 } 1521 }
1485 dasm_put(Dst, 9984); 1522 dasm_put(Dst, 10604);
1486 dasm_put(Dst, 10071);
1487 if (sse) { 1523 if (sse) {
1488 dasm_put(Dst, 10089); 1524 dasm_put(Dst, 10610);
1489 dasm_put(Dst, 10176); 1525 dasm_put(Dst, 10715);
1490 dasm_put(Dst, 10270); 1526 dasm_put(Dst, 10798);
1491 } else { 1527 } else {
1492 dasm_put(Dst, 10356); 1528 dasm_put(Dst, 10870);
1493 dasm_put(Dst, 10439); 1529 dasm_put(Dst, 10953);
1494 if (cmov) { 1530 if (cmov) {
1495 dasm_put(Dst, 10494); 1531 dasm_put(Dst, 11008);
1496 } else { 1532 } else {
1497 dasm_put(Dst, 10513); 1533 dasm_put(Dst, 11027);
1498 } 1534 }
1499 dasm_put(Dst, 10554); 1535 dasm_put(Dst, 10866);
1500 } 1536 }
1501 dasm_put(Dst, 10558); 1537 dasm_put(Dst, 11068);
1502} 1538}
1503 1539
1504/* Generate the code for a single instruction. */ 1540/* Generate the code for a single instruction. */
1505static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse) 1541static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
1506{ 1542{
1507 int vk = 0; 1543 int vk = 0;
1508 dasm_put(Dst, 10612, defop); 1544 dasm_put(Dst, 11122, defop);
1509 1545
1510 switch (op) { 1546 switch (op) {
1511 1547
@@ -1514,495 +1550,456 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
1514 /* Remember: all ops branch for a true comparison, fall through otherwise. */ 1550 /* Remember: all ops branch for a true comparison, fall through otherwise. */
1515 1551
1516 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 1552 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
1517 dasm_put(Dst, 10614, LJ_TISNUM, LJ_TISNUM); 1553 dasm_put(Dst, 11124, LJ_TISNUM, LJ_TISNUM);
1518 if (sse) { 1554 if (sse) {
1519 dasm_put(Dst, 10635); 1555 dasm_put(Dst, 11145);
1520 } else { 1556 } else {
1521 dasm_put(Dst, 10650); 1557 dasm_put(Dst, 11160);
1522 if (cmov) { 1558 if (cmov) {
1523 dasm_put(Dst, 10660); 1559 dasm_put(Dst, 11170);
1524 } else { 1560 } else {
1525 dasm_put(Dst, 10666); 1561 dasm_put(Dst, 11176);
1526 } 1562 }
1527 } 1563 }
1528 switch (op) { 1564 switch (op) {
1529 case BC_ISLT: 1565 case BC_ISLT:
1530 dasm_put(Dst, 10673); 1566 dasm_put(Dst, 11183);
1531 break; 1567 break;
1532 case BC_ISGE: 1568 case BC_ISGE:
1533 dasm_put(Dst, 10434); 1569 dasm_put(Dst, 10419);
1534 break; 1570 break;
1535 case BC_ISLE: 1571 case BC_ISLE:
1536 dasm_put(Dst, 6614); 1572 dasm_put(Dst, 6614);
1537 break; 1573 break;
1538 case BC_ISGT: 1574 case BC_ISGT:
1539 dasm_put(Dst, 10678); 1575 dasm_put(Dst, 11188);
1540 break; 1576 break;
1541 default: break; /* Shut up GCC. */ 1577 default: break; /* Shut up GCC. */
1542 } 1578 }
1543 dasm_put(Dst, 10683, -BCBIAS_J*4); 1579 dasm_put(Dst, 11193, -BCBIAS_J*4);
1544 break; 1580 break;
1545 1581
1546 case BC_ISEQV: case BC_ISNEV: 1582 case BC_ISEQV: case BC_ISNEV:
1547 vk = op == BC_ISEQV; 1583 vk = op == BC_ISEQV;
1548 dasm_put(Dst, 10716, LJ_TISNUM, LJ_TISNUM); 1584 dasm_put(Dst, 11226, LJ_TISNUM, LJ_TISNUM);
1549 if (sse) { 1585 if (sse) {
1550 dasm_put(Dst, 10742); 1586 dasm_put(Dst, 11252);
1551 } else { 1587 } else {
1552 dasm_put(Dst, 10754); 1588 dasm_put(Dst, 11264);
1553 if (cmov) { 1589 if (cmov) {
1554 dasm_put(Dst, 10660); 1590 dasm_put(Dst, 11170);
1555 } else { 1591 } else {
1556 dasm_put(Dst, 10666); 1592 dasm_put(Dst, 11176);
1557 } 1593 }
1558 } 1594 }
1559 iseqne_fp: 1595 iseqne_fp:
1560 if (vk) { 1596 if (vk) {
1561 dasm_put(Dst, 10761); 1597 dasm_put(Dst, 11271);
1562 } else { 1598 } else {
1563 dasm_put(Dst, 10770); 1599 dasm_put(Dst, 11280);
1564 } 1600 }
1565 iseqne_end: 1601 iseqne_end:
1566 if (vk) { 1602 if (vk) {
1567 dasm_put(Dst, 10779, -BCBIAS_J*4); 1603 dasm_put(Dst, 11289, -BCBIAS_J*4);
1568 } else { 1604 } else {
1569 dasm_put(Dst, 10794, -BCBIAS_J*4); 1605 dasm_put(Dst, 11304, -BCBIAS_J*4);
1570 } 1606 }
1571 dasm_put(Dst, 8621); 1607 dasm_put(Dst, 8621);
1572 if (op == BC_ISEQV || op == BC_ISNEV) { 1608 if (op == BC_ISEQV || op == BC_ISNEV) {
1573 dasm_put(Dst, 10809, LJ_TISPRI, LJ_TISTABUD, Dt6(->metatable), Dt6(->nomm), 1<<MM_eq); 1609 dasm_put(Dst, 11319, LJ_TISPRI, LJ_TISTABUD, Dt6(->metatable), Dt6(->nomm), 1<<MM_eq);
1574 if (vk) { 1610 if (vk) {
1575 dasm_put(Dst, 10867); 1611 dasm_put(Dst, 11377);
1576 } else { 1612 } else {
1577 dasm_put(Dst, 10871); 1613 dasm_put(Dst, 11381);
1578 } 1614 }
1579 dasm_put(Dst, 10877); 1615 dasm_put(Dst, 11387);
1580 } 1616 }
1581 break; 1617 break;
1582 case BC_ISEQS: case BC_ISNES: 1618 case BC_ISEQS: case BC_ISNES:
1583 vk = op == BC_ISEQS; 1619 vk = op == BC_ISEQS;
1584 dasm_put(Dst, 10882, LJ_TSTR); 1620 dasm_put(Dst, 11392, LJ_TSTR);
1585 iseqne_test: 1621 iseqne_test:
1586 if (vk) { 1622 if (vk) {
1587 dasm_put(Dst, 10765); 1623 dasm_put(Dst, 11275);
1588 } else { 1624 } else {
1589 dasm_put(Dst, 10774); 1625 dasm_put(Dst, 10553);
1590 } 1626 }
1591 goto iseqne_end; 1627 goto iseqne_end;
1592 case BC_ISEQN: case BC_ISNEN: 1628 case BC_ISEQN: case BC_ISNEN:
1593 vk = op == BC_ISEQN; 1629 vk = op == BC_ISEQN;
1594 dasm_put(Dst, 10905, LJ_TISNUM); 1630 dasm_put(Dst, 11415, LJ_TISNUM);
1595 if (sse) { 1631 if (sse) {
1596 dasm_put(Dst, 10919); 1632 dasm_put(Dst, 11429);
1597 } else { 1633 } else {
1598 dasm_put(Dst, 10931); 1634 dasm_put(Dst, 11441);
1599 if (cmov) { 1635 if (cmov) {
1600 dasm_put(Dst, 10660); 1636 dasm_put(Dst, 11170);
1601 } else { 1637 } else {
1602 dasm_put(Dst, 10666); 1638 dasm_put(Dst, 11176);
1603 } 1639 }
1604 } 1640 }
1605 goto iseqne_fp; 1641 goto iseqne_fp;
1606 case BC_ISEQP: case BC_ISNEP: 1642 case BC_ISEQP: case BC_ISNEP:
1607 vk = op == BC_ISEQP; 1643 vk = op == BC_ISEQP;
1608 dasm_put(Dst, 10938); 1644 dasm_put(Dst, 11448);
1609 goto iseqne_test; 1645 goto iseqne_test;
1610 1646
1611 /* -- Unary test and copy ops ------------------------------------------- */ 1647 /* -- Unary test and copy ops ------------------------------------------- */
1612 1648
1613 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: 1649 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
1614 dasm_put(Dst, 10949, LJ_TISTRUECOND); 1650 dasm_put(Dst, 11459, LJ_TISTRUECOND);
1615 if (op == BC_IST || op == BC_ISTC) { 1651 if (op == BC_IST || op == BC_ISTC) {
1616 dasm_put(Dst, 10961); 1652 dasm_put(Dst, 11471);
1617 } else { 1653 } else {
1618 dasm_put(Dst, 10966); 1654 dasm_put(Dst, 11476);
1619 } 1655 }
1620 if (op == BC_ISTC || op == BC_ISFC) { 1656 if (op == BC_ISTC || op == BC_ISFC) {
1621 dasm_put(Dst, 10971); 1657 dasm_put(Dst, 11481);
1622 } 1658 }
1623 dasm_put(Dst, 10982, -BCBIAS_J*4); 1659 dasm_put(Dst, 11492, -BCBIAS_J*4);
1624 break; 1660 break;
1625 1661
1626 /* -- Unary ops --------------------------------------------------------- */ 1662 /* -- Unary ops --------------------------------------------------------- */
1627 1663
1628 case BC_MOV: 1664 case BC_MOV:
1629 dasm_put(Dst, 11013); 1665 dasm_put(Dst, 11523);
1630 break; 1666 break;
1631 case BC_NOT: 1667 case BC_NOT:
1632 dasm_put(Dst, 11046, LJ_TISTRUECOND, LJ_TTRUE); 1668 dasm_put(Dst, 11556, LJ_TISTRUECOND, LJ_TTRUE);
1633 break; 1669 break;
1634 case BC_UNM: 1670 case BC_UNM:
1635 dasm_put(Dst, 11081, LJ_TISNUM); 1671 dasm_put(Dst, 11591, LJ_TISNUM);
1636 if (sse) { 1672 if (sse) {
1637 dasm_put(Dst, 11092); 1673 dasm_put(Dst, 11602);
1638 } else { 1674 } else {
1639 dasm_put(Dst, 11122); 1675 dasm_put(Dst, 11632);
1640 } 1676 }
1641 dasm_put(Dst, 8621); 1677 dasm_put(Dst, 8621);
1642 break; 1678 break;
1643 case BC_LEN: 1679 case BC_LEN:
1644 dasm_put(Dst, 11131, LJ_TSTR); 1680 dasm_put(Dst, 11641, LJ_TSTR);
1645 if (sse) { 1681 if (sse) {
1646 dasm_put(Dst, 11145, Dt5(->len)); 1682 dasm_put(Dst, 11655, Dt5(->len));
1647 } else { 1683 } else {
1648 dasm_put(Dst, 11163, Dt5(->len)); 1684 dasm_put(Dst, 11673, Dt5(->len));
1649 } 1685 }
1650 dasm_put(Dst, 11172, LJ_TTAB); 1686 dasm_put(Dst, 11682, LJ_TTAB);
1651 if (sse) { 1687 if (sse) {
1652 dasm_put(Dst, 11212); 1688 dasm_put(Dst, 11722);
1653 } else { 1689 } else {
1654 dasm_put(Dst, 11221); 1690 dasm_put(Dst, 11731);
1655 } 1691 }
1656 dasm_put(Dst, 11231); 1692 dasm_put(Dst, 11741);
1657 break; 1693 break;
1658 1694
1659 /* -- Binary ops -------------------------------------------------------- */ 1695 /* -- Binary ops -------------------------------------------------------- */
1660 1696
1661 1697
1662 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 1698 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
1663 dasm_put(Dst, 11241); 1699 dasm_put(Dst, 11751);
1664 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 1700 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1665 switch (vk) { 1701 switch (vk) {
1666 case 0: 1702 case 0:
1667 dasm_put(Dst, 11249, LJ_TISNUM); 1703 dasm_put(Dst, 11759, LJ_TISNUM);
1668 if (sse) { 1704 if (sse) {
1669 dasm_put(Dst, 11261); 1705 dasm_put(Dst, 11771);
1670 } else { 1706 } else {
1671 dasm_put(Dst, 11275); 1707 dasm_put(Dst, 11785);
1672 } 1708 }
1673 break; 1709 break;
1674 case 1: 1710 case 1:
1675 dasm_put(Dst, 11283, LJ_TISNUM); 1711 dasm_put(Dst, 11793, LJ_TISNUM);
1676 if (sse) { 1712 if (sse) {
1677 dasm_put(Dst, 11295); 1713 dasm_put(Dst, 11805);
1678 } else { 1714 } else {
1679 dasm_put(Dst, 11309); 1715 dasm_put(Dst, 11819);
1680 } 1716 }
1681 break; 1717 break;
1682 default: 1718 default:
1683 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); 1719 dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM);
1684 if (sse) { 1720 if (sse) {
1685 dasm_put(Dst, 11339); 1721 dasm_put(Dst, 11849);
1686 } else { 1722 } else {
1687 dasm_put(Dst, 11353); 1723 dasm_put(Dst, 11863);
1688 } 1724 }
1689 break; 1725 break;
1690 } 1726 }
1691 if (sse) { 1727 if (sse) {
1692 dasm_put(Dst, 11115); 1728 dasm_put(Dst, 11625);
1693 } else { 1729 } else {
1694 dasm_put(Dst, 11127); 1730 dasm_put(Dst, 11637);
1695 } 1731 }
1696 dasm_put(Dst, 8621); 1732 dasm_put(Dst, 8621);
1697 break; 1733 break;
1698 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 1734 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
1699 dasm_put(Dst, 11241); 1735 dasm_put(Dst, 11751);
1700 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 1736 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1701 switch (vk) { 1737 switch (vk) {
1702 case 0: 1738 case 0:
1703 dasm_put(Dst, 11249, LJ_TISNUM); 1739 dasm_put(Dst, 11759, LJ_TISNUM);
1704 if (sse) { 1740 if (sse) {
1705 dasm_put(Dst, 11361); 1741 dasm_put(Dst, 11871);
1706 } else { 1742 } else {
1707 dasm_put(Dst, 11375); 1743 dasm_put(Dst, 11885);
1708 } 1744 }
1709 break; 1745 break;
1710 case 1: 1746 case 1:
1711 dasm_put(Dst, 11283, LJ_TISNUM); 1747 dasm_put(Dst, 11793, LJ_TISNUM);
1712 if (sse) { 1748 if (sse) {
1713 dasm_put(Dst, 11383); 1749 dasm_put(Dst, 11893);
1714 } else { 1750 } else {
1715 dasm_put(Dst, 11397); 1751 dasm_put(Dst, 11907);
1716 } 1752 }
1717 break; 1753 break;
1718 default: 1754 default:
1719 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); 1755 dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM);
1720 if (sse) { 1756 if (sse) {
1721 dasm_put(Dst, 11405); 1757 dasm_put(Dst, 11915);
1722 } else { 1758 } else {
1723 dasm_put(Dst, 11419); 1759 dasm_put(Dst, 11929);
1724 } 1760 }
1725 break; 1761 break;
1726 } 1762 }
1727 if (sse) { 1763 if (sse) {
1728 dasm_put(Dst, 11115); 1764 dasm_put(Dst, 11625);
1729 } else { 1765 } else {
1730 dasm_put(Dst, 11127); 1766 dasm_put(Dst, 11637);
1731 } 1767 }
1732 dasm_put(Dst, 8621); 1768 dasm_put(Dst, 8621);
1733 break; 1769 break;
1734 case BC_MULVN: case BC_MULNV: case BC_MULVV: 1770 case BC_MULVN: case BC_MULNV: case BC_MULVV:
1735 dasm_put(Dst, 11241); 1771 dasm_put(Dst, 11751);
1736 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 1772 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1737 switch (vk) { 1773 switch (vk) {
1738 case 0: 1774 case 0:
1739 dasm_put(Dst, 11249, LJ_TISNUM); 1775 dasm_put(Dst, 11759, LJ_TISNUM);
1740 if (sse) { 1776 if (sse) {
1741 dasm_put(Dst, 11427); 1777 dasm_put(Dst, 11937);
1742 } else { 1778 } else {
1743 dasm_put(Dst, 11441); 1779 dasm_put(Dst, 11951);
1744 } 1780 }
1745 break; 1781 break;
1746 case 1: 1782 case 1:
1747 dasm_put(Dst, 11283, LJ_TISNUM); 1783 dasm_put(Dst, 11793, LJ_TISNUM);
1748 if (sse) { 1784 if (sse) {
1749 dasm_put(Dst, 11449); 1785 dasm_put(Dst, 11959);
1750 } else { 1786 } else {
1751 dasm_put(Dst, 11463); 1787 dasm_put(Dst, 11973);
1752 } 1788 }
1753 break; 1789 break;
1754 default: 1790 default:
1755 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); 1791 dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM);
1756 if (sse) { 1792 if (sse) {
1757 dasm_put(Dst, 11471); 1793 dasm_put(Dst, 11981);
1758 } else { 1794 } else {
1759 dasm_put(Dst, 11485); 1795 dasm_put(Dst, 11995);
1760 } 1796 }
1761 break; 1797 break;
1762 } 1798 }
1763 if (sse) { 1799 if (sse) {
1764 dasm_put(Dst, 11115); 1800 dasm_put(Dst, 11625);
1765 } else { 1801 } else {
1766 dasm_put(Dst, 11127); 1802 dasm_put(Dst, 11637);
1767 } 1803 }
1768 dasm_put(Dst, 8621); 1804 dasm_put(Dst, 8621);
1769 break; 1805 break;
1770 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 1806 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
1771 dasm_put(Dst, 11241); 1807 dasm_put(Dst, 11751);
1772 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 1808 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1773 switch (vk) { 1809 switch (vk) {
1774 case 0: 1810 case 0:
1775 dasm_put(Dst, 11249, LJ_TISNUM); 1811 dasm_put(Dst, 11759, LJ_TISNUM);
1776 if (sse) { 1812 if (sse) {
1777 dasm_put(Dst, 11493); 1813 dasm_put(Dst, 12003);
1778 } else { 1814 } else {
1779 dasm_put(Dst, 11507); 1815 dasm_put(Dst, 12017);
1780 } 1816 }
1781 break; 1817 break;
1782 case 1: 1818 case 1:
1783 dasm_put(Dst, 11283, LJ_TISNUM); 1819 dasm_put(Dst, 11793, LJ_TISNUM);
1784 if (sse) { 1820 if (sse) {
1785 dasm_put(Dst, 11515); 1821 dasm_put(Dst, 12025);
1786 } else { 1822 } else {
1787 dasm_put(Dst, 11529); 1823 dasm_put(Dst, 12039);
1788 } 1824 }
1789 break; 1825 break;
1790 default: 1826 default:
1791 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); 1827 dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM);
1792 if (sse) { 1828 if (sse) {
1793 dasm_put(Dst, 11537); 1829 dasm_put(Dst, 12047);
1794 } else { 1830 } else {
1795 dasm_put(Dst, 11551); 1831 dasm_put(Dst, 12061);
1796 } 1832 }
1797 break; 1833 break;
1798 } 1834 }
1799 if (sse) { 1835 if (sse) {
1800 dasm_put(Dst, 11115); 1836 dasm_put(Dst, 11625);
1801 } else { 1837 } else {
1802 dasm_put(Dst, 11127); 1838 dasm_put(Dst, 11637);
1803 } 1839 }
1804 dasm_put(Dst, 8621); 1840 dasm_put(Dst, 8621);
1805 break; 1841 break;
1806 case BC_MODVN: 1842 case BC_MODVN:
1807 dasm_put(Dst, 11241); 1843 dasm_put(Dst, 11751);
1808 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 1844 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1809 switch (vk) { 1845 switch (vk) {
1810 case 0: 1846 case 0:
1811 dasm_put(Dst, 11249, LJ_TISNUM); 1847 dasm_put(Dst, 11759, LJ_TISNUM);
1812 if (sse) { 1848 if (sse) {
1813 dasm_put(Dst, 11559); 1849 dasm_put(Dst, 12069);
1814 } else { 1850 } else {
1815 dasm_put(Dst, 11573); 1851 dasm_put(Dst, 12083);
1816 } 1852 }
1817 break; 1853 break;
1818 case 1: 1854 case 1:
1819 dasm_put(Dst, 11283, LJ_TISNUM); 1855 dasm_put(Dst, 11793, LJ_TISNUM);
1820 if (sse) { 1856 if (sse) {
1821 dasm_put(Dst, 11581); 1857 dasm_put(Dst, 12091);
1822 } else { 1858 } else {
1823 dasm_put(Dst, 11595); 1859 dasm_put(Dst, 12105);
1824 } 1860 }
1825 break; 1861 break;
1826 default: 1862 default:
1827 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); 1863 dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM);
1828 if (sse) { 1864 if (sse) {
1829 dasm_put(Dst, 11603); 1865 dasm_put(Dst, 12113);
1830 } else { 1866 } else {
1831 dasm_put(Dst, 11617); 1867 dasm_put(Dst, 12127);
1832 } 1868 }
1833 break; 1869 break;
1834 } 1870 }
1835 dasm_put(Dst, 11625); 1871 dasm_put(Dst, 12135);
1836 if (sse) { 1872 if (sse) {
1837 dasm_put(Dst, 11115); 1873 dasm_put(Dst, 11625);
1838 } else { 1874 } else {
1839 dasm_put(Dst, 11127); 1875 dasm_put(Dst, 11637);
1840 } 1876 }
1841 dasm_put(Dst, 8621); 1877 dasm_put(Dst, 8621);
1842 break; 1878 break;
1843 case BC_MODNV: case BC_MODVV: 1879 case BC_MODNV: case BC_MODVV:
1844 dasm_put(Dst, 11241); 1880 dasm_put(Dst, 11751);
1845 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 1881 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1846 switch (vk) { 1882 switch (vk) {
1847 case 0: 1883 case 0:
1848 dasm_put(Dst, 11249, LJ_TISNUM); 1884 dasm_put(Dst, 11759, LJ_TISNUM);
1849 if (sse) { 1885 if (sse) {
1850 dasm_put(Dst, 11559); 1886 dasm_put(Dst, 12069);
1851 } else { 1887 } else {
1852 dasm_put(Dst, 11573); 1888 dasm_put(Dst, 12083);
1853 } 1889 }
1854 break; 1890 break;
1855 case 1: 1891 case 1:
1856 dasm_put(Dst, 11283, LJ_TISNUM); 1892 dasm_put(Dst, 11793, LJ_TISNUM);
1857 if (sse) { 1893 if (sse) {
1858 dasm_put(Dst, 11581); 1894 dasm_put(Dst, 12091);
1859 } else { 1895 } else {
1860 dasm_put(Dst, 11595); 1896 dasm_put(Dst, 12105);
1861 } 1897 }
1862 break; 1898 break;
1863 default: 1899 default:
1864 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); 1900 dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM);
1865 if (sse) { 1901 if (sse) {
1866 dasm_put(Dst, 11603); 1902 dasm_put(Dst, 12113);
1867 } else { 1903 } else {
1868 dasm_put(Dst, 11617); 1904 dasm_put(Dst, 12127);
1869 } 1905 }
1870 break; 1906 break;
1871 } 1907 }
1872 dasm_put(Dst, 11631); 1908 dasm_put(Dst, 12141);
1873 break; 1909 break;
1874 case BC_POW: 1910 case BC_POW:
1911 dasm_put(Dst, 11751);
1912 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1913 switch (vk) {
1914 case 0:
1915 dasm_put(Dst, 11759, LJ_TISNUM);
1875 if (sse) { 1916 if (sse) {
1876 sse = 0; /* NYI: temporary workaround. */ 1917 dasm_put(Dst, 12069);
1877 dasm_put(Dst, 11241);
1878 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1879 switch (vk) {
1880 case 0:
1881 dasm_put(Dst, 11249, LJ_TISNUM);
1882 if (sse) {
1883 dasm_put(Dst, 11559);
1884 } else {
1885 dasm_put(Dst, 11573);
1886 }
1887 break;
1888 case 1:
1889 dasm_put(Dst, 11283, LJ_TISNUM);
1890 if (sse) {
1891 dasm_put(Dst, 11581);
1892 } else {
1893 dasm_put(Dst, 11595);
1894 }
1895 break;
1896 default:
1897 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM);
1898 if (sse) {
1899 dasm_put(Dst, 11603);
1900 } else {
1901 dasm_put(Dst, 11617);
1902 }
1903 break;
1904 }
1905 dasm_put(Dst, 11636);
1906 if (sse) {
1907 dasm_put(Dst, 11115);
1908 } else {
1909 dasm_put(Dst, 11127);
1910 }
1911 sse = 1;
1912 } else { 1918 } else {
1913 dasm_put(Dst, 11241); 1919 dasm_put(Dst, 12083);
1914 vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 1920 }
1915 switch (vk) { 1921 break;
1916 case 0: 1922 case 1:
1917 dasm_put(Dst, 11249, LJ_TISNUM); 1923 dasm_put(Dst, 11793, LJ_TISNUM);
1918 if (sse) { 1924 if (sse) {
1919 dasm_put(Dst, 11559); 1925 dasm_put(Dst, 12091);
1920 } else { 1926 } else {
1921 dasm_put(Dst, 11573); 1927 dasm_put(Dst, 12105);
1922 } 1928 }
1923 break; 1929 break;
1924 case 1: 1930 default:
1925 dasm_put(Dst, 11283, LJ_TISNUM); 1931 dasm_put(Dst, 11827, LJ_TISNUM, LJ_TISNUM);
1926 if (sse) { 1932 if (sse) {
1927 dasm_put(Dst, 11581); 1933 dasm_put(Dst, 12113);
1928 } else { 1934 } else {
1929 dasm_put(Dst, 11595); 1935 dasm_put(Dst, 12127);
1930 } 1936 }
1931 break; 1937 break;
1932 default: 1938 }
1933 dasm_put(Dst, 11317, LJ_TISNUM, LJ_TISNUM); 1939 dasm_put(Dst, 12146);
1934 if (sse) { 1940 if (sse) {
1935 dasm_put(Dst, 11603); 1941 dasm_put(Dst, 11625);
1936 } else { 1942 } else {
1937 dasm_put(Dst, 11617); 1943 dasm_put(Dst, 11637);
1938 }
1939 break;
1940 }
1941 dasm_put(Dst, 11636);
1942 if (sse) {
1943 dasm_put(Dst, 11115);
1944 } else {
1945 dasm_put(Dst, 11127);
1946 }
1947 } 1944 }
1948 dasm_put(Dst, 8621); 1945 dasm_put(Dst, 8621);
1949 break; 1946 break;
1950 1947
1951 case BC_CAT: 1948 case BC_CAT:
1952 dasm_put(Dst, 11640, Dt1(->base), Dt1(->base)); 1949 dasm_put(Dst, 12150, Dt1(->base), Dt1(->base));
1953 break; 1950 break;
1954 1951
1955 /* -- Constant ops ------------------------------------------------------ */ 1952 /* -- Constant ops ------------------------------------------------------ */
1956 1953
1957 case BC_KSTR: 1954 case BC_KSTR:
1958 dasm_put(Dst, 11734, LJ_TSTR); 1955 dasm_put(Dst, 12244, LJ_TSTR);
1959 break; 1956 break;
1960 case BC_KSHORT: 1957 case BC_KSHORT:
1961 if (sse) { 1958 if (sse) {
1962 dasm_put(Dst, 11767); 1959 dasm_put(Dst, 12277);
1963 } else { 1960 } else {
1964 dasm_put(Dst, 11782); 1961 dasm_put(Dst, 12292);
1965 } 1962 }
1966 dasm_put(Dst, 8621); 1963 dasm_put(Dst, 8621);
1967 break; 1964 break;
1968 case BC_KNUM: 1965 case BC_KNUM:
1969 if (sse) { 1966 if (sse) {
1970 dasm_put(Dst, 11790); 1967 dasm_put(Dst, 12300);
1971 } else { 1968 } else {
1972 dasm_put(Dst, 11803); 1969 dasm_put(Dst, 12313);
1973 } 1970 }
1974 dasm_put(Dst, 8621); 1971 dasm_put(Dst, 8621);
1975 break; 1972 break;
1976 case BC_KPRI: 1973 case BC_KPRI:
1977 dasm_put(Dst, 11810); 1974 dasm_put(Dst, 12320);
1978 break; 1975 break;
1979 case BC_KNIL: 1976 case BC_KNIL:
1980 dasm_put(Dst, 11836, LJ_TNIL); 1977 dasm_put(Dst, 12346, LJ_TNIL);
1981 break; 1978 break;
1982 1979
1983 /* -- Upvalue and function ops ------------------------------------------ */ 1980 /* -- Upvalue and function ops ------------------------------------------ */
1984 1981
1985 case BC_UGET: 1982 case BC_UGET:
1986 dasm_put(Dst, 11882, offsetof(GCfuncL, uvptr), DtA(->v)); 1983 dasm_put(Dst, 12392, offsetof(GCfuncL, uvptr), DtA(->v));
1987 break; 1984 break;
1988 case BC_USETV: 1985 case BC_USETV:
1989#define TV2MARKOFS \ 1986#define TV2MARKOFS \
1990 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) 1987 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
1991 dasm_put(Dst, 11926, offsetof(GCfuncL, uvptr), DtA(->closed), DtA(->v), TV2MARKOFS, LJ_GC_BLACK, LJ_TISGCV, LJ_TISNUM - LJ_TISGCV, Dt4(->gch.marked), LJ_GC_WHITES, GG_DISP2G); 1988 dasm_put(Dst, 12436, offsetof(GCfuncL, uvptr), DtA(->closed), DtA(->v), TV2MARKOFS, LJ_GC_BLACK, LJ_TISGCV, LJ_TISNUM - LJ_TISGCV, Dt4(->gch.marked), LJ_GC_WHITES, GG_DISP2G);
1992 dasm_put(Dst, 12016); 1989 dasm_put(Dst, 12526);
1993 break; 1990 break;
1994#undef TV2MARKOFS 1991#undef TV2MARKOFS
1995 case BC_USETS: 1992 case BC_USETS:
1996 dasm_put(Dst, 12028, offsetof(GCfuncL, uvptr), DtA(->v), LJ_TSTR, DtA(->marked), LJ_GC_BLACK, Dt4(->gch.marked), LJ_GC_WHITES, DtA(->closed), GG_DISP2G); 1993 dasm_put(Dst, 12538, offsetof(GCfuncL, uvptr), DtA(->v), LJ_TSTR, DtA(->marked), LJ_GC_BLACK, Dt4(->gch.marked), LJ_GC_WHITES, DtA(->closed), GG_DISP2G);
1997 break; 1994 break;
1998 case BC_USETN: 1995 case BC_USETN:
1999 dasm_put(Dst, 12119); 1996 dasm_put(Dst, 12629);
2000 if (sse) { 1997 if (sse) {
2001 dasm_put(Dst, 12124); 1998 dasm_put(Dst, 12634);
2002 } else { 1999 } else {
2003 dasm_put(Dst, 10934); 2000 dasm_put(Dst, 11444);
2004 } 2001 }
2005 dasm_put(Dst, 12131, offsetof(GCfuncL, uvptr), DtA(->v)); 2002 dasm_put(Dst, 12641, offsetof(GCfuncL, uvptr), DtA(->v));
2006 if (sse) { 2003 if (sse) {
2007 dasm_put(Dst, 4988); 2004 dasm_put(Dst, 4988);
2008 } else { 2005 } else {
@@ -2011,159 +2008,159 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
2011 dasm_put(Dst, 8621); 2008 dasm_put(Dst, 8621);
2012 break; 2009 break;
2013 case BC_USETP: 2010 case BC_USETP:
2014 dasm_put(Dst, 12140, offsetof(GCfuncL, uvptr), DtA(->v)); 2011 dasm_put(Dst, 12650, offsetof(GCfuncL, uvptr), DtA(->v));
2015 break; 2012 break;
2016 case BC_UCLO: 2013 case BC_UCLO:
2017 dasm_put(Dst, 12177, -BCBIAS_J*4, Dt1(->openupval), Dt1(->base), Dt1(->base)); 2014 dasm_put(Dst, 12687, -BCBIAS_J*4, Dt1(->openupval), Dt1(->base), Dt1(->base));
2018 break; 2015 break;
2019 2016
2020 case BC_FNEW: 2017 case BC_FNEW:
2021 dasm_put(Dst, 12235, Dt1(->base), Dt1(->base), LJ_TFUNC); 2018 dasm_put(Dst, 12745, Dt1(->base), Dt1(->base), LJ_TFUNC);
2022 break; 2019 break;
2023 2020
2024 /* -- Table ops --------------------------------------------------------- */ 2021 /* -- Table ops --------------------------------------------------------- */
2025 2022
2026 case BC_TNEW: 2023 case BC_TNEW:
2027 dasm_put(Dst, 12306, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB); 2024 dasm_put(Dst, 12816, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB);
2028 break; 2025 break;
2029 case BC_TDUP: 2026 case BC_TDUP:
2030 dasm_put(Dst, 12417, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB); 2027 dasm_put(Dst, 12927, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB);
2031 break; 2028 break;
2032 2029
2033 case BC_GGET: 2030 case BC_GGET:
2034 dasm_put(Dst, 12509, Dt7(->env)); 2031 dasm_put(Dst, 13019, Dt7(->env));
2035 break; 2032 break;
2036 case BC_GSET: 2033 case BC_GSET:
2037 dasm_put(Dst, 12527, Dt7(->env)); 2034 dasm_put(Dst, 13037, Dt7(->env));
2038 break; 2035 break;
2039 2036
2040 case BC_TGETV: 2037 case BC_TGETV:
2041 dasm_put(Dst, 12545, LJ_TTAB, LJ_TISNUM); 2038 dasm_put(Dst, 13055, LJ_TTAB, LJ_TISNUM);
2042 if (sse) { 2039 if (sse) {
2043 dasm_put(Dst, 12578); 2040 dasm_put(Dst, 13088);
2044 } else { 2041 } else {
2045 dasm_put(Dst, 12599); 2042 dasm_put(Dst, 13109);
2046 if (cmov) { 2043 if (cmov) {
2047 dasm_put(Dst, 10660); 2044 dasm_put(Dst, 11170);
2048 } else { 2045 } else {
2049 dasm_put(Dst, 10666); 2046 dasm_put(Dst, 11176);
2050 } 2047 }
2051 dasm_put(Dst, 12609); 2048 dasm_put(Dst, 13119);
2052 } 2049 }
2053 dasm_put(Dst, 12613, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index); 2050 dasm_put(Dst, 13123, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
2054 dasm_put(Dst, 12705, LJ_TSTR); 2051 dasm_put(Dst, 13215, LJ_TSTR);
2055 break; 2052 break;
2056 case BC_TGETS: 2053 case BC_TGETS:
2057 dasm_put(Dst, 12723, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL); 2054 dasm_put(Dst, 13233, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL);
2058 dasm_put(Dst, 12807, LJ_TNIL, DtB(->next), Dt6(->metatable), Dt6(->nomm), 1<<MM_index); 2055 dasm_put(Dst, 13317, LJ_TNIL, DtB(->next), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
2059 break; 2056 break;
2060 case BC_TGETB: 2057 case BC_TGETB:
2061 dasm_put(Dst, 12878, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index); 2058 dasm_put(Dst, 13388, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
2062 dasm_put(Dst, 11231); 2059 dasm_put(Dst, 11741);
2063 break; 2060 break;
2064 2061
2065 case BC_TSETV: 2062 case BC_TSETV:
2066 dasm_put(Dst, 12977, LJ_TTAB, LJ_TISNUM); 2063 dasm_put(Dst, 13487, LJ_TTAB, LJ_TISNUM);
2067 if (sse) { 2064 if (sse) {
2068 dasm_put(Dst, 12578); 2065 dasm_put(Dst, 13088);
2069 } else { 2066 } else {
2070 dasm_put(Dst, 12599); 2067 dasm_put(Dst, 13109);
2071 if (cmov) { 2068 if (cmov) {
2072 dasm_put(Dst, 10660); 2069 dasm_put(Dst, 11170);
2073 } else { 2070 } else {
2074 dasm_put(Dst, 10666); 2071 dasm_put(Dst, 11176);
2075 } 2072 }
2076 dasm_put(Dst, 12609); 2073 dasm_put(Dst, 13119);
2077 } 2074 }
2078 dasm_put(Dst, 13010, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable)); 2075 dasm_put(Dst, 13520, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable));
2079 dasm_put(Dst, 13093, Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); 2076 dasm_put(Dst, 13603, Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
2080 break; 2077 break;
2081 case BC_TSETS: 2078 case BC_TSETS:
2082 dasm_put(Dst, 13155, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->nomm), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL); 2079 dasm_put(Dst, 13665, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->nomm), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL);
2083 dasm_put(Dst, 13230, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, DtB(->next)); 2080 dasm_put(Dst, 13740, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, DtB(->next));
2084 dasm_put(Dst, 13322, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); 2081 dasm_put(Dst, 13832, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
2085 break; 2082 break;
2086 case BC_TSETB: 2083 case BC_TSETB:
2087 dasm_put(Dst, 13418, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable)); 2084 dasm_put(Dst, 13928, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable));
2088 dasm_put(Dst, 13516, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); 2085 dasm_put(Dst, 14026, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
2089 break; 2086 break;
2090 2087
2091 case BC_TSETM: 2088 case BC_TSETM:
2092 dasm_put(Dst, 13562); 2089 dasm_put(Dst, 14072);
2093 if (sse) { 2090 if (sse) {
2094 dasm_put(Dst, 12124); 2091 dasm_put(Dst, 12634);
2095 } else { 2092 } else {
2096 dasm_put(Dst, 13567); 2093 dasm_put(Dst, 14077);
2097 } 2094 }
2098 dasm_put(Dst, 13575, Dt6(->marked), LJ_GC_BLACK); 2095 dasm_put(Dst, 14085, Dt6(->marked), LJ_GC_BLACK);
2099 if (sse) { 2096 if (sse) {
2100 dasm_put(Dst, 13600); 2097 dasm_put(Dst, 14110);
2101 } else { 2098 } else {
2102 dasm_put(Dst, 13607); 2099 dasm_put(Dst, 14117);
2103 } 2100 }
2104 dasm_put(Dst, 13612, Dt6(->asize), Dt6(->array), Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain)); 2101 dasm_put(Dst, 14122, Dt6(->asize), Dt6(->array), Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain));
2105 dasm_put(Dst, 13740, Dt6(->gclist)); 2102 dasm_put(Dst, 14250, Dt6(->gclist));
2106 break; 2103 break;
2107 2104
2108 /* -- Calls and vararg handling ----------------------------------------- */ 2105 /* -- Calls and vararg handling ----------------------------------------- */
2109 2106
2110 case BC_CALL: case BC_CALLM: 2107 case BC_CALL: case BC_CALLM:
2111 dasm_put(Dst, 11245); 2108 dasm_put(Dst, 11755);
2112 if (op == BC_CALLM) { 2109 if (op == BC_CALLM) {
2113 dasm_put(Dst, 13748); 2110 dasm_put(Dst, 14258);
2114 } 2111 }
2115 dasm_put(Dst, 13753, LJ_TFUNC, Dt7(->gate)); 2112 dasm_put(Dst, 14263, LJ_TFUNC, Dt7(->gate));
2116 break; 2113 break;
2117 2114
2118 case BC_CALLMT: 2115 case BC_CALLMT:
2119 dasm_put(Dst, 13748); 2116 dasm_put(Dst, 14258);
2120 break; 2117 break;
2121 case BC_CALLT: 2118 case BC_CALLT:
2122 dasm_put(Dst, 13776, LJ_TFUNC, FRAME_TYPE, Dt7(->ffid), Dt7(->gate)); 2119 dasm_put(Dst, 14286, LJ_TFUNC, FRAME_TYPE, Dt7(->ffid), Dt7(->gate));
2123 dasm_put(Dst, 13881, FRAME_TYPE, Dt7(->pt), Dt9(->k)); 2120 dasm_put(Dst, 14391, FRAME_TYPE, Dt7(->pt), Dt9(->k));
2124 break; 2121 break;
2125 2122
2126 case BC_ITERC: 2123 case BC_ITERC:
2127 dasm_put(Dst, 13938, LJ_TFUNC, Dt7(->gate)); 2124 dasm_put(Dst, 14448, LJ_TFUNC, Dt7(->gate));
2128 break; 2125 break;
2129 2126
2130 case BC_VARG: 2127 case BC_VARG:
2131 dasm_put(Dst, 14000, Dt7(->pt), Dt9(->numparams), (8+FRAME_VARG), LJ_TNIL); 2128 dasm_put(Dst, 14510, Dt7(->pt), Dt9(->numparams), (8+FRAME_VARG), LJ_TNIL);
2132 dasm_put(Dst, 14144, Dt1(->maxstack), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top)); 2129 dasm_put(Dst, 14654, Dt1(->maxstack), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top));
2133 break; 2130 break;
2134 2131
2135 /* -- Returns ----------------------------------------------------------- */ 2132 /* -- Returns ----------------------------------------------------------- */
2136 2133
2137 case BC_RETM: 2134 case BC_RETM:
2138 dasm_put(Dst, 13748); 2135 dasm_put(Dst, 14258);
2139 break; 2136 break;
2140 2137
2141 case BC_RET: case BC_RET0: case BC_RET1: 2138 case BC_RET: case BC_RET0: case BC_RET1:
2142 if (op != BC_RET0) { 2139 if (op != BC_RET0) {
2143 dasm_put(Dst, 14239); 2140 dasm_put(Dst, 14749);
2144 } 2141 }
2145 dasm_put(Dst, 14243, FRAME_TYPE); 2142 dasm_put(Dst, 14753, FRAME_TYPE);
2146 switch (op) { 2143 switch (op) {
2147 case BC_RET: 2144 case BC_RET:
2148 dasm_put(Dst, 14262); 2145 dasm_put(Dst, 14772);
2149 break; 2146 break;
2150 case BC_RET1: 2147 case BC_RET1:
2151 dasm_put(Dst, 14320); 2148 dasm_put(Dst, 14830);
2152 /* fallthrough */ 2149 /* fallthrough */
2153 case BC_RET0: 2150 case BC_RET0:
2154 dasm_put(Dst, 14336); 2151 dasm_put(Dst, 14846);
2155 default: 2152 default:
2156 break; 2153 break;
2157 } 2154 }
2158 dasm_put(Dst, 14347, Dt7(->pt), Dt9(->k)); 2155 dasm_put(Dst, 14857, Dt7(->pt), Dt9(->k));
2159 if (op == BC_RET) { 2156 if (op == BC_RET) {
2160 dasm_put(Dst, 14389, LJ_TNIL); 2157 dasm_put(Dst, 14899, LJ_TNIL);
2161 } else { 2158 } else {
2162 dasm_put(Dst, 14398, LJ_TNIL); 2159 dasm_put(Dst, 14908, LJ_TNIL);
2163 } 2160 }
2164 dasm_put(Dst, 14405); 2161 dasm_put(Dst, 14915);
2165 if (op != BC_RET0) { 2162 if (op != BC_RET0) {
2166 dasm_put(Dst, 14426); 2163 dasm_put(Dst, 14936);
2167 } 2164 }
2168 dasm_put(Dst, 5084); 2165 dasm_put(Dst, 5084);
2169 break; 2166 break;
@@ -2173,7 +2170,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
2173 2170
2174 case BC_FORL: 2171 case BC_FORL:
2175#if LJ_HASJIT 2172#if LJ_HASJIT
2176 dasm_put(Dst, 14430, HOTCOUNT_PCMASK, GG_DISP2HOT); 2173 dasm_put(Dst, 14940, HOTCOUNT_PCMASK, GG_DISP2HOT);
2177#endif 2174#endif
2178 break; 2175 break;
2179 2176
@@ -2185,57 +2182,57 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
2185 case BC_FORI: 2182 case BC_FORI:
2186 case BC_IFORL: 2183 case BC_IFORL:
2187 vk = (op == BC_IFORL || op == BC_JFORL); 2184 vk = (op == BC_IFORL || op == BC_JFORL);
2188 dasm_put(Dst, 14451); 2185 dasm_put(Dst, 14961);
2189 if (!vk) { 2186 if (!vk) {
2190 dasm_put(Dst, 14455, LJ_TISNUM, LJ_TISNUM); 2187 dasm_put(Dst, 14965, LJ_TISNUM, LJ_TISNUM);
2191 } 2188 }
2192 dasm_put(Dst, 14474); 2189 dasm_put(Dst, 14984);
2193 if (!vk) { 2190 if (!vk) {
2194 dasm_put(Dst, 14478, LJ_TISNUM); 2191 dasm_put(Dst, 14988, LJ_TISNUM);
2195 } 2192 }
2196 if (sse) { 2193 if (sse) {
2197 dasm_put(Dst, 14487); 2194 dasm_put(Dst, 14997);
2198 if (vk) { 2195 if (vk) {
2199 dasm_put(Dst, 14499); 2196 dasm_put(Dst, 15009);
2200 } else { 2197 } else {
2201 dasm_put(Dst, 14518); 2198 dasm_put(Dst, 15028);
2202 } 2199 }
2203 dasm_put(Dst, 14523); 2200 dasm_put(Dst, 15033);
2204 } else { 2201 } else {
2205 dasm_put(Dst, 14536); 2202 dasm_put(Dst, 15046);
2206 if (vk) { 2203 if (vk) {
2207 dasm_put(Dst, 14542); 2204 dasm_put(Dst, 15052);
2208 } else { 2205 } else {
2209 dasm_put(Dst, 14558); 2206 dasm_put(Dst, 15068);
2210 } 2207 }
2211 dasm_put(Dst, 14566); 2208 dasm_put(Dst, 15076);
2212 if (cmov) { 2209 if (cmov) {
2213 dasm_put(Dst, 10660); 2210 dasm_put(Dst, 11170);
2214 } else { 2211 } else {
2215 dasm_put(Dst, 10666); 2212 dasm_put(Dst, 11176);
2216 } 2213 }
2217 if (!cmov) { 2214 if (!cmov) {
2218 dasm_put(Dst, 14571); 2215 dasm_put(Dst, 15081);
2219 } 2216 }
2220 } 2217 }
2221 if (op == BC_FORI) { 2218 if (op == BC_FORI) {
2222 dasm_put(Dst, 14577, -BCBIAS_J*4); 2219 dasm_put(Dst, 15087, -BCBIAS_J*4);
2223 } else if (op == BC_JFORI) { 2220 } else if (op == BC_JFORI) {
2224 dasm_put(Dst, 14587, -BCBIAS_J*4, BC_JLOOP); 2221 dasm_put(Dst, 15097, -BCBIAS_J*4, BC_JLOOP);
2225 } else if (op == BC_IFORL) { 2222 } else if (op == BC_IFORL) {
2226 dasm_put(Dst, 14601, -BCBIAS_J*4); 2223 dasm_put(Dst, 15111, -BCBIAS_J*4);
2227 } else { 2224 } else {
2228 dasm_put(Dst, 14597, BC_JLOOP); 2225 dasm_put(Dst, 15107, BC_JLOOP);
2229 } 2226 }
2230 dasm_put(Dst, 10695); 2227 dasm_put(Dst, 11205);
2231 if (sse) { 2228 if (sse) {
2232 dasm_put(Dst, 14611); 2229 dasm_put(Dst, 15121);
2233 } 2230 }
2234 break; 2231 break;
2235 2232
2236 case BC_ITERL: 2233 case BC_ITERL:
2237#if LJ_HASJIT 2234#if LJ_HASJIT
2238 dasm_put(Dst, 14430, HOTCOUNT_PCMASK, GG_DISP2HOT); 2235 dasm_put(Dst, 14940, HOTCOUNT_PCMASK, GG_DISP2HOT);
2239#endif 2236#endif
2240 break; 2237 break;
2241 2238
@@ -2244,18 +2241,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
2244 break; 2241 break;
2245#endif 2242#endif
2246 case BC_IITERL: 2243 case BC_IITERL:
2247 dasm_put(Dst, 14622, LJ_TNIL); 2244 dasm_put(Dst, 15132, LJ_TNIL);
2248 if (op == BC_JITERL) { 2245 if (op == BC_JITERL) {
2249 dasm_put(Dst, 14637, BC_JLOOP); 2246 dasm_put(Dst, 15147, BC_JLOOP);
2250 } else { 2247 } else {
2251 dasm_put(Dst, 14651, -BCBIAS_J*4); 2248 dasm_put(Dst, 15161, -BCBIAS_J*4);
2252 } 2249 }
2253 dasm_put(Dst, 10992); 2250 dasm_put(Dst, 11502);
2254 break; 2251 break;
2255 2252
2256 case BC_LOOP: 2253 case BC_LOOP:
2257#if LJ_HASJIT 2254#if LJ_HASJIT
2258 dasm_put(Dst, 14430, HOTCOUNT_PCMASK, GG_DISP2HOT); 2255 dasm_put(Dst, 14940, HOTCOUNT_PCMASK, GG_DISP2HOT);
2259#endif 2256#endif
2260 break; 2257 break;
2261 2258
@@ -2265,12 +2262,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
2265 2262
2266 case BC_JLOOP: 2263 case BC_JLOOP:
2267#if LJ_HASJIT 2264#if LJ_HASJIT
2268 dasm_put(Dst, 14667, DISPATCH_J(trace), DtD(->mcode), DISPATCH_GL(jit_base), DISPATCH_GL(jit_L)); 2265 dasm_put(Dst, 15177, DISPATCH_J(trace), DtD(->mcode), DISPATCH_GL(jit_base), DISPATCH_GL(jit_L));
2269#endif 2266#endif
2270 break; 2267 break;
2271 2268
2272 case BC_JMP: 2269 case BC_JMP:
2273 dasm_put(Dst, 14690, -BCBIAS_J*4); 2270 dasm_put(Dst, 15200, -BCBIAS_J*4);
2274 break; 2271 break;
2275 2272
2276 /* ---------------------------------------------------------------------- */ 2273 /* ---------------------------------------------------------------------- */
@@ -2298,7 +2295,7 @@ static int build_backend(BuildCtx *ctx)
2298 2295
2299 build_subroutines(ctx, cmov, sse); 2296 build_subroutines(ctx, cmov, sse);
2300 2297
2301 dasm_put(Dst, 14714); 2298 dasm_put(Dst, 15224);
2302 for (op = 0; op < BC__MAX; op++) 2299 for (op = 0; op < BC__MAX; op++)
2303 build_ins(ctx, (BCOp)op, op, cmov, sse); 2300 build_ins(ctx, (BCOp)op, op, cmov, sse);
2304 2301
diff --git a/src/lj_asm.c b/src/lj_asm.c
index c2cc4342..eb14b0e5 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1991,9 +1991,19 @@ static int fpmjoin_pow(ASMState *as, IRIns *ir)
1991 IRIns *irpp = IR(irp->op1); 1991 IRIns *irpp = IR(irp->op1);
1992 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1992 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1993 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1993 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1994 emit_call(as, lj_vm_pow); /* st0 = lj_vm_pow(st1, st0) */ 1994 /* The modified regs must match with the *.dasc implementation. */
1995 asm_x87load(as, irp->op2); 1995 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1996 asm_x87load(as, irpp->op1); 1996 IRIns *irx;
1997 if (ra_hasreg(ir->r))
1998 rset_clear(drop, ir->r); /* Dest reg handled below. */
1999 ra_evictset(as, drop);
2000 ra_destreg(as, ir, RID_XMM0);
2001 emit_call(as, lj_vm_pow_sse);
2002 irx = IR(irpp->op1);
2003 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
2004 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
2005 ra_left(as, RID_XMM0, irpp->op1);
2006 ra_left(as, RID_XMM1, irp->op2);
1997 return 1; 2007 return 1;
1998 } 2008 }
1999 } 2009 }
@@ -2007,30 +2017,35 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
2007 Reg dest = ra_dest(as, ir, RSET_FPR); 2017 Reg dest = ra_dest(as, ir, RSET_FPR);
2008 Reg left = asm_fuseload(as, ir->op1, RSET_FPR); 2018 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
2009 emit_mrm(as, XO_SQRTSD, dest, left); 2019 emit_mrm(as, XO_SQRTSD, dest, left);
2010 } else if ((as->flags & JIT_F_SSE4_1) && fpm <= IRFPM_TRUNC) {
2011 Reg dest = ra_dest(as, ir, RSET_FPR);
2012 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
2013 /* Round down/up/trunc == 1001/1010/1011. */
2014 emit_i8(as, 0x09 + fpm);
2015 /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op. */
2016 emit_mrm(as, XO_ROUNDSD, dest, left);
2017 /* Let's pretend it's a 3-byte opcode, and compensate afterwards. */
2018 /* This is atrocious, but the alternatives are much worse. */
2019 if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) {
2020 as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */
2021 }
2022 *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */
2023 } else if (fpm <= IRFPM_TRUNC) { 2020 } else if (fpm <= IRFPM_TRUNC) {
2024 /* The modified regs must match with the *.dasc implementation. */ 2021 if (as->flags & JIT_F_SSE4_1) { /* SSE4.1 has a rounding instruction. */
2025 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); 2022 Reg dest = ra_dest(as, ir, RSET_FPR);
2026 if (ra_hasreg(ir->r)) 2023 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
2027 rset_clear(drop, ir->r); /* Dest reg handled below. */ 2024 /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op.
2028 ra_evictset(as, drop); 2025 ** Let's pretend it's a 3-byte opcode, and compensate afterwards.
2029 ra_destreg(as, ir, RID_XMM0); 2026 ** This is atrocious, but the alternatives are much worse.
2030 emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse : 2027 */
2031 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 2028 /* Round down/up/trunc == 1001/1010/1011. */
2032 ra_left(as, RID_XMM0, ir->op1); 2029 emit_i8(as, 0x09 + fpm);
2033 } else { 2030 emit_mrm(as, XO_ROUNDSD, dest, left);
2031 if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) {
2032 as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */
2033 }
2034 *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */
2035 } else { /* Call helper functions for SSE2 variant. */
2036 /* The modified regs must match with the *.dasc implementation. */
2037 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2038 if (ra_hasreg(ir->r))
2039 rset_clear(drop, ir->r); /* Dest reg handled below. */
2040 ra_evictset(as, drop);
2041 ra_destreg(as, ir, RID_XMM0);
2042 emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse :
2043 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
2044 ra_left(as, RID_XMM0, ir->op1);
2045 }
2046 } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
2047 /* Rejoined to pow(). */
2048 } else { /* Handle x87 ops. */
2034 int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ 2049 int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
2035 Reg dest = ir->r; 2050 Reg dest = ir->r;
2036 if (ra_hasreg(dest)) { 2051 if (ra_hasreg(dest)) {
@@ -2040,14 +2055,8 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
2040 } 2055 }
2041 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 2056 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
2042 switch (fpm) { /* st0 = lj_vm_*(st0) */ 2057 switch (fpm) { /* st0 = lj_vm_*(st0) */
2043 case IRFPM_FLOOR: emit_call(as, lj_vm_floor); break;
2044 case IRFPM_CEIL: emit_call(as, lj_vm_ceil); break;
2045 case IRFPM_TRUNC: emit_call(as, lj_vm_trunc); break;
2046 case IRFPM_EXP: emit_call(as, lj_vm_exp); break; 2058 case IRFPM_EXP: emit_call(as, lj_vm_exp); break;
2047 case IRFPM_EXP2: 2059 case IRFPM_EXP2: emit_call(as, lj_vm_exp2); break;
2048 if (fpmjoin_pow(as, ir)) return;
2049 emit_call(as, lj_vm_exp2); /* st0 = lj_vm_exp2(st0) */
2050 break;
2051 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; 2060 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
2052 case IRFPM_COS: emit_x87op(as, XI_FCOS); break; 2061 case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
2053 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; 2062 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
@@ -2063,10 +2072,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
2063 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; 2072 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
2064 case IR_LDEXP: 2073 case IR_LDEXP:
2065 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; 2074 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
2066 case IR_POWI:
2067 emit_call(as, lj_vm_powi); /* st0 = lj_vm_powi(st0, [esp]) */
2068 emit_rmro(as, XO_MOVto, ra_alloc1(as, ir->op2, RSET_GPR), RID_ESP, 0);
2069 break;
2070 default: lua_assert(0); break; 2075 default: lua_assert(0); break;
2071 } 2076 }
2072 break; 2077 break;
@@ -2085,6 +2090,19 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
2085 } 2090 }
2086} 2091}
2087 2092
2093static void asm_powi(ASMState *as, IRIns *ir)
2094{
2095 /* The modified regs must match with the *.dasc implementation. */
2096 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
2097 if (ra_hasreg(ir->r))
2098 rset_clear(drop, ir->r); /* Dest reg handled below. */
2099 ra_evictset(as, drop);
2100 ra_destreg(as, ir, RID_XMM0);
2101 emit_call(as, lj_vm_powi_sse);
2102 ra_left(as, RID_XMM0, ir->op1);
2103 ra_left(as, RID_EAX, ir->op2);
2104}
2105
2088/* Find out whether swapping operands might be beneficial. */ 2106/* Find out whether swapping operands might be beneficial. */
2089static int swapops(ASMState *as, IRIns *ir) 2107static int swapops(ASMState *as, IRIns *ir)
2090{ 2108{
@@ -3132,9 +3150,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
3132 case IR_MIN: asm_fparith(as, ir, XO_MINSD); break; 3150 case IR_MIN: asm_fparith(as, ir, XO_MINSD); break;
3133 case IR_MAX: asm_fparith(as, ir, XO_MAXSD); break; 3151 case IR_MAX: asm_fparith(as, ir, XO_MAXSD); break;
3134 3152
3135 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: case IR_POWI: 3153 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
3136 asm_fpmath(as, ir); 3154 asm_fpmath(as, ir);
3137 break; 3155 break;
3156 case IR_POWI: asm_powi(as, ir); break;
3138 3157
3139 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ 3158 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
3140 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; 3159 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
@@ -3285,8 +3304,22 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3285 if (inloop) 3304 if (inloop)
3286 as->modset = RSET_SCRATCH; 3305 as->modset = RSET_SCRATCH;
3287 break; 3306 break;
3307 case IR_POWI:
3308 ir->prev = REGSP_HINT(RID_XMM0);
3309 if (inloop)
3310 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
3311 continue;
3288 case IR_FPMATH: 3312 case IR_FPMATH:
3289 if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { 3313 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */
3314 ir->prev = REGSP_HINT(RID_XMM0);
3315#if !LJ_64
3316 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
3317 as->evenspill = 4;
3318#endif
3319 if (inloop)
3320 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
3321 continue;
3322 } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
3290 ir->prev = REGSP_HINT(RID_XMM0); 3323 ir->prev = REGSP_HINT(RID_XMM0);
3291 if (inloop) 3324 if (inloop)
3292 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); 3325 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 07adc36d..ed375747 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -34,16 +34,13 @@ LJ_ASMF void lj_vm_exit_handler(void);
34LJ_ASMF void lj_vm_exit_interp(void); 34LJ_ASMF void lj_vm_exit_interp(void);
35 35
36/* Handlers callable from compiled code. */ 36/* Handlers callable from compiled code. */
37LJ_ASMF void lj_vm_floor(void);
38LJ_ASMF void lj_vm_ceil(void);
39LJ_ASMF void lj_vm_trunc(void);
40LJ_ASMF void lj_vm_floor_sse(void); 37LJ_ASMF void lj_vm_floor_sse(void);
41LJ_ASMF void lj_vm_ceil_sse(void); 38LJ_ASMF void lj_vm_ceil_sse(void);
42LJ_ASMF void lj_vm_trunc_sse(void); 39LJ_ASMF void lj_vm_trunc_sse(void);
43LJ_ASMF void lj_vm_exp(void); 40LJ_ASMF void lj_vm_exp(void);
44LJ_ASMF void lj_vm_exp2(void); 41LJ_ASMF void lj_vm_exp2(void);
45LJ_ASMF void lj_vm_pow(void); 42LJ_ASMF void lj_vm_pow_sse(void);
46LJ_ASMF void lj_vm_powi(void); 43LJ_ASMF void lj_vm_powi_sse(void);
47 44
48/* Call gates for functions. */ 45/* Call gates for functions. */
49LJ_ASMF void lj_gate_lf(void); 46LJ_ASMF void lj_gate_lf(void);