diff options
Diffstat (limited to 'src/vm_x86.dasc')
-rw-r--r-- | src/vm_x86.dasc | 1788 |
1 files changed, 613 insertions, 1175 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 5b3356dc..bda9d7d7 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -18,7 +18,6 @@ | |||
18 | | | 18 | | |
19 | |.if P64 | 19 | |.if P64 |
20 | |.define X64, 1 | 20 | |.define X64, 1 |
21 | |.define SSE, 1 | ||
22 | |.if WIN | 21 | |.if WIN |
23 | |.define X64WIN, 1 | 22 | |.define X64WIN, 1 |
24 | |.endif | 23 | |.endif |
@@ -116,24 +115,74 @@ | |||
116 | |.type NODE, Node | 115 | |.type NODE, Node |
117 | |.type NARGS, int | 116 | |.type NARGS, int |
118 | |.type TRACE, GCtrace | 117 | |.type TRACE, GCtrace |
118 | |.type SBUF, SBuf | ||
119 | | | 119 | | |
120 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 120 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
121 | |//----------------------------------------------------------------------- | 121 | |//----------------------------------------------------------------------- |
122 | |.if not X64 // x86 stack layout. | 122 | |.if not X64 // x86 stack layout. |
123 | | | 123 | | |
124 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | 124 | |.if WIN |
125 | | | ||
126 | |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). | ||
125 | |.macro saveregs_ | 127 | |.macro saveregs_ |
126 | | push edi; push esi; push ebx | 128 | | push edi; push esi; push ebx |
129 | | push extern lj_err_unwind_win | ||
130 | | fs; push dword [0] | ||
131 | | fs; mov [0], esp | ||
127 | | sub esp, CFRAME_SPACE | 132 | | sub esp, CFRAME_SPACE |
128 | |.endmacro | 133 | |.endmacro |
129 | |.macro saveregs | 134 | |.macro restoreregs |
130 | | push ebp; saveregs_ | 135 | | add esp, CFRAME_SPACE |
136 | | fs; pop dword [0] | ||
137 | | pop edi // Short for esp += 4. | ||
138 | | pop ebx; pop esi; pop edi; pop ebp | ||
139 | |.endmacro | ||
140 | | | ||
141 | |.else | ||
142 | | | ||
143 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | ||
144 | |.macro saveregs_ | ||
145 | | push edi; push esi; push ebx | ||
146 | | sub esp, CFRAME_SPACE | ||
131 | |.endmacro | 147 | |.endmacro |
132 | |.macro restoreregs | 148 | |.macro restoreregs |
133 | | add esp, CFRAME_SPACE | 149 | | add esp, CFRAME_SPACE |
134 | | pop ebx; pop esi; pop edi; pop ebp | 150 | | pop ebx; pop esi; pop edi; pop ebp |
135 | |.endmacro | 151 | |.endmacro |
136 | | | 152 | | |
153 | |.endif | ||
154 | | | ||
155 | |.macro saveregs | ||
156 | | push ebp; saveregs_ | ||
157 | |.endmacro | ||
158 | | | ||
159 | |.if WIN | ||
160 | |.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. | ||
161 | |.define SAVE_NRES, aword [esp+aword*18] | ||
162 | |.define SAVE_CFRAME, aword [esp+aword*17] | ||
163 | |.define SAVE_L, aword [esp+aword*16] | ||
164 | |//----- 16 byte aligned, ^^^ arguments from C caller | ||
165 | |.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. | ||
166 | |.define SAVE_R4, aword [esp+aword*14] | ||
167 | |.define SAVE_R3, aword [esp+aword*13] | ||
168 | |.define SAVE_R2, aword [esp+aword*12] | ||
169 | |//----- 16 byte aligned | ||
170 | |.define SAVE_R1, aword [esp+aword*11] | ||
171 | |.define SEH_FUNC, aword [esp+aword*10] | ||
172 | |.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. | ||
173 | |.define UNUSED2, aword [esp+aword*8] | ||
174 | |//----- 16 byte aligned | ||
175 | |.define UNUSED1, aword [esp+aword*7] | ||
176 | |.define SAVE_PC, aword [esp+aword*6] | ||
177 | |.define TMP2, aword [esp+aword*5] | ||
178 | |.define TMP1, aword [esp+aword*4] | ||
179 | |//----- 16 byte aligned | ||
180 | |.define ARG4, aword [esp+aword*3] | ||
181 | |.define ARG3, aword [esp+aword*2] | ||
182 | |.define ARG2, aword [esp+aword*1] | ||
183 | |.define ARG1, aword [esp] //<-- esp while in interpreter. | ||
184 | |//----- 16 byte aligned, ^^^ arguments for C callee | ||
185 | |.else | ||
137 | |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. | 186 | |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |
138 | |.define SAVE_NRES, aword [esp+aword*14] | 187 | |.define SAVE_NRES, aword [esp+aword*14] |
139 | |.define SAVE_CFRAME, aword [esp+aword*13] | 188 | |.define SAVE_CFRAME, aword [esp+aword*13] |
@@ -154,6 +203,7 @@ | |||
154 | |.define ARG2, aword [esp+aword*1] | 203 | |.define ARG2, aword [esp+aword*1] |
155 | |.define ARG1, aword [esp] //<-- esp while in interpreter. | 204 | |.define ARG1, aword [esp] //<-- esp while in interpreter. |
156 | |//----- 16 byte aligned, ^^^ arguments for C callee | 205 | |//----- 16 byte aligned, ^^^ arguments for C callee |
206 | |.endif | ||
157 | | | 207 | | |
158 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. | 208 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. |
159 | |.define FPARG3, qword [esp+qword*1] | 209 | |.define FPARG3, qword [esp+qword*1] |
@@ -389,7 +439,6 @@ | |||
389 | | fpop | 439 | | fpop |
390 | |.endmacro | 440 | |.endmacro |
391 | | | 441 | | |
392 | |.macro fdup; fld st0; .endmacro | ||
393 | |.macro fpop1; fstp st1; .endmacro | 442 | |.macro fpop1; fstp st1; .endmacro |
394 | | | 443 | | |
395 | |// Synthesize SSE FP constants. | 444 | |// Synthesize SSE FP constants. |
@@ -552,6 +601,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
552 | |.else | 601 | |.else |
553 | | mov eax, FCARG2 // Error return status for vm_pcall. | 602 | | mov eax, FCARG2 // Error return status for vm_pcall. |
554 | | mov esp, FCARG1 | 603 | | mov esp, FCARG1 |
604 | |.if WIN | ||
605 | | lea FCARG1, SEH_NEXT | ||
606 | | fs; mov [0], FCARG1 | ||
607 | |.endif | ||
555 | |.endif | 608 | |.endif |
556 | |->vm_unwind_c_eh: // Landing pad for external unwinder. | 609 | |->vm_unwind_c_eh: // Landing pad for external unwinder. |
557 | | mov L:RB, SAVE_L | 610 | | mov L:RB, SAVE_L |
@@ -575,6 +628,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
575 | |.else | 628 | |.else |
576 | | and FCARG1, CFRAME_RAWMASK | 629 | | and FCARG1, CFRAME_RAWMASK |
577 | | mov esp, FCARG1 | 630 | | mov esp, FCARG1 |
631 | |.if WIN | ||
632 | | lea FCARG1, SEH_NEXT | ||
633 | | fs; mov [0], FCARG1 | ||
634 | |.endif | ||
578 | |.endif | 635 | |.endif |
579 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | 636 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. |
580 | | mov L:RB, SAVE_L | 637 | | mov L:RB, SAVE_L |
@@ -588,6 +645,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
588 | | set_vmstate INTERP | 645 | | set_vmstate INTERP |
589 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. | 646 | | jmp ->vm_returnc // Increments RD/MULTRES and returns. |
590 | | | 647 | | |
648 | |.if WIN and not X64 | ||
649 | |->vm_rtlunwind@16: // Thin layer around RtlUnwind. | ||
650 | | // (void *cframe, void *excptrec, void *unwinder, int errcode) | ||
651 | | mov [esp], FCARG1 // Return value for RtlUnwind. | ||
652 | | push FCARG2 // Exception record for RtlUnwind. | ||
653 | | push 0 // Ignored by RtlUnwind. | ||
654 | | push dword [FCARG1+CFRAME_OFS_SEH] | ||
655 | | call extern RtlUnwind@16 // Violates ABI (clobbers too much). | ||
656 | | mov FCARG1, eax | ||
657 | | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). | ||
658 | | ret // Jump to unwinder. | ||
659 | |.endif | ||
660 | | | ||
591 | |//----------------------------------------------------------------------- | 661 | |//----------------------------------------------------------------------- |
592 | |//-- Grow stack for calls ----------------------------------------------- | 662 | |//-- Grow stack for calls ----------------------------------------------- |
593 | |//----------------------------------------------------------------------- | 663 | |//----------------------------------------------------------------------- |
@@ -643,17 +713,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
643 | | lea KBASEa, [esp+CFRAME_RESUME] | 713 | | lea KBASEa, [esp+CFRAME_RESUME] |
644 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 714 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. |
645 | | add DISPATCH, GG_G2DISP | 715 | | add DISPATCH, GG_G2DISP |
646 | | mov L:RB->cframe, KBASEa | ||
647 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. | 716 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. |
648 | | mov SAVE_CFRAME, RDa | 717 | | mov SAVE_CFRAME, RDa |
649 | |.if X64 | 718 | |.if X64 |
650 | | mov SAVE_NRES, RD | 719 | | mov SAVE_NRES, RD |
651 | | mov SAVE_ERRF, RD | 720 | | mov SAVE_ERRF, RD |
652 | |.endif | 721 | |.endif |
722 | | mov L:RB->cframe, KBASEa | ||
653 | | cmp byte L:RB->status, RDL | 723 | | cmp byte L:RB->status, RDL |
654 | | je >3 // Initial resume (like a call). | 724 | | je >2 // Initial resume (like a call). |
655 | | | 725 | | |
656 | | // Resume after yield (like a return). | 726 | | // Resume after yield (like a return). |
727 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
657 | | set_vmstate INTERP | 728 | | set_vmstate INTERP |
658 | | mov byte L:RB->status, RDL | 729 | | mov byte L:RB->status, RDL |
659 | | mov BASE, L:RB->base | 730 | | mov BASE, L:RB->base |
@@ -693,20 +764,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
693 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! | 764 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! |
694 | |.endif | 765 | |.endif |
695 | | | 766 | | |
767 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
696 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 768 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
697 | | mov SAVE_CFRAME, KBASEa | 769 | | mov SAVE_CFRAME, KBASEa |
698 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | 770 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. |
771 | | add DISPATCH, GG_G2DISP | ||
699 | |.if X64 | 772 | |.if X64 |
700 | | mov L:RB->cframe, rsp | 773 | | mov L:RB->cframe, rsp |
701 | |.else | 774 | |.else |
702 | | mov L:RB->cframe, esp | 775 | | mov L:RB->cframe, esp |
703 | |.endif | 776 | |.endif |
704 | | | 777 | | |
705 | |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). | 778 | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). |
706 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 779 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB |
707 | | add DISPATCH, GG_G2DISP | ||
708 | | | ||
709 | |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype). | ||
710 | | set_vmstate INTERP | 780 | | set_vmstate INTERP |
711 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | 781 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). |
712 | | add PC, RA | 782 | | add PC, RA |
@@ -744,14 +814,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
744 | | | 814 | | |
745 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | 815 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). |
746 | | sub KBASE, L:RB->top | 816 | | sub KBASE, L:RB->top |
817 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
747 | | mov SAVE_ERRF, 0 // No error function. | 818 | | mov SAVE_ERRF, 0 // No error function. |
748 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. | 819 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. |
820 | | add DISPATCH, GG_G2DISP | ||
749 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | 821 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). |
750 | | | 822 | | |
751 | |.if X64 | 823 | |.if X64 |
752 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 824 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
753 | | mov SAVE_CFRAME, KBASEa | 825 | | mov SAVE_CFRAME, KBASEa |
754 | | mov L:RB->cframe, rsp | 826 | | mov L:RB->cframe, rsp |
827 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
755 | | | 828 | | |
756 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 829 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
757 | |.else | 830 | |.else |
@@ -762,6 +835,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
762 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | 835 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. |
763 | | mov SAVE_CFRAME, KBASE | 836 | | mov SAVE_CFRAME, KBASE |
764 | | mov L:RB->cframe, esp | 837 | | mov L:RB->cframe, esp |
838 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
765 | | | 839 | | |
766 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) | 840 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) |
767 | |.endif | 841 | |.endif |
@@ -869,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
869 | |.if DUALNUM | 943 | |.if DUALNUM |
870 | | mov TMP2, LJ_TISNUM | 944 | | mov TMP2, LJ_TISNUM |
871 | | mov TMP1, RC | 945 | | mov TMP1, RC |
872 | |.elif SSE | 946 | |.else |
873 | | cvtsi2sd xmm0, RC | 947 | | cvtsi2sd xmm0, RC |
874 | | movsd TMPQ, xmm0 | 948 | | movsd TMPQ, xmm0 |
875 | |.else | ||
876 | | mov ARG4, RC | ||
877 | | fild ARG4 | ||
878 | | fstp TMPQ | ||
879 | |.endif | 949 | |.endif |
880 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 950 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
881 | | jmp >1 | 951 | | jmp >1 |
@@ -929,6 +999,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
929 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). | 999 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). |
930 | | jmp ->vm_call_dispatch_f | 1000 | | jmp ->vm_call_dispatch_f |
931 | | | 1001 | | |
1002 | |->vmeta_tgetr: | ||
1003 | | mov FCARG1, TAB:RB | ||
1004 | | mov RB, BASE // Save BASE. | ||
1005 | | mov FCARG2, RC // Caveat: FCARG2 == BASE | ||
1006 | | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) | ||
1007 | | // cTValue * or NULL returned in eax (RC). | ||
1008 | | movzx RA, PC_RA | ||
1009 | | mov BASE, RB // Restore BASE. | ||
1010 | | test RC, RC | ||
1011 | | jnz ->BC_TGETR_Z | ||
1012 | | mov dword [BASE+RA*8+4], LJ_TNIL | ||
1013 | | jmp ->BC_TGETR2_Z | ||
1014 | | | ||
932 | |//----------------------------------------------------------------------- | 1015 | |//----------------------------------------------------------------------- |
933 | | | 1016 | | |
934 | |->vmeta_tsets: | 1017 | |->vmeta_tsets: |
@@ -948,13 +1031,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
948 | |.if DUALNUM | 1031 | |.if DUALNUM |
949 | | mov TMP2, LJ_TISNUM | 1032 | | mov TMP2, LJ_TISNUM |
950 | | mov TMP1, RC | 1033 | | mov TMP1, RC |
951 | |.elif SSE | 1034 | |.else |
952 | | cvtsi2sd xmm0, RC | 1035 | | cvtsi2sd xmm0, RC |
953 | | movsd TMPQ, xmm0 | 1036 | | movsd TMPQ, xmm0 |
954 | |.else | ||
955 | | mov ARG4, RC | ||
956 | | fild ARG4 | ||
957 | | fstp TMPQ | ||
958 | |.endif | 1037 | |.endif |
959 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 1038 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
960 | | jmp >1 | 1039 | | jmp >1 |
@@ -1020,6 +1099,33 @@ static void build_subroutines(BuildCtx *ctx) | |||
1020 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). | 1099 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). |
1021 | | jmp ->vm_call_dispatch_f | 1100 | | jmp ->vm_call_dispatch_f |
1022 | | | 1101 | | |
1102 | |->vmeta_tsetr: | ||
1103 | |.if X64WIN | ||
1104 | | mov L:CARG1d, SAVE_L | ||
1105 | | mov CARG3d, RC | ||
1106 | | mov L:CARG1d->base, BASE | ||
1107 | | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. | ||
1108 | |.elif X64 | ||
1109 | | mov L:CARG1d, SAVE_L | ||
1110 | | mov CARG2d, TAB:RB | ||
1111 | | mov L:CARG1d->base, BASE | ||
1112 | | mov RB, BASE // Save BASE. | ||
1113 | | mov CARG3d, RC // Caveat: CARG3d == BASE. | ||
1114 | |.else | ||
1115 | | mov L:RA, SAVE_L | ||
1116 | | mov ARG2, TAB:RB | ||
1117 | | mov RB, BASE // Save BASE. | ||
1118 | | mov ARG3, RC | ||
1119 | | mov ARG1, L:RA | ||
1120 | | mov L:RA->base, BASE | ||
1121 | |.endif | ||
1122 | | mov SAVE_PC, PC | ||
1123 | | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
1124 | | // TValue * returned in eax (RC). | ||
1125 | | movzx RA, PC_RA | ||
1126 | | mov BASE, RB // Restore BASE. | ||
1127 | | jmp ->BC_TSETR_Z | ||
1128 | | | ||
1023 | |//-- Comparison metamethods --------------------------------------------- | 1129 | |//-- Comparison metamethods --------------------------------------------- |
1024 | | | 1130 | | |
1025 | |->vmeta_comp: | 1131 | |->vmeta_comp: |
@@ -1114,6 +1220,26 @@ static void build_subroutines(BuildCtx *ctx) | |||
1114 | | jmp <3 | 1220 | | jmp <3 |
1115 | |.endif | 1221 | |.endif |
1116 | | | 1222 | | |
1223 | |->vmeta_istype: | ||
1224 | |.if X64 | ||
1225 | | mov L:RB, SAVE_L | ||
1226 | | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | ||
1227 | | mov CARG2d, RA | ||
1228 | | movzx CARG3d, PC_RD | ||
1229 | | mov L:CARG1d, L:RB | ||
1230 | |.else | ||
1231 | | movzx RD, PC_RD | ||
1232 | | mov ARG2, RA | ||
1233 | | mov L:RB, SAVE_L | ||
1234 | | mov ARG3, RD | ||
1235 | | mov ARG1, L:RB | ||
1236 | | mov L:RB->base, BASE | ||
1237 | |.endif | ||
1238 | | mov SAVE_PC, PC | ||
1239 | | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
1240 | | mov BASE, L:RB->base | ||
1241 | | jmp <6 | ||
1242 | | | ||
1117 | |//-- Arithmetic metamethods --------------------------------------------- | 1243 | |//-- Arithmetic metamethods --------------------------------------------- |
1118 | | | 1244 | | |
1119 | |->vmeta_arith_vno: | 1245 | |->vmeta_arith_vno: |
@@ -1290,19 +1416,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1290 | | cmp NARGS:RD, 2+1; jb ->fff_fallback | 1416 | | cmp NARGS:RD, 2+1; jb ->fff_fallback |
1291 | |.endmacro | 1417 | |.endmacro |
1292 | | | 1418 | | |
1293 | |.macro .ffunc_n, name | ||
1294 | | .ffunc_1 name | ||
1295 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1296 | | fld qword [BASE] | ||
1297 | |.endmacro | ||
1298 | | | ||
1299 | |.macro .ffunc_n, name, op | ||
1300 | | .ffunc_1 name | ||
1301 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1302 | | op | ||
1303 | | fld qword [BASE] | ||
1304 | |.endmacro | ||
1305 | | | ||
1306 | |.macro .ffunc_nsse, name, op | 1419 | |.macro .ffunc_nsse, name, op |
1307 | | .ffunc_1 name | 1420 | | .ffunc_1 name |
1308 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1421 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1313,14 +1426,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1313 | | .ffunc_nsse name, movsd | 1426 | | .ffunc_nsse name, movsd |
1314 | |.endmacro | 1427 | |.endmacro |
1315 | | | 1428 | | |
1316 | |.macro .ffunc_nn, name | ||
1317 | | .ffunc_2 name | ||
1318 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | ||
1319 | | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | ||
1320 | | fld qword [BASE] | ||
1321 | | fld qword [BASE+8] | ||
1322 | |.endmacro | ||
1323 | | | ||
1324 | |.macro .ffunc_nnsse, name | 1429 | |.macro .ffunc_nnsse, name |
1325 | | .ffunc_2 name | 1430 | | .ffunc_2 name |
1326 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1431 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
@@ -1418,7 +1523,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1418 | | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. | 1523 | | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. |
1419 | | mov [BASE-8], TAB:RB | 1524 | | mov [BASE-8], TAB:RB |
1420 | | mov RA, TAB:RB->hmask | 1525 | | mov RA, TAB:RB->hmask |
1421 | | and RA, STR:RC->hash | 1526 | | and RA, STR:RC->sid |
1422 | | imul RA, #NODE | 1527 | | imul RA, #NODE |
1423 | | add NODE:RA, TAB:RB->node | 1528 | | add NODE:RA, TAB:RB->node |
1424 | |3: // Rearranged logic, because we expect _not_ to find the key. | 1529 | |3: // Rearranged logic, because we expect _not_ to find the key. |
@@ -1526,11 +1631,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1526 | |.else | 1631 | |.else |
1527 | | jae ->fff_fallback | 1632 | | jae ->fff_fallback |
1528 | |.endif | 1633 | |.endif |
1529 | |.if SSE | ||
1530 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1634 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
1531 | |.else | ||
1532 | | fld qword [BASE]; jmp ->fff_resn | ||
1533 | |.endif | ||
1534 | | | 1635 | | |
1535 | |.ffunc_1 tostring | 1636 | |.ffunc_1 tostring |
1536 | | // Only handles the string or number case inline. | 1637 | | // Only handles the string or number case inline. |
@@ -1555,9 +1656,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1555 | |.endif | 1656 | |.endif |
1556 | | mov L:FCARG1, L:RB | 1657 | | mov L:FCARG1, L:RB |
1557 | |.if DUALNUM | 1658 | |.if DUALNUM |
1558 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) | 1659 | | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) |
1559 | |.else | 1660 | |.else |
1560 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | 1661 | | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) |
1561 | |.endif | 1662 | |.endif |
1562 | | // GCstr returned in eax (RD). | 1663 | | // GCstr returned in eax (RD). |
1563 | | mov BASE, L:RB->base | 1664 | | mov BASE, L:RB->base |
@@ -1569,55 +1670,35 @@ static void build_subroutines(BuildCtx *ctx) | |||
1569 | | je >2 // Missing 2nd arg? | 1670 | | je >2 // Missing 2nd arg? |
1570 | |1: | 1671 | |1: |
1571 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | 1672 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback |
1572 | | mov L:RB, SAVE_L | ||
1573 | | mov L:RB->base, BASE // Add frame since C call can throw. | ||
1574 | | mov L:RB->top, BASE // Dummy frame length is ok. | ||
1575 | | mov PC, [BASE-4] | 1673 | | mov PC, [BASE-4] |
1674 | | mov RB, BASE // Save BASE. | ||
1576 | |.if X64WIN | 1675 | |.if X64WIN |
1577 | | lea CARG3d, [BASE+8] | 1676 | | mov CARG1d, [BASE] |
1578 | | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. | 1677 | | lea CARG3d, [BASE-8] |
1579 | | mov CARG1d, L:RB | 1678 | | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE. |
1580 | |.elif X64 | 1679 | |.elif X64 |
1581 | | mov CARG2d, [BASE] | 1680 | | mov CARG1d, [BASE] |
1582 | | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. | 1681 | | lea CARG2d, [BASE+8] |
1583 | | mov CARG1d, L:RB | 1682 | | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE. |
1584 | |.else | 1683 | |.else |
1585 | | mov TAB:RD, [BASE] | 1684 | | mov TAB:RD, [BASE] |
1586 | | mov ARG2, TAB:RD | 1685 | | mov ARG1, TAB:RD |
1587 | | mov ARG1, L:RB | ||
1588 | | add BASE, 8 | 1686 | | add BASE, 8 |
1687 | | mov ARG2, BASE | ||
1688 | | sub BASE, 8+8 | ||
1589 | | mov ARG3, BASE | 1689 | | mov ARG3, BASE |
1590 | |.endif | 1690 | |.endif |
1591 | | mov SAVE_PC, PC // Needed for ITERN fallback. | 1691 | | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) |
1592 | | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | 1692 | | // 1=found, 0=end, -1=error returned in eax (RD). |
1593 | | // Flag returned in eax (RD). | 1693 | | mov BASE, RB // Restore BASE. |
1594 | | mov BASE, L:RB->base | 1694 | | test RD, RD; jg ->fff_res2 // Found key/value. |
1595 | | test RD, RD; jz >3 // End of traversal? | 1695 | | js ->fff_fallback_2 // Invalid key. |
1596 | | // Copy key and value to results. | 1696 | | // End of traversal: return nil. |
1597 | |.if X64 | 1697 | | mov dword [BASE-4], LJ_TNIL |
1598 | | mov RBa, [BASE+8] | 1698 | | jmp ->fff_res1 |
1599 | | mov RDa, [BASE+16] | ||
1600 | | mov [BASE-8], RBa | ||
1601 | | mov [BASE], RDa | ||
1602 | |.else | ||
1603 | | mov RB, [BASE+8] | ||
1604 | | mov RD, [BASE+12] | ||
1605 | | mov [BASE-8], RB | ||
1606 | | mov [BASE-4], RD | ||
1607 | | mov RB, [BASE+16] | ||
1608 | | mov RD, [BASE+20] | ||
1609 | | mov [BASE], RB | ||
1610 | | mov [BASE+4], RD | ||
1611 | |.endif | ||
1612 | |->fff_res2: | ||
1613 | | mov RD, 1+2 | ||
1614 | | jmp ->fff_res | ||
1615 | |2: // Set missing 2nd arg to nil. | 1699 | |2: // Set missing 2nd arg to nil. |
1616 | | mov dword [BASE+12], LJ_TNIL | 1700 | | mov dword [BASE+12], LJ_TNIL |
1617 | | jmp <1 | 1701 | | jmp <1 |
1618 | |3: // End of traversal: return nil. | ||
1619 | | mov dword [BASE-4], LJ_TNIL | ||
1620 | | jmp ->fff_res1 | ||
1621 | | | 1702 | | |
1622 | |.ffunc_1 pairs | 1703 | |.ffunc_1 pairs |
1623 | | mov TAB:RB, [BASE] | 1704 | | mov TAB:RB, [BASE] |
@@ -1648,19 +1729,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1648 | | add RD, 1 | 1729 | | add RD, 1 |
1649 | | mov dword [BASE-4], LJ_TISNUM | 1730 | | mov dword [BASE-4], LJ_TISNUM |
1650 | | mov dword [BASE-8], RD | 1731 | | mov dword [BASE-8], RD |
1651 | |.elif SSE | 1732 | |.else |
1652 | | movsd xmm0, qword [BASE+8] | 1733 | | movsd xmm0, qword [BASE+8] |
1653 | | sseconst_1 xmm1, RBa | 1734 | | sseconst_1 xmm1, RBa |
1654 | | addsd xmm0, xmm1 | 1735 | | addsd xmm0, xmm1 |
1655 | | cvtsd2si RD, xmm0 | 1736 | | cvttsd2si RD, xmm0 |
1656 | | movsd qword [BASE-8], xmm0 | 1737 | | movsd qword [BASE-8], xmm0 |
1657 | |.else | ||
1658 | | fld qword [BASE+8] | ||
1659 | | fld1 | ||
1660 | | faddp st1 | ||
1661 | | fist ARG1 | ||
1662 | | fstp qword [BASE-8] | ||
1663 | | mov RD, ARG1 | ||
1664 | |.endif | 1738 | |.endif |
1665 | | mov TAB:RB, [BASE] | 1739 | | mov TAB:RB, [BASE] |
1666 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | 1740 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? |
@@ -1678,7 +1752,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1678 | | mov [BASE], RB | 1752 | | mov [BASE], RB |
1679 | | mov [BASE+4], RD | 1753 | | mov [BASE+4], RD |
1680 | |.endif | 1754 | |.endif |
1681 | | jmp ->fff_res2 | 1755 | |->fff_res2: |
1756 | | mov RD, 1+2 | ||
1757 | | jmp ->fff_res | ||
1682 | |2: // Check for empty hash part first. Otherwise call C function. | 1758 | |2: // Check for empty hash part first. Otherwise call C function. |
1683 | | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | 1759 | | cmp dword TAB:RB->hmask, 0; je ->fff_res0 |
1684 | | mov FCARG1, TAB:RB | 1760 | | mov FCARG1, TAB:RB |
@@ -1707,12 +1783,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1707 | |.if DUALNUM | 1783 | |.if DUALNUM |
1708 | | mov dword [BASE+12], LJ_TISNUM | 1784 | | mov dword [BASE+12], LJ_TISNUM |
1709 | | mov dword [BASE+8], 0 | 1785 | | mov dword [BASE+8], 0 |
1710 | |.elif SSE | 1786 | |.else |
1711 | | xorps xmm0, xmm0 | 1787 | | xorps xmm0, xmm0 |
1712 | | movsd qword [BASE+8], xmm0 | 1788 | | movsd qword [BASE+8], xmm0 |
1713 | |.else | ||
1714 | | fldz | ||
1715 | | fstp qword [BASE+8] | ||
1716 | |.endif | 1789 | |.endif |
1717 | | mov RD, 1+3 | 1790 | | mov RD, 1+3 |
1718 | | jmp ->fff_res | 1791 | | jmp ->fff_res |
@@ -1819,7 +1892,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1819 | | mov ARG3, RA | 1892 | | mov ARG3, RA |
1820 | |.endif | 1893 | |.endif |
1821 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) | 1894 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) |
1822 | | set_vmstate INTERP | ||
1823 | | | 1895 | | |
1824 | | mov L:RB, SAVE_L | 1896 | | mov L:RB, SAVE_L |
1825 | |.if X64 | 1897 | |.if X64 |
@@ -1828,6 +1900,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1828 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. | 1900 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. |
1829 | |.endif | 1901 | |.endif |
1830 | | mov BASE, L:RB->base | 1902 | | mov BASE, L:RB->base |
1903 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
1904 | | set_vmstate INTERP | ||
1905 | | | ||
1831 | | cmp eax, LUA_YIELD | 1906 | | cmp eax, LUA_YIELD |
1832 | | ja >8 | 1907 | | ja >8 |
1833 | |4: | 1908 | |4: |
@@ -1942,12 +2017,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1942 | |->fff_resi: // Dummy. | 2017 | |->fff_resi: // Dummy. |
1943 | |.endif | 2018 | |.endif |
1944 | | | 2019 | | |
1945 | |.if SSE | ||
1946 | |->fff_resn: | 2020 | |->fff_resn: |
1947 | | mov PC, [BASE-4] | 2021 | | mov PC, [BASE-4] |
1948 | | fstp qword [BASE-8] | 2022 | | fstp qword [BASE-8] |
1949 | | jmp ->fff_res1 | 2023 | | jmp ->fff_res1 |
1950 | |.endif | ||
1951 | | | 2024 | | |
1952 | | .ffunc_1 math_abs | 2025 | | .ffunc_1 math_abs |
1953 | |.if DUALNUM | 2026 | |.if DUALNUM |
@@ -1971,8 +2044,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1971 | |.else | 2044 | |.else |
1972 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2045 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1973 | |.endif | 2046 | |.endif |
1974 | | | ||
1975 | |.if SSE | ||
1976 | | movsd xmm0, qword [BASE] | 2047 | | movsd xmm0, qword [BASE] |
1977 | | sseconst_abs xmm1, RDa | 2048 | | sseconst_abs xmm1, RDa |
1978 | | andps xmm0, xmm1 | 2049 | | andps xmm0, xmm1 |
@@ -1980,15 +2051,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1980 | | mov PC, [BASE-4] | 2051 | | mov PC, [BASE-4] |
1981 | | movsd qword [BASE-8], xmm0 | 2052 | | movsd qword [BASE-8], xmm0 |
1982 | | // fallthrough | 2053 | | // fallthrough |
1983 | |.else | ||
1984 | | fld qword [BASE] | ||
1985 | | fabs | ||
1986 | | // fallthrough | ||
1987 | |->fff_resxmm0: // Dummy. | ||
1988 | |->fff_resn: | ||
1989 | | mov PC, [BASE-4] | ||
1990 | | fstp qword [BASE-8] | ||
1991 | |.endif | ||
1992 | | | 2054 | | |
1993 | |->fff_res1: | 2055 | |->fff_res1: |
1994 | | mov RD, 1+1 | 2056 | | mov RD, 1+1 |
@@ -2015,6 +2077,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2015 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. | 2077 | | mov RAa, -8 // Results start at BASE+RA = BASE-8. |
2016 | | jmp ->vm_return | 2078 | | jmp ->vm_return |
2017 | | | 2079 | | |
2080 | |.if X64 | ||
2081 | |.define fff_resfp, fff_resxmm0 | ||
2082 | |.else | ||
2083 | |.define fff_resfp, fff_resn | ||
2084 | |.endif | ||
2085 | | | ||
2018 | |.macro math_round, func | 2086 | |.macro math_round, func |
2019 | | .ffunc math_ .. func | 2087 | | .ffunc math_ .. func |
2020 | |.if DUALNUM | 2088 | |.if DUALNUM |
@@ -2025,107 +2093,75 @@ static void build_subroutines(BuildCtx *ctx) | |||
2025 | |.else | 2093 | |.else |
2026 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2094 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2027 | |.endif | 2095 | |.endif |
2028 | |.if SSE | ||
2029 | | movsd xmm0, qword [BASE] | 2096 | | movsd xmm0, qword [BASE] |
2030 | | call ->vm_ .. func | 2097 | | call ->vm_ .. func .. _sse |
2031 | | .if DUALNUM | 2098 | |.if DUALNUM |
2032 | | cvtsd2si RB, xmm0 | 2099 | | cvttsd2si RB, xmm0 |
2033 | | cmp RB, 0x80000000 | 2100 | | cmp RB, 0x80000000 |
2034 | | jne ->fff_resi | 2101 | | jne ->fff_resi |
2035 | | cvtsi2sd xmm1, RB | 2102 | | cvtsi2sd xmm1, RB |
2036 | | ucomisd xmm0, xmm1 | 2103 | | ucomisd xmm0, xmm1 |
2037 | | jp ->fff_resxmm0 | 2104 | | jp ->fff_resxmm0 |
2038 | | je ->fff_resi | 2105 | | je ->fff_resi |
2039 | | .endif | ||
2040 | | jmp ->fff_resxmm0 | ||
2041 | |.else | ||
2042 | | fld qword [BASE] | ||
2043 | | call ->vm_ .. func | ||
2044 | | .if DUALNUM | ||
2045 | | fist ARG1 | ||
2046 | | mov RB, ARG1 | ||
2047 | | cmp RB, 0x80000000; jne >2 | ||
2048 | | fdup | ||
2049 | | fild ARG1 | ||
2050 | | fcomparepp | ||
2051 | | jp ->fff_resn | ||
2052 | | jne ->fff_resn | ||
2053 | |2: | ||
2054 | | fpop | ||
2055 | | jmp ->fff_resi | ||
2056 | | .else | ||
2057 | | jmp ->fff_resn | ||
2058 | | .endif | ||
2059 | |.endif | 2106 | |.endif |
2107 | | jmp ->fff_resxmm0 | ||
2060 | |.endmacro | 2108 | |.endmacro |
2061 | | | 2109 | | |
2062 | | math_round floor | 2110 | | math_round floor |
2063 | | math_round ceil | 2111 | | math_round ceil |
2064 | | | 2112 | | |
2065 | |.if SSE | ||
2066 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 2113 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
2067 | |.else | ||
2068 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | ||
2069 | |.endif | ||
2070 | | | 2114 | | |
2071 | |.ffunc math_log | 2115 | |.ffunc math_log |
2072 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 2116 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
2073 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2117 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2074 | | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn | 2118 | | movsd xmm0, qword [BASE] |
2075 | | | 2119 | |.if not X64 |
2076 | |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn | 2120 | | movsd FPARG1, xmm0 |
2077 | |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn | 2121 | |.endif |
2078 | | | 2122 | | mov RB, BASE |
2079 | |.ffunc_n math_sin; fsin; jmp ->fff_resn | 2123 | | call extern log |
2080 | |.ffunc_n math_cos; fcos; jmp ->fff_resn | 2124 | | mov BASE, RB |
2081 | |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn | 2125 | | jmp ->fff_resfp |
2082 | | | ||
2083 | |.ffunc_n math_asin | ||
2084 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan | ||
2085 | | jmp ->fff_resn | ||
2086 | |.ffunc_n math_acos | ||
2087 | | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan | ||
2088 | | jmp ->fff_resn | ||
2089 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | ||
2090 | | | 2126 | | |
2091 | |.macro math_extern, func | 2127 | |.macro math_extern, func |
2092 | |.if SSE | ||
2093 | | .ffunc_nsse math_ .. func | 2128 | | .ffunc_nsse math_ .. func |
2094 | | .if not X64 | 2129 | |.if not X64 |
2095 | | movsd FPARG1, xmm0 | 2130 | | movsd FPARG1, xmm0 |
2096 | | .endif | ||
2097 | |.else | ||
2098 | | .ffunc_n math_ .. func | ||
2099 | | fstp FPARG1 | ||
2100 | |.endif | 2131 | |.endif |
2101 | | mov RB, BASE | 2132 | | mov RB, BASE |
2102 | | call extern lj_vm_ .. func | 2133 | | call extern func |
2103 | | mov BASE, RB | 2134 | | mov BASE, RB |
2104 | | .if X64 | 2135 | | jmp ->fff_resfp |
2105 | | jmp ->fff_resxmm0 | ||
2106 | | .else | ||
2107 | | jmp ->fff_resn | ||
2108 | | .endif | ||
2109 | |.endmacro | 2136 | |.endmacro |
2110 | | | 2137 | | |
2138 | |.macro math_extern2, func | ||
2139 | | .ffunc_nnsse math_ .. func | ||
2140 | |.if not X64 | ||
2141 | | movsd FPARG1, xmm0 | ||
2142 | | movsd FPARG3, xmm1 | ||
2143 | |.endif | ||
2144 | | mov RB, BASE | ||
2145 | | call extern func | ||
2146 | | mov BASE, RB | ||
2147 | | jmp ->fff_resfp | ||
2148 | |.endmacro | ||
2149 | | | ||
2150 | | math_extern log10 | ||
2151 | | math_extern exp | ||
2152 | | math_extern sin | ||
2153 | | math_extern cos | ||
2154 | | math_extern tan | ||
2155 | | math_extern asin | ||
2156 | | math_extern acos | ||
2157 | | math_extern atan | ||
2111 | | math_extern sinh | 2158 | | math_extern sinh |
2112 | | math_extern cosh | 2159 | | math_extern cosh |
2113 | | math_extern tanh | 2160 | | math_extern tanh |
2161 | | math_extern2 pow | ||
2162 | | math_extern2 atan2 | ||
2163 | | math_extern2 fmod | ||
2114 | | | 2164 | | |
2115 | |->ff_math_deg: | ||
2116 | |.if SSE | ||
2117 | |.ffunc_nsse math_rad | ||
2118 | | mov CFUNC:RB, [BASE-8] | ||
2119 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] | ||
2120 | | jmp ->fff_resxmm0 | ||
2121 | |.else | ||
2122 | |.ffunc_n math_rad | ||
2123 | | mov CFUNC:RB, [BASE-8] | ||
2124 | | fmul qword CFUNC:RB->upvalue[0] | ||
2125 | | jmp ->fff_resn | ||
2126 | |.endif | ||
2127 | | | ||
2128 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | ||
2129 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2165 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
2130 | | | 2166 | | |
2131 | |.ffunc_1 math_frexp | 2167 | |.ffunc_1 math_frexp |
@@ -2140,65 +2176,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2140 | | cmp RB, 0x00200000; jb >4 | 2176 | | cmp RB, 0x00200000; jb >4 |
2141 | |1: | 2177 | |1: |
2142 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. | 2178 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. |
2143 | |.if SSE | ||
2144 | | cvtsi2sd xmm0, RB | 2179 | | cvtsi2sd xmm0, RB |
2145 | |.else | ||
2146 | | mov TMP1, RB; fild TMP1 | ||
2147 | |.endif | ||
2148 | | mov RB, [BASE-4] | 2180 | | mov RB, [BASE-4] |
2149 | | and RB, 0x800fffff // Mask off exponent. | 2181 | | and RB, 0x800fffff // Mask off exponent. |
2150 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | 2182 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. |
2151 | | mov [BASE-4], RB | 2183 | | mov [BASE-4], RB |
2152 | |2: | 2184 | |2: |
2153 | |.if SSE | ||
2154 | | movsd qword [BASE], xmm0 | 2185 | | movsd qword [BASE], xmm0 |
2155 | |.else | ||
2156 | | fstp qword [BASE] | ||
2157 | |.endif | ||
2158 | | mov RD, 1+2 | 2186 | | mov RD, 1+2 |
2159 | | jmp ->fff_res | 2187 | | jmp ->fff_res |
2160 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | 2188 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. |
2161 | |.if SSE | ||
2162 | | xorps xmm0, xmm0; jmp <2 | 2189 | | xorps xmm0, xmm0; jmp <2 |
2163 | |.else | ||
2164 | | fldz; jmp <2 | ||
2165 | |.endif | ||
2166 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | 2190 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. |
2167 | |.if SSE | ||
2168 | | movsd xmm0, qword [BASE] | 2191 | | movsd xmm0, qword [BASE] |
2169 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. | 2192 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. |
2170 | | mulsd xmm0, xmm1 | 2193 | | mulsd xmm0, xmm1 |
2171 | | movsd qword [BASE-8], xmm0 | 2194 | | movsd qword [BASE-8], xmm0 |
2172 | |.else | ||
2173 | | fld qword [BASE] | ||
2174 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 | ||
2175 | | fstp qword [BASE-8] | ||
2176 | |.endif | ||
2177 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | 2195 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 |
2178 | | | 2196 | | |
2179 | |.if SSE | ||
2180 | |.ffunc_nsse math_modf | 2197 | |.ffunc_nsse math_modf |
2181 | |.else | ||
2182 | |.ffunc_n math_modf | ||
2183 | |.endif | ||
2184 | | mov RB, [BASE+4] | 2198 | | mov RB, [BASE+4] |
2185 | | mov PC, [BASE-4] | 2199 | | mov PC, [BASE-4] |
2186 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | 2200 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? |
2187 | |.if SSE | ||
2188 | | movaps xmm4, xmm0 | 2201 | | movaps xmm4, xmm0 |
2189 | | call ->vm_trunc | 2202 | | call ->vm_trunc_sse |
2190 | | subsd xmm4, xmm0 | 2203 | | subsd xmm4, xmm0 |
2191 | |1: | 2204 | |1: |
2192 | | movsd qword [BASE-8], xmm0 | 2205 | | movsd qword [BASE-8], xmm0 |
2193 | | movsd qword [BASE], xmm4 | 2206 | | movsd qword [BASE], xmm4 |
2194 | |.else | ||
2195 | | fdup | ||
2196 | | call ->vm_trunc | ||
2197 | | fsub st1, st0 | ||
2198 | |1: | ||
2199 | | fstp qword [BASE-8] | ||
2200 | | fstp qword [BASE] | ||
2201 | |.endif | ||
2202 | | mov RC, [BASE-4]; mov RB, [BASE+4] | 2207 | | mov RC, [BASE-4]; mov RB, [BASE+4] |
2203 | | xor RC, RB; js >3 // Need to adjust sign? | 2208 | | xor RC, RB; js >3 // Need to adjust sign? |
2204 | |2: | 2209 | |2: |
@@ -2208,25 +2213,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2208 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | 2213 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. |
2209 | | jmp <2 | 2214 | | jmp <2 |
2210 | |4: | 2215 | |4: |
2211 | |.if SSE | ||
2212 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2216 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
2213 | |.else | ||
2214 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. | ||
2215 | |.endif | ||
2216 | | | ||
2217 | |.ffunc_nnr math_fmod | ||
2218 | |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 | ||
2219 | | fpop1 | ||
2220 | | jmp ->fff_resn | ||
2221 | | | 2217 | | |
2222 | |.if SSE | 2218 | |.macro math_minmax, name, cmovop, sseop |
2223 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 | 2219 | | .ffunc_1 name |
2224 | |.else | ||
2225 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | ||
2226 | |.endif | ||
2227 | | | ||
2228 | |.macro math_minmax, name, cmovop, fcmovop, sseop | ||
2229 | | .ffunc name | ||
2230 | | mov RA, 2 | 2220 | | mov RA, 2 |
2231 | | cmp dword [BASE+4], LJ_TISNUM | 2221 | | cmp dword [BASE+4], LJ_TISNUM |
2232 | |.if DUALNUM | 2222 | |.if DUALNUM |
@@ -2242,12 +2232,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2242 | |3: | 2232 | |3: |
2243 | | ja ->fff_fallback | 2233 | | ja ->fff_fallback |
2244 | | // Convert intermediate result to number and continue below. | 2234 | | // Convert intermediate result to number and continue below. |
2245 | |.if SSE | ||
2246 | | cvtsi2sd xmm0, RB | 2235 | | cvtsi2sd xmm0, RB |
2247 | |.else | ||
2248 | | mov TMP1, RB | ||
2249 | | fild TMP1 | ||
2250 | |.endif | ||
2251 | | jmp >6 | 2236 | | jmp >6 |
2252 | |4: | 2237 | |4: |
2253 | | ja ->fff_fallback | 2238 | | ja ->fff_fallback |
@@ -2255,7 +2240,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2255 | | jae ->fff_fallback | 2240 | | jae ->fff_fallback |
2256 | |.endif | 2241 | |.endif |
2257 | | | 2242 | | |
2258 | |.if SSE | ||
2259 | | movsd xmm0, qword [BASE] | 2243 | | movsd xmm0, qword [BASE] |
2260 | |5: // Handle numbers or integers. | 2244 | |5: // Handle numbers or integers. |
2261 | | cmp RA, RD; jae ->fff_resxmm0 | 2245 | | cmp RA, RD; jae ->fff_resxmm0 |
@@ -2274,48 +2258,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2274 | | sseop xmm0, xmm1 | 2258 | | sseop xmm0, xmm1 |
2275 | | add RA, 1 | 2259 | | add RA, 1 |
2276 | | jmp <5 | 2260 | | jmp <5 |
2277 | |.else | ||
2278 | | fld qword [BASE] | ||
2279 | |5: // Handle numbers or integers. | ||
2280 | | cmp RA, RD; jae ->fff_resn | ||
2281 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | ||
2282 | |.if DUALNUM | ||
2283 | | jb >6 | ||
2284 | | ja >9 | ||
2285 | | fild dword [BASE+RA*8-8] | ||
2286 | | jmp >7 | ||
2287 | |.else | ||
2288 | | jae >9 | ||
2289 | |.endif | ||
2290 | |6: | ||
2291 | | fld qword [BASE+RA*8-8] | ||
2292 | |7: | ||
2293 | | fucomi st1; fcmovop st1; fpop1 | ||
2294 | | add RA, 1 | ||
2295 | | jmp <5 | ||
2296 | |.endif | ||
2297 | |.endmacro | 2261 | |.endmacro |
2298 | | | 2262 | | |
2299 | | math_minmax math_min, cmovg, fcmovnbe, minsd | 2263 | | math_minmax math_min, cmovg, minsd |
2300 | | math_minmax math_max, cmovl, fcmovbe, maxsd | 2264 | | math_minmax math_max, cmovl, maxsd |
2301 | |.if not SSE | ||
2302 | |9: | ||
2303 | | fpop; jmp ->fff_fallback | ||
2304 | |.endif | ||
2305 | | | 2265 | | |
2306 | |//-- String library ----------------------------------------------------- | 2266 | |//-- String library ----------------------------------------------------- |
2307 | | | 2267 | | |
2308 | |.ffunc_1 string_len | ||
2309 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2310 | | mov STR:RB, [BASE] | ||
2311 | |.if DUALNUM | ||
2312 | | mov RB, dword STR:RB->len; jmp ->fff_resi | ||
2313 | |.elif SSE | ||
2314 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | ||
2315 | |.else | ||
2316 | | fild dword STR:RB->len; jmp ->fff_resn | ||
2317 | |.endif | ||
2318 | | | ||
2319 | |.ffunc string_byte // Only handle the 1-arg case here. | 2268 | |.ffunc string_byte // Only handle the 1-arg case here. |
2320 | | cmp NARGS:RD, 1+1; jne ->fff_fallback | 2269 | | cmp NARGS:RD, 1+1; jne ->fff_fallback |
2321 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2270 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2326,10 +2275,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2326 | | movzx RB, byte STR:RB[1] | 2275 | | movzx RB, byte STR:RB[1] |
2327 | |.if DUALNUM | 2276 | |.if DUALNUM |
2328 | | jmp ->fff_resi | 2277 | | jmp ->fff_resi |
2329 | |.elif SSE | ||
2330 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | ||
2331 | |.else | 2278 | |.else |
2332 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2279 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
2333 | |.endif | 2280 | |.endif |
2334 | | | 2281 | | |
2335 | |.ffunc string_char // Only handle the 1-arg case here. | 2282 | |.ffunc string_char // Only handle the 1-arg case here. |
@@ -2341,16 +2288,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2341 | | mov RB, dword [BASE] | 2288 | | mov RB, dword [BASE] |
2342 | | cmp RB, 255; ja ->fff_fallback | 2289 | | cmp RB, 255; ja ->fff_fallback |
2343 | | mov TMP2, RB | 2290 | | mov TMP2, RB |
2344 | |.elif SSE | 2291 | |.else |
2345 | | jae ->fff_fallback | 2292 | | jae ->fff_fallback |
2346 | | cvttsd2si RB, qword [BASE] | 2293 | | cvttsd2si RB, qword [BASE] |
2347 | | cmp RB, 255; ja ->fff_fallback | 2294 | | cmp RB, 255; ja ->fff_fallback |
2348 | | mov TMP2, RB | 2295 | | mov TMP2, RB |
2349 | |.else | ||
2350 | | jae ->fff_fallback | ||
2351 | | fld qword [BASE] | ||
2352 | | fistp TMP2 | ||
2353 | | cmp TMP2, 255; ja ->fff_fallback | ||
2354 | |.endif | 2296 | |.endif |
2355 | |.if X64 | 2297 | |.if X64 |
2356 | | mov TMP3, 1 | 2298 | | mov TMP3, 1 |
@@ -2371,6 +2313,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2371 | |.endif | 2313 | |.endif |
2372 | | mov SAVE_PC, PC | 2314 | | mov SAVE_PC, PC |
2373 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | 2315 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) |
2316 | |->fff_resstr: | ||
2374 | | // GCstr * returned in eax (RD). | 2317 | | // GCstr * returned in eax (RD). |
2375 | | mov BASE, L:RB->base | 2318 | | mov BASE, L:RB->base |
2376 | | mov PC, [BASE-4] | 2319 | | mov PC, [BASE-4] |
@@ -2388,14 +2331,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2388 | | jne ->fff_fallback | 2331 | | jne ->fff_fallback |
2389 | | mov RB, dword [BASE+16] | 2332 | | mov RB, dword [BASE+16] |
2390 | | mov TMP2, RB | 2333 | | mov TMP2, RB |
2391 | |.elif SSE | 2334 | |.else |
2392 | | jae ->fff_fallback | 2335 | | jae ->fff_fallback |
2393 | | cvttsd2si RB, qword [BASE+16] | 2336 | | cvttsd2si RB, qword [BASE+16] |
2394 | | mov TMP2, RB | 2337 | | mov TMP2, RB |
2395 | |.else | ||
2396 | | jae ->fff_fallback | ||
2397 | | fld qword [BASE+16] | ||
2398 | | fistp TMP2 | ||
2399 | |.endif | 2338 | |.endif |
2400 | |1: | 2339 | |1: |
2401 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2340 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2410,12 +2349,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2410 | | mov RB, STR:RB->len | 2349 | | mov RB, STR:RB->len |
2411 | |.if DUALNUM | 2350 | |.if DUALNUM |
2412 | | mov RA, dword [BASE+8] | 2351 | | mov RA, dword [BASE+8] |
2413 | |.elif SSE | ||
2414 | | cvttsd2si RA, qword [BASE+8] | ||
2415 | |.else | 2352 | |.else |
2416 | | fld qword [BASE+8] | 2353 | | cvttsd2si RA, qword [BASE+8] |
2417 | | fistp ARG3 | ||
2418 | | mov RA, ARG3 | ||
2419 | |.endif | 2354 | |.endif |
2420 | | mov RC, TMP2 | 2355 | | mov RC, TMP2 |
2421 | | cmp RB, RC // len < end? (unsigned compare) | 2356 | | cmp RB, RC // len < end? (unsigned compare) |
@@ -2459,136 +2394,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2459 | | xor RC, RC // Zero length. Any ptr in RB is ok. | 2394 | | xor RC, RC // Zero length. Any ptr in RB is ok. |
2460 | | jmp <4 | 2395 | | jmp <4 |
2461 | | | 2396 | | |
2462 | |.ffunc string_rep // Only handle the 1-char case inline. | 2397 | |.macro ffstring_op, name |
2398 | | .ffunc_1 string_ .. name | ||
2463 | | ffgccheck | 2399 | | ffgccheck |
2464 | | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments. | ||
2465 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2400 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2466 | | cmp dword [BASE+12], LJ_TISNUM | 2401 | | mov L:RB, SAVE_L |
2467 | | mov STR:RB, [BASE] | 2402 | | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] |
2468 | |.if DUALNUM | 2403 | | mov L:RB->base, BASE |
2469 | | jne ->fff_fallback | 2404 | | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE |
2470 | | mov RC, dword [BASE+8] | 2405 | | mov RCa, SBUF:FCARG1->b |
2471 | |.elif SSE | 2406 | | mov SBUF:FCARG1->L, L:RB |
2472 | | jae ->fff_fallback | 2407 | | mov SBUF:FCARG1->w, RCa |
2473 | | cvttsd2si RC, qword [BASE+8] | 2408 | | mov SAVE_PC, PC |
2474 | |.else | 2409 | | call extern lj_buf_putstr_ .. name .. @8 |
2475 | | jae ->fff_fallback | 2410 | | mov FCARG1, eax |
2476 | | fld qword [BASE+8] | 2411 | | call extern lj_buf_tostr@4 |
2477 | | fistp TMP2 | 2412 | | jmp ->fff_resstr |
2478 | | mov RC, TMP2 | ||
2479 | |.endif | ||
2480 | | test RC, RC | ||
2481 | | jle ->fff_emptystr // Count <= 0? (or non-int) | ||
2482 | | cmp dword STR:RB->len, 1 | ||
2483 | | jb ->fff_emptystr // Zero length string? | ||
2484 | | jne ->fff_fallback_2 // Fallback for > 1-char strings. | ||
2485 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 | ||
2486 | | movzx RA, byte STR:RB[1] | ||
2487 | | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2488 | |.if X64 | ||
2489 | | mov TMP3, RC | ||
2490 | |.else | ||
2491 | | mov ARG3, RC | ||
2492 | |.endif | ||
2493 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
2494 | | mov [RB], RAL | ||
2495 | | add RB, 1 | ||
2496 | | sub RC, 1 | ||
2497 | | jnz <1 | ||
2498 | | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2499 | | jmp ->fff_newstr | ||
2500 | | | ||
2501 | |.ffunc_1 string_reverse | ||
2502 | | ffgccheck | ||
2503 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2504 | | mov STR:RB, [BASE] | ||
2505 | | mov RC, STR:RB->len | ||
2506 | | test RC, RC | ||
2507 | | jz ->fff_emptystr // Zero length string? | ||
2508 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
2509 | | add RB, #STR | ||
2510 | | mov TMP2, PC // Need another temp register. | ||
2511 | |.if X64 | ||
2512 | | mov TMP3, RC | ||
2513 | |.else | ||
2514 | | mov ARG3, RC | ||
2515 | |.endif | ||
2516 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2517 | |1: | ||
2518 | | movzx RA, byte [RB] | ||
2519 | | add RB, 1 | ||
2520 | | sub RC, 1 | ||
2521 | | mov [PC+RC], RAL | ||
2522 | | jnz <1 | ||
2523 | | mov RD, PC | ||
2524 | | mov PC, TMP2 | ||
2525 | | jmp ->fff_newstr | ||
2526 | | | ||
2527 | |.macro ffstring_case, name, lo, hi | ||
2528 | | .ffunc_1 name | ||
2529 | | ffgccheck | ||
2530 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2531 | | mov STR:RB, [BASE] | ||
2532 | | mov RC, STR:RB->len | ||
2533 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
2534 | | add RB, #STR | ||
2535 | | mov TMP2, PC // Need another temp register. | ||
2536 | |.if X64 | ||
2537 | | mov TMP3, RC | ||
2538 | |.else | ||
2539 | | mov ARG3, RC | ||
2540 | |.endif | ||
2541 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2542 | | jmp >3 | ||
2543 | |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). | ||
2544 | | movzx RA, byte [RB+RC] | ||
2545 | | cmp RA, lo | ||
2546 | | jb >2 | ||
2547 | | cmp RA, hi | ||
2548 | | ja >2 | ||
2549 | | xor RA, 0x20 | ||
2550 | |2: | ||
2551 | | mov [PC+RC], RAL | ||
2552 | |3: | ||
2553 | | sub RC, 1 | ||
2554 | | jns <1 | ||
2555 | | mov RD, PC | ||
2556 | | mov PC, TMP2 | ||
2557 | | jmp ->fff_newstr | ||
2558 | |.endmacro | 2413 | |.endmacro |
2559 | | | 2414 | | |
2560 | |ffstring_case string_lower, 0x41, 0x5a | 2415 | |ffstring_op reverse |
2561 | |ffstring_case string_upper, 0x61, 0x7a | 2416 | |ffstring_op lower |
2562 | | | 2417 | |ffstring_op upper |
2563 | |//-- Table library ------------------------------------------------------ | ||
2564 | | | ||
2565 | |.ffunc_1 table_getn | ||
2566 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | ||
2567 | | mov RB, BASE // Save BASE. | ||
2568 | | mov TAB:FCARG1, [BASE] | ||
2569 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | ||
2570 | | // Length of table returned in eax (RD). | ||
2571 | | mov BASE, RB // Restore BASE. | ||
2572 | |.if DUALNUM | ||
2573 | | mov RB, RD; jmp ->fff_resi | ||
2574 | |.elif SSE | ||
2575 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | ||
2576 | |.else | ||
2577 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn | ||
2578 | |.endif | ||
2579 | | | 2418 | | |
2580 | |//-- Bit library -------------------------------------------------------- | 2419 | |//-- Bit library -------------------------------------------------------- |
2581 | | | 2420 | | |
2582 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). | ||
2583 | | | ||
2584 | |.macro .ffunc_bit, name, kind, fdef | 2421 | |.macro .ffunc_bit, name, kind, fdef |
2585 | | fdef name | 2422 | | fdef name |
2586 | |.if kind == 2 | 2423 | |.if kind == 2 |
2587 | |.if SSE | ||
2588 | | sseconst_tobit xmm1, RBa | 2424 | | sseconst_tobit xmm1, RBa |
2589 | |.else | ||
2590 | | mov TMP1, TOBIT_BIAS | ||
2591 | |.endif | ||
2592 | |.endif | 2425 | |.endif |
2593 | | cmp dword [BASE+4], LJ_TISNUM | 2426 | | cmp dword [BASE+4], LJ_TISNUM |
2594 | |.if DUALNUM | 2427 | |.if DUALNUM |
@@ -2604,24 +2437,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
2604 | |.else | 2437 | |.else |
2605 | | jae ->fff_fallback | 2438 | | jae ->fff_fallback |
2606 | |.endif | 2439 | |.endif |
2607 | |.if SSE | ||
2608 | | movsd xmm0, qword [BASE] | 2440 | | movsd xmm0, qword [BASE] |
2609 | |.if kind < 2 | 2441 | |.if kind < 2 |
2610 | | sseconst_tobit xmm1, RBa | 2442 | | sseconst_tobit xmm1, RBa |
2611 | |.endif | 2443 | |.endif |
2612 | | addsd xmm0, xmm1 | 2444 | | addsd xmm0, xmm1 |
2613 | | movd RB, xmm0 | 2445 | | movd RB, xmm0 |
2614 | |.else | ||
2615 | | fld qword [BASE] | ||
2616 | |.if kind < 2 | ||
2617 | | mov TMP1, TOBIT_BIAS | ||
2618 | |.endif | ||
2619 | | fadd TMP1 | ||
2620 | | fstp FPARG1 | ||
2621 | |.if kind > 0 | ||
2622 | | mov RB, ARG1 | ||
2623 | |.endif | ||
2624 | |.endif | ||
2625 | |2: | 2446 | |2: |
2626 | |.endmacro | 2447 | |.endmacro |
2627 | | | 2448 | | |
@@ -2630,15 +2451,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2630 | |.endmacro | 2451 | |.endmacro |
2631 | | | 2452 | | |
2632 | |.ffunc_bit bit_tobit, 0 | 2453 | |.ffunc_bit bit_tobit, 0 |
2633 | |.if DUALNUM or SSE | ||
2634 | |.if not SSE | ||
2635 | | mov RB, ARG1 | ||
2636 | |.endif | ||
2637 | | jmp ->fff_resbit | 2454 | | jmp ->fff_resbit |
2638 | |.else | ||
2639 | | fild ARG1 | ||
2640 | | jmp ->fff_resn | ||
2641 | |.endif | ||
2642 | | | 2455 | | |
2643 | |.macro .ffunc_bit_op, name, ins | 2456 | |.macro .ffunc_bit_op, name, ins |
2644 | | .ffunc_bit name, 2 | 2457 | | .ffunc_bit name, 2 |
@@ -2658,17 +2471,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2658 | |.else | 2471 | |.else |
2659 | | jae ->fff_fallback_bit_op | 2472 | | jae ->fff_fallback_bit_op |
2660 | |.endif | 2473 | |.endif |
2661 | |.if SSE | ||
2662 | | movsd xmm0, qword [RD] | 2474 | | movsd xmm0, qword [RD] |
2663 | | addsd xmm0, xmm1 | 2475 | | addsd xmm0, xmm1 |
2664 | | movd RA, xmm0 | 2476 | | movd RA, xmm0 |
2665 | | ins RB, RA | 2477 | | ins RB, RA |
2666 | |.else | ||
2667 | | fld qword [RD] | ||
2668 | | fadd TMP1 | ||
2669 | | fstp FPARG1 | ||
2670 | | ins RB, ARG1 | ||
2671 | |.endif | ||
2672 | | sub RD, 8 | 2478 | | sub RD, 8 |
2673 | | jmp <1 | 2479 | | jmp <1 |
2674 | |.endmacro | 2480 | |.endmacro |
@@ -2685,15 +2491,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2685 | | not RB | 2491 | | not RB |
2686 | |.if DUALNUM | 2492 | |.if DUALNUM |
2687 | | jmp ->fff_resbit | 2493 | | jmp ->fff_resbit |
2688 | |.elif SSE | 2494 | |.else |
2689 | |->fff_resbit: | 2495 | |->fff_resbit: |
2690 | | cvtsi2sd xmm0, RB | 2496 | | cvtsi2sd xmm0, RB |
2691 | | jmp ->fff_resxmm0 | 2497 | | jmp ->fff_resxmm0 |
2692 | |.else | ||
2693 | |->fff_resbit: | ||
2694 | | mov ARG1, RB | ||
2695 | | fild ARG1 | ||
2696 | | jmp ->fff_resn | ||
2697 | |.endif | 2498 | |.endif |
2698 | | | 2499 | | |
2699 | |->fff_fallback_bit_op: | 2500 | |->fff_fallback_bit_op: |
@@ -2706,22 +2507,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2706 | | // Note: no inline conversion from number for 2nd argument! | 2507 | | // Note: no inline conversion from number for 2nd argument! |
2707 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | 2508 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback |
2708 | | mov RA, dword [BASE+8] | 2509 | | mov RA, dword [BASE+8] |
2709 | |.elif SSE | 2510 | |.else |
2710 | | .ffunc_nnsse name | 2511 | | .ffunc_nnsse name |
2711 | | sseconst_tobit xmm2, RBa | 2512 | | sseconst_tobit xmm2, RBa |
2712 | | addsd xmm0, xmm2 | 2513 | | addsd xmm0, xmm2 |
2713 | | addsd xmm1, xmm2 | 2514 | | addsd xmm1, xmm2 |
2714 | | movd RB, xmm0 | 2515 | | movd RB, xmm0 |
2715 | | movd RA, xmm1 | 2516 | | movd RA, xmm1 |
2716 | |.else | ||
2717 | | .ffunc_nn name | ||
2718 | | mov TMP1, TOBIT_BIAS | ||
2719 | | fadd TMP1 | ||
2720 | | fstp FPARG3 | ||
2721 | | fadd TMP1 | ||
2722 | | fstp FPARG1 | ||
2723 | | mov RA, ARG3 | ||
2724 | | mov RB, ARG1 | ||
2725 | |.endif | 2517 | |.endif |
2726 | | ins RB, cl // Assumes RA is ecx. | 2518 | | ins RB, cl // Assumes RA is ecx. |
2727 | | jmp ->fff_resbit | 2519 | | jmp ->fff_resbit |
@@ -2855,7 +2647,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2855 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | 2647 | | mov FCARG2, PC // Caveat: FCARG2 == BASE |
2856 | | mov FCARG1, L:RB | 2648 | | mov FCARG1, L:RB |
2857 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | 2649 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. |
2858 | | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) | 2650 | | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) |
2859 | |3: | 2651 | |3: |
2860 | | mov BASE, L:RB->base | 2652 | | mov BASE, L:RB->base |
2861 | |4: | 2653 | |4: |
@@ -2926,6 +2718,79 @@ static void build_subroutines(BuildCtx *ctx) | |||
2926 | | add NARGS:RD, 1 | 2718 | | add NARGS:RD, 1 |
2927 | | jmp RBa | 2719 | | jmp RBa |
2928 | | | 2720 | | |
2721 | |->cont_stitch: // Trace stitching. | ||
2722 | |.if JIT | ||
2723 | | // BASE = base, RC = result, RB = mbase | ||
2724 | | mov TRACE:RA, [RB-24] // Save previous trace. | ||
2725 | | mov TMP1, TRACE:RA | ||
2726 | | mov TMP3, DISPATCH // Need one more register. | ||
2727 | | mov DISPATCH, MULTRES | ||
2728 | | movzx RA, PC_RA | ||
2729 | | lea RA, [BASE+RA*8] // Call base. | ||
2730 | | sub DISPATCH, 1 | ||
2731 | | jz >2 | ||
2732 | |1: // Move results down. | ||
2733 | |.if X64 | ||
2734 | | mov RBa, [RC] | ||
2735 | | mov [RA], RBa | ||
2736 | |.else | ||
2737 | | mov RB, [RC] | ||
2738 | | mov [RA], RB | ||
2739 | | mov RB, [RC+4] | ||
2740 | | mov [RA+4], RB | ||
2741 | |.endif | ||
2742 | | add RC, 8 | ||
2743 | | add RA, 8 | ||
2744 | | sub DISPATCH, 1 | ||
2745 | | jnz <1 | ||
2746 | |2: | ||
2747 | | movzx RC, PC_RA | ||
2748 | | movzx RB, PC_RB | ||
2749 | | add RC, RB | ||
2750 | | lea RC, [BASE+RC*8-8] | ||
2751 | |3: | ||
2752 | | cmp RC, RA | ||
2753 | | ja >9 // More results wanted? | ||
2754 | | | ||
2755 | | mov DISPATCH, TMP3 | ||
2756 | | mov TRACE:RD, TMP1 // Get previous trace. | ||
2757 | | movzx RB, word TRACE:RD->traceno | ||
2758 | | movzx RD, word TRACE:RD->link | ||
2759 | | cmp RD, RB | ||
2760 | | je ->cont_nop // Blacklisted. | ||
2761 | | test RD, RD | ||
2762 | | jne =>BC_JLOOP // Jump to stitched trace. | ||
2763 | | | ||
2764 | | // Stitch a new trace to the previous trace. | ||
2765 | | mov [DISPATCH+DISPATCH_J(exitno)], RB | ||
2766 | | mov L:RB, SAVE_L | ||
2767 | | mov L:RB->base, BASE | ||
2768 | | mov FCARG2, PC | ||
2769 | | lea FCARG1, [DISPATCH+GG_DISP2J] | ||
2770 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | ||
2771 | | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) | ||
2772 | | mov BASE, L:RB->base | ||
2773 | | jmp ->cont_nop | ||
2774 | | | ||
2775 | |9: // Fill up results with nil. | ||
2776 | | mov dword [RA+4], LJ_TNIL | ||
2777 | | add RA, 8 | ||
2778 | | jmp <3 | ||
2779 | |.endif | ||
2780 | | | ||
2781 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2782 | #if LJ_HASPROFILE | ||
2783 | | mov L:RB, SAVE_L | ||
2784 | | mov L:RB->base, BASE | ||
2785 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | ||
2786 | | mov FCARG1, L:RB | ||
2787 | | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) | ||
2788 | | mov BASE, L:RB->base | ||
2789 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2790 | | sub PC, 4 | ||
2791 | | jmp ->cont_nop | ||
2792 | #endif | ||
2793 | | | ||
2929 | |//----------------------------------------------------------------------- | 2794 | |//----------------------------------------------------------------------- |
2930 | |//-- Trace exit handler ------------------------------------------------- | 2795 | |//-- Trace exit handler ------------------------------------------------- |
2931 | |//----------------------------------------------------------------------- | 2796 | |//----------------------------------------------------------------------- |
@@ -2978,10 +2843,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2978 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 | 2843 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 |
2979 | |.endif | 2844 | |.endif |
2980 | | // Caveat: RB is ebp. | 2845 | | // Caveat: RB is ebp. |
2981 | | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] | 2846 | | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] |
2982 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | 2847 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] |
2983 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | 2848 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa |
2984 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | ||
2985 | | mov L:RB->base, BASE | 2849 | | mov L:RB->base, BASE |
2986 | |.if X64WIN | 2850 | |.if X64WIN |
2987 | | lea CARG2, [rsp+4*8] | 2851 | | lea CARG2, [rsp+4*8] |
@@ -2991,6 +2855,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2991 | | lea FCARG2, [esp+16] | 2855 | | lea FCARG2, [esp+16] |
2992 | |.endif | 2856 | |.endif |
2993 | | lea FCARG1, [DISPATCH+GG_DISP2J] | 2857 | | lea FCARG1, [DISPATCH+GG_DISP2J] |
2858 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
2994 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) | 2859 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) |
2995 | | // MULTRES or negated error code returned in eax (RD). | 2860 | | // MULTRES or negated error code returned in eax (RD). |
2996 | | mov RAa, L:RB->cframe | 2861 | | mov RAa, L:RB->cframe |
@@ -3037,12 +2902,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
3037 | | mov r13, TMPa | 2902 | | mov r13, TMPa |
3038 | | mov r12, TMPQ | 2903 | | mov r12, TMPQ |
3039 | |.endif | 2904 | |.endif |
3040 | | test RD, RD; js >3 // Check for error from exit. | 2905 | | cmp RD, -LUA_ERRERR; jae >9 // Check for error from exit. |
2906 | | mov L:RB, SAVE_L | ||
3041 | | mov MULTRES, RD | 2907 | | mov MULTRES, RD |
3042 | | mov LFUNC:KBASE, [BASE-8] | 2908 | | mov LFUNC:KBASE, [BASE-8] |
3043 | | mov KBASE, LFUNC:KBASE->pc | 2909 | | mov KBASE, LFUNC:KBASE->pc |
3044 | | mov KBASE, [KBASE+PC2PROTO(k)] | 2910 | | mov KBASE, [KBASE+PC2PROTO(k)] |
3045 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | 2911 | | mov L:RB->base, BASE |
2912 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
3046 | | set_vmstate INTERP | 2913 | | set_vmstate INTERP |
3047 | | // Modified copy of ins_next which handles function header dispatch, too. | 2914 | | // Modified copy of ins_next which handles function header dispatch, too. |
3048 | | mov RC, [PC] | 2915 | | mov RC, [PC] |
@@ -3050,19 +2917,51 @@ static void build_subroutines(BuildCtx *ctx) | |||
3050 | | movzx OP, RCL | 2917 | | movzx OP, RCL |
3051 | | add PC, 4 | 2918 | | add PC, 4 |
3052 | | shr RC, 16 | 2919 | | shr RC, 16 |
2920 | | cmp MULTRES, -17 // Static dispatch? | ||
2921 | | je >5 | ||
3053 | | cmp OP, BC_FUNCF // Function header? | 2922 | | cmp OP, BC_FUNCF // Function header? |
3054 | | jb >2 | 2923 | | jb >3 |
3055 | | mov RC, MULTRES // RC/RD holds nres+1. | 2924 | | cmp OP, BC_FUNCC+2 // Fast function? |
2925 | | jae >4 | ||
3056 | |2: | 2926 | |2: |
2927 | | mov RC, MULTRES // RC/RD holds nres+1. | ||
2928 | |3: | ||
3057 | |.if X64 | 2929 | |.if X64 |
3058 | | jmp aword [DISPATCH+OP*8] | 2930 | | jmp aword [DISPATCH+OP*8] |
3059 | |.else | 2931 | |.else |
3060 | | jmp aword [DISPATCH+OP*4] | 2932 | | jmp aword [DISPATCH+OP*4] |
3061 | |.endif | 2933 | |.endif |
3062 | | | 2934 | | |
3063 | |3: // Rethrow error from the right C frame. | 2935 | |4: // Check frame below fast function. |
2936 | | mov RC, [BASE-4] | ||
2937 | | test RC, FRAME_TYPE | ||
2938 | | jnz <2 // Trace stitching continuation? | ||
2939 | | // Otherwise set KBASE for Lua function below fast function. | ||
2940 | | movzx RC, byte [RC-3] | ||
2941 | | not RCa | ||
2942 | | mov LFUNC:KBASE, [BASE+RC*8-8] | ||
2943 | | mov KBASE, LFUNC:KBASE->pc | ||
2944 | | mov KBASE, [KBASE+PC2PROTO(k)] | ||
2945 | | jmp <2 | ||
2946 | | | ||
2947 | |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. | ||
2948 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
2949 | | mov TRACE:RA, [RA+RD*4] | ||
2950 | | mov RC, TRACE:RA->startins | ||
2951 | | movzx RA, RCH | ||
2952 | | movzx OP, RCL | ||
2953 | | shr RC, 16 | ||
2954 | |.if X64 | ||
2955 | | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] | ||
2956 | |.else | ||
2957 | | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] | ||
2958 | |.endif | ||
2959 | | | ||
2960 | |9: // Rethrow error from the right C frame. | ||
2961 | | mov FCARG2, RD | ||
3064 | | mov FCARG1, L:RB | 2962 | | mov FCARG1, L:RB |
3065 | | call extern lj_err_run@4 // (lua_State *L) | 2963 | | neg FCARG2 |
2964 | | call extern lj_err_trace@8 // (lua_State *L, int errcode) | ||
3066 | |.endif | 2965 | |.endif |
3067 | | | 2966 | | |
3068 | |//----------------------------------------------------------------------- | 2967 | |//----------------------------------------------------------------------- |
@@ -3070,27 +2969,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
3070 | |//----------------------------------------------------------------------- | 2969 | |//----------------------------------------------------------------------- |
3071 | | | 2970 | | |
3072 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2971 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
3073 | |// and from JIT code. | 2972 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
3074 | | | 2973 | |.macro vm_round, name, mode, cond |
3075 | |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. | 2974 | |->name: |
3076 | |.macro vm_round_x87, mode1, mode2 | 2975 | |.if not X64 and cond |
3077 | | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. | 2976 | | movsd xmm0, qword [esp+4] |
3078 | | mov [esp+8], eax | 2977 | | call ->name .. _sse |
3079 | | mov ax, mode1 | 2978 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. |
3080 | | or ax, [esp+4] | 2979 | | fld qword [esp+4] |
3081 | |.if mode2 ~= 0xffff | ||
3082 | | and ax, mode2 | ||
3083 | |.endif | ||
3084 | | mov [esp+6], ax | ||
3085 | | fldcw word [esp+6] | ||
3086 | | frndint | ||
3087 | | fldcw word [esp+4] | ||
3088 | | mov eax, [esp+8] | ||
3089 | | ret | 2980 | | ret |
3090 | |.endmacro | 2981 | |.endif |
3091 | | | 2982 | | |
3092 | |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | 2983 | |->name .. _sse: |
3093 | |.macro vm_round_sse, mode | ||
3094 | | sseconst_abs xmm2, RDa | 2984 | | sseconst_abs xmm2, RDa |
3095 | | sseconst_2p52 xmm3, RDa | 2985 | | sseconst_2p52 xmm3, RDa |
3096 | | movaps xmm1, xmm0 | 2986 | | movaps xmm1, xmm0 |
@@ -3128,22 +3018,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
3128 | | ret | 3018 | | ret |
3129 | |.endmacro | 3019 | |.endmacro |
3130 | | | 3020 | | |
3131 | |.macro vm_round, name, ssemode, mode1, mode2 | 3021 | | vm_round vm_floor, 0, 1 |
3132 | |->name: | 3022 | | vm_round vm_ceil, 1, JIT |
3133 | |.if not SSE | 3023 | | vm_round vm_trunc, 2, JIT |
3134 | | vm_round_x87 mode1, mode2 | ||
3135 | |.endif | ||
3136 | |->name .. _sse: | ||
3137 | | vm_round_sse ssemode | ||
3138 | |.endmacro | ||
3139 | | | ||
3140 | | vm_round vm_floor, 0, 0x0400, 0xf7ff | ||
3141 | | vm_round vm_ceil, 1, 0x0800, 0xfbff | ||
3142 | | vm_round vm_trunc, 2, 0x0c00, 0xffff | ||
3143 | | | 3024 | | |
3144 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 3025 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
3145 | |->vm_mod: | 3026 | |->vm_mod: |
3146 | |.if SSE | ||
3147 | |// Args in xmm0/xmm1, return value in xmm0. | 3027 | |// Args in xmm0/xmm1, return value in xmm0. |
3148 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | 3028 | |// Caveat: xmm0-xmm5 and RC (eax) modified! |
3149 | | movaps xmm5, xmm0 | 3029 | | movaps xmm5, xmm0 |
@@ -3171,488 +3051,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3171 | | movaps xmm0, xmm5 | 3051 | | movaps xmm0, xmm5 |
3172 | | subsd xmm0, xmm1 | 3052 | | subsd xmm0, xmm1 |
3173 | | ret | 3053 | | ret |
3174 | |.else | ||
3175 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | ||
3176 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | ||
3177 | | fld st1 | ||
3178 | | fdiv st1 | ||
3179 | | fnstcw word [esp+4] | ||
3180 | | mov ax, 0x0400 | ||
3181 | | or ax, [esp+4] | ||
3182 | | and ax, 0xf7ff | ||
3183 | | mov [esp+6], ax | ||
3184 | | fldcw word [esp+6] | ||
3185 | | frndint | ||
3186 | | fldcw word [esp+4] | ||
3187 | | fmulp st1 | ||
3188 | | fsubp st1 | ||
3189 | | ret | ||
3190 | |.endif | ||
3191 | | | ||
3192 | |// FP log2(x). Called by math.log(x, base). | ||
3193 | |->vm_log2: | ||
3194 | |.if X64WIN | ||
3195 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3196 | | fld1 | ||
3197 | | fld qword [rsp+8] | ||
3198 | | fyl2x | ||
3199 | | fstp qword [rsp+8] | ||
3200 | | movsd xmm0, qword [rsp+8] | ||
3201 | |.elif X64 | ||
3202 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3203 | | fld1 | ||
3204 | | fld qword [rsp-8] | ||
3205 | | fyl2x | ||
3206 | | fstp qword [rsp-8] | ||
3207 | | movsd xmm0, qword [rsp-8] | ||
3208 | |.else | ||
3209 | | fld1 | ||
3210 | | fld qword [esp+4] | ||
3211 | | fyl2x | ||
3212 | |.endif | ||
3213 | | ret | ||
3214 | | | ||
3215 | |// FP exponentiation e^x and 2^x. Called by math.exp fast function and | ||
3216 | |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. | ||
3217 | |// Caveat: needs 3 slots on x87 stack! | ||
3218 | |->vm_exp_x87: | ||
3219 | | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) | ||
3220 | |->vm_exp2_x87: | ||
3221 | | .if X64WIN | ||
3222 | | .define expscratch, dword [rsp+8] // Use scratch area. | ||
3223 | | .elif X64 | ||
3224 | | .define expscratch, dword [rsp-8] // Use red zone. | ||
3225 | | .else | ||
3226 | | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. | ||
3227 | | .endif | ||
3228 | | fst expscratch // Caveat: overwrites ARG1. | ||
3229 | | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf | ||
3230 | | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 | ||
3231 | |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. | ||
3232 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3233 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3234 | |1: | ||
3235 | | ret | ||
3236 | |2: | ||
3237 | | fpop; fldz; ret | ||
3238 | | | ||
3239 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | ||
3240 | |// and vm_arith. | ||
3241 | |// Args/ret on x87 stack (y on top). RC (eax) modified. | ||
3242 | |// Caveat: needs 3 slots on x87 stack! | ||
3243 | |->vm_pow: | ||
3244 | |.if not SSE | ||
3245 | | fist dword [esp+4] // Store/reload int before comparison. | ||
3246 | | fild dword [esp+4] // Integral exponent used in vm_powi. | ||
3247 | | fucomip st1 | ||
3248 | | jnz >8 // Branch for FP exponents. | ||
3249 | | jp >9 // Branch for NaN exponent. | ||
3250 | | fpop // Pop y and fallthrough to vm_powi. | ||
3251 | | | ||
3252 | |// FP/int power function x^i. Arg1/ret on x87 stack. | ||
3253 | |// Arg2 (int) on C stack. RC (eax) modified. | ||
3254 | |// Caveat: needs 2 slots on x87 stack! | ||
3255 | | mov eax, [esp+4] | ||
3256 | | cmp eax, 1; jle >6 // i<=1? | ||
3257 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
3258 | |1: // Handle leading zeros. | ||
3259 | | test eax, 1; jnz >2 | ||
3260 | | fmul st0 | ||
3261 | | shr eax, 1 | ||
3262 | | jmp <1 | ||
3263 | |2: | ||
3264 | | shr eax, 1; jz >5 | ||
3265 | | fdup | ||
3266 | |3: // Handle trailing bits. | ||
3267 | | fmul st0 | ||
3268 | | shr eax, 1; jz >4 | ||
3269 | | jnc <3 | ||
3270 | | fmul st1, st0 | ||
3271 | | jmp <3 | ||
3272 | |4: | ||
3273 | | fmulp st1 | ||
3274 | |5: | ||
3275 | | ret | ||
3276 | |6: | ||
3277 | | je <5 // x^1 ==> x | ||
3278 | | jb >7 | ||
3279 | | fld1; fdivrp st1 | ||
3280 | | neg eax | ||
3281 | | cmp eax, 1; je <5 // x^-1 ==> 1/x | ||
3282 | | jmp <1 // x^-i ==> (1/x)^i | ||
3283 | |7: | ||
3284 | | fpop; fld1 // x^0 ==> 1 | ||
3285 | | ret | ||
3286 | | | ||
3287 | |8: // FP/FP power function x^y. | ||
3288 | | fst dword [esp+4] | ||
3289 | | fxch | ||
3290 | | fst dword [esp+8] | ||
3291 | | mov eax, [esp+4]; shl eax, 1 | ||
3292 | | cmp eax, 0xff000000; je >2 // x^+-Inf? | ||
3293 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3294 | | cmp eax, 0xff000000; je >4 // +-Inf^y? | ||
3295 | | fyl2x | ||
3296 | | jmp ->vm_exp2raw | ||
3297 | | | ||
3298 | |9: // Handle x^NaN. | ||
3299 | | fld1 | ||
3300 | | fucomip st2 | ||
3301 | | je >1 // 1^NaN ==> 1 | ||
3302 | | fxch // x^NaN ==> NaN | ||
3303 | |1: | ||
3304 | | fpop | ||
3305 | | ret | ||
3306 | | | ||
3307 | |2: // Handle x^+-Inf. | ||
3308 | | fabs | ||
3309 | | fld1 | ||
3310 | | fucomip st1 | ||
3311 | | je >3 // +-1^+-Inf ==> 1 | ||
3312 | | fpop; fabs; fldz; mov eax, 0; setc al | ||
3313 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3314 | | fxch | ||
3315 | |3: | ||
3316 | | fpop1; fabs | ||
3317 | | ret | ||
3318 | | | ||
3319 | |4: // Handle +-0^y or +-Inf^y. | ||
3320 | | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| | ||
3321 | | fpop; fpop | ||
3322 | | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf | ||
3323 | | fldz // y < 0, +-Inf^y ==> 0 | ||
3324 | | ret | ||
3325 | |5: | ||
3326 | | mov dword [esp+4], 0x7f800000 // Return +Inf. | ||
3327 | | fld dword [esp+4] | ||
3328 | | ret | ||
3329 | |.endif | ||
3330 | | | ||
3331 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | ||
3332 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | ||
3333 | |->vm_pow_sse: | ||
3334 | | cvtsd2si eax, xmm1 | ||
3335 | | cvtsi2sd xmm2, eax | ||
3336 | | ucomisd xmm1, xmm2 | ||
3337 | | jnz >8 // Branch for FP exponents. | ||
3338 | | jp >9 // Branch for NaN exponent. | ||
3339 | | // Fallthrough to vm_powi_sse. | ||
3340 | | | ||
3341 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | ||
3342 | |->vm_powi_sse: | ||
3343 | | cmp eax, 1; jle >6 // i<=1? | ||
3344 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
3345 | |1: // Handle leading zeros. | ||
3346 | | test eax, 1; jnz >2 | ||
3347 | | mulsd xmm0, xmm0 | ||
3348 | | shr eax, 1 | ||
3349 | | jmp <1 | ||
3350 | |2: | ||
3351 | | shr eax, 1; jz >5 | ||
3352 | | movaps xmm1, xmm0 | ||
3353 | |3: // Handle trailing bits. | ||
3354 | | mulsd xmm0, xmm0 | ||
3355 | | shr eax, 1; jz >4 | ||
3356 | | jnc <3 | ||
3357 | | mulsd xmm1, xmm0 | ||
3358 | | jmp <3 | ||
3359 | |4: | ||
3360 | | mulsd xmm0, xmm1 | ||
3361 | |5: | ||
3362 | | ret | ||
3363 | |6: | ||
3364 | | je <5 // x^1 ==> x | ||
3365 | | jb >7 // x^0 ==> 1 | ||
3366 | | neg eax | ||
3367 | | call <1 | ||
3368 | | sseconst_1 xmm1, RDa | ||
3369 | | divsd xmm1, xmm0 | ||
3370 | | movaps xmm0, xmm1 | ||
3371 | | ret | ||
3372 | |7: | ||
3373 | | sseconst_1 xmm0, RDa | ||
3374 | | ret | ||
3375 | | | ||
3376 | |8: // FP/FP power function x^y. | ||
3377 | |.if X64 | ||
3378 | | movd rax, xmm1; shl rax, 1 | ||
3379 | | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf? | ||
3380 | | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? | ||
3381 | | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y? | ||
3382 | | .if X64WIN | ||
3383 | | movsd qword [rsp+16], xmm1 // Use scratch area. | ||
3384 | | movsd qword [rsp+8], xmm0 | ||
3385 | | fld qword [rsp+16] | ||
3386 | | fld qword [rsp+8] | ||
3387 | | .else | ||
3388 | | movsd qword [rsp-16], xmm1 // Use red zone. | ||
3389 | | movsd qword [rsp-8], xmm0 | ||
3390 | | fld qword [rsp-16] | ||
3391 | | fld qword [rsp-8] | ||
3392 | | .endif | ||
3393 | |.else | ||
3394 | | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. | ||
3395 | | movsd qword [esp+4], xmm0 | ||
3396 | | cmp dword [esp+12], 0; jne >1 | ||
3397 | | mov eax, [esp+16]; shl eax, 1 | ||
3398 | | cmp eax, 0xffe00000; je >2 // x^+-Inf? | ||
3399 | |1: | ||
3400 | | cmp dword [esp+4], 0; jne >1 | ||
3401 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3402 | | cmp eax, 0xffe00000; je >5 // +-Inf^y? | ||
3403 | |1: | ||
3404 | | fld qword [esp+12] | ||
3405 | | fld qword [esp+4] | ||
3406 | |.endif | ||
3407 | | fyl2x // y*log2(x) | ||
3408 | | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. | ||
3409 | | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int | ||
3410 | |.if X64WIN | ||
3411 | | fstp qword [rsp+8] // Use scratch area. | ||
3412 | | movsd xmm0, qword [rsp+8] | ||
3413 | |.elif X64 | ||
3414 | | fstp qword [rsp-8] // Use red zone. | ||
3415 | | movsd xmm0, qword [rsp-8] | ||
3416 | |.else | ||
3417 | | fstp qword [esp+4] // Needs 8 byte scratch area. | ||
3418 | | movsd xmm0, qword [esp+4] | ||
3419 | |.endif | ||
3420 | | ret | ||
3421 | | | ||
3422 | |9: // Handle x^NaN. | ||
3423 | | sseconst_1 xmm2, RDa | ||
3424 | | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 | ||
3425 | | movaps xmm0, xmm1 // x^NaN ==> NaN | ||
3426 | |1: | ||
3427 | | ret | ||
3428 | | | ||
3429 | |2: // Handle x^+-Inf. | ||
3430 | | sseconst_abs xmm2, RDa | ||
3431 | | andpd xmm0, xmm2 // |x| | ||
3432 | | sseconst_1 xmm2, RDa | ||
3433 | | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 | ||
3434 | | movmskpd eax, xmm1 | ||
3435 | | xorps xmm0, xmm0 | ||
3436 | | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3437 | |3: | ||
3438 | | sseconst_hi xmm0, RDa, 7ff00000 // +Inf | ||
3439 | | ret | ||
3440 | | | ||
3441 | |4: // Handle +-0^y. | ||
3442 | | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf | ||
3443 | | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 | ||
3444 | | ret | ||
3445 | | | ||
3446 | |5: // Handle +-Inf^y. | ||
3447 | | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf | ||
3448 | | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 | ||
3449 | | ret | ||
3450 | | | ||
3451 | |// Callable from C: double lj_vm_foldfpm(double x, int fpm) | ||
3452 | |// Computes fpm(x) for extended math functions. ORDER FPM. | ||
3453 | |->vm_foldfpm: | ||
3454 | |.if JIT | ||
3455 | |.if X64 | ||
3456 | | .if X64WIN | ||
3457 | | .define fpmop, CARG2d | ||
3458 | | .else | ||
3459 | | .define fpmop, CARG1d | ||
3460 | | .endif | ||
3461 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
3462 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
3463 | | sqrtsd xmm0, xmm0; ret | ||
3464 | |2: | ||
3465 | | .if X64WIN | ||
3466 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3467 | | fld qword [rsp+8] | ||
3468 | | .else | ||
3469 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3470 | | fld qword [rsp-8] | ||
3471 | | .endif | ||
3472 | | cmp fpmop, 5; ja >2 | ||
3473 | | .if X64WIN; pop rax; .endif | ||
3474 | | je >1 | ||
3475 | | call ->vm_exp_x87 | ||
3476 | | .if X64WIN; push rax; .endif | ||
3477 | | jmp >7 | ||
3478 | |1: | ||
3479 | | call ->vm_exp2_x87 | ||
3480 | | .if X64WIN; push rax; .endif | ||
3481 | | jmp >7 | ||
3482 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3483 | | fldln2; fxch; fyl2x; jmp >7 | ||
3484 | |1: ; fld1; fxch; fyl2x; jmp >7 | ||
3485 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3486 | | fldlg2; fxch; fyl2x; jmp >7 | ||
3487 | |1: ; fsin; jmp >7 | ||
3488 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3489 | | fcos; jmp >7 | ||
3490 | |1: ; fptan; fpop | ||
3491 | |7: | ||
3492 | | .if X64WIN | ||
3493 | | fstp qword [rsp+8] // Use scratch area. | ||
3494 | | movsd xmm0, qword [rsp+8] | ||
3495 | | .else | ||
3496 | | fstp qword [rsp-8] // Use red zone. | ||
3497 | | movsd xmm0, qword [rsp-8] | ||
3498 | | .endif | ||
3499 | | ret | ||
3500 | |.else // x86 calling convention. | ||
3501 | | .define fpmop, eax | ||
3502 | |.if SSE | ||
3503 | | mov fpmop, [esp+12] | ||
3504 | | movsd xmm0, qword [esp+4] | ||
3505 | | cmp fpmop, 1; je >1; ja >2 | ||
3506 | | call ->vm_floor; jmp >7 | ||
3507 | |1: ; call ->vm_ceil; jmp >7 | ||
3508 | |2: ; cmp fpmop, 3; je >1; ja >2 | ||
3509 | | call ->vm_trunc; jmp >7 | ||
3510 | |1: | ||
3511 | | sqrtsd xmm0, xmm0 | ||
3512 | |7: | ||
3513 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3514 | | fld qword [esp+4] | ||
3515 | | ret | ||
3516 | |2: ; fld qword [esp+4] | ||
3517 | | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3518 | |2: ; cmp fpmop, 7; je >1; ja >2 | ||
3519 | | fldln2; fxch; fyl2x; ret | ||
3520 | |1: ; fld1; fxch; fyl2x; ret | ||
3521 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3522 | | fldlg2; fxch; fyl2x; ret | ||
3523 | |1: ; fsin; ret | ||
3524 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3525 | | fcos; ret | ||
3526 | |1: ; fptan; fpop; ret | ||
3527 | |.else | ||
3528 | | mov fpmop, [esp+12] | ||
3529 | | fld qword [esp+4] | ||
3530 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
3531 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
3532 | | fsqrt; ret | ||
3533 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3534 | | cmp fpmop, 7; je >1; ja >2 | ||
3535 | | fldln2; fxch; fyl2x; ret | ||
3536 | |1: ; fld1; fxch; fyl2x; ret | ||
3537 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3538 | | fldlg2; fxch; fyl2x; ret | ||
3539 | |1: ; fsin; ret | ||
3540 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3541 | | fcos; ret | ||
3542 | |1: ; fptan; fpop; ret | ||
3543 | |.endif | ||
3544 | |.endif | ||
3545 | |9: ; int3 // Bad fpm. | ||
3546 | |.endif | ||
3547 | | | ||
3548 | |// Callable from C: double lj_vm_foldarith(double x, double y, int op) | ||
3549 | |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) | ||
3550 | |// and basic math functions. ORDER ARITH | ||
3551 | |->vm_foldarith: | ||
3552 | |.if X64 | ||
3553 | | | ||
3554 | | .if X64WIN | ||
3555 | | .define foldop, CARG3d | ||
3556 | | .else | ||
3557 | | .define foldop, CARG1d | ||
3558 | | .endif | ||
3559 | | cmp foldop, 1; je >1; ja >2 | ||
3560 | | addsd xmm0, xmm1; ret | ||
3561 | |1: ; subsd xmm0, xmm1; ret | ||
3562 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3563 | | mulsd xmm0, xmm1; ret | ||
3564 | |1: ; divsd xmm0, xmm1; ret | ||
3565 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow | ||
3566 | | cmp foldop, 7; je >1; ja >2 | ||
3567 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | ||
3568 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | ||
3569 | |2: ; cmp foldop, 9; ja >2 | ||
3570 | |.if X64WIN | ||
3571 | | movsd qword [rsp+8], xmm0 // Use scratch area. | ||
3572 | | movsd qword [rsp+16], xmm1 | ||
3573 | | fld qword [rsp+8] | ||
3574 | | fld qword [rsp+16] | ||
3575 | |.else | ||
3576 | | movsd qword [rsp-8], xmm0 // Use red zone. | ||
3577 | | movsd qword [rsp-16], xmm1 | ||
3578 | | fld qword [rsp-8] | ||
3579 | | fld qword [rsp-16] | ||
3580 | |.endif | ||
3581 | | je >1 | ||
3582 | | fpatan | ||
3583 | |7: | ||
3584 | |.if X64WIN | ||
3585 | | fstp qword [rsp+8] // Use scratch area. | ||
3586 | | movsd xmm0, qword [rsp+8] | ||
3587 | |.else | ||
3588 | | fstp qword [rsp-8] // Use red zone. | ||
3589 | | movsd xmm0, qword [rsp-8] | ||
3590 | |.endif | ||
3591 | | ret | ||
3592 | |1: ; fxch; fscale; fpop1; jmp <7 | ||
3593 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3594 | | minsd xmm0, xmm1; ret | ||
3595 | |1: ; maxsd xmm0, xmm1; ret | ||
3596 | |9: ; int3 // Bad op. | ||
3597 | | | ||
3598 | |.elif SSE // x86 calling convention with SSE ops. | ||
3599 | | | ||
3600 | | .define foldop, eax | ||
3601 | | mov foldop, [esp+20] | ||
3602 | | movsd xmm0, qword [esp+4] | ||
3603 | | movsd xmm1, qword [esp+12] | ||
3604 | | cmp foldop, 1; je >1; ja >2 | ||
3605 | | addsd xmm0, xmm1 | ||
3606 | |7: | ||
3607 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. | ||
3608 | | fld qword [esp+4] | ||
3609 | | ret | ||
3610 | |1: ; subsd xmm0, xmm1; jmp <7 | ||
3611 | |2: ; cmp foldop, 3; je >1; ja >2 | ||
3612 | | mulsd xmm0, xmm1; jmp <7 | ||
3613 | |1: ; divsd xmm0, xmm1; jmp <7 | ||
3614 | |2: ; cmp foldop, 5 | ||
3615 | | je >1; ja >2 | ||
3616 | | call ->vm_mod; jmp <7 | ||
3617 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. | ||
3618 | |2: ; cmp foldop, 7; je >1; ja >2 | ||
3619 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | ||
3620 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | ||
3621 | |2: ; cmp foldop, 9; ja >2 | ||
3622 | | fld qword [esp+4] // Reload from stack | ||
3623 | | fld qword [esp+12] | ||
3624 | | je >1 | ||
3625 | | fpatan; ret | ||
3626 | |1: ; fxch; fscale; fpop1; ret | ||
3627 | |2: ; cmp foldop, 11; je >1; ja >9 | ||
3628 | | minsd xmm0, xmm1; jmp <7 | ||
3629 | |1: ; maxsd xmm0, xmm1; jmp <7 | ||
3630 | |9: ; int3 // Bad op. | ||
3631 | | | ||
3632 | |.else // x86 calling convention with x87 ops. | ||
3633 | | | ||
3634 | | mov eax, [esp+20] | ||
3635 | | fld qword [esp+4] | ||
3636 | | fld qword [esp+12] | ||
3637 | | cmp eax, 1; je >1; ja >2 | ||
3638 | | faddp st1; ret | ||
3639 | |1: ; fsubp st1; ret | ||
3640 | |2: ; cmp eax, 3; je >1; ja >2 | ||
3641 | | fmulp st1; ret | ||
3642 | |1: ; fdivp st1; ret | ||
3643 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | ||
3644 | | cmp eax, 7; je >1; ja >2 | ||
3645 | | fpop; fchs; ret | ||
3646 | |1: ; fpop; fabs; ret | ||
3647 | |2: ; cmp eax, 9; je >1; ja >2 | ||
3648 | | fpatan; ret | ||
3649 | |1: ; fxch; fscale; fpop1; ret | ||
3650 | |2: ; cmp eax, 11; je >1; ja >9 | ||
3651 | | fucomi st1; fcmovnbe st1; fpop1; ret | ||
3652 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | ||
3653 | |9: ; int3 // Bad op. | ||
3654 | | | ||
3655 | |.endif | ||
3656 | | | 3054 | | |
3657 | |//----------------------------------------------------------------------- | 3055 | |//----------------------------------------------------------------------- |
3658 | |//-- Miscellaneous functions -------------------------------------------- | 3056 | |//-- Miscellaneous functions -------------------------------------------- |
@@ -3664,6 +3062,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3664 | | mov eax, CARG1d | 3062 | | mov eax, CARG1d |
3665 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif | 3063 | | .if X64WIN; push rsi; mov rsi, CARG2; .endif |
3666 | | push rbx | 3064 | | push rbx |
3065 | | xor ecx, ecx | ||
3667 | | cpuid | 3066 | | cpuid |
3668 | | mov [rsi], eax | 3067 | | mov [rsi], eax |
3669 | | mov [rsi+4], ebx | 3068 | | mov [rsi+4], ebx |
@@ -3687,6 +3086,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3687 | | mov eax, [esp+4] // Argument 1 is function number. | 3086 | | mov eax, [esp+4] // Argument 1 is function number. |
3688 | | push edi | 3087 | | push edi |
3689 | | push ebx | 3088 | | push ebx |
3089 | | xor ecx, ecx | ||
3690 | | cpuid | 3090 | | cpuid |
3691 | | mov edi, [esp+16] // Argument 2 is result area. | 3091 | | mov edi, [esp+16] // Argument 2 is result area. |
3692 | | mov [edi], eax | 3092 | | mov [edi], eax |
@@ -3699,6 +3099,86 @@ static void build_subroutines(BuildCtx *ctx) | |||
3699 | | ret | 3099 | | ret |
3700 | |.endif | 3100 | |.endif |
3701 | | | 3101 | | |
3102 | |.define NEXT_TAB, TAB:FCARG1 | ||
3103 | |.define NEXT_IDX, FCARG2 | ||
3104 | |.define NEXT_PTR, RCa | ||
3105 | |.define NEXT_PTRd, RC | ||
3106 | |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
3107 | |.if X64 | ||
3108 | |.define NEXT_TMP, CARG3d | ||
3109 | |.define NEXT_TMPq, CARG3 | ||
3110 | |.define NEXT_ASIZE, CARG4d | ||
3111 | |.macro NEXT_ENTER; .endmacro | ||
3112 | |.macro NEXT_LEAVE; ret; .endmacro | ||
3113 | |.if X64WIN | ||
3114 | |.define NEXT_RES_PTR, [rsp+aword*5] | ||
3115 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
3116 | |.else | ||
3117 | |.define NEXT_RES_PTR, [rsp+aword*1] | ||
3118 | |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
3119 | |.endif | ||
3120 | |.else | ||
3121 | |.define NEXT_ASIZE, esi | ||
3122 | |.define NEXT_TMP, edi | ||
3123 | |.macro NEXT_ENTER; push esi; push edi; .endmacro | ||
3124 | |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro | ||
3125 | |.define NEXT_RES_PTR, [esp+dword*3] | ||
3126 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
3127 | |.endif | ||
3128 | | | ||
3129 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
3130 | |// Next idx returned in edx. | ||
3131 | |->vm_next: | ||
3132 | |.if JIT | ||
3133 | | NEXT_ENTER | ||
3134 | | mov NEXT_ASIZE, NEXT_TAB->asize | ||
3135 | |1: // Traverse array part. | ||
3136 | | cmp NEXT_IDX, NEXT_ASIZE; jae >5 | ||
3137 | | mov NEXT_TMP, NEXT_TAB->array | ||
3138 | | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 | ||
3139 | | lea NEXT_PTR, NEXT_RES_PTR | ||
3140 | |.if X64 | ||
3141 | | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] | ||
3142 | | mov qword [NEXT_PTR], NEXT_TMPq | ||
3143 | |.else | ||
3144 | | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] | ||
3145 | | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] | ||
3146 | | mov dword [NEXT_PTR+4], NEXT_ASIZE | ||
3147 | | mov dword [NEXT_PTR], NEXT_TMP | ||
3148 | |.endif | ||
3149 | |.if DUALNUM | ||
3150 | | mov dword [NEXT_PTR+dword*3], LJ_TISNUM | ||
3151 | | mov dword [NEXT_PTR+dword*2], NEXT_IDX | ||
3152 | |.else | ||
3153 | | cvtsi2sd xmm0, NEXT_IDX | ||
3154 | | movsd qword [NEXT_PTR+dword*2], xmm0 | ||
3155 | |.endif | ||
3156 | | NEXT_RES_IDX 1 | ||
3157 | | NEXT_LEAVE | ||
3158 | |2: // Skip holes in array part. | ||
3159 | | add NEXT_IDX, 1 | ||
3160 | | jmp <1 | ||
3161 | | | ||
3162 | |5: // Traverse hash part. | ||
3163 | | sub NEXT_IDX, NEXT_ASIZE | ||
3164 | |6: | ||
3165 | | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 | ||
3166 | | imul NEXT_PTRd, NEXT_IDX, #NODE | ||
3167 | | add NODE:NEXT_PTRd, dword NEXT_TAB->node | ||
3168 | | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 | ||
3169 | | NEXT_RES_IDXL NEXT_ASIZE+1 | ||
3170 | | NEXT_LEAVE | ||
3171 | |7: // Skip holes in hash part. | ||
3172 | | add NEXT_IDX, 1 | ||
3173 | | jmp <6 | ||
3174 | | | ||
3175 | |9: // End of iteration. Set the key to nil (not the value). | ||
3176 | | NEXT_RES_IDX NEXT_ASIZE | ||
3177 | | lea NEXT_PTR, NEXT_RES_PTR | ||
3178 | | mov dword [NEXT_PTR+dword*3], LJ_TNIL | ||
3179 | | NEXT_LEAVE | ||
3180 | |.endif | ||
3181 | | | ||
3702 | |//----------------------------------------------------------------------- | 3182 | |//----------------------------------------------------------------------- |
3703 | |//-- Assertions --------------------------------------------------------- | 3183 | |//-- Assertions --------------------------------------------------------- |
3704 | |//----------------------------------------------------------------------- | 3184 | |//----------------------------------------------------------------------- |
@@ -3964,19 +3444,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3964 | | // RA is a number. | 3444 | | // RA is a number. |
3965 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | 3445 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp |
3966 | | // RA is a number, RD is an integer. | 3446 | | // RA is a number, RD is an integer. |
3967 | |.if SSE | ||
3968 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3447 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
3969 | | jmp >2 | 3448 | | jmp >2 |
3970 | |.else | ||
3971 | | fld qword [BASE+RA*8] | ||
3972 | | fild dword [BASE+RD*8] | ||
3973 | | jmp >3 | ||
3974 | |.endif | ||
3975 | | | 3449 | | |
3976 | |8: // RA is an integer, RD is not an integer. | 3450 | |8: // RA is an integer, RD is not an integer. |
3977 | | ja ->vmeta_comp | 3451 | | ja ->vmeta_comp |
3978 | | // RA is an integer, RD is a number. | 3452 | | // RA is an integer, RD is a number. |
3979 | |.if SSE | ||
3980 | | cvtsi2sd xmm1, dword [BASE+RA*8] | 3453 | | cvtsi2sd xmm1, dword [BASE+RA*8] |
3981 | | movsd xmm0, qword [BASE+RD*8] | 3454 | | movsd xmm0, qword [BASE+RD*8] |
3982 | | add PC, 4 | 3455 | | add PC, 4 |
@@ -3984,29 +3457,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3984 | | jmp_comp jbe, ja, jb, jae, <9 | 3457 | | jmp_comp jbe, ja, jb, jae, <9 |
3985 | | jmp <6 | 3458 | | jmp <6 |
3986 | |.else | 3459 | |.else |
3987 | | fild dword [BASE+RA*8] | ||
3988 | | jmp >2 | ||
3989 | |.endif | ||
3990 | |.else | ||
3991 | | checknum RA, ->vmeta_comp | 3460 | | checknum RA, ->vmeta_comp |
3992 | | checknum RD, ->vmeta_comp | 3461 | | checknum RD, ->vmeta_comp |
3993 | |.endif | 3462 | |.endif |
3994 | |.if SSE | ||
3995 | |1: | 3463 | |1: |
3996 | | movsd xmm0, qword [BASE+RD*8] | 3464 | | movsd xmm0, qword [BASE+RD*8] |
3997 | |2: | 3465 | |2: |
3998 | | add PC, 4 | 3466 | | add PC, 4 |
3999 | | ucomisd xmm0, qword [BASE+RA*8] | 3467 | | ucomisd xmm0, qword [BASE+RA*8] |
4000 | |3: | 3468 | |3: |
4001 | |.else | ||
4002 | |1: | ||
4003 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | ||
4004 | |2: | ||
4005 | | fld qword [BASE+RD*8] | ||
4006 | |3: | ||
4007 | | add PC, 4 | ||
4008 | | fcomparepp | ||
4009 | |.endif | ||
4010 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3469 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
4011 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3470 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
4012 | |.if DUALNUM | 3471 | |.if DUALNUM |
@@ -4046,43 +3505,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4046 | | // RD is a number. | 3505 | | // RD is a number. |
4047 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | 3506 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 |
4048 | | // RD is a number, RA is an integer. | 3507 | | // RD is a number, RA is an integer. |
4049 | |.if SSE | ||
4050 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3508 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4051 | |.else | ||
4052 | | fild dword [BASE+RA*8] | ||
4053 | |.endif | ||
4054 | | jmp >2 | 3509 | | jmp >2 |
4055 | | | 3510 | | |
4056 | |8: // RD is an integer, RA is not an integer. | 3511 | |8: // RD is an integer, RA is not an integer. |
4057 | | ja >5 | 3512 | | ja >5 |
4058 | | // RD is an integer, RA is a number. | 3513 | | // RD is an integer, RA is a number. |
4059 | |.if SSE | ||
4060 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3514 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
4061 | | ucomisd xmm0, qword [BASE+RA*8] | 3515 | | ucomisd xmm0, qword [BASE+RA*8] |
4062 | |.else | ||
4063 | | fild dword [BASE+RD*8] | ||
4064 | | fld qword [BASE+RA*8] | ||
4065 | |.endif | ||
4066 | | jmp >4 | 3516 | | jmp >4 |
4067 | | | 3517 | | |
4068 | |.else | 3518 | |.else |
4069 | | cmp RB, LJ_TISNUM; jae >5 | 3519 | | cmp RB, LJ_TISNUM; jae >5 |
4070 | | checknum RA, >5 | 3520 | | checknum RA, >5 |
4071 | |.endif | 3521 | |.endif |
4072 | |.if SSE | ||
4073 | |1: | 3522 | |1: |
4074 | | movsd xmm0, qword [BASE+RA*8] | 3523 | | movsd xmm0, qword [BASE+RA*8] |
4075 | |2: | 3524 | |2: |
4076 | | ucomisd xmm0, qword [BASE+RD*8] | 3525 | | ucomisd xmm0, qword [BASE+RD*8] |
4077 | |4: | 3526 | |4: |
4078 | |.else | ||
4079 | |1: | ||
4080 | | fld qword [BASE+RA*8] | ||
4081 | |2: | ||
4082 | | fld qword [BASE+RD*8] | ||
4083 | |4: | ||
4084 | | fcomparepp | ||
4085 | |.endif | ||
4086 | iseqne_fp: | 3527 | iseqne_fp: |
4087 | if (vk) { | 3528 | if (vk) { |
4088 | | jp >2 // Unordered means not equal. | 3529 | | jp >2 // Unordered means not equal. |
@@ -4205,39 +3646,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4205 | | // RA is a number. | 3646 | | // RA is a number. |
4206 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | 3647 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 |
4207 | | // RA is a number, RD is an integer. | 3648 | | // RA is a number, RD is an integer. |
4208 | |.if SSE | ||
4209 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | 3649 | | cvtsi2sd xmm0, dword [KBASE+RD*8] |
4210 | |.else | ||
4211 | | fild dword [KBASE+RD*8] | ||
4212 | |.endif | ||
4213 | | jmp >2 | 3650 | | jmp >2 |
4214 | | | 3651 | | |
4215 | |8: // RA is an integer, RD is a number. | 3652 | |8: // RA is an integer, RD is a number. |
4216 | |.if SSE | ||
4217 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3653 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4218 | | ucomisd xmm0, qword [KBASE+RD*8] | 3654 | | ucomisd xmm0, qword [KBASE+RD*8] |
4219 | |.else | ||
4220 | | fild dword [BASE+RA*8] | ||
4221 | | fld qword [KBASE+RD*8] | ||
4222 | |.endif | ||
4223 | | jmp >4 | 3655 | | jmp >4 |
4224 | |.else | 3656 | |.else |
4225 | | cmp RB, LJ_TISNUM; jae >3 | 3657 | | cmp RB, LJ_TISNUM; jae >3 |
4226 | |.endif | 3658 | |.endif |
4227 | |.if SSE | ||
4228 | |1: | 3659 | |1: |
4229 | | movsd xmm0, qword [KBASE+RD*8] | 3660 | | movsd xmm0, qword [KBASE+RD*8] |
4230 | |2: | 3661 | |2: |
4231 | | ucomisd xmm0, qword [BASE+RA*8] | 3662 | | ucomisd xmm0, qword [BASE+RA*8] |
4232 | |4: | 3663 | |4: |
4233 | |.else | ||
4234 | |1: | ||
4235 | | fld qword [KBASE+RD*8] | ||
4236 | |2: | ||
4237 | | fld qword [BASE+RA*8] | ||
4238 | |4: | ||
4239 | | fcomparepp | ||
4240 | |.endif | ||
4241 | goto iseqne_fp; | 3664 | goto iseqne_fp; |
4242 | case BC_ISEQP: case BC_ISNEP: | 3665 | case BC_ISEQP: case BC_ISNEP: |
4243 | vk = op == BC_ISEQP; | 3666 | vk = op == BC_ISEQP; |
@@ -4288,6 +3711,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4288 | | ins_next | 3711 | | ins_next |
4289 | break; | 3712 | break; |
4290 | 3713 | ||
3714 | case BC_ISTYPE: | ||
3715 | | ins_AD // RA = src, RD = -type | ||
3716 | | add RD, [BASE+RA*8+4] | ||
3717 | | jne ->vmeta_istype | ||
3718 | | ins_next | ||
3719 | break; | ||
3720 | case BC_ISNUM: | ||
3721 | | ins_AD // RA = src, RD = -(TISNUM-1) | ||
3722 | | checknum RA, ->vmeta_istype | ||
3723 | | ins_next | ||
3724 | break; | ||
3725 | |||
4291 | /* -- Unary ops --------------------------------------------------------- */ | 3726 | /* -- Unary ops --------------------------------------------------------- */ |
4292 | 3727 | ||
4293 | case BC_MOV: | 3728 | case BC_MOV: |
@@ -4331,16 +3766,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4331 | |.else | 3766 | |.else |
4332 | | checknum RD, ->vmeta_unm | 3767 | | checknum RD, ->vmeta_unm |
4333 | |.endif | 3768 | |.endif |
4334 | |.if SSE | ||
4335 | | movsd xmm0, qword [BASE+RD*8] | 3769 | | movsd xmm0, qword [BASE+RD*8] |
4336 | | sseconst_sign xmm1, RDa | 3770 | | sseconst_sign xmm1, RDa |
4337 | | xorps xmm0, xmm1 | 3771 | | xorps xmm0, xmm1 |
4338 | | movsd qword [BASE+RA*8], xmm0 | 3772 | | movsd qword [BASE+RA*8], xmm0 |
4339 | |.else | ||
4340 | | fld qword [BASE+RD*8] | ||
4341 | | fchs | ||
4342 | | fstp qword [BASE+RA*8] | ||
4343 | |.endif | ||
4344 | |.if DUALNUM | 3773 | |.if DUALNUM |
4345 | | jmp <9 | 3774 | | jmp <9 |
4346 | |.else | 3775 | |.else |
@@ -4356,15 +3785,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4356 | |1: | 3785 | |1: |
4357 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 3786 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4358 | | mov dword [BASE+RA*8], RD | 3787 | | mov dword [BASE+RA*8], RD |
4359 | |.elif SSE | 3788 | |.else |
4360 | | xorps xmm0, xmm0 | 3789 | | xorps xmm0, xmm0 |
4361 | | cvtsi2sd xmm0, dword STR:RD->len | 3790 | | cvtsi2sd xmm0, dword STR:RD->len |
4362 | |1: | 3791 | |1: |
4363 | | movsd qword [BASE+RA*8], xmm0 | 3792 | | movsd qword [BASE+RA*8], xmm0 |
4364 | |.else | ||
4365 | | fild dword STR:RD->len | ||
4366 | |1: | ||
4367 | | fstp qword [BASE+RA*8] | ||
4368 | |.endif | 3793 | |.endif |
4369 | | ins_next | 3794 | | ins_next |
4370 | |2: | 3795 | |2: |
@@ -4382,11 +3807,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4382 | | // Length of table returned in eax (RD). | 3807 | | // Length of table returned in eax (RD). |
4383 | |.if DUALNUM | 3808 | |.if DUALNUM |
4384 | | // Nothing to do. | 3809 | | // Nothing to do. |
4385 | |.elif SSE | ||
4386 | | cvtsi2sd xmm0, RD | ||
4387 | |.else | 3810 | |.else |
4388 | | mov ARG1, RD | 3811 | | cvtsi2sd xmm0, RD |
4389 | | fild ARG1 | ||
4390 | |.endif | 3812 | |.endif |
4391 | | mov BASE, RB // Restore BASE. | 3813 | | mov BASE, RB // Restore BASE. |
4392 | | movzx RA, PC_RA | 3814 | | movzx RA, PC_RA |
@@ -4401,7 +3823,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4401 | 3823 | ||
4402 | /* -- Binary ops -------------------------------------------------------- */ | 3824 | /* -- Binary ops -------------------------------------------------------- */ |
4403 | 3825 | ||
4404 | |.macro ins_arithpre, x87ins, sseins, ssereg | 3826 | |.macro ins_arithpre, sseins, ssereg |
4405 | | ins_ABC | 3827 | | ins_ABC |
4406 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3828 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
4407 | ||switch (vk) { | 3829 | ||switch (vk) { |
@@ -4410,37 +3832,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4410 | | .if DUALNUM | 3832 | | .if DUALNUM |
4411 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | 3833 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn |
4412 | | .endif | 3834 | | .endif |
4413 | | .if SSE | 3835 | | movsd xmm0, qword [BASE+RB*8] |
4414 | | movsd xmm0, qword [BASE+RB*8] | 3836 | | sseins ssereg, qword [KBASE+RC*8] |
4415 | | sseins ssereg, qword [KBASE+RC*8] | ||
4416 | | .else | ||
4417 | | fld qword [BASE+RB*8] | ||
4418 | | x87ins qword [KBASE+RC*8] | ||
4419 | | .endif | ||
4420 | || break; | 3837 | || break; |
4421 | ||case 1: | 3838 | ||case 1: |
4422 | | checknum RB, ->vmeta_arith_nv | 3839 | | checknum RB, ->vmeta_arith_nv |
4423 | | .if DUALNUM | 3840 | | .if DUALNUM |
4424 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | 3841 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv |
4425 | | .endif | 3842 | | .endif |
4426 | | .if SSE | 3843 | | movsd xmm0, qword [KBASE+RC*8] |
4427 | | movsd xmm0, qword [KBASE+RC*8] | 3844 | | sseins ssereg, qword [BASE+RB*8] |
4428 | | sseins ssereg, qword [BASE+RB*8] | ||
4429 | | .else | ||
4430 | | fld qword [KBASE+RC*8] | ||
4431 | | x87ins qword [BASE+RB*8] | ||
4432 | | .endif | ||
4433 | || break; | 3845 | || break; |
4434 | ||default: | 3846 | ||default: |
4435 | | checknum RB, ->vmeta_arith_vv | 3847 | | checknum RB, ->vmeta_arith_vv |
4436 | | checknum RC, ->vmeta_arith_vv | 3848 | | checknum RC, ->vmeta_arith_vv |
4437 | | .if SSE | 3849 | | movsd xmm0, qword [BASE+RB*8] |
4438 | | movsd xmm0, qword [BASE+RB*8] | 3850 | | sseins ssereg, qword [BASE+RC*8] |
4439 | | sseins ssereg, qword [BASE+RC*8] | ||
4440 | | .else | ||
4441 | | fld qword [BASE+RB*8] | ||
4442 | | x87ins qword [BASE+RC*8] | ||
4443 | | .endif | ||
4444 | || break; | 3851 | || break; |
4445 | ||} | 3852 | ||} |
4446 | |.endmacro | 3853 | |.endmacro |
@@ -4478,55 +3885,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4478 | |.endmacro | 3885 | |.endmacro |
4479 | | | 3886 | | |
4480 | |.macro ins_arithpost | 3887 | |.macro ins_arithpost |
4481 | |.if SSE | ||
4482 | | movsd qword [BASE+RA*8], xmm0 | 3888 | | movsd qword [BASE+RA*8], xmm0 |
4483 | |.else | ||
4484 | | fstp qword [BASE+RA*8] | ||
4485 | |.endif | ||
4486 | |.endmacro | 3889 | |.endmacro |
4487 | | | 3890 | | |
4488 | |.macro ins_arith, x87ins, sseins | 3891 | |.macro ins_arith, sseins |
4489 | | ins_arithpre x87ins, sseins, xmm0 | 3892 | | ins_arithpre sseins, xmm0 |
4490 | | ins_arithpost | 3893 | | ins_arithpost |
4491 | | ins_next | 3894 | | ins_next |
4492 | |.endmacro | 3895 | |.endmacro |
4493 | | | 3896 | | |
4494 | |.macro ins_arith, intins, x87ins, sseins | 3897 | |.macro ins_arith, intins, sseins |
4495 | |.if DUALNUM | 3898 | |.if DUALNUM |
4496 | | ins_arithdn intins | 3899 | | ins_arithdn intins |
4497 | |.else | 3900 | |.else |
4498 | | ins_arith, x87ins, sseins | 3901 | | ins_arith, sseins |
4499 | |.endif | 3902 | |.endif |
4500 | |.endmacro | 3903 | |.endmacro |
4501 | 3904 | ||
4502 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 3905 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
4503 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3906 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
4504 | | ins_arith add, fadd, addsd | 3907 | | ins_arith add, addsd |
4505 | break; | 3908 | break; |
4506 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3909 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
4507 | | ins_arith sub, fsub, subsd | 3910 | | ins_arith sub, subsd |
4508 | break; | 3911 | break; |
4509 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3912 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
4510 | | ins_arith imul, fmul, mulsd | 3913 | | ins_arith imul, mulsd |
4511 | break; | 3914 | break; |
4512 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3915 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
4513 | | ins_arith fdiv, divsd | 3916 | | ins_arith divsd |
4514 | break; | 3917 | break; |
4515 | case BC_MODVN: | 3918 | case BC_MODVN: |
4516 | | ins_arithpre fld, movsd, xmm1 | 3919 | | ins_arithpre movsd, xmm1 |
4517 | |->BC_MODVN_Z: | 3920 | |->BC_MODVN_Z: |
4518 | | call ->vm_mod | 3921 | | call ->vm_mod |
4519 | | ins_arithpost | 3922 | | ins_arithpost |
4520 | | ins_next | 3923 | | ins_next |
4521 | break; | 3924 | break; |
4522 | case BC_MODNV: case BC_MODVV: | 3925 | case BC_MODNV: case BC_MODVV: |
4523 | | ins_arithpre fld, movsd, xmm1 | 3926 | | ins_arithpre movsd, xmm1 |
4524 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3927 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
4525 | break; | 3928 | break; |
4526 | case BC_POW: | 3929 | case BC_POW: |
4527 | | ins_arithpre fld, movsd, xmm1 | 3930 | | ins_arithpre movsd, xmm1 |
4528 | | call ->vm_pow | 3931 | | mov RB, BASE |
3932 | |.if not X64 | ||
3933 | | movsd FPARG1, xmm0 | ||
3934 | | movsd FPARG3, xmm1 | ||
3935 | |.endif | ||
3936 | | call extern pow | ||
3937 | | movzx RA, PC_RA | ||
3938 | | mov BASE, RB | ||
3939 | |.if X64 | ||
4529 | | ins_arithpost | 3940 | | ins_arithpost |
3941 | |.else | ||
3942 | | fstp qword [BASE+RA*8] | ||
3943 | |.endif | ||
4530 | | ins_next | 3944 | | ins_next |
4531 | break; | 3945 | break; |
4532 | 3946 | ||
@@ -4594,25 +4008,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4594 | | movsx RD, RDW | 4008 | | movsx RD, RDW |
4595 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4009 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4596 | | mov dword [BASE+RA*8], RD | 4010 | | mov dword [BASE+RA*8], RD |
4597 | |.elif SSE | 4011 | |.else |
4598 | | movsx RD, RDW // Sign-extend literal. | 4012 | | movsx RD, RDW // Sign-extend literal. |
4599 | | cvtsi2sd xmm0, RD | 4013 | | cvtsi2sd xmm0, RD |
4600 | | movsd qword [BASE+RA*8], xmm0 | 4014 | | movsd qword [BASE+RA*8], xmm0 |
4601 | |.else | ||
4602 | | fild PC_RD // Refetch signed RD from instruction. | ||
4603 | | fstp qword [BASE+RA*8] | ||
4604 | |.endif | 4015 | |.endif |
4605 | | ins_next | 4016 | | ins_next |
4606 | break; | 4017 | break; |
4607 | case BC_KNUM: | 4018 | case BC_KNUM: |
4608 | | ins_AD // RA = dst, RD = num const | 4019 | | ins_AD // RA = dst, RD = num const |
4609 | |.if SSE | ||
4610 | | movsd xmm0, qword [KBASE+RD*8] | 4020 | | movsd xmm0, qword [KBASE+RD*8] |
4611 | | movsd qword [BASE+RA*8], xmm0 | 4021 | | movsd qword [BASE+RA*8], xmm0 |
4612 | |.else | ||
4613 | | fld qword [KBASE+RD*8] | ||
4614 | | fstp qword [BASE+RA*8] | ||
4615 | |.endif | ||
4616 | | ins_next | 4022 | | ins_next |
4617 | break; | 4023 | break; |
4618 | case BC_KPRI: | 4024 | case BC_KPRI: |
@@ -4719,18 +4125,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4719 | case BC_USETN: | 4125 | case BC_USETN: |
4720 | | ins_AD // RA = upvalue #, RD = num const | 4126 | | ins_AD // RA = upvalue #, RD = num const |
4721 | | mov LFUNC:RB, [BASE-8] | 4127 | | mov LFUNC:RB, [BASE-8] |
4722 | |.if SSE | ||
4723 | | movsd xmm0, qword [KBASE+RD*8] | 4128 | | movsd xmm0, qword [KBASE+RD*8] |
4724 | |.else | ||
4725 | | fld qword [KBASE+RD*8] | ||
4726 | |.endif | ||
4727 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 4129 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
4728 | | mov RA, UPVAL:RB->v | 4130 | | mov RA, UPVAL:RB->v |
4729 | |.if SSE | ||
4730 | | movsd qword [RA], xmm0 | 4131 | | movsd qword [RA], xmm0 |
4731 | |.else | ||
4732 | | fstp qword [RA] | ||
4733 | |.endif | ||
4734 | | ins_next | 4132 | | ins_next |
4735 | break; | 4133 | break; |
4736 | case BC_USETP: | 4134 | case BC_USETP: |
@@ -4884,18 +4282,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4884 | |.else | 4282 | |.else |
4885 | | // Convert number to int and back and compare. | 4283 | | // Convert number to int and back and compare. |
4886 | | checknum RC, >5 | 4284 | | checknum RC, >5 |
4887 | |.if SSE | ||
4888 | | movsd xmm0, qword [BASE+RC*8] | 4285 | | movsd xmm0, qword [BASE+RC*8] |
4889 | | cvtsd2si RC, xmm0 | 4286 | | cvttsd2si RC, xmm0 |
4890 | | cvtsi2sd xmm1, RC | 4287 | | cvtsi2sd xmm1, RC |
4891 | | ucomisd xmm0, xmm1 | 4288 | | ucomisd xmm0, xmm1 |
4892 | |.else | ||
4893 | | fld qword [BASE+RC*8] | ||
4894 | | fist ARG1 | ||
4895 | | fild ARG1 | ||
4896 | | fcomparepp | ||
4897 | | mov RC, ARG1 | ||
4898 | |.endif | ||
4899 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | 4289 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |
4900 | |.endif | 4290 | |.endif |
4901 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4291 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -4941,7 +4331,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4941 | | mov TAB:RB, [BASE+RB*8] | 4331 | | mov TAB:RB, [BASE+RB*8] |
4942 | |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | 4332 | |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. |
4943 | | mov RA, TAB:RB->hmask | 4333 | | mov RA, TAB:RB->hmask |
4944 | | and RA, STR:RC->hash | 4334 | | and RA, STR:RC->sid |
4945 | | imul RA, #NODE | 4335 | | imul RA, #NODE |
4946 | | add NODE:RA, TAB:RB->node | 4336 | | add NODE:RA, TAB:RB->node |
4947 | |1: | 4337 | |1: |
@@ -5019,6 +4409,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5019 | | mov dword [BASE+RA*8+4], LJ_TNIL | 4409 | | mov dword [BASE+RA*8+4], LJ_TNIL |
5020 | | jmp <1 | 4410 | | jmp <1 |
5021 | break; | 4411 | break; |
4412 | case BC_TGETR: | ||
4413 | | ins_ABC // RA = dst, RB = table, RC = key | ||
4414 | | mov TAB:RB, [BASE+RB*8] | ||
4415 | |.if DUALNUM | ||
4416 | | mov RC, dword [BASE+RC*8] | ||
4417 | |.else | ||
4418 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4419 | |.endif | ||
4420 | | cmp RC, TAB:RB->asize | ||
4421 | | jae ->vmeta_tgetr // Not in array part? Use fallback. | ||
4422 | | shl RC, 3 | ||
4423 | | add RC, TAB:RB->array | ||
4424 | | // Get array slot. | ||
4425 | |->BC_TGETR_Z: | ||
4426 | |.if X64 | ||
4427 | | mov RBa, [RC] | ||
4428 | | mov [BASE+RA*8], RBa | ||
4429 | |.else | ||
4430 | | mov RB, [RC] | ||
4431 | | mov RC, [RC+4] | ||
4432 | | mov [BASE+RA*8], RB | ||
4433 | | mov [BASE+RA*8+4], RC | ||
4434 | |.endif | ||
4435 | |->BC_TGETR2_Z: | ||
4436 | | ins_next | ||
4437 | break; | ||
5022 | 4438 | ||
5023 | case BC_TSETV: | 4439 | case BC_TSETV: |
5024 | | ins_ABC // RA = src, RB = table, RC = key | 4440 | | ins_ABC // RA = src, RB = table, RC = key |
@@ -5032,18 +4448,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5032 | |.else | 4448 | |.else |
5033 | | // Convert number to int and back and compare. | 4449 | | // Convert number to int and back and compare. |
5034 | | checknum RC, >5 | 4450 | | checknum RC, >5 |
5035 | |.if SSE | ||
5036 | | movsd xmm0, qword [BASE+RC*8] | 4451 | | movsd xmm0, qword [BASE+RC*8] |
5037 | | cvtsd2si RC, xmm0 | 4452 | | cvttsd2si RC, xmm0 |
5038 | | cvtsi2sd xmm1, RC | 4453 | | cvtsi2sd xmm1, RC |
5039 | | ucomisd xmm0, xmm1 | 4454 | | ucomisd xmm0, xmm1 |
5040 | |.else | ||
5041 | | fld qword [BASE+RC*8] | ||
5042 | | fist ARG1 | ||
5043 | | fild ARG1 | ||
5044 | | fcomparepp | ||
5045 | | mov RC, ARG1 | ||
5046 | |.endif | ||
5047 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | 4455 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |
5048 | |.endif | 4456 | |.endif |
5049 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4457 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -5094,7 +4502,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5094 | | mov TAB:RB, [BASE+RB*8] | 4502 | | mov TAB:RB, [BASE+RB*8] |
5095 | |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | 4503 | |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. |
5096 | | mov RA, TAB:RB->hmask | 4504 | | mov RA, TAB:RB->hmask |
5097 | | and RA, STR:RC->hash | 4505 | | and RA, STR:RC->sid |
5098 | | imul RA, #NODE | 4506 | | imul RA, #NODE |
5099 | | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | 4507 | | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. |
5100 | | add NODE:RA, TAB:RB->node | 4508 | | add NODE:RA, TAB:RB->node |
@@ -5213,6 +4621,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5213 | | movzx RA, PC_RA // Restore RA. | 4621 | | movzx RA, PC_RA // Restore RA. |
5214 | | jmp <2 | 4622 | | jmp <2 |
5215 | break; | 4623 | break; |
4624 | case BC_TSETR: | ||
4625 | | ins_ABC // RA = src, RB = table, RC = key | ||
4626 | | mov TAB:RB, [BASE+RB*8] | ||
4627 | |.if DUALNUM | ||
4628 | | mov RC, dword [BASE+RC*8] | ||
4629 | |.else | ||
4630 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4631 | |.endif | ||
4632 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
4633 | | jnz >7 | ||
4634 | |2: | ||
4635 | | cmp RC, TAB:RB->asize | ||
4636 | | jae ->vmeta_tsetr | ||
4637 | | shl RC, 3 | ||
4638 | | add RC, TAB:RB->array | ||
4639 | | // Set array slot. | ||
4640 | |->BC_TSETR_Z: | ||
4641 | |.if X64 | ||
4642 | | mov RBa, [BASE+RA*8] | ||
4643 | | mov [RC], RBa | ||
4644 | |.else | ||
4645 | | mov RB, [BASE+RA*8+4] | ||
4646 | | mov RA, [BASE+RA*8] | ||
4647 | | mov [RC+4], RB | ||
4648 | | mov [RC], RA | ||
4649 | |.endif | ||
4650 | | ins_next | ||
4651 | | | ||
4652 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4653 | | barrierback TAB:RB, RA | ||
4654 | | movzx RA, PC_RA // Restore RA. | ||
4655 | | jmp <2 | ||
4656 | break; | ||
5216 | 4657 | ||
5217 | case BC_TSETM: | 4658 | case BC_TSETM: |
5218 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | 4659 | | ins_AD // RA = base (table at base-1), RD = num const (start index) |
@@ -5389,10 +4830,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5389 | break; | 4830 | break; |
5390 | 4831 | ||
5391 | case BC_ITERN: | 4832 | case BC_ITERN: |
5392 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
5393 | |.if JIT | 4833 | |.if JIT |
5394 | | // NYI: add hotloop, record BC_ITERN. | 4834 | | hotloop RB |
5395 | |.endif | 4835 | |.endif |
4836 | |->vm_IITERN: | ||
4837 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
5396 | | mov TMP1, KBASE // Need two more free registers. | 4838 | | mov TMP1, KBASE // Need two more free registers. |
5397 | | mov TMP2, DISPATCH | 4839 | | mov TMP2, DISPATCH |
5398 | | mov TAB:RB, [BASE+RA*8-16] | 4840 | | mov TAB:RB, [BASE+RA*8-16] |
@@ -5406,10 +4848,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5406 | |.if DUALNUM | 4848 | |.if DUALNUM |
5407 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4849 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
5408 | | mov dword [BASE+RA*8], RC | 4850 | | mov dword [BASE+RA*8], RC |
5409 | |.elif SSE | ||
5410 | | cvtsi2sd xmm0, RC | ||
5411 | |.else | 4851 | |.else |
5412 | | fild dword [BASE+RA*8-8] | 4852 | | cvtsi2sd xmm0, RC |
5413 | |.endif | 4853 | |.endif |
5414 | | // Copy array slot to returned value. | 4854 | | // Copy array slot to returned value. |
5415 | |.if X64 | 4855 | |.if X64 |
@@ -5425,10 +4865,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5425 | | // Return array index as a numeric key. | 4865 | | // Return array index as a numeric key. |
5426 | |.if DUALNUM | 4866 | |.if DUALNUM |
5427 | | // See above. | 4867 | | // See above. |
5428 | |.elif SSE | ||
5429 | | movsd qword [BASE+RA*8], xmm0 | ||
5430 | |.else | 4868 | |.else |
5431 | | fstp qword [BASE+RA*8] | 4869 | | movsd qword [BASE+RA*8], xmm0 |
5432 | |.endif | 4870 | |.endif |
5433 | | mov [BASE+RA*8-8], RC // Update control var. | 4871 | | mov [BASE+RA*8-8], RC // Update control var. |
5434 | |2: | 4872 | |2: |
@@ -5441,9 +4879,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5441 | | | 4879 | | |
5442 | |4: // Skip holes in array part. | 4880 | |4: // Skip holes in array part. |
5443 | | add RC, 1 | 4881 | | add RC, 1 |
5444 | |.if not (DUALNUM or SSE) | ||
5445 | | mov [BASE+RA*8-8], RC | ||
5446 | |.endif | ||
5447 | | jmp <1 | 4882 | | jmp <1 |
5448 | | | 4883 | | |
5449 | |5: // Traverse hash part. | 4884 | |5: // Traverse hash part. |
@@ -5487,14 +4922,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5487 | | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 | 4922 | | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 |
5488 | | branchPC RD | 4923 | | branchPC RD |
5489 | | mov dword [BASE+RA*8-8], 0 // Initialize control var. | 4924 | | mov dword [BASE+RA*8-8], 0 // Initialize control var. |
5490 | | mov dword [BASE+RA*8-4], 0xfffe7fff | 4925 | | mov dword [BASE+RA*8-4], LJ_KEYINDEX |
5491 | |1: | 4926 | |1: |
5492 | | ins_next | 4927 | | ins_next |
5493 | |5: // Despecialize bytecode if any of the checks fail. | 4928 | |5: // Despecialize bytecode if any of the checks fail. |
5494 | | mov PC_OP, BC_JMP | 4929 | | mov PC_OP, BC_JMP |
5495 | | branchPC RD | 4930 | | branchPC RD |
4931 | |.if JIT | ||
4932 | | cmp byte [PC], BC_ITERN | ||
4933 | | jne >6 | ||
4934 | |.endif | ||
5496 | | mov byte [PC], BC_ITERC | 4935 | | mov byte [PC], BC_ITERC |
5497 | | jmp <1 | 4936 | | jmp <1 |
4937 | |.if JIT | ||
4938 | |6: // Unpatch JLOOP. | ||
4939 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
4940 | | movzx RC, word [PC+2] | ||
4941 | | mov TRACE:RA, [RA+RC*4] | ||
4942 | | mov eax, TRACE:RA->startins | ||
4943 | | mov al, BC_ITERC | ||
4944 | | mov dword [PC], eax | ||
4945 | | jmp <1 | ||
4946 | |.endif | ||
5498 | break; | 4947 | break; |
5499 | 4948 | ||
5500 | case BC_VARG: | 4949 | case BC_VARG: |
@@ -5777,7 +5226,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5777 | if (!vk) { | 5226 | if (!vk) { |
5778 | | cmp RB, LJ_TISNUM; jae ->vmeta_for | 5227 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
5779 | } | 5228 | } |
5780 | |.if SSE | ||
5781 | | movsd xmm0, qword FOR_IDX | 5229 | | movsd xmm0, qword FOR_IDX |
5782 | | movsd xmm1, qword FOR_STOP | 5230 | | movsd xmm1, qword FOR_STOP |
5783 | if (vk) { | 5231 | if (vk) { |
@@ -5790,22 +5238,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5790 | | ucomisd xmm1, xmm0 | 5238 | | ucomisd xmm1, xmm0 |
5791 | |1: | 5239 | |1: |
5792 | | movsd qword FOR_EXT, xmm0 | 5240 | | movsd qword FOR_EXT, xmm0 |
5793 | |.else | ||
5794 | | fld qword FOR_STOP | ||
5795 | | fld qword FOR_IDX | ||
5796 | if (vk) { | ||
5797 | | fadd qword FOR_STEP // nidx = idx + step | ||
5798 | | fst qword FOR_IDX | ||
5799 | | fst qword FOR_EXT | ||
5800 | | test RB, RB; js >1 | ||
5801 | } else { | ||
5802 | | fst qword FOR_EXT | ||
5803 | | jl >1 | ||
5804 | } | ||
5805 | | fxch // Swap lim/(n)idx if step non-negative. | ||
5806 | |1: | ||
5807 | | fcomparepp | ||
5808 | |.endif | ||
5809 | if (op == BC_FORI) { | 5241 | if (op == BC_FORI) { |
5810 | |.if DUALNUM | 5242 | |.if DUALNUM |
5811 | | jnb <7 | 5243 | | jnb <7 |
@@ -5833,11 +5265,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5833 | |2: | 5265 | |2: |
5834 | | ins_next | 5266 | | ins_next |
5835 | |.endif | 5267 | |.endif |
5836 | |.if SSE | 5268 | | |
5837 | |3: // Invert comparison if step is negative. | 5269 | |3: // Invert comparison if step is negative. |
5838 | | ucomisd xmm0, xmm1 | 5270 | | ucomisd xmm0, xmm1 |
5839 | | jmp <1 | 5271 | | jmp <1 |
5840 | |.endif | ||
5841 | break; | 5272 | break; |
5842 | 5273 | ||
5843 | case BC_ITERL: | 5274 | case BC_ITERL: |
@@ -5875,7 +5306,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5875 | | ins_A // RA = base, RD = target (loop extent) | 5306 | | ins_A // RA = base, RD = target (loop extent) |
5876 | | // Note: RA/RD is only used by trace recorder to determine scope/extent | 5307 | | // Note: RA/RD is only used by trace recorder to determine scope/extent |
5877 | | // This opcode does NOT jump, it's only purpose is to detect a hot loop. | 5308 | | // This opcode does NOT jump, it's only purpose is to detect a hot loop. |
5878 | |.if JIT | 5309 | |.if JIT |
5879 | | hotloop RB | 5310 | | hotloop RB |
5880 | |.endif | 5311 | |.endif |
5881 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | 5312 | | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. |
@@ -5894,7 +5325,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5894 | | mov RDa, TRACE:RD->mcode | 5325 | | mov RDa, TRACE:RD->mcode |
5895 | | mov L:RB, SAVE_L | 5326 | | mov L:RB, SAVE_L |
5896 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | 5327 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE |
5897 | | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB | 5328 | | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB |
5898 | | // Save additional callee-save registers only used in compiled code. | 5329 | | // Save additional callee-save registers only used in compiled code. |
5899 | |.if X64WIN | 5330 | |.if X64WIN |
5900 | | mov TMPQ, r12 | 5331 | | mov TMPQ, r12 |
@@ -6061,9 +5492,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
6061 | | // (lua_State *L, lua_CFunction f) | 5492 | | // (lua_State *L, lua_CFunction f) |
6062 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] | 5493 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] |
6063 | } | 5494 | } |
6064 | | set_vmstate INTERP | ||
6065 | | // nresults returned in eax (RD). | 5495 | | // nresults returned in eax (RD). |
6066 | | mov BASE, L:RB->base | 5496 | | mov BASE, L:RB->base |
5497 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
5498 | | set_vmstate INTERP | ||
6067 | | lea RA, [BASE+RD*8] | 5499 | | lea RA, [BASE+RD*8] |
6068 | | neg RA | 5500 | | neg RA |
6069 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | 5501 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 |
@@ -6176,7 +5608,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
6176 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | 5608 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); |
6177 | #endif | 5609 | #endif |
6178 | #if !LJ_NO_UNWIND | 5610 | #if !LJ_NO_UNWIND |
6179 | #if (defined(__sun__) && defined(__svr4__)) | 5611 | #if LJ_TARGET_SOLARIS |
6180 | #if LJ_64 | 5612 | #if LJ_64 |
6181 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); | 5613 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); |
6182 | #else | 5614 | #else |
@@ -6383,15 +5815,21 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
6383 | "LEFDEY:\n\n", fcsize); | 5815 | "LEFDEY:\n\n", fcsize); |
6384 | } | 5816 | } |
6385 | #endif | 5817 | #endif |
6386 | #if LJ_64 | 5818 | #if !LJ_64 |
6387 | fprintf(ctx->fp, "\t.subsections_via_symbols\n"); | ||
6388 | #else | ||
6389 | fprintf(ctx->fp, | 5819 | fprintf(ctx->fp, |
6390 | "\t.non_lazy_symbol_pointer\n" | 5820 | "\t.non_lazy_symbol_pointer\n" |
6391 | "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" | 5821 | "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" |
6392 | ".indirect_symbol _lj_err_unwind_dwarf\n" | 5822 | ".indirect_symbol _lj_err_unwind_dwarf\n" |
6393 | ".long 0\n"); | 5823 | ".long 0\n\n"); |
5824 | fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); | ||
5825 | { | ||
5826 | const char *const *xn; | ||
5827 | for (xn = ctx->extnames; *xn; xn++) | ||
5828 | if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) | ||
5829 | fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); | ||
5830 | } | ||
6394 | #endif | 5831 | #endif |
5832 | fprintf(ctx->fp, ".subsections_via_symbols\n"); | ||
6395 | } | 5833 | } |
6396 | break; | 5834 | break; |
6397 | #endif | 5835 | #endif |