summaryrefslogtreecommitdiff
path: root/src/vm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x86.dasc')
-rw-r--r--src/vm_x86.dasc1788
1 files changed, 613 insertions, 1175 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 5b3356dc..bda9d7d7 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,24 +115,74 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
122|.if not X64 // x86 stack layout. 122|.if not X64 // x86 stack layout.
123| 123|
124|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 124|.if WIN
125|
126|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
125|.macro saveregs_ 127|.macro saveregs_
126| push edi; push esi; push ebx 128| push edi; push esi; push ebx
129| push extern lj_err_unwind_win
130| fs; push dword [0]
131| fs; mov [0], esp
127| sub esp, CFRAME_SPACE 132| sub esp, CFRAME_SPACE
128|.endmacro 133|.endmacro
129|.macro saveregs 134|.macro restoreregs
130| push ebp; saveregs_ 135| add esp, CFRAME_SPACE
136| fs; pop dword [0]
137| pop edi // Short for esp += 4.
138| pop ebx; pop esi; pop edi; pop ebp
139|.endmacro
140|
141|.else
142|
143|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
144|.macro saveregs_
145| push edi; push esi; push ebx
146| sub esp, CFRAME_SPACE
131|.endmacro 147|.endmacro
132|.macro restoreregs 148|.macro restoreregs
133| add esp, CFRAME_SPACE 149| add esp, CFRAME_SPACE
134| pop ebx; pop esi; pop edi; pop ebp 150| pop ebx; pop esi; pop edi; pop ebp
135|.endmacro 151|.endmacro
136| 152|
153|.endif
154|
155|.macro saveregs
156| push ebp; saveregs_
157|.endmacro
158|
159|.if WIN
160|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
161|.define SAVE_NRES, aword [esp+aword*18]
162|.define SAVE_CFRAME, aword [esp+aword*17]
163|.define SAVE_L, aword [esp+aword*16]
164|//----- 16 byte aligned, ^^^ arguments from C caller
165|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
166|.define SAVE_R4, aword [esp+aword*14]
167|.define SAVE_R3, aword [esp+aword*13]
168|.define SAVE_R2, aword [esp+aword*12]
169|//----- 16 byte aligned
170|.define SAVE_R1, aword [esp+aword*11]
171|.define SEH_FUNC, aword [esp+aword*10]
172|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
173|.define UNUSED2, aword [esp+aword*8]
174|//----- 16 byte aligned
175|.define UNUSED1, aword [esp+aword*7]
176|.define SAVE_PC, aword [esp+aword*6]
177|.define TMP2, aword [esp+aword*5]
178|.define TMP1, aword [esp+aword*4]
179|//----- 16 byte aligned
180|.define ARG4, aword [esp+aword*3]
181|.define ARG3, aword [esp+aword*2]
182|.define ARG2, aword [esp+aword*1]
183|.define ARG1, aword [esp] //<-- esp while in interpreter.
184|//----- 16 byte aligned, ^^^ arguments for C callee
185|.else
137|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 186|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
138|.define SAVE_NRES, aword [esp+aword*14] 187|.define SAVE_NRES, aword [esp+aword*14]
139|.define SAVE_CFRAME, aword [esp+aword*13] 188|.define SAVE_CFRAME, aword [esp+aword*13]
@@ -154,6 +203,7 @@
154|.define ARG2, aword [esp+aword*1] 203|.define ARG2, aword [esp+aword*1]
155|.define ARG1, aword [esp] //<-- esp while in interpreter. 204|.define ARG1, aword [esp] //<-- esp while in interpreter.
156|//----- 16 byte aligned, ^^^ arguments for C callee 205|//----- 16 byte aligned, ^^^ arguments for C callee
206|.endif
157| 207|
158|// FPARGx overlaps ARGx and ARG(x+1) on x86. 208|// FPARGx overlaps ARGx and ARG(x+1) on x86.
159|.define FPARG3, qword [esp+qword*1] 209|.define FPARG3, qword [esp+qword*1]
@@ -389,7 +439,6 @@
389| fpop 439| fpop
390|.endmacro 440|.endmacro
391| 441|
392|.macro fdup; fld st0; .endmacro
393|.macro fpop1; fstp st1; .endmacro 442|.macro fpop1; fstp st1; .endmacro
394| 443|
395|// Synthesize SSE FP constants. 444|// Synthesize SSE FP constants.
@@ -552,6 +601,10 @@ static void build_subroutines(BuildCtx *ctx)
552 |.else 601 |.else
553 | mov eax, FCARG2 // Error return status for vm_pcall. 602 | mov eax, FCARG2 // Error return status for vm_pcall.
554 | mov esp, FCARG1 603 | mov esp, FCARG1
604 |.if WIN
605 | lea FCARG1, SEH_NEXT
606 | fs; mov [0], FCARG1
607 |.endif
555 |.endif 608 |.endif
556 |->vm_unwind_c_eh: // Landing pad for external unwinder. 609 |->vm_unwind_c_eh: // Landing pad for external unwinder.
557 | mov L:RB, SAVE_L 610 | mov L:RB, SAVE_L
@@ -575,6 +628,10 @@ static void build_subroutines(BuildCtx *ctx)
575 |.else 628 |.else
576 | and FCARG1, CFRAME_RAWMASK 629 | and FCARG1, CFRAME_RAWMASK
577 | mov esp, FCARG1 630 | mov esp, FCARG1
631 |.if WIN
632 | lea FCARG1, SEH_NEXT
633 | fs; mov [0], FCARG1
634 |.endif
578 |.endif 635 |.endif
579 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 636 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
580 | mov L:RB, SAVE_L 637 | mov L:RB, SAVE_L
@@ -588,6 +645,19 @@ static void build_subroutines(BuildCtx *ctx)
588 | set_vmstate INTERP 645 | set_vmstate INTERP
589 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 646 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
590 | 647 |
648 |.if WIN and not X64
649 |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
650 | // (void *cframe, void *excptrec, void *unwinder, int errcode)
651 | mov [esp], FCARG1 // Return value for RtlUnwind.
652 | push FCARG2 // Exception record for RtlUnwind.
653 | push 0 // Ignored by RtlUnwind.
654 | push dword [FCARG1+CFRAME_OFS_SEH]
655 | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
656 | mov FCARG1, eax
657 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
658 | ret // Jump to unwinder.
659 |.endif
660 |
591 |//----------------------------------------------------------------------- 661 |//-----------------------------------------------------------------------
592 |//-- Grow stack for calls ----------------------------------------------- 662 |//-- Grow stack for calls -----------------------------------------------
593 |//----------------------------------------------------------------------- 663 |//-----------------------------------------------------------------------
@@ -643,17 +713,18 @@ static void build_subroutines(BuildCtx *ctx)
643 | lea KBASEa, [esp+CFRAME_RESUME] 713 | lea KBASEa, [esp+CFRAME_RESUME]
644 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 714 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
645 | add DISPATCH, GG_G2DISP 715 | add DISPATCH, GG_G2DISP
646 | mov L:RB->cframe, KBASEa
647 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 716 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
648 | mov SAVE_CFRAME, RDa 717 | mov SAVE_CFRAME, RDa
649 |.if X64 718 |.if X64
650 | mov SAVE_NRES, RD 719 | mov SAVE_NRES, RD
651 | mov SAVE_ERRF, RD 720 | mov SAVE_ERRF, RD
652 |.endif 721 |.endif
722 | mov L:RB->cframe, KBASEa
653 | cmp byte L:RB->status, RDL 723 | cmp byte L:RB->status, RDL
654 | je >3 // Initial resume (like a call). 724 | je >2 // Initial resume (like a call).
655 | 725 |
656 | // Resume after yield (like a return). 726 | // Resume after yield (like a return).
727 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
657 | set_vmstate INTERP 728 | set_vmstate INTERP
658 | mov byte L:RB->status, RDL 729 | mov byte L:RB->status, RDL
659 | mov BASE, L:RB->base 730 | mov BASE, L:RB->base
@@ -693,20 +764,19 @@ static void build_subroutines(BuildCtx *ctx)
693 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 764 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
694 |.endif 765 |.endif
695 | 766 |
767 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
696 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 768 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
697 | mov SAVE_CFRAME, KBASEa 769 | mov SAVE_CFRAME, KBASEa
698 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 770 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
771 | add DISPATCH, GG_G2DISP
699 |.if X64 772 |.if X64
700 | mov L:RB->cframe, rsp 773 | mov L:RB->cframe, rsp
701 |.else 774 |.else
702 | mov L:RB->cframe, esp 775 | mov L:RB->cframe, esp
703 |.endif 776 |.endif
704 | 777 |
705 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 778 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
706 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 779 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
707 | add DISPATCH, GG_G2DISP
708 |
709 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
710 | set_vmstate INTERP 780 | set_vmstate INTERP
711 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 781 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
712 | add PC, RA 782 | add PC, RA
@@ -744,14 +814,17 @@ static void build_subroutines(BuildCtx *ctx)
744 | 814 |
745 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 815 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
746 | sub KBASE, L:RB->top 816 | sub KBASE, L:RB->top
817 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
747 | mov SAVE_ERRF, 0 // No error function. 818 | mov SAVE_ERRF, 0 // No error function.
748 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 819 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
820 | add DISPATCH, GG_G2DISP
749 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 821 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
750 | 822 |
751 |.if X64 823 |.if X64
752 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 824 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
753 | mov SAVE_CFRAME, KBASEa 825 | mov SAVE_CFRAME, KBASEa
754 | mov L:RB->cframe, rsp 826 | mov L:RB->cframe, rsp
827 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
755 | 828 |
756 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 829 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
757 |.else 830 |.else
@@ -762,6 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
762 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 835 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
763 | mov SAVE_CFRAME, KBASE 836 | mov SAVE_CFRAME, KBASE
764 | mov L:RB->cframe, esp 837 | mov L:RB->cframe, esp
838 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
765 | 839 |
766 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 840 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
767 |.endif 841 |.endif
@@ -869,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx)
869 |.if DUALNUM 943 |.if DUALNUM
870 | mov TMP2, LJ_TISNUM 944 | mov TMP2, LJ_TISNUM
871 | mov TMP1, RC 945 | mov TMP1, RC
872 |.elif SSE 946 |.else
873 | cvtsi2sd xmm0, RC 947 | cvtsi2sd xmm0, RC
874 | movsd TMPQ, xmm0 948 | movsd TMPQ, xmm0
875 |.else
876 | mov ARG4, RC
877 | fild ARG4
878 | fstp TMPQ
879 |.endif 949 |.endif
880 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 950 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
881 | jmp >1 951 | jmp >1
@@ -929,6 +999,19 @@ static void build_subroutines(BuildCtx *ctx)
929 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 999 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
930 | jmp ->vm_call_dispatch_f 1000 | jmp ->vm_call_dispatch_f
931 | 1001 |
1002 |->vmeta_tgetr:
1003 | mov FCARG1, TAB:RB
1004 | mov RB, BASE // Save BASE.
1005 | mov FCARG2, RC // Caveat: FCARG2 == BASE
1006 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1007 | // cTValue * or NULL returned in eax (RC).
1008 | movzx RA, PC_RA
1009 | mov BASE, RB // Restore BASE.
1010 | test RC, RC
1011 | jnz ->BC_TGETR_Z
1012 | mov dword [BASE+RA*8+4], LJ_TNIL
1013 | jmp ->BC_TGETR2_Z
1014 |
932 |//----------------------------------------------------------------------- 1015 |//-----------------------------------------------------------------------
933 | 1016 |
934 |->vmeta_tsets: 1017 |->vmeta_tsets:
@@ -948,13 +1031,9 @@ static void build_subroutines(BuildCtx *ctx)
948 |.if DUALNUM 1031 |.if DUALNUM
949 | mov TMP2, LJ_TISNUM 1032 | mov TMP2, LJ_TISNUM
950 | mov TMP1, RC 1033 | mov TMP1, RC
951 |.elif SSE 1034 |.else
952 | cvtsi2sd xmm0, RC 1035 | cvtsi2sd xmm0, RC
953 | movsd TMPQ, xmm0 1036 | movsd TMPQ, xmm0
954 |.else
955 | mov ARG4, RC
956 | fild ARG4
957 | fstp TMPQ
958 |.endif 1037 |.endif
959 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 1038 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
960 | jmp >1 1039 | jmp >1
@@ -1020,6 +1099,33 @@ static void build_subroutines(BuildCtx *ctx)
1020 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1099 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1021 | jmp ->vm_call_dispatch_f 1100 | jmp ->vm_call_dispatch_f
1022 | 1101 |
1102 |->vmeta_tsetr:
1103 |.if X64WIN
1104 | mov L:CARG1d, SAVE_L
1105 | mov CARG3d, RC
1106 | mov L:CARG1d->base, BASE
1107 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1108 |.elif X64
1109 | mov L:CARG1d, SAVE_L
1110 | mov CARG2d, TAB:RB
1111 | mov L:CARG1d->base, BASE
1112 | mov RB, BASE // Save BASE.
1113 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1114 |.else
1115 | mov L:RA, SAVE_L
1116 | mov ARG2, TAB:RB
1117 | mov RB, BASE // Save BASE.
1118 | mov ARG3, RC
1119 | mov ARG1, L:RA
1120 | mov L:RA->base, BASE
1121 |.endif
1122 | mov SAVE_PC, PC
1123 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1124 | // TValue * returned in eax (RC).
1125 | movzx RA, PC_RA
1126 | mov BASE, RB // Restore BASE.
1127 | jmp ->BC_TSETR_Z
1128 |
1023 |//-- Comparison metamethods --------------------------------------------- 1129 |//-- Comparison metamethods ---------------------------------------------
1024 | 1130 |
1025 |->vmeta_comp: 1131 |->vmeta_comp:
@@ -1114,6 +1220,26 @@ static void build_subroutines(BuildCtx *ctx)
1114 | jmp <3 1220 | jmp <3
1115 |.endif 1221 |.endif
1116 | 1222 |
1223 |->vmeta_istype:
1224 |.if X64
1225 | mov L:RB, SAVE_L
1226 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1227 | mov CARG2d, RA
1228 | movzx CARG3d, PC_RD
1229 | mov L:CARG1d, L:RB
1230 |.else
1231 | movzx RD, PC_RD
1232 | mov ARG2, RA
1233 | mov L:RB, SAVE_L
1234 | mov ARG3, RD
1235 | mov ARG1, L:RB
1236 | mov L:RB->base, BASE
1237 |.endif
1238 | mov SAVE_PC, PC
1239 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1240 | mov BASE, L:RB->base
1241 | jmp <6
1242 |
1117 |//-- Arithmetic metamethods --------------------------------------------- 1243 |//-- Arithmetic metamethods ---------------------------------------------
1118 | 1244 |
1119 |->vmeta_arith_vno: 1245 |->vmeta_arith_vno:
@@ -1290,19 +1416,6 @@ static void build_subroutines(BuildCtx *ctx)
1290 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1416 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1291 |.endmacro 1417 |.endmacro
1292 | 1418 |
1293 |.macro .ffunc_n, name
1294 | .ffunc_1 name
1295 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1296 | fld qword [BASE]
1297 |.endmacro
1298 |
1299 |.macro .ffunc_n, name, op
1300 | .ffunc_1 name
1301 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1302 | op
1303 | fld qword [BASE]
1304 |.endmacro
1305 |
1306 |.macro .ffunc_nsse, name, op 1419 |.macro .ffunc_nsse, name, op
1307 | .ffunc_1 name 1420 | .ffunc_1 name
1308 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1421 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1313,14 +1426,6 @@ static void build_subroutines(BuildCtx *ctx)
1313 | .ffunc_nsse name, movsd 1426 | .ffunc_nsse name, movsd
1314 |.endmacro 1427 |.endmacro
1315 | 1428 |
1316 |.macro .ffunc_nn, name
1317 | .ffunc_2 name
1318 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1319 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1320 | fld qword [BASE]
1321 | fld qword [BASE+8]
1322 |.endmacro
1323 |
1324 |.macro .ffunc_nnsse, name 1429 |.macro .ffunc_nnsse, name
1325 | .ffunc_2 name 1430 | .ffunc_2 name
1326 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1431 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1418,7 +1523,7 @@ static void build_subroutines(BuildCtx *ctx)
1418 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. 1523 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1419 | mov [BASE-8], TAB:RB 1524 | mov [BASE-8], TAB:RB
1420 | mov RA, TAB:RB->hmask 1525 | mov RA, TAB:RB->hmask
1421 | and RA, STR:RC->hash 1526 | and RA, STR:RC->sid
1422 | imul RA, #NODE 1527 | imul RA, #NODE
1423 | add NODE:RA, TAB:RB->node 1528 | add NODE:RA, TAB:RB->node
1424 |3: // Rearranged logic, because we expect _not_ to find the key. 1529 |3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1526,11 +1631,7 @@ static void build_subroutines(BuildCtx *ctx)
1526 |.else 1631 |.else
1527 | jae ->fff_fallback 1632 | jae ->fff_fallback
1528 |.endif 1633 |.endif
1529 |.if SSE
1530 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1634 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1531 |.else
1532 | fld qword [BASE]; jmp ->fff_resn
1533 |.endif
1534 | 1635 |
1535 |.ffunc_1 tostring 1636 |.ffunc_1 tostring
1536 | // Only handles the string or number case inline. 1637 | // Only handles the string or number case inline.
@@ -1555,9 +1656,9 @@ static void build_subroutines(BuildCtx *ctx)
1555 |.endif 1656 |.endif
1556 | mov L:FCARG1, L:RB 1657 | mov L:FCARG1, L:RB
1557 |.if DUALNUM 1658 |.if DUALNUM
1558 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1659 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1559 |.else 1660 |.else
1560 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1661 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1561 |.endif 1662 |.endif
1562 | // GCstr returned in eax (RD). 1663 | // GCstr returned in eax (RD).
1563 | mov BASE, L:RB->base 1664 | mov BASE, L:RB->base
@@ -1569,55 +1670,35 @@ static void build_subroutines(BuildCtx *ctx)
1569 | je >2 // Missing 2nd arg? 1670 | je >2 // Missing 2nd arg?
1570 |1: 1671 |1:
1571 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1672 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1572 | mov L:RB, SAVE_L
1573 | mov L:RB->base, BASE // Add frame since C call can throw.
1574 | mov L:RB->top, BASE // Dummy frame length is ok.
1575 | mov PC, [BASE-4] 1673 | mov PC, [BASE-4]
1674 | mov RB, BASE // Save BASE.
1576 |.if X64WIN 1675 |.if X64WIN
1577 | lea CARG3d, [BASE+8] 1676 | mov CARG1d, [BASE]
1578 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. 1677 | lea CARG3d, [BASE-8]
1579 | mov CARG1d, L:RB 1678 | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE.
1580 |.elif X64 1679 |.elif X64
1581 | mov CARG2d, [BASE] 1680 | mov CARG1d, [BASE]
1582 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. 1681 | lea CARG2d, [BASE+8]
1583 | mov CARG1d, L:RB 1682 | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE.
1584 |.else 1683 |.else
1585 | mov TAB:RD, [BASE] 1684 | mov TAB:RD, [BASE]
1586 | mov ARG2, TAB:RD 1685 | mov ARG1, TAB:RD
1587 | mov ARG1, L:RB
1588 | add BASE, 8 1686 | add BASE, 8
1687 | mov ARG2, BASE
1688 | sub BASE, 8+8
1589 | mov ARG3, BASE 1689 | mov ARG3, BASE
1590 |.endif 1690 |.endif
1591 | mov SAVE_PC, PC // Needed for ITERN fallback. 1691 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1592 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1692 | // 1=found, 0=end, -1=error returned in eax (RD).
1593 | // Flag returned in eax (RD). 1693 | mov BASE, RB // Restore BASE.
1594 | mov BASE, L:RB->base 1694 | test RD, RD; jg ->fff_res2 // Found key/value.
1595 | test RD, RD; jz >3 // End of traversal? 1695 | js ->fff_fallback_2 // Invalid key.
1596 | // Copy key and value to results. 1696 | // End of traversal: return nil.
1597 |.if X64 1697 | mov dword [BASE-4], LJ_TNIL
1598 | mov RBa, [BASE+8] 1698 | jmp ->fff_res1
1599 | mov RDa, [BASE+16]
1600 | mov [BASE-8], RBa
1601 | mov [BASE], RDa
1602 |.else
1603 | mov RB, [BASE+8]
1604 | mov RD, [BASE+12]
1605 | mov [BASE-8], RB
1606 | mov [BASE-4], RD
1607 | mov RB, [BASE+16]
1608 | mov RD, [BASE+20]
1609 | mov [BASE], RB
1610 | mov [BASE+4], RD
1611 |.endif
1612 |->fff_res2:
1613 | mov RD, 1+2
1614 | jmp ->fff_res
1615 |2: // Set missing 2nd arg to nil. 1699 |2: // Set missing 2nd arg to nil.
1616 | mov dword [BASE+12], LJ_TNIL 1700 | mov dword [BASE+12], LJ_TNIL
1617 | jmp <1 1701 | jmp <1
1618 |3: // End of traversal: return nil.
1619 | mov dword [BASE-4], LJ_TNIL
1620 | jmp ->fff_res1
1621 | 1702 |
1622 |.ffunc_1 pairs 1703 |.ffunc_1 pairs
1623 | mov TAB:RB, [BASE] 1704 | mov TAB:RB, [BASE]
@@ -1648,19 +1729,12 @@ static void build_subroutines(BuildCtx *ctx)
1648 | add RD, 1 1729 | add RD, 1
1649 | mov dword [BASE-4], LJ_TISNUM 1730 | mov dword [BASE-4], LJ_TISNUM
1650 | mov dword [BASE-8], RD 1731 | mov dword [BASE-8], RD
1651 |.elif SSE 1732 |.else
1652 | movsd xmm0, qword [BASE+8] 1733 | movsd xmm0, qword [BASE+8]
1653 | sseconst_1 xmm1, RBa 1734 | sseconst_1 xmm1, RBa
1654 | addsd xmm0, xmm1 1735 | addsd xmm0, xmm1
1655 | cvtsd2si RD, xmm0 1736 | cvttsd2si RD, xmm0
1656 | movsd qword [BASE-8], xmm0 1737 | movsd qword [BASE-8], xmm0
1657 |.else
1658 | fld qword [BASE+8]
1659 | fld1
1660 | faddp st1
1661 | fist ARG1
1662 | fstp qword [BASE-8]
1663 | mov RD, ARG1
1664 |.endif 1738 |.endif
1665 | mov TAB:RB, [BASE] 1739 | mov TAB:RB, [BASE]
1666 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1740 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1678,7 +1752,9 @@ static void build_subroutines(BuildCtx *ctx)
1678 | mov [BASE], RB 1752 | mov [BASE], RB
1679 | mov [BASE+4], RD 1753 | mov [BASE+4], RD
1680 |.endif 1754 |.endif
1681 | jmp ->fff_res2 1755 |->fff_res2:
1756 | mov RD, 1+2
1757 | jmp ->fff_res
1682 |2: // Check for empty hash part first. Otherwise call C function. 1758 |2: // Check for empty hash part first. Otherwise call C function.
1683 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 1759 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1684 | mov FCARG1, TAB:RB 1760 | mov FCARG1, TAB:RB
@@ -1707,12 +1783,9 @@ static void build_subroutines(BuildCtx *ctx)
1707 |.if DUALNUM 1783 |.if DUALNUM
1708 | mov dword [BASE+12], LJ_TISNUM 1784 | mov dword [BASE+12], LJ_TISNUM
1709 | mov dword [BASE+8], 0 1785 | mov dword [BASE+8], 0
1710 |.elif SSE 1786 |.else
1711 | xorps xmm0, xmm0 1787 | xorps xmm0, xmm0
1712 | movsd qword [BASE+8], xmm0 1788 | movsd qword [BASE+8], xmm0
1713 |.else
1714 | fldz
1715 | fstp qword [BASE+8]
1716 |.endif 1789 |.endif
1717 | mov RD, 1+3 1790 | mov RD, 1+3
1718 | jmp ->fff_res 1791 | jmp ->fff_res
@@ -1819,7 +1892,6 @@ static void build_subroutines(BuildCtx *ctx)
1819 | mov ARG3, RA 1892 | mov ARG3, RA
1820 |.endif 1893 |.endif
1821 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1894 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1822 | set_vmstate INTERP
1823 | 1895 |
1824 | mov L:RB, SAVE_L 1896 | mov L:RB, SAVE_L
1825 |.if X64 1897 |.if X64
@@ -1828,6 +1900,9 @@ static void build_subroutines(BuildCtx *ctx)
1828 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1900 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1829 |.endif 1901 |.endif
1830 | mov BASE, L:RB->base 1902 | mov BASE, L:RB->base
1903 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1904 | set_vmstate INTERP
1905 |
1831 | cmp eax, LUA_YIELD 1906 | cmp eax, LUA_YIELD
1832 | ja >8 1907 | ja >8
1833 |4: 1908 |4:
@@ -1942,12 +2017,10 @@ static void build_subroutines(BuildCtx *ctx)
1942 |->fff_resi: // Dummy. 2017 |->fff_resi: // Dummy.
1943 |.endif 2018 |.endif
1944 | 2019 |
1945 |.if SSE
1946 |->fff_resn: 2020 |->fff_resn:
1947 | mov PC, [BASE-4] 2021 | mov PC, [BASE-4]
1948 | fstp qword [BASE-8] 2022 | fstp qword [BASE-8]
1949 | jmp ->fff_res1 2023 | jmp ->fff_res1
1950 |.endif
1951 | 2024 |
1952 | .ffunc_1 math_abs 2025 | .ffunc_1 math_abs
1953 |.if DUALNUM 2026 |.if DUALNUM
@@ -1971,8 +2044,6 @@ static void build_subroutines(BuildCtx *ctx)
1971 |.else 2044 |.else
1972 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2045 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1973 |.endif 2046 |.endif
1974 |
1975 |.if SSE
1976 | movsd xmm0, qword [BASE] 2047 | movsd xmm0, qword [BASE]
1977 | sseconst_abs xmm1, RDa 2048 | sseconst_abs xmm1, RDa
1978 | andps xmm0, xmm1 2049 | andps xmm0, xmm1
@@ -1980,15 +2051,6 @@ static void build_subroutines(BuildCtx *ctx)
1980 | mov PC, [BASE-4] 2051 | mov PC, [BASE-4]
1981 | movsd qword [BASE-8], xmm0 2052 | movsd qword [BASE-8], xmm0
1982 | // fallthrough 2053 | // fallthrough
1983 |.else
1984 | fld qword [BASE]
1985 | fabs
1986 | // fallthrough
1987 |->fff_resxmm0: // Dummy.
1988 |->fff_resn:
1989 | mov PC, [BASE-4]
1990 | fstp qword [BASE-8]
1991 |.endif
1992 | 2054 |
1993 |->fff_res1: 2055 |->fff_res1:
1994 | mov RD, 1+1 2056 | mov RD, 1+1
@@ -2015,6 +2077,12 @@ static void build_subroutines(BuildCtx *ctx)
2015 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2077 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
2016 | jmp ->vm_return 2078 | jmp ->vm_return
2017 | 2079 |
2080 |.if X64
2081 |.define fff_resfp, fff_resxmm0
2082 |.else
2083 |.define fff_resfp, fff_resn
2084 |.endif
2085 |
2018 |.macro math_round, func 2086 |.macro math_round, func
2019 | .ffunc math_ .. func 2087 | .ffunc math_ .. func
2020 |.if DUALNUM 2088 |.if DUALNUM
@@ -2025,107 +2093,75 @@ static void build_subroutines(BuildCtx *ctx)
2025 |.else 2093 |.else
2026 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2094 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2027 |.endif 2095 |.endif
2028 |.if SSE
2029 | movsd xmm0, qword [BASE] 2096 | movsd xmm0, qword [BASE]
2030 | call ->vm_ .. func 2097 | call ->vm_ .. func .. _sse
2031 | .if DUALNUM 2098 |.if DUALNUM
2032 | cvtsd2si RB, xmm0 2099 | cvttsd2si RB, xmm0
2033 | cmp RB, 0x80000000 2100 | cmp RB, 0x80000000
2034 | jne ->fff_resi 2101 | jne ->fff_resi
2035 | cvtsi2sd xmm1, RB 2102 | cvtsi2sd xmm1, RB
2036 | ucomisd xmm0, xmm1 2103 | ucomisd xmm0, xmm1
2037 | jp ->fff_resxmm0 2104 | jp ->fff_resxmm0
2038 | je ->fff_resi 2105 | je ->fff_resi
2039 | .endif
2040 | jmp ->fff_resxmm0
2041 |.else
2042 | fld qword [BASE]
2043 | call ->vm_ .. func
2044 | .if DUALNUM
2045 | fist ARG1
2046 | mov RB, ARG1
2047 | cmp RB, 0x80000000; jne >2
2048 | fdup
2049 | fild ARG1
2050 | fcomparepp
2051 | jp ->fff_resn
2052 | jne ->fff_resn
2053 |2:
2054 | fpop
2055 | jmp ->fff_resi
2056 | .else
2057 | jmp ->fff_resn
2058 | .endif
2059 |.endif 2106 |.endif
2107 | jmp ->fff_resxmm0
2060 |.endmacro 2108 |.endmacro
2061 | 2109 |
2062 | math_round floor 2110 | math_round floor
2063 | math_round ceil 2111 | math_round ceil
2064 | 2112 |
2065 |.if SSE
2066 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2113 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2067 |.else
2068 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2069 |.endif
2070 | 2114 |
2071 |.ffunc math_log 2115 |.ffunc math_log
2072 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2116 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2073 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2117 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2074 | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn 2118 | movsd xmm0, qword [BASE]
2075 | 2119 |.if not X64
2076 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2120 | movsd FPARG1, xmm0
2077 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2121 |.endif
2078 | 2122 | mov RB, BASE
2079 |.ffunc_n math_sin; fsin; jmp ->fff_resn 2123 | call extern log
2080 |.ffunc_n math_cos; fcos; jmp ->fff_resn 2124 | mov BASE, RB
2081 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn 2125 | jmp ->fff_resfp
2082 |
2083 |.ffunc_n math_asin
2084 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
2085 | jmp ->fff_resn
2086 |.ffunc_n math_acos
2087 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
2088 | jmp ->fff_resn
2089 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2090 | 2126 |
2091 |.macro math_extern, func 2127 |.macro math_extern, func
2092 |.if SSE
2093 | .ffunc_nsse math_ .. func 2128 | .ffunc_nsse math_ .. func
2094 | .if not X64 2129 |.if not X64
2095 | movsd FPARG1, xmm0 2130 | movsd FPARG1, xmm0
2096 | .endif
2097 |.else
2098 | .ffunc_n math_ .. func
2099 | fstp FPARG1
2100 |.endif 2131 |.endif
2101 | mov RB, BASE 2132 | mov RB, BASE
2102 | call extern lj_vm_ .. func 2133 | call extern func
2103 | mov BASE, RB 2134 | mov BASE, RB
2104 | .if X64 2135 | jmp ->fff_resfp
2105 | jmp ->fff_resxmm0
2106 | .else
2107 | jmp ->fff_resn
2108 | .endif
2109 |.endmacro 2136 |.endmacro
2110 | 2137 |
2138 |.macro math_extern2, func
2139 | .ffunc_nnsse math_ .. func
2140 |.if not X64
2141 | movsd FPARG1, xmm0
2142 | movsd FPARG3, xmm1
2143 |.endif
2144 | mov RB, BASE
2145 | call extern func
2146 | mov BASE, RB
2147 | jmp ->fff_resfp
2148 |.endmacro
2149 |
2150 | math_extern log10
2151 | math_extern exp
2152 | math_extern sin
2153 | math_extern cos
2154 | math_extern tan
2155 | math_extern asin
2156 | math_extern acos
2157 | math_extern atan
2111 | math_extern sinh 2158 | math_extern sinh
2112 | math_extern cosh 2159 | math_extern cosh
2113 | math_extern tanh 2160 | math_extern tanh
2161 | math_extern2 pow
2162 | math_extern2 atan2
2163 | math_extern2 fmod
2114 | 2164 |
2115 |->ff_math_deg:
2116 |.if SSE
2117 |.ffunc_nsse math_rad
2118 | mov CFUNC:RB, [BASE-8]
2119 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2120 | jmp ->fff_resxmm0
2121 |.else
2122 |.ffunc_n math_rad
2123 | mov CFUNC:RB, [BASE-8]
2124 | fmul qword CFUNC:RB->upvalue[0]
2125 | jmp ->fff_resn
2126 |.endif
2127 |
2128 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2129 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2165 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2130 | 2166 |
2131 |.ffunc_1 math_frexp 2167 |.ffunc_1 math_frexp
@@ -2140,65 +2176,34 @@ static void build_subroutines(BuildCtx *ctx)
2140 | cmp RB, 0x00200000; jb >4 2176 | cmp RB, 0x00200000; jb >4
2141 |1: 2177 |1:
2142 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2178 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2143 |.if SSE
2144 | cvtsi2sd xmm0, RB 2179 | cvtsi2sd xmm0, RB
2145 |.else
2146 | mov TMP1, RB; fild TMP1
2147 |.endif
2148 | mov RB, [BASE-4] 2180 | mov RB, [BASE-4]
2149 | and RB, 0x800fffff // Mask off exponent. 2181 | and RB, 0x800fffff // Mask off exponent.
2150 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2182 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2151 | mov [BASE-4], RB 2183 | mov [BASE-4], RB
2152 |2: 2184 |2:
2153 |.if SSE
2154 | movsd qword [BASE], xmm0 2185 | movsd qword [BASE], xmm0
2155 |.else
2156 | fstp qword [BASE]
2157 |.endif
2158 | mov RD, 1+2 2186 | mov RD, 1+2
2159 | jmp ->fff_res 2187 | jmp ->fff_res
2160 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2188 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2161 |.if SSE
2162 | xorps xmm0, xmm0; jmp <2 2189 | xorps xmm0, xmm0; jmp <2
2163 |.else
2164 | fldz; jmp <2
2165 |.endif
2166 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2190 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2167 |.if SSE
2168 | movsd xmm0, qword [BASE] 2191 | movsd xmm0, qword [BASE]
2169 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2192 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2170 | mulsd xmm0, xmm1 2193 | mulsd xmm0, xmm1
2171 | movsd qword [BASE-8], xmm0 2194 | movsd qword [BASE-8], xmm0
2172 |.else
2173 | fld qword [BASE]
2174 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2175 | fstp qword [BASE-8]
2176 |.endif
2177 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2195 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2178 | 2196 |
2179 |.if SSE
2180 |.ffunc_nsse math_modf 2197 |.ffunc_nsse math_modf
2181 |.else
2182 |.ffunc_n math_modf
2183 |.endif
2184 | mov RB, [BASE+4] 2198 | mov RB, [BASE+4]
2185 | mov PC, [BASE-4] 2199 | mov PC, [BASE-4]
2186 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2200 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2187 |.if SSE
2188 | movaps xmm4, xmm0 2201 | movaps xmm4, xmm0
2189 | call ->vm_trunc 2202 | call ->vm_trunc_sse
2190 | subsd xmm4, xmm0 2203 | subsd xmm4, xmm0
2191 |1: 2204 |1:
2192 | movsd qword [BASE-8], xmm0 2205 | movsd qword [BASE-8], xmm0
2193 | movsd qword [BASE], xmm4 2206 | movsd qword [BASE], xmm4
2194 |.else
2195 | fdup
2196 | call ->vm_trunc
2197 | fsub st1, st0
2198 |1:
2199 | fstp qword [BASE-8]
2200 | fstp qword [BASE]
2201 |.endif
2202 | mov RC, [BASE-4]; mov RB, [BASE+4] 2207 | mov RC, [BASE-4]; mov RB, [BASE+4]
2203 | xor RC, RB; js >3 // Need to adjust sign? 2208 | xor RC, RB; js >3 // Need to adjust sign?
2204 |2: 2209 |2:
@@ -2208,25 +2213,10 @@ static void build_subroutines(BuildCtx *ctx)
2208 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2213 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2209 | jmp <2 2214 | jmp <2
2210 |4: 2215 |4:
2211 |.if SSE
2212 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2216 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2213 |.else
2214 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2215 |.endif
2216 |
2217 |.ffunc_nnr math_fmod
2218 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2219 | fpop1
2220 | jmp ->fff_resn
2221 | 2217 |
2222 |.if SSE 2218 |.macro math_minmax, name, cmovop, sseop
2223 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 2219 | .ffunc_1 name
2224 |.else
2225 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2226 |.endif
2227 |
2228 |.macro math_minmax, name, cmovop, fcmovop, sseop
2229 | .ffunc name
2230 | mov RA, 2 2220 | mov RA, 2
2231 | cmp dword [BASE+4], LJ_TISNUM 2221 | cmp dword [BASE+4], LJ_TISNUM
2232 |.if DUALNUM 2222 |.if DUALNUM
@@ -2242,12 +2232,7 @@ static void build_subroutines(BuildCtx *ctx)
2242 |3: 2232 |3:
2243 | ja ->fff_fallback 2233 | ja ->fff_fallback
2244 | // Convert intermediate result to number and continue below. 2234 | // Convert intermediate result to number and continue below.
2245 |.if SSE
2246 | cvtsi2sd xmm0, RB 2235 | cvtsi2sd xmm0, RB
2247 |.else
2248 | mov TMP1, RB
2249 | fild TMP1
2250 |.endif
2251 | jmp >6 2236 | jmp >6
2252 |4: 2237 |4:
2253 | ja ->fff_fallback 2238 | ja ->fff_fallback
@@ -2255,7 +2240,6 @@ static void build_subroutines(BuildCtx *ctx)
2255 | jae ->fff_fallback 2240 | jae ->fff_fallback
2256 |.endif 2241 |.endif
2257 | 2242 |
2258 |.if SSE
2259 | movsd xmm0, qword [BASE] 2243 | movsd xmm0, qword [BASE]
2260 |5: // Handle numbers or integers. 2244 |5: // Handle numbers or integers.
2261 | cmp RA, RD; jae ->fff_resxmm0 2245 | cmp RA, RD; jae ->fff_resxmm0
@@ -2274,48 +2258,13 @@ static void build_subroutines(BuildCtx *ctx)
2274 | sseop xmm0, xmm1 2258 | sseop xmm0, xmm1
2275 | add RA, 1 2259 | add RA, 1
2276 | jmp <5 2260 | jmp <5
2277 |.else
2278 | fld qword [BASE]
2279 |5: // Handle numbers or integers.
2280 | cmp RA, RD; jae ->fff_resn
2281 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2282 |.if DUALNUM
2283 | jb >6
2284 | ja >9
2285 | fild dword [BASE+RA*8-8]
2286 | jmp >7
2287 |.else
2288 | jae >9
2289 |.endif
2290 |6:
2291 | fld qword [BASE+RA*8-8]
2292 |7:
2293 | fucomi st1; fcmovop st1; fpop1
2294 | add RA, 1
2295 | jmp <5
2296 |.endif
2297 |.endmacro 2261 |.endmacro
2298 | 2262 |
2299 | math_minmax math_min, cmovg, fcmovnbe, minsd 2263 | math_minmax math_min, cmovg, minsd
2300 | math_minmax math_max, cmovl, fcmovbe, maxsd 2264 | math_minmax math_max, cmovl, maxsd
2301 |.if not SSE
2302 |9:
2303 | fpop; jmp ->fff_fallback
2304 |.endif
2305 | 2265 |
2306 |//-- String library ----------------------------------------------------- 2266 |//-- String library -----------------------------------------------------
2307 | 2267 |
2308 |.ffunc_1 string_len
2309 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2310 | mov STR:RB, [BASE]
2311 |.if DUALNUM
2312 | mov RB, dword STR:RB->len; jmp ->fff_resi
2313 |.elif SSE
2314 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2315 |.else
2316 | fild dword STR:RB->len; jmp ->fff_resn
2317 |.endif
2318 |
2319 |.ffunc string_byte // Only handle the 1-arg case here. 2268 |.ffunc string_byte // Only handle the 1-arg case here.
2320 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2269 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2321 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2270 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2326,10 +2275,8 @@ static void build_subroutines(BuildCtx *ctx)
2326 | movzx RB, byte STR:RB[1] 2275 | movzx RB, byte STR:RB[1]
2327 |.if DUALNUM 2276 |.if DUALNUM
2328 | jmp ->fff_resi 2277 | jmp ->fff_resi
2329 |.elif SSE
2330 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2331 |.else 2278 |.else
2332 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2279 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2333 |.endif 2280 |.endif
2334 | 2281 |
2335 |.ffunc string_char // Only handle the 1-arg case here. 2282 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2341,16 +2288,11 @@ static void build_subroutines(BuildCtx *ctx)
2341 | mov RB, dword [BASE] 2288 | mov RB, dword [BASE]
2342 | cmp RB, 255; ja ->fff_fallback 2289 | cmp RB, 255; ja ->fff_fallback
2343 | mov TMP2, RB 2290 | mov TMP2, RB
2344 |.elif SSE 2291 |.else
2345 | jae ->fff_fallback 2292 | jae ->fff_fallback
2346 | cvttsd2si RB, qword [BASE] 2293 | cvttsd2si RB, qword [BASE]
2347 | cmp RB, 255; ja ->fff_fallback 2294 | cmp RB, 255; ja ->fff_fallback
2348 | mov TMP2, RB 2295 | mov TMP2, RB
2349 |.else
2350 | jae ->fff_fallback
2351 | fld qword [BASE]
2352 | fistp TMP2
2353 | cmp TMP2, 255; ja ->fff_fallback
2354 |.endif 2296 |.endif
2355 |.if X64 2297 |.if X64
2356 | mov TMP3, 1 2298 | mov TMP3, 1
@@ -2371,6 +2313,7 @@ static void build_subroutines(BuildCtx *ctx)
2371 |.endif 2313 |.endif
2372 | mov SAVE_PC, PC 2314 | mov SAVE_PC, PC
2373 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2315 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2316 |->fff_resstr:
2374 | // GCstr * returned in eax (RD). 2317 | // GCstr * returned in eax (RD).
2375 | mov BASE, L:RB->base 2318 | mov BASE, L:RB->base
2376 | mov PC, [BASE-4] 2319 | mov PC, [BASE-4]
@@ -2388,14 +2331,10 @@ static void build_subroutines(BuildCtx *ctx)
2388 | jne ->fff_fallback 2331 | jne ->fff_fallback
2389 | mov RB, dword [BASE+16] 2332 | mov RB, dword [BASE+16]
2390 | mov TMP2, RB 2333 | mov TMP2, RB
2391 |.elif SSE 2334 |.else
2392 | jae ->fff_fallback 2335 | jae ->fff_fallback
2393 | cvttsd2si RB, qword [BASE+16] 2336 | cvttsd2si RB, qword [BASE+16]
2394 | mov TMP2, RB 2337 | mov TMP2, RB
2395 |.else
2396 | jae ->fff_fallback
2397 | fld qword [BASE+16]
2398 | fistp TMP2
2399 |.endif 2338 |.endif
2400 |1: 2339 |1:
2401 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2340 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2410,12 +2349,8 @@ static void build_subroutines(BuildCtx *ctx)
2410 | mov RB, STR:RB->len 2349 | mov RB, STR:RB->len
2411 |.if DUALNUM 2350 |.if DUALNUM
2412 | mov RA, dword [BASE+8] 2351 | mov RA, dword [BASE+8]
2413 |.elif SSE
2414 | cvttsd2si RA, qword [BASE+8]
2415 |.else 2352 |.else
2416 | fld qword [BASE+8] 2353 | cvttsd2si RA, qword [BASE+8]
2417 | fistp ARG3
2418 | mov RA, ARG3
2419 |.endif 2354 |.endif
2420 | mov RC, TMP2 2355 | mov RC, TMP2
2421 | cmp RB, RC // len < end? (unsigned compare) 2356 | cmp RB, RC // len < end? (unsigned compare)
@@ -2459,136 +2394,34 @@ static void build_subroutines(BuildCtx *ctx)
2459 | xor RC, RC // Zero length. Any ptr in RB is ok. 2394 | xor RC, RC // Zero length. Any ptr in RB is ok.
2460 | jmp <4 2395 | jmp <4
2461 | 2396 |
2462 |.ffunc string_rep // Only handle the 1-char case inline. 2397 |.macro ffstring_op, name
2398 | .ffunc_1 string_ .. name
2463 | ffgccheck 2399 | ffgccheck
2464 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2465 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2400 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2466 | cmp dword [BASE+12], LJ_TISNUM 2401 | mov L:RB, SAVE_L
2467 | mov STR:RB, [BASE] 2402 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2468 |.if DUALNUM 2403 | mov L:RB->base, BASE
2469 | jne ->fff_fallback 2404 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2470 | mov RC, dword [BASE+8] 2405 | mov RCa, SBUF:FCARG1->b
2471 |.elif SSE 2406 | mov SBUF:FCARG1->L, L:RB
2472 | jae ->fff_fallback 2407 | mov SBUF:FCARG1->w, RCa
2473 | cvttsd2si RC, qword [BASE+8] 2408 | mov SAVE_PC, PC
2474 |.else 2409 | call extern lj_buf_putstr_ .. name .. @8
2475 | jae ->fff_fallback 2410 | mov FCARG1, eax
2476 | fld qword [BASE+8] 2411 | call extern lj_buf_tostr@4
2477 | fistp TMP2 2412 | jmp ->fff_resstr
2478 | mov RC, TMP2
2479 |.endif
2480 | test RC, RC
2481 | jle ->fff_emptystr // Count <= 0? (or non-int)
2482 | cmp dword STR:RB->len, 1
2483 | jb ->fff_emptystr // Zero length string?
2484 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2485 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2486 | movzx RA, byte STR:RB[1]
2487 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2488 |.if X64
2489 | mov TMP3, RC
2490 |.else
2491 | mov ARG3, RC
2492 |.endif
2493 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2494 | mov [RB], RAL
2495 | add RB, 1
2496 | sub RC, 1
2497 | jnz <1
2498 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2499 | jmp ->fff_newstr
2500 |
2501 |.ffunc_1 string_reverse
2502 | ffgccheck
2503 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2504 | mov STR:RB, [BASE]
2505 | mov RC, STR:RB->len
2506 | test RC, RC
2507 | jz ->fff_emptystr // Zero length string?
2508 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2509 | add RB, #STR
2510 | mov TMP2, PC // Need another temp register.
2511 |.if X64
2512 | mov TMP3, RC
2513 |.else
2514 | mov ARG3, RC
2515 |.endif
2516 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2517 |1:
2518 | movzx RA, byte [RB]
2519 | add RB, 1
2520 | sub RC, 1
2521 | mov [PC+RC], RAL
2522 | jnz <1
2523 | mov RD, PC
2524 | mov PC, TMP2
2525 | jmp ->fff_newstr
2526 |
2527 |.macro ffstring_case, name, lo, hi
2528 | .ffunc_1 name
2529 | ffgccheck
2530 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2531 | mov STR:RB, [BASE]
2532 | mov RC, STR:RB->len
2533 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2534 | add RB, #STR
2535 | mov TMP2, PC // Need another temp register.
2536 |.if X64
2537 | mov TMP3, RC
2538 |.else
2539 | mov ARG3, RC
2540 |.endif
2541 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2542 | jmp >3
2543 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2544 | movzx RA, byte [RB+RC]
2545 | cmp RA, lo
2546 | jb >2
2547 | cmp RA, hi
2548 | ja >2
2549 | xor RA, 0x20
2550 |2:
2551 | mov [PC+RC], RAL
2552 |3:
2553 | sub RC, 1
2554 | jns <1
2555 | mov RD, PC
2556 | mov PC, TMP2
2557 | jmp ->fff_newstr
2558 |.endmacro 2413 |.endmacro
2559 | 2414 |
2560 |ffstring_case string_lower, 0x41, 0x5a 2415 |ffstring_op reverse
2561 |ffstring_case string_upper, 0x61, 0x7a 2416 |ffstring_op lower
2562 | 2417 |ffstring_op upper
2563 |//-- Table library ------------------------------------------------------
2564 |
2565 |.ffunc_1 table_getn
2566 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2567 | mov RB, BASE // Save BASE.
2568 | mov TAB:FCARG1, [BASE]
2569 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2570 | // Length of table returned in eax (RD).
2571 | mov BASE, RB // Restore BASE.
2572 |.if DUALNUM
2573 | mov RB, RD; jmp ->fff_resi
2574 |.elif SSE
2575 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2576 |.else
2577 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2578 |.endif
2579 | 2418 |
2580 |//-- Bit library -------------------------------------------------------- 2419 |//-- Bit library --------------------------------------------------------
2581 | 2420 |
2582 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2583 |
2584 |.macro .ffunc_bit, name, kind, fdef 2421 |.macro .ffunc_bit, name, kind, fdef
2585 | fdef name 2422 | fdef name
2586 |.if kind == 2 2423 |.if kind == 2
2587 |.if SSE
2588 | sseconst_tobit xmm1, RBa 2424 | sseconst_tobit xmm1, RBa
2589 |.else
2590 | mov TMP1, TOBIT_BIAS
2591 |.endif
2592 |.endif 2425 |.endif
2593 | cmp dword [BASE+4], LJ_TISNUM 2426 | cmp dword [BASE+4], LJ_TISNUM
2594 |.if DUALNUM 2427 |.if DUALNUM
@@ -2604,24 +2437,12 @@ static void build_subroutines(BuildCtx *ctx)
2604 |.else 2437 |.else
2605 | jae ->fff_fallback 2438 | jae ->fff_fallback
2606 |.endif 2439 |.endif
2607 |.if SSE
2608 | movsd xmm0, qword [BASE] 2440 | movsd xmm0, qword [BASE]
2609 |.if kind < 2 2441 |.if kind < 2
2610 | sseconst_tobit xmm1, RBa 2442 | sseconst_tobit xmm1, RBa
2611 |.endif 2443 |.endif
2612 | addsd xmm0, xmm1 2444 | addsd xmm0, xmm1
2613 | movd RB, xmm0 2445 | movd RB, xmm0
2614 |.else
2615 | fld qword [BASE]
2616 |.if kind < 2
2617 | mov TMP1, TOBIT_BIAS
2618 |.endif
2619 | fadd TMP1
2620 | fstp FPARG1
2621 |.if kind > 0
2622 | mov RB, ARG1
2623 |.endif
2624 |.endif
2625 |2: 2446 |2:
2626 |.endmacro 2447 |.endmacro
2627 | 2448 |
@@ -2630,15 +2451,7 @@ static void build_subroutines(BuildCtx *ctx)
2630 |.endmacro 2451 |.endmacro
2631 | 2452 |
2632 |.ffunc_bit bit_tobit, 0 2453 |.ffunc_bit bit_tobit, 0
2633 |.if DUALNUM or SSE
2634 |.if not SSE
2635 | mov RB, ARG1
2636 |.endif
2637 | jmp ->fff_resbit 2454 | jmp ->fff_resbit
2638 |.else
2639 | fild ARG1
2640 | jmp ->fff_resn
2641 |.endif
2642 | 2455 |
2643 |.macro .ffunc_bit_op, name, ins 2456 |.macro .ffunc_bit_op, name, ins
2644 | .ffunc_bit name, 2 2457 | .ffunc_bit name, 2
@@ -2658,17 +2471,10 @@ static void build_subroutines(BuildCtx *ctx)
2658 |.else 2471 |.else
2659 | jae ->fff_fallback_bit_op 2472 | jae ->fff_fallback_bit_op
2660 |.endif 2473 |.endif
2661 |.if SSE
2662 | movsd xmm0, qword [RD] 2474 | movsd xmm0, qword [RD]
2663 | addsd xmm0, xmm1 2475 | addsd xmm0, xmm1
2664 | movd RA, xmm0 2476 | movd RA, xmm0
2665 | ins RB, RA 2477 | ins RB, RA
2666 |.else
2667 | fld qword [RD]
2668 | fadd TMP1
2669 | fstp FPARG1
2670 | ins RB, ARG1
2671 |.endif
2672 | sub RD, 8 2478 | sub RD, 8
2673 | jmp <1 2479 | jmp <1
2674 |.endmacro 2480 |.endmacro
@@ -2685,15 +2491,10 @@ static void build_subroutines(BuildCtx *ctx)
2685 | not RB 2491 | not RB
2686 |.if DUALNUM 2492 |.if DUALNUM
2687 | jmp ->fff_resbit 2493 | jmp ->fff_resbit
2688 |.elif SSE 2494 |.else
2689 |->fff_resbit: 2495 |->fff_resbit:
2690 | cvtsi2sd xmm0, RB 2496 | cvtsi2sd xmm0, RB
2691 | jmp ->fff_resxmm0 2497 | jmp ->fff_resxmm0
2692 |.else
2693 |->fff_resbit:
2694 | mov ARG1, RB
2695 | fild ARG1
2696 | jmp ->fff_resn
2697 |.endif 2498 |.endif
2698 | 2499 |
2699 |->fff_fallback_bit_op: 2500 |->fff_fallback_bit_op:
@@ -2706,22 +2507,13 @@ static void build_subroutines(BuildCtx *ctx)
2706 | // Note: no inline conversion from number for 2nd argument! 2507 | // Note: no inline conversion from number for 2nd argument!
2707 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2508 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2708 | mov RA, dword [BASE+8] 2509 | mov RA, dword [BASE+8]
2709 |.elif SSE 2510 |.else
2710 | .ffunc_nnsse name 2511 | .ffunc_nnsse name
2711 | sseconst_tobit xmm2, RBa 2512 | sseconst_tobit xmm2, RBa
2712 | addsd xmm0, xmm2 2513 | addsd xmm0, xmm2
2713 | addsd xmm1, xmm2 2514 | addsd xmm1, xmm2
2714 | movd RB, xmm0 2515 | movd RB, xmm0
2715 | movd RA, xmm1 2516 | movd RA, xmm1
2716 |.else
2717 | .ffunc_nn name
2718 | mov TMP1, TOBIT_BIAS
2719 | fadd TMP1
2720 | fstp FPARG3
2721 | fadd TMP1
2722 | fstp FPARG1
2723 | mov RA, ARG3
2724 | mov RB, ARG1
2725 |.endif 2517 |.endif
2726 | ins RB, cl // Assumes RA is ecx. 2518 | ins RB, cl // Assumes RA is ecx.
2727 | jmp ->fff_resbit 2519 | jmp ->fff_resbit
@@ -2855,7 +2647,7 @@ static void build_subroutines(BuildCtx *ctx)
2855 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2647 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2856 | mov FCARG1, L:RB 2648 | mov FCARG1, L:RB
2857 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2649 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2858 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2650 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2859 |3: 2651 |3:
2860 | mov BASE, L:RB->base 2652 | mov BASE, L:RB->base
2861 |4: 2653 |4:
@@ -2926,6 +2718,79 @@ static void build_subroutines(BuildCtx *ctx)
2926 | add NARGS:RD, 1 2718 | add NARGS:RD, 1
2927 | jmp RBa 2719 | jmp RBa
2928 | 2720 |
2721 |->cont_stitch: // Trace stitching.
2722 |.if JIT
2723 | // BASE = base, RC = result, RB = mbase
2724 | mov TRACE:RA, [RB-24] // Save previous trace.
2725 | mov TMP1, TRACE:RA
2726 | mov TMP3, DISPATCH // Need one more register.
2727 | mov DISPATCH, MULTRES
2728 | movzx RA, PC_RA
2729 | lea RA, [BASE+RA*8] // Call base.
2730 | sub DISPATCH, 1
2731 | jz >2
2732 |1: // Move results down.
2733 |.if X64
2734 | mov RBa, [RC]
2735 | mov [RA], RBa
2736 |.else
2737 | mov RB, [RC]
2738 | mov [RA], RB
2739 | mov RB, [RC+4]
2740 | mov [RA+4], RB
2741 |.endif
2742 | add RC, 8
2743 | add RA, 8
2744 | sub DISPATCH, 1
2745 | jnz <1
2746 |2:
2747 | movzx RC, PC_RA
2748 | movzx RB, PC_RB
2749 | add RC, RB
2750 | lea RC, [BASE+RC*8-8]
2751 |3:
2752 | cmp RC, RA
2753 | ja >9 // More results wanted?
2754 |
2755 | mov DISPATCH, TMP3
2756 | mov TRACE:RD, TMP1 // Get previous trace.
2757 | movzx RB, word TRACE:RD->traceno
2758 | movzx RD, word TRACE:RD->link
2759 | cmp RD, RB
2760 | je ->cont_nop // Blacklisted.
2761 | test RD, RD
2762 | jne =>BC_JLOOP // Jump to stitched trace.
2763 |
2764 | // Stitch a new trace to the previous trace.
2765 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2766 | mov L:RB, SAVE_L
2767 | mov L:RB->base, BASE
2768 | mov FCARG2, PC
2769 | lea FCARG1, [DISPATCH+GG_DISP2J]
2770 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2771 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2772 | mov BASE, L:RB->base
2773 | jmp ->cont_nop
2774 |
2775 |9: // Fill up results with nil.
2776 | mov dword [RA+4], LJ_TNIL
2777 | add RA, 8
2778 | jmp <3
2779 |.endif
2780 |
2781 |->vm_profhook: // Dispatch target for profiler hook.
2782#if LJ_HASPROFILE
2783 | mov L:RB, SAVE_L
2784 | mov L:RB->base, BASE
2785 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2786 | mov FCARG1, L:RB
2787 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2788 | mov BASE, L:RB->base
2789 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2790 | sub PC, 4
2791 | jmp ->cont_nop
2792#endif
2793 |
2929 |//----------------------------------------------------------------------- 2794 |//-----------------------------------------------------------------------
2930 |//-- Trace exit handler ------------------------------------------------- 2795 |//-- Trace exit handler -------------------------------------------------
2931 |//----------------------------------------------------------------------- 2796 |//-----------------------------------------------------------------------
@@ -2978,10 +2843,9 @@ static void build_subroutines(BuildCtx *ctx)
2978 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2843 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2979 |.endif 2844 |.endif
2980 | // Caveat: RB is ebp. 2845 | // Caveat: RB is ebp.
2981 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2846 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2982 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2847 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2983 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2848 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2984 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2985 | mov L:RB->base, BASE 2849 | mov L:RB->base, BASE
2986 |.if X64WIN 2850 |.if X64WIN
2987 | lea CARG2, [rsp+4*8] 2851 | lea CARG2, [rsp+4*8]
@@ -2991,6 +2855,7 @@ static void build_subroutines(BuildCtx *ctx)
2991 | lea FCARG2, [esp+16] 2855 | lea FCARG2, [esp+16]
2992 |.endif 2856 |.endif
2993 | lea FCARG1, [DISPATCH+GG_DISP2J] 2857 | lea FCARG1, [DISPATCH+GG_DISP2J]
2858 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2994 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2859 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2995 | // MULTRES or negated error code returned in eax (RD). 2860 | // MULTRES or negated error code returned in eax (RD).
2996 | mov RAa, L:RB->cframe 2861 | mov RAa, L:RB->cframe
@@ -3037,12 +2902,14 @@ static void build_subroutines(BuildCtx *ctx)
3037 | mov r13, TMPa 2902 | mov r13, TMPa
3038 | mov r12, TMPQ 2903 | mov r12, TMPQ
3039 |.endif 2904 |.endif
3040 | test RD, RD; js >3 // Check for error from exit. 2905 | cmp RD, -LUA_ERRERR; jae >9 // Check for error from exit.
2906 | mov L:RB, SAVE_L
3041 | mov MULTRES, RD 2907 | mov MULTRES, RD
3042 | mov LFUNC:KBASE, [BASE-8] 2908 | mov LFUNC:KBASE, [BASE-8]
3043 | mov KBASE, LFUNC:KBASE->pc 2909 | mov KBASE, LFUNC:KBASE->pc
3044 | mov KBASE, [KBASE+PC2PROTO(k)] 2910 | mov KBASE, [KBASE+PC2PROTO(k)]
3045 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2911 | mov L:RB->base, BASE
2912 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3046 | set_vmstate INTERP 2913 | set_vmstate INTERP
3047 | // Modified copy of ins_next which handles function header dispatch, too. 2914 | // Modified copy of ins_next which handles function header dispatch, too.
3048 | mov RC, [PC] 2915 | mov RC, [PC]
@@ -3050,19 +2917,51 @@ static void build_subroutines(BuildCtx *ctx)
3050 | movzx OP, RCL 2917 | movzx OP, RCL
3051 | add PC, 4 2918 | add PC, 4
3052 | shr RC, 16 2919 | shr RC, 16
2920 | cmp MULTRES, -17 // Static dispatch?
2921 | je >5
3053 | cmp OP, BC_FUNCF // Function header? 2922 | cmp OP, BC_FUNCF // Function header?
3054 | jb >2 2923 | jb >3
3055 | mov RC, MULTRES // RC/RD holds nres+1. 2924 | cmp OP, BC_FUNCC+2 // Fast function?
2925 | jae >4
3056 |2: 2926 |2:
2927 | mov RC, MULTRES // RC/RD holds nres+1.
2928 |3:
3057 |.if X64 2929 |.if X64
3058 | jmp aword [DISPATCH+OP*8] 2930 | jmp aword [DISPATCH+OP*8]
3059 |.else 2931 |.else
3060 | jmp aword [DISPATCH+OP*4] 2932 | jmp aword [DISPATCH+OP*4]
3061 |.endif 2933 |.endif
3062 | 2934 |
3063 |3: // Rethrow error from the right C frame. 2935 |4: // Check frame below fast function.
2936 | mov RC, [BASE-4]
2937 | test RC, FRAME_TYPE
2938 | jnz <2 // Trace stitching continuation?
2939 | // Otherwise set KBASE for Lua function below fast function.
2940 | movzx RC, byte [RC-3]
2941 | not RCa
2942 | mov LFUNC:KBASE, [BASE+RC*8-8]
2943 | mov KBASE, LFUNC:KBASE->pc
2944 | mov KBASE, [KBASE+PC2PROTO(k)]
2945 | jmp <2
2946 |
2947 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2948 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2949 | mov TRACE:RA, [RA+RD*4]
2950 | mov RC, TRACE:RA->startins
2951 | movzx RA, RCH
2952 | movzx OP, RCL
2953 | shr RC, 16
2954 |.if X64
2955 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
2956 |.else
2957 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]
2958 |.endif
2959 |
2960 |9: // Rethrow error from the right C frame.
2961 | mov FCARG2, RD
3064 | mov FCARG1, L:RB 2962 | mov FCARG1, L:RB
3065 | call extern lj_err_run@4 // (lua_State *L) 2963 | neg FCARG2
2964 | call extern lj_err_trace@8 // (lua_State *L, int errcode)
3066 |.endif 2965 |.endif
3067 | 2966 |
3068 |//----------------------------------------------------------------------- 2967 |//-----------------------------------------------------------------------
@@ -3070,27 +2969,18 @@ static void build_subroutines(BuildCtx *ctx)
3070 |//----------------------------------------------------------------------- 2969 |//-----------------------------------------------------------------------
3071 | 2970 |
3072 |// FP value rounding. Called by math.floor/math.ceil fast functions 2971 |// FP value rounding. Called by math.floor/math.ceil fast functions
3073 |// and from JIT code. 2972 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3074 | 2973 |.macro vm_round, name, mode, cond
3075 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2974 |->name:
3076 |.macro vm_round_x87, mode1, mode2 2975 |.if not X64 and cond
3077 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 2976 | movsd xmm0, qword [esp+4]
3078 | mov [esp+8], eax 2977 | call ->name .. _sse
3079 | mov ax, mode1 2978 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
3080 | or ax, [esp+4] 2979 | fld qword [esp+4]
3081 |.if mode2 ~= 0xffff
3082 | and ax, mode2
3083 |.endif
3084 | mov [esp+6], ax
3085 | fldcw word [esp+6]
3086 | frndint
3087 | fldcw word [esp+4]
3088 | mov eax, [esp+8]
3089 | ret 2980 | ret
3090 |.endmacro 2981 |.endif
3091 | 2982 |
3092 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2983 |->name .. _sse:
3093 |.macro vm_round_sse, mode
3094 | sseconst_abs xmm2, RDa 2984 | sseconst_abs xmm2, RDa
3095 | sseconst_2p52 xmm3, RDa 2985 | sseconst_2p52 xmm3, RDa
3096 | movaps xmm1, xmm0 2986 | movaps xmm1, xmm0
@@ -3128,22 +3018,12 @@ static void build_subroutines(BuildCtx *ctx)
3128 | ret 3018 | ret
3129 |.endmacro 3019 |.endmacro
3130 | 3020 |
3131 |.macro vm_round, name, ssemode, mode1, mode2 3021 | vm_round vm_floor, 0, 1
3132 |->name: 3022 | vm_round vm_ceil, 1, JIT
3133 |.if not SSE 3023 | vm_round vm_trunc, 2, JIT
3134 | vm_round_x87 mode1, mode2
3135 |.endif
3136 |->name .. _sse:
3137 | vm_round_sse ssemode
3138 |.endmacro
3139 |
3140 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3141 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3142 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3143 | 3024 |
3144 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3025 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3145 |->vm_mod: 3026 |->vm_mod:
3146 |.if SSE
3147 |// Args in xmm0/xmm1, return value in xmm0. 3027 |// Args in xmm0/xmm1, return value in xmm0.
3148 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3028 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3149 | movaps xmm5, xmm0 3029 | movaps xmm5, xmm0
@@ -3171,488 +3051,6 @@ static void build_subroutines(BuildCtx *ctx)
3171 | movaps xmm0, xmm5 3051 | movaps xmm0, xmm5
3172 | subsd xmm0, xmm1 3052 | subsd xmm0, xmm1
3173 | ret 3053 | ret
3174 |.else
3175 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3176 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3177 | fld st1
3178 | fdiv st1
3179 | fnstcw word [esp+4]
3180 | mov ax, 0x0400
3181 | or ax, [esp+4]
3182 | and ax, 0xf7ff
3183 | mov [esp+6], ax
3184 | fldcw word [esp+6]
3185 | frndint
3186 | fldcw word [esp+4]
3187 | fmulp st1
3188 | fsubp st1
3189 | ret
3190 |.endif
3191 |
3192 |// FP log2(x). Called by math.log(x, base).
3193 |->vm_log2:
3194 |.if X64WIN
3195 | movsd qword [rsp+8], xmm0 // Use scratch area.
3196 | fld1
3197 | fld qword [rsp+8]
3198 | fyl2x
3199 | fstp qword [rsp+8]
3200 | movsd xmm0, qword [rsp+8]
3201 |.elif X64
3202 | movsd qword [rsp-8], xmm0 // Use red zone.
3203 | fld1
3204 | fld qword [rsp-8]
3205 | fyl2x
3206 | fstp qword [rsp-8]
3207 | movsd xmm0, qword [rsp-8]
3208 |.else
3209 | fld1
3210 | fld qword [esp+4]
3211 | fyl2x
3212 |.endif
3213 | ret
3214 |
3215 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3216 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3217 |// Caveat: needs 3 slots on x87 stack!
3218 |->vm_exp_x87:
3219 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3220 |->vm_exp2_x87:
3221 | .if X64WIN
3222 | .define expscratch, dword [rsp+8] // Use scratch area.
3223 | .elif X64
3224 | .define expscratch, dword [rsp-8] // Use red zone.
3225 | .else
3226 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3227 | .endif
3228 | fst expscratch // Caveat: overwrites ARG1.
3229 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3230 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3231 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3232 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3233 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3234 |1:
3235 | ret
3236 |2:
3237 | fpop; fldz; ret
3238 |
3239 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3240 |// and vm_arith.
3241 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3242 |// Caveat: needs 3 slots on x87 stack!
3243 |->vm_pow:
3244 |.if not SSE
3245 | fist dword [esp+4] // Store/reload int before comparison.
3246 | fild dword [esp+4] // Integral exponent used in vm_powi.
3247 | fucomip st1
3248 | jnz >8 // Branch for FP exponents.
3249 | jp >9 // Branch for NaN exponent.
3250 | fpop // Pop y and fallthrough to vm_powi.
3251 |
3252 |// FP/int power function x^i. Arg1/ret on x87 stack.
3253 |// Arg2 (int) on C stack. RC (eax) modified.
3254 |// Caveat: needs 2 slots on x87 stack!
3255 | mov eax, [esp+4]
3256 | cmp eax, 1; jle >6 // i<=1?
3257 | // Now 1 < (unsigned)i <= 0x80000000.
3258 |1: // Handle leading zeros.
3259 | test eax, 1; jnz >2
3260 | fmul st0
3261 | shr eax, 1
3262 | jmp <1
3263 |2:
3264 | shr eax, 1; jz >5
3265 | fdup
3266 |3: // Handle trailing bits.
3267 | fmul st0
3268 | shr eax, 1; jz >4
3269 | jnc <3
3270 | fmul st1, st0
3271 | jmp <3
3272 |4:
3273 | fmulp st1
3274 |5:
3275 | ret
3276 |6:
3277 | je <5 // x^1 ==> x
3278 | jb >7
3279 | fld1; fdivrp st1
3280 | neg eax
3281 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3282 | jmp <1 // x^-i ==> (1/x)^i
3283 |7:
3284 | fpop; fld1 // x^0 ==> 1
3285 | ret
3286 |
3287 |8: // FP/FP power function x^y.
3288 | fst dword [esp+4]
3289 | fxch
3290 | fst dword [esp+8]
3291 | mov eax, [esp+4]; shl eax, 1
3292 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3293 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3294 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3295 | fyl2x
3296 | jmp ->vm_exp2raw
3297 |
3298 |9: // Handle x^NaN.
3299 | fld1
3300 | fucomip st2
3301 | je >1 // 1^NaN ==> 1
3302 | fxch // x^NaN ==> NaN
3303 |1:
3304 | fpop
3305 | ret
3306 |
3307 |2: // Handle x^+-Inf.
3308 | fabs
3309 | fld1
3310 | fucomip st1
3311 | je >3 // +-1^+-Inf ==> 1
3312 | fpop; fabs; fldz; mov eax, 0; setc al
3313 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3314 | fxch
3315 |3:
3316 | fpop1; fabs
3317 | ret
3318 |
3319 |4: // Handle +-0^y or +-Inf^y.
3320 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3321 | fpop; fpop
3322 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3323 | fldz // y < 0, +-Inf^y ==> 0
3324 | ret
3325 |5:
3326 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3327 | fld dword [esp+4]
3328 | ret
3329 |.endif
3330 |
3331 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3332 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3333 |->vm_pow_sse:
3334 | cvtsd2si eax, xmm1
3335 | cvtsi2sd xmm2, eax
3336 | ucomisd xmm1, xmm2
3337 | jnz >8 // Branch for FP exponents.
3338 | jp >9 // Branch for NaN exponent.
3339 | // Fallthrough to vm_powi_sse.
3340 |
3341 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3342 |->vm_powi_sse:
3343 | cmp eax, 1; jle >6 // i<=1?
3344 | // Now 1 < (unsigned)i <= 0x80000000.
3345 |1: // Handle leading zeros.
3346 | test eax, 1; jnz >2
3347 | mulsd xmm0, xmm0
3348 | shr eax, 1
3349 | jmp <1
3350 |2:
3351 | shr eax, 1; jz >5
3352 | movaps xmm1, xmm0
3353 |3: // Handle trailing bits.
3354 | mulsd xmm0, xmm0
3355 | shr eax, 1; jz >4
3356 | jnc <3
3357 | mulsd xmm1, xmm0
3358 | jmp <3
3359 |4:
3360 | mulsd xmm0, xmm1
3361 |5:
3362 | ret
3363 |6:
3364 | je <5 // x^1 ==> x
3365 | jb >7 // x^0 ==> 1
3366 | neg eax
3367 | call <1
3368 | sseconst_1 xmm1, RDa
3369 | divsd xmm1, xmm0
3370 | movaps xmm0, xmm1
3371 | ret
3372 |7:
3373 | sseconst_1 xmm0, RDa
3374 | ret
3375 |
3376 |8: // FP/FP power function x^y.
3377 |.if X64
3378 | movd rax, xmm1; shl rax, 1 // Raw bits of y with the sign bit shifted out.
3379 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3380 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3381 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3382 | .if X64WIN
3383 | movsd qword [rsp+16], xmm1 // Use scratch area.
3384 | movsd qword [rsp+8], xmm0
3385 | fld qword [rsp+16] // st0 = y
3386 | fld qword [rsp+8] // st0 = x, st1 = y
3387 | .else
3388 | movsd qword [rsp-16], xmm1 // Use red zone.
3389 | movsd qword [rsp-8], xmm0
3390 | fld qword [rsp-16] // st0 = y
3391 | fld qword [rsp-8] // st0 = x, st1 = y
3392 | .endif
3393 |.else
3394 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3395 | movsd qword [esp+4], xmm0
3396 | cmp dword [esp+12], 0; jne >1 // Low dword of y non-zero? Cannot be +-Inf.
3397 | mov eax, [esp+16]; shl eax, 1 // High dword of y with the sign bit shifted out.
3398 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3399 |1:
3400 | cmp dword [esp+4], 0; jne >1 // Low dword of x non-zero? Neither +-0 nor +-Inf.
3401 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3402 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3403 |1:
3404 | fld qword [esp+12] // st0 = y
3405 | fld qword [esp+4] // st0 = x, st1 = y
3406 |.endif
3407 | fyl2x // y*log2(x)
3408 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3409 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3410 |.if X64WIN
3411 | fstp qword [rsp+8] // Use scratch area.
3412 | movsd xmm0, qword [rsp+8]
3413 |.elif X64
3414 | fstp qword [rsp-8] // Use red zone.
3415 | movsd xmm0, qword [rsp-8]
3416 |.else
3417 | fstp qword [esp+4] // Needs 8 byte scratch area.
3418 | movsd xmm0, qword [esp+4]
3419 |.endif
3420 | ret
3421 |
3422 |9: // Handle x^NaN.
3423 | sseconst_1 xmm2, RDa
3424 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3425 | movaps xmm0, xmm1 // x^NaN ==> NaN
3426 |1:
3427 | ret
3428 |
3429 |2: // Handle x^+-Inf.
3430 | sseconst_abs xmm2, RDa
3431 | andpd xmm0, xmm2 // |x|
3432 | sseconst_1 xmm2, RDa
3433 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3434 | movmskpd eax, xmm1 // eax = sign mask of xmm1 (y). Flags from ucomisd are kept.
3435 | xorps xmm0, xmm0 // Preload result 0. Still does not touch the flags.
3436 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3437 |3:
3438 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3439 | ret
3440 |
3441 |4: // Handle +-0^y.
3442 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3443 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3444 | ret
3445 |
3446 |5: // Handle +-Inf^y.
3447 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3448 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3449 | ret
3450 |
3451 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3452 |// Computes fpm(x) for extended math functions. ORDER FPM. Dispatch is a chain of compares on fpm: 0 floor, 1 ceil, 2 trunc, 3 sqrt, 4 exp, 5 exp2, 6 log, 7 log2, 8 log10, 9 sin, 10 cos, 11 tan.
3453 |->vm_foldfpm:
3454 |.if JIT
3455 |.if X64
3456 | .if X64WIN
3457 | .define fpmop, CARG2d // x takes the first argument slot (xmm0), so fpm is the second.
3458 | .else
3459 | .define fpmop, CARG1d // x is passed in xmm0, so fpm is the first integer argument.
3460 | .endif
3461 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil // fpm 0: floor, 1: ceil (tail jumps).
3462 | cmp fpmop, 3; jb ->vm_trunc; ja >2 // fpm 2: trunc, >3: x87 based ops below.
3463 | sqrtsd xmm0, xmm0; ret // fpm 3: sqrt.
3464 |2: // fpm > 3: move x from xmm0 to the x87 stack.
3465 | .if X64WIN
3466 | movsd qword [rsp+8], xmm0 // Use scratch area.
3467 | fld qword [rsp+8]
3468 | .else
3469 | movsd qword [rsp-8], xmm0 // Use red zone.
3470 | fld qword [rsp-8]
3471 | .endif
3472 | cmp fpmop, 5; ja >2 // fpm 4: exp, 5: exp2, >5: log etc.
3473 | .if X64WIN; pop rax; .endif // Keeps our return address in rax across the call; pop leaves the flags alone. NOTE(review): presumably because the callee writes to the scratch slot above its return address -- confirm.
3474 | je >1
3475 | call ->vm_exp_x87 // fpm 4: exp.
3476 | .if X64WIN; push rax; .endif // Put our return address back.
3477 | jmp >7
3478 |1:
3479 | call ->vm_exp2_x87 // fpm 5: exp2.
3480 | .if X64WIN; push rax; .endif // Put our return address back.
3481 | jmp >7
3482 |2: ; cmp fpmop, 7; je >1; ja >2
3483 | fldln2; fxch; fyl2x; jmp >7 // fpm 6: log(x) = ln(2)*log2(x).
3484 |1: ; fld1; fxch; fyl2x; jmp >7 // fpm 7: log2(x) = 1*log2(x).
3485 |2: ; cmp fpmop, 9; je >1; ja >2
3486 | fldlg2; fxch; fyl2x; jmp >7 // fpm 8: log10(x) = log10(2)*log2(x).
3487 |1: ; fsin; jmp >7 // fpm 9: sin.
3488 |2: ; cmp fpmop, 11; je >1; ja >9
3489 | fcos; jmp >7 // fpm 10: cos.
3490 |1: ; fptan; fpop // fpm 11: tan. fptan also pushes 1.0, which is dropped again.
3491 |7: // Move the x87 result back to xmm0 for the x64 return convention.
3492 | .if X64WIN
3493 | fstp qword [rsp+8] // Use scratch area.
3494 | movsd xmm0, qword [rsp+8]
3495 | .else
3496 | fstp qword [rsp-8] // Use red zone.
3497 | movsd xmm0, qword [rsp-8]
3498 | .endif
3499 | ret
3500 |.else // x86 calling convention.
3501 | .define fpmop, eax
3502 |.if SSE
3503 | mov fpmop, [esp+12] // fpm follows the 8 byte double x at [esp+4].
3504 | movsd xmm0, qword [esp+4]
3505 | cmp fpmop, 1; je >1; ja >2
3506 | call ->vm_floor; jmp >7 // fpm 0: floor.
3507 |1: ; call ->vm_ceil; jmp >7 // fpm 1: ceil.
3508 |2: ; cmp fpmop, 3; je >1; ja >2
3509 | call ->vm_trunc; jmp >7 // fpm 2: trunc.
3510 |1:
3511 | sqrtsd xmm0, xmm0 // fpm 3: sqrt.
3512 |7: // SSE result goes through memory to st0, where the result is left for the caller.
3513 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3514 | fld qword [esp+4]
3515 | ret
3516 |2: ; fld qword [esp+4] // fpm > 3: x87 ops take x in st0 and leave the result there.
3517 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 // fpm 4: exp, 5: exp2 (tail jumps).
3518 |2: ; cmp fpmop, 7; je >1; ja >2
3519 | fldln2; fxch; fyl2x; ret // fpm 6: log(x) = ln(2)*log2(x).
3520 |1: ; fld1; fxch; fyl2x; ret // fpm 7: log2(x).
3521 |2: ; cmp fpmop, 9; je >1; ja >2
3522 | fldlg2; fxch; fyl2x; ret // fpm 8: log10(x) = log10(2)*log2(x).
3523 |1: ; fsin; ret // fpm 9: sin.
3524 |2: ; cmp fpmop, 11; je >1; ja >9
3525 | fcos; ret // fpm 10: cos.
3526 |1: ; fptan; fpop; ret // fpm 11: tan. Drop the 1.0 pushed by fptan.
3527 |.else
3528 | mov fpmop, [esp+12] // fpm follows the 8 byte double x at [esp+4].
3529 | fld qword [esp+4] // Pure x87 variant: x lives in st0 throughout.
3530 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil // fpm 0: floor, 1: ceil (tail jumps).
3531 | cmp fpmop, 3; jb ->vm_trunc; ja >2 // fpm 2: trunc.
3532 | fsqrt; ret // fpm 3: sqrt.
3533 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 // fpm 4: exp, 5: exp2 (tail jumps).
3534 | cmp fpmop, 7; je >1; ja >2
3535 | fldln2; fxch; fyl2x; ret // fpm 6: log(x) = ln(2)*log2(x).
3536 |1: ; fld1; fxch; fyl2x; ret // fpm 7: log2(x).
3537 |2: ; cmp fpmop, 9; je >1; ja >2
3538 | fldlg2; fxch; fyl2x; ret // fpm 8: log10(x) = log10(2)*log2(x).
3539 |1: ; fsin; ret // fpm 9: sin.
3540 |2: ; cmp fpmop, 11; je >1; ja >9
3541 | fcos; ret // fpm 10: cos.
3542 |1: ; fptan; fpop; ret // fpm 11: tan. Drop the 1.0 pushed by fptan.
3543 |.endif
3544 |.endif
3545 |9: ; int3 // Bad fpm.
3546 |.endif
3547 |
3548 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3549 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3550 |// and basic math functions. ORDER ARITH. Dispatch on op as coded below: 0 add, 1 sub, 2 mul, 3 div, 4 mod, 5 pow, 6 negate, 7 abs, 8 fpatan, 9 fscale, 10 min, 11 max.
3551 |->vm_foldarith:
3552 |.if X64
3553 |
3554 | .if X64WIN
3555 | .define foldop, CARG3d // x and y take the first two argument slots, so op is the third.
3556 | .else
3557 | .define foldop, CARG1d // x and y are in xmm0/xmm1, so op is the first integer argument.
3558 | .endif
3559 | cmp foldop, 1; je >1; ja >2
3560 | addsd xmm0, xmm1; ret // op 0: x + y.
3561 |1: ; subsd xmm0, xmm1; ret // op 1: x - y.
3562 |2: ; cmp foldop, 3; je >1; ja >2
3563 | mulsd xmm0, xmm1; ret // op 2: x * y.
3564 |1: ; divsd xmm0, xmm1; ret // op 3: x / y.
3565 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow // op 4: mod, op 5: pow (tail jumps).
3566 | cmp foldop, 7; je >1; ja >2
3567 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret // op 6: flip the sign bit of x (y is ignored).
3568 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret // op 7: mask off the sign bit of x.
3569 |2: ; cmp foldop, 9; ja >2 // op 8/9 use x87. The flags of this compare are consumed by the je below.
3570 |.if X64WIN
3571 | movsd qword [rsp+8], xmm0 // Use scratch area.
3572 | movsd qword [rsp+16], xmm1
3573 | fld qword [rsp+8] // st0 = x
3574 | fld qword [rsp+16] // st0 = y, st1 = x
3575 |.else
3576 | movsd qword [rsp-8], xmm0 // Use red zone.
3577 | movsd qword [rsp-16], xmm1
3578 | fld qword [rsp-8] // st0 = x
3579 | fld qword [rsp-16] // st0 = y, st1 = x
3580 |.endif
3581 | je >1 // Still the result of cmp foldop, 9: movsd/fld do not modify EFLAGS.
3582 | fpatan // op 8: arctangent of st1/st0, i.e. of x/y.
3583 |7: // Move the x87 result back to xmm0.
3584 |.if X64WIN
3585 | fstp qword [rsp+8] // Use scratch area.
3586 | movsd xmm0, qword [rsp+8]
3587 |.else
3588 | fstp qword [rsp-8] // Use red zone.
3589 | movsd xmm0, qword [rsp-8]
3590 |.endif
3591 | ret
3592 |1: ; fxch; fscale; fpop1; jmp <7 // op 9: fscale with st0 = x, st1 = y, i.e. x scaled by 2^y.
3593 |2: ; cmp foldop, 11; je >1; ja >9
3594 | minsd xmm0, xmm1; ret // op 10: min.
3595 |1: ; maxsd xmm0, xmm1; ret // op 11: max.
3596 |9: ; int3 // Bad op.
3597 |
3598 |.elif SSE // x86 calling convention with SSE ops.
3599 |
3600 | .define foldop, eax
3601 | mov foldop, [esp+20] // op follows the two 8 byte doubles at [esp+4] and [esp+12].
3602 | movsd xmm0, qword [esp+4]
3603 | movsd xmm1, qword [esp+12]
3604 | cmp foldop, 1; je >1; ja >2
3605 | addsd xmm0, xmm1 // op 0: x + y.
3606 |7: // SSE result goes through memory to st0, where the result is left for the caller.
3607 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3608 | fld qword [esp+4]
3609 | ret
3610 |1: ; subsd xmm0, xmm1; jmp <7 // op 1: x - y.
3611 |2: ; cmp foldop, 3; je >1; ja >2
3612 | mulsd xmm0, xmm1; jmp <7 // op 2: x * y.
3613 |1: ; divsd xmm0, xmm1; jmp <7 // op 3: x / y.
3614 |2: ; cmp foldop, 5
3615 | je >1; ja >2
3616 | call ->vm_mod; jmp <7 // op 4: mod.
3617 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3618 |2: ; cmp foldop, 7; je >1; ja >2
3619 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 // op 6: flip the sign bit of x.
3620 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 // op 7: mask off the sign bit of x.
3621 |2: ; cmp foldop, 9; ja >2 // The flags of this compare are consumed by the je below.
3622 | fld qword [esp+4] // Reload from stack
3623 | fld qword [esp+12] // st0 = y, st1 = x
3624 | je >1 // Still the result of cmp foldop, 9: fld does not modify EFLAGS.
3625 | fpatan; ret // op 8: arctangent of x/y, returned directly in st0.
3626 |1: ; fxch; fscale; fpop1; ret // op 9: x scaled by 2^y, returned directly in st0.
3627 |2: ; cmp foldop, 11; je >1; ja >9
3628 | minsd xmm0, xmm1; jmp <7 // op 10: min.
3629 |1: ; maxsd xmm0, xmm1; jmp <7 // op 11: max.
3630 |9: ; int3 // Bad op.
3631 |
3632 |.else // x86 calling convention with x87 ops.
3633 |
3634 | mov eax, [esp+20] // op follows the two 8 byte doubles at [esp+4] and [esp+12].
3635 | fld qword [esp+4] // st0 = x
3636 | fld qword [esp+12] // st0 = y, st1 = x
3637 | cmp eax, 1; je >1; ja >2
3638 | faddp st1; ret // op 0: x + y.
3639 |1: ; fsubp st1; ret // op 1: x - y.
3640 |2: ; cmp eax, 3; je >1; ja >2
3641 | fmulp st1; ret // op 2: x * y.
3642 |1: ; fdivp st1; ret // op 3: x / y.
3643 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow // op 4: mod, op 5: pow (tail jumps, operands on the x87 stack).
3644 | cmp eax, 7; je >1; ja >2
3645 | fpop; fchs; ret // op 6: drop y, negate x.
3646 |1: ; fpop; fabs; ret // op 7: drop y, take |x|.
3647 |2: ; cmp eax, 9; je >1; ja >2
3648 | fpatan; ret // op 8: arctangent of st1/st0, i.e. of x/y.
3649 |1: ; fxch; fscale; fpop1; ret // op 9: x scaled by 2^y.
3650 |2: ; cmp eax, 11; je >1; ja >9
3651 | fucomi st1; fcmovnbe st1; fpop1; ret // op 10: min. Replace st0 (y) by st1 (x) if y > x.
3652 |1: ; fucomi st1; fcmovbe st1; fpop1; ret // op 11: max. Replace st0 (y) by st1 (x) if y <= x.
3653 |9: ; int3 // Bad op.
3654 |
3655 |.endif
3656 | 3054 |
3657 |//----------------------------------------------------------------------- 3055 |//-----------------------------------------------------------------------
3658 |//-- Miscellaneous functions -------------------------------------------- 3056 |//-- Miscellaneous functions --------------------------------------------
@@ -3664,6 +3062,7 @@ static void build_subroutines(BuildCtx *ctx)
3664 | mov eax, CARG1d 3062 | mov eax, CARG1d
3665 | .if X64WIN; push rsi; mov rsi, CARG2; .endif 3063 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3666 | push rbx 3064 | push rbx
3065 | xor ecx, ecx
3667 | cpuid 3066 | cpuid
3668 | mov [rsi], eax 3067 | mov [rsi], eax
3669 | mov [rsi+4], ebx 3068 | mov [rsi+4], ebx
@@ -3687,6 +3086,7 @@ static void build_subroutines(BuildCtx *ctx)
3687 | mov eax, [esp+4] // Argument 1 is function number. 3086 | mov eax, [esp+4] // Argument 1 is function number.
3688 | push edi 3087 | push edi
3689 | push ebx 3088 | push ebx
3089 | xor ecx, ecx
3690 | cpuid 3090 | cpuid
3691 | mov edi, [esp+16] // Argument 2 is result area. 3091 | mov edi, [esp+16] // Argument 2 is result area.
3692 | mov [edi], eax 3092 | mov [edi], eax
@@ -3699,6 +3099,86 @@ static void build_subroutines(BuildCtx *ctx)
3699 | ret 3099 | ret
3700 |.endif 3100 |.endif
3701 | 3101 |
3102 |.define NEXT_TAB, TAB:FCARG1 // Table argument t.
3103 |.define NEXT_IDX, FCARG2 // Index argument idx.
3104 |.define NEXT_PTR, RCa // Pointer that is left for the caller on return.
3105 |.define NEXT_PTRd, RC
3106 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
3107 |.if X64
3108 |.define NEXT_TMP, CARG3d
3109 |.define NEXT_TMPq, CARG3
3110 |.define NEXT_ASIZE, CARG4d
3111 |.macro NEXT_ENTER; .endmacro
3112 |.macro NEXT_LEAVE; ret; .endmacro
3113 |.if X64WIN
3114 |.define NEXT_RES_PTR, [rsp+aword*5] // NOTE(review): presumably just above the return address plus the 4 slot Win64 shadow space -- confirm.
3115 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
3116 |.else
3117 |.define NEXT_RES_PTR, [rsp+aword*1] // Stack slot directly above the return address.
3118 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
3119 |.endif
3120 |.else
3121 |.define NEXT_ASIZE, esi
3122 |.define NEXT_TMP, edi
3123 |.macro NEXT_ENTER; push esi; push edi; .endmacro
3124 |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro
3125 |.define NEXT_RES_PTR, [esp+dword*3] // Above the two regs pushed by NEXT_ENTER and the return address.
3126 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
3127 |.endif
3128 |
3129 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
3130 |// Next idx returned in edx. The pointer left in NEXT_PTR is either the stack area NEXT_RES_PTR (array part and end of iteration: value at +0, key at +8) or the hash node itself.
3131 |->vm_next:
3132 |.if JIT
3133 | NEXT_ENTER
3134 | mov NEXT_ASIZE, NEXT_TAB->asize
3135 |1: // Traverse array part.
3136 | cmp NEXT_IDX, NEXT_ASIZE; jae >5 // idx >= asize: continue with the hash part.
3137 | mov NEXT_TMP, NEXT_TAB->array
3138 | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 // Type tag (upper dword) of array[idx] is nil? Skip the slot.
3139 | lea NEXT_PTR, NEXT_RES_PTR
3140 |.if X64
3141 | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] // Copy the 8 byte array slot to the result area.
3142 | mov qword [NEXT_PTR], NEXT_TMPq
3143 |.else
3144 | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] // NEXT_ASIZE is reused as scratch; it is not read again on this path.
3145 | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
3146 | mov dword [NEXT_PTR+4], NEXT_ASIZE
3147 | mov dword [NEXT_PTR], NEXT_TMP
3148 |.endif
3149 |.if DUALNUM
3150 | mov dword [NEXT_PTR+dword*3], LJ_TISNUM // Key = idx as a tagged integer, stored after the value.
3151 | mov dword [NEXT_PTR+dword*2], NEXT_IDX
3152 |.else
3153 | cvtsi2sd xmm0, NEXT_IDX // Key = idx converted to a double, stored after the value.
3154 | movsd qword [NEXT_PTR+dword*2], xmm0
3155 |.endif
3156 | NEXT_RES_IDX 1 // Next idx = idx + 1.
3157 | NEXT_LEAVE
3158 |2: // Skip holes in array part.
3159 | add NEXT_IDX, 1
3160 | jmp <1
3161 |
3162 |5: // Traverse hash part.
3163 | sub NEXT_IDX, NEXT_ASIZE // idx is now relative to the hash part.
3164 |6:
3165 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 // idx > hmask: no hash slots left.
3166 | imul NEXT_PTRd, NEXT_IDX, #NODE // Byte offset of node number idx.
3167 | add NODE:NEXT_PTRd, dword NEXT_TAB->node // NEXT_PTR = address of that node.
3168 | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 // Node value is nil? Skip the slot.
3169 | NEXT_RES_IDXL NEXT_ASIZE+1 // Next idx = asize + hash idx + 1. The node pointer stays in NEXT_PTR.
3170 | NEXT_LEAVE
3171 |7: // Skip holes in hash part.
3172 | add NEXT_IDX, 1
3173 | jmp <6
3174 |
3175 |9: // End of iteration. Set the key to nil (not the value).
3176 | NEXT_RES_IDX NEXT_ASIZE // Turn the hash-relative idx back into an absolute one.
3177 | lea NEXT_PTR, NEXT_RES_PTR
3178 | mov dword [NEXT_PTR+dword*3], LJ_TNIL // Only the tag of the key at +8 is written.
3179 | NEXT_LEAVE
3180 |.endif
3181 |
3702 |//----------------------------------------------------------------------- 3182 |//-----------------------------------------------------------------------
3703 |//-- Assertions --------------------------------------------------------- 3183 |//-- Assertions ---------------------------------------------------------
3704 |//----------------------------------------------------------------------- 3184 |//-----------------------------------------------------------------------
@@ -3964,19 +3444,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3964 | // RA is a number. 3444 | // RA is a number.
3965 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3445 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3966 | // RA is a number, RD is an integer. 3446 | // RA is a number, RD is an integer.
3967 |.if SSE
3968 | cvtsi2sd xmm0, dword [BASE+RD*8] 3447 | cvtsi2sd xmm0, dword [BASE+RD*8]
3969 | jmp >2 3448 | jmp >2
3970 |.else
3971 | fld qword [BASE+RA*8]
3972 | fild dword [BASE+RD*8]
3973 | jmp >3
3974 |.endif
3975 | 3449 |
3976 |8: // RA is an integer, RD is not an integer. 3450 |8: // RA is an integer, RD is not an integer.
3977 | ja ->vmeta_comp 3451 | ja ->vmeta_comp
3978 | // RA is an integer, RD is a number. 3452 | // RA is an integer, RD is a number.
3979 |.if SSE
3980 | cvtsi2sd xmm1, dword [BASE+RA*8] 3453 | cvtsi2sd xmm1, dword [BASE+RA*8]
3981 | movsd xmm0, qword [BASE+RD*8] 3454 | movsd xmm0, qword [BASE+RD*8]
3982 | add PC, 4 3455 | add PC, 4
@@ -3984,29 +3457,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3984 | jmp_comp jbe, ja, jb, jae, <9 3457 | jmp_comp jbe, ja, jb, jae, <9
3985 | jmp <6 3458 | jmp <6
3986 |.else 3459 |.else
3987 | fild dword [BASE+RA*8]
3988 | jmp >2
3989 |.endif
3990 |.else
3991 | checknum RA, ->vmeta_comp 3460 | checknum RA, ->vmeta_comp
3992 | checknum RD, ->vmeta_comp 3461 | checknum RD, ->vmeta_comp
3993 |.endif 3462 |.endif
3994 |.if SSE
3995 |1: 3463 |1:
3996 | movsd xmm0, qword [BASE+RD*8] 3464 | movsd xmm0, qword [BASE+RD*8]
3997 |2: 3465 |2:
3998 | add PC, 4 3466 | add PC, 4
3999 | ucomisd xmm0, qword [BASE+RA*8] 3467 | ucomisd xmm0, qword [BASE+RA*8]
4000 |3: 3468 |3:
4001 |.else
4002 |1:
4003 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4004 |2:
4005 | fld qword [BASE+RD*8]
4006 |3:
4007 | add PC, 4
4008 | fcomparepp
4009 |.endif
4010 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3469 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4011 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3470 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4012 |.if DUALNUM 3471 |.if DUALNUM
@@ -4046,43 +3505,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4046 | // RD is a number. 3505 | // RD is a number.
4047 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3506 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4048 | // RD is a number, RA is an integer. 3507 | // RD is a number, RA is an integer.
4049 |.if SSE
4050 | cvtsi2sd xmm0, dword [BASE+RA*8] 3508 | cvtsi2sd xmm0, dword [BASE+RA*8]
4051 |.else
4052 | fild dword [BASE+RA*8]
4053 |.endif
4054 | jmp >2 3509 | jmp >2
4055 | 3510 |
4056 |8: // RD is an integer, RA is not an integer. 3511 |8: // RD is an integer, RA is not an integer.
4057 | ja >5 3512 | ja >5
4058 | // RD is an integer, RA is a number. 3513 | // RD is an integer, RA is a number.
4059 |.if SSE
4060 | cvtsi2sd xmm0, dword [BASE+RD*8] 3514 | cvtsi2sd xmm0, dword [BASE+RD*8]
4061 | ucomisd xmm0, qword [BASE+RA*8] 3515 | ucomisd xmm0, qword [BASE+RA*8]
4062 |.else
4063 | fild dword [BASE+RD*8]
4064 | fld qword [BASE+RA*8]
4065 |.endif
4066 | jmp >4 3516 | jmp >4
4067 | 3517 |
4068 |.else 3518 |.else
4069 | cmp RB, LJ_TISNUM; jae >5 3519 | cmp RB, LJ_TISNUM; jae >5
4070 | checknum RA, >5 3520 | checknum RA, >5
4071 |.endif 3521 |.endif
4072 |.if SSE
4073 |1: 3522 |1:
4074 | movsd xmm0, qword [BASE+RA*8] 3523 | movsd xmm0, qword [BASE+RA*8]
4075 |2: 3524 |2:
4076 | ucomisd xmm0, qword [BASE+RD*8] 3525 | ucomisd xmm0, qword [BASE+RD*8]
4077 |4: 3526 |4:
4078 |.else
4079 |1:
4080 | fld qword [BASE+RA*8]
4081 |2:
4082 | fld qword [BASE+RD*8]
4083 |4:
4084 | fcomparepp
4085 |.endif
4086 iseqne_fp: 3527 iseqne_fp:
4087 if (vk) { 3528 if (vk) {
4088 | jp >2 // Unordered means not equal. 3529 | jp >2 // Unordered means not equal.
@@ -4205,39 +3646,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4205 | // RA is a number. 3646 | // RA is a number.
4206 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3647 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4207 | // RA is a number, RD is an integer. 3648 | // RA is a number, RD is an integer.
4208 |.if SSE
4209 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3649 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4210 |.else
4211 | fild dword [KBASE+RD*8]
4212 |.endif
4213 | jmp >2 3650 | jmp >2
4214 | 3651 |
4215 |8: // RA is an integer, RD is a number. 3652 |8: // RA is an integer, RD is a number.
4216 |.if SSE
4217 | cvtsi2sd xmm0, dword [BASE+RA*8] 3653 | cvtsi2sd xmm0, dword [BASE+RA*8]
4218 | ucomisd xmm0, qword [KBASE+RD*8] 3654 | ucomisd xmm0, qword [KBASE+RD*8]
4219 |.else
4220 | fild dword [BASE+RA*8]
4221 | fld qword [KBASE+RD*8]
4222 |.endif
4223 | jmp >4 3655 | jmp >4
4224 |.else 3656 |.else
4225 | cmp RB, LJ_TISNUM; jae >3 3657 | cmp RB, LJ_TISNUM; jae >3
4226 |.endif 3658 |.endif
4227 |.if SSE
4228 |1: 3659 |1:
4229 | movsd xmm0, qword [KBASE+RD*8] 3660 | movsd xmm0, qword [KBASE+RD*8]
4230 |2: 3661 |2:
4231 | ucomisd xmm0, qword [BASE+RA*8] 3662 | ucomisd xmm0, qword [BASE+RA*8]
4232 |4: 3663 |4:
4233 |.else
4234 |1:
4235 | fld qword [KBASE+RD*8]
4236 |2:
4237 | fld qword [BASE+RA*8]
4238 |4:
4239 | fcomparepp
4240 |.endif
4241 goto iseqne_fp; 3664 goto iseqne_fp;
4242 case BC_ISEQP: case BC_ISNEP: 3665 case BC_ISEQP: case BC_ISNEP:
4243 vk = op == BC_ISEQP; 3666 vk = op == BC_ISEQP;
@@ -4288,6 +3711,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4288 | ins_next 3711 | ins_next
4289 break; 3712 break;
4290 3713
3714 case BC_ISTYPE:
3715 | ins_AD // RA = src, RD = -type
3716 | add RD, [BASE+RA*8+4] // -type + type tag of slot RA: zero iff the tags match.
3717 | jne ->vmeta_istype // Mismatch: leave the fast path.
3718 | ins_next
3719 break;
3720 case BC_ISNUM:
3721 | ins_AD // RA = src, RD = -(TISNUM-1)
3722 | checknum RA, ->vmeta_istype // Slot RA fails the number check: same exit as BC_ISTYPE.
3723 | ins_next
3724 break;
3725
4291 /* -- Unary ops --------------------------------------------------------- */ 3726 /* -- Unary ops --------------------------------------------------------- */
4292 3727
4293 case BC_MOV: 3728 case BC_MOV:
@@ -4331,16 +3766,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4331 |.else 3766 |.else
4332 | checknum RD, ->vmeta_unm 3767 | checknum RD, ->vmeta_unm
4333 |.endif 3768 |.endif
4334 |.if SSE
4335 | movsd xmm0, qword [BASE+RD*8] 3769 | movsd xmm0, qword [BASE+RD*8]
4336 | sseconst_sign xmm1, RDa 3770 | sseconst_sign xmm1, RDa
4337 | xorps xmm0, xmm1 3771 | xorps xmm0, xmm1
4338 | movsd qword [BASE+RA*8], xmm0 3772 | movsd qword [BASE+RA*8], xmm0
4339 |.else
4340 | fld qword [BASE+RD*8]
4341 | fchs
4342 | fstp qword [BASE+RA*8]
4343 |.endif
4344 |.if DUALNUM 3773 |.if DUALNUM
4345 | jmp <9 3774 | jmp <9
4346 |.else 3775 |.else
@@ -4356,15 +3785,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4356 |1: 3785 |1:
4357 | mov dword [BASE+RA*8+4], LJ_TISNUM 3786 | mov dword [BASE+RA*8+4], LJ_TISNUM
4358 | mov dword [BASE+RA*8], RD 3787 | mov dword [BASE+RA*8], RD
4359 |.elif SSE 3788 |.else
4360 | xorps xmm0, xmm0 3789 | xorps xmm0, xmm0
4361 | cvtsi2sd xmm0, dword STR:RD->len 3790 | cvtsi2sd xmm0, dword STR:RD->len
4362 |1: 3791 |1:
4363 | movsd qword [BASE+RA*8], xmm0 3792 | movsd qword [BASE+RA*8], xmm0
4364 |.else
4365 | fild dword STR:RD->len
4366 |1:
4367 | fstp qword [BASE+RA*8]
4368 |.endif 3793 |.endif
4369 | ins_next 3794 | ins_next
4370 |2: 3795 |2:
@@ -4382,11 +3807,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4382 | // Length of table returned in eax (RD). 3807 | // Length of table returned in eax (RD).
4383 |.if DUALNUM 3808 |.if DUALNUM
4384 | // Nothing to do. 3809 | // Nothing to do.
4385 |.elif SSE
4386 | cvtsi2sd xmm0, RD
4387 |.else 3810 |.else
4388 | mov ARG1, RD 3811 | cvtsi2sd xmm0, RD
4389 | fild ARG1
4390 |.endif 3812 |.endif
4391 | mov BASE, RB // Restore BASE. 3813 | mov BASE, RB // Restore BASE.
4392 | movzx RA, PC_RA 3814 | movzx RA, PC_RA
@@ -4401,7 +3823,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4401 3823
4402 /* -- Binary ops -------------------------------------------------------- */ 3824 /* -- Binary ops -------------------------------------------------------- */
4403 3825
4404 |.macro ins_arithpre, x87ins, sseins, ssereg 3826 |.macro ins_arithpre, sseins, ssereg
4405 | ins_ABC 3827 | ins_ABC
4406 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3828 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4407 ||switch (vk) { 3829 ||switch (vk) {
@@ -4410,37 +3832,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4410 | .if DUALNUM 3832 | .if DUALNUM
4411 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3833 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4412 | .endif 3834 | .endif
4413 | .if SSE 3835 | movsd xmm0, qword [BASE+RB*8]
4414 | movsd xmm0, qword [BASE+RB*8] 3836 | sseins ssereg, qword [KBASE+RC*8]
4415 | sseins ssereg, qword [KBASE+RC*8]
4416 | .else
4417 | fld qword [BASE+RB*8]
4418 | x87ins qword [KBASE+RC*8]
4419 | .endif
4420 || break; 3837 || break;
4421 ||case 1: 3838 ||case 1:
4422 | checknum RB, ->vmeta_arith_nv 3839 | checknum RB, ->vmeta_arith_nv
4423 | .if DUALNUM 3840 | .if DUALNUM
4424 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3841 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4425 | .endif 3842 | .endif
4426 | .if SSE 3843 | movsd xmm0, qword [KBASE+RC*8]
4427 | movsd xmm0, qword [KBASE+RC*8] 3844 | sseins ssereg, qword [BASE+RB*8]
4428 | sseins ssereg, qword [BASE+RB*8]
4429 | .else
4430 | fld qword [KBASE+RC*8]
4431 | x87ins qword [BASE+RB*8]
4432 | .endif
4433 || break; 3845 || break;
4434 ||default: 3846 ||default:
4435 | checknum RB, ->vmeta_arith_vv 3847 | checknum RB, ->vmeta_arith_vv
4436 | checknum RC, ->vmeta_arith_vv 3848 | checknum RC, ->vmeta_arith_vv
4437 | .if SSE 3849 | movsd xmm0, qword [BASE+RB*8]
4438 | movsd xmm0, qword [BASE+RB*8] 3850 | sseins ssereg, qword [BASE+RC*8]
4439 | sseins ssereg, qword [BASE+RC*8]
4440 | .else
4441 | fld qword [BASE+RB*8]
4442 | x87ins qword [BASE+RC*8]
4443 | .endif
4444 || break; 3851 || break;
4445 ||} 3852 ||}
4446 |.endmacro 3853 |.endmacro
@@ -4478,55 +3885,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4478 |.endmacro 3885 |.endmacro
4479 | 3886 |
4480 |.macro ins_arithpost 3887 |.macro ins_arithpost
4481 |.if SSE
4482 | movsd qword [BASE+RA*8], xmm0 3888 | movsd qword [BASE+RA*8], xmm0
4483 |.else
4484 | fstp qword [BASE+RA*8]
4485 |.endif
4486 |.endmacro 3889 |.endmacro
4487 | 3890 |
4488 |.macro ins_arith, x87ins, sseins 3891 |.macro ins_arith, sseins
4489 | ins_arithpre x87ins, sseins, xmm0 3892 | ins_arithpre sseins, xmm0
4490 | ins_arithpost 3893 | ins_arithpost
4491 | ins_next 3894 | ins_next
4492 |.endmacro 3895 |.endmacro
4493 | 3896 |
4494 |.macro ins_arith, intins, x87ins, sseins 3897 |.macro ins_arith, intins, sseins
4495 |.if DUALNUM 3898 |.if DUALNUM
4496 | ins_arithdn intins 3899 | ins_arithdn intins
4497 |.else 3900 |.else
4498 | ins_arith, x87ins, sseins 3901 | ins_arith, sseins
4499 |.endif 3902 |.endif
4500 |.endmacro 3903 |.endmacro
4501 3904
4502 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3905 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4503 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3906 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4504 | ins_arith add, fadd, addsd 3907 | ins_arith add, addsd
4505 break; 3908 break;
4506 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3909 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4507 | ins_arith sub, fsub, subsd 3910 | ins_arith sub, subsd
4508 break; 3911 break;
4509 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3912 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4510 | ins_arith imul, fmul, mulsd 3913 | ins_arith imul, mulsd
4511 break; 3914 break;
4512 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3915 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4513 | ins_arith fdiv, divsd 3916 | ins_arith divsd
4514 break; 3917 break;
4515 case BC_MODVN: 3918 case BC_MODVN:
4516 | ins_arithpre fld, movsd, xmm1 3919 | ins_arithpre movsd, xmm1
4517 |->BC_MODVN_Z: 3920 |->BC_MODVN_Z:
4518 | call ->vm_mod 3921 | call ->vm_mod
4519 | ins_arithpost 3922 | ins_arithpost
4520 | ins_next 3923 | ins_next
4521 break; 3924 break;
4522 case BC_MODNV: case BC_MODVV: 3925 case BC_MODNV: case BC_MODVV:
4523 | ins_arithpre fld, movsd, xmm1 3926 | ins_arithpre movsd, xmm1
4524 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3927 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4525 break; 3928 break;
4526 case BC_POW: 3929 case BC_POW:
4527 | ins_arithpre fld, movsd, xmm1 3930 | ins_arithpre movsd, xmm1
4528 | call ->vm_pow 3931 | mov RB, BASE
3932 |.if not X64
3933 | movsd FPARG1, xmm0
3934 | movsd FPARG3, xmm1
3935 |.endif
3936 | call extern pow
3937 | movzx RA, PC_RA
3938 | mov BASE, RB
3939 |.if X64
4529 | ins_arithpost 3940 | ins_arithpost
3941 |.else
3942 | fstp qword [BASE+RA*8]
3943 |.endif
4530 | ins_next 3944 | ins_next
4531 break; 3945 break;
4532 3946
@@ -4594,25 +4008,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4594 | movsx RD, RDW 4008 | movsx RD, RDW
4595 | mov dword [BASE+RA*8+4], LJ_TISNUM 4009 | mov dword [BASE+RA*8+4], LJ_TISNUM
4596 | mov dword [BASE+RA*8], RD 4010 | mov dword [BASE+RA*8], RD
4597 |.elif SSE 4011 |.else
4598 | movsx RD, RDW // Sign-extend literal. 4012 | movsx RD, RDW // Sign-extend literal.
4599 | cvtsi2sd xmm0, RD 4013 | cvtsi2sd xmm0, RD
4600 | movsd qword [BASE+RA*8], xmm0 4014 | movsd qword [BASE+RA*8], xmm0
4601 |.else
4602 | fild PC_RD // Refetch signed RD from instruction.
4603 | fstp qword [BASE+RA*8]
4604 |.endif 4015 |.endif
4605 | ins_next 4016 | ins_next
4606 break; 4017 break;
4607 case BC_KNUM: 4018 case BC_KNUM:
4608 | ins_AD // RA = dst, RD = num const 4019 | ins_AD // RA = dst, RD = num const
4609 |.if SSE
4610 | movsd xmm0, qword [KBASE+RD*8] 4020 | movsd xmm0, qword [KBASE+RD*8]
4611 | movsd qword [BASE+RA*8], xmm0 4021 | movsd qword [BASE+RA*8], xmm0
4612 |.else
4613 | fld qword [KBASE+RD*8]
4614 | fstp qword [BASE+RA*8]
4615 |.endif
4616 | ins_next 4022 | ins_next
4617 break; 4023 break;
4618 case BC_KPRI: 4024 case BC_KPRI:
@@ -4719,18 +4125,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4719 case BC_USETN: 4125 case BC_USETN:
4720 | ins_AD // RA = upvalue #, RD = num const 4126 | ins_AD // RA = upvalue #, RD = num const
4721 | mov LFUNC:RB, [BASE-8] 4127 | mov LFUNC:RB, [BASE-8]
4722 |.if SSE
4723 | movsd xmm0, qword [KBASE+RD*8] 4128 | movsd xmm0, qword [KBASE+RD*8]
4724 |.else
4725 | fld qword [KBASE+RD*8]
4726 |.endif
4727 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4129 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4728 | mov RA, UPVAL:RB->v 4130 | mov RA, UPVAL:RB->v
4729 |.if SSE
4730 | movsd qword [RA], xmm0 4131 | movsd qword [RA], xmm0
4731 |.else
4732 | fstp qword [RA]
4733 |.endif
4734 | ins_next 4132 | ins_next
4735 break; 4133 break;
4736 case BC_USETP: 4134 case BC_USETP:
@@ -4884,18 +4282,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4884 |.else 4282 |.else
4885 | // Convert number to int and back and compare. 4283 | // Convert number to int and back and compare.
4886 | checknum RC, >5 4284 | checknum RC, >5
4887 |.if SSE
4888 | movsd xmm0, qword [BASE+RC*8] 4285 | movsd xmm0, qword [BASE+RC*8]
4889 | cvtsd2si RC, xmm0 4286 | cvttsd2si RC, xmm0
4890 | cvtsi2sd xmm1, RC 4287 | cvtsi2sd xmm1, RC
4891 | ucomisd xmm0, xmm1 4288 | ucomisd xmm0, xmm1
4892 |.else
4893 | fld qword [BASE+RC*8]
4894 | fist ARG1
4895 | fild ARG1
4896 | fcomparepp
4897 | mov RC, ARG1
4898 |.endif
4899 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4289 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4900 |.endif 4290 |.endif
4901 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4291 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4941,7 +4331,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4941 | mov TAB:RB, [BASE+RB*8] 4331 | mov TAB:RB, [BASE+RB*8]
4942 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4332 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4943 | mov RA, TAB:RB->hmask 4333 | mov RA, TAB:RB->hmask
4944 | and RA, STR:RC->hash 4334 | and RA, STR:RC->sid
4945 | imul RA, #NODE 4335 | imul RA, #NODE
4946 | add NODE:RA, TAB:RB->node 4336 | add NODE:RA, TAB:RB->node
4947 |1: 4337 |1:
@@ -5019,6 +4409,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5019 | mov dword [BASE+RA*8+4], LJ_TNIL 4409 | mov dword [BASE+RA*8+4], LJ_TNIL
5020 | jmp <1 4410 | jmp <1
5021 break; 4411 break;
4412 case BC_TGETR:
4413 | ins_ABC // RA = dst, RB = table, RC = key
4414 | mov TAB:RB, [BASE+RB*8] // Table pointer from slot RB. No type check is done here.
4415 |.if DUALNUM
4416 | mov RC, dword [BASE+RC*8] // Key is read as an integer. Its tag is not checked here.
4417 |.else
4418 | cvttsd2si RC, qword [BASE+RC*8] // Key is read as a double and truncated to an integer.
4419 |.endif
4420 | cmp RC, TAB:RB->asize // Unsigned compare, so negative keys fail the check, too.
4421 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4422 | shl RC, 3 // Scale the key by the 8 byte slot size.
4423 | add RC, TAB:RB->array // RC = address of the array slot.
4424 | // Get array slot.
4425 |->BC_TGETR_Z:
4426 |.if X64
4427 | mov RBa, [RC] // Copy the whole 8 byte slot at once.
4428 | mov [BASE+RA*8], RBa
4429 |.else
4430 | mov RB, [RC] // Copy the slot as two dwords: payload, then tag.
4431 | mov RC, [RC+4]
4432 | mov [BASE+RA*8], RB
4433 | mov [BASE+RA*8+4], RC
4434 |.endif
4435 |->BC_TGETR2_Z:
4436 | ins_next
4437 break;
5022 4438
5023 case BC_TSETV: 4439 case BC_TSETV:
5024 | ins_ABC // RA = src, RB = table, RC = key 4440 | ins_ABC // RA = src, RB = table, RC = key
@@ -5032,18 +4448,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5032 |.else 4448 |.else
5033 | // Convert number to int and back and compare. 4449 | // Convert number to int and back and compare.
5034 | checknum RC, >5 4450 | checknum RC, >5
5035 |.if SSE
5036 | movsd xmm0, qword [BASE+RC*8] 4451 | movsd xmm0, qword [BASE+RC*8]
5037 | cvtsd2si RC, xmm0 4452 | cvttsd2si RC, xmm0
5038 | cvtsi2sd xmm1, RC 4453 | cvtsi2sd xmm1, RC
5039 | ucomisd xmm0, xmm1 4454 | ucomisd xmm0, xmm1
5040 |.else
5041 | fld qword [BASE+RC*8]
5042 | fist ARG1
5043 | fild ARG1
5044 | fcomparepp
5045 | mov RC, ARG1
5046 |.endif
5047 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4455 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5048 |.endif 4456 |.endif
5049 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4457 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5094,7 +4502,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5094 | mov TAB:RB, [BASE+RB*8] 4502 | mov TAB:RB, [BASE+RB*8]
5095 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4503 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
5096 | mov RA, TAB:RB->hmask 4504 | mov RA, TAB:RB->hmask
5097 | and RA, STR:RC->hash 4505 | and RA, STR:RC->sid
5098 | imul RA, #NODE 4506 | imul RA, #NODE
5099 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. 4507 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
5100 | add NODE:RA, TAB:RB->node 4508 | add NODE:RA, TAB:RB->node
@@ -5213,6 +4621,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5213 | movzx RA, PC_RA // Restore RA. 4621 | movzx RA, PC_RA // Restore RA.
5214 | jmp <2 4622 | jmp <2
5215 break; 4623 break;
4624 case BC_TSETR:
4625 | ins_ABC // RA = src, RB = table, RC = key
4626 | mov TAB:RB, [BASE+RB*8] // Table pointer from slot RB. No type check is done here.
4627 |.if DUALNUM
4628 | mov RC, dword [BASE+RC*8] // Key is read as an integer. Its tag is not checked here.
4629 |.else
4630 | cvttsd2si RC, qword [BASE+RC*8] // Key is read as a double and truncated to an integer.
4631 |.endif
4632 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4633 | jnz >7 // Black table: run the write barrier first, then come back to 2.
4634 |2:
4635 | cmp RC, TAB:RB->asize // Unsigned compare, so negative keys fail the check, too.
4636 | jae ->vmeta_tsetr // Not in array part: leave the fast path.
4637 | shl RC, 3 // Scale the key by the 8 byte slot size.
4638 | add RC, TAB:RB->array // RC = address of the array slot.
4639 | // Set array slot.
4640 |->BC_TSETR_Z:
4641 |.if X64
4642 | mov RBa, [BASE+RA*8] // Copy the whole 8 byte source slot at once.
4643 | mov [RC], RBa
4644 |.else
4645 | mov RB, [BASE+RA*8+4] // Load the tag first: the next load overwrites RA.
4646 | mov RA, [BASE+RA*8]
4647 | mov [RC+4], RB
4648 | mov [RC], RA
4649 |.endif
4650 | ins_next
4651 |
4652 |7: // Possible table write barrier for the value. Skip valiswhite check.
4653 | barrierback TAB:RB, RA
4654 | movzx RA, PC_RA // Restore RA.
4655 | jmp <2
4656 break;
5216 4657
5217 case BC_TSETM: 4658 case BC_TSETM:
5218 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4659 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5389,10 +4830,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5389 break; 4830 break;
5390 4831
5391 case BC_ITERN: 4832 case BC_ITERN:
5392 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5393 |.if JIT 4833 |.if JIT
5394 | // NYI: add hotloop, record BC_ITERN. 4834 | hotloop RB
5395 |.endif 4835 |.endif
4836 |->vm_IITERN:
4837 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5396 | mov TMP1, KBASE // Need two more free registers. 4838 | mov TMP1, KBASE // Need two more free registers.
5397 | mov TMP2, DISPATCH 4839 | mov TMP2, DISPATCH
5398 | mov TAB:RB, [BASE+RA*8-16] 4840 | mov TAB:RB, [BASE+RA*8-16]
@@ -5406,10 +4848,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5406 |.if DUALNUM 4848 |.if DUALNUM
5407 | mov dword [BASE+RA*8+4], LJ_TISNUM 4849 | mov dword [BASE+RA*8+4], LJ_TISNUM
5408 | mov dword [BASE+RA*8], RC 4850 | mov dword [BASE+RA*8], RC
5409 |.elif SSE
5410 | cvtsi2sd xmm0, RC
5411 |.else 4851 |.else
5412 | fild dword [BASE+RA*8-8] 4852 | cvtsi2sd xmm0, RC
5413 |.endif 4853 |.endif
5414 | // Copy array slot to returned value. 4854 | // Copy array slot to returned value.
5415 |.if X64 4855 |.if X64
@@ -5425,10 +4865,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5425 | // Return array index as a numeric key. 4865 | // Return array index as a numeric key.
5426 |.if DUALNUM 4866 |.if DUALNUM
5427 | // See above. 4867 | // See above.
5428 |.elif SSE
5429 | movsd qword [BASE+RA*8], xmm0
5430 |.else 4868 |.else
5431 | fstp qword [BASE+RA*8] 4869 | movsd qword [BASE+RA*8], xmm0
5432 |.endif 4870 |.endif
5433 | mov [BASE+RA*8-8], RC // Update control var. 4871 | mov [BASE+RA*8-8], RC // Update control var.
5434 |2: 4872 |2:
@@ -5441,9 +4879,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5441 | 4879 |
5442 |4: // Skip holes in array part. 4880 |4: // Skip holes in array part.
5443 | add RC, 1 4881 | add RC, 1
5444 |.if not (DUALNUM or SSE)
5445 | mov [BASE+RA*8-8], RC
5446 |.endif
5447 | jmp <1 4882 | jmp <1
5448 | 4883 |
5449 |5: // Traverse hash part. 4884 |5: // Traverse hash part.
@@ -5487,14 +4922,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5487 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 4922 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
5488 | branchPC RD 4923 | branchPC RD
5489 | mov dword [BASE+RA*8-8], 0 // Initialize control var. 4924 | mov dword [BASE+RA*8-8], 0 // Initialize control var.
5490 | mov dword [BASE+RA*8-4], 0xfffe7fff 4925 | mov dword [BASE+RA*8-4], LJ_KEYINDEX
5491 |1: 4926 |1:
5492 | ins_next 4927 | ins_next
5493 |5: // Despecialize bytecode if any of the checks fail. 4928 |5: // Despecialize bytecode if any of the checks fail.
5494 | mov PC_OP, BC_JMP 4929 | mov PC_OP, BC_JMP
5495 | branchPC RD 4930 | branchPC RD
4931 |.if JIT
4932 | cmp byte [PC], BC_ITERN
4933 | jne >6
4934 |.endif
5496 | mov byte [PC], BC_ITERC 4935 | mov byte [PC], BC_ITERC
5497 | jmp <1 4936 | jmp <1
4937 |.if JIT
4938 |6: // Unpatch JLOOP.
4939 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4940 | movzx RC, word [PC+2]
4941 | mov TRACE:RA, [RA+RC*4]
4942 | mov eax, TRACE:RA->startins
4943 | mov al, BC_ITERC
4944 | mov dword [PC], eax
4945 | jmp <1
4946 |.endif
5498 break; 4947 break;
5499 4948
5500 case BC_VARG: 4949 case BC_VARG:
@@ -5777,7 +5226,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5777 if (!vk) { 5226 if (!vk) {
5778 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5227 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5779 } 5228 }
5780 |.if SSE
5781 | movsd xmm0, qword FOR_IDX 5229 | movsd xmm0, qword FOR_IDX
5782 | movsd xmm1, qword FOR_STOP 5230 | movsd xmm1, qword FOR_STOP
5783 if (vk) { 5231 if (vk) {
@@ -5790,22 +5238,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5790 | ucomisd xmm1, xmm0 5238 | ucomisd xmm1, xmm0
5791 |1: 5239 |1:
5792 | movsd qword FOR_EXT, xmm0 5240 | movsd qword FOR_EXT, xmm0
5793 |.else
5794 | fld qword FOR_STOP
5795 | fld qword FOR_IDX
5796 if (vk) {
5797 | fadd qword FOR_STEP // nidx = idx + step
5798 | fst qword FOR_IDX
5799 | fst qword FOR_EXT
5800 | test RB, RB; js >1
5801 } else {
5802 | fst qword FOR_EXT
5803 | jl >1
5804 }
5805 | fxch // Swap lim/(n)idx if step non-negative.
5806 |1:
5807 | fcomparepp
5808 |.endif
5809 if (op == BC_FORI) { 5241 if (op == BC_FORI) {
5810 |.if DUALNUM 5242 |.if DUALNUM
5811 | jnb <7 5243 | jnb <7
@@ -5833,11 +5265,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5833 |2: 5265 |2:
5834 | ins_next 5266 | ins_next
5835 |.endif 5267 |.endif
5836 |.if SSE 5268 |
5837 |3: // Invert comparison if step is negative. 5269 |3: // Invert comparison if step is negative.
5838 | ucomisd xmm0, xmm1 5270 | ucomisd xmm0, xmm1
5839 | jmp <1 5271 | jmp <1
5840 |.endif
5841 break; 5272 break;
5842 5273
5843 case BC_ITERL: 5274 case BC_ITERL:
@@ -5875,7 +5306,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5875 | ins_A // RA = base, RD = target (loop extent) 5306 | ins_A // RA = base, RD = target (loop extent)
5876 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5307 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5877 | // This opcode does NOT jump, its only purpose is to detect a hot loop. 5308 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5878 |.if JIT 5309 |.if JIT
5879 | hotloop RB 5310 | hotloop RB
5880 |.endif 5311 |.endif
5881 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5312 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5894,7 +5325,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5894 | mov RDa, TRACE:RD->mcode 5325 | mov RDa, TRACE:RD->mcode
5895 | mov L:RB, SAVE_L 5326 | mov L:RB, SAVE_L
5896 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5327 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5897 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5328 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5898 | // Save additional callee-save registers only used in compiled code. 5329 | // Save additional callee-save registers only used in compiled code.
5899 |.if X64WIN 5330 |.if X64WIN
5900 | mov TMPQ, r12 5331 | mov TMPQ, r12
@@ -6061,9 +5492,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6061 | // (lua_State *L, lua_CFunction f) 5492 | // (lua_State *L, lua_CFunction f)
6062 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5493 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6063 } 5494 }
6064 | set_vmstate INTERP
6065 | // nresults returned in eax (RD). 5495 | // nresults returned in eax (RD).
6066 | mov BASE, L:RB->base 5496 | mov BASE, L:RB->base
5497 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5498 | set_vmstate INTERP
6067 | lea RA, [BASE+RD*8] 5499 | lea RA, [BASE+RD*8]
6068 | neg RA 5500 | neg RA
6069 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5501 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6176,7 +5608,7 @@ static void emit_asm_debug(BuildCtx *ctx)
6176 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5608 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
6177#endif 5609#endif
6178#if !LJ_NO_UNWIND 5610#if !LJ_NO_UNWIND
6179#if (defined(__sun__) && defined(__svr4__)) 5611#if LJ_TARGET_SOLARIS
6180#if LJ_64 5612#if LJ_64
6181 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); 5613 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
6182#else 5614#else
@@ -6383,15 +5815,21 @@ static void emit_asm_debug(BuildCtx *ctx)
6383 "LEFDEY:\n\n", fcsize); 5815 "LEFDEY:\n\n", fcsize);
6384 } 5816 }
6385#endif 5817#endif
6386#if LJ_64 5818#if !LJ_64
6387 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6388#else
6389 fprintf(ctx->fp, 5819 fprintf(ctx->fp,
6390 "\t.non_lazy_symbol_pointer\n" 5820 "\t.non_lazy_symbol_pointer\n"
6391 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5821 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6392 ".indirect_symbol _lj_err_unwind_dwarf\n" 5822 ".indirect_symbol _lj_err_unwind_dwarf\n"
6393 ".long 0\n"); 5823 ".long 0\n\n");
5824 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5825 {
5826 const char *const *xn;
5827 for (xn = ctx->extnames; *xn; xn++)
5828 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5829 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5830 }
6394#endif 5831#endif
5832 fprintf(ctx->fp, ".subsections_via_symbols\n");
6395 } 5833 }
6396 break; 5834 break;
6397#endif 5835#endif