aboutsummaryrefslogtreecommitdiff
path: root/src/vm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x86.dasc')
-rw-r--r--src/vm_x86.dasc1738
1 files changed, 598 insertions, 1140 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 8c2740c3..eb56840a 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,24 +115,74 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
122|.if not X64 // x86 stack layout. 122|.if not X64 // x86 stack layout.
123| 123|
124|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 124|.if WIN
125|
126|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
125|.macro saveregs_ 127|.macro saveregs_
126| push edi; push esi; push ebx 128| push edi; push esi; push ebx
129| push extern lj_err_unwind_win
130| fs; push dword [0]
131| fs; mov [0], esp
127| sub esp, CFRAME_SPACE 132| sub esp, CFRAME_SPACE
128|.endmacro 133|.endmacro
129|.macro saveregs 134|.macro restoreregs
130| push ebp; saveregs_ 135| add esp, CFRAME_SPACE
136| fs; pop dword [0]
137| pop edi // Short for esp += 4.
138| pop ebx; pop esi; pop edi; pop ebp
139|.endmacro
140|
141|.else
142|
143|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
144|.macro saveregs_
145| push edi; push esi; push ebx
146| sub esp, CFRAME_SPACE
131|.endmacro 147|.endmacro
132|.macro restoreregs 148|.macro restoreregs
133| add esp, CFRAME_SPACE 149| add esp, CFRAME_SPACE
134| pop ebx; pop esi; pop edi; pop ebp 150| pop ebx; pop esi; pop edi; pop ebp
135|.endmacro 151|.endmacro
136| 152|
153|.endif
154|
155|.macro saveregs
156| push ebp; saveregs_
157|.endmacro
158|
159|.if WIN
160|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
161|.define SAVE_NRES, aword [esp+aword*18]
162|.define SAVE_CFRAME, aword [esp+aword*17]
163|.define SAVE_L, aword [esp+aword*16]
164|//----- 16 byte aligned, ^^^ arguments from C caller
165|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
166|.define SAVE_R4, aword [esp+aword*14]
167|.define SAVE_R3, aword [esp+aword*13]
168|.define SAVE_R2, aword [esp+aword*12]
169|//----- 16 byte aligned
170|.define SAVE_R1, aword [esp+aword*11]
171|.define SEH_FUNC, aword [esp+aword*10]
172|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
173|.define UNUSED2, aword [esp+aword*8]
174|//----- 16 byte aligned
175|.define UNUSED1, aword [esp+aword*7]
176|.define SAVE_PC, aword [esp+aword*6]
177|.define TMP2, aword [esp+aword*5]
178|.define TMP1, aword [esp+aword*4]
179|//----- 16 byte aligned
180|.define ARG4, aword [esp+aword*3]
181|.define ARG3, aword [esp+aword*2]
182|.define ARG2, aword [esp+aword*1]
183|.define ARG1, aword [esp] //<-- esp while in interpreter.
184|//----- 16 byte aligned, ^^^ arguments for C callee
185|.else
137|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 186|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
138|.define SAVE_NRES, aword [esp+aword*14] 187|.define SAVE_NRES, aword [esp+aword*14]
139|.define SAVE_CFRAME, aword [esp+aword*13] 188|.define SAVE_CFRAME, aword [esp+aword*13]
@@ -154,6 +203,7 @@
154|.define ARG2, aword [esp+aword*1] 203|.define ARG2, aword [esp+aword*1]
155|.define ARG1, aword [esp] //<-- esp while in interpreter. 204|.define ARG1, aword [esp] //<-- esp while in interpreter.
156|//----- 16 byte aligned, ^^^ arguments for C callee 205|//----- 16 byte aligned, ^^^ arguments for C callee
206|.endif
157| 207|
158|// FPARGx overlaps ARGx and ARG(x+1) on x86. 208|// FPARGx overlaps ARGx and ARG(x+1) on x86.
159|.define FPARG3, qword [esp+qword*1] 209|.define FPARG3, qword [esp+qword*1]
@@ -389,7 +439,6 @@
389| fpop 439| fpop
390|.endmacro 440|.endmacro
391| 441|
392|.macro fdup; fld st0; .endmacro
393|.macro fpop1; fstp st1; .endmacro 442|.macro fpop1; fstp st1; .endmacro
394| 443|
395|// Synthesize SSE FP constants. 444|// Synthesize SSE FP constants.
@@ -555,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx)
555 |.else 604 |.else
556 | mov eax, FCARG2 // Error return status for vm_pcall. 605 | mov eax, FCARG2 // Error return status for vm_pcall.
557 | mov esp, FCARG1 606 | mov esp, FCARG1
607 |.if WIN
608 | lea FCARG1, SEH_NEXT
609 | fs; mov [0], FCARG1
610 |.endif
558 |.endif 611 |.endif
559 |->vm_unwind_c_eh: // Landing pad for external unwinder. 612 |->vm_unwind_c_eh: // Landing pad for external unwinder.
560 | mov L:RB, SAVE_L 613 | mov L:RB, SAVE_L
@@ -578,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx)
578 |.else 631 |.else
579 | and FCARG1, CFRAME_RAWMASK 632 | and FCARG1, CFRAME_RAWMASK
580 | mov esp, FCARG1 633 | mov esp, FCARG1
634 |.if WIN
635 | lea FCARG1, SEH_NEXT
636 | fs; mov [0], FCARG1
637 |.endif
581 |.endif 638 |.endif
582 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 639 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
583 | mov L:RB, SAVE_L 640 | mov L:RB, SAVE_L
@@ -591,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx)
591 | set_vmstate INTERP 648 | set_vmstate INTERP
592 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 649 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
593 | 650 |
651 |.if WIN and not X64
652 |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
653 | // (void *cframe, void *excptrec, void *unwinder, int errcode)
654 | mov [esp], FCARG1 // Return value for RtlUnwind.
655 | push FCARG2 // Exception record for RtlUnwind.
656 | push 0 // Ignored by RtlUnwind.
657 | push dword [FCARG1+CFRAME_OFS_SEH]
658 | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
659 | mov FCARG1, eax
660 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
661 | ret // Jump to unwinder.
662 |.endif
663 |
594 |//----------------------------------------------------------------------- 664 |//-----------------------------------------------------------------------
595 |//-- Grow stack for calls ----------------------------------------------- 665 |//-- Grow stack for calls -----------------------------------------------
596 |//----------------------------------------------------------------------- 666 |//-----------------------------------------------------------------------
@@ -646,17 +716,18 @@ static void build_subroutines(BuildCtx *ctx)
646 | lea KBASEa, [esp+CFRAME_RESUME] 716 | lea KBASEa, [esp+CFRAME_RESUME]
647 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 717 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
648 | add DISPATCH, GG_G2DISP 718 | add DISPATCH, GG_G2DISP
649 | mov L:RB->cframe, KBASEa
650 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 719 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
651 | mov SAVE_CFRAME, RDa 720 | mov SAVE_CFRAME, RDa
652 |.if X64 721 |.if X64
653 | mov SAVE_NRES, RD 722 | mov SAVE_NRES, RD
654 | mov SAVE_ERRF, RD 723 | mov SAVE_ERRF, RD
655 |.endif 724 |.endif
725 | mov L:RB->cframe, KBASEa
656 | cmp byte L:RB->status, RDL 726 | cmp byte L:RB->status, RDL
657 | je >3 // Initial resume (like a call). 727 | je >2 // Initial resume (like a call).
658 | 728 |
659 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
660 | set_vmstate INTERP 731 | set_vmstate INTERP
661 | mov byte L:RB->status, RDL 732 | mov byte L:RB->status, RDL
662 | mov BASE, L:RB->base 733 | mov BASE, L:RB->base
@@ -696,20 +767,19 @@ static void build_subroutines(BuildCtx *ctx)
696 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 767 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
697 |.endif 768 |.endif
698 | 769 |
770 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
699 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 771 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
700 | mov SAVE_CFRAME, KBASEa 772 | mov SAVE_CFRAME, KBASEa
701 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 773 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
774 | add DISPATCH, GG_G2DISP
702 |.if X64 775 |.if X64
703 | mov L:RB->cframe, rsp 776 | mov L:RB->cframe, rsp
704 |.else 777 |.else
705 | mov L:RB->cframe, esp 778 | mov L:RB->cframe, esp
706 |.endif 779 |.endif
707 | 780 |
708 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 781 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
709 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 782 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
710 | add DISPATCH, GG_G2DISP
711 |
712 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
713 | set_vmstate INTERP 783 | set_vmstate INTERP
714 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 784 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
715 | add PC, RA 785 | add PC, RA
@@ -747,14 +817,17 @@ static void build_subroutines(BuildCtx *ctx)
747 | 817 |
748 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 818 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
749 | sub KBASE, L:RB->top 819 | sub KBASE, L:RB->top
820 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
750 | mov SAVE_ERRF, 0 // No error function. 821 | mov SAVE_ERRF, 0 // No error function.
751 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 822 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
823 | add DISPATCH, GG_G2DISP
752 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 824 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
753 | 825 |
754 |.if X64 826 |.if X64
755 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 827 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
756 | mov SAVE_CFRAME, KBASEa 828 | mov SAVE_CFRAME, KBASEa
757 | mov L:RB->cframe, rsp 829 | mov L:RB->cframe, rsp
830 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
758 | 831 |
759 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 832 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
760 |.else 833 |.else
@@ -765,6 +838,7 @@ static void build_subroutines(BuildCtx *ctx)
765 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 838 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
766 | mov SAVE_CFRAME, KBASE 839 | mov SAVE_CFRAME, KBASE
767 | mov L:RB->cframe, esp 840 | mov L:RB->cframe, esp
841 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
768 | 842 |
769 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 843 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
770 |.endif 844 |.endif
@@ -872,13 +946,9 @@ static void build_subroutines(BuildCtx *ctx)
872 |.if DUALNUM 946 |.if DUALNUM
873 | mov TMP2, LJ_TISNUM 947 | mov TMP2, LJ_TISNUM
874 | mov TMP1, RC 948 | mov TMP1, RC
875 |.elif SSE 949 |.else
876 | cvtsi2sd xmm0, RC 950 | cvtsi2sd xmm0, RC
877 | movsd TMPQ, xmm0 951 | movsd TMPQ, xmm0
878 |.else
879 | mov ARG4, RC
880 | fild ARG4
881 | fstp TMPQ
882 |.endif 952 |.endif
883 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 953 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
884 | jmp >1 954 | jmp >1
@@ -932,6 +1002,19 @@ static void build_subroutines(BuildCtx *ctx)
932 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 1002 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
933 | jmp ->vm_call_dispatch_f 1003 | jmp ->vm_call_dispatch_f
934 | 1004 |
1005 |->vmeta_tgetr:
1006 | mov FCARG1, TAB:RB
1007 | mov RB, BASE // Save BASE.
1008 | mov FCARG2, RC // Caveat: FCARG2 == BASE
1009 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1010 | // cTValue * or NULL returned in eax (RC).
1011 | movzx RA, PC_RA
1012 | mov BASE, RB // Restore BASE.
1013 | test RC, RC
1014 | jnz ->BC_TGETR_Z
1015 | mov dword [BASE+RA*8+4], LJ_TNIL
1016 | jmp ->BC_TGETR2_Z
1017 |
935 |//----------------------------------------------------------------------- 1018 |//-----------------------------------------------------------------------
936 | 1019 |
937 |->vmeta_tsets: 1020 |->vmeta_tsets:
@@ -951,13 +1034,9 @@ static void build_subroutines(BuildCtx *ctx)
951 |.if DUALNUM 1034 |.if DUALNUM
952 | mov TMP2, LJ_TISNUM 1035 | mov TMP2, LJ_TISNUM
953 | mov TMP1, RC 1036 | mov TMP1, RC
954 |.elif SSE 1037 |.else
955 | cvtsi2sd xmm0, RC 1038 | cvtsi2sd xmm0, RC
956 | movsd TMPQ, xmm0 1039 | movsd TMPQ, xmm0
957 |.else
958 | mov ARG4, RC
959 | fild ARG4
960 | fstp TMPQ
961 |.endif 1040 |.endif
962 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 1041 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
963 | jmp >1 1042 | jmp >1
@@ -1023,6 +1102,33 @@ static void build_subroutines(BuildCtx *ctx)
1023 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1102 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1024 | jmp ->vm_call_dispatch_f 1103 | jmp ->vm_call_dispatch_f
1025 | 1104 |
1105 |->vmeta_tsetr:
1106 |.if X64WIN
1107 | mov L:CARG1d, SAVE_L
1108 | mov CARG3d, RC
1109 | mov L:CARG1d->base, BASE
1110 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1111 |.elif X64
1112 | mov L:CARG1d, SAVE_L
1113 | mov CARG2d, TAB:RB
1114 | mov L:CARG1d->base, BASE
1115 | mov RB, BASE // Save BASE.
1116 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1117 |.else
1118 | mov L:RA, SAVE_L
1119 | mov ARG2, TAB:RB
1120 | mov RB, BASE // Save BASE.
1121 | mov ARG3, RC
1122 | mov ARG1, L:RA
1123 | mov L:RA->base, BASE
1124 |.endif
1125 | mov SAVE_PC, PC
1126 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1127 | // TValue * returned in eax (RC).
1128 | movzx RA, PC_RA
1129 | mov BASE, RB // Restore BASE.
1130 | jmp ->BC_TSETR_Z
1131 |
1026 |//-- Comparison metamethods --------------------------------------------- 1132 |//-- Comparison metamethods ---------------------------------------------
1027 | 1133 |
1028 |->vmeta_comp: 1134 |->vmeta_comp:
@@ -1117,6 +1223,26 @@ static void build_subroutines(BuildCtx *ctx)
1117 | jmp <3 1223 | jmp <3
1118 |.endif 1224 |.endif
1119 | 1225 |
1226 |->vmeta_istype:
1227 |.if X64
1228 | mov L:RB, SAVE_L
1229 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1230 | mov CARG2d, RA
1231 | movzx CARG3d, PC_RD
1232 | mov L:CARG1d, L:RB
1233 |.else
1234 | movzx RD, PC_RD
1235 | mov ARG2, RA
1236 | mov L:RB, SAVE_L
1237 | mov ARG3, RD
1238 | mov ARG1, L:RB
1239 | mov L:RB->base, BASE
1240 |.endif
1241 | mov SAVE_PC, PC
1242 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1243 | mov BASE, L:RB->base
1244 | jmp <6
1245 |
1120 |//-- Arithmetic metamethods --------------------------------------------- 1246 |//-- Arithmetic metamethods ---------------------------------------------
1121 | 1247 |
1122 |->vmeta_arith_vno: 1248 |->vmeta_arith_vno:
@@ -1293,19 +1419,6 @@ static void build_subroutines(BuildCtx *ctx)
1293 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1419 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1294 |.endmacro 1420 |.endmacro
1295 | 1421 |
1296 |.macro .ffunc_n, name
1297 | .ffunc_1 name
1298 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1299 | fld qword [BASE]
1300 |.endmacro
1301 |
1302 |.macro .ffunc_n, name, op
1303 | .ffunc_1 name
1304 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1305 | op
1306 | fld qword [BASE]
1307 |.endmacro
1308 |
1309 |.macro .ffunc_nsse, name, op 1422 |.macro .ffunc_nsse, name, op
1310 | .ffunc_1 name 1423 | .ffunc_1 name
1311 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1424 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1316,14 +1429,6 @@ static void build_subroutines(BuildCtx *ctx)
1316 | .ffunc_nsse name, movsd 1429 | .ffunc_nsse name, movsd
1317 |.endmacro 1430 |.endmacro
1318 | 1431 |
1319 |.macro .ffunc_nn, name
1320 | .ffunc_2 name
1321 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1322 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1323 | fld qword [BASE]
1324 | fld qword [BASE+8]
1325 |.endmacro
1326 |
1327 |.macro .ffunc_nnsse, name 1432 |.macro .ffunc_nnsse, name
1328 | .ffunc_2 name 1433 | .ffunc_2 name
1329 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1434 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1421,7 +1526,7 @@ static void build_subroutines(BuildCtx *ctx)
1421 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. 1526 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1422 | mov [BASE-8], TAB:RB 1527 | mov [BASE-8], TAB:RB
1423 | mov RA, TAB:RB->hmask 1528 | mov RA, TAB:RB->hmask
1424 | and RA, STR:RC->hash 1529 | and RA, STR:RC->sid
1425 | imul RA, #NODE 1530 | imul RA, #NODE
1426 | add NODE:RA, TAB:RB->node 1531 | add NODE:RA, TAB:RB->node
1427 |3: // Rearranged logic, because we expect _not_ to find the key. 1532 |3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1529,11 +1634,7 @@ static void build_subroutines(BuildCtx *ctx)
1529 |.else 1634 |.else
1530 | jae ->fff_fallback 1635 | jae ->fff_fallback
1531 |.endif 1636 |.endif
1532 |.if SSE
1533 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1637 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1534 |.else
1535 | fld qword [BASE]; jmp ->fff_resn
1536 |.endif
1537 | 1638 |
1538 |.ffunc_1 tostring 1639 |.ffunc_1 tostring
1539 | // Only handles the string or number case inline. 1640 | // Only handles the string or number case inline.
@@ -1558,9 +1659,9 @@ static void build_subroutines(BuildCtx *ctx)
1558 |.endif 1659 |.endif
1559 | mov L:FCARG1, L:RB 1660 | mov L:FCARG1, L:RB
1560 |.if DUALNUM 1661 |.if DUALNUM
1561 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1662 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1562 |.else 1663 |.else
1563 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1664 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1564 |.endif 1665 |.endif
1565 | // GCstr returned in eax (RD). 1666 | // GCstr returned in eax (RD).
1566 | mov BASE, L:RB->base 1667 | mov BASE, L:RB->base
@@ -1572,55 +1673,35 @@ static void build_subroutines(BuildCtx *ctx)
1572 | je >2 // Missing 2nd arg? 1673 | je >2 // Missing 2nd arg?
1573 |1: 1674 |1:
1574 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1675 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1575 | mov L:RB, SAVE_L
1576 | mov L:RB->base, BASE // Add frame since C call can throw.
1577 | mov L:RB->top, BASE // Dummy frame length is ok.
1578 | mov PC, [BASE-4] 1676 | mov PC, [BASE-4]
1677 | mov RB, BASE // Save BASE.
1579 |.if X64WIN 1678 |.if X64WIN
1580 | lea CARG3d, [BASE+8] 1679 | mov CARG1d, [BASE]
1581 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. 1680 | lea CARG3d, [BASE-8]
1582 | mov CARG1d, L:RB 1681 | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE.
1583 |.elif X64 1682 |.elif X64
1584 | mov CARG2d, [BASE] 1683 | mov CARG1d, [BASE]
1585 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. 1684 | lea CARG2d, [BASE+8]
1586 | mov CARG1d, L:RB 1685 | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE.
1587 |.else 1686 |.else
1588 | mov TAB:RD, [BASE] 1687 | mov TAB:RD, [BASE]
1589 | mov ARG2, TAB:RD 1688 | mov ARG1, TAB:RD
1590 | mov ARG1, L:RB
1591 | add BASE, 8 1689 | add BASE, 8
1690 | mov ARG2, BASE
1691 | sub BASE, 8+8
1592 | mov ARG3, BASE 1692 | mov ARG3, BASE
1593 |.endif 1693 |.endif
1594 | mov SAVE_PC, PC // Needed for ITERN fallback. 1694 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1595 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1695 | // 1=found, 0=end, -1=error returned in eax (RD).
1596 | // Flag returned in eax (RD). 1696 | mov BASE, RB // Restore BASE.
1597 | mov BASE, L:RB->base 1697 | test RD, RD; jg ->fff_res2 // Found key/value.
1598 | test RD, RD; jz >3 // End of traversal? 1698 | js ->fff_fallback_2 // Invalid key.
1599 | // Copy key and value to results. 1699 | // End of traversal: return nil.
1600 |.if X64 1700 | mov dword [BASE-4], LJ_TNIL
1601 | mov RBa, [BASE+8] 1701 | jmp ->fff_res1
1602 | mov RDa, [BASE+16]
1603 | mov [BASE-8], RBa
1604 | mov [BASE], RDa
1605 |.else
1606 | mov RB, [BASE+8]
1607 | mov RD, [BASE+12]
1608 | mov [BASE-8], RB
1609 | mov [BASE-4], RD
1610 | mov RB, [BASE+16]
1611 | mov RD, [BASE+20]
1612 | mov [BASE], RB
1613 | mov [BASE+4], RD
1614 |.endif
1615 |->fff_res2:
1616 | mov RD, 1+2
1617 | jmp ->fff_res
1618 |2: // Set missing 2nd arg to nil. 1702 |2: // Set missing 2nd arg to nil.
1619 | mov dword [BASE+12], LJ_TNIL 1703 | mov dword [BASE+12], LJ_TNIL
1620 | jmp <1 1704 | jmp <1
1621 |3: // End of traversal: return nil.
1622 | mov dword [BASE-4], LJ_TNIL
1623 | jmp ->fff_res1
1624 | 1705 |
1625 |.ffunc_1 pairs 1706 |.ffunc_1 pairs
1626 | mov TAB:RB, [BASE] 1707 | mov TAB:RB, [BASE]
@@ -1651,19 +1732,12 @@ static void build_subroutines(BuildCtx *ctx)
1651 | add RD, 1 1732 | add RD, 1
1652 | mov dword [BASE-4], LJ_TISNUM 1733 | mov dword [BASE-4], LJ_TISNUM
1653 | mov dword [BASE-8], RD 1734 | mov dword [BASE-8], RD
1654 |.elif SSE 1735 |.else
1655 | movsd xmm0, qword [BASE+8] 1736 | movsd xmm0, qword [BASE+8]
1656 | sseconst_1 xmm1, RBa 1737 | sseconst_1 xmm1, RBa
1657 | addsd xmm0, xmm1 1738 | addsd xmm0, xmm1
1658 | cvtsd2si RD, xmm0 1739 | cvttsd2si RD, xmm0
1659 | movsd qword [BASE-8], xmm0 1740 | movsd qword [BASE-8], xmm0
1660 |.else
1661 | fld qword [BASE+8]
1662 | fld1
1663 | faddp st1
1664 | fist ARG1
1665 | fstp qword [BASE-8]
1666 | mov RD, ARG1
1667 |.endif 1741 |.endif
1668 | mov TAB:RB, [BASE] 1742 | mov TAB:RB, [BASE]
1669 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1743 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1681,7 +1755,9 @@ static void build_subroutines(BuildCtx *ctx)
1681 | mov [BASE], RB 1755 | mov [BASE], RB
1682 | mov [BASE+4], RD 1756 | mov [BASE+4], RD
1683 |.endif 1757 |.endif
1684 | jmp ->fff_res2 1758 |->fff_res2:
1759 | mov RD, 1+2
1760 | jmp ->fff_res
1685 |2: // Check for empty hash part first. Otherwise call C function. 1761 |2: // Check for empty hash part first. Otherwise call C function.
1686 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 1762 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1687 | mov FCARG1, TAB:RB 1763 | mov FCARG1, TAB:RB
@@ -1710,12 +1786,9 @@ static void build_subroutines(BuildCtx *ctx)
1710 |.if DUALNUM 1786 |.if DUALNUM
1711 | mov dword [BASE+12], LJ_TISNUM 1787 | mov dword [BASE+12], LJ_TISNUM
1712 | mov dword [BASE+8], 0 1788 | mov dword [BASE+8], 0
1713 |.elif SSE 1789 |.else
1714 | xorps xmm0, xmm0 1790 | xorps xmm0, xmm0
1715 | movsd qword [BASE+8], xmm0 1791 | movsd qword [BASE+8], xmm0
1716 |.else
1717 | fldz
1718 | fstp qword [BASE+8]
1719 |.endif 1792 |.endif
1720 | mov RD, 1+3 1793 | mov RD, 1+3
1721 | jmp ->fff_res 1794 | jmp ->fff_res
@@ -1822,7 +1895,6 @@ static void build_subroutines(BuildCtx *ctx)
1822 | mov ARG3, RA 1895 | mov ARG3, RA
1823 |.endif 1896 |.endif
1824 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1897 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1825 | set_vmstate INTERP
1826 | 1898 |
1827 | mov L:RB, SAVE_L 1899 | mov L:RB, SAVE_L
1828 |.if X64 1900 |.if X64
@@ -1831,6 +1903,9 @@ static void build_subroutines(BuildCtx *ctx)
1831 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1903 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1832 |.endif 1904 |.endif
1833 | mov BASE, L:RB->base 1905 | mov BASE, L:RB->base
1906 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1907 | set_vmstate INTERP
1908 |
1834 | cmp eax, LUA_YIELD 1909 | cmp eax, LUA_YIELD
1835 | ja >8 1910 | ja >8
1836 |4: 1911 |4:
@@ -1945,12 +2020,10 @@ static void build_subroutines(BuildCtx *ctx)
1945 |->fff_resi: // Dummy. 2020 |->fff_resi: // Dummy.
1946 |.endif 2021 |.endif
1947 | 2022 |
1948 |.if SSE
1949 |->fff_resn: 2023 |->fff_resn:
1950 | mov PC, [BASE-4] 2024 | mov PC, [BASE-4]
1951 | fstp qword [BASE-8] 2025 | fstp qword [BASE-8]
1952 | jmp ->fff_res1 2026 | jmp ->fff_res1
1953 |.endif
1954 | 2027 |
1955 | .ffunc_1 math_abs 2028 | .ffunc_1 math_abs
1956 |.if DUALNUM 2029 |.if DUALNUM
@@ -1974,8 +2047,6 @@ static void build_subroutines(BuildCtx *ctx)
1974 |.else 2047 |.else
1975 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2048 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1976 |.endif 2049 |.endif
1977 |
1978 |.if SSE
1979 | movsd xmm0, qword [BASE] 2050 | movsd xmm0, qword [BASE]
1980 | sseconst_abs xmm1, RDa 2051 | sseconst_abs xmm1, RDa
1981 | andps xmm0, xmm1 2052 | andps xmm0, xmm1
@@ -1983,15 +2054,6 @@ static void build_subroutines(BuildCtx *ctx)
1983 | mov PC, [BASE-4] 2054 | mov PC, [BASE-4]
1984 | movsd qword [BASE-8], xmm0 2055 | movsd qword [BASE-8], xmm0
1985 | // fallthrough 2056 | // fallthrough
1986 |.else
1987 | fld qword [BASE]
1988 | fabs
1989 | // fallthrough
1990 |->fff_resxmm0: // Dummy.
1991 |->fff_resn:
1992 | mov PC, [BASE-4]
1993 | fstp qword [BASE-8]
1994 |.endif
1995 | 2057 |
1996 |->fff_res1: 2058 |->fff_res1:
1997 | mov RD, 1+1 2059 | mov RD, 1+1
@@ -2018,6 +2080,12 @@ static void build_subroutines(BuildCtx *ctx)
2018 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2080 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
2019 | jmp ->vm_return 2081 | jmp ->vm_return
2020 | 2082 |
2083 |.if X64
2084 |.define fff_resfp, fff_resxmm0
2085 |.else
2086 |.define fff_resfp, fff_resn
2087 |.endif
2088 |
2021 |.macro math_round, func 2089 |.macro math_round, func
2022 | .ffunc math_ .. func 2090 | .ffunc math_ .. func
2023 |.if DUALNUM 2091 |.if DUALNUM
@@ -2028,107 +2096,75 @@ static void build_subroutines(BuildCtx *ctx)
2028 |.else 2096 |.else
2029 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2097 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2030 |.endif 2098 |.endif
2031 |.if SSE
2032 | movsd xmm0, qword [BASE] 2099 | movsd xmm0, qword [BASE]
2033 | call ->vm_ .. func 2100 | call ->vm_ .. func .. _sse
2034 | .if DUALNUM 2101 |.if DUALNUM
2035 | cvtsd2si RB, xmm0 2102 | cvttsd2si RB, xmm0
2036 | cmp RB, 0x80000000 2103 | cmp RB, 0x80000000
2037 | jne ->fff_resi 2104 | jne ->fff_resi
2038 | cvtsi2sd xmm1, RB 2105 | cvtsi2sd xmm1, RB
2039 | ucomisd xmm0, xmm1 2106 | ucomisd xmm0, xmm1
2040 | jp ->fff_resxmm0 2107 | jp ->fff_resxmm0
2041 | je ->fff_resi 2108 | je ->fff_resi
2042 | .endif
2043 | jmp ->fff_resxmm0
2044 |.else
2045 | fld qword [BASE]
2046 | call ->vm_ .. func
2047 | .if DUALNUM
2048 | fist ARG1
2049 | mov RB, ARG1
2050 | cmp RB, 0x80000000; jne >2
2051 | fdup
2052 | fild ARG1
2053 | fcomparepp
2054 | jp ->fff_resn
2055 | jne ->fff_resn
2056 |2:
2057 | fpop
2058 | jmp ->fff_resi
2059 | .else
2060 | jmp ->fff_resn
2061 | .endif
2062 |.endif 2109 |.endif
2110 | jmp ->fff_resxmm0
2063 |.endmacro 2111 |.endmacro
2064 | 2112 |
2065 | math_round floor 2113 | math_round floor
2066 | math_round ceil 2114 | math_round ceil
2067 | 2115 |
2068 |.if SSE
2069 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2116 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2070 |.else
2071 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2072 |.endif
2073 | 2117 |
2074 |.ffunc math_log 2118 |.ffunc math_log
2075 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2119 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2076 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2120 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2077 | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn 2121 | movsd xmm0, qword [BASE]
2078 | 2122 |.if not X64
2079 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2123 | movsd FPARG1, xmm0
2080 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2124 |.endif
2081 | 2125 | mov RB, BASE
2082 |.ffunc_n math_sin; fsin; jmp ->fff_resn 2126 | call extern log
2083 |.ffunc_n math_cos; fcos; jmp ->fff_resn 2127 | mov BASE, RB
2084 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn 2128 | jmp ->fff_resfp
2085 |
2086 |.ffunc_n math_asin
2087 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
2088 | jmp ->fff_resn
2089 |.ffunc_n math_acos
2090 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
2091 | jmp ->fff_resn
2092 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2093 | 2129 |
2094 |.macro math_extern, func 2130 |.macro math_extern, func
2095 |.if SSE
2096 | .ffunc_nsse math_ .. func 2131 | .ffunc_nsse math_ .. func
2097 | .if not X64 2132 |.if not X64
2098 | movsd FPARG1, xmm0 2133 | movsd FPARG1, xmm0
2099 | .endif
2100 |.else
2101 | .ffunc_n math_ .. func
2102 | fstp FPARG1
2103 |.endif 2134 |.endif
2104 | mov RB, BASE 2135 | mov RB, BASE
2105 | call extern lj_vm_ .. func 2136 | call extern func
2106 | mov BASE, RB 2137 | mov BASE, RB
2107 | .if X64 2138 | jmp ->fff_resfp
2108 | jmp ->fff_resxmm0
2109 | .else
2110 | jmp ->fff_resn
2111 | .endif
2112 |.endmacro 2139 |.endmacro
2113 | 2140 |
2141 |.macro math_extern2, func
2142 | .ffunc_nnsse math_ .. func
2143 |.if not X64
2144 | movsd FPARG1, xmm0
2145 | movsd FPARG3, xmm1
2146 |.endif
2147 | mov RB, BASE
2148 | call extern func
2149 | mov BASE, RB
2150 | jmp ->fff_resfp
2151 |.endmacro
2152 |
2153 | math_extern log10
2154 | math_extern exp
2155 | math_extern sin
2156 | math_extern cos
2157 | math_extern tan
2158 | math_extern asin
2159 | math_extern acos
2160 | math_extern atan
2114 | math_extern sinh 2161 | math_extern sinh
2115 | math_extern cosh 2162 | math_extern cosh
2116 | math_extern tanh 2163 | math_extern tanh
2164 | math_extern2 pow
2165 | math_extern2 atan2
2166 | math_extern2 fmod
2117 | 2167 |
2118 |->ff_math_deg:
2119 |.if SSE
2120 |.ffunc_nsse math_rad
2121 | mov CFUNC:RB, [BASE-8]
2122 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2123 | jmp ->fff_resxmm0
2124 |.else
2125 |.ffunc_n math_rad
2126 | mov CFUNC:RB, [BASE-8]
2127 | fmul qword CFUNC:RB->upvalue[0]
2128 | jmp ->fff_resn
2129 |.endif
2130 |
2131 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2132 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2168 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2133 | 2169 |
2134 |.ffunc_1 math_frexp 2170 |.ffunc_1 math_frexp
@@ -2143,65 +2179,34 @@ static void build_subroutines(BuildCtx *ctx)
2143 | cmp RB, 0x00200000; jb >4 2179 | cmp RB, 0x00200000; jb >4
2144 |1: 2180 |1:
2145 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2181 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2146 |.if SSE
2147 | cvtsi2sd xmm0, RB 2182 | cvtsi2sd xmm0, RB
2148 |.else
2149 | mov TMP1, RB; fild TMP1
2150 |.endif
2151 | mov RB, [BASE-4] 2183 | mov RB, [BASE-4]
2152 | and RB, 0x800fffff // Mask off exponent. 2184 | and RB, 0x800fffff // Mask off exponent.
2153 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2185 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2154 | mov [BASE-4], RB 2186 | mov [BASE-4], RB
2155 |2: 2187 |2:
2156 |.if SSE
2157 | movsd qword [BASE], xmm0 2188 | movsd qword [BASE], xmm0
2158 |.else
2159 | fstp qword [BASE]
2160 |.endif
2161 | mov RD, 1+2 2189 | mov RD, 1+2
2162 | jmp ->fff_res 2190 | jmp ->fff_res
2163 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2191 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2164 |.if SSE
2165 | xorps xmm0, xmm0; jmp <2 2192 | xorps xmm0, xmm0; jmp <2
2166 |.else
2167 | fldz; jmp <2
2168 |.endif
2169 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2193 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2170 |.if SSE
2171 | movsd xmm0, qword [BASE] 2194 | movsd xmm0, qword [BASE]
2172 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2195 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2173 | mulsd xmm0, xmm1 2196 | mulsd xmm0, xmm1
2174 | movsd qword [BASE-8], xmm0 2197 | movsd qword [BASE-8], xmm0
2175 |.else
2176 | fld qword [BASE]
2177 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2178 | fstp qword [BASE-8]
2179 |.endif
2180 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2198 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2181 | 2199 |
2182 |.if SSE
2183 |.ffunc_nsse math_modf 2200 |.ffunc_nsse math_modf
2184 |.else
2185 |.ffunc_n math_modf
2186 |.endif
2187 | mov RB, [BASE+4] 2201 | mov RB, [BASE+4]
2188 | mov PC, [BASE-4] 2202 | mov PC, [BASE-4]
2189 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2203 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2190 |.if SSE
2191 | movaps xmm4, xmm0 2204 | movaps xmm4, xmm0
2192 | call ->vm_trunc 2205 | call ->vm_trunc_sse
2193 | subsd xmm4, xmm0 2206 | subsd xmm4, xmm0
2194 |1: 2207 |1:
2195 | movsd qword [BASE-8], xmm0 2208 | movsd qword [BASE-8], xmm0
2196 | movsd qword [BASE], xmm4 2209 | movsd qword [BASE], xmm4
2197 |.else
2198 | fdup
2199 | call ->vm_trunc
2200 | fsub st1, st0
2201 |1:
2202 | fstp qword [BASE-8]
2203 | fstp qword [BASE]
2204 |.endif
2205 | mov RC, [BASE-4]; mov RB, [BASE+4] 2210 | mov RC, [BASE-4]; mov RB, [BASE+4]
2206 | xor RC, RB; js >3 // Need to adjust sign? 2211 | xor RC, RB; js >3 // Need to adjust sign?
2207 |2: 2212 |2:
@@ -2211,25 +2216,10 @@ static void build_subroutines(BuildCtx *ctx)
2211 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2216 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2212 | jmp <2 2217 | jmp <2
2213 |4: 2218 |4:
2214 |.if SSE
2215 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2219 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2216 |.else
2217 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2218 |.endif
2219 |
2220 |.ffunc_nnr math_fmod
2221 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2222 | fpop1
2223 | jmp ->fff_resn
2224 |
2225 |.if SSE
2226 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2227 |.else
2228 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2229 |.endif
2230 | 2220 |
2231 |.macro math_minmax, name, cmovop, fcmovop, sseop 2221 |.macro math_minmax, name, cmovop, sseop
2232 | .ffunc name 2222 | .ffunc_1 name
2233 | mov RA, 2 2223 | mov RA, 2
2234 | cmp dword [BASE+4], LJ_TISNUM 2224 | cmp dword [BASE+4], LJ_TISNUM
2235 |.if DUALNUM 2225 |.if DUALNUM
@@ -2245,12 +2235,7 @@ static void build_subroutines(BuildCtx *ctx)
2245 |3: 2235 |3:
2246 | ja ->fff_fallback 2236 | ja ->fff_fallback
2247 | // Convert intermediate result to number and continue below. 2237 | // Convert intermediate result to number and continue below.
2248 |.if SSE
2249 | cvtsi2sd xmm0, RB 2238 | cvtsi2sd xmm0, RB
2250 |.else
2251 | mov TMP1, RB
2252 | fild TMP1
2253 |.endif
2254 | jmp >6 2239 | jmp >6
2255 |4: 2240 |4:
2256 | ja ->fff_fallback 2241 | ja ->fff_fallback
@@ -2258,7 +2243,6 @@ static void build_subroutines(BuildCtx *ctx)
2258 | jae ->fff_fallback 2243 | jae ->fff_fallback
2259 |.endif 2244 |.endif
2260 | 2245 |
2261 |.if SSE
2262 | movsd xmm0, qword [BASE] 2246 | movsd xmm0, qword [BASE]
2263 |5: // Handle numbers or integers. 2247 |5: // Handle numbers or integers.
2264 | cmp RA, RD; jae ->fff_resxmm0 2248 | cmp RA, RD; jae ->fff_resxmm0
@@ -2277,48 +2261,13 @@ static void build_subroutines(BuildCtx *ctx)
2277 | sseop xmm0, xmm1 2261 | sseop xmm0, xmm1
2278 | add RA, 1 2262 | add RA, 1
2279 | jmp <5 2263 | jmp <5
2280 |.else
2281 | fld qword [BASE]
2282 |5: // Handle numbers or integers.
2283 | cmp RA, RD; jae ->fff_resn
2284 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2285 |.if DUALNUM
2286 | jb >6
2287 | ja >9
2288 | fild dword [BASE+RA*8-8]
2289 | jmp >7
2290 |.else
2291 | jae >9
2292 |.endif
2293 |6:
2294 | fld qword [BASE+RA*8-8]
2295 |7:
2296 | fucomi st1; fcmovop st1; fpop1
2297 | add RA, 1
2298 | jmp <5
2299 |.endif
2300 |.endmacro 2264 |.endmacro
2301 | 2265 |
2302 | math_minmax math_min, cmovg, fcmovnbe, minsd 2266 | math_minmax math_min, cmovg, minsd
2303 | math_minmax math_max, cmovl, fcmovbe, maxsd 2267 | math_minmax math_max, cmovl, maxsd
2304 |.if not SSE
2305 |9:
2306 | fpop; jmp ->fff_fallback
2307 |.endif
2308 | 2268 |
2309 |//-- String library ----------------------------------------------------- 2269 |//-- String library -----------------------------------------------------
2310 | 2270 |
2311 |.ffunc_1 string_len
2312 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2313 | mov STR:RB, [BASE]
2314 |.if DUALNUM
2315 | mov RB, dword STR:RB->len; jmp ->fff_resi
2316 |.elif SSE
2317 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2318 |.else
2319 | fild dword STR:RB->len; jmp ->fff_resn
2320 |.endif
2321 |
2322 |.ffunc string_byte // Only handle the 1-arg case here. 2271 |.ffunc string_byte // Only handle the 1-arg case here.
2323 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2272 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2324 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2273 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2329,10 +2278,8 @@ static void build_subroutines(BuildCtx *ctx)
2329 | movzx RB, byte STR:RB[1] 2278 | movzx RB, byte STR:RB[1]
2330 |.if DUALNUM 2279 |.if DUALNUM
2331 | jmp ->fff_resi 2280 | jmp ->fff_resi
2332 |.elif SSE
2333 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2334 |.else 2281 |.else
2335 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2282 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2336 |.endif 2283 |.endif
2337 | 2284 |
2338 |.ffunc string_char // Only handle the 1-arg case here. 2285 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2344,16 +2291,11 @@ static void build_subroutines(BuildCtx *ctx)
2344 | mov RB, dword [BASE] 2291 | mov RB, dword [BASE]
2345 | cmp RB, 255; ja ->fff_fallback 2292 | cmp RB, 255; ja ->fff_fallback
2346 | mov TMP2, RB 2293 | mov TMP2, RB
2347 |.elif SSE 2294 |.else
2348 | jae ->fff_fallback 2295 | jae ->fff_fallback
2349 | cvttsd2si RB, qword [BASE] 2296 | cvttsd2si RB, qword [BASE]
2350 | cmp RB, 255; ja ->fff_fallback 2297 | cmp RB, 255; ja ->fff_fallback
2351 | mov TMP2, RB 2298 | mov TMP2, RB
2352 |.else
2353 | jae ->fff_fallback
2354 | fld qword [BASE]
2355 | fistp TMP2
2356 | cmp TMP2, 255; ja ->fff_fallback
2357 |.endif 2299 |.endif
2358 |.if X64 2300 |.if X64
2359 | mov TMP3, 1 2301 | mov TMP3, 1
@@ -2374,6 +2316,7 @@ static void build_subroutines(BuildCtx *ctx)
2374 |.endif 2316 |.endif
2375 | mov SAVE_PC, PC 2317 | mov SAVE_PC, PC
2376 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2318 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2319 |->fff_resstr:
2377 | // GCstr * returned in eax (RD). 2320 | // GCstr * returned in eax (RD).
2378 | mov BASE, L:RB->base 2321 | mov BASE, L:RB->base
2379 | mov PC, [BASE-4] 2322 | mov PC, [BASE-4]
@@ -2391,14 +2334,10 @@ static void build_subroutines(BuildCtx *ctx)
2391 | jne ->fff_fallback 2334 | jne ->fff_fallback
2392 | mov RB, dword [BASE+16] 2335 | mov RB, dword [BASE+16]
2393 | mov TMP2, RB 2336 | mov TMP2, RB
2394 |.elif SSE 2337 |.else
2395 | jae ->fff_fallback 2338 | jae ->fff_fallback
2396 | cvttsd2si RB, qword [BASE+16] 2339 | cvttsd2si RB, qword [BASE+16]
2397 | mov TMP2, RB 2340 | mov TMP2, RB
2398 |.else
2399 | jae ->fff_fallback
2400 | fld qword [BASE+16]
2401 | fistp TMP2
2402 |.endif 2341 |.endif
2403 |1: 2342 |1:
2404 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2343 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2413,12 +2352,8 @@ static void build_subroutines(BuildCtx *ctx)
2413 | mov RB, STR:RB->len 2352 | mov RB, STR:RB->len
2414 |.if DUALNUM 2353 |.if DUALNUM
2415 | mov RA, dword [BASE+8] 2354 | mov RA, dword [BASE+8]
2416 |.elif SSE
2417 | cvttsd2si RA, qword [BASE+8]
2418 |.else 2355 |.else
2419 | fld qword [BASE+8] 2356 | cvttsd2si RA, qword [BASE+8]
2420 | fistp ARG3
2421 | mov RA, ARG3
2422 |.endif 2357 |.endif
2423 | mov RC, TMP2 2358 | mov RC, TMP2
2424 | cmp RB, RC // len < end? (unsigned compare) 2359 | cmp RB, RC // len < end? (unsigned compare)
@@ -2462,136 +2397,34 @@ static void build_subroutines(BuildCtx *ctx)
2462 | xor RC, RC // Zero length. Any ptr in RB is ok. 2397 | xor RC, RC // Zero length. Any ptr in RB is ok.
2463 | jmp <4 2398 | jmp <4
2464 | 2399 |
2465 |.ffunc string_rep // Only handle the 1-char case inline. 2400 |.macro ffstring_op, name
2466 | ffgccheck 2401 | .ffunc_1 string_ .. name
2467 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2468 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2469 | cmp dword [BASE+12], LJ_TISNUM
2470 | mov STR:RB, [BASE]
2471 |.if DUALNUM
2472 | jne ->fff_fallback
2473 | mov RC, dword [BASE+8]
2474 |.elif SSE
2475 | jae ->fff_fallback
2476 | cvttsd2si RC, qword [BASE+8]
2477 |.else
2478 | jae ->fff_fallback
2479 | fld qword [BASE+8]
2480 | fistp TMP2
2481 | mov RC, TMP2
2482 |.endif
2483 | test RC, RC
2484 | jle ->fff_emptystr // Count <= 0? (or non-int)
2485 | cmp dword STR:RB->len, 1
2486 | jb ->fff_emptystr // Zero length string?
2487 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2488 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2489 | movzx RA, byte STR:RB[1]
2490 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2491 |.if X64
2492 | mov TMP3, RC
2493 |.else
2494 | mov ARG3, RC
2495 |.endif
2496 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2497 | mov [RB], RAL
2498 | add RB, 1
2499 | sub RC, 1
2500 | jnz <1
2501 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2502 | jmp ->fff_newstr
2503 |
2504 |.ffunc_1 string_reverse
2505 | ffgccheck 2402 | ffgccheck
2506 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2403 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2507 | mov STR:RB, [BASE] 2404 | mov L:RB, SAVE_L
2508 | mov RC, STR:RB->len 2405 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2509 | test RC, RC 2406 | mov L:RB->base, BASE
2510 | jz ->fff_emptystr // Zero length string? 2407 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2511 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2408 | mov RCa, SBUF:FCARG1->b
2512 | add RB, #STR 2409 | mov SBUF:FCARG1->L, L:RB
2513 | mov TMP2, PC // Need another temp register. 2410 | mov SBUF:FCARG1->w, RCa
2514 |.if X64 2411 | mov SAVE_PC, PC
2515 | mov TMP3, RC 2412 | call extern lj_buf_putstr_ .. name .. @8
2516 |.else 2413 | mov FCARG1, eax
2517 | mov ARG3, RC 2414 | call extern lj_buf_tostr@4
2518 |.endif 2415 | jmp ->fff_resstr
2519 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2520 |1:
2521 | movzx RA, byte [RB]
2522 | add RB, 1
2523 | sub RC, 1
2524 | mov [PC+RC], RAL
2525 | jnz <1
2526 | mov RD, PC
2527 | mov PC, TMP2
2528 | jmp ->fff_newstr
2529 |
2530 |.macro ffstring_case, name, lo, hi
2531 | .ffunc_1 name
2532 | ffgccheck
2533 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2534 | mov STR:RB, [BASE]
2535 | mov RC, STR:RB->len
2536 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2537 | add RB, #STR
2538 | mov TMP2, PC // Need another temp register.
2539 |.if X64
2540 | mov TMP3, RC
2541 |.else
2542 | mov ARG3, RC
2543 |.endif
2544 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2545 | jmp >3
2546 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2547 | movzx RA, byte [RB+RC]
2548 | cmp RA, lo
2549 | jb >2
2550 | cmp RA, hi
2551 | ja >2
2552 | xor RA, 0x20
2553 |2:
2554 | mov [PC+RC], RAL
2555 |3:
2556 | sub RC, 1
2557 | jns <1
2558 | mov RD, PC
2559 | mov PC, TMP2
2560 | jmp ->fff_newstr
2561 |.endmacro 2416 |.endmacro
2562 | 2417 |
2563 |ffstring_case string_lower, 0x41, 0x5a 2418 |ffstring_op reverse
2564 |ffstring_case string_upper, 0x61, 0x7a 2419 |ffstring_op lower
2565 | 2420 |ffstring_op upper
2566 |//-- Table library ------------------------------------------------------
2567 |
2568 |.ffunc_1 table_getn
2569 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2570 | mov RB, BASE // Save BASE.
2571 | mov TAB:FCARG1, [BASE]
2572 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2573 | // Length of table returned in eax (RD).
2574 | mov BASE, RB // Restore BASE.
2575 |.if DUALNUM
2576 | mov RB, RD; jmp ->fff_resi
2577 |.elif SSE
2578 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2579 |.else
2580 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2581 |.endif
2582 | 2421 |
2583 |//-- Bit library -------------------------------------------------------- 2422 |//-- Bit library --------------------------------------------------------
2584 | 2423 |
2585 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2586 |
2587 |.macro .ffunc_bit, name, kind, fdef 2424 |.macro .ffunc_bit, name, kind, fdef
2588 | fdef name 2425 | fdef name
2589 |.if kind == 2 2426 |.if kind == 2
2590 |.if SSE
2591 | sseconst_tobit xmm1, RBa 2427 | sseconst_tobit xmm1, RBa
2592 |.else
2593 | mov TMP1, TOBIT_BIAS
2594 |.endif
2595 |.endif 2428 |.endif
2596 | cmp dword [BASE+4], LJ_TISNUM 2429 | cmp dword [BASE+4], LJ_TISNUM
2597 |.if DUALNUM 2430 |.if DUALNUM
@@ -2607,24 +2440,12 @@ static void build_subroutines(BuildCtx *ctx)
2607 |.else 2440 |.else
2608 | jae ->fff_fallback 2441 | jae ->fff_fallback
2609 |.endif 2442 |.endif
2610 |.if SSE
2611 | movsd xmm0, qword [BASE] 2443 | movsd xmm0, qword [BASE]
2612 |.if kind < 2 2444 |.if kind < 2
2613 | sseconst_tobit xmm1, RBa 2445 | sseconst_tobit xmm1, RBa
2614 |.endif 2446 |.endif
2615 | addsd xmm0, xmm1 2447 | addsd xmm0, xmm1
2616 | movd RB, xmm0 2448 | movd RB, xmm0
2617 |.else
2618 | fld qword [BASE]
2619 |.if kind < 2
2620 | mov TMP1, TOBIT_BIAS
2621 |.endif
2622 | fadd TMP1
2623 | fstp FPARG1
2624 |.if kind > 0
2625 | mov RB, ARG1
2626 |.endif
2627 |.endif
2628 |2: 2449 |2:
2629 |.endmacro 2450 |.endmacro
2630 | 2451 |
@@ -2633,15 +2454,7 @@ static void build_subroutines(BuildCtx *ctx)
2633 |.endmacro 2454 |.endmacro
2634 | 2455 |
2635 |.ffunc_bit bit_tobit, 0 2456 |.ffunc_bit bit_tobit, 0
2636 |.if DUALNUM or SSE
2637 |.if not SSE
2638 | mov RB, ARG1
2639 |.endif
2640 | jmp ->fff_resbit 2457 | jmp ->fff_resbit
2641 |.else
2642 | fild ARG1
2643 | jmp ->fff_resn
2644 |.endif
2645 | 2458 |
2646 |.macro .ffunc_bit_op, name, ins 2459 |.macro .ffunc_bit_op, name, ins
2647 | .ffunc_bit name, 2 2460 | .ffunc_bit name, 2
@@ -2661,17 +2474,10 @@ static void build_subroutines(BuildCtx *ctx)
2661 |.else 2474 |.else
2662 | jae ->fff_fallback_bit_op 2475 | jae ->fff_fallback_bit_op
2663 |.endif 2476 |.endif
2664 |.if SSE
2665 | movsd xmm0, qword [RD] 2477 | movsd xmm0, qword [RD]
2666 | addsd xmm0, xmm1 2478 | addsd xmm0, xmm1
2667 | movd RA, xmm0 2479 | movd RA, xmm0
2668 | ins RB, RA 2480 | ins RB, RA
2669 |.else
2670 | fld qword [RD]
2671 | fadd TMP1
2672 | fstp FPARG1
2673 | ins RB, ARG1
2674 |.endif
2675 | sub RD, 8 2481 | sub RD, 8
2676 | jmp <1 2482 | jmp <1
2677 |.endmacro 2483 |.endmacro
@@ -2688,15 +2494,10 @@ static void build_subroutines(BuildCtx *ctx)
2688 | not RB 2494 | not RB
2689 |.if DUALNUM 2495 |.if DUALNUM
2690 | jmp ->fff_resbit 2496 | jmp ->fff_resbit
2691 |.elif SSE 2497 |.else
2692 |->fff_resbit: 2498 |->fff_resbit:
2693 | cvtsi2sd xmm0, RB 2499 | cvtsi2sd xmm0, RB
2694 | jmp ->fff_resxmm0 2500 | jmp ->fff_resxmm0
2695 |.else
2696 |->fff_resbit:
2697 | mov ARG1, RB
2698 | fild ARG1
2699 | jmp ->fff_resn
2700 |.endif 2501 |.endif
2701 | 2502 |
2702 |->fff_fallback_bit_op: 2503 |->fff_fallback_bit_op:
@@ -2709,22 +2510,13 @@ static void build_subroutines(BuildCtx *ctx)
2709 | // Note: no inline conversion from number for 2nd argument! 2510 | // Note: no inline conversion from number for 2nd argument!
2710 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2511 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2711 | mov RA, dword [BASE+8] 2512 | mov RA, dword [BASE+8]
2712 |.elif SSE 2513 |.else
2713 | .ffunc_nnsse name 2514 | .ffunc_nnsse name
2714 | sseconst_tobit xmm2, RBa 2515 | sseconst_tobit xmm2, RBa
2715 | addsd xmm0, xmm2 2516 | addsd xmm0, xmm2
2716 | addsd xmm1, xmm2 2517 | addsd xmm1, xmm2
2717 | movd RB, xmm0 2518 | movd RB, xmm0
2718 | movd RA, xmm1 2519 | movd RA, xmm1
2719 |.else
2720 | .ffunc_nn name
2721 | mov TMP1, TOBIT_BIAS
2722 | fadd TMP1
2723 | fstp FPARG3
2724 | fadd TMP1
2725 | fstp FPARG1
2726 | mov RA, ARG3
2727 | mov RB, ARG1
2728 |.endif 2520 |.endif
2729 | ins RB, cl // Assumes RA is ecx. 2521 | ins RB, cl // Assumes RA is ecx.
2730 | jmp ->fff_resbit 2522 | jmp ->fff_resbit
@@ -2858,7 +2650,7 @@ static void build_subroutines(BuildCtx *ctx)
2858 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2650 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2859 | mov FCARG1, L:RB 2651 | mov FCARG1, L:RB
2860 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2652 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2861 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2653 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2862 |3: 2654 |3:
2863 | mov BASE, L:RB->base 2655 | mov BASE, L:RB->base
2864 |4: 2656 |4:
@@ -2929,6 +2721,79 @@ static void build_subroutines(BuildCtx *ctx)
2929 | add NARGS:RD, 1 2721 | add NARGS:RD, 1
2930 | jmp RBa 2722 | jmp RBa
2931 | 2723 |
2724 |->cont_stitch: // Trace stitching.
2725 |.if JIT
2726 | // BASE = base, RC = result, RB = mbase
2727 | mov TRACE:RA, [RB-24] // Save previous trace.
2728 | mov TMP1, TRACE:RA
2729 | mov TMP3, DISPATCH // Need one more register.
2730 | mov DISPATCH, MULTRES
2731 | movzx RA, PC_RA
2732 | lea RA, [BASE+RA*8] // Call base.
2733 | sub DISPATCH, 1
2734 | jz >2
2735 |1: // Move results down.
2736 |.if X64
2737 | mov RBa, [RC]
2738 | mov [RA], RBa
2739 |.else
2740 | mov RB, [RC]
2741 | mov [RA], RB
2742 | mov RB, [RC+4]
2743 | mov [RA+4], RB
2744 |.endif
2745 | add RC, 8
2746 | add RA, 8
2747 | sub DISPATCH, 1
2748 | jnz <1
2749 |2:
2750 | movzx RC, PC_RA
2751 | movzx RB, PC_RB
2752 | add RC, RB
2753 | lea RC, [BASE+RC*8-8]
2754 |3:
2755 | cmp RC, RA
2756 | ja >9 // More results wanted?
2757 |
2758 | mov DISPATCH, TMP3
2759 | mov TRACE:RD, TMP1 // Get previous trace.
2760 | movzx RB, word TRACE:RD->traceno
2761 | movzx RD, word TRACE:RD->link
2762 | cmp RD, RB
2763 | je ->cont_nop // Blacklisted.
2764 | test RD, RD
2765 | jne =>BC_JLOOP // Jump to stitched trace.
2766 |
2767 | // Stitch a new trace to the previous trace.
2768 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2769 | mov L:RB, SAVE_L
2770 | mov L:RB->base, BASE
2771 | mov FCARG2, PC
2772 | lea FCARG1, [DISPATCH+GG_DISP2J]
2773 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2774 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2775 | mov BASE, L:RB->base
2776 | jmp ->cont_nop
2777 |
2778 |9: // Fill up results with nil.
2779 | mov dword [RA+4], LJ_TNIL
2780 | add RA, 8
2781 | jmp <3
2782 |.endif
2783 |
2784 |->vm_profhook: // Dispatch target for profiler hook.
2785#if LJ_HASPROFILE
2786 | mov L:RB, SAVE_L
2787 | mov L:RB->base, BASE
2788 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2789 | mov FCARG1, L:RB
2790 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2791 | mov BASE, L:RB->base
2792 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2793 | sub PC, 4
2794 | jmp ->cont_nop
2795#endif
2796 |
2932 |//----------------------------------------------------------------------- 2797 |//-----------------------------------------------------------------------
2933 |//-- Trace exit handler ------------------------------------------------- 2798 |//-- Trace exit handler -------------------------------------------------
2934 |//----------------------------------------------------------------------- 2799 |//-----------------------------------------------------------------------
@@ -2981,10 +2846,9 @@ static void build_subroutines(BuildCtx *ctx)
2981 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2846 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2982 |.endif 2847 |.endif
2983 | // Caveat: RB is ebp. 2848 | // Caveat: RB is ebp.
2984 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2849 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2985 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2850 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2986 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2851 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2987 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2988 | mov L:RB->base, BASE 2852 | mov L:RB->base, BASE
2989 |.if X64WIN 2853 |.if X64WIN
2990 | lea CARG2, [rsp+4*8] 2854 | lea CARG2, [rsp+4*8]
@@ -2994,6 +2858,7 @@ static void build_subroutines(BuildCtx *ctx)
2994 | lea FCARG2, [esp+16] 2858 | lea FCARG2, [esp+16]
2995 |.endif 2859 |.endif
2996 | lea FCARG1, [DISPATCH+GG_DISP2J] 2860 | lea FCARG1, [DISPATCH+GG_DISP2J]
2861 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2997 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2862 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2998 | // MULTRES or negated error code returned in eax (RD). 2863 | // MULTRES or negated error code returned in eax (RD).
2999 | mov RAa, L:RB->cframe 2864 | mov RAa, L:RB->cframe
@@ -3040,12 +2905,14 @@ static void build_subroutines(BuildCtx *ctx)
3040 | mov r13, TMPa 2905 | mov r13, TMPa
3041 | mov r12, TMPQ 2906 | mov r12, TMPQ
3042 |.endif 2907 |.endif
3043 | test RD, RD; js >3 // Check for error from exit. 2908 | test RD, RD; js >9 // Check for error from exit.
2909 | mov L:RB, SAVE_L
3044 | mov MULTRES, RD 2910 | mov MULTRES, RD
3045 | mov LFUNC:KBASE, [BASE-8] 2911 | mov LFUNC:KBASE, [BASE-8]
3046 | mov KBASE, LFUNC:KBASE->pc 2912 | mov KBASE, LFUNC:KBASE->pc
3047 | mov KBASE, [KBASE+PC2PROTO(k)] 2913 | mov KBASE, [KBASE+PC2PROTO(k)]
3048 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2914 | mov L:RB->base, BASE
2915 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3049 | set_vmstate INTERP 2916 | set_vmstate INTERP
3050 | // Modified copy of ins_next which handles function header dispatch, too. 2917 | // Modified copy of ins_next which handles function header dispatch, too.
3051 | mov RC, [PC] 2918 | mov RC, [PC]
@@ -3054,18 +2921,35 @@ static void build_subroutines(BuildCtx *ctx)
3054 | add PC, 4 2921 | add PC, 4
3055 | shr RC, 16 2922 | shr RC, 16
3056 | cmp OP, BC_FUNCF // Function header? 2923 | cmp OP, BC_FUNCF // Function header?
3057 | jb >2 2924 | jb >3
3058 | mov RC, MULTRES // RC/RD holds nres+1. 2925 | cmp OP, BC_FUNCC+2 // Fast function?
2926 | jae >4
3059 |2: 2927 |2:
2928 | mov RC, MULTRES // RC/RD holds nres+1.
2929 |3:
3060 |.if X64 2930 |.if X64
3061 | jmp aword [DISPATCH+OP*8] 2931 | jmp aword [DISPATCH+OP*8]
3062 |.else 2932 |.else
3063 | jmp aword [DISPATCH+OP*4] 2933 | jmp aword [DISPATCH+OP*4]
3064 |.endif 2934 |.endif
3065 | 2935 |
3066 |3: // Rethrow error from the right C frame. 2936 |4: // Check frame below fast function.
2937 | mov RC, [BASE-4]
2938 | test RC, FRAME_TYPE
2939 | jnz <2 // Trace stitching continuation?
2940 | // Otherwise set KBASE for Lua function below fast function.
2941 | movzx RC, byte [RC-3]
2942 | not RCa
2943 | mov LFUNC:KBASE, [BASE+RC*8-8]
2944 | mov KBASE, LFUNC:KBASE->pc
2945 | mov KBASE, [KBASE+PC2PROTO(k)]
2946 | jmp <2
2947 |
2948 |9: // Rethrow error from the right C frame.
2949 | mov FCARG2, RD
3067 | mov FCARG1, L:RB 2950 | mov FCARG1, L:RB
3068 | call extern lj_err_run@4 // (lua_State *L) 2951 | neg FCARG2
2952 | call extern lj_err_trace@8 // (lua_State *L, int errcode)
3069 |.endif 2953 |.endif
3070 | 2954 |
3071 |//----------------------------------------------------------------------- 2955 |//-----------------------------------------------------------------------
@@ -3073,27 +2957,18 @@ static void build_subroutines(BuildCtx *ctx)
3073 |//----------------------------------------------------------------------- 2957 |//-----------------------------------------------------------------------
3074 | 2958 |
3075 |// FP value rounding. Called by math.floor/math.ceil fast functions 2959 |// FP value rounding. Called by math.floor/math.ceil fast functions
3076 |// and from JIT code. 2960 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3077 | 2961 |.macro vm_round, name, mode, cond
3078 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2962 |->name:
3079 |.macro vm_round_x87, mode1, mode2 2963 |.if not X64 and cond
3080 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 2964 | movsd xmm0, qword [esp+4]
3081 | mov [esp+8], eax 2965 | call ->name .. _sse
3082 | mov ax, mode1 2966 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
3083 | or ax, [esp+4] 2967 | fld qword [esp+4]
3084 |.if mode2 ~= 0xffff
3085 | and ax, mode2
3086 |.endif
3087 | mov [esp+6], ax
3088 | fldcw word [esp+6]
3089 | frndint
3090 | fldcw word [esp+4]
3091 | mov eax, [esp+8]
3092 | ret 2968 | ret
3093 |.endmacro 2969 |.endif
3094 | 2970 |
3095 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2971 |->name .. _sse:
3096 |.macro vm_round_sse, mode
3097 | sseconst_abs xmm2, RDa 2972 | sseconst_abs xmm2, RDa
3098 | sseconst_2p52 xmm3, RDa 2973 | sseconst_2p52 xmm3, RDa
3099 | movaps xmm1, xmm0 2974 | movaps xmm1, xmm0
@@ -3129,22 +3004,12 @@ static void build_subroutines(BuildCtx *ctx)
3129 | ret 3004 | ret
3130 |.endmacro 3005 |.endmacro
3131 | 3006 |
3132 |.macro vm_round, name, ssemode, mode1, mode2 3007 | vm_round vm_floor, 0, 1
3133 |->name: 3008 | vm_round vm_ceil, 1, JIT
3134 |.if not SSE 3009 | vm_round vm_trunc, 2, JIT
3135 | vm_round_x87 mode1, mode2
3136 |.endif
3137 |->name .. _sse:
3138 | vm_round_sse ssemode
3139 |.endmacro
3140 |
3141 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3142 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3143 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3144 | 3010 |
3145 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3011 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3146 |->vm_mod: 3012 |->vm_mod:
3147 |.if SSE
3148 |// Args in xmm0/xmm1, return value in xmm0. 3013 |// Args in xmm0/xmm1, return value in xmm0.
3149 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3014 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3150 | movaps xmm5, xmm0 3015 | movaps xmm5, xmm0
@@ -3172,172 +3037,6 @@ static void build_subroutines(BuildCtx *ctx)
3172 | movaps xmm0, xmm5 3037 | movaps xmm0, xmm5
3173 | subsd xmm0, xmm1 3038 | subsd xmm0, xmm1
3174 | ret 3039 | ret
3175 |.else
3176 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3177 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3178 | fld st1
3179 | fdiv st1
3180 | fnstcw word [esp+4]
3181 | mov ax, 0x0400
3182 | or ax, [esp+4]
3183 | and ax, 0xf7ff
3184 | mov [esp+6], ax
3185 | fldcw word [esp+6]
3186 | frndint
3187 | fldcw word [esp+4]
3188 | fmulp st1
3189 | fsubp st1
3190 | ret
3191 |.endif
3192 |
3193 |// FP log2(x). Called by math.log(x, base).
3194 |->vm_log2:
3195 |.if X64WIN
3196 | movsd qword [rsp+8], xmm0 // Use scratch area.
3197 | fld1
3198 | fld qword [rsp+8]
3199 | fyl2x
3200 | fstp qword [rsp+8]
3201 | movsd xmm0, qword [rsp+8]
3202 |.elif X64
3203 | movsd qword [rsp-8], xmm0 // Use red zone.
3204 | fld1
3205 | fld qword [rsp-8]
3206 | fyl2x
3207 | fstp qword [rsp-8]
3208 | movsd xmm0, qword [rsp-8]
3209 |.else
3210 | fld1
3211 | fld qword [esp+4]
3212 | fyl2x
3213 |.endif
3214 | ret
3215 |
3216 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3217 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3218 |// Caveat: needs 3 slots on x87 stack!
3219 |->vm_exp_x87:
3220 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3221 |->vm_exp2_x87:
3222 | .if X64WIN
3223 | .define expscratch, dword [rsp+8] // Use scratch area.
3224 | .elif X64
3225 | .define expscratch, dword [rsp-8] // Use red zone.
3226 | .else
3227 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3228 | .endif
3229 | fst expscratch // Caveat: overwrites ARG1.
3230 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3231 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3232 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3233 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3234 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3235 |1:
3236 | ret
3237 |2:
3238 | fpop; fldz; ret
3239 |
3240 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3241 |// and vm_arith.
3242 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3243 |// Caveat: needs 3 slots on x87 stack!
3244 |->vm_pow:
3245 |.if not SSE
3246 | fist dword [esp+4] // Store/reload int before comparison.
3247 | fild dword [esp+4] // Integral exponent used in vm_powi.
3248 | fucomip st1
3249 | jnz >8 // Branch for FP exponents.
3250 | jp >9 // Branch for NaN exponent.
3251 | fpop // Pop y and fallthrough to vm_powi.
3252 |
3253 |// FP/int power function x^i. Arg1/ret on x87 stack.
3254 |// Arg2 (int) on C stack. RC (eax) modified.
3255 |// Caveat: needs 2 slots on x87 stack!
3256 | mov eax, [esp+4]
3257 | cmp eax, 1; jle >6 // i<=1?
3258 | // Now 1 < (unsigned)i <= 0x80000000.
3259 |1: // Handle leading zeros.
3260 | test eax, 1; jnz >2
3261 | fmul st0
3262 | shr eax, 1
3263 | jmp <1
3264 |2:
3265 | shr eax, 1; jz >5
3266 | fdup
3267 |3: // Handle trailing bits.
3268 | fmul st0
3269 | shr eax, 1; jz >4
3270 | jnc <3
3271 | fmul st1, st0
3272 | jmp <3
3273 |4:
3274 | fmulp st1
3275 |5:
3276 | ret
3277 |6:
3278 | je <5 // x^1 ==> x
3279 | jb >7
3280 | fld1; fdivrp st1
3281 | neg eax
3282 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3283 | jmp <1 // x^-i ==> (1/x)^i
3284 |7:
3285 | fpop; fld1 // x^0 ==> 1
3286 | ret
3287 |
3288 |8: // FP/FP power function x^y.
3289 | fst dword [esp+4]
3290 | fxch
3291 | fst dword [esp+8]
3292 | mov eax, [esp+4]; shl eax, 1
3293 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3294 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3295 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3296 | fyl2x
3297 | jmp ->vm_exp2raw
3298 |
3299 |9: // Handle x^NaN.
3300 | fld1
3301 | fucomip st2
3302 | je >1 // 1^NaN ==> 1
3303 | fxch // x^NaN ==> NaN
3304 |1:
3305 | fpop
3306 | ret
3307 |
3308 |2: // Handle x^+-Inf.
3309 | fabs
3310 | fld1
3311 | fucomip st1
3312 | je >3 // +-1^+-Inf ==> 1
3313 | fpop; fabs; fldz; mov eax, 0; setc al
3314 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3315 | fxch
3316 |3:
3317 | fpop1; fabs
3318 | ret
3319 |
3320 |4: // Handle +-0^y or +-Inf^y.
3321 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3322 | fpop; fpop
3323 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3324 | fldz // y < 0, +-Inf^y ==> 0
3325 | ret
3326 |5:
3327 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3328 | fld dword [esp+4]
3329 | ret
3330 |.endif
3331 |
3332 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3333 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3334 |->vm_pow_sse:
3335 | cvtsd2si eax, xmm1
3336 | cvtsi2sd xmm2, eax
3337 | ucomisd xmm1, xmm2
3338 | jnz >8 // Branch for FP exponents.
3339 | jp >9 // Branch for NaN exponent.
3340 | // Fallthrough to vm_powi_sse.
3341 | 3040 |
3342 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3041 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3343 |->vm_powi_sse: 3042 |->vm_powi_sse:
@@ -3374,287 +3073,6 @@ static void build_subroutines(BuildCtx *ctx)
3374 | sseconst_1 xmm0, RDa 3073 | sseconst_1 xmm0, RDa
3375 | ret 3074 | ret
3376 | 3075 |
3377 |8: // FP/FP power function x^y.
3378 |.if X64
3379 | movd rax, xmm1; shl rax, 1
3380 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3381 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3382 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3383 | .if X64WIN
3384 | movsd qword [rsp+16], xmm1 // Use scratch area.
3385 | movsd qword [rsp+8], xmm0
3386 | fld qword [rsp+16]
3387 | fld qword [rsp+8]
3388 | .else
3389 | movsd qword [rsp-16], xmm1 // Use red zone.
3390 | movsd qword [rsp-8], xmm0
3391 | fld qword [rsp-16]
3392 | fld qword [rsp-8]
3393 | .endif
3394 |.else
3395 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3396 | movsd qword [esp+4], xmm0
3397 | cmp dword [esp+12], 0; jne >1
3398 | mov eax, [esp+16]; shl eax, 1
3399 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3400 |1:
3401 | cmp dword [esp+4], 0; jne >1
3402 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3403 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3404 |1:
3405 | fld qword [esp+12]
3406 | fld qword [esp+4]
3407 |.endif
3408 | fyl2x // y*log2(x)
3409 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3410 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3411 |.if X64WIN
3412 | fstp qword [rsp+8] // Use scratch area.
3413 | movsd xmm0, qword [rsp+8]
3414 |.elif X64
3415 | fstp qword [rsp-8] // Use red zone.
3416 | movsd xmm0, qword [rsp-8]
3417 |.else
3418 | fstp qword [esp+4] // Needs 8 byte scratch area.
3419 | movsd xmm0, qword [esp+4]
3420 |.endif
3421 | ret
3422 |
3423 |9: // Handle x^NaN.
3424 | sseconst_1 xmm2, RDa
3425 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3426 | movaps xmm0, xmm1 // x^NaN ==> NaN
3427 |1:
3428 | ret
3429 |
3430 |2: // Handle x^+-Inf.
3431 | sseconst_abs xmm2, RDa
3432 | andpd xmm0, xmm2 // |x|
3433 | sseconst_1 xmm2, RDa
3434 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3435 | movmskpd eax, xmm1
3436 | xorps xmm0, xmm0
3437 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3438 |3:
3439 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3440 | ret
3441 |
3442 |4: // Handle +-0^y.
3443 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3444 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3445 | ret
3446 |
3447 |5: // Handle +-Inf^y.
3448 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3449 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3450 | ret
3451 |
3452 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3453 |// Computes fpm(x) for extended math functions. ORDER FPM. Dispatch as coded below: 0 -> vm_floor, 1 -> vm_ceil, 2 -> vm_trunc, 3 -> sqrt, 4 -> vm_exp_x87, 5 -> vm_exp2_x87, 6 -> ln, 7 -> log2, 8 -> log10, 9 -> sin, 10 -> cos, 11 -> tan; any larger fpm hits int3. Only assembled when JIT is enabled.
3454 |->vm_foldfpm:
3455 |.if JIT
3456 |.if X64
3457 | .if X64WIN
3458 | .define fpmop, CARG2d
3459 | .else
3460 | .define fpmop, CARG1d
3461 | .endif
3462 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil // fpm 0 / 1: tail-jump to the helper.
3463 | cmp fpmop, 3; jb ->vm_trunc; ja >2 // fpm 2: trunc, fpm > 3: x87 paths below.
3464 | sqrtsd xmm0, xmm0; ret // fpm 3: sqrt stays in SSE.
3465 |2:
3466 | .if X64WIN
3467 | movsd qword [rsp+8], xmm0 // Use scratch area.
3468 | fld qword [rsp+8]
3469 | .else
3470 | movsd qword [rsp-8], xmm0 // Use red zone.
3471 | fld qword [rsp-8]
3472 | .endif
3473 | cmp fpmop, 5; ja >2 // fpm 4 / 5 handled here, fpm > 5 further down.
3474 | .if X64WIN; pop rax; .endif // pop leaves the flags intact; the je below still tests the cmp above.
3475 | je >1
3476 | call ->vm_exp_x87 // fpm 4.
3477 | .if X64WIN; push rax; .endif
3478 | jmp >7
3479 |1:
3480 | call ->vm_exp2_x87 // fpm 5.
3481 | .if X64WIN; push rax; .endif
3482 | jmp >7
3483 |2: ; cmp fpmop, 7; je >1; ja >2
3484 | fldln2; fxch; fyl2x; jmp >7 // fpm 6: ln(2)*log2(x).
3485 |1: ; fld1; fxch; fyl2x; jmp >7 // fpm 7: 1*log2(x).
3486 |2: ; cmp fpmop, 9; je >1; ja >2
3487 | fldlg2; fxch; fyl2x; jmp >7 // fpm 8: log10(2)*log2(x).
3488 |1: ; fsin; jmp >7 // fpm 9.
3489 |2: ; cmp fpmop, 11; je >1; ja >9
3490 | fcos; jmp >7 // fpm 10.
3491 |1: ; fptan; fpop // fpm 11: fptan also pushes 1.0, which is dropped.
3492 |7: // Move the x87 result back to xmm0 through memory.
3493 | .if X64WIN
3494 | fstp qword [rsp+8] // Use scratch area.
3495 | movsd xmm0, qword [rsp+8]
3496 | .else
3497 | fstp qword [rsp-8] // Use red zone.
3498 | movsd xmm0, qword [rsp-8]
3499 | .endif
3500 | ret
3501 |.else // x86 calling convention.
3502 | .define fpmop, eax
3503 |.if SSE
3504 | mov fpmop, [esp+12] // fpm follows the 8 byte double argument at [esp+4].
3505 | movsd xmm0, qword [esp+4]
3506 | cmp fpmop, 1; je >1; ja >2
3507 | call ->vm_floor; jmp >7 // fpm 0.
3508 |1: ; call ->vm_ceil; jmp >7 // fpm 1.
3509 |2: ; cmp fpmop, 3; je >1; ja >2
3510 | call ->vm_trunc; jmp >7 // fpm 2.
3511 |1:
3512 | sqrtsd xmm0, xmm0 // fpm 3.
3513 |7: // SSE result is handed back on the x87 stack.
3514 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3515 | fld qword [esp+4]
3516 | ret
3517 |2: ; fld qword [esp+4]
3518 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 // fpm 4 / 5: tail-jumps.
3519 |2: ; cmp fpmop, 7; je >1; ja >2
3520 | fldln2; fxch; fyl2x; ret // fpm 6: ln(2)*log2(x).
3521 |1: ; fld1; fxch; fyl2x; ret // fpm 7: 1*log2(x).
3522 |2: ; cmp fpmop, 9; je >1; ja >2
3523 | fldlg2; fxch; fyl2x; ret // fpm 8: log10(2)*log2(x).
3524 |1: ; fsin; ret // fpm 9.
3525 |2: ; cmp fpmop, 11; je >1; ja >9
3526 | fcos; ret // fpm 10.
3527 |1: ; fptan; fpop; ret // fpm 11.
3528 |.else
3529 | mov fpmop, [esp+12]
3530 | fld qword [esp+4]
3531 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil // fpm 0 / 1.
3532 | cmp fpmop, 3; jb ->vm_trunc; ja >2 // fpm 2.
3533 | fsqrt; ret // fpm 3.
3534 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 // fpm 4 / 5.
3535 | cmp fpmop, 7; je >1; ja >2
3536 | fldln2; fxch; fyl2x; ret // fpm 6: ln(2)*log2(x).
3537 |1: ; fld1; fxch; fyl2x; ret // fpm 7: 1*log2(x).
3538 |2: ; cmp fpmop, 9; je >1; ja >2
3539 | fldlg2; fxch; fyl2x; ret // fpm 8: log10(2)*log2(x).
3540 |1: ; fsin; ret // fpm 9.
3541 |2: ; cmp fpmop, 11; je >1; ja >9
3542 | fcos; ret // fpm 10.
3543 |1: ; fptan; fpop; ret // fpm 11.
3544 |.endif
3545 |.endif
3546 |9: ; int3 // Bad fpm.
3547 |.endif
3548 |
3549 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3550 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3551 |// and basic math functions. ORDER ARITH. Dispatch as coded below: 0 add, 1 sub, 2 mul, 3 div, 4 -> vm_mod, 5 -> vm_pow, 6 negate, 7 abs, 8 fpatan, 9 fscale, 10 min, 11 max; any larger op hits int3.
3552 |->vm_foldarith:
3553 |.if X64
3554 |
3555 | .if X64WIN
3556 | .define foldop, CARG3d
3557 | .else
3558 | .define foldop, CARG1d
3559 | .endif
3560 | cmp foldop, 1; je >1; ja >2
3561 | addsd xmm0, xmm1; ret // op 0.
3562 |1: ; subsd xmm0, xmm1; ret // op 1.
3563 |2: ; cmp foldop, 3; je >1; ja >2
3564 | mulsd xmm0, xmm1; ret // op 2.
3565 |1: ; divsd xmm0, xmm1; ret // op 3.
3566 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow // op 4 / 5: tail-jumps.
3567 | cmp foldop, 7; je >1; ja >2
3568 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret // op 6: flip the sign bit of x.
3569 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret // op 7: clear the sign bit of x.
3570 |2: ; cmp foldop, 9; ja >2 // Flags from this cmp are consumed by the je after the loads.
3571 |.if X64WIN
3572 | movsd qword [rsp+8], xmm0 // Use scratch area.
3573 | movsd qword [rsp+16], xmm1
3574 | fld qword [rsp+8]
3575 | fld qword [rsp+16]
3576 |.else
3577 | movsd qword [rsp-8], xmm0 // Use red zone.
3578 | movsd qword [rsp-16], xmm1
3579 | fld qword [rsp-8]
3580 | fld qword [rsp-16]
3581 |.endif
3582 | je >1
3583 | fpatan // op 8, with st1 = x and st0 = y.
3584 |7: // Move the x87 result back to xmm0 through memory.
3585 |.if X64WIN
3586 | fstp qword [rsp+8] // Use scratch area.
3587 | movsd xmm0, qword [rsp+8]
3588 |.else
3589 | fstp qword [rsp-8] // Use red zone.
3590 | movsd xmm0, qword [rsp-8]
3591 |.endif
3592 | ret
3593 |1: ; fxch; fscale; fpop1; jmp <7 // op 9: after fxch st0 = x, st1 = y.
3594 |2: ; cmp foldop, 11; je >1; ja >9
3595 | minsd xmm0, xmm1; ret // op 10.
3596 |1: ; maxsd xmm0, xmm1; ret // op 11.
3597 |9: ; int3 // Bad op.
3598 |
3599 |.elif SSE // x86 calling convention with SSE ops.
3600 |
3601 | .define foldop, eax
3602 | mov foldop, [esp+20] // op follows the two 8 byte double arguments.
3603 | movsd xmm0, qword [esp+4]
3604 | movsd xmm1, qword [esp+12]
3605 | cmp foldop, 1; je >1; ja >2
3606 | addsd xmm0, xmm1 // op 0.
3607 |7: // SSE result is handed back on the x87 stack.
3608 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3609 | fld qword [esp+4]
3610 | ret
3611 |1: ; subsd xmm0, xmm1; jmp <7 // op 1.
3612 |2: ; cmp foldop, 3; je >1; ja >2
3613 | mulsd xmm0, xmm1; jmp <7 // op 2.
3614 |1: ; divsd xmm0, xmm1; jmp <7 // op 3.
3615 |2: ; cmp foldop, 5
3616 | je >1; ja >2
3617 | call ->vm_mod; jmp <7 // op 4.
3618 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3619 |2: ; cmp foldop, 7; je >1; ja >2
3620 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 // op 6.
3621 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 // op 7.
3622 |2: ; cmp foldop, 9; ja >2 // Flags from this cmp are consumed by the je after the loads.
3623 | fld qword [esp+4] // Reload from stack
3624 | fld qword [esp+12]
3625 | je >1
3626 | fpatan; ret // op 8.
3627 |1: ; fxch; fscale; fpop1; ret // op 9.
3628 |2: ; cmp foldop, 11; je >1; ja >9
3629 | minsd xmm0, xmm1; jmp <7 // op 10.
3630 |1: ; maxsd xmm0, xmm1; jmp <7 // op 11.
3631 |9: ; int3 // Bad op.
3632 |
3633 |.else // x86 calling convention with x87 ops.
3634 |
3635 | mov eax, [esp+20]
3636 | fld qword [esp+4]
3637 | fld qword [esp+12] // Now st1 = x, st0 = y.
3638 | cmp eax, 1; je >1; ja >2
3639 | faddp st1; ret // op 0.
3640 |1: ; fsubp st1; ret // op 1.
3641 |2: ; cmp eax, 3; je >1; ja >2
3642 | fmulp st1; ret // op 2.
3643 |1: ; fdivp st1; ret // op 3.
3644 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow // op 4 / 5: tail-jumps.
3645 | cmp eax, 7; je >1; ja >2
3646 | fpop; fchs; ret // op 6: drop y, negate x.
3647 |1: ; fpop; fabs; ret // op 7: drop y, |x|.
3648 |2: ; cmp eax, 9; je >1; ja >2
3649 | fpatan; ret // op 8.
3650 |1: ; fxch; fscale; fpop1; ret // op 9.
3651 |2: ; cmp eax, 11; je >1; ja >9
3652 | fucomi st1; fcmovnbe st1; fpop1; ret // op 10: replace y by x when y > x.
3653 |1: ; fucomi st1; fcmovbe st1; fpop1; ret // op 11: replace y by x when y <= x (or unordered).
3654 |9: ; int3 // Bad op.
3655 |
3656 |.endif
3657 |
3658 |//----------------------------------------------------------------------- 3076 |//-----------------------------------------------------------------------
3659 |//-- Miscellaneous functions -------------------------------------------- 3077 |//-- Miscellaneous functions --------------------------------------------
3660 |//----------------------------------------------------------------------- 3078 |//-----------------------------------------------------------------------
@@ -3665,6 +3083,7 @@ static void build_subroutines(BuildCtx *ctx)
3665 | mov eax, CARG1d 3083 | mov eax, CARG1d
3666 | .if X64WIN; push rsi; mov rsi, CARG2; .endif 3084 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3667 | push rbx 3085 | push rbx
3086 | xor ecx, ecx
3668 | cpuid 3087 | cpuid
3669 | mov [rsi], eax 3088 | mov [rsi], eax
3670 | mov [rsi+4], ebx 3089 | mov [rsi+4], ebx
@@ -3688,6 +3107,7 @@ static void build_subroutines(BuildCtx *ctx)
3688 | mov eax, [esp+4] // Argument 1 is function number. 3107 | mov eax, [esp+4] // Argument 1 is function number.
3689 | push edi 3108 | push edi
3690 | push ebx 3109 | push ebx
3110 | xor ecx, ecx
3691 | cpuid 3111 | cpuid
3692 | mov edi, [esp+16] // Argument 2 is result area. 3112 | mov edi, [esp+16] // Argument 2 is result area.
3693 | mov [edi], eax 3113 | mov [edi], eax
@@ -3700,6 +3120,86 @@ static void build_subroutines(BuildCtx *ctx)
3700 | ret 3120 | ret
3701 |.endif 3121 |.endif
3702 | 3122 |
3123 |.define NEXT_TAB, TAB:FCARG1 // Table argument.
3124 |.define NEXT_IDX, FCARG2 // Traversal index argument.
3125 |.define NEXT_PTR, RCa // Pointer left in place when vm_next returns.
3126 |.define NEXT_PTRd, RC
3127 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
3128 |.if X64
3129 |.define NEXT_TMP, CARG3d
3130 |.define NEXT_TMPq, CARG3
3131 |.define NEXT_ASIZE, CARG4d
3132 |.macro NEXT_ENTER; .endmacro
3133 |.macro NEXT_LEAVE; ret; .endmacro
3134 |.if X64WIN
3135 |.define NEXT_RES_PTR, [rsp+aword*5]
3136 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
3137 |.else
3138 |.define NEXT_RES_PTR, [rsp+aword*1]
3139 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
3140 |.endif
3141 |.else
3142 |.define NEXT_ASIZE, esi
3143 |.define NEXT_TMP, edi
3144 |.macro NEXT_ENTER; push esi; push edi; .endmacro
3145 |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro
3146 |.define NEXT_RES_PTR, [esp+dword*3]
3147 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
3148 |.endif
3149 |
3150 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
3151 |// Next idx returned in edx. Scans the array part from idx, then the hash part, skipping nil values. For an array hit, the value is copied to the stack area NEXT_RES_PTR with the key written 8 bytes above it; for a hash hit, the node pointer itself is left in NEXT_PTR. Only assembled when JIT is enabled.
3152 |->vm_next:
3153 |.if JIT
3154 | NEXT_ENTER
3155 | mov NEXT_ASIZE, NEXT_TAB->asize
3156 |1: // Traverse array part.
3157 | cmp NEXT_IDX, NEXT_ASIZE; jae >5 // idx >= asize: continue in the hash part.
3158 | mov NEXT_TMP, NEXT_TAB->array
3159 | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 // Type tag is the upper dword of the slot.
3160 | lea NEXT_PTR, NEXT_RES_PTR
3161 |.if X64
3162 | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] // Copy the 8 byte value in one move.
3163 | mov qword [NEXT_PTR], NEXT_TMPq
3164 |.else
3165 | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] // NEXT_ASIZE is reused as a temporary here.
3166 | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
3167 | mov dword [NEXT_PTR+4], NEXT_ASIZE
3168 | mov dword [NEXT_PTR], NEXT_TMP
3169 |.endif
3170 |.if DUALNUM
3171 | mov dword [NEXT_PTR+dword*3], LJ_TISNUM // Key = integer idx with LJ_TISNUM tag.
3172 | mov dword [NEXT_PTR+dword*2], NEXT_IDX
3173 |.else
3174 | cvtsi2sd xmm0, NEXT_IDX // Key = idx converted to a double.
3175 | movsd qword [NEXT_PTR+dword*2], xmm0
3176 |.endif
3177 | NEXT_RES_IDX 1 // Next idx = idx + 1.
3178 | NEXT_LEAVE
3179 |2: // Skip holes in array part.
3180 | add NEXT_IDX, 1
3181 | jmp <1
3182 |
3183 |5: // Traverse hash part.
3184 | sub NEXT_IDX, NEXT_ASIZE // Rebase idx to a hash node index.
3185 |6:
3186 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 // Past the last node: iteration is done.
3187 | imul NEXT_PTRd, NEXT_IDX, #NODE // Node address = t->node + idx * sizeof(Node).
3188 | add NODE:NEXT_PTRd, dword NEXT_TAB->node
3189 | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7
3190 | NEXT_RES_IDXL NEXT_ASIZE+1 // Next idx = asize + node index + 1.
3191 | NEXT_LEAVE
3192 |7: // Skip holes in hash part.
3193 | add NEXT_IDX, 1
3194 | jmp <6
3195 |
3196 |9: // End of iteration. Set the key to nil (not the value).
3197 | NEXT_RES_IDX NEXT_ASIZE
3198 | lea NEXT_PTR, NEXT_RES_PTR
3199 | mov dword [NEXT_PTR+dword*3], LJ_TNIL
3200 | NEXT_LEAVE
3201 |.endif
3202 |
3703 |//----------------------------------------------------------------------- 3203 |//-----------------------------------------------------------------------
3704 |//-- Assertions --------------------------------------------------------- 3204 |//-- Assertions ---------------------------------------------------------
3705 |//----------------------------------------------------------------------- 3205 |//-----------------------------------------------------------------------
@@ -3965,19 +3465,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3965 | // RA is a number. 3465 | // RA is a number.
3966 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3466 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3967 | // RA is a number, RD is an integer. 3467 | // RA is a number, RD is an integer.
3968 |.if SSE
3969 | cvtsi2sd xmm0, dword [BASE+RD*8] 3468 | cvtsi2sd xmm0, dword [BASE+RD*8]
3970 | jmp >2 3469 | jmp >2
3971 |.else
3972 | fld qword [BASE+RA*8]
3973 | fild dword [BASE+RD*8]
3974 | jmp >3
3975 |.endif
3976 | 3470 |
3977 |8: // RA is an integer, RD is not an integer. 3471 |8: // RA is an integer, RD is not an integer.
3978 | ja ->vmeta_comp 3472 | ja ->vmeta_comp
3979 | // RA is an integer, RD is a number. 3473 | // RA is an integer, RD is a number.
3980 |.if SSE
3981 | cvtsi2sd xmm1, dword [BASE+RA*8] 3474 | cvtsi2sd xmm1, dword [BASE+RA*8]
3982 | movsd xmm0, qword [BASE+RD*8] 3475 | movsd xmm0, qword [BASE+RD*8]
3983 | add PC, 4 3476 | add PC, 4
@@ -3985,29 +3478,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3985 | jmp_comp jbe, ja, jb, jae, <9 3478 | jmp_comp jbe, ja, jb, jae, <9
3986 | jmp <6 3479 | jmp <6
3987 |.else 3480 |.else
3988 | fild dword [BASE+RA*8]
3989 | jmp >2
3990 |.endif
3991 |.else
3992 | checknum RA, ->vmeta_comp 3481 | checknum RA, ->vmeta_comp
3993 | checknum RD, ->vmeta_comp 3482 | checknum RD, ->vmeta_comp
3994 |.endif 3483 |.endif
3995 |.if SSE
3996 |1: 3484 |1:
3997 | movsd xmm0, qword [BASE+RD*8] 3485 | movsd xmm0, qword [BASE+RD*8]
3998 |2: 3486 |2:
3999 | add PC, 4 3487 | add PC, 4
4000 | ucomisd xmm0, qword [BASE+RA*8] 3488 | ucomisd xmm0, qword [BASE+RA*8]
4001 |3: 3489 |3:
4002 |.else
4003 |1:
4004 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4005 |2:
4006 | fld qword [BASE+RD*8]
4007 |3:
4008 | add PC, 4
4009 | fcomparepp
4010 |.endif
4011 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3490 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4012 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3491 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4013 |.if DUALNUM 3492 |.if DUALNUM
@@ -4047,43 +3526,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4047 | // RD is a number. 3526 | // RD is a number.
4048 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3527 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4049 | // RD is a number, RA is an integer. 3528 | // RD is a number, RA is an integer.
4050 |.if SSE
4051 | cvtsi2sd xmm0, dword [BASE+RA*8] 3529 | cvtsi2sd xmm0, dword [BASE+RA*8]
4052 |.else
4053 | fild dword [BASE+RA*8]
4054 |.endif
4055 | jmp >2 3530 | jmp >2
4056 | 3531 |
4057 |8: // RD is an integer, RA is not an integer. 3532 |8: // RD is an integer, RA is not an integer.
4058 | ja >5 3533 | ja >5
4059 | // RD is an integer, RA is a number. 3534 | // RD is an integer, RA is a number.
4060 |.if SSE
4061 | cvtsi2sd xmm0, dword [BASE+RD*8] 3535 | cvtsi2sd xmm0, dword [BASE+RD*8]
4062 | ucomisd xmm0, qword [BASE+RA*8] 3536 | ucomisd xmm0, qword [BASE+RA*8]
4063 |.else
4064 | fild dword [BASE+RD*8]
4065 | fld qword [BASE+RA*8]
4066 |.endif
4067 | jmp >4 3537 | jmp >4
4068 | 3538 |
4069 |.else 3539 |.else
4070 | cmp RB, LJ_TISNUM; jae >5 3540 | cmp RB, LJ_TISNUM; jae >5
4071 | checknum RA, >5 3541 | checknum RA, >5
4072 |.endif 3542 |.endif
4073 |.if SSE
4074 |1: 3543 |1:
4075 | movsd xmm0, qword [BASE+RA*8] 3544 | movsd xmm0, qword [BASE+RA*8]
4076 |2: 3545 |2:
4077 | ucomisd xmm0, qword [BASE+RD*8] 3546 | ucomisd xmm0, qword [BASE+RD*8]
4078 |4: 3547 |4:
4079 |.else
4080 |1:
4081 | fld qword [BASE+RA*8]
4082 |2:
4083 | fld qword [BASE+RD*8]
4084 |4:
4085 | fcomparepp
4086 |.endif
4087 iseqne_fp: 3548 iseqne_fp:
4088 if (vk) { 3549 if (vk) {
4089 | jp >2 // Unordered means not equal. 3550 | jp >2 // Unordered means not equal.
@@ -4206,39 +3667,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4206 | // RA is a number. 3667 | // RA is a number.
4207 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3668 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4208 | // RA is a number, RD is an integer. 3669 | // RA is a number, RD is an integer.
4209 |.if SSE
4210 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3670 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4211 |.else
4212 | fild dword [KBASE+RD*8]
4213 |.endif
4214 | jmp >2 3671 | jmp >2
4215 | 3672 |
4216 |8: // RA is an integer, RD is a number. 3673 |8: // RA is an integer, RD is a number.
4217 |.if SSE
4218 | cvtsi2sd xmm0, dword [BASE+RA*8] 3674 | cvtsi2sd xmm0, dword [BASE+RA*8]
4219 | ucomisd xmm0, qword [KBASE+RD*8] 3675 | ucomisd xmm0, qword [KBASE+RD*8]
4220 |.else
4221 | fild dword [BASE+RA*8]
4222 | fld qword [KBASE+RD*8]
4223 |.endif
4224 | jmp >4 3676 | jmp >4
4225 |.else 3677 |.else
4226 | cmp RB, LJ_TISNUM; jae >3 3678 | cmp RB, LJ_TISNUM; jae >3
4227 |.endif 3679 |.endif
4228 |.if SSE
4229 |1: 3680 |1:
4230 | movsd xmm0, qword [KBASE+RD*8] 3681 | movsd xmm0, qword [KBASE+RD*8]
4231 |2: 3682 |2:
4232 | ucomisd xmm0, qword [BASE+RA*8] 3683 | ucomisd xmm0, qword [BASE+RA*8]
4233 |4: 3684 |4:
4234 |.else
4235 |1:
4236 | fld qword [KBASE+RD*8]
4237 |2:
4238 | fld qword [BASE+RA*8]
4239 |4:
4240 | fcomparepp
4241 |.endif
4242 goto iseqne_fp; 3685 goto iseqne_fp;
4243 case BC_ISEQP: case BC_ISNEP: 3686 case BC_ISEQP: case BC_ISNEP:
4244 vk = op == BC_ISEQP; 3687 vk = op == BC_ISEQP;
@@ -4289,6 +3732,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4289 | ins_next 3732 | ins_next
4290 break; 3733 break;
4291 3734
3735 case BC_ISTYPE: /* Type assertion: falls through to the next instruction only when the slot's tag matches. */
3736 | ins_AD // RA = src, RD = -type
3737 | add RD, [BASE+RA*8+4] // Tag + (-type) is zero only when the tag equals type.
3738 | jne ->vmeta_istype
3739 | ins_next
3740 break;
3741 case BC_ISNUM: /* Number assertion: any slot failing checknum goes to vmeta_istype. */
3742 | ins_AD // RA = src, RD = -(TISNUM-1)
3743 | checknum RA, ->vmeta_istype
3744 | ins_next
3745 break;
3746
4292 /* -- Unary ops --------------------------------------------------------- */ 3747 /* -- Unary ops --------------------------------------------------------- */
4293 3748
4294 case BC_MOV: 3749 case BC_MOV:
@@ -4332,16 +3787,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4332 |.else 3787 |.else
4333 | checknum RD, ->vmeta_unm 3788 | checknum RD, ->vmeta_unm
4334 |.endif 3789 |.endif
4335 |.if SSE
4336 | movsd xmm0, qword [BASE+RD*8] 3790 | movsd xmm0, qword [BASE+RD*8]
4337 | sseconst_sign xmm1, RDa 3791 | sseconst_sign xmm1, RDa
4338 | xorps xmm0, xmm1 3792 | xorps xmm0, xmm1
4339 | movsd qword [BASE+RA*8], xmm0 3793 | movsd qword [BASE+RA*8], xmm0
4340 |.else
4341 | fld qword [BASE+RD*8]
4342 | fchs
4343 | fstp qword [BASE+RA*8]
4344 |.endif
4345 |.if DUALNUM 3794 |.if DUALNUM
4346 | jmp <9 3795 | jmp <9
4347 |.else 3796 |.else
@@ -4357,15 +3806,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4357 |1: 3806 |1:
4358 | mov dword [BASE+RA*8+4], LJ_TISNUM 3807 | mov dword [BASE+RA*8+4], LJ_TISNUM
4359 | mov dword [BASE+RA*8], RD 3808 | mov dword [BASE+RA*8], RD
4360 |.elif SSE 3809 |.else
4361 | xorps xmm0, xmm0 3810 | xorps xmm0, xmm0
4362 | cvtsi2sd xmm0, dword STR:RD->len 3811 | cvtsi2sd xmm0, dword STR:RD->len
4363 |1: 3812 |1:
4364 | movsd qword [BASE+RA*8], xmm0 3813 | movsd qword [BASE+RA*8], xmm0
4365 |.else
4366 | fild dword STR:RD->len
4367 |1:
4368 | fstp qword [BASE+RA*8]
4369 |.endif 3814 |.endif
4370 | ins_next 3815 | ins_next
4371 |2: 3816 |2:
@@ -4383,11 +3828,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4383 | // Length of table returned in eax (RD). 3828 | // Length of table returned in eax (RD).
4384 |.if DUALNUM 3829 |.if DUALNUM
4385 | // Nothing to do. 3830 | // Nothing to do.
4386 |.elif SSE
4387 | cvtsi2sd xmm0, RD
4388 |.else 3831 |.else
4389 | mov ARG1, RD 3832 | cvtsi2sd xmm0, RD
4390 | fild ARG1
4391 |.endif 3833 |.endif
4392 | mov BASE, RB // Restore BASE. 3834 | mov BASE, RB // Restore BASE.
4393 | movzx RA, PC_RA 3835 | movzx RA, PC_RA
@@ -4402,7 +3844,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4402 3844
4403 /* -- Binary ops -------------------------------------------------------- */ 3845 /* -- Binary ops -------------------------------------------------------- */
4404 3846
4405 |.macro ins_arithpre, x87ins, sseins, ssereg 3847 |.macro ins_arithpre, sseins, ssereg
4406 | ins_ABC 3848 | ins_ABC
4407 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3849 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4408 ||switch (vk) { 3850 ||switch (vk) {
@@ -4411,37 +3853,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4411 | .if DUALNUM 3853 | .if DUALNUM
4412 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3854 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4413 | .endif 3855 | .endif
4414 | .if SSE 3856 | movsd xmm0, qword [BASE+RB*8]
4415 | movsd xmm0, qword [BASE+RB*8] 3857 | sseins ssereg, qword [KBASE+RC*8]
4416 | sseins ssereg, qword [KBASE+RC*8]
4417 | .else
4418 | fld qword [BASE+RB*8]
4419 | x87ins qword [KBASE+RC*8]
4420 | .endif
4421 || break; 3858 || break;
4422 ||case 1: 3859 ||case 1:
4423 | checknum RB, ->vmeta_arith_nv 3860 | checknum RB, ->vmeta_arith_nv
4424 | .if DUALNUM 3861 | .if DUALNUM
4425 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3862 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4426 | .endif 3863 | .endif
4427 | .if SSE 3864 | movsd xmm0, qword [KBASE+RC*8]
4428 | movsd xmm0, qword [KBASE+RC*8] 3865 | sseins ssereg, qword [BASE+RB*8]
4429 | sseins ssereg, qword [BASE+RB*8]
4430 | .else
4431 | fld qword [KBASE+RC*8]
4432 | x87ins qword [BASE+RB*8]
4433 | .endif
4434 || break; 3866 || break;
4435 ||default: 3867 ||default:
4436 | checknum RB, ->vmeta_arith_vv 3868 | checknum RB, ->vmeta_arith_vv
4437 | checknum RC, ->vmeta_arith_vv 3869 | checknum RC, ->vmeta_arith_vv
4438 | .if SSE 3870 | movsd xmm0, qword [BASE+RB*8]
4439 | movsd xmm0, qword [BASE+RB*8] 3871 | sseins ssereg, qword [BASE+RC*8]
4440 | sseins ssereg, qword [BASE+RC*8]
4441 | .else
4442 | fld qword [BASE+RB*8]
4443 | x87ins qword [BASE+RC*8]
4444 | .endif
4445 || break; 3872 || break;
4446 ||} 3873 ||}
4447 |.endmacro 3874 |.endmacro
@@ -4479,55 +3906,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4479 |.endmacro 3906 |.endmacro
4480 | 3907 |
4481 |.macro ins_arithpost 3908 |.macro ins_arithpost // Store the arithmetic result into slot RA.
4482 |.if SSE
4483 | movsd qword [BASE+RA*8], xmm0 3909 | movsd qword [BASE+RA*8], xmm0
4484 |.else
4485 | fstp qword [BASE+RA*8]
4486 |.endif
4487 |.endmacro 3910 |.endmacro
4488 | 3911 |
4489 |.macro ins_arith, x87ins, sseins 3912 |.macro ins_arith, sseins // FP-only form: operand setup + op, store, dispatch.
4490 | ins_arithpre x87ins, sseins, xmm0 3913 | ins_arithpre sseins, xmm0
4491 | ins_arithpost 3914 | ins_arithpost
4492 | ins_next 3915 | ins_next
4493 |.endmacro 3916 |.endmacro
4494 | 3917 |
4495 |.macro ins_arith, intins, x87ins, sseins 3918 |.macro ins_arith, intins, sseins // Form with an integer op: used via ins_arithdn under DUALNUM, else falls back to the FP-only form.
4496 |.if DUALNUM 3919 |.if DUALNUM
4497 | ins_arithdn intins 3920 | ins_arithdn intins
4498 |.else 3921 |.else
4499 | ins_arith, x87ins, sseins 3922 | ins_arith, sseins
4500 |.endif 3923 |.endif
4501 |.endmacro 3924 |.endmacro
4502 3925
4503 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3926 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4504 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3927 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4505 | ins_arith add, fadd, addsd 3928 | ins_arith add, addsd
4506 break; 3929 break;
4507 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3930 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4508 | ins_arith sub, fsub, subsd 3931 | ins_arith sub, subsd
4509 break;
4510 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3933 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4511 | ins_arith imul, fmul, mulsd 3934 | ins_arith imul, mulsd
4512 break; 3935 break;
4513 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3936 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4514 | ins_arith fdiv, divsd 3937 | ins_arith divsd // No integer op is passed for division.
4515 break; 3938 break;
4516 case BC_MODVN: 3939 case BC_MODVN:
4517 | ins_arithpre fld, movsd, xmm1 3940 | ins_arithpre movsd, xmm1 // Loads the second operand into xmm1 instead of combining it.
4518 |->BC_MODVN_Z: 3941 |->BC_MODVN_Z:
4519 | call ->vm_mod 3942 | call ->vm_mod
4520 | ins_arithpost 3943 | ins_arithpost
4521 | ins_next 3944 | ins_next
4522 break; 3945 break;
4523 case BC_MODNV: case BC_MODVV: 3946 case BC_MODNV: case BC_MODVV:
4524 | ins_arithpre fld, movsd, xmm1 3947 | ins_arithpre movsd, xmm1
4525 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3948 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4526 break; 3949 break;
4527 case BC_POW: 3950 case BC_POW:
4528 | ins_arithpre fld, movsd, xmm1 3951 | ins_arithpre movsd, xmm1
4529 | call ->vm_pow 3952 | mov RB, BASE
3953 |.if not X64
3954 | movsd FPARG1, xmm0 // x86 only: copy both operands to the FPARG1/FPARG3 slots before the call.
3955 | movsd FPARG3, xmm1
3956 |.endif
3957 | call extern pow
3958 | movzx RA, PC_RA // Re-fetch RA from the instruction after the call.
3959 | mov BASE, RB // Restore BASE from the copy made before the call.
3960 |.if X64
4530 | ins_arithpost 3961 | ins_arithpost
3962 |.else
3963 | fstp qword [BASE+RA*8] // x86: result is taken from the x87 stack.
3964 |.endif
4531 | ins_next 3965 | ins_next
4532 break; 3966 break;
4533 3967
@@ -4595,25 +4029,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4595 | movsx RD, RDW 4029 | movsx RD, RDW
4596 | mov dword [BASE+RA*8+4], LJ_TISNUM 4030 | mov dword [BASE+RA*8+4], LJ_TISNUM
4597 | mov dword [BASE+RA*8], RD 4031 | mov dword [BASE+RA*8], RD
4598 |.elif SSE 4032 |.else
4599 | movsx RD, RDW // Sign-extend literal. 4033 | movsx RD, RDW // Sign-extend literal.
4600 | cvtsi2sd xmm0, RD 4034 | cvtsi2sd xmm0, RD
4601 | movsd qword [BASE+RA*8], xmm0 4035 | movsd qword [BASE+RA*8], xmm0
4602 |.else
4603 | fild PC_RD // Refetch signed RD from instruction.
4604 | fstp qword [BASE+RA*8]
4605 |.endif 4036 |.endif
4606 | ins_next 4037 | ins_next
4607 break; 4038 break;
4608 case BC_KNUM: 4039 case BC_KNUM:
4609 | ins_AD // RA = dst, RD = num const 4040 | ins_AD // RA = dst, RD = num const
4610 |.if SSE
4611 | movsd xmm0, qword [KBASE+RD*8] 4041 | movsd xmm0, qword [KBASE+RD*8]
4612 | movsd qword [BASE+RA*8], xmm0 4042 | movsd qword [BASE+RA*8], xmm0
4613 |.else
4614 | fld qword [KBASE+RD*8]
4615 | fstp qword [BASE+RA*8]
4616 |.endif
4617 | ins_next 4043 | ins_next
4618 break; 4044 break;
4619 case BC_KPRI: 4045 case BC_KPRI:
@@ -4720,18 +4146,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4720 case BC_USETN: 4146 case BC_USETN:
4721 | ins_AD // RA = upvalue #, RD = num const 4147 | ins_AD // RA = upvalue #, RD = num const
4722 | mov LFUNC:RB, [BASE-8] 4148 | mov LFUNC:RB, [BASE-8]
4723 |.if SSE
4724 | movsd xmm0, qword [KBASE+RD*8] 4149 | movsd xmm0, qword [KBASE+RD*8]
4725 |.else
4726 | fld qword [KBASE+RD*8]
4727 |.endif
4728 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4150 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4729 | mov RA, UPVAL:RB->v 4151 | mov RA, UPVAL:RB->v
4730 |.if SSE
4731 | movsd qword [RA], xmm0 4152 | movsd qword [RA], xmm0
4732 |.else
4733 | fstp qword [RA]
4734 |.endif
4735 | ins_next 4153 | ins_next
4736 break; 4154 break;
4737 case BC_USETP: 4155 case BC_USETP:
@@ -4885,18 +4303,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4885 |.else 4303 |.else
4886 | // Convert number to int and back and compare. 4304 | // Convert number to int and back and compare.
4887 | checknum RC, >5 4305 | checknum RC, >5
4888 |.if SSE
4889 | movsd xmm0, qword [BASE+RC*8] 4306 | movsd xmm0, qword [BASE+RC*8]
4890 | cvtsd2si RC, xmm0 4307 | cvttsd2si RC, xmm0
4891 | cvtsi2sd xmm1, RC 4308 | cvtsi2sd xmm1, RC
4892 | ucomisd xmm0, xmm1 4309 | ucomisd xmm0, xmm1
4893 |.else
4894 | fld qword [BASE+RC*8]
4895 | fist ARG1
4896 | fild ARG1
4897 | fcomparepp
4898 | mov RC, ARG1
4899 |.endif
4900 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4310 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4901 |.endif 4311 |.endif
4902 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4312 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4942,7 +4352,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4942 | mov TAB:RB, [BASE+RB*8] 4352 | mov TAB:RB, [BASE+RB*8]
4943 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4353 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4944 | mov RA, TAB:RB->hmask 4354 | mov RA, TAB:RB->hmask
4945 | and RA, STR:RC->hash 4355 | and RA, STR:RC->sid
4946 | imul RA, #NODE 4356 | imul RA, #NODE
4947 | add NODE:RA, TAB:RB->node 4357 | add NODE:RA, TAB:RB->node
4948 |1: 4358 |1:
@@ -5020,6 +4430,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5020 | mov dword [BASE+RA*8+4], LJ_TNIL 4430 | mov dword [BASE+RA*8+4], LJ_TNIL
5021 | jmp <1 4431 | jmp <1
5022 break; 4432 break;
4433 case BC_TGETR: /* Raw array-part load; no type checks on table or key appear in this handler. */
4434 | ins_ABC // RA = dst, RB = table, RC = key
4435 | mov TAB:RB, [BASE+RB*8]
4436 |.if DUALNUM
4437 | mov RC, dword [BASE+RC*8] // Key slot is read as an integer.
4438 |.else
4439 | cvttsd2si RC, qword [BASE+RC*8] // Key slot is read as a double and truncated to an integer.
4440 |.endif
4441 | cmp RC, TAB:RB->asize
4442 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4443 | shl RC, 3 // RC = &t->array[key] (8 byte slots).
4444 | add RC, TAB:RB->array
4445 | // Get array slot.
4446 |->BC_TGETR_Z:
4447 |.if X64
4448 | mov RBa, [RC]
4449 | mov [BASE+RA*8], RBa
4450 |.else
4451 | mov RB, [RC] // Copy the slot as two dwords; RC is overwritten last.
4452 | mov RC, [RC+4]
4453 | mov [BASE+RA*8], RB
4454 | mov [BASE+RA*8+4], RC
4455 |.endif
4456 |->BC_TGETR2_Z:
4457 | ins_next
4458 break;
5023 4459
5024 case BC_TSETV: 4460 case BC_TSETV:
5025 | ins_ABC // RA = src, RB = table, RC = key 4461 | ins_ABC // RA = src, RB = table, RC = key
@@ -5033,18 +4469,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5033 |.else 4469 |.else
5034 | // Convert number to int and back and compare. 4470 | // Convert number to int and back and compare.
5035 | checknum RC, >5 4471 | checknum RC, >5
5036 |.if SSE
5037 | movsd xmm0, qword [BASE+RC*8] 4472 | movsd xmm0, qword [BASE+RC*8]
5038 | cvtsd2si RC, xmm0 4473 | cvttsd2si RC, xmm0
5039 | cvtsi2sd xmm1, RC 4474 | cvtsi2sd xmm1, RC
5040 | ucomisd xmm0, xmm1 4475 | ucomisd xmm0, xmm1
5041 |.else
5042 | fld qword [BASE+RC*8]
5043 | fist ARG1
5044 | fild ARG1
5045 | fcomparepp
5046 | mov RC, ARG1
5047 |.endif
5048 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4476 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5049 |.endif 4477 |.endif
5050 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4478 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5095,7 +4523,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5095 | mov TAB:RB, [BASE+RB*8] 4523 | mov TAB:RB, [BASE+RB*8]
5096 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4524 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
5097 | mov RA, TAB:RB->hmask 4525 | mov RA, TAB:RB->hmask
5098 | and RA, STR:RC->hash 4526 | and RA, STR:RC->sid
5099 | imul RA, #NODE 4527 | imul RA, #NODE
5100 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. 4528 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
5101 | add NODE:RA, TAB:RB->node 4529 | add NODE:RA, TAB:RB->node
@@ -5214,6 +4642,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5214 | movzx RA, PC_RA // Restore RA. 4642 | movzx RA, PC_RA // Restore RA.
5215 | jmp <2 4643 | jmp <2
5216 break; 4644 break;
4645 case BC_TSETR: // Table store straight into the array part: no key type check, no hash lookup.
4646 | ins_ABC // RA = src, RB = table, RC = key
4647 | mov TAB:RB, [BASE+RB*8] // Load the table from stack slot RB.
4648 |.if DUALNUM
4649 | mov RC, dword [BASE+RC*8] // Key is read from the low dword of its slot.
4650 |.else
4651 | cvttsd2si RC, qword [BASE+RC*8] // Key is read as a double and truncated to an int.
4652 |.endif
4653 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4654 | jnz >7 // Black table: take the barrier path at 7 first.
4655 |2: // Re-entered from 7 once the barrier is done.
4656 | cmp RC, TAB:RB->asize
4657 | jae ->vmeta_tsetr // Index not below asize (unsigned compare)? Use fallback.
4658 | shl RC, 3 // Array slots are 8 bytes wide.
4659 | add RC, TAB:RB->array // RC = address of the target array slot.
4660 | // Set array slot.
4661 |->BC_TSETR_Z:
4662 |.if X64
4663 | mov RBa, [BASE+RA*8] // Copy the whole 8 byte source slot in one move.
4664 | mov [RC], RBa
4665 |.else
4666 | mov RB, [BASE+RA*8+4] // Upper dword first: RA is still needed as the slot index.
4667 | mov RA, [BASE+RA*8] // Lower dword; overwrites the index in RA.
4668 | mov [RC+4], RB
4669 | mov [RC], RA
4670 |.endif
4671 | ins_next
4672 |
4673 |7: // Possible table write barrier for the value. Skip valiswhite check.
4674 | barrierback TAB:RB, RA // RA is passed as the second macro operand and reloaded right after.
4675 | movzx RA, PC_RA // Restore RA.
4676 | jmp <2 // Resume with the bounds check.
4677 break;
5217 4678
5218 case BC_TSETM: 4679 case BC_TSETM:
5219 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4680 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5390,10 +4851,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5390 break; 4851 break;
5391 4852
5392 case BC_ITERN: 4853 case BC_ITERN:
5393 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5394 |.if JIT 4854 |.if JIT
5395 | // NYI: add hotloop, record BC_ITERN. 4855 | hotloop RB
5396 |.endif 4856 |.endif
4857 |->vm_IITERN:
4858 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5397 | mov TMP1, KBASE // Need two more free registers. 4859 | mov TMP1, KBASE // Need two more free registers.
5398 | mov TMP2, DISPATCH 4860 | mov TMP2, DISPATCH
5399 | mov TAB:RB, [BASE+RA*8-16] 4861 | mov TAB:RB, [BASE+RA*8-16]
@@ -5407,10 +4869,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5407 |.if DUALNUM 4869 |.if DUALNUM
5408 | mov dword [BASE+RA*8+4], LJ_TISNUM 4870 | mov dword [BASE+RA*8+4], LJ_TISNUM
5409 | mov dword [BASE+RA*8], RC 4871 | mov dword [BASE+RA*8], RC
5410 |.elif SSE
5411 | cvtsi2sd xmm0, RC
5412 |.else 4872 |.else
5413 | fild dword [BASE+RA*8-8] 4873 | cvtsi2sd xmm0, RC
5414 |.endif 4874 |.endif
5415 | // Copy array slot to returned value. 4875 | // Copy array slot to returned value.
5416 |.if X64 4876 |.if X64
@@ -5426,10 +4886,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5426 | // Return array index as a numeric key. 4886 | // Return array index as a numeric key.
5427 |.if DUALNUM 4887 |.if DUALNUM
5428 | // See above. 4888 | // See above.
5429 |.elif SSE
5430 | movsd qword [BASE+RA*8], xmm0
5431 |.else 4889 |.else
5432 | fstp qword [BASE+RA*8] 4890 | movsd qword [BASE+RA*8], xmm0
5433 |.endif 4891 |.endif
5434 | mov [BASE+RA*8-8], RC // Update control var. 4892 | mov [BASE+RA*8-8], RC // Update control var.
5435 |2: 4893 |2:
@@ -5442,9 +4900,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5442 | 4900 |
5443 |4: // Skip holes in array part. 4901 |4: // Skip holes in array part.
5444 | add RC, 1 4902 | add RC, 1
5445 |.if not (DUALNUM or SSE)
5446 | mov [BASE+RA*8-8], RC
5447 |.endif
5448 | jmp <1 4903 | jmp <1
5449 | 4904 |
5450 |5: // Traverse hash part. 4905 |5: // Traverse hash part.
@@ -5488,14 +4943,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5488 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 4943 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
5489 | branchPC RD 4944 | branchPC RD
5490 | mov dword [BASE+RA*8-8], 0 // Initialize control var. 4945 | mov dword [BASE+RA*8-8], 0 // Initialize control var.
5491 | mov dword [BASE+RA*8-4], 0xfffe7fff 4946 | mov dword [BASE+RA*8-4], LJ_KEYINDEX
5492 |1: 4947 |1:
5493 | ins_next 4948 | ins_next
5494 |5: // Despecialize bytecode if any of the checks fail. 4949 |5: // Despecialize bytecode if any of the checks fail.
5495 | mov PC_OP, BC_JMP 4950 | mov PC_OP, BC_JMP
5496 | branchPC RD 4951 | branchPC RD
4952 |.if JIT
4953 | cmp byte [PC], BC_ITERN
4954 | jne >6
4955 |.endif
5497 | mov byte [PC], BC_ITERC 4956 | mov byte [PC], BC_ITERC
5498 | jmp <1 4957 | jmp <1
4958 |.if JIT
4959 |6: // Unpatch JLOOP.
4960 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4961 | movzx RC, word [PC+2]
4962 | mov TRACE:RA, [RA+RC*4]
4963 | mov eax, TRACE:RA->startins
4964 | mov al, BC_ITERC
4965 | mov dword [PC], eax
4966 | jmp <1
4967 |.endif
5499 break; 4968 break;
5500 4969
5501 case BC_VARG: 4970 case BC_VARG:
@@ -5778,7 +5247,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5778 if (!vk) { 5247 if (!vk) {
5779 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5248 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5780 } 5249 }
5781 |.if SSE
5782 | movsd xmm0, qword FOR_IDX 5250 | movsd xmm0, qword FOR_IDX
5783 | movsd xmm1, qword FOR_STOP 5251 | movsd xmm1, qword FOR_STOP
5784 if (vk) { 5252 if (vk) {
@@ -5791,22 +5259,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5791 | ucomisd xmm1, xmm0 5259 | ucomisd xmm1, xmm0
5792 |1: 5260 |1:
5793 | movsd qword FOR_EXT, xmm0 5261 | movsd qword FOR_EXT, xmm0
5794 |.else
5795 | fld qword FOR_STOP
5796 | fld qword FOR_IDX
5797 if (vk) {
5798 | fadd qword FOR_STEP // nidx = idx + step
5799 | fst qword FOR_IDX
5800 | fst qword FOR_EXT
5801 | test RB, RB; js >1
5802 } else {
5803 | fst qword FOR_EXT
5804 | jl >1
5805 }
5806 | fxch // Swap lim/(n)idx if step non-negative.
5807 |1:
5808 | fcomparepp
5809 |.endif
5810 if (op == BC_FORI) { 5262 if (op == BC_FORI) {
5811 |.if DUALNUM 5263 |.if DUALNUM
5812 | jnb <7 5264 | jnb <7
@@ -5834,11 +5286,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5834 |2: 5286 |2:
5835 | ins_next 5287 | ins_next
5836 |.endif 5288 |.endif
5837 |.if SSE 5289 |
5838 |3: // Invert comparison if step is negative. 5290 |3: // Invert comparison if step is negative.
5839 | ucomisd xmm0, xmm1 5291 | ucomisd xmm0, xmm1
5840 | jmp <1 5292 | jmp <1
5841 |.endif
5842 break; 5293 break;
5843 5294
5844 case BC_ITERL: 5295 case BC_ITERL:
@@ -5876,7 +5327,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5876 | ins_A // RA = base, RD = target (loop extent) 5327 | ins_A // RA = base, RD = target (loop extent)
5877 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5328 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5878 | // This opcode does NOT jump, its only purpose is to detect a hot loop. 5329 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5879 |.if JIT 5330 |.if JIT
5880 | hotloop RB 5331 | hotloop RB
5881 |.endif 5332 |.endif
5882 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5333 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5895,7 +5346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5895 | mov RDa, TRACE:RD->mcode 5346 | mov RDa, TRACE:RD->mcode
5896 | mov L:RB, SAVE_L 5347 | mov L:RB, SAVE_L
5897 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5348 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5898 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5349 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5899 | // Save additional callee-save registers only used in compiled code. 5350 | // Save additional callee-save registers only used in compiled code.
5900 |.if X64WIN 5351 |.if X64WIN
5901 | mov TMPQ, r12 5352 | mov TMPQ, r12
@@ -6062,9 +5513,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6062 | // (lua_State *L, lua_CFunction f) 5513 | // (lua_State *L, lua_CFunction f)
6063 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5514 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6064 } 5515 }
6065 | set_vmstate INTERP
6066 | // nresults returned in eax (RD). 5516 | // nresults returned in eax (RD).
6067 | mov BASE, L:RB->base 5517 | mov BASE, L:RB->base
5518 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5519 | set_vmstate INTERP
6068 | lea RA, [BASE+RD*8] 5520 | lea RA, [BASE+RD*8]
6069 | neg RA 5521 | neg RA
6070 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5522 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6177,7 +5629,7 @@ static void emit_asm_debug(BuildCtx *ctx)
6177 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5629 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
6178#endif 5630#endif
6179#if !LJ_NO_UNWIND 5631#if !LJ_NO_UNWIND
6180#if (defined(__sun__) && defined(__svr4__)) 5632#if LJ_TARGET_SOLARIS
6181#if LJ_64 5633#if LJ_64
6182 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); 5634 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
6183#else 5635#else
@@ -6384,15 +5836,21 @@ static void emit_asm_debug(BuildCtx *ctx)
6384 "LEFDEY:\n\n", fcsize); 5836 "LEFDEY:\n\n", fcsize);
6385 } 5837 }
6386#endif 5838#endif
6387#if LJ_64 5839#if !LJ_64
6388 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6389#else
6390 fprintf(ctx->fp, 5840 fprintf(ctx->fp,
6391 "\t.non_lazy_symbol_pointer\n" 5841 "\t.non_lazy_symbol_pointer\n"
6392 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5842 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6393 ".indirect_symbol _lj_err_unwind_dwarf\n" 5843 ".indirect_symbol _lj_err_unwind_dwarf\n"
6394 ".long 0\n"); 5844 ".long 0\n\n");
5845 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5846 {
5847 const char *const *xn;
5848 for (xn = ctx->extnames; *xn; xn++)
5849 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5850 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5851 }
6395#endif 5852#endif
5853 fprintf(ctx->fp, ".subsections_via_symbols\n");
6396 } 5854 }
6397 break; 5855 break;
6398#endif 5856#endif