aboutsummaryrefslogtreecommitdiff
path: root/src/vm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x86.dasc')
-rw-r--r--src/vm_x86.dasc1563
1 files changed, 471 insertions, 1092 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 359ad4f4..211ae7b9 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,24 +115,74 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
122|.if not X64 // x86 stack layout. 122|.if not X64 // x86 stack layout.
123| 123|
124|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 124|.if WIN
125|
126|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
125|.macro saveregs_ 127|.macro saveregs_
126| push edi; push esi; push ebx 128| push edi; push esi; push ebx
129| push extern lj_err_unwind_win
130| fs; push dword [0]
131| fs; mov [0], esp
127| sub esp, CFRAME_SPACE 132| sub esp, CFRAME_SPACE
128|.endmacro 133|.endmacro
129|.macro saveregs 134|.macro restoreregs
130| push ebp; saveregs_ 135| add esp, CFRAME_SPACE
136| fs; pop dword [0]
137| pop edi // Short for esp += 4.
138| pop ebx; pop esi; pop edi; pop ebp
139|.endmacro
140|
141|.else
142|
143|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
144|.macro saveregs_
145| push edi; push esi; push ebx
146| sub esp, CFRAME_SPACE
131|.endmacro 147|.endmacro
132|.macro restoreregs 148|.macro restoreregs
133| add esp, CFRAME_SPACE 149| add esp, CFRAME_SPACE
134| pop ebx; pop esi; pop edi; pop ebp 150| pop ebx; pop esi; pop edi; pop ebp
135|.endmacro 151|.endmacro
136| 152|
153|.endif
154|
155|.macro saveregs
156| push ebp; saveregs_
157|.endmacro
158|
159|.if WIN
160|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
161|.define SAVE_NRES, aword [esp+aword*18]
162|.define SAVE_CFRAME, aword [esp+aword*17]
163|.define SAVE_L, aword [esp+aword*16]
164|//----- 16 byte aligned, ^^^ arguments from C caller
165|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
166|.define SAVE_R4, aword [esp+aword*14]
167|.define SAVE_R3, aword [esp+aword*13]
168|.define SAVE_R2, aword [esp+aword*12]
169|//----- 16 byte aligned
170|.define SAVE_R1, aword [esp+aword*11]
171|.define SEH_FUNC, aword [esp+aword*10]
172|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
173|.define UNUSED2, aword [esp+aword*8]
174|//----- 16 byte aligned
175|.define UNUSED1, aword [esp+aword*7]
176|.define SAVE_PC, aword [esp+aword*6]
177|.define TMP2, aword [esp+aword*5]
178|.define TMP1, aword [esp+aword*4]
179|//----- 16 byte aligned
180|.define ARG4, aword [esp+aword*3]
181|.define ARG3, aword [esp+aword*2]
182|.define ARG2, aword [esp+aword*1]
183|.define ARG1, aword [esp] //<-- esp while in interpreter.
184|//----- 16 byte aligned, ^^^ arguments for C callee
185|.else
137|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 186|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
138|.define SAVE_NRES, aword [esp+aword*14] 187|.define SAVE_NRES, aword [esp+aword*14]
139|.define SAVE_CFRAME, aword [esp+aword*13] 188|.define SAVE_CFRAME, aword [esp+aword*13]
@@ -154,6 +203,7 @@
154|.define ARG2, aword [esp+aword*1] 203|.define ARG2, aword [esp+aword*1]
155|.define ARG1, aword [esp] //<-- esp while in interpreter. 204|.define ARG1, aword [esp] //<-- esp while in interpreter.
156|//----- 16 byte aligned, ^^^ arguments for C callee 205|//----- 16 byte aligned, ^^^ arguments for C callee
206|.endif
157| 207|
158|// FPARGx overlaps ARGx and ARG(x+1) on x86. 208|// FPARGx overlaps ARGx and ARG(x+1) on x86.
159|.define FPARG3, qword [esp+qword*1] 209|.define FPARG3, qword [esp+qword*1]
@@ -389,7 +439,6 @@
389| fpop 439| fpop
390|.endmacro 440|.endmacro
391| 441|
392|.macro fdup; fld st0; .endmacro
393|.macro fpop1; fstp st1; .endmacro 442|.macro fpop1; fstp st1; .endmacro
394| 443|
395|// Synthesize SSE FP constants. 444|// Synthesize SSE FP constants.
@@ -555,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx)
555 |.else 604 |.else
556 | mov eax, FCARG2 // Error return status for vm_pcall. 605 | mov eax, FCARG2 // Error return status for vm_pcall.
557 | mov esp, FCARG1 606 | mov esp, FCARG1
607 |.if WIN
608 | lea FCARG1, SEH_NEXT
609 | fs; mov [0], FCARG1
610 |.endif
558 |.endif 611 |.endif
559 |->vm_unwind_c_eh: // Landing pad for external unwinder. 612 |->vm_unwind_c_eh: // Landing pad for external unwinder.
560 | mov L:RB, SAVE_L 613 | mov L:RB, SAVE_L
@@ -578,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx)
578 |.else 631 |.else
579 | and FCARG1, CFRAME_RAWMASK 632 | and FCARG1, CFRAME_RAWMASK
580 | mov esp, FCARG1 633 | mov esp, FCARG1
634 |.if WIN
635 | lea FCARG1, SEH_NEXT
636 | fs; mov [0], FCARG1
637 |.endif
581 |.endif 638 |.endif
582 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 639 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
583 | mov L:RB, SAVE_L 640 | mov L:RB, SAVE_L
@@ -591,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx)
591 | set_vmstate INTERP 648 | set_vmstate INTERP
592 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 649 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
593 | 650 |
651 |.if WIN and not X64
652 |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
653 | // (void *cframe, void *excptrec, void *unwinder, int errcode)
654 | mov [esp], FCARG1 // Return value for RtlUnwind.
655 | push FCARG2 // Exception record for RtlUnwind.
656 | push 0 // Ignored by RtlUnwind.
657 | push dword [FCARG1+CFRAME_OFS_SEH]
658 | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
659 | mov FCARG1, eax
660 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
661 | ret // Jump to unwinder.
662 |.endif
663 |
594 |//----------------------------------------------------------------------- 664 |//-----------------------------------------------------------------------
595 |//-- Grow stack for calls ----------------------------------------------- 665 |//-- Grow stack for calls -----------------------------------------------
596 |//----------------------------------------------------------------------- 666 |//-----------------------------------------------------------------------
@@ -646,17 +716,18 @@ static void build_subroutines(BuildCtx *ctx)
646 | lea KBASEa, [esp+CFRAME_RESUME] 716 | lea KBASEa, [esp+CFRAME_RESUME]
647 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 717 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
648 | add DISPATCH, GG_G2DISP 718 | add DISPATCH, GG_G2DISP
649 | mov L:RB->cframe, KBASEa
650 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 719 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
651 | mov SAVE_CFRAME, RDa 720 | mov SAVE_CFRAME, RDa
652 |.if X64 721 |.if X64
653 | mov SAVE_NRES, RD 722 | mov SAVE_NRES, RD
654 | mov SAVE_ERRF, RD 723 | mov SAVE_ERRF, RD
655 |.endif 724 |.endif
725 | mov L:RB->cframe, KBASEa
656 | cmp byte L:RB->status, RDL 726 | cmp byte L:RB->status, RDL
657 | je >3 // Initial resume (like a call). 727 | je >2 // Initial resume (like a call).
658 | 728 |
659 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
660 | set_vmstate INTERP 731 | set_vmstate INTERP
661 | mov byte L:RB->status, RDL 732 | mov byte L:RB->status, RDL
662 | mov BASE, L:RB->base 733 | mov BASE, L:RB->base
@@ -696,20 +767,19 @@ static void build_subroutines(BuildCtx *ctx)
696 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 767 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
697 |.endif 768 |.endif
698 | 769 |
770 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
699 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 771 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
700 | mov SAVE_CFRAME, KBASEa 772 | mov SAVE_CFRAME, KBASEa
701 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 773 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
774 | add DISPATCH, GG_G2DISP
702 |.if X64 775 |.if X64
703 | mov L:RB->cframe, rsp 776 | mov L:RB->cframe, rsp
704 |.else 777 |.else
705 | mov L:RB->cframe, esp 778 | mov L:RB->cframe, esp
706 |.endif 779 |.endif
707 | 780 |
708 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 781 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
709 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 782 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
710 | add DISPATCH, GG_G2DISP
711 |
712 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
713 | set_vmstate INTERP 783 | set_vmstate INTERP
714 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 784 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
715 | add PC, RA 785 | add PC, RA
@@ -747,14 +817,17 @@ static void build_subroutines(BuildCtx *ctx)
747 | 817 |
748 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 818 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
749 | sub KBASE, L:RB->top 819 | sub KBASE, L:RB->top
820 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
750 | mov SAVE_ERRF, 0 // No error function. 821 | mov SAVE_ERRF, 0 // No error function.
751 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 822 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
823 | add DISPATCH, GG_G2DISP
752 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 824 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
753 | 825 |
754 |.if X64 826 |.if X64
755 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 827 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
756 | mov SAVE_CFRAME, KBASEa 828 | mov SAVE_CFRAME, KBASEa
757 | mov L:RB->cframe, rsp 829 | mov L:RB->cframe, rsp
830 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
758 | 831 |
759 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 832 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
760 |.else 833 |.else
@@ -765,6 +838,7 @@ static void build_subroutines(BuildCtx *ctx)
765 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 838 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
766 | mov SAVE_CFRAME, KBASE 839 | mov SAVE_CFRAME, KBASE
767 | mov L:RB->cframe, esp 840 | mov L:RB->cframe, esp
841 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
768 | 842 |
769 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 843 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
770 |.endif 844 |.endif
@@ -872,13 +946,9 @@ static void build_subroutines(BuildCtx *ctx)
872 |.if DUALNUM 946 |.if DUALNUM
873 | mov TMP2, LJ_TISNUM 947 | mov TMP2, LJ_TISNUM
874 | mov TMP1, RC 948 | mov TMP1, RC
875 |.elif SSE 949 |.else
876 | cvtsi2sd xmm0, RC 950 | cvtsi2sd xmm0, RC
877 | movsd TMPQ, xmm0 951 | movsd TMPQ, xmm0
878 |.else
879 | mov ARG4, RC
880 | fild ARG4
881 | fstp TMPQ
882 |.endif 952 |.endif
883 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 953 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
884 | jmp >1 954 | jmp >1
@@ -932,6 +1002,19 @@ static void build_subroutines(BuildCtx *ctx)
932 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 1002 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
933 | jmp ->vm_call_dispatch_f 1003 | jmp ->vm_call_dispatch_f
934 | 1004 |
1005 |->vmeta_tgetr:
1006 | mov FCARG1, TAB:RB
1007 | mov RB, BASE // Save BASE.
1008 | mov FCARG2, RC // Caveat: FCARG2 == BASE
1009 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1010 | // cTValue * or NULL returned in eax (RC).
1011 | movzx RA, PC_RA
1012 | mov BASE, RB // Restore BASE.
1013 | test RC, RC
1014 | jnz ->BC_TGETR_Z
1015 | mov dword [BASE+RA*8+4], LJ_TNIL
1016 | jmp ->BC_TGETR2_Z
1017 |
935 |//----------------------------------------------------------------------- 1018 |//-----------------------------------------------------------------------
936 | 1019 |
937 |->vmeta_tsets: 1020 |->vmeta_tsets:
@@ -951,13 +1034,9 @@ static void build_subroutines(BuildCtx *ctx)
951 |.if DUALNUM 1034 |.if DUALNUM
952 | mov TMP2, LJ_TISNUM 1035 | mov TMP2, LJ_TISNUM
953 | mov TMP1, RC 1036 | mov TMP1, RC
954 |.elif SSE 1037 |.else
955 | cvtsi2sd xmm0, RC 1038 | cvtsi2sd xmm0, RC
956 | movsd TMPQ, xmm0 1039 | movsd TMPQ, xmm0
957 |.else
958 | mov ARG4, RC
959 | fild ARG4
960 | fstp TMPQ
961 |.endif 1040 |.endif
962 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 1041 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
963 | jmp >1 1042 | jmp >1
@@ -1023,6 +1102,33 @@ static void build_subroutines(BuildCtx *ctx)
1023 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1102 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1024 | jmp ->vm_call_dispatch_f 1103 | jmp ->vm_call_dispatch_f
1025 | 1104 |
1105 |->vmeta_tsetr:
1106 |.if X64WIN
1107 | mov L:CARG1d, SAVE_L
1108 | mov CARG3d, RC
1109 | mov L:CARG1d->base, BASE
1110 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1111 |.elif X64
1112 | mov L:CARG1d, SAVE_L
1113 | mov CARG2d, TAB:RB
1114 | mov L:CARG1d->base, BASE
1115 | mov RB, BASE // Save BASE.
1116 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1117 |.else
1118 | mov L:RA, SAVE_L
1119 | mov ARG2, TAB:RB
1120 | mov RB, BASE // Save BASE.
1121 | mov ARG3, RC
1122 | mov ARG1, L:RA
1123 | mov L:RA->base, BASE
1124 |.endif
1125 | mov SAVE_PC, PC
1126 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1127 | // TValue * returned in eax (RC).
1128 | movzx RA, PC_RA
1129 | mov BASE, RB // Restore BASE.
1130 | jmp ->BC_TSETR_Z
1131 |
1026 |//-- Comparison metamethods --------------------------------------------- 1132 |//-- Comparison metamethods ---------------------------------------------
1027 | 1133 |
1028 |->vmeta_comp: 1134 |->vmeta_comp:
@@ -1117,6 +1223,26 @@ static void build_subroutines(BuildCtx *ctx)
1117 | jmp <3 1223 | jmp <3
1118 |.endif 1224 |.endif
1119 | 1225 |
1226 |->vmeta_istype:
1227 |.if X64
1228 | mov L:RB, SAVE_L
1229 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1230 | mov CARG2d, RA
1231 | movzx CARG3d, PC_RD
1232 | mov L:CARG1d, L:RB
1233 |.else
1234 | movzx RD, PC_RD
1235 | mov ARG2, RA
1236 | mov L:RB, SAVE_L
1237 | mov ARG3, RD
1238 | mov ARG1, L:RB
1239 | mov L:RB->base, BASE
1240 |.endif
1241 | mov SAVE_PC, PC
1242 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1243 | mov BASE, L:RB->base
1244 | jmp <6
1245 |
1120 |//-- Arithmetic metamethods --------------------------------------------- 1246 |//-- Arithmetic metamethods ---------------------------------------------
1121 | 1247 |
1122 |->vmeta_arith_vno: 1248 |->vmeta_arith_vno:
@@ -1289,19 +1415,6 @@ static void build_subroutines(BuildCtx *ctx)
1289 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1415 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1290 |.endmacro 1416 |.endmacro
1291 | 1417 |
1292 |.macro .ffunc_n, name
1293 | .ffunc_1 name
1294 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1295 | fld qword [BASE]
1296 |.endmacro
1297 |
1298 |.macro .ffunc_n, name, op
1299 | .ffunc_1 name
1300 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1301 | op
1302 | fld qword [BASE]
1303 |.endmacro
1304 |
1305 |.macro .ffunc_nsse, name, op 1418 |.macro .ffunc_nsse, name, op
1306 | .ffunc_1 name 1419 | .ffunc_1 name
1307 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1420 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1312,14 +1425,6 @@ static void build_subroutines(BuildCtx *ctx)
1312 | .ffunc_nsse name, movsd 1425 | .ffunc_nsse name, movsd
1313 |.endmacro 1426 |.endmacro
1314 | 1427 |
1315 |.macro .ffunc_nn, name
1316 | .ffunc_2 name
1317 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1318 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1319 | fld qword [BASE]
1320 | fld qword [BASE+8]
1321 |.endmacro
1322 |
1323 |.macro .ffunc_nnsse, name 1428 |.macro .ffunc_nnsse, name
1324 | .ffunc_2 name 1429 | .ffunc_2 name
1325 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1430 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1525,11 +1630,7 @@ static void build_subroutines(BuildCtx *ctx)
1525 |.else 1630 |.else
1526 | jae ->fff_fallback 1631 | jae ->fff_fallback
1527 |.endif 1632 |.endif
1528 |.if SSE
1529 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1633 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1530 |.else
1531 | fld qword [BASE]; jmp ->fff_resn
1532 |.endif
1533 | 1634 |
1534 |.ffunc_1 tostring 1635 |.ffunc_1 tostring
1535 | // Only handles the string or number case inline. 1636 | // Only handles the string or number case inline.
@@ -1554,9 +1655,9 @@ static void build_subroutines(BuildCtx *ctx)
1554 |.endif 1655 |.endif
1555 | mov L:FCARG1, L:RB 1656 | mov L:FCARG1, L:RB
1556 |.if DUALNUM 1657 |.if DUALNUM
1557 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1658 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1558 |.else 1659 |.else
1559 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1660 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1560 |.endif 1661 |.endif
1561 | // GCstr returned in eax (RD). 1662 | // GCstr returned in eax (RD).
1562 | mov BASE, L:RB->base 1663 | mov BASE, L:RB->base
@@ -1647,19 +1748,12 @@ static void build_subroutines(BuildCtx *ctx)
1647 | add RD, 1 1748 | add RD, 1
1648 | mov dword [BASE-4], LJ_TISNUM 1749 | mov dword [BASE-4], LJ_TISNUM
1649 | mov dword [BASE-8], RD 1750 | mov dword [BASE-8], RD
1650 |.elif SSE 1751 |.else
1651 | movsd xmm0, qword [BASE+8] 1752 | movsd xmm0, qword [BASE+8]
1652 | sseconst_1 xmm1, RBa 1753 | sseconst_1 xmm1, RBa
1653 | addsd xmm0, xmm1 1754 | addsd xmm0, xmm1
1654 | cvtsd2si RD, xmm0 1755 | cvttsd2si RD, xmm0
1655 | movsd qword [BASE-8], xmm0 1756 | movsd qword [BASE-8], xmm0
1656 |.else
1657 | fld qword [BASE+8]
1658 | fld1
1659 | faddp st1
1660 | fist ARG1
1661 | fstp qword [BASE-8]
1662 | mov RD, ARG1
1663 |.endif 1757 |.endif
1664 | mov TAB:RB, [BASE] 1758 | mov TAB:RB, [BASE]
1665 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1759 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1706,12 +1800,9 @@ static void build_subroutines(BuildCtx *ctx)
1706 |.if DUALNUM 1800 |.if DUALNUM
1707 | mov dword [BASE+12], LJ_TISNUM 1801 | mov dword [BASE+12], LJ_TISNUM
1708 | mov dword [BASE+8], 0 1802 | mov dword [BASE+8], 0
1709 |.elif SSE 1803 |.else
1710 | xorps xmm0, xmm0 1804 | xorps xmm0, xmm0
1711 | movsd qword [BASE+8], xmm0 1805 | movsd qword [BASE+8], xmm0
1712 |.else
1713 | fldz
1714 | fstp qword [BASE+8]
1715 |.endif 1806 |.endif
1716 | mov RD, 1+3 1807 | mov RD, 1+3
1717 | jmp ->fff_res 1808 | jmp ->fff_res
@@ -1818,7 +1909,6 @@ static void build_subroutines(BuildCtx *ctx)
1818 | mov ARG3, RA 1909 | mov ARG3, RA
1819 |.endif 1910 |.endif
1820 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1911 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1821 | set_vmstate INTERP
1822 | 1912 |
1823 | mov L:RB, SAVE_L 1913 | mov L:RB, SAVE_L
1824 |.if X64 1914 |.if X64
@@ -1827,6 +1917,9 @@ static void build_subroutines(BuildCtx *ctx)
1827 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1917 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1828 |.endif 1918 |.endif
1829 | mov BASE, L:RB->base 1919 | mov BASE, L:RB->base
1920 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1921 | set_vmstate INTERP
1922 |
1830 | cmp eax, LUA_YIELD 1923 | cmp eax, LUA_YIELD
1831 | ja >8 1924 | ja >8
1832 |4: 1925 |4:
@@ -1941,12 +2034,10 @@ static void build_subroutines(BuildCtx *ctx)
1941 |->fff_resi: // Dummy. 2034 |->fff_resi: // Dummy.
1942 |.endif 2035 |.endif
1943 | 2036 |
1944 |.if SSE
1945 |->fff_resn: 2037 |->fff_resn:
1946 | mov PC, [BASE-4] 2038 | mov PC, [BASE-4]
1947 | fstp qword [BASE-8] 2039 | fstp qword [BASE-8]
1948 | jmp ->fff_res1 2040 | jmp ->fff_res1
1949 |.endif
1950 | 2041 |
1951 | .ffunc_1 math_abs 2042 | .ffunc_1 math_abs
1952 |.if DUALNUM 2043 |.if DUALNUM
@@ -1970,8 +2061,6 @@ static void build_subroutines(BuildCtx *ctx)
1970 |.else 2061 |.else
1971 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2062 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1972 |.endif 2063 |.endif
1973 |
1974 |.if SSE
1975 | movsd xmm0, qword [BASE] 2064 | movsd xmm0, qword [BASE]
1976 | sseconst_abs xmm1, RDa 2065 | sseconst_abs xmm1, RDa
1977 | andps xmm0, xmm1 2066 | andps xmm0, xmm1
@@ -1979,15 +2068,6 @@ static void build_subroutines(BuildCtx *ctx)
1979 | mov PC, [BASE-4] 2068 | mov PC, [BASE-4]
1980 | movsd qword [BASE-8], xmm0 2069 | movsd qword [BASE-8], xmm0
1981 | // fallthrough 2070 | // fallthrough
1982 |.else
1983 | fld qword [BASE]
1984 | fabs
1985 | // fallthrough
1986 |->fff_resxmm0: // Dummy.
1987 |->fff_resn:
1988 | mov PC, [BASE-4]
1989 | fstp qword [BASE-8]
1990 |.endif
1991 | 2071 |
1992 |->fff_res1: 2072 |->fff_res1:
1993 | mov RD, 1+1 2073 | mov RD, 1+1
@@ -2014,6 +2094,12 @@ static void build_subroutines(BuildCtx *ctx)
2014 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2094 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
2015 | jmp ->vm_return 2095 | jmp ->vm_return
2016 | 2096 |
2097 |.if X64
2098 |.define fff_resfp, fff_resxmm0
2099 |.else
2100 |.define fff_resfp, fff_resn
2101 |.endif
2102 |
2017 |.macro math_round, func 2103 |.macro math_round, func
2018 | .ffunc math_ .. func 2104 | .ffunc math_ .. func
2019 |.if DUALNUM 2105 |.if DUALNUM
@@ -2024,107 +2110,75 @@ static void build_subroutines(BuildCtx *ctx)
2024 |.else 2110 |.else
2025 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2111 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2026 |.endif 2112 |.endif
2027 |.if SSE
2028 | movsd xmm0, qword [BASE] 2113 | movsd xmm0, qword [BASE]
2029 | call ->vm_ .. func 2114 | call ->vm_ .. func .. _sse
2030 | .if DUALNUM 2115 |.if DUALNUM
2031 | cvtsd2si RB, xmm0 2116 | cvttsd2si RB, xmm0
2032 | cmp RB, 0x80000000 2117 | cmp RB, 0x80000000
2033 | jne ->fff_resi 2118 | jne ->fff_resi
2034 | cvtsi2sd xmm1, RB 2119 | cvtsi2sd xmm1, RB
2035 | ucomisd xmm0, xmm1 2120 | ucomisd xmm0, xmm1
2036 | jp ->fff_resxmm0 2121 | jp ->fff_resxmm0
2037 | je ->fff_resi 2122 | je ->fff_resi
2038 | .endif
2039 | jmp ->fff_resxmm0
2040 |.else
2041 | fld qword [BASE]
2042 | call ->vm_ .. func
2043 | .if DUALNUM
2044 | fist ARG1
2045 | mov RB, ARG1
2046 | cmp RB, 0x80000000; jne >2
2047 | fdup
2048 | fild ARG1
2049 | fcomparepp
2050 | jp ->fff_resn
2051 | jne ->fff_resn
2052 |2:
2053 | fpop
2054 | jmp ->fff_resi
2055 | .else
2056 | jmp ->fff_resn
2057 | .endif
2058 |.endif 2123 |.endif
2124 | jmp ->fff_resxmm0
2059 |.endmacro 2125 |.endmacro
2060 | 2126 |
2061 | math_round floor 2127 | math_round floor
2062 | math_round ceil 2128 | math_round ceil
2063 | 2129 |
2064 |.if SSE
2065 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2130 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2066 |.else
2067 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2068 |.endif
2069 | 2131 |
2070 |.ffunc math_log 2132 |.ffunc math_log
2071 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2133 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2072 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2134 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2073 | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn 2135 | movsd xmm0, qword [BASE]
2074 | 2136 |.if not X64
2075 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2137 | movsd FPARG1, xmm0
2076 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2138 |.endif
2077 | 2139 | mov RB, BASE
2078 |.ffunc_n math_sin; fsin; jmp ->fff_resn 2140 | call extern log
2079 |.ffunc_n math_cos; fcos; jmp ->fff_resn 2141 | mov BASE, RB
2080 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn 2142 | jmp ->fff_resfp
2081 |
2082 |.ffunc_n math_asin
2083 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
2084 | jmp ->fff_resn
2085 |.ffunc_n math_acos
2086 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
2087 | jmp ->fff_resn
2088 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2089 | 2143 |
2090 |.macro math_extern, func 2144 |.macro math_extern, func
2091 |.if SSE
2092 | .ffunc_nsse math_ .. func 2145 | .ffunc_nsse math_ .. func
2093 | .if not X64 2146 |.if not X64
2094 | movsd FPARG1, xmm0 2147 | movsd FPARG1, xmm0
2095 | .endif
2096 |.else
2097 | .ffunc_n math_ .. func
2098 | fstp FPARG1
2099 |.endif 2148 |.endif
2100 | mov RB, BASE 2149 | mov RB, BASE
2101 | call extern lj_vm_ .. func 2150 | call extern func
2102 | mov BASE, RB 2151 | mov BASE, RB
2103 | .if X64 2152 | jmp ->fff_resfp
2104 | jmp ->fff_resxmm0 2153 |.endmacro
2105 | .else 2154 |
2106 | jmp ->fff_resn 2155 |.macro math_extern2, func
2107 | .endif 2156 | .ffunc_nnsse math_ .. func
2157 |.if not X64
2158 | movsd FPARG1, xmm0
2159 | movsd FPARG3, xmm1
2160 |.endif
2161 | mov RB, BASE
2162 | call extern func
2163 | mov BASE, RB
2164 | jmp ->fff_resfp
2108 |.endmacro 2165 |.endmacro
2109 | 2166 |
2167 | math_extern log10
2168 | math_extern exp
2169 | math_extern sin
2170 | math_extern cos
2171 | math_extern tan
2172 | math_extern asin
2173 | math_extern acos
2174 | math_extern atan
2110 | math_extern sinh 2175 | math_extern sinh
2111 | math_extern cosh 2176 | math_extern cosh
2112 | math_extern tanh 2177 | math_extern tanh
2178 | math_extern2 pow
2179 | math_extern2 atan2
2180 | math_extern2 fmod
2113 | 2181 |
2114 |->ff_math_deg:
2115 |.if SSE
2116 |.ffunc_nsse math_rad
2117 | mov CFUNC:RB, [BASE-8]
2118 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2119 | jmp ->fff_resxmm0
2120 |.else
2121 |.ffunc_n math_rad
2122 | mov CFUNC:RB, [BASE-8]
2123 | fmul qword CFUNC:RB->upvalue[0]
2124 | jmp ->fff_resn
2125 |.endif
2126 |
2127 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2128 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2182 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2129 | 2183 |
2130 |.ffunc_1 math_frexp 2184 |.ffunc_1 math_frexp
@@ -2139,65 +2193,34 @@ static void build_subroutines(BuildCtx *ctx)
2139 | cmp RB, 0x00200000; jb >4 2193 | cmp RB, 0x00200000; jb >4
2140 |1: 2194 |1:
2141 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2195 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2142 |.if SSE
2143 | cvtsi2sd xmm0, RB 2196 | cvtsi2sd xmm0, RB
2144 |.else
2145 | mov TMP1, RB; fild TMP1
2146 |.endif
2147 | mov RB, [BASE-4] 2197 | mov RB, [BASE-4]
2148 | and RB, 0x800fffff // Mask off exponent. 2198 | and RB, 0x800fffff // Mask off exponent.
2149 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2199 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2150 | mov [BASE-4], RB 2200 | mov [BASE-4], RB
2151 |2: 2201 |2:
2152 |.if SSE
2153 | movsd qword [BASE], xmm0 2202 | movsd qword [BASE], xmm0
2154 |.else
2155 | fstp qword [BASE]
2156 |.endif
2157 | mov RD, 1+2 2203 | mov RD, 1+2
2158 | jmp ->fff_res 2204 | jmp ->fff_res
2159 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2205 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2160 |.if SSE
2161 | xorps xmm0, xmm0; jmp <2 2206 | xorps xmm0, xmm0; jmp <2
2162 |.else
2163 | fldz; jmp <2
2164 |.endif
2165 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2207 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2166 |.if SSE
2167 | movsd xmm0, qword [BASE] 2208 | movsd xmm0, qword [BASE]
2168 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2209 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2169 | mulsd xmm0, xmm1 2210 | mulsd xmm0, xmm1
2170 | movsd qword [BASE-8], xmm0 2211 | movsd qword [BASE-8], xmm0
2171 |.else
2172 | fld qword [BASE]
2173 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2174 | fstp qword [BASE-8]
2175 |.endif
2176 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2212 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2177 | 2213 |
2178 |.if SSE
2179 |.ffunc_nsse math_modf 2214 |.ffunc_nsse math_modf
2180 |.else
2181 |.ffunc_n math_modf
2182 |.endif
2183 | mov RB, [BASE+4] 2215 | mov RB, [BASE+4]
2184 | mov PC, [BASE-4] 2216 | mov PC, [BASE-4]
2185 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2217 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2186 |.if SSE
2187 | movaps xmm4, xmm0 2218 | movaps xmm4, xmm0
2188 | call ->vm_trunc 2219 | call ->vm_trunc_sse
2189 | subsd xmm4, xmm0 2220 | subsd xmm4, xmm0
2190 |1: 2221 |1:
2191 | movsd qword [BASE-8], xmm0 2222 | movsd qword [BASE-8], xmm0
2192 | movsd qword [BASE], xmm4 2223 | movsd qword [BASE], xmm4
2193 |.else
2194 | fdup
2195 | call ->vm_trunc
2196 | fsub st1, st0
2197 |1:
2198 | fstp qword [BASE-8]
2199 | fstp qword [BASE]
2200 |.endif
2201 | mov RC, [BASE-4]; mov RB, [BASE+4] 2224 | mov RC, [BASE-4]; mov RB, [BASE+4]
2202 | xor RC, RB; js >3 // Need to adjust sign? 2225 | xor RC, RB; js >3 // Need to adjust sign?
2203 |2: 2226 |2:
@@ -2207,24 +2230,9 @@ static void build_subroutines(BuildCtx *ctx)
2207 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2230 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2208 | jmp <2 2231 | jmp <2
2209 |4: 2232 |4:
2210 |.if SSE
2211 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2233 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2212 |.else
2213 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2214 |.endif
2215 |
2216 |.ffunc_nnr math_fmod
2217 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2218 | fpop1
2219 | jmp ->fff_resn
2220 | 2234 |
2221 |.if SSE 2235 |.macro math_minmax, name, cmovop, sseop
2222 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2223 |.else
2224 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2225 |.endif
2226 |
2227 |.macro math_minmax, name, cmovop, fcmovop, sseop
2228 | .ffunc name 2236 | .ffunc name
2229 | mov RA, 2 2237 | mov RA, 2
2230 | cmp dword [BASE+4], LJ_TISNUM 2238 | cmp dword [BASE+4], LJ_TISNUM
@@ -2241,12 +2249,7 @@ static void build_subroutines(BuildCtx *ctx)
2241 |3: 2249 |3:
2242 | ja ->fff_fallback 2250 | ja ->fff_fallback
2243 | // Convert intermediate result to number and continue below. 2251 | // Convert intermediate result to number and continue below.
2244 |.if SSE
2245 | cvtsi2sd xmm0, RB 2252 | cvtsi2sd xmm0, RB
2246 |.else
2247 | mov TMP1, RB
2248 | fild TMP1
2249 |.endif
2250 | jmp >6 2253 | jmp >6
2251 |4: 2254 |4:
2252 | ja ->fff_fallback 2255 | ja ->fff_fallback
@@ -2254,7 +2257,6 @@ static void build_subroutines(BuildCtx *ctx)
2254 | jae ->fff_fallback 2257 | jae ->fff_fallback
2255 |.endif 2258 |.endif
2256 | 2259 |
2257 |.if SSE
2258 | movsd xmm0, qword [BASE] 2260 | movsd xmm0, qword [BASE]
2259 |5: // Handle numbers or integers. 2261 |5: // Handle numbers or integers.
2260 | cmp RA, RD; jae ->fff_resxmm0 2262 | cmp RA, RD; jae ->fff_resxmm0
@@ -2273,48 +2275,13 @@ static void build_subroutines(BuildCtx *ctx)
2273 | sseop xmm0, xmm1 2275 | sseop xmm0, xmm1
2274 | add RA, 1 2276 | add RA, 1
2275 | jmp <5 2277 | jmp <5
2276 |.else
2277 | fld qword [BASE]
2278 |5: // Handle numbers or integers.
2279 | cmp RA, RD; jae ->fff_resn
2280 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2281 |.if DUALNUM
2282 | jb >6
2283 | ja >9
2284 | fild dword [BASE+RA*8-8]
2285 | jmp >7
2286 |.else
2287 | jae >9
2288 |.endif
2289 |6:
2290 | fld qword [BASE+RA*8-8]
2291 |7:
2292 | fucomi st1; fcmovop st1; fpop1
2293 | add RA, 1
2294 | jmp <5
2295 |.endif
2296 |.endmacro 2278 |.endmacro
2297 | 2279 |
2298 | math_minmax math_min, cmovg, fcmovnbe, minsd 2280 | math_minmax math_min, cmovg, minsd
2299 | math_minmax math_max, cmovl, fcmovbe, maxsd 2281 | math_minmax math_max, cmovl, maxsd
2300 |.if not SSE
2301 |9:
2302 | fpop; jmp ->fff_fallback
2303 |.endif
2304 | 2282 |
2305 |//-- String library ----------------------------------------------------- 2283 |//-- String library -----------------------------------------------------
2306 | 2284 |
2307 |.ffunc_1 string_len
2308 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2309 | mov STR:RB, [BASE]
2310 |.if DUALNUM
2311 | mov RB, dword STR:RB->len; jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2314 |.else
2315 | fild dword STR:RB->len; jmp ->fff_resn
2316 |.endif
2317 |
2318 |.ffunc string_byte // Only handle the 1-arg case here. 2285 |.ffunc string_byte // Only handle the 1-arg case here.
2319 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2286 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2320 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2287 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2325,10 +2292,8 @@ static void build_subroutines(BuildCtx *ctx)
2325 | movzx RB, byte STR:RB[1] 2292 | movzx RB, byte STR:RB[1]
2326 |.if DUALNUM 2293 |.if DUALNUM
2327 | jmp ->fff_resi 2294 | jmp ->fff_resi
2328 |.elif SSE
2329 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2330 |.else 2295 |.else
2331 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2296 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2332 |.endif 2297 |.endif
2333 | 2298 |
2334 |.ffunc string_char // Only handle the 1-arg case here. 2299 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2340,16 +2305,11 @@ static void build_subroutines(BuildCtx *ctx)
2340 | mov RB, dword [BASE] 2305 | mov RB, dword [BASE]
2341 | cmp RB, 255; ja ->fff_fallback 2306 | cmp RB, 255; ja ->fff_fallback
2342 | mov TMP2, RB 2307 | mov TMP2, RB
2343 |.elif SSE 2308 |.else
2344 | jae ->fff_fallback 2309 | jae ->fff_fallback
2345 | cvttsd2si RB, qword [BASE] 2310 | cvttsd2si RB, qword [BASE]
2346 | cmp RB, 255; ja ->fff_fallback 2311 | cmp RB, 255; ja ->fff_fallback
2347 | mov TMP2, RB 2312 | mov TMP2, RB
2348 |.else
2349 | jae ->fff_fallback
2350 | fld qword [BASE]
2351 | fistp TMP2
2352 | cmp TMP2, 255; ja ->fff_fallback
2353 |.endif 2313 |.endif
2354 |.if X64 2314 |.if X64
2355 | mov TMP3, 1 2315 | mov TMP3, 1
@@ -2370,6 +2330,7 @@ static void build_subroutines(BuildCtx *ctx)
2370 |.endif 2330 |.endif
2371 | mov SAVE_PC, PC 2331 | mov SAVE_PC, PC
2372 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2332 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2333 |->fff_resstr:
2373 | // GCstr * returned in eax (RD). 2334 | // GCstr * returned in eax (RD).
2374 | mov BASE, L:RB->base 2335 | mov BASE, L:RB->base
2375 | mov PC, [BASE-4] 2336 | mov PC, [BASE-4]
@@ -2387,14 +2348,10 @@ static void build_subroutines(BuildCtx *ctx)
2387 | jne ->fff_fallback 2348 | jne ->fff_fallback
2388 | mov RB, dword [BASE+16] 2349 | mov RB, dword [BASE+16]
2389 | mov TMP2, RB 2350 | mov TMP2, RB
2390 |.elif SSE 2351 |.else
2391 | jae ->fff_fallback 2352 | jae ->fff_fallback
2392 | cvttsd2si RB, qword [BASE+16] 2353 | cvttsd2si RB, qword [BASE+16]
2393 | mov TMP2, RB 2354 | mov TMP2, RB
2394 |.else
2395 | jae ->fff_fallback
2396 | fld qword [BASE+16]
2397 | fistp TMP2
2398 |.endif 2355 |.endif
2399 |1: 2356 |1:
2400 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2357 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2409,12 +2366,8 @@ static void build_subroutines(BuildCtx *ctx)
2409 | mov RB, STR:RB->len 2366 | mov RB, STR:RB->len
2410 |.if DUALNUM 2367 |.if DUALNUM
2411 | mov RA, dword [BASE+8] 2368 | mov RA, dword [BASE+8]
2412 |.elif SSE
2413 | cvttsd2si RA, qword [BASE+8]
2414 |.else 2369 |.else
2415 | fld qword [BASE+8] 2370 | cvttsd2si RA, qword [BASE+8]
2416 | fistp ARG3
2417 | mov RA, ARG3
2418 |.endif 2371 |.endif
2419 | mov RC, TMP2 2372 | mov RC, TMP2
2420 | cmp RB, RC // len < end? (unsigned compare) 2373 | cmp RB, RC // len < end? (unsigned compare)
@@ -2458,136 +2411,34 @@ static void build_subroutines(BuildCtx *ctx)
2458 | xor RC, RC // Zero length. Any ptr in RB is ok. 2411 | xor RC, RC // Zero length. Any ptr in RB is ok.
2459 | jmp <4 2412 | jmp <4
2460 | 2413 |
2461 |.ffunc string_rep // Only handle the 1-char case inline. 2414 |.macro ffstring_op, name
2462 | ffgccheck 2415 | .ffunc_1 string_ .. name
2463 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2464 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2465 | cmp dword [BASE+12], LJ_TISNUM
2466 | mov STR:RB, [BASE]
2467 |.if DUALNUM
2468 | jne ->fff_fallback
2469 | mov RC, dword [BASE+8]
2470 |.elif SSE
2471 | jae ->fff_fallback
2472 | cvttsd2si RC, qword [BASE+8]
2473 |.else
2474 | jae ->fff_fallback
2475 | fld qword [BASE+8]
2476 | fistp TMP2
2477 | mov RC, TMP2
2478 |.endif
2479 | test RC, RC
2480 | jle ->fff_emptystr // Count <= 0? (or non-int)
2481 | cmp dword STR:RB->len, 1
2482 | jb ->fff_emptystr // Zero length string?
2483 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2484 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2485 | movzx RA, byte STR:RB[1]
2486 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2487 |.if X64
2488 | mov TMP3, RC
2489 |.else
2490 | mov ARG3, RC
2491 |.endif
2492 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2493 | mov [RB], RAL
2494 | add RB, 1
2495 | sub RC, 1
2496 | jnz <1
2497 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2498 | jmp ->fff_newstr
2499 |
2500 |.ffunc_1 string_reverse
2501 | ffgccheck 2416 | ffgccheck
2502 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2417 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2503 | mov STR:RB, [BASE] 2418 | mov L:RB, SAVE_L
2504 | mov RC, STR:RB->len 2419 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2505 | test RC, RC 2420 | mov L:RB->base, BASE
2506 | jz ->fff_emptystr // Zero length string? 2421 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2507 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2422 | mov RC, SBUF:FCARG1->b
2508 | add RB, #STR 2423 | mov SBUF:FCARG1->L, L:RB
2509 | mov TMP2, PC // Need another temp register. 2424 | mov SBUF:FCARG1->p, RC
2510 |.if X64 2425 | mov SAVE_PC, PC
2511 | mov TMP3, RC 2426 | call extern lj_buf_putstr_ .. name .. @8
2512 |.else 2427 | mov FCARG1, eax
2513 | mov ARG3, RC 2428 | call extern lj_buf_tostr@4
2514 |.endif 2429 | jmp ->fff_resstr
2515 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2516 |1:
2517 | movzx RA, byte [RB]
2518 | add RB, 1
2519 | sub RC, 1
2520 | mov [PC+RC], RAL
2521 | jnz <1
2522 | mov RD, PC
2523 | mov PC, TMP2
2524 | jmp ->fff_newstr
2525 |
2526 |.macro ffstring_case, name, lo, hi
2527 | .ffunc_1 name
2528 | ffgccheck
2529 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2530 | mov STR:RB, [BASE]
2531 | mov RC, STR:RB->len
2532 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2533 | add RB, #STR
2534 | mov TMP2, PC // Need another temp register.
2535 |.if X64
2536 | mov TMP3, RC
2537 |.else
2538 | mov ARG3, RC
2539 |.endif
2540 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2541 | jmp >3
2542 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2543 | movzx RA, byte [RB+RC]
2544 | cmp RA, lo
2545 | jb >2
2546 | cmp RA, hi
2547 | ja >2
2548 | xor RA, 0x20
2549 |2:
2550 | mov [PC+RC], RAL
2551 |3:
2552 | sub RC, 1
2553 | jns <1
2554 | mov RD, PC
2555 | mov PC, TMP2
2556 | jmp ->fff_newstr
2557 |.endmacro 2430 |.endmacro
2558 | 2431 |
2559 |ffstring_case string_lower, 0x41, 0x5a 2432 |ffstring_op reverse
2560 |ffstring_case string_upper, 0x61, 0x7a 2433 |ffstring_op lower
2561 | 2434 |ffstring_op upper
2562 |//-- Table library ------------------------------------------------------
2563 |
2564 |.ffunc_1 table_getn
2565 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2566 | mov RB, BASE // Save BASE.
2567 | mov TAB:FCARG1, [BASE]
2568 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2569 | // Length of table returned in eax (RD).
2570 | mov BASE, RB // Restore BASE.
2571 |.if DUALNUM
2572 | mov RB, RD; jmp ->fff_resi
2573 |.elif SSE
2574 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2575 |.else
2576 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2577 |.endif
2578 | 2435 |
2579 |//-- Bit library -------------------------------------------------------- 2436 |//-- Bit library --------------------------------------------------------
2580 | 2437 |
2581 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2582 |
2583 |.macro .ffunc_bit, name, kind, fdef 2438 |.macro .ffunc_bit, name, kind, fdef
2584 | fdef name 2439 | fdef name
2585 |.if kind == 2 2440 |.if kind == 2
2586 |.if SSE
2587 | sseconst_tobit xmm1, RBa 2441 | sseconst_tobit xmm1, RBa
2588 |.else
2589 | mov TMP1, TOBIT_BIAS
2590 |.endif
2591 |.endif 2442 |.endif
2592 | cmp dword [BASE+4], LJ_TISNUM 2443 | cmp dword [BASE+4], LJ_TISNUM
2593 |.if DUALNUM 2444 |.if DUALNUM
@@ -2603,24 +2454,12 @@ static void build_subroutines(BuildCtx *ctx)
2603 |.else 2454 |.else
2604 | jae ->fff_fallback 2455 | jae ->fff_fallback
2605 |.endif 2456 |.endif
2606 |.if SSE
2607 | movsd xmm0, qword [BASE] 2457 | movsd xmm0, qword [BASE]
2608 |.if kind < 2 2458 |.if kind < 2
2609 | sseconst_tobit xmm1, RBa 2459 | sseconst_tobit xmm1, RBa
2610 |.endif 2460 |.endif
2611 | addsd xmm0, xmm1 2461 | addsd xmm0, xmm1
2612 | movd RB, xmm0 2462 | movd RB, xmm0
2613 |.else
2614 | fld qword [BASE]
2615 |.if kind < 2
2616 | mov TMP1, TOBIT_BIAS
2617 |.endif
2618 | fadd TMP1
2619 | fstp FPARG1
2620 |.if kind > 0
2621 | mov RB, ARG1
2622 |.endif
2623 |.endif
2624 |2: 2463 |2:
2625 |.endmacro 2464 |.endmacro
2626 | 2465 |
@@ -2629,15 +2468,7 @@ static void build_subroutines(BuildCtx *ctx)
2629 |.endmacro 2468 |.endmacro
2630 | 2469 |
2631 |.ffunc_bit bit_tobit, 0 2470 |.ffunc_bit bit_tobit, 0
2632 |.if DUALNUM or SSE
2633 |.if not SSE
2634 | mov RB, ARG1
2635 |.endif
2636 | jmp ->fff_resbit 2471 | jmp ->fff_resbit
2637 |.else
2638 | fild ARG1
2639 | jmp ->fff_resn
2640 |.endif
2641 | 2472 |
2642 |.macro .ffunc_bit_op, name, ins 2473 |.macro .ffunc_bit_op, name, ins
2643 | .ffunc_bit name, 2 2474 | .ffunc_bit name, 2
@@ -2657,17 +2488,10 @@ static void build_subroutines(BuildCtx *ctx)
2657 |.else 2488 |.else
2658 | jae ->fff_fallback_bit_op 2489 | jae ->fff_fallback_bit_op
2659 |.endif 2490 |.endif
2660 |.if SSE
2661 | movsd xmm0, qword [RD] 2491 | movsd xmm0, qword [RD]
2662 | addsd xmm0, xmm1 2492 | addsd xmm0, xmm1
2663 | movd RA, xmm0 2493 | movd RA, xmm0
2664 | ins RB, RA 2494 | ins RB, RA
2665 |.else
2666 | fld qword [RD]
2667 | fadd TMP1
2668 | fstp FPARG1
2669 | ins RB, ARG1
2670 |.endif
2671 | sub RD, 8 2495 | sub RD, 8
2672 | jmp <1 2496 | jmp <1
2673 |.endmacro 2497 |.endmacro
@@ -2684,15 +2508,10 @@ static void build_subroutines(BuildCtx *ctx)
2684 | not RB 2508 | not RB
2685 |.if DUALNUM 2509 |.if DUALNUM
2686 | jmp ->fff_resbit 2510 | jmp ->fff_resbit
2687 |.elif SSE 2511 |.else
2688 |->fff_resbit: 2512 |->fff_resbit:
2689 | cvtsi2sd xmm0, RB 2513 | cvtsi2sd xmm0, RB
2690 | jmp ->fff_resxmm0 2514 | jmp ->fff_resxmm0
2691 |.else
2692 |->fff_resbit:
2693 | mov ARG1, RB
2694 | fild ARG1
2695 | jmp ->fff_resn
2696 |.endif 2515 |.endif
2697 | 2516 |
2698 |->fff_fallback_bit_op: 2517 |->fff_fallback_bit_op:
@@ -2705,22 +2524,13 @@ static void build_subroutines(BuildCtx *ctx)
2705 | // Note: no inline conversion from number for 2nd argument! 2524 | // Note: no inline conversion from number for 2nd argument!
2706 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2525 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2707 | mov RA, dword [BASE+8] 2526 | mov RA, dword [BASE+8]
2708 |.elif SSE 2527 |.else
2709 | .ffunc_nnsse name 2528 | .ffunc_nnsse name
2710 | sseconst_tobit xmm2, RBa 2529 | sseconst_tobit xmm2, RBa
2711 | addsd xmm0, xmm2 2530 | addsd xmm0, xmm2
2712 | addsd xmm1, xmm2 2531 | addsd xmm1, xmm2
2713 | movd RB, xmm0 2532 | movd RB, xmm0
2714 | movd RA, xmm1 2533 | movd RA, xmm1
2715 |.else
2716 | .ffunc_nn name
2717 | mov TMP1, TOBIT_BIAS
2718 | fadd TMP1
2719 | fstp FPARG3
2720 | fadd TMP1
2721 | fstp FPARG1
2722 | mov RA, ARG3
2723 | mov RB, ARG1
2724 |.endif 2534 |.endif
2725 | ins RB, cl // Assumes RA is ecx. 2535 | ins RB, cl // Assumes RA is ecx.
2726 | jmp ->fff_resbit 2536 | jmp ->fff_resbit
@@ -2854,7 +2664,7 @@ static void build_subroutines(BuildCtx *ctx)
2854 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2664 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2855 | mov FCARG1, L:RB 2665 | mov FCARG1, L:RB
2856 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2666 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2857 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2667 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2858 |3: 2668 |3:
2859 | mov BASE, L:RB->base 2669 | mov BASE, L:RB->base
2860 |4: 2670 |4:
@@ -2925,6 +2735,79 @@ static void build_subroutines(BuildCtx *ctx)
2925 | add NARGS:RD, 1 2735 | add NARGS:RD, 1
2926 | jmp RBa 2736 | jmp RBa
2927 | 2737 |
2738 |->cont_stitch: // Trace stitching.
2739 |.if JIT
2740 | // BASE = base, RC = result, RB = mbase
2741 | mov TRACE:RA, [RB-24] // Save previous trace.
2742 | mov TMP1, TRACE:RA
2743 | mov TMP3, DISPATCH // Need one more register.
2744 | mov DISPATCH, MULTRES
2745 | movzx RA, PC_RA
2746 | lea RA, [BASE+RA*8] // Call base.
2747 | sub DISPATCH, 1
2748 | jz >2
2749 |1: // Move results down.
2750 |.if X64
2751 | mov RBa, [RC]
2752 | mov [RA], RBa
2753 |.else
2754 | mov RB, [RC]
2755 | mov [RA], RB
2756 | mov RB, [RC+4]
2757 | mov [RA+4], RB
2758 |.endif
2759 | add RC, 8
2760 | add RA, 8
2761 | sub DISPATCH, 1
2762 | jnz <1
2763 |2:
2764 | movzx RC, PC_RA
2765 | movzx RB, PC_RB
2766 | add RC, RB
2767 | lea RC, [BASE+RC*8-8]
2768 |3:
2769 | cmp RC, RA
2770 | ja >9 // More results wanted?
2771 |
2772 | mov DISPATCH, TMP3
2773 | mov TRACE:RD, TMP1 // Get previous trace.
2774 | movzx RB, word TRACE:RD->traceno
2775 | movzx RD, word TRACE:RD->link
2776 | cmp RD, RB
2777 | je ->cont_nop // Blacklisted.
2778 | test RD, RD
2779 | jne =>BC_JLOOP // Jump to stitched trace.
2780 |
2781 | // Stitch a new trace to the previous trace.
2782 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2783 | mov L:RB, SAVE_L
2784 | mov L:RB->base, BASE
2785 | mov FCARG2, PC
2786 | lea FCARG1, [DISPATCH+GG_DISP2J]
2787 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2788 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2789 | mov BASE, L:RB->base
2790 | jmp ->cont_nop
2791 |
2792 |9: // Fill up results with nil.
2793 | mov dword [RA+4], LJ_TNIL
2794 | add RA, 8
2795 | jmp <3
2796 |.endif
2797 |
2798 |->vm_profhook: // Dispatch target for profiler hook.
2799#if LJ_HASPROFILE
2800 | mov L:RB, SAVE_L
2801 | mov L:RB->base, BASE
2802 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2803 | mov FCARG1, L:RB
2804 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2805 | mov BASE, L:RB->base
2806 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2807 | sub PC, 4
2808 | jmp ->cont_nop
2809#endif
2810 |
2928 |//----------------------------------------------------------------------- 2811 |//-----------------------------------------------------------------------
2929 |//-- Trace exit handler ------------------------------------------------- 2812 |//-- Trace exit handler -------------------------------------------------
2930 |//----------------------------------------------------------------------- 2813 |//-----------------------------------------------------------------------
@@ -2977,10 +2860,9 @@ static void build_subroutines(BuildCtx *ctx)
2977 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2860 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2978 |.endif 2861 |.endif
2979 | // Caveat: RB is ebp. 2862 | // Caveat: RB is ebp.
2980 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2863 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2981 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2864 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2982 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2865 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2983 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2984 | mov L:RB->base, BASE 2866 | mov L:RB->base, BASE
2985 |.if X64WIN 2867 |.if X64WIN
2986 | lea CARG2, [rsp+4*8] 2868 | lea CARG2, [rsp+4*8]
@@ -2990,6 +2872,7 @@ static void build_subroutines(BuildCtx *ctx)
2990 | lea FCARG2, [esp+16] 2872 | lea FCARG2, [esp+16]
2991 |.endif 2873 |.endif
2992 | lea FCARG1, [DISPATCH+GG_DISP2J] 2874 | lea FCARG1, [DISPATCH+GG_DISP2J]
2875 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2993 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2876 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2994 | // MULTRES or negated error code returned in eax (RD). 2877 | // MULTRES or negated error code returned in eax (RD).
2995 | mov RAa, L:RB->cframe 2878 | mov RAa, L:RB->cframe
@@ -3036,12 +2919,14 @@ static void build_subroutines(BuildCtx *ctx)
3036 | mov r13, TMPa 2919 | mov r13, TMPa
3037 | mov r12, TMPQ 2920 | mov r12, TMPQ
3038 |.endif 2921 |.endif
3039 | test RD, RD; js >3 // Check for error from exit. 2922 | test RD, RD; js >9 // Check for error from exit.
2923 | mov L:RB, SAVE_L
3040 | mov MULTRES, RD 2924 | mov MULTRES, RD
3041 | mov LFUNC:KBASE, [BASE-8] 2925 | mov LFUNC:KBASE, [BASE-8]
3042 | mov KBASE, LFUNC:KBASE->pc 2926 | mov KBASE, LFUNC:KBASE->pc
3043 | mov KBASE, [KBASE+PC2PROTO(k)] 2927 | mov KBASE, [KBASE+PC2PROTO(k)]
3044 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2928 | mov L:RB->base, BASE
2929 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3045 | set_vmstate INTERP 2930 | set_vmstate INTERP
3046 | // Modified copy of ins_next which handles function header dispatch, too. 2931 | // Modified copy of ins_next which handles function header dispatch, too.
3047 | mov RC, [PC] 2932 | mov RC, [PC]
@@ -3050,16 +2935,31 @@ static void build_subroutines(BuildCtx *ctx)
3050 | add PC, 4 2935 | add PC, 4
3051 | shr RC, 16 2936 | shr RC, 16
3052 | cmp OP, BC_FUNCF // Function header? 2937 | cmp OP, BC_FUNCF // Function header?
3053 | jb >2 2938 | jb >3
3054 | mov RC, MULTRES // RC/RD holds nres+1. 2939 | cmp OP, BC_FUNCC+2 // Fast function?
2940 | jae >4
3055 |2: 2941 |2:
2942 | mov RC, MULTRES // RC/RD holds nres+1.
2943 |3:
3056 |.if X64 2944 |.if X64
3057 | jmp aword [DISPATCH+OP*8] 2945 | jmp aword [DISPATCH+OP*8]
3058 |.else 2946 |.else
3059 | jmp aword [DISPATCH+OP*4] 2947 | jmp aword [DISPATCH+OP*4]
3060 |.endif 2948 |.endif
3061 | 2949 |
3062 |3: // Rethrow error from the right C frame. 2950 |4: // Check frame below fast function.
2951 | mov RC, [BASE-4]
2952 | test RC, FRAME_TYPE
2953 | jnz <2 // Trace stitching continuation?
2954 | // Otherwise set KBASE for Lua function below fast function.
2955 | movzx RC, byte [RC-3]
2956 | not RCa
2957 | mov LFUNC:KBASE, [BASE+RC*8-8]
2958 | mov KBASE, LFUNC:KBASE->pc
2959 | mov KBASE, [KBASE+PC2PROTO(k)]
2960 | jmp <2
2961 |
2962 |9: // Rethrow error from the right C frame.
3063 | neg RD 2963 | neg RD
3064 | mov FCARG1, L:RB 2964 | mov FCARG1, L:RB
3065 | mov FCARG2, RD 2965 | mov FCARG2, RD
@@ -3071,27 +2971,18 @@ static void build_subroutines(BuildCtx *ctx)
3071 |//----------------------------------------------------------------------- 2971 |//-----------------------------------------------------------------------
3072 | 2972 |
3073 |// FP value rounding. Called by math.floor/math.ceil fast functions 2973 |// FP value rounding. Called by math.floor/math.ceil fast functions
3074 |// and from JIT code. 2974 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3075 | 2975 |.macro vm_round, name, mode, cond
3076 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2976 |->name:
3077 |.macro vm_round_x87, mode1, mode2 2977 |.if not X64 and cond
3078 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 2978 | movsd xmm0, qword [esp+4]
3079 | mov [esp+8], eax 2979 | call ->name .. _sse
3080 | mov ax, mode1 2980 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
3081 | or ax, [esp+4] 2981 | fld qword [esp+4]
3082 |.if mode2 ~= 0xffff
3083 | and ax, mode2
3084 |.endif
3085 | mov [esp+6], ax
3086 | fldcw word [esp+6]
3087 | frndint
3088 | fldcw word [esp+4]
3089 | mov eax, [esp+8]
3090 | ret 2982 | ret
3091 |.endmacro 2983 |.endif
3092 | 2984 |
3093 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2985 |->name .. _sse:
3094 |.macro vm_round_sse, mode
3095 | sseconst_abs xmm2, RDa 2986 | sseconst_abs xmm2, RDa
3096 | sseconst_2p52 xmm3, RDa 2987 | sseconst_2p52 xmm3, RDa
3097 | movaps xmm1, xmm0 2988 | movaps xmm1, xmm0
@@ -3127,22 +3018,12 @@ static void build_subroutines(BuildCtx *ctx)
3127 | ret 3018 | ret
3128 |.endmacro 3019 |.endmacro
3129 | 3020 |
3130 |.macro vm_round, name, ssemode, mode1, mode2 3021 | vm_round vm_floor, 0, 1
3131 |->name: 3022 | vm_round vm_ceil, 1, JIT
3132 |.if not SSE 3023 | vm_round vm_trunc, 2, JIT
3133 | vm_round_x87 mode1, mode2
3134 |.endif
3135 |->name .. _sse:
3136 | vm_round_sse ssemode
3137 |.endmacro
3138 |
3139 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3140 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3141 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3142 | 3024 |
3143 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3025 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3144 |->vm_mod: 3026 |->vm_mod:
3145 |.if SSE
3146 |// Args in xmm0/xmm1, return value in xmm0. 3027 |// Args in xmm0/xmm1, return value in xmm0.
3147 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3028 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3148 | movaps xmm5, xmm0 3029 | movaps xmm5, xmm0
@@ -3170,172 +3051,6 @@ static void build_subroutines(BuildCtx *ctx)
3170 | movaps xmm0, xmm5 3051 | movaps xmm0, xmm5
3171 | subsd xmm0, xmm1 3052 | subsd xmm0, xmm1
3172 | ret 3053 | ret
3173 |.else
3174 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3175 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3176 | fld st1
3177 | fdiv st1
3178 | fnstcw word [esp+4]
3179 | mov ax, 0x0400
3180 | or ax, [esp+4]
3181 | and ax, 0xf7ff
3182 | mov [esp+6], ax
3183 | fldcw word [esp+6]
3184 | frndint
3185 | fldcw word [esp+4]
3186 | fmulp st1
3187 | fsubp st1
3188 | ret
3189 |.endif
3190 |
3191 |// FP log2(x). Called by math.log(x, base).
3192 |->vm_log2:
3193 |.if X64WIN
3194 | movsd qword [rsp+8], xmm0 // Use scratch area.
3195 | fld1
3196 | fld qword [rsp+8]
3197 | fyl2x
3198 | fstp qword [rsp+8]
3199 | movsd xmm0, qword [rsp+8]
3200 |.elif X64
3201 | movsd qword [rsp-8], xmm0 // Use red zone.
3202 | fld1
3203 | fld qword [rsp-8]
3204 | fyl2x
3205 | fstp qword [rsp-8]
3206 | movsd xmm0, qword [rsp-8]
3207 |.else
3208 | fld1
3209 | fld qword [esp+4]
3210 | fyl2x
3211 |.endif
3212 | ret
3213 |
3214 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3215 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3216 |// Caveat: needs 3 slots on x87 stack!
3217 |->vm_exp_x87:
3218 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3219 |->vm_exp2_x87:
3220 | .if X64WIN
3221 | .define expscratch, dword [rsp+8] // Use scratch area.
3222 | .elif X64
3223 | .define expscratch, dword [rsp-8] // Use red zone.
3224 | .else
3225 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3226 | .endif
3227 | fst expscratch // Caveat: overwrites ARG1.
3228 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3229 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3230 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3231 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3232 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3233 |1:
3234 | ret
3235 |2:
3236 | fpop; fldz; ret
3237 |
3238 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3239 |// and vm_arith.
3240 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3241 |// Caveat: needs 3 slots on x87 stack!
3242 |->vm_pow:
3243 |.if not SSE
3244 | fist dword [esp+4] // Store/reload int before comparison.
3245 | fild dword [esp+4] // Integral exponent used in vm_powi.
3246 | fucomip st1
3247 | jnz >8 // Branch for FP exponents.
3248 | jp >9 // Branch for NaN exponent.
3249 | fpop // Pop y and fallthrough to vm_powi.
3250 |
3251 |// FP/int power function x^i. Arg1/ret on x87 stack.
3252 |// Arg2 (int) on C stack. RC (eax) modified.
3253 |// Caveat: needs 2 slots on x87 stack!
3254 | mov eax, [esp+4]
3255 | cmp eax, 1; jle >6 // i<=1?
3256 | // Now 1 < (unsigned)i <= 0x80000000.
3257 |1: // Handle leading zeros.
3258 | test eax, 1; jnz >2
3259 | fmul st0
3260 | shr eax, 1
3261 | jmp <1
3262 |2:
3263 | shr eax, 1; jz >5
3264 | fdup
3265 |3: // Handle trailing bits.
3266 | fmul st0
3267 | shr eax, 1; jz >4
3268 | jnc <3
3269 | fmul st1, st0
3270 | jmp <3
3271 |4:
3272 | fmulp st1
3273 |5:
3274 | ret
3275 |6:
3276 | je <5 // x^1 ==> x
3277 | jb >7
3278 | fld1; fdivrp st1
3279 | neg eax
3280 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3281 | jmp <1 // x^-i ==> (1/x)^i
3282 |7:
3283 | fpop; fld1 // x^0 ==> 1
3284 | ret
3285 |
3286 |8: // FP/FP power function x^y.
3287 | fst dword [esp+4]
3288 | fxch
3289 | fst dword [esp+8]
3290 | mov eax, [esp+4]; shl eax, 1
3291 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3292 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3293 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3294 | fyl2x
3295 | jmp ->vm_exp2raw
3296 |
3297 |9: // Handle x^NaN.
3298 | fld1
3299 | fucomip st2
3300 | je >1 // 1^NaN ==> 1
3301 | fxch // x^NaN ==> NaN
3302 |1:
3303 | fpop
3304 | ret
3305 |
3306 |2: // Handle x^+-Inf.
3307 | fabs
3308 | fld1
3309 | fucomip st1
3310 | je >3 // +-1^+-Inf ==> 1
3311 | fpop; fabs; fldz; mov eax, 0; setc al
3312 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3313 | fxch
3314 |3:
3315 | fpop1; fabs
3316 | ret
3317 |
3318 |4: // Handle +-0^y or +-Inf^y.
3319 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3320 | fpop; fpop
3321 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3322 | fldz // y < 0, +-Inf^y ==> 0
3323 | ret
3324 |5:
3325 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3326 | fld dword [esp+4]
3327 | ret
3328 |.endif
3329 |
3330 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3331 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3332 |->vm_pow_sse:
3333 | cvtsd2si eax, xmm1
3334 | cvtsi2sd xmm2, eax
3335 | ucomisd xmm1, xmm2
3336 | jnz >8 // Branch for FP exponents.
3337 | jp >9 // Branch for NaN exponent.
3338 | // Fallthrough to vm_powi_sse.
3339 | 3054 |
3340 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3055 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3341 |->vm_powi_sse: 3056 |->vm_powi_sse:
@@ -3372,287 +3087,6 @@ static void build_subroutines(BuildCtx *ctx)
3372 | sseconst_1 xmm0, RDa 3087 | sseconst_1 xmm0, RDa
3373 | ret 3088 | ret
3374 | 3089 |
3375 |8: // FP/FP power function x^y.
3376 |.if X64
3377 | movd rax, xmm1; shl rax, 1
3378 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3379 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3380 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3381 | .if X64WIN
3382 | movsd qword [rsp+16], xmm1 // Use scratch area.
3383 | movsd qword [rsp+8], xmm0
3384 | fld qword [rsp+16]
3385 | fld qword [rsp+8]
3386 | .else
3387 | movsd qword [rsp-16], xmm1 // Use red zone.
3388 | movsd qword [rsp-8], xmm0
3389 | fld qword [rsp-16]
3390 | fld qword [rsp-8]
3391 | .endif
3392 |.else
3393 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3394 | movsd qword [esp+4], xmm0
3395 | cmp dword [esp+12], 0; jne >1
3396 | mov eax, [esp+16]; shl eax, 1
3397 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3398 |1:
3399 | cmp dword [esp+4], 0; jne >1
3400 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3401 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3402 |1:
3403 | fld qword [esp+12]
3404 | fld qword [esp+4]
3405 |.endif
3406 | fyl2x // y*log2(x)
3407 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3408 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3409 |.if X64WIN
3410 | fstp qword [rsp+8] // Use scratch area.
3411 | movsd xmm0, qword [rsp+8]
3412 |.elif X64
3413 | fstp qword [rsp-8] // Use red zone.
3414 | movsd xmm0, qword [rsp-8]
3415 |.else
3416 | fstp qword [esp+4] // Needs 8 byte scratch area.
3417 | movsd xmm0, qword [esp+4]
3418 |.endif
3419 | ret
3420 |
3421 |9: // Handle x^NaN.
3422 | sseconst_1 xmm2, RDa
3423 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3424 | movaps xmm0, xmm1 // x^NaN ==> NaN
3425 |1:
3426 | ret
3427 |
3428 |2: // Handle x^+-Inf.
3429 | sseconst_abs xmm2, RDa
3430 | andpd xmm0, xmm2 // |x|
3431 | sseconst_1 xmm2, RDa
3432 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3433 | movmskpd eax, xmm1
3434 | xorps xmm0, xmm0
3435 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3436 |3:
3437 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3438 | ret
3439 |
3440 |4: // Handle +-0^y.
3441 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3442 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3443 | ret
3444 |
3445 |5: // Handle +-Inf^y.
3446 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3447 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3448 | ret
3449 |
3450 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3451 |// Computes fpm(x) for extended math functions. ORDER FPM.
3452 |->vm_foldfpm:
3453 |.if JIT
3454 |.if X64
3455 | .if X64WIN
3456 | .define fpmop, CARG2d
3457 | .else
3458 | .define fpmop, CARG1d
3459 | .endif
3460 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3461 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3462 | sqrtsd xmm0, xmm0; ret
3463 |2:
3464 | .if X64WIN
3465 | movsd qword [rsp+8], xmm0 // Use scratch area.
3466 | fld qword [rsp+8]
3467 | .else
3468 | movsd qword [rsp-8], xmm0 // Use red zone.
3469 | fld qword [rsp-8]
3470 | .endif
3471 | cmp fpmop, 5; ja >2
3472 | .if X64WIN; pop rax; .endif
3473 | je >1
3474 | call ->vm_exp_x87
3475 | .if X64WIN; push rax; .endif
3476 | jmp >7
3477 |1:
3478 | call ->vm_exp2_x87
3479 | .if X64WIN; push rax; .endif
3480 | jmp >7
3481 |2: ; cmp fpmop, 7; je >1; ja >2
3482 | fldln2; fxch; fyl2x; jmp >7
3483 |1: ; fld1; fxch; fyl2x; jmp >7
3484 |2: ; cmp fpmop, 9; je >1; ja >2
3485 | fldlg2; fxch; fyl2x; jmp >7
3486 |1: ; fsin; jmp >7
3487 |2: ; cmp fpmop, 11; je >1; ja >9
3488 | fcos; jmp >7
3489 |1: ; fptan; fpop
3490 |7:
3491 | .if X64WIN
3492 | fstp qword [rsp+8] // Use scratch area.
3493 | movsd xmm0, qword [rsp+8]
3494 | .else
3495 | fstp qword [rsp-8] // Use red zone.
3496 | movsd xmm0, qword [rsp-8]
3497 | .endif
3498 | ret
3499 |.else // x86 calling convention.
3500 | .define fpmop, eax
3501 |.if SSE
3502 | mov fpmop, [esp+12]
3503 | movsd xmm0, qword [esp+4]
3504 | cmp fpmop, 1; je >1; ja >2
3505 | call ->vm_floor; jmp >7
3506 |1: ; call ->vm_ceil; jmp >7
3507 |2: ; cmp fpmop, 3; je >1; ja >2
3508 | call ->vm_trunc; jmp >7
3509 |1:
3510 | sqrtsd xmm0, xmm0
3511 |7:
3512 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3513 | fld qword [esp+4]
3514 | ret
3515 |2: ; fld qword [esp+4]
3516 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3517 |2: ; cmp fpmop, 7; je >1; ja >2
3518 | fldln2; fxch; fyl2x; ret
3519 |1: ; fld1; fxch; fyl2x; ret
3520 |2: ; cmp fpmop, 9; je >1; ja >2
3521 | fldlg2; fxch; fyl2x; ret
3522 |1: ; fsin; ret
3523 |2: ; cmp fpmop, 11; je >1; ja >9
3524 | fcos; ret
3525 |1: ; fptan; fpop; ret
3526 |.else
3527 | mov fpmop, [esp+12]
3528 | fld qword [esp+4]
3529 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3530 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3531 | fsqrt; ret
3532 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3533 | cmp fpmop, 7; je >1; ja >2
3534 | fldln2; fxch; fyl2x; ret
3535 |1: ; fld1; fxch; fyl2x; ret
3536 |2: ; cmp fpmop, 9; je >1; ja >2
3537 | fldlg2; fxch; fyl2x; ret
3538 |1: ; fsin; ret
3539 |2: ; cmp fpmop, 11; je >1; ja >9
3540 | fcos; ret
3541 |1: ; fptan; fpop; ret
3542 |.endif
3543 |.endif
3544 |9: ; int3 // Bad fpm.
3545 |.endif
3546 |
3547 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3548 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3549 |// and basic math functions. ORDER ARITH
3550 |->vm_foldarith:
3551 |.if X64
3552 |
3553 | .if X64WIN
3554 | .define foldop, CARG3d
3555 | .else
3556 | .define foldop, CARG1d
3557 | .endif
3558 | cmp foldop, 1; je >1; ja >2
3559 | addsd xmm0, xmm1; ret
3560 |1: ; subsd xmm0, xmm1; ret
3561 |2: ; cmp foldop, 3; je >1; ja >2
3562 | mulsd xmm0, xmm1; ret
3563 |1: ; divsd xmm0, xmm1; ret
3564 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3565 | cmp foldop, 7; je >1; ja >2
3566 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3567 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3568 |2: ; cmp foldop, 9; ja >2
3569 |.if X64WIN
3570 | movsd qword [rsp+8], xmm0 // Use scratch area.
3571 | movsd qword [rsp+16], xmm1
3572 | fld qword [rsp+8]
3573 | fld qword [rsp+16]
3574 |.else
3575 | movsd qword [rsp-8], xmm0 // Use red zone.
3576 | movsd qword [rsp-16], xmm1
3577 | fld qword [rsp-8]
3578 | fld qword [rsp-16]
3579 |.endif
3580 | je >1
3581 | fpatan
3582 |7:
3583 |.if X64WIN
3584 | fstp qword [rsp+8] // Use scratch area.
3585 | movsd xmm0, qword [rsp+8]
3586 |.else
3587 | fstp qword [rsp-8] // Use red zone.
3588 | movsd xmm0, qword [rsp-8]
3589 |.endif
3590 | ret
3591 |1: ; fxch; fscale; fpop1; jmp <7
3592 |2: ; cmp foldop, 11; je >1; ja >9
3593 | minsd xmm0, xmm1; ret
3594 |1: ; maxsd xmm0, xmm1; ret
3595 |9: ; int3 // Bad op.
3596 |
3597 |.elif SSE // x86 calling convention with SSE ops.
3598 |
3599 | .define foldop, eax
3600 | mov foldop, [esp+20]
3601 | movsd xmm0, qword [esp+4]
3602 | movsd xmm1, qword [esp+12]
3603 | cmp foldop, 1; je >1; ja >2
3604 | addsd xmm0, xmm1
3605 |7:
3606 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3607 | fld qword [esp+4]
3608 | ret
3609 |1: ; subsd xmm0, xmm1; jmp <7
3610 |2: ; cmp foldop, 3; je >1; ja >2
3611 | mulsd xmm0, xmm1; jmp <7
3612 |1: ; divsd xmm0, xmm1; jmp <7
3613 |2: ; cmp foldop, 5
3614 | je >1; ja >2
3615 | call ->vm_mod; jmp <7
3616 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3617 |2: ; cmp foldop, 7; je >1; ja >2
3618 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3619 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3620 |2: ; cmp foldop, 9; ja >2
3621 | fld qword [esp+4] // Reload from stack
3622 | fld qword [esp+12]
3623 | je >1
3624 | fpatan; ret
3625 |1: ; fxch; fscale; fpop1; ret
3626 |2: ; cmp foldop, 11; je >1; ja >9
3627 | minsd xmm0, xmm1; jmp <7
3628 |1: ; maxsd xmm0, xmm1; jmp <7
3629 |9: ; int3 // Bad op.
3630 |
3631 |.else // x86 calling convention with x87 ops.
3632 |
3633 | mov eax, [esp+20]
3634 | fld qword [esp+4]
3635 | fld qword [esp+12]
3636 | cmp eax, 1; je >1; ja >2
3637 | faddp st1; ret
3638 |1: ; fsubp st1; ret
3639 |2: ; cmp eax, 3; je >1; ja >2
3640 | fmulp st1; ret
3641 |1: ; fdivp st1; ret
3642 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3643 | cmp eax, 7; je >1; ja >2
3644 | fpop; fchs; ret
3645 |1: ; fpop; fabs; ret
3646 |2: ; cmp eax, 9; je >1; ja >2
3647 | fpatan; ret
3648 |1: ; fxch; fscale; fpop1; ret
3649 |2: ; cmp eax, 11; je >1; ja >9
3650 | fucomi st1; fcmovnbe st1; fpop1; ret
3651 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3652 |9: ; int3 // Bad op.
3653 |
3654 |.endif
3655 |
3656 |//----------------------------------------------------------------------- 3090 |//-----------------------------------------------------------------------
3657 |//-- Miscellaneous functions -------------------------------------------- 3091 |//-- Miscellaneous functions --------------------------------------------
3658 |//----------------------------------------------------------------------- 3092 |//-----------------------------------------------------------------------
@@ -3663,6 +3097,7 @@ static void build_subroutines(BuildCtx *ctx)
3663 | mov eax, CARG1d 3097 | mov eax, CARG1d
3664 | .if X64WIN; push rsi; mov rsi, CARG2; .endif 3098 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3665 | push rbx 3099 | push rbx
3100 | xor ecx, ecx
3666 | cpuid 3101 | cpuid
3667 | mov [rsi], eax 3102 | mov [rsi], eax
3668 | mov [rsi+4], ebx 3103 | mov [rsi+4], ebx
@@ -3686,6 +3121,7 @@ static void build_subroutines(BuildCtx *ctx)
3686 | mov eax, [esp+4] // Argument 1 is function number. 3121 | mov eax, [esp+4] // Argument 1 is function number.
3687 | push edi 3122 | push edi
3688 | push ebx 3123 | push ebx
3124 | xor ecx, ecx
3689 | cpuid 3125 | cpuid
3690 | mov edi, [esp+16] // Argument 2 is result area. 3126 | mov edi, [esp+16] // Argument 2 is result area.
3691 | mov [edi], eax 3127 | mov [edi], eax
@@ -3963,19 +3399,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | // RA is a number. 3399 | // RA is a number.
3964 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3400 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3965 | // RA is a number, RD is an integer. 3401 | // RA is a number, RD is an integer.
3966 |.if SSE
3967 | cvtsi2sd xmm0, dword [BASE+RD*8] 3402 | cvtsi2sd xmm0, dword [BASE+RD*8]
3968 | jmp >2 3403 | jmp >2
3969 |.else
3970 | fld qword [BASE+RA*8]
3971 | fild dword [BASE+RD*8]
3972 | jmp >3
3973 |.endif
3974 | 3404 |
3975 |8: // RA is an integer, RD is not an integer. 3405 |8: // RA is an integer, RD is not an integer.
3976 | ja ->vmeta_comp 3406 | ja ->vmeta_comp
3977 | // RA is an integer, RD is a number. 3407 | // RA is an integer, RD is a number.
3978 |.if SSE
3979 | cvtsi2sd xmm1, dword [BASE+RA*8] 3408 | cvtsi2sd xmm1, dword [BASE+RA*8]
3980 | movsd xmm0, qword [BASE+RD*8] 3409 | movsd xmm0, qword [BASE+RD*8]
3981 | add PC, 4 3410 | add PC, 4
@@ -3983,29 +3412,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3983 | jmp_comp jbe, ja, jb, jae, <9 3412 | jmp_comp jbe, ja, jb, jae, <9
3984 | jmp <6 3413 | jmp <6
3985 |.else 3414 |.else
3986 | fild dword [BASE+RA*8]
3987 | jmp >2
3988 |.endif
3989 |.else
3990 | checknum RA, ->vmeta_comp 3415 | checknum RA, ->vmeta_comp
3991 | checknum RD, ->vmeta_comp 3416 | checknum RD, ->vmeta_comp
3992 |.endif 3417 |.endif
3993 |.if SSE
3994 |1: 3418 |1:
3995 | movsd xmm0, qword [BASE+RD*8] 3419 | movsd xmm0, qword [BASE+RD*8]
3996 |2: 3420 |2:
3997 | add PC, 4 3421 | add PC, 4
3998 | ucomisd xmm0, qword [BASE+RA*8] 3422 | ucomisd xmm0, qword [BASE+RA*8]
3999 |3: 3423 |3:
4000 |.else
4001 |1:
4002 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4003 |2:
4004 | fld qword [BASE+RD*8]
4005 |3:
4006 | add PC, 4
4007 | fcomparepp
4008 |.endif
4009 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3424 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4010 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3425 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4011 |.if DUALNUM 3426 |.if DUALNUM
@@ -4045,43 +3460,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4045 | // RD is a number. 3460 | // RD is a number.
4046 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3461 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4047 | // RD is a number, RA is an integer. 3462 | // RD is a number, RA is an integer.
4048 |.if SSE
4049 | cvtsi2sd xmm0, dword [BASE+RA*8] 3463 | cvtsi2sd xmm0, dword [BASE+RA*8]
4050 |.else
4051 | fild dword [BASE+RA*8]
4052 |.endif
4053 | jmp >2 3464 | jmp >2
4054 | 3465 |
4055 |8: // RD is an integer, RA is not an integer. 3466 |8: // RD is an integer, RA is not an integer.
4056 | ja >5 3467 | ja >5
4057 | // RD is an integer, RA is a number. 3468 | // RD is an integer, RA is a number.
4058 |.if SSE
4059 | cvtsi2sd xmm0, dword [BASE+RD*8] 3469 | cvtsi2sd xmm0, dword [BASE+RD*8]
4060 | ucomisd xmm0, qword [BASE+RA*8] 3470 | ucomisd xmm0, qword [BASE+RA*8]
4061 |.else
4062 | fild dword [BASE+RD*8]
4063 | fld qword [BASE+RA*8]
4064 |.endif
4065 | jmp >4 3471 | jmp >4
4066 | 3472 |
4067 |.else 3473 |.else
4068 | cmp RB, LJ_TISNUM; jae >5 3474 | cmp RB, LJ_TISNUM; jae >5
4069 | checknum RA, >5 3475 | checknum RA, >5
4070 |.endif 3476 |.endif
4071 |.if SSE
4072 |1: 3477 |1:
4073 | movsd xmm0, qword [BASE+RA*8] 3478 | movsd xmm0, qword [BASE+RA*8]
4074 |2: 3479 |2:
4075 | ucomisd xmm0, qword [BASE+RD*8] 3480 | ucomisd xmm0, qword [BASE+RD*8]
4076 |4: 3481 |4:
4077 |.else
4078 |1:
4079 | fld qword [BASE+RA*8]
4080 |2:
4081 | fld qword [BASE+RD*8]
4082 |4:
4083 | fcomparepp
4084 |.endif
4085 iseqne_fp: 3482 iseqne_fp:
4086 if (vk) { 3483 if (vk) {
4087 | jp >2 // Unordered means not equal. 3484 | jp >2 // Unordered means not equal.
@@ -4204,39 +3601,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4204 | // RA is a number. 3601 | // RA is a number.
4205 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3602 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4206 | // RA is a number, RD is an integer. 3603 | // RA is a number, RD is an integer.
4207 |.if SSE
4208 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3604 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4209 |.else
4210 | fild dword [KBASE+RD*8]
4211 |.endif
4212 | jmp >2 3605 | jmp >2
4213 | 3606 |
4214 |8: // RA is an integer, RD is a number. 3607 |8: // RA is an integer, RD is a number.
4215 |.if SSE
4216 | cvtsi2sd xmm0, dword [BASE+RA*8] 3608 | cvtsi2sd xmm0, dword [BASE+RA*8]
4217 | ucomisd xmm0, qword [KBASE+RD*8] 3609 | ucomisd xmm0, qword [KBASE+RD*8]
4218 |.else
4219 | fild dword [BASE+RA*8]
4220 | fld qword [KBASE+RD*8]
4221 |.endif
4222 | jmp >4 3610 | jmp >4
4223 |.else 3611 |.else
4224 | cmp RB, LJ_TISNUM; jae >3 3612 | cmp RB, LJ_TISNUM; jae >3
4225 |.endif 3613 |.endif
4226 |.if SSE
4227 |1: 3614 |1:
4228 | movsd xmm0, qword [KBASE+RD*8] 3615 | movsd xmm0, qword [KBASE+RD*8]
4229 |2: 3616 |2:
4230 | ucomisd xmm0, qword [BASE+RA*8] 3617 | ucomisd xmm0, qword [BASE+RA*8]
4231 |4: 3618 |4:
4232 |.else
4233 |1:
4234 | fld qword [KBASE+RD*8]
4235 |2:
4236 | fld qword [BASE+RA*8]
4237 |4:
4238 | fcomparepp
4239 |.endif
4240 goto iseqne_fp; 3619 goto iseqne_fp;
4241 case BC_ISEQP: case BC_ISNEP: 3620 case BC_ISEQP: case BC_ISNEP:
4242 vk = op == BC_ISEQP; 3621 vk = op == BC_ISEQP;
@@ -4287,6 +3666,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4287 | ins_next 3666 | ins_next
4288 break; 3667 break;
4289 3668
3669 case BC_ISTYPE: /* Check that slot RA carries the type tag encoded (negated) in RD. */
3670 | ins_AD // RA = src, RD = -type
3671 | add RD, [BASE+RA*8+4] // RD = tag word of slot RA + (-type); zero iff they are equal.
3672 | jne ->vmeta_istype // Non-zero sum means a tag mismatch: leave via vmeta_istype.
3673 | ins_next
3674 break;
3675 case BC_ISNUM: /* Number check on slot RA; a failed check leaves via vmeta_istype. */
3676 | ins_AD // RA = src, RD = -(TISNUM-1)
3677 | checknum RA, ->vmeta_istype // RD is not referenced in this block; presumably it is consumed by the fallback -- TODO confirm.
3678 | ins_next
3679 break;
3680
4290 /* -- Unary ops --------------------------------------------------------- */ 3681 /* -- Unary ops --------------------------------------------------------- */
4291 3682
4292 case BC_MOV: 3683 case BC_MOV:
@@ -4330,16 +3721,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4330 |.else 3721 |.else
4331 | checknum RD, ->vmeta_unm 3722 | checknum RD, ->vmeta_unm
4332 |.endif 3723 |.endif
4333 |.if SSE
4334 | movsd xmm0, qword [BASE+RD*8] 3724 | movsd xmm0, qword [BASE+RD*8]
4335 | sseconst_sign xmm1, RDa 3725 | sseconst_sign xmm1, RDa
4336 | xorps xmm0, xmm1 3726 | xorps xmm0, xmm1
4337 | movsd qword [BASE+RA*8], xmm0 3727 | movsd qword [BASE+RA*8], xmm0
4338 |.else
4339 | fld qword [BASE+RD*8]
4340 | fchs
4341 | fstp qword [BASE+RA*8]
4342 |.endif
4343 |.if DUALNUM 3728 |.if DUALNUM
4344 | jmp <9 3729 | jmp <9
4345 |.else 3730 |.else
@@ -4355,15 +3740,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4355 |1: 3740 |1:
4356 | mov dword [BASE+RA*8+4], LJ_TISNUM 3741 | mov dword [BASE+RA*8+4], LJ_TISNUM
4357 | mov dword [BASE+RA*8], RD 3742 | mov dword [BASE+RA*8], RD
4358 |.elif SSE 3743 |.else
4359 | xorps xmm0, xmm0 3744 | xorps xmm0, xmm0
4360 | cvtsi2sd xmm0, dword STR:RD->len 3745 | cvtsi2sd xmm0, dword STR:RD->len
4361 |1: 3746 |1:
4362 | movsd qword [BASE+RA*8], xmm0 3747 | movsd qword [BASE+RA*8], xmm0
4363 |.else
4364 | fild dword STR:RD->len
4365 |1:
4366 | fstp qword [BASE+RA*8]
4367 |.endif 3748 |.endif
4368 | ins_next 3749 | ins_next
4369 |2: 3750 |2:
@@ -4381,11 +3762,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4381 | // Length of table returned in eax (RD). 3762 | // Length of table returned in eax (RD).
4382 |.if DUALNUM 3763 |.if DUALNUM
4383 | // Nothing to do. 3764 | // Nothing to do.
4384 |.elif SSE
4385 | cvtsi2sd xmm0, RD
4386 |.else 3765 |.else
4387 | mov ARG1, RD 3766 | cvtsi2sd xmm0, RD
4388 | fild ARG1
4389 |.endif 3767 |.endif
4390 | mov BASE, RB // Restore BASE. 3768 | mov BASE, RB // Restore BASE.
4391 | movzx RA, PC_RA 3769 | movzx RA, PC_RA
@@ -4400,7 +3778,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4400 3778
4401 /* -- Binary ops -------------------------------------------------------- */ 3779 /* -- Binary ops -------------------------------------------------------- */
4402 3780
4403 |.macro ins_arithpre, x87ins, sseins, ssereg 3781 |.macro ins_arithpre, sseins, ssereg
4404 | ins_ABC 3782 | ins_ABC
4405 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3783 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4406 ||switch (vk) { 3784 ||switch (vk) {
@@ -4409,37 +3787,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4409 | .if DUALNUM 3787 | .if DUALNUM
4410 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3788 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4411 | .endif 3789 | .endif
4412 | .if SSE 3790 | movsd xmm0, qword [BASE+RB*8]
4413 | movsd xmm0, qword [BASE+RB*8] 3791 | sseins ssereg, qword [KBASE+RC*8]
4414 | sseins ssereg, qword [KBASE+RC*8]
4415 | .else
4416 | fld qword [BASE+RB*8]
4417 | x87ins qword [KBASE+RC*8]
4418 | .endif
4419 || break; 3792 || break;
4420 ||case 1: 3793 ||case 1:
4421 | checknum RB, ->vmeta_arith_nv 3794 | checknum RB, ->vmeta_arith_nv
4422 | .if DUALNUM 3795 | .if DUALNUM
4423 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3796 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4424 | .endif 3797 | .endif
4425 | .if SSE 3798 | movsd xmm0, qword [KBASE+RC*8]
4426 | movsd xmm0, qword [KBASE+RC*8] 3799 | sseins ssereg, qword [BASE+RB*8]
4427 | sseins ssereg, qword [BASE+RB*8]
4428 | .else
4429 | fld qword [KBASE+RC*8]
4430 | x87ins qword [BASE+RB*8]
4431 | .endif
4432 || break; 3800 || break;
4433 ||default: 3801 ||default:
4434 | checknum RB, ->vmeta_arith_vv 3802 | checknum RB, ->vmeta_arith_vv
4435 | checknum RC, ->vmeta_arith_vv 3803 | checknum RC, ->vmeta_arith_vv
4436 | .if SSE 3804 | movsd xmm0, qword [BASE+RB*8]
4437 | movsd xmm0, qword [BASE+RB*8] 3805 | sseins ssereg, qword [BASE+RC*8]
4438 | sseins ssereg, qword [BASE+RC*8]
4439 | .else
4440 | fld qword [BASE+RB*8]
4441 | x87ins qword [BASE+RC*8]
4442 | .endif
4443 || break; 3806 || break;
4444 ||} 3807 ||}
4445 |.endmacro 3808 |.endmacro
@@ -4477,55 +3840,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4477 |.endmacro 3840 |.endmacro
4478 | 3841 |
4479 |.macro ins_arithpost 3842 |.macro ins_arithpost
4480 |.if SSE
4481 | movsd qword [BASE+RA*8], xmm0 3843 | movsd qword [BASE+RA*8], xmm0
4482 |.else
4483 | fstp qword [BASE+RA*8]
4484 |.endif
4485 |.endmacro 3844 |.endmacro
4486 | 3845 |
4487 |.macro ins_arith, x87ins, sseins 3846 |.macro ins_arith, sseins
4488 | ins_arithpre x87ins, sseins, xmm0 3847 | ins_arithpre sseins, xmm0
4489 | ins_arithpost 3848 | ins_arithpost
4490 | ins_next 3849 | ins_next
4491 |.endmacro 3850 |.endmacro
4492 | 3851 |
4493 |.macro ins_arith, intins, x87ins, sseins 3852 |.macro ins_arith, intins, sseins
4494 |.if DUALNUM 3853 |.if DUALNUM
4495 | ins_arithdn intins 3854 | ins_arithdn intins
4496 |.else 3855 |.else
4497 | ins_arith, x87ins, sseins 3856 | ins_arith, sseins
4498 |.endif 3857 |.endif
4499 |.endmacro 3858 |.endmacro
4500 3859
4501 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3860 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4502 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3861 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4503 | ins_arith add, fadd, addsd 3862 | ins_arith add, addsd
4504 break; 3863 break;
4505 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3864 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4506 | ins_arith sub, fsub, subsd 3865 | ins_arith sub, subsd
4507 break; 3866 break;
4508 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3867 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4509 | ins_arith imul, fmul, mulsd 3868 | ins_arith imul, mulsd
4510 break; 3869 break;
4511 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3870 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4512 | ins_arith fdiv, divsd 3871 | ins_arith divsd
4513 break; 3872 break;
4514 case BC_MODVN: 3873 case BC_MODVN:
4515 | ins_arithpre fld, movsd, xmm1 3874 | ins_arithpre movsd, xmm1
4516 |->BC_MODVN_Z: 3875 |->BC_MODVN_Z:
4517 | call ->vm_mod 3876 | call ->vm_mod
4518 | ins_arithpost 3877 | ins_arithpost
4519 | ins_next 3878 | ins_next
4520 break; 3879 break;
4521 case BC_MODNV: case BC_MODVV: 3880 case BC_MODNV: case BC_MODVV:
4522 | ins_arithpre fld, movsd, xmm1 3881 | ins_arithpre movsd, xmm1
4523 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3882 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4524 break; 3883 break;
4525 case BC_POW: 3884 case BC_POW:
4526 | ins_arithpre fld, movsd, xmm1 3885 | ins_arithpre movsd, xmm1
4527 | call ->vm_pow 3886 | mov RB, BASE
3887 |.if not X64
3888 | movsd FPARG1, xmm0
3889 | movsd FPARG3, xmm1
3890 |.endif
3891 | call extern pow
3892 | movzx RA, PC_RA
3893 | mov BASE, RB
3894 |.if X64
4528 | ins_arithpost 3895 | ins_arithpost
3896 |.else
3897 | fstp qword [BASE+RA*8]
3898 |.endif
4529 | ins_next 3899 | ins_next
4530 break; 3900 break;
4531 3901
@@ -4593,25 +3963,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4593 | movsx RD, RDW 3963 | movsx RD, RDW
4594 | mov dword [BASE+RA*8+4], LJ_TISNUM 3964 | mov dword [BASE+RA*8+4], LJ_TISNUM
4595 | mov dword [BASE+RA*8], RD 3965 | mov dword [BASE+RA*8], RD
4596 |.elif SSE 3966 |.else
4597 | movsx RD, RDW // Sign-extend literal. 3967 | movsx RD, RDW // Sign-extend literal.
4598 | cvtsi2sd xmm0, RD 3968 | cvtsi2sd xmm0, RD
4599 | movsd qword [BASE+RA*8], xmm0 3969 | movsd qword [BASE+RA*8], xmm0
4600 |.else
4601 | fild PC_RD // Refetch signed RD from instruction.
4602 | fstp qword [BASE+RA*8]
4603 |.endif 3970 |.endif
4604 | ins_next 3971 | ins_next
4605 break; 3972 break;
4606 case BC_KNUM: 3973 case BC_KNUM:
4607 | ins_AD // RA = dst, RD = num const 3974 | ins_AD // RA = dst, RD = num const
4608 |.if SSE
4609 | movsd xmm0, qword [KBASE+RD*8] 3975 | movsd xmm0, qword [KBASE+RD*8]
4610 | movsd qword [BASE+RA*8], xmm0 3976 | movsd qword [BASE+RA*8], xmm0
4611 |.else
4612 | fld qword [KBASE+RD*8]
4613 | fstp qword [BASE+RA*8]
4614 |.endif
4615 | ins_next 3977 | ins_next
4616 break; 3978 break;
4617 case BC_KPRI: 3979 case BC_KPRI:
@@ -4718,18 +4080,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4718 case BC_USETN: 4080 case BC_USETN:
4719 | ins_AD // RA = upvalue #, RD = num const 4081 | ins_AD // RA = upvalue #, RD = num const
4720 | mov LFUNC:RB, [BASE-8] 4082 | mov LFUNC:RB, [BASE-8]
4721 |.if SSE
4722 | movsd xmm0, qword [KBASE+RD*8] 4083 | movsd xmm0, qword [KBASE+RD*8]
4723 |.else
4724 | fld qword [KBASE+RD*8]
4725 |.endif
4726 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4084 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4727 | mov RA, UPVAL:RB->v 4085 | mov RA, UPVAL:RB->v
4728 |.if SSE
4729 | movsd qword [RA], xmm0 4086 | movsd qword [RA], xmm0
4730 |.else
4731 | fstp qword [RA]
4732 |.endif
4733 | ins_next 4087 | ins_next
4734 break; 4088 break;
4735 case BC_USETP: 4089 case BC_USETP:
@@ -4883,18 +4237,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4883 |.else 4237 |.else
4884 | // Convert number to int and back and compare. 4238 | // Convert number to int and back and compare.
4885 | checknum RC, >5 4239 | checknum RC, >5
4886 |.if SSE
4887 | movsd xmm0, qword [BASE+RC*8] 4240 | movsd xmm0, qword [BASE+RC*8]
4888 | cvtsd2si RC, xmm0 4241 | cvttsd2si RC, xmm0
4889 | cvtsi2sd xmm1, RC 4242 | cvtsi2sd xmm1, RC
4890 | ucomisd xmm0, xmm1 4243 | ucomisd xmm0, xmm1
4891 |.else
4892 | fld qword [BASE+RC*8]
4893 | fist ARG1
4894 | fild ARG1
4895 | fcomparepp
4896 | mov RC, ARG1
4897 |.endif
4898 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4244 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4899 |.endif 4245 |.endif
4900 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4246 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5018,6 +4364,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5018 | mov dword [BASE+RA*8+4], LJ_TNIL 4364 | mov dword [BASE+RA*8+4], LJ_TNIL
5019 | jmp <1 4365 | jmp <1
5020 break; 4366 break;
4367 case BC_TGETR: /* Table get by numeric key: copy the array slot to RA, or leave via vmeta_tgetr. */
4368 | ins_ABC // RA = dst, RB = table, RC = key
4369 | mov TAB:RB, [BASE+RB*8] // Load the table pointer; no type tag is inspected in this block.
4370 |.if DUALNUM
4371 | mov RC, dword [BASE+RC*8] // Key is read directly as a 32 bit integer; its tag is not checked here.
4372 |.else
4373 | cvttsd2si RC, qword [BASE+RC*8] // Key is read as a double and truncated; no convert-back comparison is done here.
4374 |.endif
4375 | cmp RC, TAB:RB->asize // Unsigned compare (jae below), so negative keys also take the fallback.
4376 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4377 | shl RC, 3 // Scale the index by 8, the size of one slot.
4378 | add RC, TAB:RB->array // RC = address of the array slot.
4379 | // Get array slot.
4380 |->BC_TGETR_Z: // Global label; presumably entered from elsewhere with RC = slot address -- TODO confirm.
4381 |.if X64
4382 | mov RBa, [RC] // Copy the whole slot with one address-sized move.
4383 | mov [BASE+RA*8], RBa
4384 |.else
4385 | mov RB, [RC] // Copy the slot as two dwords: offset 0 ...
4386 | mov RC, [RC+4] // ... and offset 4. RC is overwritten last since it is the source address.
4387 | mov [BASE+RA*8], RB
4388 | mov [BASE+RA*8+4], RC
4389 |.endif
4390 |->BC_TGETR2_Z: // Global label; presumably entered from elsewhere once the result is already stored -- TODO confirm.
4391 | ins_next
4392 break;
5021 4393
5022 case BC_TSETV: 4394 case BC_TSETV:
5023 | ins_ABC // RA = src, RB = table, RC = key 4395 | ins_ABC // RA = src, RB = table, RC = key
@@ -5031,18 +4403,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5031 |.else 4403 |.else
5032 | // Convert number to int and back and compare. 4404 | // Convert number to int and back and compare.
5033 | checknum RC, >5 4405 | checknum RC, >5
5034 |.if SSE
5035 | movsd xmm0, qword [BASE+RC*8] 4406 | movsd xmm0, qword [BASE+RC*8]
5036 | cvtsd2si RC, xmm0 4407 | cvttsd2si RC, xmm0
5037 | cvtsi2sd xmm1, RC 4408 | cvtsi2sd xmm1, RC
5038 | ucomisd xmm0, xmm1 4409 | ucomisd xmm0, xmm1
5039 |.else
5040 | fld qword [BASE+RC*8]
5041 | fist ARG1
5042 | fild ARG1
5043 | fcomparepp
5044 | mov RC, ARG1
5045 |.endif
5046 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4410 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5047 |.endif 4411 |.endif
5048 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4412 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5212,6 +4576,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5212 | movzx RA, PC_RA // Restore RA. 4576 | movzx RA, PC_RA // Restore RA.
5213 | jmp <2 4577 | jmp <2
5214 break; 4578 break;
4579 case BC_TSETR: /* Table set by numeric key: store slot RA into the array part, or leave via vmeta_tsetr. */
4580 | ins_ABC // RA = src, RB = table, RC = key
4581 | mov TAB:RB, [BASE+RB*8] // Load the table pointer; no type tag is inspected in this block.
4582 |.if DUALNUM
4583 | mov RC, dword [BASE+RC*8] // Key is read directly as a 32 bit integer; its tag is not checked here.
4584 |.else
4585 | cvttsd2si RC, qword [BASE+RC*8] // Key is read as a double and truncated; no convert-back comparison is done here.
4586 |.endif
4587 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4588 | jnz >7 // Black table: run the barrier at 7 first, which then resumes at 2.
4589 |2:
4590 | cmp RC, TAB:RB->asize // Unsigned compare (jae below), so negative keys also take the fallback.
4591 | jae ->vmeta_tsetr // Not in array part? Use fallback.
4592 | shl RC, 3 // Scale the index by 8, the size of one slot.
4593 | add RC, TAB:RB->array // RC = address of the array slot.
4594 | // Set array slot.
4595 |->BC_TSETR_Z: // Global label; presumably entered from elsewhere with RC = slot address -- TODO confirm.
4596 |.if X64
4597 | mov RBa, [BASE+RA*8] // Copy the whole slot with one address-sized move.
4598 | mov [RC], RBa
4599 |.else
4600 | mov RB, [BASE+RA*8+4] // Load the dword at offset 4 first ...
4601 | mov RA, [BASE+RA*8] // ... then offset 0. RA is overwritten last because it indexes both loads.
4602 | mov [RC+4], RB
4603 | mov [RC], RA
4604 |.endif
4605 | ins_next
4606 |
4607 |7: // Possible table write barrier for the value. Skip valiswhite check.
4608 | barrierback TAB:RB, RA // RA is handed to the macro and reloaded below, so it is presumably used as scratch.
4609 | movzx RA, PC_RA // Restore RA.
4610 | jmp <2
4611 break;
5215 4612
5216 case BC_TSETM: 4613 case BC_TSETM:
5217 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4614 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5405,10 +4802,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 |.if DUALNUM 4802 |.if DUALNUM
5406 | mov dword [BASE+RA*8+4], LJ_TISNUM 4803 | mov dword [BASE+RA*8+4], LJ_TISNUM
5407 | mov dword [BASE+RA*8], RC 4804 | mov dword [BASE+RA*8], RC
5408 |.elif SSE
5409 | cvtsi2sd xmm0, RC
5410 |.else 4805 |.else
5411 | fild dword [BASE+RA*8-8] 4806 | cvtsi2sd xmm0, RC
5412 |.endif 4807 |.endif
5413 | // Copy array slot to returned value. 4808 | // Copy array slot to returned value.
5414 |.if X64 4809 |.if X64
@@ -5424,10 +4819,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5424 | // Return array index as a numeric key. 4819 | // Return array index as a numeric key.
5425 |.if DUALNUM 4820 |.if DUALNUM
5426 | // See above. 4821 | // See above.
5427 |.elif SSE
5428 | movsd qword [BASE+RA*8], xmm0
5429 |.else 4822 |.else
5430 | fstp qword [BASE+RA*8] 4823 | movsd qword [BASE+RA*8], xmm0
5431 |.endif 4824 |.endif
5432 | mov [BASE+RA*8-8], RC // Update control var. 4825 | mov [BASE+RA*8-8], RC // Update control var.
5433 |2: 4826 |2:
@@ -5440,9 +4833,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5440 | 4833 |
5441 |4: // Skip holes in array part. 4834 |4: // Skip holes in array part.
5442 | add RC, 1 4835 | add RC, 1
5443 |.if not (DUALNUM or SSE)
5444 | mov [BASE+RA*8-8], RC
5445 |.endif
5446 | jmp <1 4836 | jmp <1
5447 | 4837 |
5448 |5: // Traverse hash part. 4838 |5: // Traverse hash part.
@@ -5776,7 +5166,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5776 if (!vk) { 5166 if (!vk) {
5777 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5167 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5778 } 5168 }
5779 |.if SSE
5780 | movsd xmm0, qword FOR_IDX 5169 | movsd xmm0, qword FOR_IDX
5781 | movsd xmm1, qword FOR_STOP 5170 | movsd xmm1, qword FOR_STOP
5782 if (vk) { 5171 if (vk) {
@@ -5789,22 +5178,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5789 | ucomisd xmm1, xmm0 5178 | ucomisd xmm1, xmm0
5790 |1: 5179 |1:
5791 | movsd qword FOR_EXT, xmm0 5180 | movsd qword FOR_EXT, xmm0
5792 |.else
5793 | fld qword FOR_STOP
5794 | fld qword FOR_IDX
5795 if (vk) {
5796 | fadd qword FOR_STEP // nidx = idx + step
5797 | fst qword FOR_IDX
5798 | fst qword FOR_EXT
5799 | test RB, RB; js >1
5800 } else {
5801 | fst qword FOR_EXT
5802 | jl >1
5803 }
5804 | fxch // Swap lim/(n)idx if step non-negative.
5805 |1:
5806 | fcomparepp
5807 |.endif
5808 if (op == BC_FORI) { 5181 if (op == BC_FORI) {
5809 |.if DUALNUM 5182 |.if DUALNUM
5810 | jnb <7 5183 | jnb <7
@@ -5832,11 +5205,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5832 |2: 5205 |2:
5833 | ins_next 5206 | ins_next
5834 |.endif 5207 |.endif
5835 |.if SSE 5208 |
5836 |3: // Invert comparison if step is negative. 5209 |3: // Invert comparison if step is negative.
5837 | ucomisd xmm0, xmm1 5210 | ucomisd xmm0, xmm1
5838 | jmp <1 5211 | jmp <1
5839 |.endif
5840 break; 5212 break;
5841 5213
5842 case BC_ITERL: 5214 case BC_ITERL:
@@ -5874,7 +5246,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5874 | ins_A // RA = base, RD = target (loop extent) 5246 | ins_A // RA = base, RD = target (loop extent)
5875 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5247 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5876 | // This opcode does NOT jump, its only purpose is to detect a hot loop. 5248 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5877 |.if JIT 5249 |.if JIT
5878 | hotloop RB 5250 | hotloop RB
5879 |.endif 5251 |.endif
5880 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5252 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5893,7 +5265,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5893 | mov RDa, TRACE:RD->mcode 5265 | mov RDa, TRACE:RD->mcode
5894 | mov L:RB, SAVE_L 5266 | mov L:RB, SAVE_L
5895 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5267 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5896 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5268 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5897 | // Save additional callee-save registers only used in compiled code. 5269 | // Save additional callee-save registers only used in compiled code.
5898 |.if X64WIN 5270 |.if X64WIN
5899 | mov TMPQ, r12 5271 | mov TMPQ, r12
@@ -6060,9 +5432,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6060 | // (lua_State *L, lua_CFunction f) 5432 | // (lua_State *L, lua_CFunction f)
6061 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5433 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6062 } 5434 }
6063 | set_vmstate INTERP
6064 | // nresults returned in eax (RD). 5435 | // nresults returned in eax (RD).
6065 | mov BASE, L:RB->base 5436 | mov BASE, L:RB->base
5437 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5438 | set_vmstate INTERP
6066 | lea RA, [BASE+RD*8] 5439 | lea RA, [BASE+RD*8]
6067 | neg RA 5440 | neg RA
6068 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5441 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6382,15 +5755,21 @@ static void emit_asm_debug(BuildCtx *ctx)
6382 "LEFDEY:\n\n", fcsize); 5755 "LEFDEY:\n\n", fcsize);
6383 } 5756 }
6384#endif 5757#endif
6385#if LJ_64 5758#if !LJ_64
6386 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6387#else
6388 fprintf(ctx->fp, 5759 fprintf(ctx->fp,
6389 "\t.non_lazy_symbol_pointer\n" 5760 "\t.non_lazy_symbol_pointer\n"
6390 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5761 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6391 ".indirect_symbol _lj_err_unwind_dwarf\n" 5762 ".indirect_symbol _lj_err_unwind_dwarf\n"
6392 ".long 0\n"); 5763 ".long 0\n\n");
5764 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5765 {
5766 const char *const *xn;
5767 for (xn = ctx->extnames; *xn; xn++)
5768 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5769 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5770 }
6393#endif 5771#endif
5772 fprintf(ctx->fp, ".subsections_via_symbols\n");
6394 } 5773 }
6395 break; 5774 break;
6396#endif 5775#endif