diff options
Diffstat (limited to '')
-rw-r--r-- | src/vm_x86.dasc | 1014 |
1 files changed, 271 insertions, 743 deletions
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index f25dfd30..eaa99740 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -18,7 +18,6 @@ | |||
18 | | | 18 | | |
19 | |.if P64 | 19 | |.if P64 |
20 | |.define X64, 1 | 20 | |.define X64, 1 |
21 | |.define SSE, 1 | ||
22 | |.if WIN | 21 | |.if WIN |
23 | |.define X64WIN, 1 | 22 | |.define X64WIN, 1 |
24 | |.endif | 23 | |.endif |
@@ -116,6 +115,7 @@ | |||
116 | |.type NODE, Node | 115 | |.type NODE, Node |
117 | |.type NARGS, int | 116 | |.type NARGS, int |
118 | |.type TRACE, GCtrace | 117 | |.type TRACE, GCtrace |
118 | |.type SBUF, SBuf | ||
119 | | | 119 | | |
120 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 120 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
121 | |//----------------------------------------------------------------------- | 121 | |//----------------------------------------------------------------------- |
@@ -630,17 +630,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
630 | | lea KBASEa, [esp+CFRAME_RESUME] | 630 | | lea KBASEa, [esp+CFRAME_RESUME] |
631 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 631 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. |
632 | | add DISPATCH, GG_G2DISP | 632 | | add DISPATCH, GG_G2DISP |
633 | | mov L:RB->cframe, KBASEa | ||
634 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. | 633 | | mov SAVE_PC, RD // Any value outside of bytecode is ok. |
635 | | mov SAVE_CFRAME, RDa | 634 | | mov SAVE_CFRAME, RDa |
636 | |.if X64 | 635 | |.if X64 |
637 | | mov SAVE_NRES, RD | 636 | | mov SAVE_NRES, RD |
638 | | mov SAVE_ERRF, RD | 637 | | mov SAVE_ERRF, RD |
639 | |.endif | 638 | |.endif |
639 | | mov L:RB->cframe, KBASEa | ||
640 | | cmp byte L:RB->status, RDL | 640 | | cmp byte L:RB->status, RDL |
641 | | je >3 // Initial resume (like a call). | 641 | | je >2 // Initial resume (like a call). |
642 | | | 642 | | |
643 | | // Resume after yield (like a return). | 643 | | // Resume after yield (like a return). |
644 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
644 | | set_vmstate INTERP | 645 | | set_vmstate INTERP |
645 | | mov byte L:RB->status, RDL | 646 | | mov byte L:RB->status, RDL |
646 | | mov BASE, L:RB->base | 647 | | mov BASE, L:RB->base |
@@ -680,20 +681,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
680 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! | 681 | | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! |
681 | |.endif | 682 | |.endif |
682 | | | 683 | | |
684 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
683 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 685 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
684 | | mov SAVE_CFRAME, KBASEa | 686 | | mov SAVE_CFRAME, KBASEa |
685 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | 687 | | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. |
688 | | add DISPATCH, GG_G2DISP | ||
686 | |.if X64 | 689 | |.if X64 |
687 | | mov L:RB->cframe, rsp | 690 | | mov L:RB->cframe, rsp |
688 | |.else | 691 | |.else |
689 | | mov L:RB->cframe, esp | 692 | | mov L:RB->cframe, esp |
690 | |.endif | 693 | |.endif |
691 | | | 694 | | |
692 | |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). | 695 | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). |
693 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | 696 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB |
694 | | add DISPATCH, GG_G2DISP | ||
695 | | | ||
696 | |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype). | ||
697 | | set_vmstate INTERP | 697 | | set_vmstate INTERP |
698 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | 698 | | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). |
699 | | add PC, RA | 699 | | add PC, RA |
@@ -731,14 +731,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
731 | | | 731 | | |
732 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | 732 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). |
733 | | sub KBASE, L:RB->top | 733 | | sub KBASE, L:RB->top |
734 | | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | ||
734 | | mov SAVE_ERRF, 0 // No error function. | 735 | | mov SAVE_ERRF, 0 // No error function. |
735 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. | 736 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. |
737 | | add DISPATCH, GG_G2DISP | ||
736 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | 738 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). |
737 | | | 739 | | |
738 | |.if X64 | 740 | |.if X64 |
739 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | 741 | | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. |
740 | | mov SAVE_CFRAME, KBASEa | 742 | | mov SAVE_CFRAME, KBASEa |
741 | | mov L:RB->cframe, rsp | 743 | | mov L:RB->cframe, rsp |
744 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
742 | | | 745 | | |
743 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) | 746 | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) |
744 | |.else | 747 | |.else |
@@ -749,6 +752,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
749 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | 752 | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. |
750 | | mov SAVE_CFRAME, KBASE | 753 | | mov SAVE_CFRAME, KBASE |
751 | | mov L:RB->cframe, esp | 754 | | mov L:RB->cframe, esp |
755 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
752 | | | 756 | | |
753 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) | 757 | | call BASE // (lua_State *L, lua_CFunction func, void *ud) |
754 | |.endif | 758 | |.endif |
@@ -856,13 +860,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
856 | |.if DUALNUM | 860 | |.if DUALNUM |
857 | | mov TMP2, LJ_TISNUM | 861 | | mov TMP2, LJ_TISNUM |
858 | | mov TMP1, RC | 862 | | mov TMP1, RC |
859 | |.elif SSE | 863 | |.else |
860 | | cvtsi2sd xmm0, RC | 864 | | cvtsi2sd xmm0, RC |
861 | | movsd TMPQ, xmm0 | 865 | | movsd TMPQ, xmm0 |
862 | |.else | ||
863 | | mov ARG4, RC | ||
864 | | fild ARG4 | ||
865 | | fstp TMPQ | ||
866 | |.endif | 866 | |.endif |
867 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 867 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
868 | | jmp >1 | 868 | | jmp >1 |
@@ -916,6 +916,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
916 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). | 916 | | mov NARGS:RD, 2+1 // 2 args for func(t, k). |
917 | | jmp ->vm_call_dispatch_f | 917 | | jmp ->vm_call_dispatch_f |
918 | | | 918 | | |
919 | |->vmeta_tgetr: | ||
920 | | mov FCARG1, TAB:RB | ||
921 | | mov RB, BASE // Save BASE. | ||
922 | | mov FCARG2, RC // Caveat: FCARG2 == BASE | ||
923 | | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) | ||
924 | | // cTValue * or NULL returned in eax (RC). | ||
925 | | movzx RA, PC_RA | ||
926 | | mov BASE, RB // Restore BASE. | ||
927 | | test RC, RC | ||
928 | | jnz ->BC_TGETR_Z | ||
929 | | mov dword [BASE+RA*8+4], LJ_TNIL | ||
930 | | jmp ->BC_TGETR2_Z | ||
931 | | | ||
919 | |//----------------------------------------------------------------------- | 932 | |//----------------------------------------------------------------------- |
920 | | | 933 | | |
921 | |->vmeta_tsets: | 934 | |->vmeta_tsets: |
@@ -935,13 +948,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
935 | |.if DUALNUM | 948 | |.if DUALNUM |
936 | | mov TMP2, LJ_TISNUM | 949 | | mov TMP2, LJ_TISNUM |
937 | | mov TMP1, RC | 950 | | mov TMP1, RC |
938 | |.elif SSE | 951 | |.else |
939 | | cvtsi2sd xmm0, RC | 952 | | cvtsi2sd xmm0, RC |
940 | | movsd TMPQ, xmm0 | 953 | | movsd TMPQ, xmm0 |
941 | |.else | ||
942 | | mov ARG4, RC | ||
943 | | fild ARG4 | ||
944 | | fstp TMPQ | ||
945 | |.endif | 954 | |.endif |
946 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. | 955 | | lea RCa, TMPQ // Store temp. TValue in TMPQ. |
947 | | jmp >1 | 956 | | jmp >1 |
@@ -1007,6 +1016,33 @@ static void build_subroutines(BuildCtx *ctx) | |||
1007 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). | 1016 | | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). |
1008 | | jmp ->vm_call_dispatch_f | 1017 | | jmp ->vm_call_dispatch_f |
1009 | | | 1018 | | |
1019 | |->vmeta_tsetr: | ||
1020 | |.if X64WIN | ||
1021 | | mov L:CARG1d, SAVE_L | ||
1022 | | mov CARG3d, RC | ||
1023 | | mov L:CARG1d->base, BASE | ||
1024 | | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. | ||
1025 | |.elif X64 | ||
1026 | | mov L:CARG1d, SAVE_L | ||
1027 | | mov CARG2d, TAB:RB | ||
1028 | | mov L:CARG1d->base, BASE | ||
1029 | | mov RB, BASE // Save BASE. | ||
1030 | | mov CARG3d, RC // Caveat: CARG3d == BASE. | ||
1031 | |.else | ||
1032 | | mov L:RA, SAVE_L | ||
1033 | | mov ARG2, TAB:RB | ||
1034 | | mov RB, BASE // Save BASE. | ||
1035 | | mov ARG3, RC | ||
1036 | | mov ARG1, L:RA | ||
1037 | | mov L:RA->base, BASE | ||
1038 | |.endif | ||
1039 | | mov SAVE_PC, PC | ||
1040 | | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | ||
1041 | | // TValue * returned in eax (RC). | ||
1042 | | movzx RA, PC_RA | ||
1043 | | mov BASE, RB // Restore BASE. | ||
1044 | | jmp ->BC_TSETR_Z | ||
1045 | | | ||
1010 | |//-- Comparison metamethods --------------------------------------------- | 1046 | |//-- Comparison metamethods --------------------------------------------- |
1011 | | | 1047 | | |
1012 | |->vmeta_comp: | 1048 | |->vmeta_comp: |
@@ -1101,6 +1137,26 @@ static void build_subroutines(BuildCtx *ctx) | |||
1101 | | jmp <3 | 1137 | | jmp <3 |
1102 | |.endif | 1138 | |.endif |
1103 | | | 1139 | | |
1140 | |->vmeta_istype: | ||
1141 | |.if X64 | ||
1142 | | mov L:RB, SAVE_L | ||
1143 | | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | ||
1144 | | mov CARG2d, RA | ||
1145 | | movzx CARG3d, PC_RD | ||
1146 | | mov L:CARG1d, L:RB | ||
1147 | |.else | ||
1148 | | movzx RD, PC_RD | ||
1149 | | mov ARG2, RA | ||
1150 | | mov L:RB, SAVE_L | ||
1151 | | mov ARG3, RD | ||
1152 | | mov ARG1, L:RB | ||
1153 | | mov L:RB->base, BASE | ||
1154 | |.endif | ||
1155 | | mov SAVE_PC, PC | ||
1156 | | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | ||
1157 | | mov BASE, L:RB->base | ||
1158 | | jmp <6 | ||
1159 | | | ||
1104 | |//-- Arithmetic metamethods --------------------------------------------- | 1160 | |//-- Arithmetic metamethods --------------------------------------------- |
1105 | | | 1161 | | |
1106 | |->vmeta_arith_vno: | 1162 | |->vmeta_arith_vno: |
@@ -1509,11 +1565,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1509 | |.else | 1565 | |.else |
1510 | | jae ->fff_fallback | 1566 | | jae ->fff_fallback |
1511 | |.endif | 1567 | |.endif |
1512 | |.if SSE | ||
1513 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | 1568 | | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 |
1514 | |.else | ||
1515 | | fld qword [BASE]; jmp ->fff_resn | ||
1516 | |.endif | ||
1517 | | | 1569 | | |
1518 | |.ffunc_1 tostring | 1570 | |.ffunc_1 tostring |
1519 | | // Only handles the string or number case inline. | 1571 | | // Only handles the string or number case inline. |
@@ -1538,9 +1590,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1538 | |.endif | 1590 | |.endif |
1539 | | mov L:FCARG1, L:RB | 1591 | | mov L:FCARG1, L:RB |
1540 | |.if DUALNUM | 1592 | |.if DUALNUM |
1541 | | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) | 1593 | | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) |
1542 | |.else | 1594 | |.else |
1543 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | 1595 | | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) |
1544 | |.endif | 1596 | |.endif |
1545 | | // GCstr returned in eax (RD). | 1597 | | // GCstr returned in eax (RD). |
1546 | | mov BASE, L:RB->base | 1598 | | mov BASE, L:RB->base |
@@ -1631,19 +1683,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1631 | | add RD, 1 | 1683 | | add RD, 1 |
1632 | | mov dword [BASE-4], LJ_TISNUM | 1684 | | mov dword [BASE-4], LJ_TISNUM |
1633 | | mov dword [BASE-8], RD | 1685 | | mov dword [BASE-8], RD |
1634 | |.elif SSE | 1686 | |.else |
1635 | | movsd xmm0, qword [BASE+8] | 1687 | | movsd xmm0, qword [BASE+8] |
1636 | | sseconst_1 xmm1, RBa | 1688 | | sseconst_1 xmm1, RBa |
1637 | | addsd xmm0, xmm1 | 1689 | | addsd xmm0, xmm1 |
1638 | | cvtsd2si RD, xmm0 | 1690 | | cvttsd2si RD, xmm0 |
1639 | | movsd qword [BASE-8], xmm0 | 1691 | | movsd qword [BASE-8], xmm0 |
1640 | |.else | ||
1641 | | fld qword [BASE+8] | ||
1642 | | fld1 | ||
1643 | | faddp st1 | ||
1644 | | fist ARG1 | ||
1645 | | fstp qword [BASE-8] | ||
1646 | | mov RD, ARG1 | ||
1647 | |.endif | 1692 | |.endif |
1648 | | mov TAB:RB, [BASE] | 1693 | | mov TAB:RB, [BASE] |
1649 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | 1694 | | cmp RD, TAB:RB->asize; jae >2 // Not in array part? |
@@ -1690,12 +1735,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1690 | |.if DUALNUM | 1735 | |.if DUALNUM |
1691 | | mov dword [BASE+12], LJ_TISNUM | 1736 | | mov dword [BASE+12], LJ_TISNUM |
1692 | | mov dword [BASE+8], 0 | 1737 | | mov dword [BASE+8], 0 |
1693 | |.elif SSE | 1738 | |.else |
1694 | | xorps xmm0, xmm0 | 1739 | | xorps xmm0, xmm0 |
1695 | | movsd qword [BASE+8], xmm0 | 1740 | | movsd qword [BASE+8], xmm0 |
1696 | |.else | ||
1697 | | fldz | ||
1698 | | fstp qword [BASE+8] | ||
1699 | |.endif | 1741 | |.endif |
1700 | | mov RD, 1+3 | 1742 | | mov RD, 1+3 |
1701 | | jmp ->fff_res | 1743 | | jmp ->fff_res |
@@ -1802,7 +1844,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1802 | | mov ARG3, RA | 1844 | | mov ARG3, RA |
1803 | |.endif | 1845 | |.endif |
1804 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) | 1846 | | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) |
1805 | | set_vmstate INTERP | ||
1806 | | | 1847 | | |
1807 | | mov L:RB, SAVE_L | 1848 | | mov L:RB, SAVE_L |
1808 | |.if X64 | 1849 | |.if X64 |
@@ -1811,6 +1852,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1811 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. | 1852 | | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. |
1812 | |.endif | 1853 | |.endif |
1813 | | mov BASE, L:RB->base | 1854 | | mov BASE, L:RB->base |
1855 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
1856 | | set_vmstate INTERP | ||
1857 | | | ||
1814 | | cmp eax, LUA_YIELD | 1858 | | cmp eax, LUA_YIELD |
1815 | | ja >8 | 1859 | | ja >8 |
1816 | |4: | 1860 | |4: |
@@ -1925,12 +1969,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1925 | |->fff_resi: // Dummy. | 1969 | |->fff_resi: // Dummy. |
1926 | |.endif | 1970 | |.endif |
1927 | | | 1971 | | |
1928 | |.if SSE | ||
1929 | |->fff_resn: | 1972 | |->fff_resn: |
1930 | | mov PC, [BASE-4] | 1973 | | mov PC, [BASE-4] |
1931 | | fstp qword [BASE-8] | 1974 | | fstp qword [BASE-8] |
1932 | | jmp ->fff_res1 | 1975 | | jmp ->fff_res1 |
1933 | |.endif | ||
1934 | | | 1976 | | |
1935 | | .ffunc_1 math_abs | 1977 | | .ffunc_1 math_abs |
1936 | |.if DUALNUM | 1978 | |.if DUALNUM |
@@ -1954,8 +1996,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1954 | |.else | 1996 | |.else |
1955 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 1997 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
1956 | |.endif | 1998 | |.endif |
1957 | | | ||
1958 | |.if SSE | ||
1959 | | movsd xmm0, qword [BASE] | 1999 | | movsd xmm0, qword [BASE] |
1960 | | sseconst_abs xmm1, RDa | 2000 | | sseconst_abs xmm1, RDa |
1961 | | andps xmm0, xmm1 | 2001 | | andps xmm0, xmm1 |
@@ -1963,15 +2003,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
1963 | | mov PC, [BASE-4] | 2003 | | mov PC, [BASE-4] |
1964 | | movsd qword [BASE-8], xmm0 | 2004 | | movsd qword [BASE-8], xmm0 |
1965 | | // fallthrough | 2005 | | // fallthrough |
1966 | |.else | ||
1967 | | fld qword [BASE] | ||
1968 | | fabs | ||
1969 | | // fallthrough | ||
1970 | |->fff_resxmm0: // Dummy. | ||
1971 | |->fff_resn: | ||
1972 | | mov PC, [BASE-4] | ||
1973 | | fstp qword [BASE-8] | ||
1974 | |.endif | ||
1975 | | | 2006 | | |
1976 | |->fff_res1: | 2007 | |->fff_res1: |
1977 | | mov RD, 1+1 | 2008 | | mov RD, 1+1 |
@@ -2008,48 +2039,24 @@ static void build_subroutines(BuildCtx *ctx) | |||
2008 | |.else | 2039 | |.else |
2009 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | 2040 | | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |
2010 | |.endif | 2041 | |.endif |
2011 | |.if SSE | ||
2012 | | movsd xmm0, qword [BASE] | 2042 | | movsd xmm0, qword [BASE] |
2013 | | call ->vm_ .. func | 2043 | | call ->vm_ .. func .. _sse |
2014 | | .if DUALNUM | 2044 | |.if DUALNUM |
2015 | | cvtsd2si RB, xmm0 | 2045 | | cvttsd2si RB, xmm0 |
2016 | | cmp RB, 0x80000000 | 2046 | | cmp RB, 0x80000000 |
2017 | | jne ->fff_resi | 2047 | | jne ->fff_resi |
2018 | | cvtsi2sd xmm1, RB | 2048 | | cvtsi2sd xmm1, RB |
2019 | | ucomisd xmm0, xmm1 | 2049 | | ucomisd xmm0, xmm1 |
2020 | | jp ->fff_resxmm0 | 2050 | | jp ->fff_resxmm0 |
2021 | | je ->fff_resi | 2051 | | je ->fff_resi |
2022 | | .endif | ||
2023 | | jmp ->fff_resxmm0 | ||
2024 | |.else | ||
2025 | | fld qword [BASE] | ||
2026 | | call ->vm_ .. func | ||
2027 | | .if DUALNUM | ||
2028 | | fist ARG1 | ||
2029 | | mov RB, ARG1 | ||
2030 | | cmp RB, 0x80000000; jne >2 | ||
2031 | | fdup | ||
2032 | | fild ARG1 | ||
2033 | | fcomparepp | ||
2034 | | jp ->fff_resn | ||
2035 | | jne ->fff_resn | ||
2036 | |2: | ||
2037 | | fpop | ||
2038 | | jmp ->fff_resi | ||
2039 | | .else | ||
2040 | | jmp ->fff_resn | ||
2041 | | .endif | ||
2042 | |.endif | 2052 | |.endif |
2053 | | jmp ->fff_resxmm0 | ||
2043 | |.endmacro | 2054 | |.endmacro |
2044 | | | 2055 | | |
2045 | | math_round floor | 2056 | | math_round floor |
2046 | | math_round ceil | 2057 | | math_round ceil |
2047 | | | 2058 | | |
2048 | |.if SSE | ||
2049 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 | 2059 | |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 |
2050 | |.else | ||
2051 | |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn | ||
2052 | |.endif | ||
2053 | | | 2060 | | |
2054 | |.ffunc math_log | 2061 | |.ffunc math_log |
2055 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | 2062 | | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. |
@@ -2072,42 +2079,24 @@ static void build_subroutines(BuildCtx *ctx) | |||
2072 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn | 2079 | |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn |
2073 | | | 2080 | | |
2074 | |.macro math_extern, func | 2081 | |.macro math_extern, func |
2075 | |.if SSE | ||
2076 | | .ffunc_nsse math_ .. func | 2082 | | .ffunc_nsse math_ .. func |
2077 | | .if not X64 | 2083 | |.if not X64 |
2078 | | movsd FPARG1, xmm0 | 2084 | | movsd FPARG1, xmm0 |
2079 | | .endif | ||
2080 | |.else | ||
2081 | | .ffunc_n math_ .. func | ||
2082 | | fstp FPARG1 | ||
2083 | |.endif | 2085 | |.endif |
2084 | | mov RB, BASE | 2086 | | mov RB, BASE |
2085 | | call extern lj_vm_ .. func | 2087 | | call extern lj_vm_ .. func |
2086 | | mov BASE, RB | 2088 | | mov BASE, RB |
2087 | | .if X64 | 2089 | |.if X64 |
2088 | | jmp ->fff_resxmm0 | 2090 | | jmp ->fff_resxmm0 |
2089 | | .else | 2091 | |.else |
2090 | | jmp ->fff_resn | 2092 | | jmp ->fff_resn |
2091 | | .endif | 2093 | |.endif |
2092 | |.endmacro | 2094 | |.endmacro |
2093 | | | 2095 | | |
2094 | | math_extern sinh | 2096 | | math_extern sinh |
2095 | | math_extern cosh | 2097 | | math_extern cosh |
2096 | | math_extern tanh | 2098 | | math_extern tanh |
2097 | | | 2099 | | |
2098 | |->ff_math_deg: | ||
2099 | |.if SSE | ||
2100 | |.ffunc_nsse math_rad | ||
2101 | | mov CFUNC:RB, [BASE-8] | ||
2102 | | mulsd xmm0, qword CFUNC:RB->upvalue[0] | ||
2103 | | jmp ->fff_resxmm0 | ||
2104 | |.else | ||
2105 | |.ffunc_n math_rad | ||
2106 | | mov CFUNC:RB, [BASE-8] | ||
2107 | | fmul qword CFUNC:RB->upvalue[0] | ||
2108 | | jmp ->fff_resn | ||
2109 | |.endif | ||
2110 | | | ||
2111 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn | 2100 | |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn |
2112 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | 2101 | |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn |
2113 | | | 2102 | | |
@@ -2123,65 +2112,34 @@ static void build_subroutines(BuildCtx *ctx) | |||
2123 | | cmp RB, 0x00200000; jb >4 | 2112 | | cmp RB, 0x00200000; jb >4 |
2124 | |1: | 2113 | |1: |
2125 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. | 2114 | | shr RB, 21; sub RB, RC // Extract and unbias exponent. |
2126 | |.if SSE | ||
2127 | | cvtsi2sd xmm0, RB | 2115 | | cvtsi2sd xmm0, RB |
2128 | |.else | ||
2129 | | mov TMP1, RB; fild TMP1 | ||
2130 | |.endif | ||
2131 | | mov RB, [BASE-4] | 2116 | | mov RB, [BASE-4] |
2132 | | and RB, 0x800fffff // Mask off exponent. | 2117 | | and RB, 0x800fffff // Mask off exponent. |
2133 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | 2118 | | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. |
2134 | | mov [BASE-4], RB | 2119 | | mov [BASE-4], RB |
2135 | |2: | 2120 | |2: |
2136 | |.if SSE | ||
2137 | | movsd qword [BASE], xmm0 | 2121 | | movsd qword [BASE], xmm0 |
2138 | |.else | ||
2139 | | fstp qword [BASE] | ||
2140 | |.endif | ||
2141 | | mov RD, 1+2 | 2122 | | mov RD, 1+2 |
2142 | | jmp ->fff_res | 2123 | | jmp ->fff_res |
2143 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | 2124 | |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. |
2144 | |.if SSE | ||
2145 | | xorps xmm0, xmm0; jmp <2 | 2125 | | xorps xmm0, xmm0; jmp <2 |
2146 | |.else | ||
2147 | | fldz; jmp <2 | ||
2148 | |.endif | ||
2149 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | 2126 | |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. |
2150 | |.if SSE | ||
2151 | | movsd xmm0, qword [BASE] | 2127 | | movsd xmm0, qword [BASE] |
2152 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. | 2128 | | sseconst_hi xmm1, RBa, 43500000 // 2^54. |
2153 | | mulsd xmm0, xmm1 | 2129 | | mulsd xmm0, xmm1 |
2154 | | movsd qword [BASE-8], xmm0 | 2130 | | movsd qword [BASE-8], xmm0 |
2155 | |.else | ||
2156 | | fld qword [BASE] | ||
2157 | | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 | ||
2158 | | fstp qword [BASE-8] | ||
2159 | |.endif | ||
2160 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | 2131 | | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 |
2161 | | | 2132 | | |
2162 | |.if SSE | ||
2163 | |.ffunc_nsse math_modf | 2133 | |.ffunc_nsse math_modf |
2164 | |.else | ||
2165 | |.ffunc_n math_modf | ||
2166 | |.endif | ||
2167 | | mov RB, [BASE+4] | 2134 | | mov RB, [BASE+4] |
2168 | | mov PC, [BASE-4] | 2135 | | mov PC, [BASE-4] |
2169 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | 2136 | | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? |
2170 | |.if SSE | ||
2171 | | movaps xmm4, xmm0 | 2137 | | movaps xmm4, xmm0 |
2172 | | call ->vm_trunc | 2138 | | call ->vm_trunc_sse |
2173 | | subsd xmm4, xmm0 | 2139 | | subsd xmm4, xmm0 |
2174 | |1: | 2140 | |1: |
2175 | | movsd qword [BASE-8], xmm0 | 2141 | | movsd qword [BASE-8], xmm0 |
2176 | | movsd qword [BASE], xmm4 | 2142 | | movsd qword [BASE], xmm4 |
2177 | |.else | ||
2178 | | fdup | ||
2179 | | call ->vm_trunc | ||
2180 | | fsub st1, st0 | ||
2181 | |1: | ||
2182 | | fstp qword [BASE-8] | ||
2183 | | fstp qword [BASE] | ||
2184 | |.endif | ||
2185 | | mov RC, [BASE-4]; mov RB, [BASE+4] | 2143 | | mov RC, [BASE-4]; mov RB, [BASE+4] |
2186 | | xor RC, RB; js >3 // Need to adjust sign? | 2144 | | xor RC, RB; js >3 // Need to adjust sign? |
2187 | |2: | 2145 | |2: |
@@ -2191,24 +2149,16 @@ static void build_subroutines(BuildCtx *ctx) | |||
2191 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | 2149 | | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. |
2192 | | jmp <2 | 2150 | | jmp <2 |
2193 | |4: | 2151 | |4: |
2194 | |.if SSE | ||
2195 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | 2152 | | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. |
2196 | |.else | ||
2197 | | fldz; fxch; jmp <1 // Return +-Inf and +-0. | ||
2198 | |.endif | ||
2199 | | | 2153 | | |
2200 | |.ffunc_nnr math_fmod | 2154 | |.ffunc_nnr math_fmod |
2201 | |1: ; fprem; fnstsw ax; sahf; jp <1 | 2155 | |1: ; fprem; fnstsw ax; sahf; jp <1 |
2202 | | fpop1 | 2156 | | fpop1 |
2203 | | jmp ->fff_resn | 2157 | | jmp ->fff_resn |
2204 | | | 2158 | | |
2205 | |.if SSE | 2159 | |.ffunc_nnsse math_pow; call ->vm_pow_sse; jmp ->fff_resxmm0 |
2206 | |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 | ||
2207 | |.else | ||
2208 | |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn | ||
2209 | |.endif | ||
2210 | | | 2160 | | |
2211 | |.macro math_minmax, name, cmovop, fcmovop, sseop | 2161 | |.macro math_minmax, name, cmovop, sseop |
2212 | | .ffunc name | 2162 | | .ffunc name |
2213 | | mov RA, 2 | 2163 | | mov RA, 2 |
2214 | | cmp dword [BASE+4], LJ_TISNUM | 2164 | | cmp dword [BASE+4], LJ_TISNUM |
@@ -2225,12 +2175,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2225 | |3: | 2175 | |3: |
2226 | | ja ->fff_fallback | 2176 | | ja ->fff_fallback |
2227 | | // Convert intermediate result to number and continue below. | 2177 | | // Convert intermediate result to number and continue below. |
2228 | |.if SSE | ||
2229 | | cvtsi2sd xmm0, RB | 2178 | | cvtsi2sd xmm0, RB |
2230 | |.else | ||
2231 | | mov TMP1, RB | ||
2232 | | fild TMP1 | ||
2233 | |.endif | ||
2234 | | jmp >6 | 2179 | | jmp >6 |
2235 | |4: | 2180 | |4: |
2236 | | ja ->fff_fallback | 2181 | | ja ->fff_fallback |
@@ -2238,7 +2183,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
2238 | | jae ->fff_fallback | 2183 | | jae ->fff_fallback |
2239 | |.endif | 2184 | |.endif |
2240 | | | 2185 | | |
2241 | |.if SSE | ||
2242 | | movsd xmm0, qword [BASE] | 2186 | | movsd xmm0, qword [BASE] |
2243 | |5: // Handle numbers or integers. | 2187 | |5: // Handle numbers or integers. |
2244 | | cmp RA, RD; jae ->fff_resxmm0 | 2188 | | cmp RA, RD; jae ->fff_resxmm0 |
@@ -2257,48 +2201,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2257 | | sseop xmm0, xmm1 | 2201 | | sseop xmm0, xmm1 |
2258 | | add RA, 1 | 2202 | | add RA, 1 |
2259 | | jmp <5 | 2203 | | jmp <5 |
2260 | |.else | ||
2261 | | fld qword [BASE] | ||
2262 | |5: // Handle numbers or integers. | ||
2263 | | cmp RA, RD; jae ->fff_resn | ||
2264 | | cmp dword [BASE+RA*8-4], LJ_TISNUM | ||
2265 | |.if DUALNUM | ||
2266 | | jb >6 | ||
2267 | | ja >9 | ||
2268 | | fild dword [BASE+RA*8-8] | ||
2269 | | jmp >7 | ||
2270 | |.else | ||
2271 | | jae >9 | ||
2272 | |.endif | ||
2273 | |6: | ||
2274 | | fld qword [BASE+RA*8-8] | ||
2275 | |7: | ||
2276 | | fucomi st1; fcmovop st1; fpop1 | ||
2277 | | add RA, 1 | ||
2278 | | jmp <5 | ||
2279 | |.endif | ||
2280 | |.endmacro | 2204 | |.endmacro |
2281 | | | 2205 | | |
2282 | | math_minmax math_min, cmovg, fcmovnbe, minsd | 2206 | | math_minmax math_min, cmovg, minsd |
2283 | | math_minmax math_max, cmovl, fcmovbe, maxsd | 2207 | | math_minmax math_max, cmovl, maxsd |
2284 | |.if not SSE | ||
2285 | |9: | ||
2286 | | fpop; jmp ->fff_fallback | ||
2287 | |.endif | ||
2288 | | | 2208 | | |
2289 | |//-- String library ----------------------------------------------------- | 2209 | |//-- String library ----------------------------------------------------- |
2290 | | | 2210 | | |
2291 | |.ffunc_1 string_len | ||
2292 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2293 | | mov STR:RB, [BASE] | ||
2294 | |.if DUALNUM | ||
2295 | | mov RB, dword STR:RB->len; jmp ->fff_resi | ||
2296 | |.elif SSE | ||
2297 | | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 | ||
2298 | |.else | ||
2299 | | fild dword STR:RB->len; jmp ->fff_resn | ||
2300 | |.endif | ||
2301 | | | ||
2302 | |.ffunc string_byte // Only handle the 1-arg case here. | 2211 | |.ffunc string_byte // Only handle the 1-arg case here. |
2303 | | cmp NARGS:RD, 1+1; jne ->fff_fallback | 2212 | | cmp NARGS:RD, 1+1; jne ->fff_fallback |
2304 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2213 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2309,10 +2218,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2309 | | movzx RB, byte STR:RB[1] | 2218 | | movzx RB, byte STR:RB[1] |
2310 | |.if DUALNUM | 2219 | |.if DUALNUM |
2311 | | jmp ->fff_resi | 2220 | | jmp ->fff_resi |
2312 | |.elif SSE | ||
2313 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | ||
2314 | |.else | 2221 | |.else |
2315 | | mov TMP1, RB; fild TMP1; jmp ->fff_resn | 2222 | | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |
2316 | |.endif | 2223 | |.endif |
2317 | | | 2224 | | |
2318 | |.ffunc string_char // Only handle the 1-arg case here. | 2225 | |.ffunc string_char // Only handle the 1-arg case here. |
@@ -2324,16 +2231,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2324 | | mov RB, dword [BASE] | 2231 | | mov RB, dword [BASE] |
2325 | | cmp RB, 255; ja ->fff_fallback | 2232 | | cmp RB, 255; ja ->fff_fallback |
2326 | | mov TMP2, RB | 2233 | | mov TMP2, RB |
2327 | |.elif SSE | 2234 | |.else |
2328 | | jae ->fff_fallback | 2235 | | jae ->fff_fallback |
2329 | | cvttsd2si RB, qword [BASE] | 2236 | | cvttsd2si RB, qword [BASE] |
2330 | | cmp RB, 255; ja ->fff_fallback | 2237 | | cmp RB, 255; ja ->fff_fallback |
2331 | | mov TMP2, RB | 2238 | | mov TMP2, RB |
2332 | |.else | ||
2333 | | jae ->fff_fallback | ||
2334 | | fld qword [BASE] | ||
2335 | | fistp TMP2 | ||
2336 | | cmp TMP2, 255; ja ->fff_fallback | ||
2337 | |.endif | 2239 | |.endif |
2338 | |.if X64 | 2240 | |.if X64 |
2339 | | mov TMP3, 1 | 2241 | | mov TMP3, 1 |
@@ -2354,6 +2256,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2354 | |.endif | 2256 | |.endif |
2355 | | mov SAVE_PC, PC | 2257 | | mov SAVE_PC, PC |
2356 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | 2258 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) |
2259 | |->fff_resstr: | ||
2357 | | // GCstr * returned in eax (RD). | 2260 | | // GCstr * returned in eax (RD). |
2358 | | mov BASE, L:RB->base | 2261 | | mov BASE, L:RB->base |
2359 | | mov PC, [BASE-4] | 2262 | | mov PC, [BASE-4] |
@@ -2371,14 +2274,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2371 | | jne ->fff_fallback | 2274 | | jne ->fff_fallback |
2372 | | mov RB, dword [BASE+16] | 2275 | | mov RB, dword [BASE+16] |
2373 | | mov TMP2, RB | 2276 | | mov TMP2, RB |
2374 | |.elif SSE | 2277 | |.else |
2375 | | jae ->fff_fallback | 2278 | | jae ->fff_fallback |
2376 | | cvttsd2si RB, qword [BASE+16] | 2279 | | cvttsd2si RB, qword [BASE+16] |
2377 | | mov TMP2, RB | 2280 | | mov TMP2, RB |
2378 | |.else | ||
2379 | | jae ->fff_fallback | ||
2380 | | fld qword [BASE+16] | ||
2381 | | fistp TMP2 | ||
2382 | |.endif | 2281 | |.endif |
2383 | |1: | 2282 | |1: |
2384 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2283 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
@@ -2393,12 +2292,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2393 | | mov RB, STR:RB->len | 2292 | | mov RB, STR:RB->len |
2394 | |.if DUALNUM | 2293 | |.if DUALNUM |
2395 | | mov RA, dword [BASE+8] | 2294 | | mov RA, dword [BASE+8] |
2396 | |.elif SSE | ||
2397 | | cvttsd2si RA, qword [BASE+8] | ||
2398 | |.else | 2295 | |.else |
2399 | | fld qword [BASE+8] | 2296 | | cvttsd2si RA, qword [BASE+8] |
2400 | | fistp ARG3 | ||
2401 | | mov RA, ARG3 | ||
2402 | |.endif | 2297 | |.endif |
2403 | | mov RC, TMP2 | 2298 | | mov RC, TMP2 |
2404 | | cmp RB, RC // len < end? (unsigned compare) | 2299 | | cmp RB, RC // len < end? (unsigned compare) |
@@ -2442,123 +2337,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
2442 | | xor RC, RC // Zero length. Any ptr in RB is ok. | 2337 | | xor RC, RC // Zero length. Any ptr in RB is ok. |
2443 | | jmp <4 | 2338 | | jmp <4 |
2444 | | | 2339 | | |
2445 | |.ffunc string_rep // Only handle the 1-char case inline. | 2340 | |.macro ffstring_op, name |
2341 | | .ffunc_1 string_ .. name | ||
2446 | | ffgccheck | 2342 | | ffgccheck |
2447 | | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments. | ||
2448 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | 2343 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback |
2449 | | cmp dword [BASE+12], LJ_TISNUM | 2344 | | mov L:RB, SAVE_L |
2450 | | mov STR:RB, [BASE] | 2345 | | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] |
2451 | |.if DUALNUM | 2346 | | mov L:RB->base, BASE |
2452 | | jne ->fff_fallback | 2347 | | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE |
2453 | | mov RC, dword [BASE+8] | 2348 | | mov RC, SBUF:FCARG1->b |
2454 | |.elif SSE | 2349 | | mov SBUF:FCARG1->L, L:RB |
2455 | | jae ->fff_fallback | 2350 | | mov SBUF:FCARG1->p, RC |
2456 | | cvttsd2si RC, qword [BASE+8] | 2351 | | mov SAVE_PC, PC |
2457 | |.else | 2352 | | call extern lj_buf_putstr_ .. name .. @8 |
2458 | | jae ->fff_fallback | 2353 | | mov FCARG1, eax |
2459 | | fld qword [BASE+8] | 2354 | | call extern lj_buf_tostr@4 |
2460 | | fistp TMP2 | 2355 | | jmp ->fff_resstr |
2461 | | mov RC, TMP2 | ||
2462 | |.endif | ||
2463 | | test RC, RC | ||
2464 | | jle ->fff_emptystr // Count <= 0? (or non-int) | ||
2465 | | cmp dword STR:RB->len, 1 | ||
2466 | | jb ->fff_emptystr // Zero length string? | ||
2467 | | jne ->fff_fallback_2 // Fallback for > 1-char strings. | ||
2468 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 | ||
2469 | | movzx RA, byte STR:RB[1] | ||
2470 | | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2471 | |.if X64 | ||
2472 | | mov TMP3, RC | ||
2473 | |.else | ||
2474 | | mov ARG3, RC | ||
2475 | |.endif | ||
2476 | |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). | ||
2477 | | mov [RB], RAL | ||
2478 | | add RB, 1 | ||
2479 | | sub RC, 1 | ||
2480 | | jnz <1 | ||
2481 | | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2482 | | jmp ->fff_newstr | ||
2483 | | | ||
2484 | |.ffunc_1 string_reverse | ||
2485 | | ffgccheck | ||
2486 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2487 | | mov STR:RB, [BASE] | ||
2488 | | mov RC, STR:RB->len | ||
2489 | | test RC, RC | ||
2490 | | jz ->fff_emptystr // Zero length string? | ||
2491 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
2492 | | add RB, #STR | ||
2493 | | mov TMP2, PC // Need another temp register. | ||
2494 | |.if X64 | ||
2495 | | mov TMP3, RC | ||
2496 | |.else | ||
2497 | | mov ARG3, RC | ||
2498 | |.endif | ||
2499 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2500 | |1: | ||
2501 | | movzx RA, byte [RB] | ||
2502 | | add RB, 1 | ||
2503 | | sub RC, 1 | ||
2504 | | mov [PC+RC], RAL | ||
2505 | | jnz <1 | ||
2506 | | mov RD, PC | ||
2507 | | mov PC, TMP2 | ||
2508 | | jmp ->fff_newstr | ||
2509 | | | ||
2510 | |.macro ffstring_case, name, lo, hi | ||
2511 | | .ffunc_1 name | ||
2512 | | ffgccheck | ||
2513 | | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | ||
2514 | | mov STR:RB, [BASE] | ||
2515 | | mov RC, STR:RB->len | ||
2516 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | ||
2517 | | add RB, #STR | ||
2518 | | mov TMP2, PC // Need another temp register. | ||
2519 | |.if X64 | ||
2520 | | mov TMP3, RC | ||
2521 | |.else | ||
2522 | | mov ARG3, RC | ||
2523 | |.endif | ||
2524 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | ||
2525 | | jmp >3 | ||
2526 | |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). | ||
2527 | | movzx RA, byte [RB+RC] | ||
2528 | | cmp RA, lo | ||
2529 | | jb >2 | ||
2530 | | cmp RA, hi | ||
2531 | | ja >2 | ||
2532 | | xor RA, 0x20 | ||
2533 | |2: | ||
2534 | | mov [PC+RC], RAL | ||
2535 | |3: | ||
2536 | | sub RC, 1 | ||
2537 | | jns <1 | ||
2538 | | mov RD, PC | ||
2539 | | mov PC, TMP2 | ||
2540 | | jmp ->fff_newstr | ||
2541 | |.endmacro | 2356 | |.endmacro |
2542 | | | 2357 | | |
2543 | |ffstring_case string_lower, 0x41, 0x5a | 2358 | |ffstring_op reverse |
2544 | |ffstring_case string_upper, 0x61, 0x7a | 2359 | |ffstring_op lower |
2545 | | | 2360 | |ffstring_op upper |
2546 | |//-- Table library ------------------------------------------------------ | ||
2547 | | | ||
2548 | |.ffunc_1 table_getn | ||
2549 | | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | ||
2550 | | mov RB, BASE // Save BASE. | ||
2551 | | mov TAB:FCARG1, [BASE] | ||
2552 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | ||
2553 | | // Length of table returned in eax (RD). | ||
2554 | | mov BASE, RB // Restore BASE. | ||
2555 | |.if DUALNUM | ||
2556 | | mov RB, RD; jmp ->fff_resi | ||
2557 | |.elif SSE | ||
2558 | | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 | ||
2559 | |.else | ||
2560 | | mov ARG1, RD; fild ARG1; jmp ->fff_resn | ||
2561 | |.endif | ||
2562 | | | 2361 | | |
2563 | |//-- Bit library -------------------------------------------------------- | 2362 | |//-- Bit library -------------------------------------------------------- |
2564 | | | 2363 | | |
@@ -2567,11 +2366,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2567 | |.macro .ffunc_bit, name, kind | 2366 | |.macro .ffunc_bit, name, kind |
2568 | | .ffunc_1 name | 2367 | | .ffunc_1 name |
2569 | |.if kind == 2 | 2368 | |.if kind == 2 |
2570 | |.if SSE | ||
2571 | | sseconst_tobit xmm1, RBa | 2369 | | sseconst_tobit xmm1, RBa |
2572 | |.else | ||
2573 | | mov TMP1, TOBIT_BIAS | ||
2574 | |.endif | ||
2575 | |.endif | 2370 | |.endif |
2576 | | cmp dword [BASE+4], LJ_TISNUM | 2371 | | cmp dword [BASE+4], LJ_TISNUM |
2577 | |.if DUALNUM | 2372 | |.if DUALNUM |
@@ -2587,37 +2382,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
2587 | |.else | 2382 | |.else |
2588 | | jae ->fff_fallback | 2383 | | jae ->fff_fallback |
2589 | |.endif | 2384 | |.endif |
2590 | |.if SSE | ||
2591 | | movsd xmm0, qword [BASE] | 2385 | | movsd xmm0, qword [BASE] |
2592 | |.if kind < 2 | 2386 | |.if kind < 2 |
2593 | | sseconst_tobit xmm1, RBa | 2387 | | sseconst_tobit xmm1, RBa |
2594 | |.endif | 2388 | |.endif |
2595 | | addsd xmm0, xmm1 | 2389 | | addsd xmm0, xmm1 |
2596 | | movd RB, xmm0 | 2390 | | movd RB, xmm0 |
2597 | |.else | ||
2598 | | fld qword [BASE] | ||
2599 | |.if kind < 2 | ||
2600 | | mov TMP1, TOBIT_BIAS | ||
2601 | |.endif | ||
2602 | | fadd TMP1 | ||
2603 | | fstp FPARG1 | ||
2604 | |.if kind > 0 | ||
2605 | | mov RB, ARG1 | ||
2606 | |.endif | ||
2607 | |.endif | ||
2608 | |2: | 2391 | |2: |
2609 | |.endmacro | 2392 | |.endmacro |
2610 | | | 2393 | | |
2611 | |.ffunc_bit bit_tobit, 0 | 2394 | |.ffunc_bit bit_tobit, 0 |
2612 | |.if DUALNUM or SSE | ||
2613 | |.if not SSE | ||
2614 | | mov RB, ARG1 | ||
2615 | |.endif | ||
2616 | | jmp ->fff_resbit | 2395 | | jmp ->fff_resbit |
2617 | |.else | ||
2618 | | fild ARG1 | ||
2619 | | jmp ->fff_resn | ||
2620 | |.endif | ||
2621 | | | 2396 | | |
2622 | |.macro .ffunc_bit_op, name, ins | 2397 | |.macro .ffunc_bit_op, name, ins |
2623 | | .ffunc_bit name, 2 | 2398 | | .ffunc_bit name, 2 |
@@ -2637,17 +2412,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2637 | |.else | 2412 | |.else |
2638 | | jae ->fff_fallback_bit_op | 2413 | | jae ->fff_fallback_bit_op |
2639 | |.endif | 2414 | |.endif |
2640 | |.if SSE | ||
2641 | | movsd xmm0, qword [RD] | 2415 | | movsd xmm0, qword [RD] |
2642 | | addsd xmm0, xmm1 | 2416 | | addsd xmm0, xmm1 |
2643 | | movd RA, xmm0 | 2417 | | movd RA, xmm0 |
2644 | | ins RB, RA | 2418 | | ins RB, RA |
2645 | |.else | ||
2646 | | fld qword [RD] | ||
2647 | | fadd TMP1 | ||
2648 | | fstp FPARG1 | ||
2649 | | ins RB, ARG1 | ||
2650 | |.endif | ||
2651 | | sub RD, 8 | 2419 | | sub RD, 8 |
2652 | | jmp <1 | 2420 | | jmp <1 |
2653 | |.endmacro | 2421 | |.endmacro |
@@ -2664,15 +2432,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2664 | | not RB | 2432 | | not RB |
2665 | |.if DUALNUM | 2433 | |.if DUALNUM |
2666 | | jmp ->fff_resbit | 2434 | | jmp ->fff_resbit |
2667 | |.elif SSE | 2435 | |.else |
2668 | |->fff_resbit: | 2436 | |->fff_resbit: |
2669 | | cvtsi2sd xmm0, RB | 2437 | | cvtsi2sd xmm0, RB |
2670 | | jmp ->fff_resxmm0 | 2438 | | jmp ->fff_resxmm0 |
2671 | |.else | ||
2672 | |->fff_resbit: | ||
2673 | | mov ARG1, RB | ||
2674 | | fild ARG1 | ||
2675 | | jmp ->fff_resn | ||
2676 | |.endif | 2439 | |.endif |
2677 | | | 2440 | | |
2678 | |->fff_fallback_bit_op: | 2441 | |->fff_fallback_bit_op: |
@@ -2685,22 +2448,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
2685 | | // Note: no inline conversion from number for 2nd argument! | 2448 | | // Note: no inline conversion from number for 2nd argument! |
2686 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | 2449 | | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback |
2687 | | mov RA, dword [BASE+8] | 2450 | | mov RA, dword [BASE+8] |
2688 | |.elif SSE | 2451 | |.else |
2689 | | .ffunc_nnsse name | 2452 | | .ffunc_nnsse name |
2690 | | sseconst_tobit xmm2, RBa | 2453 | | sseconst_tobit xmm2, RBa |
2691 | | addsd xmm0, xmm2 | 2454 | | addsd xmm0, xmm2 |
2692 | | addsd xmm1, xmm2 | 2455 | | addsd xmm1, xmm2 |
2693 | | movd RB, xmm0 | 2456 | | movd RB, xmm0 |
2694 | | movd RA, xmm1 | 2457 | | movd RA, xmm1 |
2695 | |.else | ||
2696 | | .ffunc_nn name | ||
2697 | | mov TMP1, TOBIT_BIAS | ||
2698 | | fadd TMP1 | ||
2699 | | fstp FPARG3 | ||
2700 | | fadd TMP1 | ||
2701 | | fstp FPARG1 | ||
2702 | | mov RA, ARG3 | ||
2703 | | mov RB, ARG1 | ||
2704 | |.endif | 2458 | |.endif |
2705 | | ins RB, cl // Assumes RA is ecx. | 2459 | | ins RB, cl // Assumes RA is ecx. |
2706 | | jmp ->fff_resbit | 2460 | | jmp ->fff_resbit |
@@ -2834,7 +2588,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2834 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | 2588 | | mov FCARG2, PC // Caveat: FCARG2 == BASE |
2835 | | mov FCARG1, L:RB | 2589 | | mov FCARG1, L:RB |
2836 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | 2590 | | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. |
2837 | | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) | 2591 | | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) |
2838 | |3: | 2592 | |3: |
2839 | | mov BASE, L:RB->base | 2593 | | mov BASE, L:RB->base |
2840 | |4: | 2594 | |4: |
@@ -2905,6 +2659,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
2905 | | add NARGS:RD, 1 | 2659 | | add NARGS:RD, 1 |
2906 | | jmp RBa | 2660 | | jmp RBa |
2907 | | | 2661 | | |
2662 | |->vm_profhook: // Dispatch target for profiler hook. | ||
2663 | #if LJ_HASPROFILE | ||
2664 | | mov L:RB, SAVE_L | ||
2665 | | mov L:RB->base, BASE | ||
2666 | | mov FCARG2, PC // Caveat: FCARG2 == BASE | ||
2667 | | mov FCARG1, L:RB | ||
2668 | | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) | ||
2669 | | mov BASE, L:RB->base | ||
2670 | | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | ||
2671 | | sub PC, 4 | ||
2672 | | jmp ->cont_nop | ||
2673 | #endif | ||
2674 | | | ||
2908 | |//----------------------------------------------------------------------- | 2675 | |//----------------------------------------------------------------------- |
2909 | |//-- Trace exit handler ------------------------------------------------- | 2676 | |//-- Trace exit handler ------------------------------------------------- |
2910 | |//----------------------------------------------------------------------- | 2677 | |//----------------------------------------------------------------------- |
@@ -2957,10 +2724,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
2957 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 | 2724 | | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 |
2958 | |.endif | 2725 | |.endif |
2959 | | // Caveat: RB is ebp. | 2726 | | // Caveat: RB is ebp. |
2960 | | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] | 2727 | | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] |
2961 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | 2728 | | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] |
2962 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | 2729 | | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa |
2963 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | ||
2964 | | mov L:RB->base, BASE | 2730 | | mov L:RB->base, BASE |
2965 | |.if X64WIN | 2731 | |.if X64WIN |
2966 | | lea CARG2, [rsp+4*8] | 2732 | | lea CARG2, [rsp+4*8] |
@@ -2970,6 +2736,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2970 | | lea FCARG2, [esp+16] | 2736 | | lea FCARG2, [esp+16] |
2971 | |.endif | 2737 | |.endif |
2972 | | lea FCARG1, [DISPATCH+GG_DISP2J] | 2738 | | lea FCARG1, [DISPATCH+GG_DISP2J] |
2739 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
2973 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) | 2740 | | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) |
2974 | | // MULTRES or negated error code returned in eax (RD). | 2741 | | // MULTRES or negated error code returned in eax (RD). |
2975 | | mov RAa, L:RB->cframe | 2742 | | mov RAa, L:RB->cframe |
@@ -3017,11 +2784,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
3017 | | mov r12, TMPQ | 2784 | | mov r12, TMPQ |
3018 | |.endif | 2785 | |.endif |
3019 | | test RD, RD; js >3 // Check for error from exit. | 2786 | | test RD, RD; js >3 // Check for error from exit. |
2787 | | mov L:RB, SAVE_L | ||
3020 | | mov MULTRES, RD | 2788 | | mov MULTRES, RD |
3021 | | mov LFUNC:KBASE, [BASE-8] | 2789 | | mov LFUNC:KBASE, [BASE-8] |
3022 | | mov KBASE, LFUNC:KBASE->pc | 2790 | | mov KBASE, LFUNC:KBASE->pc |
3023 | | mov KBASE, [KBASE+PC2PROTO(k)] | 2791 | | mov KBASE, [KBASE+PC2PROTO(k)] |
3024 | | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | 2792 | | mov L:RB->base, BASE |
2793 | | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | ||
3025 | | set_vmstate INTERP | 2794 | | set_vmstate INTERP |
3026 | | // Modified copy of ins_next which handles function header dispatch, too. | 2795 | | // Modified copy of ins_next which handles function header dispatch, too. |
3027 | | mov RC, [PC] | 2796 | | mov RC, [PC] |
@@ -3051,27 +2820,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
3051 | |//----------------------------------------------------------------------- | 2820 | |//----------------------------------------------------------------------- |
3052 | | | 2821 | | |
3053 | |// FP value rounding. Called by math.floor/math.ceil fast functions | 2822 | |// FP value rounding. Called by math.floor/math.ceil fast functions |
3054 | |// and from JIT code. | 2823 | |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. |
3055 | | | 2824 | |.macro vm_round, name, mode |
3056 | |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. | 2825 | |->name .. _sse: |
3057 | |.macro vm_round_x87, mode1, mode2 | ||
3058 | | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. | ||
3059 | | mov [esp+8], eax | ||
3060 | | mov ax, mode1 | ||
3061 | | or ax, [esp+4] | ||
3062 | |.if mode2 ~= 0xffff | ||
3063 | | and ax, mode2 | ||
3064 | |.endif | ||
3065 | | mov [esp+6], ax | ||
3066 | | fldcw word [esp+6] | ||
3067 | | frndint | ||
3068 | | fldcw word [esp+4] | ||
3069 | | mov eax, [esp+8] | ||
3070 | | ret | ||
3071 | |.endmacro | ||
3072 | | | ||
3073 | |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. | ||
3074 | |.macro vm_round_sse, mode | ||
3075 | | sseconst_abs xmm2, RDa | 2826 | | sseconst_abs xmm2, RDa |
3076 | | sseconst_2p52 xmm3, RDa | 2827 | | sseconst_2p52 xmm3, RDa |
3077 | | movaps xmm1, xmm0 | 2828 | | movaps xmm1, xmm0 |
@@ -3107,22 +2858,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
3107 | | ret | 2858 | | ret |
3108 | |.endmacro | 2859 | |.endmacro |
3109 | | | 2860 | | |
3110 | |.macro vm_round, name, ssemode, mode1, mode2 | 2861 | |->vm_floor: |
3111 | |->name: | 2862 | |.if not X64 |
3112 | |.if not SSE | 2863 | | movsd xmm0, qword [esp+4] |
3113 | | vm_round_x87 mode1, mode2 | 2864 | | call ->vm_floor_sse |
2865 | | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | ||
2866 | | fld qword [esp+4] | ||
2867 | | ret | ||
3114 | |.endif | 2868 | |.endif |
3115 | |->name .. _sse: | ||
3116 | | vm_round_sse ssemode | ||
3117 | |.endmacro | ||
3118 | | | 2869 | | |
3119 | | vm_round vm_floor, 0, 0x0400, 0xf7ff | 2870 | | vm_round vm_floor, 0 |
3120 | | vm_round vm_ceil, 1, 0x0800, 0xfbff | 2871 | | vm_round vm_ceil, 1 |
3121 | | vm_round vm_trunc, 2, 0x0c00, 0xffff | 2872 | | vm_round vm_trunc, 2 |
3122 | | | 2873 | | |
3123 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. | 2874 | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |
3124 | |->vm_mod: | 2875 | |->vm_mod: |
3125 | |.if SSE | ||
3126 | |// Args in xmm0/xmm1, return value in xmm0. | 2876 | |// Args in xmm0/xmm1, return value in xmm0. |
3127 | |// Caveat: xmm0-xmm5 and RC (eax) modified! | 2877 | |// Caveat: xmm0-xmm5 and RC (eax) modified! |
3128 | | movaps xmm5, xmm0 | 2878 | | movaps xmm5, xmm0 |
@@ -3150,23 +2900,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3150 | | movaps xmm0, xmm5 | 2900 | | movaps xmm0, xmm5 |
3151 | | subsd xmm0, xmm1 | 2901 | | subsd xmm0, xmm1 |
3152 | | ret | 2902 | | ret |
3153 | |.else | ||
3154 | |// Args/ret on x87 stack (y on top). No xmm registers modified. | ||
3155 | |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! | ||
3156 | | fld st1 | ||
3157 | | fdiv st1 | ||
3158 | | fnstcw word [esp+4] | ||
3159 | | mov ax, 0x0400 | ||
3160 | | or ax, [esp+4] | ||
3161 | | and ax, 0xf7ff | ||
3162 | | mov [esp+6], ax | ||
3163 | | fldcw word [esp+6] | ||
3164 | | frndint | ||
3165 | | fldcw word [esp+4] | ||
3166 | | fmulp st1 | ||
3167 | | fsubp st1 | ||
3168 | | ret | ||
3169 | |.endif | ||
3170 | | | 2903 | | |
3171 | |// FP log2(x). Called by math.log(x, base). | 2904 | |// FP log2(x). Called by math.log(x, base). |
3172 | |->vm_log2: | 2905 | |->vm_log2: |
@@ -3217,105 +2950,15 @@ static void build_subroutines(BuildCtx *ctx) | |||
3217 | | | 2950 | | |
3218 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, | 2951 | |// Generic power function x^y. Called by BC_POW, math.pow fast function, |
3219 | |// and vm_arith. | 2952 | |// and vm_arith. |
3220 | |// Args/ret on x87 stack (y on top). RC (eax) modified. | ||
3221 | |// Caveat: needs 3 slots on x87 stack! | ||
3222 | |->vm_pow: | ||
3223 | |.if not SSE | ||
3224 | | fist dword [esp+4] // Store/reload int before comparison. | ||
3225 | | fild dword [esp+4] // Integral exponent used in vm_powi. | ||
3226 | | fucomip st1 | ||
3227 | | jnz >8 // Branch for FP exponents. | ||
3228 | | jp >9 // Branch for NaN exponent. | ||
3229 | | fpop // Pop y and fallthrough to vm_powi. | ||
3230 | | | ||
3231 | |// FP/int power function x^i. Arg1/ret on x87 stack. | ||
3232 | |// Arg2 (int) on C stack. RC (eax) modified. | ||
3233 | |// Caveat: needs 2 slots on x87 stack! | ||
3234 | | mov eax, [esp+4] | ||
3235 | | cmp eax, 1; jle >6 // i<=1? | ||
3236 | | // Now 1 < (unsigned)i <= 0x80000000. | ||
3237 | |1: // Handle leading zeros. | ||
3238 | | test eax, 1; jnz >2 | ||
3239 | | fmul st0 | ||
3240 | | shr eax, 1 | ||
3241 | | jmp <1 | ||
3242 | |2: | ||
3243 | | shr eax, 1; jz >5 | ||
3244 | | fdup | ||
3245 | |3: // Handle trailing bits. | ||
3246 | | fmul st0 | ||
3247 | | shr eax, 1; jz >4 | ||
3248 | | jnc <3 | ||
3249 | | fmul st1, st0 | ||
3250 | | jmp <3 | ||
3251 | |4: | ||
3252 | | fmulp st1 | ||
3253 | |5: | ||
3254 | | ret | ||
3255 | |6: | ||
3256 | | je <5 // x^1 ==> x | ||
3257 | | jb >7 | ||
3258 | | fld1; fdivrp st1 | ||
3259 | | neg eax | ||
3260 | | cmp eax, 1; je <5 // x^-1 ==> 1/x | ||
3261 | | jmp <1 // x^-i ==> (1/x)^i | ||
3262 | |7: | ||
3263 | | fpop; fld1 // x^0 ==> 1 | ||
3264 | | ret | ||
3265 | | | ||
3266 | |8: // FP/FP power function x^y. | ||
3267 | | fst dword [esp+4] | ||
3268 | | fxch | ||
3269 | | fst dword [esp+8] | ||
3270 | | mov eax, [esp+4]; shl eax, 1 | ||
3271 | | cmp eax, 0xff000000; je >2 // x^+-Inf? | ||
3272 | | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? | ||
3273 | | cmp eax, 0xff000000; je >4 // +-Inf^y? | ||
3274 | | fyl2x | ||
3275 | | jmp ->vm_exp2raw | ||
3276 | | | ||
3277 | |9: // Handle x^NaN. | ||
3278 | | fld1 | ||
3279 | | fucomip st2 | ||
3280 | | je >1 // 1^NaN ==> 1 | ||
3281 | | fxch // x^NaN ==> NaN | ||
3282 | |1: | ||
3283 | | fpop | ||
3284 | | ret | ||
3285 | | | ||
3286 | |2: // Handle x^+-Inf. | ||
3287 | | fabs | ||
3288 | | fld1 | ||
3289 | | fucomip st1 | ||
3290 | | je >3 // +-1^+-Inf ==> 1 | ||
3291 | | fpop; fabs; fldz; mov eax, 0; setc al | ||
3292 | | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 | ||
3293 | | fxch | ||
3294 | |3: | ||
3295 | | fpop1; fabs | ||
3296 | | ret | ||
3297 | | | ||
3298 | |4: // Handle +-0^y or +-Inf^y. | ||
3299 | | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| | ||
3300 | | fpop; fpop | ||
3301 | | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf | ||
3302 | | fldz // y < 0, +-Inf^y ==> 0 | ||
3303 | | ret | ||
3304 | |5: | ||
3305 | | mov dword [esp+4], 0x7f800000 // Return +Inf. | ||
3306 | | fld dword [esp+4] | ||
3307 | | ret | ||
3308 | |.endif | ||
3309 | | | ||
3310 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. | 2953 | |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. |
3311 | |// Needs 16 byte scratch area for x86. Also called from JIT code. | 2954 | |// Needs 16 byte scratch area for x86. Also called from JIT code. |
3312 | |->vm_pow_sse: | 2955 | |->vm_pow_sse: |
3313 | | cvtsd2si eax, xmm1 | 2956 | | cvttsd2si eax, xmm1 |
3314 | | cvtsi2sd xmm2, eax | 2957 | | cvtsi2sd xmm2, eax |
3315 | | ucomisd xmm1, xmm2 | 2958 | | ucomisd xmm1, xmm2 |
3316 | | jnz >8 // Branch for FP exponents. | 2959 | | jnz >8 // Branch for FP exponents. |
3317 | | jp >9 // Branch for NaN exponent. | 2960 | | jp >9 // Branch for NaN exponent. |
3318 | | // Fallthrough to vm_powi_sse. | 2961 | | // Fallthrough. |
3319 | | | 2962 | | |
3320 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. | 2963 | |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. |
3321 | |->vm_powi_sse: | 2964 | |->vm_powi_sse: |
@@ -3437,8 +3080,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
3437 | | .else | 3080 | | .else |
3438 | | .define fpmop, CARG1d | 3081 | | .define fpmop, CARG1d |
3439 | | .endif | 3082 | | .endif |
3440 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | 3083 | | cmp fpmop, 1; jb ->vm_floor_sse; je ->vm_ceil_sse |
3441 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | 3084 | | cmp fpmop, 3; jb ->vm_trunc_sse; ja >2 |
3442 | | sqrtsd xmm0, xmm0; ret | 3085 | | sqrtsd xmm0, xmm0; ret |
3443 | |2: | 3086 | |2: |
3444 | | .if X64WIN | 3087 | | .if X64WIN |
@@ -3478,14 +3121,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
3478 | | ret | 3121 | | ret |
3479 | |.else // x86 calling convention. | 3122 | |.else // x86 calling convention. |
3480 | | .define fpmop, eax | 3123 | | .define fpmop, eax |
3481 | |.if SSE | ||
3482 | | mov fpmop, [esp+12] | 3124 | | mov fpmop, [esp+12] |
3483 | | movsd xmm0, qword [esp+4] | 3125 | | movsd xmm0, qword [esp+4] |
3484 | | cmp fpmop, 1; je >1; ja >2 | 3126 | | cmp fpmop, 1; je >1; ja >2 |
3485 | | call ->vm_floor; jmp >7 | 3127 | | call ->vm_floor_sse; jmp >7 |
3486 | |1: ; call ->vm_ceil; jmp >7 | 3128 | |1: ; call ->vm_ceil_sse; jmp >7 |
3487 | |2: ; cmp fpmop, 3; je >1; ja >2 | 3129 | |2: ; cmp fpmop, 3; je >1; ja >2 |
3488 | | call ->vm_trunc; jmp >7 | 3130 | | call ->vm_trunc_sse; jmp >7 |
3489 | |1: | 3131 | |1: |
3490 | | sqrtsd xmm0, xmm0 | 3132 | | sqrtsd xmm0, xmm0 |
3491 | |7: | 3133 | |7: |
@@ -3503,23 +3145,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3503 | |2: ; cmp fpmop, 11; je >1; ja >9 | 3145 | |2: ; cmp fpmop, 11; je >1; ja >9 |
3504 | | fcos; ret | 3146 | | fcos; ret |
3505 | |1: ; fptan; fpop; ret | 3147 | |1: ; fptan; fpop; ret |
3506 | |.else | ||
3507 | | mov fpmop, [esp+12] | ||
3508 | | fld qword [esp+4] | ||
3509 | | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil | ||
3510 | | cmp fpmop, 3; jb ->vm_trunc; ja >2 | ||
3511 | | fsqrt; ret | ||
3512 | |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 | ||
3513 | | cmp fpmop, 7; je >1; ja >2 | ||
3514 | | fldln2; fxch; fyl2x; ret | ||
3515 | |1: ; fld1; fxch; fyl2x; ret | ||
3516 | |2: ; cmp fpmop, 9; je >1; ja >2 | ||
3517 | | fldlg2; fxch; fyl2x; ret | ||
3518 | |1: ; fsin; ret | ||
3519 | |2: ; cmp fpmop, 11; je >1; ja >9 | ||
3520 | | fcos; ret | ||
3521 | |1: ; fptan; fpop; ret | ||
3522 | |.endif | ||
3523 | |.endif | 3148 | |.endif |
3524 | |9: ; int3 // Bad fpm. | 3149 | |9: ; int3 // Bad fpm. |
3525 | |.endif | 3150 | |.endif |
@@ -3541,7 +3166,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3541 | |2: ; cmp foldop, 3; je >1; ja >2 | 3166 | |2: ; cmp foldop, 3; je >1; ja >2 |
3542 | | mulsd xmm0, xmm1; ret | 3167 | | mulsd xmm0, xmm1; ret |
3543 | |1: ; divsd xmm0, xmm1; ret | 3168 | |1: ; divsd xmm0, xmm1; ret |
3544 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow | 3169 | |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow_sse |
3545 | | cmp foldop, 7; je >1; ja >2 | 3170 | | cmp foldop, 7; je >1; ja >2 |
3546 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret | 3171 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret |
3547 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret | 3172 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret |
@@ -3574,7 +3199,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3574 | |1: ; maxsd xmm0, xmm1; ret | 3199 | |1: ; maxsd xmm0, xmm1; ret |
3575 | |9: ; int3 // Bad op. | 3200 | |9: ; int3 // Bad op. |
3576 | | | 3201 | | |
3577 | |.elif SSE // x86 calling convention with SSE ops. | 3202 | |.else // x86 calling convention. |
3578 | | | 3203 | | |
3579 | | .define foldop, eax | 3204 | | .define foldop, eax |
3580 | | mov foldop, [esp+20] | 3205 | | mov foldop, [esp+20] |
@@ -3593,7 +3218,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
3593 | |2: ; cmp foldop, 5 | 3218 | |2: ; cmp foldop, 5 |
3594 | | je >1; ja >2 | 3219 | | je >1; ja >2 |
3595 | | call ->vm_mod; jmp <7 | 3220 | | call ->vm_mod; jmp <7 |
3596 | |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. | 3221 | |1: ; pop edx; call ->vm_pow_sse; push edx; jmp <7 // Writes to scratch area. |
3597 | |2: ; cmp foldop, 7; je >1; ja >2 | 3222 | |2: ; cmp foldop, 7; je >1; ja >2 |
3598 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 | 3223 | | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 |
3599 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 | 3224 | |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 |
@@ -3608,29 +3233,6 @@ static void build_subroutines(BuildCtx *ctx) | |||
3608 | |1: ; maxsd xmm0, xmm1; jmp <7 | 3233 | |1: ; maxsd xmm0, xmm1; jmp <7 |
3609 | |9: ; int3 // Bad op. | 3234 | |9: ; int3 // Bad op. |
3610 | | | 3235 | | |
3611 | |.else // x86 calling convention with x87 ops. | ||
3612 | | | ||
3613 | | mov eax, [esp+20] | ||
3614 | | fld qword [esp+4] | ||
3615 | | fld qword [esp+12] | ||
3616 | | cmp eax, 1; je >1; ja >2 | ||
3617 | | faddp st1; ret | ||
3618 | |1: ; fsubp st1; ret | ||
3619 | |2: ; cmp eax, 3; je >1; ja >2 | ||
3620 | | fmulp st1; ret | ||
3621 | |1: ; fdivp st1; ret | ||
3622 | |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow | ||
3623 | | cmp eax, 7; je >1; ja >2 | ||
3624 | | fpop; fchs; ret | ||
3625 | |1: ; fpop; fabs; ret | ||
3626 | |2: ; cmp eax, 9; je >1; ja >2 | ||
3627 | | fpatan; ret | ||
3628 | |1: ; fxch; fscale; fpop1; ret | ||
3629 | |2: ; cmp eax, 11; je >1; ja >9 | ||
3630 | | fucomi st1; fcmovnbe st1; fpop1; ret | ||
3631 | |1: ; fucomi st1; fcmovbe st1; fpop1; ret | ||
3632 | |9: ; int3 // Bad op. | ||
3633 | | | ||
3634 | |.endif | 3236 | |.endif |
3635 | | | 3237 | | |
3636 | |//----------------------------------------------------------------------- | 3238 | |//----------------------------------------------------------------------- |
@@ -3943,19 +3545,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3943 | | // RA is a number. | 3545 | | // RA is a number. |
3944 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | 3546 | | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp |
3945 | | // RA is a number, RD is an integer. | 3547 | | // RA is a number, RD is an integer. |
3946 | |.if SSE | ||
3947 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3548 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
3948 | | jmp >2 | 3549 | | jmp >2 |
3949 | |.else | ||
3950 | | fld qword [BASE+RA*8] | ||
3951 | | fild dword [BASE+RD*8] | ||
3952 | | jmp >3 | ||
3953 | |.endif | ||
3954 | | | 3550 | | |
3955 | |8: // RA is an integer, RD is not an integer. | 3551 | |8: // RA is an integer, RD is not an integer. |
3956 | | ja ->vmeta_comp | 3552 | | ja ->vmeta_comp |
3957 | | // RA is an integer, RD is a number. | 3553 | | // RA is an integer, RD is a number. |
3958 | |.if SSE | ||
3959 | | cvtsi2sd xmm1, dword [BASE+RA*8] | 3554 | | cvtsi2sd xmm1, dword [BASE+RA*8] |
3960 | | movsd xmm0, qword [BASE+RD*8] | 3555 | | movsd xmm0, qword [BASE+RD*8] |
3961 | | add PC, 4 | 3556 | | add PC, 4 |
@@ -3963,29 +3558,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3963 | | jmp_comp jbe, ja, jb, jae, <9 | 3558 | | jmp_comp jbe, ja, jb, jae, <9 |
3964 | | jmp <6 | 3559 | | jmp <6 |
3965 | |.else | 3560 | |.else |
3966 | | fild dword [BASE+RA*8] | ||
3967 | | jmp >2 | ||
3968 | |.endif | ||
3969 | |.else | ||
3970 | | checknum RA, ->vmeta_comp | 3561 | | checknum RA, ->vmeta_comp |
3971 | | checknum RD, ->vmeta_comp | 3562 | | checknum RD, ->vmeta_comp |
3972 | |.endif | 3563 | |.endif |
3973 | |.if SSE | ||
3974 | |1: | 3564 | |1: |
3975 | | movsd xmm0, qword [BASE+RD*8] | 3565 | | movsd xmm0, qword [BASE+RD*8] |
3976 | |2: | 3566 | |2: |
3977 | | add PC, 4 | 3567 | | add PC, 4 |
3978 | | ucomisd xmm0, qword [BASE+RA*8] | 3568 | | ucomisd xmm0, qword [BASE+RA*8] |
3979 | |3: | 3569 | |3: |
3980 | |.else | ||
3981 | |1: | ||
3982 | | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. | ||
3983 | |2: | ||
3984 | | fld qword [BASE+RD*8] | ||
3985 | |3: | ||
3986 | | add PC, 4 | ||
3987 | | fcomparepp | ||
3988 | |.endif | ||
3989 | | // Unordered: all of ZF CF PF set, ordered: PF clear. | 3570 | | // Unordered: all of ZF CF PF set, ordered: PF clear. |
3990 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | 3571 | | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |
3991 | |.if DUALNUM | 3572 | |.if DUALNUM |
@@ -4025,43 +3606,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4025 | | // RD is a number. | 3606 | | // RD is a number. |
4026 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | 3607 | | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 |
4027 | | // RD is a number, RA is an integer. | 3608 | | // RD is a number, RA is an integer. |
4028 | |.if SSE | ||
4029 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3609 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4030 | |.else | ||
4031 | | fild dword [BASE+RA*8] | ||
4032 | |.endif | ||
4033 | | jmp >2 | 3610 | | jmp >2 |
4034 | | | 3611 | | |
4035 | |8: // RD is an integer, RA is not an integer. | 3612 | |8: // RD is an integer, RA is not an integer. |
4036 | | ja >5 | 3613 | | ja >5 |
4037 | | // RD is an integer, RA is a number. | 3614 | | // RD is an integer, RA is a number. |
4038 | |.if SSE | ||
4039 | | cvtsi2sd xmm0, dword [BASE+RD*8] | 3615 | | cvtsi2sd xmm0, dword [BASE+RD*8] |
4040 | | ucomisd xmm0, qword [BASE+RA*8] | 3616 | | ucomisd xmm0, qword [BASE+RA*8] |
4041 | |.else | ||
4042 | | fild dword [BASE+RD*8] | ||
4043 | | fld qword [BASE+RA*8] | ||
4044 | |.endif | ||
4045 | | jmp >4 | 3617 | | jmp >4 |
4046 | | | 3618 | | |
4047 | |.else | 3619 | |.else |
4048 | | cmp RB, LJ_TISNUM; jae >5 | 3620 | | cmp RB, LJ_TISNUM; jae >5 |
4049 | | checknum RA, >5 | 3621 | | checknum RA, >5 |
4050 | |.endif | 3622 | |.endif |
4051 | |.if SSE | ||
4052 | |1: | 3623 | |1: |
4053 | | movsd xmm0, qword [BASE+RA*8] | 3624 | | movsd xmm0, qword [BASE+RA*8] |
4054 | |2: | 3625 | |2: |
4055 | | ucomisd xmm0, qword [BASE+RD*8] | 3626 | | ucomisd xmm0, qword [BASE+RD*8] |
4056 | |4: | 3627 | |4: |
4057 | |.else | ||
4058 | |1: | ||
4059 | | fld qword [BASE+RA*8] | ||
4060 | |2: | ||
4061 | | fld qword [BASE+RD*8] | ||
4062 | |4: | ||
4063 | | fcomparepp | ||
4064 | |.endif | ||
4065 | iseqne_fp: | 3628 | iseqne_fp: |
4066 | if (vk) { | 3629 | if (vk) { |
4067 | | jp >2 // Unordered means not equal. | 3630 | | jp >2 // Unordered means not equal. |
@@ -4184,39 +3747,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4184 | | // RA is a number. | 3747 | | // RA is a number. |
4185 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | 3748 | | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 |
4186 | | // RA is a number, RD is an integer. | 3749 | | // RA is a number, RD is an integer. |
4187 | |.if SSE | ||
4188 | | cvtsi2sd xmm0, dword [KBASE+RD*8] | 3750 | | cvtsi2sd xmm0, dword [KBASE+RD*8] |
4189 | |.else | ||
4190 | | fild dword [KBASE+RD*8] | ||
4191 | |.endif | ||
4192 | | jmp >2 | 3751 | | jmp >2 |
4193 | | | 3752 | | |
4194 | |8: // RA is an integer, RD is a number. | 3753 | |8: // RA is an integer, RD is a number. |
4195 | |.if SSE | ||
4196 | | cvtsi2sd xmm0, dword [BASE+RA*8] | 3754 | | cvtsi2sd xmm0, dword [BASE+RA*8] |
4197 | | ucomisd xmm0, qword [KBASE+RD*8] | 3755 | | ucomisd xmm0, qword [KBASE+RD*8] |
4198 | |.else | ||
4199 | | fild dword [BASE+RA*8] | ||
4200 | | fld qword [KBASE+RD*8] | ||
4201 | |.endif | ||
4202 | | jmp >4 | 3756 | | jmp >4 |
4203 | |.else | 3757 | |.else |
4204 | | cmp RB, LJ_TISNUM; jae >3 | 3758 | | cmp RB, LJ_TISNUM; jae >3 |
4205 | |.endif | 3759 | |.endif |
4206 | |.if SSE | ||
4207 | |1: | 3760 | |1: |
4208 | | movsd xmm0, qword [KBASE+RD*8] | 3761 | | movsd xmm0, qword [KBASE+RD*8] |
4209 | |2: | 3762 | |2: |
4210 | | ucomisd xmm0, qword [BASE+RA*8] | 3763 | | ucomisd xmm0, qword [BASE+RA*8] |
4211 | |4: | 3764 | |4: |
4212 | |.else | ||
4213 | |1: | ||
4214 | | fld qword [KBASE+RD*8] | ||
4215 | |2: | ||
4216 | | fld qword [BASE+RA*8] | ||
4217 | |4: | ||
4218 | | fcomparepp | ||
4219 | |.endif | ||
4220 | goto iseqne_fp; | 3765 | goto iseqne_fp; |
4221 | case BC_ISEQP: case BC_ISNEP: | 3766 | case BC_ISEQP: case BC_ISNEP: |
4222 | vk = op == BC_ISEQP; | 3767 | vk = op == BC_ISEQP; |
@@ -4267,6 +3812,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4267 | | ins_next | 3812 | | ins_next |
4268 | break; | 3813 | break; |
4269 | 3814 | ||
3815 | case BC_ISTYPE: | ||
3816 | | ins_AD // RA = src, RD = -type | ||
3817 | | add RD, [BASE+RA*8+4] | ||
3818 | | jne ->vmeta_istype | ||
3819 | | ins_next | ||
3820 | break; | ||
3821 | case BC_ISNUM: | ||
3822 | | ins_AD // RA = src, RD = -(TISNUM-1) | ||
3823 | | checknum RA, ->vmeta_istype | ||
3824 | | ins_next | ||
3825 | break; | ||
3826 | |||
4270 | /* -- Unary ops --------------------------------------------------------- */ | 3827 | /* -- Unary ops --------------------------------------------------------- */ |
4271 | 3828 | ||
4272 | case BC_MOV: | 3829 | case BC_MOV: |
@@ -4310,16 +3867,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4310 | |.else | 3867 | |.else |
4311 | | checknum RD, ->vmeta_unm | 3868 | | checknum RD, ->vmeta_unm |
4312 | |.endif | 3869 | |.endif |
4313 | |.if SSE | ||
4314 | | movsd xmm0, qword [BASE+RD*8] | 3870 | | movsd xmm0, qword [BASE+RD*8] |
4315 | | sseconst_sign xmm1, RDa | 3871 | | sseconst_sign xmm1, RDa |
4316 | | xorps xmm0, xmm1 | 3872 | | xorps xmm0, xmm1 |
4317 | | movsd qword [BASE+RA*8], xmm0 | 3873 | | movsd qword [BASE+RA*8], xmm0 |
4318 | |.else | ||
4319 | | fld qword [BASE+RD*8] | ||
4320 | | fchs | ||
4321 | | fstp qword [BASE+RA*8] | ||
4322 | |.endif | ||
4323 | |.if DUALNUM | 3874 | |.if DUALNUM |
4324 | | jmp <9 | 3875 | | jmp <9 |
4325 | |.else | 3876 | |.else |
@@ -4335,15 +3886,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4335 | |1: | 3886 | |1: |
4336 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 3887 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4337 | | mov dword [BASE+RA*8], RD | 3888 | | mov dword [BASE+RA*8], RD |
4338 | |.elif SSE | 3889 | |.else |
4339 | | xorps xmm0, xmm0 | 3890 | | xorps xmm0, xmm0 |
4340 | | cvtsi2sd xmm0, dword STR:RD->len | 3891 | | cvtsi2sd xmm0, dword STR:RD->len |
4341 | |1: | 3892 | |1: |
4342 | | movsd qword [BASE+RA*8], xmm0 | 3893 | | movsd qword [BASE+RA*8], xmm0 |
4343 | |.else | ||
4344 | | fild dword STR:RD->len | ||
4345 | |1: | ||
4346 | | fstp qword [BASE+RA*8] | ||
4347 | |.endif | 3894 | |.endif |
4348 | | ins_next | 3895 | | ins_next |
4349 | |2: | 3896 | |2: |
@@ -4361,11 +3908,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4361 | | // Length of table returned in eax (RD). | 3908 | | // Length of table returned in eax (RD). |
4362 | |.if DUALNUM | 3909 | |.if DUALNUM |
4363 | | // Nothing to do. | 3910 | | // Nothing to do. |
4364 | |.elif SSE | ||
4365 | | cvtsi2sd xmm0, RD | ||
4366 | |.else | 3911 | |.else |
4367 | | mov ARG1, RD | 3912 | | cvtsi2sd xmm0, RD |
4368 | | fild ARG1 | ||
4369 | |.endif | 3913 | |.endif |
4370 | | mov BASE, RB // Restore BASE. | 3914 | | mov BASE, RB // Restore BASE. |
4371 | | movzx RA, PC_RA | 3915 | | movzx RA, PC_RA |
@@ -4380,7 +3924,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4380 | 3924 | ||
4381 | /* -- Binary ops -------------------------------------------------------- */ | 3925 | /* -- Binary ops -------------------------------------------------------- */ |
4382 | 3926 | ||
4383 | |.macro ins_arithpre, x87ins, sseins, ssereg | 3927 | |.macro ins_arithpre, sseins, ssereg |
4384 | | ins_ABC | 3928 | | ins_ABC |
4385 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 3929 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
4386 | ||switch (vk) { | 3930 | ||switch (vk) { |
@@ -4389,37 +3933,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4389 | | .if DUALNUM | 3933 | | .if DUALNUM |
4390 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | 3934 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn |
4391 | | .endif | 3935 | | .endif |
4392 | | .if SSE | 3936 | | movsd xmm0, qword [BASE+RB*8] |
4393 | | movsd xmm0, qword [BASE+RB*8] | 3937 | | sseins ssereg, qword [KBASE+RC*8] |
4394 | | sseins ssereg, qword [KBASE+RC*8] | ||
4395 | | .else | ||
4396 | | fld qword [BASE+RB*8] | ||
4397 | | x87ins qword [KBASE+RC*8] | ||
4398 | | .endif | ||
4399 | || break; | 3938 | || break; |
4400 | ||case 1: | 3939 | ||case 1: |
4401 | | checknum RB, ->vmeta_arith_nv | 3940 | | checknum RB, ->vmeta_arith_nv |
4402 | | .if DUALNUM | 3941 | | .if DUALNUM |
4403 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | 3942 | | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv |
4404 | | .endif | 3943 | | .endif |
4405 | | .if SSE | 3944 | | movsd xmm0, qword [KBASE+RC*8] |
4406 | | movsd xmm0, qword [KBASE+RC*8] | 3945 | | sseins ssereg, qword [BASE+RB*8] |
4407 | | sseins ssereg, qword [BASE+RB*8] | ||
4408 | | .else | ||
4409 | | fld qword [KBASE+RC*8] | ||
4410 | | x87ins qword [BASE+RB*8] | ||
4411 | | .endif | ||
4412 | || break; | 3946 | || break; |
4413 | ||default: | 3947 | ||default: |
4414 | | checknum RB, ->vmeta_arith_vv | 3948 | | checknum RB, ->vmeta_arith_vv |
4415 | | checknum RC, ->vmeta_arith_vv | 3949 | | checknum RC, ->vmeta_arith_vv |
4416 | | .if SSE | 3950 | | movsd xmm0, qword [BASE+RB*8] |
4417 | | movsd xmm0, qword [BASE+RB*8] | 3951 | | sseins ssereg, qword [BASE+RC*8] |
4418 | | sseins ssereg, qword [BASE+RC*8] | ||
4419 | | .else | ||
4420 | | fld qword [BASE+RB*8] | ||
4421 | | x87ins qword [BASE+RC*8] | ||
4422 | | .endif | ||
4423 | || break; | 3952 | || break; |
4424 | ||} | 3953 | ||} |
4425 | |.endmacro | 3954 | |.endmacro |
@@ -4457,54 +3986,50 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4457 | |.endmacro | 3986 | |.endmacro |
4458 | | | 3987 | | |
4459 | |.macro ins_arithpost | 3988 | |.macro ins_arithpost |
4460 | |.if SSE | ||
4461 | | movsd qword [BASE+RA*8], xmm0 | 3989 | | movsd qword [BASE+RA*8], xmm0 |
4462 | |.else | ||
4463 | | fstp qword [BASE+RA*8] | ||
4464 | |.endif | ||
4465 | |.endmacro | 3990 | |.endmacro |
4466 | | | 3991 | | |
4467 | |.macro ins_arith, x87ins, sseins | 3992 | |.macro ins_arith, sseins |
4468 | | ins_arithpre x87ins, sseins, xmm0 | 3993 | | ins_arithpre sseins, xmm0 |
4469 | | ins_arithpost | 3994 | | ins_arithpost |
4470 | | ins_next | 3995 | | ins_next |
4471 | |.endmacro | 3996 | |.endmacro |
4472 | | | 3997 | | |
4473 | |.macro ins_arith, intins, x87ins, sseins | 3998 | |.macro ins_arith, intins, sseins |
4474 | |.if DUALNUM | 3999 | |.if DUALNUM |
4475 | | ins_arithdn intins | 4000 | | ins_arithdn intins |
4476 | |.else | 4001 | |.else |
4477 | | ins_arith, x87ins, sseins | 4002 | | ins_arith, sseins |
4478 | |.endif | 4003 | |.endif |
4479 | |.endmacro | 4004 | |.endmacro |
4480 | 4005 | ||
4481 | | // RA = dst, RB = src1 or num const, RC = src2 or num const | 4006 | | // RA = dst, RB = src1 or num const, RC = src2 or num const |
4482 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 4007 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
4483 | | ins_arith add, fadd, addsd | 4008 | | ins_arith add, addsd |
4484 | break; | 4009 | break; |
4485 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 4010 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
4486 | | ins_arith sub, fsub, subsd | 4011 | | ins_arith sub, subsd |
4487 | break; | 4012 | break; |
4488 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 4013 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
4489 | | ins_arith imul, fmul, mulsd | 4014 | | ins_arith imul, mulsd |
4490 | break; | 4015 | break; |
4491 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 4016 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
4492 | | ins_arith fdiv, divsd | 4017 | | ins_arith divsd |
4493 | break; | 4018 | break; |
4494 | case BC_MODVN: | 4019 | case BC_MODVN: |
4495 | | ins_arithpre fld, movsd, xmm1 | 4020 | | ins_arithpre movsd, xmm1 |
4496 | |->BC_MODVN_Z: | 4021 | |->BC_MODVN_Z: |
4497 | | call ->vm_mod | 4022 | | call ->vm_mod |
4498 | | ins_arithpost | 4023 | | ins_arithpost |
4499 | | ins_next | 4024 | | ins_next |
4500 | break; | 4025 | break; |
4501 | case BC_MODNV: case BC_MODVV: | 4026 | case BC_MODNV: case BC_MODVV: |
4502 | | ins_arithpre fld, movsd, xmm1 | 4027 | | ins_arithpre movsd, xmm1 |
4503 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 4028 | | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
4504 | break; | 4029 | break; |
4505 | case BC_POW: | 4030 | case BC_POW: |
4506 | | ins_arithpre fld, movsd, xmm1 | 4031 | | ins_arithpre movsd, xmm1 |
4507 | | call ->vm_pow | 4032 | | call ->vm_pow_sse |
4508 | | ins_arithpost | 4033 | | ins_arithpost |
4509 | | ins_next | 4034 | | ins_next |
4510 | break; | 4035 | break; |
@@ -4573,25 +4098,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4573 | | movsx RD, RDW | 4098 | | movsx RD, RDW |
4574 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4099 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
4575 | | mov dword [BASE+RA*8], RD | 4100 | | mov dword [BASE+RA*8], RD |
4576 | |.elif SSE | 4101 | |.else |
4577 | | movsx RD, RDW // Sign-extend literal. | 4102 | | movsx RD, RDW // Sign-extend literal. |
4578 | | cvtsi2sd xmm0, RD | 4103 | | cvtsi2sd xmm0, RD |
4579 | | movsd qword [BASE+RA*8], xmm0 | 4104 | | movsd qword [BASE+RA*8], xmm0 |
4580 | |.else | ||
4581 | | fild PC_RD // Refetch signed RD from instruction. | ||
4582 | | fstp qword [BASE+RA*8] | ||
4583 | |.endif | 4105 | |.endif |
4584 | | ins_next | 4106 | | ins_next |
4585 | break; | 4107 | break; |
4586 | case BC_KNUM: | 4108 | case BC_KNUM: |
4587 | | ins_AD // RA = dst, RD = num const | 4109 | | ins_AD // RA = dst, RD = num const |
4588 | |.if SSE | ||
4589 | | movsd xmm0, qword [KBASE+RD*8] | 4110 | | movsd xmm0, qword [KBASE+RD*8] |
4590 | | movsd qword [BASE+RA*8], xmm0 | 4111 | | movsd qword [BASE+RA*8], xmm0 |
4591 | |.else | ||
4592 | | fld qword [KBASE+RD*8] | ||
4593 | | fstp qword [BASE+RA*8] | ||
4594 | |.endif | ||
4595 | | ins_next | 4112 | | ins_next |
4596 | break; | 4113 | break; |
4597 | case BC_KPRI: | 4114 | case BC_KPRI: |
@@ -4698,18 +4215,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4698 | case BC_USETN: | 4215 | case BC_USETN: |
4699 | | ins_AD // RA = upvalue #, RD = num const | 4216 | | ins_AD // RA = upvalue #, RD = num const |
4700 | | mov LFUNC:RB, [BASE-8] | 4217 | | mov LFUNC:RB, [BASE-8] |
4701 | |.if SSE | ||
4702 | | movsd xmm0, qword [KBASE+RD*8] | 4218 | | movsd xmm0, qword [KBASE+RD*8] |
4703 | |.else | ||
4704 | | fld qword [KBASE+RD*8] | ||
4705 | |.endif | ||
4706 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 4219 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
4707 | | mov RA, UPVAL:RB->v | 4220 | | mov RA, UPVAL:RB->v |
4708 | |.if SSE | ||
4709 | | movsd qword [RA], xmm0 | 4221 | | movsd qword [RA], xmm0 |
4710 | |.else | ||
4711 | | fstp qword [RA] | ||
4712 | |.endif | ||
4713 | | ins_next | 4222 | | ins_next |
4714 | break; | 4223 | break; |
4715 | case BC_USETP: | 4224 | case BC_USETP: |
@@ -4863,18 +4372,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4863 | |.else | 4372 | |.else |
4864 | | // Convert number to int and back and compare. | 4373 | | // Convert number to int and back and compare. |
4865 | | checknum RC, >5 | 4374 | | checknum RC, >5 |
4866 | |.if SSE | ||
4867 | | movsd xmm0, qword [BASE+RC*8] | 4375 | | movsd xmm0, qword [BASE+RC*8] |
4868 | | cvtsd2si RC, xmm0 | 4376 | | cvttsd2si RC, xmm0 |
4869 | | cvtsi2sd xmm1, RC | 4377 | | cvtsi2sd xmm1, RC |
4870 | | ucomisd xmm0, xmm1 | 4378 | | ucomisd xmm0, xmm1 |
4871 | |.else | ||
4872 | | fld qword [BASE+RC*8] | ||
4873 | | fist ARG1 | ||
4874 | | fild ARG1 | ||
4875 | | fcomparepp | ||
4876 | | mov RC, ARG1 | ||
4877 | |.endif | ||
4878 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | 4379 | | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |
4879 | |.endif | 4380 | |.endif |
4880 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4381 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -4998,6 +4499,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4998 | | mov dword [BASE+RA*8+4], LJ_TNIL | 4499 | | mov dword [BASE+RA*8+4], LJ_TNIL |
4999 | | jmp <1 | 4500 | | jmp <1 |
5000 | break; | 4501 | break; |
4502 | case BC_TGETR: | ||
4503 | | ins_ABC // RA = dst, RB = table, RC = key | ||
4504 | | mov TAB:RB, [BASE+RB*8] | ||
4505 | |.if DUALNUM | ||
4506 | | mov RC, dword [BASE+RC*8] | ||
4507 | |.else | ||
4508 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4509 | |.endif | ||
4510 | | cmp RC, TAB:RB->asize | ||
4511 | | jae ->vmeta_tgetr // Not in array part? Use fallback. | ||
4512 | | shl RC, 3 | ||
4513 | | add RC, TAB:RB->array | ||
4514 | | // Get array slot. | ||
4515 | |->BC_TGETR_Z: | ||
4516 | |.if X64 | ||
4517 | | mov RBa, [RC] | ||
4518 | | mov [BASE+RA*8], RBa | ||
4519 | |.else | ||
4520 | | mov RB, [RC] | ||
4521 | | mov RC, [RC+4] | ||
4522 | | mov [BASE+RA*8], RB | ||
4523 | | mov [BASE+RA*8+4], RC | ||
4524 | |.endif | ||
4525 | |->BC_TGETR2_Z: | ||
4526 | | ins_next | ||
4527 | break; | ||
5001 | 4528 | ||
5002 | case BC_TSETV: | 4529 | case BC_TSETV: |
5003 | | ins_ABC // RA = src, RB = table, RC = key | 4530 | | ins_ABC // RA = src, RB = table, RC = key |
@@ -5011,18 +4538,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5011 | |.else | 4538 | |.else |
5012 | | // Convert number to int and back and compare. | 4539 | | // Convert number to int and back and compare. |
5013 | | checknum RC, >5 | 4540 | | checknum RC, >5 |
5014 | |.if SSE | ||
5015 | | movsd xmm0, qword [BASE+RC*8] | 4541 | | movsd xmm0, qword [BASE+RC*8] |
5016 | | cvtsd2si RC, xmm0 | 4542 | | cvttsd2si RC, xmm0 |
5017 | | cvtsi2sd xmm1, RC | 4543 | | cvtsi2sd xmm1, RC |
5018 | | ucomisd xmm0, xmm1 | 4544 | | ucomisd xmm0, xmm1 |
5019 | |.else | ||
5020 | | fld qword [BASE+RC*8] | ||
5021 | | fist ARG1 | ||
5022 | | fild ARG1 | ||
5023 | | fcomparepp | ||
5024 | | mov RC, ARG1 | ||
5025 | |.endif | ||
5026 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | 4545 | | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |
5027 | |.endif | 4546 | |.endif |
5028 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. | 4547 | | cmp RC, TAB:RB->asize // Takes care of unordered, too. |
@@ -5192,6 +4711,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5192 | | movzx RA, PC_RA // Restore RA. | 4711 | | movzx RA, PC_RA // Restore RA. |
5193 | | jmp <2 | 4712 | | jmp <2 |
5194 | break; | 4713 | break; |
4714 | case BC_TSETR: | ||
4715 | | ins_ABC // RA = src, RB = table, RC = key | ||
4716 | | mov TAB:RB, [BASE+RB*8] | ||
4717 | |.if DUALNUM | ||
4718 | | mov RC, dword [BASE+RC*8] | ||
4719 | |.else | ||
4720 | | cvttsd2si RC, qword [BASE+RC*8] | ||
4721 | |.endif | ||
4722 | | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | ||
4723 | | jnz >7 | ||
4724 | |2: | ||
4725 | | cmp RC, TAB:RB->asize | ||
4726 | | jae ->vmeta_tsetr | ||
4727 | | shl RC, 3 | ||
4728 | | add RC, TAB:RB->array | ||
4729 | | // Set array slot. | ||
4730 | |->BC_TSETR_Z: | ||
4731 | |.if X64 | ||
4732 | | mov RBa, [BASE+RA*8] | ||
4733 | | mov [RC], RBa | ||
4734 | |.else | ||
4735 | | mov RB, [BASE+RA*8+4] | ||
4736 | | mov RA, [BASE+RA*8] | ||
4737 | | mov [RC+4], RB | ||
4738 | | mov [RC], RA | ||
4739 | |.endif | ||
4740 | | ins_next | ||
4741 | | | ||
4742 | |7: // Possible table write barrier for the value. Skip valiswhite check. | ||
4743 | | barrierback TAB:RB, RA | ||
4744 | | movzx RA, PC_RA // Restore RA. | ||
4745 | | jmp <2 | ||
4746 | break; | ||
5195 | 4747 | ||
5196 | case BC_TSETM: | 4748 | case BC_TSETM: |
5197 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | 4749 | | ins_AD // RA = base (table at base-1), RD = num const (start index) |
@@ -5386,10 +4938,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5386 | |.if DUALNUM | 4938 | |.if DUALNUM |
5387 | | mov dword [BASE+RA*8+4], LJ_TISNUM | 4939 | | mov dword [BASE+RA*8+4], LJ_TISNUM |
5388 | | mov dword [BASE+RA*8], RC | 4940 | | mov dword [BASE+RA*8], RC |
5389 | |.elif SSE | ||
5390 | | cvtsi2sd xmm0, RC | ||
5391 | |.else | 4941 | |.else |
5392 | | fild dword [BASE+RA*8-8] | 4942 | | cvtsi2sd xmm0, RC |
5393 | |.endif | 4943 | |.endif |
5394 | | // Copy array slot to returned value. | 4944 | | // Copy array slot to returned value. |
5395 | |.if X64 | 4945 | |.if X64 |
@@ -5405,10 +4955,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5405 | | // Return array index as a numeric key. | 4955 | | // Return array index as a numeric key. |
5406 | |.if DUALNUM | 4956 | |.if DUALNUM |
5407 | | // See above. | 4957 | | // See above. |
5408 | |.elif SSE | ||
5409 | | movsd qword [BASE+RA*8], xmm0 | ||
5410 | |.else | 4958 | |.else |
5411 | | fstp qword [BASE+RA*8] | 4959 | | movsd qword [BASE+RA*8], xmm0 |
5412 | |.endif | 4960 | |.endif |
5413 | | mov [BASE+RA*8-8], RC // Update control var. | 4961 | | mov [BASE+RA*8-8], RC // Update control var. |
5414 | |2: | 4962 | |2: |
@@ -5421,9 +4969,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5421 | | | 4969 | | |
5422 | |4: // Skip holes in array part. | 4970 | |4: // Skip holes in array part. |
5423 | | add RC, 1 | 4971 | | add RC, 1 |
5424 | |.if not (DUALNUM or SSE) | ||
5425 | | mov [BASE+RA*8-8], RC | ||
5426 | |.endif | ||
5427 | | jmp <1 | 4972 | | jmp <1 |
5428 | | | 4973 | | |
5429 | |5: // Traverse hash part. | 4974 | |5: // Traverse hash part. |
@@ -5757,7 +5302,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5757 | if (!vk) { | 5302 | if (!vk) { |
5758 | | cmp RB, LJ_TISNUM; jae ->vmeta_for | 5303 | | cmp RB, LJ_TISNUM; jae ->vmeta_for |
5759 | } | 5304 | } |
5760 | |.if SSE | ||
5761 | | movsd xmm0, qword FOR_IDX | 5305 | | movsd xmm0, qword FOR_IDX |
5762 | | movsd xmm1, qword FOR_STOP | 5306 | | movsd xmm1, qword FOR_STOP |
5763 | if (vk) { | 5307 | if (vk) { |
@@ -5770,22 +5314,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5770 | | ucomisd xmm1, xmm0 | 5314 | | ucomisd xmm1, xmm0 |
5771 | |1: | 5315 | |1: |
5772 | | movsd qword FOR_EXT, xmm0 | 5316 | | movsd qword FOR_EXT, xmm0 |
5773 | |.else | ||
5774 | | fld qword FOR_STOP | ||
5775 | | fld qword FOR_IDX | ||
5776 | if (vk) { | ||
5777 | | fadd qword FOR_STEP // nidx = idx + step | ||
5778 | | fst qword FOR_IDX | ||
5779 | | fst qword FOR_EXT | ||
5780 | | test RB, RB; js >1 | ||
5781 | } else { | ||
5782 | | fst qword FOR_EXT | ||
5783 | | jl >1 | ||
5784 | } | ||
5785 | | fxch // Swap lim/(n)idx if step non-negative. | ||
5786 | |1: | ||
5787 | | fcomparepp | ||
5788 | |.endif | ||
5789 | if (op == BC_FORI) { | 5317 | if (op == BC_FORI) { |
5790 | |.if DUALNUM | 5318 | |.if DUALNUM |
5791 | | jnb <7 | 5319 | | jnb <7 |
@@ -5813,11 +5341,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5813 | |2: | 5341 | |2: |
5814 | | ins_next | 5342 | | ins_next |
5815 | |.endif | 5343 | |.endif |
5816 | |.if SSE | 5344 | | |
5817 | |3: // Invert comparison if step is negative. | 5345 | |3: // Invert comparison if step is negative. |
5818 | | ucomisd xmm0, xmm1 | 5346 | | ucomisd xmm0, xmm1 |
5819 | | jmp <1 | 5347 | | jmp <1 |
5820 | |.endif | ||
5821 | break; | 5348 | break; |
5822 | 5349 | ||
5823 | case BC_ITERL: | 5350 | case BC_ITERL: |
@@ -5874,7 +5401,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5874 | | mov RDa, TRACE:RD->mcode | 5401 | | mov RDa, TRACE:RD->mcode |
5875 | | mov L:RB, SAVE_L | 5402 | | mov L:RB, SAVE_L |
5876 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | 5403 | | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE |
5877 | | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB | 5404 | | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB |
5878 | | // Save additional callee-save registers only used in compiled code. | 5405 | | // Save additional callee-save registers only used in compiled code. |
5879 | |.if X64WIN | 5406 | |.if X64WIN |
5880 | | mov TMPQ, r12 | 5407 | | mov TMPQ, r12 |
@@ -6041,9 +5568,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
6041 | | // (lua_State *L, lua_CFunction f) | 5568 | | // (lua_State *L, lua_CFunction f) |
6042 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] | 5569 | | call aword [DISPATCH+DISPATCH_GL(wrapf)] |
6043 | } | 5570 | } |
6044 | | set_vmstate INTERP | ||
6045 | | // nresults returned in eax (RD). | 5571 | | // nresults returned in eax (RD). |
6046 | | mov BASE, L:RB->base | 5572 | | mov BASE, L:RB->base |
5573 | | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | ||
5574 | | set_vmstate INTERP | ||
6047 | | lea RA, [BASE+RD*8] | 5575 | | lea RA, [BASE+RD*8] |
6048 | | neg RA | 5576 | | neg RA |
6049 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | 5577 | | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 |