Diffstat (limited to '')
-rw-r--r--  src/lj_asm_x86.h  594
1 file changed, 219 insertions, 375 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index ffd59d33..718cb12e 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -392,7 +392,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
 #if LJ_64
   if (LJ_ABI_WIN) {
@@ -425,7 +425,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
@@ -560,7 +560,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
              dest, RID_ESP, ofs);
   }
   if ((ci->flags & CCI_CASTU64)) {
@@ -584,15 +584,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
@@ -652,7 +643,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -672,8 +663,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -729,8 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
               dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
@@ -738,9 +727,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-                 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-                 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
        /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
        /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
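
The two comments above compress the whole unsigned-conversion trick into one line each. As a standalone C sketch of the 64-bit case (hypothetical helper name, not part of the patch): CVTTSD2SI only produces a signed result, so inputs >= 2^63 are biased down by 2^64 before conversion, and the two's-complement wraparound of the final cast restores the intended bit pattern.

#include <stdint.h>

static uint64_t num2u64(double n)  /* Hypothetical name, illustration only. */
{
  if (n >= 9223372036854775808.0)  /* n >= 2^63: out of int64_t range. */
    return (uint64_t)(int64_t)(n - 18446744073709551616.0);  /* n - 2^64 */
  else
    return (uint64_t)(int64_t)n;
}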
@@ -834,8 +821,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-             dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
            irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -863,7 +849,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -906,6 +891,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
            st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
            asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -927,29 +920,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
            RID_ESP, sps_scale(ir->s));
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-             RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
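
A rough C model of the non-number path above, assuming the classic non-GC64 TValue layout (value word at offset 0, type tag at offset 4); all names here are hypothetical stand-ins for the emitted stores:

#include <stdint.h>

typedef struct TValueModel {
  int32_t val;  /* Offset 0: emit_movmroi(as, dest, 0, ir->i). */
  int32_t it;   /* Offset 4: emit_movmroi(as, dest, 4, irt_toitype(ir->t)). */
} TValueModel;

static TValueModel tmptv;  /* Stands in for g->tmptv. */

static TValueModel *tvptr_model(int32_t val, int32_t itype)
{
  tmptv.val = val;
  tmptv.it = itype;
  return &tmptv;  /* emit_loada(as, dest, &J2G(as->J)->tmptv). */
}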
@@ -960,23 +956,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
   emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
@@ -985,10 +964,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -1005,14 +984,12 @@ static void asm_href(ASMState *as, IRIns *ir)
     tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1021,8 +998,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
 
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
@@ -1178,41 +1155,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1272,7 +1214,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
@@ -1285,6 +1227,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
+#define asm_fload(as, ir)  asm_fxload(as, ir)
+#define asm_xload(as, ir)  asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
@@ -1348,6 +1293,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_fstore(as, ir)  asm_fxstore(as, ir)
+#define asm_xstore(as, ir)  asm_fxstore(as, ir)
+
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
@@ -1386,7 +1334,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1452,7 +1400,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
   } else if (irt_islightud(t)) {
@@ -1470,11 +1418,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1501,15 +1447,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-             lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
@@ -1552,15 +1496,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     } while (1);
 #endif
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
 
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-          (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+          (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
@@ -1638,36 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-       irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-       rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-       irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1698,53 +1626,31 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
               fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
      ra_left(as, RID_XMM0, ir->op1);
    }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
-    Reg dest = ir->r;
-    if (ra_hasreg(dest)) {
-      ra_free(as, dest);
-      ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
-    }
-    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-    switch (fpm) {  /* st0 = lj_vm_*(st0) */
-    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-      /* Note: the use of fyl2xp1 would be pointless here. When computing
-      ** log(1.0+eps) the precision is already lost after 1.0 is added.
-      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-      */
-      emit_x87op(as, XI_FYL2X); break;
-    case IRFPM_OTHER:
-      switch (ir->o) {
-      case IR_ATAN2:
-       emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-      case IR_LDEXP:
-       emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-      default: lua_assert(0); break;
-      }
-      break;
-    default: lua_assert(0); break;
-    }
-    asm_x87load(as, ir->op1);
-    switch (fpm) {
-    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-    case IRFPM_OTHER:
-      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-      break;
-    default: break;
-    }
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
 }
 
+#define asm_atan2(as, ir)  asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);
+  asm_x87load(as, ir->op1);
+  asm_x87load(as, ir->op2);
+}
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
   /* The modified regs must match with the *.dasc implementation. */
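
The new asm_ldexp leans on x87 FSCALE, which computes st0 * 2^trunc(st1), i.e. exactly ldexp(). A minimal standalone check of that identity (illustrative only, not part of the patch):

#include <math.h>
#include <stdio.h>

int main(void)
{
  double x = 1.5;
  int e = 10;
  printf("%g == %g\n", ldexp(x, e), x * pow(2.0, e));  /* 1536 == 1536 */
  return 0;
}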
@@ -1758,26 +1664,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+                                         IRCALL_lj_carith_powu64);
+  else
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+    asm_fppowi(as, ir);
 }
 
 static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1960,6 +1855,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
 }
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+                                         IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+                                         IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1967,7 +1900,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir)  asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1978,7 +1921,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir)  asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir)  asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir)  asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir)  asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1986,7 +1952,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+#define asm_band(as, ir)  asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir)   asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir)  asm_intarith(as, ir, XOg_XOR)
+
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
 {
   IRRef rref = ir->op2;
   IRIns *irr = IR(rref);
@@ -1995,11 +1965,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
     int shift;
     dest = ra_dest(as, ir, RSET_GPR);
     shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
+    if (!xv && shift && (as->flags & JIT_F_BMI2)) {
+      Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
+      if (left != dest) {  /* BMI2 rotate right by constant. */
+       emit_i8(as, xs == XOg_ROL ? -shift : shift);
+       emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
+       return;
+      }
+    }
     switch (shift) {
     case 0: break;
     case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
     default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
     }
+  } else if ((as->flags & JIT_F_BMI2) && xv) {  /* BMI2 variable shifts. */
+    Reg left, right;
+    dest = ra_dest(as, ir, RSET_GPR);
+    right = ra_alloc1(as, rref, RSET_GPR);
+    left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
+                        irt_is64(ir->t));
+    emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
+    return;
   } else {  /* Variable shifts implicitly use register cl (i.e. ecx). */
     Reg right;
     dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
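
For flavor, the two new BMI2 paths sketched with compiler builtins (GCC/Clang, compile with -mbmi2; illustrative only, not how the JIT emits code). RORX takes the rotate count as an immediate and leaves EFLAGS untouched; SHLX/SHRX/SARX take the shift count in any register, so variable shifts no longer have to reserve CL:

#include <stdint.h>
#include <x86intrin.h>

uint32_t rot7(uint32_t x)
{
  return _rorx_u32(x, 7);  /* Compiles to: rorx eax, edi, 7. */
}

uint32_t shl(uint32_t x, uint32_t n)
{
  return x << (n & 31);    /* With -mbmi2: shlx eax, edi, esi. */
}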
@@ -2025,6 +2011,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 */
 }
 
+#define asm_bshl(as, ir)  asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
+#define asm_bshr(as, ir)  asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
+#define asm_bsar(as, ir)  asm_bitshift(as, ir, XOg_SAR, XV_SARX)
+#define asm_brol(as, ir)  asm_bitshift(as, ir, XOg_ROL, 0)
+#define asm_bror(as, ir)  asm_bitshift(as, ir, XOg_ROR, 0)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
@@ -2051,8 +2043,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
@@ -2207,6 +2200,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
 }
 
+#define asm_equal(as, ir)  asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2289,13 +2284,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-       asm_conv_fp_int64(as, ir);
-      else
-       asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
@@ -2344,6 +2335,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_i8(as, HOOK_PROFILE);
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
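
The guard asm_prof emits boils down to a byte-wide TEST of the global hook mask, with a trace exit when the profiler bit is set; modeled in C under assumed names and an assumed bit value:

#include <stdint.h>

#define HOOK_PROFILE 0x10  /* Assumed value, for illustration only. */

static int prof_guard_model(uint8_t hookmask)
{
  return (hookmask & HOOK_PROFILE) != 0;  /* Nonzero: leave the trace. */
}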
@@ -2365,7 +2366,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
            ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
@@ -2593,163 +2594,6 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-                                            IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-                                            IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-                                            IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */