Diffstat (limited to 'src/lj_asm_x86.h')
 src/lj_asm_x86.h | 526 ++++++++++++++++++++++++++-------------------------
 1 file changed, 198 insertions(+), 328 deletions(-)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index fda911e5..3e87ba18 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -384,7 +384,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
 #if LJ_64
   if (LJ_ABI_WIN) {
@@ -417,7 +417,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
@@ -552,7 +552,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
 	      dest, RID_ESP, ofs);
   }
   if ((ci->flags & CCI_CASTU64)) {
@@ -576,15 +576,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
@@ -664,8 +655,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -721,8 +711,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
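Both hunks above now emit the XORPS unconditionally, since the JIT_F_SPLIT_XMM CPU flag is gone: CVTSI2SD only writes the low lane of its XMM destination, so without zeroing the register first the instruction inherits a false dependency on whatever last wrote the full register. The same idiom, expressed with SSE2 intrinsics (an illustration of the technique, not code from this patch):

    #include <emmintrin.h>

    static inline __m128d int_to_double(int i)
    {
      __m128d z = _mm_setzero_pd();  /* XORPS: breaks the dependency chain. */
      return _mm_cvtsi32_sd(z, i);   /* CVTSI2SD: merges into the low lane. */
    }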
@@ -730,9 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
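The rounding variants (CVTSD2SI/CVTSS2SI) are dropped, presumably because the IR no longer reaches this path with non-truncating conversions. The unsigned cases still need the bias described in the comments, since CVTTSD2SI is a signed conversion; the 64-bit rule as plain C (a sketch of the emitted logic):

    uint64_t num_to_u64(double n)
    {
      if (n >= 9223372036854775808.0)  /* >= 2^63: beyond int64_t range. */
        n -= 18446744073709551616.0;   /* Add -2^64; bit pattern now matches. */
      return (uint64_t)(int64_t)n;     /* CVTTSD2SI truncation. */
    }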
@@ -826,8 +813,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-	      dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -855,7 +841,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -898,6 +883,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
 	      st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	      asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -919,29 +912,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	    RID_ESP, sps_scale(ir->s));
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-	      RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
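asm_tvptr centralizes the TValue-materialization idiom previously duplicated in asm_tostr and asm_newref: a number ref is already a valid TValue (either a constant or a spill slot), while anything else gets boxed into the single global scratch slot g->tmptv. The non-number path amounts to roughly this (a sketch of what the emitted code computes; names are illustrative):

    /* Sketch, assuming the 32-bit TValue layout used here. */
    static TValue *tvptr_scratch(global_State *g, int32_t payload, uint32_t itype)
    {
      TValue *tv = &g->tmptv;
      tv->u32.lo = payload;  /* Value at offset 0. */
      tv->u32.hi = itype;    /* Type tag at offset 4. */
      return tv;             /* dest ends up pointing here. */
    }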
@@ -952,23 +948,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
   emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
@@ -977,10 +956,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -997,14 +976,12 @@ static void asm_href(ASMState *as, IRIns *ir)
     tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1013,8 +990,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
 
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
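Passing merge explicitly replaces merge_href_niltv's pattern matching on already-emitted machine code (removed in an earlier hunk): the caller fuses a following EQ or NE against niltv into the HREF itself, and the comparison becomes an ordinary guard. A hedged sketch of how a dispatcher could perform that fusion (the actual caller lives outside this file):

    case IR_EQ: case IR_NE:
      if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
        as->curins--;                     /* Skip the HREF... */
        asm_href(as, ir-1, (IROp)ir->o);  /* ...and fold the compare into it. */
      } else {
        asm_equal(as, ir);
      }
      break;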
@@ -1170,41 +1147,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1264,7 +1206,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
@@ -1277,6 +1219,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
+#define asm_fload(as, ir) asm_fxload(as, ir)
+#define asm_xload(as, ir) asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
@@ -1340,6 +1285,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_fstore(as, ir) asm_fxstore(as, ir)
+#define asm_xstore(as, ir) asm_fxstore(as, ir)
+
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
@@ -1378,7 +1326,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1444,7 +1392,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
   } else if (irt_islightud(t)) {
@@ -1462,11 +1410,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1493,15 +1439,13 @@
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size */
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
@@ -1544,15 +1488,26 @@
     } while (1);
 #endif
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
 
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-	   (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+	   (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size */
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
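VLA/VLS and over-aligned cdata now bypass the fixed-size lj_mem_newgco path. The call being materialized corresponds to roughly this (a sketch; the helper's signature is assumed from the argument comments above):

    GCcdata *cd = lj_cdata_newv(L, id, sz, (CTSize)ctype_align(info));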
@@ -1630,31 +1585,21 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
 {
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-	rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-	irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
+  IRIns *irx;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  ra_destreg(as, ir, RID_XMM0);
+  emit_call(as, lj_vm_pow_sse);
+  irx = IR(lref);
+  if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
+    irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
+  ra_left(as, RID_XMM0, lref);
+  ra_left(as, RID_XMM1, rref);
 }
 
 static void asm_fpmath(ASMState *as, IRIns *ir)
@@ -1690,7 +1635,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
 		fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
   } else {  /* Handle x87 ops. */
     int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
@@ -1698,7 +1643,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
       ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+      emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
     }
     emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
     switch (fpm) {  /* st0 = lj_vm_*(st0) */
@@ -1737,6 +1682,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_atan2(as, ir) asm_fpmath(as, ir)
+#define asm_ldexp(as, ir) asm_fpmath(as, ir)
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
   /* The modified regs must match with the *.dasc implementation. */
@@ -1750,26 +1698,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+    asm_fppowi(as, ir);
 }
 
 static int asm_swapops(ASMState *as, IRIns *ir)
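asm_arith64 and asm_intmod were identical thin wrappers that turned the two IR operands into a C call; asm_pow here, and asm_div/asm_mod in the next hunk, now funnel through a shared asm_callid helper instead. Judging from the removed bodies, that helper is presumably equivalent to:

    static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
    {
      const CCallInfo *ci = &lj_ir_callinfo[id];
      IRRef args[2];
      args[0] = ir->op1;
      args[1] = ir->op2;
      asm_setupresult(as, ir, ci);
      asm_gencall(as, ci, args);
    }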
@@ -1948,6 +1885,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
 }
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1955,7 +1930,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1966,7 +1951,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
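For integers the selection is comparison-based: the condition code names the case in which the right operand should replace the left one, i.e. a CMP followed by a conditional move. In C terms (a sketch of the intended semantics, not the literal emitted code):

    static int int_min(int left, int right)
    {
      return left > right ? right : left;  /* CC_G: take right when greater. */
    }
    static int int_max(int left, int right)
    {
      return left < right ? right : left;  /* CC_L: take right when less. */
    }

The FP variants map directly onto MINSD/MAXSD instead. The note about LEA applies because LEA computes an address without setting the overflow flag, which the ADDOV/SUBOV/MULOV guards depend on.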
@@ -1974,6 +1982,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
 }
 
+#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 {
   IRRef rref = ir->op2;
@@ -2013,6 +2025,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
 }
 
+#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
+#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
+#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
@@ -2039,8 +2057,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
@@ -2195,6 +2214,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
 }
 
+#define asm_equal(as, ir) asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2277,13 +2298,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-	asm_conv_fp_int64(as, ir);
-      else
-	asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
@@ -2332,6 +2349,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_i8(as, HOOK_PROFILE);
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
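asm_prof tests the HOOK_PROFILE bit in the global hookmask byte and attaches a guard, so a running trace exits promptly once the profiler hook is armed. The equivalent check in C (a sketch; exit_to_interpreter stands in for the guard's exit branch):

    if (g->hookmask & HOOK_PROFILE)  /* TEST byte [&g->hookmask], imm8 */
      exit_to_interpreter();         /* Taken via the CC_NE guard. */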
@@ -2353,7 +2380,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
 	    ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
@@ -2581,163 +2608,6 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					     IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					     IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					     IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
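With the big switch removed, every IR opcode is now reachable through a uniform asm_<name> entry point, whether that name is a real function or one of the macros introduced above, so a single shared dispatcher outside this file can take over for all architectures. A sketch of the shape such a dispatcher takes (assumed to live in lj_asm.c; not part of this diff):

    static void asm_ir(ASMState *as, IRIns *ir)
    {
      switch ((IROp)ir->o) {
      case IR_BAND: asm_band(as, ir); break;  /* Macro -> asm_intarith(). */
      case IR_MIN: asm_min(as, ir); break;    /* Plain function. */
      case IR_PROF: asm_prof(as, ir); break;  /* New in this change. */
      /* ... one case per IR opcode ... */
      default: lua_assert(0); break;
      }
    }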