Diffstat (limited to 'src/lj_asm_x86.h')

 src/lj_asm_x86.h | 514 ++++++++++++++++++++--------------------------------
 1 file changed, 187 insertions(+), 327 deletions(-)
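Note: this diff removes the x86 backend's private asm_ir() opcode switch and instead exposes one asm_* handler per IR opcode, either as a function (asm_sub(), asm_min(), ...) or as a one-line macro over a shared helper (asm_band(), asm_bshl(), ...). It also drops the JIT_F_SPLIT_XMM checks and the XMM_MOVRM(as) indirection in favor of plain XO_MOVSD, replaces asm_newref()/asm_tostr() with the asm_tvptr() helper plus generic call emission (asm_callid()), and passes the merged EQ/NE opcode into asm_href() directly instead of patching machine code via merge_href_niltv().

For orientation, here is a minimal sketch of the kind of common dispatcher these per-opcode handlers plug into. This is an assumption about the shared lj_asm.c side, which is not part of this diff:

    /* Hypothetical sketch of a common dispatch over per-arch handlers.     */
    /* Each backend supplies asm_band() etc. as a function or a macro,      */
    /* e.g. on x86: #define asm_band(as, ir) asm_intarith(as, ir, XOg_AND). */
    static void asm_ir(ASMState *as, IRIns *ir)
    {
      switch ((IROp)ir->o) {
      case IR_BAND: asm_band(as, ir); break;   /* Bit ops. */
      case IR_MIN: asm_min(as, ir); break;     /* FP/integer split inside handler. */
      case IR_BSWAP: asm_bswap(as, ir); break;
      /* ... one case per IR opcode ... */
      default: lua_assert(0); break;  /* NYI opcode. */
      }
    }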
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 64441ccb..f2f8157d 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -384,7 +384,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
 #if LJ_64
   if (LJ_ABI_WIN) {
@@ -417,7 +417,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
@@ -552,7 +552,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
       ra_modified(as, dest);
-      emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+      emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
                 dest, RID_ESP, ofs);
     }
     if ((ci->flags & CCI_CASTU64)) {
@@ -576,15 +576,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
@@ -663,8 +654,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -720,8 +710,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
                dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
@@ -729,9 +718,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-                 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-                 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
         /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
         /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -825,8 +812,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-              dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
             irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -854,7 +840,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -897,6 +882,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
             st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
             asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -918,29 +911,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
             RID_ESP, sps_scale(ir->s));
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-              RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -951,23 +947,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
   emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
@@ -976,10 +955,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -996,14 +975,12 @@ static void asm_href(ASMState *as, IRIns *ir)
     tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1012,8 +989,8 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
 
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
@@ -1169,41 +1146,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1263,7 +1205,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
@@ -1276,6 +1218,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
+#define asm_fload(as, ir) asm_fxload(as, ir)
+#define asm_xload(as, ir) asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
@@ -1339,6 +1284,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_fstore(as, ir) asm_fxstore(as, ir)
+#define asm_xstore(as, ir) asm_fxstore(as, ir)
+
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
@@ -1377,7 +1325,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1443,7 +1391,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
   } else if (irt_islightud(t)) {
@@ -1461,11 +1409,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1492,15 +1438,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-              lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;      /* lua_State *L */
-  args[1] = ASMREF_TMP1;   /* MSize size   */
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
@@ -1543,15 +1487,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     } while (1);
 #endif
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
 
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-           (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+           (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
@@ -1629,31 +1584,21 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
 {
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-        rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-        irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
+  IRIns *irx;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  ra_destreg(as, ir, RID_XMM0);
+  emit_call(as, lj_vm_pow_sse);
+  irx = IR(lref);
+  if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
+    irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
+  ra_left(as, RID_XMM0, lref);
+  ra_left(as, RID_XMM1, rref);
 }
 
 static void asm_fpmath(ASMState *as, IRIns *ir)
@@ -1689,7 +1634,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
                 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
   } else {  /* Handle x87 ops. */
     int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
@@ -1697,7 +1642,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
       ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+      emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
     }
     emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
     switch (fpm) {  /* st0 = lj_vm_*(st0) */
@@ -1736,6 +1681,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_atan2(as, ir) asm_fpmath(as, ir)
+#define asm_ldexp(as, ir) asm_fpmath(as, ir)
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
   /* The modified regs must match with the *.dasc implementation. */
@@ -1749,26 +1697,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+                                          IRCALL_lj_carith_powu64);
+  else
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+    asm_fppowi(as, ir);
 }
 
 static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1947,6 +1884,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
 }
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+                                          IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+                                          IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1954,7 +1929,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1965,7 +1950,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1973,6 +1981,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
 }
 
+#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 {
   IRRef rref = ir->op2;
@@ -2012,6 +2024,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
 }
 
+#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
+#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
+#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
@@ -2038,8 +2056,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
@@ -2194,6 +2213,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
 }
 
+#define asm_equal(as, ir) asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2276,13 +2297,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-        asm_conv_fp_int64(as, ir);
-      else
-        asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
@@ -2580,163 +2597,6 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-                                             IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-                                             IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-                                             IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */