path: root/src/lj_asm_x86.h
Diffstat
-rw-r--r--	src/lj_asm_x86.h	594
1 file changed, 219 insertions, 375 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index ffd59d33..718cb12e 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -392,7 +392,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
 #if LJ_64
   if (LJ_ABI_WIN) {
@@ -425,7 +425,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
@@ -560,7 +560,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
              dest, RID_ESP, ofs);
   }
   if ((ci->flags & CCI_CASTU64)) {
@@ -584,15 +584,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
@@ -652,7 +643,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -672,8 +663,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
@@ -729,8 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
               dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
@@ -738,9 +727,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-                 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-                 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
        /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
        /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -834,8 +821,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-              dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
            irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -863,7 +849,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -906,6 +891,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
            st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
            asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -927,29 +920,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
            RID_ESP, sps_scale(ir->s));
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-              RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,23 +956,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
   emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
@@ -985,10 +964,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
@@ -1005,14 +984,12 @@ static void asm_href(ASMState *as, IRIns *ir)
     tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1021,8 +998,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
 
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
@@ -1178,41 +1155,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1272,7 +1214,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
@@ -1285,6 +1227,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
+#define asm_fload(as, ir)	asm_fxload(as, ir)
+#define asm_xload(as, ir)	asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
@@ -1348,6 +1293,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_fstore(as, ir)	asm_fxstore(as, ir)
+#define asm_xstore(as, ir)	asm_fxstore(as, ir)
+
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
@@ -1386,7 +1334,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1452,7 +1400,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg left = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
-    emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+    emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
   } else if (irt_islightud(t)) {
@@ -1470,11 +1418,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1501,15 +1447,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-              lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
@@ -1552,15 +1496,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
     } while (1);
 #endif
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
 
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-           (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+           (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
@@ -1638,36 +1593,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-        rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-        irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
-}
-
 static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
   if (fpm == IRFPM_SQRT) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1698,53 +1626,31 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
               fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
-  } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
-    Reg dest = ir->r;
-    if (ra_hasreg(dest)) {
-      ra_free(as, dest);
-      ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
-    }
-    emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
-    switch (fpm) {  /* st0 = lj_vm_*(st0) */
-    case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
-    case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
-    case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
-    case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
-    case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
-    case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
-      /* Note: the use of fyl2xp1 would be pointless here. When computing
-      ** log(1.0+eps) the precision is already lost after 1.0 is added.
-      ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
-      */
-      emit_x87op(as, XI_FYL2X); break;
-    case IRFPM_OTHER:
-      switch (ir->o) {
-      case IR_ATAN2:
-        emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
-      case IR_LDEXP:
-        emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
-      default: lua_assert(0); break;
-      }
-      break;
-    default: lua_assert(0); break;
-    }
-    asm_x87load(as, ir->op1);
-    switch (fpm) {
-    case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
-    case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
-    case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
-    case IRFPM_OTHER:
-      if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
-      break;
-    default: break;
-    }
+  } else {
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
   }
 }
 
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+
+static void asm_ldexp(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+  emit_x87op(as, XI_FPOP1);
+  emit_x87op(as, XI_FSCALE);
+  asm_x87load(as, ir->op1);
+  asm_x87load(as, ir->op2);
+}
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
   /* The modified regs must match with the *.dasc implementation. */
@@ -1758,26 +1664,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+                                          IRCALL_lj_carith_powu64);
+  else
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+    asm_fppowi(as, ir);
 }
 
 static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1960,6 +1855,44 @@ static void asm_add(ASMState *as, IRIns *ir)
     asm_intarith(as, ir, XOg_ADD);
 }
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+                                          IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+                                          IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1967,7 +1900,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir)		asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
@@ -1978,7 +1921,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir)	asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir)	asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir)	asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir)	asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1986,7 +1952,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+#define asm_band(as, ir)	asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir)		asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir)	asm_intarith(as, ir, XOg_XOR)
+
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
 {
   IRRef rref = ir->op2;
   IRIns *irr = IR(rref);
@@ -1995,11 +1965,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
     int shift;
     dest = ra_dest(as, ir, RSET_GPR);
     shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
+    if (!xv && shift && (as->flags & JIT_F_BMI2)) {
+      Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
+      if (left != dest) {  /* BMI2 rotate right by constant. */
+        emit_i8(as, xs == XOg_ROL ? -shift : shift);
+        emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
+        return;
+      }
+    }
     switch (shift) {
     case 0: break;
     case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
     default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
     }
+  } else if ((as->flags & JIT_F_BMI2) && xv) {  /* BMI2 variable shifts. */
+    Reg left, right;
+    dest = ra_dest(as, ir, RSET_GPR);
+    right = ra_alloc1(as, rref, RSET_GPR);
+    left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
+                         irt_is64(ir->t));
+    emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
+    return;
   } else {  /* Variable shifts implicitly use register cl (i.e. ecx). */
     Reg right;
     dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
@@ -2025,6 +2011,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, XOg_SAR, XV_SARX)
+#define asm_brol(as, ir)	asm_bitshift(as, ir, XOg_ROL, 0)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, XOg_ROR, 0)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
@@ -2051,8 +2043,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
@@ -2207,6 +2200,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
 }
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2289,13 +2284,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-        asm_conv_fp_int64(as, ir);
-      else
-        asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
@@ -2344,6 +2335,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_i8(as, HOOK_PROFILE);
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2365,7 +2366,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
            ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
@@ -2593,163 +2594,6 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-                                             IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-                                             IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-                                             IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */