Diffstat (limited to 'src/lj_asm_x86.h')
-rw-r--r--  src/lj_asm_x86.h  |  526
1 file changed, 198 insertions(+), 328 deletions(-)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index fda911e5..3e87ba18 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -384,7 +384,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
 /* Count the required number of stack slots for a call. */
 static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t i, nargs = CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0;
 #if LJ_64
   if (LJ_ABI_WIN) {
@@ -417,7 +417,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = STACKARG_OFS;
 #if LJ_64
   uint32_t gprs = REGARG_GPRS;
@@ -552,7 +552,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
 	      dest, RID_ESP, ofs);
   }
   if ((ci->flags & CCI_CASTU64)) {
@@ -576,15 +576,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* Return a constant function pointer or NULL for indirect calls. */
 static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
 {
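Note: the asm_call wrapper removed above contains nothing x86-specific, so it
is presumably hoisted unchanged into the target-independent assembler
(lj_asm.c). A minimal sketch of the assumed shared version, identical to the
deleted body:

    static void asm_call(ASMState *as, IRIns *ir)
    {
      IRRef args[CCI_NARGS_MAX];
      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
      asm_collectargs(as, ir, ci, args);  /* Gather the IR refs of the args. */
      asm_setupresult(as, ir, ci);        /* Assign/free the result regs. */
      asm_gencall(as, ci, args);          /* Emit the actual call. */
    }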
@@ -664,8 +655,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   asm_guardcc(as, CC_NE);
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
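Note: the XORPS zeroing above is now emitted unconditionally because the
JIT_F_SPLIT_XMM workaround is dropped throughout this diff. CVTSI2SD writes
only the low 64 bits of its destination XMM register, so clearing the register
first breaks the false dependency on its previous contents. The guarded
conversion implements these C semantics (a sketch; the helper name is
illustrative, not part of the source):

    static int32_t checked_toint(double n, int *ok)
    {
      int32_t i = (int32_t)n;   /* cvttsd2si: truncating conversion. */
      *ok = ((double)i == n);   /* cvtsi2sd + ucomisd; the guard exits on NE. */
      return i;
    }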
@@ -721,8 +711,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
     }
-    if (!(as->flags & JIT_F_SPLIT_XMM))
-      emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
+    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
@@ -730,9 +719,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      x86Op op = st == IRT_NUM ?
-	((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
-	((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+      x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
       if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
 	/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
 	/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
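Note: the old code picked a truncating (CVTT*) or rounding (CVT*) conversion
based on IRCONV_TRUNC; after this change the FP-to-integer path always
truncates (the matching IRCONV_TRUNC assertion is dropped in asm_conv_int64_fp
below, and asm_sload gets the same XO_CVTTSD2SI treatment). Presumably the IR
no longer carries rounding conversions to integer types. This matches plain C
cast semantics:

    int32_t narrow_num(double x) { return (int32_t)x; }  /* cvttsd2si */
    int32_t narrow_flt(float x)  { return (int32_t)x; }  /* cvttss2si */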
@@ -826,8 +813,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     ra_free(as, dest);
     ra_modified(as, dest);
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
-	      dest, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
   }
   emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
 	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -855,7 +841,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
   Reg lo, hi;
   lua_assert(st == IRT_NUM || st == IRT_FLOAT);
   lua_assert(dt == IRT_I64 || dt == IRT_U64);
-  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
   hi = ra_dest(as, ir, RSET_GPR);
   lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
   if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -898,6 +883,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
 	    st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
 	    asm_fuseload(as, ir->op1, RSET_EMPTY));
 }
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
 #endif
 
 static void asm_strto(ASMState *as, IRIns *ir)
@@ -919,29 +912,32 @@ static void asm_strto(ASMState *as, IRIns *ir)
 	    RID_ESP, sps_scale(ir->s));
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
-  IRIns *irl = IR(ir->op1);
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(irl->t)) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
-	      RID_ESP, ra_spill(as, irl));
+  IRIns *ir = IR(ref);
+  if (irt_isnum(ir->t)) {
+    /* For numbers use the constant itself or a spill slot as a TValue. */
+    if (irref_isk(ref))
+      emit_loada(as, dest, ir_knum(ir));
+    else
+      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    /* Otherwise use g->tmptv to hold the TValue. */
+    if (!irref_isk(ref)) {
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    } else if (!irt_ispri(ir->t)) {
+      emit_movmroi(as, dest, 0, ir->i);
+    }
+    if (!(LJ_64 && irt_islightud(ir->t)))
+      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+    emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
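Note: asm_tvptr factors out the "materialize a pointer to a TValue" logic that
used to be duplicated per call site; compare the asm_newref body deleted
further below, which inlined an identical tail. A hypothetical sketch of how
the assumed shared asm_newref in lj_asm.c would use this hook (grounded in the
deleted body, but the shared version itself is an assumption):

    static void asm_newref(ASMState *as, IRIns *ir)
    {
      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
      IRRef args[3];
      if (ir->r == RID_SINK)
        return;
      args[0] = ASMREF_L;     /* lua_State *L */
      args[1] = ir->op1;      /* GCtab *t     */
      args[2] = ASMREF_TMP1;  /* cTValue *key */
      asm_setupresult(as, ir, ci);  /* TValue * */
      asm_gencall(as, ci, args);
      asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
    }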
@@ -952,23 +948,6 @@ static void asm_aref(ASMState *as, IRIns *ir)
   emit_rr(as, XO_MOV, dest, as->mrm.base);
 }
 
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
-  /* Assumes nothing else generates NE of HREF. */
-  if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
-      ra_hasreg(ir->r)) {
-    MCode *p = as->mcp;
-    p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
-    /* Ensure no loop branch inversion happened. */
-    if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
-      as->mcp = p;  /* Kill cmp reg, imm32 + jz exit. */
-      return p + *(int32_t *)(p-4);  /* Return exit address. */
-    }
-  }
-  return NULL;
-}
-
 /* Inlined hash lookup. Specialized for key type and for const keys.
 ** The equivalent C code is:
 **   Node *n = hashkey(t, key);
@@ -977,10 +956,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
 **   } while ((n = nextnode(n)));
 **   return niltv(L);
 */
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 {
-  MCode *nilexit = merge_href_niltv(as, ir);  /* Do this before any restores. */
   RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
   Reg key = RID_NONE, tmp = RID_NONE;
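Note: the fragile merge_href_niltv peephole, which re-parsed already emitted
machine code to fuse a following EQ/NE-against-niltv into the lookup, is
replaced by an explicit merge argument. The caller (presumably the shared
dispatcher) now decides up front whether the next instruction is such a
comparison and passes its opcode. Equivalent C semantics of the merged guard
(a sketch; the helper name is illustrative):

    static int href_guard_fails(IROp merge, cTValue *v, cTValue *niltv)
    {
      if (merge == IR_NE) return v == niltv;  /* Key must be present. */
      if (merge == IR_EQ) return v != niltv;  /* Key must be absent. */
      return 0;  /* No merge: the result is used as a plain value. */
    }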
@@ -997,14 +976,12 @@ static void asm_href(ASMState *as, IRIns *ir)
     tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
-  /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
-  if (nilexit && ir[1].o == IR_NE) {
-    emit_jcc(as, CC_E, nilexit);  /* XI_JMP is not found by lj_asm_patchexit. */
-    nilexit = NULL;
-  } else {
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_E);  /* XI_JMP is not found by lj_asm_patchexit. */
+  else if (destused)
     emit_loada(as, dest, niltvg(J2G(as->J)));
-  }
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
@@ -1013,8 +990,8 @@ static void asm_href(ASMState *as, IRIns *ir)
   l_next = emit_label(as);
 
   /* Type and value comparison. */
-  if (nilexit)
-    emit_jcc(as, CC_E, nilexit);
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
   if (irt_isnum(kt)) {
@@ -1170,41 +1147,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 #endif
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  IRIns *irkey;
-  Reg tmp;
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  tmp = ra_releasetmp(as, ASMREF_TMP1);
-  irkey = IR(ir->op2);
-  if (irt_isnum(irkey->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ir->op2))
-      emit_loada(as, tmp, ir_knum(irkey));
-    else
-      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    if (!irref_isk(ir->op2)) {
-      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
-    } else if (!irt_ispri(irkey->t)) {
-      emit_movmroi(as, tmp, 0, irkey->i);
-    }
-    if (!(LJ_64 && irt_islightud(irkey->t)))
-      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
-    emit_loada(as, tmp, &J2G(as->J)->tmptv);
-  }
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1264,7 +1206,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
-  case IRT_NUM: xo = XMM_MOVRM(as); break;
+  case IRT_NUM: xo = XO_MOVSD; break;
   case IRT_FLOAT: xo = XO_MOVSS; break;
   default:
     if (LJ_64 && irt_is64(ir->t))
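Note: every remaining XMM_MOVRM(as) use is replaced by a plain XO_MOVSD in
this diff. That macro presumably selected its load opcode from as->flags as
part of the JIT_F_SPLIT_XMM workaround removed above, along these lines (an
assumption reconstructed from the deleted flag tests, not quoted from the
source):

    #define XMM_MOVRM(as)  ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)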
@@ -1277,6 +1219,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   emit_mrm(as, xo, dest, RID_MRM);
 }
 
+#define asm_fload(as, ir)	asm_fxload(as, ir)
+#define asm_xload(as, ir)	asm_fxload(as, ir)
+
 static void asm_fxstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
@@ -1340,6 +1285,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_fstore(as, ir)	asm_fxstore(as, ir)
+#define asm_xstore(as, ir)	asm_fxstore(as, ir)
+
 #if LJ_64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
@@ -1378,7 +1326,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
-    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
     asm_fuseahuref(as, ir->op1, RSET_GPR);
   }
@@ -1444,7 +1392,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
       Reg left = ra_scratch(as, RSET_FPR);
       asm_tointg(as, ir, left);  /* Frees dest reg. Do this before base alloc. */
       base = ra_alloc1(as, REF_BASE, RSET_GPR);
-      emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+      emit_rmro(as, XO_MOVSD, left, base, ofs);
       t.irt = IRT_NUM;  /* Continue with a regular number type check. */
 #if LJ_64
     } else if (irt_islightud(t)) {
@@ -1462,11 +1410,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if ((ir->op2 & IRSLOAD_CONVERT)) {
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
-      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
-    } else if (irt_isnum(t)) {
-      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
-      emit_rmro(as, XO_MOV, dest, base, ofs);
+      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1493,15 +1439,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-	      lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  lua_assert(sz != CTSIZE_INVALID);
+  IRRef args[4];
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
   asm_setupresult(as, ir, ci);  /* GCcdata * */
 
@@ -1544,15 +1488,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   } while (1);
 #endif
     lua_assert(sz == 4 || sz == 8);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
 
   /* Combine initialization of marked, gct and ctypeid. */
   emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
   emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
-	   (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+	   (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
   emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
   emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
 
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
 }
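Note: the new branch hands VLA/VLS and over-aligned cdata allocations to a
dedicated runtime call instead of the generic lj_mem_newgco path. Its assumed
C prototype, reconstructed from the argument comments above and the GCcdata *
result set up by asm_setupresult (the declaration itself is not shown in this
diff):

    GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align);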
@@ -1630,31 +1585,21 @@ static void asm_x87load(ASMState *as, IRRef ref)
   }
 }
 
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
 {
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      /* The modified regs must match with the *.dasc implementation. */
-      RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-      IRIns *irx;
-      if (ra_hasreg(ir->r))
-	rset_clear(drop, ir->r);  /* Dest reg handled below. */
-      ra_evictset(as, drop);
-      ra_destreg(as, ir, RID_XMM0);
-      emit_call(as, lj_vm_pow_sse);
-      irx = IR(irpp->op1);
-      if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
-	irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
-      ra_left(as, RID_XMM0, irpp->op1);
-      ra_left(as, RID_XMM1, irp->op2);
-      return 1;
-    }
-  }
-  return 0;
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
+  IRIns *irx;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  ra_destreg(as, ir, RID_XMM0);
+  emit_call(as, lj_vm_pow_sse);
+  irx = IR(lref);
+  if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
+    irx->r = RID_INIT;  /* Avoid allocating xmm1 for x. */
+  ra_left(as, RID_XMM0, lref);
+  ra_left(as, RID_XMM1, rref);
 }
 
 static void asm_fpmath(ASMState *as, IRIns *ir)
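Note: asm_fppow now receives the pow operands explicitly instead of pattern
matching the IR itself; the detection of an unsplit EXP2(MUL(LOG2(x), y))
chain presumably moved into a shared asm_fpjoin_pow helper (referenced below),
which calls back into this target hook. The identity being rejoined, as a
semantic sketch of what the fused call computes (not the emitted code):

    #include <math.h>
    double vm_pow(double x, double y) { return exp2(log2(x) * y); }  /* x^y */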
@@ -1690,7 +1635,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
 	      fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
       ra_left(as, RID_XMM0, ir->op1);
     }
-  } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
+  } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
   } else {  /* Handle x87 ops. */
     int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
@@ -1698,7 +1643,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
       ra_modified(as, dest);
-      emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+      emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
     }
     emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
     switch (fpm) {  /* st0 = lj_vm_*(st0) */
@@ -1737,6 +1682,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_atan2(as, ir)	asm_fpmath(as, ir)
+#define asm_ldexp(as, ir)	asm_fpmath(as, ir)
+
 static void asm_fppowi(ASMState *as, IRIns *ir)
 {
   /* The modified regs must match with the *.dasc implementation. */
@@ -1750,26 +1698,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_pow(ASMState *as, IRIns *ir)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					  IRCALL_lj_carith_powu64);
+  else
 #endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  asm_fppowi(as, ir);
 }
 
 static int asm_swapops(ASMState *as, IRIns *ir)
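Note: asm_pow above (and asm_div/asm_mod below) replace the hand-rolled
asm_arith64/asm_intmod wrappers with asm_callid, presumably a shared helper in
lj_asm.c. A sketch, generalized directly from the two deleted wrappers, which
differed only in the call ID:

    static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
    {
      const CCallInfo *ci = &lj_ir_callinfo[id];
      IRRef args[2];
      args[0] = ir->op1;
      args[1] = ir->op2;
      asm_setupresult(as, ir, ci);
      asm_gencall(as, ci, args);
    }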
@@ -1948,6 +1885,44 @@ static void asm_add(ASMState *as, IRIns *ir)
   asm_intarith(as, ir, XOg_ADD);
 }
 
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_SUBSD);
+  else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+    asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MULSD);
+  else
+    asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isnum(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					  IRCALL_lj_carith_divu64);
+  else
+#endif
+    asm_fparith(as, ir, XO_DIVSD);
+}
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+  if (!irt_isint(ir->t))
+    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					  IRCALL_lj_carith_modu64);
+  else
+#endif
+    asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
 static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1955,7 +1930,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
   ra_left(as, dest, ir->op1);
 }
 
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_XORPS);
+  else
+    asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir)		asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   Reg right, dest = ra_dest(as, ir, RSET_GPR);
   IRRef lref = ir->op1, rref = ir->op2;
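Note: asm_intmin_max is the renamed integer half of the old asm_min_max; the
condition code decides when the conditional move replaces the accumulator, so
CC_G yields min and CC_L yields max (see asm_min/asm_max below). Equivalent C
semantics (sketch):

    static int32_t intmin(int32_t a, int32_t b) { return a > b ? b : a; }
    static int32_t intmax(int32_t a, int32_t b) { return a < b ? b : a; }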
@@ -1966,7 +1951,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
   ra_left(as, dest, lref);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MINSD);
+  else
+    asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t))
+    asm_fparith(as, ir, XO_MAXSD);
+  else
+    asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir)	asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir)	asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir)	asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir)	asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1974,6 +1982,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   ra_left(as, dest, ir->op1);
 }
 
+#define asm_band(as, ir)	asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir)		asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir)	asm_intarith(as, ir, XOg_XOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 {
   IRRef rref = ir->op2;
@@ -2013,6 +2025,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, XOg_SHL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, XOg_SHR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, XOg_SAR)
+#define asm_brol(as, ir)	asm_bitshift(as, ir, XOg_ROL)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, XOg_ROR)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
@@ -2039,8 +2057,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
 };
 
 /* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
 {
+  uint32_t cc = asm_compmap[ir->o];
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
     IRRef rref = ir->op2;
@@ -2195,6 +2214,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
   }
 }
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_32 && LJ_HASFFI
 /* 64 bit integer comparisons in 32 bit mode. */
 static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2277,13 +2298,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
-    if (usehi || uselo) {
-      if (irt_isfp(ir->t))
-	asm_conv_fp_int64(as, ir);
-      else
-	asm_conv_int64_fp(as, ir);
-    }
     as->curins--;  /* Always skip the CONV. */
+    if (usehi || uselo)
+      asm_conv64(as, ir);
     return;
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     asm_comp_int64(as, ir);
@@ -2332,6 +2349,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_i8(as, HOOK_PROFILE);
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
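Note: asm_prof emits a byte-sized TEST of g->hookmask against HOOK_PROFILE and
a guard that exits the trace whenever the profiler bit is set, so the
interpreter can run the hook. Equivalent C semantics (a sketch; the helper
name is illustrative):

    static int prof_guard_fails(global_State *g)
    {
      return (g->hookmask & HOOK_PROFILE) != 0;  /* test byte + jne ->exit */
    }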
@@ -2353,7 +2380,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
 	    ptr2addr(&J2G(as->J)->jit_base));
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
-  emit_getgl(as, r, jit_L);
+  emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
@@ -2581,163 +2608,6 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_EQ: case IR_NE: case IR_ABC:
-    asm_comp(as, ir, asm_compmap[ir->o]);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
-  case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
-  case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
-  case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
-  case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
-  case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
-  case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_SUBSD);
-    else  /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
-      asm_intarith(as, ir, XOg_SUB);
-    break;
-  case IR_MUL:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MULSD);
-    else
-      asm_intarith(as, ir, XOg_X_IMUL);
-    break;
-  case IR_DIV:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					     IRCALL_lj_carith_divu64);
-    else
-#endif
-      asm_fparith(as, ir, XO_DIVSD);
-    break;
-  case IR_MOD:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isint(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					     IRCALL_lj_carith_modu64);
-    else
-#endif
-      asm_intmod(as, ir);
-    break;
-
-  case IR_NEG:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_XORPS);
-    else
-      asm_neg_not(as, ir, XOg_NEG);
-    break;
-  case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
-  case IR_MIN:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MINSD);
-    else
-      asm_min_max(as, ir, CC_G);
-    break;
-  case IR_MAX:
-    if (irt_isnum(ir->t))
-      asm_fparith(as, ir, XO_MAXSD);
-    else
-      asm_min_max(as, ir, CC_L);
-    break;
-
-  case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
-    asm_fpmath(as, ir);
-    break;
-  case IR_POW:
-#if LJ_64 && LJ_HASFFI
-    if (!irt_isnum(ir->t))
-      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					     IRCALL_lj_carith_powu64);
-    else
-#endif
-      asm_fppowi(as, ir);
-    break;
-
-  /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
-  case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
-  case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
-  case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
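Note: the per-backend dispatch switch deleted above is the point of this whole
refactor. With every IR opcode now exposed under a uniform asm_<name> entry
(as a function or macro; see the definitions added throughout this diff), a
single target-independent dispatcher can live in lj_asm.c instead of being
copied into each backend. A sketch of the assumed shared replacement,
reconstructed from the deleted switch:

    static void asm_ir(ASMState *as, IRIns *ir)
    {
      switch ((IROp)ir->o) {
      case IR_ADD: asm_add(as, ir); break;
      case IR_SUB: asm_sub(as, ir); break;      /* Now a real function above. */
      case IR_BAND: asm_band(as, ir); break;    /* Expands to asm_intarith(). */
      case IR_FLOAD: asm_fload(as, ir); break;  /* Expands to asm_fxload(). */
      /* ... one case per IR opcode ... */
      default:
        setintV(&as->J->errinfo, ir->o);
        lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
        break;
      }
    }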