about | summary | refs | log | tree | commit | diff
path: root/src/lj_asm_ppc.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/lj_asm_ppc.h 374
1 files changed, 110 insertions, 264 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index e8f3d08b..0ebed40f 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -251,7 +251,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
251/* Generate a call to a C function. */ 251/* Generate a call to a C function. */
252static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 252static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
253{ 253{
254 uint32_t n, nargs = CCI_NARGS(ci); 254 uint32_t n, nargs = CCI_XNARGS(ci);
255 int32_t ofs = 8; 255 int32_t ofs = 8;
256 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; 256 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
257 if ((void *)ci->func) 257 if ((void *)ci->func)
@@ -323,23 +323,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
323 } else { 323 } else {
324 ra_destreg(as, ir, RID_FPRET); 324 ra_destreg(as, ir, RID_FPRET);
325 } 325 }
326#if LJ_32
326 } else if (hiop) { 327 } else if (hiop) {
327 ra_destpair(as, ir); 328 ra_destpair(as, ir);
329#endif
328 } else { 330 } else {
329 ra_destreg(as, ir, RID_RET); 331 ra_destreg(as, ir, RID_RET);
330 } 332 }
331 } 333 }
332} 334}
333 335
334static void asm_call(ASMState *as, IRIns *ir)
335{
336 IRRef args[CCI_NARGS_MAX];
337 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
338 asm_collectargs(as, ir, ci, args);
339 asm_setupresult(as, ir, ci);
340 asm_gencall(as, ci, args);
341}
342
343static void asm_callx(ASMState *as, IRIns *ir) 336static void asm_callx(ASMState *as, IRIns *ir)
344{ 337{
345 IRRef args[CCI_NARGS_MAX*2]; 338 IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +345,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
352 func = ir->op2; irf = IR(func); 345 func = ir->op2; irf = IR(func);
353 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 346 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
354 if (irref_isk(func)) { /* Call to constant address. */ 347 if (irref_isk(func)) { /* Call to constant address. */
355 ci.func = (ASMFunction)(void *)(irf->i); 348 ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
356 } else { /* Need a non-argument register for indirect calls. */ 349 } else { /* Need a non-argument register for indirect calls. */
357 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); 350 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
358 Reg freg = ra_alloc1(as, func, allow); 351 Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +356,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
363 asm_gencall(as, &ci, args); 356 asm_gencall(as, &ci, args);
364} 357}
365 358
366static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
367{
368 const CCallInfo *ci = &lj_ir_callinfo[id];
369 IRRef args[2];
370 args[0] = ir->op1;
371 args[1] = ir->op2;
372 asm_setupresult(as, ir, ci);
373 asm_gencall(as, ci, args);
374}
375
376/* -- Returns ------------------------------------------------------------- */ 359/* -- Returns ------------------------------------------------------------- */
377 360
378/* Return to lower frame. Guard that it goes to the right spot. */ 361/* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +363,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
380{ 363{
381 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 364 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
382 void *pc = ir_kptr(IR(ir->op2)); 365 void *pc = ir_kptr(IR(ir->op2));
383 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 366 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
384 as->topslot -= (BCReg)delta; 367 as->topslot -= (BCReg)delta;
385 if ((int32_t)as->topslot < 0) as->topslot = 0; 368 if ((int32_t)as->topslot < 0) as->topslot = 0;
386 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 369 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -513,28 +496,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
513 } 496 }
514} 497}
515 498
516#if LJ_HASFFI
517static void asm_conv64(ASMState *as, IRIns *ir)
518{
519 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
520 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
521 IRCallID id;
522 const CCallInfo *ci;
523 IRRef args[2];
524 args[0] = ir->op1;
525 args[1] = (ir-1)->op1;
526 if (st == IRT_NUM || st == IRT_FLOAT) {
527 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
528 ir--;
529 } else {
530 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
531 }
532 ci = &lj_ir_callinfo[id];
533 asm_setupresult(as, ir, ci);
534 asm_gencall(as, ci, args);
535}
536#endif
537
538static void asm_strto(ASMState *as, IRIns *ir) 499static void asm_strto(ASMState *as, IRIns *ir)
539{ 500{
540 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 501 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -553,6 +514,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
553 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); 514 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
554} 515}
555 516
517/* -- Memory references --------------------------------------------------- */
518
556/* Get pointer to TValue. */ 519/* Get pointer to TValue. */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 520static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
558{ 521{
@@ -566,7 +529,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
566 /* Otherwise use g->tmptv to hold the TValue. */ 529 /* Otherwise use g->tmptv to hold the TValue. */
567 RegSet allow = rset_exclude(RSET_GPR, dest); 530 RegSet allow = rset_exclude(RSET_GPR, dest);
568 Reg type; 531 Reg type;
569 emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 532 emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
570 if (!irt_ispri(ir->t)) { 533 if (!irt_ispri(ir->t)) {
571 Reg src = ra_alloc1(as, ref, allow); 534 Reg src = ra_alloc1(as, ref, allow);
572 emit_setgl(as, src, tmptv.gcr); 535 emit_setgl(as, src, tmptv.gcr);
@@ -576,27 +539,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
576 } 539 }
577} 540}
578 541
579static void asm_tostr(ASMState *as, IRIns *ir)
580{
581 IRRef args[2];
582 args[0] = ASMREF_L;
583 as->gcsteps++;
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
586 args[1] = ASMREF_TMP1; /* const lua_Number * */
587 asm_setupresult(as, ir, ci); /* GCstr * */
588 asm_gencall(as, ci, args);
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
590 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 }
596}
597
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 542static void asm_aref(ASMState *as, IRIns *ir)
601{ 543{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 544 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -773,20 +715,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
773 } 715 }
774} 716}
775 717
776static void asm_newref(ASMState *as, IRIns *ir)
777{
778 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
779 IRRef args[3];
780 if (ir->r == RID_SINK)
781 return;
782 args[0] = ASMREF_L; /* lua_State *L */
783 args[1] = ir->op1; /* GCtab *t */
784 args[2] = ASMREF_TMP1; /* cTValue *key */
785 asm_setupresult(as, ir, ci); /* TValue * */
786 asm_gencall(as, ci, args);
787 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
788}
789
790static void asm_uref(ASMState *as, IRIns *ir) 718static void asm_uref(ASMState *as, IRIns *ir)
791{ 719{
792 /* NYI: Check that UREFO is still open and not aliasing a slot. */ 720 /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -917,7 +845,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
917 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 845 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
918} 846}
919 847
920static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 848static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
921{ 849{
922 IRIns *irb; 850 IRIns *irb;
923 if (ir->r == RID_SINK) 851 if (ir->r == RID_SINK)
@@ -934,6 +862,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
934 } 862 }
935} 863}
936 864
865#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
866
937static void asm_ahuvload(ASMState *as, IRIns *ir) 867static void asm_ahuvload(ASMState *as, IRIns *ir)
938{ 868{
939 IRType1 t = ir->t; 869 IRType1 t = ir->t;
@@ -1084,19 +1014,15 @@ dotypecheck:
1084static void asm_cnew(ASMState *as, IRIns *ir) 1014static void asm_cnew(ASMState *as, IRIns *ir)
1085{ 1015{
1086 CTState *cts = ctype_ctsG(J2G(as->J)); 1016 CTState *cts = ctype_ctsG(J2G(as->J));
1087 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1017 CTypeID id = (CTypeID)IR(ir->op1)->i;
1088 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1018 CTSize sz;
1089 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1019 CTInfo info = lj_ctype_info(cts, id, &sz);
1090 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1020 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1091 IRRef args[2]; 1021 IRRef args[4];
1092 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1093 RegSet drop = RSET_SCRATCH; 1022 RegSet drop = RSET_SCRATCH;
1094 lua_assert(sz != CTSIZE_INVALID); 1023 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1095 1024
1096 args[0] = ASMREF_L; /* lua_State *L */
1097 args[1] = ASMREF_TMP1; /* MSize size */
1098 as->gcsteps++; 1025 as->gcsteps++;
1099
1100 if (ra_hasreg(ir->r)) 1026 if (ra_hasreg(ir->r))
1101 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1027 rset_clear(drop, ir->r); /* Dest reg handled below. */
1102 ra_evictset(as, drop); 1028 ra_evictset(as, drop);
@@ -1105,6 +1031,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1105 1031
1106 /* Initialize immutable cdata object. */ 1032 /* Initialize immutable cdata object. */
1107 if (ir->o == IR_CNEWI) { 1033 if (ir->o == IR_CNEWI) {
1034 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1108 int32_t ofs = sizeof(GCcdata); 1035 int32_t ofs = sizeof(GCcdata);
1109 lua_assert(sz == 4 || sz == 8); 1036 lua_assert(sz == 4 || sz == 8);
1110 if (sz == 8) { 1037 if (sz == 8) {
@@ -1118,12 +1045,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1118 if (ofs == sizeof(GCcdata)) break; 1045 if (ofs == sizeof(GCcdata)) break;
1119 ofs -= 4; ir++; 1046 ofs -= 4; ir++;
1120 } 1047 }
1048 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1049 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1050 args[0] = ASMREF_L; /* lua_State *L */
1051 args[1] = ir->op1; /* CTypeID id */
1052 args[2] = ir->op2; /* CTSize sz */
1053 args[3] = ASMREF_TMP1; /* CTSize align */
1054 asm_gencall(as, ci, args);
1055 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1056 return;
1121 } 1057 }
1058
1122 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1059 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1123 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1060 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1124 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1061 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1125 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); 1062 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
1126 emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1063 emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1064 args[0] = ASMREF_L; /* lua_State *L */
1065 args[1] = ASMREF_TMP1; /* MSize size */
1127 asm_gencall(as, ci, args); 1066 asm_gencall(as, ci, args);
1128 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1067 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1129 ra_releasetmp(as, ASMREF_TMP1)); 1068 ra_releasetmp(as, ASMREF_TMP1));
@@ -1197,23 +1136,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1197 emit_fb(as, pi, dest, left); 1136 emit_fb(as, pi, dest, left);
1198} 1137}
1199 1138
1200static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1139static void asm_fpmath(ASMState *as, IRIns *ir)
1201{ 1140{
1202 IRIns *irp = IR(ir->op1); 1141 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1203 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1142 return;
1204 IRIns *irpp = IR(irp->op1); 1143 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1205 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1144 asm_fpunary(as, ir, PPCI_FSQRT);
1206 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1145 else
1207 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1146 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1208 IRRef args[2];
1209 args[0] = irpp->op1;
1210 args[1] = irp->op2;
1211 asm_setupresult(as, ir, ci);
1212 asm_gencall(as, ci, args);
1213 return 1;
1214 }
1215 }
1216 return 0;
1217} 1147}
1218 1148
1219static void asm_add(ASMState *as, IRIns *ir) 1149static void asm_add(ASMState *as, IRIns *ir)
@@ -1313,6 +1243,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1313 } 1243 }
1314} 1244}
1315 1245
1246#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1247#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1248#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1249
1316static void asm_neg(ASMState *as, IRIns *ir) 1250static void asm_neg(ASMState *as, IRIns *ir)
1317{ 1251{
1318 if (irt_isnum(ir->t)) { 1252 if (irt_isnum(ir->t)) {
@@ -1331,6 +1265,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1331 } 1265 }
1332} 1266}
1333 1267
1268#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1269#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1270#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1271
1334static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1272static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1335{ 1273{
1336 Reg dest, left, right; 1274 Reg dest, left, right;
@@ -1346,6 +1284,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1346 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1284 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1347} 1285}
1348 1286
1287#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1288#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1289#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1290
1349#if LJ_HASFFI 1291#if LJ_HASFFI
1350static void asm_add64(ASMState *as, IRIns *ir) 1292static void asm_add64(ASMState *as, IRIns *ir)
1351{ 1293{
@@ -1425,7 +1367,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1425} 1367}
1426#endif 1368#endif
1427 1369
1428static void asm_bitnot(ASMState *as, IRIns *ir) 1370static void asm_bnot(ASMState *as, IRIns *ir)
1429{ 1371{
1430 Reg dest, left, right; 1372 Reg dest, left, right;
1431 PPCIns pi = PPCI_NOR; 1373 PPCIns pi = PPCI_NOR;
@@ -1452,7 +1394,7 @@ nofuse:
1452 emit_asb(as, pi, dest, left, right); 1394 emit_asb(as, pi, dest, left, right);
1453} 1395}
1454 1396
1455static void asm_bitswap(ASMState *as, IRIns *ir) 1397static void asm_bswap(ASMState *as, IRIns *ir)
1456{ 1398{
1457 Reg dest = ra_dest(as, ir, RSET_GPR); 1399 Reg dest = ra_dest(as, ir, RSET_GPR);
1458 IRIns *irx; 1400 IRIns *irx;
@@ -1473,32 +1415,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1473 } 1415 }
1474} 1416}
1475 1417
1476static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1477{
1478 Reg dest = ra_dest(as, ir, RSET_GPR);
1479 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1480 if (irref_isk(ir->op2)) {
1481 int32_t k = IR(ir->op2)->i;
1482 Reg tmp = left;
1483 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1484 if (!checku16(k)) {
1485 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1486 if ((k & 0xffff) == 0) return;
1487 }
1488 emit_asi(as, pik, dest, left, k);
1489 return;
1490 }
1491 }
1492 /* May fail due to spills/restores above, but simplifies the logic. */
1493 if (as->flagmcp == as->mcp) {
1494 as->flagmcp = NULL;
1495 as->mcp++;
1496 pi |= PPCF_DOT;
1497 }
1498 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1499 emit_asb(as, pi, dest, left, right);
1500}
1501
1502/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1418/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1503static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1419static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1504{ 1420{
@@ -1529,7 +1445,7 @@ nofuse:
1529 *--as->mcp = pi | PPCF_T(left); 1445 *--as->mcp = pi | PPCF_T(left);
1530} 1446}
1531 1447
1532static void asm_bitand(ASMState *as, IRIns *ir) 1448static void asm_band(ASMState *as, IRIns *ir)
1533{ 1449{
1534 Reg dest, left, right; 1450 Reg dest, left, right;
1535 IRRef lref = ir->op1; 1451 IRRef lref = ir->op1;
@@ -1584,6 +1500,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1584 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1500 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1585} 1501}
1586 1502
1503static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1504{
1505 Reg dest = ra_dest(as, ir, RSET_GPR);
1506 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1507 if (irref_isk(ir->op2)) {
1508 int32_t k = IR(ir->op2)->i;
1509 Reg tmp = left;
1510 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1511 if (!checku16(k)) {
1512 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1513 if ((k & 0xffff) == 0) return;
1514 }
1515 emit_asi(as, pik, dest, left, k);
1516 return;
1517 }
1518 }
1519 /* May fail due to spills/restores above, but simplifies the logic. */
1520 if (as->flagmcp == as->mcp) {
1521 as->flagmcp = NULL;
1522 as->mcp++;
1523 pi |= PPCF_DOT;
1524 }
1525 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1526 emit_asb(as, pi, dest, left, right);
1527}
1528
1529#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1530#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1531
1587static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1532static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1588{ 1533{
1589 Reg dest, left; 1534 Reg dest, left;
@@ -1609,6 +1554,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1609 } 1554 }
1610} 1555}
1611 1556
1557#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1558#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1559#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1560#define asm_brol(as, ir) \
1561 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1562 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1563#define asm_bror(as, ir) lua_assert(0)
1564
1612static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1565static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1613{ 1566{
1614 if (irt_isnum(ir->t)) { 1567 if (irt_isnum(ir->t)) {
@@ -1639,6 +1592,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1639 } 1592 }
1640} 1593}
1641 1594
1595#define asm_min(as, ir) asm_min_max(as, ir, 0)
1596#define asm_max(as, ir) asm_min_max(as, ir, 1)
1597
1642/* -- Comparisons --------------------------------------------------------- */ 1598/* -- Comparisons --------------------------------------------------------- */
1643 1599
1644#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1600#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1715,6 +1671,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
1715 } 1671 }
1716} 1672}
1717 1673
1674#define asm_equal(as, ir) asm_comp(as, ir)
1675
1718#if LJ_HASFFI 1676#if LJ_HASFFI
1719/* 64 bit integer comparisons. */ 1677/* 64 bit integer comparisons. */
1720static void asm_comp64(ASMState *as, IRIns *ir) 1678static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1760,8 +1718,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1760 } else if ((ir-1)->o == IR_XSTORE) { 1718 } else if ((ir-1)->o == IR_XSTORE) {
1761 as->curins--; /* Handle both stores here. */ 1719 as->curins--; /* Handle both stores here. */
1762 if ((ir-1)->r != RID_SINK) { 1720 if ((ir-1)->r != RID_SINK) {
1763 asm_xstore(as, ir, 0); 1721 asm_xstore_(as, ir, 0);
1764 asm_xstore(as, ir-1, 4); 1722 asm_xstore_(as, ir-1, 4);
1765 } 1723 }
1766 return; 1724 return;
1767 } 1725 }
@@ -1785,6 +1743,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1785#endif 1743#endif
1786} 1744}
1787 1745
1746/* -- Profiling ----------------------------------------------------------- */
1747
1748static void asm_prof(ASMState *as, IRIns *ir)
1749{
1750 UNUSED(ir);
1751 asm_guardcc(as, CC_NE);
1752 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
1753 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
1754 (int32_t)offsetof(global_State, hookmask));
1755}
1756
1788/* -- Stack handling ------------------------------------------------------ */ 1757/* -- Stack handling ------------------------------------------------------ */
1789 1758
1790/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1759/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1806,7 +1775,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1806 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 1775 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1807 if (pbase == RID_TMP) 1776 if (pbase == RID_TMP)
1808 emit_getgl(as, RID_TMP, jit_base); 1777 emit_getgl(as, RID_TMP, jit_base);
1809 emit_getgl(as, tmp, jit_L); 1778 emit_getgl(as, tmp, cur_L);
1810 if (allow == RSET_EMPTY) /* Spill temp. register. */ 1779 if (allow == RSET_EMPTY) /* Spill temp. register. */
1811 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 1780 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1812} 1781}
@@ -1967,136 +1936,13 @@ static void asm_tail_prep(ASMState *as)
1967 } 1936 }
1968} 1937}
1969 1938
1970/* -- Instruction dispatch ------------------------------------------------ */
1971
1972/* Assemble a single instruction. */
1973static void asm_ir(ASMState *as, IRIns *ir)
1974{
1975 switch ((IROp)ir->o) {
1976 /* Miscellaneous ops. */
1977 case IR_LOOP: asm_loop(as); break;
1978 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1979 case IR_USE:
1980 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1981 case IR_PHI: asm_phi(as, ir); break;
1982 case IR_HIOP: asm_hiop(as, ir); break;
1983 case IR_GCSTEP: asm_gcstep(as, ir); break;
1984
1985 /* Guarded assertions. */
1986 case IR_EQ: case IR_NE:
1987 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1988 as->curins--;
1989 asm_href(as, ir-1, (IROp)ir->o);
1990 break;
1991 }
1992 /* fallthrough */
1993 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1994 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1995 case IR_ABC:
1996 asm_comp(as, ir);
1997 break;
1998
1999 case IR_RETF: asm_retf(as, ir); break;
2000
2001 /* Bit ops. */
2002 case IR_BNOT: asm_bitnot(as, ir); break;
2003 case IR_BSWAP: asm_bitswap(as, ir); break;
2004
2005 case IR_BAND: asm_bitand(as, ir); break;
2006 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2007 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2008
2009 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2010 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2011 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2012 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2013 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2014 case IR_BROR: lua_assert(0); break;
2015
2016 /* Arithmetic ops. */
2017 case IR_ADD: asm_add(as, ir); break;
2018 case IR_SUB: asm_sub(as, ir); break;
2019 case IR_MUL: asm_mul(as, ir); break;
2020 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2021 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2022 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2023 case IR_NEG: asm_neg(as, ir); break;
2024
2025 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2026 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2027 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2028 case IR_MIN: asm_min_max(as, ir, 0); break;
2029 case IR_MAX: asm_min_max(as, ir, 1); break;
2030 case IR_FPMATH:
2031 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2032 break;
2033 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2034 asm_fpunary(as, ir, PPCI_FSQRT);
2035 else
2036 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2037 break;
2038
2039 /* Overflow-checking arithmetic ops. */
2040 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2041 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2042 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2043
2044 /* Memory references. */
2045 case IR_AREF: asm_aref(as, ir); break;
2046 case IR_HREF: asm_href(as, ir, 0); break;
2047 case IR_HREFK: asm_hrefk(as, ir); break;
2048 case IR_NEWREF: asm_newref(as, ir); break;
2049 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2050 case IR_FREF: asm_fref(as, ir); break;
2051 case IR_STRREF: asm_strref(as, ir); break;
2052
2053 /* Loads and stores. */
2054 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2055 asm_ahuvload(as, ir);
2056 break;
2057 case IR_FLOAD: asm_fload(as, ir); break;
2058 case IR_XLOAD: asm_xload(as, ir); break;
2059 case IR_SLOAD: asm_sload(as, ir); break;
2060
2061 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2062 case IR_FSTORE: asm_fstore(as, ir); break;
2063 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2064
2065 /* Allocations. */
2066 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2067 case IR_TNEW: asm_tnew(as, ir); break;
2068 case IR_TDUP: asm_tdup(as, ir); break;
2069 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2070
2071 /* Write barriers. */
2072 case IR_TBAR: asm_tbar(as, ir); break;
2073 case IR_OBAR: asm_obar(as, ir); break;
2074
2075 /* Type conversions. */
2076 case IR_CONV: asm_conv(as, ir); break;
2077 case IR_TOBIT: asm_tobit(as, ir); break;
2078 case IR_TOSTR: asm_tostr(as, ir); break;
2079 case IR_STRTO: asm_strto(as, ir); break;
2080
2081 /* Calls. */
2082 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2083 case IR_CALLXS: asm_callx(as, ir); break;
2084 case IR_CARG: break;
2085
2086 default:
2087 setintV(&as->J->errinfo, ir->o);
2088 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2089 break;
2090 }
2091}
2092
2093/* -- Trace setup --------------------------------------------------------- */ 1939/* -- Trace setup --------------------------------------------------------- */
2094 1940
2095/* Ensure there are enough stack slots for call arguments. */ 1941/* Ensure there are enough stack slots for call arguments. */
2096static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 1942static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2097{ 1943{
2098 IRRef args[CCI_NARGS_MAX*2]; 1944 IRRef args[CCI_NARGS_MAX*2];
2099 uint32_t i, nargs = (int)CCI_NARGS(ci); 1945 uint32_t i, nargs = CCI_XNARGS(ci);
2100 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 1946 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2101 asm_collectargs(as, ir, ci, args); 1947 asm_collectargs(as, ir, ci, args);
2102 for (i = 0; i < nargs; i++) 1948 for (i = 0; i < nargs; i++)