Diffstat (limited to 'src/lj_asm_ppc.h')
-rw-r--r--  src/lj_asm_ppc.h | 366
1 file changed, 105 insertions, 261 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index e1a496a7..676bfcbf 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -249,7 +249,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
   Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
   if ((void *)ci->func)
@@ -329,15 +329,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
@@ -361,16 +352,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* -- Returns ------------------------------------------------------------- */
 
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -511,28 +492,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -551,6 +510,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -574,27 +535,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1; /* const lua_Number * */
-    asm_setupresult(as, ir, ci); /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1; /* int32_t k */
-    asm_setupresult(as, ir, ci); /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -771,20 +711,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L; /* lua_State *L */
-  args[1] = ir->op1; /* GCtab *t */
-  args[2] = ASMREF_TMP1; /* cTValue *key */
-  asm_setupresult(as, ir, ci); /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -915,7 +841,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   IRIns *irb;
   if (ir->r == RID_SINK)
@@ -932,6 +858,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   IRType1 t = ir->t;
@@ -1082,19 +1010,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-              lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L; /* lua_State *L */
-  args[1] = ASMREF_TMP1; /* MSize size */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r); /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1103,6 +1027,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
@@ -1116,12 +1041,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir++;
     }
+  } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L; /* lua_State *L */
+    args[1] = ir->op1; /* CTypeID id */
+    args[2] = ir->op2; /* CTSize sz */
+    args[3] = ASMREF_TMP1; /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L; /* lua_State *L */
+  args[1] = ASMREF_TMP1; /* MSize size */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
                ra_releasetmp(as, ASMREF_TMP1));
@@ -1195,23 +1132,14 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
   emit_fb(as, pi, dest, left);
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+    asm_fpunary(as, ir, PPCI_FSQRT);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 
 static void asm_add(ASMState *as, IRIns *ir)
@@ -1311,6 +1239,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
+#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t)) {
@@ -1329,6 +1261,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
+#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 {
   Reg dest, left, right;
@@ -1344,6 +1280,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
 }
 
+#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
+#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
+#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
+
 #if LJ_HASFFI
 static void asm_add64(ASMState *as, IRIns *ir)
 {
@@ -1423,7 +1363,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 #endif
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   PPCIns pi = PPCI_NOR;
@@ -1450,7 +1390,7 @@ nofuse:
   emit_asb(as, pi, dest, left, right);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irx;
@@ -1471,32 +1411,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-  if (irref_isk(ir->op2)) {
-    int32_t k = IR(ir->op2)->i;
-    Reg tmp = left;
-    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
-      if (!checku16(k)) {
-        emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
-        if ((k & 0xffff) == 0) return;
-      }
-      emit_asi(as, pik, dest, left, k);
-      return;
-    }
-  }
-  /* May fail due to spills/restores above, but simplifies the logic. */
-  if (as->flagmcp == as->mcp) {
-    as->flagmcp = NULL;
-    as->mcp++;
-    pi |= PPCF_DOT;
-  }
-  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
-  emit_asb(as, pi, dest, left, right);
-}
-
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 {
@@ -1527,7 +1441,7 @@ nofuse:
   *--as->mcp = pi | PPCF_T(left);
 }
 
-static void asm_bitand(ASMState *as, IRIns *ir)
+static void asm_band(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   IRRef lref = ir->op1;
@@ -1582,6 +1496,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
 }
 
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    Reg tmp = left;
+    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
+      if (!checku16(k)) {
+        emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
+        if ((k & 0xffff) == 0) return;
+      }
+      emit_asi(as, pik, dest, left, k);
+      return;
+    }
+  }
+  /* May fail due to spills/restores above, but simplifies the logic. */
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_asb(as, pi, dest, left, right);
+}
+
+#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
+#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 {
   Reg dest, left;
@@ -1607,6 +1550,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
   }
 }
 
+#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
+#define asm_brol(as, ir) \
+  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+               PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
+#define asm_bror(as, ir) lua_assert(0)
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
   if (irt_isnum(ir->t)) {
@@ -1637,6 +1588,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
 }
 
+#define asm_min(as, ir) asm_min_max(as, ir, 0)
+#define asm_max(as, ir) asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1713,6 +1667,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_equal(as, ir) asm_comp(as, ir)
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1758,8 +1714,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--; /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, 0);
-      asm_xstore(as, ir-1, 4);
+      asm_xstore_(as, ir, 0);
+      asm_xstore_(as, ir-1, 4);
     }
     return;
   }
@@ -1783,6 +1739,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
+               (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1804,7 +1771,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
   if (pbase == RID_TMP)
     emit_getgl(as, RID_TMP, jit_base);
-  emit_getgl(as, tmp, jit_L);
+  emit_getgl(as, tmp, cur_L);
   if (allow == RSET_EMPTY) /* Spill temp. register. */
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
 }
@@ -1965,136 +1932,13 @@ static void asm_tail_prep(ASMState *as)
   }
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-    asm_comp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitnot(as, ir); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitand(as, ir); break;
-  case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
-  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
-  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
-  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
-  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
-                             PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
-  case IR_BROR: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: asm_add(as, ir); break;
-  case IR_SUB: asm_sub(as, ir); break;
-  case IR_MUL: asm_mul(as, ir); break;
-  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_MIN: asm_min_max(as, ir, 0); break;
-  case IR_MAX: asm_min_max(as, ir, 1); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
-      asm_fpunary(as, ir, PPCI_FSQRT);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-
-  /* Overflow-checking arithmetic ops. */
-  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
-  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
-  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++)