Diffstat (limited to 'src/lj_asm_ppc.h')
-rw-r--r--  src/lj_asm_ppc.h  913
1 file changed, 529 insertions(+), 384 deletions(-)
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 5fd35d2e..5ea4d47d 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
          return ra_allock(as, ofs-(int16_t)ofs, allow);
        }
      }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
    }
  }
  *ofsp = 0;
@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
        return;
      }
    } else if (ir->o == IR_STRREF) {
-      lua_assert(ofs == 0);
+      lj_assertA(ofs == 0, "bad usage");
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
        ofs += IR(ir->op2)->i;
@@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
  emit_tab(as, pi, rt, left, right);
 }
 
+#if !LJ_SOFTFP
 /* Fuse to multiply-add/sub instruction. */
 static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 {
@@ -245,24 +249,30 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
  }
  return 0;
 }
+#endif
 
 /* -- Calls --------------------------------------------------------------- */
 
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 8;
-  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
+  Reg gpr = REGARG_FIRSTGPR;
+#if !LJ_SOFTFP
+  Reg fpr = REGARG_FIRSTFPR;
+#endif
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
      if (irt_isfp(ir->t)) {
        if (fpr <= REGARG_LASTFPR) {
-          lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
+          lj_assertA(rset_test(as->freeset, fpr),
+                     "reg %d not free", fpr);  /* Already evicted. */
          ra_leftov(as, fpr, ref);
          fpr++;
        } else {
@@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
          emit_spstore(as, ir, r, ofs);
          ofs += irt_isnum(ir->t) ? 8 : 4;
        }
-      } else {
+      } else
+#endif
+      {
        if (gpr <= REGARG_LASTGPR) {
-          lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
+          lj_assertA(rset_test(as->freeset, gpr),
+                     "reg %d not free", gpr);  /* Already evicted. */
          ra_leftov(as, gpr, ref);
          gpr++;
        } else {
@@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
    }
    checkmclim(as);
  }
+#if !LJ_SOFTFP
  if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
    emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
+#endif
 }
 
 /* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+#if !LJ_SOFTFP
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;
+#endif
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
-    lua_assert(!irt_ispri(ir->t));
-    if (irt_isfp(ir->t)) {
+    lj_assertA(!irt_ispri(ir->t), "PRI dest");
+    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
        /* Use spill slot or temp slots. */
        int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -331,15 +348,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
  }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
  IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +360,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(irf->i);
+    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
  } else {  /* Need a non-argument register for indirect calls. */
    RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
    Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +371,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
  asm_gencall(as, &ci, args);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* -- Returns ------------------------------------------------------------- */
 
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +378,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -392,8 +390,24 @@ static void asm_retf(ASMState *as, IRIns *ir)
  emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
+#if !LJ_SOFTFP
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 {
  RegSet allow = RSET_FPR;
@@ -410,8 +424,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
  emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_lsptr(as, PPCI_LFS, (fbias & 31),
-             (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
-             RSET_GPR);
+             (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fb(as, PPCI_FCTIWZ, tmp, left);
 }
@@ -427,15 +440,27 @@ static void asm_tobit(ASMState *as, IRIns *ir)
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fab(as, PPCI_FADD, tmp, left, right);
 }
+#endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
 {
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
  IRRef lref = ir->op1;
-  lua_assert(irt_type(ir->t) != st);
-  lua_assert(!(irt_isint64(ir->t) ||
-               (st == IRT_I64 || st == IRT_U64)));  /* Handled by SPLIT. */
+  /* 64 bit integer conversions are handled by SPLIT. */
+  lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
+             "IR %04d has unsplit 64 bit type",
+             (int)(ir - as->ir) - REF_BIAS);
+#if LJ_SOFTFP
+  /* FP conversions are handled by SPLIT. */
+  lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
+             "IR %04d has FP type",
+             (int)(ir - as->ir) - REF_BIAS);
+  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
@@ -450,13 +475,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
      Reg left = ra_alloc1(as, lref, allow);
      Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
      Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
-      const float *kbias;
      if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
-      kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
-      if (st == IRT_U32) kbias++;
-      emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
+      emit_lsptr(as, PPCI_LFS, (fbias & 31),
+                 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
                 rset_clear(allow, hibias));
      emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
               RID_SP, SPOFS_TMPLO);
@@ -466,7 +489,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
-      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+                 "bad type for checked CONV");
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -489,19 +513,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
        emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
        emit_fab(as, PPCI_FSUB, tmp, left, tmp);
        emit_lsptr(as, PPCI_LFS, (tmp & 31),
-                   (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
-                   RSET_GPR);
+                   (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
      } else {
        emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
        emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
        emit_fb(as, PPCI_FCTIWZ, tmp, left);
      }
    }
-  } else {
+  } else
+#endif
+  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
-      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+      lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
      if ((ir->op2 & IRCONV_SEXT))
        emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
      else
@@ -513,90 +538,102 @@ static void asm_conv(ASMState *as, IRIns *ir)
  }
 }
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
-  int32_t ofs;
+  int32_t ofs = SPOFS_TMP;
+#if LJ_SOFTFP
+  ra_evictset(as, RSET_SCRATCH);
+  if (ra_used(ir)) {
+    if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+        (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
+      int i;
+      for (i = 0; i < 2; i++) {
+        Reg r = (ir+i)->r;
+        if (ra_hasreg(r)) {
+          ra_free(as, r);
+          ra_modified(as, r);
+          emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+        }
+      }
+      ofs = sps_scale(ir->s & ~1);
+    } else {
+      Reg rhi = ra_dest(as, ir+1, RSET_GPR);
+      Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+      emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
+      emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
+    }
+  }
+#else
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
+  if (ir->s) ofs = sps_scale(ir->s);
+#endif
  asm_guardcc(as, CC_EQ);
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n  */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
-  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    else  /* Otherwise force a spill and use the spill slot. */
-      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_setgl(as, src, tmptv.gcr);
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+        lj_assertA(irref_isk(ref), "unsplit FP op");
+        emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+        emit_setgl(as,
+                   ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+                   tmptv.u32.lo);
+        emit_setgl(as,
+                   ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+                   tmptv.u32.hi);
+#else
+        Reg src = ra_alloc1(as, ref, RSET_FPR);
+        emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+        emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
+#endif
+      } else if (irref_isk(ref)) {
+        /* Use the number constant itself as a TValue. */
+        ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP
+        lj_assertA(0, "unsplit FP op");
+#else
+        /* Otherwise force a spill and use the spill slot. */
+        emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
+#endif
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
+      Reg type;
+      emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+      if (!irt_ispri(ir->t)) {
+        Reg src = ra_alloc1(as, ref, RSET_GPR);
+        emit_setgl(as, src, tmptv.gcr);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+        type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+        type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+      emit_setgl(as, type, tmptv.it);
    }
-    type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_setgl(as, type, tmptv.it);
-  }
-}
-
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
  }
 }
 
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
  Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -636,11 +673,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
  Reg tisnum = RID_NONE, tmpnum = RID_NONE;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
+  int isk = irref_isk(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;
 
  rset_clear(allow, tab);
+#if LJ_SOFTFP
+  if (!isk) {
+    key = ra_alloc1(as, refkey, allow);
+    rset_clear(allow, key);
+    if (irkey[1].o == IR_HIOP) {
+      if (ra_hasreg((irkey+1)->r)) {
+        tmpnum = (irkey+1)->r;
+        ra_noweak(as, tmpnum);
+      } else {
+        tmpnum = ra_allocref(as, refkey+1, allow);
+      }
+      rset_clear(allow, tmpnum);
+    }
+  }
+#else
  if (irt_isnum(kt)) {
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -650,6 +703,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
  }
+#endif
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);
 
@@ -672,7 +726,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
    asm_guardcc(as, CC_EQ);
  else
    emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
-  if (irt_isnum(kt)) {
+  if (!LJ_SOFTFP && irt_isnum(kt)) {
    emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
    emit_condbranch(as, PPCI_BC, CC_GE, l_next);
    emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -682,7 +736,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
      emit_ab(as, PPCI_CMPW, tmp2, key);
      emit_condbranch(as, PPCI_BC, CC_NE, l_next);
    }
-    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+    if (LJ_SOFTFP && ra_hasreg(tmpnum))
+      emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
+    else
+      emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
    if (!irt_ispri(kt))
      emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
  }
@@ -691,35 +748,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
              (((char *)as->mcp-(char *)l_loop) & 0xffffu);
 
  /* Load main position relative to tab->node into dest. */
-  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  khash = isk ? ir_khash(as, irkey) : 1;
  if (khash == 0) {
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
-    if (irref_isk(refkey))
+    if (isk)
      tmphash = ra_allock(as, khash, allow);
    emit_tab(as, PPCI_ADD, dest, dest, tmp1);
    emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
    emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
-    if (irref_isk(refkey)) {
+    if (isk) {
      /* Nothing to do. */
    } else if (irt_isstr(kt)) {
-      emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
+      emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
      emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
      emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
      emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
      emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
-      if (irt_isnum(kt)) {
+      if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
+#if LJ_SOFTFP
+        emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+        emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
+#else
        int32_t ofs = ra_spill(as, irkey);
        emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
        emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
        emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
        emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
+#endif
      } else {
        emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -740,7 +803,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
-  lua_assert(ofs % sizeof(Node) == 0);
+  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
  if (ofs > 32736) {
    idx = dest;
    rset_clear(allow, dest);
@@ -773,20 +836,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
  }
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
  Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -813,7 +862,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_fref(ASMState *as, IRIns *ir)
 {
  UNUSED(as); UNUSED(ir);
-  lua_assert(!ra_used(ir));
+  lj_assertA(!ra_used(ir), "unfused FREF");
 }
 
 static void asm_strref(ASMState *as, IRIns *ir)
@@ -853,26 +902,28 @@ static void asm_strref(ASMState *as, IRIns *ir)
 
 /* -- Loads and stores ---------------------------------------------------- */
 
-static PPCIns asm_fxloadins(IRIns *ir)
+static PPCIns asm_fxloadins(ASMState *as, IRIns *ir)
 {
+  UNUSED(as);
  switch (irt_type(ir->t)) {
  case IRT_I8: return PPCI_LBZ;  /* Needs sign-extension. */
  case IRT_U8: return PPCI_LBZ;
  case IRT_I16: return PPCI_LHA;
  case IRT_U16: return PPCI_LHZ;
-  case IRT_NUM: return PPCI_LFD;
-  case IRT_FLOAT: return PPCI_LFS;
+  case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
  default: return PPCI_LWZ;
  }
 }
 
-static PPCIns asm_fxstoreins(IRIns *ir)
+static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir)
 {
+  UNUSED(as);
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return PPCI_STB;
  case IRT_I16: case IRT_U16: return PPCI_STH;
-  case IRT_NUM: return PPCI_STFD;
-  case IRT_FLOAT: return PPCI_STFS;
+  case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
  default: return PPCI_STW;
  }
 }
@@ -880,18 +931,24 @@ static PPCIns asm_fxstoreins(IRIns *ir)
 static void asm_fload(ASMState *as, IRIns *ir)
 {
  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
-  PPCIns pi = asm_fxloadins(ir);
+  PPCIns pi = asm_fxloadins(as, ir);
+  Reg idx;
  int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_tai(as, PPCI_ADDI, dest, idx, ofs);
-      return;
+  if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
+    idx = RID_JGL;
+    ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+        emit_tai(as, PPCI_ADDI, dest, idx, ofs);
+        return;
+      }
    }
+    ofs = field_ofs[ir->op2];
  }
-  ofs = field_ofs[ir->op2];
-  lua_assert(!irt_isi8(ir->t));
+  lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8");
  emit_tai(as, pi, dest, idx, ofs);
 }
 
@@ -902,21 +959,22 @@ static void asm_fstore(ASMState *as, IRIns *ir)
    IRIns *irf = IR(ir->op1);
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
-    PPCIns pi = asm_fxstoreins(ir);
+    PPCIns pi = asm_fxstoreins(as, ir);
    emit_tai(as, pi, src, idx, ofs);
  }
 }
 
 static void asm_xload(ASMState *as, IRIns *ir)
 {
-  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  Reg dest = ra_dest(as, ir,
+    (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+  lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
  if (irt_isi8(ir->t))
    emit_as(as, PPCI_EXTSB, dest, dest);
-  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+  asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
  IRIns *irb;
  if (ir->r == RID_SINK)
@@ -927,36 +985,54 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
    Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
    asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
  } else {
-    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+    Reg src = ra_alloc1(as, ir->op2,
+      (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+    asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);
  }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = AHUREF_LSX;
+  if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
+    t.irt = IRT_NUM;
+    if (ra_used(ir+1)) {
+      type = ra_dest(as, ir+1, allow);
+      rset_clear(allow, type);
+    }
+    ofs = 0;
+  }
  if (ra_used(ir)) {
-    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    if (!irt_isnum(t)) ofs = 0;
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+               irt_isint(ir->t) || irt_isaddr(ir->t),
+               "bad load type %d", irt_type(ir->t));
+    if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (ir->o == IR_VLOAD) {
+    ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
+          ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
+  }
  if (irt_isnum(t)) {
    Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
    asm_guardcc(as, CC_GE);
    emit_ab(as, PPCI_CMPLW, type, tisnum);
    if (ra_hasreg(dest)) {
-      if (ofs == AHUREF_LSX) {
+      if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
        tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
                         (idx&255)), (idx>>8)));
        emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
      } else {
-        emit_fai(as, PPCI_LFD, dest, idx, ofs);
+        emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
+                 ofs+4*LJ_SOFTFP);
      }
    }
  } else {
@@ -979,7 +1055,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
  int32_t ofs = AHUREF_LSX;
  if (ir->r == RID_SINK)
    return;
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    if (!irt_ispri(ir->t)) {
@@ -987,11 +1063,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
      rset_clear(allow, src);
      ofs = 0;
    }
-    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, (ir+1)->op2, allow);
+    else
+      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    if (ofs == AHUREF_LSX) {
      emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -1016,21 +1095,39 @@ static void asm_sload(ASMState *as, IRIns *ir)
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
-  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
-  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
-  lua_assert(LJ_DUALNUM ||
-             !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+  if (hiop)
+    t.irt = IRT_NUM;
+  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+             "bad parent SLOAD");  /* Handled by asm_head_side(). */
+  lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+             "inconsistent SLOAD variant");
+  lj_assertA(LJ_DUALNUM ||
+             !irt_isint(t) ||
+             (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
+             "bad SLOAD type");
+#if LJ_SOFTFP
+  lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
+             "unsplit SLOAD convert");  /* Handled by LJ_SOFTFP SPLIT. */
+  if (hiop && ra_used(ir+1)) {
+    type = ra_dest(as, ir+1, allow);
+    rset_clear(allow, type);
+  }
+#else
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
-  } else if (ra_used(ir)) {
-    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+  } else
+#endif
+  if (ra_used(ir)) {
+    lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
+               "bad SLOAD type %d", irt_type(ir->t));
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
-    if ((ir->op2 & IRSLOAD_CONVERT)) {
+    if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
        emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
        dest = ra_scratch(as, RSET_FPR);
@@ -1044,7 +1141,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
      emit_lsptr(as, PPCI_LFS, (fbias & 31),
-                 (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
+                 (void *)&as->J->k32[LJ_K32_2P52_2P31],
                 rset_clear(allow, hibias));
      emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
      emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -1062,10 +1159,13 @@ dotypecheck:
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
      asm_guardcc(as, CC_GE);
-      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
+#if !LJ_SOFTFP
      type = RID_TMP;
+#endif
+      emit_ab(as, PPCI_CMPLW, type, tisnum);
    }
-    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
+    if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
+                                  base, ofs-(LJ_SOFTFP?0:4));
  } else {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      asm_guardcc(as, CC_NE);
@@ -1083,19 +1183,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
  CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-    lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
  RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+             "bad CNEW/CNEWI operands");
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
  as->gcsteps++;
-
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
@@ -1104,11 +1201,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
    int32_t ofs = sizeof(GCcdata);
-    lua_assert(sz == 4 || sz == 8);
+    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
    if (sz == 8) {
      ofs += 4;
-      lua_assert((ir+1)->o == IR_HIOP);
+      lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
    }
    for (;;) {
      Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1117,18 +1215,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; ir++;
    }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
  }
+
  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
  emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, PPCI_LI, RID_TMP, ctypeid);  /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
 }
-#else
-#define asm_cnew(as, ir)	((void)0)
 #endif
 
 /* -- Write barriers ------------------------------------------------------ */
@@ -1142,7 +1250,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1142 emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); 1250 emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
1143 emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1251 emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
1144 emit_setgl(as, tab, gc.grayagain); 1252 emit_setgl(as, tab, gc.grayagain);
1145 lua_assert(LJ_GC_BLACK == 0x04); 1253 lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK");
1146 emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ 1254 emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */
1147 emit_getgl(as, link, gc.grayagain); 1255 emit_getgl(as, link, gc.grayagain);
1148 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); 1256 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
@@ -1157,7 +1265,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1157 MCLabel l_end; 1265 MCLabel l_end;
1158 Reg obj, val, tmp; 1266 Reg obj, val, tmp;
1159 /* No need for other object barriers (yet). */ 1267 /* No need for other object barriers (yet). */
1160 lua_assert(IR(ir->op1)->o == IR_UREFC); 1268 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1161 ra_evictset(as, RSET_SCRATCH); 1269 ra_evictset(as, RSET_SCRATCH);
1162 l_end = emit_label(as); 1270 l_end = emit_label(as);
1163 args[0] = ASMREF_TMP1; /* global_State *g */ 1271 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1178,6 +1286,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1178 1286
1179/* -- Arithmetic and logic operations ------------------------------------- */ 1287/* -- Arithmetic and logic operations ------------------------------------- */
1180 1288
1289#if !LJ_SOFTFP
1181static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) 1290static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
1182{ 1291{
1183 Reg dest = ra_dest(as, ir, RSET_FPR); 1292 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1196,31 +1305,24 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1196 emit_fb(as, pi, dest, left); 1305 emit_fb(as, pi, dest, left);
1197} 1306}
1198 1307
1199static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1308static void asm_fpmath(ASMState *as, IRIns *ir)
1200{ 1309{
1201 IRIns *irp = IR(ir->op1); 1310 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1202 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1311 asm_fpunary(as, ir, PPCI_FSQRT);
1203 IRIns *irpp = IR(irp->op1); 1312 else
1204 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1313 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1205 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1206 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1207 IRRef args[2];
1208 args[0] = irpp->op1;
1209 args[1] = irp->op2;
1210 asm_setupresult(as, ir, ci);
1211 asm_gencall(as, ci, args);
1212 return 1;
1213 }
1214 }
1215 return 0;
1216} 1314}
1315#endif
1217 1316
1218static void asm_add(ASMState *as, IRIns *ir) 1317static void asm_add(ASMState *as, IRIns *ir)
1219{ 1318{
1319#if !LJ_SOFTFP
1220 if (irt_isnum(ir->t)) { 1320 if (irt_isnum(ir->t)) {
1221 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) 1321 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
1222 asm_fparith(as, ir, PPCI_FADD); 1322 asm_fparith(as, ir, PPCI_FADD);
1223 } else { 1323 } else
1324#endif
1325 {
1224 Reg dest = ra_dest(as, ir, RSET_GPR); 1326 Reg dest = ra_dest(as, ir, RSET_GPR);
1225 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1327 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1226 PPCIns pi; 1328 PPCIns pi;
@@ -1259,10 +1361,13 @@ static void asm_add(ASMState *as, IRIns *ir)
1259 1361
1260static void asm_sub(ASMState *as, IRIns *ir) 1362static void asm_sub(ASMState *as, IRIns *ir)
1261{ 1363{
1364#if !LJ_SOFTFP
1262 if (irt_isnum(ir->t)) { 1365 if (irt_isnum(ir->t)) {
1263 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) 1366 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
1264 asm_fparith(as, ir, PPCI_FSUB); 1367 asm_fparith(as, ir, PPCI_FSUB);
1265 } else { 1368 } else
1369#endif
1370 {
1266 PPCIns pi = PPCI_SUBF; 1371 PPCIns pi = PPCI_SUBF;
1267 Reg dest = ra_dest(as, ir, RSET_GPR); 1372 Reg dest = ra_dest(as, ir, RSET_GPR);
1268 Reg left, right; 1373 Reg left, right;
@@ -1288,9 +1393,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
1288 1393
1289static void asm_mul(ASMState *as, IRIns *ir) 1394static void asm_mul(ASMState *as, IRIns *ir)
1290{ 1395{
1396#if !LJ_SOFTFP
1291 if (irt_isnum(ir->t)) { 1397 if (irt_isnum(ir->t)) {
1292 asm_fparith(as, ir, PPCI_FMUL); 1398 asm_fparith(as, ir, PPCI_FMUL);
1293 } else { 1399 } else
1400#endif
1401 {
1294 PPCIns pi = PPCI_MULLW; 1402 PPCIns pi = PPCI_MULLW;
1295 Reg dest = ra_dest(as, ir, RSET_GPR); 1403 Reg dest = ra_dest(as, ir, RSET_GPR);
1296 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1404 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1312,11 +1420,16 @@ static void asm_mul(ASMState *as, IRIns *ir)
1312 } 1420 }
1313} 1421}
1314 1422
1423#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1424
1315static void asm_neg(ASMState *as, IRIns *ir) 1425static void asm_neg(ASMState *as, IRIns *ir)
1316{ 1426{
1427#if !LJ_SOFTFP
1317 if (irt_isnum(ir->t)) { 1428 if (irt_isnum(ir->t)) {
1318 asm_fpunary(as, ir, PPCI_FNEG); 1429 asm_fpunary(as, ir, PPCI_FNEG);
1319 } else { 1430 } else
1431#endif
1432 {
1320 Reg dest, left; 1433 Reg dest, left;
1321 PPCIns pi = PPCI_NEG; 1434 PPCIns pi = PPCI_NEG;
1322 if (as->flagmcp == as->mcp) { 1435 if (as->flagmcp == as->mcp) {
@@ -1330,6 +1443,8 @@ static void asm_neg(ASMState *as, IRIns *ir)
1330 } 1443 }
1331} 1444}
1332 1445
1446#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1447
1333static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1448static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1334{ 1449{
1335 Reg dest, left, right; 1450 Reg dest, left, right;
@@ -1345,6 +1460,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1345 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1460 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1346} 1461}
1347 1462
1463#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1464#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1465#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1466
1348#if LJ_HASFFI 1467#if LJ_HASFFI
1349static void asm_add64(ASMState *as, IRIns *ir) 1468static void asm_add64(ASMState *as, IRIns *ir)
1350{ 1469{
@@ -1424,7 +1543,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1424} 1543}
1425#endif 1544#endif
1426 1545
1427static void asm_bitnot(ASMState *as, IRIns *ir) 1546static void asm_bnot(ASMState *as, IRIns *ir)
1428{ 1547{
1429 Reg dest, left, right; 1548 Reg dest, left, right;
1430 PPCIns pi = PPCI_NOR; 1549 PPCIns pi = PPCI_NOR;
@@ -1451,7 +1570,7 @@ nofuse:
1451 emit_asb(as, pi, dest, left, right); 1570 emit_asb(as, pi, dest, left, right);
1452} 1571}
1453 1572
1454static void asm_bitswap(ASMState *as, IRIns *ir) 1573static void asm_bswap(ASMState *as, IRIns *ir)
1455{ 1574{
1456 Reg dest = ra_dest(as, ir, RSET_GPR); 1575 Reg dest = ra_dest(as, ir, RSET_GPR);
1457 IRIns *irx; 1576 IRIns *irx;
@@ -1472,32 +1591,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1472 } 1591 }
1473} 1592}
1474 1593
1475static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1476{
1477 Reg dest = ra_dest(as, ir, RSET_GPR);
1478 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1479 if (irref_isk(ir->op2)) {
1480 int32_t k = IR(ir->op2)->i;
1481 Reg tmp = left;
1482 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1483 if (!checku16(k)) {
1484 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1485 if ((k & 0xffff) == 0) return;
1486 }
1487 emit_asi(as, pik, dest, left, k);
1488 return;
1489 }
1490 }
1491 /* May fail due to spills/restores above, but simplifies the logic. */
1492 if (as->flagmcp == as->mcp) {
1493 as->flagmcp = NULL;
1494 as->mcp++;
1495 pi |= PPCF_DOT;
1496 }
1497 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1498 emit_asb(as, pi, dest, left, right);
1499}
1500
1501/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1594/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1502static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1595static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1503{ 1596{
@@ -1528,7 +1621,7 @@ nofuse:
1528 *--as->mcp = pi | PPCF_T(left); 1621 *--as->mcp = pi | PPCF_T(left);
1529} 1622}
1530 1623
1531static void asm_bitand(ASMState *as, IRIns *ir) 1624static void asm_band(ASMState *as, IRIns *ir)
1532{ 1625{
1533 Reg dest, left, right; 1626 Reg dest, left, right;
1534 IRRef lref = ir->op1; 1627 IRRef lref = ir->op1;
@@ -1583,6 +1676,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1583 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1676 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1584} 1677}
1585 1678
1679static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1680{
1681 Reg dest = ra_dest(as, ir, RSET_GPR);
1682 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1683 if (irref_isk(ir->op2)) {
1684 int32_t k = IR(ir->op2)->i;
1685 Reg tmp = left;
1686 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1687 if (!checku16(k)) {
1688 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1689 if ((k & 0xffff) == 0) return;
1690 }
1691 emit_asi(as, pik, dest, left, k);
1692 return;
1693 }
1694 }
1695 /* May fail due to spills/restores above, but simplifies the logic. */
1696 if (as->flagmcp == as->mcp) {
1697 as->flagmcp = NULL;
1698 as->mcp++;
1699 pi |= PPCF_DOT;
1700 }
1701 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1702 emit_asb(as, pi, dest, left, right);
1703}
1704
1705#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1706#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1707
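For a constant operand, asm_bitop above splits the 32-bit constant into the two 16-bit immediates that ori/oris (or xori/xoris) accept, skipping whichever half is zero. The identity it relies on, as a small standalone sketch (helper names are illustrative, not LuaJIT API):

#include <stdint.h>

/* x | k done as ori with the low halfword, then oris with the high one. */
static uint32_t or_split(uint32_t x, uint32_t k)
{
  uint32_t r = x | (k & 0xffffu);  /* ori  r, x, lo16(k) */
  r |= (k >> 16) << 16;            /* oris r, r, hi16(k) */
  return r;                        /* == x | k */
}

/* The same split works for XOR, since the two halves touch disjoint bits. */
static uint32_t xor_split(uint32_t x, uint32_t k)
{
  uint32_t r = x ^ (k & 0xffffu);  /* xori  r, x, lo16(k) */
  r ^= (k >> 16) << 16;            /* xoris r, r, hi16(k) */
  return r;                        /* == x ^ k */
}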
1586static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1708static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1587{ 1709{
1588 Reg dest, left; 1710 Reg dest, left;
@@ -1608,9 +1730,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1608 } 1730 }
1609} 1731}
1610 1732
1733#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1734#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1735#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1736#define asm_brol(as, ir) \
1737 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1738 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1739#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR")
1740
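The rotate macros above reuse rlwnm/rlwinm with a full 0..31 mask, which is a plain 32-bit rotate-left; BROR is only an assert because the IR reaching this backend is assumed to carry left-rotates only (a rotate-right by n equals a rotate-left by (32-n)&31). A minimal sketch of that equivalence, with illustrative helpers rather than LuaJIT code:

#include <stdint.h>

/* BROL with MB=0, ME=31 masks nothing away: a plain rotate-left. */
static uint32_t ir_brol(uint32_t x, uint32_t n)
{
  n &= 31;
  return n ? (x << n) | (x >> (32 - n)) : x;
}

/* Rotate-right expressed through rotate-left, which is presumably how a
** BROR would have been rewritten before it could ever get here. */
static uint32_t ir_bror(uint32_t x, uint32_t n)
{
  return ir_brol(x, (32u - (n & 31)) & 31);
}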
1741#if LJ_SOFTFP
1742static void asm_sfpmin_max(ASMState *as, IRIns *ir)
1743{
1744 CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
1745 IRRef args[4];
1746 MCLabel l_right, l_end;
1747 Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
1748 Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
1749 Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
1750 PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
1751 righthi = (lefthi >> 8); lefthi &= 255;
1752 rightlo = (leftlo >> 8); leftlo &= 255;
1753 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1754 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1755 l_end = emit_label(as);
1756 if (desthi != righthi) emit_mr(as, desthi, righthi);
1757 if (destlo != rightlo) emit_mr(as, destlo, rightlo);
1758 l_right = emit_label(as);
1759 if (l_end != l_right) emit_jmp(as, l_end);
1760 if (desthi != lefthi) emit_mr(as, desthi, lefthi);
1761 if (destlo != leftlo) emit_mr(as, destlo, leftlo);
1762 if (l_right == as->mcp+1) {
1763 cond ^= 4; l_right = l_end; ++as->mcp;
1764 }
1765 emit_condbranch(as, PPCI_BC, cond, l_right);
1766 ra_evictset(as, RSET_SCRATCH);
1767 emit_cmpi(as, RID_RET, 1);
1768 asm_gencall(as, &ci, args);
1769}
1770#endif
1771
1611static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1772static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1612{ 1773{
1613 if (irt_isnum(ir->t)) { 1774 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1614 Reg dest = ra_dest(as, ir, RSET_FPR); 1775 Reg dest = ra_dest(as, ir, RSET_FPR);
1615 Reg tmp = dest; 1776 Reg tmp = dest;
1616 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1777 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1618,9 +1779,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1618 if (tmp == left || tmp == right) 1779 if (tmp == left || tmp == right)
1619 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, 1780 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
1620 dest), left), right)); 1781 dest), left), right));
1621 emit_facb(as, PPCI_FSEL, dest, tmp, 1782 emit_facb(as, PPCI_FSEL, dest, tmp, left, right);
1622 ismax ? left : right, ismax ? right : left); 1783 emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left);
1623 emit_fab(as, PPCI_FSUB, tmp, left, right);
1624 } else { 1784 } else {
1625 Reg dest = ra_dest(as, ir, RSET_GPR); 1785 Reg dest = ra_dest(as, ir, RSET_GPR);
1626 Reg tmp1 = RID_TMP, tmp2 = dest; 1786 Reg tmp1 = RID_TMP, tmp2 = dest;
@@ -1638,6 +1798,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1638 } 1798 }
1639} 1799}
1640 1800
1801#define asm_min(as, ir) asm_min_max(as, ir, 0)
1802#define asm_max(as, ir) asm_min_max(as, ir, 1)
1803
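The FP path of asm_min_max selects through fsel on the sign of a subtraction; since the assembler emits machine code backwards, the fsub executes before the fsel. A portable sketch of the selection logic (fsel frt,fra,frc,frb yields frc when fra >= 0.0, else frb), with the caveat that this is not NaN-clean the way a full IEEE min/max would be:

/* Sketch only: models the fsel selection, not the emitted register usage. */
static double ppc_fsel(double a, double c, double b)
{
  return (a >= 0.0) ? c : b;
}

static double trace_fmax(double left, double right)
{
  return ppc_fsel(left - right, left, right);   /* left when left-right >= 0 */
}

static double trace_fmin(double left, double right)
{
  return ppc_fsel(right - left, left, right);   /* left when right-left >= 0 */
}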
1641/* -- Comparisons --------------------------------------------------------- */ 1804/* -- Comparisons --------------------------------------------------------- */
1642 1805
1643#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1806#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1695,7 +1858,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
1695static void asm_comp(ASMState *as, IRIns *ir) 1858static void asm_comp(ASMState *as, IRIns *ir)
1696{ 1859{
1697 PPCCC cc = asm_compmap[ir->o]; 1860 PPCCC cc = asm_compmap[ir->o];
1698 if (irt_isnum(ir->t)) { 1861 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1699 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1862 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1700 right = (left >> 8); left &= 255; 1863 right = (left >> 8); left &= 255;
1701 asm_guardcc(as, (cc >> 4)); 1864 asm_guardcc(as, (cc >> 4));
@@ -1714,6 +1877,46 @@ static void asm_comp(ASMState *as, IRIns *ir)
1714 } 1877 }
1715} 1878}
1716 1879
1880#define asm_equal(as, ir) asm_comp(as, ir)
1881
1882#if LJ_SOFTFP
1883/* SFP comparisons. */
1884static void asm_sfpcomp(ASMState *as, IRIns *ir)
1885{
1886 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
1887 RegSet drop = RSET_SCRATCH;
1888 Reg r;
1889 IRRef args[4];
1890 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1891 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1892
1893 for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
1894 if (!rset_test(as->freeset, r) &&
1895 regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
1896 rset_clear(drop, r);
1897 }
1898 ra_evictset(as, drop);
1899 asm_setupresult(as, ir, ci);
1900 switch ((IROp)ir->o) {
1901 case IR_ULT:
1902 asm_guardcc(as, CC_EQ);
1903 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1904 case IR_ULE:
1905 asm_guardcc(as, CC_EQ);
1906 emit_ai(as, PPCI_CMPWI, RID_RET, 1);
1907 break;
1908 case IR_GE: case IR_GT:
1909 asm_guardcc(as, CC_EQ);
1910 emit_ai(as, PPCI_CMPWI, RID_RET, 2);
1911 default:
1912 asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
1913 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1914 break;
1915 }
1916 asm_gencall(as, ci, args);
1917}
1918#endif
1919
1717#if LJ_HASFFI 1920#if LJ_HASFFI
1718/* 64 bit integer comparisons. */ 1921/* 64 bit integer comparisons. */
1719static void asm_comp64(ASMState *as, IRIns *ir) 1922static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1738,50 +1941,87 @@ static void asm_comp64(ASMState *as, IRIns *ir)
1738} 1941}
1739#endif 1942#endif
1740 1943
1741/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ 1944/* -- Split register ops -------------------------------------------------- */
1742 1945
1743 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 1946 /* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
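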
1744static void asm_hiop(ASMState *as, IRIns *ir) 1947static void asm_hiop(ASMState *as, IRIns *ir)
1745{ 1948{
1746#if LJ_HASFFI
1747 /* HIOP is marked as a store because it needs its own DCE logic. */ 1949 /* HIOP is marked as a store because it needs its own DCE logic. */
1748 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 1950 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1749 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 1951 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1952#if LJ_HASFFI || LJ_SOFTFP
1750 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 1953 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
1751 as->curins--; /* Always skip the CONV. */ 1954 as->curins--; /* Always skip the CONV. */
1955#if LJ_HASFFI && !LJ_SOFTFP
1752 if (usehi || uselo) 1956 if (usehi || uselo)
1753 asm_conv64(as, ir); 1957 asm_conv64(as, ir);
1754 return; 1958 return;
1959#endif
1755 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 1960 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
1756 as->curins--; /* Always skip the loword comparison. */ 1961 as->curins--; /* Always skip the loword comparison. */
1962#if LJ_SOFTFP
1963 if (!irt_isint(ir->t)) {
1964 asm_sfpcomp(as, ir-1);
1965 return;
1966 }
1967#endif
1968#if LJ_HASFFI
1757 asm_comp64(as, ir); 1969 asm_comp64(as, ir);
1970#endif
1758 return; 1971 return;
1972#if LJ_SOFTFP
1973 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1974 as->curins--; /* Always skip the loword min/max. */
1975 if (uselo || usehi)
1976 asm_sfpmin_max(as, ir-1);
1977 return;
1978#endif
1759 } else if ((ir-1)->o == IR_XSTORE) { 1979 } else if ((ir-1)->o == IR_XSTORE) {
1760 as->curins--; /* Handle both stores here. */ 1980 as->curins--; /* Handle both stores here. */
1761 if ((ir-1)->r != RID_SINK) { 1981 if ((ir-1)->r != RID_SINK) {
1762 asm_xstore(as, ir, 0); 1982 asm_xstore_(as, ir, 0);
1763 asm_xstore(as, ir-1, 4); 1983 asm_xstore_(as, ir-1, 4);
1764 } 1984 }
1765 return; 1985 return;
1766 } 1986 }
1987#endif
1767 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1988 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1768 switch ((ir-1)->o) { 1989 switch ((ir-1)->o) {
1990#if LJ_HASFFI
1769 case IR_ADD: as->curins--; asm_add64(as, ir); break; 1991 case IR_ADD: as->curins--; asm_add64(as, ir); break;
1770 case IR_SUB: as->curins--; asm_sub64(as, ir); break; 1992 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1771 case IR_NEG: as->curins--; asm_neg64(as, ir); break; 1993 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
1772 case IR_CALLN: 1994 case IR_CNEWI:
1773 case IR_CALLXS: 1995 /* Nothing to do here. Handled by lo op itself. */
1996 break;
1997#endif
1998#if LJ_SOFTFP
1999 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2000 case IR_STRTO:
1774 if (!uselo) 2001 if (!uselo)
1775 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 2002 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
1776 break; 2003 break;
1777 case IR_CNEWI: 2004 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
1778 /* Nothing to do here. Handled by lo op itself. */ 2005 /* Nothing to do here. Handled by lo op itself. */
1779 break; 2006 break;
1780 default: lua_assert(0); break;
1781 }
1782#else
1783 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */
1784#endif 2007#endif
2008 case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
2009 if (!uselo)
2010 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
2011 break;
2012 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
2013 }
2014}
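For the FFI cases in the dispatch above, a 64-bit add on this 32-bit target is split into an add-carrying loword op plus a HIOP that consumes the carry (addc/adde); asm_add64 assembles the hiword half. A minimal host-side sketch of the split, using an illustrative struct rather than the real IR representation:

#include <stdint.h>

typedef struct { uint32_t lo, hi; } Split64;

static Split64 add64_split(Split64 a, Split64 b)
{
  Split64 r;
  r.lo = a.lo + b.lo;                  /* addc: low words, carry out */
  r.hi = a.hi + b.hi + (r.lo < a.lo);  /* adde: high words plus carry in */
  return r;
}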
2015
2016/* -- Profiling ----------------------------------------------------------- */
2017
2018static void asm_prof(ASMState *as, IRIns *ir)
2019{
2020 UNUSED(ir);
2021 asm_guardcc(as, CC_NE);
2022 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
2023 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
2024 (int32_t)offsetof(global_State, hookmask));
1785} 2025}
1786 2026
1787/* -- Stack handling ------------------------------------------------------ */ 2027/* -- Stack handling ------------------------------------------------------ */
@@ -1805,7 +2045,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1805 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 2045 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1806 if (pbase == RID_TMP) 2046 if (pbase == RID_TMP)
1807 emit_getgl(as, RID_TMP, jit_base); 2047 emit_getgl(as, RID_TMP, jit_base);
1808 emit_getgl(as, tmp, jit_L); 2048 emit_getgl(as, tmp, cur_L);
1809 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2049 if (allow == RSET_EMPTY) /* Spill temp. register. */
1810 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 2050 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1811} 2051}
@@ -1826,12 +2066,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1826 if ((sn & SNAP_NORESTORE)) 2066 if ((sn & SNAP_NORESTORE))
1827 continue; 2067 continue;
1828 if (irt_isnum(ir->t)) { 2068 if (irt_isnum(ir->t)) {
2069#if LJ_SOFTFP
2070 Reg tmp;
2071 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2072 /* LJ_SOFTFP: must be a number constant. */
2073 lj_assertA(irref_isk(ref), "unsplit FP op");
2074 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2075 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2076 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2077 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2078 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2079#else
1829 Reg src = ra_alloc1(as, ref, RSET_FPR); 2080 Reg src = ra_alloc1(as, ref, RSET_FPR);
1830 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); 2081 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
2082#endif
1831 } else { 2083 } else {
1832 Reg type; 2084 Reg type;
1833 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2085 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1834 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2086 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2087 "restore of IR type %d", irt_type(ir->t));
1835 if (!irt_ispri(ir->t)) { 2088 if (!irt_ispri(ir->t)) {
1836 Reg src = ra_alloc1(as, ref, allow); 2089 Reg src = ra_alloc1(as, ref, allow);
1837 rset_clear(allow, src); 2090 rset_clear(allow, src);
@@ -1840,6 +2093,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1840 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2093 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1841 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2094 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1842 type = ra_allock(as, (int32_t)(*flinks--), allow); 2095 type = ra_allock(as, (int32_t)(*flinks--), allow);
2096#if LJ_SOFTFP
2097 } else if ((sn & SNAP_SOFTFPNUM)) {
2098 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2099#endif
2100 } else if ((sn & SNAP_KEYINDEX)) {
2101 type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
1843 } else { 2102 } else {
1844 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2103 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1845 } 2104 }
@@ -1847,7 +2106,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1847 } 2106 }
1848 checkmclim(as); 2107 checkmclim(as);
1849 } 2108 }
1850 lua_assert(map + nent == flinks); 2109 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
1851} 2110}
1852 2111
1853/* -- GC handling --------------------------------------------------------- */ 2112/* -- GC handling --------------------------------------------------------- */
@@ -1898,6 +2157,12 @@ static void asm_loop_fixup(ASMState *as)
1898 } 2157 }
1899} 2158}
1900 2159
2160/* Fixup the tail of the loop. */
2161static void asm_loop_tail_fixup(ASMState *as)
2162{
2163 UNUSED(as); /* Nothing to do. */
2164}
2165
1901/* -- Head of trace ------------------------------------------------------- */ 2166/* -- Head of trace ------------------------------------------------------- */
1902 2167
1903/* Coalesce BASE register for a root trace. */ 2168/* Coalesce BASE register for a root trace. */
@@ -1949,7 +2214,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1949 as->mctop = p; 2214 as->mctop = p;
1950 } else { 2215 } else {
1951 /* Patch stack adjustment. */ 2216 /* Patch stack adjustment. */
1952 lua_assert(checki16(CFRAME_SIZE+spadj)); 2217 lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
1953 p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); 2218 p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
1954 p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; 2219 p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
1955 } 2220 }
@@ -1970,147 +2235,25 @@ static void asm_tail_prep(ASMState *as)
1970 } 2235 }
1971} 2236}
1972 2237
1973/* -- Instruction dispatch ------------------------------------------------ */
1974
1975/* Assemble a single instruction. */
1976static void asm_ir(ASMState *as, IRIns *ir)
1977{
1978 switch ((IROp)ir->o) {
1979 /* Miscellaneous ops. */
1980 case IR_LOOP: asm_loop(as); break;
1981 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1982 case IR_USE:
1983 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1984 case IR_PHI: asm_phi(as, ir); break;
1985 case IR_HIOP: asm_hiop(as, ir); break;
1986 case IR_GCSTEP: asm_gcstep(as, ir); break;
1987
1988 /* Guarded assertions. */
1989 case IR_EQ: case IR_NE:
1990 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1991 as->curins--;
1992 asm_href(as, ir-1, (IROp)ir->o);
1993 break;
1994 }
1995 /* fallthrough */
1996 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1997 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1998 case IR_ABC:
1999 asm_comp(as, ir);
2000 break;
2001
2002 case IR_RETF: asm_retf(as, ir); break;
2003
2004 /* Bit ops. */
2005 case IR_BNOT: asm_bitnot(as, ir); break;
2006 case IR_BSWAP: asm_bitswap(as, ir); break;
2007
2008 case IR_BAND: asm_bitand(as, ir); break;
2009 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2010 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2011
2012 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2013 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2014 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2015 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2016 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2017 case IR_BROR: lua_assert(0); break;
2018
2019 /* Arithmetic ops. */
2020 case IR_ADD: asm_add(as, ir); break;
2021 case IR_SUB: asm_sub(as, ir); break;
2022 case IR_MUL: asm_mul(as, ir); break;
2023 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2024 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2025 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2026 case IR_NEG: asm_neg(as, ir); break;
2027
2028 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2029 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2030 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2031 case IR_MIN: asm_min_max(as, ir, 0); break;
2032 case IR_MAX: asm_min_max(as, ir, 1); break;
2033 case IR_FPMATH:
2034 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2035 break;
2036 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2037 asm_fpunary(as, ir, PPCI_FSQRT);
2038 else
2039 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2040 break;
2041
2042 /* Overflow-checking arithmetic ops. */
2043 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2044 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2045 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2046
2047 /* Memory references. */
2048 case IR_AREF: asm_aref(as, ir); break;
2049 case IR_HREF: asm_href(as, ir, 0); break;
2050 case IR_HREFK: asm_hrefk(as, ir); break;
2051 case IR_NEWREF: asm_newref(as, ir); break;
2052 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2053 case IR_FREF: asm_fref(as, ir); break;
2054 case IR_STRREF: asm_strref(as, ir); break;
2055
2056 /* Loads and stores. */
2057 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2058 asm_ahuvload(as, ir);
2059 break;
2060 case IR_FLOAD: asm_fload(as, ir); break;
2061 case IR_XLOAD: asm_xload(as, ir); break;
2062 case IR_SLOAD: asm_sload(as, ir); break;
2063
2064 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2065 case IR_FSTORE: asm_fstore(as, ir); break;
2066 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2067
2068 /* Allocations. */
2069 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2070 case IR_TNEW: asm_tnew(as, ir); break;
2071 case IR_TDUP: asm_tdup(as, ir); break;
2072 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2073
2074 /* Write barriers. */
2075 case IR_TBAR: asm_tbar(as, ir); break;
2076 case IR_OBAR: asm_obar(as, ir); break;
2077
2078 /* Type conversions. */
2079 case IR_CONV: asm_conv(as, ir); break;
2080 case IR_TOBIT: asm_tobit(as, ir); break;
2081 case IR_TOSTR: asm_tostr(as, ir); break;
2082 case IR_STRTO: asm_strto(as, ir); break;
2083
2084 /* Calls. */
2085 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2086 case IR_CALLXS: asm_callx(as, ir); break;
2087 case IR_CARG: break;
2088
2089 default:
2090 setintV(&as->J->errinfo, ir->o);
2091 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2092 break;
2093 }
2094}
2095
2096/* -- Trace setup --------------------------------------------------------- */ 2238/* -- Trace setup --------------------------------------------------------- */
2097 2239
2098/* Ensure there are enough stack slots for call arguments. */ 2240/* Ensure there are enough stack slots for call arguments. */
2099static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2241static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2100{ 2242{
2101 IRRef args[CCI_NARGS_MAX*2]; 2243 IRRef args[CCI_NARGS_MAX*2];
2102 uint32_t i, nargs = (int)CCI_NARGS(ci); 2244 uint32_t i, nargs = CCI_XNARGS(ci);
2103 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2245 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2104 asm_collectargs(as, ir, ci, args); 2246 asm_collectargs(as, ir, ci, args);
2105 for (i = 0; i < nargs; i++) 2247 for (i = 0; i < nargs; i++)
2106 if (args[i] && irt_isfp(IR(args[i])->t)) { 2248 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
2107 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; 2249 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
2108 } else { 2250 } else {
2109 if (ngpr > 0) ngpr--; else nslots++; 2251 if (ngpr > 0) ngpr--; else nslots++;
2110 } 2252 }
2111 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2253 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2112 as->evenspill = nslots; 2254 as->evenspill = nslots;
2113 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); 2255 return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
2256 REGSP_HINT(RID_RET);
2114} 2257}
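asm_setup_call_slots reserves outgoing stack words for arguments that overflow the registers: each spilled FP argument (hard-float build) takes an 8-byte, even-aligned pair of slots, each spilled GPR argument one word. A hedged sketch of the same accounting, assuming the usual PPC32 ELF argument registers (8 GPRs r3-r10, 8 FPRs f1-f8):

/* is_fp_arg[i] is nonzero for FP arguments; mirrors the loop above. */
static int count_stack_slots(const int *is_fp_arg, int nargs)
{
  int nslots = 2, ngpr = 8, nfpr = 8, i;
  for (i = 0; i < nargs; i++) {
    if (is_fp_arg[i]) {
      if (nfpr > 0) nfpr--;
      else nslots = (nslots + 3) & ~1;  /* align to even, then two words */
    } else {
      if (ngpr > 0) ngpr--;
      else nslots++;                    /* one 4-byte word */
    }
  }
  return nslots;
}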
2115 2258
2116static void asm_setup_target(ASMState *as) 2259static void asm_setup_target(ASMState *as)
@@ -2150,7 +2293,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2150 } else if ((ins & 0xfc000000u) == PPCI_B && 2293 } else if ((ins & 0xfc000000u) == PPCI_B &&
2151 ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { 2294 ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
2152 ptrdiff_t delta = (char *)target - (char *)p; 2295 ptrdiff_t delta = (char *)target - (char *)p;
2153 lua_assert(((delta + 0x02000000) >> 26) == 0); 2296 lj_assertJ(((delta + 0x02000000) >> 26) == 0,
2297 "branch target out of range");
2154 *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); 2298 *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
2155 if (!cstart) cstart = p; 2299 if (!cstart) cstart = p;
2156 } 2300 }
@@ -2158,7 +2302,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2158 /* Always patch long-range branch in exit stub itself. Except, if we can't. */ 2302 /* Always patch long-range branch in exit stub itself. Except, if we can't. */
2159 if (patchlong) { 2303 if (patchlong) {
2160 ptrdiff_t delta = (char *)target - (char *)px - clearso; 2304 ptrdiff_t delta = (char *)target - (char *)px - clearso;
2161 lua_assert(((delta + 0x02000000) >> 26) == 0); 2305 lj_assertJ(((delta + 0x02000000) >> 26) == 0,
2306 "branch target out of range");
2162 *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); 2307 *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
2163 } 2308 }
2164 if (!cstart) cstart = px; 2309 if (!cstart) cstart = px;
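Both patching hunks above assert the same range condition before rewriting a b (I-form) branch: the target delta must fit the 26-bit signed, word-aligned displacement (the 24-bit LI field shifted left by 2, roughly +/-32 MB). A standalone sketch of that check:

#include <stddef.h>

/* Nonzero when delta fits the signed 26-bit displacement of a PPC b/bl. */
static int b_in_range(ptrdiff_t delta)
{
  return ((delta + 0x02000000) >> 26) == 0;
}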