Diffstat (limited to 'src/lj_asm_ppc.h')
-rw-r--r--  src/lj_asm_ppc.h | 729
1 file changed, 406 insertions(+), 323 deletions(-)
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index dc092db2..8fa8c8ef 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
   emit_tab(as, pi, rt, left, right);
 }
 
+#if !LJ_SOFTFP
 /* Fuse to multiply-add/sub instruction. */
 static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 {
@@ -245,21 +246,26 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
   }
   return 0;
 }
+#endif
 
 /* -- Calls --------------------------------------------------------------- */
 
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
-  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
+  Reg gpr = REGARG_FIRSTGPR;
+#if !LJ_SOFTFP
+  Reg fpr = REGARG_FIRSTFPR;
+#endif
   if ((void *)ci->func)
     emit_call(as, (void *)ci->func);
   for (n = 0; n < nargs; n++) {  /* Setup args. */
     IRRef ref = args[n];
     if (ref) {
       IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
       if (irt_isfp(ir->t)) {
         if (fpr <= REGARG_LASTFPR) {
           lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
           emit_spstore(as, ir, r, ofs);
           ofs += irt_isnum(ir->t) ? 8 : 4;
         }
-      } else {
+      } else
+#endif
+      {
         if (gpr <= REGARG_LASTGPR) {
           lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
           ra_leftov(as, gpr, ref);
@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
     }
     checkmclim(as);
   }
+#if !LJ_SOFTFP
   if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
     emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
+#endif
 }
 
 /* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
   int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+#if !LJ_SOFTFP
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
+#endif
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   if (hiop && ra_hasreg((ir+1)->r))
@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     lua_assert(!irt_ispri(ir->t));
-    if (irt_isfp(ir->t)) {
+    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
       if ((ci->flags & CCI_CASTU64)) {
         /* Use spill slot or temp slots. */
         int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -323,23 +335,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
       } else {
         ra_destreg(as, ir, RID_FPRET);
       }
+#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
+#endif
     } else {
       ra_destreg(as, ir, RID_RET);
     }
   }
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +357,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
   func = ir->op2; irf = IR(func);
   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
   if (irref_isk(func)) {  /* Call to constant address. */
-    ci.func = (ASMFunction)(void *)(irf->i);
+    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
   } else {  /* Need a non-argument register for indirect calls. */
     RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
     Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +368,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
   asm_gencall(as, &ci, args);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 /* -- Returns ------------------------------------------------------------- */
 
 /* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +375,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -394,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 
 /* -- Type conversions ---------------------------------------------------- */
 
+#if !LJ_SOFTFP
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 {
   RegSet allow = RSET_FPR;
@@ -410,8 +406,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
   emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
   emit_lsptr(as, PPCI_LFS, (fbias & 31),
-             (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
-             RSET_GPR);
+             (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
   emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
   emit_fb(as, PPCI_FCTIWZ, tmp, left);
 }
@@ -427,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
   emit_fab(as, PPCI_FADD, tmp, left, right);
 }
+#endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
 {
   IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
   IRRef lref = ir->op1;
-  lua_assert(irt_type(ir->t) != st);
   lua_assert(!(irt_isint64(ir->t) ||
                (st == IRT_I64 || st == IRT_U64)));  /* Handled by SPLIT. */
+#if LJ_SOFTFP
+  /* FP conversions are handled by SPLIT. */
+  lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
+  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+  lua_assert(irt_type(ir->t) != st);
   if (irt_isfp(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     if (stfp) {  /* FP to FP conversion. */
@@ -450,13 +453,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
       Reg left = ra_alloc1(as, lref, allow);
       Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
       Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
-      const float *kbias;
       if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
       emit_fab(as, PPCI_FSUB, dest, dest, fbias);
       emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
-      kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
-      if (st == IRT_U32) kbias++;
-      emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
+      emit_lsptr(as, PPCI_LFS, (fbias & 31),
+                 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
                  rset_clear(allow, hibias));
       emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
                RID_SP, SPOFS_TMPLO);
@@ -489,15 +490,16 @@ static void asm_conv(ASMState *as, IRIns *ir)
         emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
         emit_fab(as, PPCI_FSUB, tmp, left, tmp);
         emit_lsptr(as, PPCI_LFS, (tmp & 31),
-                   (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
-                   RSET_GPR);
+                   (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
       } else {
         emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
         emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
         emit_fb(as, PPCI_FCTIWZ, tmp, left);
       }
     }
-  } else {
+  } else
+#endif
+  {
     Reg dest = ra_dest(as, ir, RSET_GPR);
     if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
       Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -513,46 +515,50 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  const CCallInfo *ci;
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = (ir-1)->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = &lj_ir_callinfo[id];
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   IRRef args[2];
-  int32_t ofs;
+  int32_t ofs = SPOFS_TMP;
+#if LJ_SOFTFP
+  ra_evictset(as, RSET_SCRATCH);
+  if (ra_used(ir)) {
+    if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+        (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
+      int i;
+      for (i = 0; i < 2; i++) {
+        Reg r = (ir+i)->r;
+        if (ra_hasreg(r)) {
+          ra_free(as, r);
+          ra_modified(as, r);
+          emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+        }
+      }
+      ofs = sps_scale(ir->s & ~1);
+    } else {
+      Reg rhi = ra_dest(as, ir+1, RSET_GPR);
+      Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+      emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
+      emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
+    }
+  }
+#else
   RegSet drop = RSET_SCRATCH;
   if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
   ra_evictset(as, drop);
+  if (ir->s) ofs = sps_scale(ir->s);
+#endif
   asm_guardcc(as, CC_EQ);
   emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
   args[0] = ir->op1;      /* GCstr *str */
   args[1] = ASMREF_TMP1;  /* TValue *n  */
   asm_gencall(as, ci, args);
   /* Store the result to the spill slot or temp slots. */
-  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -566,37 +572,19 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     /* Otherwise use g->tmptv to hold the TValue. */
     RegSet allow = rset_exclude(RSET_GPR, dest);
     Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
     if (!irt_ispri(ir->t)) {
       Reg src = ra_alloc1(as, ref, allow);
       emit_setgl(as, src, tmptv.gcr);
     }
-    type = ra_allock(as, irt_toitype(ir->t), allow);
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, ref+1, allow);
+    else
+      type = ra_allock(as, irt_toitype(ir->t), allow);
     emit_setgl(as, type, tmptv.it);
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -636,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   Reg tisnum = RID_NONE, tmpnum = RID_NONE;
   IRRef refkey = ir->op2;
   IRIns *irkey = IR(refkey);
+  int isk = irref_isk(refkey);
   IRType1 kt = irkey->t;
   uint32_t khash;
   MCLabel l_end, l_loop, l_next;
 
   rset_clear(allow, tab);
+#if LJ_SOFTFP
+  if (!isk) {
+    key = ra_alloc1(as, refkey, allow);
+    rset_clear(allow, key);
+    if (irkey[1].o == IR_HIOP) {
+      if (ra_hasreg((irkey+1)->r)) {
+        tmpnum = (irkey+1)->r;
+        ra_noweak(as, tmpnum);
+      } else {
+        tmpnum = ra_allocref(as, refkey+1, allow);
+      }
+      rset_clear(allow, tmpnum);
+    }
+  }
+#else
   if (irt_isnum(kt)) {
     key = ra_alloc1(as, refkey, RSET_FPR);
     tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -650,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     key = ra_alloc1(as, refkey, allow);
     rset_clear(allow, key);
   }
+#endif
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
 
@@ -672,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     asm_guardcc(as, CC_EQ);
   else
     emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
-  if (irt_isnum(kt)) {
+  if (!LJ_SOFTFP && irt_isnum(kt)) {
     emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
     emit_condbranch(as, PPCI_BC, CC_GE, l_next);
     emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -682,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_ab(as, PPCI_CMPW, tmp2, key);
       emit_condbranch(as, PPCI_BC, CC_NE, l_next);
     }
-    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+    if (LJ_SOFTFP && ra_hasreg(tmpnum))
+      emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
+    else
+      emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
     if (!irt_ispri(kt))
       emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
   }
@@ -691,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
              (((char *)as->mcp-(char *)l_loop) & 0xffffu);
 
   /* Load main position relative to tab->node into dest. */
-  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  khash = isk ? ir_khash(irkey) : 1;
   if (khash == 0) {
     emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
   } else {
     Reg tmphash = tmp1;
-    if (irref_isk(refkey))
+    if (isk)
       tmphash = ra_allock(as, khash, allow);
     emit_tab(as, PPCI_ADD, dest, dest, tmp1);
     emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
     emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
     emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
     emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
-    if (irref_isk(refkey)) {
+    if (isk) {
       /* Nothing to do. */
     } else if (irt_isstr(kt)) {
       emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
@@ -713,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
       emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
       emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
-      if (irt_isnum(kt)) {
+      if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
+#if LJ_SOFTFP
+        emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+        emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
+#else
         int32_t ofs = ra_spill(as, irkey);
         emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
         emit_rotlwi(as, dest, tmp1, HASH_ROT1);
        emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
        emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
        emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
+#endif
       } else {
         emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
         emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -773,20 +787,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -860,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
   case IRT_U8: return PPCI_LBZ;
   case IRT_I16: return PPCI_LHA;
   case IRT_U16: return PPCI_LHZ;
-  case IRT_NUM: return PPCI_LFD;
-  case IRT_FLOAT: return PPCI_LFS;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
   default: return PPCI_LWZ;
   }
 }
@@ -871,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
   switch (irt_type(ir->t)) {
   case IRT_I8: case IRT_U8: return PPCI_STB;
   case IRT_I16: case IRT_U16: return PPCI_STH;
-  case IRT_NUM: return PPCI_STFD;
-  case IRT_FLOAT: return PPCI_STFS;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
   default: return PPCI_STW;
   }
 }
@@ -880,17 +880,23 @@ static PPCIns asm_fxstoreins(IRIns *ir)
 static void asm_fload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
   PPCIns pi = asm_fxloadins(ir);
+  Reg idx;
   int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_tai(as, PPCI_ADDI, dest, idx, ofs);
-      return;
+  if (ir->op1 == REF_NIL) {
+    idx = RID_JGL;
+    ofs = (ir->op2 << 2) - 32768;
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+        emit_tai(as, PPCI_ADDI, dest, idx, ofs);
+        return;
+      }
     }
+    ofs = field_ofs[ir->op2];
   }
-  ofs = field_ofs[ir->op2];
   lua_assert(!irt_isi8(ir->t));
   emit_tai(as, pi, dest, idx, ofs);
 }
@@ -909,14 +915,15 @@ static void asm_fstore(ASMState *as, IRIns *ir)
 
 static void asm_xload(ASMState *as, IRIns *ir)
 {
-  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  Reg dest = ra_dest(as, ir,
+                     (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
   if (irt_isi8(ir->t))
     emit_as(as, PPCI_EXTSB, dest, dest);
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   IRIns *irb;
   if (ir->r == RID_SINK)
@@ -927,22 +934,34 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
     Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
     asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
   } else {
-    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    Reg src = ra_alloc1(as, ir->op2,
+                        (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
                  rset_exclude(RSET_GPR, src), ofs);
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   IRType1 t = ir->t;
   Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
   RegSet allow = RSET_GPR;
   int32_t ofs = AHUREF_LSX;
+  if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
+    t.irt = IRT_NUM;
+    if (ra_used(ir+1)) {
+      type = ra_dest(as, ir+1, allow);
+      rset_clear(allow, type);
+    }
+    ofs = 0;
+  }
   if (ra_used(ir)) {
-    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    if (!irt_isnum(t)) ofs = 0;
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+               irt_isint(ir->t) || irt_isaddr(ir->t));
+    if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
     rset_clear(allow, dest);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
@@ -951,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     asm_guardcc(as, CC_GE);
     emit_ab(as, PPCI_CMPLW, type, tisnum);
     if (ra_hasreg(dest)) {
-      if (ofs == AHUREF_LSX) {
+      if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
         tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
                          (idx&255)), (idx>>8)));
         emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
       } else {
-        emit_fai(as, PPCI_LFD, dest, idx, ofs);
+        emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
+                 ofs+4*LJ_SOFTFP);
       }
     }
   } else {
@@ -979,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
   int32_t ofs = AHUREF_LSX;
   if (ir->r == RID_SINK)
     return;
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     src = ra_alloc1(as, ir->op2, RSET_FPR);
   } else {
     if (!irt_ispri(ir->t)) {
@@ -987,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
       rset_clear(allow, src);
       ofs = 0;
     }
-    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, (ir+1)->op2, allow);
+    else
+      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
     rset_clear(allow, type);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     if (ofs == AHUREF_LSX) {
       emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
       emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -1016,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir)
   IRType1 t = ir->t;
   Reg dest = RID_NONE, type = RID_NONE, base;
   RegSet allow = RSET_GPR;
+  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+  if (hiop)
+    t.irt = IRT_NUM;
   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
-  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
   lua_assert(LJ_DUALNUM ||
              !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+#if LJ_SOFTFP
+  lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
+  if (hiop && ra_used(ir+1)) {
+    type = ra_dest(as, ir+1, allow);
+    rset_clear(allow, type);
+  }
+#else
   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
     dest = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, dest);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
-  } else if (ra_used(ir)) {
+  } else
+#endif
+  if (ra_used(ir)) {
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
     rset_clear(allow, dest);
     base = ra_alloc1(as, REF_BASE, allow);
     rset_clear(allow, base);
-    if ((ir->op2 & IRSLOAD_CONVERT)) {
+    if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
       if (irt_isint(t)) {
         emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
         dest = ra_scratch(as, RSET_FPR);
@@ -1044,7 +1079,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
         emit_fab(as, PPCI_FSUB, dest, dest, fbias);
         emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
         emit_lsptr(as, PPCI_LFS, (fbias & 31),
-                   (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
+                   (void *)&as->J->k32[LJ_K32_2P52_2P31],
                    rset_clear(allow, hibias));
         emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
         emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -1062,10 +1097,13 @@ dotypecheck:
     if ((ir->op2 & IRSLOAD_TYPECHECK)) {
       Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
       asm_guardcc(as, CC_GE);
-      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
+#if !LJ_SOFTFP
       type = RID_TMP;
+#endif
+      emit_ab(as, PPCI_CMPLW, type, tisnum);
     }
-    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
+    if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
+                                  base, ofs-(LJ_SOFTFP?0:4));
   } else {
     if ((ir->op2 & IRSLOAD_TYPECHECK)) {
       asm_guardcc(as, CC_NE);
@@ -1083,19 +1121,15 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-    lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
-  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  IRRef args[4];
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1104,6 +1138,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
 
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
     int32_t ofs = sizeof(GCcdata);
     lua_assert(sz == 4 || sz == 8);
     if (sz == 8) {
@@ -1117,12 +1152,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir++;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
-  emit_ti(as, PPCI_LI, RID_TMP, ctypeid);  /* Lower 16 bit used. Sign-ext ok. */
+  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
                ra_releasetmp(as, ASMREF_TMP1));
@@ -1178,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
 
 /* -- Arithmetic and logic operations ------------------------------------- */
 
+#if !LJ_SOFTFP
 static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
 {
   Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1196,31 +1244,26 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
   emit_fb(as, pi, dest, left);
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+static void asm_fpmath(ASMState *as, IRIns *ir)
 {
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+    asm_fpunary(as, ir, PPCI_FSQRT);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
+#endif
 
 static void asm_add(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
       asm_fparith(as, ir, PPCI_FADD);
-  } else {
+  } else
+#endif
+  {
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
     PPCIns pi;
@@ -1259,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir)
 
 static void asm_sub(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
       asm_fparith(as, ir, PPCI_FSUB);
-  } else {
+  } else
+#endif
+  {
     PPCIns pi = PPCI_SUBF;
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg left, right;
@@ -1288,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
 
 static void asm_mul(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     asm_fparith(as, ir, PPCI_FMUL);
-  } else {
+  } else
+#endif
+  {
     PPCIns pi = PPCI_MULLW;
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1312,11 +1361,18 @@ static void asm_mul(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)
+#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
+#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     asm_fpunary(as, ir, PPCI_FNEG);
-  } else {
+  } else
+#endif
+  {
     Reg dest, left;
     PPCIns pi = PPCI_NEG;
     if (as->flagmcp == as->mcp) {
@@ -1330,6 +1386,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)
+#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
+
 static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
 {
   Reg dest, left, right;
@@ -1345,6 +1405,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
   emit_tab(as, pi|PPCF_DOT, dest, left, right);
 }
 
+#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)
+#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)
+#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)
+
 #if LJ_HASFFI
 static void asm_add64(ASMState *as, IRIns *ir)
 {
@@ -1424,7 +1488,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
 }
 #endif
 
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   PPCIns pi = PPCI_NOR;
@@ -1451,7 +1515,7 @@ nofuse:
   emit_asb(as, pi, dest, left, right);
 }
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   IRIns *irx;
@@ -1472,32 +1536,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-  if (irref_isk(ir->op2)) {
-    int32_t k = IR(ir->op2)->i;
-    Reg tmp = left;
-    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
-      if (!checku16(k)) {
-        emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
-        if ((k & 0xffff) == 0) return;
-      }
-      emit_asi(as, pik, dest, left, k);
-      return;
-    }
-  }
-  /* May fail due to spills/restores above, but simplifies the logic. */
-  if (as->flagmcp == as->mcp) {
-    as->flagmcp = NULL;
-    as->mcp++;
-    pi |= PPCF_DOT;
-  }
-  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
-  emit_asb(as, pi, dest, left, right);
-}
-
 /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
 static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
 {
@@ -1528,7 +1566,7 @@ nofuse:
   *--as->mcp = pi | PPCF_T(left);
 }
 
-static void asm_bitand(ASMState *as, IRIns *ir)
+static void asm_band(ASMState *as, IRIns *ir)
 {
   Reg dest, left, right;
   IRRef lref = ir->op1;
@@ -1583,6 +1621,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
   emit_asb(as, PPCI_AND ^ dot, dest, left, right);
 }
 
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    int32_t k = IR(ir->op2)->i;
+    Reg tmp = left;
+    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
+      if (!checku16(k)) {
+        emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
+        if ((k & 0xffff) == 0) return;
+      }
+      emit_asi(as, pik, dest, left, k);
+      return;
+    }
+  }
+  /* May fail due to spills/restores above, but simplifies the logic. */
+  if (as->flagmcp == as->mcp) {
+    as->flagmcp = NULL;
+    as->mcp++;
+    pi |= PPCF_DOT;
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_asb(as, pi, dest, left, right);
+}
+
+#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
 {
   Reg dest, left;
@@ -1608,9 +1675,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
   }
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
+#define asm_brol(as, ir) \
+  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+		  PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
+#define asm_bror(as, ir)	lua_assert(0)
+
+#if LJ_SOFTFP
+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+{
+  CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
+  IRRef args[4];
+  MCLabel l_right, l_end;
+  Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
+  Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
+  Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
+  PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
+  righthi = (lefthi >> 8); lefthi &= 255;
+  rightlo = (leftlo >> 8); leftlo &= 255;
+  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+  l_end = emit_label(as);
+  if (desthi != righthi) emit_mr(as, desthi, righthi);
+  if (destlo != rightlo) emit_mr(as, destlo, rightlo);
+  l_right = emit_label(as);
+  if (l_end != l_right) emit_jmp(as, l_end);
+  if (desthi != lefthi) emit_mr(as, desthi, lefthi);
+  if (destlo != leftlo) emit_mr(as, destlo, leftlo);
+  if (l_right == as->mcp+1) {
+    cond ^= 4; l_right = l_end; ++as->mcp;
+  }
+  emit_condbranch(as, PPCI_BC, cond, l_right);
+  ra_evictset(as, RSET_SCRATCH);
+  emit_cmpi(as, RID_RET, 1);
+  asm_gencall(as, &ci, args);
+}
+#endif
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg tmp = dest;
     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1638,6 +1744,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   }
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 #define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
@@ -1695,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
 static void asm_comp(ASMState *as, IRIns *ir)
 {
   PPCCC cc = asm_compmap[ir->o];
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
     right = (left >> 8); left &= 255;
     asm_guardcc(as, (cc >> 4));
@@ -1714,6 +1823,46 @@ static void asm_comp(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
+#if LJ_SOFTFP
+/* SFP comparisons. */
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+  RegSet drop = RSET_SCRATCH;
+  Reg r;
+  IRRef args[4];
+  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+
+  for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
+    if (!rset_test(as->freeset, r) &&
+        regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
+      rset_clear(drop, r);
+  }
+  ra_evictset(as, drop);
+  asm_setupresult(as, ir, ci);
+  switch ((IROp)ir->o) {
+  case IR_ULT:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+  case IR_ULE:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 1);
+    break;
+  case IR_GE: case IR_GT:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 2);
+  default:
+    asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
+    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+    break;
+  }
+  asm_gencall(as, ci, args);
+}
+#endif
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1743,37 +1892,67 @@ static void asm_comp64(ASMState *as, IRIns *ir)
 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI
+#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
+#if LJ_HASFFI && !LJ_SOFTFP
     if (usehi || uselo)
       asm_conv64(as, ir);
     return;
+#endif
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+    if (!irt_isint(ir->t)) {
+      asm_sfpcomp(as, ir-1);
+      return;
+    }
+#endif
+#if LJ_HASFFI
     asm_comp64(as, ir);
+#endif
+    return;
+#if LJ_SOFTFP
+  } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+    as->curins--;  /* Always skip the loword min/max. */
+    if (uselo || usehi)
+      asm_sfpmin_max(as, ir-1);
     return;
+#endif
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
-      asm_xstore(as, ir, 0);
-      asm_xstore(as, ir-1, 4);
+      asm_xstore_(as, ir, 0);
+      asm_xstore_(as, ir-1, 4);
     }
     return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
+#if LJ_HASFFI
   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+#endif
+#if LJ_SOFTFP
+  case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+  case IR_STRTO:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
+    break;
+#endif
   case IR_CALLN:
+  case IR_CALLS:
   case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
+#if LJ_SOFTFP
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+#endif
   case IR_CNEWI:
     /* Nothing to do here. Handled by lo op itself. */
     break;
@@ -1784,6 +1963,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1784#endif 1963#endif
1785} 1964}
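Under LJ_SOFTFP a number is not held in an FPR but as the two 32-bit halves of its IEEE-754 double encoding, with the loword IR instruction and its paired HIOP covering one half each; that is what asm_sfpcomp, asm_sfpmin_max and the SOFTFP branches above operate on. The following standalone C sketch (illustrative only, not LuaJIT code) shows how a double maps onto that word pair:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Illustrative only: view a double as the two 32-bit words used by the
** soft-float code paths. On big-endian PPC w[0] is the hiword
** (sign/exponent bits), on little-endian it is the loword. */
int main(void)
{
  double d = 3.25;
  uint32_t w[2];
  memcpy(w, &d, sizeof(d));
  printf("word0=0x%08x word1=0x%08x\n", (unsigned)w[0], (unsigned)w[1]);
  return 0;
}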
1786 1965
1966/* -- Profiling ----------------------------------------------------------- */
1967
1968static void asm_prof(ASMState *as, IRIns *ir)
1969{
1970 UNUSED(ir);
1971 asm_guardcc(as, CC_NE);
1972 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
1973 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
1974 (int32_t)offsetof(global_State, hookmask));
1975}
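asm_prof above emits, in execution order, a byte load of the global hookmask, an andi. against HOOK_PROFILE, and a CC_NE guard, so the trace exits whenever the profile hook bit is set (the assembler generates code backwards, which is why asm_guardcc appears first in the source). A rough standalone C equivalent of that check, with a placeholder mask value since the real HOOK_PROFILE constant is defined elsewhere in LuaJIT:

#include <stdint.h>

#define DEMO_HOOK_PROFILE 0x80u  /* placeholder; the real value lives in lj_obj.h */

/* Illustrative only: LBZ loads the hookmask byte, ANDI. masks it and updates
** CR0, and the CC_NE guard leaves the trace when the result is non-zero. */
int demo_profile_guard_fails(uint8_t hookmask)
{
  return (hookmask & DEMO_HOOK_PROFILE) != 0;
}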
1976
1787/* -- Stack handling ------------------------------------------------------ */ 1977/* -- Stack handling ------------------------------------------------------ */
1788 1978
1789/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1979/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1805,7 +1995,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1805 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 1995 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1806 if (pbase == RID_TMP) 1996 if (pbase == RID_TMP)
1807 emit_getgl(as, RID_TMP, jit_base); 1997 emit_getgl(as, RID_TMP, jit_base);
1808 emit_getgl(as, tmp, jit_L); 1998 emit_getgl(as, tmp, cur_L);
1809 if (allow == RSET_EMPTY) /* Spill temp. register. */ 1999 if (allow == RSET_EMPTY) /* Spill temp. register. */
1810 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 2000 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1811} 2001}
@@ -1826,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1826 if ((sn & SNAP_NORESTORE)) 2016 if ((sn & SNAP_NORESTORE))
1827 continue; 2017 continue;
1828 if (irt_isnum(ir->t)) { 2018 if (irt_isnum(ir->t)) {
2019#if LJ_SOFTFP
2020 Reg tmp;
2021 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2022 lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
2023 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2024 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2025 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2026 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2027 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2028#else
1829 Reg src = ra_alloc1(as, ref, RSET_FPR); 2029 Reg src = ra_alloc1(as, ref, RSET_FPR);
1830 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); 2030 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
2031#endif
1831 } else { 2032 } else {
1832 Reg type; 2033 Reg type;
1833 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2034 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
@@ -1840,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1840 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2041 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1841 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2042 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1842 type = ra_allock(as, (int32_t)(*flinks--), allow); 2043 type = ra_allock(as, (int32_t)(*flinks--), allow);
2044#if LJ_SOFTFP
2045 } else if ((sn & SNAP_SOFTFPNUM)) {
2046 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2047#endif
1843 } else { 2048 } else {
1844 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2049 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1845 } 2050 }
@@ -1966,147 +2171,25 @@ static void asm_tail_prep(ASMState *as)
1966 } 2171 }
1967} 2172}
1968 2173
1969/* -- Instruction dispatch ------------------------------------------------ */
1970
1971/* Assemble a single instruction. */
1972static void asm_ir(ASMState *as, IRIns *ir)
1973{
1974 switch ((IROp)ir->o) {
1975 /* Miscellaneous ops. */
1976 case IR_LOOP: asm_loop(as); break;
1977 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1978 case IR_USE:
1979 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1980 case IR_PHI: asm_phi(as, ir); break;
1981 case IR_HIOP: asm_hiop(as, ir); break;
1982 case IR_GCSTEP: asm_gcstep(as, ir); break;
1983
1984 /* Guarded assertions. */
1985 case IR_EQ: case IR_NE:
1986 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1987 as->curins--;
1988 asm_href(as, ir-1, (IROp)ir->o);
1989 break;
1990 }
1991 /* fallthrough */
1992 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1993 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1994 case IR_ABC:
1995 asm_comp(as, ir);
1996 break;
1997
1998 case IR_RETF: asm_retf(as, ir); break;
1999
2000 /* Bit ops. */
2001 case IR_BNOT: asm_bitnot(as, ir); break;
2002 case IR_BSWAP: asm_bitswap(as, ir); break;
2003
2004 case IR_BAND: asm_bitand(as, ir); break;
2005 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2006 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2007
2008 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2009 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2010 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2011 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2012 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2013 case IR_BROR: lua_assert(0); break;
2014
2015 /* Arithmetic ops. */
2016 case IR_ADD: asm_add(as, ir); break;
2017 case IR_SUB: asm_sub(as, ir); break;
2018 case IR_MUL: asm_mul(as, ir); break;
2019 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2020 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2021 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2022 case IR_NEG: asm_neg(as, ir); break;
2023
2024 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2025 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2026 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2027 case IR_MIN: asm_min_max(as, ir, 0); break;
2028 case IR_MAX: asm_min_max(as, ir, 1); break;
2029 case IR_FPMATH:
2030 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2031 break;
2032 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2033 asm_fpunary(as, ir, PPCI_FSQRT);
2034 else
2035 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2036 break;
2037
2038 /* Overflow-checking arithmetic ops. */
2039 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2040 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2041 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2042
2043 /* Memory references. */
2044 case IR_AREF: asm_aref(as, ir); break;
2045 case IR_HREF: asm_href(as, ir, 0); break;
2046 case IR_HREFK: asm_hrefk(as, ir); break;
2047 case IR_NEWREF: asm_newref(as, ir); break;
2048 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2049 case IR_FREF: asm_fref(as, ir); break;
2050 case IR_STRREF: asm_strref(as, ir); break;
2051
2052 /* Loads and stores. */
2053 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2054 asm_ahuvload(as, ir);
2055 break;
2056 case IR_FLOAD: asm_fload(as, ir); break;
2057 case IR_XLOAD: asm_xload(as, ir); break;
2058 case IR_SLOAD: asm_sload(as, ir); break;
2059
2060 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2061 case IR_FSTORE: asm_fstore(as, ir); break;
2062 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2063
2064 /* Allocations. */
2065 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2066 case IR_TNEW: asm_tnew(as, ir); break;
2067 case IR_TDUP: asm_tdup(as, ir); break;
2068 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2069
2070 /* Write barriers. */
2071 case IR_TBAR: asm_tbar(as, ir); break;
2072 case IR_OBAR: asm_obar(as, ir); break;
2073
2074 /* Type conversions. */
2075 case IR_CONV: asm_conv(as, ir); break;
2076 case IR_TOBIT: asm_tobit(as, ir); break;
2077 case IR_TOSTR: asm_tostr(as, ir); break;
2078 case IR_STRTO: asm_strto(as, ir); break;
2079
2080 /* Calls. */
2081 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2082 case IR_CALLXS: asm_callx(as, ir); break;
2083 case IR_CARG: break;
2084
2085 default:
2086 setintV(&as->J->errinfo, ir->o);
2087 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2088 break;
2089 }
2090}
2091
2092/* -- Trace setup --------------------------------------------------------- */ 2174/* -- Trace setup --------------------------------------------------------- */
2093 2175
2094/* Ensure there are enough stack slots for call arguments. */ 2176/* Ensure there are enough stack slots for call arguments. */
2095static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2177static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2096{ 2178{
2097 IRRef args[CCI_NARGS_MAX*2]; 2179 IRRef args[CCI_NARGS_MAX*2];
2098 uint32_t i, nargs = (int)CCI_NARGS(ci); 2180 uint32_t i, nargs = CCI_XNARGS(ci);
2099 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2181 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2100 asm_collectargs(as, ir, ci, args); 2182 asm_collectargs(as, ir, ci, args);
2101 for (i = 0; i < nargs; i++) 2183 for (i = 0; i < nargs; i++)
2102 if (args[i] && irt_isfp(IR(args[i])->t)) { 2184 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
2103 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; 2185 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
2104 } else { 2186 } else {
2105 if (ngpr > 0) ngpr--; else nslots++; 2187 if (ngpr > 0) ngpr--; else nslots++;
2106 } 2188 }
2107 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2189 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2108 as->evenspill = nslots; 2190 as->evenspill = nslots;
2109 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); 2191 return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
2192 REGSP_HINT(RID_RET);
2110} 2193}
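In asm_setup_call_slots above, an FP argument that no longer fits in an FPR reserves a doubleword-aligned pair of outgoing stack slots via (nslots+3)&~1, while an overflowing GPR argument takes a single slot. A small standalone check of that arithmetic (illustrative only, not LuaJIT code):

#include <assert.h>

/* Slot-reservation arithmetic for an FP argument that spills to the stack:
** align the running slot count up to an even index, then claim a
** doubleword pair. */
static int reserve_fp_slots(int nslots)
{
  return (nslots + 3) & ~1;
}

int main(void)
{
  assert(reserve_fp_slots(2) == 4);  /* already even: grows by the pair */
  assert(reserve_fp_slots(3) == 6);  /* odd: one padding slot plus the pair */
  return 0;
}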
2111 2194
2112static void asm_setup_target(ASMState *as) 2195static void asm_setup_target(ASMState *as)