diff options
Diffstat (limited to 'src/lj_asm_ppc.h')
-rw-r--r-- | src/lj_asm_ppc.h | 729 |
1 files changed, 406 insertions, 323 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index dc092db2..8fa8c8ef 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
226 | emit_tab(as, pi, rt, left, right); | 226 | emit_tab(as, pi, rt, left, right); |
227 | } | 227 | } |
228 | 228 | ||
229 | #if !LJ_SOFTFP | ||
229 | /* Fuse to multiply-add/sub instruction. */ | 230 | /* Fuse to multiply-add/sub instruction. */ |
230 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | 231 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) |
231 | { | 232 | { |
@@ -245,21 +246,26 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | |||
245 | } | 246 | } |
246 | return 0; | 247 | return 0; |
247 | } | 248 | } |
249 | #endif | ||
248 | 250 | ||
249 | /* -- Calls --------------------------------------------------------------- */ | 251 | /* -- Calls --------------------------------------------------------------- */ |
250 | 252 | ||
251 | /* Generate a call to a C function. */ | 253 | /* Generate a call to a C function. */ |
252 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 254 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
253 | { | 255 | { |
254 | uint32_t n, nargs = CCI_NARGS(ci); | 256 | uint32_t n, nargs = CCI_XNARGS(ci); |
255 | int32_t ofs = 8; | 257 | int32_t ofs = 8; |
256 | Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; | 258 | Reg gpr = REGARG_FIRSTGPR; |
259 | #if !LJ_SOFTFP | ||
260 | Reg fpr = REGARG_FIRSTFPR; | ||
261 | #endif | ||
257 | if ((void *)ci->func) | 262 | if ((void *)ci->func) |
258 | emit_call(as, (void *)ci->func); | 263 | emit_call(as, (void *)ci->func); |
259 | for (n = 0; n < nargs; n++) { /* Setup args. */ | 264 | for (n = 0; n < nargs; n++) { /* Setup args. */ |
260 | IRRef ref = args[n]; | 265 | IRRef ref = args[n]; |
261 | if (ref) { | 266 | if (ref) { |
262 | IRIns *ir = IR(ref); | 267 | IRIns *ir = IR(ref); |
268 | #if !LJ_SOFTFP | ||
263 | if (irt_isfp(ir->t)) { | 269 | if (irt_isfp(ir->t)) { |
264 | if (fpr <= REGARG_LASTFPR) { | 270 | if (fpr <= REGARG_LASTFPR) { |
265 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ | 271 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ |
@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
271 | emit_spstore(as, ir, r, ofs); | 277 | emit_spstore(as, ir, r, ofs); |
272 | ofs += irt_isnum(ir->t) ? 8 : 4; | 278 | ofs += irt_isnum(ir->t) ? 8 : 4; |
273 | } | 279 | } |
274 | } else { | 280 | } else |
281 | #endif | ||
282 | { | ||
275 | if (gpr <= REGARG_LASTGPR) { | 283 | if (gpr <= REGARG_LASTGPR) { |
276 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ | 284 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ |
277 | ra_leftov(as, gpr, ref); | 285 | ra_leftov(as, gpr, ref); |
@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
290 | } | 298 | } |
291 | checkmclim(as); | 299 | checkmclim(as); |
292 | } | 300 | } |
301 | #if !LJ_SOFTFP | ||
293 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ | 302 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ |
294 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); | 303 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); |
304 | #endif | ||
295 | } | 305 | } |
296 | 306 | ||
297 | /* Setup result reg/sp for call. Evict scratch regs. */ | 307 | /* Setup result reg/sp for call. Evict scratch regs. */ |
@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
299 | { | 309 | { |
300 | RegSet drop = RSET_SCRATCH; | 310 | RegSet drop = RSET_SCRATCH; |
301 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | 311 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); |
312 | #if !LJ_SOFTFP | ||
302 | if ((ci->flags & CCI_NOFPRCLOBBER)) | 313 | if ((ci->flags & CCI_NOFPRCLOBBER)) |
303 | drop &= ~RSET_FPR; | 314 | drop &= ~RSET_FPR; |
315 | #endif | ||
304 | if (ra_hasreg(ir->r)) | 316 | if (ra_hasreg(ir->r)) |
305 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 317 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
306 | if (hiop && ra_hasreg((ir+1)->r)) | 318 | if (hiop && ra_hasreg((ir+1)->r)) |
@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
308 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 320 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
309 | if (ra_used(ir)) { | 321 | if (ra_used(ir)) { |
310 | lua_assert(!irt_ispri(ir->t)); | 322 | lua_assert(!irt_ispri(ir->t)); |
311 | if (irt_isfp(ir->t)) { | 323 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
312 | if ((ci->flags & CCI_CASTU64)) { | 324 | if ((ci->flags & CCI_CASTU64)) { |
313 | /* Use spill slot or temp slots. */ | 325 | /* Use spill slot or temp slots. */ |
314 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | 326 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; |
@@ -323,23 +335,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
323 | } else { | 335 | } else { |
324 | ra_destreg(as, ir, RID_FPRET); | 336 | ra_destreg(as, ir, RID_FPRET); |
325 | } | 337 | } |
338 | #if LJ_32 | ||
326 | } else if (hiop) { | 339 | } else if (hiop) { |
327 | ra_destpair(as, ir); | 340 | ra_destpair(as, ir); |
341 | #endif | ||
328 | } else { | 342 | } else { |
329 | ra_destreg(as, ir, RID_RET); | 343 | ra_destreg(as, ir, RID_RET); |
330 | } | 344 | } |
331 | } | 345 | } |
332 | } | 346 | } |
333 | 347 | ||
334 | static void asm_call(ASMState *as, IRIns *ir) | ||
335 | { | ||
336 | IRRef args[CCI_NARGS_MAX]; | ||
337 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
338 | asm_collectargs(as, ir, ci, args); | ||
339 | asm_setupresult(as, ir, ci); | ||
340 | asm_gencall(as, ci, args); | ||
341 | } | ||
342 | |||
343 | static void asm_callx(ASMState *as, IRIns *ir) | 348 | static void asm_callx(ASMState *as, IRIns *ir) |
344 | { | 349 | { |
345 | IRRef args[CCI_NARGS_MAX*2]; | 350 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -352,7 +357,7 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
352 | func = ir->op2; irf = IR(func); | 357 | func = ir->op2; irf = IR(func); |
353 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } | 358 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } |
354 | if (irref_isk(func)) { /* Call to constant address. */ | 359 | if (irref_isk(func)) { /* Call to constant address. */ |
355 | ci.func = (ASMFunction)(void *)(irf->i); | 360 | ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); |
356 | } else { /* Need a non-argument register for indirect calls. */ | 361 | } else { /* Need a non-argument register for indirect calls. */ |
357 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); | 362 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); |
358 | Reg freg = ra_alloc1(as, func, allow); | 363 | Reg freg = ra_alloc1(as, func, allow); |
@@ -363,16 +368,6 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
363 | asm_gencall(as, &ci, args); | 368 | asm_gencall(as, &ci, args); |
364 | } | 369 | } |
365 | 370 | ||
366 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
367 | { | ||
368 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
369 | IRRef args[2]; | ||
370 | args[0] = ir->op1; | ||
371 | args[1] = ir->op2; | ||
372 | asm_setupresult(as, ir, ci); | ||
373 | asm_gencall(as, ci, args); | ||
374 | } | ||
375 | |||
376 | /* -- Returns ------------------------------------------------------------- */ | 371 | /* -- Returns ------------------------------------------------------------- */ |
377 | 372 | ||
378 | /* Return to lower frame. Guard that it goes to the right spot. */ | 373 | /* Return to lower frame. Guard that it goes to the right spot. */ |
@@ -380,7 +375,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
380 | { | 375 | { |
381 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 376 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
382 | void *pc = ir_kptr(IR(ir->op2)); | 377 | void *pc = ir_kptr(IR(ir->op2)); |
383 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 378 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
384 | as->topslot -= (BCReg)delta; | 379 | as->topslot -= (BCReg)delta; |
385 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 380 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
386 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 381 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
@@ -394,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
394 | 389 | ||
395 | /* -- Type conversions ---------------------------------------------------- */ | 390 | /* -- Type conversions ---------------------------------------------------- */ |
396 | 391 | ||
392 | #if !LJ_SOFTFP | ||
397 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | 393 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) |
398 | { | 394 | { |
399 | RegSet allow = RSET_FPR; | 395 | RegSet allow = RSET_FPR; |
@@ -410,8 +406,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
410 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); | 406 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); |
411 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 407 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
412 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 408 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
413 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 409 | (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); |
414 | RSET_GPR); | ||
415 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 410 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
416 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 411 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
417 | } | 412 | } |
@@ -427,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
427 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 422 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
428 | emit_fab(as, PPCI_FADD, tmp, left, right); | 423 | emit_fab(as, PPCI_FADD, tmp, left, right); |
429 | } | 424 | } |
425 | #endif | ||
430 | 426 | ||
431 | static void asm_conv(ASMState *as, IRIns *ir) | 427 | static void asm_conv(ASMState *as, IRIns *ir) |
432 | { | 428 | { |
433 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 429 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
430 | #if !LJ_SOFTFP | ||
434 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 431 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
432 | #endif | ||
435 | IRRef lref = ir->op1; | 433 | IRRef lref = ir->op1; |
436 | lua_assert(irt_type(ir->t) != st); | ||
437 | lua_assert(!(irt_isint64(ir->t) || | 434 | lua_assert(!(irt_isint64(ir->t) || |
438 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ | 435 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ |
436 | #if LJ_SOFTFP | ||
437 | /* FP conversions are handled by SPLIT. */ | ||
438 | lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); | ||
439 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | ||
440 | #else | ||
441 | lua_assert(irt_type(ir->t) != st); | ||
439 | if (irt_isfp(ir->t)) { | 442 | if (irt_isfp(ir->t)) { |
440 | Reg dest = ra_dest(as, ir, RSET_FPR); | 443 | Reg dest = ra_dest(as, ir, RSET_FPR); |
441 | if (stfp) { /* FP to FP conversion. */ | 444 | if (stfp) { /* FP to FP conversion. */ |
@@ -450,13 +453,11 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
450 | Reg left = ra_alloc1(as, lref, allow); | 453 | Reg left = ra_alloc1(as, lref, allow); |
451 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); | 454 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); |
452 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 455 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
453 | const float *kbias; | ||
454 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); | 456 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); |
455 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 457 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
456 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 458 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
457 | kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); | 459 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
458 | if (st == IRT_U32) kbias++; | 460 | &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], |
459 | emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, | ||
460 | rset_clear(allow, hibias)); | 461 | rset_clear(allow, hibias)); |
461 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, | 462 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, |
462 | RID_SP, SPOFS_TMPLO); | 463 | RID_SP, SPOFS_TMPLO); |
@@ -489,15 +490,16 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
489 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); | 490 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); |
490 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); | 491 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); |
491 | emit_lsptr(as, PPCI_LFS, (tmp & 31), | 492 | emit_lsptr(as, PPCI_LFS, (tmp & 31), |
492 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), | 493 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
493 | RSET_GPR); | ||
494 | } else { | 494 | } else { |
495 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 495 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
496 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 496 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
497 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 497 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
498 | } | 498 | } |
499 | } | 499 | } |
500 | } else { | 500 | } else |
501 | #endif | ||
502 | { | ||
501 | Reg dest = ra_dest(as, ir, RSET_GPR); | 503 | Reg dest = ra_dest(as, ir, RSET_GPR); |
502 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 504 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
503 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 505 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
@@ -513,46 +515,50 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
513 | } | 515 | } |
514 | } | 516 | } |
515 | 517 | ||
516 | #if LJ_HASFFI | ||
517 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
518 | { | ||
519 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
520 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
521 | IRCallID id; | ||
522 | const CCallInfo *ci; | ||
523 | IRRef args[2]; | ||
524 | args[0] = ir->op1; | ||
525 | args[1] = (ir-1)->op1; | ||
526 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
527 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
528 | ir--; | ||
529 | } else { | ||
530 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
531 | } | ||
532 | ci = &lj_ir_callinfo[id]; | ||
533 | asm_setupresult(as, ir, ci); | ||
534 | asm_gencall(as, ci, args); | ||
535 | } | ||
536 | #endif | ||
537 | |||
538 | static void asm_strto(ASMState *as, IRIns *ir) | 518 | static void asm_strto(ASMState *as, IRIns *ir) |
539 | { | 519 | { |
540 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 520 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
541 | IRRef args[2]; | 521 | IRRef args[2]; |
542 | int32_t ofs; | 522 | int32_t ofs = SPOFS_TMP; |
523 | #if LJ_SOFTFP | ||
524 | ra_evictset(as, RSET_SCRATCH); | ||
525 | if (ra_used(ir)) { | ||
526 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | ||
527 | (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { | ||
528 | int i; | ||
529 | for (i = 0; i < 2; i++) { | ||
530 | Reg r = (ir+i)->r; | ||
531 | if (ra_hasreg(r)) { | ||
532 | ra_free(as, r); | ||
533 | ra_modified(as, r); | ||
534 | emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); | ||
535 | } | ||
536 | } | ||
537 | ofs = sps_scale(ir->s & ~1); | ||
538 | } else { | ||
539 | Reg rhi = ra_dest(as, ir+1, RSET_GPR); | ||
540 | Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); | ||
541 | emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); | ||
542 | emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); | ||
543 | } | ||
544 | } | ||
545 | #else | ||
543 | RegSet drop = RSET_SCRATCH; | 546 | RegSet drop = RSET_SCRATCH; |
544 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ | 547 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ |
545 | ra_evictset(as, drop); | 548 | ra_evictset(as, drop); |
549 | if (ir->s) ofs = sps_scale(ir->s); | ||
550 | #endif | ||
546 | asm_guardcc(as, CC_EQ); | 551 | asm_guardcc(as, CC_EQ); |
547 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ | 552 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ |
548 | args[0] = ir->op1; /* GCstr *str */ | 553 | args[0] = ir->op1; /* GCstr *str */ |
549 | args[1] = ASMREF_TMP1; /* TValue *n */ | 554 | args[1] = ASMREF_TMP1; /* TValue *n */ |
550 | asm_gencall(as, ci, args); | 555 | asm_gencall(as, ci, args); |
551 | /* Store the result to the spill slot or temp slots. */ | 556 | /* Store the result to the spill slot or temp slots. */ |
552 | ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | ||
553 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); | 557 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); |
554 | } | 558 | } |
555 | 559 | ||
560 | /* -- Memory references --------------------------------------------------- */ | ||
561 | |||
556 | /* Get pointer to TValue. */ | 562 | /* Get pointer to TValue. */ |
557 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 563 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) |
558 | { | 564 | { |
@@ -566,37 +572,19 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | |||
566 | /* Otherwise use g->tmptv to hold the TValue. */ | 572 | /* Otherwise use g->tmptv to hold the TValue. */ |
567 | RegSet allow = rset_exclude(RSET_GPR, dest); | 573 | RegSet allow = rset_exclude(RSET_GPR, dest); |
568 | Reg type; | 574 | Reg type; |
569 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); | 575 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768); |
570 | if (!irt_ispri(ir->t)) { | 576 | if (!irt_ispri(ir->t)) { |
571 | Reg src = ra_alloc1(as, ref, allow); | 577 | Reg src = ra_alloc1(as, ref, allow); |
572 | emit_setgl(as, src, tmptv.gcr); | 578 | emit_setgl(as, src, tmptv.gcr); |
573 | } | 579 | } |
574 | type = ra_allock(as, irt_toitype(ir->t), allow); | 580 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
581 | type = ra_alloc1(as, ref+1, allow); | ||
582 | else | ||
583 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
575 | emit_setgl(as, type, tmptv.it); | 584 | emit_setgl(as, type, tmptv.it); |
576 | } | 585 | } |
577 | } | 586 | } |
578 | 587 | ||
579 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
580 | { | ||
581 | IRRef args[2]; | ||
582 | args[0] = ASMREF_L; | ||
583 | as->gcsteps++; | ||
584 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
585 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
586 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
587 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
588 | asm_gencall(as, ci, args); | ||
589 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
590 | } else { | ||
591 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | ||
592 | args[1] = ir->op1; /* int32_t k */ | ||
593 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
594 | asm_gencall(as, ci, args); | ||
595 | } | ||
596 | } | ||
597 | |||
598 | /* -- Memory references --------------------------------------------------- */ | ||
599 | |||
600 | static void asm_aref(ASMState *as, IRIns *ir) | 588 | static void asm_aref(ASMState *as, IRIns *ir) |
601 | { | 589 | { |
602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 590 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -636,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
636 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; | 624 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; |
637 | IRRef refkey = ir->op2; | 625 | IRRef refkey = ir->op2; |
638 | IRIns *irkey = IR(refkey); | 626 | IRIns *irkey = IR(refkey); |
627 | int isk = irref_isk(refkey); | ||
639 | IRType1 kt = irkey->t; | 628 | IRType1 kt = irkey->t; |
640 | uint32_t khash; | 629 | uint32_t khash; |
641 | MCLabel l_end, l_loop, l_next; | 630 | MCLabel l_end, l_loop, l_next; |
642 | 631 | ||
643 | rset_clear(allow, tab); | 632 | rset_clear(allow, tab); |
633 | #if LJ_SOFTFP | ||
634 | if (!isk) { | ||
635 | key = ra_alloc1(as, refkey, allow); | ||
636 | rset_clear(allow, key); | ||
637 | if (irkey[1].o == IR_HIOP) { | ||
638 | if (ra_hasreg((irkey+1)->r)) { | ||
639 | tmpnum = (irkey+1)->r; | ||
640 | ra_noweak(as, tmpnum); | ||
641 | } else { | ||
642 | tmpnum = ra_allocref(as, refkey+1, allow); | ||
643 | } | ||
644 | rset_clear(allow, tmpnum); | ||
645 | } | ||
646 | } | ||
647 | #else | ||
644 | if (irt_isnum(kt)) { | 648 | if (irt_isnum(kt)) { |
645 | key = ra_alloc1(as, refkey, RSET_FPR); | 649 | key = ra_alloc1(as, refkey, RSET_FPR); |
646 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); | 650 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); |
@@ -650,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
650 | key = ra_alloc1(as, refkey, allow); | 654 | key = ra_alloc1(as, refkey, allow); |
651 | rset_clear(allow, key); | 655 | rset_clear(allow, key); |
652 | } | 656 | } |
657 | #endif | ||
653 | tmp2 = ra_scratch(as, allow); | 658 | tmp2 = ra_scratch(as, allow); |
654 | rset_clear(allow, tmp2); | 659 | rset_clear(allow, tmp2); |
655 | 660 | ||
@@ -672,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
672 | asm_guardcc(as, CC_EQ); | 677 | asm_guardcc(as, CC_EQ); |
673 | else | 678 | else |
674 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); | 679 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); |
675 | if (irt_isnum(kt)) { | 680 | if (!LJ_SOFTFP && irt_isnum(kt)) { |
676 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); | 681 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); |
677 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); | 682 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); |
678 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); | 683 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); |
@@ -682,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
682 | emit_ab(as, PPCI_CMPW, tmp2, key); | 687 | emit_ab(as, PPCI_CMPW, tmp2, key); |
683 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); | 688 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); |
684 | } | 689 | } |
685 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | 690 | if (LJ_SOFTFP && ra_hasreg(tmpnum)) |
691 | emit_ab(as, PPCI_CMPW, tmp1, tmpnum); | ||
692 | else | ||
693 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | ||
686 | if (!irt_ispri(kt)) | 694 | if (!irt_ispri(kt)) |
687 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); | 695 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); |
688 | } | 696 | } |
@@ -691,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
691 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); | 699 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); |
692 | 700 | ||
693 | /* Load main position relative to tab->node into dest. */ | 701 | /* Load main position relative to tab->node into dest. */ |
694 | khash = irref_isk(refkey) ? ir_khash(irkey) : 1; | 702 | khash = isk ? ir_khash(irkey) : 1; |
695 | if (khash == 0) { | 703 | if (khash == 0) { |
696 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 704 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
697 | } else { | 705 | } else { |
698 | Reg tmphash = tmp1; | 706 | Reg tmphash = tmp1; |
699 | if (irref_isk(refkey)) | 707 | if (isk) |
700 | tmphash = ra_allock(as, khash, allow); | 708 | tmphash = ra_allock(as, khash, allow); |
701 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); | 709 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); |
702 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); | 710 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); |
703 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); | 711 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); |
704 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 712 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
705 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); | 713 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); |
706 | if (irref_isk(refkey)) { | 714 | if (isk) { |
707 | /* Nothing to do. */ | 715 | /* Nothing to do. */ |
708 | } else if (irt_isstr(kt)) { | 716 | } else if (irt_isstr(kt)) { |
709 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); | 717 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); |
@@ -713,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
713 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); | 721 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); |
714 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); | 722 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); |
715 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); | 723 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); |
716 | if (irt_isnum(kt)) { | 724 | if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { |
725 | #if LJ_SOFTFP | ||
726 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | ||
727 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | ||
728 | emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); | ||
729 | #else | ||
717 | int32_t ofs = ra_spill(as, irkey); | 730 | int32_t ofs = ra_spill(as, irkey); |
718 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); | 731 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); |
719 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 732 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
720 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); | 733 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); |
721 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); | 734 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); |
722 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); | 735 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); |
736 | #endif | ||
723 | } else { | 737 | } else { |
724 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | 738 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); |
725 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 739 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
@@ -773,20 +787,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
773 | } | 787 | } |
774 | } | 788 | } |
775 | 789 | ||
776 | static void asm_newref(ASMState *as, IRIns *ir) | ||
777 | { | ||
778 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
779 | IRRef args[3]; | ||
780 | if (ir->r == RID_SINK) | ||
781 | return; | ||
782 | args[0] = ASMREF_L; /* lua_State *L */ | ||
783 | args[1] = ir->op1; /* GCtab *t */ | ||
784 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
785 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
786 | asm_gencall(as, ci, args); | ||
787 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
788 | } | ||
789 | |||
790 | static void asm_uref(ASMState *as, IRIns *ir) | 790 | static void asm_uref(ASMState *as, IRIns *ir) |
791 | { | 791 | { |
792 | Reg dest = ra_dest(as, ir, RSET_GPR); | 792 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -860,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir) | |||
860 | case IRT_U8: return PPCI_LBZ; | 860 | case IRT_U8: return PPCI_LBZ; |
861 | case IRT_I16: return PPCI_LHA; | 861 | case IRT_I16: return PPCI_LHA; |
862 | case IRT_U16: return PPCI_LHZ; | 862 | case IRT_U16: return PPCI_LHZ; |
863 | case IRT_NUM: return PPCI_LFD; | 863 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD; |
864 | case IRT_FLOAT: return PPCI_LFS; | 864 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; |
865 | default: return PPCI_LWZ; | 865 | default: return PPCI_LWZ; |
866 | } | 866 | } |
867 | } | 867 | } |
@@ -871,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir) | |||
871 | switch (irt_type(ir->t)) { | 871 | switch (irt_type(ir->t)) { |
872 | case IRT_I8: case IRT_U8: return PPCI_STB; | 872 | case IRT_I8: case IRT_U8: return PPCI_STB; |
873 | case IRT_I16: case IRT_U16: return PPCI_STH; | 873 | case IRT_I16: case IRT_U16: return PPCI_STH; |
874 | case IRT_NUM: return PPCI_STFD; | 874 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD; |
875 | case IRT_FLOAT: return PPCI_STFS; | 875 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; |
876 | default: return PPCI_STW; | 876 | default: return PPCI_STW; |
877 | } | 877 | } |
878 | } | 878 | } |
@@ -880,17 +880,23 @@ static PPCIns asm_fxstoreins(IRIns *ir) | |||
880 | static void asm_fload(ASMState *as, IRIns *ir) | 880 | static void asm_fload(ASMState *as, IRIns *ir) |
881 | { | 881 | { |
882 | Reg dest = ra_dest(as, ir, RSET_GPR); | 882 | Reg dest = ra_dest(as, ir, RSET_GPR); |
883 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
884 | PPCIns pi = asm_fxloadins(ir); | 883 | PPCIns pi = asm_fxloadins(ir); |
884 | Reg idx; | ||
885 | int32_t ofs; | 885 | int32_t ofs; |
886 | if (ir->op2 == IRFL_TAB_ARRAY) { | 886 | if (ir->op1 == REF_NIL) { |
887 | ofs = asm_fuseabase(as, ir->op1); | 887 | idx = RID_JGL; |
888 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 888 | ofs = (ir->op2 << 2) - 32768; |
889 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | 889 | } else { |
890 | return; | 890 | idx = ra_alloc1(as, ir->op1, RSET_GPR); |
891 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
892 | ofs = asm_fuseabase(as, ir->op1); | ||
893 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
894 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | ||
895 | return; | ||
896 | } | ||
891 | } | 897 | } |
898 | ofs = field_ofs[ir->op2]; | ||
892 | } | 899 | } |
893 | ofs = field_ofs[ir->op2]; | ||
894 | lua_assert(!irt_isi8(ir->t)); | 900 | lua_assert(!irt_isi8(ir->t)); |
895 | emit_tai(as, pi, dest, idx, ofs); | 901 | emit_tai(as, pi, dest, idx, ofs); |
896 | } | 902 | } |
@@ -909,14 +915,15 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
909 | 915 | ||
910 | static void asm_xload(ASMState *as, IRIns *ir) | 916 | static void asm_xload(ASMState *as, IRIns *ir) |
911 | { | 917 | { |
912 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 918 | Reg dest = ra_dest(as, ir, |
919 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | ||
913 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 920 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); |
914 | if (irt_isi8(ir->t)) | 921 | if (irt_isi8(ir->t)) |
915 | emit_as(as, PPCI_EXTSB, dest, dest); | 922 | emit_as(as, PPCI_EXTSB, dest, dest); |
916 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 923 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); |
917 | } | 924 | } |
918 | 925 | ||
919 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 926 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
920 | { | 927 | { |
921 | IRIns *irb; | 928 | IRIns *irb; |
922 | if (ir->r == RID_SINK) | 929 | if (ir->r == RID_SINK) |
@@ -927,22 +934,34 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | |||
927 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); | 934 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); |
928 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); | 935 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); |
929 | } else { | 936 | } else { |
930 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 937 | Reg src = ra_alloc1(as, ir->op2, |
938 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | ||
931 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 939 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, |
932 | rset_exclude(RSET_GPR, src), ofs); | 940 | rset_exclude(RSET_GPR, src), ofs); |
933 | } | 941 | } |
934 | } | 942 | } |
935 | 943 | ||
944 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
945 | |||
936 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 946 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
937 | { | 947 | { |
938 | IRType1 t = ir->t; | 948 | IRType1 t = ir->t; |
939 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; | 949 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; |
940 | RegSet allow = RSET_GPR; | 950 | RegSet allow = RSET_GPR; |
941 | int32_t ofs = AHUREF_LSX; | 951 | int32_t ofs = AHUREF_LSX; |
952 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { | ||
953 | t.irt = IRT_NUM; | ||
954 | if (ra_used(ir+1)) { | ||
955 | type = ra_dest(as, ir+1, allow); | ||
956 | rset_clear(allow, type); | ||
957 | } | ||
958 | ofs = 0; | ||
959 | } | ||
942 | if (ra_used(ir)) { | 960 | if (ra_used(ir)) { |
943 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 961 | lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
944 | if (!irt_isnum(t)) ofs = 0; | 962 | irt_isint(ir->t) || irt_isaddr(ir->t)); |
945 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 963 | if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; |
964 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
946 | rset_clear(allow, dest); | 965 | rset_clear(allow, dest); |
947 | } | 966 | } |
948 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 967 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
@@ -951,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
951 | asm_guardcc(as, CC_GE); | 970 | asm_guardcc(as, CC_GE); |
952 | emit_ab(as, PPCI_CMPLW, type, tisnum); | 971 | emit_ab(as, PPCI_CMPLW, type, tisnum); |
953 | if (ra_hasreg(dest)) { | 972 | if (ra_hasreg(dest)) { |
954 | if (ofs == AHUREF_LSX) { | 973 | if (!LJ_SOFTFP && ofs == AHUREF_LSX) { |
955 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, | 974 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, |
956 | (idx&255)), (idx>>8))); | 975 | (idx&255)), (idx>>8))); |
957 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); | 976 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); |
958 | } else { | 977 | } else { |
959 | emit_fai(as, PPCI_LFD, dest, idx, ofs); | 978 | emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, |
979 | ofs+4*LJ_SOFTFP); | ||
960 | } | 980 | } |
961 | } | 981 | } |
962 | } else { | 982 | } else { |
@@ -979,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
979 | int32_t ofs = AHUREF_LSX; | 999 | int32_t ofs = AHUREF_LSX; |
980 | if (ir->r == RID_SINK) | 1000 | if (ir->r == RID_SINK) |
981 | return; | 1001 | return; |
982 | if (irt_isnum(ir->t)) { | 1002 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
983 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 1003 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
984 | } else { | 1004 | } else { |
985 | if (!irt_ispri(ir->t)) { | 1005 | if (!irt_ispri(ir->t)) { |
@@ -987,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
987 | rset_clear(allow, src); | 1007 | rset_clear(allow, src); |
988 | ofs = 0; | 1008 | ofs = 0; |
989 | } | 1009 | } |
990 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 1010 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
1011 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
1012 | else | ||
1013 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
991 | rset_clear(allow, type); | 1014 | rset_clear(allow, type); |
992 | } | 1015 | } |
993 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1016 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
994 | if (irt_isnum(ir->t)) { | 1017 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
995 | if (ofs == AHUREF_LSX) { | 1018 | if (ofs == AHUREF_LSX) { |
996 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); | 1019 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); |
997 | emit_slwi(as, RID_TMP, (idx>>8), 3); | 1020 | emit_slwi(as, RID_TMP, (idx>>8), 3); |
@@ -1016,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1016 | IRType1 t = ir->t; | 1039 | IRType1 t = ir->t; |
1017 | Reg dest = RID_NONE, type = RID_NONE, base; | 1040 | Reg dest = RID_NONE, type = RID_NONE, base; |
1018 | RegSet allow = RSET_GPR; | 1041 | RegSet allow = RSET_GPR; |
1042 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); | ||
1043 | if (hiop) | ||
1044 | t.irt = IRT_NUM; | ||
1019 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1045 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
1020 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1046 | lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); |
1021 | lua_assert(LJ_DUALNUM || | 1047 | lua_assert(LJ_DUALNUM || |
1022 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 1048 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); |
1049 | #if LJ_SOFTFP | ||
1050 | lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ | ||
1051 | if (hiop && ra_used(ir+1)) { | ||
1052 | type = ra_dest(as, ir+1, allow); | ||
1053 | rset_clear(allow, type); | ||
1054 | } | ||
1055 | #else | ||
1023 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1056 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
1024 | dest = ra_scratch(as, RSET_FPR); | 1057 | dest = ra_scratch(as, RSET_FPR); |
1025 | asm_tointg(as, ir, dest); | 1058 | asm_tointg(as, ir, dest); |
1026 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1059 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
1027 | } else if (ra_used(ir)) { | 1060 | } else |
1061 | #endif | ||
1062 | if (ra_used(ir)) { | ||
1028 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1063 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); |
1029 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1064 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); |
1030 | rset_clear(allow, dest); | 1065 | rset_clear(allow, dest); |
1031 | base = ra_alloc1(as, REF_BASE, allow); | 1066 | base = ra_alloc1(as, REF_BASE, allow); |
1032 | rset_clear(allow, base); | 1067 | rset_clear(allow, base); |
1033 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1068 | if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { |
1034 | if (irt_isint(t)) { | 1069 | if (irt_isint(t)) { |
1035 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 1070 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
1036 | dest = ra_scratch(as, RSET_FPR); | 1071 | dest = ra_scratch(as, RSET_FPR); |
@@ -1044,7 +1079,7 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1044 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 1079 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
1045 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 1080 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
1046 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 1081 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
1047 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 1082 | (void *)&as->J->k32[LJ_K32_2P52_2P31], |
1048 | rset_clear(allow, hibias)); | 1083 | rset_clear(allow, hibias)); |
1049 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); | 1084 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); |
1050 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); | 1085 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); |
@@ -1062,10 +1097,13 @@ dotypecheck: | |||
1062 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1097 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1063 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); | 1098 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); |
1064 | asm_guardcc(as, CC_GE); | 1099 | asm_guardcc(as, CC_GE); |
1065 | emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); | 1100 | #if !LJ_SOFTFP |
1066 | type = RID_TMP; | 1101 | type = RID_TMP; |
1102 | #endif | ||
1103 | emit_ab(as, PPCI_CMPLW, type, tisnum); | ||
1067 | } | 1104 | } |
1068 | if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); | 1105 | if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, |
1106 | base, ofs-(LJ_SOFTFP?0:4)); | ||
1069 | } else { | 1107 | } else { |
1070 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1108 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1071 | asm_guardcc(as, CC_NE); | 1109 | asm_guardcc(as, CC_NE); |
@@ -1083,19 +1121,15 @@ dotypecheck: | |||
1083 | static void asm_cnew(ASMState *as, IRIns *ir) | 1121 | static void asm_cnew(ASMState *as, IRIns *ir) |
1084 | { | 1122 | { |
1085 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1123 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1086 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1124 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1087 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1125 | CTSize sz; |
1088 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1126 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1089 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1127 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1090 | IRRef args[2]; | 1128 | IRRef args[4]; |
1091 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1092 | RegSet drop = RSET_SCRATCH; | 1129 | RegSet drop = RSET_SCRATCH; |
1093 | lua_assert(sz != CTSIZE_INVALID); | 1130 | lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); |
1094 | 1131 | ||
1095 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1096 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1097 | as->gcsteps++; | 1132 | as->gcsteps++; |
1098 | |||
1099 | if (ra_hasreg(ir->r)) | 1133 | if (ra_hasreg(ir->r)) |
1100 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1134 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1101 | ra_evictset(as, drop); | 1135 | ra_evictset(as, drop); |
@@ -1104,6 +1138,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1104 | 1138 | ||
1105 | /* Initialize immutable cdata object. */ | 1139 | /* Initialize immutable cdata object. */ |
1106 | if (ir->o == IR_CNEWI) { | 1140 | if (ir->o == IR_CNEWI) { |
1141 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1107 | int32_t ofs = sizeof(GCcdata); | 1142 | int32_t ofs = sizeof(GCcdata); |
1108 | lua_assert(sz == 4 || sz == 8); | 1143 | lua_assert(sz == 4 || sz == 8); |
1109 | if (sz == 8) { | 1144 | if (sz == 8) { |
@@ -1117,12 +1152,24 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1117 | if (ofs == sizeof(GCcdata)) break; | 1152 | if (ofs == sizeof(GCcdata)) break; |
1118 | ofs -= 4; ir++; | 1153 | ofs -= 4; ir++; |
1119 | } | 1154 | } |
1155 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1156 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1157 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1158 | args[1] = ir->op1; /* CTypeID id */ | ||
1159 | args[2] = ir->op2; /* CTSize sz */ | ||
1160 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1161 | asm_gencall(as, ci, args); | ||
1162 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1163 | return; | ||
1120 | } | 1164 | } |
1165 | |||
1121 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1166 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1122 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); | 1167 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); |
1123 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); | 1168 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); |
1124 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); | 1169 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); |
1125 | emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ | 1170 | emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ |
1171 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1172 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1126 | asm_gencall(as, ci, args); | 1173 | asm_gencall(as, ci, args); |
1127 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1174 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1128 | ra_releasetmp(as, ASMREF_TMP1)); | 1175 | ra_releasetmp(as, ASMREF_TMP1)); |
@@ -1178,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1178 | 1225 | ||
1179 | /* -- Arithmetic and logic operations ------------------------------------- */ | 1226 | /* -- Arithmetic and logic operations ------------------------------------- */ |
1180 | 1227 | ||
1228 | #if !LJ_SOFTFP | ||
1181 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) | 1229 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) |
1182 | { | 1230 | { |
1183 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1231 | Reg dest = ra_dest(as, ir, RSET_FPR); |
@@ -1196,31 +1244,26 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) | |||
1196 | emit_fb(as, pi, dest, left); | 1244 | emit_fb(as, pi, dest, left); |
1197 | } | 1245 | } |
1198 | 1246 | ||
1199 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1247 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1200 | { | 1248 | { |
1201 | IRIns *irp = IR(ir->op1); | 1249 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) |
1202 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1250 | return; |
1203 | IRIns *irpp = IR(irp->op1); | 1251 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) |
1204 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1252 | asm_fpunary(as, ir, PPCI_FSQRT); |
1205 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | 1253 | else |
1206 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1254 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1207 | IRRef args[2]; | ||
1208 | args[0] = irpp->op1; | ||
1209 | args[1] = irp->op2; | ||
1210 | asm_setupresult(as, ir, ci); | ||
1211 | asm_gencall(as, ci, args); | ||
1212 | return 1; | ||
1213 | } | ||
1214 | } | ||
1215 | return 0; | ||
1216 | } | 1255 | } |
1256 | #endif | ||
1217 | 1257 | ||
1218 | static void asm_add(ASMState *as, IRIns *ir) | 1258 | static void asm_add(ASMState *as, IRIns *ir) |
1219 | { | 1259 | { |
1260 | #if !LJ_SOFTFP | ||
1220 | if (irt_isnum(ir->t)) { | 1261 | if (irt_isnum(ir->t)) { |
1221 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) | 1262 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) |
1222 | asm_fparith(as, ir, PPCI_FADD); | 1263 | asm_fparith(as, ir, PPCI_FADD); |
1223 | } else { | 1264 | } else |
1265 | #endif | ||
1266 | { | ||
1224 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1267 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1225 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1268 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1226 | PPCIns pi; | 1269 | PPCIns pi; |
@@ -1259,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
1259 | 1302 | ||
1260 | static void asm_sub(ASMState *as, IRIns *ir) | 1303 | static void asm_sub(ASMState *as, IRIns *ir) |
1261 | { | 1304 | { |
1305 | #if !LJ_SOFTFP | ||
1262 | if (irt_isnum(ir->t)) { | 1306 | if (irt_isnum(ir->t)) { |
1263 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) | 1307 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) |
1264 | asm_fparith(as, ir, PPCI_FSUB); | 1308 | asm_fparith(as, ir, PPCI_FSUB); |
1265 | } else { | 1309 | } else |
1310 | #endif | ||
1311 | { | ||
1266 | PPCIns pi = PPCI_SUBF; | 1312 | PPCIns pi = PPCI_SUBF; |
1267 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1313 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1268 | Reg left, right; | 1314 | Reg left, right; |
@@ -1288,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir) | |||
1288 | 1334 | ||
1289 | static void asm_mul(ASMState *as, IRIns *ir) | 1335 | static void asm_mul(ASMState *as, IRIns *ir) |
1290 | { | 1336 | { |
1337 | #if !LJ_SOFTFP | ||
1291 | if (irt_isnum(ir->t)) { | 1338 | if (irt_isnum(ir->t)) { |
1292 | asm_fparith(as, ir, PPCI_FMUL); | 1339 | asm_fparith(as, ir, PPCI_FMUL); |
1293 | } else { | 1340 | } else |
1341 | #endif | ||
1342 | { | ||
1294 | PPCIns pi = PPCI_MULLW; | 1343 | PPCIns pi = PPCI_MULLW; |
1295 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1344 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1296 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1345 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
@@ -1312,11 +1361,18 @@ static void asm_mul(ASMState *as, IRIns *ir) | |||
1312 | } | 1361 | } |
1313 | } | 1362 | } |
1314 | 1363 | ||
1364 | #define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) | ||
1365 | #define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) | ||
1366 | #define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
1367 | |||
1315 | static void asm_neg(ASMState *as, IRIns *ir) | 1368 | static void asm_neg(ASMState *as, IRIns *ir) |
1316 | { | 1369 | { |
1370 | #if !LJ_SOFTFP | ||
1317 | if (irt_isnum(ir->t)) { | 1371 | if (irt_isnum(ir->t)) { |
1318 | asm_fpunary(as, ir, PPCI_FNEG); | 1372 | asm_fpunary(as, ir, PPCI_FNEG); |
1319 | } else { | 1373 | } else |
1374 | #endif | ||
1375 | { | ||
1320 | Reg dest, left; | 1376 | Reg dest, left; |
1321 | PPCIns pi = PPCI_NEG; | 1377 | PPCIns pi = PPCI_NEG; |
1322 | if (as->flagmcp == as->mcp) { | 1378 | if (as->flagmcp == as->mcp) { |
@@ -1330,6 +1386,10 @@ static void asm_neg(ASMState *as, IRIns *ir) | |||
1330 | } | 1386 | } |
1331 | } | 1387 | } |
1332 | 1388 | ||
1389 | #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) | ||
1390 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) | ||
1391 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1392 | |||
1333 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | 1393 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) |
1334 | { | 1394 | { |
1335 | Reg dest, left, right; | 1395 | Reg dest, left, right; |
@@ -1345,6 +1405,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | |||
1345 | emit_tab(as, pi|PPCF_DOT, dest, left, right); | 1405 | emit_tab(as, pi|PPCF_DOT, dest, left, right); |
1346 | } | 1406 | } |
1347 | 1407 | ||
1408 | #define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) | ||
1409 | #define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) | ||
1410 | #define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO) | ||
1411 | |||
1348 | #if LJ_HASFFI | 1412 | #if LJ_HASFFI |
1349 | static void asm_add64(ASMState *as, IRIns *ir) | 1413 | static void asm_add64(ASMState *as, IRIns *ir) |
1350 | { | 1414 | { |
@@ -1424,7 +1488,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) | |||
1424 | } | 1488 | } |
1425 | #endif | 1489 | #endif |
1426 | 1490 | ||
1427 | static void asm_bitnot(ASMState *as, IRIns *ir) | 1491 | static void asm_bnot(ASMState *as, IRIns *ir) |
1428 | { | 1492 | { |
1429 | Reg dest, left, right; | 1493 | Reg dest, left, right; |
1430 | PPCIns pi = PPCI_NOR; | 1494 | PPCIns pi = PPCI_NOR; |
@@ -1451,7 +1515,7 @@ nofuse: | |||
1451 | emit_asb(as, pi, dest, left, right); | 1515 | emit_asb(as, pi, dest, left, right); |
1452 | } | 1516 | } |
1453 | 1517 | ||
1454 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1518 | static void asm_bswap(ASMState *as, IRIns *ir) |
1455 | { | 1519 | { |
1456 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1520 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1457 | IRIns *irx; | 1521 | IRIns *irx; |
@@ -1472,32 +1536,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1472 | } | 1536 | } |
1473 | } | 1537 | } |
1474 | 1538 | ||
1475 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | ||
1476 | { | ||
1477 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1478 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1479 | if (irref_isk(ir->op2)) { | ||
1480 | int32_t k = IR(ir->op2)->i; | ||
1481 | Reg tmp = left; | ||
1482 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { | ||
1483 | if (!checku16(k)) { | ||
1484 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); | ||
1485 | if ((k & 0xffff) == 0) return; | ||
1486 | } | ||
1487 | emit_asi(as, pik, dest, left, k); | ||
1488 | return; | ||
1489 | } | ||
1490 | } | ||
1491 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1492 | if (as->flagmcp == as->mcp) { | ||
1493 | as->flagmcp = NULL; | ||
1494 | as->mcp++; | ||
1495 | pi |= PPCF_DOT; | ||
1496 | } | ||
1497 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1498 | emit_asb(as, pi, dest, left, right); | ||
1499 | } | ||
1500 | |||
1501 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ | 1539 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ |
1502 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) | 1540 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) |
1503 | { | 1541 | { |
@@ -1528,7 +1566,7 @@ nofuse: | |||
1528 | *--as->mcp = pi | PPCF_T(left); | 1566 | *--as->mcp = pi | PPCF_T(left); |
1529 | } | 1567 | } |
1530 | 1568 | ||
1531 | static void asm_bitand(ASMState *as, IRIns *ir) | 1569 | static void asm_band(ASMState *as, IRIns *ir) |
1532 | { | 1570 | { |
1533 | Reg dest, left, right; | 1571 | Reg dest, left, right; |
1534 | IRRef lref = ir->op1; | 1572 | IRRef lref = ir->op1; |
@@ -1583,6 +1621,35 @@ static void asm_bitand(ASMState *as, IRIns *ir) | |||
1583 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); | 1621 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); |
1584 | } | 1622 | } |
1585 | 1623 | ||
1624 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | ||
1625 | { | ||
1626 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1627 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1628 | if (irref_isk(ir->op2)) { | ||
1629 | int32_t k = IR(ir->op2)->i; | ||
1630 | Reg tmp = left; | ||
1631 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { | ||
1632 | if (!checku16(k)) { | ||
1633 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); | ||
1634 | if ((k & 0xffff) == 0) return; | ||
1635 | } | ||
1636 | emit_asi(as, pik, dest, left, k); | ||
1637 | return; | ||
1638 | } | ||
1639 | } | ||
1640 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1641 | if (as->flagmcp == as->mcp) { | ||
1642 | as->flagmcp = NULL; | ||
1643 | as->mcp++; | ||
1644 | pi |= PPCF_DOT; | ||
1645 | } | ||
1646 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1647 | emit_asb(as, pi, dest, left, right); | ||
1648 | } | ||
1649 | |||
1650 | #define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) | ||
1651 | #define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) | ||
1652 | |||
1586 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | 1653 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) |
1587 | { | 1654 | { |
1588 | Reg dest, left; | 1655 | Reg dest, left; |
@@ -1608,9 +1675,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | |||
1608 | } | 1675 | } |
1609 | } | 1676 | } |
1610 | 1677 | ||
1678 | #define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) | ||
1679 | #define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) | ||
1680 | #define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) | ||
1681 | #define asm_brol(as, ir) \ | ||
1682 | asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ | ||
1683 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) | ||
1684 | #define asm_bror(as, ir) lua_assert(0) | ||
1685 | |||
1686 | #if LJ_SOFTFP | ||
1687 | static void asm_sfpmin_max(ASMState *as, IRIns *ir) | ||
1688 | { | ||
1689 | CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
1690 | IRRef args[4]; | ||
1691 | MCLabel l_right, l_end; | ||
1692 | Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); | ||
1693 | Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); | ||
1694 | Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); | ||
1695 | PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; | ||
1696 | righthi = (lefthi >> 8); lefthi &= 255; | ||
1697 | rightlo = (leftlo >> 8); leftlo &= 255; | ||
1698 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; | ||
1699 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1700 | l_end = emit_label(as); | ||
1701 | if (desthi != righthi) emit_mr(as, desthi, righthi); | ||
1702 | if (destlo != rightlo) emit_mr(as, destlo, rightlo); | ||
1703 | l_right = emit_label(as); | ||
1704 | if (l_end != l_right) emit_jmp(as, l_end); | ||
1705 | if (desthi != lefthi) emit_mr(as, desthi, lefthi); | ||
1706 | if (destlo != leftlo) emit_mr(as, destlo, leftlo); | ||
1707 | if (l_right == as->mcp+1) { | ||
1708 | cond ^= 4; l_right = l_end; ++as->mcp; | ||
1709 | } | ||
1710 | emit_condbranch(as, PPCI_BC, cond, l_right); | ||
1711 | ra_evictset(as, RSET_SCRATCH); | ||
1712 | emit_cmpi(as, RID_RET, 1); | ||
1713 | asm_gencall(as, &ci, args); | ||
1714 | } | ||
1715 | #endif | ||
1716 | |||
1611 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | 1717 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) |
1612 | { | 1718 | { |
1613 | if (irt_isnum(ir->t)) { | 1719 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1614 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1720 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1615 | Reg tmp = dest; | 1721 | Reg tmp = dest; |
1616 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1722 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
@@ -1638,6 +1744,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | |||
1638 | } | 1744 | } |
1639 | } | 1745 | } |
1640 | 1746 | ||
1747 | #define asm_min(as, ir) asm_min_max(as, ir, 0) | ||
1748 | #define asm_max(as, ir) asm_min_max(as, ir, 1) | ||
1749 | |||
1641 | /* -- Comparisons --------------------------------------------------------- */ | 1750 | /* -- Comparisons --------------------------------------------------------- */ |
1642 | 1751 | ||
1643 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ | 1752 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ |
@@ -1695,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) | |||
1695 | static void asm_comp(ASMState *as, IRIns *ir) | 1804 | static void asm_comp(ASMState *as, IRIns *ir) |
1696 | { | 1805 | { |
1697 | PPCCC cc = asm_compmap[ir->o]; | 1806 | PPCCC cc = asm_compmap[ir->o]; |
1698 | if (irt_isnum(ir->t)) { | 1807 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1699 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1808 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1700 | right = (left >> 8); left &= 255; | 1809 | right = (left >> 8); left &= 255; |
1701 | asm_guardcc(as, (cc >> 4)); | 1810 | asm_guardcc(as, (cc >> 4)); |
@@ -1714,6 +1823,46 @@ static void asm_comp(ASMState *as, IRIns *ir) | |||
1714 | } | 1823 | } |
1715 | } | 1824 | } |
1716 | 1825 | ||
1826 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1827 | |||
1828 | #if LJ_SOFTFP | ||
1829 | /* SFP comparisons. */ | ||
1830 | static void asm_sfpcomp(ASMState *as, IRIns *ir) | ||
1831 | { | ||
1832 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
1833 | RegSet drop = RSET_SCRATCH; | ||
1834 | Reg r; | ||
1835 | IRRef args[4]; | ||
1836 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; | ||
1837 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1838 | |||
1839 | for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { | ||
1840 | if (!rset_test(as->freeset, r) && | ||
1841 | regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) | ||
1842 | rset_clear(drop, r); | ||
1843 | } | ||
1844 | ra_evictset(as, drop); | ||
1845 | asm_setupresult(as, ir, ci); | ||
1846 | switch ((IROp)ir->o) { | ||
1847 | case IR_ULT: | ||
1848 | asm_guardcc(as, CC_EQ); | ||
1849 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); | ||
1850 | case IR_ULE: | ||
1851 | asm_guardcc(as, CC_EQ); | ||
1852 | emit_ai(as, PPCI_CMPWI, RID_RET, 1); | ||
1853 | break; | ||
1854 | case IR_GE: case IR_GT: | ||
1855 | asm_guardcc(as, CC_EQ); | ||
1856 | emit_ai(as, PPCI_CMPWI, RID_RET, 2); | ||
1857 | default: | ||
1858 | asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); | ||
1859 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); | ||
1860 | break; | ||
1861 | } | ||
1862 | asm_gencall(as, ci, args); | ||
1863 | } | ||
1864 | #endif | ||
1865 | |||
1717 | #if LJ_HASFFI | 1866 | #if LJ_HASFFI |
1718 | /* 64 bit integer comparisons. */ | 1867 | /* 64 bit integer comparisons. */ |
1719 | static void asm_comp64(ASMState *as, IRIns *ir) | 1868 | static void asm_comp64(ASMState *as, IRIns *ir) |
@@ -1743,37 +1892,67 @@ static void asm_comp64(ASMState *as, IRIns *ir) | |||
1743 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 1892 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ |
1744 | static void asm_hiop(ASMState *as, IRIns *ir) | 1893 | static void asm_hiop(ASMState *as, IRIns *ir) |
1745 | { | 1894 | { |
1746 | #if LJ_HASFFI | 1895 | #if LJ_HASFFI || LJ_SOFTFP |
1747 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 1896 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
1748 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 1897 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
1749 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 1898 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
1750 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 1899 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
1751 | as->curins--; /* Always skip the CONV. */ | 1900 | as->curins--; /* Always skip the CONV. */ |
1901 | #if LJ_HASFFI && !LJ_SOFTFP | ||
1752 | if (usehi || uselo) | 1902 | if (usehi || uselo) |
1753 | asm_conv64(as, ir); | 1903 | asm_conv64(as, ir); |
1754 | return; | 1904 | return; |
1905 | #endif | ||
1755 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 1906 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
1756 | as->curins--; /* Always skip the loword comparison. */ | 1907 | as->curins--; /* Always skip the loword comparison. */ |
1908 | #if LJ_SOFTFP | ||
1909 | if (!irt_isint(ir->t)) { | ||
1910 | asm_sfpcomp(as, ir-1); | ||
1911 | return; | ||
1912 | } | ||
1913 | #endif | ||
1914 | #if LJ_HASFFI | ||
1757 | asm_comp64(as, ir); | 1915 | asm_comp64(as, ir); |
1916 | #endif | ||
1917 | return; | ||
1918 | #if LJ_SOFTFP | ||
1919 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | ||
1920 | as->curins--; /* Always skip the loword min/max. */ | ||
1921 | if (uselo || usehi) | ||
1922 | asm_sfpmin_max(as, ir-1); | ||
1758 | return; | 1923 | return; |
1924 | #endif | ||
1759 | } else if ((ir-1)->o == IR_XSTORE) { | 1925 | } else if ((ir-1)->o == IR_XSTORE) { |
1760 | as->curins--; /* Handle both stores here. */ | 1926 | as->curins--; /* Handle both stores here. */ |
1761 | if ((ir-1)->r != RID_SINK) { | 1927 | if ((ir-1)->r != RID_SINK) { |
1762 | asm_xstore(as, ir, 0); | 1928 | asm_xstore_(as, ir, 0); |
1763 | asm_xstore(as, ir-1, 4); | 1929 | asm_xstore_(as, ir-1, 4); |
1764 | } | 1930 | } |
1765 | return; | 1931 | return; |
1766 | } | 1932 | } |
1767 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1933 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
1768 | switch ((ir-1)->o) { | 1934 | switch ((ir-1)->o) { |
1935 | #if LJ_HASFFI | ||
1769 | case IR_ADD: as->curins--; asm_add64(as, ir); break; | 1936 | case IR_ADD: as->curins--; asm_add64(as, ir); break; |
1770 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; | 1937 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; |
1771 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; | 1938 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; |
1939 | #endif | ||
1940 | #if LJ_SOFTFP | ||
1941 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1942 | case IR_STRTO: | ||
1943 | if (!uselo) | ||
1944 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ | ||
1945 | break; | ||
1946 | #endif | ||
1772 | case IR_CALLN: | 1947 | case IR_CALLN: |
1948 | case IR_CALLS: | ||
1773 | case IR_CALLXS: | 1949 | case IR_CALLXS: |
1774 | if (!uselo) | 1950 | if (!uselo) |
1775 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 1951 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ |
1776 | break; | 1952 | break; |
1953 | #if LJ_SOFTFP | ||
1954 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: | ||
1955 | #endif | ||
1777 | case IR_CNEWI: | 1956 | case IR_CNEWI: |
1778 | /* Nothing to do here. Handled by lo op itself. */ | 1957 | /* Nothing to do here. Handled by lo op itself. */ |
1779 | break; | 1958 | break; |
@@ -1784,6 +1963,17 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1784 | #endif | 1963 | #endif |
1785 | } | 1964 | } |
1786 | 1965 | ||
1966 | /* -- Profiling ----------------------------------------------------------- */ | ||
1967 | |||
1968 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1969 | { | ||
1970 | UNUSED(ir); | ||
1971 | asm_guardcc(as, CC_NE); | ||
1972 | emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); | ||
1973 | emit_lsglptr(as, PPCI_LBZ, RID_TMP, | ||
1974 | (int32_t)offsetof(global_State, hookmask)); | ||
1975 | } | ||
1976 | |||
1787 | /* -- Stack handling ------------------------------------------------------ */ | 1977 | /* -- Stack handling ------------------------------------------------------ */ |
1788 | 1978 | ||
1789 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | 1979 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ |
@@ -1805,7 +1995,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1805 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); | 1995 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); |
1806 | if (pbase == RID_TMP) | 1996 | if (pbase == RID_TMP) |
1807 | emit_getgl(as, RID_TMP, jit_base); | 1997 | emit_getgl(as, RID_TMP, jit_base); |
1808 | emit_getgl(as, tmp, jit_L); | 1998 | emit_getgl(as, tmp, cur_L); |
1809 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 1999 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
1810 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); | 2000 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); |
1811 | } | 2001 | } |
@@ -1826,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1826 | if ((sn & SNAP_NORESTORE)) | 2016 | if ((sn & SNAP_NORESTORE)) |
1827 | continue; | 2017 | continue; |
1828 | if (irt_isnum(ir->t)) { | 2018 | if (irt_isnum(ir->t)) { |
2019 | #if LJ_SOFTFP | ||
2020 | Reg tmp; | ||
2021 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
2022 | lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ | ||
2023 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); | ||
2024 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); | ||
2025 | if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); | ||
2026 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); | ||
2027 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); | ||
2028 | #else | ||
1829 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2029 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
1830 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); | 2030 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); |
2031 | #endif | ||
1831 | } else { | 2032 | } else { |
1832 | Reg type; | 2033 | Reg type; |
1833 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | 2034 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); |
@@ -1840,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1840 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2041 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
1841 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ | 2042 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ |
1842 | type = ra_allock(as, (int32_t)(*flinks--), allow); | 2043 | type = ra_allock(as, (int32_t)(*flinks--), allow); |
2044 | #if LJ_SOFTFP | ||
2045 | } else if ((sn & SNAP_SOFTFPNUM)) { | ||
2046 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | ||
2047 | #endif | ||
1843 | } else { | 2048 | } else { |
1844 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2049 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
1845 | } | 2050 | } |
@@ -1966,147 +2171,25 @@ static void asm_tail_prep(ASMState *as) | |||
1966 | } | 2171 | } |
1967 | } | 2172 | } |
1968 | 2173 | ||
1969 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1970 | |||
1971 | /* Assemble a single instruction. */ | ||
1972 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1973 | { | ||
1974 | switch ((IROp)ir->o) { | ||
1975 | /* Miscellaneous ops. */ | ||
1976 | case IR_LOOP: asm_loop(as); break; | ||
1977 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
1978 | case IR_USE: | ||
1979 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1980 | case IR_PHI: asm_phi(as, ir); break; | ||
1981 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1982 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1983 | |||
1984 | /* Guarded assertions. */ | ||
1985 | case IR_EQ: case IR_NE: | ||
1986 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1987 | as->curins--; | ||
1988 | asm_href(as, ir-1, (IROp)ir->o); | ||
1989 | break; | ||
1990 | } | ||
1991 | /* fallthrough */ | ||
1992 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1993 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1994 | case IR_ABC: | ||
1995 | asm_comp(as, ir); | ||
1996 | break; | ||
1997 | |||
1998 | case IR_RETF: asm_retf(as, ir); break; | ||
1999 | |||
2000 | /* Bit ops. */ | ||
2001 | case IR_BNOT: asm_bitnot(as, ir); break; | ||
2002 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2003 | |||
2004 | case IR_BAND: asm_bitand(as, ir); break; | ||
2005 | case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break; | ||
2006 | case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break; | ||
2007 | |||
2008 | case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break; | ||
2009 | case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break; | ||
2010 | case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break; | ||
2011 | case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), | ||
2012 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break; | ||
2013 | case IR_BROR: lua_assert(0); break; | ||
2014 | |||
2015 | /* Arithmetic ops. */ | ||
2016 | case IR_ADD: asm_add(as, ir); break; | ||
2017 | case IR_SUB: asm_sub(as, ir); break; | ||
2018 | case IR_MUL: asm_mul(as, ir); break; | ||
2019 | case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break; | ||
2020 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2021 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2022 | case IR_NEG: asm_neg(as, ir); break; | ||
2023 | |||
2024 | case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break; | ||
2025 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2026 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2027 | case IR_MIN: asm_min_max(as, ir, 0); break; | ||
2028 | case IR_MAX: asm_min_max(as, ir, 1); break; | ||
2029 | case IR_FPMATH: | ||
2030 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2031 | break; | ||
2032 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) | ||
2033 | asm_fpunary(as, ir, PPCI_FSQRT); | ||
2034 | else | ||
2035 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2036 | break; | ||
2037 | |||
2038 | /* Overflow-checking arithmetic ops. */ | ||
2039 | case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break; | ||
2040 | case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break; | ||
2041 | case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break; | ||
2042 | |||
2043 | /* Memory references. */ | ||
2044 | case IR_AREF: asm_aref(as, ir); break; | ||
2045 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2046 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2047 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2048 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2049 | case IR_FREF: asm_fref(as, ir); break; | ||
2050 | case IR_STRREF: asm_strref(as, ir); break; | ||
2051 | |||
2052 | /* Loads and stores. */ | ||
2053 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2054 | asm_ahuvload(as, ir); | ||
2055 | break; | ||
2056 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2057 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2058 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2059 | |||
2060 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2061 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2062 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2063 | |||
2064 | /* Allocations. */ | ||
2065 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2066 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2067 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2068 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2069 | |||
2070 | /* Write barriers. */ | ||
2071 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2072 | case IR_OBAR: asm_obar(as, ir); break; | ||
2073 | |||
2074 | /* Type conversions. */ | ||
2075 | case IR_CONV: asm_conv(as, ir); break; | ||
2076 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2077 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2078 | case IR_STRTO: asm_strto(as, ir); break; | ||
2079 | |||
2080 | /* Calls. */ | ||
2081 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2082 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2083 | case IR_CARG: break; | ||
2084 | |||
2085 | default: | ||
2086 | setintV(&as->J->errinfo, ir->o); | ||
2087 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2088 | break; | ||
2089 | } | ||
2090 | } | ||
2091 | |||
2092 | /* -- Trace setup --------------------------------------------------------- */ | 2174 | /* -- Trace setup --------------------------------------------------------- */ |
2093 | 2175 | ||
2094 | /* Ensure there are enough stack slots for call arguments. */ | 2176 | /* Ensure there are enough stack slots for call arguments. */ |
2095 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2177 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2096 | { | 2178 | { |
2097 | IRRef args[CCI_NARGS_MAX*2]; | 2179 | IRRef args[CCI_NARGS_MAX*2]; |
2098 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2180 | uint32_t i, nargs = CCI_XNARGS(ci); |
2099 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | 2181 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; |
2100 | asm_collectargs(as, ir, ci, args); | 2182 | asm_collectargs(as, ir, ci, args); |
2101 | for (i = 0; i < nargs; i++) | 2183 | for (i = 0; i < nargs; i++) |
2102 | if (args[i] && irt_isfp(IR(args[i])->t)) { | 2184 | if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { |
2103 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; | 2185 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; |
2104 | } else { | 2186 | } else { |
2105 | if (ngpr > 0) ngpr--; else nslots++; | 2187 | if (ngpr > 0) ngpr--; else nslots++; |
2106 | } | 2188 | } |
2107 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | 2189 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ |
2108 | as->evenspill = nslots; | 2190 | as->evenspill = nslots; |
2109 | return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); | 2191 | return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : |
2192 | REGSP_HINT(RID_RET); | ||
2110 | } | 2193 | } |
2111 | 2194 | ||
2112 | static void asm_setup_target(ASMState *as) | 2195 | static void asm_setup_target(ASMState *as) |