diff options
Diffstat (limited to 'src/lj_asm_ppc.h')
-rw-r--r-- | src/lj_asm_ppc.h | 913 |
1 files changed, 529 insertions, 384 deletions
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 5fd35d2e..5ea4d47d 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |||
156 | return ra_allock(as, ofs-(int16_t)ofs, allow); | 156 | return ra_allock(as, ofs-(int16_t)ofs, allow); |
157 | } | 157 | } |
158 | } | 158 | } |
159 | } else if (ir->o == IR_TMPREF) { | ||
160 | *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); | ||
161 | return RID_JGL; | ||
159 | } | 162 | } |
160 | } | 163 | } |
161 | *ofsp = 0; | 164 | *ofsp = 0; |
@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
181 | return; | 184 | return; |
182 | } | 185 | } |
183 | } else if (ir->o == IR_STRREF) { | 186 | } else if (ir->o == IR_STRREF) { |
184 | lua_assert(ofs == 0); | 187 | lj_assertA(ofs == 0, "bad usage"); |
185 | ofs = (int32_t)sizeof(GCstr); | 188 | ofs = (int32_t)sizeof(GCstr); |
186 | if (irref_isk(ir->op2)) { | 189 | if (irref_isk(ir->op2)) { |
187 | ofs += IR(ir->op2)->i; | 190 | ofs += IR(ir->op2)->i; |
@@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |||
226 | emit_tab(as, pi, rt, left, right); | 229 | emit_tab(as, pi, rt, left, right); |
227 | } | 230 | } |
228 | 231 | ||
232 | #if !LJ_SOFTFP | ||
229 | /* Fuse to multiply-add/sub instruction. */ | 233 | /* Fuse to multiply-add/sub instruction. */ |
230 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | 234 | static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) |
231 | { | 235 | { |
@@ -245,24 +249,30 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | |||
245 | } | 249 | } |
246 | return 0; | 250 | return 0; |
247 | } | 251 | } |
252 | #endif | ||
248 | 253 | ||
249 | /* -- Calls --------------------------------------------------------------- */ | 254 | /* -- Calls --------------------------------------------------------------- */ |
250 | 255 | ||
251 | /* Generate a call to a C function. */ | 256 | /* Generate a call to a C function. */ |
252 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 257 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
253 | { | 258 | { |
254 | uint32_t n, nargs = CCI_NARGS(ci); | 259 | uint32_t n, nargs = CCI_XNARGS(ci); |
255 | int32_t ofs = 8; | 260 | int32_t ofs = 8; |
256 | Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; | 261 | Reg gpr = REGARG_FIRSTGPR; |
262 | #if !LJ_SOFTFP | ||
263 | Reg fpr = REGARG_FIRSTFPR; | ||
264 | #endif | ||
257 | if ((void *)ci->func) | 265 | if ((void *)ci->func) |
258 | emit_call(as, (void *)ci->func); | 266 | emit_call(as, (void *)ci->func); |
259 | for (n = 0; n < nargs; n++) { /* Setup args. */ | 267 | for (n = 0; n < nargs; n++) { /* Setup args. */ |
260 | IRRef ref = args[n]; | 268 | IRRef ref = args[n]; |
261 | if (ref) { | 269 | if (ref) { |
262 | IRIns *ir = IR(ref); | 270 | IRIns *ir = IR(ref); |
271 | #if !LJ_SOFTFP | ||
263 | if (irt_isfp(ir->t)) { | 272 | if (irt_isfp(ir->t)) { |
264 | if (fpr <= REGARG_LASTFPR) { | 273 | if (fpr <= REGARG_LASTFPR) { |
265 | lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ | 274 | lj_assertA(rset_test(as->freeset, fpr), |
275 | "reg %d not free", fpr); /* Already evicted. */ | ||
266 | ra_leftov(as, fpr, ref); | 276 | ra_leftov(as, fpr, ref); |
267 | fpr++; | 277 | fpr++; |
268 | } else { | 278 | } else { |
@@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
271 | emit_spstore(as, ir, r, ofs); | 281 | emit_spstore(as, ir, r, ofs); |
272 | ofs += irt_isnum(ir->t) ? 8 : 4; | 282 | ofs += irt_isnum(ir->t) ? 8 : 4; |
273 | } | 283 | } |
274 | } else { | 284 | } else |
285 | #endif | ||
286 | { | ||
275 | if (gpr <= REGARG_LASTGPR) { | 287 | if (gpr <= REGARG_LASTGPR) { |
276 | lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ | 288 | lj_assertA(rset_test(as->freeset, gpr), |
289 | "reg %d not free", gpr); /* Already evicted. */ | ||
277 | ra_leftov(as, gpr, ref); | 290 | ra_leftov(as, gpr, ref); |
278 | gpr++; | 291 | gpr++; |
279 | } else { | 292 | } else { |
@@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
290 | } | 303 | } |
291 | checkmclim(as); | 304 | checkmclim(as); |
292 | } | 305 | } |
306 | #if !LJ_SOFTFP | ||
293 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ | 307 | if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ |
294 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); | 308 | emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); |
309 | #endif | ||
295 | } | 310 | } |
296 | 311 | ||
297 | /* Setup result reg/sp for call. Evict scratch regs. */ | 312 | /* Setup result reg/sp for call. Evict scratch regs. */ |
@@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
299 | { | 314 | { |
300 | RegSet drop = RSET_SCRATCH; | 315 | RegSet drop = RSET_SCRATCH; |
301 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); | 316 | int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); |
317 | #if !LJ_SOFTFP | ||
302 | if ((ci->flags & CCI_NOFPRCLOBBER)) | 318 | if ((ci->flags & CCI_NOFPRCLOBBER)) |
303 | drop &= ~RSET_FPR; | 319 | drop &= ~RSET_FPR; |
320 | #endif | ||
304 | if (ra_hasreg(ir->r)) | 321 | if (ra_hasreg(ir->r)) |
305 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 322 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
306 | if (hiop && ra_hasreg((ir+1)->r)) | 323 | if (hiop && ra_hasreg((ir+1)->r)) |
307 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ | 324 | rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ |
308 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 325 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
309 | if (ra_used(ir)) { | 326 | if (ra_used(ir)) { |
310 | lua_assert(!irt_ispri(ir->t)); | 327 | lj_assertA(!irt_ispri(ir->t), "PRI dest"); |
311 | if (irt_isfp(ir->t)) { | 328 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
312 | if ((ci->flags & CCI_CASTU64)) { | 329 | if ((ci->flags & CCI_CASTU64)) { |
313 | /* Use spill slot or temp slots. */ | 330 | /* Use spill slot or temp slots. */ |
314 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | 331 | int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; |
@@ -331,15 +348,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
331 | } | 348 | } |
332 | } | 349 | } |
333 | 350 | ||
334 | static void asm_call(ASMState *as, IRIns *ir) | ||
335 | { | ||
336 | IRRef args[CCI_NARGS_MAX]; | ||
337 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
338 | asm_collectargs(as, ir, ci, args); | ||
339 | asm_setupresult(as, ir, ci); | ||
340 | asm_gencall(as, ci, args); | ||
341 | } | ||
342 | |||
343 | static void asm_callx(ASMState *as, IRIns *ir) | 351 | static void asm_callx(ASMState *as, IRIns *ir) |
344 | { | 352 | { |
345 | IRRef args[CCI_NARGS_MAX*2]; | 353 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -352,7 +360,7 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
352 | func = ir->op2; irf = IR(func); | 360 | func = ir->op2; irf = IR(func); |
353 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } | 361 | if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } |
354 | if (irref_isk(func)) { /* Call to constant address. */ | 362 | if (irref_isk(func)) { /* Call to constant address. */ |
355 | ci.func = (ASMFunction)(void *)(irf->i); | 363 | ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); |
356 | } else { /* Need a non-argument register for indirect calls. */ | 364 | } else { /* Need a non-argument register for indirect calls. */ |
357 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); | 365 | RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); |
358 | Reg freg = ra_alloc1(as, func, allow); | 366 | Reg freg = ra_alloc1(as, func, allow); |
@@ -363,16 +371,6 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
363 | asm_gencall(as, &ci, args); | 371 | asm_gencall(as, &ci, args); |
364 | } | 372 | } |
365 | 373 | ||
366 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
367 | { | ||
368 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
369 | IRRef args[2]; | ||
370 | args[0] = ir->op1; | ||
371 | args[1] = ir->op2; | ||
372 | asm_setupresult(as, ir, ci); | ||
373 | asm_gencall(as, ci, args); | ||
374 | } | ||
375 | |||
376 | /* -- Returns ------------------------------------------------------------- */ | 374 | /* -- Returns ------------------------------------------------------------- */ |
377 | 375 | ||
378 | /* Return to lower frame. Guard that it goes to the right spot. */ | 376 | /* Return to lower frame. Guard that it goes to the right spot. */ |
@@ -380,7 +378,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
380 | { | 378 | { |
381 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 379 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
382 | void *pc = ir_kptr(IR(ir->op2)); | 380 | void *pc = ir_kptr(IR(ir->op2)); |
383 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 381 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
384 | as->topslot -= (BCReg)delta; | 382 | as->topslot -= (BCReg)delta; |
385 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 383 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
386 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 384 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
@@ -392,8 +390,24 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
392 | emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); | 390 | emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); |
393 | } | 391 | } |
394 | 392 | ||
393 | /* -- Buffer operations --------------------------------------------------- */ | ||
394 | |||
395 | #if LJ_HASBUFFER | ||
396 | static void asm_bufhdr_write(ASMState *as, Reg sb) | ||
397 | { | ||
398 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
399 | IRIns irgc; | ||
400 | irgc.ot = IRT(0, IRT_PGC); /* GC type. */ | ||
401 | emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); | ||
402 | emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31); | ||
403 | emit_getgl(as, RID_TMP, cur_L); | ||
404 | emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); | ||
405 | } | ||
406 | #endif | ||
407 | |||
395 | /* -- Type conversions ---------------------------------------------------- */ | 408 | /* -- Type conversions ---------------------------------------------------- */ |
396 | 409 | ||
410 | #if !LJ_SOFTFP | ||
397 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | 411 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) |
398 | { | 412 | { |
399 | RegSet allow = RSET_FPR; | 413 | RegSet allow = RSET_FPR; |
@@ -410,8 +424,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
410 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); | 424 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); |
411 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 425 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
412 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 426 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
413 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 427 | (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); |
414 | RSET_GPR); | ||
415 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 428 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
416 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 429 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
417 | } | 430 | } |
@@ -427,15 +440,27 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
427 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 440 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
428 | emit_fab(as, PPCI_FADD, tmp, left, right); | 441 | emit_fab(as, PPCI_FADD, tmp, left, right); |
429 | } | 442 | } |
443 | #endif | ||
430 | 444 | ||
431 | static void asm_conv(ASMState *as, IRIns *ir) | 445 | static void asm_conv(ASMState *as, IRIns *ir) |
432 | { | 446 | { |
433 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 447 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
448 | #if !LJ_SOFTFP | ||
434 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 449 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
450 | #endif | ||
435 | IRRef lref = ir->op1; | 451 | IRRef lref = ir->op1; |
436 | lua_assert(irt_type(ir->t) != st); | 452 | /* 64 bit integer conversions are handled by SPLIT. */ |
437 | lua_assert(!(irt_isint64(ir->t) || | 453 | lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), |
438 | (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ | 454 | "IR %04d has unsplit 64 bit type", |
455 | (int)(ir - as->ir) - REF_BIAS); | ||
456 | #if LJ_SOFTFP | ||
457 | /* FP conversions are handled by SPLIT. */ | ||
458 | lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), | ||
459 | "IR %04d has FP type", | ||
460 | (int)(ir - as->ir) - REF_BIAS); | ||
461 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | ||
462 | #else | ||
463 | lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); | ||
439 | if (irt_isfp(ir->t)) { | 464 | if (irt_isfp(ir->t)) { |
440 | Reg dest = ra_dest(as, ir, RSET_FPR); | 465 | Reg dest = ra_dest(as, ir, RSET_FPR); |
441 | if (stfp) { /* FP to FP conversion. */ | 466 | if (stfp) { /* FP to FP conversion. */ |
@@ -450,13 +475,11 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
450 | Reg left = ra_alloc1(as, lref, allow); | 475 | Reg left = ra_alloc1(as, lref, allow); |
451 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); | 476 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); |
452 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 477 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
453 | const float *kbias; | ||
454 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); | 478 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); |
455 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 479 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
456 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 480 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
457 | kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); | 481 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
458 | if (st == IRT_U32) kbias++; | 482 | &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], |
459 | emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, | ||
460 | rset_clear(allow, hibias)); | 483 | rset_clear(allow, hibias)); |
461 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, | 484 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, |
462 | RID_SP, SPOFS_TMPLO); | 485 | RID_SP, SPOFS_TMPLO); |
@@ -466,7 +489,8 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
466 | } else if (stfp) { /* FP to integer conversion. */ | 489 | } else if (stfp) { /* FP to integer conversion. */ |
467 | if (irt_isguard(ir->t)) { | 490 | if (irt_isguard(ir->t)) { |
468 | /* Checked conversions are only supported from number to int. */ | 491 | /* Checked conversions are only supported from number to int. */ |
469 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | 492 | lj_assertA(irt_isint(ir->t) && st == IRT_NUM, |
493 | "bad type for checked CONV"); | ||
470 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | 494 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); |
471 | } else { | 495 | } else { |
472 | Reg dest = ra_dest(as, ir, RSET_GPR); | 496 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -489,19 +513,20 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
489 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); | 513 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); |
490 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); | 514 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); |
491 | emit_lsptr(as, PPCI_LFS, (tmp & 31), | 515 | emit_lsptr(as, PPCI_LFS, (tmp & 31), |
492 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), | 516 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
493 | RSET_GPR); | ||
494 | } else { | 517 | } else { |
495 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 518 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
496 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 519 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
497 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 520 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
498 | } | 521 | } |
499 | } | 522 | } |
500 | } else { | 523 | } else |
524 | #endif | ||
525 | { | ||
501 | Reg dest = ra_dest(as, ir, RSET_GPR); | 526 | Reg dest = ra_dest(as, ir, RSET_GPR); |
502 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 527 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
503 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 528 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
504 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | 529 | lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); |
505 | if ((ir->op2 & IRCONV_SEXT)) | 530 | if ((ir->op2 & IRCONV_SEXT)) |
506 | emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); | 531 | emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); |
507 | else | 532 | else |
@@ -513,90 +538,102 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
513 | } | 538 | } |
514 | } | 539 | } |
515 | 540 | ||
516 | #if LJ_HASFFI | ||
517 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
518 | { | ||
519 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
520 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
521 | IRCallID id; | ||
522 | const CCallInfo *ci; | ||
523 | IRRef args[2]; | ||
524 | args[0] = ir->op1; | ||
525 | args[1] = (ir-1)->op1; | ||
526 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
527 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
528 | ir--; | ||
529 | } else { | ||
530 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
531 | } | ||
532 | ci = &lj_ir_callinfo[id]; | ||
533 | asm_setupresult(as, ir, ci); | ||
534 | asm_gencall(as, ci, args); | ||
535 | } | ||
536 | #endif | ||
537 | |||
538 | static void asm_strto(ASMState *as, IRIns *ir) | 541 | static void asm_strto(ASMState *as, IRIns *ir) |
539 | { | 542 | { |
540 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 543 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
541 | IRRef args[2]; | 544 | IRRef args[2]; |
542 | int32_t ofs; | 545 | int32_t ofs = SPOFS_TMP; |
546 | #if LJ_SOFTFP | ||
547 | ra_evictset(as, RSET_SCRATCH); | ||
548 | if (ra_used(ir)) { | ||
549 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | ||
550 | (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { | ||
551 | int i; | ||
552 | for (i = 0; i < 2; i++) { | ||
553 | Reg r = (ir+i)->r; | ||
554 | if (ra_hasreg(r)) { | ||
555 | ra_free(as, r); | ||
556 | ra_modified(as, r); | ||
557 | emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); | ||
558 | } | ||
559 | } | ||
560 | ofs = sps_scale(ir->s & ~1); | ||
561 | } else { | ||
562 | Reg rhi = ra_dest(as, ir+1, RSET_GPR); | ||
563 | Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); | ||
564 | emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); | ||
565 | emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); | ||
566 | } | ||
567 | } | ||
568 | #else | ||
543 | RegSet drop = RSET_SCRATCH; | 569 | RegSet drop = RSET_SCRATCH; |
544 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ | 570 | if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ |
545 | ra_evictset(as, drop); | 571 | ra_evictset(as, drop); |
572 | if (ir->s) ofs = sps_scale(ir->s); | ||
573 | #endif | ||
546 | asm_guardcc(as, CC_EQ); | 574 | asm_guardcc(as, CC_EQ); |
547 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ | 575 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ |
548 | args[0] = ir->op1; /* GCstr *str */ | 576 | args[0] = ir->op1; /* GCstr *str */ |
549 | args[1] = ASMREF_TMP1; /* TValue *n */ | 577 | args[1] = ASMREF_TMP1; /* TValue *n */ |
550 | asm_gencall(as, ci, args); | 578 | asm_gencall(as, ci, args); |
551 | /* Store the result to the spill slot or temp slots. */ | 579 | /* Store the result to the spill slot or temp slots. */ |
552 | ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | ||
553 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); | 580 | emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); |
554 | } | 581 | } |
555 | 582 | ||
583 | /* -- Memory references --------------------------------------------------- */ | ||
584 | |||
556 | /* Get pointer to TValue. */ | 585 | /* Get pointer to TValue. */ |
557 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 586 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) |
558 | { | 587 | { |
559 | IRIns *ir = IR(ref); | 588 | int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); |
560 | if (irt_isnum(ir->t)) { | 589 | if ((mode & IRTMPREF_IN1)) { |
561 | if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ | 590 | IRIns *ir = IR(ref); |
562 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | 591 | if (irt_isnum(ir->t)) { |
563 | else /* Otherwise force a spill and use the spill slot. */ | 592 | if ((mode & IRTMPREF_OUT1)) { |
564 | emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); | 593 | #if LJ_SOFTFP |
565 | } else { | 594 | lj_assertA(irref_isk(ref), "unsplit FP op"); |
566 | /* Otherwise use g->tmptv to hold the TValue. */ | 595 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); |
567 | RegSet allow = rset_exclude(RSET_GPR, dest); | 596 | emit_setgl(as, |
568 | Reg type; | 597 | ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), |
569 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); | 598 | tmptv.u32.lo); |
570 | if (!irt_ispri(ir->t)) { | 599 | emit_setgl(as, |
571 | Reg src = ra_alloc1(as, ref, allow); | 600 | ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), |
572 | emit_setgl(as, src, tmptv.gcr); | 601 | tmptv.u32.hi); |
602 | #else | ||
603 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
604 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); | ||
605 | emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs); | ||
606 | #endif | ||
607 | } else if (irref_isk(ref)) { | ||
608 | /* Use the number constant itself as a TValue. */ | ||
609 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | ||
610 | } else { | ||
611 | #if LJ_SOFTFP | ||
612 | lj_assertA(0, "unsplit FP op"); | ||
613 | #else | ||
614 | /* Otherwise force a spill and use the spill slot. */ | ||
615 | emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); | ||
616 | #endif | ||
617 | } | ||
618 | } else { | ||
619 | /* Otherwise use g->tmptv to hold the TValue. */ | ||
620 | Reg type; | ||
621 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); | ||
622 | if (!irt_ispri(ir->t)) { | ||
623 | Reg src = ra_alloc1(as, ref, RSET_GPR); | ||
624 | emit_setgl(as, src, tmptv.gcr); | ||
625 | } | ||
626 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) | ||
627 | type = ra_alloc1(as, ref+1, RSET_GPR); | ||
628 | else | ||
629 | type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); | ||
630 | emit_setgl(as, type, tmptv.it); | ||
573 | } | 631 | } |
574 | type = ra_allock(as, irt_toitype(ir->t), allow); | ||
575 | emit_setgl(as, type, tmptv.it); | ||
576 | } | ||
577 | } | ||
578 | |||
579 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
580 | { | ||
581 | IRRef args[2]; | ||
582 | args[0] = ASMREF_L; | ||
583 | as->gcsteps++; | ||
584 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
585 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
586 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
587 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
588 | asm_gencall(as, ci, args); | ||
589 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
590 | } else { | 632 | } else { |
591 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | 633 | emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); |
592 | args[1] = ir->op1; /* int32_t k */ | ||
593 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
594 | asm_gencall(as, ci, args); | ||
595 | } | 634 | } |
596 | } | 635 | } |
597 | 636 | ||
598 | /* -- Memory references --------------------------------------------------- */ | ||
599 | |||
600 | static void asm_aref(ASMState *as, IRIns *ir) | 637 | static void asm_aref(ASMState *as, IRIns *ir) |
601 | { | 638 | { |
602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 639 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -636,11 +673,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
636 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; | 673 | Reg tisnum = RID_NONE, tmpnum = RID_NONE; |
637 | IRRef refkey = ir->op2; | 674 | IRRef refkey = ir->op2; |
638 | IRIns *irkey = IR(refkey); | 675 | IRIns *irkey = IR(refkey); |
676 | int isk = irref_isk(refkey); | ||
639 | IRType1 kt = irkey->t; | 677 | IRType1 kt = irkey->t; |
640 | uint32_t khash; | 678 | uint32_t khash; |
641 | MCLabel l_end, l_loop, l_next; | 679 | MCLabel l_end, l_loop, l_next; |
642 | 680 | ||
643 | rset_clear(allow, tab); | 681 | rset_clear(allow, tab); |
682 | #if LJ_SOFTFP | ||
683 | if (!isk) { | ||
684 | key = ra_alloc1(as, refkey, allow); | ||
685 | rset_clear(allow, key); | ||
686 | if (irkey[1].o == IR_HIOP) { | ||
687 | if (ra_hasreg((irkey+1)->r)) { | ||
688 | tmpnum = (irkey+1)->r; | ||
689 | ra_noweak(as, tmpnum); | ||
690 | } else { | ||
691 | tmpnum = ra_allocref(as, refkey+1, allow); | ||
692 | } | ||
693 | rset_clear(allow, tmpnum); | ||
694 | } | ||
695 | } | ||
696 | #else | ||
644 | if (irt_isnum(kt)) { | 697 | if (irt_isnum(kt)) { |
645 | key = ra_alloc1(as, refkey, RSET_FPR); | 698 | key = ra_alloc1(as, refkey, RSET_FPR); |
646 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); | 699 | tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); |
@@ -650,6 +703,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
650 | key = ra_alloc1(as, refkey, allow); | 703 | key = ra_alloc1(as, refkey, allow); |
651 | rset_clear(allow, key); | 704 | rset_clear(allow, key); |
652 | } | 705 | } |
706 | #endif | ||
653 | tmp2 = ra_scratch(as, allow); | 707 | tmp2 = ra_scratch(as, allow); |
654 | rset_clear(allow, tmp2); | 708 | rset_clear(allow, tmp2); |
655 | 709 | ||
@@ -672,7 +726,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
672 | asm_guardcc(as, CC_EQ); | 726 | asm_guardcc(as, CC_EQ); |
673 | else | 727 | else |
674 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); | 728 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); |
675 | if (irt_isnum(kt)) { | 729 | if (!LJ_SOFTFP && irt_isnum(kt)) { |
676 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); | 730 | emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); |
677 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); | 731 | emit_condbranch(as, PPCI_BC, CC_GE, l_next); |
678 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); | 732 | emit_ab(as, PPCI_CMPLW, tmp1, tisnum); |
@@ -682,7 +736,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
682 | emit_ab(as, PPCI_CMPW, tmp2, key); | 736 | emit_ab(as, PPCI_CMPW, tmp2, key); |
683 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); | 737 | emit_condbranch(as, PPCI_BC, CC_NE, l_next); |
684 | } | 738 | } |
685 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | 739 | if (LJ_SOFTFP && ra_hasreg(tmpnum)) |
740 | emit_ab(as, PPCI_CMPW, tmp1, tmpnum); | ||
741 | else | ||
742 | emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | ||
686 | if (!irt_ispri(kt)) | 743 | if (!irt_ispri(kt)) |
687 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); | 744 | emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); |
688 | } | 745 | } |
@@ -691,35 +748,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
691 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); | 748 | (((char *)as->mcp-(char *)l_loop) & 0xffffu); |
692 | 749 | ||
693 | /* Load main position relative to tab->node into dest. */ | 750 | /* Load main position relative to tab->node into dest. */ |
694 | khash = irref_isk(refkey) ? ir_khash(irkey) : 1; | 751 | khash = isk ? ir_khash(as, irkey) : 1; |
695 | if (khash == 0) { | 752 | if (khash == 0) { |
696 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 753 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
697 | } else { | 754 | } else { |
698 | Reg tmphash = tmp1; | 755 | Reg tmphash = tmp1; |
699 | if (irref_isk(refkey)) | 756 | if (isk) |
700 | tmphash = ra_allock(as, khash, allow); | 757 | tmphash = ra_allock(as, khash, allow); |
701 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); | 758 | emit_tab(as, PPCI_ADD, dest, dest, tmp1); |
702 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); | 759 | emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); |
703 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); | 760 | emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); |
704 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | 761 | emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); |
705 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); | 762 | emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); |
706 | if (irref_isk(refkey)) { | 763 | if (isk) { |
707 | /* Nothing to do. */ | 764 | /* Nothing to do. */ |
708 | } else if (irt_isstr(kt)) { | 765 | } else if (irt_isstr(kt)) { |
709 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); | 766 | emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid)); |
710 | } else { /* Must match with hash*() in lj_tab.c. */ | 767 | } else { /* Must match with hash*() in lj_tab.c. */ |
711 | emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); | 768 | emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); |
712 | emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); | 769 | emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); |
713 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); | 770 | emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); |
714 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); | 771 | emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); |
715 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); | 772 | emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); |
716 | if (irt_isnum(kt)) { | 773 | if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { |
774 | #if LJ_SOFTFP | ||
775 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | ||
776 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | ||
777 | emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); | ||
778 | #else | ||
717 | int32_t ofs = ra_spill(as, irkey); | 779 | int32_t ofs = ra_spill(as, irkey); |
718 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); | 780 | emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); |
719 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 781 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
720 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); | 782 | emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); |
721 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); | 783 | emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); |
722 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); | 784 | emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); |
785 | #endif | ||
723 | } else { | 786 | } else { |
724 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | 787 | emit_asb(as, PPCI_XOR, tmp2, key, tmp1); |
725 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); | 788 | emit_rotlwi(as, dest, tmp1, HASH_ROT1); |
@@ -740,7 +803,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
740 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); | 803 | Reg node = ra_alloc1(as, ir->op1, RSET_GPR); |
741 | Reg key = RID_NONE, type = RID_TMP, idx = node; | 804 | Reg key = RID_NONE, type = RID_TMP, idx = node; |
742 | RegSet allow = rset_exclude(RSET_GPR, node); | 805 | RegSet allow = rset_exclude(RSET_GPR, node); |
743 | lua_assert(ofs % sizeof(Node) == 0); | 806 | lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); |
744 | if (ofs > 32736) { | 807 | if (ofs > 32736) { |
745 | idx = dest; | 808 | idx = dest; |
746 | rset_clear(allow, dest); | 809 | rset_clear(allow, dest); |
@@ -773,20 +836,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
773 | } | 836 | } |
774 | } | 837 | } |
775 | 838 | ||
776 | static void asm_newref(ASMState *as, IRIns *ir) | ||
777 | { | ||
778 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
779 | IRRef args[3]; | ||
780 | if (ir->r == RID_SINK) | ||
781 | return; | ||
782 | args[0] = ASMREF_L; /* lua_State *L */ | ||
783 | args[1] = ir->op1; /* GCtab *t */ | ||
784 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
785 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
786 | asm_gencall(as, ci, args); | ||
787 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
788 | } | ||
789 | |||
790 | static void asm_uref(ASMState *as, IRIns *ir) | 839 | static void asm_uref(ASMState *as, IRIns *ir) |
791 | { | 840 | { |
792 | Reg dest = ra_dest(as, ir, RSET_GPR); | 841 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -813,7 +862,7 @@ static void asm_uref(ASMState *as, IRIns *ir) | |||
813 | static void asm_fref(ASMState *as, IRIns *ir) | 862 | static void asm_fref(ASMState *as, IRIns *ir) |
814 | { | 863 | { |
815 | UNUSED(as); UNUSED(ir); | 864 | UNUSED(as); UNUSED(ir); |
816 | lua_assert(!ra_used(ir)); | 865 | lj_assertA(!ra_used(ir), "unfused FREF"); |
817 | } | 866 | } |
818 | 867 | ||
819 | static void asm_strref(ASMState *as, IRIns *ir) | 868 | static void asm_strref(ASMState *as, IRIns *ir) |
@@ -853,26 +902,28 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
853 | 902 | ||
854 | /* -- Loads and stores ---------------------------------------------------- */ | 903 | /* -- Loads and stores ---------------------------------------------------- */ |
855 | 904 | ||
856 | static PPCIns asm_fxloadins(IRIns *ir) | 905 | static PPCIns asm_fxloadins(ASMState *as, IRIns *ir) |
857 | { | 906 | { |
907 | UNUSED(as); | ||
858 | switch (irt_type(ir->t)) { | 908 | switch (irt_type(ir->t)) { |
859 | case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ | 909 | case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ |
860 | case IRT_U8: return PPCI_LBZ; | 910 | case IRT_U8: return PPCI_LBZ; |
861 | case IRT_I16: return PPCI_LHA; | 911 | case IRT_I16: return PPCI_LHA; |
862 | case IRT_U16: return PPCI_LHZ; | 912 | case IRT_U16: return PPCI_LHZ; |
863 | case IRT_NUM: return PPCI_LFD; | 913 | case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD; |
864 | case IRT_FLOAT: return PPCI_LFS; | 914 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; |
865 | default: return PPCI_LWZ; | 915 | default: return PPCI_LWZ; |
866 | } | 916 | } |
867 | } | 917 | } |
868 | 918 | ||
869 | static PPCIns asm_fxstoreins(IRIns *ir) | 919 | static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir) |
870 | { | 920 | { |
921 | UNUSED(as); | ||
871 | switch (irt_type(ir->t)) { | 922 | switch (irt_type(ir->t)) { |
872 | case IRT_I8: case IRT_U8: return PPCI_STB; | 923 | case IRT_I8: case IRT_U8: return PPCI_STB; |
873 | case IRT_I16: case IRT_U16: return PPCI_STH; | 924 | case IRT_I16: case IRT_U16: return PPCI_STH; |
874 | case IRT_NUM: return PPCI_STFD; | 925 | case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD; |
875 | case IRT_FLOAT: return PPCI_STFS; | 926 | case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; |
876 | default: return PPCI_STW; | 927 | default: return PPCI_STW; |
877 | } | 928 | } |
878 | } | 929 | } |
@@ -880,18 +931,24 @@ static PPCIns asm_fxstoreins(IRIns *ir) | |||
880 | static void asm_fload(ASMState *as, IRIns *ir) | 931 | static void asm_fload(ASMState *as, IRIns *ir) |
881 | { | 932 | { |
882 | Reg dest = ra_dest(as, ir, RSET_GPR); | 933 | Reg dest = ra_dest(as, ir, RSET_GPR); |
883 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | 934 | PPCIns pi = asm_fxloadins(as, ir); |
884 | PPCIns pi = asm_fxloadins(ir); | 935 | Reg idx; |
885 | int32_t ofs; | 936 | int32_t ofs; |
886 | if (ir->op2 == IRFL_TAB_ARRAY) { | 937 | if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ |
887 | ofs = asm_fuseabase(as, ir->op1); | 938 | idx = RID_JGL; |
888 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 939 | ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); |
889 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | 940 | } else { |
890 | return; | 941 | idx = ra_alloc1(as, ir->op1, RSET_GPR); |
942 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
943 | ofs = asm_fuseabase(as, ir->op1); | ||
944 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
945 | emit_tai(as, PPCI_ADDI, dest, idx, ofs); | ||
946 | return; | ||
947 | } | ||
891 | } | 948 | } |
949 | ofs = field_ofs[ir->op2]; | ||
892 | } | 950 | } |
893 | ofs = field_ofs[ir->op2]; | 951 | lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8"); |
894 | lua_assert(!irt_isi8(ir->t)); | ||
895 | emit_tai(as, pi, dest, idx, ofs); | 952 | emit_tai(as, pi, dest, idx, ofs); |
896 | } | 953 | } |
897 | 954 | ||
@@ -902,21 +959,22 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
902 | IRIns *irf = IR(ir->op1); | 959 | IRIns *irf = IR(ir->op1); |
903 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | 960 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); |
904 | int32_t ofs = field_ofs[irf->op2]; | 961 | int32_t ofs = field_ofs[irf->op2]; |
905 | PPCIns pi = asm_fxstoreins(ir); | 962 | PPCIns pi = asm_fxstoreins(as, ir); |
906 | emit_tai(as, pi, src, idx, ofs); | 963 | emit_tai(as, pi, src, idx, ofs); |
907 | } | 964 | } |
908 | } | 965 | } |
909 | 966 | ||
910 | static void asm_xload(ASMState *as, IRIns *ir) | 967 | static void asm_xload(ASMState *as, IRIns *ir) |
911 | { | 968 | { |
912 | Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 969 | Reg dest = ra_dest(as, ir, |
913 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 970 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
971 | lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); | ||
914 | if (irt_isi8(ir->t)) | 972 | if (irt_isi8(ir->t)) |
915 | emit_as(as, PPCI_EXTSB, dest, dest); | 973 | emit_as(as, PPCI_EXTSB, dest, dest); |
916 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 974 | asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); |
917 | } | 975 | } |
918 | 976 | ||
919 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 977 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
920 | { | 978 | { |
921 | IRIns *irb; | 979 | IRIns *irb; |
922 | if (ir->r == RID_SINK) | 980 | if (ir->r == RID_SINK) |
@@ -927,36 +985,54 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | |||
927 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); | 985 | Reg src = ra_alloc1(as, irb->op1, RSET_GPR); |
928 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); | 986 | asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); |
929 | } else { | 987 | } else { |
930 | Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 988 | Reg src = ra_alloc1(as, ir->op2, |
931 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 989 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); |
990 | asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, | ||
932 | rset_exclude(RSET_GPR, src), ofs); | 991 | rset_exclude(RSET_GPR, src), ofs); |
933 | } | 992 | } |
934 | } | 993 | } |
935 | 994 | ||
995 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
996 | |||
936 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 997 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
937 | { | 998 | { |
938 | IRType1 t = ir->t; | 999 | IRType1 t = ir->t; |
939 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; | 1000 | Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; |
940 | RegSet allow = RSET_GPR; | 1001 | RegSet allow = RSET_GPR; |
941 | int32_t ofs = AHUREF_LSX; | 1002 | int32_t ofs = AHUREF_LSX; |
1003 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { | ||
1004 | t.irt = IRT_NUM; | ||
1005 | if (ra_used(ir+1)) { | ||
1006 | type = ra_dest(as, ir+1, allow); | ||
1007 | rset_clear(allow, type); | ||
1008 | } | ||
1009 | ofs = 0; | ||
1010 | } | ||
942 | if (ra_used(ir)) { | 1011 | if (ra_used(ir)) { |
943 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1012 | lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
944 | if (!irt_isnum(t)) ofs = 0; | 1013 | irt_isint(ir->t) || irt_isaddr(ir->t), |
945 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1014 | "bad load type %d", irt_type(ir->t)); |
1015 | if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; | ||
1016 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
946 | rset_clear(allow, dest); | 1017 | rset_clear(allow, dest); |
947 | } | 1018 | } |
948 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1019 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
1020 | if (ir->o == IR_VLOAD) { | ||
1021 | ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 : | ||
1022 | ir->op2 ? 8 * ir->op2 : AHUREF_LSX; | ||
1023 | } | ||
949 | if (irt_isnum(t)) { | 1024 | if (irt_isnum(t)) { |
950 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); | 1025 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); |
951 | asm_guardcc(as, CC_GE); | 1026 | asm_guardcc(as, CC_GE); |
952 | emit_ab(as, PPCI_CMPLW, type, tisnum); | 1027 | emit_ab(as, PPCI_CMPLW, type, tisnum); |
953 | if (ra_hasreg(dest)) { | 1028 | if (ra_hasreg(dest)) { |
954 | if (ofs == AHUREF_LSX) { | 1029 | if (!LJ_SOFTFP && ofs == AHUREF_LSX) { |
955 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, | 1030 | tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, |
956 | (idx&255)), (idx>>8))); | 1031 | (idx&255)), (idx>>8))); |
957 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); | 1032 | emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); |
958 | } else { | 1033 | } else { |
959 | emit_fai(as, PPCI_LFD, dest, idx, ofs); | 1034 | emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, |
1035 | ofs+4*LJ_SOFTFP); | ||
960 | } | 1036 | } |
961 | } | 1037 | } |
962 | } else { | 1038 | } else { |
@@ -979,7 +1055,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
979 | int32_t ofs = AHUREF_LSX; | 1055 | int32_t ofs = AHUREF_LSX; |
980 | if (ir->r == RID_SINK) | 1056 | if (ir->r == RID_SINK) |
981 | return; | 1057 | return; |
982 | if (irt_isnum(ir->t)) { | 1058 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
983 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 1059 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
984 | } else { | 1060 | } else { |
985 | if (!irt_ispri(ir->t)) { | 1061 | if (!irt_ispri(ir->t)) { |
@@ -987,11 +1063,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
987 | rset_clear(allow, src); | 1063 | rset_clear(allow, src); |
988 | ofs = 0; | 1064 | ofs = 0; |
989 | } | 1065 | } |
990 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 1066 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
1067 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
1068 | else | ||
1069 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
991 | rset_clear(allow, type); | 1070 | rset_clear(allow, type); |
992 | } | 1071 | } |
993 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1072 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); |
994 | if (irt_isnum(ir->t)) { | 1073 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
995 | if (ofs == AHUREF_LSX) { | 1074 | if (ofs == AHUREF_LSX) { |
996 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); | 1075 | emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); |
997 | emit_slwi(as, RID_TMP, (idx>>8), 3); | 1076 | emit_slwi(as, RID_TMP, (idx>>8), 3); |
@@ -1016,21 +1095,39 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1016 | IRType1 t = ir->t; | 1095 | IRType1 t = ir->t; |
1017 | Reg dest = RID_NONE, type = RID_NONE, base; | 1096 | Reg dest = RID_NONE, type = RID_NONE, base; |
1018 | RegSet allow = RSET_GPR; | 1097 | RegSet allow = RSET_GPR; |
1019 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1098 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); |
1020 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1099 | if (hiop) |
1021 | lua_assert(LJ_DUALNUM || | 1100 | t.irt = IRT_NUM; |
1022 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 1101 | lj_assertA(!(ir->op2 & IRSLOAD_PARENT), |
1102 | "bad parent SLOAD"); /* Handled by asm_head_side(). */ | ||
1103 | lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), | ||
1104 | "inconsistent SLOAD variant"); | ||
1105 | lj_assertA(LJ_DUALNUM || | ||
1106 | !irt_isint(t) || | ||
1107 | (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), | ||
1108 | "bad SLOAD type"); | ||
1109 | #if LJ_SOFTFP | ||
1110 | lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), | ||
1111 | "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ | ||
1112 | if (hiop && ra_used(ir+1)) { | ||
1113 | type = ra_dest(as, ir+1, allow); | ||
1114 | rset_clear(allow, type); | ||
1115 | } | ||
1116 | #else | ||
1023 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1117 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
1024 | dest = ra_scratch(as, RSET_FPR); | 1118 | dest = ra_scratch(as, RSET_FPR); |
1025 | asm_tointg(as, ir, dest); | 1119 | asm_tointg(as, ir, dest); |
1026 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1120 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
1027 | } else if (ra_used(ir)) { | 1121 | } else |
1028 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 1122 | #endif |
1029 | dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); | 1123 | if (ra_used(ir)) { |
1124 | lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), | ||
1125 | "bad SLOAD type %d", irt_type(ir->t)); | ||
1126 | dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); | ||
1030 | rset_clear(allow, dest); | 1127 | rset_clear(allow, dest); |
1031 | base = ra_alloc1(as, REF_BASE, allow); | 1128 | base = ra_alloc1(as, REF_BASE, allow); |
1032 | rset_clear(allow, base); | 1129 | rset_clear(allow, base); |
1033 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1130 | if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { |
1034 | if (irt_isint(t)) { | 1131 | if (irt_isint(t)) { |
1035 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 1132 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
1036 | dest = ra_scratch(as, RSET_FPR); | 1133 | dest = ra_scratch(as, RSET_FPR); |
@@ -1044,7 +1141,7 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1044 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 1141 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
1045 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 1142 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
1046 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 1143 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
1047 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 1144 | (void *)&as->J->k32[LJ_K32_2P52_2P31], |
1048 | rset_clear(allow, hibias)); | 1145 | rset_clear(allow, hibias)); |
1049 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); | 1146 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); |
1050 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); | 1147 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); |
@@ -1062,10 +1159,13 @@ dotypecheck: | |||
1062 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1159 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1063 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); | 1160 | Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); |
1064 | asm_guardcc(as, CC_GE); | 1161 | asm_guardcc(as, CC_GE); |
1065 | emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); | 1162 | #if !LJ_SOFTFP |
1066 | type = RID_TMP; | 1163 | type = RID_TMP; |
1164 | #endif | ||
1165 | emit_ab(as, PPCI_CMPLW, type, tisnum); | ||
1067 | } | 1166 | } |
1068 | if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); | 1167 | if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, |
1168 | base, ofs-(LJ_SOFTFP?0:4)); | ||
1069 | } else { | 1169 | } else { |
1070 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1170 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
1071 | asm_guardcc(as, CC_NE); | 1171 | asm_guardcc(as, CC_NE); |
@@ -1083,19 +1183,16 @@ dotypecheck: | |||
1083 | static void asm_cnew(ASMState *as, IRIns *ir) | 1183 | static void asm_cnew(ASMState *as, IRIns *ir) |
1084 | { | 1184 | { |
1085 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1185 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1086 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1186 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1087 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1187 | CTSize sz; |
1088 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1188 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1089 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1189 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1090 | IRRef args[2]; | 1190 | IRRef args[4]; |
1091 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1092 | RegSet drop = RSET_SCRATCH; | 1191 | RegSet drop = RSET_SCRATCH; |
1093 | lua_assert(sz != CTSIZE_INVALID); | 1192 | lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), |
1193 | "bad CNEW/CNEWI operands"); | ||
1094 | 1194 | ||
1095 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1096 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1097 | as->gcsteps++; | 1195 | as->gcsteps++; |
1098 | |||
1099 | if (ra_hasreg(ir->r)) | 1196 | if (ra_hasreg(ir->r)) |
1100 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1197 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1101 | ra_evictset(as, drop); | 1198 | ra_evictset(as, drop); |
@@ -1104,11 +1201,12 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1104 | 1201 | ||
1105 | /* Initialize immutable cdata object. */ | 1202 | /* Initialize immutable cdata object. */ |
1106 | if (ir->o == IR_CNEWI) { | 1203 | if (ir->o == IR_CNEWI) { |
1204 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | ||
1107 | int32_t ofs = sizeof(GCcdata); | 1205 | int32_t ofs = sizeof(GCcdata); |
1108 | lua_assert(sz == 4 || sz == 8); | 1206 | lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); |
1109 | if (sz == 8) { | 1207 | if (sz == 8) { |
1110 | ofs += 4; | 1208 | ofs += 4; |
1111 | lua_assert((ir+1)->o == IR_HIOP); | 1209 | lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); |
1112 | } | 1210 | } |
1113 | for (;;) { | 1211 | for (;;) { |
1114 | Reg r = ra_alloc1(as, ir->op2, allow); | 1212 | Reg r = ra_alloc1(as, ir->op2, allow); |
@@ -1117,18 +1215,28 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1117 | if (ofs == sizeof(GCcdata)) break; | 1215 | if (ofs == sizeof(GCcdata)) break; |
1118 | ofs -= 4; ir++; | 1216 | ofs -= 4; ir++; |
1119 | } | 1217 | } |
1218 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1219 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1220 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1221 | args[1] = ir->op1; /* CTypeID id */ | ||
1222 | args[2] = ir->op2; /* CTSize sz */ | ||
1223 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1224 | asm_gencall(as, ci, args); | ||
1225 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1226 | return; | ||
1120 | } | 1227 | } |
1228 | |||
1121 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1229 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1122 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); | 1230 | emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); |
1123 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); | 1231 | emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); |
1124 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); | 1232 | emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); |
1125 | emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ | 1233 | emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ |
1234 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1235 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1126 | asm_gencall(as, ci, args); | 1236 | asm_gencall(as, ci, args); |
1127 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1237 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1128 | ra_releasetmp(as, ASMREF_TMP1)); | 1238 | ra_releasetmp(as, ASMREF_TMP1)); |
1129 | } | 1239 | } |
1130 | #else | ||
1131 | #define asm_cnew(as, ir) ((void)0) | ||
1132 | #endif | 1240 | #endif |
1133 | 1241 | ||
1134 | /* -- Write barriers ------------------------------------------------------ */ | 1242 | /* -- Write barriers ------------------------------------------------------ */ |
@@ -1142,7 +1250,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
1142 | emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); | 1250 | emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); |
1143 | emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); | 1251 | emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); |
1144 | emit_setgl(as, tab, gc.grayagain); | 1252 | emit_setgl(as, tab, gc.grayagain); |
1145 | lua_assert(LJ_GC_BLACK == 0x04); | 1253 | lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK"); |
1146 | emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ | 1254 | emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ |
1147 | emit_getgl(as, link, gc.grayagain); | 1255 | emit_getgl(as, link, gc.grayagain); |
1148 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); | 1256 | emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); |
@@ -1157,7 +1265,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1157 | MCLabel l_end; | 1265 | MCLabel l_end; |
1158 | Reg obj, val, tmp; | 1266 | Reg obj, val, tmp; |
1159 | /* No need for other object barriers (yet). */ | 1267 | /* No need for other object barriers (yet). */ |
1160 | lua_assert(IR(ir->op1)->o == IR_UREFC); | 1268 | lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); |
1161 | ra_evictset(as, RSET_SCRATCH); | 1269 | ra_evictset(as, RSET_SCRATCH); |
1162 | l_end = emit_label(as); | 1270 | l_end = emit_label(as); |
1163 | args[0] = ASMREF_TMP1; /* global_State *g */ | 1271 | args[0] = ASMREF_TMP1; /* global_State *g */ |
@@ -1178,6 +1286,7 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
1178 | 1286 | ||
1179 | /* -- Arithmetic and logic operations ------------------------------------- */ | 1287 | /* -- Arithmetic and logic operations ------------------------------------- */ |
1180 | 1288 | ||
1289 | #if !LJ_SOFTFP | ||
1181 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) | 1290 | static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) |
1182 | { | 1291 | { |
1183 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1292 | Reg dest = ra_dest(as, ir, RSET_FPR); |
@@ -1196,31 +1305,24 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) | |||
1196 | emit_fb(as, pi, dest, left); | 1305 | emit_fb(as, pi, dest, left); |
1197 | } | 1306 | } |
1198 | 1307 | ||
1199 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1308 | static void asm_fpmath(ASMState *as, IRIns *ir) |
1200 | { | 1309 | { |
1201 | IRIns *irp = IR(ir->op1); | 1310 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) |
1202 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1311 | asm_fpunary(as, ir, PPCI_FSQRT); |
1203 | IRIns *irpp = IR(irp->op1); | 1312 | else |
1204 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1313 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); |
1205 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
1206 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | ||
1207 | IRRef args[2]; | ||
1208 | args[0] = irpp->op1; | ||
1209 | args[1] = irp->op2; | ||
1210 | asm_setupresult(as, ir, ci); | ||
1211 | asm_gencall(as, ci, args); | ||
1212 | return 1; | ||
1213 | } | ||
1214 | } | ||
1215 | return 0; | ||
1216 | } | 1314 | } |
1315 | #endif | ||
1217 | 1316 | ||
1218 | static void asm_add(ASMState *as, IRIns *ir) | 1317 | static void asm_add(ASMState *as, IRIns *ir) |
1219 | { | 1318 | { |
1319 | #if !LJ_SOFTFP | ||
1220 | if (irt_isnum(ir->t)) { | 1320 | if (irt_isnum(ir->t)) { |
1221 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) | 1321 | if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) |
1222 | asm_fparith(as, ir, PPCI_FADD); | 1322 | asm_fparith(as, ir, PPCI_FADD); |
1223 | } else { | 1323 | } else |
1324 | #endif | ||
1325 | { | ||
1224 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1326 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1225 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1327 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
1226 | PPCIns pi; | 1328 | PPCIns pi; |
@@ -1259,10 +1361,13 @@ static void asm_add(ASMState *as, IRIns *ir) | |||
1259 | 1361 | ||
1260 | static void asm_sub(ASMState *as, IRIns *ir) | 1362 | static void asm_sub(ASMState *as, IRIns *ir) |
1261 | { | 1363 | { |
1364 | #if !LJ_SOFTFP | ||
1262 | if (irt_isnum(ir->t)) { | 1365 | if (irt_isnum(ir->t)) { |
1263 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) | 1366 | if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) |
1264 | asm_fparith(as, ir, PPCI_FSUB); | 1367 | asm_fparith(as, ir, PPCI_FSUB); |
1265 | } else { | 1368 | } else |
1369 | #endif | ||
1370 | { | ||
1266 | PPCIns pi = PPCI_SUBF; | 1371 | PPCIns pi = PPCI_SUBF; |
1267 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1372 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1268 | Reg left, right; | 1373 | Reg left, right; |
@@ -1288,9 +1393,12 @@ static void asm_sub(ASMState *as, IRIns *ir) | |||
1288 | 1393 | ||
1289 | static void asm_mul(ASMState *as, IRIns *ir) | 1394 | static void asm_mul(ASMState *as, IRIns *ir) |
1290 | { | 1395 | { |
1396 | #if !LJ_SOFTFP | ||
1291 | if (irt_isnum(ir->t)) { | 1397 | if (irt_isnum(ir->t)) { |
1292 | asm_fparith(as, ir, PPCI_FMUL); | 1398 | asm_fparith(as, ir, PPCI_FMUL); |
1293 | } else { | 1399 | } else |
1400 | #endif | ||
1401 | { | ||
1294 | PPCIns pi = PPCI_MULLW; | 1402 | PPCIns pi = PPCI_MULLW; |
1295 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1403 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1296 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | 1404 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); |
@@ -1312,11 +1420,16 @@ static void asm_mul(ASMState *as, IRIns *ir) | |||
1312 | } | 1420 | } |
1313 | } | 1421 | } |
1314 | 1422 | ||
1423 | #define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV) | ||
1424 | |||
1315 | static void asm_neg(ASMState *as, IRIns *ir) | 1425 | static void asm_neg(ASMState *as, IRIns *ir) |
1316 | { | 1426 | { |
1427 | #if !LJ_SOFTFP | ||
1317 | if (irt_isnum(ir->t)) { | 1428 | if (irt_isnum(ir->t)) { |
1318 | asm_fpunary(as, ir, PPCI_FNEG); | 1429 | asm_fpunary(as, ir, PPCI_FNEG); |
1319 | } else { | 1430 | } else |
1431 | #endif | ||
1432 | { | ||
1320 | Reg dest, left; | 1433 | Reg dest, left; |
1321 | PPCIns pi = PPCI_NEG; | 1434 | PPCIns pi = PPCI_NEG; |
1322 | if (as->flagmcp == as->mcp) { | 1435 | if (as->flagmcp == as->mcp) { |
@@ -1330,6 +1443,8 @@ static void asm_neg(ASMState *as, IRIns *ir) | |||
1330 | } | 1443 | } |
1331 | } | 1444 | } |
1332 | 1445 | ||
1446 | #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) | ||
1447 | |||
1333 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | 1448 | static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) |
1334 | { | 1449 | { |
1335 | Reg dest, left, right; | 1450 | Reg dest, left, right; |
@@ -1345,6 +1460,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) | |||
1345 | emit_tab(as, pi|PPCF_DOT, dest, left, right); | 1460 | emit_tab(as, pi|PPCF_DOT, dest, left, right); |
1346 | } | 1461 | } |
1347 | 1462 | ||
1463 | #define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) | ||
1464 | #define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) | ||
1465 | #define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO) | ||
1466 | |||
1348 | #if LJ_HASFFI | 1467 | #if LJ_HASFFI |
1349 | static void asm_add64(ASMState *as, IRIns *ir) | 1468 | static void asm_add64(ASMState *as, IRIns *ir) |
1350 | { | 1469 | { |
@@ -1424,7 +1543,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) | |||
1424 | } | 1543 | } |
1425 | #endif | 1544 | #endif |
1426 | 1545 | ||
1427 | static void asm_bitnot(ASMState *as, IRIns *ir) | 1546 | static void asm_bnot(ASMState *as, IRIns *ir) |
1428 | { | 1547 | { |
1429 | Reg dest, left, right; | 1548 | Reg dest, left, right; |
1430 | PPCIns pi = PPCI_NOR; | 1549 | PPCIns pi = PPCI_NOR; |
@@ -1451,7 +1570,7 @@ nofuse: | |||
1451 | emit_asb(as, pi, dest, left, right); | 1570 | emit_asb(as, pi, dest, left, right); |
1452 | } | 1571 | } |
1453 | 1572 | ||
1454 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1573 | static void asm_bswap(ASMState *as, IRIns *ir) |
1455 | { | 1574 | { |
1456 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1575 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1457 | IRIns *irx; | 1576 | IRIns *irx; |
@@ -1472,32 +1591,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1472 | } | 1591 | } |
1473 | } | 1592 | } |
1474 | 1593 | ||
1475 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | ||
1476 | { | ||
1477 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1478 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1479 | if (irref_isk(ir->op2)) { | ||
1480 | int32_t k = IR(ir->op2)->i; | ||
1481 | Reg tmp = left; | ||
1482 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { | ||
1483 | if (!checku16(k)) { | ||
1484 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); | ||
1485 | if ((k & 0xffff) == 0) return; | ||
1486 | } | ||
1487 | emit_asi(as, pik, dest, left, k); | ||
1488 | return; | ||
1489 | } | ||
1490 | } | ||
1491 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1492 | if (as->flagmcp == as->mcp) { | ||
1493 | as->flagmcp = NULL; | ||
1494 | as->mcp++; | ||
1495 | pi |= PPCF_DOT; | ||
1496 | } | ||
1497 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1498 | emit_asb(as, pi, dest, left, right); | ||
1499 | } | ||
1500 | |||
1501 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ | 1594 | /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ |
1502 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) | 1595 | static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) |
1503 | { | 1596 | { |
@@ -1528,7 +1621,7 @@ nofuse: | |||
1528 | *--as->mcp = pi | PPCF_T(left); | 1621 | *--as->mcp = pi | PPCF_T(left); |
1529 | } | 1622 | } |
1530 | 1623 | ||
1531 | static void asm_bitand(ASMState *as, IRIns *ir) | 1624 | static void asm_band(ASMState *as, IRIns *ir) |
1532 | { | 1625 | { |
1533 | Reg dest, left, right; | 1626 | Reg dest, left, right; |
1534 | IRRef lref = ir->op1; | 1627 | IRRef lref = ir->op1; |
@@ -1583,6 +1676,35 @@ static void asm_bitand(ASMState *as, IRIns *ir) | |||
1583 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); | 1676 | emit_asb(as, PPCI_AND ^ dot, dest, left, right); |
1584 | } | 1677 | } |
1585 | 1678 | ||
1679 | static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | ||
1680 | { | ||
1681 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1682 | Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); | ||
1683 | if (irref_isk(ir->op2)) { | ||
1684 | int32_t k = IR(ir->op2)->i; | ||
1685 | Reg tmp = left; | ||
1686 | if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { | ||
1687 | if (!checku16(k)) { | ||
1688 | emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); | ||
1689 | if ((k & 0xffff) == 0) return; | ||
1690 | } | ||
1691 | emit_asi(as, pik, dest, left, k); | ||
1692 | return; | ||
1693 | } | ||
1694 | } | ||
1695 | /* May fail due to spills/restores above, but simplifies the logic. */ | ||
1696 | if (as->flagmcp == as->mcp) { | ||
1697 | as->flagmcp = NULL; | ||
1698 | as->mcp++; | ||
1699 | pi |= PPCF_DOT; | ||
1700 | } | ||
1701 | right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); | ||
1702 | emit_asb(as, pi, dest, left, right); | ||
1703 | } | ||
1704 | |||
1705 | #define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) | ||
1706 | #define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) | ||
1707 | |||
1586 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | 1708 | static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) |
1587 | { | 1709 | { |
1588 | Reg dest, left; | 1710 | Reg dest, left; |
@@ -1608,9 +1730,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) | |||
1608 | } | 1730 | } |
1609 | } | 1731 | } |
1610 | 1732 | ||
1733 | #define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) | ||
1734 | #define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) | ||
1735 | #define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) | ||
1736 | #define asm_brol(as, ir) \ | ||
1737 | asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ | ||
1738 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) | ||
1739 | #define asm_bror(as, ir) lj_assertA(0, "unexpected BROR") | ||
1740 | |||
1741 | #if LJ_SOFTFP | ||
1742 | static void asm_sfpmin_max(ASMState *as, IRIns *ir) | ||
1743 | { | ||
1744 | CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
1745 | IRRef args[4]; | ||
1746 | MCLabel l_right, l_end; | ||
1747 | Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); | ||
1748 | Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); | ||
1749 | Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); | ||
1750 | PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; | ||
1751 | righthi = (lefthi >> 8); lefthi &= 255; | ||
1752 | rightlo = (leftlo >> 8); leftlo &= 255; | ||
1753 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; | ||
1754 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1755 | l_end = emit_label(as); | ||
1756 | if (desthi != righthi) emit_mr(as, desthi, righthi); | ||
1757 | if (destlo != rightlo) emit_mr(as, destlo, rightlo); | ||
1758 | l_right = emit_label(as); | ||
1759 | if (l_end != l_right) emit_jmp(as, l_end); | ||
1760 | if (desthi != lefthi) emit_mr(as, desthi, lefthi); | ||
1761 | if (destlo != leftlo) emit_mr(as, destlo, leftlo); | ||
1762 | if (l_right == as->mcp+1) { | ||
1763 | cond ^= 4; l_right = l_end; ++as->mcp; | ||
1764 | } | ||
1765 | emit_condbranch(as, PPCI_BC, cond, l_right); | ||
1766 | ra_evictset(as, RSET_SCRATCH); | ||
1767 | emit_cmpi(as, RID_RET, 1); | ||
1768 | asm_gencall(as, &ci, args); | ||
1769 | } | ||
1770 | #endif | ||
1771 | |||
1611 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | 1772 | static void asm_min_max(ASMState *as, IRIns *ir, int ismax) |
1612 | { | 1773 | { |
1613 | if (irt_isnum(ir->t)) { | 1774 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1614 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1775 | Reg dest = ra_dest(as, ir, RSET_FPR); |
1615 | Reg tmp = dest; | 1776 | Reg tmp = dest; |
1616 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1777 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
@@ -1618,9 +1779,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | |||
1618 | if (tmp == left || tmp == right) | 1779 | if (tmp == left || tmp == right) |
1619 | tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, | 1780 | tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, |
1620 | dest), left), right)); | 1781 | dest), left), right)); |
1621 | emit_facb(as, PPCI_FSEL, dest, tmp, | 1782 | emit_facb(as, PPCI_FSEL, dest, tmp, left, right); |
1622 | ismax ? left : right, ismax ? right : left); | 1783 | emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left); |
1623 | emit_fab(as, PPCI_FSUB, tmp, left, right); | ||
1624 | } else { | 1784 | } else { |
1625 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1785 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1626 | Reg tmp1 = RID_TMP, tmp2 = dest; | 1786 | Reg tmp1 = RID_TMP, tmp2 = dest; |
@@ -1638,6 +1798,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) | |||
1638 | } | 1798 | } |
1639 | } | 1799 | } |
1640 | 1800 | ||
1801 | #define asm_min(as, ir) asm_min_max(as, ir, 0) | ||
1802 | #define asm_max(as, ir) asm_min_max(as, ir, 1) | ||
1803 | |||
1641 | /* -- Comparisons --------------------------------------------------------- */ | 1804 | /* -- Comparisons --------------------------------------------------------- */ |
1642 | 1805 | ||
1643 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ | 1806 | #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ |
@@ -1695,7 +1858,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) | |||
1695 | static void asm_comp(ASMState *as, IRIns *ir) | 1858 | static void asm_comp(ASMState *as, IRIns *ir) |
1696 | { | 1859 | { |
1697 | PPCCC cc = asm_compmap[ir->o]; | 1860 | PPCCC cc = asm_compmap[ir->o]; |
1698 | if (irt_isnum(ir->t)) { | 1861 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1699 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | 1862 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); |
1700 | right = (left >> 8); left &= 255; | 1863 | right = (left >> 8); left &= 255; |
1701 | asm_guardcc(as, (cc >> 4)); | 1864 | asm_guardcc(as, (cc >> 4)); |
@@ -1714,6 +1877,46 @@ static void asm_comp(ASMState *as, IRIns *ir) | |||
1714 | } | 1877 | } |
1715 | } | 1878 | } |
1716 | 1879 | ||
1880 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1881 | |||
1882 | #if LJ_SOFTFP | ||
1883 | /* SFP comparisons. */ | ||
1884 | static void asm_sfpcomp(ASMState *as, IRIns *ir) | ||
1885 | { | ||
1886 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | ||
1887 | RegSet drop = RSET_SCRATCH; | ||
1888 | Reg r; | ||
1889 | IRRef args[4]; | ||
1890 | args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; | ||
1891 | args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; | ||
1892 | |||
1893 | for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { | ||
1894 | if (!rset_test(as->freeset, r) && | ||
1895 | regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) | ||
1896 | rset_clear(drop, r); | ||
1897 | } | ||
1898 | ra_evictset(as, drop); | ||
1899 | asm_setupresult(as, ir, ci); | ||
1900 | switch ((IROp)ir->o) { | ||
1901 | case IR_ULT: | ||
1902 | asm_guardcc(as, CC_EQ); | ||
1903 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); | ||
1904 | case IR_ULE: | ||
1905 | asm_guardcc(as, CC_EQ); | ||
1906 | emit_ai(as, PPCI_CMPWI, RID_RET, 1); | ||
1907 | break; | ||
1908 | case IR_GE: case IR_GT: | ||
1909 | asm_guardcc(as, CC_EQ); | ||
1910 | emit_ai(as, PPCI_CMPWI, RID_RET, 2); | ||
1911 | default: | ||
1912 | asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); | ||
1913 | emit_ai(as, PPCI_CMPWI, RID_RET, 0); | ||
1914 | break; | ||
1915 | } | ||
1916 | asm_gencall(as, ci, args); | ||
1917 | } | ||
1918 | #endif | ||
1919 | |||
1717 | #if LJ_HASFFI | 1920 | #if LJ_HASFFI |
1718 | /* 64 bit integer comparisons. */ | 1921 | /* 64 bit integer comparisons. */ |
1719 | static void asm_comp64(ASMState *as, IRIns *ir) | 1922 | static void asm_comp64(ASMState *as, IRIns *ir) |
@@ -1738,50 +1941,87 @@ static void asm_comp64(ASMState *as, IRIns *ir) | |||
1738 | } | 1941 | } |
1739 | #endif | 1942 | #endif |
1740 | 1943 | ||
1741 | /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ | 1944 | /* -- Split register ops -------------------------------------------------- */ |
1742 | 1945 | ||
1743 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 1946 | /* Hiword op of a split 32/32 bit op. Previous op must be the loword op. */ |
1744 | static void asm_hiop(ASMState *as, IRIns *ir) | 1947 | static void asm_hiop(ASMState *as, IRIns *ir) |
1745 | { | 1948 | { |
1746 | #if LJ_HASFFI | ||
1747 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 1949 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
1748 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 1950 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
1749 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 1951 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
1952 | #if LJ_HASFFI || LJ_SOFTFP | ||
1750 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ | 1953 | if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ |
1751 | as->curins--; /* Always skip the CONV. */ | 1954 | as->curins--; /* Always skip the CONV. */ |
1955 | #if LJ_HASFFI && !LJ_SOFTFP | ||
1752 | if (usehi || uselo) | 1956 | if (usehi || uselo) |
1753 | asm_conv64(as, ir); | 1957 | asm_conv64(as, ir); |
1754 | return; | 1958 | return; |
1959 | #endif | ||
1755 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ | 1960 | } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ |
1756 | as->curins--; /* Always skip the loword comparison. */ | 1961 | as->curins--; /* Always skip the loword comparison. */ |
1962 | #if LJ_SOFTFP | ||
1963 | if (!irt_isint(ir->t)) { | ||
1964 | asm_sfpcomp(as, ir-1); | ||
1965 | return; | ||
1966 | } | ||
1967 | #endif | ||
1968 | #if LJ_HASFFI | ||
1757 | asm_comp64(as, ir); | 1969 | asm_comp64(as, ir); |
1970 | #endif | ||
1758 | return; | 1971 | return; |
1972 | #if LJ_SOFTFP | ||
1973 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | ||
1974 | as->curins--; /* Always skip the loword min/max. */ | ||
1975 | if (uselo || usehi) | ||
1976 | asm_sfpmin_max(as, ir-1); | ||
1977 | return; | ||
1978 | #endif | ||
1759 | } else if ((ir-1)->o == IR_XSTORE) { | 1979 | } else if ((ir-1)->o == IR_XSTORE) { |
1760 | as->curins--; /* Handle both stores here. */ | 1980 | as->curins--; /* Handle both stores here. */ |
1761 | if ((ir-1)->r != RID_SINK) { | 1981 | if ((ir-1)->r != RID_SINK) { |
1762 | asm_xstore(as, ir, 0); | 1982 | asm_xstore_(as, ir, 0); |
1763 | asm_xstore(as, ir-1, 4); | 1983 | asm_xstore_(as, ir-1, 4); |
1764 | } | 1984 | } |
1765 | return; | 1985 | return; |
1766 | } | 1986 | } |
1987 | #endif | ||
1767 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1988 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
1768 | switch ((ir-1)->o) { | 1989 | switch ((ir-1)->o) { |
1990 | #if LJ_HASFFI | ||
1769 | case IR_ADD: as->curins--; asm_add64(as, ir); break; | 1991 | case IR_ADD: as->curins--; asm_add64(as, ir); break; |
1770 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; | 1992 | case IR_SUB: as->curins--; asm_sub64(as, ir); break; |
1771 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; | 1993 | case IR_NEG: as->curins--; asm_neg64(as, ir); break; |
1772 | case IR_CALLN: | 1994 | case IR_CNEWI: |
1773 | case IR_CALLXS: | 1995 | /* Nothing to do here. Handled by lo op itself. */ |
1996 | break; | ||
1997 | #endif | ||
1998 | #if LJ_SOFTFP | ||
1999 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2000 | case IR_STRTO: | ||
1774 | if (!uselo) | 2001 | if (!uselo) |
1775 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 2002 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ |
1776 | break; | 2003 | break; |
1777 | case IR_CNEWI: | 2004 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: |
1778 | /* Nothing to do here. Handled by lo op itself. */ | 2005 | /* Nothing to do here. Handled by lo op itself. */ |
1779 | break; | 2006 | break; |
1780 | default: lua_assert(0); break; | ||
1781 | } | ||
1782 | #else | ||
1783 | UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ | ||
1784 | #endif | 2007 | #endif |
2008 | case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: | ||
2009 | if (!uselo) | ||
2010 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | ||
2011 | break; | ||
2012 | default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; | ||
2013 | } | ||
2014 | } | ||
2015 | |||
2016 | /* -- Profiling ----------------------------------------------------------- */ | ||
2017 | |||
2018 | static void asm_prof(ASMState *as, IRIns *ir) | ||
2019 | { | ||
2020 | UNUSED(ir); | ||
2021 | asm_guardcc(as, CC_NE); | ||
2022 | emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); | ||
2023 | emit_lsglptr(as, PPCI_LBZ, RID_TMP, | ||
2024 | (int32_t)offsetof(global_State, hookmask)); | ||
1785 | } | 2025 | } |
1786 | 2026 | ||
1787 | /* -- Stack handling ------------------------------------------------------ */ | 2027 | /* -- Stack handling ------------------------------------------------------ */ |
@@ -1805,7 +2045,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1805 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); | 2045 | emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); |
1806 | if (pbase == RID_TMP) | 2046 | if (pbase == RID_TMP) |
1807 | emit_getgl(as, RID_TMP, jit_base); | 2047 | emit_getgl(as, RID_TMP, jit_base); |
1808 | emit_getgl(as, tmp, jit_L); | 2048 | emit_getgl(as, tmp, cur_L); |
1809 | if (allow == RSET_EMPTY) /* Spill temp. register. */ | 2049 | if (allow == RSET_EMPTY) /* Spill temp. register. */ |
1810 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); | 2050 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); |
1811 | } | 2051 | } |
@@ -1826,12 +2066,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1826 | if ((sn & SNAP_NORESTORE)) | 2066 | if ((sn & SNAP_NORESTORE)) |
1827 | continue; | 2067 | continue; |
1828 | if (irt_isnum(ir->t)) { | 2068 | if (irt_isnum(ir->t)) { |
2069 | #if LJ_SOFTFP | ||
2070 | Reg tmp; | ||
2071 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
2072 | /* LJ_SOFTFP: must be a number constant. */ | ||
2073 | lj_assertA(irref_isk(ref), "unsplit FP op"); | ||
2074 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); | ||
2075 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); | ||
2076 | if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); | ||
2077 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); | ||
2078 | emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); | ||
2079 | #else | ||
1829 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2080 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
1830 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); | 2081 | emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); |
2082 | #endif | ||
1831 | } else { | 2083 | } else { |
1832 | Reg type; | 2084 | Reg type; |
1833 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | 2085 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); |
1834 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); | 2086 | lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), |
2087 | "restore of IR type %d", irt_type(ir->t)); | ||
1835 | if (!irt_ispri(ir->t)) { | 2088 | if (!irt_ispri(ir->t)) { |
1836 | Reg src = ra_alloc1(as, ref, allow); | 2089 | Reg src = ra_alloc1(as, ref, allow); |
1837 | rset_clear(allow, src); | 2090 | rset_clear(allow, src); |
@@ -1840,6 +2093,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1840 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2093 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
1841 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ | 2094 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ |
1842 | type = ra_allock(as, (int32_t)(*flinks--), allow); | 2095 | type = ra_allock(as, (int32_t)(*flinks--), allow); |
2096 | #if LJ_SOFTFP | ||
2097 | } else if ((sn & SNAP_SOFTFPNUM)) { | ||
2098 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | ||
2099 | #endif | ||
2100 | } else if ((sn & SNAP_KEYINDEX)) { | ||
2101 | type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); | ||
1843 | } else { | 2102 | } else { |
1844 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2103 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
1845 | } | 2104 | } |
@@ -1847,7 +2106,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1847 | } | 2106 | } |
1848 | checkmclim(as); | 2107 | checkmclim(as); |
1849 | } | 2108 | } |
1850 | lua_assert(map + nent == flinks); | 2109 | lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); |
1851 | } | 2110 | } |
1852 | 2111 | ||
1853 | /* -- GC handling --------------------------------------------------------- */ | 2112 | /* -- GC handling --------------------------------------------------------- */ |
@@ -1898,6 +2157,12 @@ static void asm_loop_fixup(ASMState *as) | |||
1898 | } | 2157 | } |
1899 | } | 2158 | } |
1900 | 2159 | ||
2160 | /* Fixup the tail of the loop. */ | ||
2161 | static void asm_loop_tail_fixup(ASMState *as) | ||
2162 | { | ||
2163 | UNUSED(as); /* Nothing to do. */ | ||
2164 | } | ||
2165 | |||
1901 | /* -- Head of trace ------------------------------------------------------- */ | 2166 | /* -- Head of trace ------------------------------------------------------- */ |
1902 | 2167 | ||
1903 | /* Coalesce BASE register for a root trace. */ | 2168 | /* Coalesce BASE register for a root trace. */ |
@@ -1949,7 +2214,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
1949 | as->mctop = p; | 2214 | as->mctop = p; |
1950 | } else { | 2215 | } else { |
1951 | /* Patch stack adjustment. */ | 2216 | /* Patch stack adjustment. */ |
1952 | lua_assert(checki16(CFRAME_SIZE+spadj)); | 2217 | lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); |
1953 | p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); | 2218 | p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); |
1954 | p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; | 2219 | p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; |
1955 | } | 2220 | } |
@@ -1970,147 +2235,25 @@ static void asm_tail_prep(ASMState *as) | |||
1970 | } | 2235 | } |
1971 | } | 2236 | } |
1972 | 2237 | ||
1973 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1974 | |||
1975 | /* Assemble a single instruction. */ | ||
1976 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1977 | { | ||
1978 | switch ((IROp)ir->o) { | ||
1979 | /* Miscellaneous ops. */ | ||
1980 | case IR_LOOP: asm_loop(as); break; | ||
1981 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
1982 | case IR_USE: | ||
1983 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1984 | case IR_PHI: asm_phi(as, ir); break; | ||
1985 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1986 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1987 | |||
1988 | /* Guarded assertions. */ | ||
1989 | case IR_EQ: case IR_NE: | ||
1990 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1991 | as->curins--; | ||
1992 | asm_href(as, ir-1, (IROp)ir->o); | ||
1993 | break; | ||
1994 | } | ||
1995 | /* fallthrough */ | ||
1996 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1997 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1998 | case IR_ABC: | ||
1999 | asm_comp(as, ir); | ||
2000 | break; | ||
2001 | |||
2002 | case IR_RETF: asm_retf(as, ir); break; | ||
2003 | |||
2004 | /* Bit ops. */ | ||
2005 | case IR_BNOT: asm_bitnot(as, ir); break; | ||
2006 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2007 | |||
2008 | case IR_BAND: asm_bitand(as, ir); break; | ||
2009 | case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break; | ||
2010 | case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break; | ||
2011 | |||
2012 | case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break; | ||
2013 | case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break; | ||
2014 | case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break; | ||
2015 | case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), | ||
2016 | PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break; | ||
2017 | case IR_BROR: lua_assert(0); break; | ||
2018 | |||
2019 | /* Arithmetic ops. */ | ||
2020 | case IR_ADD: asm_add(as, ir); break; | ||
2021 | case IR_SUB: asm_sub(as, ir); break; | ||
2022 | case IR_MUL: asm_mul(as, ir); break; | ||
2023 | case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break; | ||
2024 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2025 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2026 | case IR_NEG: asm_neg(as, ir); break; | ||
2027 | |||
2028 | case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break; | ||
2029 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2030 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2031 | case IR_MIN: asm_min_max(as, ir, 0); break; | ||
2032 | case IR_MAX: asm_min_max(as, ir, 1); break; | ||
2033 | case IR_FPMATH: | ||
2034 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2035 | break; | ||
2036 | if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) | ||
2037 | asm_fpunary(as, ir, PPCI_FSQRT); | ||
2038 | else | ||
2039 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2040 | break; | ||
2041 | |||
2042 | /* Overflow-checking arithmetic ops. */ | ||
2043 | case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break; | ||
2044 | case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break; | ||
2045 | case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break; | ||
2046 | |||
2047 | /* Memory references. */ | ||
2048 | case IR_AREF: asm_aref(as, ir); break; | ||
2049 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2050 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2051 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2052 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2053 | case IR_FREF: asm_fref(as, ir); break; | ||
2054 | case IR_STRREF: asm_strref(as, ir); break; | ||
2055 | |||
2056 | /* Loads and stores. */ | ||
2057 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2058 | asm_ahuvload(as, ir); | ||
2059 | break; | ||
2060 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2061 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2062 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2063 | |||
2064 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2065 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2066 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2067 | |||
2068 | /* Allocations. */ | ||
2069 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2070 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2071 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2072 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2073 | |||
2074 | /* Write barriers. */ | ||
2075 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2076 | case IR_OBAR: asm_obar(as, ir); break; | ||
2077 | |||
2078 | /* Type conversions. */ | ||
2079 | case IR_CONV: asm_conv(as, ir); break; | ||
2080 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2081 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2082 | case IR_STRTO: asm_strto(as, ir); break; | ||
2083 | |||
2084 | /* Calls. */ | ||
2085 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2086 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2087 | case IR_CARG: break; | ||
2088 | |||
2089 | default: | ||
2090 | setintV(&as->J->errinfo, ir->o); | ||
2091 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2092 | break; | ||
2093 | } | ||
2094 | } | ||
2095 | |||
2096 | /* -- Trace setup --------------------------------------------------------- */ | 2238 | /* -- Trace setup --------------------------------------------------------- */ |
2097 | 2239 | ||
2098 | /* Ensure there are enough stack slots for call arguments. */ | 2240 | /* Ensure there are enough stack slots for call arguments. */ |
2099 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2241 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2100 | { | 2242 | { |
2101 | IRRef args[CCI_NARGS_MAX*2]; | 2243 | IRRef args[CCI_NARGS_MAX*2]; |
2102 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2244 | uint32_t i, nargs = CCI_XNARGS(ci); |
2103 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; | 2245 | int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; |
2104 | asm_collectargs(as, ir, ci, args); | 2246 | asm_collectargs(as, ir, ci, args); |
2105 | for (i = 0; i < nargs; i++) | 2247 | for (i = 0; i < nargs; i++) |
2106 | if (args[i] && irt_isfp(IR(args[i])->t)) { | 2248 | if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { |
2107 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; | 2249 | if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; |
2108 | } else { | 2250 | } else { |
2109 | if (ngpr > 0) ngpr--; else nslots++; | 2251 | if (ngpr > 0) ngpr--; else nslots++; |
2110 | } | 2252 | } |
2111 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | 2253 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ |
2112 | as->evenspill = nslots; | 2254 | as->evenspill = nslots; |
2113 | return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); | 2255 | return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : |
2256 | REGSP_HINT(RID_RET); | ||
2114 | } | 2257 | } |
2115 | 2258 | ||
2116 | static void asm_setup_target(ASMState *as) | 2259 | static void asm_setup_target(ASMState *as) |
@@ -2150,7 +2293,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2150 | } else if ((ins & 0xfc000000u) == PPCI_B && | 2293 | } else if ((ins & 0xfc000000u) == PPCI_B && |
2151 | ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { | 2294 | ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { |
2152 | ptrdiff_t delta = (char *)target - (char *)p; | 2295 | ptrdiff_t delta = (char *)target - (char *)p; |
2153 | lua_assert(((delta + 0x02000000) >> 26) == 0); | 2296 | lj_assertJ(((delta + 0x02000000) >> 26) == 0, |
2297 | "branch target out of range"); | ||
2154 | *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); | 2298 | *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); |
2155 | if (!cstart) cstart = p; | 2299 | if (!cstart) cstart = p; |
2156 | } | 2300 | } |
@@ -2158,7 +2302,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2158 | /* Always patch long-range branch in exit stub itself. Except, if we can't. */ | 2302 | /* Always patch long-range branch in exit stub itself. Except, if we can't. */ |
2159 | if (patchlong) { | 2303 | if (patchlong) { |
2160 | ptrdiff_t delta = (char *)target - (char *)px - clearso; | 2304 | ptrdiff_t delta = (char *)target - (char *)px - clearso; |
2161 | lua_assert(((delta + 0x02000000) >> 26) == 0); | 2305 | lj_assertJ(((delta + 0x02000000) >> 26) == 0, |
2306 | "branch target out of range"); | ||
2162 | *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); | 2307 | *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); |
2163 | } | 2308 | } |
2164 | if (!cstart) cstart = px; | 2309 | if (!cstart) cstart = px; |