summaryrefslogtreecommitdiff
path: root/src/buildvm_ppc.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/buildvm_ppc.dasc')
-rw-r--r--src/buildvm_ppc.dasc1725
1 files changed, 891 insertions, 834 deletions
diff --git a/src/buildvm_ppc.dasc b/src/buildvm_ppc.dasc
index 2964e0e2..de3346cb 100644
--- a/src/buildvm_ppc.dasc
+++ b/src/buildvm_ppc.dasc
@@ -26,14 +26,12 @@
26|.define LREG, r18 // Register holding lua_State (also in SAVE_L). 26|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. 27|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
28| 28|
29|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save. 29|// Constants for type-comparisons, stores and conversions. C callee-save.
30|.define TISNUM, r22 30|.define TISNUM, r22
31|.define TISNIL, r23 31|.define TISNIL, r23
32|.define ZERO, r24 32|.define ZERO, r24
33|.define TISSTR, r25 // NYI: remove. 33|.define TOBIT, f30
34|.define TISTAB, r26 // NYI: remove. 34|.define TONUM, f31
35|.define TISFUNC, r27 // NYI: remove.
36|.define TOBIT, r28 // NYI: use FP reg.
37| 35|
38|// The following temporaries are not saved across C calls, except for RA. 36|// The following temporaries are not saved across C calls, except for RA.
39|.define RA, r20 // Callee-save. 37|.define RA, r20 // Callee-save.
@@ -57,8 +55,8 @@
57|.define CARG4, r6 // Overlaps TMP3. 55|.define CARG4, r6 // Overlaps TMP3.
58|.define CARG5, r7 // Overlaps INS. 56|.define CARG5, r7 // Overlaps INS.
59| 57|
60|.define CARGF1, f1 58|.define FARG1, f1
61|.define CARGF2, f2 59|.define FARG2, f2
62| 60|
63|.define CRET1, r3 61|.define CRET1, r3
64|.define CRET2, r4 62|.define CRET2, r4
@@ -69,16 +67,23 @@
69|// Back chain for sp: 256(sp) <-- sp entering interpreter 67|// Back chain for sp: 256(sp) <-- sp entering interpreter
70|.define SAVE_GPR_, 184 // .. 184+18*4: 32 bit GPR saves. 68|.define SAVE_GPR_, 184 // .. 184+18*4: 32 bit GPR saves.
71|.define SAVE_FPR_, 40 // .. 40+18*8: 64 bit FPR saves. 69|.define SAVE_FPR_, 40 // .. 40+18*8: 64 bit FPR saves.
72|.define SAVE_UNUSED, 32(sp) // 8 unused bytes for alignment. 70|.define TONUM_LO, 36(sp)
71|.define TONUM_HI, 32(sp)
73|.define SAVE_ERRF, 28(sp) // 32 bit C frame info. 72|.define SAVE_ERRF, 28(sp) // 32 bit C frame info.
74|.define SAVE_NRES, 24(sp) 73|.define SAVE_NRES, 24(sp)
75|.define SAVE_CFRAME, 20(sp) 74|.define SAVE_CFRAME, 20(sp)
76|.define SAVE_L, 16(sp) 75|.define SAVE_L, 16(sp)
77|.define SAVE_PC, 12(sp) 76|.define SAVE_MULTRES, 12(sp)
78|.define SAVE_MULTRES, 8(sp) 77|.define SAVE_PC, 8(sp)
79|// Next frame lr: 4(sp) 78|// Next frame lr: 4(sp)
80|// Back chain for sp: 0(sp) <-- sp while in interpreter 79|// Back chain for sp: 0(sp) <-- sp while in interpreter
81| 80|
81|.define TMPD_LO, SAVE_MULTRES
82|.define TMPD_BLO, 15(sp)
83|.define TMPD_HI, SAVE_PC
84|.define TMPD, SAVE_PC
85|.define TONUM_D, TONUM_HI
86|
82|.macro save_, reg 87|.macro save_, reg
83| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 88| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
84| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 89| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
@@ -133,6 +138,30 @@
133|// Trap for not-yet-implemented parts. 138|// Trap for not-yet-implemented parts.
134|.macro NYI; tw 4, sp, sp; .endmacro 139|.macro NYI; tw 4, sp, sp; .endmacro
135| 140|
141|// int/FP conversions.
142|.macro tonum_i, freg, reg
143| xoris reg, reg, 0x8000
144| stw reg, TONUM_LO
145| lfd freg, TONUM_D
146| fsub freg, freg, TONUM
147|.endmacro
148|
149|.macro tonum_u, freg, reg
150| stw reg, TONUM_LO
151| lfd freg, TONUM_D
152| fsub freg, freg, TOBIT
153|.endmacro
154|
155|.macro toint, reg, freg, tmpfreg
156| fctiwz tmpfreg, freg
157| stfd tmpfreg, TMPD
158| lwz reg, TMPD_LO
159|.endmacro
160|
161|.macro toint, reg, freg
162| toint reg, freg, freg
163|.endmacro
164|
136|//----------------------------------------------------------------------- 165|//-----------------------------------------------------------------------
137| 166|
138|// Access to frame relative to BASE. 167|// Access to frame relative to BASE.
@@ -218,14 +247,11 @@
218| 247|
219|// Macros to test operand types. 248|// Macros to test operand types.
220|.macro checknum, reg; cmplw reg, TISNUM; .endmacro 249|.macro checknum, reg; cmplw reg, TISNUM; .endmacro
250|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro
221|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro 251|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro
222|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro 252|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro
223|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro 253|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro
224|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro 254|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro
225|.macro checkok, label; beq label; .endmacro // NYI: remove.
226|.macro checkfail, label; bne label; .endmacro // NYI: remove.
227|.macro checkanyfail, label; bns label; .endmacro // NYI: remove.
228|.macro checkallok, label; bso label; .endmacro // NYI: remove.
229| 255|
230|.macro branch_RD 256|.macro branch_RD
231| srwi TMP0, RD, 1 257| srwi TMP0, RD, 1
@@ -276,7 +302,7 @@ static void build_subroutines(BuildCtx *ctx)
276 |->vm_returnp: 302 |->vm_returnp:
277 | // See vm_return. Also: TMP2 = previous base. 303 | // See vm_return. Also: TMP2 = previous base.
278 | andi. TMP0, PC, FRAME_P 304 | andi. TMP0, PC, FRAME_P
279 | evsplati TMP1, LJ_TTRUE 305 | li TMP1, LJ_TTRUE
280 | beq ->cont_dispatch 306 | beq ->cont_dispatch
281 | 307 |
282 | // Return from pcall or xpcall fast func. 308 | // Return from pcall or xpcall fast func.
@@ -309,9 +335,9 @@ static void build_subroutines(BuildCtx *ctx)
309 | beq >2 335 | beq >2
310 |1: 336 |1:
311 | addic. TMP1, TMP1, -8 337 | addic. TMP1, TMP1, -8
312 | evldd TMP0, 0(RA) 338 | lfd f0, 0(RA)
313 | addi RA, RA, 8 339 | addi RA, RA, 8
314 | evstdd TMP0, 0(BASE) 340 | stfd f0, 0(BASE)
315 | addi BASE, BASE, 8 341 | addi BASE, BASE, 8
316 | bne <1 342 | bne <1
317 | 343 |
@@ -336,16 +362,17 @@ static void build_subroutines(BuildCtx *ctx)
336 | lwz TMP1, L->maxstack 362 | lwz TMP1, L->maxstack
337 | cmplw BASE, TMP1 363 | cmplw BASE, TMP1
338 | bge >8 364 | bge >8
339 | evstdd TISNIL, 0(BASE) 365 | stw TISNIL, 0(BASE)
340 | addi RD, RD, 8 366 | addi RD, RD, 8
341 | addi BASE, BASE, 8 367 | addi BASE, BASE, 8
342 | b <2 368 | b <2
343 | 369 |
344 |7: // Less results wanted. 370 |7: // Less results wanted.
371 | subfic TMP3, TMP2, 0 // LUA_MULTRET+1 case?
345 | sub TMP0, RD, TMP2 372 | sub TMP0, RD, TMP2
346 | cmpwi TMP2, 0 // LUA_MULTRET+1 case? 373 | subfe TMP1, TMP1, TMP1 // TMP1 = TMP2 == 0 ? 0 : -1
347 | sub TMP0, BASE, TMP0 // Subtract the difference. 374 | and TMP0, TMP0, TMP1
348 | iseleq BASE, BASE, TMP0 // Either keep top or shrink it. 375 | sub BASE, BASE, TMP0 // Either keep top or shrink it.
349 | b <3 376 | b <3
350 | 377 |
351 |8: // Corner case: need to grow stack for filling up results. 378 |8: // Corner case: need to grow stack for filling up results.
@@ -380,24 +407,25 @@ static void build_subroutines(BuildCtx *ctx)
380 | rlwinm sp, CARG1, 0, 0, 29 407 | rlwinm sp, CARG1, 0, 0, 29
381 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 408 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
382 | lwz L, SAVE_L 409 | lwz L, SAVE_L
383 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. 410 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
384 | evsplati TISFUNC, LJ_TFUNC
385 | lus TOBIT, 0x4338
386 | evsplati TISTAB, LJ_TTAB
387 | li TMP0, 0
388 | lwz BASE, L->base 411 | lwz BASE, L->base
389 | evmergelo TOBIT, TOBIT, TMP0 412 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
390 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 413 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
391 | evsplati TISSTR, LJ_TSTR 414 | li ZERO, 0
415 | stw TMP3, TMPD
392 | li TMP1, LJ_TFALSE 416 | li TMP1, LJ_TFALSE
393 | evsplati TISNIL, LJ_TNIL 417 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
418 | li TISNIL, LJ_TNIL
394 | li_vmstate INTERP 419 | li_vmstate INTERP
420 | lfs TOBIT, TMPD
395 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 421 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
396 | la RA, -8(BASE) // Results start at BASE-8. 422 | la RA, -8(BASE) // Results start at BASE-8.
423 | stw TMP3, TMPD
397 | addi DISPATCH, DISPATCH, GG_G2DISP 424 | addi DISPATCH, DISPATCH, GG_G2DISP
398 | stw TMP1, 0(RA) // Prepend false to error message. 425 | stw TMP1, 0(RA) // Prepend false to error message.
399 | li RD, 16 // 2 results: false + error message. 426 | li RD, 16 // 2 results: false + error message.
400 | st_vmstate 427 | st_vmstate
428 | lfs TONUM, TMPD
401 | b ->vm_returnc 429 | b ->vm_returnc
402 | 430 |
403 |//----------------------------------------------------------------------- 431 |//-----------------------------------------------------------------------
@@ -454,22 +482,25 @@ static void build_subroutines(BuildCtx *ctx)
454 | // Resume after yield (like a return). 482 | // Resume after yield (like a return).
455 | mr RA, BASE 483 | mr RA, BASE
456 | lwz BASE, L->base 484 | lwz BASE, L->base
457 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. 485 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
458 | lwz TMP1, L->top 486 | lwz TMP1, L->top
459 | evsplati TISFUNC, LJ_TFUNC
460 | lus TOBIT, 0x4338
461 | evsplati TISTAB, LJ_TTAB
462 | lwz PC, FRAME_PC(BASE) 487 | lwz PC, FRAME_PC(BASE)
463 | li TMP2, 0 488 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
464 | evsplati TISSTR, LJ_TSTR
465 | sub RD, TMP1, BASE 489 | sub RD, TMP1, BASE
466 | evmergelo TOBIT, TOBIT, TMP2
467 | stb CARG3, L->status 490 | stb CARG3, L->status
491 | stw TMP3, TMPD
468 | andi. TMP0, PC, FRAME_TYPE 492 | andi. TMP0, PC, FRAME_TYPE
469 | li_vmstate INTERP 493 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
494 | lfs TOBIT, TMPD
495 | li ZERO, 0
496 | stw TMP3, TMPD
470 | addi RD, RD, 8 497 | addi RD, RD, 8
471 | evsplati TISNIL, LJ_TNIL 498 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
472 | mr MULTRES, RD 499 | mr MULTRES, RD
500 | stw TMP0, TONUM_HI
501 | li_vmstate INTERP
502 | lfs TONUM, TMPD
503 | li TISNIL, LJ_TNIL
473 | st_vmstate 504 | st_vmstate
474 | beq ->BC_RET_Z 505 | beq ->BC_RET_Z
475 | b ->vm_return 506 | b ->vm_return
@@ -500,27 +531,29 @@ static void build_subroutines(BuildCtx *ctx)
500 | 531 |
501 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 532 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
502 | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call). 533 | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call).
503 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. 534 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
504 | lwz TMP1, L->top 535 | lwz TMP1, L->top
505 | evsplati TISFUNC, LJ_TFUNC 536 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
506 | add PC, PC, BASE 537 | add PC, PC, BASE
507 | evsplati TISTAB, LJ_TTAB 538 | stw TMP3, TMPD
508 | lus TOBIT, 0x4338 539 | li ZERO, 0
509 | li TMP0, 0 540 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
541 | lfs TOBIT, TMPD
510 | sub PC, PC, TMP2 // PC = frame delta + frame type 542 | sub PC, PC, TMP2 // PC = frame delta + frame type
511 | evsplati TISSTR, LJ_TSTR 543 | stw TMP3, TMPD
544 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
512 | sub NARGS8:RC, TMP1, BASE 545 | sub NARGS8:RC, TMP1, BASE
513 | evmergelo TOBIT, TOBIT, TMP0 546 | stw TMP0, TONUM_HI
514 | li_vmstate INTERP 547 | li_vmstate INTERP
515 | evsplati TISNIL, LJ_TNIL 548 | lfs TONUM, TMPD
549 | li TISNIL, LJ_TNIL
516 | st_vmstate 550 | st_vmstate
517 | 551 |
518 |->vm_call_dispatch: 552 |->vm_call_dispatch:
519 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC 553 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
520 | li TMP0, -8 554 | lwz TMP0, FRAME_PC(BASE)
521 | evlddx LFUNC:RB, BASE, TMP0 555 | lwz LFUNC:RB, FRAME_FUNC(BASE)
522 | checkfunc LFUNC:RB 556 | checkfunc TMP0; bne ->vmeta_call
523 | checkfail ->vmeta_call
524 | 557 |
525 |->vm_call_dispatch_f: 558 |->vm_call_dispatch_f:
526 | ins_call 559 | ins_call
@@ -570,7 +603,7 @@ static void build_subroutines(BuildCtx *ctx)
570 | beq >1 603 | beq >1
571 | subi TMP2, RD, 8 604 | subi TMP2, RD, 8
572 | lwz TMP1, LFUNC:TMP1->pc 605 | lwz TMP1, LFUNC:TMP1->pc
573 | evstddx TISNIL, RA, TMP2 // Ensure one valid arg. 606 | stwx TISNIL, RA, TMP2 // Ensure one valid arg.
574 | lwz KBASE, PC2PROTO(k)(TMP1) 607 | lwz KBASE, PC2PROTO(k)(TMP1)
575 | // BASE = base, RA = resultptr, RB = meta base 608 | // BASE = base, RA = resultptr, RB = meta base
576 | mtctr TMP0 609 | mtctr TMP0
@@ -585,42 +618,45 @@ static void build_subroutines(BuildCtx *ctx)
585 | lwz INS, -4(PC) 618 | lwz INS, -4(PC)
586 | subi CARG2, RB, 16 619 | subi CARG2, RB, 16
587 | decode_RB8 SAVE0, INS 620 | decode_RB8 SAVE0, INS
588 | evldd TMP0, 0(RA) 621 | lfd f0, 0(RA)
589 | add TMP1, BASE, SAVE0 622 | add TMP1, BASE, SAVE0
590 | stw BASE, L->base 623 | stw BASE, L->base
591 | cmplw TMP1, CARG2 624 | cmplw TMP1, CARG2
592 | sub CARG3, CARG2, TMP1 625 | sub CARG3, CARG2, TMP1
593 | decode_RA8 RA, INS 626 | decode_RA8 RA, INS
594 | evstdd TMP0, 0(CARG2) 627 | stfd f0, 0(CARG2)
595 | bne ->BC_CAT_Z 628 | bne ->BC_CAT_Z
596 | evstddx TMP0, BASE, RA 629 | stfdx f0, BASE, RA
597 | b ->cont_nop 630 | b ->cont_nop
598 | 631 |
599 |//-- Table indexing metamethods ----------------------------------------- 632 |//-- Table indexing metamethods -----------------------------------------
600 | 633 |
601 |->vmeta_tgets1: 634 |->vmeta_tgets1:
602 | evmergelo STR:RC, TISSTR, STR:RC
603 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 635 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
636 | li TMP0, LJ_TSTR
604 | decode_RB8 RB, INS 637 | decode_RB8 RB, INS
605 | evstdd STR:RC, 0(CARG3) 638 | stw STR:RC, 4(CARG3)
606 | add CARG2, BASE, RB 639 | add CARG2, BASE, RB
640 | stw TMP0, 0(CARG3)
607 | b >1 641 | b >1
608 | 642 |
609 |->vmeta_tgets: 643 |->vmeta_tgets:
610 | evmergelo TAB:RB, TISTAB, TAB:RB
611 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 644 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
612 | evmergelo STR:RC, TISSTR, STR:RC 645 | li TMP0, LJ_TTAB
613 | evstdd TAB:RB, 0(CARG2) 646 | stw TAB:RB, 4(CARG2)
614 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) 647 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
615 | evstdd STR:RC, 0(CARG3) 648 | stw TMP0, 0(CARG2)
649 | li TMP1, LJ_TSTR
650 | stw STR:RC, 4(CARG3)
651 | stw TMP1, 0(CARG3)
616 | b >1 652 | b >1
617 | 653 |
618 |->vmeta_tgetb: // TMP0 = index 654 |->vmeta_tgetb: // TMP0 = index
619 | efdcfsi TMP0, TMP0 655 | tonum_u f0, TMP0
620 | decode_RB8 RB, INS 656 | decode_RB8 RB, INS
621 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 657 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
622 | add CARG2, BASE, RB 658 | add CARG2, BASE, RB
623 | evstdd TMP0, 0(CARG3) 659 | stfd f0, 0(CARG3)
624 | b >1 660 | b >1
625 | 661 |
626 |->vmeta_tgetv: 662 |->vmeta_tgetv:
@@ -636,9 +672,10 @@ static void build_subroutines(BuildCtx *ctx)
636 | // Returns TValue * (finished) or NULL (metamethod). 672 | // Returns TValue * (finished) or NULL (metamethod).
637 | cmplwi CRET1, 0 673 | cmplwi CRET1, 0
638 | beq >3 674 | beq >3
639 | evldd TMP0, 0(CRET1) 675 | lfd f0, 0(CRET1)
640 | evstddx TMP0, BASE, RA 676 | ins_next1
641 | ins_next 677 | stfdx f0, BASE, RA
678 | ins_next2
642 | 679 |
643 |3: // Call __index metamethod. 680 |3: // Call __index metamethod.
644 | // BASE = base, L->top = new base, stack = cont/func/t/k 681 | // BASE = base, L->top = new base, stack = cont/func/t/k
@@ -653,28 +690,31 @@ static void build_subroutines(BuildCtx *ctx)
653 |//----------------------------------------------------------------------- 690 |//-----------------------------------------------------------------------
654 | 691 |
655 |->vmeta_tsets1: 692 |->vmeta_tsets1:
656 | evmergelo STR:RC, TISSTR, STR:RC
657 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 693 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
694 | li TMP0, LJ_TSTR
658 | decode_RB8 RB, INS 695 | decode_RB8 RB, INS
659 | evstdd STR:RC, 0(CARG3) 696 | stw STR:RC, 4(CARG3)
660 | add CARG2, BASE, RB 697 | add CARG2, BASE, RB
698 | stw TMP0, 0(CARG3)
661 | b >1 699 | b >1
662 | 700 |
663 |->vmeta_tsets: 701 |->vmeta_tsets:
664 | evmergelo TAB:RB, TISTAB, TAB:RB
665 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 702 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
666 | evmergelo STR:RC, TISSTR, STR:RC 703 | li TMP0, LJ_TTAB
667 | evstdd TAB:RB, 0(CARG2) 704 | stw TAB:RB, 4(CARG2)
668 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) 705 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
669 | evstdd STR:RC, 0(CARG3) 706 | stw TMP0, 0(CARG2)
707 | li TMP1, LJ_TSTR
708 | stw STR:RC, 4(CARG3)
709 | stw TMP1, 0(CARG3)
670 | b >1 710 | b >1
671 | 711 |
672 |->vmeta_tsetb: // TMP0 = index 712 |->vmeta_tsetb: // TMP0 = index
673 | efdcfsi TMP0, TMP0 713 | tonum_u f0, TMP0
674 | decode_RB8 RB, INS 714 | decode_RB8 RB, INS
675 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 715 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
676 | add CARG2, BASE, RB 716 | add CARG2, BASE, RB
677 | evstdd TMP0, 0(CARG3) 717 | stfd f0, 0(CARG3)
678 | b >1 718 | b >1
679 | 719 |
680 |->vmeta_tsetv: 720 |->vmeta_tsetv:
@@ -689,11 +729,12 @@ static void build_subroutines(BuildCtx *ctx)
689 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 729 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
690 | // Returns TValue * (finished) or NULL (metamethod). 730 | // Returns TValue * (finished) or NULL (metamethod).
691 | cmplwi CRET1, 0 731 | cmplwi CRET1, 0
692 | evlddx TMP0, BASE, RA 732 | lfdx f0, BASE, RA
693 | beq >3 733 | beq >3
694 | // NOBARRIER: lj_meta_tset ensures the table is not black. 734 | // NOBARRIER: lj_meta_tset ensures the table is not black.
695 | evstdd TMP0, 0(CRET1) 735 | ins_next1
696 | ins_next 736 | stfd f0, 0(CRET1)
737 | ins_next2
697 | 738 |
698 |3: // Call __newindex metamethod. 739 |3: // Call __newindex metamethod.
699 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) 740 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
@@ -703,7 +744,7 @@ static void build_subroutines(BuildCtx *ctx)
703 | add PC, TMP1, BASE 744 | add PC, TMP1, BASE
704 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 745 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
705 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 746 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
706 | evstdd TMP0, 16(BASE) // Copy value to third argument. 747 | stfd f0, 16(BASE) // Copy value to third argument.
707 | b ->vm_call_dispatch_f 748 | b ->vm_call_dispatch_f
708 | 749 |
709 |//-- Comparison metamethods --------------------------------------------- 750 |//-- Comparison metamethods ---------------------------------------------
@@ -721,33 +762,35 @@ static void build_subroutines(BuildCtx *ctx)
721 |3: 762 |3:
722 | cmplwi CRET1, 1 763 | cmplwi CRET1, 1
723 | bgt ->vmeta_binop 764 | bgt ->vmeta_binop
765 | subfic CRET1, CRET1, 0
724 |4: 766 |4:
725 | lwz INS, 0(PC) 767 | lwz INS, 0(PC)
726 | addi PC, PC, 4 768 | addi PC, PC, 4
727 | decode_RD4 TMP2, INS 769 | decode_RD4 TMP2, INS
728 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 770 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
729 | add TMP2, TMP2, TMP3 771 | and TMP2, TMP2, CRET1
730 | isellt PC, PC, TMP2 772 | add PC, PC, TMP2
731 |->cont_nop: 773 |->cont_nop:
732 | ins_next 774 | ins_next
733 | 775 |
734 |->cont_ra: // RA = resultptr 776 |->cont_ra: // RA = resultptr
735 | lwz INS, -4(PC) 777 | lwz INS, -4(PC)
736 | evldd TMP0, 0(RA) 778 | lfd f0, 0(RA)
737 | decode_RA8 TMP1, INS 779 | decode_RA8 TMP1, INS
738 | evstddx TMP0, BASE, TMP1 780 | stfdx f0, BASE, TMP1
739 | b ->cont_nop 781 | b ->cont_nop
740 | 782 |
741 |->cont_condt: // RA = resultptr 783 |->cont_condt: // RA = resultptr
742 | lwz TMP0, 0(RA) 784 | lwz TMP0, 0(RA)
743 | li TMP1, LJ_TTRUE 785 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true.
744 | cmplw TMP1, TMP0 // Branch if result is true. 786 | subfe CRET1, CRET1, CRET1
787 | not CRET1, CRET1
745 | b <4 788 | b <4
746 | 789 |
747 |->cont_condf: // RA = resultptr 790 |->cont_condf: // RA = resultptr
748 | lwz TMP0, 0(RA) 791 | lwz TMP0, 0(RA)
749 | li TMP1, LJ_TFALSE 792 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false.
750 | cmplw TMP0, TMP1 // Branch if result is false. 793 | subfe CRET1, CRET1, CRET1
751 | b <4 794 | b <4
752 | 795 |
753 |->vmeta_equal: 796 |->vmeta_equal:
@@ -773,8 +816,8 @@ static void build_subroutines(BuildCtx *ctx)
773 | b >1 816 | b >1
774 | 817 |
775 |->vmeta_unm: 818 |->vmeta_unm:
776 | add CARG3, BASE, RD 819 | mr CARG3, RD
777 | mr CARG4, CARG3 820 | mr CARG4, RD
778 | b >1 821 | b >1
779 | 822 |
780 |->vmeta_arith_vv: 823 |->vmeta_arith_vv:
@@ -796,6 +839,7 @@ static void build_subroutines(BuildCtx *ctx)
796 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 839 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
797 | sub TMP1, CRET1, BASE 840 | sub TMP1, CRET1, BASE
798 | stw PC, -16(CRET1) // [cont|PC] 841 | stw PC, -16(CRET1) // [cont|PC]
842 | mr TMP2, BASE
799 | addi PC, TMP1, FRAME_CONT 843 | addi PC, TMP1, FRAME_CONT
800 | mr BASE, CRET1 844 | mr BASE, CRET1
801 | li NARGS8:RC, 16 // 2 args for func(o1, o2). 845 | li NARGS8:RC, 16 // 2 args for func(o1, o2).
@@ -805,7 +849,7 @@ static void build_subroutines(BuildCtx *ctx)
805#ifdef LUAJIT_ENABLE_LUA52COMPAT 849#ifdef LUAJIT_ENABLE_LUA52COMPAT
806 | mr SAVE0, CARG1 850 | mr SAVE0, CARG1
807#endif 851#endif
808 | add CARG2, BASE, RD 852 | mr CARG2, RD
809 | stw BASE, L->base 853 | stw BASE, L->base
810 | mr CARG1, L 854 | mr CARG1, L
811 | stw PC, SAVE_PC 855 | stw PC, SAVE_PC
@@ -882,29 +926,40 @@ static void build_subroutines(BuildCtx *ctx)
882 |.macro .ffunc_1, name 926 |.macro .ffunc_1, name
883 |->ff_ .. name: 927 |->ff_ .. name:
884 | cmplwi NARGS8:RC, 8 928 | cmplwi NARGS8:RC, 8
885 | evldd CARG1, 0(BASE) 929 | lwz CARG3, 0(BASE)
930 | lwz CARG1, 4(BASE)
886 | blt ->fff_fallback 931 | blt ->fff_fallback
887 |.endmacro 932 |.endmacro
888 | 933 |
889 |.macro .ffunc_2, name 934 |.macro .ffunc_2, name
890 |->ff_ .. name: 935 |->ff_ .. name:
891 | cmplwi NARGS8:RC, 16 936 | cmplwi NARGS8:RC, 16
892 | evldd CARG1, 0(BASE) 937 | lwz CARG3, 0(BASE)
893 | evldd CARG2, 8(BASE) 938 | lwz CARG4, 8(BASE)
939 | lwz CARG1, 4(BASE)
940 | lwz CARG2, 12(BASE)
894 | blt ->fff_fallback 941 | blt ->fff_fallback
895 |.endmacro 942 |.endmacro
896 | 943 |
897 |.macro .ffunc_n, name 944 |.macro .ffunc_n, name
898 | .ffunc_1 name 945 |->ff_ .. name:
899 | checknum CARG1 946 | cmplwi NARGS8:RC, 8
900 | checkfail ->fff_fallback 947 | lwz CARG3, 0(BASE)
948 | lfd FARG1, 0(BASE)
949 | blt ->fff_fallback
950 | checknum CARG3; bge ->fff_fallback
901 |.endmacro 951 |.endmacro
902 | 952 |
903 |.macro .ffunc_nn, name 953 |.macro .ffunc_nn, name
904 | .ffunc_2 name 954 |->ff_ .. name:
905 | evmergehi TMP0, CARG1, CARG2 955 | cmplwi NARGS8:RC, 16
906 | checknum TMP0 956 | lwz CARG3, 0(BASE)
907 | checkanyfail ->fff_fallback 957 | lfd FARG1, 0(BASE)
958 | lwz CARG4, 8(BASE)
959 | lfd FARG2, 8(BASE)
960 | blt ->fff_fallback
961 | checknum CARG3; bge ->fff_fallback
962 | checknum CARG4; bge ->fff_fallback
908 |.endmacro 963 |.endmacro
909 | 964 |
910 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 965 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -917,24 +972,22 @@ static void build_subroutines(BuildCtx *ctx)
917 | 972 |
918 |//-- Base library: checks ----------------------------------------------- 973 |//-- Base library: checks -----------------------------------------------
919 | 974 |
920 |.ffunc assert 975 |.ffunc_1 assert
921 | cmplwi NARGS8:RC, 8 976 | li TMP1, LJ_TFALSE
922 | evldd TMP0, 0(BASE) 977 | la RA, -8(BASE)
923 | blt ->fff_fallback 978 | cmplw cr1, CARG3, TMP1
924 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
925 | la RA, -8(BASE)
926 | evcmpltu cr1, TMP0, TMP1
927 | lwz PC, FRAME_PC(BASE) 979 | lwz PC, FRAME_PC(BASE)
928 | bge cr1, ->fff_fallback 980 | bge cr1, ->fff_fallback
929 | evstdd TMP0, 0(RA) 981 | stw CARG3, 0(RA)
930 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 982 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
931 | beq ->fff_res // Done if exactly 1 argument. 983 | stw CARG1, 4(RA)
984 | beq ->fff_res // Done if exactly 1 argument.
932 | li TMP1, 8 985 | li TMP1, 8
933 | subi RC, RC, 8 986 | subi RC, RC, 8
934 |1: 987 |1:
935 | cmplw TMP1, RC 988 | cmplw TMP1, RC
936 | evlddx TMP0, BASE, TMP1 989 | lfdx f0, BASE, TMP1
937 | evstddx TMP0, RA, TMP1 990 | stfdx f0, RA, TMP1
938 | addi TMP1, TMP1, 8 991 | addi TMP1, TMP1, 8
939 | bne <1 992 | bne <1
940 | b ->fff_res 993 | b ->fff_res
@@ -943,71 +996,70 @@ static void build_subroutines(BuildCtx *ctx)
943 | cmplwi NARGS8:RC, 8 996 | cmplwi NARGS8:RC, 8
944 | lwz CARG1, 0(BASE) 997 | lwz CARG1, 0(BASE)
945 | blt ->fff_fallback 998 | blt ->fff_fallback
946 | li TMP2, ~LJ_TNUMX 999 | subfc TMP0, TISNUM, CARG1
947 | cmplw CARG1, TISNUM 1000 | subfe TMP2, CARG1, CARG1
948 | not TMP1, CARG1 1001 | orc TMP1, TMP2, TMP0
949 | isellt TMP1, TMP2, TMP1 1002 | addi TMP1, TMP1, ~LJ_TISNUM+1
950 | slwi TMP1, TMP1, 3 1003 | slwi TMP1, TMP1, 3
951 | la TMP2, CFUNC:RB->upvalue 1004 | la TMP2, CFUNC:RB->upvalue
952 | evlddx STR:CRET1, TMP2, TMP1 1005 | lfdx FARG1, TMP2, TMP1
953 | b ->fff_restv 1006 | b ->fff_resn
954 | 1007 |
955 |//-- Base library: getters and setters --------------------------------- 1008 |//-- Base library: getters and setters ---------------------------------
956 | 1009 |
957 |.ffunc_1 getmetatable 1010 |.ffunc_1 getmetatable
958 | checktab CARG1 1011 | checktab CARG3; bne >6
959 | evmergehi TMP1, CARG1, CARG1
960 | checkfail >6
961 |1: // Field metatable must be at same offset for GCtab and GCudata! 1012 |1: // Field metatable must be at same offset for GCtab and GCudata!
962 | lwz TAB:RB, TAB:CARG1->metatable 1013 | lwz TAB:CARG1, TAB:CARG1->metatable
963 |2: 1014 |2:
964 | evmr CRET1, TISNIL 1015 | li CARG3, LJ_TNIL
965 | cmplwi TAB:RB, 0 1016 | cmplwi TAB:CARG1, 0
966 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1017 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
967 | beq ->fff_restv 1018 | beq ->fff_restv
968 | lwz TMP0, TAB:RB->hmask 1019 | lwz TMP0, TAB:CARG1->hmask
969 | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result. 1020 | li CARG3, LJ_TTAB // Use metatable as default result.
970 | lwz TMP1, STR:RC->hash 1021 | lwz TMP1, STR:RC->hash
971 | lwz NODE:TMP2, TAB:RB->node 1022 | lwz NODE:TMP2, TAB:CARG1->node
972 | evmergelo STR:RC, TISSTR, STR:RC
973 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1023 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
974 | slwi TMP0, TMP1, 5 1024 | slwi TMP0, TMP1, 5
975 | slwi TMP1, TMP1, 3 1025 | slwi TMP1, TMP1, 3
976 | sub TMP1, TMP0, TMP1 1026 | sub TMP1, TMP0, TMP1
977 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 1027 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
978 |3: // Rearranged logic, because we expect _not_ to find the key. 1028 |3: // Rearranged logic, because we expect _not_ to find the key.
979 | evldd TMP0, NODE:TMP2->key 1029 | lwz CARG4, NODE:TMP2->key
980 | evldd TMP1, NODE:TMP2->val 1030 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
981 | evcmpeq TMP0, STR:RC 1031 | lwz CARG2, NODE:TMP2->val
982 | lwz NODE:TMP2, NODE:TMP2->next 1032 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
983 | checkallok >5 1033 | checkstr CARG4; bne >4
984 | cmplwi NODE:TMP2, 0 1034 | cmpw TMP0, STR:RC; beq >5
985 | beq ->fff_restv // Not found, keep default result. 1035 |4:
986 | b <3 1036 | lwz NODE:TMP2, NODE:TMP2->next
1037 | cmplwi NODE:TMP2, 0
1038 | beq ->fff_restv // Not found, keep default result.
1039 | b <3
987 |5: 1040 |5:
988 | checknil TMP1 1041 | checknil CARG2
989 | checkok ->fff_restv // Ditto for nil value. 1042 | beq ->fff_restv // Ditto for nil value.
990 | evmr CRET1, TMP1 // Return value of mt.__metatable. 1043 | mr CARG3, CARG2 // Return value of mt.__metatable.
1044 | mr CARG1, TMP1
991 | b ->fff_restv 1045 | b ->fff_restv
992 | 1046 |
993 |6: 1047 |6:
994 | cmpwi TMP1, LJ_TUDATA 1048 | cmpwi CARG3, LJ_TUDATA; beq <1
995 | not TMP1, TMP1 1049 | subfc TMP0, TISNUM, CARG3
996 | beq <1 1050 | subfe TMP2, CARG3, CARG3
997 | checknum CARG1 1051 | orc TMP1, TMP2, TMP0
998 | slwi TMP1, TMP1, 2 1052 | addi TMP1, TMP1, ~LJ_TISNUM+1
999 | li TMP2, 4*~LJ_TNUMX 1053 | slwi TMP1, TMP1, 2
1000 | isellt TMP1, TMP2, TMP1
1001 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) 1054 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
1002 | lwzx TAB:RB, TMP2, TMP1 1055 | lwzx TAB:CARG1, TMP2, TMP1
1003 | b <2 1056 | b <2
1004 | 1057 |
1005 |.ffunc_2 setmetatable 1058 |.ffunc_2 setmetatable
1006 | // Fast path: no mt for table yet and not clearing the mt. 1059 | // Fast path: no mt for table yet and not clearing the mt.
1007 | evmergehi TMP0, TAB:CARG1, TAB:CARG2 1060 | checktab CARG3; bne ->fff_fallback
1008 | checktab TMP0
1009 | checkanyfail ->fff_fallback
1010 | lwz TAB:TMP1, TAB:CARG1->metatable 1061 | lwz TAB:TMP1, TAB:CARG1->metatable
1062 | checktab CARG4; bne ->fff_fallback
1011 | cmplwi TAB:TMP1, 0 1063 | cmplwi TAB:TMP1, 0
1012 | lbz TMP3, TAB:CARG1->marked 1064 | lbz TMP3, TAB:CARG1->marked
1013 | bne ->fff_fallback 1065 | bne ->fff_fallback
@@ -1018,60 +1070,61 @@ static void build_subroutines(BuildCtx *ctx)
1018 | b ->fff_restv 1070 | b ->fff_restv
1019 | 1071 |
1020 |.ffunc rawget 1072 |.ffunc rawget
1021 | cmplwi NARGS8:RC, 16 1073 | cmplwi NARGS8:RC, 8
1022 | evldd CARG2, 0(BASE) 1074 | lwz CARG4, 0(BASE)
1075 | lwz TAB:CARG2, 4(BASE)
1023 | blt ->fff_fallback 1076 | blt ->fff_fallback
1024 | checktab CARG2 1077 | checktab CARG4; bne ->fff_fallback
1025 | la CARG3, 8(BASE) 1078 | la CARG3, 8(BASE)
1026 | checkfail ->fff_fallback
1027 | mr CARG1, L 1079 | mr CARG1, L
1028 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1080 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1029 | // Returns cTValue *. 1081 | // Returns cTValue *.
1030 | evldd CRET1, 0(CRET1) 1082 | lfd FARG1, 0(CRET1)
1031 | b ->fff_restv 1083 | b ->fff_resn
1032 | 1084 |
1033 |//-- Base library: conversions ------------------------------------------ 1085 |//-- Base library: conversions ------------------------------------------
1034 | 1086 |
1035 |.ffunc tonumber 1087 |.ffunc tonumber
1036 | // Only handles the number case inline (without a base argument). 1088 | // Only handles the number case inline (without a base argument).
1037 | cmplwi NARGS8:RC, 8 1089 | cmplwi NARGS8:RC, 8
1038 | evldd CARG1, 0(BASE) 1090 | lwz CARG1, 0(BASE)
1091 | lfd FARG1, 0(BASE)
1039 | bne ->fff_fallback // Exactly one argument. 1092 | bne ->fff_fallback // Exactly one argument.
1040 | checknum CARG1 1093 | checknum CARG1; bgt ->fff_fallback
1041 | checkok ->fff_restv 1094 | b ->fff_resn
1042 | b ->fff_fallback
1043 | 1095 |
1044 |.ffunc_1 tostring 1096 |.ffunc_1 tostring
1045 | // Only handles the string or number case inline. 1097 | // Only handles the string or number case inline.
1046 | checkstr CARG1 1098 | checkstr CARG3
1047 | // A __tostring method in the string base metatable is ignored. 1099 | // A __tostring method in the string base metatable is ignored.
1048 | checkok ->fff_restv // String key? 1100 | beq ->fff_restv // String key?
1049 | // Handle numbers inline, unless a number base metatable is present. 1101 | // Handle numbers inline, unless a number base metatable is present.
1050 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1102 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1051 | checknum CARG1 1103 | checknum CARG3
1052 | cmplwi cr1, TMP0, 0 1104 | cmplwi cr1, TMP0, 0
1053 | stw BASE, L->base // Add frame since C call can throw. 1105 | stw BASE, L->base // Add frame since C call can throw.
1054 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 1106 | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq
1055 | stw PC, SAVE_PC // Redundant (but a defined value). 1107 | stw PC, SAVE_PC // Redundant (but a defined value).
1056 | bne ->fff_fallback 1108 | beq ->fff_fallback
1057 | ffgccheck 1109 | ffgccheck
1058 | mr CARG1, L 1110 | mr CARG1, L
1059 | mr CARG2, BASE 1111 | mr CARG2, BASE
1060 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1112 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
1061 | // Returns GCstr *. 1113 | // Returns GCstr *.
1062 | evmergelo STR:CRET1, TISSTR, STR:CRET1 1114 | li CARG3, LJ_TSTR
1063 | b ->fff_restv 1115 | b ->fff_restv
1064 | 1116 |
1065 |//-- Base library: iterators ------------------------------------------- 1117 |//-- Base library: iterators -------------------------------------------
1066 | 1118 |
1067 |.ffunc next 1119 |.ffunc next
1068 | cmplwi NARGS8:RC, 8 1120 | cmplwi NARGS8:RC, 8
1069 | evldd CARG2, 0(BASE) 1121 | lwz CARG1, 0(BASE)
1122 | lwz TAB:CARG2, 4(BASE)
1070 | blt ->fff_fallback 1123 | blt ->fff_fallback
1071 | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. 1124 | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
1072 | checktab TAB:CARG2 1125 | checktab CARG1
1073 | lwz PC, FRAME_PC(BASE) 1126 | lwz PC, FRAME_PC(BASE)
1074 | checkfail ->fff_fallback 1127 | bne ->fff_fallback
1075 | stw BASE, L->base // Add frame since C call can throw. 1128 | stw BASE, L->base // Add frame since C call can throw.
1076 | mr CARG1, L 1129 | mr CARG1, L
1077 | stw BASE, L->top // Dummy frame length is ok. 1130 | stw BASE, L->top // Dummy frame length is ok.
@@ -1080,60 +1133,69 @@ static void build_subroutines(BuildCtx *ctx)
1080 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1133 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1081 | // Returns 0 at end of traversal. 1134 | // Returns 0 at end of traversal.
1082 | cmplwi CRET1, 0 1135 | cmplwi CRET1, 0
1083 | evmr CRET1, TISNIL 1136 | li CARG3, LJ_TNIL
1084 | beq ->fff_restv // End of traversal: return nil. 1137 | beq ->fff_restv // End of traversal: return nil.
1085 | evldd TMP0, 8(BASE) // Copy key and value to results. 1138 | lfd f0, 8(BASE) // Copy key and value to results.
1086 | la RA, -8(BASE) 1139 | la RA, -8(BASE)
1087 | evldd TMP1, 16(BASE) 1140 | lfd f1, 16(BASE)
1088 | evstdd TMP0, 0(RA) 1141 | stfd f0, 0(RA)
1089 | li RD, (2+1)*8 1142 | li RD, (2+1)*8
1090 | evstdd TMP1, 8(RA) 1143 | stfd f1, 8(RA)
1091 | b ->fff_res 1144 | b ->fff_res
1092 | 1145 |
1093 |.ffunc_1 pairs 1146 |.ffunc_1 pairs
1094 | checktab TAB:CARG1 1147 | checktab CARG3
1095 | lwz PC, FRAME_PC(BASE) 1148 | lwz PC, FRAME_PC(BASE)
1096 | checkfail ->fff_fallback 1149 | bne ->fff_fallback
1097#ifdef LUAJIT_ENABLE_LUA52COMPAT 1150#ifdef LUAJIT_ENABLE_LUA52COMPAT
1098 | lwz TAB:TMP2, TAB:CARG1->metatable 1151 | lwz TAB:TMP2, TAB:CARG1->metatable
1099 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] 1152 | lfd f0, CFUNC:RB->upvalue[0]
1100 | cmplwi TAB:TMP2, 0 1153 | cmplwi TAB:TMP2, 0
1101 | la RA, -8(BASE) 1154 | la RA, -8(BASE)
1102 | bne ->fff_fallback 1155 | bne ->fff_fallback
1103#else 1156#else
1104 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] 1157 | lfd f0, CFUNC:RB->upvalue[0]
1105 | la RA, -8(BASE) 1158 | la RA, -8(BASE)
1106#endif 1159#endif
1107 | evstdd TISNIL, 8(BASE) 1160 | stw TISNIL, 8(BASE)
1108 | li RD, (3+1)*8 1161 | li RD, (3+1)*8
1109 | evstdd CFUNC:TMP0, 0(RA) 1162 | stfd f0, 0(RA)
1110 | b ->fff_res 1163 | b ->fff_res
1111 | 1164 |
1112 |.ffunc_2 ipairs_aux 1165 |.ffunc ipairs_aux
1113 | checktab TAB:CARG1 1166 | cmplwi NARGS8:RC, 16
1167 | lwz CARG3, 0(BASE)
1168 | lwz TAB:CARG1, 4(BASE)
1169 | lwz CARG4, 8(BASE)
1170 | lfd FARG2, 8(BASE)
1171 | blt ->fff_fallback
1172 | checktab CARG3
1173 | checknum cr1, CARG4
1114 | lwz PC, FRAME_PC(BASE) 1174 | lwz PC, FRAME_PC(BASE)
1115 | checkfail ->fff_fallback 1175 | lus TMP0, 0x3ff0
1116 | checknum CARG2 1176 | stw ZERO, TMPD_LO
1117 | lus TMP3, 0x3ff0 1177 | bne ->fff_fallback
1118 | checkfail ->fff_fallback 1178 | stw TMP0, TMPD_HI
1119 | efdctsi TMP2, CARG2 1179 | bge cr1, ->fff_fallback
1180 | lfd FARG1, TMPD
1181 | toint TMP2, FARG2, f0
1120 | lwz TMP0, TAB:CARG1->asize 1182 | lwz TMP0, TAB:CARG1->asize
1121 | evmergelo TMP3, TMP3, ZERO
1122 | lwz TMP1, TAB:CARG1->array 1183 | lwz TMP1, TAB:CARG1->array
1123 | efdadd CARG2, CARG2, TMP3 1184 | fadd FARG2, FARG2, FARG1
1124 | addi TMP2, TMP2, 1 1185 | addi TMP2, TMP2, 1
1125 | la RA, -8(BASE) 1186 | la RA, -8(BASE)
1126 | cmplw TMP0, TMP2 1187 | cmplw TMP0, TMP2
1127 | slwi TMP3, TMP2, 3 1188 | slwi TMP3, TMP2, 3
1128 | evstdd CARG2, 0(RA) 1189 | stfd FARG2, 0(RA)
1129 | ble >2 // Not in array part? 1190 | ble >2 // Not in array part?
1130 | evlddx TMP1, TMP1, TMP3 1191 | lwzx TMP2, TMP1, TMP3
1192 | lfdx f0, TMP1, TMP3
1131 |1: 1193 |1:
1132 | checknil TMP1 1194 | checknil TMP2
1133 | li RD, (0+1)*8 1195 | li RD, (0+1)*8
1134 | checkok ->fff_res // End of iteration, return 0 results. 1196 | beq ->fff_res // End of iteration, return 0 results.
1135 | li RD, (2+1)*8 1197 | li RD, (2+1)*8
1136 | evstdd TMP1, 8(RA) 1198 | stfd f0, 8(RA)
1137 | b ->fff_res 1199 | b ->fff_res
1138 |2: // Check for empty hash part first. Otherwise call C function. 1200 |2: // Check for empty hash part first. Otherwise call C function.
1139 | lwz TMP0, TAB:CARG1->hmask 1201 | lwz TMP0, TAB:CARG1->hmask
@@ -1146,27 +1208,28 @@ static void build_subroutines(BuildCtx *ctx)
1146 | cmplwi CRET1, 0 1208 | cmplwi CRET1, 0
1147 | li RD, (0+1)*8 1209 | li RD, (0+1)*8
1148 | beq ->fff_res 1210 | beq ->fff_res
1149 | evldd TMP1, 0(CRET1) 1211 | lwz TMP2, 0(CRET1)
1212 | lfd f0, 0(CRET1)
1150 | b <1 1213 | b <1
1151 | 1214 |
1152 |.ffunc_1 ipairs 1215 |.ffunc_1 ipairs
1153 | checktab TAB:CARG1 1216 | checktab CARG3
1154 | lwz PC, FRAME_PC(BASE) 1217 | lwz PC, FRAME_PC(BASE)
1155 | checkfail ->fff_fallback 1218 | bne ->fff_fallback
1156#ifdef LUAJIT_ENABLE_LUA52COMPAT 1219#ifdef LUAJIT_ENABLE_LUA52COMPAT
1157 | lwz TAB:TMP2, TAB:CARG1->metatable 1220 | lwz TAB:TMP2, TAB:CARG1->metatable
1158 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] 1221 | lfd f0, CFUNC:RB->upvalue[0]
1159 | cmplwi TAB:TMP2, 0 1222 | cmplwi TAB:TMP2, 0
1160 | la RA, -8(BASE) 1223 | la RA, -8(BASE)
1161 | bne ->fff_fallback 1224 | bne ->fff_fallback
1162#else 1225#else
1163 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] 1226 | lfd f0, CFUNC:RB->upvalue[0]
1164 | la RA, -8(BASE) 1227 | la RA, -8(BASE)
1165#endif 1228#endif
1166 | evsplati TMP1, 0 1229 | stw ZERO, 8(BASE)
1230 | stw ZERO, 12(BASE)
1167 | li RD, (3+1)*8 1231 | li RD, (3+1)*8
1168 | evstdd TMP1, 8(BASE) 1232 | stfd f0, 0(RA)
1169 | evstdd CFUNC:TMP0, 0(RA)
1170 | b ->fff_res 1233 | b ->fff_res
1171 | 1234 |
1172 |//-- Base library: catch errors ---------------------------------------- 1235 |//-- Base library: catch errors ----------------------------------------
@@ -1183,18 +1246,23 @@ static void build_subroutines(BuildCtx *ctx)
1183 | addi PC, TMP3, 8+FRAME_PCALL 1246 | addi PC, TMP3, 8+FRAME_PCALL
1184 | b ->vm_call_dispatch 1247 | b ->vm_call_dispatch
1185 | 1248 |
1186 |.ffunc_2 xpcall 1249 |.ffunc xpcall
1187 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 1250 | cmplwi NARGS8:RC, 16
1251 | lwz CARG4, 8(BASE)
1252 | lfd FARG2, 8(BASE)
1253 | lwz CARG3, 0(BASE)
1254 | lfd FARG1, 0(BASE)
1255 | blt ->fff_fallback
1256 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1188 | mr TMP2, BASE 1257 | mr TMP2, BASE
1189 | checkfunc CARG2 // Traceback must be a function. 1258 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
1190 | checkfail ->fff_fallback
1191 | la BASE, 16(BASE) 1259 | la BASE, 16(BASE)
1192 | // Remember active hook before pcall. 1260 | // Remember active hook before pcall.
1193 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 1261 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1194 | evstdd CARG2, 0(TMP2) // Swap function and traceback. 1262 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1195 | subi NARGS8:RC, NARGS8:RC, 16 1263 | subi NARGS8:RC, NARGS8:RC, 16
1196 | evstdd CARG1, 8(TMP2) 1264 | stfd FARG1, 8(TMP2)
1197 | addi PC, TMP3, 16+FRAME_PCALL 1265 | addi PC, TMP1, 16+FRAME_PCALL
1198 | b ->vm_call_dispatch 1266 | b ->vm_call_dispatch
1199 | 1267 |
1200 |//-- Coroutine library -------------------------------------------------- 1268 |//-- Coroutine library --------------------------------------------------
@@ -1202,15 +1270,11 @@ static void build_subroutines(BuildCtx *ctx)
1202 |.macro coroutine_resume_wrap, resume 1270 |.macro coroutine_resume_wrap, resume
1203 |.if resume 1271 |.if resume
1204 |.ffunc_1 coroutine_resume 1272 |.ffunc_1 coroutine_resume
1205 | evmergehi TMP0, L:CARG1, L:CARG1 1273 | cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback
1206 |.else 1274 |.else
1207 |.ffunc coroutine_wrap_aux 1275 |.ffunc coroutine_wrap_aux
1208 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr 1276 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr
1209 |.endif 1277 |.endif
1210 |.if resume
1211 | cmpwi TMP0, LJ_TTHREAD
1212 | bne ->fff_fallback
1213 |.endif
1214 | lbz TMP0, L:CARG1->status 1278 | lbz TMP0, L:CARG1->status
1215 | lwz TMP1, L:CARG1->cframe 1279 | lwz TMP1, L:CARG1->cframe
1216 | lwz CARG2, L:CARG1->top 1280 | lwz CARG2, L:CARG1->top
@@ -1240,9 +1304,9 @@ static void build_subroutines(BuildCtx *ctx)
1240 | stw BASE, L->top 1304 | stw BASE, L->top
1241 |2: // Move args to coroutine. 1305 |2: // Move args to coroutine.
1242 | cmpw TMP1, NARGS8:RC 1306 | cmpw TMP1, NARGS8:RC
1243 | evlddx TMP0, BASE, TMP1 1307 | lfdx f0, BASE, TMP1
1244 | beq >3 1308 | beq >3
1245 | evstddx TMP0, CARG2, TMP1 1309 | stfdx f0, CARG2, TMP1
1246 | addi TMP1, TMP1, 8 1310 | addi TMP1, TMP1, 8
1247 | b <2 1311 | b <2
1248 |3: 1312 |3:
@@ -1272,8 +1336,8 @@ static void build_subroutines(BuildCtx *ctx)
1272 | stw TMP2, L:SAVE0->top // Clear coroutine stack. 1336 | stw TMP2, L:SAVE0->top // Clear coroutine stack.
1273 |5: // Move results from coroutine. 1337 |5: // Move results from coroutine.
1274 | cmplw TMP1, TMP3 1338 | cmplw TMP1, TMP3
1275 | evlddx TMP0, TMP2, TMP1 1339 | lfdx f0, TMP2, TMP1
1276 | evstddx TMP0, BASE, TMP1 1340 | stfdx f0, BASE, TMP1
1277 | addi TMP1, TMP1, 8 1341 | addi TMP1, TMP1, 8
1278 | bne <5 1342 | bne <5
1279 |6: 1343 |6:
@@ -1298,12 +1362,12 @@ static void build_subroutines(BuildCtx *ctx)
1298 | andi. TMP0, PC, FRAME_TYPE 1362 | andi. TMP0, PC, FRAME_TYPE
1299 | la TMP3, -8(TMP3) 1363 | la TMP3, -8(TMP3)
1300 | li TMP1, LJ_TFALSE 1364 | li TMP1, LJ_TFALSE
1301 | evldd TMP0, 0(TMP3) 1365 | lfd f0, 0(TMP3)
1302 | stw TMP3, L:SAVE0->top // Remove error from coroutine stack. 1366 | stw TMP3, L:SAVE0->top // Remove error from coroutine stack.
1303 | li RD, (2+1)*8 1367 | li RD, (2+1)*8
1304 | stw TMP1, -8(BASE) // Prepend false to results. 1368 | stw TMP1, -8(BASE) // Prepend false to results.
1305 | la RA, -8(BASE) 1369 | la RA, -8(BASE)
1306 | evstdd TMP0, 0(BASE) // Copy error message. 1370 | stfd f0, 0(BASE) // Copy error message.
1307 | b <7 1371 | b <7
1308 |.else 1372 |.else
1309 | mr CARG1, L 1373 | mr CARG1, L
@@ -1336,15 +1400,17 @@ static void build_subroutines(BuildCtx *ctx)
1336 | 1400 |
1337 |//-- Math library ------------------------------------------------------- 1401 |//-- Math library -------------------------------------------------------
1338 | 1402 |
1339 |.ffunc_n math_abs 1403 |.ffunc_1 math_abs
1340 | efdabs CRET1, CARG1 1404 | checknum CARG3; bge ->fff_fallback
1405 | rlwinm CARG3, CARG3, 0, 1, 31
1341 | // Fallthrough. 1406 | // Fallthrough.
1342 | 1407 |
1343 |->fff_restv: 1408 |->fff_restv:
1344 | // CRET1 = TValue result. 1409 | // CARG3/CARG1 = TValue result.
1345 | lwz PC, FRAME_PC(BASE) 1410 | lwz PC, FRAME_PC(BASE)
1346 | la RA, -8(BASE) 1411 | stw CARG3, -8(BASE)
1347 | evstdd CRET1, 0(RA) 1412 | la RA, -8(BASE)
1413 | stw CARG1, -4(BASE)
1348 |->fff_res1: 1414 |->fff_res1:
1349 | // RA = results, PC = return. 1415 | // RA = results, PC = return.
1350 | li RD, (1+1)*8 1416 | li RD, (1+1)*8
@@ -1367,54 +1433,24 @@ static void build_subroutines(BuildCtx *ctx)
1367 |6: // Fill up results with nil. 1433 |6: // Fill up results with nil.
1368 | subi TMP1, RD, 8 1434 | subi TMP1, RD, 8
1369 | addi RD, RD, 8 1435 | addi RD, RD, 8
1370 | evstddx TISNIL, RA, TMP1 1436 | stwx TISNIL, RA, TMP1
1371 | b <5 1437 | b <5
1372 | 1438 |
1373 |.macro math_extern, func 1439 |.macro math_extern, func
1374 | .ffunc math_ .. func 1440 | .ffunc_n math_ .. func
1375 | cmplwi NARGS8:RC, 8
1376 | evldd CARG2, 0(BASE)
1377 | blt ->fff_fallback
1378 | checknum CARG2
1379 | evmergehi CARG1, CARG2, CARG2
1380 | checkfail ->fff_fallback
1381 | bl extern func 1441 | bl extern func
1382 | evmergelo CRET1, CRET1, CRET2 1442 | b ->fff_resn
1383 | b ->fff_restv
1384 |.endmacro 1443 |.endmacro
1385 | 1444 |
1386 |.macro math_extern2, func 1445 |.macro math_extern2, func
1387 | .ffunc math_ .. func 1446 | .ffunc_nn math_ .. func
1388 | cmplwi NARGS8:RC, 16
1389 | evldd CARG2, 0(BASE)
1390 | evldd CARG4, 8(BASE)
1391 | blt ->fff_fallback
1392 | evmergehi CARG1, CARG4, CARG2
1393 | checknum CARG1
1394 | evmergehi CARG3, CARG4, CARG4
1395 | checkanyfail ->fff_fallback
1396 | bl extern func 1447 | bl extern func
1397 | evmergelo CRET1, CRET1, CRET2 1448 | b ->fff_resn
1398 | b ->fff_restv
1399 |.endmacro 1449 |.endmacro
1400 | 1450 |
1401 |.macro math_round, func 1451 | // NYI: Use internal implementation.
1402 | .ffunc math_ .. func 1452 | math_extern floor
1403 | cmplwi NARGS8:RC, 8 1453 | math_extern ceil
1404 | evldd CARG2, 0(BASE)
1405 | blt ->fff_fallback
1406 | checknum CARG2
1407 | evmergehi CARG1, CARG2, CARG2
1408 | checkfail ->fff_fallback
1409 | lwz PC, FRAME_PC(BASE)
1410 | bl ->vm_..func.._hilo;
1411 | la RA, -8(BASE)
1412 | evstdd CRET2, 0(RA)
1413 | b ->fff_res1
1414 |.endmacro
1415 |
1416 | math_round floor
1417 | math_round ceil
1418 | 1454 |
1419 | math_extern sqrt 1455 | math_extern sqrt
1420 | math_extern log 1456 | math_extern log
@@ -1435,119 +1471,98 @@ static void build_subroutines(BuildCtx *ctx)
1435 | 1471 |
1436 |->ff_math_deg: 1472 |->ff_math_deg:
1437 |.ffunc_n math_rad 1473 |.ffunc_n math_rad
1438 | evldd CARG2, CFUNC:RB->upvalue[0] 1474 | lfd FARG2, CFUNC:RB->upvalue[0]
1439 | efdmul CRET1, CARG1, CARG2 1475 | fmul FARG1, FARG1, FARG2
1440 | b ->fff_restv 1476 | b ->fff_resn
1441 | 1477 |
1442 |.ffunc math_ldexp 1478 |.ffunc_nn math_ldexp
1443 | cmplwi NARGS8:RC, 16 1479 | toint CARG1, FARG2
1444 | evldd CARG2, 0(BASE)
1445 | evldd CARG4, 8(BASE)
1446 | blt ->fff_fallback
1447 | evmergehi CARG1, CARG4, CARG2
1448 | checknum CARG1
1449 | checkanyfail ->fff_fallback
1450 | efdctsi CARG3, CARG4
1451 | bl extern ldexp 1480 | bl extern ldexp
1452 | evmergelo CRET1, CRET1, CRET2 1481 | b ->fff_resn
1453 | b ->fff_restv
1454 | 1482 |
1455 |.ffunc math_frexp 1483 |.ffunc_n math_frexp
1456 | cmplwi NARGS8:RC, 8 1484 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
1457 | evldd CARG2, 0(BASE)
1458 | blt ->fff_fallback
1459 | checknum CARG2
1460 | evmergehi CARG1, CARG2, CARG2
1461 | checkfail ->fff_fallback
1462 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1463 | lwz PC, FRAME_PC(BASE) 1485 | lwz PC, FRAME_PC(BASE)
1464 | bl extern frexp 1486 | bl extern frexp
1465 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1487 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1466 | evmergelo CRET1, CRET1, CRET2
1467 | efdcfsi CRET2, TMP1
1468 | la RA, -8(BASE) 1488 | la RA, -8(BASE)
1469 | evstdd CRET1, 0(RA) 1489 | tonum_i FARG2, TMP1
1490 | stfd FARG1, 0(RA)
1470 | li RD, (2+1)*8 1491 | li RD, (2+1)*8
1471 | evstdd CRET2, 8(RA) 1492 | stfd FARG2, 8(RA)
1472 | b ->fff_res 1493 | b ->fff_res
1473 | 1494 |
1474 |.ffunc math_modf 1495 |.ffunc_n math_modf
1475 | cmplwi NARGS8:RC, 8 1496 | la CARG1, -8(BASE)
1476 | evldd CARG2, 0(BASE)
1477 | blt ->fff_fallback
1478 | checknum CARG2
1479 | evmergehi CARG1, CARG2, CARG2
1480 | checkfail ->fff_fallback
1481 | la CARG3, -8(BASE)
1482 | lwz PC, FRAME_PC(BASE) 1497 | lwz PC, FRAME_PC(BASE)
1483 | bl extern modf 1498 | bl extern modf
1484 | evmergelo CRET1, CRET1, CRET2
1485 | la RA, -8(BASE) 1499 | la RA, -8(BASE)
1486 | evstdd CRET1, 0(BASE) 1500 | stfd FARG1, 0(BASE)
1487 | li RD, (2+1)*8 1501 | li RD, (2+1)*8
1488 | b ->fff_res 1502 | b ->fff_res
1489 | 1503 |
1490 |.macro math_minmax, name, cmpop 1504 |.macro math_minmax, name, ismax
1491 | .ffunc_1 name 1505 | .ffunc_n name
1492 | checknum CARG1 1506 | li TMP1, 8
1493 | li TMP1, 8
1494 | checkfail ->fff_fallback
1495 |1: 1507 |1:
1496 | evlddx CARG2, BASE, TMP1 1508 | lwzx CARG2, BASE, TMP1
1509 | lfdx FARG2, BASE, TMP1
1497 | cmplw cr1, TMP1, NARGS8:RC 1510 | cmplw cr1, TMP1, NARGS8:RC
1498 | checknum CARG2 1511 | checknum CARG2
1499 | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1. 1512 | bge cr1, ->fff_resn
1500 | checkfail ->fff_fallback 1513 | bge ->fff_fallback
1501 | cmpop CARG2, CARG1 1514 | fsub f0, FARG1, FARG2
1502 | addi TMP1, TMP1, 8 1515 | addi TMP1, TMP1, 8
1503 | crmove 4*cr0+lt, 4*cr0+gt 1516 |.if ismax
1504 | evsel CARG1, CARG2, CARG1 1517 | fsel FARG1, f0, FARG1, FARG2
1518 |.else
1519 | fsel FARG1, f0, FARG2, FARG1
1520 |.endif
1505 | b <1 1521 | b <1
1506 |.endmacro 1522 |.endmacro
1507 | 1523 |
1508 | math_minmax math_min, efdtstlt 1524 | math_minmax math_min, 0
1509 | math_minmax math_max, efdtstgt 1525 | math_minmax math_max, 1
1510 | 1526 |
1511 |//-- String library ----------------------------------------------------- 1527 |//-- String library -----------------------------------------------------
1512 | 1528 |
1513 |.ffunc_1 string_len 1529 |.ffunc_1 string_len
1514 | checkstr STR:CARG1 1530 | checkstr CARG3; bne ->fff_fallback
1515 | checkfail ->fff_fallback 1531 | lwz CRET1, STR:CARG1->len
1516 | lwz TMP0, STR:CARG1->len 1532 | b ->fff_resi
1517 | efdcfsi CRET1, TMP0
1518 | b ->fff_restv
1519 | 1533 |
1520 |.ffunc string_byte // Only handle the 1-arg case here. 1534 |.ffunc string_byte // Only handle the 1-arg case here.
1521 | cmplwi NARGS8:RC, 8 1535 | cmplwi NARGS8:RC, 8
1522 | evldd STR:CARG1, 0(BASE) 1536 | lwz CARG3, 0(BASE)
1537 | lwz STR:CARG1, 4(BASE)
1523 | bne ->fff_fallback // Need exactly 1 argument. 1538 | bne ->fff_fallback // Need exactly 1 argument.
1524 | checkstr STR:CARG1 1539 | checkstr CARG3
1525 | la RA, -8(BASE) 1540 | bne ->fff_fallback
1526 | checkfail ->fff_fallback
1527 | lwz TMP0, STR:CARG1->len 1541 | lwz TMP0, STR:CARG1->len
1528 | li RD, (0+1)*8 1542 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1529 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). 1543 | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8
1530 | li TMP2, (1+1)*8 1544 | subfe RD, TMP3, TMP0
1531 | cmplwi TMP0, 0 1545 | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1.
1532 | lwz PC, FRAME_PC(BASE) 1546 | addi RD, RD, 1
1533 | efdcfsi CRET1, TMP1 1547 | lfd f0, TONUM_D
1534 | iseleq RD, RD, TMP2 1548 | la RA, -8(BASE)
1535 | evstdd CRET1, 0(RA) 1549 | lwz PC, FRAME_PC(BASE)
1550 | fsub f0, f0, TOBIT
1551 | slwi RD, RD, 3
1552 | stfd f0, 0(RA)
1536 | b ->fff_res 1553 | b ->fff_res
1537 | 1554 |
1538 |.ffunc string_char // Only handle the 1-arg case here. 1555 |.ffunc string_char // Only handle the 1-arg case here.
1539 | ffgccheck 1556 | ffgccheck
1540 | cmplwi NARGS8:RC, 8 1557 | cmplwi NARGS8:RC, 8
1541 | evldd CARG1, 0(BASE) 1558 | lwz CARG3, 0(BASE)
1559 | lfd FARG1, 0(BASE)
1542 | bne ->fff_fallback // Exactly 1 argument. 1560 | bne ->fff_fallback // Exactly 1 argument.
1543 | checknum CARG1 1561 | checknum CARG3; bge ->fff_fallback
1544 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 1562 | toint TMP0, FARG1
1545 | checkfail ->fff_fallback 1563 | la CARG2, TMPD_BLO
1546 | efdctsiz TMP0, CARG1
1547 | li CARG3, 1 1564 | li CARG3, 1
1548 | cmplwi TMP0, 255 1565 | cmplwi TMP0, 255; bgt ->fff_fallback
1549 | stb TMP0, 0(CARG2)
1550 | bgt ->fff_fallback
1551 |->fff_newstr: 1566 |->fff_newstr:
1552 | mr CARG1, L 1567 | mr CARG1, L
1553 | stw BASE, L->base 1568 | stw BASE, L->base
@@ -1555,67 +1570,71 @@ static void build_subroutines(BuildCtx *ctx)
1555 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 1570 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1556 | // Returns GCstr *. 1571 | // Returns GCstr *.
1557 | lwz BASE, L->base 1572 | lwz BASE, L->base
1558 | evmergelo STR:CRET1, TISSTR, STR:CRET1 1573 | li CARG3, LJ_TSTR
1559 | b ->fff_restv 1574 | b ->fff_restv
1560 | 1575 |
1561 |.ffunc string_sub 1576 |.ffunc string_sub
1562 | ffgccheck 1577 | ffgccheck
1563 | cmplwi NARGS8:RC, 16 1578 | cmplwi NARGS8:RC, 16
1564 | evldd CARG3, 16(BASE) 1579 | lwz CARG3, 16(BASE)
1565 | evldd STR:CARG1, 0(BASE) 1580 | lfd f0, 16(BASE)
1581 | lwz TMP0, 0(BASE)
1582 | lwz STR:CARG1, 4(BASE)
1566 | blt ->fff_fallback 1583 | blt ->fff_fallback
1567 | evldd CARG2, 8(BASE) 1584 | lwz CARG2, 8(BASE)
1585 | lfd f1, 8(BASE)
1568 | li TMP2, -1 1586 | li TMP2, -1
1569 | beq >1 1587 | beq >1
1570 | checknum CARG3 1588 | checknum CARG3; bge ->fff_fallback
1571 | checkfail ->fff_fallback 1589 | toint TMP2, f0
1572 | efdctsiz TMP2, CARG3
1573 |1: 1590 |1:
1574 | checknum CARG2 1591 | checknum CARG2; bge ->fff_fallback
1575 | checkfail ->fff_fallback 1592 | checkstr TMP0; bne ->fff_fallback
1576 | checkstr STR:CARG1 1593 | toint TMP1, f1
1577 | efdctsiz TMP1, CARG2
1578 | checkfail ->fff_fallback
1579 | lwz TMP0, STR:CARG1->len 1594 | lwz TMP0, STR:CARG1->len
1580 | cmplw TMP0, TMP2 // len < end? (unsigned compare) 1595 | cmplw TMP0, TMP2 // len < end? (unsigned compare)
1581 | add TMP3, TMP2, TMP0 1596 | addi TMP3, TMP2, 1
1582 | blt >5 1597 | blt >5
1583 |2: 1598 |2:
1584 | cmpwi TMP1, 0 // start <= 0? 1599 | cmpwi TMP1, 0 // start <= 0?
1585 | add TMP3, TMP1, TMP0 1600 | add TMP3, TMP1, TMP0
1586 | ble >7 1601 | ble >7
1587 |3: 1602 |3:
1588 | sub. CARG3, TMP2, TMP1 1603 | sub CARG3, TMP2, TMP1
1589 | addi CARG2, STR:CARG1, #STR-1 1604 | addi CARG2, STR:CARG1, #STR-1
1605 | srawi TMP0, CARG3, 31
1590 | addi CARG3, CARG3, 1 1606 | addi CARG3, CARG3, 1
1591 | add CARG2, CARG2, TMP1 1607 | add CARG2, CARG2, TMP1
1592 | isellt CARG3, r0, CARG3 1608 | andc CARG3, CARG3, TMP0
1593 | b ->fff_newstr 1609 | b ->fff_newstr
1594 | 1610 |
1595 |5: // Negative end or overflow. 1611 |5: // Negative end or overflow.
1596 | cmpw TMP0, TMP2 1612 | sub CARG2, TMP0, TMP2
1597 | addi TMP3, TMP3, 1 1613 | srawi CARG2, CARG2, 31
1598 | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1 1614 | andc TMP3, TMP3, CARG2 // end = end > len ? len : end+len+1
1615 | add TMP2, TMP0, TMP3
1599 | b <2 1616 | b <2
1600 | 1617 |
1601 |7: // Negative start or underflow. 1618 |7: // Negative start or underflow.
1602 | cmpwi cr1, TMP3, 0 1619 | addic CARG3, TMP1, -1
1603 | iseleq TMP1, r0, TMP3 1620 | subfe CARG3, CARG3, CARG3
1604 | isel TMP1, r0, TMP1, 4*cr1+lt 1621 | srawi CARG2, TMP3, 31 // Note: modifies carry.
1622 | andc TMP3, TMP3, CARG3
1623 | andc TMP1, TMP3, CARG2
1605 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 1624 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
1606 | b <3 1625 | b <3
1607 | 1626 |
1608 |.ffunc string_rep // Only handle the 1-char case inline. 1627 |.ffunc string_rep // Only handle the 1-char case inline.
1609 | ffgccheck 1628 | ffgccheck
1610 | cmplwi NARGS8:RC, 16 1629 | cmplwi NARGS8:RC, 16
1611 | evldd CARG1, 0(BASE) 1630 | lwz CARG3, 0(BASE)
1612 | evldd CARG2, 8(BASE) 1631 | lwz STR:CARG1, 4(BASE)
1632 | lwz CARG4, 8(BASE)
1633 | lfd FARG2, 8(BASE)
1613 | blt ->fff_fallback 1634 | blt ->fff_fallback
1614 | checknum CARG2 1635 | checkstr CARG3; bne ->fff_fallback
1615 | checkfail ->fff_fallback 1636 | checknum CARG4; bge ->fff_fallback
1616 | checkstr STR:CARG1 1637 | toint CARG3, FARG2
1617 | efdctsiz CARG3, CARG2
1618 | checkfail ->fff_fallback
1619 | lwz TMP0, STR:CARG1->len 1638 | lwz TMP0, STR:CARG1->len
1620 | cmpwi CARG3, 0 1639 | cmpwi CARG3, 0
1621 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1640 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
@@ -1635,18 +1654,19 @@ static void build_subroutines(BuildCtx *ctx)
1635 | bne <1 1654 | bne <1
1636 | b ->fff_newstr 1655 | b ->fff_newstr
1637 |2: // Return empty string. 1656 |2: // Return empty string.
1638 | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH) 1657 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
1639 | evmergelo CRET1, TISSTR, STR:CRET1 1658 | li CARG3, LJ_TSTR
1640 | b ->fff_restv 1659 | b ->fff_restv
1641 | 1660 |
1642 |.ffunc string_reverse 1661 |.ffunc string_reverse
1643 | ffgccheck 1662 | ffgccheck
1644 | cmplwi NARGS8:RC, 8 1663 | cmplwi NARGS8:RC, 8
1645 | evldd CARG1, 0(BASE) 1664 | lwz CARG3, 0(BASE)
1665 | lwz STR:CARG1, 4(BASE)
1646 | blt ->fff_fallback 1666 | blt ->fff_fallback
1647 | checkstr STR:CARG1 1667 | checkstr CARG3
1648 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1668 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1649 | checkfail ->fff_fallback 1669 | bne ->fff_fallback
1650 | lwz CARG3, STR:CARG1->len 1670 | lwz CARG3, STR:CARG1->len
1651 | la CARG1, #STR(STR:CARG1) 1671 | la CARG1, #STR(STR:CARG1)
1652 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1672 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
@@ -1667,11 +1687,12 @@ static void build_subroutines(BuildCtx *ctx)
1667 | .ffunc name 1687 | .ffunc name
1668 | ffgccheck 1688 | ffgccheck
1669 | cmplwi NARGS8:RC, 8 1689 | cmplwi NARGS8:RC, 8
1670 | evldd CARG1, 0(BASE) 1690 | lwz CARG3, 0(BASE)
1691 | lwz STR:CARG1, 4(BASE)
1671 | blt ->fff_fallback 1692 | blt ->fff_fallback
1672 | checkstr STR:CARG1 1693 | checkstr CARG3
1673 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1694 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1674 | checkfail ->fff_fallback 1695 | bne ->fff_fallback
1675 | lwz CARG3, STR:CARG1->len 1696 | lwz CARG3, STR:CARG1->len
1676 | la CARG1, #STR(STR:CARG1) 1697 | la CARG1, #STR(STR:CARG1)
1677 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1698 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
@@ -1684,8 +1705,10 @@ static void build_subroutines(BuildCtx *ctx)
1684 | bge ->fff_newstr 1705 | bge ->fff_newstr
1685 | subi TMP0, TMP1, lo 1706 | subi TMP0, TMP1, lo
1686 | xori TMP3, TMP1, 0x20 1707 | xori TMP3, TMP1, 0x20
1687 | cmplwi TMP0, 26 1708 | addic TMP0, TMP0, -26
1688 | isellt TMP1, TMP3, TMP1 1709 | subfe TMP3, TMP3, TMP3
1710 | andi. TMP3, TMP3, 0x20
1711 | xor TMP1, TMP1, TMP3
1689 | stbx TMP1, CARG2, TMP2 1712 | stbx TMP1, CARG2, TMP2
1690 | addi TMP2, TMP2, 1 1713 | addi TMP2, TMP2, 1
1691 | b <1 1714 | b <1
@@ -1697,35 +1720,42 @@ static void build_subroutines(BuildCtx *ctx)
1697 |//-- Table library ------------------------------------------------------ 1720 |//-- Table library ------------------------------------------------------
1698 | 1721 |
1699 |.ffunc_1 table_getn 1722 |.ffunc_1 table_getn
1700 | checktab CARG1 1723 | checktab CARG3; bne ->fff_fallback
1701 | checkfail ->fff_fallback
1702 | bl extern lj_tab_len // (GCtab *t) 1724 | bl extern lj_tab_len // (GCtab *t)
1703 | // Returns uint32_t (but less than 2^31). 1725 | // Returns uint32_t (but less than 2^31).
1704 | efdcfsi CRET1, CRET1 1726 | b ->fff_resi
1705 | b ->fff_restv
1706 | 1727 |
1707 |//-- Bit library -------------------------------------------------------- 1728 |//-- Bit library --------------------------------------------------------
1708 | 1729 |
1709 |.macro .ffunc_bit, name 1730 |.macro .ffunc_bit, name
1710 | .ffunc_n bit_..name 1731 | .ffunc_n bit_..name
1711 | efdadd CARG1, CARG1, TOBIT 1732 | fadd FARG1, FARG1, TOBIT
1733 | stfd FARG1, TMPD
1734 | lwz CARG1, TMPD_LO
1712 |.endmacro 1735 |.endmacro
1713 | 1736 |
1714 |.ffunc_bit tobit 1737 |.ffunc_bit tobit
1715 |->fff_resbit: 1738 |->fff_resi:
1716 | efdcfsi CRET1, CARG1 1739 | tonum_i FARG1, CRET1
1717 | b ->fff_restv 1740 |->fff_resn:
1741 | lwz PC, FRAME_PC(BASE)
1742 | la RA, -8(BASE)
1743 | stfd FARG1, -8(BASE)
1744 | b ->fff_res1
1718 | 1745 |
1719 |.macro .ffunc_bit_op, name, ins 1746 |.macro .ffunc_bit_op, name, ins
1720 | .ffunc_bit name 1747 | .ffunc_bit name
1721 | li TMP1, 8 1748 | li TMP1, 8
1722 |1: 1749 |1:
1723 | evlddx CARG2, BASE, TMP1 1750 | lwzx CARG4, BASE, TMP1
1724 | cmplw cr1, TMP1, NARGS8:RC 1751 | cmplw cr1, TMP1, NARGS8:RC
1725 | checknum CARG2 1752 | lfdx FARG1, BASE, TMP1
1726 | bge cr1, ->fff_resbit 1753 | checknum CARG4
1727 | checkfail ->fff_fallback 1754 | bge cr1, ->fff_resi
1728 | efdadd CARG2, CARG2, TOBIT 1755 | fadd FARG1, FARG1, TOBIT
1756 | bge ->fff_fallback
1757 | stfd FARG1, TMPD
1758 | lwz CARG2, TMPD_LO
1729 | ins CARG1, CARG1, CARG2 1759 | ins CARG1, CARG1, CARG2
1730 | addi TMP1, TMP1, 8 1760 | addi TMP1, TMP1, 8
1731 | b <1 1761 | b <1
@@ -1739,26 +1769,28 @@ static void build_subroutines(BuildCtx *ctx)
1739 | rotlwi TMP0, CARG1, 8 1769 | rotlwi TMP0, CARG1, 8
1740 | rlwimi TMP0, CARG1, 24, 0, 7 1770 | rlwimi TMP0, CARG1, 24, 0, 7
1741 | rlwimi TMP0, CARG1, 24, 16, 23 1771 | rlwimi TMP0, CARG1, 24, 16, 23
1742 | efdcfsi CRET1, TMP0 1772 | mr CRET1, TMP0
1743 | b ->fff_restv 1773 | b ->fff_resi
1744 | 1774 |
1745 |.ffunc_bit bnot 1775 |.ffunc_bit bnot
1746 | not TMP0, CARG1 1776 | not CRET1, CARG1
1747 | efdcfsi CRET1, TMP0 1777 | b ->fff_resi
1748 | b ->fff_restv
1749 | 1778 |
1750 |.macro .ffunc_bit_sh, name, ins, shmod 1779 |.macro .ffunc_bit_sh, name, ins, shmod
1751 | .ffunc_nn bit_..name 1780 | .ffunc_nn bit_..name
1752 | efdadd CARG2, CARG2, TOBIT 1781 | fadd FARG1, FARG1, TOBIT
1753 | efdadd CARG1, CARG1, TOBIT 1782 | fadd FARG2, FARG2, TOBIT
1783 | stfd FARG1, TMPD
1784 | lwz CARG1, TMPD_LO
1785 | stfd FARG2, TMPD
1786 | lwz CARG2, TMPD_LO
1754 |.if shmod == 1 1787 |.if shmod == 1
1755 | rlwinm CARG2, CARG2, 0, 27, 31 1788 | rlwinm CARG2, CARG2, 0, 27, 31
1756 |.elif shmod == 2 1789 |.elif shmod == 2
1757 | neg CARG2, CARG2 1790 | neg CARG2, CARG2
1758 |.endif 1791 |.endif
1759 | ins TMP0, CARG1, CARG2 1792 | ins CRET1, CARG1, CARG2
1760 | efdcfsi CRET1, TMP0 1793 | b ->fff_resi
1761 | b ->fff_restv
1762 |.endmacro 1794 |.endmacro
1763 | 1795 |
1764 |.ffunc_bit_sh lshift, slw, 1 1796 |.ffunc_bit_sh lshift, slw, 1
@@ -1939,93 +1971,14 @@ static void build_subroutines(BuildCtx *ctx)
1939 |//-- Math helper functions ---------------------------------------------- 1971 |//-- Math helper functions ----------------------------------------------
1940 |//----------------------------------------------------------------------- 1972 |//-----------------------------------------------------------------------
1941 | 1973 |
1942 |// FP value rounding. Called by math.floor/math.ceil fast functions 1974 | // NYI: Use internal implementation.
1943 |// and from JIT code.
1944 |//
1945 |// This can be inlined if the CPU has the frin/friz/frip/frim instructions.
1946 |// The alternative hard-float approaches have a deep dependency chain.
1947 |// The resulting latency is at least 3x-7x the double-precision FP latency
1948 |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles.
1949 |//
1950 |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy).
1951 |// However it relies on a fast way to transfer the FP value to GPRs
1952 |// (e500v2: 0cy for lo-word, 1cy for hi-word).
1953 |//
1954 |.macro vm_round, name, mode
1955 | // Used temporaries: TMP0, TMP1, TMP2, TMP3.
1956 |->name.._efd: // Input: CARG2, output: CRET2
1957 | evmergehi CARG1, CARG2, CARG2
1958 |->name.._hilo:
1959 | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
1960 | rlwinm TMP2, CARG1, 12, 21, 31
1961 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
1962 | li TMP1, -1
1963 | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51?
1964 | subfic TMP0, TMP2, 52
1965 | bgt cr1, >1
1966 | lus TMP3, 0xfff0
1967 | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp)
1968 | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp
1969 |.if mode == 2 // trunc(x):
1970 | evmergelo TMP0, TMP1, TMP0
1971 | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask
1972 |.else
1973 | andc TMP2, CARG2, TMP0
1974 | andc TMP3, CARG1, TMP1
1975 | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask)
1976 | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31
1977 |.if mode == 0 // floor(x):
1978 | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0)
1979 |.else // ceil(x):
1980 | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0)
1981 |.endif
1982 | and CARG2, CARG2, TMP0 // lo &= lomask
1983 | and CARG1, CARG1, TMP1 // hi &= himask
1984 | subc TMP0, CARG2, TMP0
1985 | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask
1986 | sube TMP1, CARG1, TMP1
1987 | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry
1988 | evmergelo CRET2, TMP1, TMP0
1989 |.endif
1990 | blr
1991 |1:
1992 | bgtlr // Already done if >=2^52, +-inf or nan.
1993 |.if mode == 2 // trunc(x):
1994 | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x)
1995 | li TMP0, 0
1996 | evmergelo CRET2, TMP1, TMP0
1997 |.else
1998 | rlwinm TMP2, CARG1, 0, 1, 31
1999 | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31
2000 | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo
2001 | lus TMP1, 0x3ff0
2002 |.if mode == 0 // floor(x):
2003 | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0)
2004 |.else // ceil(x):
2005 | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0)
2006 |.endif
2007 | li TMP0, 0
2008 | iseleq TMP1, r0, TMP1
2009 | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0)
2010 | evmergelo CRET2, CARG1, TMP0
2011 |.endif
2012 | blr
2013 |.endmacro
2014 |
2015 |->vm_floor: 1975 |->vm_floor:
2016 | mflr CARG3 1976 | b extern floor
2017 | bl ->vm_floor_hilo 1977 |->vm_ceil:
2018 | mtlr CARG3 1978 | b extern ceil
2019 | evmergehi CRET1, CRET2, CRET2 1979 |->vm_trunc:
2020 | blr
2021 |
2022 | vm_round vm_floor, 0
2023 | vm_round vm_ceil, 1
2024#if LJ_HASJIT 1980#if LJ_HASJIT
2025 | vm_round vm_trunc, 2 1981 | b extern trunc
2026#else
2027 |->vm_trunc_efd:
2028 |->vm_trunc_hilo:
2029#endif 1982#endif
2030 | 1983 |
2031 |->vm_powi: 1984 |->vm_powi:
@@ -2042,31 +1995,38 @@ static void build_subroutines(BuildCtx *ctx)
2042 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) 1995 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
2043 |// and basic math functions. ORDER ARITH 1996 |// and basic math functions. ORDER ARITH
2044 |->vm_foldarith: 1997 |->vm_foldarith:
2045 | evmergelo CARG2, CARG1, CARG2 1998 | cmplwi CARG1, 1
2046 | cmplwi CARG5, 1 1999 | beq >1; bgt >2
2047 | evmergelo CARG4, CARG3, CARG4 2000 | fadd FARG1, FARG1, FARG2; blr
2048 | beq >1; bgt >2
2049 | efdadd CRET2, CARG2, CARG4; evmergehi CRET1, CRET2, CRET2; blr
2050 |1: 2001 |1:
2051 | efdsub CRET2, CARG2, CARG4; evmergehi CRET1, CRET2, CRET2; blr 2002 | fsub FARG1, FARG1, FARG2; blr
2052 |2: 2003 |2:
2053 | cmplwi CARG5, 3; beq >1; bgt >2 2004 | cmplwi CARG1, 3; beq >1; bgt >2
2054 | efdmul CRET2, CARG2, CARG4; evmergehi CRET1, CRET2, CRET2; blr 2005 | fmul FARG1, FARG1, FARG2; blr
2055 |1: 2006 |1:
2056 | efddiv CRET2, CARG2, CARG4; evmergehi CRET1, CRET2, CRET2; blr 2007 | fdiv FARG1, FARG1, FARG2; blr
2057 |2: 2008 |2:
2058 | cmplwi CARG5, 5; beq >1; bgt >2 2009 | cmplwi CARG1, 5; beq >1; bgt >2
2059 | evmr CARG3, CARG2; efddiv CRET2, CARG2, CARG4; evmr RB, CARG4 2010 | // NYI: Use internal implementation of floor and avoid spills.
2060 | mflr RC; bl ->vm_floor_efd; mtlr RC 2011 | stwu sp, -32(sp); stfd f14, 16(sp); stfd f15, 24(sp)
2061 | efdmul CRET2, CRET2, RB; efdsub CRET2, CARG3, CRET2 2012 | mflr r0
2062 | evmergehi CRET1, CRET2, CRET2; blr 2013 | fmr f14, FARG1
2014 | fdiv FARG1, FARG1, FARG2
2015 | stw r0, 36(sp)
2016 | fmr f15, FARG2
2017 | bl extern floor
2018 | lwz r0, 36(sp)
2019 | fmul FARG1, FARG1, f15
2020 | mtlr r0
2021 | fsub FARG1, f14, FARG1;
2022 | lfd f14, 16(sp); lfd f15, 24(sp); addi sp, sp, 32; blr
2063 |1: 2023 |1:
2064 | b extern pow 2024 | b extern pow
2065 |2: 2025 |2:
2066 | cmplwi CARG5, 7; beq >1; bgt >2 2026 | cmplwi CARG1, 7; beq >1; bgt >2
2067 | xoris CARG1, CARG1, 0x8000; blr 2027 | fneg FARG1, FARG1; blr
2068 |1: 2028 |1:
2069 | rlwinm CARG1, CARG1, 0, 1, 31; blr 2029 | fabs FARG1, FARG1; blr
2070 |2: 2030 |2:
2071 | NYI // Other operations only needed by JIT compiler. 2031 | NYI // Other operations only needed by JIT compiler.
2072 | 2032 |
@@ -2100,71 +2060,85 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2100 2060
2101 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2061 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2102 | // RA = src1*8, RD = src2*8, JMP with RD = target 2062 | // RA = src1*8, RD = src2*8, JMP with RD = target
2103 | evlddx TMP0, BASE, RA 2063 | lwzx TMP0, BASE, RA
2104 | addi PC, PC, 4 2064 | addi PC, PC, 4
2105 | evlddx TMP1, BASE, RD 2065 | lfdx f0, BASE, RA
2106 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 2066 | lwzx TMP1, BASE, RD
2067 | checknum cr0, TMP0
2107 | lwz TMP2, -4(PC) 2068 | lwz TMP2, -4(PC)
2108 | evmergehi RB, TMP0, TMP1 2069 | lfdx f1, BASE, RD
2070 | checknum cr1, TMP1
2109 | decode_RD4 TMP2, TMP2 2071 | decode_RD4 TMP2, TMP2
2110 | checknum RB 2072 | bge cr0, ->vmeta_comp
2111 | add TMP2, TMP2, TMP3 2073 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2112 | checkanyfail ->vmeta_comp 2074 | bge cr1, ->vmeta_comp
2113 | efdcmplt TMP0, TMP1 2075 | fcmpu cr0, f0, f1
2114 if (op == BC_ISLE || op == BC_ISGT) { 2076 if (op == BC_ISLT) {
2115 | efdcmpeq cr1, TMP0, TMP1 2077 | bge >1
2116 | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt 2078 } else if (op == BC_ISGE) {
2117 } 2079 | blt >1
2118 if (op == BC_ISLT || op == BC_ISLE) { 2080 } else if (op == BC_ISLE) {
2119 | iselgt PC, TMP2, PC 2081 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
2082 | bge >1
2120 } else { 2083 } else {
2121 | iselgt PC, PC, TMP2 2084 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
2085 | blt >1
2122 } 2086 }
2087 | add PC, PC, TMP2
2088 |1:
2123 | ins_next 2089 | ins_next
2124 break; 2090 break;
2125 2091
2126 case BC_ISEQV: case BC_ISNEV: 2092 case BC_ISEQV: case BC_ISNEV:
2127 vk = op == BC_ISEQV; 2093 vk = op == BC_ISEQV;
2128 | // RA = src1*8, RD = src2*8, JMP with RD = target 2094 | // RA = src1*8, RD = src2*8, JMP with RD = target
2129 | evlddx CARG2, BASE, RA 2095 | lwzux TMP0, RA, BASE
2096 | lwz TMP2, 0(PC)
2097 | lfd f0, 0(RA)
2130 | addi PC, PC, 4 2098 | addi PC, PC, 4
2131 | evlddx CARG3, BASE, RD 2099 | lwzux TMP1, RD, BASE
2132 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 2100 | checknum cr0, TMP0
2133 | lwz TMP2, -4(PC)
2134 | evmergehi RB, CARG2, CARG3
2135 | decode_RD4 TMP2, TMP2 2101 | decode_RD4 TMP2, TMP2
2136 | checknum RB 2102 | lfd f1, 0(RD)
2137 | add TMP2, TMP2, TMP3 2103 | checknum cr1, TMP1
2138 | checkanyfail >5 2104 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2139 | efdcmpeq CARG2, CARG3 2105 | bge cr0, >5
2106 | bge cr1, >5
2107 | fcmpu cr0, f0, f1
2140 if (vk) { 2108 if (vk) {
2141 | iselgt PC, TMP2, PC 2109 | bne >1
2110 | add PC, PC, TMP2
2142 } else { 2111 } else {
2143 | iselgt PC, PC, TMP2 2112 | beq >1
2113 | add PC, PC, TMP2
2144 } 2114 }
2145 |1: 2115 |1:
2146 | ins_next 2116 | ins_next
2147 | 2117 |
2148 |5: // Either or both types are not numbers. 2118 |5: // Either or both types are not numbers.
2149 | evcmpeq CARG2, CARG3 2119 | lwz CARG2, 4(RA)
2150 | not TMP3, RB 2120 | lwz CARG3, 4(RD)
2121 | not TMP3, TMP0
2122 | cmplw TMP0, TMP1
2151 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 2123 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
2152 | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type.
2153 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 2124 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
2154 | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive. 2125 | cmplw cr5, CARG2, CARG3
2126 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
2127 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
2128 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
2155 | mr SAVE0, PC 2129 | mr SAVE0, PC
2130 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
2131 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
2156 if (vk) { 2132 if (vk) {
2157 | isel PC, TMP2, PC, 4*cr7+gt 2133 | bne cr0, >6
2158 } else { 2134 | add PC, PC, TMP2
2159 | isel TMP2, PC, TMP2, 4*cr7+gt 2135 |6:
2160 }
2161 | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2.
2162 if (vk) {
2163 | isel PC, TMP2, PC, 4*cr0+so
2164 } else { 2136 } else {
2165 | isel PC, PC, TMP2, 4*cr0+so 2137 | beq cr0, >6
2138 | add PC, PC, TMP2
2139 |6:
2166 } 2140 }
2167 | blt cr7, <1 // Done if 1 or 2. 2141 | blt cr0, <1 // Done if 1 or 2.
2168 | blt cr6, <1 // Done if not tab/ud. 2142 | blt cr6, <1 // Done if not tab/ud.
2169 | 2143 |
2170 | // Different tables or userdatas. Need to check __eq metamethod. 2144 | // Different tables or userdatas. Need to check __eq metamethod.
@@ -2183,52 +2157,57 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2183 case BC_ISEQS: case BC_ISNES: 2157 case BC_ISEQS: case BC_ISNES:
2184 vk = op == BC_ISEQS; 2158 vk = op == BC_ISEQS;
2185 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target 2159 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
2186 | evlddx TMP0, BASE, RA 2160 | lwzux TMP0, RA, BASE
2187 | srwi RD, RD, 1 2161 | srwi RD, RD, 1
2162 | lwz STR:TMP3, 4(RA)
2188 | lwz INS, 0(PC) 2163 | lwz INS, 0(PC)
2189 | subfic RD, RD, -4 2164 | subfic RD, RD, -4
2190 | addi PC, PC, 4 2165 | addi PC, PC, 4
2191 | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 2166 | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4
2192 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 2167 | subfic TMP0, TMP0, LJ_TSTR
2168 | sub TMP1, STR:TMP1, STR:TMP3
2169 | or TMP0, TMP0, TMP1
2193 | decode_RD4 TMP2, INS 2170 | decode_RD4 TMP2, INS
2194 | evmergelo STR:TMP1, TISSTR, STR:TMP1 2171 | subfic TMP0, TMP0, 0
2195 | add TMP2, TMP2, TMP3 2172 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2196 | evcmpeq TMP0, STR:TMP1 2173 | subfe TMP1, TMP1, TMP1
2197 if (vk) { 2174 if (vk) {
2198 | isel PC, TMP2, PC, 4*cr0+so 2175 | andc TMP2, TMP2, TMP1
2199 } else { 2176 } else {
2200 | isel PC, PC, TMP2, 4*cr0+so 2177 | and TMP2, TMP2, TMP1
2201 } 2178 }
2179 | add PC, PC, TMP2
2202 | ins_next 2180 | ins_next
2203 break; 2181 break;
2204 2182
2205 case BC_ISEQN: case BC_ISNEN: 2183 case BC_ISEQN: case BC_ISNEN:
2206 vk = op == BC_ISEQN; 2184 vk = op == BC_ISEQN;
2207 | // RA = src*8, RD = num_const*8, JMP with RD = target 2185 | // RA = src*8, RD = num_const*8, JMP with RD = target
2208 | evlddx TMP0, BASE, RA 2186 | lwzx TMP0, BASE, RA
2187 | lfdx f0, BASE, RA
2209 | addi PC, PC, 4 2188 | addi PC, PC, 4
2210 | evlddx TMP1, KBASE, RD 2189 | lfdx f1, KBASE, RD
2211 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2212 | lwz INS, -4(PC) 2190 | lwz INS, -4(PC)
2213 | checknum TMP0 2191 | checknum TMP0; bge >5
2214 | checkfail >5 2192 | fcmpu cr0, f0, f1
2215 | efdcmpeq TMP0, TMP1
2216 |1:
2217 | decode_RD4 TMP2, INS 2193 | decode_RD4 TMP2, INS
2218 | add TMP2, TMP2, TMP3 2194 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2219 if (vk) { 2195 if (vk) {
2220 | iselgt PC, TMP2, PC 2196 | bne >5
2197 | add PC, PC, TMP2
2221 |5: 2198 |5:
2222 } else { 2199 } else {
2223 | iselgt PC, PC, TMP2 2200 | beq >2
2201 |1:
2202 | add PC, PC, TMP2
2203 |2:
2224 } 2204 }
2225 |3:
2226 | ins_next 2205 | ins_next
2227 if (!vk) { 2206 if (!vk) {
2228 |5: 2207 |5:
2229 | decode_RD4 TMP2, INS 2208 | decode_RD4 TMP2, INS
2230 | add PC, TMP2, TMP3 2209 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2231 | b <3 2210 | b <1
2232 } 2211 }
2233 break; 2212 break;
2234 2213
@@ -2238,17 +2217,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2238 | lwzx TMP0, BASE, RA 2217 | lwzx TMP0, BASE, RA
2239 | srwi TMP1, RD, 3 2218 | srwi TMP1, RD, 3
2240 | lwz INS, 0(PC) 2219 | lwz INS, 0(PC)
2241 | addi PC, PC, 4
2242 | not TMP1, TMP1 2220 | not TMP1, TMP1
2243 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 2221 | addi PC, PC, 4
2244 | cmplw TMP0, TMP1 2222 | sub TMP0, TMP0, TMP1
2245 | decode_RD4 TMP2, INS 2223 | decode_RD4 TMP2, INS
2246 | add TMP2, TMP2, TMP3 2224 | addic TMP0, TMP0, -1
2225 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2226 | subfe TMP1, TMP1, TMP1
2247 if (vk) { 2227 if (vk) {
2248 | iseleq PC, TMP2, PC 2228 | and TMP2, TMP2, TMP1
2249 } else { 2229 } else {
2250 | iseleq PC, PC, TMP2 2230 | andc TMP2, TMP2, TMP1
2251 } 2231 }
2232 | add PC, PC, TMP2
2252 | ins_next 2233 | ins_next
2253 break; 2234 break;
2254 2235
@@ -2256,29 +2237,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2256 2237
2257 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: 2238 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2258 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target 2239 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
2259 | evlddx TMP0, BASE, RD 2240 | lwzx TMP0, BASE, RD
2260 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
2261 | lwz INS, 0(PC) 2241 | lwz INS, 0(PC)
2262 | evcmpltu TMP0, TMP1
2263 | addi PC, PC, 4 2242 | addi PC, PC, 4
2264 if (op == BC_IST || op == BC_ISF) { 2243 if (op == BC_IST || op == BC_ISF) {
2265 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 2244 | subfic TMP0, TMP0, LJ_TTRUE
2266 | decode_RD4 TMP2, INS 2245 | decode_RD4 TMP2, INS
2267 | add TMP2, TMP2, TMP3 2246 | subfe TMP1, TMP1, TMP1
2247 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2268 if (op == BC_IST) { 2248 if (op == BC_IST) {
2269 | isellt PC, TMP2, PC 2249 | andc TMP2, TMP2, TMP1
2270 } else { 2250 } else {
2271 | isellt PC, PC, TMP2 2251 | and TMP2, TMP2, TMP1
2272 } 2252 }
2253 | add PC, PC, TMP2
2273 } else { 2254 } else {
2255 | li TMP1, LJ_TFALSE
2256 | lfdx f0, BASE, RD
2257 | cmplw TMP0, TMP1
2274 if (op == BC_ISTC) { 2258 if (op == BC_ISTC) {
2275 | checkfail >1 2259 | bge >1
2276 } else { 2260 } else {
2277 | checkok >1 2261 | blt >1
2278 } 2262 }
2279 | addis PC, PC, -(BCBIAS_J*4 >> 16) 2263 | addis PC, PC, -(BCBIAS_J*4 >> 16)
2280 | decode_RD4 TMP2, INS 2264 | decode_RD4 TMP2, INS
2281 | evstddx TMP0, BASE, RA 2265 | stfdx f0, BASE, RA
2282 | add PC, PC, TMP2 2266 | add PC, PC, TMP2
2283 |1: 2267 |1:
2284 } 2268 }
@@ -2290,8 +2274,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2290 case BC_MOV: 2274 case BC_MOV:
2291 | // RA = dst*8, RD = src*8 2275 | // RA = dst*8, RD = src*8
2292 | ins_next1 2276 | ins_next1
2293 | evlddx TMP0, BASE, RD 2277 | lfdx f0, BASE, RD
2294 | evstddx TMP0, BASE, RA 2278 | stfdx f0, BASE, RA
2295 | ins_next2 2279 | ins_next2
2296 break; 2280 break;
2297 case BC_NOT: 2281 case BC_NOT:
@@ -2305,28 +2289,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2305 break; 2289 break;
2306 case BC_UNM: 2290 case BC_UNM:
2307 | // RA = dst*8, RD = src*8 2291 | // RA = dst*8, RD = src*8
2308 | evlddx TMP0, BASE, RD 2292 | lwzux TMP1, RD, BASE
2309 | checknum TMP0 2293 | lwz TMP0, 4(RD)
2310 | checkfail ->vmeta_unm 2294 | checknum TMP1; bge ->vmeta_unm
2311 | efdneg TMP0, TMP0 2295 | xoris TMP1, TMP1, 0x8000
2312 | ins_next1 2296 | ins_next1
2313 | evstddx TMP0, BASE, RA 2297 | stwux TMP1, RA, BASE
2298 | stw TMP0, 4(RA)
2314 | ins_next2 2299 | ins_next2
2315 break; 2300 break;
2316 case BC_LEN: 2301 case BC_LEN:
2317 | // RA = dst*8, RD = src*8 2302 | // RA = dst*8, RD = src*8
2318 | evlddx CARG1, BASE, RD 2303 | lwzux TMP0, RD, BASE
2319 | checkstr CARG1 2304 | lwz CARG1, 4(RD)
2320 | checkfail >2 2305 | checkstr TMP0; bne >2
2321 | lwz CRET1, STR:CARG1->len 2306 | lwz CRET1, STR:CARG1->len
2322 |1: 2307 |1:
2308 | tonum_u f0, CRET1 // Result is a non-negative integer.
2323 | ins_next1 2309 | ins_next1
2324 | efdcfsi TMP0, CRET1 2310 | stfdx f0, BASE, RA
2325 | evstddx TMP0, BASE, RA
2326 | ins_next2 2311 | ins_next2
2327 |2: 2312 |2:
2328 | checktab CARG1 2313 | checktab TMP0; bne ->vmeta_len
2329 | checkfail ->vmeta_len
2330#ifdef LUAJIT_ENABLE_LUA52COMPAT 2314#ifdef LUAJIT_ENABLE_LUA52COMPAT
2331 | lwz TAB:TMP2, TAB:CARG1->metatable 2315 | lwz TAB:TMP2, TAB:CARG1->metatable
2332 | cmplwi TAB:TMP2, 0 2316 | cmplwi TAB:TMP2, 0
@@ -2353,72 +2337,77 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2353 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 2337 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2354 ||switch (vk) { 2338 ||switch (vk) {
2355 ||case 0: 2339 ||case 0:
2356 | evlddx t0, BASE, RB 2340 | lwzx CARG1, BASE, RB
2357 | checknum t0 2341 | lfdx t0, BASE, RB
2358 | evlddx t1, KBASE, RC 2342 | lfdx t1, KBASE, RC
2359 | checkfail ->vmeta_arith_vn 2343 | checknum CARG1; bge ->vmeta_arith_vn
2360 || break; 2344 || break;
2361 ||case 1: 2345 ||case 1:
2362 | evlddx t1, BASE, RB 2346 | lwzx CARG1, BASE, RB
2363 | checknum t1 2347 | lfdx t1, BASE, RB
2364 | evlddx t0, KBASE, RC 2348 | lfdx t0, KBASE, RC
2365 | checkfail ->vmeta_arith_nv 2349 | checknum CARG1; bge ->vmeta_arith_nv
2366 || break; 2350 || break;
2367 ||default: 2351 ||default:
2368 | evlddx t0, BASE, RB 2352 | lwzx CARG1, BASE, RB
2369 | evlddx t1, BASE, RC 2353 | lwzx CARG2, BASE, RC
2370 | evmergehi TMP2, t0, t1 2354 | lfdx t0, BASE, RB
2371 | checknum TMP2 2355 | lfdx t1, BASE, RC
2372 | checkanyfail ->vmeta_arith_vv 2356 | checknum cr0, CARG1
2357 | checknum cr1, CARG2
2358 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2359 | bge ->vmeta_arith_vv
2373 || break; 2360 || break;
2374 ||} 2361 ||}
2375 |.endmacro 2362 |.endmacro
2376 | 2363 |
2377 |.macro ins_arith, ins 2364 |.macro ins_arith, ins
2378 | ins_arithpre TMP0, TMP1 2365 | ins_arithpre f0, f1
2379 | ins_next1 2366 | ins_next1
2380 | ins TMP0, TMP0, TMP1 2367 | ins f0, f0, f1
2381 | evstddx TMP0, BASE, RA 2368 | stfdx f0, BASE, RA
2382 | ins_next2 2369 | ins_next2
2383 |.endmacro 2370 |.endmacro
2384 2371
2385 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 2372 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2386 | ins_arith efdadd 2373 | ins_arith fadd
2387 break; 2374 break;
2388 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 2375 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2389 | ins_arith efdsub 2376 | ins_arith fsub
2390 break; 2377 break;
2391 case BC_MULVN: case BC_MULNV: case BC_MULVV: 2378 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2392 | ins_arith efdmul 2379 | ins_arith fmul
2393 break; 2380 break;
2394 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 2381 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2395 | ins_arith efddiv 2382 | ins_arith fdiv
2396 break; 2383 break;
2397 case BC_MODVN: 2384 case BC_MODVN:
2398 | ins_arithpre RD, SAVE0 2385 | ins_arithpre f14, f15
2399 |->BC_MODVN_Z: 2386 |->BC_MODVN_Z:
2400 | efddiv CARG2, RD, SAVE0 2387 | fdiv FARG1, f14, f15
2401 | bl ->vm_floor_efd // floor(b/c) 2388 | // NYI: Use internal implementation of floor.
2402 | efdmul TMP0, CRET2, SAVE0 2389 | bl extern floor // floor(b/c)
2390 | fmul f0, FARG1, f15
2403 | ins_next1 2391 | ins_next1
2404 | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c 2392 | fsub f0, f14, f0 // b - floor(b/c)*c
2405 | evstddx TMP0, BASE, RA 2393 | stfdx f0, BASE, RA
2406 | ins_next2 2394 | ins_next2
2407 break; 2395 break;
2408 case BC_MODNV: case BC_MODVV: 2396 case BC_MODNV: case BC_MODVV:
2409 | ins_arithpre RD, SAVE0 2397 | ins_arithpre f14, f15
2410 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 2398 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2411 break; 2399 break;
2412 case BC_POW: 2400 case BC_POW:
2413 | evlddx CARG2, BASE, RB 2401 | lwzx CARG1, BASE, RB
2414 | evlddx CARG4, BASE, RC 2402 | lfdx FARG1, BASE, RB
2415 | evmergehi CARG1, CARG4, CARG2 2403 | lwzx CARG2, BASE, RC
2416 | checknum CARG1 2404 | lfdx FARG2, BASE, RC
2417 | evmergehi CARG3, CARG4, CARG4 2405 | checknum cr0, CARG1
2418 | checkanyfail ->vmeta_arith_vv 2406 | checknum cr1, CARG2
2407 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2408 | bge ->vmeta_arith_vv
2419 | bl extern pow 2409 | bl extern pow
2420 | evmergelo CRET2, CRET1, CRET2 2410 | stfdx FARG1, BASE, RA
2421 | evstddx CRET2, BASE, RA
2422 | ins_next 2411 | ins_next
2423 break; 2412 break;
2424 2413
@@ -2437,8 +2426,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2437 | cmplwi CRET1, 0 2426 | cmplwi CRET1, 0
2438 | lwz BASE, L->base 2427 | lwz BASE, L->base
2439 | bne ->vmeta_binop 2428 | bne ->vmeta_binop
2440 | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA. 2429 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
2441 | evstddx TMP0, BASE, RA 2430 | stfdx f0, BASE, RA
2442 | ins_next 2431 | ins_next
2443 break; 2432 break;
2444 2433
@@ -2446,41 +2435,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2446 2435
2447 case BC_KSTR: 2436 case BC_KSTR:
2448 | // RA = dst*8, RD = str_const*8 (~) 2437 | // RA = dst*8, RD = str_const*8 (~)
2449 | ins_next1
2450 | srwi TMP1, RD, 1 2438 | srwi TMP1, RD, 1
2451 | subfic TMP1, TMP1, -4 2439 | subfic TMP1, TMP1, -4
2452 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 2440 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
2453 | evmergelo TMP0, TISSTR, TMP0 2441 | li TMP2, LJ_TSTR
2454 | evstddx TMP0, BASE, RA 2442 | ins_next1
2443 | stwux TMP2, RA, BASE
2444 | stw TMP0, 4(RA)
2455 | ins_next2 2445 | ins_next2
2456 break; 2446 break;
2457 case BC_KCDATA: 2447 case BC_KCDATA:
2458#if LJ_HASFFI 2448#if LJ_HASFFI
2459 | // RA = dst*8, RD = cdata_const*8 (~) 2449 | // RA = dst*8, RD = cdata_const*8 (~)
2460 | ins_next1
2461 | srwi TMP1, RD, 1 2450 | srwi TMP1, RD, 1
2462 | subfic TMP1, TMP1, -4 2451 | subfic TMP1, TMP1, -4
2463 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 2452 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
2464 | li TMP2, LJ_TCDATA 2453 | li TMP2, LJ_TCDATA
2465 | evmergelo TMP0, TMP2, TMP0 2454 | ins_next1
2466 | evstddx TMP0, BASE, RA 2455 | stwux TMP2, RA, BASE
2456 | stw TMP0, 4(RA)
2467 | ins_next2 2457 | ins_next2
2468#endif 2458#endif
2469 break; 2459 break;
2470 case BC_KSHORT: 2460 case BC_KSHORT:
2471 | // RA = dst*8, RD = int16_literal*8 2461 | // RA = dst*8, RD = int16_literal*8
2472 | srwi TMP1, RD, 3 2462 | // NYI: which approach is faster?
2473 | extsh TMP1, TMP1 2463 |.if 1
2464 | slwi RD, RD, 13
2465 | srawi RD, RD, 16
2466 | tonum_i f0, RD
2467 | ins_next1
2468 | stfdx f0, BASE, RA
2469 | ins_next2
2470 |.else
2471 | slwi RD, RD, 13
2472 | srawi TMP1, RD, 31
2473 | xor TMP2, TMP1, RD
2474 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
2475 | cntlzw TMP3, TMP2
2476 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
2477 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa
2478 | subfic TMP3, RD, 0
2479 | slwi TMP1, TMP1, 20
2480 | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11)
2481 | subfe TMP0, TMP0, TMP0
2482 | add RD, RD, TMP1 // hi = hi + exponent-1
2483 | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi
2474 | ins_next1 2484 | ins_next1
2475 | efdcfsi TMP0, TMP1 2485 | stwux RD, RA, BASE
2476 | evstddx TMP0, BASE, RA 2486 | stw ZERO, 4(RA)
2477 | ins_next2 2487 | ins_next2
2488 |.endif
2478 break; 2489 break;
2479 case BC_KNUM: 2490 case BC_KNUM:
2480 | // RA = dst*8, RD = num_const*8 2491 | // RA = dst*8, RD = num_const*8
2481 | evlddx TMP0, KBASE, RD 2492 | lfdx f0, KBASE, RD
2482 | ins_next1 2493 | ins_next1
2483 | evstddx TMP0, BASE, RA 2494 | stfdx f0, BASE, RA
2484 | ins_next2 2495 | ins_next2
2485 break; 2496 break;
2486 case BC_KPRI: 2497 case BC_KPRI:
@@ -2493,10 +2504,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2493 break; 2504 break;
2494 case BC_KNIL: 2505 case BC_KNIL:
2495 | // RA = base*8, RD = end*8 2506 | // RA = base*8, RD = end*8
2496 | evstddx TISNIL, BASE, RA 2507 | stwx TISNIL, BASE, RA
2497 | addi RA, RA, 8 2508 | addi RA, RA, 8
2498 |1: 2509 |1:
2499 | evstddx TISNIL, BASE, RA 2510 | stwx TISNIL, BASE, RA
2500 | cmpw RA, RD 2511 | cmpw RA, RD
2501 | addi RA, RA, 8 2512 | addi RA, RA, 8
2502 | blt <1 2513 | blt <1
@@ -2513,8 +2524,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2513 | addi RD, RD, offsetof(GCfuncL, uvptr) 2524 | addi RD, RD, offsetof(GCfuncL, uvptr)
2514 | lwzx UPVAL:RB, LFUNC:RB, RD 2525 | lwzx UPVAL:RB, LFUNC:RB, RD
2515 | lwz TMP1, UPVAL:RB->v 2526 | lwz TMP1, UPVAL:RB->v
2516 | evldd TMP0, 0(TMP1) 2527 | lfd f0, 0(TMP1)
2517 | evstddx TMP0, BASE, RA 2528 | stfdx f0, BASE, RA
2518 | ins_next2 2529 | ins_next2
2519 break; 2530 break;
2520 case BC_USETV: 2531 case BC_USETV:
@@ -2522,15 +2533,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2522 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2533 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2523 | srwi RA, RA, 1 2534 | srwi RA, RA, 1
2524 | addi RA, RA, offsetof(GCfuncL, uvptr) 2535 | addi RA, RA, offsetof(GCfuncL, uvptr)
2525 | evlddx TMP1, BASE, RD 2536 | lfdux f0, RD, BASE
2526 | lwzx UPVAL:RB, LFUNC:RB, RA 2537 | lwzx UPVAL:RB, LFUNC:RB, RA
2527 | lbz TMP3, UPVAL:RB->marked 2538 | lbz TMP3, UPVAL:RB->marked
2528 | lwz CARG2, UPVAL:RB->v 2539 | lwz CARG2, UPVAL:RB->v
2529 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 2540 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2530 | lbz TMP0, UPVAL:RB->closed 2541 | lbz TMP0, UPVAL:RB->closed
2531 | evmergehi TMP2, TMP1, TMP1 2542 | lwz TMP2, 0(RD)
2532 | evstdd TMP1, 0(CARG2) 2543 | stfd f0, 0(CARG2)
2533 | cmplwi cr1, TMP0, 0 2544 | cmplwi cr1, TMP0, 0
2545 | lwz TMP1, 4(RD)
2534 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 2546 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2535 | subi TMP2, TMP2, (LJ_TISNUM+1) 2547 | subi TMP2, TMP2, (LJ_TISNUM+1)
2536 | bne >2 // Upvalue is closed and black? 2548 | bne >2 // Upvalue is closed and black?
@@ -2558,13 +2570,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2558 | addi RA, RA, offsetof(GCfuncL, uvptr) 2570 | addi RA, RA, offsetof(GCfuncL, uvptr)
2559 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 2571 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
2560 | lwzx UPVAL:RB, LFUNC:RB, RA 2572 | lwzx UPVAL:RB, LFUNC:RB, RA
2561 | evmergelo STR:TMP1, TISSTR, STR:TMP1
2562 | lbz TMP3, UPVAL:RB->marked 2573 | lbz TMP3, UPVAL:RB->marked
2563 | lwz CARG2, UPVAL:RB->v 2574 | lwz CARG2, UPVAL:RB->v
2564 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 2575 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2565 | lbz TMP3, STR:TMP1->marked 2576 | lbz TMP3, STR:TMP1->marked
2566 | lbz TMP2, UPVAL:RB->closed 2577 | lbz TMP2, UPVAL:RB->closed
2567 | evstdd STR:TMP1, 0(CARG2) 2578 | li TMP0, LJ_TSTR
2579 | stw STR:TMP1, 4(CARG2)
2580 | stw TMP0, 0(CARG2)
2568 | bne >2 2581 | bne >2
2569 |1: 2582 |1:
2570 | ins_next2 2583 | ins_next2
@@ -2585,10 +2598,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2585 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2598 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2586 | srwi RA, RA, 1 2599 | srwi RA, RA, 1
2587 | addi RA, RA, offsetof(GCfuncL, uvptr) 2600 | addi RA, RA, offsetof(GCfuncL, uvptr)
2588 | evlddx TMP0, KBASE, RD 2601 | lfdx f0, KBASE, RD
2589 | lwzx UPVAL:RB, LFUNC:RB, RA 2602 | lwzx UPVAL:RB, LFUNC:RB, RA
2590 | lwz TMP1, UPVAL:RB->v 2603 | lwz TMP1, UPVAL:RB->v
2591 | evstdd TMP0, 0(TMP1) 2604 | stfd f0, 0(TMP1)
2592 | ins_next2 2605 | ins_next2
2593 break; 2606 break;
2594 case BC_USETP: 2607 case BC_USETP:
@@ -2633,8 +2646,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2633 | bl extern lj_func_newL_gc 2646 | bl extern lj_func_newL_gc
2634 | // Returns GCfuncL *. 2647 | // Returns GCfuncL *.
2635 | lwz BASE, L->base 2648 | lwz BASE, L->base
2636 | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1 2649 | li TMP0, LJ_TFUNC
2637 | evstddx LFUNC:CRET1, BASE, RA 2650 | stwux TMP0, RA, BASE
2651 | stw LFUNC:CRET1, 4(RA)
2638 | ins_next 2652 | ins_next
2639 break; 2653 break;
2640 2654
@@ -2654,9 +2668,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2654 if (op == BC_TNEW) { 2668 if (op == BC_TNEW) {
2655 | rlwinm CARG2, RD, 29, 21, 31 2669 | rlwinm CARG2, RD, 29, 21, 31
2656 | rlwinm CARG3, RD, 18, 27, 31 2670 | rlwinm CARG3, RD, 18, 27, 31
2657 | cmpwi CARG2, 0x7ff 2671 | cmpwi CARG2, 0x7ff; beq >3
2658 | li TMP1, 0x801 2672 |2:
2659 | iseleq CARG2, TMP1, CARG2
2660 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 2673 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2661 | // Returns Table *. 2674 | // Returns Table *.
2662 } else { 2675 } else {
@@ -2667,9 +2680,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2667 | // Returns Table *. 2680 | // Returns Table *.
2668 } 2681 }
2669 | lwz BASE, L->base 2682 | lwz BASE, L->base
2670 | evmergelo TAB:CRET1, TISTAB, TAB:CRET1 2683 | li TMP0, LJ_TTAB
2671 | evstddx TAB:CRET1, BASE, RA 2684 | stwux TMP0, RA, BASE
2685 | stw TAB:CRET1, 4(RA)
2672 | ins_next 2686 | ins_next
2687 if (op == BC_TNEW) {
2688 |3:
2689 | li CARG2, 0x801
2690 | b <2
2691 }
2673 |5: 2692 |5:
2674 | mr SAVE0, RD 2693 | mr SAVE0, RD
2675 | bl extern lj_gc_step_fixtop // (lua_State *L) 2694 | bl extern lj_gc_step_fixtop // (lua_State *L)
@@ -2696,28 +2715,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2696 2715
2697 case BC_TGETV: 2716 case BC_TGETV:
2698 | // RA = dst*8, RB = table*8, RC = key*8 2717 | // RA = dst*8, RB = table*8, RC = key*8
2699 | evlddx TAB:RB, BASE, RB 2718 | lwzux CARG1, RB, BASE
2700 | evlddx RC, BASE, RC 2719 | lwzux CARG2, RC, BASE
2701 | checktab TAB:RB 2720 | lwz TAB:RB, 4(RB)
2702 | checkfail ->vmeta_tgetv 2721 | lfd f0, 0(RC)
2703 | checknum RC 2722 | checktab CARG1
2704 | checkfail >5 2723 | checknum cr1, CARG2
2705 | // Convert number key to integer 2724 | bne ->vmeta_tgetv
2706 | efdctsi TMP2, RC 2725 | bge cr1, >5
2726 | // Convert number key to integer, check for integerness and range.
2727 | fctiwz f1, f0
2728 | fadd f2, f0, TOBIT
2729 | stfd f1, TMPD
2707 | lwz TMP0, TAB:RB->asize 2730 | lwz TMP0, TAB:RB->asize
2708 | efdcfsi TMP1, TMP2 2731 | fsub f2, f2, TOBIT
2709 | cmplw cr0, TMP0, TMP2 2732 | lwz TMP2, TMPD_LO
2710 | efdcmpeq cr1, RC, TMP1
2711 | lwz TMP1, TAB:RB->array 2733 | lwz TMP1, TAB:RB->array
2712 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt 2734 | fcmpu cr1, f0, f2
2735 | cmplw cr0, TMP0, TMP2
2736 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq
2713 | slwi TMP2, TMP2, 3 2737 | slwi TMP2, TMP2, 3
2714 | ble ->vmeta_tgetv // Integer key and in array part? 2738 | ble ->vmeta_tgetv // Integer key and in array part?
2715 | evlddx TMP1, TMP1, TMP2 2739 | lwzx TMP0, TMP1, TMP2
2716 | checknil TMP1 2740 | lfdx f14, TMP1, TMP2
2717 | checkok >2 2741 | checknil TMP0; beq >2
2718 |1: 2742 |1:
2719 | evstddx TMP1, BASE, RA 2743 | ins_next1
2720 | ins_next 2744 | stfdx f14, BASE, RA
2745 | ins_next2
2721 | 2746 |
2722 |2: // Check for __index if table value is nil. 2747 |2: // Check for __index if table value is nil.
2723 | lwz TAB:TMP2, TAB:RB->metatable 2748 | lwz TAB:TMP2, TAB:RB->metatable
@@ -2729,38 +2754,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2729 | b ->vmeta_tgetv 2754 | b ->vmeta_tgetv
2730 | 2755 |
2731 |5: 2756 |5:
2732 | checkstr STR:RC // String key? 2757 | checkstr CARG2; bne ->vmeta_tgetv
2733 | checkok ->BC_TGETS_Z 2758 | lwz STR:RC, 4(RC)
2734 | b ->vmeta_tgetv 2759 | b ->BC_TGETS_Z // String key?
2735 break; 2760 break;
2736 case BC_TGETS: 2761 case BC_TGETS:
2737 | // RA = dst*8, RB = table*8, RC = str_const*8 (~) 2762 | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
2738 | evlddx TAB:RB, BASE, RB 2763 | lwzux CARG1, RB, BASE
2739 | srwi TMP1, RC, 1 2764 | srwi TMP1, RC, 1
2740 | checktab TAB:RB 2765 | lwz TAB:RB, 4(RB)
2741 | subfic TMP1, TMP1, -4 2766 | subfic TMP1, TMP1, -4
2767 | checktab CARG1
2742 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 2768 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2743 | checkfail ->vmeta_tgets1 2769 | bne ->vmeta_tgets1
2744 |->BC_TGETS_Z: 2770 |->BC_TGETS_Z:
2745 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 2771 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
2746 | lwz TMP0, TAB:RB->hmask 2772 | lwz TMP0, TAB:RB->hmask
2747 | lwz TMP1, STR:RC->hash 2773 | lwz TMP1, STR:RC->hash
2748 | lwz NODE:TMP2, TAB:RB->node 2774 | lwz NODE:TMP2, TAB:RB->node
2749 | evmergelo STR:RC, TISSTR, STR:RC
2750 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 2775 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2751 | slwi TMP0, TMP1, 5 2776 | slwi TMP0, TMP1, 5
2752 | slwi TMP1, TMP1, 3 2777 | slwi TMP1, TMP1, 3
2753 | sub TMP1, TMP0, TMP1 2778 | sub TMP1, TMP0, TMP1
2754 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 2779 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
2755 |1: 2780 |1:
2756 | evldd TMP0, NODE:TMP2->key 2781 | lwz CARG1, NODE:TMP2->key
2757 | evldd TMP1, NODE:TMP2->val 2782 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
2758 | evcmpeq TMP0, STR:RC 2783 | lwz CARG2, NODE:TMP2->val
2759 | checkanyfail >4 2784 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
2760 | checknil TMP1 2785 | checkstr CARG1; bne >4
2761 | checkok >5 // Key found, but nil value? 2786 | cmpw TMP0, STR:RC; bne >4
2787 | checknil CARG2; beq >5 // Key found, but nil value?
2762 |3: 2788 |3:
2763 | evstddx TMP1, BASE, RA 2789 | stwux CARG2, RA, BASE
2790 | stw TMP1, 4(RA)
2764 | ins_next 2791 | ins_next
2765 | 2792 |
2766 |4: // Follow hash chain. 2793 |4: // Follow hash chain.
@@ -2768,7 +2795,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2768 | cmplwi NODE:TMP2, 0 2795 | cmplwi NODE:TMP2, 0
2769 | bne <1 2796 | bne <1
2770 | // End of hash chain: key not found, nil result. 2797 | // End of hash chain: key not found, nil result.
2771 | evmr TMP1, TISNIL 2798 | li CARG2, LJ_TNIL
2772 | 2799 |
2773 |5: // Check for __index if table value is nil. 2800 |5: // Check for __index if table value is nil.
2774 | lwz TAB:TMP2, TAB:RB->metatable 2801 | lwz TAB:TMP2, TAB:RB->metatable
@@ -2781,20 +2808,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2781 break; 2808 break;
2782 case BC_TGETB: 2809 case BC_TGETB:
2783 | // RA = dst*8, RB = table*8, RC = index*8 2810 | // RA = dst*8, RB = table*8, RC = index*8
2784 | evlddx TAB:RB, BASE, RB 2811 | lwzux CARG1, RB, BASE
2812 | lwz TAB:RB, 4(RB)
2785 | srwi TMP0, RC, 3 2813 | srwi TMP0, RC, 3
2786 | checktab TAB:RB 2814 | checktab CARG1; bne ->vmeta_tgetb
2787 | checkfail ->vmeta_tgetb
2788 | lwz TMP1, TAB:RB->asize 2815 | lwz TMP1, TAB:RB->asize
2789 | lwz TMP2, TAB:RB->array 2816 | lwz TMP2, TAB:RB->array
2790 | cmplw TMP0, TMP1 2817 | cmplw TMP0, TMP1; bge ->vmeta_tgetb
2791 | bge ->vmeta_tgetb 2818 | lwzx TMP1, TMP2, RC
2792 | evlddx TMP1, TMP2, RC 2819 | lfdx f0, TMP2, RC
2793 | checknil TMP1 2820 | checknil TMP1; beq >5
2794 | checkok >5
2795 |1: 2821 |1:
2796 | ins_next1 2822 | ins_next1
2797 | evstddx TMP1, BASE, RA 2823 | stfdx f0, BASE, RA
2798 | ins_next2 2824 | ins_next2
2799 | 2825 |
2800 |5: // Check for __index if table value is nil. 2826 |5: // Check for __index if table value is nil.
@@ -2809,30 +2835,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2809 2835
2810 case BC_TSETV: 2836 case BC_TSETV:
2811 | // RA = src*8, RB = table*8, RC = key*8 2837 | // RA = src*8, RB = table*8, RC = key*8
2812 | evlddx TAB:RB, BASE, RB 2838 | lwzux CARG1, RB, BASE
2813 | evlddx RC, BASE, RC 2839 | lwzux CARG2, RC, BASE
2814 | checktab TAB:RB 2840 | lwz TAB:RB, 4(RB)
2815 | checkfail ->vmeta_tsetv 2841 | lfd f0, 0(RC)
2816 | checknum RC 2842 | checktab CARG1
2817 | checkfail >5 2843 | checknum cr1, CARG2
2818 | // Convert number key to integer 2844 | bne ->vmeta_tsetv
2819 | efdctsi TMP2, RC 2845 | bge cr1, >5
2820 | evlddx SAVE0, BASE, RA 2846 | // Convert number key to integer, check for integerness and range.
2847 | fctiwz f1, f0
2848 | fadd f2, f0, TOBIT
2849 | stfd f1, TMPD
2821 | lwz TMP0, TAB:RB->asize 2850 | lwz TMP0, TAB:RB->asize
2822 | efdcfsi TMP1, TMP2 2851 | fsub f2, f2, TOBIT
2823 | cmplw cr0, TMP0, TMP2 2852 | lwz TMP2, TMPD_LO
2824 | efdcmpeq cr1, RC, TMP1
2825 | lwz TMP1, TAB:RB->array 2853 | lwz TMP1, TAB:RB->array
2826 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt 2854 | fcmpu cr1, f0, f2
2855 | cmplw cr0, TMP0, TMP2
2856 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq
2827 | slwi TMP0, TMP2, 3 2857 | slwi TMP0, TMP2, 3
2828 | ble ->vmeta_tsetv // Integer key and in array part? 2858 | ble ->vmeta_tsetv // Integer key and in array part?
2829 | lbz TMP3, TAB:RB->marked 2859 | lwzx TMP2, TMP1, TMP0
2830 | evlddx TMP2, TMP1, TMP0 2860 | lbz TMP3, TAB:RB->marked
2831 | checknil TMP2 2861 | lfdx f14, BASE, RA
2832 | checkok >3 2862 | checknil TMP2; beq >3
2833 |1: 2863 |1:
2834 | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 2864 | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
2835 | evstddx SAVE0, TMP1, TMP0 2865 | stfdx f14, TMP1, TMP0
2836 | bne >7 2866 | bne >7
2837 |2: 2867 |2:
2838 | ins_next 2868 | ins_next
@@ -2847,46 +2877,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2847 | b ->vmeta_tsetv 2877 | b ->vmeta_tsetv
2848 | 2878 |
2849 |5: 2879 |5:
2850 | checkstr STR:RC // String key? 2880 | checkstr CARG2; bne ->vmeta_tsetv
2851 | checkok ->BC_TSETS_Z 2881 | lwz STR:RC, 4(RC)
2852 | b ->vmeta_tsetv 2882 | b ->BC_TSETS_Z // String key?
2853 | 2883 |
2854 |7: // Possible table write barrier for the value. Skip valiswhite check. 2884 |7: // Possible table write barrier for the value. Skip valiswhite check.
2855 | barrierback TAB:RB, TMP3, TMP0 2885 | barrierback TAB:RB, TMP3, TMP0
2856 | b <2 2886 | b <2
2857 break; 2887 break;
2888 |1:
2889 | checkstr CARG1; bne >4
2890 | cmpw TMP0, STR:RC; bne >4
2891 | checknil CARG2; beq >5 // Key found, but nil value?
2892 |3:
2893 | stwux CARG2, RA, BASE
2894 | stw TMP1, 4(RA)
2895 | ins_next
2858 case BC_TSETS: 2896 case BC_TSETS:
2859 | // RA = src*8, RB = table*8, RC = str_const*8 (~) 2897 | // RA = src*8, RB = table*8, RC = str_const*8 (~)
2860 | evlddx TAB:RB, BASE, RB 2898 | lwzux CARG1, RB, BASE
2861 | srwi TMP1, RC, 1 2899 | srwi TMP1, RC, 1
2862 | checktab TAB:RB 2900 | lwz TAB:RB, 4(RB)
2863 | subfic TMP1, TMP1, -4 2901 | subfic TMP1, TMP1, -4
2902 | checktab CARG1
2864 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 2903 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2865 | checkfail ->vmeta_tsets1 2904 | bne ->vmeta_tsets1
2866 |->BC_TSETS_Z: 2905 |->BC_TSETS_Z:
2867 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 2906 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
2868 | lwz TMP0, TAB:RB->hmask 2907 | lwz TMP0, TAB:RB->hmask
2869 | lwz TMP1, STR:RC->hash 2908 | lwz TMP1, STR:RC->hash
2870 | lwz NODE:TMP2, TAB:RB->node 2909 | lwz NODE:TMP2, TAB:RB->node
2871 | evmergelo STR:RC, TISSTR, STR:RC
2872 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 2910 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
2873 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 2911 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2874 | evlddx SAVE0, BASE, RA 2912 | lfdx f14, BASE, RA
2875 | slwi TMP0, TMP1, 5 2913 | slwi TMP0, TMP1, 5
2876 | slwi TMP1, TMP1, 3 2914 | slwi TMP1, TMP1, 3
2877 | sub TMP1, TMP0, TMP1 2915 | sub TMP1, TMP0, TMP1
2878 | lbz TMP3, TAB:RB->marked 2916 | lbz TMP3, TAB:RB->marked
2879 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 2917 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
2880 |1: 2918 |1:
2881 | evldd TMP0, NODE:TMP2->key 2919 | lwz CARG1, NODE:TMP2->key
2882 | evldd TMP1, NODE:TMP2->val 2920 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
2883 | evcmpeq TMP0, STR:RC 2921 | lwz CARG2, NODE:TMP2->val
2884 | checkanyfail >5 2922 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
2885 | checknil TMP1 2923 | checkstr CARG1; bne >5
2886 | checkok >4 // Key found, but nil value? 2924 | cmpw TMP0, STR:RC; bne >5
2925 | checknil CARG2; beq >4 // Key found, but nil value?
2887 |2: 2926 |2:
2888 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 2927 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2889 | evstdd SAVE0, NODE:TMP2->val 2928 | stfd f14, NODE:TMP2->val
2890 | bne >7 2929 | bne >7
2891 |3: 2930 |3:
2892 | ins_next 2931 | ins_next
@@ -2918,12 +2957,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2918 | andi. TMP0, TMP0, 1<<MM_newindex 2957 | andi. TMP0, TMP0, 1<<MM_newindex
2919 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. 2958 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
2920 |6: 2959 |6:
2921 | mr CARG2, TAB:RB 2960 | li TMP0, LJ_TSTR
2922 | evstdd STR:RC, 0(CARG3) 2961 | stw STR:RC, 4(CARG3)
2962 | mr CARG2, TAB:RB
2963 | stw TMP0, 0(CARG3)
2923 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 2964 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
2924 | // Returns TValue *. 2965 | // Returns TValue *.
2925 | lwz BASE, L->base 2966 | lwz BASE, L->base
2926 | evstdd SAVE0, 0(CRET1) 2967 | stfd f14, 0(CRET1)
2927 | b <3 // No 2nd write barrier needed. 2968 | b <3 // No 2nd write barrier needed.
2928 | 2969 |
2929 |7: // Possible table write barrier for the value. Skip valiswhite check. 2970 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -2932,22 +2973,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2932 break; 2973 break;
2933 case BC_TSETB: 2974 case BC_TSETB:
2934 | // RA = src*8, RB = table*8, RC = index*8 2975 | // RA = src*8, RB = table*8, RC = index*8
2935 | evlddx TAB:RB, BASE, RB 2976 | lwzux CARG1, RB, BASE
2977 | lwz TAB:RB, 4(RB)
2936 | srwi TMP0, RC, 3 2978 | srwi TMP0, RC, 3
2937 | checktab TAB:RB 2979 | checktab CARG1; bne ->vmeta_tsetb
2938 | checkfail ->vmeta_tsetb
2939 | lwz TMP1, TAB:RB->asize 2980 | lwz TMP1, TAB:RB->asize
2940 | lwz TMP2, TAB:RB->array 2981 | lwz TMP2, TAB:RB->array
2941 | lbz TMP3, TAB:RB->marked 2982 | lbz TMP3, TAB:RB->marked
2942 | cmplw TMP0, TMP1 2983 | cmplw TMP0, TMP1
2943 | evlddx SAVE0, BASE, RA 2984 | lfdx f14, BASE, RA
2944 | bge ->vmeta_tsetb 2985 | bge ->vmeta_tsetb
2945 | evlddx TMP1, TMP2, RC 2986 | lwzx TMP1, TMP2, RC
2946 | checknil TMP1 2987 | checknil TMP1; beq >5
2947 | checkok >5
2948 |1: 2988 |1:
2949 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 2989 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2950 | evstddx SAVE0, TMP2, RC 2990 | stfdx f14, TMP2, RC
2951 | bne >7 2991 | bne >7
2952 |2: 2992 |2:
2953 | ins_next 2993 | ins_next
@@ -2987,10 +3027,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2987 | add TMP1, TMP1, TMP0 3027 | add TMP1, TMP1, TMP0
2988 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 3028 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2989 |3: // Copy result slots to table. 3029 |3: // Copy result slots to table.
2990 | evldd TMP0, 0(RA) 3030 | lfd f0, 0(RA)
2991 | addi RA, RA, 8 3031 | addi RA, RA, 8
2992 | cmpw cr1, RA, TMP2 3032 | cmpw cr1, RA, TMP2
2993 | evstdd TMP0, 0(TMP1) 3033 | stfd f0, 0(TMP1)
2994 | addi TMP1, TMP1, 8 3034 | addi TMP1, TMP1, 8
2995 | blt cr1, <3 3035 | blt cr1, <3
2996 | bne >7 3036 | bne >7
@@ -3021,13 +3061,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3021 break; 3061 break;
3022 case BC_CALL: 3062 case BC_CALL:
3023 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 3063 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
3024 | evlddx LFUNC:RB, BASE, RA 3064 | mr TMP2, BASE
3025 | mr TMP2, BASE 3065 | lwzux TMP0, BASE, RA
3026 | add BASE, BASE, RA 3066 | lwz LFUNC:RB, 4(BASE)
3027 | subi NARGS8:RC, NARGS8:RC, 8 3067 | subi NARGS8:RC, NARGS8:RC, 8
3028 | checkfunc LFUNC:RB
3029 | addi BASE, BASE, 8 3068 | addi BASE, BASE, 8
3030 | checkfail ->vmeta_call 3069 | checkfunc TMP0; bne ->vmeta_call
3031 | ins_call 3070 | ins_call
3032 break; 3071 break;
3033 3072
@@ -3038,13 +3077,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3038 break; 3077 break;
3039 case BC_CALLT: 3078 case BC_CALLT:
3040 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 3079 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
3041 | evlddx LFUNC:RB, BASE, RA 3080 | lwzux TMP0, RA, BASE
3042 | add RA, BASE, RA 3081 | lwz LFUNC:RB, 4(RA)
3043 | lwz TMP1, FRAME_PC(BASE)
3044 | subi NARGS8:RC, NARGS8:RC, 8 3082 | subi NARGS8:RC, NARGS8:RC, 8
3045 | checkfunc LFUNC:RB 3083 | lwz TMP1, FRAME_PC(BASE)
3084 | checkfunc TMP0
3046 | addi RA, RA, 8 3085 | addi RA, RA, 8
3047 | checkfail ->vmeta_callt 3086 | bne ->vmeta_callt
3048 |->BC_CALLT_Z: 3087 |->BC_CALLT_Z:
3049 | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. 3088 | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand.
3050 | lbz TMP3, LFUNC:RB->ffid 3089 | lbz TMP3, LFUNC:RB->ffid
@@ -3058,9 +3097,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3058 | beq cr1, >3 3097 | beq cr1, >3
3059 |2: 3098 |2:
3060 | addi TMP3, TMP2, 8 3099 | addi TMP3, TMP2, 8
3061 | evlddx TMP0, RA, TMP2 3100 | lfdx f0, RA, TMP2
3062 | cmplw cr1, TMP3, NARGS8:RC 3101 | cmplw cr1, TMP3, NARGS8:RC
3063 | evstddx TMP0, BASE, TMP2 3102 | stfdx f0, BASE, TMP2
3064 | mr TMP2, TMP3 3103 | mr TMP2, TMP3
3065 | bne cr1, <2 3104 | bne cr1, <2
3066 |3: 3105 |3:
@@ -3089,19 +3128,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3089 3128
3090 case BC_ITERC: 3129 case BC_ITERC:
3091 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) 3130 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
3092 | subi RA, RA, 24 // evldd doesn't support neg. offsets. 3131 | mr TMP2, BASE
3093 | mr TMP2, BASE 3132 | add BASE, BASE, RA
3094 | evlddx LFUNC:RB, BASE, RA 3133 | lwz TMP1, -24(BASE)
3095 | add BASE, BASE, RA 3134 | lwz LFUNC:RB, -20(BASE)
3096 | evldd TMP0, 8(BASE) 3135 | lfd f1, -8(BASE)
3097 | evldd TMP1, 16(BASE) 3136 | lfd f0, -16(BASE)
3098 | evstdd LFUNC:RB, 24(BASE) // Copy callable. 3137 | stw TMP1, 0(BASE) // Copy callable.
3099 | checkfunc LFUNC:RB 3138 | stw LFUNC:RB, 4(BASE)
3100 | evstdd TMP0, 32(BASE) // Copy state. 3139 | checkfunc TMP1
3140 | stfd f1, 16(BASE) // Copy control var.
3101 | li NARGS8:RC, 16 // Iterators get 2 arguments. 3141 | li NARGS8:RC, 16 // Iterators get 2 arguments.
3102 | evstdd TMP1, 40(BASE) // Copy control var. 3142 | stfdu f0, 8(BASE) // Copy state.
3103 | addi BASE, BASE, 32 3143 | bne ->vmeta_call
3104 | checkfail ->vmeta_call
3105 | ins_call 3144 | ins_call
3106 break; 3145 break;
3107 3146
@@ -3120,18 +3159,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3120 | cmplw RC, TMP0 3159 | cmplw RC, TMP0
3121 | slwi TMP3, RC, 3 3160 | slwi TMP3, RC, 3
3122 | bge >5 // Index points after array part? 3161 | bge >5 // Index points after array part?
3123 | evlddx TMP2, TMP1, TMP3 3162 | lwzx TMP2, TMP1, TMP3
3163 | lfdx f0, TMP1, TMP3
3124 | checknil TMP2 3164 | checknil TMP2
3125 | lwz INS, -4(PC) 3165 | lwz INS, -4(PC)
3126 | checkok >4 3166 | beq >4
3127 | efdcfsi TMP0, RC 3167 | tonum_u f1, RC
3128 | addi RC, RC, 1 3168 | addi RC, RC, 1
3129 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 3169 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
3130 | evstdd TMP2, 8(RA) 3170 | stfd f0, 8(RA)
3131 | decode_RD4 TMP1, INS 3171 | decode_RD4 TMP1, INS
3132 | stw RC, -4(RA) // Update control var. 3172 | stw RC, -4(RA) // Update control var.
3133 | add PC, TMP1, TMP3 3173 | add PC, TMP1, TMP3
3134 | evstdd TMP0, 0(RA) 3174 | stfd f1, 0(RA)
3135 |3: 3175 |3:
3136 | ins_next 3176 | ins_next
3137 | 3177 |
@@ -3149,17 +3189,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3149 | bgt <3 3189 | bgt <3
3150 | slwi RB, RC, 3 3190 | slwi RB, RC, 3
3151 | sub TMP3, TMP3, RB 3191 | sub TMP3, TMP3, RB
3152 | evlddx RB, TMP2, TMP3 3192 | lwzx RB, TMP2, TMP3
3193 | lfdx f0, TMP2, TMP3
3153 | add NODE:TMP3, TMP2, TMP3 3194 | add NODE:TMP3, TMP2, TMP3
3154 | checknil RB 3195 | checknil RB
3155 | lwz INS, -4(PC) 3196 | lwz INS, -4(PC)
3156 | checkok >7 3197 | beq >7
3157 | evldd TMP3, NODE:TMP3->key 3198 | lfd f1, NODE:TMP3->key
3158 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 3199 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
3159 | evstdd RB, 8(RA) 3200 | stfd f0, 8(RA)
3160 | add RC, RC, TMP0 3201 | add RC, RC, TMP0
3161 | decode_RD4 TMP1, INS 3202 | decode_RD4 TMP1, INS
3162 | evstdd TMP3, 0(RA) 3203 | stfd f1, 0(RA)
3163 | addi RC, RC, 1 3204 | addi RC, RC, 1
3164 | add PC, TMP1, TMP2 3205 | add PC, TMP1, TMP2
3165 | stw RC, -4(RA) // Update control var. 3206 | stw RC, -4(RA) // Update control var.
@@ -3173,11 +3214,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3173 case BC_ISNEXT: 3214 case BC_ISNEXT:
3174 | // RA = base*8, RD = target (points to ITERN) 3215 | // RA = base*8, RD = target (points to ITERN)
3175 | add RA, BASE, RA 3216 | add RA, BASE, RA
3176 | li TMP2, -24 3217 | lwz TMP0, -24(RA)
3177 | evlddx CFUNC:TMP1, RA, TMP2 3218 | lwz CFUNC:TMP1, -20(RA)
3178 | lwz TMP2, -16(RA) 3219 | lwz TMP2, -16(RA)
3179 | lwz TMP3, -8(RA) 3220 | lwz TMP3, -8(RA)
3180 | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1
3181 | cmpwi cr0, TMP2, LJ_TTAB 3221 | cmpwi cr0, TMP2, LJ_TTAB
3182 | cmpwi cr1, TMP0, LJ_TFUNC 3222 | cmpwi cr1, TMP0, LJ_TFUNC
3183 | cmpwi cr6, TMP3, LJ_TNIL 3223 | cmpwi cr6, TMP3, LJ_TNIL
@@ -3218,16 +3258,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3218 | subi TMP2, TMP2, 16 3258 | subi TMP2, TMP2, 16
3219 | ble >2 // No vararg slots? 3259 | ble >2 // No vararg slots?
3220 |1: // Copy vararg slots to destination slots. 3260 |1: // Copy vararg slots to destination slots.
3221 | evldd TMP0, 0(RC) 3261 | lfd f0, 0(RC)
3222 | addi RC, RC, 8 3262 | addi RC, RC, 8
3223 | evstdd TMP0, 0(RA) 3263 | stfd f0, 0(RA)
3224 | cmplw RA, TMP2 3264 | cmplw RA, TMP2
3225 | cmplw cr1, RC, TMP3 3265 | cmplw cr1, RC, TMP3
3226 | bge >3 // All destination slots filled? 3266 | bge >3 // All destination slots filled?
3227 | addi RA, RA, 8 3267 | addi RA, RA, 8
3228 | blt cr1, <1 // More vararg slots? 3268 | blt cr1, <1 // More vararg slots?
3229 |2: // Fill up remainder with nil. 3269 |2: // Fill up remainder with nil.
3230 | evstdd TISNIL, 0(RA) 3270 | stw TISNIL, 0(RA)
3231 | cmplw RA, TMP2 3271 | cmplw RA, TMP2
3232 | addi RA, RA, 8 3272 | addi RA, RA, 8
3233 | blt <2 3273 | blt <2
@@ -3243,9 +3283,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3243 | addi MULTRES, TMP1, 8 3283 | addi MULTRES, TMP1, 8
3244 | bgt >7 3284 | bgt >7
3245 |6: 3285 |6:
3246 | evldd TMP0, 0(RC) 3286 | lfd f0, 0(RC)
3247 | addi RC, RC, 8 3287 | addi RC, RC, 8
3248 | evstdd TMP0, 0(RA) 3288 | stfd f0, 0(RA)
3249 | cmplw RC, TMP3 3289 | cmplw RC, TMP3
3250 | addi RA, RA, 8 3290 | addi RA, RA, 8
3251 | blt <6 // More vararg slots? 3291 | blt <6 // More vararg slots?
@@ -3296,14 +3336,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3296 | li TMP1, 0 3336 | li TMP1, 0
3297 |2: 3337 |2:
3298 | addi TMP3, TMP1, 8 3338 | addi TMP3, TMP1, 8
3299 | evlddx TMP0, RA, TMP1 3339 | lfdx f0, RA, TMP1
3300 | cmpw TMP3, RC 3340 | cmpw TMP3, RC
3301 | evstddx TMP0, TMP2, TMP1 3341 | stfdx f0, TMP2, TMP1
3302 | beq >3 3342 | beq >3
3303 | addi TMP1, TMP3, 8 3343 | addi TMP1, TMP3, 8
3304 | evlddx TMP0, RA, TMP3 3344 | lfdx f1, RA, TMP3
3305 | cmpw TMP1, RC 3345 | cmpw TMP1, RC
3306 | evstddx TMP0, TMP2, TMP3 3346 | stfdx f1, TMP2, TMP3
3307 | bne <2 3347 | bne <2
3308 |3: 3348 |3:
3309 |5: 3349 |5:
@@ -3320,7 +3360,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3320 |6: // Fill up results with nil. 3360 |6: // Fill up results with nil.
3321 | subi TMP1, RD, 8 3361 | subi TMP1, RD, 8
3322 | addi RD, RD, 8 3362 | addi RD, RD, 8
3323 | evstddx TISNIL, TMP2, TMP1 3363 | stwx TISNIL, TMP2, TMP1
3324 | b <5 3364 | b <5
3325 | 3365 |
3326 |->BC_RETV_Z: // Non-standard return case. 3366 |->BC_RETV_Z: // Non-standard return case.
@@ -3345,8 +3385,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3345 | subi TMP2, BASE, 8 3385 | subi TMP2, BASE, 8
3346 | decode_RB8 RB, INS 3386 | decode_RB8 RB, INS
3347 if (op == BC_RET1) { 3387 if (op == BC_RET1) {
3348 | evldd TMP0, 0(RA) 3388 | lfd f0, 0(RA)
3349 | evstdd TMP0, 0(TMP2) 3389 | stfd f0, 0(TMP2)
3350 } 3390 }
3351 |5: 3391 |5:
3352 | cmplw RB, RD 3392 | cmplw RB, RD
@@ -3362,7 +3402,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3362 |6: // Fill up results with nil. 3402 |6: // Fill up results with nil.
3363 | subi TMP1, RD, 8 3403 | subi TMP1, RD, 8
3364 | addi RD, RD, 8 3404 | addi RD, RD, 8
3365 | evstddx TISNIL, TMP2, TMP1 3405 | stwx TISNIL, TMP2, TMP1
3366 | b <5 3406 | b <5
3367 break; 3407 break;
3368 3408
@@ -3384,47 +3424,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3384 case BC_IFORL: 3424 case BC_IFORL:
3385 | // RA = base*8, RD = target (after end of loop or start of loop) 3425 | // RA = base*8, RD = target (after end of loop or start of loop)
3386 vk = (op == BC_IFORL || op == BC_JFORL); 3426 vk = (op == BC_IFORL || op == BC_JFORL);
3387 | add RA, BASE, RA
3388 | evldd TMP1, FORL_IDX*8(RA)
3389 | evldd TMP3, FORL_STEP*8(RA)
3390 | evldd TMP2, FORL_STOP*8(RA)
3391 if (!vk) {
3392 | evcmpgtu cr0, TMP1, TISNUM
3393 | evcmpgtu cr7, TMP3, TISNUM
3394 | evcmpgtu cr1, TMP2, TISNUM
3395 | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
3396 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3397 | blt ->vmeta_for
3398 }
3399 if (vk) { 3427 if (vk) {
3400 | efdadd TMP1, TMP1, TMP3 3428 | lfdux f1, RA, BASE
3401 | evstdd TMP1, FORL_IDX*8(RA) 3429 | lfd f3, FORL_STEP*8(RA)
3430 | lfd f2, FORL_STOP*8(RA)
3431 | lwz TMP3, FORL_STEP*8(RA)
3432 | fadd f1, f1, f3
3433 | stfd f1, FORL_IDX*8(RA)
3434 } else {
3435 | lwzux TMP1, RA, BASE
3436 | lfd f1, FORL_IDX*8(RA)
3437 | lwz TMP3, FORL_STEP*8(RA)
3438 | lfd f3, FORL_STEP*8(RA)
3439 | lwz TMP2, FORL_STOP*8(RA)
3440 | lfd f2, FORL_STOP*8(RA)
3441 | cmplw cr0, TMP1, TISNUM
3442 | cmplw cr7, TMP3, TISNUM
3443 | cmplw cr1, TMP2, TISNUM
3444 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
3445 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3446 | bge ->vmeta_for
3402 } 3447 }
3403 | evcmpgts TMP3, TISNIL 3448 | cmpwi cr3, TMP3, 0
3404 | evstdd TMP1, FORL_EXT*8(RA)
3405 | bge >2
3406 | efdcmpgt TMP1, TMP2
3407 |1:
3408 if (op != BC_JFORL) { 3449 if (op != BC_JFORL) {
3409 | srwi RD, RD, 1 3450 | srwi RD, RD, 1
3451 }
3452 | stfd f1, FORL_EXT*8(RA)
3453 if (op != BC_JFORL) {
3410 | add RD, PC, RD 3454 | add RD, PC, RD
3411 if (op == BC_JFORI) {
3412 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3413 } else {
3414 | addis RD, RD, -(BCBIAS_J*4 >> 16)
3415 }
3416 } 3455 }
3456 | fcmpu cr0, f1, f2
3457 if (op == BC_JFORI) {
3458 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3459 }
3460 | blt cr3, >5
3417 if (op == BC_FORI) { 3461 if (op == BC_FORI) {
3418 | iselgt PC, RD, PC 3462 | bgt >3
3419 } else if (op == BC_IFORL) { 3463 } else if (op == BC_IFORL) {
3420 | iselgt PC, PC, RD 3464 | bgt >2
3465 |1:
3466 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3421 } else { 3467 } else {
3422 | ble =>BC_JLOOP 3468 | ble =>BC_JLOOP
3423 } 3469 }
3424 | ins_next
3425 |2: 3470 |2:
3426 | efdcmpgt TMP2, TMP1 3471 | ins_next
3427 | b <1 3472 |5: // Negative step.
3473 if (op == BC_FORI) {
3474 | bge <2
3475 |3:
3476 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3477 } else if (op == BC_IFORL) {
3478 | bge <1
3479 } else {
3480 | bge =>BC_JLOOP
3481 }
3482 | b <2
3428 break; 3483 break;
3429 3484
3430 case BC_ITERL: 3485 case BC_ITERL:
@@ -3440,15 +3495,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3440#endif 3495#endif
3441 case BC_IITERL: 3496 case BC_IITERL:
3442 | // RA = base*8, RD = target 3497 | // RA = base*8, RD = target
3443 | evlddx TMP1, BASE, RA 3498 | lwzux TMP1, RA, BASE
3444 | subi RA, RA, 8 3499 | lwz TMP2, 4(RA)
3445 | checknil TMP1 3500 | checknil TMP1; beq >1 // Stop if iterator returned nil.
3446 | checkok >1 // Stop if iterator returned nil.
3447 if (op == BC_JITERL) { 3501 if (op == BC_JITERL) {
3448 | NYI 3502 | NYI
3449 } else { 3503 } else {
3450 | branch_RD // Otherwise save control var + branch. 3504 | branch_RD // Otherwise save control var + branch.
3451 | evstddx TMP1, BASE, RA 3505 | stw TMP1, -8(RA)
3506 | stw TMP2, -4(RA)
3452 } 3507 }
3453 |1: 3508 |1:
3454 | ins_next 3509 | ins_next
@@ -3514,7 +3569,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3514 } 3569 }
3515 | 3570 |
3516 |3: // Clear missing parameters. 3571 |3: // Clear missing parameters.
3517 | evstddx TISNIL, BASE, NARGS8:RC 3572 | stwx TISNIL, BASE, NARGS8:RC
3518 | addi NARGS8:RC, NARGS8:RC, 8 3573 | addi NARGS8:RC, NARGS8:RC, 8
3519 | b <2 3574 | b <2
3520 break; 3575 break;
@@ -3546,20 +3601,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3546 | beq >3 3601 | beq >3
3547 |1: 3602 |1:
3548 | cmplw RA, RC // Less args than parameters? 3603 | cmplw RA, RC // Less args than parameters?
3549 | evldd TMP0, 0(RA) 3604 | lwz TMP0, 0(RA)
3605 | lwz TMP3, 4(RA)
3550 | bge >4 3606 | bge >4
3551 | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC). 3607 | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
3552 | addi RA, RA, 8 3608 | addi RA, RA, 8
3553 |2: 3609 |2:
3554 | addic. TMP2, TMP2, -1 3610 | addic. TMP2, TMP2, -1
3555 | evstdd TMP0, 8(TMP1) 3611 | stw TMP0, 8(TMP1)
3612 | stw TMP3, 12(TMP1)
3556 | addi TMP1, TMP1, 8 3613 | addi TMP1, TMP1, 8
3557 | bne <1 3614 | bne <1
3558 |3: 3615 |3:
3559 | ins_next2 3616 | ins_next2
3560 | 3617 |
3561 |4: // Clear missing parameters. 3618 |4: // Clear missing parameters.
3562 | evmr TMP0, TISNIL 3619 | li TMP0, LJ_TNIL
3563 | b <2 3620 | b <2
3564 break; 3621 break;
3565 3622