aboutsummaryrefslogtreecommitdiff
path: root/src/vm_ppc.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_ppc.dasc')
-rw-r--r--src/vm_ppc.dasc1620
1 files changed, 1256 insertions, 364 deletions
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 6b973d4e..4299e266 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
1|// Low-level VM code for PowerPC CPUs. 1|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4| 4|
@@ -18,7 +18,6 @@
18|// DynASM defines used by the PPC port: 18|// DynASM defines used by the PPC port:
19|// 19|//
20|// P64 64 bit pointers (only for GPR64 testing). 20|// P64 64 bit pointers (only for GPR64 testing).
21|// Note: a full PPC64 _LP64 port is not planned.
22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). 21|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
23|// Affects reg saves, stack layout, carry/overflow/dot flags etc. 22|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). 23|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -103,6 +102,18 @@
103|// Fixed register assignments for the interpreter. 102|// Fixed register assignments for the interpreter.
104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) 103|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
105| 104|
105|.macro .FPU, a, b // Emit the two-operand statement "a, b" only if FPU is set.
106|.if FPU
107| a, b
108|.endif
109|.endmacro
110|
111|.macro .FPU, a, b, c // Three-operand variant of the same conditional emit.
112|.if FPU
113| a, b, c
114|.endif
115|.endmacro
116|
106|// The following must be C callee-save (but BASE is often refetched). 117|// The following must be C callee-save (but BASE is often refetched).
107|.define BASE, r14 // Base of current Lua stack frame. 118|.define BASE, r14 // Base of current Lua stack frame.
108|.define KBASE, r15 // Constants of current Lua function. 119|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +127,10 @@
116|.define TISNUM, r22 127|.define TISNUM, r22
117|.define TISNIL, r23 128|.define TISNIL, r23
118|.define ZERO, r24 129|.define ZERO, r24
130|.if FPU
119|.define TOBIT, f30 // 2^52 + 2^51. 131|.define TOBIT, f30 // 2^52 + 2^51.
120|.define TONUM, f31 // 2^52 + 2^51 + 2^31. 132|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
133|.endif
121| 134|
122|// The following temporaries are not saved across C calls, except for RA. 135|// The following temporaries are not saved across C calls, except for RA.
123|.define RA, r20 // Callee-save. 136|.define RA, r20 // Callee-save.
@@ -133,6 +146,7 @@
133| 146|
134|// Saved temporaries. 147|// Saved temporaries.
135|.define SAVE0, r21 148|.define SAVE0, r21
149|.define SAVE1, r25
136| 150|
137|// Calling conventions. 151|// Calling conventions.
138|.define CARG1, r3 152|.define CARG1, r3
@@ -141,8 +155,10 @@
141|.define CARG4, r6 // Overlaps TMP3. 155|.define CARG4, r6 // Overlaps TMP3.
142|.define CARG5, r7 // Overlaps INS. 156|.define CARG5, r7 // Overlaps INS.
143| 157|
158|.if FPU
144|.define FARG1, f1 159|.define FARG1, f1
145|.define FARG2, f2 160|.define FARG2, f2
161|.endif
146| 162|
147|.define CRET1, r3 163|.define CRET1, r3
148|.define CRET2, r4 164|.define CRET2, r4
@@ -213,10 +229,16 @@
213|.endif 229|.endif
214|.else 230|.else
215| 231|
232|.if FPU
216|.define SAVE_LR, 276(sp) 233|.define SAVE_LR, 276(sp)
217|.define CFRAME_SPACE, 272 // Delta for sp. 234|.define CFRAME_SPACE, 272 // Delta for sp.
218|// Back chain for sp: 272(sp) <-- sp entering interpreter 235|// Back chain for sp: 272(sp) <-- sp entering interpreter
219|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. 236|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
237|.else
238|.define SAVE_LR, 132(sp)
239|.define CFRAME_SPACE, 128 // Delta for sp.
240|// Back chain for sp: 128(sp) <-- sp entering interpreter
241|.endif
220|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. 242|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
221|.define SAVE_CR, 52(sp) // 32 bit CR save. 243|.define SAVE_CR, 52(sp) // 32 bit CR save.
222|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 244|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +248,25 @@
226|.define SAVE_PC, 32(sp) 248|.define SAVE_PC, 32(sp)
227|.define SAVE_MULTRES, 28(sp) 249|.define SAVE_MULTRES, 28(sp)
228|.define UNUSED1, 24(sp) 250|.define UNUSED1, 24(sp)
251|.if FPU
229|.define TMPD_LO, 20(sp) 252|.define TMPD_LO, 20(sp)
230|.define TMPD_HI, 16(sp) 253|.define TMPD_HI, 16(sp)
231|.define TONUM_LO, 12(sp) 254|.define TONUM_LO, 12(sp)
232|.define TONUM_HI, 8(sp) 255|.define TONUM_HI, 8(sp)
256|.else
257|.define SFSAVE_4, 20(sp)
258|.define SFSAVE_3, 16(sp)
259|.define SFSAVE_2, 12(sp)
260|.define SFSAVE_1, 8(sp)
261|.endif
233|// Next frame lr: 4(sp) 262|// Next frame lr: 4(sp)
234|// Back chain for sp: 0(sp) <-- sp while in interpreter 263|// Back chain for sp: 0(sp) <-- sp while in interpreter
235| 264|
265|.if FPU
236|.define TMPD_BLO, 23(sp) 266|.define TMPD_BLO, 23(sp)
237|.define TMPD, TMPD_HI 267|.define TMPD, TMPD_HI
238|.define TONUM_D, TONUM_HI 268|.define TONUM_D, TONUM_HI
269|.endif
239| 270|
240|.endif 271|.endif
241| 272|
@@ -245,7 +276,7 @@
245|.else 276|.else
246| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 277| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
247|.endif 278|.endif
248| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 279| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
249|.endmacro 280|.endmacro
250|.macro rest_, reg 281|.macro rest_, reg
251|.if GPR64 282|.if GPR64
@@ -253,7 +284,7 @@
253|.else 284|.else
254| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) 285| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
255|.endif 286|.endif
256| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 287| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
257|.endmacro 288|.endmacro
258| 289|
259|.macro saveregs 290|.macro saveregs
@@ -316,19 +347,14 @@
316|.type NODE, Node 347|.type NODE, Node
317|.type NARGS8, int 348|.type NARGS8, int
318|.type TRACE, GCtrace 349|.type TRACE, GCtrace
350|.type SBUF, SBuf
319| 351|
320|//----------------------------------------------------------------------- 352|//-----------------------------------------------------------------------
321| 353|
322|// These basic macros should really be part of DynASM.
323|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
324|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
325|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
326|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
327|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
328|
329|// Trap for not-yet-implemented parts. 354|// Trap for not-yet-implemented parts.
330|.macro NYI; tw 4, sp, sp; .endmacro 355|.macro NYI; tw 4, sp, sp; .endmacro
331| 356|
357|.if FPU
332|// int/FP conversions. 358|// int/FP conversions.
333|.macro tonum_i, freg, reg 359|.macro tonum_i, freg, reg
334| xoris reg, reg, 0x8000 360| xoris reg, reg, 0x8000
@@ -352,6 +378,7 @@
352|.macro toint, reg, freg 378|.macro toint, reg, freg
353| toint reg, freg, freg 379| toint reg, freg, freg
354|.endmacro 380|.endmacro
381|.endif
355| 382|
356|//----------------------------------------------------------------------- 383|//-----------------------------------------------------------------------
357| 384|
@@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx)
539 | beq >2 566 | beq >2
540 |1: 567 |1:
541 | addic. TMP1, TMP1, -8 568 | addic. TMP1, TMP1, -8
569 |.if FPU
542 | lfd f0, 0(RA) 570 | lfd f0, 0(RA)
571 |.else
572 | lwz CARG1, 0(RA)
573 | lwz CARG2, 4(RA)
574 |.endif
543 | addi RA, RA, 8 575 | addi RA, RA, 8
576 |.if FPU
544 | stfd f0, 0(BASE) 577 | stfd f0, 0(BASE)
578 |.else
579 | stw CARG1, 0(BASE)
580 | stw CARG2, 4(BASE)
581 |.endif
545 | addi BASE, BASE, 8 582 | addi BASE, BASE, 8
546 | bney <1 583 | bney <1
547 | 584 |
@@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx)
619 | .toc ld TOCREG, SAVE_TOC 656 | .toc ld TOCREG, SAVE_TOC
620 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 657 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
621 | lp BASE, L->base 658 | lp BASE, L->base
622 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 659 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
623 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 660 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
624 | li ZERO, 0 661 | li ZERO, 0
625 | stw TMP3, TMPD 662 | .FPU stw TMP3, TMPD
626 | li TMP1, LJ_TFALSE 663 | li TMP1, LJ_TFALSE
627 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 664 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
628 | li TISNIL, LJ_TNIL 665 | li TISNIL, LJ_TNIL
629 | li_vmstate INTERP 666 | li_vmstate INTERP
630 | lfs TOBIT, TMPD 667 | .FPU lfs TOBIT, TMPD
631 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 668 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
632 | la RA, -8(BASE) // Results start at BASE-8. 669 | la RA, -8(BASE) // Results start at BASE-8.
633 | stw TMP3, TMPD 670 | .FPU stw TMP3, TMPD
634 | addi DISPATCH, DISPATCH, GG_G2DISP 671 | addi DISPATCH, DISPATCH, GG_G2DISP
635 | stw TMP1, 0(RA) // Prepend false to error message. 672 | stw TMP1, 0(RA) // Prepend false to error message.
636 | li RD, 16 // 2 results: false + error message. 673 | li RD, 16 // 2 results: false + error message.
637 | st_vmstate 674 | st_vmstate
638 | lfs TONUM, TMPD 675 | .FPU lfs TONUM, TMPD
639 | b ->vm_returnc 676 | b ->vm_returnc
640 | 677 |
641 |//----------------------------------------------------------------------- 678 |//-----------------------------------------------------------------------
@@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx)
684 | stw CARG3, SAVE_NRES 721 | stw CARG3, SAVE_NRES
685 | cmplwi TMP1, 0 722 | cmplwi TMP1, 0
686 | stw CARG3, SAVE_ERRF 723 | stw CARG3, SAVE_ERRF
687 | stp TMP0, L->cframe
688 | stp CARG3, SAVE_CFRAME 724 | stp CARG3, SAVE_CFRAME
689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 725 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
726 | stp TMP0, L->cframe
690 | beq >3 727 | beq >3
691 | 728 |
692 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
693 | mr RA, BASE 731 | mr RA, BASE
694 | lp BASE, L->base 732 | lp BASE, L->base
695 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 733 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
696 | lp TMP1, L->top 734 | lp TMP1, L->top
697 | lwz PC, FRAME_PC(BASE) 735 | lwz PC, FRAME_PC(BASE)
698 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 736 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
699 | stb CARG3, L->status 737 | stb CARG3, L->status
700 | stw TMP3, TMPD 738 | .FPU stw TMP3, TMPD
701 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 739 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
702 | lfs TOBIT, TMPD 740 | .FPU lfs TOBIT, TMPD
703 | sub RD, TMP1, BASE 741 | sub RD, TMP1, BASE
704 | stw TMP3, TMPD 742 | .FPU stw TMP3, TMPD
705 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 743 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
706 | addi RD, RD, 8 744 | addi RD, RD, 8
707 | stw TMP0, TONUM_HI 745 | .FPU stw TMP0, TONUM_HI
708 | li_vmstate INTERP 746 | li_vmstate INTERP
709 | li ZERO, 0 747 | li ZERO, 0
710 | st_vmstate 748 | st_vmstate
711 | andix. TMP0, PC, FRAME_TYPE 749 | andix. TMP0, PC, FRAME_TYPE
712 | mr MULTRES, RD 750 | mr MULTRES, RD
713 | lfs TONUM, TMPD 751 | .FPU lfs TONUM, TMPD
714 | li TISNIL, LJ_TNIL 752 | li TISNIL, LJ_TNIL
715 | beq ->BC_RET_Z 753 | beq ->BC_RET_Z
716 | b ->vm_return 754 | b ->vm_return
@@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx)
729 | 767 |
730 |1: // Entry point for vm_pcall above (PC = ftype). 768 |1: // Entry point for vm_pcall above (PC = ftype).
731 | lp TMP1, L:CARG1->cframe 769 | lp TMP1, L:CARG1->cframe
732 | stw CARG3, SAVE_NRES
733 | mr L, CARG1 770 | mr L, CARG1
734 | stw CARG1, SAVE_L 771 | stw CARG3, SAVE_NRES
735 | mr BASE, CARG2
736 | stp sp, L->cframe // Add our C frame to cframe chain.
737 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 772 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
773 | stw CARG1, SAVE_L
774 | mr BASE, CARG2
775 | addi DISPATCH, DISPATCH, GG_G2DISP
738 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 776 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
739 | stp TMP1, SAVE_CFRAME 777 | stp TMP1, SAVE_CFRAME
740 | addi DISPATCH, DISPATCH, GG_G2DISP 778 | stp sp, L->cframe // Add our C frame to cframe chain.
741 | 779 |
742 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 780 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
781 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
743 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 782 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
744 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 783 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
745 | lp TMP1, L->top 784 | lp TMP1, L->top
746 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 785 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
747 | add PC, PC, BASE 786 | add PC, PC, BASE
748 | stw TMP3, TMPD 787 | .FPU stw TMP3, TMPD
749 | li ZERO, 0 788 | li ZERO, 0
750 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 789 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
751 | lfs TOBIT, TMPD 790 | .FPU lfs TOBIT, TMPD
752 | sub PC, PC, TMP2 // PC = frame delta + frame type 791 | sub PC, PC, TMP2 // PC = frame delta + frame type
753 | stw TMP3, TMPD 792 | .FPU stw TMP3, TMPD
754 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 793 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
755 | sub NARGS8:RC, TMP1, BASE 794 | sub NARGS8:RC, TMP1, BASE
756 | stw TMP0, TONUM_HI 795 | .FPU stw TMP0, TONUM_HI
757 | li_vmstate INTERP 796 | li_vmstate INTERP
758 | lfs TONUM, TMPD 797 | .FPU lfs TONUM, TMPD
759 | li TISNIL, LJ_TNIL 798 | li TISNIL, LJ_TNIL
760 | st_vmstate 799 | st_vmstate
761 | 800 |
@@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx)
776 | lwz TMP0, L:CARG1->stack 815 | lwz TMP0, L:CARG1->stack
777 | stw CARG1, SAVE_L 816 | stw CARG1, SAVE_L
778 | lp TMP1, L->top 817 | lp TMP1, L->top
818 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
779 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 819 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
780 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 820 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
781 | lp TMP1, L->cframe 821 | lp TMP1, L->cframe
782 | stp sp, L->cframe // Add our C frame to cframe chain. 822 | addi DISPATCH, DISPATCH, GG_G2DISP
783 | .toc lp CARG4, 0(CARG4) 823 | .toc lp CARG4, 0(CARG4)
784 | li TMP2, 0 824 | li TMP2, 0
785 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 825 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
786 | stw TMP2, SAVE_ERRF // No error function. 826 | stw TMP2, SAVE_ERRF // No error function.
787 | stp TMP1, SAVE_CFRAME 827 | stp TMP1, SAVE_CFRAME
828 | stp sp, L->cframe // Add our C frame to cframe chain.
829 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
788 | mtctr CARG4 830 | mtctr CARG4
789 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 831 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
790 |.if PPE 832 |.if PPE
@@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
793 |.else 835 |.else
794 | mr. BASE, CRET1 836 | mr. BASE, CRET1
795 |.endif 837 |.endif
796 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 838 | li PC, FRAME_CP
797 | li PC, FRAME_CP
798 | addi DISPATCH, DISPATCH, GG_G2DISP
799 | bne <3 // Else continue with the call. 839 | bne <3 // Else continue with the call.
800 | b ->vm_leave_cp // No base? Just remove C frame. 840 | b ->vm_leave_cp // No base? Just remove C frame.
801 | 841 |
@@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx)
842 | lwz INS, -4(PC) 882 | lwz INS, -4(PC)
843 | subi CARG2, RB, 16 883 | subi CARG2, RB, 16
844 | decode_RB8 SAVE0, INS 884 | decode_RB8 SAVE0, INS
885 |.if FPU
845 | lfd f0, 0(RA) 886 | lfd f0, 0(RA)
887 |.else
888 | lwz TMP2, 0(RA)
889 | lwz TMP3, 4(RA)
890 |.endif
846 | add TMP1, BASE, SAVE0 891 | add TMP1, BASE, SAVE0
847 | stp BASE, L->base 892 | stp BASE, L->base
848 | cmplw TMP1, CARG2 893 | cmplw TMP1, CARG2
849 | sub CARG3, CARG2, TMP1 894 | sub CARG3, CARG2, TMP1
850 | decode_RA8 RA, INS 895 | decode_RA8 RA, INS
896 |.if FPU
851 | stfd f0, 0(CARG2) 897 | stfd f0, 0(CARG2)
898 |.else
899 | stw TMP2, 0(CARG2)
900 | stw TMP3, 4(CARG2)
901 |.endif
852 | bney ->BC_CAT_Z 902 | bney ->BC_CAT_Z
903 |.if FPU
853 | stfdx f0, BASE, RA 904 | stfdx f0, BASE, RA
905 |.else
906 | stwux TMP2, RA, BASE
907 | stw TMP3, 4(RA)
908 |.endif
854 | b ->cont_nop 909 | b ->cont_nop
855 | 910 |
856 |//-- Table indexing metamethods ----------------------------------------- 911 |//-- Table indexing metamethods -----------------------------------------
@@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx)
903 | // Returns TValue * (finished) or NULL (metamethod). 958 | // Returns TValue * (finished) or NULL (metamethod).
904 | cmplwi CRET1, 0 959 | cmplwi CRET1, 0
905 | beq >3 960 | beq >3
961 |.if FPU
906 | lfd f0, 0(CRET1) 962 | lfd f0, 0(CRET1)
963 |.else
964 | lwz TMP0, 0(CRET1)
965 | lwz TMP1, 4(CRET1)
966 |.endif
907 | ins_next1 967 | ins_next1
968 |.if FPU
908 | stfdx f0, BASE, RA 969 | stfdx f0, BASE, RA
970 |.else
971 | stwux TMP0, RA, BASE
972 | stw TMP1, 4(RA)
973 |.endif
909 | ins_next2 974 | ins_next2
910 | 975 |
911 |3: // Call __index metamethod. 976 |3: // Call __index metamethod.
@@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx)
918 | li NARGS8:RC, 16 // 2 args for func(t, k). 983 | li NARGS8:RC, 16 // 2 args for func(t, k).
919 | b ->vm_call_dispatch_f 984 | b ->vm_call_dispatch_f
920 | 985 |
986 |->vmeta_tgetr: // Look the key up via the C helper; arguments are presumably set up by the caller (not visible here).
987 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
988 | // Returns cTValue * or NULL.
989 | cmplwi CRET1, 0 // NULL result?
990 | beq >1 // Yes: store TISNIL in the destination slot instead.
991 |.if FPU
992 | lfd f14, 0(CRET1) // Found: hand the 8 byte value on in f14.
993 |.else
994 | lwz SAVE0, 0(CRET1) // Found, no FPU: hand the two words on in SAVE0/SAVE1.
995 | lwz SAVE1, 4(CRET1)
996 |.endif
997 | b ->BC_TGETR_Z // Continuation label is defined outside this excerpt.
998 |1:
999 | stwx TISNIL, BASE, RA // Write TISNIL to the first word of the slot at BASE+RA.
1000 | b ->cont_nop
1001 |
921 |//----------------------------------------------------------------------- 1002 |//-----------------------------------------------------------------------
922 | 1003 |
923 |->vmeta_tsets1: 1004 |->vmeta_tsets1:
@@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx)
967 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 1048 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
968 | // Returns TValue * (finished) or NULL (metamethod). 1049 | // Returns TValue * (finished) or NULL (metamethod).
969 | cmplwi CRET1, 0 1050 | cmplwi CRET1, 0
1051 |.if FPU
970 | lfdx f0, BASE, RA 1052 | lfdx f0, BASE, RA
1053 |.else
1054 | lwzux TMP2, RA, BASE
1055 | lwz TMP3, 4(RA)
1056 |.endif
971 | beq >3 1057 | beq >3
972 | // NOBARRIER: lj_meta_tset ensures the table is not black. 1058 | // NOBARRIER: lj_meta_tset ensures the table is not black.
973 | ins_next1 1059 | ins_next1
1060 |.if FPU
974 | stfd f0, 0(CRET1) 1061 | stfd f0, 0(CRET1)
1062 |.else
1063 | stw TMP2, 0(CRET1)
1064 | stw TMP3, 4(CRET1)
1065 |.endif
975 | ins_next2 1066 | ins_next2
976 | 1067 |
977 |3: // Call __newindex metamethod. 1068 |3: // Call __newindex metamethod.
@@ -982,9 +1073,27 @@ static void build_subroutines(BuildCtx *ctx)
982 | add PC, TMP1, BASE 1073 | add PC, TMP1, BASE
983 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1074 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
984 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 1075 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
1076 |.if FPU
985 | stfd f0, 16(BASE) // Copy value to third argument. 1077 | stfd f0, 16(BASE) // Copy value to third argument.
1078 |.else
1079 | stw TMP2, 16(BASE)
1080 | stw TMP3, 20(BASE)
1081 |.endif
986 | b ->vm_call_dispatch_f 1082 | b ->vm_call_dispatch_f
987 | 1083 |
1084 |->vmeta_tsetr: // NOTE(review): expects the value to store in f14 (or SAVE0/SAVE1 without FPU) on entry -- confirm at the caller.
1085 | stp BASE, L->base // Sync BASE and PC to memory before calling into C.
1086 | stw PC, SAVE_PC
1087 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1088 | // Returns TValue *.
1089 |.if FPU
1090 | stfd f14, 0(CRET1) // Store the value into the returned slot.
1091 |.else
1092 | stw SAVE0, 0(CRET1) // No FPU: store the value as two 32 bit words.
1093 | stw SAVE1, 4(CRET1)
1094 |.endif
1095 | b ->cont_nop
1096 |
988 |//-- Comparison metamethods --------------------------------------------- 1097 |//-- Comparison metamethods ---------------------------------------------
989 | 1098 |
990 |->vmeta_comp: 1099 |->vmeta_comp:
@@ -1021,9 +1130,19 @@ static void build_subroutines(BuildCtx *ctx)
1021 | 1130 |
1022 |->cont_ra: // RA = resultptr 1131 |->cont_ra: // RA = resultptr
1023 | lwz INS, -4(PC) 1132 | lwz INS, -4(PC)
1133 |.if FPU
1024 | lfd f0, 0(RA) 1134 | lfd f0, 0(RA)
1135 |.else
1136 | lwz CARG1, 0(RA)
1137 | lwz CARG2, 4(RA)
1138 |.endif
1025 | decode_RA8 TMP1, INS 1139 | decode_RA8 TMP1, INS
1140 |.if FPU
1026 | stfdx f0, BASE, TMP1 1141 | stfdx f0, BASE, TMP1
1142 |.else
1143 | stwux CARG1, TMP1, BASE
1144 | stw CARG2, 4(TMP1)
1145 |.endif
1027 | b ->cont_nop 1146 | b ->cont_nop
1028 | 1147 |
1029 |->cont_condt: // RA = resultptr 1148 |->cont_condt: // RA = resultptr
@@ -1063,6 +1182,16 @@ static void build_subroutines(BuildCtx *ctx)
1063 | b <3 1182 | b <3
1064 |.endif 1183 |.endif
1065 | 1184 |
1185 |->vmeta_istype: // Calls lj_meta_istype(L, RA>>3, RD>>3), then resumes at cont_nop.
1186 | subi PC, PC, 4 // Back PC up by 4 bytes before it is saved below.
1187 | stp BASE, L->base
1188 | srwi CARG2, RA, 3 // ra argument = RA / 8 (RA presumably holds a slot offset scaled by 8 -- confirm).
1189 | mr CARG1, L
1190 | srwi CARG3, RD, 3 // tp argument = RD / 8.
1191 | stw PC, SAVE_PC
1192 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1193 | b ->cont_nop
1194 |
1066 |//-- Arithmetic metamethods --------------------------------------------- 1195 |//-- Arithmetic metamethods ---------------------------------------------
1067 | 1196 |
1068 |->vmeta_arith_nv: 1197 |->vmeta_arith_nv:
@@ -1219,22 +1348,32 @@ static void build_subroutines(BuildCtx *ctx)
1219 |.macro .ffunc_n, name 1348 |.macro .ffunc_n, name // Defines ->ff_<name>; loads argument 1 and falls back unless checknum accepts it.
1220 |->ff_ .. name: 1349 |->ff_ .. name:
1221 | cmplwi NARGS8:RC, 8 1350 | cmplwi NARGS8:RC, 8 // Result is consumed by the blt below.
1222 | lwz CARG3, 0(BASE) 1351 | lwz CARG1, 0(BASE) // First word of argument 1; tested by checknum below.
1352 |.if FPU
1223 | lfd FARG1, 0(BASE) 1353 | lfd FARG1, 0(BASE)
1354 |.else
1355 | lwz CARG2, 4(BASE) // No FPU: second word of argument 1 goes in CARG2.
1356 |.endif
1224 | blt ->fff_fallback 1357 | blt ->fff_fallback // NARGS8 < 8 (presumably: no argument passed).
1225 | checknum CARG3; bge ->fff_fallback 1358 | checknum CARG1; bge ->fff_fallback // checknum is defined outside this excerpt.
1226 |.endmacro 1359 |.endmacro
1227 | 1360 |
1228 |.macro .ffunc_nn, name 1361 |.macro .ffunc_nn, name // Defines ->ff_<name>; loads arguments 1 and 2 and falls back unless checknum accepts both.
1229 |->ff_ .. name: 1362 |->ff_ .. name:
1230 | cmplwi NARGS8:RC, 16 1363 | cmplwi NARGS8:RC, 16 // Result is consumed by the blt below.
1231 | lwz CARG3, 0(BASE) 1364 | lwz CARG1, 0(BASE) // First word of argument 1.
1365 |.if FPU
1232 | lfd FARG1, 0(BASE) 1366 | lfd FARG1, 0(BASE)
1233 | lwz CARG4, 8(BASE) 1367 | lwz CARG3, 8(BASE) // First word of argument 2.
1234 | lfd FARG2, 8(BASE) 1368 | lfd FARG2, 8(BASE)
1369 |.else
1370 | lwz CARG2, 4(BASE) // No FPU: argument 1 in CARG1/CARG2, argument 2 in CARG3/CARG4.
1371 | lwz CARG3, 8(BASE)
1372 | lwz CARG4, 12(BASE)
1373 |.endif
1235 | blt ->fff_fallback 1374 | blt ->fff_fallback // NARGS8 < 16 (presumably: fewer than two arguments).
1375 | checknum CARG1; bge ->fff_fallback
1236 | checknum CARG3; bge ->fff_fallback 1376 | checknum CARG3; bge ->fff_fallback
1237 | checknum CARG4; bge ->fff_fallback
1238 |.endmacro 1377 |.endmacro
1239 | 1378 |
1240 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 1379 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1255,14 +1394,21 @@ static void build_subroutines(BuildCtx *ctx)
1255 | bge cr1, ->fff_fallback 1394 | bge cr1, ->fff_fallback
1256 | stw CARG3, 0(RA) 1395 | stw CARG3, 0(RA)
1257 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1396 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1397 | addi TMP1, BASE, 8
1398 | add TMP2, RA, NARGS8:RC
1258 | stw CARG1, 4(RA) 1399 | stw CARG1, 4(RA)
1259 | beq ->fff_res // Done if exactly 1 argument. 1400 | beq ->fff_res // Done if exactly 1 argument.
1260 | li TMP1, 8
1261 | subi RC, RC, 8
1262 |1: 1401 |1: // Copy loop: move each 8 byte slot at TMP1 down by one slot.
1263 | cmplw TMP1, RC 1402 | cmplw TMP1, TMP2 // Compare before the increment; bney below loops until TMP1 reached TMP2.
1264 | lfdx f0, BASE, TMP1 1403 |.if FPU
1265 | stfdx f0, RA, TMP1 1404 | lfd f0, 0(TMP1)
1405 | stfd f0, -8(TMP1) // Store one slot lower, same destination as the non-FPU path.
1406 |.else
1407 | lwz CARG1, 0(TMP1)
1408 | lwz CARG2, 4(TMP1)
1409 | stw CARG1, -8(TMP1)
1410 | stw CARG2, -4(TMP1)
1411 |.endif
1266 | addi TMP1, TMP1, 8 1412 | addi TMP1, TMP1, 8
1267 | bney <1 1413 | bney <1
1268 | b ->fff_res 1414 | b ->fff_res
@@ -1277,8 +1423,14 @@ static void build_subroutines(BuildCtx *ctx)
1277 | orc TMP1, TMP2, TMP0 1423 | orc TMP1, TMP2, TMP0
1278 | addi TMP1, TMP1, ~LJ_TISNUM+1 1424 | addi TMP1, TMP1, ~LJ_TISNUM+1
1279 | slwi TMP1, TMP1, 3 1425 | slwi TMP1, TMP1, 3
1426 |.if FPU
1280 | la TMP2, CFUNC:RB->upvalue 1427 | la TMP2, CFUNC:RB->upvalue
1281 | lfdx FARG1, TMP2, TMP1 1428 | lfdx FARG1, TMP2, TMP1
1429 |.else
1430 | add TMP1, CFUNC:RB, TMP1
1431 | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
1432 | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
1433 |.endif
1282 | b ->fff_resn 1434 | b ->fff_resn
1283 | 1435 |
1284 |//-- Base library: getters and setters --------------------------------- 1436 |//-- Base library: getters and setters ---------------------------------
@@ -1356,7 +1508,12 @@ static void build_subroutines(BuildCtx *ctx)
1356 | mr CARG1, L 1508 | mr CARG1, L
1357 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1509 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1358 | // Returns cTValue *. 1510 | // Returns cTValue *.
1511 |.if FPU
1359 | lfd FARG1, 0(CRET1) 1512 | lfd FARG1, 0(CRET1)
1513 |.else
1514 | lwz CARG2, 4(CRET1)
1515 | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
1516 |.endif
1360 | b ->fff_resn 1517 | b ->fff_resn
1361 | 1518 |
1362 |//-- Base library: conversions ------------------------------------------ 1519 |//-- Base library: conversions ------------------------------------------
@@ -1365,7 +1522,11 @@ static void build_subroutines(BuildCtx *ctx)
1365 | // Only handles the number case inline (without a base argument). 1522 | // Only handles the number case inline (without a base argument).
1366 | cmplwi NARGS8:RC, 8 1523 | cmplwi NARGS8:RC, 8
1367 | lwz CARG1, 0(BASE) 1524 | lwz CARG1, 0(BASE)
1525 |.if FPU
1368 | lfd FARG1, 0(BASE) 1526 | lfd FARG1, 0(BASE)
1527 |.else
1528 | lwz CARG2, 4(BASE)
1529 |.endif
1369 | bne ->fff_fallback // Exactly one argument. 1530 | bne ->fff_fallback // Exactly one argument.
1370 | checknum CARG1; bgt ->fff_fallback 1531 | checknum CARG1; bgt ->fff_fallback
1371 | b ->fff_resn 1532 | b ->fff_resn
@@ -1387,9 +1548,9 @@ static void build_subroutines(BuildCtx *ctx)
1387 | mr CARG1, L 1548 | mr CARG1, L
1388 | mr CARG2, BASE 1549 | mr CARG2, BASE
1389 |.if DUALNUM 1550 |.if DUALNUM
1390 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1551 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1391 |.else 1552 |.else
1392 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1553 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1393 |.endif 1554 |.endif
1394 | // Returns GCstr *. 1555 | // Returns GCstr *.
1395 | li CARG3, LJ_TSTR 1556 | li CARG3, LJ_TSTR
@@ -1416,12 +1577,23 @@ static void build_subroutines(BuildCtx *ctx)
1416 | cmplwi CRET1, 0 1577 | cmplwi CRET1, 0
1417 | li CARG3, LJ_TNIL 1578 | li CARG3, LJ_TNIL
1418 | beq ->fff_restv // End of traversal: return nil. 1579 | beq ->fff_restv // End of traversal: return nil.
1419 | lfd f0, 8(BASE) // Copy key and value to results.
1420 | la RA, -8(BASE) 1580 | la RA, -8(BASE)
1581 |.if FPU
1582 | lfd f0, 8(BASE) // Copy key and value to results.
1421 | lfd f1, 16(BASE) 1583 | lfd f1, 16(BASE)
1422 | stfd f0, 0(RA) 1584 | stfd f0, 0(RA)
1423 | li RD, (2+1)*8
1424 | stfd f1, 8(RA) 1585 | stfd f1, 8(RA)
1586 |.else
1587 | lwz CARG1, 8(BASE)
1588 | lwz CARG2, 12(BASE)
1589 | lwz CARG3, 16(BASE)
1590 | lwz CARG4, 20(BASE)
1591 | stw CARG1, 0(RA)
1592 | stw CARG2, 4(RA)
1593 | stw CARG3, 8(RA)
1594 | stw CARG4, 12(RA)
1595 |.endif
1596 | li RD, (2+1)*8
1425 | b ->fff_res 1597 | b ->fff_res
1426 | 1598 |
1427 |.ffunc_1 pairs 1599 |.ffunc_1 pairs
@@ -1430,17 +1602,32 @@ static void build_subroutines(BuildCtx *ctx)
1430 | bne ->fff_fallback 1602 | bne ->fff_fallback
1431#if LJ_52 1603#if LJ_52
1432 | lwz TAB:TMP2, TAB:CARG1->metatable 1604 | lwz TAB:TMP2, TAB:CARG1->metatable
1605 |.if FPU
1433 | lfd f0, CFUNC:RB->upvalue[0] 1606 | lfd f0, CFUNC:RB->upvalue[0]
1607 |.else
1608 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1609 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1610 |.endif
1434 | cmplwi TAB:TMP2, 0 1611 | cmplwi TAB:TMP2, 0
1435 | la RA, -8(BASE) 1612 | la RA, -8(BASE)
1436 | bne ->fff_fallback 1613 | bne ->fff_fallback
1437#else 1614#else
1615 |.if FPU
1438 | lfd f0, CFUNC:RB->upvalue[0] 1616 | lfd f0, CFUNC:RB->upvalue[0]
1617 |.else
1618 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1619 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1620 |.endif
1439 | la RA, -8(BASE) 1621 | la RA, -8(BASE)
1440#endif 1622#endif
1441 | stw TISNIL, 8(BASE) 1623 | stw TISNIL, 8(BASE)
1442 | li RD, (3+1)*8 1624 | li RD, (3+1)*8
1625 |.if FPU
1443 | stfd f0, 0(RA) 1626 | stfd f0, 0(RA)
1627 |.else
1628 | stw TMP0, 0(RA)
1629 | stw TMP1, 4(RA)
1630 |.endif
1444 | b ->fff_res 1631 | b ->fff_res
1445 | 1632 |
1446 |.ffunc ipairs_aux 1633 |.ffunc ipairs_aux
@@ -1486,14 +1673,24 @@ static void build_subroutines(BuildCtx *ctx)
1486 | stfd FARG2, 0(RA) 1673 | stfd FARG2, 0(RA)
1487 |.endif 1674 |.endif
1488 | ble >2 // Not in array part? 1675 | ble >2 // Not in array part?
1676 |.if FPU
1489 | lwzx TMP2, TMP1, TMP3 1677 | lwzx TMP2, TMP1, TMP3
1490 | lfdx f0, TMP1, TMP3 1678 | lfdx f0, TMP1, TMP3
1679 |.else
1680 | lwzux TMP2, TMP1, TMP3
1681 | lwz TMP3, 4(TMP1)
1682 |.endif
1491 |1: 1683 |1:
1492 | checknil TMP2 1684 | checknil TMP2
1493 | li RD, (0+1)*8 1685 | li RD, (0+1)*8
1494 | beq ->fff_res // End of iteration, return 0 results. 1686 | beq ->fff_res // End of iteration, return 0 results.
1495 | li RD, (2+1)*8 1687 | li RD, (2+1)*8
1688 |.if FPU
1496 | stfd f0, 8(RA) 1689 | stfd f0, 8(RA)
1690 |.else
1691 | stw TMP2, 8(RA)
1692 | stw TMP3, 12(RA)
1693 |.endif
1497 | b ->fff_res 1694 | b ->fff_res
1498 |2: // Check for empty hash part first. Otherwise call C function. 1695 |2: // Check for empty hash part first. Otherwise call C function.
1499 | lwz TMP0, TAB:CARG1->hmask 1696 | lwz TMP0, TAB:CARG1->hmask
@@ -1507,7 +1704,11 @@ static void build_subroutines(BuildCtx *ctx)
1507 | li RD, (0+1)*8 1704 | li RD, (0+1)*8
1508 | beq ->fff_res 1705 | beq ->fff_res
1509 | lwz TMP2, 0(CRET1) 1706 | lwz TMP2, 0(CRET1)
1707 |.if FPU
1510 | lfd f0, 0(CRET1) 1708 | lfd f0, 0(CRET1)
1709 |.else
1710 | lwz TMP3, 4(CRET1)
1711 |.endif
1511 | b <1 1712 | b <1
1512 | 1713 |
1513 |.ffunc_1 ipairs 1714 |.ffunc_1 ipairs
@@ -1516,12 +1717,22 @@ static void build_subroutines(BuildCtx *ctx)
1516 | bne ->fff_fallback 1717 | bne ->fff_fallback
1517#if LJ_52 1718#if LJ_52
1518 | lwz TAB:TMP2, TAB:CARG1->metatable 1719 | lwz TAB:TMP2, TAB:CARG1->metatable
1720 |.if FPU
1519 | lfd f0, CFUNC:RB->upvalue[0] 1721 | lfd f0, CFUNC:RB->upvalue[0]
1722 |.else
1723 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1724 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1725 |.endif
1520 | cmplwi TAB:TMP2, 0 1726 | cmplwi TAB:TMP2, 0
1521 | la RA, -8(BASE) 1727 | la RA, -8(BASE)
1522 | bne ->fff_fallback 1728 | bne ->fff_fallback
1523#else 1729#else
1730 |.if FPU
1524 | lfd f0, CFUNC:RB->upvalue[0] 1731 | lfd f0, CFUNC:RB->upvalue[0]
1732 |.else
1733 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1734 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1735 |.endif
1525 | la RA, -8(BASE) 1736 | la RA, -8(BASE)
1526#endif 1737#endif
1527 |.if DUALNUM 1738 |.if DUALNUM
@@ -1531,7 +1742,12 @@ static void build_subroutines(BuildCtx *ctx)
1531 |.endif 1742 |.endif
1532 | stw ZERO, 12(BASE) 1743 | stw ZERO, 12(BASE)
1533 | li RD, (3+1)*8 1744 | li RD, (3+1)*8
1745 |.if FPU
1534 | stfd f0, 0(RA) 1746 | stfd f0, 0(RA)
1747 |.else
1748 | stw TMP0, 0(RA)
1749 | stw TMP1, 4(RA)
1750 |.endif
1535 | b ->fff_res 1751 | b ->fff_res
1536 | 1752 |
1537 |//-- Base library: catch errors ---------------------------------------- 1753 |//-- Base library: catch errors ----------------------------------------
@@ -1550,19 +1766,32 @@ static void build_subroutines(BuildCtx *ctx)
1550 | 1766 |
1551 |.ffunc xpcall 1767 |.ffunc xpcall
1552 | cmplwi NARGS8:RC, 16 1768 | cmplwi NARGS8:RC, 16
1553 | lwz CARG4, 8(BASE) 1769 | lwz CARG3, 8(BASE)
1770 |.if FPU
1554 | lfd FARG2, 8(BASE) 1771 | lfd FARG2, 8(BASE)
1555 | lfd FARG1, 0(BASE) 1772 | lfd FARG1, 0(BASE)
1773 |.else
1774 | lwz CARG1, 0(BASE)
1775 | lwz CARG2, 4(BASE)
1776 | lwz CARG4, 12(BASE)
1777 |.endif
1556 | blt ->fff_fallback 1778 | blt ->fff_fallback
1557 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1779 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1558 | mr TMP2, BASE 1780 | mr TMP2, BASE
1559 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1781 | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
1560 | la BASE, 16(BASE) 1782 | la BASE, 16(BASE)
1561 | // Remember active hook before pcall. 1783 | // Remember active hook before pcall.
1562 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1784 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1785 |.if FPU
1563 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1786 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1564 | subi NARGS8:RC, NARGS8:RC, 16
1565 | stfd FARG1, 8(TMP2) 1787 | stfd FARG1, 8(TMP2)
1788 |.else
1789 | stw CARG3, 0(TMP2)
1790 | stw CARG4, 4(TMP2)
1791 | stw CARG1, 8(TMP2)
1792 | stw CARG2, 12(TMP2)
1793 |.endif
1794 | subi NARGS8:RC, NARGS8:RC, 16
1566 | addi PC, TMP1, 16+FRAME_PCALL 1795 | addi PC, TMP1, 16+FRAME_PCALL
1567 | b ->vm_call_dispatch 1796 | b ->vm_call_dispatch
1568 | 1797 |
@@ -1605,9 +1834,21 @@ static void build_subroutines(BuildCtx *ctx)
1605 | stp BASE, L->top 1834 | stp BASE, L->top
1606 |2: // Move args to coroutine. 1835 |2: // Move args to coroutine.
1607 | cmpw TMP1, NARGS8:RC 1836 | cmpw TMP1, NARGS8:RC
1837 |.if FPU
1608 | lfdx f0, BASE, TMP1 1838 | lfdx f0, BASE, TMP1
1839 |.else
1840 | add CARG3, BASE, TMP1
1841 | lwz TMP2, 0(CARG3)
1842 | lwz TMP3, 4(CARG3)
1843 |.endif
1609 | beq >3 1844 | beq >3
1845 |.if FPU
1610 | stfdx f0, CARG2, TMP1 1846 | stfdx f0, CARG2, TMP1
1847 |.else
1848 | add CARG3, CARG2, TMP1
1849 | stw TMP2, 0(CARG3)
1850 | stw TMP3, 4(CARG3)
1851 |.endif
1611 | addi TMP1, TMP1, 8 1852 | addi TMP1, TMP1, 8
1612 | b <2 1853 | b <2
1613 |3: 1854 |3:
@@ -1622,6 +1863,7 @@ static void build_subroutines(BuildCtx *ctx)
1622 | lp TMP3, L:SAVE0->top 1863 | lp TMP3, L:SAVE0->top
1623 | li_vmstate INTERP 1864 | li_vmstate INTERP
1624 | lp BASE, L->base 1865 | lp BASE, L->base
1866 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
1625 | st_vmstate 1867 | st_vmstate
1626 | bgt >8 1868 | bgt >8
1627 | sub RD, TMP3, TMP2 1869 | sub RD, TMP3, TMP2
@@ -1637,8 +1879,17 @@ static void build_subroutines(BuildCtx *ctx)
1637 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1879 | stp TMP2, L:SAVE0->top // Clear coroutine stack.
1638 |5: // Move results from coroutine. 1880 |5: // Move results from coroutine.
1639 | cmplw TMP1, TMP3 1881 | cmplw TMP1, TMP3
1882 |.if FPU
1640 | lfdx f0, TMP2, TMP1 1883 | lfdx f0, TMP2, TMP1
1641 | stfdx f0, BASE, TMP1 1884 | stfdx f0, BASE, TMP1
1885 |.else
1886 | add CARG3, TMP2, TMP1
1887 | lwz CARG1, 0(CARG3)
1888 | lwz CARG2, 4(CARG3)
1889 | add CARG3, BASE, TMP1
1890 | stw CARG1, 0(CARG3)
1891 | stw CARG2, 4(CARG3)
1892 |.endif
1642 | addi TMP1, TMP1, 8 1893 | addi TMP1, TMP1, 8
1643 | bne <5 1894 | bne <5
1644 |6: 1895 |6:
@@ -1663,12 +1914,22 @@ static void build_subroutines(BuildCtx *ctx)
1663 | andix. TMP0, PC, FRAME_TYPE 1914 | andix. TMP0, PC, FRAME_TYPE
1664 | la TMP3, -8(TMP3) 1915 | la TMP3, -8(TMP3)
1665 | li TMP1, LJ_TFALSE 1916 | li TMP1, LJ_TFALSE
1917 |.if FPU
1666 | lfd f0, 0(TMP3) 1918 | lfd f0, 0(TMP3)
1919 |.else
1920 | lwz CARG1, 0(TMP3)
1921 | lwz CARG2, 4(TMP3)
1922 |.endif
1667 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1923 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
1668 | li RD, (2+1)*8 1924 | li RD, (2+1)*8
1669 | stw TMP1, -8(BASE) // Prepend false to results. 1925 | stw TMP1, -8(BASE) // Prepend false to results.
1670 | la RA, -8(BASE) 1926 | la RA, -8(BASE)
1927 |.if FPU
1671 | stfd f0, 0(BASE) // Copy error message. 1928 | stfd f0, 0(BASE) // Copy error message.
1929 |.else
1930 | stw CARG1, 0(BASE) // Copy error message.
1931 | stw CARG2, 4(BASE)
1932 |.endif
1672 | b <7 1933 | b <7
1673 |.else 1934 |.else
1674 | mr CARG1, L 1935 | mr CARG1, L
@@ -1847,7 +2108,12 @@ static void build_subroutines(BuildCtx *ctx)
1847 | lus CARG1, 0x8000 // -(2^31). 2108 | lus CARG1, 0x8000 // -(2^31).
1848 | beqy ->fff_resi 2109 | beqy ->fff_resi
1849 |5: 2110 |5:
2111 |.if FPU
1850 | lfd FARG1, 0(BASE) 2112 | lfd FARG1, 0(BASE)
2113 |.else
2114 | lwz CARG1, 0(BASE)
2115 | lwz CARG2, 4(BASE)
2116 |.endif
1851 | blex func 2117 | blex func
1852 | b ->fff_resn 2118 | b ->fff_resn
1853 |.endmacro 2119 |.endmacro
@@ -1871,10 +2137,14 @@ static void build_subroutines(BuildCtx *ctx)
1871 | 2137 |
1872 |.ffunc math_log 2138 |.ffunc math_log
1873 | cmplwi NARGS8:RC, 8 2139 | cmplwi NARGS8:RC, 8
1874 | lwz CARG3, 0(BASE) 2140 | lwz CARG1, 0(BASE)
1875 | lfd FARG1, 0(BASE)
1876 | bne ->fff_fallback // Need exactly 1 argument. 2141 | bne ->fff_fallback // Need exactly 1 argument.
1877 | checknum CARG3; bge ->fff_fallback 2142 | checknum CARG1; bge ->fff_fallback
2143 |.if FPU
2144 | lfd FARG1, 0(BASE)
2145 |.else
2146 | lwz CARG2, 4(BASE)
2147 |.endif
1878 | blex log 2148 | blex log
1879 | b ->fff_resn 2149 | b ->fff_resn
1880 | 2150 |
@@ -1893,26 +2163,27 @@ static void build_subroutines(BuildCtx *ctx)
1893 | math_extern2 atan2 2163 | math_extern2 atan2
1894 | math_extern2 fmod 2164 | math_extern2 fmod
1895 | 2165 |
1896 |->ff_math_deg:
1897 |.ffunc_n math_rad
1898 | lfd FARG2, CFUNC:RB->upvalue[0]
1899 | fmul FARG1, FARG1, FARG2
1900 | b ->fff_resn
1901 |
1902 |.if DUALNUM 2166 |.if DUALNUM
1903 |.ffunc math_ldexp 2167 |.ffunc math_ldexp
1904 | cmplwi NARGS8:RC, 16 2168 | cmplwi NARGS8:RC, 16
1905 | lwz CARG3, 0(BASE) 2169 | lwz TMP0, 0(BASE)
2170 |.if FPU
1906 | lfd FARG1, 0(BASE) 2171 | lfd FARG1, 0(BASE)
1907 | lwz CARG4, 8(BASE) 2172 |.else
2173 | lwz CARG1, 0(BASE)
2174 | lwz CARG2, 4(BASE)
2175 |.endif
2176 | lwz TMP1, 8(BASE)
1908 |.if GPR64 2177 |.if GPR64
1909 | lwz CARG2, 12(BASE) 2178 | lwz CARG2, 12(BASE)
1910 |.else 2179 |.elif FPU
1911 | lwz CARG1, 12(BASE) 2180 | lwz CARG1, 12(BASE)
2181 |.else
2182 | lwz CARG3, 12(BASE)
1912 |.endif 2183 |.endif
1913 | blt ->fff_fallback 2184 | blt ->fff_fallback
1914 | checknum CARG3; bge ->fff_fallback 2185 | checknum TMP0; bge ->fff_fallback
1915 | checknum CARG4; bne ->fff_fallback 2186 | checknum TMP1; bne ->fff_fallback
1916 |.else 2187 |.else
1917 |.ffunc_nn math_ldexp 2188 |.ffunc_nn math_ldexp
1918 |.if GPR64 2189 |.if GPR64
@@ -1927,8 +2198,10 @@ static void build_subroutines(BuildCtx *ctx)
1927 |.ffunc_n math_frexp 2198 |.ffunc_n math_frexp
1928 |.if GPR64 2199 |.if GPR64
1929 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 2200 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1930 |.else 2201 |.elif FPU
1931 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 2202 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
2203 |.else
2204 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1932 |.endif 2205 |.endif
1933 | lwz PC, FRAME_PC(BASE) 2206 | lwz PC, FRAME_PC(BASE)
1934 | blex frexp 2207 | blex frexp
@@ -1937,7 +2210,12 @@ static void build_subroutines(BuildCtx *ctx)
1937 |.if not DUALNUM 2210 |.if not DUALNUM
1938 | tonum_i FARG2, TMP1 2211 | tonum_i FARG2, TMP1
1939 |.endif 2212 |.endif
2213 |.if FPU
1940 | stfd FARG1, 0(RA) 2214 | stfd FARG1, 0(RA)
2215 |.else
2216 | stw CRET1, 0(RA)
2217 | stw CRET2, 4(RA)
2218 |.endif
1941 | li RD, (2+1)*8 2219 | li RD, (2+1)*8
1942 |.if DUALNUM 2220 |.if DUALNUM
1943 | stw TISNUM, 8(RA) 2221 | stw TISNUM, 8(RA)
@@ -1950,13 +2228,20 @@ static void build_subroutines(BuildCtx *ctx)
1950 |.ffunc_n math_modf 2228 |.ffunc_n math_modf
1951 |.if GPR64 2229 |.if GPR64
1952 | la CARG2, -8(BASE) 2230 | la CARG2, -8(BASE)
1953 |.else 2231 |.elif FPU
1954 | la CARG1, -8(BASE) 2232 | la CARG1, -8(BASE)
2233 |.else
2234 | la CARG3, -8(BASE)
1955 |.endif 2235 |.endif
1956 | lwz PC, FRAME_PC(BASE) 2236 | lwz PC, FRAME_PC(BASE)
1957 | blex modf 2237 | blex modf
1958 | la RA, -8(BASE) 2238 | la RA, -8(BASE)
2239 |.if FPU
1959 | stfd FARG1, 0(BASE) 2240 | stfd FARG1, 0(BASE)
2241 |.else
2242 | stw CRET1, 0(BASE)
2243 | stw CRET2, 4(BASE)
2244 |.endif
1960 | li RD, (2+1)*8 2245 | li RD, (2+1)*8
1961 | b ->fff_res 2246 | b ->fff_res
1962 | 2247 |
@@ -1964,13 +2249,13 @@ static void build_subroutines(BuildCtx *ctx)
1964 |.if DUALNUM 2249 |.if DUALNUM
1965 | .ffunc_1 name 2250 | .ffunc_1 name
1966 | checknum CARG3 2251 | checknum CARG3
1967 | addi TMP1, BASE, 8 2252 | addi SAVE0, BASE, 8
1968 | add TMP2, BASE, NARGS8:RC 2253 | add SAVE1, BASE, NARGS8:RC
1969 | bne >4 2254 | bne >4
1970 |1: // Handle integers. 2255 |1: // Handle integers.
1971 | lwz CARG4, 0(TMP1) 2256 | lwz CARG4, 0(SAVE0)
1972 | cmplw cr1, TMP1, TMP2 2257 | cmplw cr1, SAVE0, SAVE1
1973 | lwz CARG2, 4(TMP1) 2258 | lwz CARG2, 4(SAVE0)
1974 | bge cr1, ->fff_resi 2259 | bge cr1, ->fff_resi
1975 | checknum CARG4 2260 | checknum CARG4
1976 | xoris TMP0, CARG1, 0x8000 2261 | xoris TMP0, CARG1, 0x8000
@@ -1987,36 +2272,76 @@ static void build_subroutines(BuildCtx *ctx)
1987 |.if GPR64 2272 |.if GPR64
1988 | rldicl CARG1, CARG1, 0, 32 2273 | rldicl CARG1, CARG1, 0, 32
1989 |.endif 2274 |.endif
1990 | addi TMP1, TMP1, 8 2275 | addi SAVE0, SAVE0, 8
1991 | b <1 2276 | b <1
1992 |3: 2277 |3:
1993 | bge ->fff_fallback 2278 | bge ->fff_fallback
1994 | // Convert intermediate result to number and continue below. 2279 | // Convert intermediate result to number and continue below.
2280 |.if FPU
1995 | tonum_i FARG1, CARG1 2281 | tonum_i FARG1, CARG1
1996 | lfd FARG2, 0(TMP1) 2282 | lfd FARG2, 0(SAVE0)
2283 |.else
2284 | mr CARG2, CARG1
2285 | bl ->vm_sfi2d_1
2286 | lwz CARG3, 0(SAVE0)
2287 | lwz CARG4, 4(SAVE0)
2288 |.endif
1997 | b >6 2289 | b >6
1998 |4: 2290 |4:
2291 |.if FPU
1999 | lfd FARG1, 0(BASE) 2292 | lfd FARG1, 0(BASE)
2293 |.else
2294 | lwz CARG1, 0(BASE)
2295 | lwz CARG2, 4(BASE)
2296 |.endif
2000 | bge ->fff_fallback 2297 | bge ->fff_fallback
2001 |5: // Handle numbers. 2298 |5: // Handle numbers.
2002 | lwz CARG4, 0(TMP1) 2299 | lwz CARG3, 0(SAVE0)
2003 | cmplw cr1, TMP1, TMP2 2300 | cmplw cr1, SAVE0, SAVE1
2004 | lfd FARG2, 0(TMP1) 2301 |.if FPU
2302 | lfd FARG2, 0(SAVE0)
2303 |.else
2304 | lwz CARG4, 4(SAVE0)
2305 |.endif
2005 | bge cr1, ->fff_resn 2306 | bge cr1, ->fff_resn
2006 | checknum CARG4; bge >7 2307 | checknum CARG3; bge >7
2007 |6: 2308 |6:
2309 | addi SAVE0, SAVE0, 8
2310 |.if FPU
2008 | fsub f0, FARG1, FARG2 2311 | fsub f0, FARG1, FARG2
2009 | addi TMP1, TMP1, 8
2010 |.if ismax 2312 |.if ismax
2011 | fsel FARG1, f0, FARG1, FARG2 2313 | fsel FARG1, f0, FARG1, FARG2
2012 |.else 2314 |.else
2013 | fsel FARG1, f0, FARG2, FARG1 2315 | fsel FARG1, f0, FARG2, FARG1
2014 |.endif 2316 |.endif
2317 |.else
2318 | stw CARG1, SFSAVE_1
2319 | stw CARG2, SFSAVE_2
2320 | stw CARG3, SFSAVE_3
2321 | stw CARG4, SFSAVE_4
2322 | blex __ledf2
2323 | cmpwi CRET1, 0
2324 |.if ismax
2325 | blt >8
2326 |.else
2327 | bge >8
2328 |.endif
2329 | lwz CARG1, SFSAVE_1
2330 | lwz CARG2, SFSAVE_2
2331 | b <5
2332 |8:
2333 | lwz CARG1, SFSAVE_3
2334 | lwz CARG2, SFSAVE_4
2335 |.endif
2015 | b <5 2336 | b <5
2016 |7: // Convert integer to number and continue above. 2337 |7: // Convert integer to number and continue above.
2017 | lwz CARG2, 4(TMP1) 2338 | lwz CARG3, 4(SAVE0)
2018 | bne ->fff_fallback 2339 | bne ->fff_fallback
2019 | tonum_i FARG2, CARG2 2340 |.if FPU
2341 | tonum_i FARG2, CARG3
2342 |.else
2343 | bl ->vm_sfi2d_2
2344 |.endif
2020 | b <6 2345 | b <6
2021 |.else 2346 |.else
2022 | .ffunc_n name 2347 | .ffunc_n name
@@ -2044,11 +2369,6 @@ static void build_subroutines(BuildCtx *ctx)
2044 | 2369 |
2045 |//-- String library ----------------------------------------------------- 2370 |//-- String library -----------------------------------------------------
2046 | 2371 |
2047 |.ffunc_1 string_len
2048 | checkstr CARG3; bne ->fff_fallback
2049 | lwz CRET1, STR:CARG1->len
2050 | b ->fff_resi
2051 |
2052 |.ffunc string_byte // Only handle the 1-arg case here. 2372 |.ffunc string_byte // Only handle the 1-arg case here.
2053 | cmplwi NARGS8:RC, 8 2373 | cmplwi NARGS8:RC, 8
2054 | lwz CARG3, 0(BASE) 2374 | lwz CARG3, 0(BASE)
@@ -2103,6 +2423,7 @@ static void build_subroutines(BuildCtx *ctx)
2103 | stp BASE, L->base 2423 | stp BASE, L->base
2104 | stw PC, SAVE_PC 2424 | stw PC, SAVE_PC
2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2425 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2426 |->fff_resstr:
2106 | // Returns GCstr *. 2427 | // Returns GCstr *.
2107 | lp BASE, L->base 2428 | lp BASE, L->base
2108 | li CARG3, LJ_TSTR 2429 | li CARG3, LJ_TSTR
@@ -2180,114 +2501,29 @@ static void build_subroutines(BuildCtx *ctx)
2180 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2501 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2181 | b <3 2502 | b <3
2182 | 2503 |
2183 |.ffunc string_rep // Only handle the 1-char case inline. 2504 |.macro ffstring_op, name
2184 | ffgccheck 2505 | .ffunc string_ .. name
2185 | cmplwi NARGS8:RC, 16
2186 | lwz TMP0, 0(BASE)
2187 | lwz STR:CARG1, 4(BASE)
2188 | lwz CARG4, 8(BASE)
2189 |.if DUALNUM
2190 | lwz CARG3, 12(BASE)
2191 |.else
2192 | lfd FARG2, 8(BASE)
2193 |.endif
2194 | bne ->fff_fallback // Exactly 2 arguments.
2195 | checkstr TMP0; bne ->fff_fallback
2196 |.if DUALNUM
2197 | checknum CARG4; bne ->fff_fallback
2198 |.else
2199 | checknum CARG4; bge ->fff_fallback
2200 | toint CARG3, FARG2
2201 |.endif
2202 | lwz TMP0, STR:CARG1->len
2203 | cmpwi CARG3, 0
2204 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2205 | ble >2 // Count <= 0? (or non-int)
2206 | cmplwi TMP0, 1
2207 | subi TMP2, CARG3, 1
2208 | blt >2 // Zero length string?
2209 | cmplw cr1, TMP1, CARG3
2210 | bne ->fff_fallback // Fallback for > 1-char strings.
2211 | lbz TMP0, STR:CARG1[1]
2212 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2213 | blt cr1, ->fff_fallback
2214 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2215 | cmplwi TMP2, 0
2216 | stbx TMP0, CARG2, TMP2
2217 | subi TMP2, TMP2, 1
2218 | bne <1
2219 | b ->fff_newstr
2220 |2: // Return empty string.
2221 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2222 | li CARG3, LJ_TSTR
2223 | b ->fff_restv
2224 |
2225 |.ffunc string_reverse
2226 | ffgccheck
2227 | cmplwi NARGS8:RC, 8
2228 | lwz CARG3, 0(BASE)
2229 | lwz STR:CARG1, 4(BASE)
2230 | blt ->fff_fallback
2231 | checkstr CARG3
2232 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2233 | bne ->fff_fallback
2234 | lwz CARG3, STR:CARG1->len
2235 | la CARG1, #STR(STR:CARG1)
2236 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2237 | li TMP2, 0
2238 | cmplw TMP1, CARG3
2239 | subi TMP3, CARG3, 1
2240 | blt ->fff_fallback
2241 |1: // Reverse string copy.
2242 | cmpwi TMP3, 0
2243 | lbzx TMP1, CARG1, TMP2
2244 | blty ->fff_newstr
2245 | stbx TMP1, CARG2, TMP3
2246 | subi TMP3, TMP3, 1
2247 | addi TMP2, TMP2, 1
2248 | b <1
2249 |
2250 |.macro ffstring_case, name, lo
2251 | .ffunc name
2252 | ffgccheck 2506 | ffgccheck
2253 | cmplwi NARGS8:RC, 8 2507 | cmplwi NARGS8:RC, 8
2254 | lwz CARG3, 0(BASE) 2508 | lwz CARG3, 0(BASE)
2255 | lwz STR:CARG1, 4(BASE) 2509 | lwz STR:CARG2, 4(BASE)
2256 | blt ->fff_fallback 2510 | blt ->fff_fallback
2257 | checkstr CARG3 2511 | checkstr CARG3
2258 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2512 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2259 | bne ->fff_fallback 2513 | bne ->fff_fallback
2260 | lwz CARG3, STR:CARG1->len 2514 | lwz TMP0, SBUF:CARG1->b
2261 | la CARG1, #STR(STR:CARG1) 2515 | stw L, SBUF:CARG1->L
2262 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2516 | stp BASE, L->base
2263 | cmplw TMP1, CARG3 2517 | stw PC, SAVE_PC
2264 | li TMP2, 0 2518 | stw TMP0, SBUF:CARG1->p
2265 | blt ->fff_fallback 2519 | bl extern lj_buf_putstr_ .. name
2266 |1: // ASCII case conversion. 2520 | bl extern lj_buf_tostr
2267 | cmplw TMP2, CARG3 2521 | b ->fff_resstr
2268 | lbzx TMP1, CARG1, TMP2
2269 | bgey ->fff_newstr
2270 | subi TMP0, TMP1, lo
2271 | xori TMP3, TMP1, 0x20
2272 | addic TMP0, TMP0, -26
2273 | subfe TMP3, TMP3, TMP3
2274 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2275 | xor TMP1, TMP1, TMP3
2276 | stbx TMP1, CARG2, TMP2
2277 | addi TMP2, TMP2, 1
2278 | b <1
2279 |.endmacro 2522 |.endmacro
2280 | 2523 |
2281 |ffstring_case string_lower, 65 2524 |ffstring_op reverse
2282 |ffstring_case string_upper, 97 2525 |ffstring_op lower
2283 | 2526 |ffstring_op upper
2284 |//-- Table library ------------------------------------------------------
2285 |
2286 |.ffunc_1 table_getn
2287 | checktab CARG3; bne ->fff_fallback
2288 | bl extern lj_tab_len // (GCtab *t)
2289 | // Returns uint32_t (but less than 2^31).
2290 | b ->fff_resi
2291 | 2527 |
2292 |//-- Bit library -------------------------------------------------------- 2528 |//-- Bit library --------------------------------------------------------
2293 | 2529 |
@@ -2305,28 +2541,37 @@ static void build_subroutines(BuildCtx *ctx)
2305 | 2541 |
2306 |.macro .ffunc_bit_op, name, ins 2542 |.macro .ffunc_bit_op, name, ins
2307 | .ffunc_bit name 2543 | .ffunc_bit name
2308 | addi TMP1, BASE, 8 2544 | addi SAVE0, BASE, 8
2309 | add TMP2, BASE, NARGS8:RC 2545 | add SAVE1, BASE, NARGS8:RC
2310 |1: 2546 |1:
2311 | lwz CARG4, 0(TMP1) 2547 | lwz CARG4, 0(SAVE0)
2312 | cmplw cr1, TMP1, TMP2 2548 | cmplw cr1, SAVE0, SAVE1
2313 |.if DUALNUM 2549 |.if DUALNUM
2314 | lwz CARG2, 4(TMP1) 2550 | lwz CARG2, 4(SAVE0)
2315 |.else 2551 |.else
2316 | lfd FARG1, 0(TMP1) 2552 | lfd FARG1, 0(SAVE0)
2317 |.endif 2553 |.endif
2318 | bgey cr1, ->fff_resi 2554 | bgey cr1, ->fff_resi
2319 | checknum CARG4 2555 | checknum CARG4
2320 |.if DUALNUM 2556 |.if DUALNUM
2557 |.if FPU
2321 | bnel ->fff_bitop_fb 2558 | bnel ->fff_bitop_fb
2322 |.else 2559 |.else
2560 | beq >3
2561 | stw CARG1, SFSAVE_1
2562 | bl ->fff_bitop_fb
2563 | mr CARG2, CARG1
2564 | lwz CARG1, SFSAVE_1
2565 |3:
2566 |.endif
2567 |.else
2323 | fadd FARG1, FARG1, TOBIT 2568 | fadd FARG1, FARG1, TOBIT
2324 | bge ->fff_fallback 2569 | bge ->fff_fallback
2325 | stfd FARG1, TMPD 2570 | stfd FARG1, TMPD
2326 | lwz CARG2, TMPD_LO 2571 | lwz CARG2, TMPD_LO
2327 |.endif 2572 |.endif
2328 | ins CARG1, CARG1, CARG2 2573 | ins CARG1, CARG1, CARG2
2329 | addi TMP1, TMP1, 8 2574 | addi SAVE0, SAVE0, 8
2330 | b <1 2575 | b <1
2331 |.endmacro 2576 |.endmacro
2332 | 2577 |
@@ -2348,7 +2593,14 @@ static void build_subroutines(BuildCtx *ctx)
2348 |.macro .ffunc_bit_sh, name, ins, shmod 2593 |.macro .ffunc_bit_sh, name, ins, shmod
2349 |.if DUALNUM 2594 |.if DUALNUM
2350 | .ffunc_2 bit_..name 2595 | .ffunc_2 bit_..name
2596 |.if FPU
2351 | checknum CARG3; bnel ->fff_tobit_fb 2597 | checknum CARG3; bnel ->fff_tobit_fb
2598 |.else
2599 | checknum CARG3; beq >1
2600 | bl ->fff_tobit_fb
2601 | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
2602 |1:
2603 |.endif
2352 | // Note: no inline conversion from number for 2nd argument! 2604 | // Note: no inline conversion from number for 2nd argument!
2353 | checknum CARG4; bne ->fff_fallback 2605 | checknum CARG4; bne ->fff_fallback
2354 |.else 2606 |.else
@@ -2385,27 +2637,77 @@ static void build_subroutines(BuildCtx *ctx)
2385 |->fff_resn: 2637 |->fff_resn:
2386 | lwz PC, FRAME_PC(BASE) 2638 | lwz PC, FRAME_PC(BASE)
2387 | la RA, -8(BASE) 2639 | la RA, -8(BASE)
2640 |.if FPU
2388 | stfd FARG1, -8(BASE) 2641 | stfd FARG1, -8(BASE)
2642 |.else
2643 | stw CARG1, -8(BASE)
2644 | stw CARG2, -4(BASE)
2645 |.endif
2389 | b ->fff_res1 2646 | b ->fff_res1
2390 | 2647 |
2391 |// Fallback FP number to bit conversion. 2648 |// Fallback FP number to bit conversion.
2392 |->fff_tobit_fb: 2649 |->fff_tobit_fb:
2393 |.if DUALNUM 2650 |.if DUALNUM
2651 |.if FPU
2394 | lfd FARG1, 0(BASE) 2652 | lfd FARG1, 0(BASE)
2395 | bgt ->fff_fallback 2653 | bgt ->fff_fallback
2396 | fadd FARG1, FARG1, TOBIT 2654 | fadd FARG1, FARG1, TOBIT
2397 | stfd FARG1, TMPD 2655 | stfd FARG1, TMPD
2398 | lwz CARG1, TMPD_LO 2656 | lwz CARG1, TMPD_LO
2399 | blr 2657 | blr
2658 |.else
2659 | bgt ->fff_fallback
2660 | mr CARG2, CARG1
2661 | mr CARG1, CARG3
2662 |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
2663 |->vm_tobit:
2664 | slwi TMP2, CARG1, 1
2665 | addis TMP2, TMP2, 0x0020
2666 | cmpwi TMP2, 0
2667 | bge >2
2668 | li TMP1, 0x3e0
2669 | srawi TMP2, TMP2, 21
2670 | not TMP1, TMP1
2671 | sub. TMP2, TMP1, TMP2
2672 | cmpwi cr7, CARG1, 0
2673 | blt >1
2674 | slwi TMP1, CARG1, 11
2675 | srwi TMP0, CARG2, 21
2676 | oris TMP1, TMP1, 0x8000
2677 | or TMP1, TMP1, TMP0
2678 | srw CARG1, TMP1, TMP2
2679 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2680 | neg CARG1, CARG1
2681 | blr
2682 |1:
2683 | addi TMP2, TMP2, 21
2684 | srw TMP1, CARG2, TMP2
2685 | slwi CARG2, CARG1, 12
2686 | subfic TMP2, TMP2, 20
2687 | slw TMP0, CARG2, TMP2
2688 | or CARG1, TMP1, TMP0
2689 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2690 | neg CARG1, CARG1
2691 | blr
2692 |2:
2693 | li CARG1, 0
2694 | blr
2695 |.endif
2400 |.endif 2696 |.endif
2401 |->fff_bitop_fb: 2697 |->fff_bitop_fb:
2402 |.if DUALNUM 2698 |.if DUALNUM
2403 | lfd FARG1, 0(TMP1) 2699 |.if FPU
2700 | lfd FARG1, 0(SAVE0)
2404 | bgt ->fff_fallback 2701 | bgt ->fff_fallback
2405 | fadd FARG1, FARG1, TOBIT 2702 | fadd FARG1, FARG1, TOBIT
2406 | stfd FARG1, TMPD 2703 | stfd FARG1, TMPD
2407 | lwz CARG2, TMPD_LO 2704 | lwz CARG2, TMPD_LO
2408 | blr 2705 | blr
2706 |.else
2707 | bgt ->fff_fallback
2708 | mr CARG1, CARG4
2709 | b ->vm_tobit
2710 |.endif
2409 |.endif 2711 |.endif
2410 | 2712 |
2411 |//----------------------------------------------------------------------- 2713 |//-----------------------------------------------------------------------
@@ -2589,15 +2891,88 @@ static void build_subroutines(BuildCtx *ctx)
2589 | mtctr CRET1 2891 | mtctr CRET1
2590 | bctr 2892 | bctr
2591 | 2893 |
2894 |->cont_stitch: // Trace stitching.
2895 |.if JIT
2896 | // RA = resultptr, RB = meta base
2897 | lwz INS, -4(PC)
2898 | lwz TRACE:TMP2, -20(RB) // Save previous trace.
2899 | addic. TMP1, MULTRES, -8
2900 | decode_RA8 RC, INS // Call base.
2901 | beq >2
2902 |1: // Move results down.
2903 |.if FPU
2904 | lfd f0, 0(RA)
2905 |.else
2906 | lwz CARG1, 0(RA)
2907 | lwz CARG2, 4(RA)
2908 |.endif
2909 | addic. TMP1, TMP1, -8
2910 | addi RA, RA, 8
2911 |.if FPU
2912 | stfdx f0, BASE, RC
2913 |.else
2914 | add CARG3, BASE, RC
2915 | stw CARG1, 0(CARG3)
2916 | stw CARG2, 4(CARG3)
2917 |.endif
2918 | addi RC, RC, 8
2919 | bne <1
2920 |2:
2921 | decode_RA8 RA, INS
2922 | decode_RB8 RB, INS
2923 | add RA, RA, RB
2924 |3:
2925 | cmplw RA, RC
2926 | bgt >9 // More results wanted?
2927 |
2928 | lhz TMP3, TRACE:TMP2->traceno
2929 | lhz RD, TRACE:TMP2->link
2930 | cmpw RD, TMP3
2931 | cmpwi cr1, RD, 0
2932 | beq ->cont_nop // Blacklisted.
2933 | slwi RD, RD, 3
2934 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2935 |
2936 | // Stitch a new trace to the previous trace.
2937 | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
2938 | stp L, DISPATCH_J(L)(DISPATCH)
2939 | stp BASE, L->base
2940 | addi CARG1, DISPATCH, GG_DISP2J
2941 | mr CARG2, PC
2942 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2943 | lp BASE, L->base
2944 | b ->cont_nop
2945 |
2946 |9:
2947 | stwx TISNIL, BASE, RC
2948 | addi RC, RC, 8
2949 | b <3
2950 |.endif
2951 |
2952 |->vm_profhook: // Dispatch target for profiler hook.
2953#if LJ_HASPROFILE
2954 | mr CARG1, L
2955 | stw MULTRES, SAVE_MULTRES
2956 | mr CARG2, PC
2957 | stp BASE, L->base
2958 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2959 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2960 | lp BASE, L->base
2961 | subi PC, PC, 4
2962 | b ->cont_nop
2963#endif
2964 |
2592 |//----------------------------------------------------------------------- 2965 |//-----------------------------------------------------------------------
2593 |//-- Trace exit handler ------------------------------------------------- 2966 |//-- Trace exit handler -------------------------------------------------
2594 |//----------------------------------------------------------------------- 2967 |//-----------------------------------------------------------------------
2595 | 2968 |
2596 |.macro savex_, a, b, c, d 2969 |.macro savex_, a, b, c, d
2970 |.if FPU
2597 | stfd f..a, 16+a*8(sp) 2971 | stfd f..a, 16+a*8(sp)
2598 | stfd f..b, 16+b*8(sp) 2972 | stfd f..b, 16+b*8(sp)
2599 | stfd f..c, 16+c*8(sp) 2973 | stfd f..c, 16+c*8(sp)
2600 | stfd f..d, 16+d*8(sp) 2974 | stfd f..d, 16+d*8(sp)
2975 |.endif
2601 |.endmacro 2976 |.endmacro
2602 | 2977 |
2603 |->vm_exit_handler: 2978 |->vm_exit_handler:
@@ -2623,16 +2998,16 @@ static void build_subroutines(BuildCtx *ctx)
2623 | savex_ 20,21,22,23 2998 | savex_ 20,21,22,23
2624 | lhz CARG4, 2(CARG3) // Load trace number. 2999 | lhz CARG4, 2(CARG3) // Load trace number.
2625 | savex_ 24,25,26,27 3000 | savex_ 24,25,26,27
2626 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 3001 | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
2627 | savex_ 28,29,30,31 3002 | savex_ 28,29,30,31
2628 | sub CARG3, TMP0, CARG3 // Compute exit number. 3003 | sub CARG3, TMP0, CARG3 // Compute exit number.
2629 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 3004 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2630 | srwi CARG3, CARG3, 2 3005 | srwi CARG3, CARG3, 2
2631 | stw L, DISPATCH_J(L)(DISPATCH) 3006 | stp L, DISPATCH_J(L)(DISPATCH)
2632 | subi CARG3, CARG3, 2 3007 | subi CARG3, CARG3, 2
2633 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
2634 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2635 | stp BASE, L->base 3008 | stp BASE, L->base
3009 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
3010 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
2636 | addi CARG1, DISPATCH, GG_DISP2J 3011 | addi CARG1, DISPATCH, GG_DISP2J
2637 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 3012 | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
2638 | addi CARG2, sp, 16 3013 | addi CARG2, sp, 16
@@ -2656,28 +3031,29 @@ static void build_subroutines(BuildCtx *ctx)
2656 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 3031 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
2657 | lwz L, SAVE_L 3032 | lwz L, SAVE_L
2658 | addi DISPATCH, JGL, -GG_DISP2G-32768 3033 | addi DISPATCH, JGL, -GG_DISP2G-32768
3034 | stp BASE, L->base
2659 |1: 3035 |1:
2660 | cmpwi CARG1, 0 3036 | cmpwi CARG1, 0
2661 | blt >3 // Check for error from exit. 3037 | blt >9 // Check for error from exit.
2662 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 3038 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2663 | slwi MULTRES, CARG1, 3 3039 | slwi MULTRES, CARG1, 3
2664 | li TMP2, 0 3040 | li TMP2, 0
2665 | stw MULTRES, SAVE_MULTRES 3041 | stw MULTRES, SAVE_MULTRES
2666 | lwz TMP1, LFUNC:TMP1->pc 3042 | lwz TMP1, LFUNC:RB->pc
2667 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 3043 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2668 | lwz KBASE, PC2PROTO(k)(TMP1) 3044 | lwz KBASE, PC2PROTO(k)(TMP1)
2669 | // Setup type comparison constants. 3045 | // Setup type comparison constants.
2670 | li TISNUM, LJ_TISNUM 3046 | li TISNUM, LJ_TISNUM
2671 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3047 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2672 | stw TMP3, TMPD 3048 | .FPU stw TMP3, TMPD
2673 | li ZERO, 0 3049 | li ZERO, 0
2674 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3050 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2675 | lfs TOBIT, TMPD 3051 | .FPU lfs TOBIT, TMPD
2676 | stw TMP3, TMPD 3052 | .FPU stw TMP3, TMPD
2677 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3053 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2678 | li TISNIL, LJ_TNIL 3054 | li TISNIL, LJ_TNIL
2679 | stw TMP0, TONUM_HI 3055 | .FPU stw TMP0, TONUM_HI
2680 | lfs TONUM, TMPD 3056 | .FPU lfs TONUM, TMPD
2681 | // Modified copy of ins_next which handles function header dispatch, too. 3057 | // Modified copy of ins_next which handles function header dispatch, too.
2682 | lwz INS, 0(PC) 3058 | lwz INS, 0(PC)
2683 | addi PC, PC, 4 3059 | addi PC, PC, 4
@@ -2694,11 +3070,25 @@ static void build_subroutines(BuildCtx *ctx)
2694 | decode_RC8 RC, INS 3070 | decode_RC8 RC, INS
2695 | bctr 3071 | bctr
2696 |2: 3072 |2:
3073 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
3074 | blt >3
3075 | // Check frame below fast function.
3076 | lwz TMP1, FRAME_PC(BASE)
3077 | andix. TMP0, TMP1, FRAME_TYPE
3078 | bney >3 // Trace stitching continuation?
3079 | // Otherwise set KBASE for Lua function below fast function.
3080 | lwz TMP2, -4(TMP1)
3081 | decode_RA8 TMP0, TMP2
3082 | sub TMP1, BASE, TMP0
3083 | lwz LFUNC:TMP2, -12(TMP1)
3084 | lwz TMP1, LFUNC:TMP2->pc
3085 | lwz KBASE, PC2PROTO(k)(TMP1)
3086 |3:
2697 | subi RC, MULTRES, 8 3087 | subi RC, MULTRES, 8
2698 | add RA, RA, BASE 3088 | add RA, RA, BASE
2699 | bctr 3089 | bctr
2700 | 3090 |
2701 |3: // Rethrow error from the right C frame. 3091 |9: // Rethrow error from the right C frame.
2702 | neg CARG2, CARG1 3092 | neg CARG2, CARG1
2703 | mr CARG1, L 3093 | mr CARG1, L
2704 | bl extern lj_err_throw // (lua_State *L, int errcode) 3094 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -2708,7 +3098,35 @@ static void build_subroutines(BuildCtx *ctx)
2708 |//-- Math helper functions ---------------------------------------------- 3098 |//-- Math helper functions ----------------------------------------------
2709 |//----------------------------------------------------------------------- 3099 |//-----------------------------------------------------------------------
2710 | 3100 |
2711 |// NYI: Use internal implementations of floor, ceil, trunc. 3101 |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
3102 |
3103 |.macro sfi2d, AHI, ALO
3104 |.if not FPU
3105 | mr. AHI, ALO
3106 | bclr 12, 2 // Handle zero first.
3107 | srawi TMP0, ALO, 31
3108 | xor TMP1, ALO, TMP0
3109 | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
3110 | cntlzw AHI, TMP1
3111 | andix. TMP0, TMP0, 0x800 // Mask sign bit.
3112 | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
3113 | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
3114 | slwi ALO, TMP1, 21
3115 | or AHI, AHI, TMP0 // Sign | Exponent.
3116 | srwi TMP1, TMP1, 11
3117 | slwi AHI, AHI, 20 // Align left.
3118 | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
3119 | blr
3120 |.endif
3121 |.endmacro
3122 |
3123 |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
3124 |->vm_sfi2d_1:
3125 | sfi2d CARG1, CARG2
3126 |
3127 |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
3128 |->vm_sfi2d_2:
3129 | sfi2d CARG3, CARG4
2712 | 3130 |
2713 |->vm_modi: 3131 |->vm_modi:
2714 | divwo. TMP0, CARG1, CARG2 3132 | divwo. TMP0, CARG1, CARG2
@@ -2776,21 +3194,21 @@ static void build_subroutines(BuildCtx *ctx)
2776 | addi DISPATCH, r12, GG_G2DISP 3194 | addi DISPATCH, r12, GG_G2DISP
2777 | stw r11, CTSTATE->cb.slot 3195 | stw r11, CTSTATE->cb.slot
2778 | stw r3, CTSTATE->cb.gpr[0] 3196 | stw r3, CTSTATE->cb.gpr[0]
2779 | stfd f1, CTSTATE->cb.fpr[0] 3197 | .FPU stfd f1, CTSTATE->cb.fpr[0]
2780 | stw r4, CTSTATE->cb.gpr[1] 3198 | stw r4, CTSTATE->cb.gpr[1]
2781 | stfd f2, CTSTATE->cb.fpr[1] 3199 | .FPU stfd f2, CTSTATE->cb.fpr[1]
2782 | stw r5, CTSTATE->cb.gpr[2] 3200 | stw r5, CTSTATE->cb.gpr[2]
2783 | stfd f3, CTSTATE->cb.fpr[2] 3201 | .FPU stfd f3, CTSTATE->cb.fpr[2]
2784 | stw r6, CTSTATE->cb.gpr[3] 3202 | stw r6, CTSTATE->cb.gpr[3]
2785 | stfd f4, CTSTATE->cb.fpr[3] 3203 | .FPU stfd f4, CTSTATE->cb.fpr[3]
2786 | stw r7, CTSTATE->cb.gpr[4] 3204 | stw r7, CTSTATE->cb.gpr[4]
2787 | stfd f5, CTSTATE->cb.fpr[4] 3205 | .FPU stfd f5, CTSTATE->cb.fpr[4]
2788 | stw r8, CTSTATE->cb.gpr[5] 3206 | stw r8, CTSTATE->cb.gpr[5]
2789 | stfd f6, CTSTATE->cb.fpr[5] 3207 | .FPU stfd f6, CTSTATE->cb.fpr[5]
2790 | stw r9, CTSTATE->cb.gpr[6] 3208 | stw r9, CTSTATE->cb.gpr[6]
2791 | stfd f7, CTSTATE->cb.fpr[6] 3209 | .FPU stfd f7, CTSTATE->cb.fpr[6]
2792 | stw r10, CTSTATE->cb.gpr[7] 3210 | stw r10, CTSTATE->cb.gpr[7]
2793 | stfd f8, CTSTATE->cb.fpr[7] 3211 | .FPU stfd f8, CTSTATE->cb.fpr[7]
2794 | addi TMP0, sp, CFRAME_SPACE+8 3212 | addi TMP0, sp, CFRAME_SPACE+8
2795 | stw TMP0, CTSTATE->cb.stack 3213 | stw TMP0, CTSTATE->cb.stack
2796 | mr CARG1, CTSTATE 3214 | mr CARG1, CTSTATE
@@ -2801,21 +3219,21 @@ static void build_subroutines(BuildCtx *ctx)
2801 | lp BASE, L:CRET1->base 3219 | lp BASE, L:CRET1->base
2802 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 3220 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2803 | lp RC, L:CRET1->top 3221 | lp RC, L:CRET1->top
2804 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3222 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2805 | li ZERO, 0 3223 | li ZERO, 0
2806 | mr L, CRET1 3224 | mr L, CRET1
2807 | stw TMP3, TMPD 3225 | .FPU stw TMP3, TMPD
2808 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3226 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2809 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3227 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2810 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3228 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2811 | stw TMP0, TONUM_HI 3229 | .FPU stw TMP0, TONUM_HI
2812 | li TISNIL, LJ_TNIL 3230 | li TISNIL, LJ_TNIL
2813 | li_vmstate INTERP 3231 | li_vmstate INTERP
2814 | lfs TOBIT, TMPD 3232 | .FPU lfs TOBIT, TMPD
2815 | stw TMP3, TMPD 3233 | .FPU stw TMP3, TMPD
2816 | sub RC, RC, BASE 3234 | sub RC, RC, BASE
2817 | st_vmstate 3235 | st_vmstate
2818 | lfs TONUM, TMPD 3236 | .FPU lfs TONUM, TMPD
2819 | ins_callt 3237 | ins_callt
2820 |.endif 3238 |.endif
2821 | 3239 |
@@ -2829,7 +3247,7 @@ static void build_subroutines(BuildCtx *ctx)
2829 | mr CARG2, RA 3247 | mr CARG2, RA
2830 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 3248 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2831 | lwz CRET1, CTSTATE->cb.gpr[0] 3249 | lwz CRET1, CTSTATE->cb.gpr[0]
2832 | lfd FARG1, CTSTATE->cb.fpr[0] 3250 | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
2833 | lwz CRET2, CTSTATE->cb.gpr[1] 3251 | lwz CRET2, CTSTATE->cb.gpr[1]
2834 | b ->vm_leave_unw 3252 | b ->vm_leave_unw
2835 |.endif 3253 |.endif
@@ -2863,14 +3281,14 @@ static void build_subroutines(BuildCtx *ctx)
2863 | bge <1 3281 | bge <1
2864 |2: 3282 |2:
2865 | bney cr1, >3 3283 | bney cr1, >3
2866 | lfd f1, CCSTATE->fpr[0] 3284 | .FPU lfd f1, CCSTATE->fpr[0]
2867 | lfd f2, CCSTATE->fpr[1] 3285 | .FPU lfd f2, CCSTATE->fpr[1]
2868 | lfd f3, CCSTATE->fpr[2] 3286 | .FPU lfd f3, CCSTATE->fpr[2]
2869 | lfd f4, CCSTATE->fpr[3] 3287 | .FPU lfd f4, CCSTATE->fpr[3]
2870 | lfd f5, CCSTATE->fpr[4] 3288 | .FPU lfd f5, CCSTATE->fpr[4]
2871 | lfd f6, CCSTATE->fpr[5] 3289 | .FPU lfd f6, CCSTATE->fpr[5]
2872 | lfd f7, CCSTATE->fpr[6] 3290 | .FPU lfd f7, CCSTATE->fpr[6]
2873 | lfd f8, CCSTATE->fpr[7] 3291 | .FPU lfd f8, CCSTATE->fpr[7]
2874 |3: 3292 |3:
2875 | lp TMP0, CCSTATE->func 3293 | lp TMP0, CCSTATE->func
2876 | lwz CARG2, CCSTATE->gpr[1] 3294 | lwz CARG2, CCSTATE->gpr[1]
@@ -2887,7 +3305,7 @@ static void build_subroutines(BuildCtx *ctx)
2887 | lwz TMP2, -4(r14) 3305 | lwz TMP2, -4(r14)
2888 | lwz TMP0, 4(r14) 3306 | lwz TMP0, 4(r14)
2889 | stw CARG1, CCSTATE:TMP1->gpr[0] 3307 | stw CARG1, CCSTATE:TMP1->gpr[0]
2890 | stfd FARG1, CCSTATE:TMP1->fpr[0] 3308 | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
2891 | stw CARG2, CCSTATE:TMP1->gpr[1] 3309 | stw CARG2, CCSTATE:TMP1->gpr[1]
2892 | mtlr TMP0 3310 | mtlr TMP0
2893 | stw CARG3, CCSTATE:TMP1->gpr[2] 3311 | stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2916,19 +3334,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2916 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3334 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2917 | // RA = src1*8, RD = src2*8, JMP with RD = target 3335 | // RA = src1*8, RD = src2*8, JMP with RD = target
2918 |.if DUALNUM 3336 |.if DUALNUM
2919 | lwzux TMP0, RA, BASE 3337 | lwzux CARG1, RA, BASE
2920 | addi PC, PC, 4 3338 | addi PC, PC, 4
2921 | lwz CARG2, 4(RA) 3339 | lwz CARG2, 4(RA)
2922 | lwzux TMP1, RD, BASE 3340 | lwzux CARG3, RD, BASE
2923 | lwz TMP2, -4(PC) 3341 | lwz TMP2, -4(PC)
2924 | checknum cr0, TMP0 3342 | checknum cr0, CARG1
2925 | lwz CARG3, 4(RD) 3343 | lwz CARG4, 4(RD)
2926 | decode_RD4 TMP2, TMP2 3344 | decode_RD4 TMP2, TMP2
2927 | checknum cr1, TMP1 3345 | checknum cr1, CARG3
2928 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3346 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
2929 | bne cr0, >7 3347 | bne cr0, >7
2930 | bne cr1, >8 3348 | bne cr1, >8
2931 | cmpw CARG2, CARG3 3349 | cmpw CARG2, CARG4
2932 if (op == BC_ISLT) { 3350 if (op == BC_ISLT) {
2933 | bge >2 3351 | bge >2
2934 } else if (op == BC_ISGE) { 3352 } else if (op == BC_ISGE) {
@@ -2939,28 +3357,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2939 | ble >2 3357 | ble >2
2940 } 3358 }
2941 |1: 3359 |1:
2942 | add PC, PC, TMP2 3360 | add PC, PC, SAVE0
2943 |2: 3361 |2:
2944 | ins_next 3362 | ins_next
2945 | 3363 |
2946 |7: // RA is not an integer. 3364 |7: // RA is not an integer.
2947 | bgt cr0, ->vmeta_comp 3365 | bgt cr0, ->vmeta_comp
2948 | // RA is a number. 3366 | // RA is a number.
2949 | lfd f0, 0(RA) 3367 | .FPU lfd f0, 0(RA)
2950 | bgt cr1, ->vmeta_comp 3368 | bgt cr1, ->vmeta_comp
2951 | blt cr1, >4 3369 | blt cr1, >4
2952 | // RA is a number, RD is an integer. 3370 | // RA is a number, RD is an integer.
2953 | tonum_i f1, CARG3 3371 |.if FPU
3372 | tonum_i f1, CARG4
3373 |.else
3374 | bl ->vm_sfi2d_2
3375 |.endif
2954 | b >5 3376 | b >5
2955 | 3377 |
2956 |8: // RA is an integer, RD is not an integer. 3378 |8: // RA is an integer, RD is not an integer.
2957 | bgt cr1, ->vmeta_comp 3379 | bgt cr1, ->vmeta_comp
2958 | // RA is an integer, RD is a number. 3380 | // RA is an integer, RD is a number.
3381 |.if FPU
2959 | tonum_i f0, CARG2 3382 | tonum_i f0, CARG2
3383 |.else
3384 | bl ->vm_sfi2d_1
3385 |.endif
2960 |4: 3386 |4:
2961 | lfd f1, 0(RD) 3387 | .FPU lfd f1, 0(RD)
2962 |5: 3388 |5:
3389 |.if FPU
2963 | fcmpu cr0, f0, f1 3390 | fcmpu cr0, f0, f1
3391 |.else
3392 | blex __ledf2
3393 | cmpwi CRET1, 0
3394 |.endif
2964 if (op == BC_ISLT) { 3395 if (op == BC_ISLT) {
2965 | bge <2 3396 | bge <2
2966 } else if (op == BC_ISGE) { 3397 } else if (op == BC_ISGE) {
@@ -3008,42 +3439,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3008 vk = op == BC_ISEQV; 3439 vk = op == BC_ISEQV;
3009 | // RA = src1*8, RD = src2*8, JMP with RD = target 3440 | // RA = src1*8, RD = src2*8, JMP with RD = target
3010 |.if DUALNUM 3441 |.if DUALNUM
3011 | lwzux TMP0, RA, BASE 3442 | lwzux CARG1, RA, BASE
3012 | addi PC, PC, 4 3443 | addi PC, PC, 4
3013 | lwz CARG2, 4(RA) 3444 | lwz CARG2, 4(RA)
3014 | lwzux TMP1, RD, BASE 3445 | lwzux CARG3, RD, BASE
3015 | checknum cr0, TMP0 3446 | checknum cr0, CARG1
3016 | lwz TMP2, -4(PC) 3447 | lwz SAVE0, -4(PC)
3017 | checknum cr1, TMP1 3448 | checknum cr1, CARG3
3018 | decode_RD4 TMP2, TMP2 3449 | decode_RD4 SAVE0, SAVE0
3019 | lwz CARG3, 4(RD) 3450 | lwz CARG4, 4(RD)
3020 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3451 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
3021 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3452 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3022 if (vk) { 3453 if (vk) {
3023 | ble cr7, ->BC_ISEQN_Z 3454 | ble cr7, ->BC_ISEQN_Z
3024 } else { 3455 } else {
3025 | ble cr7, ->BC_ISNEN_Z 3456 | ble cr7, ->BC_ISNEN_Z
3026 } 3457 }
3027 |.else 3458 |.else
3028 | lwzux TMP0, RA, BASE 3459 | lwzux CARG1, RA, BASE
3029 | lwz TMP2, 0(PC) 3460 | lwz SAVE0, 0(PC)
3030 | lfd f0, 0(RA) 3461 | lfd f0, 0(RA)
3031 | addi PC, PC, 4 3462 | addi PC, PC, 4
3032 | lwzux TMP1, RD, BASE 3463 | lwzux CARG3, RD, BASE
3033 | checknum cr0, TMP0 3464 | checknum cr0, CARG1
3034 | decode_RD4 TMP2, TMP2 3465 | decode_RD4 SAVE0, SAVE0
3035 | lfd f1, 0(RD) 3466 | lfd f1, 0(RD)
3036 | checknum cr1, TMP1 3467 | checknum cr1, CARG3
3037 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3468 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3038 | bge cr0, >5 3469 | bge cr0, >5
3039 | bge cr1, >5 3470 | bge cr1, >5
3040 | fcmpu cr0, f0, f1 3471 | fcmpu cr0, f0, f1
3041 if (vk) { 3472 if (vk) {
3042 | bne >1 3473 | bne >1
3043 | add PC, PC, TMP2 3474 | add PC, PC, SAVE0
3044 } else { 3475 } else {
3045 | beq >1 3476 | beq >1
3046 | add PC, PC, TMP2 3477 | add PC, PC, SAVE0
3047 } 3478 }
3048 |1: 3479 |1:
3049 | ins_next 3480 | ins_next
@@ -3051,36 +3482,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3051 |5: // Either or both types are not numbers. 3482 |5: // Either or both types are not numbers.
3052 |.if not DUALNUM 3483 |.if not DUALNUM
3053 | lwz CARG2, 4(RA) 3484 | lwz CARG2, 4(RA)
3054 | lwz CARG3, 4(RD) 3485 | lwz CARG4, 4(RD)
3055 |.endif 3486 |.endif
3056 |.if FFI 3487 |.if FFI
3057 | cmpwi cr7, TMP0, LJ_TCDATA 3488 | cmpwi cr7, CARG1, LJ_TCDATA
3058 | cmpwi cr5, TMP1, LJ_TCDATA 3489 | cmpwi cr5, CARG3, LJ_TCDATA
3059 |.endif 3490 |.endif
3060 | not TMP3, TMP0 3491 | not TMP2, CARG1
3061 | cmplw TMP0, TMP1 3492 | cmplw CARG1, CARG3
3062 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3493 | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
3063 |.if FFI 3494 |.if FFI
3064 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3495 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
3065 |.endif 3496 |.endif
3066 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3497 | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
3067 |.if FFI 3498 |.if FFI
3068 | beq cr7, ->vmeta_equal_cd 3499 | beq cr7, ->vmeta_equal_cd
3069 |.endif 3500 |.endif
3070 | cmplw cr5, CARG2, CARG3 3501 | cmplw cr5, CARG2, CARG4
3071 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3502 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
3072 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3503 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
3073 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3504 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
3074 | mr SAVE0, PC 3505 | mr SAVE1, PC
3075 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3506 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
3076 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3507 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
3077 if (vk) { 3508 if (vk) {
3078 | bne cr0, >6 3509 | bne cr0, >6
3079 | add PC, PC, TMP2 3510 | add PC, PC, SAVE0
3080 |6: 3511 |6:
3081 } else { 3512 } else {
3082 | beq cr0, >6 3513 | beq cr0, >6
3083 | add PC, PC, TMP2 3514 | add PC, PC, SAVE0
3084 |6: 3515 |6:
3085 } 3516 }
3086 |.if DUALNUM 3517 |.if DUALNUM
@@ -3095,6 +3526,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3095 | 3526 |
3096 | // Different tables or userdatas. Need to check __eq metamethod. 3527 | // Different tables or userdatas. Need to check __eq metamethod.
3097 | // Field metatable must be at same offset for GCtab and GCudata! 3528 | // Field metatable must be at same offset for GCtab and GCudata!
3529 | mr CARG3, CARG4
3098 | lwz TAB:TMP2, TAB:CARG2->metatable 3530 | lwz TAB:TMP2, TAB:CARG2->metatable
3099 | li CARG4, 1-vk // ne = 0 or 1. 3531 | li CARG4, 1-vk // ne = 0 or 1.
3100 | cmplwi TAB:TMP2, 0 3532 | cmplwi TAB:TMP2, 0
@@ -3102,7 +3534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3102 | lbz TMP2, TAB:TMP2->nomm 3534 | lbz TMP2, TAB:TMP2->nomm
3103 | andix. TMP2, TMP2, 1<<MM_eq 3535 | andix. TMP2, TMP2, 1<<MM_eq
3104 | bne <1 // Or 'no __eq' flag set? 3536 | bne <1 // Or 'no __eq' flag set?
3105 | mr PC, SAVE0 // Restore old PC. 3537 | mr PC, SAVE1 // Restore old PC.
3106 | b ->vmeta_equal // Handle __eq metamethod. 3538 | b ->vmeta_equal // Handle __eq metamethod.
3107 break; 3539 break;
3108 3540
@@ -3143,16 +3575,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3143 vk = op == BC_ISEQN; 3575 vk = op == BC_ISEQN;
3144 | // RA = src*8, RD = num_const*8, JMP with RD = target 3576 | // RA = src*8, RD = num_const*8, JMP with RD = target
3145 |.if DUALNUM 3577 |.if DUALNUM
3146 | lwzux TMP0, RA, BASE 3578 | lwzux CARG1, RA, BASE
3147 | addi PC, PC, 4 3579 | addi PC, PC, 4
3148 | lwz CARG2, 4(RA) 3580 | lwz CARG2, 4(RA)
3149 | lwzux TMP1, RD, KBASE 3581 | lwzux CARG3, RD, KBASE
3150 | checknum cr0, TMP0 3582 | checknum cr0, CARG1
3151 | lwz TMP2, -4(PC) 3583 | lwz SAVE0, -4(PC)
3152 | checknum cr1, TMP1 3584 | checknum cr1, CARG3
3153 | decode_RD4 TMP2, TMP2 3585 | decode_RD4 SAVE0, SAVE0
3154 | lwz CARG3, 4(RD) 3586 | lwz CARG4, 4(RD)
3155 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3587 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3156 if (vk) { 3588 if (vk) {
3157 |->BC_ISEQN_Z: 3589 |->BC_ISEQN_Z:
3158 } else { 3590 } else {
@@ -3160,7 +3592,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3160 } 3592 }
3161 | bne cr0, >7 3593 | bne cr0, >7
3162 | bne cr1, >8 3594 | bne cr1, >8
3163 | cmpw CARG2, CARG3 3595 | cmpw CARG2, CARG4
3164 |4: 3596 |4:
3165 |.else 3597 |.else
3166 if (vk) { 3598 if (vk) {
@@ -3168,20 +3600,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3168 } else { 3600 } else {
3169 |->BC_ISNEN_Z: // Dummy label. 3601 |->BC_ISNEN_Z: // Dummy label.
3170 } 3602 }
3171 | lwzx TMP0, BASE, RA 3603 | lwzx CARG1, BASE, RA
3172 | addi PC, PC, 4 3604 | addi PC, PC, 4
3173 | lfdx f0, BASE, RA 3605 | lfdx f0, BASE, RA
3174 | lwz TMP2, -4(PC) 3606 | lwz SAVE0, -4(PC)
3175 | lfdx f1, KBASE, RD 3607 | lfdx f1, KBASE, RD
3176 | decode_RD4 TMP2, TMP2 3608 | decode_RD4 SAVE0, SAVE0
3177 | checknum TMP0 3609 | checknum CARG1
3178 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3610 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3179 | bge >3 3611 | bge >3
3180 | fcmpu cr0, f0, f1 3612 | fcmpu cr0, f0, f1
3181 |.endif 3613 |.endif
3182 if (vk) { 3614 if (vk) {
3183 | bne >1 3615 | bne >1
3184 | add PC, PC, TMP2 3616 | add PC, PC, SAVE0
3185 |1: 3617 |1:
3186 |.if not FFI 3618 |.if not FFI
3187 |3: 3619 |3:
@@ -3192,13 +3624,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3192 |.if not FFI 3624 |.if not FFI
3193 |3: 3625 |3:
3194 |.endif 3626 |.endif
3195 | add PC, PC, TMP2 3627 | add PC, PC, SAVE0
3196 |2: 3628 |2:
3197 } 3629 }
3198 | ins_next 3630 | ins_next
3199 |.if FFI 3631 |.if FFI
3200 |3: 3632 |3:
3201 | cmpwi TMP0, LJ_TCDATA 3633 | cmpwi CARG1, LJ_TCDATA
3202 | beq ->vmeta_equal_cd 3634 | beq ->vmeta_equal_cd
3203 | b <1 3635 | b <1
3204 |.endif 3636 |.endif
@@ -3206,18 +3638,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3206 |7: // RA is not an integer. 3638 |7: // RA is not an integer.
3207 | bge cr0, <3 3639 | bge cr0, <3
3208 | // RA is a number. 3640 | // RA is a number.
3209 | lfd f0, 0(RA) 3641 | .FPU lfd f0, 0(RA)
3210 | blt cr1, >1 3642 | blt cr1, >1
3211 | // RA is a number, RD is an integer. 3643 | // RA is a number, RD is an integer.
3212 | tonum_i f1, CARG3 3644 |.if FPU
3645 | tonum_i f1, CARG4
3646 |.else
3647 | bl ->vm_sfi2d_2
3648 |.endif
3213 | b >2 3649 | b >2
3214 | 3650 |
3215 |8: // RA is an integer, RD is a number. 3651 |8: // RA is an integer, RD is a number.
3652 |.if FPU
3216 | tonum_i f0, CARG2 3653 | tonum_i f0, CARG2
3654 |.else
3655 | bl ->vm_sfi2d_1
3656 |.endif
3217 |1: 3657 |1:
3218 | lfd f1, 0(RD) 3658 | .FPU lfd f1, 0(RD)
3219 |2: 3659 |2:
3660 |.if FPU
3220 | fcmpu cr0, f0, f1 3661 | fcmpu cr0, f0, f1
3662 |.else
3663 | blex __ledf2
3664 | cmpwi CRET1, 0
3665 |.endif
3221 | b <4 3666 | b <4
3222 |.endif 3667 |.endif
3223 break; 3668 break;
@@ -3272,7 +3717,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3272 | add PC, PC, TMP2 3717 | add PC, PC, TMP2
3273 } else { 3718 } else {
3274 | li TMP1, LJ_TFALSE 3719 | li TMP1, LJ_TFALSE
3720 |.if FPU
3275 | lfdx f0, BASE, RD 3721 | lfdx f0, BASE, RD
3722 |.else
3723 | lwzux CARG1, RD, BASE
3724 | lwz CARG2, 4(RD)
3725 |.endif
3276 | cmplw TMP0, TMP1 3726 | cmplw TMP0, TMP1
3277 if (op == BC_ISTC) { 3727 if (op == BC_ISTC) {
3278 | bge >1 3728 | bge >1
@@ -3281,20 +3731,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3281 } 3731 }
3282 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3732 | addis PC, PC, -(BCBIAS_J*4 >> 16)
3283 | decode_RD4 TMP2, INS 3733 | decode_RD4 TMP2, INS
3734 |.if FPU
3284 | stfdx f0, BASE, RA 3735 | stfdx f0, BASE, RA
3736 |.else
3737 | stwux CARG1, RA, BASE
3738 | stw CARG2, 4(RA)
3739 |.endif
3285 | add PC, PC, TMP2 3740 | add PC, PC, TMP2
3286 |1: 3741 |1:
3287 } 3742 }
3288 | ins_next 3743 | ins_next
3289 break; 3744 break;
3290 3745
3746 case BC_ISTYPE: // Type assertion on a slot: falls back to vmeta_istype on mismatch.
3747 | // RA = src*8, RD = -type*8
3748 | lwzx TMP0, BASE, RA // TMP0 = first word of the slot at BASE+RA (compared against the type below).
3749 | srwi TMP1, RD, 3 // Undo the *8 scaling of RD.
3750 | ins_next1
3751 |.if not PPE and not GPR64
3752 | add. TMP0, TMP0, TMP1 // Sum is zero (cr0.eq set) only if the slot word equals the negation of TMP1.
3753 |.else
3754 | neg TMP1, TMP1 // NOTE(review): explicit 32 bit compare here, presumably because the record form is unsuitable with PPE/GPR64 -- confirm.
3755 | cmpw TMP0, TMP1
3756 |.endif
3757 | bne ->vmeta_istype // Mismatch: leave the fast path.
3758 | ins_next2
3759 break;
3760 case BC_ISNUM: // Number assertion on a slot: falls back to vmeta_istype if checknum fails.
3761 | // RA = src*8, RD = -(TISNUM-1)*8
3762 | lwzx TMP0, BASE, RA // TMP0 = first word of the slot at BASE+RA.
3763 | ins_next1
3764 | checknum TMP0
3765 | bge ->vmeta_istype // 'bge' after checknum is the not-a-number case, as in the arithmetic ops of this file.
3766 | ins_next2
3767 break;
3768
3291 /* -- Unary ops --------------------------------------------------------- */ 3769 /* -- Unary ops --------------------------------------------------------- */
3292 3770
3293 case BC_MOV: 3771 case BC_MOV:
3294 | // RA = dst*8, RD = src*8 3772 | // RA = dst*8, RD = src*8
3295 | ins_next1 3773 | ins_next1
3774 |.if FPU
3296 | lfdx f0, BASE, RD 3775 | lfdx f0, BASE, RD
3297 | stfdx f0, BASE, RA 3776 | stfdx f0, BASE, RA
3777 |.else
3778 | lwzux TMP0, RD, BASE
3779 | lwz TMP1, 4(RD)
3780 | stwux TMP0, RA, BASE
3781 | stw TMP1, 4(RA)
3782 |.endif
3298 | ins_next2 3783 | ins_next2
3299 break; 3784 break;
3300 case BC_NOT: 3785 case BC_NOT:
@@ -3396,44 +3881,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3396 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3881 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3397 ||switch (vk) { 3882 ||switch (vk) {
3398 ||case 0: 3883 ||case 0:
3399 | lwzx TMP1, BASE, RB 3884 | lwzx CARG1, BASE, RB
3400 | .if DUALNUM 3885 | .if DUALNUM
3401 | lwzx TMP2, KBASE, RC 3886 | lwzx CARG3, KBASE, RC
3402 | .endif 3887 | .endif
3888 | .if FPU
3403 | lfdx f14, BASE, RB 3889 | lfdx f14, BASE, RB
3404 | lfdx f15, KBASE, RC 3890 | lfdx f15, KBASE, RC
3891 | .else
3892 | add TMP1, BASE, RB
3893 | add TMP2, KBASE, RC
3894 | lwz CARG2, 4(TMP1)
3895 | lwz CARG4, 4(TMP2)
3896 | .endif
3405 | .if DUALNUM 3897 | .if DUALNUM
3406 | checknum cr0, TMP1 3898 | checknum cr0, CARG1
3407 | checknum cr1, TMP2 3899 | checknum cr1, CARG3
3408 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3900 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3409 | bge ->vmeta_arith_vn 3901 | bge ->vmeta_arith_vn
3410 | .else 3902 | .else
3411 | checknum TMP1; bge ->vmeta_arith_vn 3903 | checknum CARG1; bge ->vmeta_arith_vn
3412 | .endif 3904 | .endif
3413 || break; 3905 || break;
3414 ||case 1: 3906 ||case 1:
3415 | lwzx TMP1, BASE, RB 3907 | lwzx CARG1, BASE, RB
3416 | .if DUALNUM 3908 | .if DUALNUM
3417 | lwzx TMP2, KBASE, RC 3909 | lwzx CARG3, KBASE, RC
3418 | .endif 3910 | .endif
3911 | .if FPU
3419 | lfdx f15, BASE, RB 3912 | lfdx f15, BASE, RB
3420 | lfdx f14, KBASE, RC 3913 | lfdx f14, KBASE, RC
3914 | .else
3915 | add TMP1, BASE, RB
3916 | add TMP2, KBASE, RC
3917 | lwz CARG2, 4(TMP1)
3918 | lwz CARG4, 4(TMP2)
3919 | .endif
3421 | .if DUALNUM 3920 | .if DUALNUM
3422 | checknum cr0, TMP1 3921 | checknum cr0, CARG1
3423 | checknum cr1, TMP2 3922 | checknum cr1, CARG3
3424 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3923 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3425 | bge ->vmeta_arith_nv 3924 | bge ->vmeta_arith_nv
3426 | .else 3925 | .else
3427 | checknum TMP1; bge ->vmeta_arith_nv 3926 | checknum CARG1; bge ->vmeta_arith_nv
3428 | .endif 3927 | .endif
3429 || break; 3928 || break;
3430 ||default: 3929 ||default:
3431 | lwzx TMP1, BASE, RB 3930 | lwzx CARG1, BASE, RB
3432 | lwzx TMP2, BASE, RC 3931 | lwzx CARG3, BASE, RC
3932 | .if FPU
3433 | lfdx f14, BASE, RB 3933 | lfdx f14, BASE, RB
3434 | lfdx f15, BASE, RC 3934 | lfdx f15, BASE, RC
3435 | checknum cr0, TMP1 3935 | .else
3436 | checknum cr1, TMP2 3936 | add TMP1, BASE, RB
3937 | add TMP2, BASE, RC
3938 | lwz CARG2, 4(TMP1)
3939 | lwz CARG4, 4(TMP2)
3940 | .endif
3941 | checknum cr0, CARG1
3942 | checknum cr1, CARG3
3437 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3943 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3438 | bge ->vmeta_arith_vv 3944 | bge ->vmeta_arith_vv
3439 || break; 3945 || break;
@@ -3467,48 +3973,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3467 | fsub a, b, a // b - floor(b/c)*c 3973 | fsub a, b, a // b - floor(b/c)*c
3468 |.endmacro 3974 |.endmacro
3469 | 3975 |
3976 |.macro sfpmod // Non-FPU modulo via library calls: b - floor(b/c)*c with b in CARG1/CARG2 and c in CARG3/CARG4.
3977 |->BC_MODVN_Z: // Shared entry point: the "fpmod_" variants branch here instead of expanding another copy.
3978 | stw CARG1, SFSAVE_1 // Save b; it is needed again for the final subtraction.
3979 | stw CARG2, SFSAVE_2
3980 | mr SAVE0, CARG3 // Keep c in SAVE0/SAVE1 across the calls.
3981 | mr SAVE1, CARG4
3982 | blex __divdf3 // b / c.
3983 | blex floor // floor(b/c); takes the quotient as left by the previous call.
3984 | mr CARG3, SAVE0 // Second operand = c again.
3985 | mr CARG4, SAVE1
3986 | blex __muldf3 // floor(b/c) * c.
3987 | mr CARG3, CRET1 // The product becomes the second operand of the subtraction.
3988 | mr CARG4, CRET2
3989 | lwz CARG1, SFSAVE_1 // Reload b as the first operand.
3990 | lwz CARG2, SFSAVE_2
3991 | blex __subdf3 // b - floor(b/c)*c; the caller stores the result from CRET1/CRET2.
3992 |.endmacro
3993 |
3470 |.macro ins_arithfp, fpins 3994 |.macro ins_arithfp, fpins
3471 | ins_arithpre 3995 | ins_arithpre
3472 |.if "fpins" == "fpmod_" 3996 |.if "fpins" == "fpmod_"
3473 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3997 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3474 |.else 3998 |.elif FPU
3475 | fpins f0, f14, f15 3999 | fpins f0, f14, f15
3476 | ins_next1 4000 | ins_next1
3477 | stfdx f0, BASE, RA 4001 | stfdx f0, BASE, RA
3478 | ins_next2 4002 | ins_next2
4003 |.else
4004 | blex __divdf3 // Only soft-float div uses this macro.
4005 | ins_next1
4006 | stwux CRET1, RA, BASE
4007 | stw CRET2, 4(RA)
4008 | ins_next2
3479 |.endif 4009 |.endif
3480 |.endmacro 4010 |.endmacro
3481 | 4011 |
3482 |.macro ins_arithdn, intins, fpins 4012 |.macro ins_arithdn, intins, fpins, fpcall
3483 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 4013 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3484 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4014 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3485 ||switch (vk) { 4015 ||switch (vk) {
3486 ||case 0: 4016 ||case 0:
3487 | lwzux TMP1, RB, BASE 4017 | lwzux CARG1, RB, BASE
3488 | lwzux TMP2, RC, KBASE 4018 | lwzux CARG3, RC, KBASE
3489 | lwz CARG1, 4(RB) 4019 | lwz CARG2, 4(RB)
3490 | checknum cr0, TMP1 4020 | checknum cr0, CARG1
3491 | lwz CARG2, 4(RC) 4021 | lwz CARG4, 4(RC)
4022 | checknum cr1, CARG3
3492 || break; 4023 || break;
3493 ||case 1: 4024 ||case 1:
3494 | lwzux TMP1, RB, BASE 4025 | lwzux CARG3, RB, BASE
3495 | lwzux TMP2, RC, KBASE 4026 | lwzux CARG1, RC, KBASE
3496 | lwz CARG2, 4(RB) 4027 | lwz CARG4, 4(RB)
3497 | checknum cr0, TMP1 4028 | checknum cr0, CARG3
3498 | lwz CARG1, 4(RC) 4029 | lwz CARG2, 4(RC)
4030 | checknum cr1, CARG1
3499 || break; 4031 || break;
3500 ||default: 4032 ||default:
3501 | lwzux TMP1, RB, BASE 4033 | lwzux CARG1, RB, BASE
3502 | lwzux TMP2, RC, BASE 4034 | lwzux CARG3, RC, BASE
3503 | lwz CARG1, 4(RB) 4035 | lwz CARG2, 4(RB)
3504 | checknum cr0, TMP1 4036 | checknum cr0, CARG1
3505 | lwz CARG2, 4(RC) 4037 | lwz CARG4, 4(RC)
4038 | checknum cr1, CARG3
3506 || break; 4039 || break;
3507 ||} 4040 ||}
3508 | checknum cr1, TMP2
3509 | bne >5 4041 | bne >5
3510 | bne cr1, >5 4042 | bne cr1, >5
3511 | intins CARG1, CARG1, CARG2 4043 |.if "intins" == "intmod"
4044 | mr CARG1, CARG2
4045 | mr CARG2, CARG4
4046 |.endif
4047 | intins CARG1, CARG2, CARG4
3512 | bso >4 4048 | bso >4
3513 |1: 4049 |1:
3514 | ins_next1 4050 | ins_next1
@@ -3520,29 +4056,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3520 | checkov TMP0, <1 // Ignore unrelated overflow. 4056 | checkov TMP0, <1 // Ignore unrelated overflow.
3521 | ins_arithfallback b 4057 | ins_arithfallback b
3522 |5: // FP variant. 4058 |5: // FP variant.
4059 |.if FPU
3523 ||if (vk == 1) { 4060 ||if (vk == 1) {
3524 | lfd f15, 0(RB) 4061 | lfd f15, 0(RB)
3525 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3526 | lfd f14, 0(RC) 4062 | lfd f14, 0(RC)
3527 ||} else { 4063 ||} else {
3528 | lfd f14, 0(RB) 4064 | lfd f14, 0(RB)
3529 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3530 | lfd f15, 0(RC) 4065 | lfd f15, 0(RC)
3531 ||} 4066 ||}
4067 |.endif
4068 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3532 | ins_arithfallback bge 4069 | ins_arithfallback bge
3533 |.if "fpins" == "fpmod_" 4070 |.if "fpins" == "fpmod_"
3534 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4071 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3535 |.else 4072 |.else
4073 |.if FPU
3536 | fpins f0, f14, f15 4074 | fpins f0, f14, f15
3537 | ins_next1
3538 | stfdx f0, BASE, RA 4075 | stfdx f0, BASE, RA
4076 |.else
4077 |.if "fpcall" == "sfpmod"
4078 | sfpmod
4079 |.else
4080 | blex fpcall
4081 |.endif
4082 | stwux CRET1, RA, BASE
4083 | stw CRET2, 4(RA)
4084 |.endif
4085 | ins_next1
3539 | b <2 4086 | b <2
3540 |.endif 4087 |.endif
3541 |.endmacro 4088 |.endmacro
3542 | 4089 |
3543 |.macro ins_arith, intins, fpins 4090 |.macro ins_arith, intins, fpins, fpcall
3544 |.if DUALNUM 4091 |.if DUALNUM
3545 | ins_arithdn intins, fpins 4092 | ins_arithdn intins, fpins, fpcall
3546 |.else 4093 |.else
3547 | ins_arithfp fpins 4094 | ins_arithfp fpins
3548 |.endif 4095 |.endif
@@ -3557,9 +4104,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3557 | addo. TMP0, TMP0, TMP3 4104 | addo. TMP0, TMP0, TMP3
3558 | add y, a, b 4105 | add y, a, b
3559 |.endmacro 4106 |.endmacro
3560 | ins_arith addo32., fadd 4107 | ins_arith addo32., fadd, __adddf3
3561 |.else 4108 |.else
3562 | ins_arith addo., fadd 4109 | ins_arith addo., fadd, __adddf3
3563 |.endif 4110 |.endif
3564 break; 4111 break;
3565 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4112 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3571,36 +4118,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3571 | subo. TMP0, TMP0, TMP3 4118 | subo. TMP0, TMP0, TMP3
3572 | sub y, a, b 4119 | sub y, a, b
3573 |.endmacro 4120 |.endmacro
3574 | ins_arith subo32., fsub 4121 | ins_arith subo32., fsub, __subdf3
3575 |.else 4122 |.else
3576 | ins_arith subo., fsub 4123 | ins_arith subo., fsub, __subdf3
3577 |.endif 4124 |.endif
3578 break; 4125 break;
3579 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4126 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3580 | ins_arith mullwo., fmul 4127 | ins_arith mullwo., fmul, __muldf3
3581 break; 4128 break;
3582 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4129 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3583 | ins_arithfp fdiv 4130 | ins_arithfp fdiv
3584 break; 4131 break;
3585 case BC_MODVN: 4132 case BC_MODVN:
3586 | ins_arith intmod, fpmod 4133 | ins_arith intmod, fpmod, sfpmod
3587 break; 4134 break;
3588 case BC_MODNV: case BC_MODVV: 4135 case BC_MODNV: case BC_MODVV:
3589 | ins_arith intmod, fpmod_ 4136 | ins_arith intmod, fpmod_, sfpmod
3590 break; 4137 break;
3591 case BC_POW: 4138 case BC_POW:
3592 | // NYI: (partial) integer arithmetic. 4139 | // NYI: (partial) integer arithmetic.
3593 | lwzx TMP1, BASE, RB 4140 | lwzx CARG1, BASE, RB
4141 | lwzx CARG3, BASE, RC
4142 |.if FPU
3594 | lfdx FARG1, BASE, RB 4143 | lfdx FARG1, BASE, RB
3595 | lwzx TMP2, BASE, RC
3596 | lfdx FARG2, BASE, RC 4144 | lfdx FARG2, BASE, RC
3597 | checknum cr0, TMP1 4145 |.else
3598 | checknum cr1, TMP2 4146 | add TMP1, BASE, RB
4147 | add TMP2, BASE, RC
4148 | lwz CARG2, 4(TMP1)
4149 | lwz CARG4, 4(TMP2)
4150 |.endif
4151 | checknum cr0, CARG1
4152 | checknum cr1, CARG3
3599 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4153 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3600 | bge ->vmeta_arith_vv 4154 | bge ->vmeta_arith_vv
3601 | blex pow 4155 | blex pow
3602 | ins_next1 4156 | ins_next1
4157 |.if FPU
3603 | stfdx FARG1, BASE, RA 4158 | stfdx FARG1, BASE, RA
4159 |.else
4160 | stwux CARG1, RA, BASE
4161 | stw CARG2, 4(RA)
4162 |.endif
3604 | ins_next2 4163 | ins_next2
3605 break; 4164 break;
3606 4165
@@ -3620,8 +4179,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3620 | lp BASE, L->base 4179 | lp BASE, L->base
3621 | bne ->vmeta_binop 4180 | bne ->vmeta_binop
3622 | ins_next1 4181 | ins_next1
4182 |.if FPU
3623 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 4183 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
3624 | stfdx f0, BASE, RA 4184 | stfdx f0, BASE, RA
4185 |.else
4186 | lwzux TMP0, SAVE0, BASE
4187 | lwz TMP1, 4(SAVE0)
4188 | stwux TMP0, RA, BASE
4189 | stw TMP1, 4(RA)
4190 |.endif
3625 | ins_next2 4191 | ins_next2
3626 break; 4192 break;
3627 4193
@@ -3684,8 +4250,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3684 case BC_KNUM: 4250 case BC_KNUM:
3685 | // RA = dst*8, RD = num_const*8 4251 | // RA = dst*8, RD = num_const*8
3686 | ins_next1 4252 | ins_next1
4253 |.if FPU
3687 | lfdx f0, KBASE, RD 4254 | lfdx f0, KBASE, RD
3688 | stfdx f0, BASE, RA 4255 | stfdx f0, BASE, RA
4256 |.else
4257 | lwzux TMP0, RD, KBASE
4258 | lwz TMP1, 4(RD)
4259 | stwux TMP0, RA, BASE
4260 | stw TMP1, 4(RA)
4261 |.endif
3689 | ins_next2 4262 | ins_next2
3690 break; 4263 break;
3691 case BC_KPRI: 4264 case BC_KPRI:
@@ -3718,8 +4291,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3718 | lwzx UPVAL:RB, LFUNC:RB, RD 4291 | lwzx UPVAL:RB, LFUNC:RB, RD
3719 | ins_next1 4292 | ins_next1
3720 | lwz TMP1, UPVAL:RB->v 4293 | lwz TMP1, UPVAL:RB->v
4294 |.if FPU
3721 | lfd f0, 0(TMP1) 4295 | lfd f0, 0(TMP1)
3722 | stfdx f0, BASE, RA 4296 | stfdx f0, BASE, RA
4297 |.else
4298 | lwz TMP2, 0(TMP1)
4299 | lwz TMP3, 4(TMP1)
4300 | stwux TMP2, RA, BASE
4301 | stw TMP3, 4(RA)
4302 |.endif
3723 | ins_next2 4303 | ins_next2
3724 break; 4304 break;
3725 case BC_USETV: 4305 case BC_USETV:
@@ -3727,14 +4307,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3727 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4307 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3728 | srwi RA, RA, 1 4308 | srwi RA, RA, 1
3729 | addi RA, RA, offsetof(GCfuncL, uvptr) 4309 | addi RA, RA, offsetof(GCfuncL, uvptr)
4310 |.if FPU
3730 | lfdux f0, RD, BASE 4311 | lfdux f0, RD, BASE
4312 |.else
4313 | lwzux CARG1, RD, BASE
4314 | lwz CARG3, 4(RD)
4315 |.endif
3731 | lwzx UPVAL:RB, LFUNC:RB, RA 4316 | lwzx UPVAL:RB, LFUNC:RB, RA
3732 | lbz TMP3, UPVAL:RB->marked 4317 | lbz TMP3, UPVAL:RB->marked
3733 | lwz CARG2, UPVAL:RB->v 4318 | lwz CARG2, UPVAL:RB->v
3734 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 4319 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3735 | lbz TMP0, UPVAL:RB->closed 4320 | lbz TMP0, UPVAL:RB->closed
3736 | lwz TMP2, 0(RD) 4321 | lwz TMP2, 0(RD)
4322 |.if FPU
3737 | stfd f0, 0(CARG2) 4323 | stfd f0, 0(CARG2)
4324 |.else
4325 | stw CARG1, 0(CARG2)
4326 | stw CARG3, 4(CARG2)
4327 |.endif
3738 | cmplwi cr1, TMP0, 0 4328 | cmplwi cr1, TMP0, 0
3739 | lwz TMP1, 4(RD) 4329 | lwz TMP1, 4(RD)
3740 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4330 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3790,11 +4380,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3790 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4380 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3791 | srwi RA, RA, 1 4381 | srwi RA, RA, 1
3792 | addi RA, RA, offsetof(GCfuncL, uvptr) 4382 | addi RA, RA, offsetof(GCfuncL, uvptr)
4383 |.if FPU
3793 | lfdx f0, KBASE, RD 4384 | lfdx f0, KBASE, RD
4385 |.else
4386 | lwzux TMP2, RD, KBASE
4387 | lwz TMP3, 4(RD)
4388 |.endif
3794 | lwzx UPVAL:RB, LFUNC:RB, RA 4389 | lwzx UPVAL:RB, LFUNC:RB, RA
3795 | ins_next1 4390 | ins_next1
3796 | lwz TMP1, UPVAL:RB->v 4391 | lwz TMP1, UPVAL:RB->v
4392 |.if FPU
3797 | stfd f0, 0(TMP1) 4393 | stfd f0, 0(TMP1)
4394 |.else
4395 | stw TMP2, 0(TMP1)
4396 | stw TMP3, 4(TMP1)
4397 |.endif
3798 | ins_next2 4398 | ins_next2
3799 break; 4399 break;
3800 case BC_USETP: 4400 case BC_USETP:
@@ -3942,11 +4542,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3942 |.endif 4542 |.endif
3943 | ble ->vmeta_tgetv // Integer key and in array part? 4543 | ble ->vmeta_tgetv // Integer key and in array part?
3944 | lwzx TMP0, TMP1, TMP2 4544 | lwzx TMP0, TMP1, TMP2
4545 |.if FPU
3945 | lfdx f14, TMP1, TMP2 4546 | lfdx f14, TMP1, TMP2
4547 |.else
4548 | lwzux SAVE0, TMP1, TMP2
4549 | lwz SAVE1, 4(TMP1)
4550 |.endif
3946 | checknil TMP0; beq >2 4551 | checknil TMP0; beq >2
3947 |1: 4552 |1:
3948 | ins_next1 4553 | ins_next1
4554 |.if FPU
3949 | stfdx f14, BASE, RA 4555 | stfdx f14, BASE, RA
4556 |.else
4557 | stwux SAVE0, RA, BASE
4558 | stw SAVE1, 4(RA)
4559 |.endif
3950 | ins_next2 4560 | ins_next2
3951 | 4561 |
3952 |2: // Check for __index if table value is nil. 4562 |2: // Check for __index if table value is nil.
@@ -4022,12 +4632,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4022 | lwz TMP1, TAB:RB->asize 4632 | lwz TMP1, TAB:RB->asize
4023 | lwz TMP2, TAB:RB->array 4633 | lwz TMP2, TAB:RB->array
4024 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4634 | cmplw TMP0, TMP1; bge ->vmeta_tgetb
4635 |.if FPU
4025 | lwzx TMP1, TMP2, RC 4636 | lwzx TMP1, TMP2, RC
4026 | lfdx f0, TMP2, RC 4637 | lfdx f0, TMP2, RC
4638 |.else
4639 | lwzux TMP1, TMP2, RC
4640 | lwz TMP3, 4(TMP2)
4641 |.endif
4027 | checknil TMP1; beq >5 4642 | checknil TMP1; beq >5
4028 |1: 4643 |1:
4029 | ins_next1 4644 | ins_next1
4645 |.if FPU
4030 | stfdx f0, BASE, RA 4646 | stfdx f0, BASE, RA
4647 |.else
4648 | stwux TMP1, RA, BASE
4649 | stw TMP3, 4(RA)
4650 |.endif
4031 | ins_next2 4651 | ins_next2
4032 | 4652 |
4033 |5: // Check for __index if table value is nil. 4653 |5: // Check for __index if table value is nil.
@@ -4039,6 +4659,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4039 | bne <1 // 'no __index' flag set: done. 4659 | bne <1 // 'no __index' flag set: done.
4040 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4660 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4041 break; 4661 break;
4662 case BC_TGETR:
4663 | // RA = dst*8, RB = table*8, RC = key*8
4664 | add RB, BASE, RB
4665 | lwz TAB:CARG1, 4(RB)
4666 |.if DUALNUM
4667 | add RC, BASE, RC
4668 | lwz TMP0, TAB:CARG1->asize
4669 | lwz CARG2, 4(RC)
4670 | lwz TMP1, TAB:CARG1->array
4671 |.else
4672 | lfdx f0, BASE, RC
4673 | lwz TMP0, TAB:CARG1->asize
4674 | toint CARG2, f0
4675 | lwz TMP1, TAB:CARG1->array
4676 |.endif
4677 | cmplw TMP0, CARG2
4678 | slwi TMP2, CARG2, 3
4679 | ble ->vmeta_tgetr // In array part?
4680 |.if FPU
4681 | lfdx f14, TMP1, TMP2
4682 |.else
4683 | lwzux SAVE0, TMP2, TMP1
4684 | lwz SAVE1, 4(TMP2)
4685 |.endif
4686 |->BC_TGETR_Z:
4687 | ins_next1
4688 |.if FPU
4689 | stfdx f14, BASE, RA
4690 |.else
4691 | stwux SAVE0, RA, BASE
4692 | stw SAVE1, 4(RA)
4693 |.endif
4694 | ins_next2
4695 break;
4042 4696
4043 case BC_TSETV: 4697 case BC_TSETV:
4044 | // RA = src*8, RB = table*8, RC = key*8 4698 | // RA = src*8, RB = table*8, RC = key*8
@@ -4077,11 +4731,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4077 | ble ->vmeta_tsetv // Integer key and in array part? 4731 | ble ->vmeta_tsetv // Integer key and in array part?
4078 | lwzx TMP2, TMP1, TMP0 4732 | lwzx TMP2, TMP1, TMP0
4079 | lbz TMP3, TAB:RB->marked 4733 | lbz TMP3, TAB:RB->marked
4734 |.if FPU
4080 | lfdx f14, BASE, RA 4735 | lfdx f14, BASE, RA
4736 |.else
4737 | add SAVE1, BASE, RA
4738 | lwz SAVE0, 0(SAVE1)
4739 | lwz SAVE1, 4(SAVE1)
4740 |.endif
4081 | checknil TMP2; beq >3 4741 | checknil TMP2; beq >3
4082 |1: 4742 |1:
4083 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4743 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4744 |.if FPU
4084 | stfdx f14, TMP1, TMP0 4745 | stfdx f14, TMP1, TMP0
4746 |.else
4747 | stwux SAVE0, TMP1, TMP0
4748 | stw SAVE1, 4(TMP1)
4749 |.endif
4085 | bne >7 4750 | bne >7
4086 |2: 4751 |2:
4087 | ins_next 4752 | ins_next
@@ -4122,7 +4787,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4122 | lwz NODE:TMP2, TAB:RB->node 4787 | lwz NODE:TMP2, TAB:RB->node
4123 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4788 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
4124 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4789 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
4790 |.if FPU
4125 | lfdx f14, BASE, RA 4791 | lfdx f14, BASE, RA
4792 |.else
4793 | add CARG2, BASE, RA
4794 | lwz SAVE0, 0(CARG2)
4795 | lwz SAVE1, 4(CARG2)
4796 |.endif
4126 | slwi TMP0, TMP1, 5 4797 | slwi TMP0, TMP1, 5
4127 | slwi TMP1, TMP1, 3 4798 | slwi TMP1, TMP1, 3
4128 | sub TMP1, TMP0, TMP1 4799 | sub TMP1, TMP0, TMP1
@@ -4138,7 +4809,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4138 | checknil CARG2; beq >4 // Key found, but nil value? 4809 | checknil CARG2; beq >4 // Key found, but nil value?
4139 |2: 4810 |2:
4140 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4811 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4812 |.if FPU
4141 | stfd f14, NODE:TMP2->val 4813 | stfd f14, NODE:TMP2->val
4814 |.else
4815 | stw SAVE0, NODE:TMP2->val.u32.hi
4816 | stw SAVE1, NODE:TMP2->val.u32.lo
4817 |.endif
4142 | bne >7 4818 | bne >7
4143 |3: 4819 |3:
4144 | ins_next 4820 | ins_next
@@ -4177,7 +4853,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4177 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4853 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4178 | // Returns TValue *. 4854 | // Returns TValue *.
4179 | lp BASE, L->base 4855 | lp BASE, L->base
4856 |.if FPU
4180 | stfd f14, 0(CRET1) 4857 | stfd f14, 0(CRET1)
4858 |.else
4859 | stw SAVE0, 0(CRET1)
4860 | stw SAVE1, 4(CRET1)
4861 |.endif
4181 | b <3 // No 2nd write barrier needed. 4862 | b <3 // No 2nd write barrier needed.
4182 | 4863 |
4183 |7: // Possible table write barrier for the value. Skip valiswhite check. 4864 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4194,13 +4875,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4194 | lwz TMP2, TAB:RB->array 4875 | lwz TMP2, TAB:RB->array
4195 | lbz TMP3, TAB:RB->marked 4876 | lbz TMP3, TAB:RB->marked
4196 | cmplw TMP0, TMP1 4877 | cmplw TMP0, TMP1
4878 |.if FPU
4197 | lfdx f14, BASE, RA 4879 | lfdx f14, BASE, RA
4880 |.else
4881 | add CARG2, BASE, RA
4882 | lwz SAVE0, 0(CARG2)
4883 | lwz SAVE1, 4(CARG2)
4884 |.endif
4198 | bge ->vmeta_tsetb 4885 | bge ->vmeta_tsetb
4199 | lwzx TMP1, TMP2, RC 4886 | lwzx TMP1, TMP2, RC
4200 | checknil TMP1; beq >5 4887 | checknil TMP1; beq >5
4201 |1: 4888 |1:
4202 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4889 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4890 |.if FPU
4203 | stfdx f14, TMP2, RC 4891 | stfdx f14, TMP2, RC
4892 |.else
4893 | stwux SAVE0, RC, TMP2
4894 | stw SAVE1, 4(RC)
4895 |.endif
4204 | bne >7 4896 | bne >7
4205 |2: 4897 |2:
4206 | ins_next 4898 | ins_next
@@ -4218,6 +4910,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4218 | barrierback TAB:RB, TMP3, TMP0 4910 | barrierback TAB:RB, TMP3, TMP0
4219 | b <2 4911 | b <2
4220 break; 4912 break;
4913 case BC_TSETR:
4914 | // RA = src*8, RB = table*8, RC = key*8
4915 | add RB, BASE, RB
4916 | lwz TAB:CARG2, 4(RB)
4917 |.if DUALNUM
4918 | add RC, BASE, RC
4919 | lbz TMP3, TAB:CARG2->marked
4920 | lwz TMP0, TAB:CARG2->asize
4921 | lwz CARG3, 4(RC)
4922 | lwz TMP1, TAB:CARG2->array
4923 |.else
4924 | lfdx f0, BASE, RC
4925 | lbz TMP3, TAB:CARG2->marked
4926 | lwz TMP0, TAB:CARG2->asize
4927 | toint CARG3, f0
4928 | lwz TMP1, TAB:CARG2->array
4929 |.endif
4930 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4931 | bne >7
4932 |2:
4933 | cmplw TMP0, CARG3
4934 | slwi TMP2, CARG3, 3
4935 |.if FPU
4936 | lfdx f14, BASE, RA
4937 |.else
4938 | lwzux SAVE0, RA, BASE
4939 | lwz SAVE1, 4(RA)
4940 |.endif
4941 | ble ->vmeta_tsetr // In array part?
4942 | ins_next1
4943 |.if FPU
4944 | stfdx f14, TMP1, TMP2
4945 |.else
4946 | stwux SAVE0, TMP1, TMP2
4947 | stw SAVE1, 4(TMP1)
4948 |.endif
4949 | ins_next2
4950 |
4951 |7: // Possible table write barrier for the value. Skip valiswhite check.
4952 | barrierback TAB:CARG2, TMP3, TMP2
4953 | b <2
4954 break;
4955
4221 4956
4222 case BC_TSETM: 4957 case BC_TSETM:
4223 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4958 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4240,10 +4975,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4240 | add TMP1, TMP1, TMP0 4975 | add TMP1, TMP1, TMP0
4241 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4976 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4242 |3: // Copy result slots to table. 4977 |3: // Copy result slots to table.
4978 |.if FPU
4243 | lfd f0, 0(RA) 4979 | lfd f0, 0(RA)
4980 |.else
4981 | lwz SAVE0, 0(RA)
4982 | lwz SAVE1, 4(RA)
4983 |.endif
4244 | addi RA, RA, 8 4984 | addi RA, RA, 8
4245 | cmpw cr1, RA, TMP2 4985 | cmpw cr1, RA, TMP2
4986 |.if FPU
4246 | stfd f0, 0(TMP1) 4987 | stfd f0, 0(TMP1)
4988 |.else
4989 | stw SAVE0, 0(TMP1)
4990 | stw SAVE1, 4(TMP1)
4991 |.endif
4247 | addi TMP1, TMP1, 8 4992 | addi TMP1, TMP1, 8
4248 | blt cr1, <3 4993 | blt cr1, <3
4249 | bne >7 4994 | bne >7
@@ -4310,9 +5055,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 | beq cr1, >3 5055 | beq cr1, >3
4311 |2: 5056 |2:
4312 | addi TMP3, TMP2, 8 5057 | addi TMP3, TMP2, 8
5058 |.if FPU
4313 | lfdx f0, RA, TMP2 5059 | lfdx f0, RA, TMP2
5060 |.else
5061 | add CARG3, RA, TMP2
5062 | lwz CARG1, 0(CARG3)
5063 | lwz CARG2, 4(CARG3)
5064 |.endif
4314 | cmplw cr1, TMP3, NARGS8:RC 5065 | cmplw cr1, TMP3, NARGS8:RC
5066 |.if FPU
4315 | stfdx f0, BASE, TMP2 5067 | stfdx f0, BASE, TMP2
5068 |.else
5069 | stwux CARG1, TMP2, BASE
5070 | stw CARG2, 4(TMP2)
5071 |.endif
4316 | mr TMP2, TMP3 5072 | mr TMP2, TMP3
4317 | bne cr1, <2 5073 | bne cr1, <2
4318 |3: 5074 |3:
@@ -4345,14 +5101,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4345 | add BASE, BASE, RA 5101 | add BASE, BASE, RA
4346 | lwz TMP1, -24(BASE) 5102 | lwz TMP1, -24(BASE)
4347 | lwz LFUNC:RB, -20(BASE) 5103 | lwz LFUNC:RB, -20(BASE)
5104 |.if FPU
4348 | lfd f1, -8(BASE) 5105 | lfd f1, -8(BASE)
4349 | lfd f0, -16(BASE) 5106 | lfd f0, -16(BASE)
5107 |.else
5108 | lwz CARG1, -8(BASE)
5109 | lwz CARG2, -4(BASE)
5110 | lwz CARG3, -16(BASE)
5111 | lwz CARG4, -12(BASE)
5112 |.endif
4350 | stw TMP1, 0(BASE) // Copy callable. 5113 | stw TMP1, 0(BASE) // Copy callable.
4351 | stw LFUNC:RB, 4(BASE) 5114 | stw LFUNC:RB, 4(BASE)
4352 | checkfunc TMP1 5115 | checkfunc TMP1
4353 | stfd f1, 16(BASE) // Copy control var.
4354 | li NARGS8:RC, 16 // Iterators get 2 arguments. 5116 | li NARGS8:RC, 16 // Iterators get 2 arguments.
5117 |.if FPU
5118 | stfd f1, 16(BASE) // Copy control var.
4355 | stfdu f0, 8(BASE) // Copy state. 5119 | stfdu f0, 8(BASE) // Copy state.
5120 |.else
5121 | stw CARG1, 16(BASE) // Copy control var.
5122 | stw CARG2, 20(BASE)
5123 | stwu CARG3, 8(BASE) // Copy state.
5124 | stw CARG4, 4(BASE)
5125 |.endif
4356 | bne ->vmeta_call 5126 | bne ->vmeta_call
4357 | ins_call 5127 | ins_call
4358 break; 5128 break;
@@ -4373,7 +5143,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4373 | slwi TMP3, RC, 3 5143 | slwi TMP3, RC, 3
4374 | bge >5 // Index points after array part? 5144 | bge >5 // Index points after array part?
4375 | lwzx TMP2, TMP1, TMP3 5145 | lwzx TMP2, TMP1, TMP3
5146 |.if FPU
4376 | lfdx f0, TMP1, TMP3 5147 | lfdx f0, TMP1, TMP3
5148 |.else
5149 | lwzux CARG1, TMP3, TMP1
5150 | lwz CARG2, 4(TMP3)
5151 |.endif
4377 | checknil TMP2 5152 | checknil TMP2
4378 | lwz INS, -4(PC) 5153 | lwz INS, -4(PC)
4379 | beq >4 5154 | beq >4
@@ -4385,7 +5160,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4385 |.endif 5160 |.endif
4386 | addi RC, RC, 1 5161 | addi RC, RC, 1
4387 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 5162 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
5163 |.if FPU
4388 | stfd f0, 8(RA) 5164 | stfd f0, 8(RA)
5165 |.else
5166 | stw CARG1, 8(RA)
5167 | stw CARG2, 12(RA)
5168 |.endif
4389 | decode_RD4 TMP1, INS 5169 | decode_RD4 TMP1, INS
4390 | stw RC, -4(RA) // Update control var. 5170 | stw RC, -4(RA) // Update control var.
4391 | add PC, TMP1, TMP3 5171 | add PC, TMP1, TMP3
@@ -4410,17 +5190,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4410 | slwi RB, RC, 3 5190 | slwi RB, RC, 3
4411 | sub TMP3, TMP3, RB 5191 | sub TMP3, TMP3, RB
4412 | lwzx RB, TMP2, TMP3 5192 | lwzx RB, TMP2, TMP3
5193 |.if FPU
4413 | lfdx f0, TMP2, TMP3 5194 | lfdx f0, TMP2, TMP3
5195 |.else
5196 | add CARG3, TMP2, TMP3
5197 | lwz CARG1, 0(CARG3)
5198 | lwz CARG2, 4(CARG3)
5199 |.endif
4414 | add NODE:TMP3, TMP2, TMP3 5200 | add NODE:TMP3, TMP2, TMP3
4415 | checknil RB 5201 | checknil RB
4416 | lwz INS, -4(PC) 5202 | lwz INS, -4(PC)
4417 | beq >7 5203 | beq >7
5204 |.if FPU
4418 | lfd f1, NODE:TMP3->key 5205 | lfd f1, NODE:TMP3->key
5206 |.else
5207 | lwz CARG3, NODE:TMP3->key.u32.hi
5208 | lwz CARG4, NODE:TMP3->key.u32.lo
5209 |.endif
4419 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 5210 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
5211 |.if FPU
4420 | stfd f0, 8(RA) 5212 | stfd f0, 8(RA)
5213 |.else
5214 | stw CARG1, 8(RA)
5215 | stw CARG2, 12(RA)
5216 |.endif
4421 | add RC, RC, TMP0 5217 | add RC, RC, TMP0
4422 | decode_RD4 TMP1, INS 5218 | decode_RD4 TMP1, INS
5219 |.if FPU
4423 | stfd f1, 0(RA) 5220 | stfd f1, 0(RA)
5221 |.else
5222 | stw CARG3, 0(RA)
5223 | stw CARG4, 4(RA)
5224 |.endif
4424 | addi RC, RC, 1 5225 | addi RC, RC, 1
4425 | add PC, TMP1, TMP2 5226 | add PC, TMP1, TMP2
4426 | stw RC, -4(RA) // Update control var. 5227 | stw RC, -4(RA) // Update control var.
@@ -4486,9 +5287,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4486 | subi TMP2, TMP2, 16 5287 | subi TMP2, TMP2, 16
4487 | ble >2 // No vararg slots? 5288 | ble >2 // No vararg slots?
4488 |1: // Copy vararg slots to destination slots. 5289 |1: // Copy vararg slots to destination slots.
5290 |.if FPU
4489 | lfd f0, 0(RC) 5291 | lfd f0, 0(RC)
5292 |.else
5293 | lwz CARG1, 0(RC)
5294 | lwz CARG2, 4(RC)
5295 |.endif
4490 | addi RC, RC, 8 5296 | addi RC, RC, 8
5297 |.if FPU
4491 | stfd f0, 0(RA) 5298 | stfd f0, 0(RA)
5299 |.else
5300 | stw CARG1, 0(RA)
5301 | stw CARG2, 4(RA)
5302 |.endif
4492 | cmplw RA, TMP2 5303 | cmplw RA, TMP2
4493 | cmplw cr1, RC, TMP3 5304 | cmplw cr1, RC, TMP3
4494 | bge >3 // All destination slots filled? 5305 | bge >3 // All destination slots filled?
@@ -4511,9 +5322,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4511 | addi MULTRES, TMP1, 8 5322 | addi MULTRES, TMP1, 8
4512 | bgt >7 5323 | bgt >7
4513 |6: 5324 |6:
5325 |.if FPU
4514 | lfd f0, 0(RC) 5326 | lfd f0, 0(RC)
5327 |.else
5328 | lwz CARG1, 0(RC)
5329 | lwz CARG2, 4(RC)
5330 |.endif
4515 | addi RC, RC, 8 5331 | addi RC, RC, 8
5332 |.if FPU
4516 | stfd f0, 0(RA) 5333 | stfd f0, 0(RA)
5334 |.else
5335 | stw CARG1, 0(RA)
5336 | stw CARG2, 4(RA)
5337 |.endif
4517 | cmplw RC, TMP3 5338 | cmplw RC, TMP3
4518 | addi RA, RA, 8 5339 | addi RA, RA, 8
4519 | blt <6 // More vararg slots? 5340 | blt <6 // More vararg slots?
@@ -4564,14 +5385,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4564 | li TMP1, 0 5385 | li TMP1, 0
4565 |2: 5386 |2:
4566 | addi TMP3, TMP1, 8 5387 | addi TMP3, TMP1, 8
5388 |.if FPU
4567 | lfdx f0, RA, TMP1 5389 | lfdx f0, RA, TMP1
5390 |.else
5391 | add CARG3, RA, TMP1
5392 | lwz CARG1, 0(CARG3)
5393 | lwz CARG2, 4(CARG3)
5394 |.endif
4568 | cmpw TMP3, RC 5395 | cmpw TMP3, RC
5396 |.if FPU
4569 | stfdx f0, TMP2, TMP1 5397 | stfdx f0, TMP2, TMP1
5398 |.else
5399 | add CARG3, TMP2, TMP1
5400 | stw CARG1, 0(CARG3)
5401 | stw CARG2, 4(CARG3)
5402 |.endif
4570 | beq >3 5403 | beq >3
4571 | addi TMP1, TMP3, 8 5404 | addi TMP1, TMP3, 8
5405 |.if FPU
4572 | lfdx f1, RA, TMP3 5406 | lfdx f1, RA, TMP3
5407 |.else
5408 | add CARG3, RA, TMP3
5409 | lwz CARG1, 0(CARG3)
5410 | lwz CARG2, 4(CARG3)
5411 |.endif
4573 | cmpw TMP1, RC 5412 | cmpw TMP1, RC
5413 |.if FPU
4574 | stfdx f1, TMP2, TMP3 5414 | stfdx f1, TMP2, TMP3
5415 |.else
5416 | add CARG3, TMP2, TMP3
5417 | stw CARG1, 0(CARG3)
5418 | stw CARG2, 4(CARG3)
5419 |.endif
4575 | bne <2 5420 | bne <2
4576 |3: 5421 |3:
4577 |5: 5422 |5:
@@ -4613,8 +5458,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4613 | subi TMP2, BASE, 8 5458 | subi TMP2, BASE, 8
4614 | decode_RB8 RB, INS 5459 | decode_RB8 RB, INS
4615 if (op == BC_RET1) { 5460 if (op == BC_RET1) {
5461 |.if FPU
4616 | lfd f0, 0(RA) 5462 | lfd f0, 0(RA)
4617 | stfd f0, 0(TMP2) 5463 | stfd f0, 0(TMP2)
5464 |.else
5465 | lwz CARG1, 0(RA)
5466 | lwz CARG2, 4(RA)
5467 | stw CARG1, 0(TMP2)
5468 | stw CARG2, 4(TMP2)
5469 |.endif
4618 } 5470 }
4619 |5: 5471 |5:
4620 | cmplw RB, RD 5472 | cmplw RB, RD
@@ -4675,11 +5527,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4675 |4: 5527 |4:
4676 | stw CARG1, FORL_IDX*8+4(RA) 5528 | stw CARG1, FORL_IDX*8+4(RA)
4677 } else { 5529 } else {
4678 | lwz TMP3, FORL_STEP*8(RA) 5530 | lwz SAVE0, FORL_STEP*8(RA)
4679 | lwz CARG3, FORL_STEP*8+4(RA) 5531 | lwz CARG3, FORL_STEP*8+4(RA)
4680 | lwz TMP2, FORL_STOP*8(RA) 5532 | lwz TMP2, FORL_STOP*8(RA)
4681 | lwz CARG2, FORL_STOP*8+4(RA) 5533 | lwz CARG2, FORL_STOP*8+4(RA)
4682 | cmplw cr7, TMP3, TISNUM 5534 | cmplw cr7, SAVE0, TISNUM
4683 | cmplw cr1, TMP2, TISNUM 5535 | cmplw cr1, TMP2, TISNUM
4684 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 5536 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4685 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 5537 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4722,41 +5574,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4722 if (vk) { 5574 if (vk) {
4723 |.if DUALNUM 5575 |.if DUALNUM
4724 |9: // FP loop. 5576 |9: // FP loop.
5577 |.if FPU
4725 | lfd f1, FORL_IDX*8(RA) 5578 | lfd f1, FORL_IDX*8(RA)
4726 |.else 5579 |.else
5580 | lwz CARG1, FORL_IDX*8(RA)
5581 | lwz CARG2, FORL_IDX*8+4(RA)
5582 |.endif
5583 |.else
4727 | lfdux f1, RA, BASE 5584 | lfdux f1, RA, BASE
4728 |.endif 5585 |.endif
5586 |.if FPU
4729 | lfd f3, FORL_STEP*8(RA) 5587 | lfd f3, FORL_STEP*8(RA)
4730 | lfd f2, FORL_STOP*8(RA) 5588 | lfd f2, FORL_STOP*8(RA)
4731 | lwz TMP3, FORL_STEP*8(RA)
4732 | fadd f1, f1, f3 5589 | fadd f1, f1, f3
4733 | stfd f1, FORL_IDX*8(RA) 5590 | stfd f1, FORL_IDX*8(RA)
5591 |.else
5592 | lwz CARG3, FORL_STEP*8(RA)
5593 | lwz CARG4, FORL_STEP*8+4(RA)
5594 | mr SAVE1, RD
5595 | blex __adddf3
5596 | mr RD, SAVE1
5597 | stw CRET1, FORL_IDX*8(RA)
5598 | stw CRET2, FORL_IDX*8+4(RA)
5599 | lwz CARG3, FORL_STOP*8(RA)
5600 | lwz CARG4, FORL_STOP*8+4(RA)
5601 |.endif
5602 | lwz SAVE0, FORL_STEP*8(RA)
4734 } else { 5603 } else {
4735 |.if DUALNUM 5604 |.if DUALNUM
4736 |9: // FP loop. 5605 |9: // FP loop.
4737 |.else 5606 |.else
4738 | lwzux TMP1, RA, BASE 5607 | lwzux TMP1, RA, BASE
4739 | lwz TMP3, FORL_STEP*8(RA) 5608 | lwz SAVE0, FORL_STEP*8(RA)
4740 | lwz TMP2, FORL_STOP*8(RA) 5609 | lwz TMP2, FORL_STOP*8(RA)
4741 | cmplw cr0, TMP1, TISNUM 5610 | cmplw cr0, TMP1, TISNUM
4742 | cmplw cr7, TMP3, TISNUM 5611 | cmplw cr7, SAVE0, TISNUM
4743 | cmplw cr1, TMP2, TISNUM 5612 | cmplw cr1, TMP2, TISNUM
4744 |.endif 5613 |.endif
5614 |.if FPU
4745 | lfd f1, FORL_IDX*8(RA) 5615 | lfd f1, FORL_IDX*8(RA)
5616 |.else
5617 | lwz CARG1, FORL_IDX*8(RA)
5618 | lwz CARG2, FORL_IDX*8+4(RA)
5619 |.endif
4746 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 5620 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
4747 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 5621 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
5622 |.if FPU
4748 | lfd f2, FORL_STOP*8(RA) 5623 | lfd f2, FORL_STOP*8(RA)
5624 |.else
5625 | lwz CARG3, FORL_STOP*8(RA)
5626 | lwz CARG4, FORL_STOP*8+4(RA)
5627 |.endif
4749 | bge ->vmeta_for 5628 | bge ->vmeta_for
4750 } 5629 }
4751 | cmpwi cr6, TMP3, 0 5630 | cmpwi cr6, SAVE0, 0
4752 if (op != BC_JFORL) { 5631 if (op != BC_JFORL) {
4753 | srwi RD, RD, 1 5632 | srwi RD, RD, 1
4754 } 5633 }
5634 |.if FPU
4755 | stfd f1, FORL_EXT*8(RA) 5635 | stfd f1, FORL_EXT*8(RA)
5636 |.else
5637 | stw CARG1, FORL_EXT*8(RA)
5638 | stw CARG2, FORL_EXT*8+4(RA)
5639 |.endif
4756 if (op != BC_JFORL) { 5640 if (op != BC_JFORL) {
4757 | add RD, PC, RD 5641 | add RD, PC, RD
4758 } 5642 }
5643 |.if FPU
4759 | fcmpu cr0, f1, f2 5644 | fcmpu cr0, f1, f2
5645 |.else
5646 | mr SAVE1, RD
5647 | blex __ledf2
5648 | cmpwi CRET1, 0
5649 | mr RD, SAVE1
5650 |.endif
4760 if (op == BC_JFORI) { 5651 if (op == BC_JFORI) {
4761 | addis PC, RD, -(BCBIAS_J*4 >> 16) 5652 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4762 } 5653 }
@@ -4859,8 +5750,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4859 | lp TMP2, TRACE:TMP2->mcode 5750 | lp TMP2, TRACE:TMP2->mcode
4860 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5751 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4861 | mtctr TMP2 5752 | mtctr TMP2
4862 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4863 | addi JGL, DISPATCH, GG_DISP2G+32768 5753 | addi JGL, DISPATCH, GG_DISP2G+32768
5754 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4864 | bctr 5755 | bctr
4865 |.endif 5756 |.endif
4866 break; 5757 break;
@@ -4995,6 +5886,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4995 | lp TMP1, L->top 5886 | lp TMP1, L->top
4996 | li_vmstate INTERP 5887 | li_vmstate INTERP
4997 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 5888 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
5889 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
4998 | sub RA, TMP1, RD // RA = L->top - nresults*8 5890 | sub RA, TMP1, RD // RA = L->top - nresults*8
4999 | st_vmstate 5891 | st_vmstate
5000 | b ->vm_returnc 5892 | b ->vm_returnc