about | summary | refs | log | tree | commit | diff
path: root/src/vm_ppc.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_ppc.dasc')
-rw-r--r-- src/vm_ppc.dasc 1621
1 files changed, 1257 insertions, 364 deletions
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 91f50037..0839668c 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
1|// Low-level VM code for PowerPC CPUs. 1|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4| 4|
@@ -18,7 +18,7 @@
18|// DynASM defines used by the PPC port: 18|// DynASM defines used by the PPC port:
19|// 19|//
20|// P64 64 bit pointers (only for GPR64 testing). 20|// P64 64 bit pointers (only for GPR64 testing).
21|// Note: a full PPC64 _LP64 port is not planned. 21|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). 22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
23|// Affects reg saves, stack layout, carry/overflow/dot flags etc. 23|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). 24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -103,6 +103,18 @@
103|// Fixed register assignments for the interpreter. 103|// Fixed register assignments for the interpreter.
104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) 104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
105| 105|
106|.macro .FPU, a, b // Re-emits "a, b" only when FPU is set; otherwise expands to nothing.
107|.if FPU
108| a, b
109|.endif
110|.endmacro
111|
112|.macro .FPU, a, b, c // Three-argument form of the same FPU-only wrapper (e.g. ".FPU ori TMP3, TMP3, 0x0004").
113|.if FPU
114| a, b, c
115|.endif
116|.endmacro
117|
106|// The following must be C callee-save (but BASE is often refetched). 118|// The following must be C callee-save (but BASE is often refetched).
107|.define BASE, r14 // Base of current Lua stack frame. 119|.define BASE, r14 // Base of current Lua stack frame.
108|.define KBASE, r15 // Constants of current Lua function. 120|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +128,10 @@
116|.define TISNUM, r22 128|.define TISNUM, r22
117|.define TISNIL, r23 129|.define TISNIL, r23
118|.define ZERO, r24 130|.define ZERO, r24
131|.if FPU
119|.define TOBIT, f30 // 2^52 + 2^51. 132|.define TOBIT, f30 // 2^52 + 2^51.
120|.define TONUM, f31 // 2^52 + 2^51 + 2^31. 133|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
134|.endif
121| 135|
122|// The following temporaries are not saved across C calls, except for RA. 136|// The following temporaries are not saved across C calls, except for RA.
123|.define RA, r20 // Callee-save. 137|.define RA, r20 // Callee-save.
@@ -133,6 +147,7 @@
133| 147|
134|// Saved temporaries. 148|// Saved temporaries.
135|.define SAVE0, r21 149|.define SAVE0, r21
150|.define SAVE1, r25
136| 151|
137|// Calling conventions. 152|// Calling conventions.
138|.define CARG1, r3 153|.define CARG1, r3
@@ -141,8 +156,10 @@
141|.define CARG4, r6 // Overlaps TMP3. 156|.define CARG4, r6 // Overlaps TMP3.
142|.define CARG5, r7 // Overlaps INS. 157|.define CARG5, r7 // Overlaps INS.
143| 158|
159|.if FPU
144|.define FARG1, f1 160|.define FARG1, f1
145|.define FARG2, f2 161|.define FARG2, f2
162|.endif
146| 163|
147|.define CRET1, r3 164|.define CRET1, r3
148|.define CRET2, r4 165|.define CRET2, r4
@@ -213,10 +230,16 @@
213|.endif 230|.endif
214|.else 231|.else
215| 232|
233|.if FPU
216|.define SAVE_LR, 276(sp) 234|.define SAVE_LR, 276(sp)
217|.define CFRAME_SPACE, 272 // Delta for sp. 235|.define CFRAME_SPACE, 272 // Delta for sp.
218|// Back chain for sp: 272(sp) <-- sp entering interpreter 236|// Back chain for sp: 272(sp) <-- sp entering interpreter
219|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. 237|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
238|.else
239|.define SAVE_LR, 132(sp)
240|.define CFRAME_SPACE, 128 // Delta for sp.
241|// Back chain for sp: 128(sp) <-- sp entering interpreter
242|.endif
220|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. 243|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
221|.define SAVE_CR, 52(sp) // 32 bit CR save. 244|.define SAVE_CR, 52(sp) // 32 bit CR save.
222|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 245|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +249,25 @@
226|.define SAVE_PC, 32(sp) 249|.define SAVE_PC, 32(sp)
227|.define SAVE_MULTRES, 28(sp) 250|.define SAVE_MULTRES, 28(sp)
228|.define UNUSED1, 24(sp) 251|.define UNUSED1, 24(sp)
252|.if FPU
229|.define TMPD_LO, 20(sp) 253|.define TMPD_LO, 20(sp)
230|.define TMPD_HI, 16(sp) 254|.define TMPD_HI, 16(sp)
231|.define TONUM_LO, 12(sp) 255|.define TONUM_LO, 12(sp)
232|.define TONUM_HI, 8(sp) 256|.define TONUM_HI, 8(sp)
257|.else
258|.define SFSAVE_4, 20(sp)
259|.define SFSAVE_3, 16(sp)
260|.define SFSAVE_2, 12(sp)
261|.define SFSAVE_1, 8(sp)
262|.endif
233|// Next frame lr: 4(sp) 263|// Next frame lr: 4(sp)
234|// Back chain for sp: 0(sp) <-- sp while in interpreter 264|// Back chain for sp: 0(sp) <-- sp while in interpreter
235| 265|
266|.if FPU
236|.define TMPD_BLO, 23(sp) 267|.define TMPD_BLO, 23(sp)
237|.define TMPD, TMPD_HI 268|.define TMPD, TMPD_HI
238|.define TONUM_D, TONUM_HI 269|.define TONUM_D, TONUM_HI
270|.endif
239| 271|
240|.endif 272|.endif
241| 273|
@@ -245,7 +277,7 @@
245|.else 277|.else
246| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 278| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
247|.endif 279|.endif
248| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 280| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
249|.endmacro 281|.endmacro
250|.macro rest_, reg 282|.macro rest_, reg
251|.if GPR64 283|.if GPR64
@@ -253,7 +285,7 @@
253|.else 285|.else
254| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) 286| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
255|.endif 287|.endif
256| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 288| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
257|.endmacro 289|.endmacro
258| 290|
259|.macro saveregs 291|.macro saveregs
@@ -316,19 +348,14 @@
316|.type NODE, Node 348|.type NODE, Node
317|.type NARGS8, int 349|.type NARGS8, int
318|.type TRACE, GCtrace 350|.type TRACE, GCtrace
351|.type SBUF, SBuf
319| 352|
320|//----------------------------------------------------------------------- 353|//-----------------------------------------------------------------------
321| 354|
322|// These basic macros should really be part of DynASM.
323|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
324|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
325|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
326|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
327|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
328|
329|// Trap for not-yet-implemented parts. 355|// Trap for not-yet-implemented parts.
330|.macro NYI; tw 4, sp, sp; .endmacro 356|.macro NYI; tw 4, sp, sp; .endmacro
331| 357|
358|.if FPU
332|// int/FP conversions. 359|// int/FP conversions.
333|.macro tonum_i, freg, reg 360|.macro tonum_i, freg, reg
334| xoris reg, reg, 0x8000 361| xoris reg, reg, 0x8000
@@ -352,6 +379,7 @@
352|.macro toint, reg, freg 379|.macro toint, reg, freg
353| toint reg, freg, freg 380| toint reg, freg, freg
354|.endmacro 381|.endmacro
382|.endif
355| 383|
356|//----------------------------------------------------------------------- 384|//-----------------------------------------------------------------------
357| 385|
@@ -539,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx)
539 | beq >2 567 | beq >2
540 |1: 568 |1:
541 | addic. TMP1, TMP1, -8 569 | addic. TMP1, TMP1, -8
570 |.if FPU
542 | lfd f0, 0(RA) 571 | lfd f0, 0(RA)
572 |.else
573 | lwz CARG1, 0(RA)
574 | lwz CARG2, 4(RA)
575 |.endif
543 | addi RA, RA, 8 576 | addi RA, RA, 8
577 |.if FPU
544 | stfd f0, 0(BASE) 578 | stfd f0, 0(BASE)
579 |.else
580 | stw CARG1, 0(BASE)
581 | stw CARG2, 4(BASE)
582 |.endif
545 | addi BASE, BASE, 8 583 | addi BASE, BASE, 8
546 | bney <1 584 | bney <1
547 | 585 |
@@ -619,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx)
619 | .toc ld TOCREG, SAVE_TOC 657 | .toc ld TOCREG, SAVE_TOC
620 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 658 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
621 | lp BASE, L->base 659 | lp BASE, L->base
622 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 660 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
623 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 661 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
624 | li ZERO, 0 662 | li ZERO, 0
625 | stw TMP3, TMPD 663 | .FPU stw TMP3, TMPD
626 | li TMP1, LJ_TFALSE 664 | li TMP1, LJ_TFALSE
627 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 665 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
628 | li TISNIL, LJ_TNIL 666 | li TISNIL, LJ_TNIL
629 | li_vmstate INTERP 667 | li_vmstate INTERP
630 | lfs TOBIT, TMPD 668 | .FPU lfs TOBIT, TMPD
631 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 669 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
632 | la RA, -8(BASE) // Results start at BASE-8. 670 | la RA, -8(BASE) // Results start at BASE-8.
633 | stw TMP3, TMPD 671 | .FPU stw TMP3, TMPD
634 | addi DISPATCH, DISPATCH, GG_G2DISP 672 | addi DISPATCH, DISPATCH, GG_G2DISP
635 | stw TMP1, 0(RA) // Prepend false to error message. 673 | stw TMP1, 0(RA) // Prepend false to error message.
636 | li RD, 16 // 2 results: false + error message. 674 | li RD, 16 // 2 results: false + error message.
637 | st_vmstate 675 | st_vmstate
638 | lfs TONUM, TMPD 676 | .FPU lfs TONUM, TMPD
639 | b ->vm_returnc 677 | b ->vm_returnc
640 | 678 |
641 |//----------------------------------------------------------------------- 679 |//-----------------------------------------------------------------------
@@ -684,33 +722,34 @@ static void build_subroutines(BuildCtx *ctx)
684 | stw CARG3, SAVE_NRES 722 | stw CARG3, SAVE_NRES
685 | cmplwi TMP1, 0 723 | cmplwi TMP1, 0
686 | stw CARG3, SAVE_ERRF 724 | stw CARG3, SAVE_ERRF
687 | stp TMP0, L->cframe
688 | stp CARG3, SAVE_CFRAME 725 | stp CARG3, SAVE_CFRAME
689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 726 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
727 | stp TMP0, L->cframe
690 | beq >3 728 | beq >3
691 | 729 |
692 | // Resume after yield (like a return). 730 | // Resume after yield (like a return).
731 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
693 | mr RA, BASE 732 | mr RA, BASE
694 | lp BASE, L->base 733 | lp BASE, L->base
695 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 734 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
696 | lp TMP1, L->top 735 | lp TMP1, L->top
697 | lwz PC, FRAME_PC(BASE) 736 | lwz PC, FRAME_PC(BASE)
698 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 737 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
699 | stb CARG3, L->status 738 | stb CARG3, L->status
700 | stw TMP3, TMPD 739 | .FPU stw TMP3, TMPD
701 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 740 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
702 | lfs TOBIT, TMPD 741 | .FPU lfs TOBIT, TMPD
703 | sub RD, TMP1, BASE 742 | sub RD, TMP1, BASE
704 | stw TMP3, TMPD 743 | .FPU stw TMP3, TMPD
705 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 744 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
706 | addi RD, RD, 8 745 | addi RD, RD, 8
707 | stw TMP0, TONUM_HI 746 | .FPU stw TMP0, TONUM_HI
708 | li_vmstate INTERP 747 | li_vmstate INTERP
709 | li ZERO, 0 748 | li ZERO, 0
710 | st_vmstate 749 | st_vmstate
711 | andix. TMP0, PC, FRAME_TYPE 750 | andix. TMP0, PC, FRAME_TYPE
712 | mr MULTRES, RD 751 | mr MULTRES, RD
713 | lfs TONUM, TMPD 752 | .FPU lfs TONUM, TMPD
714 | li TISNIL, LJ_TNIL 753 | li TISNIL, LJ_TNIL
715 | beq ->BC_RET_Z 754 | beq ->BC_RET_Z
716 | b ->vm_return 755 | b ->vm_return
@@ -729,33 +768,34 @@ static void build_subroutines(BuildCtx *ctx)
729 | 768 |
730 |1: // Entry point for vm_pcall above (PC = ftype). 769 |1: // Entry point for vm_pcall above (PC = ftype).
731 | lp TMP1, L:CARG1->cframe 770 | lp TMP1, L:CARG1->cframe
732 | stw CARG3, SAVE_NRES
733 | mr L, CARG1 771 | mr L, CARG1
734 | stw CARG1, SAVE_L 772 | stw CARG3, SAVE_NRES
735 | mr BASE, CARG2
736 | stp sp, L->cframe // Add our C frame to cframe chain.
737 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 773 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
774 | stw CARG1, SAVE_L
775 | mr BASE, CARG2
776 | addi DISPATCH, DISPATCH, GG_G2DISP
738 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 777 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
739 | stp TMP1, SAVE_CFRAME 778 | stp TMP1, SAVE_CFRAME
740 | addi DISPATCH, DISPATCH, GG_G2DISP 779 | stp sp, L->cframe // Add our C frame to cframe chain.
741 | 780 |
742 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 781 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
782 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
743 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 783 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
744 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 784 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
745 | lp TMP1, L->top 785 | lp TMP1, L->top
746 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 786 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
747 | add PC, PC, BASE 787 | add PC, PC, BASE
748 | stw TMP3, TMPD 788 | .FPU stw TMP3, TMPD
749 | li ZERO, 0 789 | li ZERO, 0
750 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 790 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
751 | lfs TOBIT, TMPD 791 | .FPU lfs TOBIT, TMPD
752 | sub PC, PC, TMP2 // PC = frame delta + frame type 792 | sub PC, PC, TMP2 // PC = frame delta + frame type
753 | stw TMP3, TMPD 793 | .FPU stw TMP3, TMPD
754 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 794 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
755 | sub NARGS8:RC, TMP1, BASE 795 | sub NARGS8:RC, TMP1, BASE
756 | stw TMP0, TONUM_HI 796 | .FPU stw TMP0, TONUM_HI
757 | li_vmstate INTERP 797 | li_vmstate INTERP
758 | lfs TONUM, TMPD 798 | .FPU lfs TONUM, TMPD
759 | li TISNIL, LJ_TNIL 799 | li TISNIL, LJ_TNIL
760 | st_vmstate 800 | st_vmstate
761 | 801 |
@@ -776,15 +816,18 @@ static void build_subroutines(BuildCtx *ctx)
776 | lwz TMP0, L:CARG1->stack 816 | lwz TMP0, L:CARG1->stack
777 | stw CARG1, SAVE_L 817 | stw CARG1, SAVE_L
778 | lp TMP1, L->top 818 | lp TMP1, L->top
819 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
779 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 820 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
780 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 821 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
781 | lp TMP1, L->cframe 822 | lp TMP1, L->cframe
782 | stp sp, L->cframe // Add our C frame to cframe chain. 823 | addi DISPATCH, DISPATCH, GG_G2DISP
783 | .toc lp CARG4, 0(CARG4) 824 | .toc lp CARG4, 0(CARG4)
784 | li TMP2, 0 825 | li TMP2, 0
785 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 826 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
786 | stw TMP2, SAVE_ERRF // No error function. 827 | stw TMP2, SAVE_ERRF // No error function.
787 | stp TMP1, SAVE_CFRAME 828 | stp TMP1, SAVE_CFRAME
829 | stp sp, L->cframe // Add our C frame to cframe chain.
830 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
788 | mtctr CARG4 831 | mtctr CARG4
789 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 832 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
790 |.if PPE 833 |.if PPE
@@ -793,9 +836,7 @@ static void build_subroutines(BuildCtx *ctx)
793 |.else 836 |.else
794 | mr. BASE, CRET1 837 | mr. BASE, CRET1
795 |.endif 838 |.endif
796 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 839 | li PC, FRAME_CP
797 | li PC, FRAME_CP
798 | addi DISPATCH, DISPATCH, GG_G2DISP
799 | bne <3 // Else continue with the call. 840 | bne <3 // Else continue with the call.
800 | b ->vm_leave_cp // No base? Just remove C frame. 841 | b ->vm_leave_cp // No base? Just remove C frame.
801 | 842 |
@@ -842,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx)
842 | lwz INS, -4(PC) 883 | lwz INS, -4(PC)
843 | subi CARG2, RB, 16 884 | subi CARG2, RB, 16
844 | decode_RB8 SAVE0, INS 885 | decode_RB8 SAVE0, INS
886 |.if FPU
845 | lfd f0, 0(RA) 887 | lfd f0, 0(RA)
888 |.else
889 | lwz TMP2, 0(RA)
890 | lwz TMP3, 4(RA)
891 |.endif
846 | add TMP1, BASE, SAVE0 892 | add TMP1, BASE, SAVE0
847 | stp BASE, L->base 893 | stp BASE, L->base
848 | cmplw TMP1, CARG2 894 | cmplw TMP1, CARG2
849 | sub CARG3, CARG2, TMP1 895 | sub CARG3, CARG2, TMP1
850 | decode_RA8 RA, INS 896 | decode_RA8 RA, INS
897 |.if FPU
851 | stfd f0, 0(CARG2) 898 | stfd f0, 0(CARG2)
899 |.else
900 | stw TMP2, 0(CARG2)
901 | stw TMP3, 4(CARG2)
902 |.endif
852 | bney ->BC_CAT_Z 903 | bney ->BC_CAT_Z
904 |.if FPU
853 | stfdx f0, BASE, RA 905 | stfdx f0, BASE, RA
906 |.else
907 | stwux TMP2, RA, BASE
908 | stw TMP3, 4(RA)
909 |.endif
854 | b ->cont_nop 910 | b ->cont_nop
855 | 911 |
856 |//-- Table indexing metamethods ----------------------------------------- 912 |//-- Table indexing metamethods -----------------------------------------
@@ -903,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx)
903 | // Returns TValue * (finished) or NULL (metamethod). 959 | // Returns TValue * (finished) or NULL (metamethod).
904 | cmplwi CRET1, 0 960 | cmplwi CRET1, 0
905 | beq >3 961 | beq >3
962 |.if FPU
906 | lfd f0, 0(CRET1) 963 | lfd f0, 0(CRET1)
964 |.else
965 | lwz TMP0, 0(CRET1)
966 | lwz TMP1, 4(CRET1)
967 |.endif
907 | ins_next1 968 | ins_next1
969 |.if FPU
908 | stfdx f0, BASE, RA 970 | stfdx f0, BASE, RA
971 |.else
972 | stwux TMP0, RA, BASE
973 | stw TMP1, 4(RA)
974 |.endif
909 | ins_next2 975 | ins_next2
910 | 976 |
911 |3: // Call __index metamethod. 977 |3: // Call __index metamethod.
@@ -918,6 +984,22 @@ static void build_subroutines(BuildCtx *ctx)
918 | li NARGS8:RC, 16 // 2 args for func(t, k). 984 | li NARGS8:RC, 16 // 2 args for func(t, k).
919 | b ->vm_call_dispatch_f 985 | b ->vm_call_dispatch_f
920 | 986 |
987 |->vmeta_tgetr: // Calls lj_tab_getinth; hands a found value to BC_TGETR_Z or stores nil on NULL.
988 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
989 | // Returns cTValue * or NULL.
990 | cmplwi CRET1, 0
991 | beq >1 // NULL result: take the nil path at 1: below.
992 |.if FPU
993 | lfd f14, 0(CRET1) // Load the 8-byte value into f14.
994 |.else
995 | lwz SAVE0, 0(CRET1) // Without FPU: carry the two 32-bit words in SAVE0/SAVE1 instead.
996 | lwz SAVE1, 4(CRET1)
997 |.endif
998 | b ->BC_TGETR_Z
999 |1:
1000 | stwx TISNIL, BASE, RA // Write TISNIL to the word at BASE+RA.
1001 | b ->cont_nop
1002 |
921 |//----------------------------------------------------------------------- 1003 |//-----------------------------------------------------------------------
922 | 1004 |
923 |->vmeta_tsets1: 1005 |->vmeta_tsets1:
@@ -967,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx)
967 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 1049 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
968 | // Returns TValue * (finished) or NULL (metamethod). 1050 | // Returns TValue * (finished) or NULL (metamethod).
969 | cmplwi CRET1, 0 1051 | cmplwi CRET1, 0
1052 |.if FPU
970 | lfdx f0, BASE, RA 1053 | lfdx f0, BASE, RA
1054 |.else
1055 | lwzux TMP2, RA, BASE
1056 | lwz TMP3, 4(RA)
1057 |.endif
971 | beq >3 1058 | beq >3
972 | // NOBARRIER: lj_meta_tset ensures the table is not black. 1059 | // NOBARRIER: lj_meta_tset ensures the table is not black.
973 | ins_next1 1060 | ins_next1
1061 |.if FPU
974 | stfd f0, 0(CRET1) 1062 | stfd f0, 0(CRET1)
1063 |.else
1064 | stw TMP2, 0(CRET1)
1065 | stw TMP3, 4(CRET1)
1066 |.endif
975 | ins_next2 1067 | ins_next2
976 | 1068 |
977 |3: // Call __newindex metamethod. 1069 |3: // Call __newindex metamethod.
@@ -982,9 +1074,27 @@ static void build_subroutines(BuildCtx *ctx)
982 | add PC, TMP1, BASE 1074 | add PC, TMP1, BASE
983 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1075 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
984 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 1076 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
1077 |.if FPU
985 | stfd f0, 16(BASE) // Copy value to third argument. 1078 | stfd f0, 16(BASE) // Copy value to third argument.
1079 |.else
1080 | stw TMP2, 16(BASE)
1081 | stw TMP3, 20(BASE)
1082 |.endif
986 | b ->vm_call_dispatch_f 1083 | b ->vm_call_dispatch_f
987 | 1084 |
1085 |->vmeta_tsetr: // Calls lj_tab_setinth and stores the pending value into the returned slot.
1086 | stp BASE, L->base // Save BASE and PC before the C call.
1087 | stw PC, SAVE_PC
1088 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1089 | // Returns TValue *.
1090 |.if FPU
1091 | stfd f14, 0(CRET1) // Value to store is expected in f14.
1092 |.else
1093 | stw SAVE0, 0(CRET1) // Without FPU: value is expected as two words in SAVE0/SAVE1.
1094 | stw SAVE1, 4(CRET1)
1095 |.endif
1096 | b ->cont_nop
1097 |
988 |//-- Comparison metamethods --------------------------------------------- 1098 |//-- Comparison metamethods ---------------------------------------------
989 | 1099 |
990 |->vmeta_comp: 1100 |->vmeta_comp:
@@ -1021,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
1021 | 1131 |
1022 |->cont_ra: // RA = resultptr 1132 |->cont_ra: // RA = resultptr
1023 | lwz INS, -4(PC) 1133 | lwz INS, -4(PC)
1134 |.if FPU
1024 | lfd f0, 0(RA) 1135 | lfd f0, 0(RA)
1136 |.else
1137 | lwz CARG1, 0(RA)
1138 | lwz CARG2, 4(RA)
1139 |.endif
1025 | decode_RA8 TMP1, INS 1140 | decode_RA8 TMP1, INS
1141 |.if FPU
1026 | stfdx f0, BASE, TMP1 1142 | stfdx f0, BASE, TMP1
1143 |.else
1144 | stwux CARG1, TMP1, BASE
1145 | stw CARG2, 4(TMP1)
1146 |.endif
1027 | b ->cont_nop 1147 | b ->cont_nop
1028 | 1148 |
1029 |->cont_condt: // RA = resultptr 1149 |->cont_condt: // RA = resultptr
@@ -1063,6 +1183,16 @@ static void build_subroutines(BuildCtx *ctx)
1063 | b <3 1183 | b <3
1064 |.endif 1184 |.endif
1065 | 1185 |
1186 |->vmeta_istype: // Saves BASE and PC-4, then calls lj_meta_istype(L, RA>>3, RD>>3).
1187 | subi PC, PC, 4 // PC is moved back by 4 bytes before it is saved.
1188 | stp BASE, L->base
1189 | srwi CARG2, RA, 3 // CARG2 = RA >> 3 (presumably undoes a *8 operand scaling -- confirm at call sites).
1190 | mr CARG1, L
1191 | srwi CARG3, RD, 3 // CARG3 = RD >> 3.
1192 | stw PC, SAVE_PC
1193 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1194 | b ->cont_nop
1195 |
1066 |//-- Arithmetic metamethods --------------------------------------------- 1196 |//-- Arithmetic metamethods ---------------------------------------------
1067 | 1197 |
1068 |->vmeta_arith_nv: 1198 |->vmeta_arith_nv:
@@ -1219,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
1219 |.macro .ffunc_n, name 1349 |.macro .ffunc_n, name // Prologue for ff_<name>: needs >= 8 bytes of args and a first arg that passes checknum.
1220 |->ff_ .. name: 1350 |->ff_ .. name:
1221 | cmplwi NARGS8:RC, 8 1351 | cmplwi NARGS8:RC, 8
1222 | lwz CARG3, 0(BASE) 1352 | lwz CARG1, 0(BASE) // First word of argument 1 (tested by checknum below).
1353 |.if FPU
1223 | lfd FARG1, 0(BASE) 1354 | lfd FARG1, 0(BASE)
1355 |.else
1356 | lwz CARG2, 4(BASE) // Without FPU: argument 1 travels as the word pair CARG1/CARG2.
1357 |.endif
1224 | blt ->fff_fallback 1358 | blt ->fff_fallback // Fewer than 8 bytes of arguments.
1225 | checknum CARG3; bge ->fff_fallback 1359 | checknum CARG1; bge ->fff_fallback
1226 |.endmacro 1360 |.endmacro
1227 | 1361 |
1228 |.macro .ffunc_nn, name 1362 |.macro .ffunc_nn, name // Prologue for ff_<name>: needs >= 16 bytes of args and two args that pass checknum.
1229 |->ff_ .. name: 1363 |->ff_ .. name:
1230 | cmplwi NARGS8:RC, 16 1364 | cmplwi NARGS8:RC, 16
1231 | lwz CARG3, 0(BASE) 1365 | lwz CARG1, 0(BASE) // First word of argument 1.
1366 |.if FPU
1232 | lfd FARG1, 0(BASE) 1367 | lfd FARG1, 0(BASE)
1233 | lwz CARG4, 8(BASE) 1368 | lwz CARG3, 8(BASE) // First word of argument 2.
1234 | lfd FARG2, 8(BASE) 1369 | lfd FARG2, 8(BASE)
1370 |.else
1371 | lwz CARG2, 4(BASE) // Without FPU: arguments travel as CARG1/CARG2 and CARG3/CARG4.
1372 | lwz CARG3, 8(BASE)
1373 | lwz CARG4, 12(BASE)
1374 |.endif
1235 | blt ->fff_fallback 1375 | blt ->fff_fallback // Fewer than 16 bytes of arguments.
1376 | checknum CARG1; bge ->fff_fallback
1236 | checknum CARG3; bge ->fff_fallback 1377 | checknum CARG3; bge ->fff_fallback
1237 | checknum CARG4; bge ->fff_fallback
1238 |.endmacro 1378 |.endmacro
1239 | 1379 |
1240 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 1380 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1255,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
1255 | bge cr1, ->fff_fallback 1395 | bge cr1, ->fff_fallback
1256 | stw CARG3, 0(RA) 1396 | stw CARG3, 0(RA)
1257 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1397 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1398 | addi TMP1, BASE, 8
1399 | add TMP2, RA, NARGS8:RC
1258 | stw CARG1, 4(RA) 1400 | stw CARG1, 4(RA)
1259 | beq ->fff_res // Done if exactly 1 argument. 1401 | beq ->fff_res // Done if exactly 1 argument.
1260 | li TMP1, 8
1261 | subi RC, RC, 8
1262 |1: 1402 |1: // Copy loop: move each 8-byte slot at TMP1 down to TMP1-8; the slot at TMP2 is the last one.
1263 | cmplw TMP1, RC 1403 | cmplw TMP1, TMP2
1264 | lfdx f0, BASE, TMP1 1404 |.if FPU
1265 | stfdx f0, RA, TMP1 1405 | lfd f0, 0(TMP1)
1406 | stfd f0, -8(TMP1) // Must store one slot below the load (was 0(TMP1), a self-copy); matches the soft-float path.
1407 |.else
1408 | lwz CARG1, 0(TMP1)
1409 | lwz CARG2, 4(TMP1)
1410 | stw CARG1, -8(TMP1)
1411 | stw CARG2, -4(TMP1)
1412 |.endif
1266 | addi TMP1, TMP1, 8 1413 | addi TMP1, TMP1, 8
1267 | bney <1 1414 | bney <1 // Uses the cmplw result from the top of the loop.
1268 | b ->fff_res 1415 | b ->fff_res
@@ -1277,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
1277 | orc TMP1, TMP2, TMP0 1424 | orc TMP1, TMP2, TMP0
1278 | addi TMP1, TMP1, ~LJ_TISNUM+1 1425 | addi TMP1, TMP1, ~LJ_TISNUM+1
1279 | slwi TMP1, TMP1, 3 1426 | slwi TMP1, TMP1, 3
1427 |.if FPU
1280 | la TMP2, CFUNC:RB->upvalue 1428 | la TMP2, CFUNC:RB->upvalue
1281 | lfdx FARG1, TMP2, TMP1 1429 | lfdx FARG1, TMP2, TMP1
1430 |.else
1431 | add TMP1, CFUNC:RB, TMP1
1432 | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
1433 | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
1434 |.endif
1282 | b ->fff_resn 1435 | b ->fff_resn
1283 | 1436 |
1284 |//-- Base library: getters and setters --------------------------------- 1437 |//-- Base library: getters and setters ---------------------------------
@@ -1356,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
1356 | mr CARG1, L 1509 | mr CARG1, L
1357 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1510 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1358 | // Returns cTValue *. 1511 | // Returns cTValue *.
1512 |.if FPU
1359 | lfd FARG1, 0(CRET1) 1513 | lfd FARG1, 0(CRET1)
1514 |.else
1515 | lwz CARG2, 4(CRET1)
1516 | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
1517 |.endif
1360 | b ->fff_resn 1518 | b ->fff_resn
1361 | 1519 |
1362 |//-- Base library: conversions ------------------------------------------ 1520 |//-- Base library: conversions ------------------------------------------
@@ -1365,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
1365 | // Only handles the number case inline (without a base argument). 1523 | // Only handles the number case inline (without a base argument).
1366 | cmplwi NARGS8:RC, 8 1524 | cmplwi NARGS8:RC, 8
1367 | lwz CARG1, 0(BASE) 1525 | lwz CARG1, 0(BASE)
1526 |.if FPU
1368 | lfd FARG1, 0(BASE) 1527 | lfd FARG1, 0(BASE)
1528 |.else
1529 | lwz CARG2, 4(BASE)
1530 |.endif
1369 | bne ->fff_fallback // Exactly one argument. 1531 | bne ->fff_fallback // Exactly one argument.
1370 | checknum CARG1; bgt ->fff_fallback 1532 | checknum CARG1; bgt ->fff_fallback
1371 | b ->fff_resn 1533 | b ->fff_resn
@@ -1387,9 +1549,9 @@ static void build_subroutines(BuildCtx *ctx)
1387 | mr CARG1, L 1549 | mr CARG1, L
1388 | mr CARG2, BASE 1550 | mr CARG2, BASE
1389 |.if DUALNUM 1551 |.if DUALNUM
1390 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1552 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1391 |.else 1553 |.else
1392 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1554 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1393 |.endif 1555 |.endif
1394 | // Returns GCstr *. 1556 | // Returns GCstr *.
1395 | li CARG3, LJ_TSTR 1557 | li CARG3, LJ_TSTR
@@ -1416,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx)
1416 | cmplwi CRET1, 0 1578 | cmplwi CRET1, 0
1417 | li CARG3, LJ_TNIL 1579 | li CARG3, LJ_TNIL
1418 | beq ->fff_restv // End of traversal: return nil. 1580 | beq ->fff_restv // End of traversal: return nil.
1419 | lfd f0, 8(BASE) // Copy key and value to results.
1420 | la RA, -8(BASE) 1581 | la RA, -8(BASE)
1582 |.if FPU
1583 | lfd f0, 8(BASE) // Copy key and value to results.
1421 | lfd f1, 16(BASE) 1584 | lfd f1, 16(BASE)
1422 | stfd f0, 0(RA) 1585 | stfd f0, 0(RA)
1423 | li RD, (2+1)*8
1424 | stfd f1, 8(RA) 1586 | stfd f1, 8(RA)
1587 |.else
1588 | lwz CARG1, 8(BASE)
1589 | lwz CARG2, 12(BASE)
1590 | lwz CARG3, 16(BASE)
1591 | lwz CARG4, 20(BASE)
1592 | stw CARG1, 0(RA)
1593 | stw CARG2, 4(RA)
1594 | stw CARG3, 8(RA)
1595 | stw CARG4, 12(RA)
1596 |.endif
1597 | li RD, (2+1)*8
1425 | b ->fff_res 1598 | b ->fff_res
1426 | 1599 |
1427 |.ffunc_1 pairs 1600 |.ffunc_1 pairs
@@ -1430,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx)
1430 | bne ->fff_fallback 1603 | bne ->fff_fallback
1431#if LJ_52 1604#if LJ_52
1432 | lwz TAB:TMP2, TAB:CARG1->metatable 1605 | lwz TAB:TMP2, TAB:CARG1->metatable
1606 |.if FPU
1433 | lfd f0, CFUNC:RB->upvalue[0] 1607 | lfd f0, CFUNC:RB->upvalue[0]
1608 |.else
1609 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1610 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1611 |.endif
1434 | cmplwi TAB:TMP2, 0 1612 | cmplwi TAB:TMP2, 0
1435 | la RA, -8(BASE) 1613 | la RA, -8(BASE)
1436 | bne ->fff_fallback 1614 | bne ->fff_fallback
1437#else 1615#else
1616 |.if FPU
1438 | lfd f0, CFUNC:RB->upvalue[0] 1617 | lfd f0, CFUNC:RB->upvalue[0]
1618 |.else
1619 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1620 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1621 |.endif
1439 | la RA, -8(BASE) 1622 | la RA, -8(BASE)
1440#endif 1623#endif
1441 | stw TISNIL, 8(BASE) 1624 | stw TISNIL, 8(BASE)
1442 | li RD, (3+1)*8 1625 | li RD, (3+1)*8
1626 |.if FPU
1443 | stfd f0, 0(RA) 1627 | stfd f0, 0(RA)
1628 |.else
1629 | stw TMP0, 0(RA)
1630 | stw TMP1, 4(RA)
1631 |.endif
1444 | b ->fff_res 1632 | b ->fff_res
1445 | 1633 |
1446 |.ffunc ipairs_aux 1634 |.ffunc ipairs_aux
@@ -1486,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx)
1486 | stfd FARG2, 0(RA) 1674 | stfd FARG2, 0(RA)
1487 |.endif 1675 |.endif
1488 | ble >2 // Not in array part? 1676 | ble >2 // Not in array part?
1677 |.if FPU
1489 | lwzx TMP2, TMP1, TMP3 1678 | lwzx TMP2, TMP1, TMP3
1490 | lfdx f0, TMP1, TMP3 1679 | lfdx f0, TMP1, TMP3
1680 |.else
1681 | lwzux TMP2, TMP1, TMP3
1682 | lwz TMP3, 4(TMP1)
1683 |.endif
1491 |1: 1684 |1:
1492 | checknil TMP2 1685 | checknil TMP2
1493 | li RD, (0+1)*8 1686 | li RD, (0+1)*8
1494 | beq ->fff_res // End of iteration, return 0 results. 1687 | beq ->fff_res // End of iteration, return 0 results.
1495 | li RD, (2+1)*8 1688 | li RD, (2+1)*8
1689 |.if FPU
1496 | stfd f0, 8(RA) 1690 | stfd f0, 8(RA)
1691 |.else
1692 | stw TMP2, 8(RA)
1693 | stw TMP3, 12(RA)
1694 |.endif
1497 | b ->fff_res 1695 | b ->fff_res
1498 |2: // Check for empty hash part first. Otherwise call C function. 1696 |2: // Check for empty hash part first. Otherwise call C function.
1499 | lwz TMP0, TAB:CARG1->hmask 1697 | lwz TMP0, TAB:CARG1->hmask
@@ -1507,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx)
1507 | li RD, (0+1)*8 1705 | li RD, (0+1)*8
1508 | beq ->fff_res 1706 | beq ->fff_res
1509 | lwz TMP2, 0(CRET1) 1707 | lwz TMP2, 0(CRET1)
1708 |.if FPU
1510 | lfd f0, 0(CRET1) 1709 | lfd f0, 0(CRET1)
1710 |.else
1711 | lwz TMP3, 4(CRET1)
1712 |.endif
1511 | b <1 1713 | b <1
1512 | 1714 |
1513 |.ffunc_1 ipairs 1715 |.ffunc_1 ipairs
@@ -1516,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx)
1516 | bne ->fff_fallback 1718 | bne ->fff_fallback
1517#if LJ_52 1719#if LJ_52
1518 | lwz TAB:TMP2, TAB:CARG1->metatable 1720 | lwz TAB:TMP2, TAB:CARG1->metatable
1721 |.if FPU
1519 | lfd f0, CFUNC:RB->upvalue[0] 1722 | lfd f0, CFUNC:RB->upvalue[0]
1723 |.else
1724 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1725 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1726 |.endif
1520 | cmplwi TAB:TMP2, 0 1727 | cmplwi TAB:TMP2, 0
1521 | la RA, -8(BASE) 1728 | la RA, -8(BASE)
1522 | bne ->fff_fallback 1729 | bne ->fff_fallback
1523#else 1730#else
1731 |.if FPU
1524 | lfd f0, CFUNC:RB->upvalue[0] 1732 | lfd f0, CFUNC:RB->upvalue[0]
1733 |.else
1734 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1735 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1736 |.endif
1525 | la RA, -8(BASE) 1737 | la RA, -8(BASE)
1526#endif 1738#endif
1527 |.if DUALNUM 1739 |.if DUALNUM
@@ -1531,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx)
1531 |.endif 1743 |.endif
1532 | stw ZERO, 12(BASE) 1744 | stw ZERO, 12(BASE)
1533 | li RD, (3+1)*8 1745 | li RD, (3+1)*8
1746 |.if FPU
1534 | stfd f0, 0(RA) 1747 | stfd f0, 0(RA)
1748 |.else
1749 | stw TMP0, 0(RA)
1750 | stw TMP1, 4(RA)
1751 |.endif
1535 | b ->fff_res 1752 | b ->fff_res
1536 | 1753 |
1537 |//-- Base library: catch errors ---------------------------------------- 1754 |//-- Base library: catch errors ----------------------------------------
@@ -1550,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx)
1550 | 1767 |
1551 |.ffunc xpcall 1768 |.ffunc xpcall
1552 | cmplwi NARGS8:RC, 16 1769 | cmplwi NARGS8:RC, 16
1553 | lwz CARG4, 8(BASE) 1770 | lwz CARG3, 8(BASE)
1771 |.if FPU
1554 | lfd FARG2, 8(BASE) 1772 | lfd FARG2, 8(BASE)
1555 | lfd FARG1, 0(BASE) 1773 | lfd FARG1, 0(BASE)
1774 |.else
1775 | lwz CARG1, 0(BASE)
1776 | lwz CARG2, 4(BASE)
1777 | lwz CARG4, 12(BASE)
1778 |.endif
1556 | blt ->fff_fallback 1779 | blt ->fff_fallback
1557 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1780 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1558 | mr TMP2, BASE 1781 | mr TMP2, BASE
1559 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1782 | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
1560 | la BASE, 16(BASE) 1783 | la BASE, 16(BASE)
1561 | // Remember active hook before pcall. 1784 | // Remember active hook before pcall.
1562 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1785 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1786 |.if FPU
1563 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1787 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1564 | subi NARGS8:RC, NARGS8:RC, 16
1565 | stfd FARG1, 8(TMP2) 1788 | stfd FARG1, 8(TMP2)
1789 |.else
1790 | stw CARG3, 0(TMP2)
1791 | stw CARG4, 4(TMP2)
1792 | stw CARG1, 8(TMP2)
1793 | stw CARG2, 12(TMP2)
1794 |.endif
1795 | subi NARGS8:RC, NARGS8:RC, 16
1566 | addi PC, TMP1, 16+FRAME_PCALL 1796 | addi PC, TMP1, 16+FRAME_PCALL
1567 | b ->vm_call_dispatch 1797 | b ->vm_call_dispatch
1568 | 1798 |
@@ -1605,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx)
1605 | stp BASE, L->top 1835 | stp BASE, L->top
1606 |2: // Move args to coroutine. 1836 |2: // Move args to coroutine.
1607 | cmpw TMP1, NARGS8:RC 1837 | cmpw TMP1, NARGS8:RC
1838 |.if FPU
1608 | lfdx f0, BASE, TMP1 1839 | lfdx f0, BASE, TMP1
1840 |.else
1841 | add CARG3, BASE, TMP1
1842 | lwz TMP2, 0(CARG3)
1843 | lwz TMP3, 4(CARG3)
1844 |.endif
1609 | beq >3 1845 | beq >3
1846 |.if FPU
1610 | stfdx f0, CARG2, TMP1 1847 | stfdx f0, CARG2, TMP1
1848 |.else
1849 | add CARG3, CARG2, TMP1
1850 | stw TMP2, 0(CARG3)
1851 | stw TMP3, 4(CARG3)
1852 |.endif
1611 | addi TMP1, TMP1, 8 1853 | addi TMP1, TMP1, 8
1612 | b <2 1854 | b <2
1613 |3: 1855 |3:
@@ -1622,6 +1864,7 @@ static void build_subroutines(BuildCtx *ctx)
1622 | lp TMP3, L:SAVE0->top 1864 | lp TMP3, L:SAVE0->top
1623 | li_vmstate INTERP 1865 | li_vmstate INTERP
1624 | lp BASE, L->base 1866 | lp BASE, L->base
1867 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
1625 | st_vmstate 1868 | st_vmstate
1626 | bgt >8 1869 | bgt >8
1627 | sub RD, TMP3, TMP2 1870 | sub RD, TMP3, TMP2
@@ -1637,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx)
1637 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1880 | stp TMP2, L:SAVE0->top // Clear coroutine stack.
1638 |5: // Move results from coroutine. 1881 |5: // Move results from coroutine.
1639 | cmplw TMP1, TMP3 1882 | cmplw TMP1, TMP3
1883 |.if FPU
1640 | lfdx f0, TMP2, TMP1 1884 | lfdx f0, TMP2, TMP1
1641 | stfdx f0, BASE, TMP1 1885 | stfdx f0, BASE, TMP1
1886 |.else
1887 | add CARG3, TMP2, TMP1
1888 | lwz CARG1, 0(CARG3)
1889 | lwz CARG2, 4(CARG3)
1890 | add CARG3, BASE, TMP1
1891 | stw CARG1, 0(CARG3)
1892 | stw CARG2, 4(CARG3)
1893 |.endif
1642 | addi TMP1, TMP1, 8 1894 | addi TMP1, TMP1, 8
1643 | bne <5 1895 | bne <5
1644 |6: 1896 |6:
@@ -1663,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx)
1663 | andix. TMP0, PC, FRAME_TYPE 1915 | andix. TMP0, PC, FRAME_TYPE
1664 | la TMP3, -8(TMP3) 1916 | la TMP3, -8(TMP3)
1665 | li TMP1, LJ_TFALSE 1917 | li TMP1, LJ_TFALSE
1918 |.if FPU
1666 | lfd f0, 0(TMP3) 1919 | lfd f0, 0(TMP3)
1920 |.else
1921 | lwz CARG1, 0(TMP3)
1922 | lwz CARG2, 4(TMP3)
1923 |.endif
1667 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1924 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
1668 | li RD, (2+1)*8 1925 | li RD, (2+1)*8
1669 | stw TMP1, -8(BASE) // Prepend false to results. 1926 | stw TMP1, -8(BASE) // Prepend false to results.
1670 | la RA, -8(BASE) 1927 | la RA, -8(BASE)
1928 |.if FPU
1671 | stfd f0, 0(BASE) // Copy error message. 1929 | stfd f0, 0(BASE) // Copy error message.
1930 |.else
1931 | stw CARG1, 0(BASE) // Copy error message.
1932 | stw CARG2, 4(BASE)
1933 |.endif
1672 | b <7 1934 | b <7
1673 |.else 1935 |.else
1674 | mr CARG1, L 1936 | mr CARG1, L
@@ -1847,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx)
1847 | lus CARG1, 0x8000 // -(2^31). 2109 | lus CARG1, 0x8000 // -(2^31).
1848 | beqy ->fff_resi 2110 | beqy ->fff_resi
1849 |5: 2111 |5:
2112 |.if FPU
1850 | lfd FARG1, 0(BASE) 2113 | lfd FARG1, 0(BASE)
2114 |.else
2115 | lwz CARG1, 0(BASE)
2116 | lwz CARG2, 4(BASE)
2117 |.endif
1851 | blex func 2118 | blex func
1852 | b ->fff_resn 2119 | b ->fff_resn
1853 |.endmacro 2120 |.endmacro
@@ -1871,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx)
1871 | 2138 |
1872 |.ffunc math_log 2139 |.ffunc math_log
1873 | cmplwi NARGS8:RC, 8 2140 | cmplwi NARGS8:RC, 8
1874 | lwz CARG3, 0(BASE) 2141 | lwz CARG1, 0(BASE)
1875 | lfd FARG1, 0(BASE)
1876 | bne ->fff_fallback // Need exactly 1 argument. 2142 | bne ->fff_fallback // Need exactly 1 argument.
1877 | checknum CARG3; bge ->fff_fallback 2143 | checknum CARG1; bge ->fff_fallback
2144 |.if FPU
2145 | lfd FARG1, 0(BASE)
2146 |.else
2147 | lwz CARG2, 4(BASE)
2148 |.endif
1878 | blex log 2149 | blex log
1879 | b ->fff_resn 2150 | b ->fff_resn
1880 | 2151 |
@@ -1893,26 +2164,27 @@ static void build_subroutines(BuildCtx *ctx)
1893 | math_extern2 atan2 2164 | math_extern2 atan2
1894 | math_extern2 fmod 2165 | math_extern2 fmod
1895 | 2166 |
1896 |->ff_math_deg:
1897 |.ffunc_n math_rad
1898 | lfd FARG2, CFUNC:RB->upvalue[0]
1899 | fmul FARG1, FARG1, FARG2
1900 | b ->fff_resn
1901 |
1902 |.if DUALNUM 2167 |.if DUALNUM
1903 |.ffunc math_ldexp 2168 |.ffunc math_ldexp
1904 | cmplwi NARGS8:RC, 16 2169 | cmplwi NARGS8:RC, 16
1905 | lwz CARG3, 0(BASE) 2170 | lwz TMP0, 0(BASE)
2171 |.if FPU
1906 | lfd FARG1, 0(BASE) 2172 | lfd FARG1, 0(BASE)
1907 | lwz CARG4, 8(BASE) 2173 |.else
2174 | lwz CARG1, 0(BASE)
2175 | lwz CARG2, 4(BASE)
2176 |.endif
2177 | lwz TMP1, 8(BASE)
1908 |.if GPR64 2178 |.if GPR64
1909 | lwz CARG2, 12(BASE) 2179 | lwz CARG2, 12(BASE)
1910 |.else 2180 |.elif FPU
1911 | lwz CARG1, 12(BASE) 2181 | lwz CARG1, 12(BASE)
2182 |.else
2183 | lwz CARG3, 12(BASE)
1912 |.endif 2184 |.endif
1913 | blt ->fff_fallback 2185 | blt ->fff_fallback
1914 | checknum CARG3; bge ->fff_fallback 2186 | checknum TMP0; bge ->fff_fallback
1915 | checknum CARG4; bne ->fff_fallback 2187 | checknum TMP1; bne ->fff_fallback
1916 |.else 2188 |.else
1917 |.ffunc_nn math_ldexp 2189 |.ffunc_nn math_ldexp
1918 |.if GPR64 2190 |.if GPR64
@@ -1927,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx)
1927 |.ffunc_n math_frexp 2199 |.ffunc_n math_frexp
1928 |.if GPR64 2200 |.if GPR64
1929 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 2201 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1930 |.else 2202 |.elif FPU
1931 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 2203 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
2204 |.else
2205 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1932 |.endif 2206 |.endif
1933 | lwz PC, FRAME_PC(BASE) 2207 | lwz PC, FRAME_PC(BASE)
1934 | blex frexp 2208 | blex frexp
@@ -1937,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx)
1937 |.if not DUALNUM 2211 |.if not DUALNUM
1938 | tonum_i FARG2, TMP1 2212 | tonum_i FARG2, TMP1
1939 |.endif 2213 |.endif
2214 |.if FPU
1940 | stfd FARG1, 0(RA) 2215 | stfd FARG1, 0(RA)
2216 |.else
2217 | stw CRET1, 0(RA)
2218 | stw CRET2, 4(RA)
2219 |.endif
1941 | li RD, (2+1)*8 2220 | li RD, (2+1)*8
1942 |.if DUALNUM 2221 |.if DUALNUM
1943 | stw TISNUM, 8(RA) 2222 | stw TISNUM, 8(RA)
@@ -1950,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx)
1950 |.ffunc_n math_modf 2229 |.ffunc_n math_modf
1951 |.if GPR64 2230 |.if GPR64
1952 | la CARG2, -8(BASE) 2231 | la CARG2, -8(BASE)
1953 |.else 2232 |.elif FPU
1954 | la CARG1, -8(BASE) 2233 | la CARG1, -8(BASE)
2234 |.else
2235 | la CARG3, -8(BASE)
1955 |.endif 2236 |.endif
1956 | lwz PC, FRAME_PC(BASE) 2237 | lwz PC, FRAME_PC(BASE)
1957 | blex modf 2238 | blex modf
1958 | la RA, -8(BASE) 2239 | la RA, -8(BASE)
2240 |.if FPU
1959 | stfd FARG1, 0(BASE) 2241 | stfd FARG1, 0(BASE)
2242 |.else
2243 | stw CRET1, 0(BASE)
2244 | stw CRET2, 4(BASE)
2245 |.endif
1960 | li RD, (2+1)*8 2246 | li RD, (2+1)*8
1961 | b ->fff_res 2247 | b ->fff_res
1962 | 2248 |
@@ -1964,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx)
1964 |.if DUALNUM 2250 |.if DUALNUM
1965 | .ffunc_1 name 2251 | .ffunc_1 name
1966 | checknum CARG3 2252 | checknum CARG3
1967 | addi TMP1, BASE, 8 2253 | addi SAVE0, BASE, 8
1968 | add TMP2, BASE, NARGS8:RC 2254 | add SAVE1, BASE, NARGS8:RC
1969 | bne >4 2255 | bne >4
1970 |1: // Handle integers. 2256 |1: // Handle integers.
1971 | lwz CARG4, 0(TMP1) 2257 | lwz CARG4, 0(SAVE0)
1972 | cmplw cr1, TMP1, TMP2 2258 | cmplw cr1, SAVE0, SAVE1
1973 | lwz CARG2, 4(TMP1) 2259 | lwz CARG2, 4(SAVE0)
1974 | bge cr1, ->fff_resi 2260 | bge cr1, ->fff_resi
1975 | checknum CARG4 2261 | checknum CARG4
1976 | xoris TMP0, CARG1, 0x8000 2262 | xoris TMP0, CARG1, 0x8000
@@ -1987,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx)
1987 |.if GPR64 2273 |.if GPR64
1988 | rldicl CARG1, CARG1, 0, 32 2274 | rldicl CARG1, CARG1, 0, 32
1989 |.endif 2275 |.endif
1990 | addi TMP1, TMP1, 8 2276 | addi SAVE0, SAVE0, 8
1991 | b <1 2277 | b <1
1992 |3: 2278 |3:
1993 | bge ->fff_fallback 2279 | bge ->fff_fallback
1994 | // Convert intermediate result to number and continue below. 2280 | // Convert intermediate result to number and continue below.
2281 |.if FPU
1995 | tonum_i FARG1, CARG1 2282 | tonum_i FARG1, CARG1
1996 | lfd FARG2, 0(TMP1) 2283 | lfd FARG2, 0(SAVE0)
2284 |.else
2285 | mr CARG2, CARG1
2286 | bl ->vm_sfi2d_1
2287 | lwz CARG3, 0(SAVE0)
2288 | lwz CARG4, 4(SAVE0)
2289 |.endif
1997 | b >6 2290 | b >6
1998 |4: 2291 |4:
2292 |.if FPU
1999 | lfd FARG1, 0(BASE) 2293 | lfd FARG1, 0(BASE)
2294 |.else
2295 | lwz CARG1, 0(BASE)
2296 | lwz CARG2, 4(BASE)
2297 |.endif
2000 | bge ->fff_fallback 2298 | bge ->fff_fallback
2001 |5: // Handle numbers. 2299 |5: // Handle numbers.
2002 | lwz CARG4, 0(TMP1) 2300 | lwz CARG3, 0(SAVE0)
2003 | cmplw cr1, TMP1, TMP2 2301 | cmplw cr1, SAVE0, SAVE1
2004 | lfd FARG2, 0(TMP1) 2302 |.if FPU
2303 | lfd FARG2, 0(SAVE0)
2304 |.else
2305 | lwz CARG4, 4(SAVE0)
2306 |.endif
2005 | bge cr1, ->fff_resn 2307 | bge cr1, ->fff_resn
2006 | checknum CARG4; bge >7 2308 | checknum CARG3; bge >7
2007 |6: 2309 |6:
2310 | addi SAVE0, SAVE0, 8
2311 |.if FPU
2008 | fsub f0, FARG1, FARG2 2312 | fsub f0, FARG1, FARG2
2009 | addi TMP1, TMP1, 8
2010 |.if ismax 2313 |.if ismax
2011 | fsel FARG1, f0, FARG1, FARG2 2314 | fsel FARG1, f0, FARG1, FARG2
2012 |.else 2315 |.else
2013 | fsel FARG1, f0, FARG2, FARG1 2316 | fsel FARG1, f0, FARG2, FARG1
2014 |.endif 2317 |.endif
2318 |.else
2319 | stw CARG1, SFSAVE_1
2320 | stw CARG2, SFSAVE_2
2321 | stw CARG3, SFSAVE_3
2322 | stw CARG4, SFSAVE_4
2323 | blex __ledf2
2324 | cmpwi CRET1, 0
2325 |.if ismax
2326 | blt >8
2327 |.else
2328 | bge >8
2329 |.endif
2330 | lwz CARG1, SFSAVE_1
2331 | lwz CARG2, SFSAVE_2
2332 | b <5
2333 |8:
2334 | lwz CARG1, SFSAVE_3
2335 | lwz CARG2, SFSAVE_4
2336 |.endif
2015 | b <5 2337 | b <5
2016 |7: // Convert integer to number and continue above. 2338 |7: // Convert integer to number and continue above.
2017 | lwz CARG2, 4(TMP1) 2339 | lwz CARG3, 4(SAVE0)
2018 | bne ->fff_fallback 2340 | bne ->fff_fallback
2019 | tonum_i FARG2, CARG2 2341 |.if FPU
2342 | tonum_i FARG2, CARG3
2343 |.else
2344 | bl ->vm_sfi2d_2
2345 |.endif
2020 | b <6 2346 | b <6
2021 |.else 2347 |.else
2022 | .ffunc_n name 2348 | .ffunc_n name
@@ -2044,11 +2370,6 @@ static void build_subroutines(BuildCtx *ctx)
2044 | 2370 |
2045 |//-- String library ----------------------------------------------------- 2371 |//-- String library -----------------------------------------------------
2046 | 2372 |
2047 |.ffunc_1 string_len
2048 | checkstr CARG3; bne ->fff_fallback
2049 | lwz CRET1, STR:CARG1->len
2050 | b ->fff_resi
2051 |
2052 |.ffunc string_byte // Only handle the 1-arg case here. 2373 |.ffunc string_byte // Only handle the 1-arg case here.
2053 | cmplwi NARGS8:RC, 8 2374 | cmplwi NARGS8:RC, 8
2054 | lwz CARG3, 0(BASE) 2375 | lwz CARG3, 0(BASE)
@@ -2103,6 +2424,7 @@ static void build_subroutines(BuildCtx *ctx)
2103 | stp BASE, L->base 2424 | stp BASE, L->base
2104 | stw PC, SAVE_PC 2425 | stw PC, SAVE_PC
2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2426 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2427 |->fff_resstr:
2106 | // Returns GCstr *. 2428 | // Returns GCstr *.
2107 | lp BASE, L->base 2429 | lp BASE, L->base
2108 | li CARG3, LJ_TSTR 2430 | li CARG3, LJ_TSTR
@@ -2180,114 +2502,29 @@ static void build_subroutines(BuildCtx *ctx)
2180 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2502 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2181 | b <3 2503 | b <3
2182 | 2504 |
2183 |.ffunc string_rep // Only handle the 1-char case inline. 2505 |.macro ffstring_op, name
2184 | ffgccheck 2506 | .ffunc string_ .. name
2185 | cmplwi NARGS8:RC, 16
2186 | lwz TMP0, 0(BASE)
2187 | lwz STR:CARG1, 4(BASE)
2188 | lwz CARG4, 8(BASE)
2189 |.if DUALNUM
2190 | lwz CARG3, 12(BASE)
2191 |.else
2192 | lfd FARG2, 8(BASE)
2193 |.endif
2194 | bne ->fff_fallback // Exactly 2 arguments.
2195 | checkstr TMP0; bne ->fff_fallback
2196 |.if DUALNUM
2197 | checknum CARG4; bne ->fff_fallback
2198 |.else
2199 | checknum CARG4; bge ->fff_fallback
2200 | toint CARG3, FARG2
2201 |.endif
2202 | lwz TMP0, STR:CARG1->len
2203 | cmpwi CARG3, 0
2204 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2205 | ble >2 // Count <= 0? (or non-int)
2206 | cmplwi TMP0, 1
2207 | subi TMP2, CARG3, 1
2208 | blt >2 // Zero length string?
2209 | cmplw cr1, TMP1, CARG3
2210 | bne ->fff_fallback // Fallback for > 1-char strings.
2211 | lbz TMP0, STR:CARG1[1]
2212 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2213 | blt cr1, ->fff_fallback
2214 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2215 | cmplwi TMP2, 0
2216 | stbx TMP0, CARG2, TMP2
2217 | subi TMP2, TMP2, 1
2218 | bne <1
2219 | b ->fff_newstr
2220 |2: // Return empty string.
2221 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2222 | li CARG3, LJ_TSTR
2223 | b ->fff_restv
2224 |
2225 |.ffunc string_reverse
2226 | ffgccheck
2227 | cmplwi NARGS8:RC, 8
2228 | lwz CARG3, 0(BASE)
2229 | lwz STR:CARG1, 4(BASE)
2230 | blt ->fff_fallback
2231 | checkstr CARG3
2232 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2233 | bne ->fff_fallback
2234 | lwz CARG3, STR:CARG1->len
2235 | la CARG1, #STR(STR:CARG1)
2236 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2237 | li TMP2, 0
2238 | cmplw TMP1, CARG3
2239 | subi TMP3, CARG3, 1
2240 | blt ->fff_fallback
2241 |1: // Reverse string copy.
2242 | cmpwi TMP3, 0
2243 | lbzx TMP1, CARG1, TMP2
2244 | blty ->fff_newstr
2245 | stbx TMP1, CARG2, TMP3
2246 | subi TMP3, TMP3, 1
2247 | addi TMP2, TMP2, 1
2248 | b <1
2249 |
2250 |.macro ffstring_case, name, lo
2251 | .ffunc name
2252 | ffgccheck 2507 | ffgccheck
2253 | cmplwi NARGS8:RC, 8 2508 | cmplwi NARGS8:RC, 8
2254 | lwz CARG3, 0(BASE) 2509 | lwz CARG3, 0(BASE)
2255 | lwz STR:CARG1, 4(BASE) 2510 | lwz STR:CARG2, 4(BASE)
2256 | blt ->fff_fallback 2511 | blt ->fff_fallback
2257 | checkstr CARG3 2512 | checkstr CARG3
2258 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2513 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2259 | bne ->fff_fallback 2514 | bne ->fff_fallback
2260 | lwz CARG3, STR:CARG1->len 2515 | lwz TMP0, SBUF:CARG1->b
2261 | la CARG1, #STR(STR:CARG1) 2516 | stw L, SBUF:CARG1->L
2262 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2517 | stp BASE, L->base
2263 | cmplw TMP1, CARG3 2518 | stw PC, SAVE_PC
2264 | li TMP2, 0 2519 | stw TMP0, SBUF:CARG1->p
2265 | blt ->fff_fallback 2520 | bl extern lj_buf_putstr_ .. name
2266 |1: // ASCII case conversion. 2521 | bl extern lj_buf_tostr
2267 | cmplw TMP2, CARG3 2522 | b ->fff_resstr
2268 | lbzx TMP1, CARG1, TMP2
2269 | bgey ->fff_newstr
2270 | subi TMP0, TMP1, lo
2271 | xori TMP3, TMP1, 0x20
2272 | addic TMP0, TMP0, -26
2273 | subfe TMP3, TMP3, TMP3
2274 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2275 | xor TMP1, TMP1, TMP3
2276 | stbx TMP1, CARG2, TMP2
2277 | addi TMP2, TMP2, 1
2278 | b <1
2279 |.endmacro 2523 |.endmacro
2280 | 2524 |
2281 |ffstring_case string_lower, 65 2525 |ffstring_op reverse
2282 |ffstring_case string_upper, 97 2526 |ffstring_op lower
2283 | 2527 |ffstring_op upper
2284 |//-- Table library ------------------------------------------------------
2285 |
2286 |.ffunc_1 table_getn
2287 | checktab CARG3; bne ->fff_fallback
2288 | bl extern lj_tab_len // (GCtab *t)
2289 | // Returns uint32_t (but less than 2^31).
2290 | b ->fff_resi
2291 | 2528 |
2292 |//-- Bit library -------------------------------------------------------- 2529 |//-- Bit library --------------------------------------------------------
2293 | 2530 |
@@ -2305,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx)
2305 | 2542 |
2306 |.macro .ffunc_bit_op, name, ins 2543 |.macro .ffunc_bit_op, name, ins
2307 | .ffunc_bit name 2544 | .ffunc_bit name
2308 | addi TMP1, BASE, 8 2545 | addi SAVE0, BASE, 8
2309 | add TMP2, BASE, NARGS8:RC 2546 | add SAVE1, BASE, NARGS8:RC
2310 |1: 2547 |1:
2311 | lwz CARG4, 0(TMP1) 2548 | lwz CARG4, 0(SAVE0)
2312 | cmplw cr1, TMP1, TMP2 2549 | cmplw cr1, SAVE0, SAVE1
2313 |.if DUALNUM 2550 |.if DUALNUM
2314 | lwz CARG2, 4(TMP1) 2551 | lwz CARG2, 4(SAVE0)
2315 |.else 2552 |.else
2316 | lfd FARG1, 0(TMP1) 2553 | lfd FARG1, 0(SAVE0)
2317 |.endif 2554 |.endif
2318 | bgey cr1, ->fff_resi 2555 | bgey cr1, ->fff_resi
2319 | checknum CARG4 2556 | checknum CARG4
2320 |.if DUALNUM 2557 |.if DUALNUM
2558 |.if FPU
2321 | bnel ->fff_bitop_fb 2559 | bnel ->fff_bitop_fb
2322 |.else 2560 |.else
2561 | beq >3
2562 | stw CARG1, SFSAVE_1
2563 | bl ->fff_bitop_fb
2564 | mr CARG2, CARG1
2565 | lwz CARG1, SFSAVE_1
2566 |3:
2567 |.endif
2568 |.else
2323 | fadd FARG1, FARG1, TOBIT 2569 | fadd FARG1, FARG1, TOBIT
2324 | bge ->fff_fallback 2570 | bge ->fff_fallback
2325 | stfd FARG1, TMPD 2571 | stfd FARG1, TMPD
2326 | lwz CARG2, TMPD_LO 2572 | lwz CARG2, TMPD_LO
2327 |.endif 2573 |.endif
2328 | ins CARG1, CARG1, CARG2 2574 | ins CARG1, CARG1, CARG2
2329 | addi TMP1, TMP1, 8 2575 | addi SAVE0, SAVE0, 8
2330 | b <1 2576 | b <1
2331 |.endmacro 2577 |.endmacro
2332 | 2578 |
@@ -2348,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx)
2348 |.macro .ffunc_bit_sh, name, ins, shmod 2594 |.macro .ffunc_bit_sh, name, ins, shmod
2349 |.if DUALNUM 2595 |.if DUALNUM
2350 | .ffunc_2 bit_..name 2596 | .ffunc_2 bit_..name
2597 |.if FPU
2351 | checknum CARG3; bnel ->fff_tobit_fb 2598 | checknum CARG3; bnel ->fff_tobit_fb
2599 |.else
2600 | checknum CARG3; beq >1
2601 | bl ->fff_tobit_fb
2602 | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
2603 |1:
2604 |.endif
2352 | // Note: no inline conversion from number for 2nd argument! 2605 | // Note: no inline conversion from number for 2nd argument!
2353 | checknum CARG4; bne ->fff_fallback 2606 | checknum CARG4; bne ->fff_fallback
2354 |.else 2607 |.else
@@ -2385,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx)
2385 |->fff_resn: 2638 |->fff_resn:
2386 | lwz PC, FRAME_PC(BASE) 2639 | lwz PC, FRAME_PC(BASE)
2387 | la RA, -8(BASE) 2640 | la RA, -8(BASE)
2641 |.if FPU
2388 | stfd FARG1, -8(BASE) 2642 | stfd FARG1, -8(BASE)
2643 |.else
2644 | stw CARG1, -8(BASE)
2645 | stw CARG2, -4(BASE)
2646 |.endif
2389 | b ->fff_res1 2647 | b ->fff_res1
2390 | 2648 |
2391 |// Fallback FP number to bit conversion. 2649 |// Fallback FP number to bit conversion.
2392 |->fff_tobit_fb: 2650 |->fff_tobit_fb:
2393 |.if DUALNUM 2651 |.if DUALNUM
2652 |.if FPU
2394 | lfd FARG1, 0(BASE) 2653 | lfd FARG1, 0(BASE)
2395 | bgt ->fff_fallback 2654 | bgt ->fff_fallback
2396 | fadd FARG1, FARG1, TOBIT 2655 | fadd FARG1, FARG1, TOBIT
2397 | stfd FARG1, TMPD 2656 | stfd FARG1, TMPD
2398 | lwz CARG1, TMPD_LO 2657 | lwz CARG1, TMPD_LO
2399 | blr 2658 | blr
2659 |.else
2660 | bgt ->fff_fallback
2661 | mr CARG2, CARG1
2662 | mr CARG1, CARG3
2663 |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
2664 |->vm_tobit:
2665 | slwi TMP2, CARG1, 1
2666 | addis TMP2, TMP2, 0x0020
2667 | cmpwi TMP2, 0
2668 | bge >2
2669 | li TMP1, 0x3e0
2670 | srawi TMP2, TMP2, 21
2671 | not TMP1, TMP1
2672 | sub. TMP2, TMP1, TMP2
2673 | cmpwi cr7, CARG1, 0
2674 | blt >1
2675 | slwi TMP1, CARG1, 11
2676 | srwi TMP0, CARG2, 21
2677 | oris TMP1, TMP1, 0x8000
2678 | or TMP1, TMP1, TMP0
2679 | srw CARG1, TMP1, TMP2
2680 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2681 | neg CARG1, CARG1
2682 | blr
2683 |1:
2684 | addi TMP2, TMP2, 21
2685 | srw TMP1, CARG2, TMP2
2686 | slwi CARG2, CARG1, 12
2687 | subfic TMP2, TMP2, 20
2688 | slw TMP0, CARG2, TMP2
2689 | or CARG1, TMP1, TMP0
2690 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2691 | neg CARG1, CARG1
2692 | blr
2693 |2:
2694 | li CARG1, 0
2695 | blr
2696 |.endif
2400 |.endif 2697 |.endif
2401 |->fff_bitop_fb: 2698 |->fff_bitop_fb:
2402 |.if DUALNUM 2699 |.if DUALNUM
2403 | lfd FARG1, 0(TMP1) 2700 |.if FPU
2701 | lfd FARG1, 0(SAVE0)
2404 | bgt ->fff_fallback 2702 | bgt ->fff_fallback
2405 | fadd FARG1, FARG1, TOBIT 2703 | fadd FARG1, FARG1, TOBIT
2406 | stfd FARG1, TMPD 2704 | stfd FARG1, TMPD
2407 | lwz CARG2, TMPD_LO 2705 | lwz CARG2, TMPD_LO
2408 | blr 2706 | blr
2707 |.else
2708 | bgt ->fff_fallback
2709 | mr CARG1, CARG4
2710 | b ->vm_tobit
2711 |.endif
2409 |.endif 2712 |.endif
2410 | 2713 |
2411 |//----------------------------------------------------------------------- 2714 |//-----------------------------------------------------------------------
@@ -2589,15 +2892,88 @@ static void build_subroutines(BuildCtx *ctx)
2589 | mtctr CRET1 2892 | mtctr CRET1
2590 | bctr 2893 | bctr
2591 | 2894 |
2895 |->cont_stitch: // Trace stitching.
2896 |.if JIT
2897 | // RA = resultptr, RB = meta base
2898 | lwz INS, -4(PC)
2899 | lwz TRACE:TMP2, -20(RB) // Save previous trace.
2900 | addic. TMP1, MULTRES, -8
2901 | decode_RA8 RC, INS // Call base.
2902 | beq >2
2903 |1: // Move results down.
2904 |.if FPU
2905 | lfd f0, 0(RA)
2906 |.else
2907 | lwz CARG1, 0(RA)
2908 | lwz CARG2, 4(RA)
2909 |.endif
2910 | addic. TMP1, TMP1, -8
2911 | addi RA, RA, 8
2912 |.if FPU
2913 | stfdx f0, BASE, RC
2914 |.else
2915 | add CARG3, BASE, RC
2916 | stw CARG1, 0(CARG3)
2917 | stw CARG2, 4(CARG3)
2918 |.endif
2919 | addi RC, RC, 8
2920 | bne <1
2921 |2:
2922 | decode_RA8 RA, INS
2923 | decode_RB8 RB, INS
2924 | add RA, RA, RB
2925 |3:
2926 | cmplw RA, RC
2927 | bgt >9 // More results wanted?
2928 |
2929 | lhz TMP3, TRACE:TMP2->traceno
2930 | lhz RD, TRACE:TMP2->link
2931 | cmpw RD, TMP3
2932 | cmpwi cr1, RD, 0
2933 | beq ->cont_nop // Blacklisted.
2934 | slwi RD, RD, 3
2935 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2936 |
2937 | // Stitch a new trace to the previous trace.
2938 | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
2939 | stp L, DISPATCH_J(L)(DISPATCH)
2940 | stp BASE, L->base
2941 | addi CARG1, DISPATCH, GG_DISP2J
2942 | mr CARG2, PC
2943 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2944 | lp BASE, L->base
2945 | b ->cont_nop
2946 |
2947 |9:
2948 | stwx TISNIL, BASE, RC
2949 | addi RC, RC, 8
2950 | b <3
2951 |.endif
2952 |
2953 |->vm_profhook: // Dispatch target for profiler hook.
2954#if LJ_HASPROFILE
2955 | mr CARG1, L
2956 | stw MULTRES, SAVE_MULTRES
2957 | mr CARG2, PC
2958 | stp BASE, L->base
2959 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2960 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2961 | lp BASE, L->base
2962 | subi PC, PC, 4
2963 | b ->cont_nop
2964#endif
2965 |
2592 |//----------------------------------------------------------------------- 2966 |//-----------------------------------------------------------------------
2593 |//-- Trace exit handler ------------------------------------------------- 2967 |//-- Trace exit handler -------------------------------------------------
2594 |//----------------------------------------------------------------------- 2968 |//-----------------------------------------------------------------------
2595 | 2969 |
2596 |.macro savex_, a, b, c, d 2970 |.macro savex_, a, b, c, d
2971 |.if FPU
2597 | stfd f..a, 16+a*8(sp) 2972 | stfd f..a, 16+a*8(sp)
2598 | stfd f..b, 16+b*8(sp) 2973 | stfd f..b, 16+b*8(sp)
2599 | stfd f..c, 16+c*8(sp) 2974 | stfd f..c, 16+c*8(sp)
2600 | stfd f..d, 16+d*8(sp) 2975 | stfd f..d, 16+d*8(sp)
2976 |.endif
2601 |.endmacro 2977 |.endmacro
2602 | 2978 |
2603 |->vm_exit_handler: 2979 |->vm_exit_handler:
@@ -2623,16 +2999,16 @@ static void build_subroutines(BuildCtx *ctx)
2623 | savex_ 20,21,22,23 2999 | savex_ 20,21,22,23
2624 | lhz CARG4, 2(CARG3) // Load trace number. 3000 | lhz CARG4, 2(CARG3) // Load trace number.
2625 | savex_ 24,25,26,27 3001 | savex_ 24,25,26,27
2626 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 3002 | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
2627 | savex_ 28,29,30,31 3003 | savex_ 28,29,30,31
2628 | sub CARG3, TMP0, CARG3 // Compute exit number. 3004 | sub CARG3, TMP0, CARG3 // Compute exit number.
2629 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 3005 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2630 | srwi CARG3, CARG3, 2 3006 | srwi CARG3, CARG3, 2
2631 | stw L, DISPATCH_J(L)(DISPATCH) 3007 | stp L, DISPATCH_J(L)(DISPATCH)
2632 | subi CARG3, CARG3, 2 3008 | subi CARG3, CARG3, 2
2633 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
2634 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2635 | stp BASE, L->base 3009 | stp BASE, L->base
3010 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
3011 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
2636 | addi CARG1, DISPATCH, GG_DISP2J 3012 | addi CARG1, DISPATCH, GG_DISP2J
2637 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 3013 | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
2638 | addi CARG2, sp, 16 3014 | addi CARG2, sp, 16
@@ -2656,28 +3032,29 @@ static void build_subroutines(BuildCtx *ctx)
2656 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 3032 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
2657 | lwz L, SAVE_L 3033 | lwz L, SAVE_L
2658 | addi DISPATCH, JGL, -GG_DISP2G-32768 3034 | addi DISPATCH, JGL, -GG_DISP2G-32768
3035 | stp BASE, L->base
2659 |1: 3036 |1:
2660 | cmpwi CARG1, 0 3037 | cmpwi CARG1, 0
2661 | blt >3 // Check for error from exit. 3038 | blt >9 // Check for error from exit.
2662 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 3039 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2663 | slwi MULTRES, CARG1, 3 3040 | slwi MULTRES, CARG1, 3
2664 | li TMP2, 0 3041 | li TMP2, 0
2665 | stw MULTRES, SAVE_MULTRES 3042 | stw MULTRES, SAVE_MULTRES
2666 | lwz TMP1, LFUNC:TMP1->pc 3043 | lwz TMP1, LFUNC:RB->pc
2667 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 3044 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2668 | lwz KBASE, PC2PROTO(k)(TMP1) 3045 | lwz KBASE, PC2PROTO(k)(TMP1)
2669 | // Setup type comparison constants. 3046 | // Setup type comparison constants.
2670 | li TISNUM, LJ_TISNUM 3047 | li TISNUM, LJ_TISNUM
2671 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3048 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2672 | stw TMP3, TMPD 3049 | .FPU stw TMP3, TMPD
2673 | li ZERO, 0 3050 | li ZERO, 0
2674 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3051 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2675 | lfs TOBIT, TMPD 3052 | .FPU lfs TOBIT, TMPD
2676 | stw TMP3, TMPD 3053 | .FPU stw TMP3, TMPD
2677 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3054 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2678 | li TISNIL, LJ_TNIL 3055 | li TISNIL, LJ_TNIL
2679 | stw TMP0, TONUM_HI 3056 | .FPU stw TMP0, TONUM_HI
2680 | lfs TONUM, TMPD 3057 | .FPU lfs TONUM, TMPD
2681 | // Modified copy of ins_next which handles function header dispatch, too. 3058 | // Modified copy of ins_next which handles function header dispatch, too.
2682 | lwz INS, 0(PC) 3059 | lwz INS, 0(PC)
2683 | addi PC, PC, 4 3060 | addi PC, PC, 4
@@ -2694,11 +3071,25 @@ static void build_subroutines(BuildCtx *ctx)
2694 | decode_RC8 RC, INS 3071 | decode_RC8 RC, INS
2695 | bctr 3072 | bctr
2696 |2: 3073 |2:
3074 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
3075 | blt >3
3076 | // Check frame below fast function.
3077 | lwz TMP1, FRAME_PC(BASE)
3078 | andix. TMP0, TMP1, FRAME_TYPE
3079 | bney >3 // Trace stitching continuation?
3080 | // Otherwise set KBASE for Lua function below fast function.
3081 | lwz TMP2, -4(TMP1)
3082 | decode_RA8 TMP0, TMP2
3083 | sub TMP1, BASE, TMP0
3084 | lwz LFUNC:TMP2, -12(TMP1)
3085 | lwz TMP1, LFUNC:TMP2->pc
3086 | lwz KBASE, PC2PROTO(k)(TMP1)
3087 |3:
2697 | subi RC, MULTRES, 8 3088 | subi RC, MULTRES, 8
2698 | add RA, RA, BASE 3089 | add RA, RA, BASE
2699 | bctr 3090 | bctr
2700 | 3091 |
2701 |3: // Rethrow error from the right C frame. 3092 |9: // Rethrow error from the right C frame.
2702 | neg CARG2, CARG1 3093 | neg CARG2, CARG1
2703 | mr CARG1, L 3094 | mr CARG1, L
2704 | bl extern lj_err_throw // (lua_State *L, int errcode) 3095 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -2708,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx)
2708 |//-- Math helper functions ---------------------------------------------- 3099 |//-- Math helper functions ----------------------------------------------
2709 |//----------------------------------------------------------------------- 3100 |//-----------------------------------------------------------------------
2710 | 3101 |
2711 |// NYI: Use internal implementations of floor, ceil, trunc. 3102 |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
3103 |
3104 |.macro sfi2d, AHI, ALO
3105 |.if not FPU
3106 | mr. AHI, ALO
3107 | bclr 12, 2 // Handle zero first.
3108 | srawi TMP0, ALO, 31
3109 | xor TMP1, ALO, TMP0
3110 | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
3111 | cntlzw AHI, TMP1
3112 | andix. TMP0, TMP0, 0x800 // Mask sign bit.
3113 | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
3114 | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
3115 | slwi ALO, TMP1, 21
3116 | or AHI, AHI, TMP0 // Sign | Exponent.
3117 | srwi TMP1, TMP1, 11
3118 | slwi AHI, AHI, 20 // Align left.
3119 | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
3120 | blr
3121 |.endif
3122 |.endmacro
3123 |
3124 |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
3125 |->vm_sfi2d_1:
3126 | sfi2d CARG1, CARG2
3127 |
3128 |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
3129 |->vm_sfi2d_2:
3130 | sfi2d CARG3, CARG4
2712 | 3131 |
2713 |->vm_modi: 3132 |->vm_modi:
2714 | divwo. TMP0, CARG1, CARG2 3133 | divwo. TMP0, CARG1, CARG2
@@ -2776,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx)
2776 | addi DISPATCH, r12, GG_G2DISP 3195 | addi DISPATCH, r12, GG_G2DISP
2777 | stw r11, CTSTATE->cb.slot 3196 | stw r11, CTSTATE->cb.slot
2778 | stw r3, CTSTATE->cb.gpr[0] 3197 | stw r3, CTSTATE->cb.gpr[0]
2779 | stfd f1, CTSTATE->cb.fpr[0] 3198 | .FPU stfd f1, CTSTATE->cb.fpr[0]
2780 | stw r4, CTSTATE->cb.gpr[1] 3199 | stw r4, CTSTATE->cb.gpr[1]
2781 | stfd f2, CTSTATE->cb.fpr[1] 3200 | .FPU stfd f2, CTSTATE->cb.fpr[1]
2782 | stw r5, CTSTATE->cb.gpr[2] 3201 | stw r5, CTSTATE->cb.gpr[2]
2783 | stfd f3, CTSTATE->cb.fpr[2] 3202 | .FPU stfd f3, CTSTATE->cb.fpr[2]
2784 | stw r6, CTSTATE->cb.gpr[3] 3203 | stw r6, CTSTATE->cb.gpr[3]
2785 | stfd f4, CTSTATE->cb.fpr[3] 3204 | .FPU stfd f4, CTSTATE->cb.fpr[3]
2786 | stw r7, CTSTATE->cb.gpr[4] 3205 | stw r7, CTSTATE->cb.gpr[4]
2787 | stfd f5, CTSTATE->cb.fpr[4] 3206 | .FPU stfd f5, CTSTATE->cb.fpr[4]
2788 | stw r8, CTSTATE->cb.gpr[5] 3207 | stw r8, CTSTATE->cb.gpr[5]
2789 | stfd f6, CTSTATE->cb.fpr[5] 3208 | .FPU stfd f6, CTSTATE->cb.fpr[5]
2790 | stw r9, CTSTATE->cb.gpr[6] 3209 | stw r9, CTSTATE->cb.gpr[6]
2791 | stfd f7, CTSTATE->cb.fpr[6] 3210 | .FPU stfd f7, CTSTATE->cb.fpr[6]
2792 | stw r10, CTSTATE->cb.gpr[7] 3211 | stw r10, CTSTATE->cb.gpr[7]
2793 | stfd f8, CTSTATE->cb.fpr[7] 3212 | .FPU stfd f8, CTSTATE->cb.fpr[7]
2794 | addi TMP0, sp, CFRAME_SPACE+8 3213 | addi TMP0, sp, CFRAME_SPACE+8
2795 | stw TMP0, CTSTATE->cb.stack 3214 | stw TMP0, CTSTATE->cb.stack
2796 | mr CARG1, CTSTATE 3215 | mr CARG1, CTSTATE
@@ -2801,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx)
2801 | lp BASE, L:CRET1->base 3220 | lp BASE, L:CRET1->base
2802 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 3221 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2803 | lp RC, L:CRET1->top 3222 | lp RC, L:CRET1->top
2804 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3223 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2805 | li ZERO, 0 3224 | li ZERO, 0
2806 | mr L, CRET1 3225 | mr L, CRET1
2807 | stw TMP3, TMPD 3226 | .FPU stw TMP3, TMPD
2808 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3227 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2809 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3228 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2810 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3229 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2811 | stw TMP0, TONUM_HI 3230 | .FPU stw TMP0, TONUM_HI
2812 | li TISNIL, LJ_TNIL 3231 | li TISNIL, LJ_TNIL
2813 | li_vmstate INTERP 3232 | li_vmstate INTERP
2814 | lfs TOBIT, TMPD 3233 | .FPU lfs TOBIT, TMPD
2815 | stw TMP3, TMPD 3234 | .FPU stw TMP3, TMPD
2816 | sub RC, RC, BASE 3235 | sub RC, RC, BASE
2817 | st_vmstate 3236 | st_vmstate
2818 | lfs TONUM, TMPD 3237 | .FPU lfs TONUM, TMPD
2819 | ins_callt 3238 | ins_callt
2820 |.endif 3239 |.endif
2821 | 3240 |
@@ -2829,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx)
2829 | mr CARG2, RA 3248 | mr CARG2, RA
2830 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 3249 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2831 | lwz CRET1, CTSTATE->cb.gpr[0] 3250 | lwz CRET1, CTSTATE->cb.gpr[0]
2832 | lfd FARG1, CTSTATE->cb.fpr[0] 3251 | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
2833 | lwz CRET2, CTSTATE->cb.gpr[1] 3252 | lwz CRET2, CTSTATE->cb.gpr[1]
2834 | b ->vm_leave_unw 3253 | b ->vm_leave_unw
2835 |.endif 3254 |.endif
@@ -2863,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx)
2863 | bge <1 3282 | bge <1
2864 |2: 3283 |2:
2865 | bney cr1, >3 3284 | bney cr1, >3
2866 | lfd f1, CCSTATE->fpr[0] 3285 | .FPU lfd f1, CCSTATE->fpr[0]
2867 | lfd f2, CCSTATE->fpr[1] 3286 | .FPU lfd f2, CCSTATE->fpr[1]
2868 | lfd f3, CCSTATE->fpr[2] 3287 | .FPU lfd f3, CCSTATE->fpr[2]
2869 | lfd f4, CCSTATE->fpr[3] 3288 | .FPU lfd f4, CCSTATE->fpr[3]
2870 | lfd f5, CCSTATE->fpr[4] 3289 | .FPU lfd f5, CCSTATE->fpr[4]
2871 | lfd f6, CCSTATE->fpr[5] 3290 | .FPU lfd f6, CCSTATE->fpr[5]
2872 | lfd f7, CCSTATE->fpr[6] 3291 | .FPU lfd f7, CCSTATE->fpr[6]
2873 | lfd f8, CCSTATE->fpr[7] 3292 | .FPU lfd f8, CCSTATE->fpr[7]
2874 |3: 3293 |3:
2875 | lp TMP0, CCSTATE->func 3294 | lp TMP0, CCSTATE->func
2876 | lwz CARG2, CCSTATE->gpr[1] 3295 | lwz CARG2, CCSTATE->gpr[1]
@@ -2887,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx)
2887 | lwz TMP2, -4(r14) 3306 | lwz TMP2, -4(r14)
2888 | lwz TMP0, 4(r14) 3307 | lwz TMP0, 4(r14)
2889 | stw CARG1, CCSTATE:TMP1->gpr[0] 3308 | stw CARG1, CCSTATE:TMP1->gpr[0]
2890 | stfd FARG1, CCSTATE:TMP1->fpr[0] 3309 | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
2891 | stw CARG2, CCSTATE:TMP1->gpr[1] 3310 | stw CARG2, CCSTATE:TMP1->gpr[1]
2892 | mtlr TMP0 3311 | mtlr TMP0
2893 | stw CARG3, CCSTATE:TMP1->gpr[2] 3312 | stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2916,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2916 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3335 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2917 | // RA = src1*8, RD = src2*8, JMP with RD = target 3336 | // RA = src1*8, RD = src2*8, JMP with RD = target
2918 |.if DUALNUM 3337 |.if DUALNUM
2919 | lwzux TMP0, RA, BASE 3338 | lwzux CARG1, RA, BASE
2920 | addi PC, PC, 4 3339 | addi PC, PC, 4
2921 | lwz CARG2, 4(RA) 3340 | lwz CARG2, 4(RA)
2922 | lwzux TMP1, RD, BASE 3341 | lwzux CARG3, RD, BASE
2923 | lwz TMP2, -4(PC) 3342 | lwz TMP2, -4(PC)
2924 | checknum cr0, TMP0 3343 | checknum cr0, CARG1
2925 | lwz CARG3, 4(RD) 3344 | lwz CARG4, 4(RD)
2926 | decode_RD4 TMP2, TMP2 3345 | decode_RD4 TMP2, TMP2
2927 | checknum cr1, TMP1 3346 | checknum cr1, CARG3
2928 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3347 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
2929 | bne cr0, >7 3348 | bne cr0, >7
2930 | bne cr1, >8 3349 | bne cr1, >8
2931 | cmpw CARG2, CARG3 3350 | cmpw CARG2, CARG4
2932 if (op == BC_ISLT) { 3351 if (op == BC_ISLT) {
2933 | bge >2 3352 | bge >2
2934 } else if (op == BC_ISGE) { 3353 } else if (op == BC_ISGE) {
@@ -2939,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2939 | ble >2 3358 | ble >2
2940 } 3359 }
2941 |1: 3360 |1:
2942 | add PC, PC, TMP2 3361 | add PC, PC, SAVE0
2943 |2: 3362 |2:
2944 | ins_next 3363 | ins_next
2945 | 3364 |
2946 |7: // RA is not an integer. 3365 |7: // RA is not an integer.
2947 | bgt cr0, ->vmeta_comp 3366 | bgt cr0, ->vmeta_comp
2948 | // RA is a number. 3367 | // RA is a number.
2949 | lfd f0, 0(RA) 3368 | .FPU lfd f0, 0(RA)
2950 | bgt cr1, ->vmeta_comp 3369 | bgt cr1, ->vmeta_comp
2951 | blt cr1, >4 3370 | blt cr1, >4
2952 | // RA is a number, RD is an integer. 3371 | // RA is a number, RD is an integer.
2953 | tonum_i f1, CARG3 3372 |.if FPU
3373 | tonum_i f1, CARG4
3374 |.else
3375 | bl ->vm_sfi2d_2
3376 |.endif
2954 | b >5 3377 | b >5
2955 | 3378 |
2956 |8: // RA is an integer, RD is not an integer. 3379 |8: // RA is an integer, RD is not an integer.
2957 | bgt cr1, ->vmeta_comp 3380 | bgt cr1, ->vmeta_comp
2958 | // RA is an integer, RD is a number. 3381 | // RA is an integer, RD is a number.
3382 |.if FPU
2959 | tonum_i f0, CARG2 3383 | tonum_i f0, CARG2
3384 |.else
3385 | bl ->vm_sfi2d_1
3386 |.endif
2960 |4: 3387 |4:
2961 | lfd f1, 0(RD) 3388 | .FPU lfd f1, 0(RD)
2962 |5: 3389 |5:
3390 |.if FPU
2963 | fcmpu cr0, f0, f1 3391 | fcmpu cr0, f0, f1
3392 |.else
3393 | blex __ledf2
3394 | cmpwi CRET1, 0
3395 |.endif
2964 if (op == BC_ISLT) { 3396 if (op == BC_ISLT) {
2965 | bge <2 3397 | bge <2
2966 } else if (op == BC_ISGE) { 3398 } else if (op == BC_ISGE) {
@@ -3008,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3008 vk = op == BC_ISEQV; 3440 vk = op == BC_ISEQV;
3009 | // RA = src1*8, RD = src2*8, JMP with RD = target 3441 | // RA = src1*8, RD = src2*8, JMP with RD = target
3010 |.if DUALNUM 3442 |.if DUALNUM
3011 | lwzux TMP0, RA, BASE 3443 | lwzux CARG1, RA, BASE
3012 | addi PC, PC, 4 3444 | addi PC, PC, 4
3013 | lwz CARG2, 4(RA) 3445 | lwz CARG2, 4(RA)
3014 | lwzux TMP1, RD, BASE 3446 | lwzux CARG3, RD, BASE
3015 | checknum cr0, TMP0 3447 | checknum cr0, CARG1
3016 | lwz TMP2, -4(PC) 3448 | lwz SAVE0, -4(PC)
3017 | checknum cr1, TMP1 3449 | checknum cr1, CARG3
3018 | decode_RD4 TMP2, TMP2 3450 | decode_RD4 SAVE0, SAVE0
3019 | lwz CARG3, 4(RD) 3451 | lwz CARG4, 4(RD)
3020 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3452 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
3021 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3453 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3022 if (vk) { 3454 if (vk) {
3023 | ble cr7, ->BC_ISEQN_Z 3455 | ble cr7, ->BC_ISEQN_Z
3024 } else { 3456 } else {
3025 | ble cr7, ->BC_ISNEN_Z 3457 | ble cr7, ->BC_ISNEN_Z
3026 } 3458 }
3027 |.else 3459 |.else
3028 | lwzux TMP0, RA, BASE 3460 | lwzux CARG1, RA, BASE
3029 | lwz TMP2, 0(PC) 3461 | lwz SAVE0, 0(PC)
3030 | lfd f0, 0(RA) 3462 | lfd f0, 0(RA)
3031 | addi PC, PC, 4 3463 | addi PC, PC, 4
3032 | lwzux TMP1, RD, BASE 3464 | lwzux CARG3, RD, BASE
3033 | checknum cr0, TMP0 3465 | checknum cr0, CARG1
3034 | decode_RD4 TMP2, TMP2 3466 | decode_RD4 SAVE0, SAVE0
3035 | lfd f1, 0(RD) 3467 | lfd f1, 0(RD)
3036 | checknum cr1, TMP1 3468 | checknum cr1, CARG3
3037 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3469 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3038 | bge cr0, >5 3470 | bge cr0, >5
3039 | bge cr1, >5 3471 | bge cr1, >5
3040 | fcmpu cr0, f0, f1 3472 | fcmpu cr0, f0, f1
3041 if (vk) { 3473 if (vk) {
3042 | bne >1 3474 | bne >1
3043 | add PC, PC, TMP2 3475 | add PC, PC, SAVE0
3044 } else { 3476 } else {
3045 | beq >1 3477 | beq >1
3046 | add PC, PC, TMP2 3478 | add PC, PC, SAVE0
3047 } 3479 }
3048 |1: 3480 |1:
3049 | ins_next 3481 | ins_next
@@ -3051,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3051 |5: // Either or both types are not numbers. 3483 |5: // Either or both types are not numbers.
3052 |.if not DUALNUM 3484 |.if not DUALNUM
3053 | lwz CARG2, 4(RA) 3485 | lwz CARG2, 4(RA)
3054 | lwz CARG3, 4(RD) 3486 | lwz CARG4, 4(RD)
3055 |.endif 3487 |.endif
3056 |.if FFI 3488 |.if FFI
3057 | cmpwi cr7, TMP0, LJ_TCDATA 3489 | cmpwi cr7, CARG1, LJ_TCDATA
3058 | cmpwi cr5, TMP1, LJ_TCDATA 3490 | cmpwi cr5, CARG3, LJ_TCDATA
3059 |.endif 3491 |.endif
3060 | not TMP3, TMP0 3492 | not TMP2, CARG1
3061 | cmplw TMP0, TMP1 3493 | cmplw CARG1, CARG3
3062 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3494 | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
3063 |.if FFI 3495 |.if FFI
3064 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3496 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
3065 |.endif 3497 |.endif
3066 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3498 | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
3067 |.if FFI 3499 |.if FFI
3068 | beq cr7, ->vmeta_equal_cd 3500 | beq cr7, ->vmeta_equal_cd
3069 |.endif 3501 |.endif
3070 | cmplw cr5, CARG2, CARG3 3502 | cmplw cr5, CARG2, CARG4
3071 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3503 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
3072 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3504 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
3073 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3505 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
3074 | mr SAVE0, PC 3506 | mr SAVE1, PC
3075 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3507 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
3076 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3508 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
3077 if (vk) { 3509 if (vk) {
3078 | bne cr0, >6 3510 | bne cr0, >6
3079 | add PC, PC, TMP2 3511 | add PC, PC, SAVE0
3080 |6: 3512 |6:
3081 } else { 3513 } else {
3082 | beq cr0, >6 3514 | beq cr0, >6
3083 | add PC, PC, TMP2 3515 | add PC, PC, SAVE0
3084 |6: 3516 |6:
3085 } 3517 }
3086 |.if DUALNUM 3518 |.if DUALNUM
@@ -3095,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3095 | 3527 |
3096 | // Different tables or userdatas. Need to check __eq metamethod. 3528 | // Different tables or userdatas. Need to check __eq metamethod.
3097 | // Field metatable must be at same offset for GCtab and GCudata! 3529 | // Field metatable must be at same offset for GCtab and GCudata!
3530 | mr CARG3, CARG4
3098 | lwz TAB:TMP2, TAB:CARG2->metatable 3531 | lwz TAB:TMP2, TAB:CARG2->metatable
3099 | li CARG4, 1-vk // ne = 0 or 1. 3532 | li CARG4, 1-vk // ne = 0 or 1.
3100 | cmplwi TAB:TMP2, 0 3533 | cmplwi TAB:TMP2, 0
@@ -3102,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3102 | lbz TMP2, TAB:TMP2->nomm 3535 | lbz TMP2, TAB:TMP2->nomm
3103 | andix. TMP2, TMP2, 1<<MM_eq 3536 | andix. TMP2, TMP2, 1<<MM_eq
3104 | bne <1 // Or 'no __eq' flag set? 3537 | bne <1 // Or 'no __eq' flag set?
3105 | mr PC, SAVE0 // Restore old PC. 3538 | mr PC, SAVE1 // Restore old PC.
3106 | b ->vmeta_equal // Handle __eq metamethod. 3539 | b ->vmeta_equal // Handle __eq metamethod.
3107 break; 3540 break;
3108 3541
@@ -3143,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3143 vk = op == BC_ISEQN; 3576 vk = op == BC_ISEQN;
3144 | // RA = src*8, RD = num_const*8, JMP with RD = target 3577 | // RA = src*8, RD = num_const*8, JMP with RD = target
3145 |.if DUALNUM 3578 |.if DUALNUM
3146 | lwzux TMP0, RA, BASE 3579 | lwzux CARG1, RA, BASE
3147 | addi PC, PC, 4 3580 | addi PC, PC, 4
3148 | lwz CARG2, 4(RA) 3581 | lwz CARG2, 4(RA)
3149 | lwzux TMP1, RD, KBASE 3582 | lwzux CARG3, RD, KBASE
3150 | checknum cr0, TMP0 3583 | checknum cr0, CARG1
3151 | lwz TMP2, -4(PC) 3584 | lwz SAVE0, -4(PC)
3152 | checknum cr1, TMP1 3585 | checknum cr1, CARG3
3153 | decode_RD4 TMP2, TMP2 3586 | decode_RD4 SAVE0, SAVE0
3154 | lwz CARG3, 4(RD) 3587 | lwz CARG4, 4(RD)
3155 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3588 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3156 if (vk) { 3589 if (vk) {
3157 |->BC_ISEQN_Z: 3590 |->BC_ISEQN_Z:
3158 } else { 3591 } else {
@@ -3160,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3160 } 3593 }
3161 | bne cr0, >7 3594 | bne cr0, >7
3162 | bne cr1, >8 3595 | bne cr1, >8
3163 | cmpw CARG2, CARG3 3596 | cmpw CARG2, CARG4
3164 |4: 3597 |4:
3165 |.else 3598 |.else
3166 if (vk) { 3599 if (vk) {
@@ -3168,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3168 } else { 3601 } else {
3169 |->BC_ISNEN_Z: // Dummy label. 3602 |->BC_ISNEN_Z: // Dummy label.
3170 } 3603 }
3171 | lwzx TMP0, BASE, RA 3604 | lwzx CARG1, BASE, RA
3172 | addi PC, PC, 4 3605 | addi PC, PC, 4
3173 | lfdx f0, BASE, RA 3606 | lfdx f0, BASE, RA
3174 | lwz TMP2, -4(PC) 3607 | lwz SAVE0, -4(PC)
3175 | lfdx f1, KBASE, RD 3608 | lfdx f1, KBASE, RD
3176 | decode_RD4 TMP2, TMP2 3609 | decode_RD4 SAVE0, SAVE0
3177 | checknum TMP0 3610 | checknum CARG1
3178 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3611 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3179 | bge >3 3612 | bge >3
3180 | fcmpu cr0, f0, f1 3613 | fcmpu cr0, f0, f1
3181 |.endif 3614 |.endif
3182 if (vk) { 3615 if (vk) {
3183 | bne >1 3616 | bne >1
3184 | add PC, PC, TMP2 3617 | add PC, PC, SAVE0
3185 |1: 3618 |1:
3186 |.if not FFI 3619 |.if not FFI
3187 |3: 3620 |3:
@@ -3192,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3192 |.if not FFI 3625 |.if not FFI
3193 |3: 3626 |3:
3194 |.endif 3627 |.endif
3195 | add PC, PC, TMP2 3628 | add PC, PC, SAVE0
3196 |2: 3629 |2:
3197 } 3630 }
3198 | ins_next 3631 | ins_next
3199 |.if FFI 3632 |.if FFI
3200 |3: 3633 |3:
3201 | cmpwi TMP0, LJ_TCDATA 3634 | cmpwi CARG1, LJ_TCDATA
3202 | beq ->vmeta_equal_cd 3635 | beq ->vmeta_equal_cd
3203 | b <1 3636 | b <1
3204 |.endif 3637 |.endif
@@ -3206,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3206 |7: // RA is not an integer. 3639 |7: // RA is not an integer.
3207 | bge cr0, <3 3640 | bge cr0, <3
3208 | // RA is a number. 3641 | // RA is a number.
3209 | lfd f0, 0(RA) 3642 | .FPU lfd f0, 0(RA)
3210 | blt cr1, >1 3643 | blt cr1, >1
3211 | // RA is a number, RD is an integer. 3644 | // RA is a number, RD is an integer.
3212 | tonum_i f1, CARG3 3645 |.if FPU
3646 | tonum_i f1, CARG4
3647 |.else
3648 | bl ->vm_sfi2d_2
3649 |.endif
3213 | b >2 3650 | b >2
3214 | 3651 |
3215 |8: // RA is an integer, RD is a number. 3652 |8: // RA is an integer, RD is a number.
3653 |.if FPU
3216 | tonum_i f0, CARG2 3654 | tonum_i f0, CARG2
3655 |.else
3656 | bl ->vm_sfi2d_1
3657 |.endif
3217 |1: 3658 |1:
3218 | lfd f1, 0(RD) 3659 | .FPU lfd f1, 0(RD)
3219 |2: 3660 |2:
3661 |.if FPU
3220 | fcmpu cr0, f0, f1 3662 | fcmpu cr0, f0, f1
3663 |.else
3664 | blex __ledf2
3665 | cmpwi CRET1, 0
3666 |.endif
3221 | b <4 3667 | b <4
3222 |.endif 3668 |.endif
3223 break; 3669 break;
@@ -3272,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3272 | add PC, PC, TMP2 3718 | add PC, PC, TMP2
3273 } else { 3719 } else {
3274 | li TMP1, LJ_TFALSE 3720 | li TMP1, LJ_TFALSE
3721 |.if FPU
3275 | lfdx f0, BASE, RD 3722 | lfdx f0, BASE, RD
3723 |.else
3724 | lwzux CARG1, RD, BASE
3725 | lwz CARG2, 4(RD)
3726 |.endif
3276 | cmplw TMP0, TMP1 3727 | cmplw TMP0, TMP1
3277 if (op == BC_ISTC) { 3728 if (op == BC_ISTC) {
3278 | bge >1 3729 | bge >1
@@ -3281,20 +3732,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3281 } 3732 }
3282 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3733 | addis PC, PC, -(BCBIAS_J*4 >> 16)
3283 | decode_RD4 TMP2, INS 3734 | decode_RD4 TMP2, INS
3735 |.if FPU
3284 | stfdx f0, BASE, RA 3736 | stfdx f0, BASE, RA
3737 |.else
3738 | stwux CARG1, RA, BASE
3739 | stw CARG2, 4(RA)
3740 |.endif
3285 | add PC, PC, TMP2 3741 | add PC, PC, TMP2
3286 |1: 3742 |1:
3287 } 3743 }
3288 | ins_next 3744 | ins_next
3289 break; 3745 break;
3290 3746
3747 case BC_ISTYPE:
3748 | // RA = src*8, RD = -type*8
3749 | lwzx TMP0, BASE, RA
3750 | srwi TMP1, RD, 3
3751 | ins_next1
3752 |.if not PPE and not GPR64
3753 | add. TMP0, TMP0, TMP1
3754 |.else
3755 | neg TMP1, TMP1
3756 | cmpw TMP0, TMP1
3757 |.endif
3758 | bne ->vmeta_istype
3759 | ins_next2
3760 break;
3761 case BC_ISNUM:
3762 | // RA = src*8, RD = -(TISNUM-1)*8
3763 | lwzx TMP0, BASE, RA
3764 | ins_next1
3765 | checknum TMP0
3766 | bge ->vmeta_istype
3767 | ins_next2
3768 break;
3769
3291 /* -- Unary ops --------------------------------------------------------- */ 3770 /* -- Unary ops --------------------------------------------------------- */
3292 3771
3293 case BC_MOV: 3772 case BC_MOV:
3294 | // RA = dst*8, RD = src*8 3773 | // RA = dst*8, RD = src*8
3295 | ins_next1 3774 | ins_next1
3775 |.if FPU
3296 | lfdx f0, BASE, RD 3776 | lfdx f0, BASE, RD
3297 | stfdx f0, BASE, RA 3777 | stfdx f0, BASE, RA
3778 |.else
3779 | lwzux TMP0, RD, BASE
3780 | lwz TMP1, 4(RD)
3781 | stwux TMP0, RA, BASE
3782 | stw TMP1, 4(RA)
3783 |.endif
3298 | ins_next2 3784 | ins_next2
3299 break; 3785 break;
3300 case BC_NOT: 3786 case BC_NOT:
@@ -3396,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3396 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3882 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3397 ||switch (vk) { 3883 ||switch (vk) {
3398 ||case 0: 3884 ||case 0:
3399 | lwzx TMP1, BASE, RB 3885 | lwzx CARG1, BASE, RB
3400 | .if DUALNUM 3886 | .if DUALNUM
3401 | lwzx TMP2, KBASE, RC 3887 | lwzx CARG3, KBASE, RC
3402 | .endif 3888 | .endif
3889 | .if FPU
3403 | lfdx f14, BASE, RB 3890 | lfdx f14, BASE, RB
3404 | lfdx f15, KBASE, RC 3891 | lfdx f15, KBASE, RC
3892 | .else
3893 | add TMP1, BASE, RB
3894 | add TMP2, KBASE, RC
3895 | lwz CARG2, 4(TMP1)
3896 | lwz CARG4, 4(TMP2)
3897 | .endif
3405 | .if DUALNUM 3898 | .if DUALNUM
3406 | checknum cr0, TMP1 3899 | checknum cr0, CARG1
3407 | checknum cr1, TMP2 3900 | checknum cr1, CARG3
3408 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3901 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3409 | bge ->vmeta_arith_vn 3902 | bge ->vmeta_arith_vn
3410 | .else 3903 | .else
3411 | checknum TMP1; bge ->vmeta_arith_vn 3904 | checknum CARG1; bge ->vmeta_arith_vn
3412 | .endif 3905 | .endif
3413 || break; 3906 || break;
3414 ||case 1: 3907 ||case 1:
3415 | lwzx TMP1, BASE, RB 3908 | lwzx CARG1, BASE, RB
3416 | .if DUALNUM 3909 | .if DUALNUM
3417 | lwzx TMP2, KBASE, RC 3910 | lwzx CARG3, KBASE, RC
3418 | .endif 3911 | .endif
3912 | .if FPU
3419 | lfdx f15, BASE, RB 3913 | lfdx f15, BASE, RB
3420 | lfdx f14, KBASE, RC 3914 | lfdx f14, KBASE, RC
3915 | .else
3916 | add TMP1, BASE, RB
3917 | add TMP2, KBASE, RC
3918 | lwz CARG2, 4(TMP1)
3919 | lwz CARG4, 4(TMP2)
3920 | .endif
3421 | .if DUALNUM 3921 | .if DUALNUM
3422 | checknum cr0, TMP1 3922 | checknum cr0, CARG1
3423 | checknum cr1, TMP2 3923 | checknum cr1, CARG3
3424 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3924 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3425 | bge ->vmeta_arith_nv 3925 | bge ->vmeta_arith_nv
3426 | .else 3926 | .else
3427 | checknum TMP1; bge ->vmeta_arith_nv 3927 | checknum CARG1; bge ->vmeta_arith_nv
3428 | .endif 3928 | .endif
3429 || break; 3929 || break;
3430 ||default: 3930 ||default:
3431 | lwzx TMP1, BASE, RB 3931 | lwzx CARG1, BASE, RB
3432 | lwzx TMP2, BASE, RC 3932 | lwzx CARG3, BASE, RC
3933 | .if FPU
3433 | lfdx f14, BASE, RB 3934 | lfdx f14, BASE, RB
3434 | lfdx f15, BASE, RC 3935 | lfdx f15, BASE, RC
3435 | checknum cr0, TMP1 3936 | .else
3436 | checknum cr1, TMP2 3937 | add TMP1, BASE, RB
3938 | add TMP2, BASE, RC
3939 | lwz CARG2, 4(TMP1)
3940 | lwz CARG4, 4(TMP2)
3941 | .endif
3942 | checknum cr0, CARG1
3943 | checknum cr1, CARG3
3437 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3944 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3438 | bge ->vmeta_arith_vv 3945 | bge ->vmeta_arith_vv
3439 || break; 3946 || break;
@@ -3467,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3467 | fsub a, b, a // b - floor(b/c)*c 3974 | fsub a, b, a // b - floor(b/c)*c
3468 |.endmacro 3975 |.endmacro
3469 | 3976 |
3977 |.macro sfpmod
3978 |->BC_MODVN_Z:
3979 | stw CARG1, SFSAVE_1
3980 | stw CARG2, SFSAVE_2
3981 | mr SAVE0, CARG3
3982 | mr SAVE1, CARG4
3983 | blex __divdf3
3984 | blex floor
3985 | mr CARG3, SAVE0
3986 | mr CARG4, SAVE1
3987 | blex __muldf3
3988 | mr CARG3, CRET1
3989 | mr CARG4, CRET2
3990 | lwz CARG1, SFSAVE_1
3991 | lwz CARG2, SFSAVE_2
3992 | blex __subdf3
3993 |.endmacro
3994 |
3470 |.macro ins_arithfp, fpins 3995 |.macro ins_arithfp, fpins
3471 | ins_arithpre 3996 | ins_arithpre
3472 |.if "fpins" == "fpmod_" 3997 |.if "fpins" == "fpmod_"
3473 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3998 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3474 |.else 3999 |.elif FPU
3475 | fpins f0, f14, f15 4000 | fpins f0, f14, f15
3476 | ins_next1 4001 | ins_next1
3477 | stfdx f0, BASE, RA 4002 | stfdx f0, BASE, RA
3478 | ins_next2 4003 | ins_next2
4004 |.else
4005 | blex __divdf3 // Only soft-float div uses this macro.
4006 | ins_next1
4007 | stwux CRET1, RA, BASE
4008 | stw CRET2, 4(RA)
4009 | ins_next2
3479 |.endif 4010 |.endif
3480 |.endmacro 4011 |.endmacro
3481 | 4012 |
3482 |.macro ins_arithdn, intins, fpins 4013 |.macro ins_arithdn, intins, fpins, fpcall
3483 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 4014 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3484 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4015 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3485 ||switch (vk) { 4016 ||switch (vk) {
3486 ||case 0: 4017 ||case 0:
3487 | lwzux TMP1, RB, BASE 4018 | lwzux CARG1, RB, BASE
3488 | lwzux TMP2, RC, KBASE 4019 | lwzux CARG3, RC, KBASE
3489 | lwz CARG1, 4(RB) 4020 | lwz CARG2, 4(RB)
3490 | checknum cr0, TMP1 4021 | checknum cr0, CARG1
3491 | lwz CARG2, 4(RC) 4022 | lwz CARG4, 4(RC)
4023 | checknum cr1, CARG3
3492 || break; 4024 || break;
3493 ||case 1: 4025 ||case 1:
3494 | lwzux TMP1, RB, BASE 4026 | lwzux CARG3, RB, BASE
3495 | lwzux TMP2, RC, KBASE 4027 | lwzux CARG1, RC, KBASE
3496 | lwz CARG2, 4(RB) 4028 | lwz CARG4, 4(RB)
3497 | checknum cr0, TMP1 4029 | checknum cr0, CARG3
3498 | lwz CARG1, 4(RC) 4030 | lwz CARG2, 4(RC)
4031 | checknum cr1, CARG1
3499 || break; 4032 || break;
3500 ||default: 4033 ||default:
3501 | lwzux TMP1, RB, BASE 4034 | lwzux CARG1, RB, BASE
3502 | lwzux TMP2, RC, BASE 4035 | lwzux CARG3, RC, BASE
3503 | lwz CARG1, 4(RB) 4036 | lwz CARG2, 4(RB)
3504 | checknum cr0, TMP1 4037 | checknum cr0, CARG1
3505 | lwz CARG2, 4(RC) 4038 | lwz CARG4, 4(RC)
4039 | checknum cr1, CARG3
3506 || break; 4040 || break;
3507 ||} 4041 ||}
3508 | checknum cr1, TMP2
3509 | bne >5 4042 | bne >5
3510 | bne cr1, >5 4043 | bne cr1, >5
3511 | intins CARG1, CARG1, CARG2 4044 |.if "intins" == "intmod"
4045 | mr CARG1, CARG2
4046 | mr CARG2, CARG4
4047 |.endif
4048 | intins CARG1, CARG2, CARG4
3512 | bso >4 4049 | bso >4
3513 |1: 4050 |1:
3514 | ins_next1 4051 | ins_next1
@@ -3520,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3520 | checkov TMP0, <1 // Ignore unrelated overflow. 4057 | checkov TMP0, <1 // Ignore unrelated overflow.
3521 | ins_arithfallback b 4058 | ins_arithfallback b
3522 |5: // FP variant. 4059 |5: // FP variant.
4060 |.if FPU
3523 ||if (vk == 1) { 4061 ||if (vk == 1) {
3524 | lfd f15, 0(RB) 4062 | lfd f15, 0(RB)
3525 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3526 | lfd f14, 0(RC) 4063 | lfd f14, 0(RC)
3527 ||} else { 4064 ||} else {
3528 | lfd f14, 0(RB) 4065 | lfd f14, 0(RB)
3529 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3530 | lfd f15, 0(RC) 4066 | lfd f15, 0(RC)
3531 ||} 4067 ||}
4068 |.endif
4069 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3532 | ins_arithfallback bge 4070 | ins_arithfallback bge
3533 |.if "fpins" == "fpmod_" 4071 |.if "fpins" == "fpmod_"
3534 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4072 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3535 |.else 4073 |.else
4074 |.if FPU
3536 | fpins f0, f14, f15 4075 | fpins f0, f14, f15
3537 | ins_next1
3538 | stfdx f0, BASE, RA 4076 | stfdx f0, BASE, RA
4077 |.else
4078 |.if "fpcall" == "sfpmod"
4079 | sfpmod
4080 |.else
4081 | blex fpcall
4082 |.endif
4083 | stwux CRET1, RA, BASE
4084 | stw CRET2, 4(RA)
4085 |.endif
4086 | ins_next1
3539 | b <2 4087 | b <2
3540 |.endif 4088 |.endif
3541 |.endmacro 4089 |.endmacro
3542 | 4090 |
3543 |.macro ins_arith, intins, fpins 4091 |.macro ins_arith, intins, fpins, fpcall
3544 |.if DUALNUM 4092 |.if DUALNUM
3545 | ins_arithdn intins, fpins 4093 | ins_arithdn intins, fpins, fpcall
3546 |.else 4094 |.else
3547 | ins_arithfp fpins 4095 | ins_arithfp fpins
3548 |.endif 4096 |.endif
@@ -3557,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3557 | addo. TMP0, TMP0, TMP3 4105 | addo. TMP0, TMP0, TMP3
3558 | add y, a, b 4106 | add y, a, b
3559 |.endmacro 4107 |.endmacro
3560 | ins_arith addo32., fadd 4108 | ins_arith addo32., fadd, __adddf3
3561 |.else 4109 |.else
3562 | ins_arith addo., fadd 4110 | ins_arith addo., fadd, __adddf3
3563 |.endif 4111 |.endif
3564 break; 4112 break;
3565 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4113 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3571,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3571 | subo. TMP0, TMP0, TMP3 4119 | subo. TMP0, TMP0, TMP3
3572 | sub y, a, b 4120 | sub y, a, b
3573 |.endmacro 4121 |.endmacro
3574 | ins_arith subo32., fsub 4122 | ins_arith subo32., fsub, __subdf3
3575 |.else 4123 |.else
3576 | ins_arith subo., fsub 4124 | ins_arith subo., fsub, __subdf3
3577 |.endif 4125 |.endif
3578 break; 4126 break;
3579 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4127 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3580 | ins_arith mullwo., fmul 4128 | ins_arith mullwo., fmul, __muldf3
3581 break; 4129 break;
3582 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4130 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3583 | ins_arithfp fdiv 4131 | ins_arithfp fdiv
3584 break; 4132 break;
3585 case BC_MODVN: 4133 case BC_MODVN:
3586 | ins_arith intmod, fpmod 4134 | ins_arith intmod, fpmod, sfpmod
3587 break; 4135 break;
3588 case BC_MODNV: case BC_MODVV: 4136 case BC_MODNV: case BC_MODVV:
3589 | ins_arith intmod, fpmod_ 4137 | ins_arith intmod, fpmod_, sfpmod
3590 break; 4138 break;
3591 case BC_POW: 4139 case BC_POW:
3592 | // NYI: (partial) integer arithmetic. 4140 | // NYI: (partial) integer arithmetic.
3593 | lwzx TMP1, BASE, RB 4141 | lwzx CARG1, BASE, RB
4142 | lwzx CARG3, BASE, RC
4143 |.if FPU
3594 | lfdx FARG1, BASE, RB 4144 | lfdx FARG1, BASE, RB
3595 | lwzx TMP2, BASE, RC
3596 | lfdx FARG2, BASE, RC 4145 | lfdx FARG2, BASE, RC
3597 | checknum cr0, TMP1 4146 |.else
3598 | checknum cr1, TMP2 4147 | add TMP1, BASE, RB
4148 | add TMP2, BASE, RC
4149 | lwz CARG2, 4(TMP1)
4150 | lwz CARG4, 4(TMP2)
4151 |.endif
4152 | checknum cr0, CARG1
4153 | checknum cr1, CARG3
3599 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4154 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3600 | bge ->vmeta_arith_vv 4155 | bge ->vmeta_arith_vv
3601 | blex pow 4156 | blex pow
3602 | ins_next1 4157 | ins_next1
4158 |.if FPU
3603 | stfdx FARG1, BASE, RA 4159 | stfdx FARG1, BASE, RA
4160 |.else
4161 | stwux CARG1, RA, BASE
4162 | stw CARG2, 4(RA)
4163 |.endif
3604 | ins_next2 4164 | ins_next2
3605 break; 4165 break;
3606 4166
@@ -3620,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3620 | lp BASE, L->base 4180 | lp BASE, L->base
3621 | bne ->vmeta_binop 4181 | bne ->vmeta_binop
3622 | ins_next1 4182 | ins_next1
4183 |.if FPU
3623 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 4184 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
3624 | stfdx f0, BASE, RA 4185 | stfdx f0, BASE, RA
4186 |.else
4187 | lwzux TMP0, SAVE0, BASE
4188 | lwz TMP1, 4(SAVE0)
4189 | stwux TMP0, RA, BASE
4190 | stw TMP1, 4(RA)
4191 |.endif
3625 | ins_next2 4192 | ins_next2
3626 break; 4193 break;
3627 4194
@@ -3684,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3684 case BC_KNUM: 4251 case BC_KNUM:
3685 | // RA = dst*8, RD = num_const*8 4252 | // RA = dst*8, RD = num_const*8
3686 | ins_next1 4253 | ins_next1
4254 |.if FPU
3687 | lfdx f0, KBASE, RD 4255 | lfdx f0, KBASE, RD
3688 | stfdx f0, BASE, RA 4256 | stfdx f0, BASE, RA
4257 |.else
4258 | lwzux TMP0, RD, KBASE
4259 | lwz TMP1, 4(RD)
4260 | stwux TMP0, RA, BASE
4261 | stw TMP1, 4(RA)
4262 |.endif
3689 | ins_next2 4263 | ins_next2
3690 break; 4264 break;
3691 case BC_KPRI: 4265 case BC_KPRI:
@@ -3718,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3718 | lwzx UPVAL:RB, LFUNC:RB, RD 4292 | lwzx UPVAL:RB, LFUNC:RB, RD
3719 | ins_next1 4293 | ins_next1
3720 | lwz TMP1, UPVAL:RB->v 4294 | lwz TMP1, UPVAL:RB->v
4295 |.if FPU
3721 | lfd f0, 0(TMP1) 4296 | lfd f0, 0(TMP1)
3722 | stfdx f0, BASE, RA 4297 | stfdx f0, BASE, RA
4298 |.else
4299 | lwz TMP2, 0(TMP1)
4300 | lwz TMP3, 4(TMP1)
4301 | stwux TMP2, RA, BASE
4302 | stw TMP3, 4(RA)
4303 |.endif
3723 | ins_next2 4304 | ins_next2
3724 break; 4305 break;
3725 case BC_USETV: 4306 case BC_USETV:
@@ -3727,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3727 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4308 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3728 | srwi RA, RA, 1 4309 | srwi RA, RA, 1
3729 | addi RA, RA, offsetof(GCfuncL, uvptr) 4310 | addi RA, RA, offsetof(GCfuncL, uvptr)
4311 |.if FPU
3730 | lfdux f0, RD, BASE 4312 | lfdux f0, RD, BASE
4313 |.else
4314 | lwzux CARG1, RD, BASE
4315 | lwz CARG3, 4(RD)
4316 |.endif
3731 | lwzx UPVAL:RB, LFUNC:RB, RA 4317 | lwzx UPVAL:RB, LFUNC:RB, RA
3732 | lbz TMP3, UPVAL:RB->marked 4318 | lbz TMP3, UPVAL:RB->marked
3733 | lwz CARG2, UPVAL:RB->v 4319 | lwz CARG2, UPVAL:RB->v
3734 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 4320 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3735 | lbz TMP0, UPVAL:RB->closed 4321 | lbz TMP0, UPVAL:RB->closed
3736 | lwz TMP2, 0(RD) 4322 | lwz TMP2, 0(RD)
4323 |.if FPU
3737 | stfd f0, 0(CARG2) 4324 | stfd f0, 0(CARG2)
4325 |.else
4326 | stw CARG1, 0(CARG2)
4327 | stw CARG3, 4(CARG2)
4328 |.endif
3738 | cmplwi cr1, TMP0, 0 4329 | cmplwi cr1, TMP0, 0
3739 | lwz TMP1, 4(RD) 4330 | lwz TMP1, 4(RD)
3740 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4331 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3790,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3790 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4381 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3791 | srwi RA, RA, 1 4382 | srwi RA, RA, 1
3792 | addi RA, RA, offsetof(GCfuncL, uvptr) 4383 | addi RA, RA, offsetof(GCfuncL, uvptr)
4384 |.if FPU
3793 | lfdx f0, KBASE, RD 4385 | lfdx f0, KBASE, RD
4386 |.else
4387 | lwzux TMP2, RD, KBASE
4388 | lwz TMP3, 4(RD)
4389 |.endif
3794 | lwzx UPVAL:RB, LFUNC:RB, RA 4390 | lwzx UPVAL:RB, LFUNC:RB, RA
3795 | ins_next1 4391 | ins_next1
3796 | lwz TMP1, UPVAL:RB->v 4392 | lwz TMP1, UPVAL:RB->v
4393 |.if FPU
3797 | stfd f0, 0(TMP1) 4394 | stfd f0, 0(TMP1)
4395 |.else
4396 | stw TMP2, 0(TMP1)
4397 | stw TMP3, 4(TMP1)
4398 |.endif
3798 | ins_next2 4399 | ins_next2
3799 break; 4400 break;
3800 case BC_USETP: 4401 case BC_USETP:
@@ -3942,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3942 |.endif 4543 |.endif
3943 | ble ->vmeta_tgetv // Integer key and in array part? 4544 | ble ->vmeta_tgetv // Integer key and in array part?
3944 | lwzx TMP0, TMP1, TMP2 4545 | lwzx TMP0, TMP1, TMP2
4546 |.if FPU
3945 | lfdx f14, TMP1, TMP2 4547 | lfdx f14, TMP1, TMP2
4548 |.else
4549 | lwzux SAVE0, TMP1, TMP2
4550 | lwz SAVE1, 4(TMP1)
4551 |.endif
3946 | checknil TMP0; beq >2 4552 | checknil TMP0; beq >2
3947 |1: 4553 |1:
3948 | ins_next1 4554 | ins_next1
4555 |.if FPU
3949 | stfdx f14, BASE, RA 4556 | stfdx f14, BASE, RA
4557 |.else
4558 | stwux SAVE0, RA, BASE
4559 | stw SAVE1, 4(RA)
4560 |.endif
3950 | ins_next2 4561 | ins_next2
3951 | 4562 |
3952 |2: // Check for __index if table value is nil. 4563 |2: // Check for __index if table value is nil.
@@ -4022,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4022 | lwz TMP1, TAB:RB->asize 4633 | lwz TMP1, TAB:RB->asize
4023 | lwz TMP2, TAB:RB->array 4634 | lwz TMP2, TAB:RB->array
4024 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4635 | cmplw TMP0, TMP1; bge ->vmeta_tgetb
4636 |.if FPU
4025 | lwzx TMP1, TMP2, RC 4637 | lwzx TMP1, TMP2, RC
4026 | lfdx f0, TMP2, RC 4638 | lfdx f0, TMP2, RC
4639 |.else
4640 | lwzux TMP1, TMP2, RC
4641 | lwz TMP3, 4(TMP2)
4642 |.endif
4027 | checknil TMP1; beq >5 4643 | checknil TMP1; beq >5
4028 |1: 4644 |1:
4029 | ins_next1 4645 | ins_next1
4646 |.if FPU
4030 | stfdx f0, BASE, RA 4647 | stfdx f0, BASE, RA
4648 |.else
4649 | stwux TMP1, RA, BASE
4650 | stw TMP3, 4(RA)
4651 |.endif
4031 | ins_next2 4652 | ins_next2
4032 | 4653 |
4033 |5: // Check for __index if table value is nil. 4654 |5: // Check for __index if table value is nil.
@@ -4039,6 +4660,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4039 | bne <1 // 'no __index' flag set: done. 4660 | bne <1 // 'no __index' flag set: done.
4040 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4661 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4041 break; 4662 break;
4663 case BC_TGETR:
4664 | // RA = dst*8, RB = table*8, RC = key*8
4665 | add RB, BASE, RB
4666 | lwz TAB:CARG1, 4(RB)
4667 |.if DUALNUM
4668 | add RC, BASE, RC
4669 | lwz TMP0, TAB:CARG1->asize
4670 | lwz CARG2, 4(RC)
4671 | lwz TMP1, TAB:CARG1->array
4672 |.else
4673 | lfdx f0, BASE, RC
4674 | lwz TMP0, TAB:CARG1->asize
4675 | toint CARG2, f0
4676 | lwz TMP1, TAB:CARG1->array
4677 |.endif
4678 | cmplw TMP0, CARG2
4679 | slwi TMP2, CARG2, 3
4680 | ble ->vmeta_tgetr // In array part?
4681 |.if FPU
4682 | lfdx f14, TMP1, TMP2
4683 |.else
4684 | lwzux SAVE0, TMP2, TMP1
4685 | lwz SAVE1, 4(TMP2)
4686 |.endif
4687 |->BC_TGETR_Z:
4688 | ins_next1
4689 |.if FPU
4690 | stfdx f14, BASE, RA
4691 |.else
4692 | stwux SAVE0, RA, BASE
4693 | stw SAVE1, 4(RA)
4694 |.endif
4695 | ins_next2
4696 break;
4042 4697
4043 case BC_TSETV: 4698 case BC_TSETV:
4044 | // RA = src*8, RB = table*8, RC = key*8 4699 | // RA = src*8, RB = table*8, RC = key*8
@@ -4077,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4077 | ble ->vmeta_tsetv // Integer key and in array part? 4732 | ble ->vmeta_tsetv // Integer key and in array part?
4078 | lwzx TMP2, TMP1, TMP0 4733 | lwzx TMP2, TMP1, TMP0
4079 | lbz TMP3, TAB:RB->marked 4734 | lbz TMP3, TAB:RB->marked
4735 |.if FPU
4080 | lfdx f14, BASE, RA 4736 | lfdx f14, BASE, RA
4737 |.else
4738 | add SAVE1, BASE, RA
4739 | lwz SAVE0, 0(SAVE1)
4740 | lwz SAVE1, 4(SAVE1)
4741 |.endif
4081 | checknil TMP2; beq >3 4742 | checknil TMP2; beq >3
4082 |1: 4743 |1:
4083 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4744 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4745 |.if FPU
4084 | stfdx f14, TMP1, TMP0 4746 | stfdx f14, TMP1, TMP0
4747 |.else
4748 | stwux SAVE0, TMP1, TMP0
4749 | stw SAVE1, 4(TMP1)
4750 |.endif
4085 | bne >7 4751 | bne >7
4086 |2: 4752 |2:
4087 | ins_next 4753 | ins_next
@@ -4122,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4122 | lwz NODE:TMP2, TAB:RB->node 4788 | lwz NODE:TMP2, TAB:RB->node
4123 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4789 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
4124 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4790 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
4791 |.if FPU
4125 | lfdx f14, BASE, RA 4792 | lfdx f14, BASE, RA
4793 |.else
4794 | add CARG2, BASE, RA
4795 | lwz SAVE0, 0(CARG2)
4796 | lwz SAVE1, 4(CARG2)
4797 |.endif
4126 | slwi TMP0, TMP1, 5 4798 | slwi TMP0, TMP1, 5
4127 | slwi TMP1, TMP1, 3 4799 | slwi TMP1, TMP1, 3
4128 | sub TMP1, TMP0, TMP1 4800 | sub TMP1, TMP0, TMP1
@@ -4138,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4138 | checknil CARG2; beq >4 // Key found, but nil value? 4810 | checknil CARG2; beq >4 // Key found, but nil value?
4139 |2: 4811 |2:
4140 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4812 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4813 |.if FPU
4141 | stfd f14, NODE:TMP2->val 4814 | stfd f14, NODE:TMP2->val
4815 |.else
4816 | stw SAVE0, NODE:TMP2->val.u32.hi
4817 | stw SAVE1, NODE:TMP2->val.u32.lo
4818 |.endif
4142 | bne >7 4819 | bne >7
4143 |3: 4820 |3:
4144 | ins_next 4821 | ins_next
@@ -4177,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4177 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4854 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4178 | // Returns TValue *. 4855 | // Returns TValue *.
4179 | lp BASE, L->base 4856 | lp BASE, L->base
4857 |.if FPU
4180 | stfd f14, 0(CRET1) 4858 | stfd f14, 0(CRET1)
4859 |.else
4860 | stw SAVE0, 0(CRET1)
4861 | stw SAVE1, 4(CRET1)
4862 |.endif
4181 | b <3 // No 2nd write barrier needed. 4863 | b <3 // No 2nd write barrier needed.
4182 | 4864 |
4183 |7: // Possible table write barrier for the value. Skip valiswhite check. 4865 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4194,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4194 | lwz TMP2, TAB:RB->array 4876 | lwz TMP2, TAB:RB->array
4195 | lbz TMP3, TAB:RB->marked 4877 | lbz TMP3, TAB:RB->marked
4196 | cmplw TMP0, TMP1 4878 | cmplw TMP0, TMP1
4879 |.if FPU
4197 | lfdx f14, BASE, RA 4880 | lfdx f14, BASE, RA
4881 |.else
4882 | add CARG2, BASE, RA
4883 | lwz SAVE0, 0(CARG2)
4884 | lwz SAVE1, 4(CARG2)
4885 |.endif
4198 | bge ->vmeta_tsetb 4886 | bge ->vmeta_tsetb
4199 | lwzx TMP1, TMP2, RC 4887 | lwzx TMP1, TMP2, RC
4200 | checknil TMP1; beq >5 4888 | checknil TMP1; beq >5
4201 |1: 4889 |1:
4202 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4890 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4891 |.if FPU
4203 | stfdx f14, TMP2, RC 4892 | stfdx f14, TMP2, RC
4893 |.else
4894 | stwux SAVE0, RC, TMP2
4895 | stw SAVE1, 4(RC)
4896 |.endif
4204 | bne >7 4897 | bne >7
4205 |2: 4898 |2:
4206 | ins_next 4899 | ins_next
@@ -4218,6 +4911,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4218 | barrierback TAB:RB, TMP3, TMP0 4911 | barrierback TAB:RB, TMP3, TMP0
4219 | b <2 4912 | b <2
4220 break; 4913 break;
4914 case BC_TSETR:
4915 | // RA = dst*8, RB = table*8, RC = key*8
4916 | add RB, BASE, RB
4917 | lwz TAB:CARG2, 4(RB)
4918 |.if DUALNUM
4919 | add RC, BASE, RC
4920 | lbz TMP3, TAB:CARG2->marked
4921 | lwz TMP0, TAB:CARG2->asize
4922 | lwz CARG3, 4(RC)
4923 | lwz TMP1, TAB:CARG2->array
4924 |.else
4925 | lfdx f0, BASE, RC
4926 | lbz TMP3, TAB:CARG2->marked
4927 | lwz TMP0, TAB:CARG2->asize
4928 | toint CARG3, f0
4929 | lwz TMP1, TAB:CARG2->array
4930 |.endif
4931 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4932 | bne >7
4933 |2:
4934 | cmplw TMP0, CARG3
4935 | slwi TMP2, CARG3, 3
4936 |.if FPU
4937 | lfdx f14, BASE, RA
4938 |.else
4939 | lwzux SAVE0, RA, BASE
4940 | lwz SAVE1, 4(RA)
4941 |.endif
4942 | ble ->vmeta_tsetr // In array part?
4943 | ins_next1
4944 |.if FPU
4945 | stfdx f14, TMP1, TMP2
4946 |.else
4947 | stwux SAVE0, TMP1, TMP2
4948 | stw SAVE1, 4(TMP1)
4949 |.endif
4950 | ins_next2
4951 |
4952 |7: // Possible table write barrier for the value. Skip valiswhite check.
4953 | barrierback TAB:CARG2, TMP3, TMP2
4954 | b <2
4955 break;
4956
4221 4957
4222 case BC_TSETM: 4958 case BC_TSETM:
4223 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4959 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4240,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4240 | add TMP1, TMP1, TMP0 4976 | add TMP1, TMP1, TMP0
4241 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4977 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4242 |3: // Copy result slots to table. 4978 |3: // Copy result slots to table.
4979 |.if FPU
4243 | lfd f0, 0(RA) 4980 | lfd f0, 0(RA)
4981 |.else
4982 | lwz SAVE0, 0(RA)
4983 | lwz SAVE1, 4(RA)
4984 |.endif
4244 | addi RA, RA, 8 4985 | addi RA, RA, 8
4245 | cmpw cr1, RA, TMP2 4986 | cmpw cr1, RA, TMP2
4987 |.if FPU
4246 | stfd f0, 0(TMP1) 4988 | stfd f0, 0(TMP1)
4989 |.else
4990 | stw SAVE0, 0(TMP1)
4991 | stw SAVE1, 4(TMP1)
4992 |.endif
4247 | addi TMP1, TMP1, 8 4993 | addi TMP1, TMP1, 8
4248 | blt cr1, <3 4994 | blt cr1, <3
4249 | bne >7 4995 | bne >7
@@ -4310,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 | beq cr1, >3 5056 | beq cr1, >3
4311 |2: 5057 |2:
4312 | addi TMP3, TMP2, 8 5058 | addi TMP3, TMP2, 8
5059 |.if FPU
4313 | lfdx f0, RA, TMP2 5060 | lfdx f0, RA, TMP2
5061 |.else
5062 | add CARG3, RA, TMP2
5063 | lwz CARG1, 0(CARG3)
5064 | lwz CARG2, 4(CARG3)
5065 |.endif
4314 | cmplw cr1, TMP3, NARGS8:RC 5066 | cmplw cr1, TMP3, NARGS8:RC
5067 |.if FPU
4315 | stfdx f0, BASE, TMP2 5068 | stfdx f0, BASE, TMP2
5069 |.else
5070 | stwux CARG1, TMP2, BASE
5071 | stw CARG2, 4(TMP2)
5072 |.endif
4316 | mr TMP2, TMP3 5073 | mr TMP2, TMP3
4317 | bne cr1, <2 5074 | bne cr1, <2
4318 |3: 5075 |3:
@@ -4345,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4345 | add BASE, BASE, RA 5102 | add BASE, BASE, RA
4346 | lwz TMP1, -24(BASE) 5103 | lwz TMP1, -24(BASE)
4347 | lwz LFUNC:RB, -20(BASE) 5104 | lwz LFUNC:RB, -20(BASE)
5105 |.if FPU
4348 | lfd f1, -8(BASE) 5106 | lfd f1, -8(BASE)
4349 | lfd f0, -16(BASE) 5107 | lfd f0, -16(BASE)
5108 |.else
5109 | lwz CARG1, -8(BASE)
5110 | lwz CARG2, -4(BASE)
5111 | lwz CARG3, -16(BASE)
5112 | lwz CARG4, -12(BASE)
5113 |.endif
4350 | stw TMP1, 0(BASE) // Copy callable. 5114 | stw TMP1, 0(BASE) // Copy callable.
4351 | stw LFUNC:RB, 4(BASE) 5115 | stw LFUNC:RB, 4(BASE)
4352 | checkfunc TMP1 5116 | checkfunc TMP1
4353 | stfd f1, 16(BASE) // Copy control var.
4354 | li NARGS8:RC, 16 // Iterators get 2 arguments. 5117 | li NARGS8:RC, 16 // Iterators get 2 arguments.
5118 |.if FPU
5119 | stfd f1, 16(BASE) // Copy control var.
4355 | stfdu f0, 8(BASE) // Copy state. 5120 | stfdu f0, 8(BASE) // Copy state.
5121 |.else
5122 | stw CARG1, 16(BASE) // Copy control var.
5123 | stw CARG2, 20(BASE)
5124 | stwu CARG3, 8(BASE) // Copy state.
5125 | stw CARG4, 4(BASE)
5126 |.endif
4356 | bne ->vmeta_call 5127 | bne ->vmeta_call
4357 | ins_call 5128 | ins_call
4358 break; 5129 break;
@@ -4373,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4373 | slwi TMP3, RC, 3 5144 | slwi TMP3, RC, 3
4374 | bge >5 // Index points after array part? 5145 | bge >5 // Index points after array part?
4375 | lwzx TMP2, TMP1, TMP3 5146 | lwzx TMP2, TMP1, TMP3
5147 |.if FPU
4376 | lfdx f0, TMP1, TMP3 5148 | lfdx f0, TMP1, TMP3
5149 |.else
5150 | lwzux CARG1, TMP3, TMP1
5151 | lwz CARG2, 4(TMP3)
5152 |.endif
4377 | checknil TMP2 5153 | checknil TMP2
4378 | lwz INS, -4(PC) 5154 | lwz INS, -4(PC)
4379 | beq >4 5155 | beq >4
@@ -4385,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4385 |.endif 5161 |.endif
4386 | addi RC, RC, 1 5162 | addi RC, RC, 1
4387 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 5163 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
5164 |.if FPU
4388 | stfd f0, 8(RA) 5165 | stfd f0, 8(RA)
5166 |.else
5167 | stw CARG1, 8(RA)
5168 | stw CARG2, 12(RA)
5169 |.endif
4389 | decode_RD4 TMP1, INS 5170 | decode_RD4 TMP1, INS
4390 | stw RC, -4(RA) // Update control var. 5171 | stw RC, -4(RA) // Update control var.
4391 | add PC, TMP1, TMP3 5172 | add PC, TMP1, TMP3
@@ -4410,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4410 | slwi RB, RC, 3 5191 | slwi RB, RC, 3
4411 | sub TMP3, TMP3, RB 5192 | sub TMP3, TMP3, RB
4412 | lwzx RB, TMP2, TMP3 5193 | lwzx RB, TMP2, TMP3
5194 |.if FPU
4413 | lfdx f0, TMP2, TMP3 5195 | lfdx f0, TMP2, TMP3
5196 |.else
5197 | add CARG3, TMP2, TMP3
5198 | lwz CARG1, 0(CARG3)
5199 | lwz CARG2, 4(CARG3)
5200 |.endif
4414 | add NODE:TMP3, TMP2, TMP3 5201 | add NODE:TMP3, TMP2, TMP3
4415 | checknil RB 5202 | checknil RB
4416 | lwz INS, -4(PC) 5203 | lwz INS, -4(PC)
4417 | beq >7 5204 | beq >7
5205 |.if FPU
4418 | lfd f1, NODE:TMP3->key 5206 | lfd f1, NODE:TMP3->key
5207 |.else
5208 | lwz CARG3, NODE:TMP3->key.u32.hi
5209 | lwz CARG4, NODE:TMP3->key.u32.lo
5210 |.endif
4419 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 5211 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
5212 |.if FPU
4420 | stfd f0, 8(RA) 5213 | stfd f0, 8(RA)
5214 |.else
5215 | stw CARG1, 8(RA)
5216 | stw CARG2, 12(RA)
5217 |.endif
4421 | add RC, RC, TMP0 5218 | add RC, RC, TMP0
4422 | decode_RD4 TMP1, INS 5219 | decode_RD4 TMP1, INS
5220 |.if FPU
4423 | stfd f1, 0(RA) 5221 | stfd f1, 0(RA)
5222 |.else
5223 | stw CARG3, 0(RA)
5224 | stw CARG4, 4(RA)
5225 |.endif
4424 | addi RC, RC, 1 5226 | addi RC, RC, 1
4425 | add PC, TMP1, TMP2 5227 | add PC, TMP1, TMP2
4426 | stw RC, -4(RA) // Update control var. 5228 | stw RC, -4(RA) // Update control var.
@@ -4486,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4486 | subi TMP2, TMP2, 16 5288 | subi TMP2, TMP2, 16
4487 | ble >2 // No vararg slots? 5289 | ble >2 // No vararg slots?
4488 |1: // Copy vararg slots to destination slots. 5290 |1: // Copy vararg slots to destination slots.
5291 |.if FPU
4489 | lfd f0, 0(RC) 5292 | lfd f0, 0(RC)
5293 |.else
5294 | lwz CARG1, 0(RC)
5295 | lwz CARG2, 4(RC)
5296 |.endif
4490 | addi RC, RC, 8 5297 | addi RC, RC, 8
5298 |.if FPU
4491 | stfd f0, 0(RA) 5299 | stfd f0, 0(RA)
5300 |.else
5301 | stw CARG1, 0(RA)
5302 | stw CARG2, 4(RA)
5303 |.endif
4492 | cmplw RA, TMP2 5304 | cmplw RA, TMP2
4493 | cmplw cr1, RC, TMP3 5305 | cmplw cr1, RC, TMP3
4494 | bge >3 // All destination slots filled? 5306 | bge >3 // All destination slots filled?
@@ -4511,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4511 | addi MULTRES, TMP1, 8 5323 | addi MULTRES, TMP1, 8
4512 | bgt >7 5324 | bgt >7
4513 |6: 5325 |6:
5326 |.if FPU
4514 | lfd f0, 0(RC) 5327 | lfd f0, 0(RC)
5328 |.else
5329 | lwz CARG1, 0(RC)
5330 | lwz CARG2, 4(RC)
5331 |.endif
4515 | addi RC, RC, 8 5332 | addi RC, RC, 8
5333 |.if FPU
4516 | stfd f0, 0(RA) 5334 | stfd f0, 0(RA)
5335 |.else
5336 | stw CARG1, 0(RA)
5337 | stw CARG2, 4(RA)
5338 |.endif
4517 | cmplw RC, TMP3 5339 | cmplw RC, TMP3
4518 | addi RA, RA, 8 5340 | addi RA, RA, 8
4519 | blt <6 // More vararg slots? 5341 | blt <6 // More vararg slots?
@@ -4564,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4564 | li TMP1, 0 5386 | li TMP1, 0
4565 |2: 5387 |2:
4566 | addi TMP3, TMP1, 8 5388 | addi TMP3, TMP1, 8
5389 |.if FPU
4567 | lfdx f0, RA, TMP1 5390 | lfdx f0, RA, TMP1
5391 |.else
5392 | add CARG3, RA, TMP1
5393 | lwz CARG1, 0(CARG3)
5394 | lwz CARG2, 4(CARG3)
5395 |.endif
4568 | cmpw TMP3, RC 5396 | cmpw TMP3, RC
5397 |.if FPU
4569 | stfdx f0, TMP2, TMP1 5398 | stfdx f0, TMP2, TMP1
5399 |.else
5400 | add CARG3, TMP2, TMP1
5401 | stw CARG1, 0(CARG3)
5402 | stw CARG2, 4(CARG3)
5403 |.endif
4570 | beq >3 5404 | beq >3
4571 | addi TMP1, TMP3, 8 5405 | addi TMP1, TMP3, 8
5406 |.if FPU
4572 | lfdx f1, RA, TMP3 5407 | lfdx f1, RA, TMP3
5408 |.else
5409 | add CARG3, RA, TMP3
5410 | lwz CARG1, 0(CARG3)
5411 | lwz CARG2, 4(CARG3)
5412 |.endif
4573 | cmpw TMP1, RC 5413 | cmpw TMP1, RC
5414 |.if FPU
4574 | stfdx f1, TMP2, TMP3 5415 | stfdx f1, TMP2, TMP3
5416 |.else
5417 | add CARG3, TMP2, TMP3
5418 | stw CARG1, 0(CARG3)
5419 | stw CARG2, 4(CARG3)
5420 |.endif
4575 | bne <2 5421 | bne <2
4576 |3: 5422 |3:
4577 |5: 5423 |5:
@@ -4613,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4613 | subi TMP2, BASE, 8 5459 | subi TMP2, BASE, 8
4614 | decode_RB8 RB, INS 5460 | decode_RB8 RB, INS
4615 if (op == BC_RET1) { 5461 if (op == BC_RET1) {
5462 |.if FPU
4616 | lfd f0, 0(RA) 5463 | lfd f0, 0(RA)
4617 | stfd f0, 0(TMP2) 5464 | stfd f0, 0(TMP2)
5465 |.else
5466 | lwz CARG1, 0(RA)
5467 | lwz CARG2, 4(RA)
5468 | stw CARG1, 0(TMP2)
5469 | stw CARG2, 4(TMP2)
5470 |.endif
4618 } 5471 }
4619 |5: 5472 |5:
4620 | cmplw RB, RD 5473 | cmplw RB, RD
@@ -4675,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4675 |4: 5528 |4:
4676 | stw CARG1, FORL_IDX*8+4(RA) 5529 | stw CARG1, FORL_IDX*8+4(RA)
4677 } else { 5530 } else {
4678 | lwz TMP3, FORL_STEP*8(RA) 5531 | lwz SAVE0, FORL_STEP*8(RA)
4679 | lwz CARG3, FORL_STEP*8+4(RA) 5532 | lwz CARG3, FORL_STEP*8+4(RA)
4680 | lwz TMP2, FORL_STOP*8(RA) 5533 | lwz TMP2, FORL_STOP*8(RA)
4681 | lwz CARG2, FORL_STOP*8+4(RA) 5534 | lwz CARG2, FORL_STOP*8+4(RA)
4682 | cmplw cr7, TMP3, TISNUM 5535 | cmplw cr7, SAVE0, TISNUM
4683 | cmplw cr1, TMP2, TISNUM 5536 | cmplw cr1, TMP2, TISNUM
4684 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 5537 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4685 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 5538 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4722,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4722 if (vk) { 5575 if (vk) {
4723 |.if DUALNUM 5576 |.if DUALNUM
4724 |9: // FP loop. 5577 |9: // FP loop.
5578 |.if FPU
4725 | lfd f1, FORL_IDX*8(RA) 5579 | lfd f1, FORL_IDX*8(RA)
4726 |.else 5580 |.else
5581 | lwz CARG1, FORL_IDX*8(RA)
5582 | lwz CARG2, FORL_IDX*8+4(RA)
5583 |.endif
5584 |.else
4727 | lfdux f1, RA, BASE 5585 | lfdux f1, RA, BASE
4728 |.endif 5586 |.endif
5587 |.if FPU
4729 | lfd f3, FORL_STEP*8(RA) 5588 | lfd f3, FORL_STEP*8(RA)
4730 | lfd f2, FORL_STOP*8(RA) 5589 | lfd f2, FORL_STOP*8(RA)
4731 | lwz TMP3, FORL_STEP*8(RA)
4732 | fadd f1, f1, f3 5590 | fadd f1, f1, f3
4733 | stfd f1, FORL_IDX*8(RA) 5591 | stfd f1, FORL_IDX*8(RA)
5592 |.else
5593 | lwz CARG3, FORL_STEP*8(RA)
5594 | lwz CARG4, FORL_STEP*8+4(RA)
5595 | mr SAVE1, RD
5596 | blex __adddf3
5597 | mr RD, SAVE1
5598 | stw CRET1, FORL_IDX*8(RA)
5599 | stw CRET2, FORL_IDX*8+4(RA)
5600 | lwz CARG3, FORL_STOP*8(RA)
5601 | lwz CARG4, FORL_STOP*8+4(RA)
5602 |.endif
5603 | lwz SAVE0, FORL_STEP*8(RA)
4734 } else { 5604 } else {
4735 |.if DUALNUM 5605 |.if DUALNUM
4736 |9: // FP loop. 5606 |9: // FP loop.
4737 |.else 5607 |.else
4738 | lwzux TMP1, RA, BASE 5608 | lwzux TMP1, RA, BASE
4739 | lwz TMP3, FORL_STEP*8(RA) 5609 | lwz SAVE0, FORL_STEP*8(RA)
4740 | lwz TMP2, FORL_STOP*8(RA) 5610 | lwz TMP2, FORL_STOP*8(RA)
4741 | cmplw cr0, TMP1, TISNUM 5611 | cmplw cr0, TMP1, TISNUM
4742 | cmplw cr7, TMP3, TISNUM 5612 | cmplw cr7, SAVE0, TISNUM
4743 | cmplw cr1, TMP2, TISNUM 5613 | cmplw cr1, TMP2, TISNUM
4744 |.endif 5614 |.endif
5615 |.if FPU
4745 | lfd f1, FORL_IDX*8(RA) 5616 | lfd f1, FORL_IDX*8(RA)
5617 |.else
5618 | lwz CARG1, FORL_IDX*8(RA)
5619 | lwz CARG2, FORL_IDX*8+4(RA)
5620 |.endif
4746 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 5621 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
4747 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 5622 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
5623 |.if FPU
4748 | lfd f2, FORL_STOP*8(RA) 5624 | lfd f2, FORL_STOP*8(RA)
5625 |.else
5626 | lwz CARG3, FORL_STOP*8(RA)
5627 | lwz CARG4, FORL_STOP*8+4(RA)
5628 |.endif
4749 | bge ->vmeta_for 5629 | bge ->vmeta_for
4750 } 5630 }
4751 | cmpwi cr6, TMP3, 0 5631 | cmpwi cr6, SAVE0, 0
4752 if (op != BC_JFORL) { 5632 if (op != BC_JFORL) {
4753 | srwi RD, RD, 1 5633 | srwi RD, RD, 1
4754 } 5634 }
5635 |.if FPU
4755 | stfd f1, FORL_EXT*8(RA) 5636 | stfd f1, FORL_EXT*8(RA)
5637 |.else
5638 | stw CARG1, FORL_EXT*8(RA)
5639 | stw CARG2, FORL_EXT*8+4(RA)
5640 |.endif
4756 if (op != BC_JFORL) { 5641 if (op != BC_JFORL) {
4757 | add RD, PC, RD 5642 | add RD, PC, RD
4758 } 5643 }
5644 |.if FPU
4759 | fcmpu cr0, f1, f2 5645 | fcmpu cr0, f1, f2
5646 |.else
5647 | mr SAVE1, RD
5648 | blex __ledf2
5649 | cmpwi CRET1, 0
5650 | mr RD, SAVE1
5651 |.endif
4760 if (op == BC_JFORI) { 5652 if (op == BC_JFORI) {
4761 | addis PC, RD, -(BCBIAS_J*4 >> 16) 5653 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4762 } 5654 }
@@ -4859,8 +5751,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4859 | lp TMP2, TRACE:TMP2->mcode 5751 | lp TMP2, TRACE:TMP2->mcode
4860 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5752 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4861 | mtctr TMP2 5753 | mtctr TMP2
4862 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4863 | addi JGL, DISPATCH, GG_DISP2G+32768 5754 | addi JGL, DISPATCH, GG_DISP2G+32768
5755 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4864 | bctr 5756 | bctr
4865 |.endif 5757 |.endif
4866 break; 5758 break;
@@ -4995,6 +5887,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4995 | lp TMP1, L->top 5887 | lp TMP1, L->top
4996 | li_vmstate INTERP 5888 | li_vmstate INTERP
4997 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 5889 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
5890 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
4998 | sub RA, TMP1, RD // RA = L->top - nresults*8 5891 | sub RA, TMP1, RD // RA = L->top - nresults*8
4999 | st_vmstate 5892 | st_vmstate
5000 | b ->vm_returnc 5893 | b ->vm_returnc