aboutsummaryrefslogtreecommitdiff
path: root/src/vm_mips.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_mips.dasc')
-rw-r--r--src/vm_mips.dasc2492
1 files changed, 1754 insertions, 738 deletions
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index e6b53e0d..37506139 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,9 @@
1|// Low-level VM code for MIPS CPUs. 1|// Low-level VM code for MIPS CPUs.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// MIPS soft-float support contributed by Djordje Kovacevic and
6|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4| 7|
5|.arch mips 8|.arch mips
6|.section code_op, code_sub 9|.section code_op, code_sub
@@ -18,6 +21,12 @@
18|// Fixed register assignments for the interpreter. 21|// Fixed register assignments for the interpreter.
19|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra 22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20| 23|
24|.macro .FPU, a, b
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
21|// The following must be C callee-save (but BASE is often refetched). 30|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r16 // Base of current Lua stack frame. 31|.define BASE, r16 // Base of current Lua stack frame.
23|.define KBASE, r17 // Constants of current Lua function. 32|.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
25|.define DISPATCH, r19 // Opcode dispatch table. 34|.define DISPATCH, r19 // Opcode dispatch table.
26|.define LREG, r20 // Register holding lua_State (also in SAVE_L). 35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. 36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
28|// NYI: r22 currently unused.
29| 37|
30|.define JGL, r30 // On-trace: global_State + 32768. 38|.define JGL, r30 // On-trace: global_State + 32768.
31| 39|
32|// Constants for type-comparisons, stores and conversions. C callee-save. 40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNUM, r22
33|.define TISNIL, r30 42|.define TISNIL, r30
43|.if FPU
34|.define TOBIT, f30 // 2^52 + 2^51. 44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
35| 46|
36|// The following temporaries are not saved across C calls, except for RA. 47|// The following temporaries are not saved across C calls, except for RA.
37|.define RA, r23 // Callee-save. 48|.define RA, r23 // Callee-save.
@@ -46,7 +57,7 @@
46|.define TMP2, r14 57|.define TMP2, r14
47|.define TMP3, r15 58|.define TMP3, r15
48| 59|
49|// Calling conventions. 60|// MIPS o32 calling convention.
50|.define CFUNCADDR, r25 61|.define CFUNCADDR, r25
51|.define CARG1, r4 62|.define CARG1, r4
52|.define CARG2, r5 63|.define CARG2, r5
@@ -56,13 +67,33 @@
56|.define CRET1, r2 67|.define CRET1, r2
57|.define CRET2, r3 68|.define CRET2, r3
58| 69|
70|.if ENDIAN_LE
71|.define SFRETLO, CRET1
72|.define SFRETHI, CRET2
73|.define SFARG1LO, CARG1
74|.define SFARG1HI, CARG2
75|.define SFARG2LO, CARG3
76|.define SFARG2HI, CARG4
77|.else
78|.define SFRETLO, CRET2
79|.define SFRETHI, CRET1
80|.define SFARG1LO, CARG2
81|.define SFARG1HI, CARG1
82|.define SFARG2LO, CARG4
83|.define SFARG2HI, CARG3
84|.endif
85|
86|.if FPU
59|.define FARG1, f12 87|.define FARG1, f12
60|.define FARG2, f14 88|.define FARG2, f14
61| 89|
62|.define FRET1, f0 90|.define FRET1, f0
63|.define FRET2, f2 91|.define FRET2, f2
92|.endif
64| 93|
65|// Stack layout while in interpreter. Must match with lj_frame.h. 94|// Stack layout while in interpreter. Must match with lj_frame.h.
95|.if FPU // MIPS32 hard-float.
96|
66|.define CFRAME_SPACE, 112 // Delta for sp. 97|.define CFRAME_SPACE, 112 // Delta for sp.
67| 98|
68|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. 99|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
72|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. 103|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
73|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. 104|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
74|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. 105|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
106|
107|.else // MIPS32 soft-float
108|
109|.define CFRAME_SPACE, 64 // Delta for sp.
110|
111|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
112|.define SAVE_NRES, 72(sp)
113|.define SAVE_CFRAME, 68(sp)
114|.define SAVE_L, 64(sp)
115|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
116|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
117|
118|.endif
119|
75|.define SAVE_PC, 20(sp) 120|.define SAVE_PC, 20(sp)
76|.define ARG5, 16(sp) 121|.define ARG5, 16(sp)
77|.define CSAVE_4, 12(sp) 122|.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
83|.define ARG5_OFS, 16 128|.define ARG5_OFS, 16
84|.define SAVE_MULTRES, ARG5 129|.define SAVE_MULTRES, ARG5
85| 130|
131|//-----------------------------------------------------------------------
132|
86|.macro saveregs 133|.macro saveregs
87| addiu sp, sp, -CFRAME_SPACE 134| addiu sp, sp, -CFRAME_SPACE
88| sw ra, SAVE_GPR_+9*4(sp) 135| sw ra, SAVE_GPR_+9*4(sp)
89| sw r30, SAVE_GPR_+8*4(sp) 136| sw r30, SAVE_GPR_+8*4(sp)
90| sdc1 f30, SAVE_FPR_+5*8(sp) 137| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91| sw r23, SAVE_GPR_+7*4(sp) 138| sw r23, SAVE_GPR_+7*4(sp)
92| sw r22, SAVE_GPR_+6*4(sp) 139| sw r22, SAVE_GPR_+6*4(sp)
93| sdc1 f28, SAVE_FPR_+4*8(sp) 140| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94| sw r21, SAVE_GPR_+5*4(sp) 141| sw r21, SAVE_GPR_+5*4(sp)
95| sw r20, SAVE_GPR_+4*4(sp) 142| sw r20, SAVE_GPR_+4*4(sp)
96| sdc1 f26, SAVE_FPR_+3*8(sp) 143| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97| sw r19, SAVE_GPR_+3*4(sp) 144| sw r19, SAVE_GPR_+3*4(sp)
98| sw r18, SAVE_GPR_+2*4(sp) 145| sw r18, SAVE_GPR_+2*4(sp)
99| sdc1 f24, SAVE_FPR_+2*8(sp) 146| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100| sw r17, SAVE_GPR_+1*4(sp) 147| sw r17, SAVE_GPR_+1*4(sp)
101| sw r16, SAVE_GPR_+0*4(sp) 148| sw r16, SAVE_GPR_+0*4(sp)
102| sdc1 f22, SAVE_FPR_+1*8(sp) 149| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
103| sdc1 f20, SAVE_FPR_+0*8(sp) 150| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104|.endmacro 151|.endmacro
105| 152|
106|.macro restoreregs_ret 153|.macro restoreregs_ret
107| lw ra, SAVE_GPR_+9*4(sp) 154| lw ra, SAVE_GPR_+9*4(sp)
108| lw r30, SAVE_GPR_+8*4(sp) 155| lw r30, SAVE_GPR_+8*4(sp)
109| ldc1 f30, SAVE_FPR_+5*8(sp) 156| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110| lw r23, SAVE_GPR_+7*4(sp) 157| lw r23, SAVE_GPR_+7*4(sp)
111| lw r22, SAVE_GPR_+6*4(sp) 158| lw r22, SAVE_GPR_+6*4(sp)
112| ldc1 f28, SAVE_FPR_+4*8(sp) 159| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113| lw r21, SAVE_GPR_+5*4(sp) 160| lw r21, SAVE_GPR_+5*4(sp)
114| lw r20, SAVE_GPR_+4*4(sp) 161| lw r20, SAVE_GPR_+4*4(sp)
115| ldc1 f26, SAVE_FPR_+3*8(sp) 162| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116| lw r19, SAVE_GPR_+3*4(sp) 163| lw r19, SAVE_GPR_+3*4(sp)
117| lw r18, SAVE_GPR_+2*4(sp) 164| lw r18, SAVE_GPR_+2*4(sp)
118| ldc1 f24, SAVE_FPR_+2*8(sp) 165| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119| lw r17, SAVE_GPR_+1*4(sp) 166| lw r17, SAVE_GPR_+1*4(sp)
120| lw r16, SAVE_GPR_+0*4(sp) 167| lw r16, SAVE_GPR_+0*4(sp)
121| ldc1 f22, SAVE_FPR_+1*8(sp) 168| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
122| ldc1 f20, SAVE_FPR_+0*8(sp) 169| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123| jr ra 170| jr ra
124| addiu sp, sp, CFRAME_SPACE 171| addiu sp, sp, CFRAME_SPACE
125|.endmacro 172|.endmacro
@@ -138,6 +185,7 @@
138|.type NODE, Node 185|.type NODE, Node
139|.type NARGS8, int 186|.type NARGS8, int
140|.type TRACE, GCtrace 187|.type TRACE, GCtrace
188|.type SBUF, SBuf
141| 189|
142|//----------------------------------------------------------------------- 190|//-----------------------------------------------------------------------
143| 191|
@@ -152,13 +200,23 @@
152|//----------------------------------------------------------------------- 200|//-----------------------------------------------------------------------
153| 201|
154|// Endian-specific defines. 202|// Endian-specific defines.
155|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) 203|.if ENDIAN_LE
156|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) 204|.define FRAME_PC, -4
157|.define HI, LJ_ENDIAN_SELECT(4,0) 205|.define FRAME_FUNC, -8
158|.define LO, LJ_ENDIAN_SELECT(0,4) 206|.define HI, 4
159|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) 207|.define LO, 0
160|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) 208|.define OFS_RD, 2
161|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) 209|.define OFS_RA, 1
210|.define OFS_OP, 0
211|.else
212|.define FRAME_PC, -8
213|.define FRAME_FUNC, -4
214|.define HI, 0
215|.define LO, 4
216|.define OFS_RD, 0
217|.define OFS_RA, 2
218|.define OFS_OP, 3
219|.endif
162| 220|
163|// Instruction decode. 221|// Instruction decode.
164|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro 222|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
353 |. sll TMP2, TMP2, 3 411 |. sll TMP2, TMP2, 3
354 |1: 412 |1:
355 | addiu TMP1, TMP1, -8 413 | addiu TMP1, TMP1, -8
356 | ldc1 f0, 0(RA) 414 | lw SFRETHI, HI(RA)
415 | lw SFRETLO, LO(RA)
357 | addiu RA, RA, 8 416 | addiu RA, RA, 8
358 | sdc1 f0, 0(BASE) 417 | sw SFRETHI, HI(BASE)
418 | sw SFRETLO, LO(BASE)
359 | bnez TMP1, <1 419 | bnez TMP1, <1
360 |. addiu BASE, BASE, 8 420 |. addiu BASE, BASE, 8
361 | 421 |
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
424 | and sp, CARG1, AT 484 | and sp, CARG1, AT
425 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 485 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
426 | lw L, SAVE_L 486 | lw L, SAVE_L
427 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 487 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
428 | li TISNIL, LJ_TNIL 489 | li TISNIL, LJ_TNIL
429 | lw BASE, L->base 490 | lw BASE, L->base
430 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 491 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
431 | mtc1 TMP3, TOBIT 492 | .FPU mtc1 TMP3, TOBIT
432 | li TMP1, LJ_TFALSE 493 | li TMP1, LJ_TFALSE
433 | li_vmstate INTERP 494 | li_vmstate INTERP
434 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. 495 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
435 | cvt.d.s TOBIT, TOBIT 496 | .FPU cvt.d.s TOBIT, TOBIT
436 | addiu RA, BASE, -8 // Results start at BASE-8. 497 | addiu RA, BASE, -8 // Results start at BASE-8.
437 | addiu DISPATCH, DISPATCH, GG_G2DISP 498 | addiu DISPATCH, DISPATCH, GG_G2DISP
438 | sw TMP1, HI(RA) // Prepend false to error message. 499 | sw TMP1, HI(RA) // Prepend false to error message.
@@ -486,21 +547,23 @@ static void build_subroutines(BuildCtx *ctx)
486 | addiu DISPATCH, DISPATCH, GG_G2DISP 547 | addiu DISPATCH, DISPATCH, GG_G2DISP
487 | sw r0, SAVE_NRES 548 | sw r0, SAVE_NRES
488 | sw r0, SAVE_ERRF 549 | sw r0, SAVE_ERRF
489 | sw TMP0, L->cframe 550 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
490 | sw r0, SAVE_CFRAME 551 | sw r0, SAVE_CFRAME
491 | beqz TMP1, >3 552 | beqz TMP1, >3
492 |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 553 |. sw TMP0, L->cframe
493 | 554 |
494 | // Resume after yield (like a return). 555 | // Resume after yield (like a return).
556 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
495 | move RA, BASE 557 | move RA, BASE
496 | lw BASE, L->base 558 | lw BASE, L->base
559 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
497 | lw TMP1, L->top 560 | lw TMP1, L->top
498 | lw PC, FRAME_PC(BASE) 561 | lw PC, FRAME_PC(BASE)
499 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 562 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
500 | subu RD, TMP1, BASE 563 | subu RD, TMP1, BASE
501 | mtc1 TMP3, TOBIT 564 | .FPU mtc1 TMP3, TOBIT
502 | sb r0, L->status 565 | sb r0, L->status
503 | cvt.d.s TOBIT, TOBIT 566 | .FPU cvt.d.s TOBIT, TOBIT
504 | li_vmstate INTERP 567 | li_vmstate INTERP
505 | addiu RD, RD, 8 568 | addiu RD, RD, 8
506 | st_vmstate 569 | st_vmstate
@@ -525,25 +588,27 @@ static void build_subroutines(BuildCtx *ctx)
525 | 588 |
526 |1: // Entry point for vm_pcall above (PC = ftype). 589 |1: // Entry point for vm_pcall above (PC = ftype).
527 | lw TMP1, L:CARG1->cframe 590 | lw TMP1, L:CARG1->cframe
528 | sw CARG3, SAVE_NRES
529 | move L, CARG1 591 | move L, CARG1
530 | sw CARG1, SAVE_L 592 | sw CARG3, SAVE_NRES
531 | move BASE, CARG2
532 | sw sp, L->cframe // Add our C frame to cframe chain.
533 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 593 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
594 | sw CARG1, SAVE_L
595 | move BASE, CARG2
596 | addiu DISPATCH, DISPATCH, GG_G2DISP
534 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 597 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | sw TMP1, SAVE_CFRAME 598 | sw TMP1, SAVE_CFRAME
536 | addiu DISPATCH, DISPATCH, GG_G2DISP 599 | sw sp, L->cframe // Add our C frame to cframe chain.
537 | 600 |
538 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 601 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
602 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
539 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 603 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
540 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 604 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
605 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
541 | lw TMP1, L->top 606 | lw TMP1, L->top
542 | mtc1 TMP3, TOBIT 607 | .FPU mtc1 TMP3, TOBIT
543 | addu PC, PC, BASE 608 | addu PC, PC, BASE
544 | subu NARGS8:RC, TMP1, BASE 609 | subu NARGS8:RC, TMP1, BASE
545 | subu PC, PC, TMP2 // PC = frame delta + frame type 610 | subu PC, PC, TMP2 // PC = frame delta + frame type
546 | cvt.d.s TOBIT, TOBIT 611 | .FPU cvt.d.s TOBIT, TOBIT
547 | li_vmstate INTERP 612 | li_vmstate INTERP
548 | li TISNIL, LJ_TNIL 613 | li TISNIL, LJ_TNIL
549 | st_vmstate 614 | st_vmstate
@@ -566,20 +631,21 @@ static void build_subroutines(BuildCtx *ctx)
566 | lw TMP0, L:CARG1->stack 631 | lw TMP0, L:CARG1->stack
567 | sw CARG1, SAVE_L 632 | sw CARG1, SAVE_L
568 | lw TMP1, L->top 633 | lw TMP1, L->top
634 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
569 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 635 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
570 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 636 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
571 | lw TMP1, L->cframe 637 | lw TMP1, L->cframe
572 | sw sp, L->cframe // Add our C frame to cframe chain. 638 | addiu DISPATCH, DISPATCH, GG_G2DISP
573 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 639 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
574 | sw r0, SAVE_ERRF // No error function. 640 | sw r0, SAVE_ERRF // No error function.
575 | move CFUNCADDR, CARG4 641 | sw TMP1, SAVE_CFRAME
642 | sw sp, L->cframe // Add our C frame to cframe chain.
643 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
576 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) 644 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
577 |. sw TMP1, SAVE_CFRAME 645 |. move CFUNCADDR, CARG4
578 | move BASE, CRET1 646 | move BASE, CRET1
579 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
580 | li PC, FRAME_CP
581 | bnez CRET1, <3 // Else continue with the call. 647 | bnez CRET1, <3 // Else continue with the call.
582 |. addiu DISPATCH, DISPATCH, GG_G2DISP 648 |. li PC, FRAME_CP
583 | b ->vm_leave_cp // No base? Just remove C frame. 649 | b ->vm_leave_cp // No base? Just remove C frame.
584 |. nop 650 |. nop
585 | 651 |
@@ -624,7 +690,8 @@ static void build_subroutines(BuildCtx *ctx)
624 |->cont_cat: // RA = resultptr, RB = meta base 690 |->cont_cat: // RA = resultptr, RB = meta base
625 | lw INS, -4(PC) 691 | lw INS, -4(PC)
626 | addiu CARG2, RB, -16 692 | addiu CARG2, RB, -16
627 | ldc1 f0, 0(RA) 693 | lw SFRETHI, HI(RA)
694 | lw SFRETLO, LO(RA)
628 | decode_RB8a MULTRES, INS 695 | decode_RB8a MULTRES, INS
629 | decode_RA8a RA, INS 696 | decode_RA8a RA, INS
630 | decode_RB8b MULTRES 697 | decode_RB8b MULTRES
@@ -632,11 +699,13 @@ static void build_subroutines(BuildCtx *ctx)
632 | addu TMP1, BASE, MULTRES 699 | addu TMP1, BASE, MULTRES
633 | sw BASE, L->base 700 | sw BASE, L->base
634 | subu CARG3, CARG2, TMP1 701 | subu CARG3, CARG2, TMP1
702 | sw SFRETHI, HI(CARG2)
635 | bne TMP1, CARG2, ->BC_CAT_Z 703 | bne TMP1, CARG2, ->BC_CAT_Z
636 |. sdc1 f0, 0(CARG2) 704 |. sw SFRETLO, LO(CARG2)
637 | addu RA, BASE, RA 705 | addu RA, BASE, RA
706 | sw SFRETHI, HI(RA)
638 | b ->cont_nop 707 | b ->cont_nop
639 |. sdc1 f0, 0(RA) 708 |. sw SFRETLO, LO(RA)
640 | 709 |
641 |//-- Table indexing metamethods ----------------------------------------- 710 |//-- Table indexing metamethods -----------------------------------------
642 | 711 |
@@ -659,10 +728,9 @@ static void build_subroutines(BuildCtx *ctx)
659 |. sw TMP1, HI(CARG3) 728 |. sw TMP1, HI(CARG3)
660 | 729 |
661 |->vmeta_tgetb: // TMP0 = index 730 |->vmeta_tgetb: // TMP0 = index
662 | mtc1 TMP0, f0
663 | cvt.d.w f0, f0
664 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 731 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
665 | sdc1 f0, 0(CARG3) 732 | sw TMP0, LO(CARG3)
733 | sw TISNUM, HI(CARG3)
666 | 734 |
667 |->vmeta_tgetv: 735 |->vmeta_tgetv:
668 |1: 736 |1:
@@ -674,9 +742,11 @@ static void build_subroutines(BuildCtx *ctx)
674 | // Returns TValue * (finished) or NULL (metamethod). 742 | // Returns TValue * (finished) or NULL (metamethod).
675 | beqz CRET1, >3 743 | beqz CRET1, >3
676 |. addiu TMP1, BASE, -FRAME_CONT 744 |. addiu TMP1, BASE, -FRAME_CONT
677 | ldc1 f0, 0(CRET1) 745 | lw SFARG1HI, HI(CRET1)
746 | lw SFARG2HI, LO(CRET1)
678 | ins_next1 747 | ins_next1
679 | sdc1 f0, 0(RA) 748 | sw SFARG1HI, HI(RA)
749 | sw SFARG2HI, LO(RA)
680 | ins_next2 750 | ins_next2
681 | 751 |
682 |3: // Call __index metamethod. 752 |3: // Call __index metamethod.
@@ -688,6 +758,17 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 758 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 759 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 760 |
761 |->vmeta_tgetr:
762 | load_got lj_tab_getinth
763 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
764 |. nop
765 | // Returns cTValue * or NULL.
766 | beqz CRET1, ->BC_TGETR_Z
767 |. move SFARG2HI, TISNIL
768 | lw SFARG2HI, HI(CRET1)
769 | b ->BC_TGETR_Z
770 |. lw SFARG2LO, LO(CRET1)
771 |
691 |//----------------------------------------------------------------------- 772 |//-----------------------------------------------------------------------
692 | 773 |
693 |->vmeta_tsets1: 774 |->vmeta_tsets1:
@@ -709,10 +790,9 @@ static void build_subroutines(BuildCtx *ctx)
709 |. sw TMP1, HI(CARG3) 790 |. sw TMP1, HI(CARG3)
710 | 791 |
711 |->vmeta_tsetb: // TMP0 = index 792 |->vmeta_tsetb: // TMP0 = index
712 | mtc1 TMP0, f0
713 | cvt.d.w f0, f0
714 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 793 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
715 | sdc1 f0, 0(CARG3) 794 | sw TMP0, LO(CARG3)
795 | sw TISNUM, HI(CARG3)
716 | 796 |
717 |->vmeta_tsetv: 797 |->vmeta_tsetv:
718 |1: 798 |1:
@@ -722,11 +802,13 @@ static void build_subroutines(BuildCtx *ctx)
722 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 802 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
723 |. move CARG1, L 803 |. move CARG1, L
724 | // Returns TValue * (finished) or NULL (metamethod). 804 | // Returns TValue * (finished) or NULL (metamethod).
805 | lw SFARG1HI, HI(RA)
725 | beqz CRET1, >3 806 | beqz CRET1, >3
726 |. ldc1 f0, 0(RA) 807 |. lw SFARG1LO, LO(RA)
727 | // NOBARRIER: lj_meta_tset ensures the table is not black. 808 | // NOBARRIER: lj_meta_tset ensures the table is not black.
728 | ins_next1 809 | ins_next1
729 | sdc1 f0, 0(CRET1) 810 | sw SFARG1HI, HI(CRET1)
811 | sw SFARG1LO, LO(CRET1)
730 | ins_next2 812 | ins_next2
731 | 813 |
732 |3: // Call __newindex metamethod. 814 |3: // Call __newindex metamethod.
@@ -736,14 +818,27 @@ static void build_subroutines(BuildCtx *ctx)
736 | sw PC, -16+HI(BASE) // [cont|PC] 818 | sw PC, -16+HI(BASE) // [cont|PC]
737 | subu PC, BASE, TMP1 819 | subu PC, BASE, TMP1
738 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 820 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
739 | sdc1 f0, 16(BASE) // Copy value to third argument. 821 | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
822 | sw SFARG1LO, 16+LO(BASE)
740 | b ->vm_call_dispatch_f 823 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 824 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 825 |
826 |->vmeta_tsetr:
827 | load_got lj_tab_setinth
828 | sw BASE, L->base
829 | sw PC, SAVE_PC
830 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
831 |. move CARG1, L
832 | // Returns TValue *.
833 | b ->BC_TSETR_Z
834 |. nop
835 |
743 |//-- Comparison metamethods --------------------------------------------- 836 |//-- Comparison metamethods ---------------------------------------------
744 | 837 |
745 |->vmeta_comp: 838 |->vmeta_comp:
746 | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. 839 | // RA/RD point to o1/o2.
840 | move CARG2, RA
841 | move CARG3, RD
747 | load_got lj_meta_comp 842 | load_got lj_meta_comp
748 | addiu PC, PC, -4 843 | addiu PC, PC, -4
749 | sw BASE, L->base 844 | sw BASE, L->base
@@ -769,11 +864,13 @@ static void build_subroutines(BuildCtx *ctx)
769 | 864 |
770 |->cont_ra: // RA = resultptr 865 |->cont_ra: // RA = resultptr
771 | lbu TMP1, -4+OFS_RA(PC) 866 | lbu TMP1, -4+OFS_RA(PC)
772 | ldc1 f0, 0(RA) 867 | lw SFRETHI, HI(RA)
868 | lw SFRETLO, LO(RA)
773 | sll TMP1, TMP1, 3 869 | sll TMP1, TMP1, 3
774 | addu TMP1, BASE, TMP1 870 | addu TMP1, BASE, TMP1
871 | sw SFRETHI, HI(TMP1)
775 | b ->cont_nop 872 | b ->cont_nop
776 |. sdc1 f0, 0(TMP1) 873 |. sw SFRETLO, LO(TMP1)
777 | 874 |
778 |->cont_condt: // RA = resultptr 875 |->cont_condt: // RA = resultptr
779 | lw TMP0, HI(RA) 876 | lw TMP0, HI(RA)
@@ -788,8 +885,11 @@ static void build_subroutines(BuildCtx *ctx)
788 |. addiu TMP2, AT, -1 // Branch if result is false. 885 |. addiu TMP2, AT, -1 // Branch if result is false.
789 | 886 |
790 |->vmeta_equal: 887 |->vmeta_equal:
791 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. 888 | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
792 | load_got lj_meta_equal 889 | load_got lj_meta_equal
890 | move CARG2, SFARG1LO
891 | move CARG3, SFARG2LO
892 | move CARG4, TMP0
793 | addiu PC, PC, -4 893 | addiu PC, PC, -4
794 | sw BASE, L->base 894 | sw BASE, L->base
795 | sw PC, SAVE_PC 895 | sw PC, SAVE_PC
@@ -813,17 +913,31 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 913 |. nop
814 |.endif 914 |.endif
815 | 915 |
916 |->vmeta_istype:
917 | load_got lj_meta_istype
918 | addiu PC, PC, -4
919 | sw BASE, L->base
920 | srl CARG2, RA, 3
921 | srl CARG3, RD, 3
922 | sw PC, SAVE_PC
923 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
924 |. move CARG1, L
925 | b ->cont_nop
926 |. nop
927 |
816 |//-- Arithmetic metamethods --------------------------------------------- 928 |//-- Arithmetic metamethods ---------------------------------------------
817 | 929 |
818 |->vmeta_unm: 930 |->vmeta_unm:
819 | move CARG4, CARG3 931 | move RC, RB
820 | 932 |
821 |->vmeta_arith: 933 |->vmeta_arith:
822 | load_got lj_meta_arith 934 | load_got lj_meta_arith
823 | decode_OP1 TMP0, INS 935 | decode_OP1 TMP0, INS
824 | sw BASE, L->base 936 | sw BASE, L->base
825 | sw PC, SAVE_PC
826 | move CARG2, RA 937 | move CARG2, RA
938 | sw PC, SAVE_PC
939 | move CARG3, RB
940 | move CARG4, RC
827 | sw TMP0, ARG5 941 | sw TMP0, ARG5
828 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 942 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
829 |. move CARG1, L 943 |. move CARG1, L
@@ -931,40 +1045,52 @@ static void build_subroutines(BuildCtx *ctx)
931 | 1045 |
932 |.macro .ffunc_1, name 1046 |.macro .ffunc_1, name
933 |->ff_ .. name: 1047 |->ff_ .. name:
1048 | lw SFARG1HI, HI(BASE)
934 | beqz NARGS8:RC, ->fff_fallback 1049 | beqz NARGS8:RC, ->fff_fallback
935 |. lw CARG3, HI(BASE) 1050 |. lw SFARG1LO, LO(BASE)
936 | lw CARG1, LO(BASE)
937 |.endmacro 1051 |.endmacro
938 | 1052 |
939 |.macro .ffunc_2, name 1053 |.macro .ffunc_2, name
940 |->ff_ .. name: 1054 |->ff_ .. name:
941 | sltiu AT, NARGS8:RC, 16 1055 | sltiu AT, NARGS8:RC, 16
942 | lw CARG3, HI(BASE) 1056 | lw SFARG1HI, HI(BASE)
943 | bnez AT, ->fff_fallback 1057 | bnez AT, ->fff_fallback
944 |. lw CARG4, 8+HI(BASE) 1058 |. lw SFARG2HI, 8+HI(BASE)
945 | lw CARG1, LO(BASE) 1059 | lw SFARG1LO, LO(BASE)
946 | lw CARG2, 8+LO(BASE) 1060 | lw SFARG2LO, 8+LO(BASE)
947 |.endmacro 1061 |.endmacro
948 | 1062 |
949 |.macro .ffunc_n, name // Caveat: has delay slot! 1063 |.macro .ffunc_n, name // Caveat: has delay slot!
950 |->ff_ .. name: 1064 |->ff_ .. name:
951 | lw CARG3, HI(BASE) 1065 | lw SFARG1HI, HI(BASE)
1066 |.if FPU
1067 | ldc1 FARG1, 0(BASE)
1068 |.else
1069 | lw SFARG1LO, LO(BASE)
1070 |.endif
952 | beqz NARGS8:RC, ->fff_fallback 1071 | beqz NARGS8:RC, ->fff_fallback
953 |. ldc1 FARG1, 0(BASE) 1072 |. sltiu AT, SFARG1HI, LJ_TISNUM
954 | sltiu AT, CARG3, LJ_TISNUM
955 | beqz AT, ->fff_fallback 1073 | beqz AT, ->fff_fallback
956 |.endmacro 1074 |.endmacro
957 | 1075 |
958 |.macro .ffunc_nn, name // Caveat: has delay slot! 1076 |.macro .ffunc_nn, name // Caveat: has delay slot!
959 |->ff_ .. name: 1077 |->ff_ .. name:
960 | sltiu AT, NARGS8:RC, 16 1078 | sltiu AT, NARGS8:RC, 16
961 | lw CARG3, HI(BASE) 1079 | lw SFARG1HI, HI(BASE)
962 | bnez AT, ->fff_fallback 1080 | bnez AT, ->fff_fallback
963 |. lw CARG4, 8+HI(BASE) 1081 |. lw SFARG2HI, 8+HI(BASE)
964 | ldc1 FARG1, 0(BASE) 1082 | sltiu TMP0, SFARG1HI, LJ_TISNUM
965 | ldc1 FARG2, 8(BASE) 1083 |.if FPU
966 | sltiu TMP0, CARG3, LJ_TISNUM 1084 | ldc1 FARG1, 0(BASE)
967 | sltiu TMP1, CARG4, LJ_TISNUM 1085 |.else
1086 | lw SFARG1LO, LO(BASE)
1087 |.endif
1088 | sltiu TMP1, SFARG2HI, LJ_TISNUM
1089 |.if FPU
1090 | ldc1 FARG2, 8(BASE)
1091 |.else
1092 | lw SFARG2LO, 8+LO(BASE)
1093 |.endif
968 | and TMP0, TMP0, TMP1 1094 | and TMP0, TMP0, TMP1
969 | beqz TMP0, ->fff_fallback 1095 | beqz TMP0, ->fff_fallback
970 |.endmacro 1096 |.endmacro
@@ -980,52 +1106,54 @@ static void build_subroutines(BuildCtx *ctx)
980 |//-- Base library: checks ----------------------------------------------- 1106 |//-- Base library: checks -----------------------------------------------
981 | 1107 |
982 |.ffunc_1 assert 1108 |.ffunc_1 assert
983 | sltiu AT, CARG3, LJ_TISTRUECOND 1109 | sltiu AT, SFARG1HI, LJ_TISTRUECOND
984 | beqz AT, ->fff_fallback 1110 | beqz AT, ->fff_fallback
985 |. addiu RA, BASE, -8 1111 |. addiu RA, BASE, -8
986 | lw PC, FRAME_PC(BASE) 1112 | lw PC, FRAME_PC(BASE)
987 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1113 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
988 | addu TMP2, RA, NARGS8:RC 1114 | addu TMP2, RA, NARGS8:RC
989 | sw CARG3, HI(RA) 1115 | sw SFARG1HI, HI(RA)
990 | addiu TMP1, BASE, 8 1116 | addiu TMP1, BASE, 8
991 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 1117 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
992 |. sw CARG1, LO(RA) 1118 |. sw SFARG1LO, LO(RA)
993 |1: 1119 |1:
994 | ldc1 f0, 0(TMP1) 1120 | lw SFRETHI, HI(TMP1)
995 | sdc1 f0, -8(TMP1) 1121 | lw SFRETLO, LO(TMP1)
1122 | sw SFRETHI, -8+HI(TMP1)
1123 | sw SFRETLO, -8+LO(TMP1)
996 | bne TMP1, TMP2, <1 1124 | bne TMP1, TMP2, <1
997 |. addiu TMP1, TMP1, 8 1125 |. addiu TMP1, TMP1, 8
998 | b ->fff_res 1126 | b ->fff_res
999 |. nop 1127 |. nop
1000 | 1128 |
1001 |.ffunc type 1129 |.ffunc type
1002 | lw CARG3, HI(BASE) 1130 | lw SFARG1HI, HI(BASE)
1003 | li TMP1, LJ_TISNUM
1004 | beqz NARGS8:RC, ->fff_fallback 1131 | beqz NARGS8:RC, ->fff_fallback
1005 |. sltiu TMP0, CARG3, LJ_TISNUM 1132 |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1006 | movz TMP1, CARG3, TMP0 1133 | movn SFARG1HI, TISNUM, TMP0
1007 | not TMP1, TMP1 1134 | not TMP1, SFARG1HI
1008 | sll TMP1, TMP1, 3 1135 | sll TMP1, TMP1, 3
1009 | addu TMP1, CFUNC:RB, TMP1 1136 | addu TMP1, CFUNC:RB, TMP1
1010 | b ->fff_resn 1137 | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1011 |. ldc1 FRET1, CFUNC:TMP1->upvalue 1138 | b ->fff_restv
1139 |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1012 | 1140 |
1013 |//-- Base library: getters and setters --------------------------------- 1141 |//-- Base library: getters and setters ---------------------------------
1014 | 1142 |
1015 |.ffunc_1 getmetatable 1143 |.ffunc_1 getmetatable
1016 | li AT, LJ_TTAB 1144 | li AT, LJ_TTAB
1017 | bne CARG3, AT, >6 1145 | bne SFARG1HI, AT, >6
1018 |. li AT, LJ_TUDATA 1146 |. li AT, LJ_TUDATA
1019 |1: // Field metatable must be at same offset for GCtab and GCudata! 1147 |1: // Field metatable must be at same offset for GCtab and GCudata!
1020 | lw TAB:CARG1, TAB:CARG1->metatable 1148 | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1021 |2: 1149 |2:
1022 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1150 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1023 | beqz TAB:CARG1, ->fff_restv 1151 | beqz TAB:SFARG1LO, ->fff_restv
1024 |. li CARG3, LJ_TNIL 1152 |. li SFARG1HI, LJ_TNIL
1025 | lw TMP0, TAB:CARG1->hmask 1153 | lw TMP0, TAB:SFARG1LO->hmask
1026 | li CARG3, LJ_TTAB // Use metatable as default result. 1154 | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1027 | lw TMP1, STR:RC->hash 1155 | lw TMP1, STR:RC->hash
1028 | lw NODE:TMP2, TAB:CARG1->node 1156 | lw NODE:TMP2, TAB:SFARG1LO->node
1029 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1157 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
1030 | sll TMP0, TMP1, 5 1158 | sll TMP0, TMP1, 5
1031 | sll TMP1, TMP1, 3 1159 | sll TMP1, TMP1, 3
@@ -1037,7 +1165,7 @@ static void build_subroutines(BuildCtx *ctx)
1037 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 1165 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1038 | lw NODE:TMP3, NODE:TMP2->next 1166 | lw NODE:TMP3, NODE:TMP2->next
1039 | bne CARG4, AT, >4 1167 | bne CARG4, AT, >4
1040 |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 1168 |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1041 | beq TMP0, STR:RC, >5 1169 | beq TMP0, STR:RC, >5
1042 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) 1170 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1043 |4: 1171 |4:
@@ -1046,36 +1174,35 @@ static void build_subroutines(BuildCtx *ctx)
1046 | b <3 1174 | b <3
1047 |. nop 1175 |. nop
1048 |5: 1176 |5:
1049 | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. 1177 | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1050 |. nop 1178 |. nop
1051 | move CARG3, CARG2 // Return value of mt.__metatable. 1179 | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1052 | b ->fff_restv 1180 | b ->fff_restv
1053 |. move CARG1, TMP1 1181 |. move SFARG1LO, TMP1
1054 | 1182 |
1055 |6: 1183 |6:
1056 | beq CARG3, AT, <1 1184 | beq SFARG1HI, AT, <1
1057 |. sltiu TMP0, CARG3, LJ_TISNUM 1185 |. sltu AT, TISNUM, SFARG1HI
1058 | li TMP1, LJ_TISNUM 1186 | movz SFARG1HI, TISNUM, AT
1059 | movz TMP1, CARG3, TMP0 1187 | not TMP1, SFARG1HI
1060 | not TMP1, TMP1
1061 | sll TMP1, TMP1, 2 1188 | sll TMP1, TMP1, 2
1062 | addu TMP1, DISPATCH, TMP1 1189 | addu TMP1, DISPATCH, TMP1
1063 | b <2 1190 | b <2
1064 |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) 1191 |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1065 | 1192 |
1066 |.ffunc_2 setmetatable 1193 |.ffunc_2 setmetatable
1067 | // Fast path: no mt for table yet and not clearing the mt. 1194 | // Fast path: no mt for table yet and not clearing the mt.
1068 | li AT, LJ_TTAB 1195 | li AT, LJ_TTAB
1069 | bne CARG3, AT, ->fff_fallback 1196 | bne SFARG1HI, AT, ->fff_fallback
1070 |. addiu CARG4, CARG4, -LJ_TTAB 1197 |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
1071 | lw TAB:TMP1, TAB:CARG1->metatable 1198 | lw TAB:TMP1, TAB:SFARG1LO->metatable
1072 | lbu TMP3, TAB:CARG1->marked 1199 | lbu TMP3, TAB:SFARG1LO->marked
1073 | or AT, CARG4, TAB:TMP1 1200 | or AT, SFARG2HI, TAB:TMP1
1074 | bnez AT, ->fff_fallback 1201 | bnez AT, ->fff_fallback
1075 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) 1202 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1076 | beqz AT, ->fff_restv 1203 | beqz AT, ->fff_restv
1077 |. sw TAB:CARG2, TAB:CARG1->metatable 1204 |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
1078 | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv 1205 | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1079 | 1206 |
1080 |.ffunc rawget 1207 |.ffunc rawget
1081 | lw CARG4, HI(BASE) 1208 | lw CARG4, HI(BASE)
@@ -1089,44 +1216,44 @@ static void build_subroutines(BuildCtx *ctx)
1089 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1216 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1090 |. move CARG1, L 1217 |. move CARG1, L
1091 | // Returns cTValue *. 1218 | // Returns cTValue *.
1092 | b ->fff_resn 1219 | lw SFARG1HI, HI(CRET1)
1093 |. ldc1 FRET1, 0(CRET1) 1220 | b ->fff_restv
1221 |. lw SFARG1LO, LO(CRET1)
1094 | 1222 |
1095 |//-- Base library: conversions ------------------------------------------ 1223 |//-- Base library: conversions ------------------------------------------
1096 | 1224 |
1097 |.ffunc tonumber 1225 |.ffunc tonumber
1098 | // Only handles the number case inline (without a base argument). 1226 | // Only handles the number case inline (without a base argument).
1099 | lw CARG1, HI(BASE) 1227 | lw CARG1, HI(BASE)
1100 | xori AT, NARGS8:RC, 8 1228 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1101 | sltiu CARG1, CARG1, LJ_TISNUM 1229 | sltu TMP0, TISNUM, CARG1
1102 | movn CARG1, r0, AT 1230 | or AT, AT, TMP0
1103 | beqz CARG1, ->fff_fallback // Exactly one number argument. 1231 | bnez AT, ->fff_fallback
1104 |. ldc1 FRET1, 0(BASE) 1232 |. lw SFARG1HI, HI(BASE)
1105 | b ->fff_resn 1233 | b ->fff_restv
1106 |. nop 1234 |. lw SFARG1LO, LO(BASE)
1107 | 1235 |
1108 |.ffunc_1 tostring 1236 |.ffunc_1 tostring
1109 | // Only handles the string or number case inline. 1237 | // Only handles the string or number case inline.
1110 | li AT, LJ_TSTR 1238 | li AT, LJ_TSTR
1111 | // A __tostring method in the string base metatable is ignored. 1239 | // A __tostring method in the string base metatable is ignored.
1112 | beq CARG3, AT, ->fff_restv // String key? 1240 | beq SFARG1HI, AT, ->fff_restv // String key?
1113 | // Handle numbers inline, unless a number base metatable is present. 1241 | // Handle numbers inline, unless a number base metatable is present.
1114 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1242 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1115 | sltiu TMP0, CARG3, LJ_TISNUM 1243 | sltu TMP0, TISNUM, SFARG1HI
1116 | sltiu TMP1, TMP1, 1 1244 | or TMP0, TMP0, TMP1
1117 | and TMP0, TMP0, TMP1 1245 | bnez TMP0, ->fff_fallback
1118 | beqz TMP0, ->fff_fallback
1119 |. sw BASE, L->base // Add frame since C call can throw. 1246 |. sw BASE, L->base // Add frame since C call can throw.
1120 | ffgccheck 1247 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1248 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1249 | load_got lj_strfmt_number
1123 | move CARG1, L 1250 | move CARG1, L
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1251 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1125 |. move CARG2, BASE 1252 |. move CARG2, BASE
1126 | // Returns GCstr *. 1253 | // Returns GCstr *.
1127 | li CARG3, LJ_TSTR 1254 | li SFARG1HI, LJ_TSTR
1128 | b ->fff_restv 1255 | b ->fff_restv
1129 |. move CARG1, CRET1 1256 |. move SFARG1LO, CRET1
1130 | 1257 |
1131 |//-- Base library: iterators ------------------------------------------- 1258 |//-- Base library: iterators -------------------------------------------
1132 | 1259 |
@@ -1148,31 +1275,38 @@ static void build_subroutines(BuildCtx *ctx)
1148 |. move CARG1, L 1275 |. move CARG1, L
1149 | // Returns 0 at end of traversal. 1276 | // Returns 0 at end of traversal.
1150 | beqz CRET1, ->fff_restv // End of traversal: return nil. 1277 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1151 |. li CARG3, LJ_TNIL 1278 |. li SFARG1HI, LJ_TNIL
1152 | ldc1 f0, 8(BASE) // Copy key and value to results. 1279 | lw TMP0, 8+HI(BASE)
1280 | lw TMP1, 8+LO(BASE)
1153 | addiu RA, BASE, -8 1281 | addiu RA, BASE, -8
1154 | ldc1 f2, 16(BASE) 1282 | lw TMP2, 16+HI(BASE)
1155 | li RD, (2+1)*8 1283 | lw TMP3, 16+LO(BASE)
1156 | sdc1 f0, 0(RA) 1284 | sw TMP0, HI(RA)
1285 | sw TMP1, LO(RA)
1286 | sw TMP2, 8+HI(RA)
1287 | sw TMP3, 8+LO(RA)
1157 | b ->fff_res 1288 | b ->fff_res
1158 |. sdc1 f2, 8(RA) 1289 |. li RD, (2+1)*8
1159 | 1290 |
1160 |.ffunc_1 pairs 1291 |.ffunc_1 pairs
1161 | li AT, LJ_TTAB 1292 | li AT, LJ_TTAB
1162 | bne CARG3, AT, ->fff_fallback 1293 | bne SFARG1HI, AT, ->fff_fallback
1163 |. lw PC, FRAME_PC(BASE) 1294 |. lw PC, FRAME_PC(BASE)
1164#if LJ_52 1295#if LJ_52
1165 | lw TAB:TMP2, TAB:CARG1->metatable 1296 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1166 | ldc1 f0, CFUNC:RB->upvalue[0] 1297 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1298 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1167 | bnez TAB:TMP2, ->fff_fallback 1299 | bnez TAB:TMP2, ->fff_fallback
1168#else 1300#else
1169 | ldc1 f0, CFUNC:RB->upvalue[0] 1301 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1302 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1170#endif 1303#endif
1171 |. addiu RA, BASE, -8 1304 |. addiu RA, BASE, -8
1172 | sw TISNIL, 8+HI(BASE) 1305 | sw TISNIL, 8+HI(BASE)
1173 | li RD, (3+1)*8 1306 | sw TMP0, HI(RA)
1307 | sw TMP1, LO(RA)
1174 | b ->fff_res 1308 | b ->fff_res
1175 |. sdc1 f0, 0(RA) 1309 |. li RD, (3+1)*8
1176 | 1310 |
1177 |.ffunc ipairs_aux 1311 |.ffunc ipairs_aux
1178 | sltiu AT, NARGS8:RC, 16 1312 | sltiu AT, NARGS8:RC, 16
@@ -1180,35 +1314,32 @@ static void build_subroutines(BuildCtx *ctx)
1180 | lw TAB:CARG1, LO(BASE) 1314 | lw TAB:CARG1, LO(BASE)
1181 | lw CARG4, 8+HI(BASE) 1315 | lw CARG4, 8+HI(BASE)
1182 | bnez AT, ->fff_fallback 1316 | bnez AT, ->fff_fallback
1183 |. ldc1 FARG2, 8(BASE) 1317 |. addiu CARG3, CARG3, -LJ_TTAB
1184 | addiu CARG3, CARG3, -LJ_TTAB 1318 | xor CARG4, CARG4, TISNUM
1185 | sltiu AT, CARG4, LJ_TISNUM 1319 | and AT, CARG3, CARG4
1186 | li TMP0, 1 1320 | bnez AT, ->fff_fallback
1187 | movn AT, r0, CARG3
1188 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1321 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1322 | lw TMP2, 8+LO(BASE)
1192 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1323 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1324 | lw TMP1, TAB:CARG1->array
1195 | mfc1 TMP2, FRET1
1196 | addiu RA, BASE, -8
1197 | add.d FARG2, FARG2, FARG1
1198 | addiu TMP2, TMP2, 1 1325 | addiu TMP2, TMP2, 1
1326 | sw TISNUM, -8+HI(BASE)
1199 | sltu AT, TMP2, TMP0 1327 | sltu AT, TMP2, TMP0
1328 | sw TMP2, -8+LO(BASE)
1329 | beqz AT, >2 // Not in array part?
1330 |. addiu RA, BASE, -8
1200 | sll TMP3, TMP2, 3 1331 | sll TMP3, TMP2, 3
1201 | addu TMP3, TMP1, TMP3 1332 | addu TMP3, TMP1, TMP3
1202 | beqz AT, >2 // Not in array part? 1333 | lw TMP1, HI(TMP3)
1203 |. sdc1 FARG2, 0(RA) 1334 | lw TMP2, LO(TMP3)
1204 | lw TMP2, HI(TMP3)
1205 | ldc1 f0, 0(TMP3)
1206 |1: 1335 |1:
1207 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. 1336 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1208 |. li RD, (0+1)*8 1337 |. li RD, (0+1)*8
1209 | li RD, (2+1)*8 1338 | sw TMP1, 8+HI(RA)
1339 | sw TMP2, 8+LO(RA)
1210 | b ->fff_res 1340 | b ->fff_res
1211 |. sdc1 f0, 8(RA) 1341 |. li RD, (2+1)*8
1342 |
1212 |2: // Check for empty hash part first. Otherwise call C function. 1343 |2: // Check for empty hash part first. Otherwise call C function.
1213 | lw TMP0, TAB:CARG1->hmask 1344 | lw TMP0, TAB:CARG1->hmask
1214 | load_got lj_tab_getinth 1345 | load_got lj_tab_getinth
@@ -1219,27 +1350,30 @@ static void build_subroutines(BuildCtx *ctx)
1219 | // Returns cTValue * or NULL. 1350 | // Returns cTValue * or NULL.
1220 | beqz CRET1, ->fff_res 1351 | beqz CRET1, ->fff_res
1221 |. li RD, (0+1)*8 1352 |. li RD, (0+1)*8
1222 | lw TMP2, HI(CRET1) 1353 | lw TMP1, HI(CRET1)
1223 | b <1 1354 | b <1
1224 |. ldc1 f0, 0(CRET1) 1355 |. lw TMP2, LO(CRET1)
1225 | 1356 |
1226 |.ffunc_1 ipairs 1357 |.ffunc_1 ipairs
1227 | li AT, LJ_TTAB 1358 | li AT, LJ_TTAB
1228 | bne CARG3, AT, ->fff_fallback 1359 | bne SFARG1HI, AT, ->fff_fallback
1229 |. lw PC, FRAME_PC(BASE) 1360 |. lw PC, FRAME_PC(BASE)
1230#if LJ_52 1361#if LJ_52
1231 | lw TAB:TMP2, TAB:CARG1->metatable 1362 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1232 | ldc1 f0, CFUNC:RB->upvalue[0] 1363 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1364 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1233 | bnez TAB:TMP2, ->fff_fallback 1365 | bnez TAB:TMP2, ->fff_fallback
1234#else 1366#else
1235 | ldc1 f0, CFUNC:RB->upvalue[0] 1367 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1368 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1236#endif 1369#endif
1237 |. addiu RA, BASE, -8 1370 |. addiu RA, BASE, -8
1238 | sw r0, 8+HI(BASE) 1371 | sw TISNUM, 8+HI(BASE)
1239 | sw r0, 8+LO(BASE) 1372 | sw r0, 8+LO(BASE)
1240 | li RD, (3+1)*8 1373 | sw TMP0, HI(RA)
1374 | sw TMP1, LO(RA)
1241 | b ->fff_res 1375 | b ->fff_res
1242 |. sdc1 f0, 0(RA) 1376 |. li RD, (3+1)*8
1243 | 1377 |
1244 |//-- Base library: catch errors ---------------------------------------- 1378 |//-- Base library: catch errors ----------------------------------------
1245 | 1379 |
@@ -1259,8 +1393,9 @@ static void build_subroutines(BuildCtx *ctx)
1259 | sltiu AT, NARGS8:RC, 16 1393 | sltiu AT, NARGS8:RC, 16
1260 | lw CARG4, 8+HI(BASE) 1394 | lw CARG4, 8+HI(BASE)
1261 | bnez AT, ->fff_fallback 1395 | bnez AT, ->fff_fallback
1262 |. ldc1 FARG2, 8(BASE) 1396 |. lw CARG3, 8+LO(BASE)
1263 | ldc1 FARG1, 0(BASE) 1397 | lw CARG1, LO(BASE)
1398 | lw CARG2, HI(BASE)
1264 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1399 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1265 | li AT, LJ_TFUNC 1400 | li AT, LJ_TFUNC
1266 | move TMP2, BASE 1401 | move TMP2, BASE
@@ -1268,9 +1403,11 @@ static void build_subroutines(BuildCtx *ctx)
1268 | addiu BASE, BASE, 16 1403 | addiu BASE, BASE, 16
1269 | // Remember active hook before pcall. 1404 | // Remember active hook before pcall.
1270 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT 1405 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1271 | sdc1 FARG2, 0(TMP2) // Swap function and traceback. 1406 | sw CARG3, LO(TMP2) // Swap function and traceback.
1407 | sw CARG4, HI(TMP2)
1272 | andi TMP3, TMP3, 1 1408 | andi TMP3, TMP3, 1
1273 | sdc1 FARG1, 8(TMP2) 1409 | sw CARG1, 8+LO(TMP2)
1410 | sw CARG2, 8+HI(TMP2)
1274 | addiu PC, TMP3, 16+FRAME_PCALL 1411 | addiu PC, TMP3, 16+FRAME_PCALL
1275 | b ->vm_call_dispatch 1412 | b ->vm_call_dispatch
1276 |. addiu NARGS8:RC, NARGS8:RC, -16 1413 |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1279,7 +1416,10 @@ static void build_subroutines(BuildCtx *ctx)
1279 | 1416 |
1280 |.macro coroutine_resume_wrap, resume 1417 |.macro coroutine_resume_wrap, resume
1281 |.if resume 1418 |.if resume
1282 |.ffunc_1 coroutine_resume 1419 |.ffunc coroutine_resume
1420 | lw CARG3, HI(BASE)
1421 | beqz NARGS8:RC, ->fff_fallback
1422 |. lw CARG1, LO(BASE)
1283 | li AT, LJ_TTHREAD 1423 | li AT, LJ_TTHREAD
1284 | bne CARG3, AT, ->fff_fallback 1424 | bne CARG3, AT, ->fff_fallback
1285 |.else 1425 |.else
@@ -1314,11 +1454,13 @@ static void build_subroutines(BuildCtx *ctx)
1314 | move CARG3, CARG2 1454 | move CARG3, CARG2
1315 | sw BASE, L->top 1455 | sw BASE, L->top
1316 |2: // Move args to coroutine. 1456 |2: // Move args to coroutine.
1317 | ldc1 f0, 0(BASE) 1457 | lw SFRETHI, HI(BASE)
1458 | lw SFRETLO, LO(BASE)
1318 | sltu AT, BASE, TMP1 1459 | sltu AT, BASE, TMP1
1319 | beqz AT, >3 1460 | beqz AT, >3
1320 |. addiu BASE, BASE, 8 1461 |. addiu BASE, BASE, 8
1321 | sdc1 f0, 0(CARG3) 1462 | sw SFRETHI, HI(CARG3)
1463 | sw SFRETLO, LO(CARG3)
1322 | b <2 1464 | b <2
1323 |. addiu CARG3, CARG3, 8 1465 |. addiu CARG3, CARG3, 8
1324 |3: 1466 |3:
@@ -1331,6 +1473,7 @@ static void build_subroutines(BuildCtx *ctx)
1331 | lw TMP3, L:RA->top 1473 | lw TMP3, L:RA->top
1332 | li_vmstate INTERP 1474 | li_vmstate INTERP
1333 | lw BASE, L->base 1475 | lw BASE, L->base
1476 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
1334 | st_vmstate 1477 | st_vmstate
1335 | beqz AT, >8 1478 | beqz AT, >8
1336 |. subu RD, TMP3, TMP2 1479 |. subu RD, TMP3, TMP2
@@ -1343,10 +1486,12 @@ static void build_subroutines(BuildCtx *ctx)
1343 | sw TMP2, L:RA->top // Clear coroutine stack. 1486 | sw TMP2, L:RA->top // Clear coroutine stack.
1344 | move TMP1, BASE 1487 | move TMP1, BASE
1345 |5: // Move results from coroutine. 1488 |5: // Move results from coroutine.
1346 | ldc1 f0, 0(TMP2) 1489 | lw SFRETHI, HI(TMP2)
1490 | lw SFRETLO, LO(TMP2)
1347 | addiu TMP2, TMP2, 8 1491 | addiu TMP2, TMP2, 8
1348 | sltu AT, TMP2, TMP3 1492 | sltu AT, TMP2, TMP3
1349 | sdc1 f0, 0(TMP1) 1493 | sw SFRETHI, HI(TMP1)
1494 | sw SFRETLO, LO(TMP1)
1350 | bnez AT, <5 1495 | bnez AT, <5
1351 |. addiu TMP1, TMP1, 8 1496 |. addiu TMP1, TMP1, 8
1352 |6: 1497 |6:
@@ -1371,12 +1516,14 @@ static void build_subroutines(BuildCtx *ctx)
1371 |.if resume 1516 |.if resume
1372 | addiu TMP3, TMP3, -8 1517 | addiu TMP3, TMP3, -8
1373 | li TMP1, LJ_TFALSE 1518 | li TMP1, LJ_TFALSE
1374 | ldc1 f0, 0(TMP3) 1519 | lw SFRETHI, HI(TMP3)
1520 | lw SFRETLO, LO(TMP3)
1375 | sw TMP3, L:RA->top // Remove error from coroutine stack. 1521 | sw TMP3, L:RA->top // Remove error from coroutine stack.
1376 | li RD, (2+1)*8 1522 | li RD, (2+1)*8
1377 | sw TMP1, -8+HI(BASE) // Prepend false to results. 1523 | sw TMP1, -8+HI(BASE) // Prepend false to results.
1378 | addiu RA, BASE, -8 1524 | addiu RA, BASE, -8
1379 | sdc1 f0, 0(BASE) // Copy error message. 1525 | sw SFRETHI, HI(BASE) // Copy error message.
1526 | sw SFRETLO, LO(BASE)
1380 | b <7 1527 | b <7
1381 |. andi TMP0, PC, FRAME_TYPE 1528 |. andi TMP0, PC, FRAME_TYPE
1382 |.else 1529 |.else
@@ -1412,20 +1559,29 @@ static void build_subroutines(BuildCtx *ctx)
1412 | 1559 |
1413 |//-- Math library ------------------------------------------------------- 1560 |//-- Math library -------------------------------------------------------
1414 | 1561 |
1415 |.ffunc_n math_abs 1562 |.ffunc_1 math_abs
1416 |. abs.d FRET1, FARG1 1563 | bne SFARG1HI, TISNUM, >1
1417 |->fff_resn: 1564 |. sra TMP0, SFARG1LO, 31
1418 | lw PC, FRAME_PC(BASE) 1565 | xor TMP1, SFARG1LO, TMP0
1419 | addiu RA, BASE, -8 1566 | subu SFARG1LO, TMP1, TMP0
1420 | b ->fff_res1 1567 | bgez SFARG1LO, ->fff_restv
1421 |. sdc1 FRET1, -8(BASE) 1568 |. nop
1569 | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1570 | b ->fff_restv
1571 |. li SFARG1LO, 0
1572 |1:
1573 | sltiu AT, SFARG1HI, LJ_TISNUM
1574 | beqz AT, ->fff_fallback
1575 |. sll SFARG1HI, SFARG1HI, 1
1576 | srl SFARG1HI, SFARG1HI, 1
1577 |// fallthrough
1422 | 1578 |
1423 |->fff_restv: 1579 |->fff_restv:
1424 | // CARG3/CARG1 = TValue result. 1580 | // SFARG1LO/SFARG1HI = TValue result.
1425 | lw PC, FRAME_PC(BASE) 1581 | lw PC, FRAME_PC(BASE)
1426 | sw CARG3, -8+HI(BASE) 1582 | sw SFARG1HI, -8+HI(BASE)
1427 | addiu RA, BASE, -8 1583 | addiu RA, BASE, -8
1428 | sw CARG1, -8+LO(BASE) 1584 | sw SFARG1LO, -8+LO(BASE)
1429 |->fff_res1: 1585 |->fff_res1:
1430 | // RA = results, PC = return. 1586 | // RA = results, PC = return.
1431 | li RD, (1+1)*8 1587 | li RD, (1+1)*8
@@ -1454,15 +1610,19 @@ static void build_subroutines(BuildCtx *ctx)
1454 |. sw TISNIL, -8+HI(TMP1) 1610 |. sw TISNIL, -8+HI(TMP1)
1455 | 1611 |
1456 |.macro math_extern, func 1612 |.macro math_extern, func
1457 |->ff_math_ .. func: 1613 | .ffunc math_ .. func
1458 | lw CARG3, HI(BASE) 1614 | lw SFARG1HI, HI(BASE)
1459 | beqz NARGS8:RC, ->fff_fallback 1615 | beqz NARGS8:RC, ->fff_fallback
1460 |. load_got func 1616 |. load_got func
1461 | sltiu AT, CARG3, LJ_TISNUM 1617 | sltiu AT, SFARG1HI, LJ_TISNUM
1462 | beqz AT, ->fff_fallback 1618 | beqz AT, ->fff_fallback
1463 |. nop 1619 |.if FPU
1464 | call_extern
1465 |. ldc1 FARG1, 0(BASE) 1620 |. ldc1 FARG1, 0(BASE)
1621 |.else
1622 |. lw SFARG1LO, LO(BASE)
1623 |.endif
1624 | call_extern
1625 |. nop
1466 | b ->fff_resn 1626 | b ->fff_resn
1467 |. nop 1627 |. nop
1468 |.endmacro 1628 |.endmacro
@@ -1476,10 +1636,22 @@ static void build_subroutines(BuildCtx *ctx)
1476 |. nop 1636 |. nop
1477 |.endmacro 1637 |.endmacro
1478 | 1638 |
1639 |// TODO: Return integer type if result is integer (own sf implementation).
1479 |.macro math_round, func 1640 |.macro math_round, func
1480 | .ffunc_n math_ .. func 1641 |->ff_math_ .. func:
1481 |. nop 1642 | lw SFARG1HI, HI(BASE)
1643 | beqz NARGS8:RC, ->fff_fallback
1644 |. lw SFARG1LO, LO(BASE)
1645 | beq SFARG1HI, TISNUM, ->fff_restv
1646 |. sltu AT, SFARG1HI, TISNUM
1647 | beqz AT, ->fff_fallback
1648 |.if FPU
1649 |. ldc1 FARG1, 0(BASE)
1482 | bal ->vm_ .. func 1650 | bal ->vm_ .. func
1651 |.else
1652 |. load_got func
1653 | call_extern
1654 |.endif
1483 |. nop 1655 |. nop
1484 | b ->fff_resn 1656 | b ->fff_resn
1485 |. nop 1657 |. nop
@@ -1489,15 +1661,19 @@ static void build_subroutines(BuildCtx *ctx)
1489 | math_round ceil 1661 | math_round ceil
1490 | 1662 |
1491 |.ffunc math_log 1663 |.ffunc math_log
1492 | lw CARG3, HI(BASE)
1493 | li AT, 8 1664 | li AT, 8
1494 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1665 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1495 |. load_got log 1666 |. lw SFARG1HI, HI(BASE)
1496 | sltiu AT, CARG3, LJ_TISNUM 1667 | sltiu AT, SFARG1HI, LJ_TISNUM
1497 | beqz AT, ->fff_fallback 1668 | beqz AT, ->fff_fallback
1498 |. nop 1669 |. load_got log
1670 |.if FPU
1499 | call_extern 1671 | call_extern
1500 |. ldc1 FARG1, 0(BASE) 1672 |. ldc1 FARG1, 0(BASE)
1673 |.else
1674 | call_extern
1675 |. lw SFARG1LO, LO(BASE)
1676 |.endif
1501 | b ->fff_resn 1677 | b ->fff_resn
1502 |. nop 1678 |. nop
1503 | 1679 |
@@ -1516,23 +1692,43 @@ static void build_subroutines(BuildCtx *ctx)
1516 | math_extern2 atan2 1692 | math_extern2 atan2
1517 | math_extern2 fmod 1693 | math_extern2 fmod
1518 | 1694 |
1695 |.if FPU
1519 |.ffunc_n math_sqrt 1696 |.ffunc_n math_sqrt
1520 |. sqrt.d FRET1, FARG1 1697 |. sqrt.d FRET1, FARG1
1521 | b ->fff_resn 1698 |// fallthrough to ->fff_resn
1522 |. nop 1699 |.else
1700 | math_extern sqrt
1701 |.endif
1702 |
1703 |->fff_resn:
1704 | lw PC, FRAME_PC(BASE)
1705 | addiu RA, BASE, -8
1706 |.if FPU
1707 | b ->fff_res1
1708 |. sdc1 FRET1, -8(BASE)
1709 |.else
1710 | sw SFRETHI, -8+HI(BASE)
1711 | b ->fff_res1
1712 |. sw SFRETLO, -8+LO(BASE)
1713 |.endif
1523 | 1714 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 | 1715 |
1530 |.ffunc_nn math_ldexp 1716 |.ffunc math_ldexp
1531 | cvt.w.d FARG2, FARG2 1717 | sltiu AT, NARGS8:RC, 16
1718 | lw SFARG1HI, HI(BASE)
1719 | bnez AT, ->fff_fallback
1720 |. lw CARG4, 8+HI(BASE)
1721 | bne CARG4, TISNUM, ->fff_fallback
1532 | load_got ldexp 1722 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1723 |. sltu AT, SFARG1HI, TISNUM
1724 | beqz AT, ->fff_fallback
1725 |.if FPU
1726 |. ldc1 FARG1, 0(BASE)
1727 |.else
1728 |. lw SFARG1LO, LO(BASE)
1729 |.endif
1534 | call_extern 1730 | call_extern
1535 |. nop 1731 |. lw CARG3, 8+LO(BASE)
1536 | b ->fff_resn 1732 | b ->fff_resn
1537 |. nop 1733 |. nop
1538 | 1734 |
@@ -1543,10 +1739,17 @@ static void build_subroutines(BuildCtx *ctx)
1543 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 1739 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1544 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1740 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1545 | addiu RA, BASE, -8 1741 | addiu RA, BASE, -8
1742 |.if FPU
1546 | mtc1 TMP1, FARG2 1743 | mtc1 TMP1, FARG2
1547 | sdc1 FRET1, 0(RA) 1744 | sdc1 FRET1, 0(RA)
1548 | cvt.d.w FARG2, FARG2 1745 | cvt.d.w FARG2, FARG2
1549 | sdc1 FARG2, 8(RA) 1746 | sdc1 FARG2, 8(RA)
1747 |.else
1748 | sw SFRETLO, LO(RA)
1749 | sw SFRETHI, HI(RA)
1750 | sw TMP1, 8+LO(RA)
1751 | sw TISNUM, 8+HI(RA)
1752 |.endif
1550 | b ->fff_res 1753 | b ->fff_res
1551 |. li RD, (2+1)*8 1754 |. li RD, (2+1)*8
1552 | 1755 |
@@ -1556,49 +1759,101 @@ static void build_subroutines(BuildCtx *ctx)
1556 | call_extern 1759 | call_extern
1557 |. addiu CARG3, BASE, -8 1760 |. addiu CARG3, BASE, -8
1558 | addiu RA, BASE, -8 1761 | addiu RA, BASE, -8
1762 |.if FPU
1559 | sdc1 FRET1, 0(BASE) 1763 | sdc1 FRET1, 0(BASE)
1764 |.else
1765 | sw SFRETLO, LO(BASE)
1766 | sw SFRETHI, HI(BASE)
1767 |.endif
1560 | b ->fff_res 1768 | b ->fff_res
1561 |. li RD, (2+1)*8 1769 |. li RD, (2+1)*8
1562 | 1770 |
1563 |.macro math_minmax, name, ismax 1771 |.macro math_minmax, name, intins, fpins
1564 |->ff_ .. name: 1772 | .ffunc_1 name
1565 | lw CARG3, HI(BASE) 1773 | addu TMP3, BASE, NARGS8:RC
1566 | beqz NARGS8:RC, ->fff_fallback 1774 | bne SFARG1HI, TISNUM, >5
1567 |. ldc1 FRET1, 0(BASE) 1775 |. addiu TMP2, BASE, 8
1568 | sltiu AT, CARG3, LJ_TISNUM 1776 |1: // Handle integers.
1777 |. lw SFARG2HI, HI(TMP2)
1778 | beq TMP2, TMP3, ->fff_restv
1779 |. lw SFARG2LO, LO(TMP2)
1780 | bne SFARG2HI, TISNUM, >3
1781 |. slt AT, SFARG1LO, SFARG2LO
1782 | intins SFARG1LO, SFARG2LO, AT
1783 | b <1
1784 |. addiu TMP2, TMP2, 8
1785 |
1786 |3: // Convert intermediate result to number and continue with number loop.
1787 | sltiu AT, SFARG2HI, LJ_TISNUM
1569 | beqz AT, ->fff_fallback 1788 | beqz AT, ->fff_fallback
1570 |. addu TMP2, BASE, NARGS8:RC 1789 |.if FPU
1571 | addiu TMP1, BASE, 8 1790 |. mtc1 SFARG1LO, FRET1
1572 | beq TMP1, TMP2, ->fff_resn 1791 | cvt.d.w FRET1, FRET1
1573 |1: 1792 | b >7
1574 |. lw CARG3, HI(TMP1) 1793 |. ldc1 FARG1, 0(TMP2)
1575 | ldc1 FARG1, 0(TMP1) 1794 |.else
1576 | addiu TMP1, TMP1, 8 1795 |. nop
1577 | sltiu AT, CARG3, LJ_TISNUM 1796 | bal ->vm_sfi2d_1
1797 |. nop
1798 | b >7
1799 |. nop
1800 |.endif
1801 |
1802 |5:
1803 |. sltiu AT, SFARG1HI, LJ_TISNUM
1578 | beqz AT, ->fff_fallback 1804 | beqz AT, ->fff_fallback
1579 |.if ismax 1805 |.if FPU
1580 |. c.olt.d FARG1, FRET1 1806 |. ldc1 FRET1, 0(BASE)
1807 |.endif
1808 |
1809 |6: // Handle numbers.
1810 |. lw SFARG2HI, HI(TMP2)
1811 |.if FPU
1812 | beq TMP2, TMP3, ->fff_resn
1581 |.else 1813 |.else
1582 |. c.olt.d FRET1, FARG1 1814 | beq TMP2, TMP3, ->fff_restv
1583 |.endif 1815 |.endif
1584 | bne TMP1, TMP2, <1 1816 |. sltiu AT, SFARG2HI, LJ_TISNUM
1585 |. movf.d FRET1, FARG1 1817 | beqz AT, >8
1586 | b ->fff_resn 1818 |.if FPU
1819 |. ldc1 FARG1, 0(TMP2)
1820 |.else
1821 |. lw SFARG2LO, LO(TMP2)
1822 |.endif
1823 |7:
1824 |.if FPU
1825 | c.olt.d FRET1, FARG1
1826 | fpins FRET1, FARG1
1827 |.else
1828 | bal ->vm_sfcmpolt
1587 |. nop 1829 |. nop
1830 | intins SFARG1LO, SFARG2LO, CRET1
1831 | intins SFARG1HI, SFARG2HI, CRET1
1832 |.endif
1833 | b <6
1834 |. addiu TMP2, TMP2, 8
1835 |
1836 |8: // Convert integer to number and continue with number loop.
1837 | bne SFARG2HI, TISNUM, ->fff_fallback
1838 |.if FPU
1839 |. lwc1 FARG1, LO(TMP2)
1840 | b <7
1841 |. cvt.d.w FARG1, FARG1
1842 |.else
1843 |. nop
1844 | bal ->vm_sfi2d_2
1845 |. nop
1846 | b <7
1847 |. nop
1848 |.endif
1849 |
1588 |.endmacro 1850 |.endmacro
1589 | 1851 |
1590 | math_minmax math_min, 0 1852 | math_minmax math_min, movz, movf.d
1591 | math_minmax math_max, 1 1853 | math_minmax math_max, movn, movt.d
1592 | 1854 |
1593 |//-- String library ----------------------------------------------------- 1855 |//-- String library -----------------------------------------------------
1594 | 1856 |
1595 |.ffunc_1 string_len
1596 | li AT, LJ_TSTR
1597 | bne CARG3, AT, ->fff_fallback
1598 |. nop
1599 | b ->fff_resi
1600 |. lw CRET1, STR:CARG1->len
1601 |
1602 |.ffunc string_byte // Only handle the 1-arg case here. 1857 |.ffunc string_byte // Only handle the 1-arg case here.
1603 | lw CARG3, HI(BASE) 1858 | lw CARG3, HI(BASE)
1604 | lw STR:CARG1, LO(BASE) 1859 | lw STR:CARG1, LO(BASE)
@@ -1608,33 +1863,31 @@ static void build_subroutines(BuildCtx *ctx)
1608 | bnez AT, ->fff_fallback // Need exactly 1 string argument. 1863 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1609 |. nop 1864 |. nop
1610 | lw TMP0, STR:CARG1->len 1865 | lw TMP0, STR:CARG1->len
1611 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1612 | addiu RA, BASE, -8 1866 | addiu RA, BASE, -8
1867 | lw PC, FRAME_PC(BASE)
1613 | sltu RD, r0, TMP0 1868 | sltu RD, r0, TMP0
1614 | mtc1 TMP1, f0 1869 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1615 | addiu RD, RD, 1 1870 | addiu RD, RD, 1
1616 | cvt.d.w f0, f0
1617 | lw PC, FRAME_PC(BASE)
1618 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 1871 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1872 | sw TISNUM, HI(RA)
1619 | b ->fff_res 1873 | b ->fff_res
1620 |. sdc1 f0, 0(RA) 1874 |. sw TMP1, LO(RA)
1621 | 1875 |
1622 |.ffunc string_char // Only handle the 1-arg case here. 1876 |.ffunc string_char // Only handle the 1-arg case here.
1623 | ffgccheck 1877 | ffgccheck
1624 |. nop 1878 |. nop
1625 | lw CARG3, HI(BASE) 1879 | lw CARG3, HI(BASE)
1626 | ldc1 FARG1, 0(BASE) 1880 | lw CARG1, LO(BASE)
1627 | li AT, 8 1881 | li TMP1, 255
1628 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1882 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1629 |. sltiu AT, CARG3, LJ_TISNUM 1883 | xor TMP0, CARG3, TISNUM // Integer.
1630 | beqz AT, ->fff_fallback 1884 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1885 | or AT, AT, TMP0
1886 | or AT, AT, TMP1
1887 | bnez AT, ->fff_fallback
1631 |. li CARG3, 1 1888 |. li CARG3, 1
1632 | cvt.w.d FARG1, FARG1
1633 | addiu CARG2, sp, ARG5_OFS 1889 | addiu CARG2, sp, ARG5_OFS
1634 | sltiu AT, TMP0, 256 1890 | sb CARG1, ARG5
1635 | mfc1 TMP0, FARG1
1636 | beqz AT, ->fff_fallback
1637 |. sw TMP0, ARG5
1638 |->fff_newstr: 1891 |->fff_newstr:
1639 | load_got lj_str_new 1892 | load_got lj_str_new
1640 | sw BASE, L->base 1893 | sw BASE, L->base
@@ -1643,35 +1896,30 @@ static void build_subroutines(BuildCtx *ctx)
1643 |. move CARG1, L 1896 |. move CARG1, L
1644 | // Returns GCstr *. 1897 | // Returns GCstr *.
1645 | lw BASE, L->base 1898 | lw BASE, L->base
1646 | move CARG1, CRET1 1899 |->fff_resstr:
1900 | move SFARG1LO, CRET1
1647 | b ->fff_restv 1901 | b ->fff_restv
1648 |. li CARG3, LJ_TSTR 1902 |. li SFARG1HI, LJ_TSTR
1649 | 1903 |
1650 |.ffunc string_sub 1904 |.ffunc string_sub
1651 | ffgccheck 1905 | ffgccheck
1652 |. nop 1906 |. nop
1653 | addiu AT, NARGS8:RC, -16 1907 | addiu AT, NARGS8:RC, -16
1654 | lw CARG3, 16+HI(BASE) 1908 | lw CARG3, 16+HI(BASE)
1655 | ldc1 f0, 16(BASE)
1656 | lw TMP0, HI(BASE) 1909 | lw TMP0, HI(BASE)
1657 | lw STR:CARG1, LO(BASE) 1910 | lw STR:CARG1, LO(BASE)
1658 | bltz AT, ->fff_fallback 1911 | bltz AT, ->fff_fallback
1659 | lw CARG2, 8+HI(BASE) 1912 |. lw CARG2, 8+HI(BASE)
1660 | ldc1 f2, 8(BASE)
1661 | beqz AT, >1 1913 | beqz AT, >1
1662 |. li CARG4, -1 1914 |. li CARG4, -1
1663 | cvt.w.d f0, f0 1915 | bne CARG3, TISNUM, ->fff_fallback
1664 | sltiu AT, CARG3, LJ_TISNUM 1916 |. lw CARG4, 16+LO(BASE)
1665 | beqz AT, ->fff_fallback
1666 |. mfc1 CARG4, f0
1667 |1: 1917 |1:
1668 | sltiu AT, CARG2, LJ_TISNUM 1918 | bne CARG2, TISNUM, ->fff_fallback
1669 | beqz AT, ->fff_fallback
1670 |. li AT, LJ_TSTR 1919 |. li AT, LJ_TSTR
1671 | cvt.w.d f2, f2
1672 | bne TMP0, AT, ->fff_fallback 1920 | bne TMP0, AT, ->fff_fallback
1673 |. lw CARG2, STR:CARG1->len 1921 |. lw CARG3, 8+LO(BASE)
1674 | mfc1 CARG3, f2 1922 | lw CARG2, STR:CARG1->len
1675 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end 1923 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1676 | slt AT, CARG4, r0 1924 | slt AT, CARG4, r0
1677 | addiu TMP0, CARG2, 1 1925 | addiu TMP0, CARG2, 1
@@ -1693,139 +1941,130 @@ static void build_subroutines(BuildCtx *ctx)
1693 | bgez CARG3, ->fff_newstr 1941 | bgez CARG3, ->fff_newstr
1694 |. addiu CARG3, CARG3, 1 // len++ 1942 |. addiu CARG3, CARG3, 1 // len++
1695 |->fff_emptystr: // Return empty string. 1943 |->fff_emptystr: // Return empty string.
1696 | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) 1944 | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
1697 | b ->fff_restv 1945 | b ->fff_restv
1698 |. li CARG3, LJ_TSTR 1946 |. li SFARG1HI, LJ_TSTR
1699 |
1700 |.ffunc string_rep // Only handle the 1-char case inline.
1701 | ffgccheck
1702 |. nop
1703 | lw TMP0, HI(BASE)
1704 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1705 | lw CARG4, 8+HI(BASE)
1706 | lw STR:CARG1, LO(BASE)
1707 | addiu TMP0, TMP0, -LJ_TSTR
1708 | ldc1 f0, 8(BASE)
1709 | or AT, AT, TMP0
1710 | bnez AT, ->fff_fallback
1711 |. sltiu AT, CARG4, LJ_TISNUM
1712 | cvt.w.d f0, f0
1713 | beqz AT, ->fff_fallback
1714 |. lw TMP0, STR:CARG1->len
1715 | mfc1 CARG3, f0
1716 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1717 | li AT, 1
1718 | blez CARG3, ->fff_emptystr // Count <= 0?
1719 |. sltu AT, AT, TMP0
1720 | beqz TMP0, ->fff_emptystr // Zero length string?
1721 |. sltu TMP0, TMP1, CARG3
1722 | or AT, AT, TMP0
1723 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1724 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1725 |. lbu TMP0, STR:CARG1[1]
1726 | addu TMP2, CARG2, CARG3
1727 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1728 | addiu TMP2, TMP2, -1
1729 | sltu AT, CARG2, TMP2
1730 | bnez AT, <1
1731 |. sb TMP0, 0(TMP2)
1732 | b ->fff_newstr
1733 |. nop
1734 |
1735 |.ffunc string_reverse
1736 | ffgccheck
1737 |. nop
1738 | lw CARG3, HI(BASE)
1739 | lw STR:CARG1, LO(BASE)
1740 | beqz NARGS8:RC, ->fff_fallback
1741 |. li AT, LJ_TSTR
1742 | bne CARG3, AT, ->fff_fallback
1743 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1744 | lw CARG3, STR:CARG1->len
1745 | addiu CARG1, STR:CARG1, #STR
1746 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1747 | sltu AT, TMP1, CARG3
1748 | bnez AT, ->fff_fallback
1749 |. addu TMP3, CARG1, CARG3
1750 | addu CARG4, CARG2, CARG3
1751 |1: // Reverse string copy.
1752 | lbu TMP1, 0(CARG1)
1753 | sltu AT, CARG1, TMP3
1754 | beqz AT, ->fff_newstr
1755 |. addiu CARG1, CARG1, 1
1756 | addiu CARG4, CARG4, -1
1757 | b <1
1758 | sb TMP1, 0(CARG4)
1759 | 1947 |
1760 |.macro ffstring_case, name, lo 1948 |.macro ffstring_op, name
1761 | .ffunc name 1949 | .ffunc string_ .. name
1762 | ffgccheck 1950 | ffgccheck
1763 |. nop 1951 |. nop
1764 | lw CARG3, HI(BASE) 1952 | lw CARG3, HI(BASE)
1765 | lw STR:CARG1, LO(BASE) 1953 | lw STR:CARG2, LO(BASE)
1766 | beqz NARGS8:RC, ->fff_fallback 1954 | beqz NARGS8:RC, ->fff_fallback
1767 |. li AT, LJ_TSTR 1955 |. li AT, LJ_TSTR
1768 | bne CARG3, AT, ->fff_fallback 1956 | bne CARG3, AT, ->fff_fallback
1769 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1957 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
1770 | lw CARG3, STR:CARG1->len 1958 | load_got lj_buf_putstr_ .. name
1771 | addiu CARG1, STR:CARG1, #STR 1959 | lw TMP0, SBUF:CARG1->b
1772 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1960 | sw L, SBUF:CARG1->L
1773 | sltu AT, TMP1, CARG3 1961 | sw BASE, L->base
1774 | bnez AT, ->fff_fallback 1962 | sw TMP0, SBUF:CARG1->p
1775 |. addu TMP3, CARG1, CARG3 1963 | call_intern extern lj_buf_putstr_ .. name
1776 | move CARG4, CARG2 1964 |. sw PC, SAVE_PC
1777 |1: // ASCII case conversion. 1965 | load_got lj_buf_tostr
1778 | lbu TMP1, 0(CARG1) 1966 | call_intern lj_buf_tostr
1779 | sltu AT, CARG1, TMP3 1967 |. move SBUF:CARG1, SBUF:CRET1
1780 | beqz AT, ->fff_newstr 1968 | b ->fff_resstr
1781 |. addiu TMP0, TMP1, -lo 1969 |. lw BASE, L->base
1782 | xori TMP2, TMP1, 0x20
1783 | sltiu AT, TMP0, 26
1784 | movn TMP1, TMP2, AT
1785 | addiu CARG1, CARG1, 1
1786 | sb TMP1, 0(CARG4)
1787 | b <1
1788 |. addiu CARG4, CARG4, 1
1789 |.endmacro 1970 |.endmacro
1790 | 1971 |
1791 |ffstring_case string_lower, 65 1972 |ffstring_op reverse
1792 |ffstring_case string_upper, 97 1973 |ffstring_op lower
1974 |ffstring_op upper
1793 | 1975 |
1794 |//-- Table library ------------------------------------------------------ 1976 |//-- Bit library --------------------------------------------------------
1795 | 1977 |
1796 |.ffunc_1 table_getn 1978 |->vm_tobit_fb:
1797 | li AT, LJ_TTAB 1979 | beqz TMP1, ->fff_fallback
1798 | bne CARG3, AT, ->fff_fallback 1980 |.if FPU
1799 |. load_got lj_tab_len 1981 |. ldc1 FARG1, 0(BASE)
1800 | call_intern lj_tab_len // (GCtab *t) 1982 | add.d FARG1, FARG1, TOBIT
1801 |. nop 1983 | jr ra
1802 | // Returns uint32_t (but less than 2^31). 1984 |. mfc1 CRET1, FARG1
1803 | b ->fff_resi 1985 |.else
1986 |// FP number to bit conversion for soft-float.
1987 |->vm_tobit:
1988 | sll TMP0, SFARG1HI, 1
1989 | lui AT, 0x0020
1990 | addu TMP0, TMP0, AT
1991 | slt AT, TMP0, r0
1992 | movz SFARG1LO, r0, AT
1993 | beqz AT, >2
1994 |. li TMP1, 0x3e0
1995 | not TMP1, TMP1
1996 | sra TMP0, TMP0, 21
1997 | subu TMP0, TMP1, TMP0
1998 | slt AT, TMP0, r0
1999 | bnez AT, >1
2000 |. sll TMP1, SFARG1HI, 11
2001 | lui AT, 0x8000
2002 | or TMP1, TMP1, AT
2003 | srl AT, SFARG1LO, 21
2004 | or TMP1, TMP1, AT
2005 | slt AT, SFARG1HI, r0
2006 | beqz AT, >2
2007 |. srlv SFARG1LO, TMP1, TMP0
2008 | subu SFARG1LO, r0, SFARG1LO
2009 |2:
2010 | jr ra
2011 |. move CRET1, SFARG1LO
2012 |1:
2013 | addiu TMP0, TMP0, 21
2014 | srlv TMP1, SFARG1LO, TMP0
2015 | li AT, 20
2016 | subu TMP0, AT, TMP0
2017 | sll SFARG1LO, SFARG1HI, 12
2018 | sllv AT, SFARG1LO, TMP0
2019 | or SFARG1LO, TMP1, AT
2020 | slt AT, SFARG1HI, r0
2021 | beqz AT, <2
1804 |. nop 2022 |. nop
1805 | 2023 | jr ra
1806 |//-- Bit library -------------------------------------------------------- 2024 |. subu CRET1, r0, SFARG1LO
2025 |.endif
1807 | 2026 |
1808 |.macro .ffunc_bit, name 2027 |.macro .ffunc_bit, name
1809 | .ffunc_n bit_..name 2028 | .ffunc_1 bit_..name
1810 |. add.d FARG1, FARG1, TOBIT 2029 | beq SFARG1HI, TISNUM, >6
1811 | mfc1 CRET1, FARG1 2030 |. move CRET1, SFARG1LO
2031 | bal ->vm_tobit_fb
2032 |. sltu TMP1, SFARG1HI, TISNUM
2033 |6:
1812 |.endmacro 2034 |.endmacro
1813 | 2035 |
1814 |.macro .ffunc_bit_op, name, ins 2036 |.macro .ffunc_bit_op, name, ins
1815 | .ffunc_bit name 2037 | .ffunc_bit name
1816 | addiu TMP1, BASE, 8 2038 | addiu TMP2, BASE, 8
1817 | addu TMP2, BASE, NARGS8:RC 2039 | addu TMP3, BASE, NARGS8:RC
1818 |1: 2040 |1:
1819 | lw CARG4, HI(TMP1) 2041 | lw SFARG1HI, HI(TMP2)
1820 | beq TMP1, TMP2, ->fff_resi 2042 | beq TMP2, TMP3, ->fff_resi
1821 |. ldc1 FARG1, 0(TMP1) 2043 |. lw SFARG1LO, LO(TMP2)
1822 | sltiu AT, CARG4, LJ_TISNUM 2044 |.if FPU
1823 | beqz AT, ->fff_fallback 2045 | bne SFARG1HI, TISNUM, >2
1824 | add.d FARG1, FARG1, TOBIT 2046 |. addiu TMP2, TMP2, 8
1825 | mfc1 CARG2, FARG1
1826 | ins CRET1, CRET1, CARG2
1827 | b <1 2047 | b <1
1828 |. addiu TMP1, TMP1, 8 2048 |. ins CRET1, CRET1, SFARG1LO
2049 |2:
2050 | ldc1 FARG1, -8(TMP2)
2051 | sltu TMP1, SFARG1HI, TISNUM
2052 | beqz TMP1, ->fff_fallback
2053 |. add.d FARG1, FARG1, TOBIT
2054 | mfc1 SFARG1LO, FARG1
2055 | b <1
2056 |. ins CRET1, CRET1, SFARG1LO
2057 |.else
2058 | beq SFARG1HI, TISNUM, >2
2059 |. move CRET2, CRET1
2060 | bal ->vm_tobit_fb
2061 |. sltu TMP1, SFARG1HI, TISNUM
2062 | move SFARG1LO, CRET2
2063 |2:
2064 | ins CRET1, CRET1, SFARG1LO
2065 | b <1
2066 |. addiu TMP2, TMP2, 8
2067 |.endif
1829 |.endmacro 2068 |.endmacro
1830 | 2069 |
1831 |.ffunc_bit_op band, and 2070 |.ffunc_bit_op band, and
@@ -1849,24 +2088,28 @@ static void build_subroutines(BuildCtx *ctx)
1849 |. not CRET1, CRET1 2088 |. not CRET1, CRET1
1850 | 2089 |
// .ffunc_bit_sh: fast function template for the two-argument shift and rotate operations.
// name is the function suffix, ins is the final instruction, shmod selects the variant:
// 0 applies ins directly; 1 and 2 first compute 32 minus the count in TMP0 and issue two
// partial shifts in opposite directions, which ins then combines.
// New code: the first argument goes through ->vm_tobit_fb unless its type word equals TISNUM;
// the second argument must have type word TISNUM, otherwise ->fff_fallback is taken.
1851 |.macro .ffunc_bit_sh, name, ins, shmod 2090 |.macro .ffunc_bit_sh, name, ins, shmod
1852 | .ffunc_nn bit_..name 2091 | .ffunc_2 bit_..name
1853 |. add.d FARG1, FARG1, TOBIT 2092 | beq SFARG1HI, TISNUM, >1
1854 | add.d FARG2, FARG2, TOBIT 2093 |. nop
1855 | mfc1 CARG1, FARG1 2094 | bal ->vm_tobit_fb
1856 | mfc1 CARG2, FARG2 2095 |. sltu TMP1, SFARG1HI, TISNUM
2096 | move SFARG1LO, CRET1
2097 |1:
2098 | bne SFARG2HI, TISNUM, ->fff_fallback
2099 |. nop
1857 |.if shmod == 1 2100 |.if shmod == 1
1858 | li AT, 32 2101 | li AT, 32
1859 | subu TMP0, AT, CARG2 2102 | subu TMP0, AT, SFARG2LO
1860 | sllv CARG2, CARG1, CARG2 2103 | sllv SFARG2LO, SFARG1LO, SFARG2LO
1861 | srlv CARG1, CARG1, TMP0 2104 | srlv SFARG1LO, SFARG1LO, TMP0
1862 |.elif shmod == 2 2105 |.elif shmod == 2
1863 | li AT, 32 2106 | li AT, 32
1864 | subu TMP0, AT, CARG2 2107 | subu TMP0, AT, SFARG2LO
1865 | srlv CARG2, CARG1, CARG2 2108 | srlv SFARG2LO, SFARG1LO, SFARG2LO
1866 | sllv CARG1, CARG1, TMP0 2109 | sllv SFARG1LO, SFARG1LO, TMP0
1867 |.endif 2110 |.endif
1868 | b ->fff_resi 2111 | b ->fff_resi
1869 |. ins CRET1, CARG1, CARG2 2112 |. ins CRET1, SFARG1LO, SFARG2LO
1870 |.endmacro 2113 |.endmacro
1871 | 2114 |
1872 |.ffunc_bit_sh lshift, sllv, 0 2115 |.ffunc_bit_sh lshift, sllv, 0
@@ -1878,9 +2121,11 @@ static void build_subroutines(BuildCtx *ctx)
1878 | 2121 |
// bit.tobit uses the bare .ffunc_bit prologue and falls into ->fff_resi.
// ->fff_resi is the shared exit for integer results held in CRET1.
// Old code: moved CRET1 to FRET1, converted it to a double and branched to ->fff_resn.
// New code: reloads PC from the frame, points RA at BASE-8, stores TISNUM as the type word
// and CRET1 as the low word of that slot, then branches to ->fff_res1.
1879 |.ffunc_bit tobit 2122 |.ffunc_bit tobit
1880 |->fff_resi: 2123 |->fff_resi:
1881 | mtc1 CRET1, FRET1 2124 | lw PC, FRAME_PC(BASE)
1882 | b ->fff_resn 2125 | addiu RA, BASE, -8
1883 |. cvt.d.w FRET1, FRET1 2126 | sw TISNUM, -8+HI(BASE)
2127 | b ->fff_res1
2128 |. sw CRET1, -8+LO(BASE)
1884 | 2129 |
1885 |//----------------------------------------------------------------------- 2130 |//-----------------------------------------------------------------------
1886 | 2131 |
@@ -2067,19 +2312,96 @@ static void build_subroutines(BuildCtx *ctx)
2067 | jr CRET1 2312 | jr CRET1
2068 |. lw INS, -4(PC) 2313 |. lw INS, -4(PC)
2069 | 2314 |
// Continuation for trace stitching; the body is only assembled when JIT is enabled.
// Steps, in order:
//   - load the instruction at PC-4 and the previous trace pointer from -24+LO(RB);
//   - copy (MULTRES-8)/8 result slots from RA down to the call base BASE + RA field;
//   - pad with nil type words up to BASE + RA field + RB field;
//   - compare the link and traceno fields of the previous trace to choose between
//     ->cont_nop, BC_JLOOP, or a call to lj_dispatch_stitch.
2315 |->cont_stitch: // Trace stitching.
2316 |.if JIT
2317 | // RA = resultptr, RB = meta base
2318 | lw INS, -4(PC)
2319 | lw TMP2, -24+LO(RB) // Save previous trace.
2320 | decode_RA8a RC, INS
2321 | addiu AT, MULTRES, -8
2322 | decode_RA8b RC
2323 | beqz AT, >2
2324 |. addu RC, BASE, RC // Call base.
2325 |1: // Move results down.
2326 | lw SFRETHI, HI(RA)
2327 | lw SFRETLO, LO(RA)
2328 | addiu AT, AT, -8
2329 | addiu RA, RA, 8
2330 | sw SFRETHI, HI(RC)
2331 | sw SFRETLO, LO(RC)
2332 | bnez AT, <1
2333 |. addiu RC, RC, 8
2334 |2:
2335 | decode_RA8a RA, INS
2336 | decode_RB8a RB, INS
2337 | decode_RA8b RA
2338 | decode_RB8b RB
2339 | addu RA, RA, RB
2340 | addu RA, BASE, RA
2341 |3:
2342 | sltu AT, RC, RA
2343 | bnez AT, >9 // More results wanted?
2344 |. nop
2345 |
2346 | lhu TMP3, TRACE:TMP2->traceno
2347 | lhu RD, TRACE:TMP2->link
2348 | beq RD, TMP3, ->cont_nop // Blacklisted.
2349 |. load_got lj_dispatch_stitch
2350 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2351 |. sll RD, RD, 3
2352 |
2353 | // Stitch a new trace to the previous trace.
2354 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2355 | sw L, DISPATCH_J(L)(DISPATCH)
2356 | sw BASE, L->base
2357 | addiu CARG1, DISPATCH, GG_DISP2J
2358 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2359 |. move CARG2, PC
2360 | b ->cont_nop
2361 |. lw BASE, L->base
2362 |
// Label 9 fills one missing result: only the type word is written (TISNIL), then the
// check at label 3 is repeated.
2363 |9:
2364 | sw TISNIL, HI(RC)
2365 | b <3
2366 |. addiu RC, RC, 8
2367 |.endif
2368 |
// Profiler hook entry; the body is compiled only when LJ_HASPROFILE is set.
// Saves MULTRES and BASE, calls lj_dispatch_profile(L, pc), then steps PC back by one
// instruction (4 bytes) and continues at ->cont_nop with BASE reloaded from L->base.
2369 |->vm_profhook: // Dispatch target for profiler hook.
2370#if LJ_HASPROFILE
2371 | load_got lj_dispatch_profile
2372 | sw MULTRES, SAVE_MULTRES
2373 | move CARG2, PC
2374 | sw BASE, L->base
2375 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2376 |. move CARG1, L
2377 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2378 | addiu PC, PC, -4
2379 | b ->cont_nop
2380 |. lw BASE, L->base
2381#endif
2382 |
2070 |//----------------------------------------------------------------------- 2383 |//-----------------------------------------------------------------------
2071 |//-- Trace exit handler ------------------------------------------------- 2384 |//-- Trace exit handler -------------------------------------------------
2072 |//----------------------------------------------------------------------- 2385 |//-----------------------------------------------------------------------
2073 | 2386 |
// savex_ a, b: spill one register pair into the exit state area on the stack.
// FPU build: stores FP register f<a> at 16+a*8(sp) and general registers r<a>, r<b>
// in the region that starts at 16+32*8(sp), 4 bytes per register. Only f<a> is stored
// from the pair; f<b> is not.
// Non-FPU build: stores only r<a> and r<b>, at 16+n*4(sp).
2074 |.macro savex_, a, b
2388 |.if FPU
2075 | sdc1 f..a, 16+a*8(sp) 2389 | sdc1 f..a, 16+a*8(sp)
2076 | sw r..a, 16+32*8+a*4(sp) 2390 | sw r..a, 16+32*8+a*4(sp)
2077 | sw r..b, 16+32*8+b*4(sp) 2391 | sw r..b, 16+32*8+b*4(sp)
2392 |.else
2393 | sw r..a, 16+a*4(sp)
2394 | sw r..b, 16+b*4(sp)
2395 |.endif
2078 |.endmacro 2396 |.endmacro
2079 | 2397 |
2080 |->vm_exit_handler: 2398 |->vm_exit_handler:
2081 |.if JIT 2399 |.if JIT
2400 |.if FPU
2082 | addiu sp, sp, -(16+32*8+32*4) 2401 | addiu sp, sp, -(16+32*8+32*4)
2402 |.else
2403 | addiu sp, sp, -(16+32*4)
2404 |.endif
2083 | savex_ 0, 1 2405 | savex_ 0, 1
2084 | savex_ 2, 3 2406 | savex_ 2, 3
2085 | savex_ 4, 5 2407 | savex_ 4, 5
@@ -2094,25 +2416,34 @@ static void build_subroutines(BuildCtx *ctx)
2094 | savex_ 22, 23 2416 | savex_ 22, 23
2095 | savex_ 24, 25 2417 | savex_ 24, 25
2096 | savex_ 26, 27 2418 | savex_ 26, 27
2419 |.if FPU
2097 | sdc1 f28, 16+28*8(sp) 2420 | sdc1 f28, 16+28*8(sp)
2098 | sw r28, 16+32*8+28*4(sp)
2099 | sdc1 f30, 16+30*8(sp) 2421 | sdc1 f30, 16+30*8(sp)
2422 | sw r28, 16+32*8+28*4(sp)
2100 | sw r30, 16+32*8+30*4(sp) 2423 | sw r30, 16+32*8+30*4(sp)
2101 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. 2424 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2425 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2426 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2427 |.else
2428 | sw r28, 16+28*4(sp)
2429 | sw r30, 16+30*4(sp)
2430 | sw r0, 16+31*4(sp) // Clear RID_TMP.
2431 | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2432 | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2433 |.endif
2102 | li_vmstate EXIT 2434 | li_vmstate EXIT
2103 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2104 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2435 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2105 | lw TMP1, 0(TMP2) // Load exit number. 2436 | lw TMP1, 0(TMP2) // Load exit number.
2106 | st_vmstate 2437 | st_vmstate
2107 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. 2438 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2108 | lw L, DISPATCH_GL(jit_L)(DISPATCH) 2439 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2109 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2110 | load_got lj_trace_exit 2440 | load_got lj_trace_exit
2111 | sw L, DISPATCH_J(L)(DISPATCH) 2441 | sw L, DISPATCH_J(L)(DISPATCH)
2112 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. 2442 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2443 | sw BASE, L->base
2113 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 2444 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2114 | addiu CARG1, DISPATCH, GG_DISP2J 2445 | addiu CARG1, DISPATCH, GG_DISP2J
2115 | sw BASE, L->base 2446 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2116 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) 2447 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2117 |. addiu CARG2, sp, 16 2448 |. addiu CARG2, sp, 16
2118 | // Returns MULTRES (unscaled) or negated error code. 2449 | // Returns MULTRES (unscaled) or negated error code.
@@ -2128,19 +2459,21 @@ static void build_subroutines(BuildCtx *ctx)
2128 |.if JIT 2459 |.if JIT
2129 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. 2460 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2130 | lw L, SAVE_L 2461 | lw L, SAVE_L
2131 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2462 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2463 | sw BASE, L->base
2132 |1: 2464 |1:
2133 | bltz CRET1, >3 // Check for error from exit. 2465 | bltz CRET1, >9 // Check for error from exit.
2134 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2466 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2135 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2467 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2136 | sll MULTRES, CRET1, 3 2468 | sll MULTRES, CRET1, 3
2137 | li TISNIL, LJ_TNIL 2469 | li TISNIL, LJ_TNIL
2470 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2138 | sw MULTRES, SAVE_MULTRES 2471 | sw MULTRES, SAVE_MULTRES
2139 | mtc1 TMP3, TOBIT 2472 | .FPU mtc1 TMP3, TOBIT
2140 | lw TMP1, LFUNC:TMP1->pc 2473 | lw TMP1, LFUNC:RB->pc
2141 | sw r0, DISPATCH_GL(jit_L)(DISPATCH) 2474 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2142 | lw KBASE, PC2PROTO(k)(TMP1) 2475 | lw KBASE, PC2PROTO(k)(TMP1)
2143 | cvt.d.s TOBIT, TOBIT 2476 | .FPU cvt.d.s TOBIT, TOBIT
2144 | // Modified copy of ins_next which handles function header dispatch, too. 2477 | // Modified copy of ins_next which handles function header dispatch, too.
2145 | lw INS, 0(PC) 2478 | lw INS, 0(PC)
2146 | addiu PC, PC, 4 2479 | addiu PC, PC, 4
@@ -2148,7 +2481,7 @@ static void build_subroutines(BuildCtx *ctx)
2148 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2481 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2149 | decode_OP4a TMP1, INS 2482 | decode_OP4a TMP1, INS
2150 | decode_OP4b TMP1 2483 | decode_OP4b TMP1
2151 | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? 2484 | sltiu TMP2, TMP1, BC_FUNCF*4
2152 | addu TMP0, DISPATCH, TMP1 2485 | addu TMP0, DISPATCH, TMP1
2153 | decode_RD8a RD, INS 2486 | decode_RD8a RD, INS
2154 | lw AT, 0(TMP0) 2487 | lw AT, 0(TMP0)
@@ -2158,11 +2491,27 @@ static void build_subroutines(BuildCtx *ctx)
2158 | jr AT 2491 | jr AT
2159 |. decode_RD8b RD 2492 |. decode_RD8b RD
2160 |2: 2493 |2:
2494 | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
2495 | bnez TMP2, >3
2496 |. lw TMP1, FRAME_PC(BASE)
2497 | // Check frame below fast function.
2498 | andi TMP0, TMP1, FRAME_TYPE
2499 | bnez TMP0, >3 // Trace stitching continuation?
2500 |. nop
2501 | // Otherwise set KBASE for Lua function below fast function.
2502 | lw TMP2, -4(TMP1)
2503 | decode_RA8a TMP0, TMP2
2504 | decode_RA8b TMP0
2505 | subu TMP1, BASE, TMP0
2506 | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
2507 | lw TMP1, LFUNC:TMP2->pc
2508 | lw KBASE, PC2PROTO(k)(TMP1)
2509 |3:
2161 | addiu RC, MULTRES, -8 2510 | addiu RC, MULTRES, -8
2162 | jr AT 2511 | jr AT
2163 |. addu RA, RA, BASE 2512 |. addu RA, RA, BASE
2164 | 2513 |
2165 |3: // Rethrow error from the right C frame. 2514 |9: // Rethrow error from the right C frame.
2166 | load_got lj_err_throw 2515 | load_got lj_err_throw
2167 | negu CARG2, CRET1 2516 | negu CARG2, CRET1
2168 | call_intern lj_err_throw // (lua_State *L, int errcode) 2517 | call_intern lj_err_throw // (lua_State *L, int errcode)
@@ -2173,8 +2522,9 @@ static void build_subroutines(BuildCtx *ctx)
2173 |//-- Math helper functions ---------------------------------------------- 2522 |//-- Math helper functions ----------------------------------------------
2174 |//----------------------------------------------------------------------- 2523 |//-----------------------------------------------------------------------
2175 | 2524 |
2525 |// Hard-float round to integer.
2176 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2526 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2177 |.macro vm_round, func 2527 |.macro vm_round_hf, func
2178 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2528 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2179 | mtc1 r0, f4 2529 | mtc1 r0, f4
2180 | mtc1 TMP0, f5 2530 | mtc1 TMP0, f5
@@ -2216,6 +2566,12 @@ static void build_subroutines(BuildCtx *ctx)
2216 |. mov.d FRET1, FARG1 2566 |. mov.d FRET1, FARG1
2217 |.endmacro 2567 |.endmacro
2218 | 2568 |
// vm_round: wrapper that expands to vm_round_hf (the hard-float rounding macro) only in
// FPU builds. Without FPU it expands to nothing, so the labels that invoke it carry no code
// from this macro.
2569 |.macro vm_round, func
2570 |.if FPU
2571 | vm_round_hf, func
2572 |.endif
2573 |.endmacro
2574 |
2219 |->vm_floor: 2575 |->vm_floor:
2220 | vm_round floor 2576 | vm_round floor
2221 |->vm_ceil: 2577 |->vm_ceil:
@@ -2225,6 +2581,178 @@ static void build_subroutines(BuildCtx *ctx)
2225 | vm_round trunc 2581 | vm_round trunc
2226 |.endif 2582 |.endif
2227 | 2583 |
2584 |// Soft-float integer to number conversion.
// sfi2d AHI, ALO: convert the signed 32 bit integer in ALO into the two words of a double,
// high word in AHI and low word in ALO. Emits code only in non-FPU builds.
// Both exits return with jr ra, so each expansion is a complete subroutine body.
// Clobbers AT, TMP0 and TMP1.
2585 |.macro sfi2d, AHI, ALO
2586 |.if not FPU
2587 | beqz ALO, >9 // Handle zero first.
2588 |. sra TMP0, ALO, 31
2589 | xor TMP1, ALO, TMP0
2590 | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2591 | clz AHI, TMP1
// 0x800 is bit 11; after the later shift left by 20 it lands on bit 31 of the high word.
2592 | andi TMP0, TMP0, 0x800 // Mask sign bit.
2593 | li AT, 0x3ff+31-1
2594 | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2595 | subu AHI, AT, AHI // Exponent - 1 in AHI.
// Low word gets the bottom 11 bits of the aligned value; the top 21 bits go to the high word.
2596 | sll ALO, TMP1, 21
2597 | or AHI, AHI, TMP0 // Sign | Exponent.
2598 | srl TMP1, TMP1, 11
2599 | sll AHI, AHI, 20 // Align left.
2600 | jr ra
2601 |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
// Zero input: ALO is already zero, so only the high word needs clearing.
2602 |9:
2603 | jr ra
2604 |. li AHI, 0
2605 |.endif
2606 |.endmacro
2607 |
2608 |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
// Two instantiations of the sfi2d macro, one per soft-float argument pair.
// In non-FPU builds every path of the expansion ends in jr ra, so vm_sfi2d_1 does not fall
// through into vm_sfi2d_2. In FPU builds the macro emits nothing and both labels are empty.
2609 |->vm_sfi2d_1:
2610 | sfi2d SFARG1HI, SFARG1LO
2611 |
2612 |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2613 |->vm_sfi2d_2:
2614 | sfi2d SFARG2HI, SFARG2LO
2615 |
2616 |// Soft-float comparison. Equivalent to c.eq.d.
2617 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
// Soft-float equality on the word pairs SFARG1HI:SFARG1LO and SFARG2HI:SFARG2LO.
// Returns 1 or 0 in CRET1. Body is emitted only in non-FPU builds.
// Order of checks: both operands zero of either sign, then NaN, then bitwise equality.
2618 |->vm_sfcmpeq:
2619 |.if not FPU
// Shifting each high word left by one drops its sign bit before the zero test.
2620 | sll AT, SFARG1HI, 1
2621 | sll TMP0, SFARG2HI, 1
2622 | or CRET1, SFARG1LO, SFARG2LO
2623 | or TMP1, AT, TMP0
2624 | or TMP1, TMP1, CRET1
2625 | beqz TMP1, >8 // Both args +-0: return 1.
2626 |. sltu CRET1, r0, SFARG1LO
// NaN test per operand: (hi << 1) + (lo != 0) is above 0xffe00000 (unsigned) exactly when
// the exponent field is all ones and the mantissa is nonzero.
2627 | lui TMP1, 0xffe0
2628 | addu AT, AT, CRET1
2629 | sltu CRET1, r0, SFARG2LO
2630 | sltu AT, TMP1, AT
2631 | addu TMP0, TMP0, CRET1
2632 | sltu TMP0, TMP1, TMP0
2633 | or TMP1, AT, TMP0
2634 | bnez TMP1, >9 // Either arg is NaN: return 0;
2635 |. xor TMP0, SFARG1HI, SFARG2HI
2636 | xor TMP1, SFARG1LO, SFARG2LO
2637 | or AT, TMP0, TMP1
2638 | jr ra
2639 |. sltiu CRET1, AT, 1 // Same values: return 1.
2640 |8:
2641 | jr ra
2642 |. li CRET1, 1
2643 |9:
2644 | jr ra
2645 |. li CRET1, 0
2646 |.endif
2647 |
2648 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2649 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
// Soft-float less-than with two entry points that share one body (non-FPU builds only).
// vm_sfcmpult sets CRET2 = 1 and vm_sfcmpolt sets CRET2 = 0; CRET2 is the value returned
// in CRET1 when either operand is NaN (label 9).
2650 |->vm_sfcmpult:
2651 |.if not FPU
2652 | b >1
2653 |. li CRET2, 1
2654 |.endif
2655 |
2656 |->vm_sfcmpolt:
2657 |.if not FPU
2658 | li CRET2, 0
2659 |1:
2660 | sll AT, SFARG1HI, 1
2661 | sll TMP0, SFARG2HI, 1
2662 | or CRET1, SFARG1LO, SFARG2LO
2663 | or TMP1, AT, TMP0
2664 | or TMP1, TMP1, CRET1
// On the both-zero path SFARG1LO is zero, so the sltu on the line after the branch leaves
// CRET1 = 0, which label 8 returns unchanged.
2665 | beqz TMP1, >8 // Both args +-0: return 0.
2666 |. sltu CRET1, r0, SFARG1LO
2667 | lui TMP1, 0xffe0
2668 | addu AT, AT, CRET1
2669 | sltu CRET1, r0, SFARG2LO
2670 | sltu AT, TMP1, AT
2671 | addu TMP0, TMP0, CRET1
2672 | sltu TMP0, TMP1, TMP0
2673 | or TMP1, AT, TMP0
2674 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2675 |. and AT, SFARG1HI, SFARG2HI
2676 | bltz AT, >5 // Both args negative?
2677 |. nop
// Equal high words: the unsigned compare of the low words decides (returned via label 8).
// Otherwise the signed compare of the high words decides.
2678 | beq SFARG1HI, SFARG2HI, >8
2679 |. sltu CRET1, SFARG1LO, SFARG2LO
2680 | jr ra
2681 |. slt CRET1, SFARG1HI, SFARG2HI
2682 |5: // Swap conditions if both operands are negative.
2683 | beq SFARG1HI, SFARG2HI, >8
2684 |. sltu CRET1, SFARG2LO, SFARG1LO
2685 | jr ra
2686 |. slt CRET1, SFARG2HI, SFARG1HI
2687 |8:
2688 | jr ra
2689 |. nop
2690 |9:
2691 | jr ra
2692 |. move CRET1, CRET2
2693 |.endif
2694 |
2695 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2696 |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
// Soft-float ordered compare with a direction selector in TMP3 (non-FPU builds only).
// Returns 1 when both operands are zero of either sign and 0 when either operand is NaN.
// The sign bit of (SFARG1HI and SFARG2HI) xor TMP3 picks which of the two operand orders
// is used for the final comparison.
// NOTE(review): callers presumably pass TMP3 with only the sign bit meaningful - confirm.
2697 |->vm_sfcmpolex:
2698 |.if not FPU
2699 | sll AT, SFARG1HI, 1
2700 | sll TMP0, SFARG2HI, 1
2701 | or CRET1, SFARG1LO, SFARG2LO
2702 | or TMP1, AT, TMP0
2703 | or TMP1, TMP1, CRET1
2704 | beqz TMP1, >8 // Both args +-0: return 1.
2705 |. sltu CRET1, r0, SFARG1LO
2706 | lui TMP1, 0xffe0
2707 | addu AT, AT, CRET1
2708 | sltu CRET1, r0, SFARG2LO
2709 | sltu AT, TMP1, AT
2710 | addu TMP0, TMP0, CRET1
2711 | sltu TMP0, TMP1, TMP0
2712 | or TMP1, AT, TMP0
2713 | bnez TMP1, >9 // Either arg is NaN: return 0;
2714 |. and AT, SFARG1HI, SFARG2HI
2715 | xor AT, AT, TMP3
2716 | bltz AT, >5 // Both args negative?
2717 |. nop
2718 | beq SFARG1HI, SFARG2HI, >6
2719 |. sltu CRET1, SFARG2LO, SFARG1LO
2720 | jr ra
2721 |. slt CRET1, SFARG2HI, SFARG1HI
2722 |5: // Swap conditions if both operands are negative.
2723 | beq SFARG1HI, SFARG2HI, >6
2724 |. sltu CRET1, SFARG1LO, SFARG2LO
// High words differ: the signed compare below overwrites CRET1, then control falls into label 6.
2725 | slt CRET1, SFARG1HI, SFARG2HI
2726 |6:
2727 | jr ra
2728 |. nop
2729 |8:
2730 | jr ra
2731 |. li CRET1, 1
2732 |9:
2733 | jr ra
2734 |. li CRET1, 0
2735 |.endif
2736 |
// sfmin_max name, intins: defines the label vm_sf<name>. Code is emitted only when JIT is
// enabled and there is no FPU.
// The return address is kept in TMP2 across the nested call to ->vm_sfcmpolt. The result
// pair SFRETHI:SFRETLO starts as argument 1, and intins (a conditional move) replaces it
// with argument 2 depending on the compare result held in TMP0.
// Instantiated below as min with movz and max with movn.
2737 |.macro sfmin_max, name, intins
2738 |->vm_sf .. name:
2739 |.if JIT and not FPU
2740 | move TMP2, ra
2741 | bal ->vm_sfcmpolt
2742 |. nop
2743 | move TMP0, CRET1
2744 | move SFRETHI, SFARG1HI
2745 | move SFRETLO, SFARG1LO
2746 | move ra, TMP2
2747 | intins SFRETHI, SFARG2HI, TMP0
2748 | jr ra
2749 |. intins SFRETLO, SFARG2LO, TMP0
2750 |.endif
2751 |.endmacro
2752 |
2753 | sfmin_max min, movz
2754 | sfmin_max max, movn
2755 |
2228 |//----------------------------------------------------------------------- 2756 |//-----------------------------------------------------------------------
2229 |//-- Miscellaneous functions -------------------------------------------- 2757 |//-- Miscellaneous functions --------------------------------------------
2230 |//----------------------------------------------------------------------- 2758 |//-----------------------------------------------------------------------
@@ -2244,10 +2772,10 @@ static void build_subroutines(BuildCtx *ctx)
2244 | sw r1, CTSTATE->cb.slot 2772 | sw r1, CTSTATE->cb.slot
2245 | sw CARG1, CTSTATE->cb.gpr[0] 2773 | sw CARG1, CTSTATE->cb.gpr[0]
2246 | sw CARG2, CTSTATE->cb.gpr[1] 2774 | sw CARG2, CTSTATE->cb.gpr[1]
2247 | sdc1 FARG1, CTSTATE->cb.fpr[0] 2775 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2248 | sw CARG3, CTSTATE->cb.gpr[2] 2776 | sw CARG3, CTSTATE->cb.gpr[2]
2249 | sw CARG4, CTSTATE->cb.gpr[3] 2777 | sw CARG4, CTSTATE->cb.gpr[3]
2250 | sdc1 FARG2, CTSTATE->cb.fpr[1] 2778 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2251 | addiu TMP0, sp, CFRAME_SPACE+16 2779 | addiu TMP0, sp, CFRAME_SPACE+16
2252 | sw TMP0, CTSTATE->cb.stack 2780 | sw TMP0, CTSTATE->cb.stack
2253 | sw r0, SAVE_PC // Any value outside of bytecode is ok. 2781 | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2257,15 +2785,16 @@ static void build_subroutines(BuildCtx *ctx)
2257 | // Returns lua_State *. 2785 | // Returns lua_State *.
2258 | lw BASE, L:CRET1->base 2786 | lw BASE, L:CRET1->base
2259 | lw RC, L:CRET1->top 2787 | lw RC, L:CRET1->top
2788 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2260 | move L, CRET1 2789 | move L, CRET1
2261 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2790 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2262 | lw LFUNC:RB, FRAME_FUNC(BASE) 2791 | lw LFUNC:RB, FRAME_FUNC(BASE)
2263 | mtc1 TMP3, TOBIT 2792 | .FPU mtc1 TMP3, TOBIT
2264 | li_vmstate INTERP 2793 | li_vmstate INTERP
2265 | li TISNIL, LJ_TNIL 2794 | li TISNIL, LJ_TNIL
2266 | subu RC, RC, BASE 2795 | subu RC, RC, BASE
2267 | st_vmstate 2796 | st_vmstate
2268 | cvt.d.s TOBIT, TOBIT 2797 | .FPU cvt.d.s TOBIT, TOBIT
2269 | ins_callt 2798 | ins_callt
2270 |.endif 2799 |.endif
2271 | 2800 |
@@ -2279,11 +2808,11 @@ static void build_subroutines(BuildCtx *ctx)
2279 | move CARG2, RA 2808 | move CARG2, RA
2280 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) 2809 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2281 |. move CARG1, CTSTATE 2810 |. move CARG1, CTSTATE
2811 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2282 | lw CRET1, CTSTATE->cb.gpr[0] 2812 | lw CRET1, CTSTATE->cb.gpr[0]
2283 | ldc1 FRET1, CTSTATE->cb.fpr[0] 2813 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2284 | lw CRET2, CTSTATE->cb.gpr[1]
2285 | b ->vm_leave_unw 2814 | b ->vm_leave_unw
2286 |. ldc1 FRET2, CTSTATE->cb.fpr[1] 2815 |. lw CRET2, CTSTATE->cb.gpr[1]
2287 |.endif 2816 |.endif
2288 | 2817 |
2289 |->vm_ffi_call: // Call C function via FFI. 2818 |->vm_ffi_call: // Call C function via FFI.
@@ -2315,8 +2844,8 @@ static void build_subroutines(BuildCtx *ctx)
2315 | lw CARG2, CCSTATE->gpr[1] 2844 | lw CARG2, CCSTATE->gpr[1]
2316 | lw CARG3, CCSTATE->gpr[2] 2845 | lw CARG3, CCSTATE->gpr[2]
2317 | lw CARG4, CCSTATE->gpr[3] 2846 | lw CARG4, CCSTATE->gpr[3]
2318 | ldc1 FARG1, CCSTATE->fpr[0] 2847 | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2319 | ldc1 FARG2, CCSTATE->fpr[1] 2848 | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2320 | jalr CFUNCADDR 2849 | jalr CFUNCADDR
2321 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2850 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2322 | lw CCSTATE:TMP1, -12(r16) 2851 | lw CCSTATE:TMP1, -12(r16)
@@ -2324,8 +2853,13 @@ static void build_subroutines(BuildCtx *ctx)
2324 | lw ra, -4(r16) 2853 | lw ra, -4(r16)
2325 | sw CRET1, CCSTATE:TMP1->gpr[0] 2854 | sw CRET1, CCSTATE:TMP1->gpr[0]
2326 | sw CRET2, CCSTATE:TMP1->gpr[1] 2855 | sw CRET2, CCSTATE:TMP1->gpr[1]
2856 |.if FPU
2327 | sdc1 FRET1, CCSTATE:TMP1->fpr[0] 2857 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2328 | sdc1 FRET2, CCSTATE:TMP1->fpr[1] 2858 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2859 |.else
2860 | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2861 | sw CARG2, CCSTATE:TMP1->gpr[3]
2862 |.endif
2329 | move sp, r16 2863 | move sp, r16
2330 | jr ra 2864 | jr ra
2331 |. move r16, TMP2 2865 |. move r16, TMP2
@@ -2349,82 +2883,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2349 2883
// Ordered comparison bytecodes. Diff view: old rows loaded both operands as doubles and
// compared them with c.olt.d or c.ole.d; new rows define the bc_comp macro and instantiate
// it once per opcode at the bottom.
// bc_comp parameters:
//   FRA, FRD                - FP registers for the two operands (FPU builds)
//   RAHI/RALO, RDHI/RDLO    - register pairs for the type word and low word of each operand
//   movop                   - conditional move used on the integer and soft-float paths
//   fmovop                  - conditional move on the FP condition flag (FPU builds)
//   fcomp                   - FP compare instruction (FPU builds)
//   sfcomp                  - soft-float compare routine (non-FPU builds)
// Paths: both integers (inline), label 2 (RA not an integer), label 3 (both numbers),
// label 4 (RA number, RD integer), label 5 (RA integer, RD number). Anything else goes to
// ->vmeta_comp.
2350 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2884 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2351 | // RA = src1*8, RD = src2*8, JMP with RD = target 2885 | // RA = src1*8, RD = src2*8, JMP with RD = target
2352 | addu CARG2, BASE, RA 2886 |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2353 | addu CARG3, BASE, RD 2887 | addu RA, BASE, RA
2354 | lw TMP0, HI(CARG2) 2888 | addu RD, BASE, RD
2355 | lw TMP1, HI(CARG3) 2889 | lw RAHI, HI(RA)
2356 | ldc1 f0, 0(CARG2) 2890 | lw RDHI, HI(RD)
2357 | ldc1 f2, 0(CARG3)
2358 | sltiu TMP0, TMP0, LJ_TISNUM
2359 | sltiu TMP1, TMP1, LJ_TISNUM
2360 | lhu TMP2, OFS_RD(PC) 2891 | lhu TMP2, OFS_RD(PC)
2361 | and TMP0, TMP0, TMP1
2362 | addiu PC, PC, 4 2892 | addiu PC, PC, 4
2363 | beqz TMP0, ->vmeta_comp 2893 | bne RAHI, TISNUM, >2
2364 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) 2894 |. lw RALO, LO(RA)
2365 | decode_RD4b TMP2 2895 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2366 | addu TMP2, TMP2, TMP1 2896 | lw RDLO, LO(RD)
2367 if (op == BC_ISLT || op == BC_ISGE) { 2897 | bne RDHI, TISNUM, >5
2368 | c.olt.d f0, f2 2898 |. decode_RD4b TMP2
// The integer compare names SFARG1LO and SFARG2LO directly instead of RALO and RDLO, so its
// operand order is the same no matter how the caller mapped RA and RD onto the argument pairs.
2369 } else { 2899 | slt AT, SFARG1LO, SFARG2LO
2370 | c.ole.d f0, f2 2900 | addu TMP2, TMP2, TMP3
2371 } 2901 | movop TMP2, r0, AT
2372 if (op == BC_ISLT || op == BC_ISLE) {
2373 | movf TMP2, r0
2374 } else {
2375 | movt TMP2, r0
2376 }
2377 | addu PC, PC, TMP2
2378 |1: 2902 |1:
2903 | addu PC, PC, TMP2
2379 | ins_next 2904 | ins_next
2905 |
2906 |2: // RA is not an integer.
2907 | sltiu AT, RAHI, LJ_TISNUM
2908 | beqz AT, ->vmeta_comp
2909 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2910 | sltiu AT, RDHI, LJ_TISNUM
2911 |.if FPU
2912 | ldc1 FRA, 0(RA)
2913 | ldc1 FRD, 0(RD)
2914 |.else
2915 | lw RDLO, LO(RD)
2916 |.endif
2917 | beqz AT, >4
2918 |. decode_RD4b TMP2
2919 |3: // RA and RD are both numbers.
// The FP compare also names f20 and f22 directly rather than FRA and FRD.
2920 |.if FPU
2921 | fcomp f20, f22
2922 | addu TMP2, TMP2, TMP3
2923 | b <1
2924 |. fmovop TMP2, r0
2925 |.else
2926 | bal sfcomp
2927 |. addu TMP2, TMP2, TMP3
2928 | b <1
2929 |. movop TMP2, r0, CRET1
2930 |.endif
2931 |
2932 |4: // RA is a number, RD is not a number.
2933 | bne RDHI, TISNUM, ->vmeta_comp
2934 | // RA is a number, RD is an integer. Convert RD to a number.
2935 |.if FPU
2936 |. lwc1 FRD, LO(RD)
2937 | b <3
2938 |. cvt.d.w FRD, FRD
2939 |.else
2940 |. nop
// The string compare selects the conversion routine that matches the argument pair RD maps to.
2941 |.if "RDHI" == "SFARG1HI"
2942 | bal ->vm_sfi2d_1
2943 |.else
2944 | bal ->vm_sfi2d_2
2945 |.endif
2946 |. nop
2947 | b <3
2948 |. nop
2949 |.endif
2950 |
2951 |5: // RA is an integer, RD is not an integer
2952 | sltiu AT, RDHI, LJ_TISNUM
2953 | beqz AT, ->vmeta_comp
2954 | // RA is an integer, RD is a number. Convert RA to a number.
2955 |.if FPU
2956 |. mtc1 RALO, FRA
2957 | ldc1 FRD, 0(RD)
2958 | b <3
2959 | cvt.d.w FRA, FRA
2960 |.else
2961 |. nop
2962 |.if "RAHI" == "SFARG1HI"
2963 | bal ->vm_sfi2d_1
2964 |.else
2965 | bal ->vm_sfi2d_2
2966 |.endif
2967 |. nop
2968 | b <3
2969 |. nop
2970 |.endif
2971 |.endmacro
2972 |
// ISLT and ISGE map RA to argument pair 1 and use the olt compare; ISLE and ISGT swap the
// pairs and use the ult compare. Each group uses the two opposite conditional moves.
2973 if (op == BC_ISLT) {
2974 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
2975 } else if (op == BC_ISGE) {
2976 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
2977 } else if (op == BC_ISLE) {
2978 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
2979 } else {
2980 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
2981 }
2380 break; 2982 break;
2381 2983
2382 case BC_ISEQV: case BC_ISNEV: 2984 case BC_ISEQV: case BC_ISNEV:
2383 vk = op == BC_ISEQV; 2985 vk = op == BC_ISEQV;
2384 | // RA = src1*8, RD = src2*8, JMP with RD = target 2986 | // RA = src1*8, RD = src2*8, JMP with RD = target
2385 | addu RA, BASE, RA 2987 | addu RA, BASE, RA
2386 | addiu PC, PC, 4 2988 | addiu PC, PC, 4
2387 | lw TMP0, HI(RA)
2388 | ldc1 f0, 0(RA)
2389 | addu RD, BASE, RD 2989 | addu RD, BASE, RD
2990 | lw SFARG1HI, HI(RA)
2390 | lhu TMP2, -4+OFS_RD(PC) 2991 | lhu TMP2, -4+OFS_RD(PC)
2391 | lw TMP1, HI(RD) 2992 | lw SFARG2HI, HI(RD)
2392 | ldc1 f2, 0(RD)
2393 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 2993 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2394 | sltiu AT, TMP0, LJ_TISNUM 2994 | sltu AT, TISNUM, SFARG1HI
2395 | sltiu CARG1, TMP1, LJ_TISNUM 2995 | sltu TMP0, TISNUM, SFARG2HI
2396 | decode_RD4b TMP2 2996 | or AT, AT, TMP0
2397 | and AT, AT, CARG1
2398 | beqz AT, >5
2399 |. addu TMP2, TMP2, TMP3
2400 | c.eq.d f0, f2
2401 if (vk) { 2997 if (vk) {
2402 | movf TMP2, r0 2998 | beqz AT, ->BC_ISEQN_Z
2403 } else { 2999 } else {
2404 | movt TMP2, r0 3000 | beqz AT, ->BC_ISNEN_Z
2405 } 3001 }
2406 |1: 3002 |. decode_RD4b TMP2
2407 | addu PC, PC, TMP2 3003 | // Either or both types are not numbers.
2408 | ins_next 3004 | lw SFARG1LO, LO(RA)
2409 |5: // Either or both types are not numbers. 3005 | lw SFARG2LO, LO(RD)
2410 | lw CARG2, LO(RA) 3006 | addu TMP2, TMP2, TMP3
2411 | lw CARG3, LO(RD)
2412 |.if FFI 3007 |.if FFI
2413 | li TMP3, LJ_TCDATA 3008 | li TMP3, LJ_TCDATA
2414 | beq TMP0, TMP3, ->vmeta_equal_cd 3009 | beq SFARG1HI, TMP3, ->vmeta_equal_cd
2415 |.endif 3010 |.endif
2416 |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? 3011 |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
2417 |.if FFI 3012 |.if FFI
2418 | beq TMP1, TMP3, ->vmeta_equal_cd 3013 | beq SFARG2HI, TMP3, ->vmeta_equal_cd
2419 |.endif 3014 |.endif
2420 |. xor TMP3, CARG2, CARG3 // Same tv? 3015 |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
2421 | xor TMP1, TMP1, TMP0 // Same type? 3016 | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
2422 | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? 3017 | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
2423 | movz TMP3, r0, AT // Ignore tv if primitive. 3018 | movz TMP3, r0, AT // Ignore tv if primitive.
2424 | movn CARG1, r0, TMP1 // Tab/ud and same type? 3019 | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
2425 | or AT, TMP1, TMP3 // Same type && (pri||same tv). 3020 | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
2426 | movz CARG1, r0, AT 3021 | movz TMP0, r0, AT
2427 | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. 3022 | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
2428 if (vk) { 3023 if (vk) {
2429 |. movn TMP2, r0, AT 3024 |. movn TMP2, r0, AT
2430 } else { 3025 } else {
@@ -2432,15 +3027,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2432 } 3027 }
2433 | // Different tables or userdatas. Need to check __eq metamethod. 3028 | // Different tables or userdatas. Need to check __eq metamethod.
2434 | // Field metatable must be at same offset for GCtab and GCudata! 3029 | // Field metatable must be at same offset for GCtab and GCudata!
2435 | lw TAB:TMP1, TAB:CARG2->metatable 3030 | lw TAB:TMP1, TAB:SFARG1LO->metatable
2436 | beqz TAB:TMP1, <1 // No metatable? 3031 | beqz TAB:TMP1, >1 // No metatable?
2437 |. nop 3032 |. nop
2438 | lbu TMP1, TAB:TMP1->nomm 3033 | lbu TMP1, TAB:TMP1->nomm
2439 | andi TMP1, TMP1, 1<<MM_eq 3034 | andi TMP1, TMP1, 1<<MM_eq
2440 | bnez TMP1, <1 // Or 'no __eq' flag set? 3035 | bnez TMP1, >1 // Or 'no __eq' flag set?
2441 |. nop 3036 |. nop
2442 | b ->vmeta_equal // Handle __eq metamethod. 3037 | b ->vmeta_equal // Handle __eq metamethod.
2443 |. li CARG4, 1-vk // ne = 0 or 1. 3038 |. li TMP0, 1-vk // ne = 0 or 1.
3039 |1:
3040 | addu PC, PC, TMP2
3041 | ins_next
2444 break; 3042 break;
2445 3043
2446 case BC_ISEQS: case BC_ISNES: 3044 case BC_ISEQS: case BC_ISNES:
@@ -2477,38 +3075,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2477 vk = op == BC_ISEQN; 3075 vk = op == BC_ISEQN;
2478 | // RA = src*8, RD = num_const*8, JMP with RD = target 3076 | // RA = src*8, RD = num_const*8, JMP with RD = target
2479 | addu RA, BASE, RA 3077 | addu RA, BASE, RA
2480 | addiu PC, PC, 4 3078 | addu RD, KBASE, RD
2481 | lw TMP0, HI(RA) 3079 | lw SFARG1HI, HI(RA)
2482 | ldc1 f0, 0(RA) 3080 | lw SFARG2HI, HI(RD)
2483 | addu RD, KBASE, RD 3081 | lhu TMP2, OFS_RD(PC)
2484 | lhu TMP2, -4+OFS_RD(PC) 3082 | addiu PC, PC, 4
2485 | ldc1 f2, 0(RD)
2486 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3083 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2487 | sltiu AT, TMP0, LJ_TISNUM
2488 | decode_RD4b TMP2 3084 | decode_RD4b TMP2
2489 |.if FFI
2490 | beqz AT, >5
2491 |.else
2492 | beqz AT, >1
2493 |.endif
2494 |. addu TMP2, TMP2, TMP3
2495 | c.eq.d f0, f2
2496 if (vk) { 3085 if (vk) {
2497 | movf TMP2, r0 3086 |->BC_ISEQN_Z:
2498 | addu PC, PC, TMP2 3087 } else {
3088 |->BC_ISNEN_Z:
3089 }
3090 | bne SFARG1HI, TISNUM, >3
3091 |. lw SFARG1LO, LO(RA)
3092 | lw SFARG2LO, LO(RD)
3093 | addu TMP2, TMP2, TMP3
3094 | bne SFARG2HI, TISNUM, >6
3095 |. xor AT, SFARG1LO, SFARG2LO
3096 if (vk) {
3097 | movn TMP2, r0, AT
2499 |1: 3098 |1:
3099 | addu PC, PC, TMP2
3100 |2:
2500 } else { 3101 } else {
2501 | movt TMP2, r0 3102 | movz TMP2, r0, AT
2502 |1: 3103 |1:
3104 |2:
2503 | addu PC, PC, TMP2 3105 | addu PC, PC, TMP2
2504 } 3106 }
2505 | ins_next 3107 | ins_next
3108 |
3109 |3: // RA is not an integer.
3110 | sltiu AT, SFARG1HI, LJ_TISNUM
2506 |.if FFI 3111 |.if FFI
2507 |5: 3112 | beqz AT, >8
2508 | li AT, LJ_TCDATA 3113 |.else
2509 | beq TMP0, AT, ->vmeta_equal_cd 3114 | beqz AT, <2
3115 |.endif
3116 |. addu TMP2, TMP2, TMP3
3117 | sltiu AT, SFARG2HI, LJ_TISNUM
3118 |.if FPU
3119 | ldc1 f20, 0(RA)
3120 | ldc1 f22, 0(RD)
3121 |.endif
3122 | beqz AT, >5
3123 |. lw SFARG2LO, LO(RD)
3124 |4: // RA and RD are both numbers.
3125 |.if FPU
3126 | c.eq.d f20, f22
3127 | b <1
3128 if (vk) {
3129 |. movf TMP2, r0
3130 } else {
3131 |. movt TMP2, r0
3132 }
3133 |.else
3134 | bal ->vm_sfcmpeq
2510 |. nop 3135 |. nop
2511 | b <1 3136 | b <1
3137 if (vk) {
3138 |. movz TMP2, r0, CRET1
3139 } else {
3140 |. movn TMP2, r0, CRET1
3141 }
3142 |.endif
3143 |
3144 |5: // RA is a number, RD is not a number.
3145 |.if FFI
3146 | bne SFARG2HI, TISNUM, >9
3147 |.else
3148 | bne SFARG2HI, TISNUM, <2
3149 |.endif
3150 | // RA is a number, RD is an integer. Convert RD to a number.
3151 |.if FPU
3152 |. lwc1 f22, LO(RD)
3153 | b <4
3154 |. cvt.d.w f22, f22
3155 |.else
3156 |. nop
3157 | bal ->vm_sfi2d_2
3158 |. nop
3159 | b <4
3160 |. nop
3161 |.endif
3162 |
3163 |6: // RA is an integer, RD is not an integer
3164 | sltiu AT, SFARG2HI, LJ_TISNUM
3165 |.if FFI
3166 | beqz AT, >9
3167 |.else
3168 | beqz AT, <2
3169 |.endif
3170 | // RA is an integer, RD is a number. Convert RA to a number.
3171 |.if FPU
3172 |. mtc1 SFARG1LO, f20
3173 | ldc1 f22, 0(RD)
3174 | b <4
3175 | cvt.d.w f20, f20
3176 |.else
3177 |. nop
3178 | bal ->vm_sfi2d_1
3179 |. nop
3180 | b <4
3181 |. nop
3182 |.endif
3183 |
3184 |.if FFI
3185 |8:
3186 | li AT, LJ_TCDATA
3187 | bne SFARG1HI, AT, <2
3188 |. nop
3189 | b ->vmeta_equal_cd
3190 |. nop
3191 |9:
3192 | li AT, LJ_TCDATA
3193 | bne SFARG2HI, AT, <2
3194 |. nop
3195 | b ->vmeta_equal_cd
2512 |. nop 3196 |. nop
2513 |.endif 3197 |.endif
2514 break; 3198 break;
@@ -2560,7 +3244,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2560 | addu PC, PC, TMP2 3244 | addu PC, PC, TMP2
2561 } else { 3245 } else {
2562 | sltiu TMP0, TMP0, LJ_TISTRUECOND 3246 | sltiu TMP0, TMP0, LJ_TISTRUECOND
2563 | ldc1 f0, 0(RD) 3247 | lw SFRETHI, HI(RD)
3248 | lw SFRETLO, LO(RD)
2564 if (op == BC_ISTC) { 3249 if (op == BC_ISTC) {
2565 | beqz TMP0, >1 3250 | beqz TMP0, >1
2566 } else { 3251 } else {
@@ -2570,22 +3255,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2570 | decode_RD4b TMP2 3255 | decode_RD4b TMP2
2571 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3256 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2572 | addu TMP2, TMP2, TMP3 3257 | addu TMP2, TMP2, TMP3
2573 | sdc1 f0, 0(RA) 3258 | sw SFRETHI, HI(RA)
3259 | sw SFRETLO, LO(RA)
2574 | addu PC, PC, TMP2 3260 | addu PC, PC, TMP2
2575 |1: 3261 |1:
2576 } 3262 }
2577 | ins_next 3263 | ins_next
2578 break; 3264 break;
2579 3265
3266 case BC_ISTYPE:
3267 | // RA = src*8, RD = -type*8
3268 | addu TMP2, BASE, RA
3269 | srl TMP1, RD, 3
3270 | lw TMP0, HI(TMP2)
3271 | ins_next1
3272 | addu AT, TMP0, TMP1
3273 | bnez AT, ->vmeta_istype
3274 |. ins_next2
3275 break;
3276 case BC_ISNUM:
3277 | // RA = src*8, RD = -(TISNUM-1)*8
3278 | addu TMP2, BASE, RA
3279 | lw TMP0, HI(TMP2)
3280 | ins_next1
3281 | sltiu AT, TMP0, LJ_TISNUM
3282 | beqz AT, ->vmeta_istype
3283 |. ins_next2
3284 break;
3285
2580 /* -- Unary ops --------------------------------------------------------- */ 3286 /* -- Unary ops --------------------------------------------------------- */
2581 3287
2582 case BC_MOV: 3288 case BC_MOV:
2583 | // RA = dst*8, RD = src*8 3289 | // RA = dst*8, RD = src*8
2584 | addu RD, BASE, RD 3290 | addu RD, BASE, RD
2585 | addu RA, BASE, RA 3291 | addu RA, BASE, RA
2586 | ldc1 f0, 0(RD) 3292 | lw SFRETHI, HI(RD)
3293 | lw SFRETLO, LO(RD)
2587 | ins_next1 3294 | ins_next1
2588 | sdc1 f0, 0(RA) 3295 | sw SFRETHI, HI(RA)
3296 | sw SFRETLO, LO(RA)
2589 | ins_next2 3297 | ins_next2
2590 break; 3298 break;
2591 case BC_NOT: 3299 case BC_NOT:
@@ -2602,16 +3310,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2602 break; 3310 break;
2603 case BC_UNM: 3311 case BC_UNM:
2604 | // RA = dst*8, RD = src*8 3312 | // RA = dst*8, RD = src*8
2605 | addu CARG3, BASE, RD 3313 | addu RB, BASE, RD
3314 | lw SFARG1HI, HI(RB)
2606 | addu RA, BASE, RA 3315 | addu RA, BASE, RA
2607 | lw TMP0, HI(CARG3) 3316 | bne SFARG1HI, TISNUM, >2
2608 | ldc1 f0, 0(CARG3) 3317 |. lw SFARG1LO, LO(RB)
2609 | sltiu AT, TMP0, LJ_TISNUM 3318 | lui TMP1, 0x8000
2610 | beqz AT, ->vmeta_unm 3319 | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
2611 |. neg.d f0, f0 3320 |. negu SFARG1LO, SFARG1LO
3321 |1:
2612 | ins_next1 3322 | ins_next1
2613 | sdc1 f0, 0(RA) 3323 | sw SFARG1HI, HI(RA)
3324 | sw SFARG1LO, LO(RA)
2614 | ins_next2 3325 | ins_next2
3326 |2:
3327 | sltiu AT, SFARG1HI, LJ_TISNUM
3328 | beqz AT, ->vmeta_unm
3329 |. lui TMP1, 0x8000
3330 | b <1
3331 |. xor SFARG1HI, SFARG1HI, TMP1
2615 break; 3332 break;
2616 case BC_LEN: 3333 case BC_LEN:
2617 | // RA = dst*8, RD = src*8 3334 | // RA = dst*8, RD = src*8
@@ -2622,12 +3339,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2622 | li AT, LJ_TSTR 3339 | li AT, LJ_TSTR
2623 | bne TMP0, AT, >2 3340 | bne TMP0, AT, >2
2624 |. li AT, LJ_TTAB 3341 |. li AT, LJ_TTAB
2625 | lw CRET1, STR:CARG1->len 3342 | lw CRET1, STR:CARG1->len
2626 |1: 3343 |1:
2627 | mtc1 CRET1, f0
2628 | cvt.d.w f0, f0
2629 | ins_next1 3344 | ins_next1
2630 | sdc1 f0, 0(RA) 3345 | sw TISNUM, HI(RA)
3346 | sw CRET1, LO(RA)
2631 | ins_next2 3347 | ins_next2
2632 |2: 3348 |2:
2633 | bne TMP0, AT, ->vmeta_len 3349 | bne TMP0, AT, ->vmeta_len
@@ -2658,104 +3374,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2658 3374
2659 /* -- Binary ops -------------------------------------------------------- */ 3375 /* -- Binary ops -------------------------------------------------------- */
2660 3376
2661 |.macro ins_arithpre 3377 |.macro fpmod, a, b, c
3378 | bal ->vm_floor // floor(b/c)
3379 |. div.d FARG1, b, c
3380 | mul.d a, FRET1, c
3381 | sub.d a, b, a // b - floor(b/c)*c
3382 |.endmacro
3383
3384 |.macro sfpmod
3385 | addiu sp, sp, -16
3386 |
3387 | load_got __divdf3
3388 | sw SFARG1HI, HI(sp)
3389 | sw SFARG1LO, LO(sp)
3390 | sw SFARG2HI, 8+HI(sp)
3391 | call_extern
3392 |. sw SFARG2LO, 8+LO(sp)
3393 |
3394 | load_got floor
3395 | move SFARG1HI, SFRETHI
3396 | call_extern
3397 |. move SFARG1LO, SFRETLO
3398 |
3399 | load_got __muldf3
3400 | move SFARG1HI, SFRETHI
3401 | move SFARG1LO, SFRETLO
3402 | lw SFARG2HI, 8+HI(sp)
3403 | call_extern
3404 |. lw SFARG2LO, 8+LO(sp)
3405 |
3406 | load_got __subdf3
3407 | lw SFARG1HI, HI(sp)
3408 | lw SFARG1LO, LO(sp)
3409 | move SFARG2HI, SFRETHI
3410 | call_extern
3411 |. move SFARG2LO, SFRETLO
3412 |
3413 | addiu sp, sp, 16
3414 |.endmacro
3415
3416 |.macro ins_arithpre, label
2662 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3417 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2663 | decode_RB8a RB, INS
2664 | decode_RB8b RB
2665 | decode_RDtoRC8 RC, RD
2666 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3418 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2667 ||switch (vk) { 3419 ||switch (vk) {
2668 ||case 0: 3420 ||case 0:
2669 | addu CARG3, BASE, RB 3421 | decode_RB8a RB, INS
2670 | addu CARG4, KBASE, RC 3422 | decode_RB8b RB
2671 | lw TMP1, HI(CARG3) 3423 | decode_RDtoRC8 RC, RD
2672 | ldc1 f20, 0(CARG3) 3424 | // RA = dst*8, RB = src1*8, RC = num_const*8
2673 | ldc1 f22, 0(CARG4) 3425 | addu RB, BASE, RB
2674 | sltiu AT, TMP1, LJ_TISNUM 3426 |.if "label" ~= "none"
3427 | b label
3428 |.endif
3429 |. addu RC, KBASE, RC
2675 || break; 3430 || break;
2676 ||case 1: 3431 ||case 1:
2677 | addu CARG4, BASE, RB 3432 | decode_RB8a RC, INS
2678 | addu CARG3, KBASE, RC 3433 | decode_RB8b RC
2679 | lw TMP1, HI(CARG4) 3434 | decode_RDtoRC8 RB, RD
2680 | ldc1 f22, 0(CARG4) 3435 | // RA = dst*8, RB = num_const*8, RC = src1*8
2681 | ldc1 f20, 0(CARG3) 3436 | addu RC, BASE, RC
2682 | sltiu AT, TMP1, LJ_TISNUM 3437 |.if "label" ~= "none"
3438 | b label
3439 |.endif
3440 |. addu RB, KBASE, RB
2683 || break; 3441 || break;
2684 ||default: 3442 ||default:
2685 | addu CARG3, BASE, RB 3443 | decode_RB8a RB, INS
2686 | addu CARG4, BASE, RC 3444 | decode_RB8b RB
2687 | lw TMP1, HI(CARG3) 3445 | decode_RDtoRC8 RC, RD
2688 | lw TMP2, HI(CARG4) 3446 | // RA = dst*8, RB = src1*8, RC = src2*8
2689 | ldc1 f20, 0(CARG3) 3447 | addu RB, BASE, RB
2690 | ldc1 f22, 0(CARG4) 3448 |.if "label" ~= "none"
2691 | sltiu AT, TMP1, LJ_TISNUM 3449 | b label
2692 | sltiu TMP0, TMP2, LJ_TISNUM 3450 |.endif
2693 | and AT, AT, TMP0 3451 |. addu RC, BASE, RC
2694 || break; 3452 || break;
2695 ||} 3453 ||}
2696 | beqz AT, ->vmeta_arith
2697 |. addu RA, BASE, RA
2698 |.endmacro 3454 |.endmacro
2699 | 3455 |
2700 |.macro fpmod, a, b, c 3456 |.macro ins_arith, intins, fpins, fpcall, label
2701 |->BC_MODVN_Z: 3457 | ins_arithpre none
2702 | bal ->vm_floor // floor(b/c)
2703 |. div.d FARG1, b, c
2704 | mul.d a, FRET1, c
2705 | sub.d a, b, a // b - floor(b/c)*c
2706 |.endmacro
2707 | 3458 |
2708 |.macro ins_arith, ins 3459 |.if "label" ~= "none"
2709 | ins_arithpre 3460 |label:
2710 |.if "ins" == "fpmod_" 3461 |.endif
2711 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3462 |
2712 |. nop 3463 | lw SFARG1HI, HI(RB)
3464 | lw SFARG2HI, HI(RC)
3465 |
3466 |.if "intins" ~= "div"
3467 |
3468 | // Check for two integers.
3469 | lw SFARG1LO, LO(RB)
3470 | bne SFARG1HI, TISNUM, >5
3471 |. lw SFARG2LO, LO(RC)
3472 | bne SFARG2HI, TISNUM, >5
3473 |
3474 |.if "intins" == "addu"
3475 |. intins CRET1, SFARG1LO, SFARG2LO
3476 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3477 | xor TMP2, CRET1, SFARG2LO
3478 | and TMP1, TMP1, TMP2
3479 | bltz TMP1, ->vmeta_arith
3480 |. addu RA, BASE, RA
3481 |.elif "intins" == "subu"
3482 |. intins CRET1, SFARG1LO, SFARG2LO
3483 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3484 | xor TMP2, SFARG1LO, SFARG2LO
3485 | and TMP1, TMP1, TMP2
3486 | bltz TMP1, ->vmeta_arith
3487 |. addu RA, BASE, RA
3488 |.elif "intins" == "mult"
3489 |. intins SFARG1LO, SFARG2LO
3490 | mflo CRET1
3491 | mfhi TMP2
3492 | sra TMP1, CRET1, 31
3493 | bne TMP1, TMP2, ->vmeta_arith
3494 |. addu RA, BASE, RA
2713 |.else 3495 |.else
2714 | ins f0, f20, f22 3496 |. load_got lj_vm_modi
3497 | beqz SFARG2LO, ->vmeta_arith
3498 |. addu RA, BASE, RA
3499 |.if ENDIAN_BE
3500 | move CARG1, SFARG1LO
3501 |.endif
3502 | call_extern
3503 |. move CARG2, SFARG2LO
3504 |.endif
3505 |
3506 | ins_next1
3507 | sw TISNUM, HI(RA)
3508 | sw CRET1, LO(RA)
3509 |3:
3510 | ins_next2
3511 |
3512 |.elif not FPU
3513 |
3514 | lw SFARG1LO, LO(RB)
3515 | lw SFARG2LO, LO(RC)
3516 |
3517 |.endif
3518 |
3519 |5: // Check for two numbers.
3520 | .FPU ldc1 f20, 0(RB)
3521 | sltiu AT, SFARG1HI, LJ_TISNUM
3522 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3523 | .FPU ldc1 f22, 0(RC)
3524 | and AT, AT, TMP0
3525 | beqz AT, ->vmeta_arith
3526 |. addu RA, BASE, RA
3527 |
3528 |.if FPU
3529 | fpins FRET1, f20, f22
3530 |.elif "fpcall" == "sfpmod"
3531 | sfpmod
3532 |.else
3533 | load_got fpcall
3534 | call_extern
3535 |. nop
3536 |.endif
3537 |
2715 | ins_next1 3538 | ins_next1
2716 | sdc1 f0, 0(RA) 3539 |.if not FPU
3540 | sw SFRETHI, HI(RA)
3541 |.endif
3542 |.if "intins" ~= "div"
3543 | b <3
3544 |.endif
3545 |.if FPU
3546 |. sdc1 FRET1, 0(RA)
3547 |.else
3548 |. sw SFRETLO, LO(RA)
3549 |.endif
3550 |.if "intins" == "div"
2717 | ins_next2 3551 | ins_next2
2718 |.endif 3552 |.endif
3553 |
2719 |.endmacro 3554 |.endmacro
2720 3555
2721 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3556 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2722 | ins_arith add.d 3557 | ins_arith addu, add.d, __adddf3, none
2723 break; 3558 break;
2724 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3559 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2725 | ins_arith sub.d 3560 | ins_arith subu, sub.d, __subdf3, none
2726 break; 3561 break;
2727 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3562 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2728 | ins_arith mul.d 3563 | ins_arith mult, mul.d, __muldf3, none
3564 break;
3565 case BC_DIVVN:
3566 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
2729 break; 3567 break;
2730 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3568 case BC_DIVNV: case BC_DIVVV:
2731 | ins_arith div.d 3569 | ins_arithpre ->BC_DIVVN_Z
2732 break; 3570 break;
2733 case BC_MODVN: 3571 case BC_MODVN:
2734 | ins_arith fpmod 3572 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
2735 break; 3573 break;
2736 case BC_MODNV: case BC_MODVV: 3574 case BC_MODNV: case BC_MODVV:
2737 | ins_arith fpmod_ 3575 | ins_arithpre ->BC_MODVN_Z
2738 break; 3576 break;
2739 case BC_POW: 3577 case BC_POW:
2740 | decode_RB8a RB, INS 3578 | ins_arithpre none
2741 | decode_RB8b RB 3579 | lw SFARG1HI, HI(RB)
2742 | decode_RDtoRC8 RC, RD 3580 | lw SFARG2HI, HI(RC)
2743 | addu CARG3, BASE, RB 3581 | sltiu AT, SFARG1HI, LJ_TISNUM
2744 | addu CARG4, BASE, RC 3582 | sltiu TMP0, SFARG2HI, LJ_TISNUM
2745 | lw TMP1, HI(CARG3)
2746 | lw TMP2, HI(CARG4)
2747 | ldc1 FARG1, 0(CARG3)
2748 | ldc1 FARG2, 0(CARG4)
2749 | sltiu AT, TMP1, LJ_TISNUM
2750 | sltiu TMP0, TMP2, LJ_TISNUM
2751 | and AT, AT, TMP0 3583 | and AT, AT, TMP0
2752 | load_got pow 3584 | load_got pow
2753 | beqz AT, ->vmeta_arith 3585 | beqz AT, ->vmeta_arith
2754 |. addu RA, BASE, RA 3586 |. addu RA, BASE, RA
3587 |.if FPU
3588 | ldc1 FARG1, 0(RB)
3589 | ldc1 FARG2, 0(RC)
3590 |.else
3591 | lw SFARG1LO, LO(RB)
3592 | lw SFARG2LO, LO(RC)
3593 |.endif
2755 | call_extern 3594 | call_extern
2756 |. nop 3595 |. nop
2757 | ins_next1 3596 | ins_next1
3597 |.if FPU
2758 | sdc1 FRET1, 0(RA) 3598 | sdc1 FRET1, 0(RA)
3599 |.else
3600 | sw SFRETHI, HI(RA)
3601 | sw SFRETLO, LO(RA)
3602 |.endif
2759 | ins_next2 3603 | ins_next2
2760 break; 3604 break;
2761 3605
@@ -2778,10 +3622,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2778 | bnez CRET1, ->vmeta_binop 3622 | bnez CRET1, ->vmeta_binop
2779 |. lw BASE, L->base 3623 |. lw BASE, L->base
2780 | addu RB, BASE, MULTRES 3624 | addu RB, BASE, MULTRES
2781 | ldc1 f0, 0(RB) 3625 | lw SFRETHI, HI(RB)
3626 | lw SFRETLO, LO(RB)
2782 | addu RA, BASE, RA 3627 | addu RA, BASE, RA
2783 | ins_next1 3628 | ins_next1
2784 | sdc1 f0, 0(RA) // Copy result from RB to RA. 3629 | sw SFRETHI, HI(RA)
3630 | sw SFRETLO, LO(RA)
2785 | ins_next2 3631 | ins_next2
2786 break; 3632 break;
2787 3633
@@ -2816,20 +3662,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2816 case BC_KSHORT: 3662 case BC_KSHORT:
2817 | // RA = dst*8, RD = int16_literal*8 3663 | // RA = dst*8, RD = int16_literal*8
2818 | sra RD, INS, 16 3664 | sra RD, INS, 16
2819 | mtc1 RD, f0
2820 | addu RA, BASE, RA 3665 | addu RA, BASE, RA
2821 | cvt.d.w f0, f0
2822 | ins_next1 3666 | ins_next1
2823 | sdc1 f0, 0(RA) 3667 | sw TISNUM, HI(RA)
3668 | sw RD, LO(RA)
2824 | ins_next2 3669 | ins_next2
2825 break; 3670 break;
2826 case BC_KNUM: 3671 case BC_KNUM:
2827 | // RA = dst*8, RD = num_const*8 3672 | // RA = dst*8, RD = num_const*8
2828 | addu RD, KBASE, RD 3673 | addu RD, KBASE, RD
2829 | addu RA, BASE, RA 3674 | addu RA, BASE, RA
2830 | ldc1 f0, 0(RD) 3675 | lw SFRETHI, HI(RD)
3676 | lw SFRETLO, LO(RD)
2831 | ins_next1 3677 | ins_next1
2832 | sdc1 f0, 0(RA) 3678 | sw SFRETHI, HI(RA)
3679 | sw SFRETLO, LO(RA)
2833 | ins_next2 3680 | ins_next2
2834 break; 3681 break;
2835 case BC_KPRI: 3682 case BC_KPRI:
@@ -2865,9 +3712,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2865 | lw UPVAL:RB, LFUNC:RD->uvptr 3712 | lw UPVAL:RB, LFUNC:RD->uvptr
2866 | ins_next1 3713 | ins_next1
2867 | lw TMP1, UPVAL:RB->v 3714 | lw TMP1, UPVAL:RB->v
2868 | ldc1 f0, 0(TMP1) 3715 | lw SFRETHI, HI(TMP1)
3716 | lw SFRETLO, LO(TMP1)
2869 | addu RA, BASE, RA 3717 | addu RA, BASE, RA
2870 | sdc1 f0, 0(RA) 3718 | sw SFRETHI, HI(RA)
3719 | sw SFRETLO, LO(RA)
2871 | ins_next2 3720 | ins_next2
2872 break; 3721 break;
2873 case BC_USETV: 3722 case BC_USETV:
@@ -2876,26 +3725,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2876 | srl RA, RA, 1 3725 | srl RA, RA, 1
2877 | addu RD, BASE, RD 3726 | addu RD, BASE, RD
2878 | addu RA, RA, LFUNC:RB 3727 | addu RA, RA, LFUNC:RB
2879 | ldc1 f0, 0(RD)
2880 | lw UPVAL:RB, LFUNC:RA->uvptr 3728 | lw UPVAL:RB, LFUNC:RA->uvptr
3729 | lw SFRETHI, HI(RD)
3730 | lw SFRETLO, LO(RD)
2881 | lbu TMP3, UPVAL:RB->marked 3731 | lbu TMP3, UPVAL:RB->marked
2882 | lw CARG2, UPVAL:RB->v 3732 | lw CARG2, UPVAL:RB->v
2883 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3733 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2884 | lbu TMP0, UPVAL:RB->closed 3734 | lbu TMP0, UPVAL:RB->closed
2885 | lw TMP2, HI(RD) 3735 | sw SFRETHI, HI(CARG2)
2886 | sdc1 f0, 0(CARG2) 3736 | sw SFRETLO, LO(CARG2)
2887 | li AT, LJ_GC_BLACK|1 3737 | li AT, LJ_GC_BLACK|1
2888 | or TMP3, TMP3, TMP0 3738 | or TMP3, TMP3, TMP0
2889 | beq TMP3, AT, >2 // Upvalue is closed and black? 3739 | beq TMP3, AT, >2 // Upvalue is closed and black?
2890 |. addiu TMP2, TMP2, -(LJ_TNUMX+1) 3740 |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
2891 |1: 3741 |1:
2892 | ins_next 3742 | ins_next
2893 | 3743 |
2894 |2: // Check if new value is collectable. 3744 |2: // Check if new value is collectable.
2895 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3745 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
2896 | beqz AT, <1 // tvisgcv(v) 3746 | beqz AT, <1 // tvisgcv(v)
2897 |. lw TMP1, LO(RD) 3747 |. nop
2898 | lbu TMP3, GCOBJ:TMP1->gch.marked 3748 | lbu TMP3, GCOBJ:SFRETLO->gch.marked
2899 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3749 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2900 | beqz TMP3, <1 3750 | beqz TMP3, <1
2901 |. load_got lj_gc_barrieruv 3751 |. load_got lj_gc_barrieruv
@@ -2943,11 +3793,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2943 | srl RA, RA, 1 3793 | srl RA, RA, 1
2944 | addu RD, KBASE, RD 3794 | addu RD, KBASE, RD
2945 | addu RA, RA, LFUNC:RB 3795 | addu RA, RA, LFUNC:RB
2946 | ldc1 f0, 0(RD) 3796 | lw UPVAL:RB, LFUNC:RA->uvptr
2947 | lw UPVAL:RB, LFUNC:RA->uvptr 3797 | lw SFRETHI, HI(RD)
3798 | lw SFRETLO, LO(RD)
3799 | lw TMP1, UPVAL:RB->v
2948 | ins_next1 3800 | ins_next1
2949 | lw TMP1, UPVAL:RB->v 3801 | sw SFRETHI, HI(TMP1)
2950 | sdc1 f0, 0(TMP1) 3802 | sw SFRETLO, LO(TMP1)
2951 | ins_next2 3803 | ins_next2
2952 break; 3804 break;
2953 case BC_USETP: 3805 case BC_USETP:
@@ -2957,10 +3809,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2957 | srl TMP0, RD, 3 3809 | srl TMP0, RD, 3
2958 | addu RA, RA, LFUNC:RB 3810 | addu RA, RA, LFUNC:RB
2959 | not TMP0, TMP0 3811 | not TMP0, TMP0
2960 | lw UPVAL:RB, LFUNC:RA->uvptr 3812 | lw UPVAL:RB, LFUNC:RA->uvptr
2961 | ins_next1 3813 | ins_next1
2962 | lw TMP1, UPVAL:RB->v 3814 | lw TMP1, UPVAL:RB->v
2963 | sw TMP0, HI(TMP1) 3815 | sw TMP0, HI(TMP1)
2964 | ins_next2 3816 | ins_next2
2965 break; 3817 break;
2966 3818
@@ -2996,8 +3848,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2996 | li TMP0, LJ_TFUNC 3848 | li TMP0, LJ_TFUNC
2997 | ins_next1 3849 | ins_next1
2998 | addu RA, BASE, RA 3850 | addu RA, BASE, RA
2999 | sw TMP0, HI(RA)
3000 | sw LFUNC:CRET1, LO(RA) 3851 | sw LFUNC:CRET1, LO(RA)
3852 | sw TMP0, HI(RA)
3001 | ins_next2 3853 | ins_next2
3002 break; 3854 break;
3003 3855
@@ -3078,31 +3930,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3078 | lw TMP2, HI(CARG3) 3930 | lw TMP2, HI(CARG3)
3079 | lw TAB:RB, LO(CARG2) 3931 | lw TAB:RB, LO(CARG2)
3080 | li AT, LJ_TTAB 3932 | li AT, LJ_TTAB
3081 | ldc1 f0, 0(CARG3)
3082 | bne TMP1, AT, ->vmeta_tgetv 3933 | bne TMP1, AT, ->vmeta_tgetv
3083 |. addu RA, BASE, RA 3934 |. addu RA, BASE, RA
3084 | sltiu AT, TMP2, LJ_TISNUM 3935 | bne TMP2, TISNUM, >5
3085 | beqz AT, >5 3936 |. lw RC, LO(CARG3)
3086 |. li AT, LJ_TSTR 3937 | lw TMP0, TAB:RB->asize
3087 |
3088 | // Convert number key to integer, check for integerness and range.
3089 | cvt.w.d f2, f0
3090 | lw TMP0, TAB:RB->asize
3091 | mfc1 TMP2, f2
3092 | cvt.d.w f4, f2
3093 | lw TMP1, TAB:RB->array 3938 | lw TMP1, TAB:RB->array
3094 | c.eq.d f0, f4 3939 | sltu AT, RC, TMP0
3095 | sltu AT, TMP2, TMP0 3940 | sll TMP2, RC, 3
3096 | movf AT, r0
3097 | sll TMP2, TMP2, 3
3098 | beqz AT, ->vmeta_tgetv // Integer key and in array part? 3941 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3099 |. addu TMP2, TMP1, TMP2 3942 |. addu TMP2, TMP1, TMP2
3100 | lw TMP0, HI(TMP2) 3943 | lw SFRETHI, HI(TMP2)
3101 | beq TMP0, TISNIL, >2 3944 | beq SFRETHI, TISNIL, >2
3102 |. ldc1 f0, 0(TMP2) 3945 |. lw SFRETLO, LO(TMP2)
3103 |1: 3946 |1:
3104 | ins_next1 3947 | ins_next1
3105 | sdc1 f0, 0(RA) 3948 | sw SFRETHI, HI(RA)
3949 | sw SFRETLO, LO(RA)
3106 | ins_next2 3950 | ins_next2
3107 | 3951 |
3108 |2: // Check for __index if table value is nil. 3952 |2: // Check for __index if table value is nil.
@@ -3117,8 +3961,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3117 |. nop 3961 |. nop
3118 | 3962 |
3119 |5: 3963 |5:
3964 | li AT, LJ_TSTR
3120 | bne TMP2, AT, ->vmeta_tgetv 3965 | bne TMP2, AT, ->vmeta_tgetv
3121 |. lw STR:RC, LO(CARG3) 3966 |. nop
3122 | b ->BC_TGETS_Z // String key? 3967 | b ->BC_TGETS_Z // String key?
3123 |. nop 3968 |. nop
3124 break; 3969 break;
@@ -3150,18 +3995,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3150 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 3995 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3151 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 3996 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3152 | lw NODE:TMP1, NODE:TMP2->next 3997 | lw NODE:TMP1, NODE:TMP2->next
3153 | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 3998 | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3154 | addiu CARG1, CARG1, -LJ_TSTR 3999 | addiu CARG1, CARG1, -LJ_TSTR
3155 | xor TMP0, TMP0, STR:RC 4000 | xor TMP0, TMP0, STR:RC
3156 | or AT, CARG1, TMP0 4001 | or AT, CARG1, TMP0
3157 | bnez AT, >4 4002 | bnez AT, >4
3158 |. lw TAB:TMP3, TAB:RB->metatable 4003 |. lw TAB:TMP3, TAB:RB->metatable
3159 | beq CARG2, TISNIL, >5 // Key found, but nil value? 4004 | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
3160 |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) 4005 |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3161 |3: 4006 |3:
3162 | ins_next1 4007 | ins_next1
3163 | sw CARG2, HI(RA) 4008 | sw SFRETHI, HI(RA)
3164 | sw CARG1, LO(RA) 4009 | sw SFRETLO, LO(RA)
3165 | ins_next2 4010 | ins_next2
3166 | 4011 |
3167 |4: // Follow hash chain. 4012 |4: // Follow hash chain.
@@ -3171,7 +4016,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3171 | 4016 |
3172 |5: // Check for __index if table value is nil. 4017 |5: // Check for __index if table value is nil.
3173 | beqz TAB:TMP3, <3 // No metatable: done. 4018 | beqz TAB:TMP3, <3 // No metatable: done.
3174 |. li CARG2, LJ_TNIL 4019 |. li SFRETHI, LJ_TNIL
3175 | lbu TMP0, TAB:TMP3->nomm 4020 | lbu TMP0, TAB:TMP3->nomm
3176 | andi TMP0, TMP0, 1<<MM_index 4021 | andi TMP0, TMP0, 1<<MM_index
3177 | bnez TMP0, <3 // 'no __index' flag set: done. 4022 | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3196,12 +4041,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3196 | sltu AT, TMP0, TMP1 4041 | sltu AT, TMP0, TMP1
3197 | beqz AT, ->vmeta_tgetb 4042 | beqz AT, ->vmeta_tgetb
3198 |. addu RC, TMP2, RC 4043 |. addu RC, TMP2, RC
3199 | lw TMP1, HI(RC) 4044 | lw SFRETHI, HI(RC)
3200 | beq TMP1, TISNIL, >5 4045 | beq SFRETHI, TISNIL, >5
3201 |. ldc1 f0, 0(RC) 4046 |. lw SFRETLO, LO(RC)
3202 |1: 4047 |1:
3203 | ins_next1 4048 | ins_next1
3204 | sdc1 f0, 0(RA) 4049 | sw SFRETHI, HI(RA)
4050 | sw SFRETLO, LO(RA)
3205 | ins_next2 4051 | ins_next2
3206 | 4052 |
3207 |5: // Check for __index if table value is nil. 4053 |5: // Check for __index if table value is nil.
@@ -3212,9 +4058,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3212 | andi TMP1, TMP1, 1<<MM_index 4058 | andi TMP1, TMP1, 1<<MM_index
3213 | bnez TMP1, <1 // 'no __index' flag set: done. 4059 | bnez TMP1, <1 // 'no __index' flag set: done.
3214 |. nop 4060 |. nop
3215 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4061 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3216 |. nop 4062 |. nop
3217 break; 4063 break;
4064 case BC_TGETR:
4065 | // RA = dst*8, RB = table*8, RC = key*8
4066 | decode_RB8a RB, INS
4067 | decode_RB8b RB
4068 | decode_RDtoRC8 RC, RD
4069 | addu RB, BASE, RB
4070 | addu RC, BASE, RC
4071 | lw TAB:CARG1, LO(RB)
4072 | lw CARG2, LO(RC)
4073 | addu RA, BASE, RA
4074 | lw TMP0, TAB:CARG1->asize
4075 | lw TMP1, TAB:CARG1->array
4076 | sltu AT, CARG2, TMP0
4077 | sll TMP2, CARG2, 3
4078 | beqz AT, ->vmeta_tgetr // In array part?
4079 |. addu CRET1, TMP1, TMP2
4080 | lw SFARG2HI, HI(CRET1)
4081 | lw SFARG2LO, LO(CRET1)
4082 |->BC_TGETR_Z:
4083 | ins_next1
4084 | sw SFARG2HI, HI(RA)
4085 | sw SFARG2LO, LO(RA)
4086 | ins_next2
4087 break;
3218 4088
3219 case BC_TSETV: 4089 case BC_TSETV:
3220 | // RA = src*8, RB = table*8, RC = key*8 4090 | // RA = src*8, RB = table*8, RC = key*8
@@ -3227,33 +4097,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3227 | lw TMP2, HI(CARG3) 4097 | lw TMP2, HI(CARG3)
3228 | lw TAB:RB, LO(CARG2) 4098 | lw TAB:RB, LO(CARG2)
3229 | li AT, LJ_TTAB 4099 | li AT, LJ_TTAB
3230 | ldc1 f0, 0(CARG3)
3231 | bne TMP1, AT, ->vmeta_tsetv 4100 | bne TMP1, AT, ->vmeta_tsetv
3232 |. addu RA, BASE, RA 4101 |. addu RA, BASE, RA
3233 | sltiu AT, TMP2, LJ_TISNUM 4102 | bne TMP2, TISNUM, >5
3234 | beqz AT, >5 4103 |. lw RC, LO(CARG3)
3235 |. li AT, LJ_TSTR 4104 | lw TMP0, TAB:RB->asize
3236 |
3237 | // Convert number key to integer, check for integerness and range.
3238 | cvt.w.d f2, f0
3239 | lw TMP0, TAB:RB->asize
3240 | mfc1 TMP2, f2
3241 | cvt.d.w f4, f2
3242 | lw TMP1, TAB:RB->array 4105 | lw TMP1, TAB:RB->array
3243 | c.eq.d f0, f4 4106 | sltu AT, RC, TMP0
3244 | sltu AT, TMP2, TMP0 4107 | sll TMP2, RC, 3
3245 | movf AT, r0
3246 | sll TMP2, TMP2, 3
3247 | beqz AT, ->vmeta_tsetv // Integer key and in array part? 4108 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
3248 |. addu TMP1, TMP1, TMP2 4109 |. addu TMP1, TMP1, TMP2
3249 | lbu TMP3, TAB:RB->marked
3250 | lw TMP0, HI(TMP1) 4110 | lw TMP0, HI(TMP1)
4111 | lbu TMP3, TAB:RB->marked
4112 | lw SFRETHI, HI(RA)
3251 | beq TMP0, TISNIL, >3 4113 | beq TMP0, TISNIL, >3
3252 |. ldc1 f0, 0(RA) 4114 |. lw SFRETLO, LO(RA)
3253 |1: 4115 |1:
3254 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4116 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3255 | bnez AT, >7 4117 | sw SFRETHI, HI(TMP1)
3256 |. sdc1 f0, 0(TMP1) 4118 | bnez AT, >7
4119 |. sw SFRETLO, LO(TMP1)
3257 |2: 4120 |2:
3258 | ins_next 4121 | ins_next
3259 | 4122 |
@@ -3269,8 +4132,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3269 |. nop 4132 |. nop
3270 | 4133 |
3271 |5: 4134 |5:
4135 | li AT, LJ_TSTR
3272 | bne TMP2, AT, ->vmeta_tsetv 4136 | bne TMP2, AT, ->vmeta_tsetv
3273 |. lw STR:RC, LO(CARG3) 4137 |. nop
3274 | b ->BC_TSETS_Z // String key? 4138 | b ->BC_TSETS_Z // String key?
3275 |. nop 4139 |. nop
3276 | 4140 |
@@ -3302,7 +4166,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3302 | sll TMP1, TMP1, 3 4166 | sll TMP1, TMP1, 3
3303 | subu TMP1, TMP0, TMP1 4167 | subu TMP1, TMP0, TMP1
3304 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4168 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4169 |.if FPU
3305 | ldc1 f20, 0(RA) 4170 | ldc1 f20, 0(RA)
4171 |.else
4172 | lw SFRETHI, HI(RA)
4173 | lw SFRETLO, LO(RA)
4174 |.endif
3306 |1: 4175 |1:
3307 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4176 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3308 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4177 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3316,8 +4185,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3316 |. lw TAB:TMP0, TAB:RB->metatable 4185 |. lw TAB:TMP0, TAB:RB->metatable
3317 |2: 4186 |2:
3318 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4187 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4188 |.if FPU
3319 | bnez AT, >7 4189 | bnez AT, >7
3320 |. sdc1 f20, NODE:TMP2->val 4190 |. sdc1 f20, NODE:TMP2->val
4191 |.else
4192 | sw SFRETHI, NODE:TMP2->val.u32.hi
4193 | bnez AT, >7
4194 |. sw SFRETLO, NODE:TMP2->val.u32.lo
4195 |.endif
3321 |3: 4196 |3:
3322 | ins_next 4197 | ins_next
3323 | 4198 |
@@ -3355,8 +4230,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3355 |. move CARG1, L 4230 |. move CARG1, L
3356 | // Returns TValue *. 4231 | // Returns TValue *.
3357 | lw BASE, L->base 4232 | lw BASE, L->base
4233 |.if FPU
3358 | b <3 // No 2nd write barrier needed. 4234 | b <3 // No 2nd write barrier needed.
3359 |. sdc1 f20, 0(CRET1) 4235 |. sdc1 f20, 0(CRET1)
4236 |.else
4237 | lw SFARG1HI, HI(RA)
4238 | lw SFARG1LO, LO(RA)
4239 | sw SFARG1HI, HI(CRET1)
4240 | b <3 // No 2nd write barrier needed.
4241 |. sw SFARG1LO, LO(CRET1)
4242 |.endif
3360 | 4243 |
3361 |7: // Possible table write barrier for the value. Skip valiswhite check. 4244 |7: // Possible table write barrier for the value. Skip valiswhite check.
3362 | barrierback TAB:RB, TMP3, TMP0, <3 4245 | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3381,11 +4264,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3381 | lw TMP1, HI(RC) 4264 | lw TMP1, HI(RC)
3382 | lbu TMP3, TAB:RB->marked 4265 | lbu TMP3, TAB:RB->marked
3383 | beq TMP1, TISNIL, >5 4266 | beq TMP1, TISNIL, >5
3384 |. ldc1 f0, 0(RA)
3385 |1: 4267 |1:
4268 |. lw SFRETHI, HI(RA)
4269 | lw SFRETLO, LO(RA)
3386 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4270 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4271 | sw SFRETHI, HI(RC)
3387 | bnez AT, >7 4272 | bnez AT, >7
3388 |. sdc1 f0, 0(RC) 4273 |. sw SFRETLO, LO(RC)
3389 |2: 4274 |2:
3390 | ins_next 4275 | ins_next
3391 | 4276 |
@@ -3397,12 +4282,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3397 | andi TMP1, TMP1, 1<<MM_newindex 4282 | andi TMP1, TMP1, 1<<MM_newindex
3398 | bnez TMP1, <1 // 'no __newindex' flag set: done. 4283 | bnez TMP1, <1 // 'no __newindex' flag set: done.
3399 |. nop 4284 |. nop
3400 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4285 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
3401 |. nop 4286 |. nop
3402 | 4287 |
3403 |7: // Possible table write barrier for the value. Skip valiswhite check. 4288 |7: // Possible table write barrier for the value. Skip valiswhite check.
3404 | barrierback TAB:RB, TMP3, TMP0, <2 4289 | barrierback TAB:RB, TMP3, TMP0, <2
3405 break; 4290 break;
4291 case BC_TSETR:
4292 | // RA = dst*8, RB = table*8, RC = key*8
4293 | decode_RB8a RB, INS
4294 | decode_RB8b RB
4295 | decode_RDtoRC8 RC, RD
4296 | addu CARG1, BASE, RB
4297 | addu CARG3, BASE, RC
4298 | lw TAB:CARG2, LO(CARG1)
4299 | lw CARG3, LO(CARG3)
4300 | lbu TMP3, TAB:CARG2->marked
4301 | lw TMP0, TAB:CARG2->asize
4302 | lw TMP1, TAB:CARG2->array
4303 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4304 | bnez AT, >7
4305 |. addu RA, BASE, RA
4306 |2:
4307 | sltu AT, CARG3, TMP0
4308 | sll TMP2, CARG3, 3
4309 | beqz AT, ->vmeta_tsetr // In array part?
4310 |. addu CRET1, TMP1, TMP2
4311 |->BC_TSETR_Z:
4312 | lw SFARG1HI, HI(RA)
4313 | lw SFARG1LO, LO(RA)
4314 | ins_next1
4315 | sw SFARG1HI, HI(CRET1)
4316 | sw SFARG1LO, LO(CRET1)
4317 | ins_next2
4318 |
4319 |7: // Possible table write barrier for the value. Skip valiswhite check.
4320 | barrierback TAB:CARG2, TMP3, CRET1, <2
4321 break;
3406 4322
3407 case BC_TSETM: 4323 case BC_TSETM:
3408 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4324 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3425,10 +4341,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3425 | addu TMP1, TMP1, CARG1 4341 | addu TMP1, TMP1, CARG1
3426 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4342 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3427 |3: // Copy result slots to table. 4343 |3: // Copy result slots to table.
3428 | ldc1 f0, 0(RA) 4344 | lw SFRETHI, HI(RA)
4345 | lw SFRETLO, LO(RA)
3429 | addiu RA, RA, 8 4346 | addiu RA, RA, 8
3430 | sltu AT, RA, TMP2 4347 | sltu AT, RA, TMP2
3431 | sdc1 f0, 0(TMP1) 4348 | sw SFRETHI, HI(TMP1)
4349 | sw SFRETLO, LO(TMP1)
3432 | bnez AT, <3 4350 | bnez AT, <3
3433 |. addiu TMP1, TMP1, 8 4351 |. addiu TMP1, TMP1, 8
3434 | bnez TMP0, >7 4352 | bnez TMP0, >7
@@ -3503,10 +4421,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3503 | beqz NARGS8:RC, >3 4421 | beqz NARGS8:RC, >3
3504 |. move TMP3, NARGS8:RC 4422 |. move TMP3, NARGS8:RC
3505 |2: 4423 |2:
3506 | ldc1 f0, 0(RA) 4424 | lw SFRETHI, HI(RA)
4425 | lw SFRETLO, LO(RA)
3507 | addiu RA, RA, 8 4426 | addiu RA, RA, 8
3508 | addiu TMP3, TMP3, -8 4427 | addiu TMP3, TMP3, -8
3509 | sdc1 f0, 0(TMP2) 4428 | sw SFRETHI, HI(TMP2)
4429 | sw SFRETLO, LO(TMP2)
3510 | bnez TMP3, <2 4430 | bnez TMP3, <2
3511 |. addiu TMP2, TMP2, 8 4431 |. addiu TMP2, TMP2, 8
3512 |3: 4432 |3:
@@ -3543,12 +4463,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3543 | li AT, LJ_TFUNC 4463 | li AT, LJ_TFUNC
3544 | lw TMP1, -24+HI(BASE) 4464 | lw TMP1, -24+HI(BASE)
3545 | lw LFUNC:RB, -24+LO(BASE) 4465 | lw LFUNC:RB, -24+LO(BASE)
3546 | ldc1 f2, -8(BASE) 4466 | lw SFARG1HI, -16+HI(BASE)
3547 | ldc1 f0, -16(BASE) 4467 | lw SFARG1LO, -16+LO(BASE)
4468 | lw SFARG2HI, -8+HI(BASE)
4469 | lw SFARG2LO, -8+LO(BASE)
3548 | sw TMP1, HI(BASE) // Copy callable. 4470 | sw TMP1, HI(BASE) // Copy callable.
3549 | sw LFUNC:RB, LO(BASE) 4471 | sw LFUNC:RB, LO(BASE)
3550 | sdc1 f2, 16(BASE) // Copy control var. 4472 | sw SFARG1HI, 8+HI(BASE) // Copy state.
3551 | sdc1 f0, 8(BASE) // Copy state. 4473 | sw SFARG1LO, 8+LO(BASE)
4474 | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4475 | sw SFARG2LO, 16+LO(BASE)
3552 | addiu BASE, BASE, 8 4476 | addiu BASE, BASE, 8
3553 | bne TMP1, AT, ->vmeta_call 4477 | bne TMP1, AT, ->vmeta_call
3554 |. li NARGS8:RC, 16 // Iterators get 2 arguments. 4478 |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3571,20 +4495,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3571 | beqz AT, >5 // Index points after array part? 4495 | beqz AT, >5 // Index points after array part?
3572 |. sll TMP3, RC, 3 4496 |. sll TMP3, RC, 3
3573 | addu TMP3, TMP1, TMP3 4497 | addu TMP3, TMP1, TMP3
3574 | lw TMP2, HI(TMP3) 4498 | lw SFARG1HI, HI(TMP3)
3575 | ldc1 f0, 0(TMP3) 4499 | lw SFARG1LO, LO(TMP3)
3576 | mtc1 RC, f2
3577 | lhu RD, -4+OFS_RD(PC) 4500 | lhu RD, -4+OFS_RD(PC)
3578 | beq TMP2, TISNIL, <1 // Skip holes in array part. 4501 | sw TISNUM, HI(RA)
4502 | sw RC, LO(RA)
4503 | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
3579 |. addiu RC, RC, 1 4504 |. addiu RC, RC, 1
3580 | cvt.d.w f2, f2 4505 | sw SFARG1HI, 8+HI(RA)
4506 | sw SFARG1LO, 8+LO(RA)
3581 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4507 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3582 | sdc1 f0, 8(RA)
3583 | decode_RD4b RD 4508 | decode_RD4b RD
3584 | addu RD, RD, TMP3 4509 | addu RD, RD, TMP3
3585 | sw RC, -8+LO(RA) // Update control var. 4510 | sw RC, -8+LO(RA) // Update control var.
3586 | addu PC, PC, RD 4511 | addu PC, PC, RD
3587 | sdc1 f2, 0(RA)
3588 |3: 4512 |3:
3589 | ins_next 4513 | ins_next
3590 | 4514 |
@@ -3599,18 +4523,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3599 | sll RB, RC, 3 4523 | sll RB, RC, 3
3600 | subu TMP3, TMP3, RB 4524 | subu TMP3, TMP3, RB
3601 | addu NODE:TMP3, TMP3, TMP2 4525 | addu NODE:TMP3, TMP3, TMP2
3602 | lw RB, HI(NODE:TMP3) 4526 | lw SFARG1HI, NODE:TMP3->val.u32.hi
3603 | ldc1 f0, 0(NODE:TMP3) 4527 | lw SFARG1LO, NODE:TMP3->val.u32.lo
3604 | lhu RD, -4+OFS_RD(PC) 4528 | lhu RD, -4+OFS_RD(PC)
3605 | beq RB, TISNIL, <6 // Skip holes in hash part. 4529 | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
3606 |. addiu RC, RC, 1 4530 |. addiu RC, RC, 1
3607 | ldc1 f2, NODE:TMP3->key 4531 | lw SFARG2HI, NODE:TMP3->key.u32.hi
4532 | lw SFARG2LO, NODE:TMP3->key.u32.lo
3608 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4533 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3609 | sdc1 f0, 8(RA) 4534 | sw SFARG1HI, 8+HI(RA)
4535 | sw SFARG1LO, 8+LO(RA)
3610 | addu RC, RC, TMP0 4536 | addu RC, RC, TMP0
3611 | decode_RD4b RD 4537 | decode_RD4b RD
3612 | addu RD, RD, TMP3 4538 | addu RD, RD, TMP3
3613 | sdc1 f2, 0(RA) 4539 | sw SFARG2HI, HI(RA)
4540 | sw SFARG2LO, LO(RA)
3614 | addu PC, PC, RD 4541 | addu PC, PC, RD
3615 | b <3 4542 | b <3
3616 |. sw RC, -8+LO(RA) // Update control var. 4543 |. sw RC, -8+LO(RA) // Update control var.
@@ -3690,9 +4617,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3690 | bnez AT, >7 4617 | bnez AT, >7
3691 |. addiu MULTRES, TMP1, 8 4618 |. addiu MULTRES, TMP1, 8
3692 |6: 4619 |6:
3693 | ldc1 f0, 0(RC) 4620 | lw SFRETHI, HI(RC)
4621 | lw SFRETLO, LO(RC)
3694 | addiu RC, RC, 8 4622 | addiu RC, RC, 8
3695 | sdc1 f0, 0(RA) 4623 | sw SFRETHI, HI(RA)
4624 | sw SFRETLO, LO(RA)
3696 | sltu AT, RC, TMP3 4625 | sltu AT, RC, TMP3
3697 | bnez AT, <6 // More vararg slots? 4626 | bnez AT, <6 // More vararg slots?
3698 |. addiu RA, RA, 8 4627 |. addiu RA, RA, 8
@@ -3748,10 +4677,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3748 | beqz RC, >3 4677 | beqz RC, >3
3749 |. subu BASE, TMP2, TMP0 4678 |. subu BASE, TMP2, TMP0
3750 |2: 4679 |2:
3751 | ldc1 f0, 0(RA) 4680 | lw SFRETHI, HI(RA)
4681 | lw SFRETLO, LO(RA)
3752 | addiu RA, RA, 8 4682 | addiu RA, RA, 8
3753 | addiu RC, RC, -8 4683 | addiu RC, RC, -8
3754 | sdc1 f0, 0(TMP2) 4684 | sw SFRETHI, HI(TMP2)
4685 | sw SFRETLO, LO(TMP2)
3755 | bnez RC, <2 4686 | bnez RC, <2
3756 |. addiu TMP2, TMP2, 8 4687 |. addiu TMP2, TMP2, 8
3757 |3: 4688 |3:
@@ -3792,14 +4723,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3792 | lw INS, -4(PC) 4723 | lw INS, -4(PC)
3793 | addiu TMP2, BASE, -8 4724 | addiu TMP2, BASE, -8
3794 if (op == BC_RET1) { 4725 if (op == BC_RET1) {
3795 | ldc1 f0, 0(RA) 4726 | lw SFRETHI, HI(RA)
4727 | lw SFRETLO, LO(RA)
3796 } 4728 }
3797 | decode_RB8a RB, INS 4729 | decode_RB8a RB, INS
3798 | decode_RA8a RA, INS 4730 | decode_RA8a RA, INS
3799 | decode_RB8b RB 4731 | decode_RB8b RB
3800 | decode_RA8b RA 4732 | decode_RA8b RA
3801 if (op == BC_RET1) { 4733 if (op == BC_RET1) {
3802 | sdc1 f0, 0(TMP2) 4734 | sw SFRETHI, HI(TMP2)
4735 | sw SFRETLO, LO(TMP2)
3803 } 4736 }
3804 | subu BASE, TMP2, RA 4737 | subu BASE, TMP2, RA
3805 |5: 4738 |5:
@@ -3841,69 +4774,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3841 | // RA = base*8, RD = target (after end of loop or start of loop) 4774 | // RA = base*8, RD = target (after end of loop or start of loop)
3842 vk = (op == BC_IFORL || op == BC_JFORL); 4775 vk = (op == BC_IFORL || op == BC_JFORL);
3843 | addu RA, BASE, RA 4776 | addu RA, BASE, RA
3844 if (vk) { 4777 | lw SFARG1HI, FORL_IDX*8+HI(RA)
3845 | ldc1 f0, FORL_IDX*8(RA) 4778 | lw SFARG1LO, FORL_IDX*8+LO(RA)
3846 | ldc1 f4, FORL_STEP*8(RA)
3847 | ldc1 f2, FORL_STOP*8(RA)
3848 | lw TMP3, FORL_STEP*8+HI(RA)
3849 | add.d f0, f0, f4
3850 | sdc1 f0, FORL_IDX*8(RA)
3851 } else {
3852 | lw TMP1, FORL_IDX*8+HI(RA)
3853 | lw TMP3, FORL_STEP*8+HI(RA)
3854 | lw TMP2, FORL_STOP*8+HI(RA)
3855 | sltiu TMP1, TMP1, LJ_TISNUM
3856 | sltiu TMP0, TMP3, LJ_TISNUM
3857 | sltiu TMP2, TMP2, LJ_TISNUM
3858 | and TMP1, TMP1, TMP0
3859 | and TMP1, TMP1, TMP2
3860 | ldc1 f0, FORL_IDX*8(RA)
3861 | beqz TMP1, ->vmeta_for
3862 |. ldc1 f2, FORL_STOP*8(RA)
3863 }
3864 if (op != BC_JFORL) { 4779 if (op != BC_JFORL) {
3865 | srl RD, RD, 1 4780 | srl RD, RD, 1
3866 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) 4781 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4782 | addu TMP2, RD, TMP2
4783 }
4784 if (!vk) {
4785 | lw SFARG2HI, FORL_STOP*8+HI(RA)
4786 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4787 | bne SFARG1HI, TISNUM, >5
4788 |. lw SFRETHI, FORL_STEP*8+HI(RA)
4789 | xor AT, SFARG2HI, TISNUM
4790 | lw SFRETLO, FORL_STEP*8+LO(RA)
4791 | xor TMP0, SFRETHI, TISNUM
4792 | or AT, AT, TMP0
4793 | bnez AT, ->vmeta_for
4794 |. slt AT, SFRETLO, r0
4795 | slt CRET1, SFARG2LO, SFARG1LO
4796 | slt TMP1, SFARG1LO, SFARG2LO
4797 | movn CRET1, TMP1, AT
4798 } else {
4799 | bne SFARG1HI, TISNUM, >5
4800 |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4801 | lw SFRETLO, FORL_STOP*8+LO(RA)
4802 | move TMP3, SFARG1LO
4803 | addu SFARG1LO, SFARG1LO, SFARG2LO
4804 | xor TMP0, SFARG1LO, TMP3
4805 | xor TMP1, SFARG1LO, SFARG2LO
4806 | and TMP0, TMP0, TMP1
4807 | slt TMP1, SFARG1LO, SFRETLO
4808 | slt CRET1, SFRETLO, SFARG1LO
4809 | slt AT, SFARG2LO, r0
4810 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4811 | movn CRET1, TMP1, AT
4812 | or CRET1, CRET1, TMP0
4813 }
4814 |1:
4815 if (op == BC_FORI) {
4816 | movz TMP2, r0, CRET1
4817 | addu PC, PC, TMP2
4818 } else if (op == BC_JFORI) {
4819 | addu PC, PC, TMP2
4820 | lhu RD, -4+OFS_RD(PC)
4821 } else if (op == BC_IFORL) {
4822 | movn TMP2, r0, CRET1
4823 | addu PC, PC, TMP2
3867 } 4824 }
3868 | c.le.d 0, f0, f2 4825 if (vk) {
3869 | c.le.d 1, f2, f0 4826 | sw SFARG1HI, FORL_IDX*8+HI(RA)
3870 | sdc1 f0, FORL_EXT*8(RA) 4827 | sw SFARG1LO, FORL_IDX*8+LO(RA)
4828 }
4829 | ins_next1
4830 | sw SFARG1HI, FORL_EXT*8+HI(RA)
4831 | sw SFARG1LO, FORL_EXT*8+LO(RA)
4832 |2:
3871 if (op == BC_JFORI) { 4833 if (op == BC_JFORI) {
3872 | li TMP1, 1 4834 | beqz CRET1, =>BC_JLOOP
3873 | li TMP2, 1
3874 | addu TMP0, RD, TMP0
3875 | slt TMP3, TMP3, r0
3876 | movf TMP1, r0, 0
3877 | addu PC, PC, TMP0
3878 | movf TMP2, r0, 1
3879 | lhu RD, -4+OFS_RD(PC)
3880 | movn TMP1, TMP2, TMP3
3881 | bnez TMP1, =>BC_JLOOP
3882 |. decode_RD8b RD 4835 |. decode_RD8b RD
3883 } else if (op == BC_JFORL) { 4836 } else if (op == BC_JFORL) {
3884 | li TMP1, 1 4837 | beqz CRET1, =>BC_JLOOP
3885 | li TMP2, 1 4838 }
3886 | slt TMP3, TMP3, r0 4839 | ins_next2
3887 | movf TMP1, r0, 0 4840 |
3888 | movf TMP2, r0, 1 4841 |5: // FP loop.
3889 | movn TMP1, TMP2, TMP3 4842 |.if FPU
3890 | bnez TMP1, =>BC_JLOOP 4843 if (!vk) {
4844 | ldc1 f0, FORL_IDX*8(RA)
4845 | ldc1 f2, FORL_STOP*8(RA)
4846 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4847 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4848 | sltiu AT, SFRETHI, LJ_TISNUM
4849 | and TMP0, TMP0, TMP1
4850 | and AT, AT, TMP0
4851 | beqz AT, ->vmeta_for
4852 |. slt TMP3, SFRETHI, r0
4853 | c.ole.d 0, f0, f2
4854 | c.ole.d 1, f2, f0
4855 | li CRET1, 1
4856 | movt CRET1, r0, 0
4857 | movt AT, r0, 1
4858 | b <1
4859 |. movn CRET1, AT, TMP3
4860 } else {
4861 | ldc1 f0, FORL_IDX*8(RA)
4862 | ldc1 f4, FORL_STEP*8(RA)
4863 | ldc1 f2, FORL_STOP*8(RA)
4864 | lw SFARG2HI, FORL_STEP*8+HI(RA)
4865 | add.d f0, f0, f4
4866 | c.ole.d 0, f0, f2
4867 | c.ole.d 1, f2, f0
4868 | slt TMP3, SFARG2HI, r0
4869 | li CRET1, 1
4870 | li AT, 1
4871 | movt CRET1, r0, 0
4872 | movt AT, r0, 1
4873 | movn CRET1, AT, TMP3
4874 if (op == BC_IFORL) {
4875 | movn TMP2, r0, CRET1
4876 | addu PC, PC, TMP2
4877 }
4878 | sdc1 f0, FORL_IDX*8(RA)
4879 | ins_next1
4880 | b <2
4881 |. sdc1 f0, FORL_EXT*8(RA)
4882 }
4883 |.else
4884 if (!vk) {
4885 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4886 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4887 | sltiu AT, SFRETHI, LJ_TISNUM
4888 | and TMP0, TMP0, TMP1
4889 | and AT, AT, TMP0
4890 | beqz AT, ->vmeta_for
4891 |. nop
4892 | bal ->vm_sfcmpolex
4893 |. move TMP3, SFRETHI
4894 | b <1
3891 |. nop 4895 |. nop
3892 } else { 4896 } else {
3893 | addu TMP1, RD, TMP0 4897 | lw SFARG2HI, FORL_STEP*8+HI(RA)
3894 | slt TMP3, TMP3, r0 4898 | load_got __adddf3
3895 | move TMP2, TMP1 4899 | call_extern
3896 if (op == BC_FORI) { 4900 |. sw TMP2, ARG5
3897 | movt TMP1, r0, 0 4901 | lw SFARG2HI, FORL_STOP*8+HI(RA)
3898 | movt TMP2, r0, 1 4902 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4903 | move SFARG1HI, SFRETHI
4904 | move SFARG1LO, SFRETLO
4905 | bal ->vm_sfcmpolex
4906 |. lw TMP3, FORL_STEP*8+HI(RA)
4907 if ( op == BC_JFORL ) {
4908 | lhu RD, -4+OFS_RD(PC)
4909 | lw TMP2, ARG5
4910 | b <1
4911 |. decode_RD8b RD
3899 } else { 4912 } else {
3900 | movf TMP1, r0, 0 4913 | b <1
3901 | movf TMP2, r0, 1 4914 |. lw TMP2, ARG5
3902 } 4915 }
3903 | movn TMP1, TMP2, TMP3
3904 | addu PC, PC, TMP1
3905 } 4916 }
3906 | ins_next 4917 |.endif
3907 break; 4918 break;
3908 4919
3909 case BC_ITERL: 4920 case BC_ITERL:
@@ -3962,8 +4973,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3962 | sw AT, DISPATCH_GL(vmstate)(DISPATCH) 4973 | sw AT, DISPATCH_GL(vmstate)(DISPATCH)
3963 | lw TRACE:TMP2, 0(TMP1) 4974 | lw TRACE:TMP2, 0(TMP1)
3964 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4975 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3965 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3966 | lw TMP2, TRACE:TMP2->mcode 4976 | lw TMP2, TRACE:TMP2->mcode
4977 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3967 | jr TMP2 4978 | jr TMP2
3968 |. addiu JGL, DISPATCH, GG_DISP2G+32768 4979 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3969 |.endif 4980 |.endif
@@ -4089,6 +5100,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4089 | li_vmstate INTERP 5100 | li_vmstate INTERP
4090 | lw PC, FRAME_PC(BASE) // Fetch PC of caller. 5101 | lw PC, FRAME_PC(BASE) // Fetch PC of caller.
4091 | subu RA, TMP1, RD // RA = L->top - nresults*8 5102 | subu RA, TMP1, RD // RA = L->top - nresults*8
5103 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
4092 | b ->vm_returnc 5104 | b ->vm_returnc
4093 |. st_vmstate 5105 |. st_vmstate
4094 break; 5106 break;
@@ -4151,8 +5163,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4151 fcofs, CFRAME_SIZE); 5163 fcofs, CFRAME_SIZE);
4152 for (i = 23; i >= 16; i--) 5164 for (i = 23; i >= 16; i--)
4153 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5165 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5166#if !LJ_SOFTFP
4154 for (i = 30; i >= 20; i -= 2) 5167 for (i = 30; i >= 20; i -= 2)
4155 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5168 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5169#endif
4156 fprintf(ctx->fp, 5170 fprintf(ctx->fp,
4157 "\t.align 2\n" 5171 "\t.align 2\n"
4158 ".LEFDE0:\n\n"); 5172 ".LEFDE0:\n\n");
@@ -4204,8 +5218,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4204 fcofs, CFRAME_SIZE); 5218 fcofs, CFRAME_SIZE);
4205 for (i = 23; i >= 16; i--) 5219 for (i = 23; i >= 16; i--)
4206 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5220 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5221#if !LJ_SOFTFP
4207 for (i = 30; i >= 20; i -= 2) 5222 for (i = 30; i >= 20; i -= 2)
4208 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5223 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5224#endif
4209 fprintf(ctx->fp, 5225 fprintf(ctx->fp,
4210 "\t.align 2\n" 5226 "\t.align 2\n"
4211 ".LEFDE2:\n\n"); 5227 ".LEFDE2:\n\n");