aboutsummaryrefslogtreecommitdiff
path: root/src/vm_mips.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_mips.dasc')
-rw-r--r--src/vm_mips.dasc2673
1 files changed, 1909 insertions, 764 deletions
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 866b8e3d..34645bf1 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,9 @@
1|// Low-level VM code for MIPS CPUs. 1|// Low-level VM code for MIPS CPUs.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// MIPS soft-float support contributed by Djordje Kovacevic and
6|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4| 7|
5|.arch mips 8|.arch mips
6|.section code_op, code_sub 9|.section code_op, code_sub
@@ -18,6 +21,12 @@
18|// Fixed register assignments for the interpreter. 21|// Fixed register assignments for the interpreter.
19|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra 22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20| 23|
24|.macro .FPU, a, b
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
21|// The following must be C callee-save (but BASE is often refetched). 30|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r16 // Base of current Lua stack frame. 31|.define BASE, r16 // Base of current Lua stack frame.
23|.define KBASE, r17 // Constants of current Lua function. 32|.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
25|.define DISPATCH, r19 // Opcode dispatch table. 34|.define DISPATCH, r19 // Opcode dispatch table.
26|.define LREG, r20 // Register holding lua_State (also in SAVE_L). 35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. 36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
28|// NYI: r22 currently unused.
29| 37|
30|.define JGL, r30 // On-trace: global_State + 32768. 38|.define JGL, r30 // On-trace: global_State + 32768.
31| 39|
32|// Constants for type-comparisons, stores and conversions. C callee-save. 40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNUM, r22
33|.define TISNIL, r30 42|.define TISNIL, r30
43|.if FPU
34|.define TOBIT, f30 // 2^52 + 2^51. 44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
35| 46|
36|// The following temporaries are not saved across C calls, except for RA. 47|// The following temporaries are not saved across C calls, except for RA.
37|.define RA, r23 // Callee-save. 48|.define RA, r23 // Callee-save.
@@ -46,7 +57,7 @@
46|.define TMP2, r14 57|.define TMP2, r14
47|.define TMP3, r15 58|.define TMP3, r15
48| 59|
49|// Calling conventions. 60|// MIPS o32 calling convention.
50|.define CFUNCADDR, r25 61|.define CFUNCADDR, r25
51|.define CARG1, r4 62|.define CARG1, r4
52|.define CARG2, r5 63|.define CARG2, r5
@@ -56,13 +67,33 @@
56|.define CRET1, r2 67|.define CRET1, r2
57|.define CRET2, r3 68|.define CRET2, r3
58| 69|
70|.if ENDIAN_LE
71|.define SFRETLO, CRET1
72|.define SFRETHI, CRET2
73|.define SFARG1LO, CARG1
74|.define SFARG1HI, CARG2
75|.define SFARG2LO, CARG3
76|.define SFARG2HI, CARG4
77|.else
78|.define SFRETLO, CRET2
79|.define SFRETHI, CRET1
80|.define SFARG1LO, CARG2
81|.define SFARG1HI, CARG1
82|.define SFARG2LO, CARG4
83|.define SFARG2HI, CARG3
84|.endif
85|
86|.if FPU
59|.define FARG1, f12 87|.define FARG1, f12
60|.define FARG2, f14 88|.define FARG2, f14
61| 89|
62|.define FRET1, f0 90|.define FRET1, f0
63|.define FRET2, f2 91|.define FRET2, f2
92|.endif
64| 93|
65|// Stack layout while in interpreter. Must match with lj_frame.h. 94|// Stack layout while in interpreter. Must match with lj_frame.h.
95|.if FPU // MIPS32 hard-float.
96|
66|.define CFRAME_SPACE, 112 // Delta for sp. 97|.define CFRAME_SPACE, 112 // Delta for sp.
67| 98|
68|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. 99|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
72|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. 103|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
73|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. 104|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
74|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. 105|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
106|
107|.else // MIPS32 soft-float
108|
109|.define CFRAME_SPACE, 64 // Delta for sp.
110|
111|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
112|.define SAVE_NRES, 72(sp)
113|.define SAVE_CFRAME, 68(sp)
114|.define SAVE_L, 64(sp)
115|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
116|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
117|
118|.endif
119|
75|.define SAVE_PC, 20(sp) 120|.define SAVE_PC, 20(sp)
76|.define ARG5, 16(sp) 121|.define ARG5, 16(sp)
77|.define CSAVE_4, 12(sp) 122|.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
83|.define ARG5_OFS, 16 128|.define ARG5_OFS, 16
84|.define SAVE_MULTRES, ARG5 129|.define SAVE_MULTRES, ARG5
85| 130|
131|//-----------------------------------------------------------------------
132|
86|.macro saveregs 133|.macro saveregs
87| addiu sp, sp, -CFRAME_SPACE 134| addiu sp, sp, -CFRAME_SPACE
88| sw ra, SAVE_GPR_+9*4(sp) 135| sw ra, SAVE_GPR_+9*4(sp)
89| sw r30, SAVE_GPR_+8*4(sp) 136| sw r30, SAVE_GPR_+8*4(sp)
90| sdc1 f30, SAVE_FPR_+5*8(sp) 137| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91| sw r23, SAVE_GPR_+7*4(sp) 138| sw r23, SAVE_GPR_+7*4(sp)
92| sw r22, SAVE_GPR_+6*4(sp) 139| sw r22, SAVE_GPR_+6*4(sp)
93| sdc1 f28, SAVE_FPR_+4*8(sp) 140| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94| sw r21, SAVE_GPR_+5*4(sp) 141| sw r21, SAVE_GPR_+5*4(sp)
95| sw r20, SAVE_GPR_+4*4(sp) 142| sw r20, SAVE_GPR_+4*4(sp)
96| sdc1 f26, SAVE_FPR_+3*8(sp) 143| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97| sw r19, SAVE_GPR_+3*4(sp) 144| sw r19, SAVE_GPR_+3*4(sp)
98| sw r18, SAVE_GPR_+2*4(sp) 145| sw r18, SAVE_GPR_+2*4(sp)
99| sdc1 f24, SAVE_FPR_+2*8(sp) 146| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100| sw r17, SAVE_GPR_+1*4(sp) 147| sw r17, SAVE_GPR_+1*4(sp)
101| sw r16, SAVE_GPR_+0*4(sp) 148| sw r16, SAVE_GPR_+0*4(sp)
102| sdc1 f22, SAVE_FPR_+1*8(sp) 149| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
103| sdc1 f20, SAVE_FPR_+0*8(sp) 150| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104|.endmacro 151|.endmacro
105| 152|
106|.macro restoreregs_ret 153|.macro restoreregs_ret
107| lw ra, SAVE_GPR_+9*4(sp) 154| lw ra, SAVE_GPR_+9*4(sp)
108| lw r30, SAVE_GPR_+8*4(sp) 155| lw r30, SAVE_GPR_+8*4(sp)
109| ldc1 f30, SAVE_FPR_+5*8(sp) 156| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110| lw r23, SAVE_GPR_+7*4(sp) 157| lw r23, SAVE_GPR_+7*4(sp)
111| lw r22, SAVE_GPR_+6*4(sp) 158| lw r22, SAVE_GPR_+6*4(sp)
112| ldc1 f28, SAVE_FPR_+4*8(sp) 159| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113| lw r21, SAVE_GPR_+5*4(sp) 160| lw r21, SAVE_GPR_+5*4(sp)
114| lw r20, SAVE_GPR_+4*4(sp) 161| lw r20, SAVE_GPR_+4*4(sp)
115| ldc1 f26, SAVE_FPR_+3*8(sp) 162| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116| lw r19, SAVE_GPR_+3*4(sp) 163| lw r19, SAVE_GPR_+3*4(sp)
117| lw r18, SAVE_GPR_+2*4(sp) 164| lw r18, SAVE_GPR_+2*4(sp)
118| ldc1 f24, SAVE_FPR_+2*8(sp) 165| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119| lw r17, SAVE_GPR_+1*4(sp) 166| lw r17, SAVE_GPR_+1*4(sp)
120| lw r16, SAVE_GPR_+0*4(sp) 167| lw r16, SAVE_GPR_+0*4(sp)
121| ldc1 f22, SAVE_FPR_+1*8(sp) 168| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
122| ldc1 f20, SAVE_FPR_+0*8(sp) 169| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123| jr ra 170| jr ra
124| addiu sp, sp, CFRAME_SPACE 171| addiu sp, sp, CFRAME_SPACE
125|.endmacro 172|.endmacro
@@ -138,11 +185,12 @@
138|.type NODE, Node 185|.type NODE, Node
139|.type NARGS8, int 186|.type NARGS8, int
140|.type TRACE, GCtrace 187|.type TRACE, GCtrace
188|.type SBUF, SBuf
141| 189|
142|//----------------------------------------------------------------------- 190|//-----------------------------------------------------------------------
143| 191|
144|// Trap for not-yet-implemented parts. 192|// Trap for not-yet-implemented parts.
145|.macro NYI; .long 0xf0f0f0f0; .endmacro 193|.macro NYI; .long 0xec1cf0f0; .endmacro
146| 194|
147|// Macros to mark delay slots. 195|// Macros to mark delay slots.
148|.macro ., a; a; .endmacro 196|.macro ., a; a; .endmacro
@@ -152,13 +200,23 @@
152|//----------------------------------------------------------------------- 200|//-----------------------------------------------------------------------
153| 201|
154|// Endian-specific defines. 202|// Endian-specific defines.
155|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) 203|.if ENDIAN_LE
156|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) 204|.define FRAME_PC, -4
157|.define HI, LJ_ENDIAN_SELECT(4,0) 205|.define FRAME_FUNC, -8
158|.define LO, LJ_ENDIAN_SELECT(0,4) 206|.define HI, 4
159|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) 207|.define LO, 0
160|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) 208|.define OFS_RD, 2
161|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) 209|.define OFS_RA, 1
210|.define OFS_OP, 0
211|.else
212|.define FRAME_PC, -8
213|.define FRAME_FUNC, -4
214|.define HI, 0
215|.define LO, 4
216|.define OFS_RD, 0
217|.define OFS_RA, 2
218|.define OFS_OP, 3
219|.endif
162| 220|
163|// Instruction decode. 221|// Instruction decode.
164|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro 222|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
353 |. sll TMP2, TMP2, 3 411 |. sll TMP2, TMP2, 3
354 |1: 412 |1:
355 | addiu TMP1, TMP1, -8 413 | addiu TMP1, TMP1, -8
356 | ldc1 f0, 0(RA) 414 | lw SFRETHI, HI(RA)
415 | lw SFRETLO, LO(RA)
357 | addiu RA, RA, 8 416 | addiu RA, RA, 8
358 | sdc1 f0, 0(BASE) 417 | sw SFRETHI, HI(BASE)
418 | sw SFRETLO, LO(BASE)
359 | bnez TMP1, <1 419 | bnez TMP1, <1
360 |. addiu BASE, BASE, 8 420 |. addiu BASE, BASE, 8
361 | 421 |
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
424 | and sp, CARG1, AT 484 | and sp, CARG1, AT
425 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 485 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
426 | lw L, SAVE_L 486 | lw L, SAVE_L
427 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 487 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
428 | li TISNIL, LJ_TNIL 489 | li TISNIL, LJ_TNIL
429 | lw BASE, L->base 490 | lw BASE, L->base
430 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 491 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
431 | mtc1 TMP3, TOBIT 492 | .FPU mtc1 TMP3, TOBIT
432 | li TMP1, LJ_TFALSE 493 | li TMP1, LJ_TFALSE
433 | li_vmstate INTERP 494 | li_vmstate INTERP
434 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. 495 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
435 | cvt.d.s TOBIT, TOBIT 496 | .FPU cvt.d.s TOBIT, TOBIT
436 | addiu RA, BASE, -8 // Results start at BASE-8. 497 | addiu RA, BASE, -8 // Results start at BASE-8.
437 | addiu DISPATCH, DISPATCH, GG_G2DISP 498 | addiu DISPATCH, DISPATCH, GG_G2DISP
438 | sw TMP1, HI(RA) // Prepend false to error message. 499 | sw TMP1, HI(RA) // Prepend false to error message.
@@ -440,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx)
440 | b ->vm_returnc 501 | b ->vm_returnc
441 |. li RD, 16 // 2 results: false + error message. 502 |. li RD, 16 // 2 results: false + error message.
442 | 503 |
504 |->vm_unwind_stub: // Jump to exit stub from unwinder.
505 | jr CARG1
506 |. move ra, CARG2
507 |
443 |//----------------------------------------------------------------------- 508 |//-----------------------------------------------------------------------
444 |//-- Grow stack for calls ----------------------------------------------- 509 |//-- Grow stack for calls -----------------------------------------------
445 |//----------------------------------------------------------------------- 510 |//-----------------------------------------------------------------------
@@ -486,21 +551,23 @@ static void build_subroutines(BuildCtx *ctx)
486 | addiu DISPATCH, DISPATCH, GG_G2DISP 551 | addiu DISPATCH, DISPATCH, GG_G2DISP
487 | sw r0, SAVE_NRES 552 | sw r0, SAVE_NRES
488 | sw r0, SAVE_ERRF 553 | sw r0, SAVE_ERRF
489 | sw TMP0, L->cframe 554 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
490 | sw r0, SAVE_CFRAME 555 | sw r0, SAVE_CFRAME
491 | beqz TMP1, >3 556 | beqz TMP1, >3
492 |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 557 |. sw TMP0, L->cframe
493 | 558 |
494 | // Resume after yield (like a return). 559 | // Resume after yield (like a return).
560 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
495 | move RA, BASE 561 | move RA, BASE
496 | lw BASE, L->base 562 | lw BASE, L->base
563 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
497 | lw TMP1, L->top 564 | lw TMP1, L->top
498 | lw PC, FRAME_PC(BASE) 565 | lw PC, FRAME_PC(BASE)
499 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 566 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
500 | subu RD, TMP1, BASE 567 | subu RD, TMP1, BASE
501 | mtc1 TMP3, TOBIT 568 | .FPU mtc1 TMP3, TOBIT
502 | sb r0, L->status 569 | sb r0, L->status
503 | cvt.d.s TOBIT, TOBIT 570 | .FPU cvt.d.s TOBIT, TOBIT
504 | li_vmstate INTERP 571 | li_vmstate INTERP
505 | addiu RD, RD, 8 572 | addiu RD, RD, 8
506 | st_vmstate 573 | st_vmstate
@@ -525,25 +592,27 @@ static void build_subroutines(BuildCtx *ctx)
525 | 592 |
526 |1: // Entry point for vm_pcall above (PC = ftype). 593 |1: // Entry point for vm_pcall above (PC = ftype).
527 | lw TMP1, L:CARG1->cframe 594 | lw TMP1, L:CARG1->cframe
528 | sw CARG3, SAVE_NRES
529 | move L, CARG1 595 | move L, CARG1
530 | sw CARG1, SAVE_L 596 | sw CARG3, SAVE_NRES
531 | move BASE, CARG2
532 | sw sp, L->cframe // Add our C frame to cframe chain.
533 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 597 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
598 | sw CARG1, SAVE_L
599 | move BASE, CARG2
600 | addiu DISPATCH, DISPATCH, GG_G2DISP
534 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 601 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | sw TMP1, SAVE_CFRAME 602 | sw TMP1, SAVE_CFRAME
536 | addiu DISPATCH, DISPATCH, GG_G2DISP 603 | sw sp, L->cframe // Add our C frame to cframe chain.
537 | 604 |
538 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 605 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
606 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
539 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 607 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
540 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 608 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
609 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
541 | lw TMP1, L->top 610 | lw TMP1, L->top
542 | mtc1 TMP3, TOBIT 611 | .FPU mtc1 TMP3, TOBIT
543 | addu PC, PC, BASE 612 | addu PC, PC, BASE
544 | subu NARGS8:RC, TMP1, BASE 613 | subu NARGS8:RC, TMP1, BASE
545 | subu PC, PC, TMP2 // PC = frame delta + frame type 614 | subu PC, PC, TMP2 // PC = frame delta + frame type
546 | cvt.d.s TOBIT, TOBIT 615 | .FPU cvt.d.s TOBIT, TOBIT
547 | li_vmstate INTERP 616 | li_vmstate INTERP
548 | li TISNIL, LJ_TNIL 617 | li TISNIL, LJ_TNIL
549 | st_vmstate 618 | st_vmstate
@@ -566,20 +635,21 @@ static void build_subroutines(BuildCtx *ctx)
566 | lw TMP0, L:CARG1->stack 635 | lw TMP0, L:CARG1->stack
567 | sw CARG1, SAVE_L 636 | sw CARG1, SAVE_L
568 | lw TMP1, L->top 637 | lw TMP1, L->top
638 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
569 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 639 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
570 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 640 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
571 | lw TMP1, L->cframe 641 | lw TMP1, L->cframe
572 | sw sp, L->cframe // Add our C frame to cframe chain. 642 | addiu DISPATCH, DISPATCH, GG_G2DISP
573 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 643 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
574 | sw r0, SAVE_ERRF // No error function. 644 | sw r0, SAVE_ERRF // No error function.
575 | move CFUNCADDR, CARG4 645 | sw TMP1, SAVE_CFRAME
646 | sw sp, L->cframe // Add our C frame to cframe chain.
647 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
576 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) 648 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
577 |. sw TMP1, SAVE_CFRAME 649 |. move CFUNCADDR, CARG4
578 | move BASE, CRET1 650 | move BASE, CRET1
579 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
580 | li PC, FRAME_CP
581 | bnez CRET1, <3 // Else continue with the call. 651 | bnez CRET1, <3 // Else continue with the call.
582 |. addiu DISPATCH, DISPATCH, GG_G2DISP 652 |. li PC, FRAME_CP
583 | b ->vm_leave_cp // No base? Just remove C frame. 653 | b ->vm_leave_cp // No base? Just remove C frame.
584 |. nop 654 |. nop
585 | 655 |
@@ -624,7 +694,8 @@ static void build_subroutines(BuildCtx *ctx)
624 |->cont_cat: // RA = resultptr, RB = meta base 694 |->cont_cat: // RA = resultptr, RB = meta base
625 | lw INS, -4(PC) 695 | lw INS, -4(PC)
626 | addiu CARG2, RB, -16 696 | addiu CARG2, RB, -16
627 | ldc1 f0, 0(RA) 697 | lw SFRETHI, HI(RA)
698 | lw SFRETLO, LO(RA)
628 | decode_RB8a MULTRES, INS 699 | decode_RB8a MULTRES, INS
629 | decode_RA8a RA, INS 700 | decode_RA8a RA, INS
630 | decode_RB8b MULTRES 701 | decode_RB8b MULTRES
@@ -632,11 +703,13 @@ static void build_subroutines(BuildCtx *ctx)
632 | addu TMP1, BASE, MULTRES 703 | addu TMP1, BASE, MULTRES
633 | sw BASE, L->base 704 | sw BASE, L->base
634 | subu CARG3, CARG2, TMP1 705 | subu CARG3, CARG2, TMP1
706 | sw SFRETHI, HI(CARG2)
635 | bne TMP1, CARG2, ->BC_CAT_Z 707 | bne TMP1, CARG2, ->BC_CAT_Z
636 |. sdc1 f0, 0(CARG2) 708 |. sw SFRETLO, LO(CARG2)
637 | addu RA, BASE, RA 709 | addu RA, BASE, RA
710 | sw SFRETHI, HI(RA)
638 | b ->cont_nop 711 | b ->cont_nop
639 |. sdc1 f0, 0(RA) 712 |. sw SFRETLO, LO(RA)
640 | 713 |
641 |//-- Table indexing metamethods ----------------------------------------- 714 |//-- Table indexing metamethods -----------------------------------------
642 | 715 |
@@ -659,10 +732,9 @@ static void build_subroutines(BuildCtx *ctx)
659 |. sw TMP1, HI(CARG3) 732 |. sw TMP1, HI(CARG3)
660 | 733 |
661 |->vmeta_tgetb: // TMP0 = index 734 |->vmeta_tgetb: // TMP0 = index
662 | mtc1 TMP0, f0
663 | cvt.d.w f0, f0
664 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 735 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
665 | sdc1 f0, 0(CARG3) 736 | sw TMP0, LO(CARG3)
737 | sw TISNUM, HI(CARG3)
666 | 738 |
667 |->vmeta_tgetv: 739 |->vmeta_tgetv:
668 |1: 740 |1:
@@ -674,9 +746,11 @@ static void build_subroutines(BuildCtx *ctx)
674 | // Returns TValue * (finished) or NULL (metamethod). 746 | // Returns TValue * (finished) or NULL (metamethod).
675 | beqz CRET1, >3 747 | beqz CRET1, >3
676 |. addiu TMP1, BASE, -FRAME_CONT 748 |. addiu TMP1, BASE, -FRAME_CONT
677 | ldc1 f0, 0(CRET1) 749 | lw SFARG1HI, HI(CRET1)
750 | lw SFARG2HI, LO(CRET1)
678 | ins_next1 751 | ins_next1
679 | sdc1 f0, 0(RA) 752 | sw SFARG1HI, HI(RA)
753 | sw SFARG2HI, LO(RA)
680 | ins_next2 754 | ins_next2
681 | 755 |
682 |3: // Call __index metamethod. 756 |3: // Call __index metamethod.
@@ -688,6 +762,17 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 762 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 763 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 764 |
765 |->vmeta_tgetr:
766 | load_got lj_tab_getinth
767 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
768 |. nop
769 | // Returns cTValue * or NULL.
770 | beqz CRET1, ->BC_TGETR_Z
771 |. move SFARG2HI, TISNIL
772 | lw SFARG2HI, HI(CRET1)
773 | b ->BC_TGETR_Z
774 |. lw SFARG2LO, LO(CRET1)
775 |
691 |//----------------------------------------------------------------------- 776 |//-----------------------------------------------------------------------
692 | 777 |
693 |->vmeta_tsets1: 778 |->vmeta_tsets1:
@@ -709,10 +794,9 @@ static void build_subroutines(BuildCtx *ctx)
709 |. sw TMP1, HI(CARG3) 794 |. sw TMP1, HI(CARG3)
710 | 795 |
711 |->vmeta_tsetb: // TMP0 = index 796 |->vmeta_tsetb: // TMP0 = index
712 | mtc1 TMP0, f0
713 | cvt.d.w f0, f0
714 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 797 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
715 | sdc1 f0, 0(CARG3) 798 | sw TMP0, LO(CARG3)
799 | sw TISNUM, HI(CARG3)
716 | 800 |
717 |->vmeta_tsetv: 801 |->vmeta_tsetv:
718 |1: 802 |1:
@@ -722,11 +806,13 @@ static void build_subroutines(BuildCtx *ctx)
722 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 806 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
723 |. move CARG1, L 807 |. move CARG1, L
724 | // Returns TValue * (finished) or NULL (metamethod). 808 | // Returns TValue * (finished) or NULL (metamethod).
809 | lw SFARG1HI, HI(RA)
725 | beqz CRET1, >3 810 | beqz CRET1, >3
726 |. ldc1 f0, 0(RA) 811 |. lw SFARG1LO, LO(RA)
727 | // NOBARRIER: lj_meta_tset ensures the table is not black. 812 | // NOBARRIER: lj_meta_tset ensures the table is not black.
728 | ins_next1 813 | ins_next1
729 | sdc1 f0, 0(CRET1) 814 | sw SFARG1HI, HI(CRET1)
815 | sw SFARG1LO, LO(CRET1)
730 | ins_next2 816 | ins_next2
731 | 817 |
732 |3: // Call __newindex metamethod. 818 |3: // Call __newindex metamethod.
@@ -736,14 +822,27 @@ static void build_subroutines(BuildCtx *ctx)
736 | sw PC, -16+HI(BASE) // [cont|PC] 822 | sw PC, -16+HI(BASE) // [cont|PC]
737 | subu PC, BASE, TMP1 823 | subu PC, BASE, TMP1
738 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 824 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
739 | sdc1 f0, 16(BASE) // Copy value to third argument. 825 | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
826 | sw SFARG1LO, 16+LO(BASE)
740 | b ->vm_call_dispatch_f 827 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 828 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 829 |
830 |->vmeta_tsetr:
831 | load_got lj_tab_setinth
832 | sw BASE, L->base
833 | sw PC, SAVE_PC
834 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
835 |. move CARG1, L
836 | // Returns TValue *.
837 | b ->BC_TSETR_Z
838 |. nop
839 |
743 |//-- Comparison metamethods --------------------------------------------- 840 |//-- Comparison metamethods ---------------------------------------------
744 | 841 |
745 |->vmeta_comp: 842 |->vmeta_comp:
746 | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. 843 | // RA/RD point to o1/o2.
844 | move CARG2, RA
845 | move CARG3, RD
747 | load_got lj_meta_comp 846 | load_got lj_meta_comp
748 | addiu PC, PC, -4 847 | addiu PC, PC, -4
749 | sw BASE, L->base 848 | sw BASE, L->base
@@ -769,11 +868,13 @@ static void build_subroutines(BuildCtx *ctx)
769 | 868 |
770 |->cont_ra: // RA = resultptr 869 |->cont_ra: // RA = resultptr
771 | lbu TMP1, -4+OFS_RA(PC) 870 | lbu TMP1, -4+OFS_RA(PC)
772 | ldc1 f0, 0(RA) 871 | lw SFRETHI, HI(RA)
872 | lw SFRETLO, LO(RA)
773 | sll TMP1, TMP1, 3 873 | sll TMP1, TMP1, 3
774 | addu TMP1, BASE, TMP1 874 | addu TMP1, BASE, TMP1
875 | sw SFRETHI, HI(TMP1)
775 | b ->cont_nop 876 | b ->cont_nop
776 |. sdc1 f0, 0(TMP1) 877 |. sw SFRETLO, LO(TMP1)
777 | 878 |
778 |->cont_condt: // RA = resultptr 879 |->cont_condt: // RA = resultptr
779 | lw TMP0, HI(RA) 880 | lw TMP0, HI(RA)
@@ -788,8 +889,11 @@ static void build_subroutines(BuildCtx *ctx)
788 |. addiu TMP2, AT, -1 // Branch if result is false. 889 |. addiu TMP2, AT, -1 // Branch if result is false.
789 | 890 |
790 |->vmeta_equal: 891 |->vmeta_equal:
791 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. 892 | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
792 | load_got lj_meta_equal 893 | load_got lj_meta_equal
894 | move CARG2, SFARG1LO
895 | move CARG3, SFARG2LO
896 | move CARG4, TMP0
793 | addiu PC, PC, -4 897 | addiu PC, PC, -4
794 | sw BASE, L->base 898 | sw BASE, L->base
795 | sw PC, SAVE_PC 899 | sw PC, SAVE_PC
@@ -813,17 +917,31 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 917 |. nop
814 |.endif 918 |.endif
815 | 919 |
920 |->vmeta_istype:
921 | load_got lj_meta_istype
922 | addiu PC, PC, -4
923 | sw BASE, L->base
924 | srl CARG2, RA, 3
925 | srl CARG3, RD, 3
926 | sw PC, SAVE_PC
927 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
928 |. move CARG1, L
929 | b ->cont_nop
930 |. nop
931 |
816 |//-- Arithmetic metamethods --------------------------------------------- 932 |//-- Arithmetic metamethods ---------------------------------------------
817 | 933 |
818 |->vmeta_unm: 934 |->vmeta_unm:
819 | move CARG4, CARG3 935 | move RC, RB
820 | 936 |
821 |->vmeta_arith: 937 |->vmeta_arith:
822 | load_got lj_meta_arith 938 | load_got lj_meta_arith
823 | decode_OP1 TMP0, INS 939 | decode_OP1 TMP0, INS
824 | sw BASE, L->base 940 | sw BASE, L->base
825 | sw PC, SAVE_PC
826 | move CARG2, RA 941 | move CARG2, RA
942 | sw PC, SAVE_PC
943 | move CARG3, RB
944 | move CARG4, RC
827 | sw TMP0, ARG5 945 | sw TMP0, ARG5
828 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 946 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
829 |. move CARG1, L 947 |. move CARG1, L
@@ -931,40 +1049,52 @@ static void build_subroutines(BuildCtx *ctx)
931 | 1049 |
932 |.macro .ffunc_1, name 1050 |.macro .ffunc_1, name
933 |->ff_ .. name: 1051 |->ff_ .. name:
1052 | lw SFARG1HI, HI(BASE)
934 | beqz NARGS8:RC, ->fff_fallback 1053 | beqz NARGS8:RC, ->fff_fallback
935 |. lw CARG3, HI(BASE) 1054 |. lw SFARG1LO, LO(BASE)
936 | lw CARG1, LO(BASE)
937 |.endmacro 1055 |.endmacro
938 | 1056 |
939 |.macro .ffunc_2, name 1057 |.macro .ffunc_2, name
940 |->ff_ .. name: 1058 |->ff_ .. name:
941 | sltiu AT, NARGS8:RC, 16 1059 | sltiu AT, NARGS8:RC, 16
942 | lw CARG3, HI(BASE) 1060 | lw SFARG1HI, HI(BASE)
943 | bnez AT, ->fff_fallback 1061 | bnez AT, ->fff_fallback
944 |. lw CARG4, 8+HI(BASE) 1062 |. lw SFARG2HI, 8+HI(BASE)
945 | lw CARG1, LO(BASE) 1063 | lw SFARG1LO, LO(BASE)
946 | lw CARG2, 8+LO(BASE) 1064 | lw SFARG2LO, 8+LO(BASE)
947 |.endmacro 1065 |.endmacro
948 | 1066 |
949 |.macro .ffunc_n, name // Caveat: has delay slot! 1067 |.macro .ffunc_n, name // Caveat: has delay slot!
950 |->ff_ .. name: 1068 |->ff_ .. name:
951 | lw CARG3, HI(BASE) 1069 | lw SFARG1HI, HI(BASE)
1070 |.if FPU
1071 | ldc1 FARG1, 0(BASE)
1072 |.else
1073 | lw SFARG1LO, LO(BASE)
1074 |.endif
952 | beqz NARGS8:RC, ->fff_fallback 1075 | beqz NARGS8:RC, ->fff_fallback
953 |. ldc1 FARG1, 0(BASE) 1076 |. sltiu AT, SFARG1HI, LJ_TISNUM
954 | sltiu AT, CARG3, LJ_TISNUM
955 | beqz AT, ->fff_fallback 1077 | beqz AT, ->fff_fallback
956 |.endmacro 1078 |.endmacro
957 | 1079 |
958 |.macro .ffunc_nn, name // Caveat: has delay slot! 1080 |.macro .ffunc_nn, name // Caveat: has delay slot!
959 |->ff_ .. name: 1081 |->ff_ .. name:
960 | sltiu AT, NARGS8:RC, 16 1082 | sltiu AT, NARGS8:RC, 16
961 | lw CARG3, HI(BASE) 1083 | lw SFARG1HI, HI(BASE)
962 | bnez AT, ->fff_fallback 1084 | bnez AT, ->fff_fallback
963 |. lw CARG4, 8+HI(BASE) 1085 |. lw SFARG2HI, 8+HI(BASE)
964 | ldc1 FARG1, 0(BASE) 1086 | sltiu TMP0, SFARG1HI, LJ_TISNUM
965 | ldc1 FARG2, 8(BASE) 1087 |.if FPU
966 | sltiu TMP0, CARG3, LJ_TISNUM 1088 | ldc1 FARG1, 0(BASE)
967 | sltiu TMP1, CARG4, LJ_TISNUM 1089 |.else
1090 | lw SFARG1LO, LO(BASE)
1091 |.endif
1092 | sltiu TMP1, SFARG2HI, LJ_TISNUM
1093 |.if FPU
1094 | ldc1 FARG2, 8(BASE)
1095 |.else
1096 | lw SFARG2LO, 8+LO(BASE)
1097 |.endif
968 | and TMP0, TMP0, TMP1 1098 | and TMP0, TMP0, TMP1
969 | beqz TMP0, ->fff_fallback 1099 | beqz TMP0, ->fff_fallback
970 |.endmacro 1100 |.endmacro
@@ -980,53 +1110,55 @@ static void build_subroutines(BuildCtx *ctx)
980 |//-- Base library: checks ----------------------------------------------- 1110 |//-- Base library: checks -----------------------------------------------
981 | 1111 |
982 |.ffunc_1 assert 1112 |.ffunc_1 assert
983 | sltiu AT, CARG3, LJ_TISTRUECOND 1113 | sltiu AT, SFARG1HI, LJ_TISTRUECOND
984 | beqz AT, ->fff_fallback 1114 | beqz AT, ->fff_fallback
985 |. addiu RA, BASE, -8 1115 |. addiu RA, BASE, -8
986 | lw PC, FRAME_PC(BASE) 1116 | lw PC, FRAME_PC(BASE)
987 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1117 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
988 | addu TMP2, RA, NARGS8:RC 1118 | addu TMP2, RA, NARGS8:RC
989 | sw CARG3, HI(RA) 1119 | sw SFARG1HI, HI(RA)
990 | addiu TMP1, BASE, 8 1120 | addiu TMP1, BASE, 8
991 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 1121 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
992 |. sw CARG1, LO(RA) 1122 |. sw SFARG1LO, LO(RA)
993 |1: 1123 |1:
994 | ldc1 f0, 0(TMP1) 1124 | lw SFRETHI, HI(TMP1)
995 | sdc1 f0, -8(TMP1) 1125 | lw SFRETLO, LO(TMP1)
1126 | sw SFRETHI, -8+HI(TMP1)
1127 | sw SFRETLO, -8+LO(TMP1)
996 | bne TMP1, TMP2, <1 1128 | bne TMP1, TMP2, <1
997 |. addiu TMP1, TMP1, 8 1129 |. addiu TMP1, TMP1, 8
998 | b ->fff_res 1130 | b ->fff_res
999 |. nop 1131 |. nop
1000 | 1132 |
1001 |.ffunc type 1133 |.ffunc type
1002 | lw CARG3, HI(BASE) 1134 | lw SFARG1HI, HI(BASE)
1003 | li TMP1, LJ_TISNUM
1004 | beqz NARGS8:RC, ->fff_fallback 1135 | beqz NARGS8:RC, ->fff_fallback
1005 |. sltiu TMP0, CARG3, LJ_TISNUM 1136 |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1006 | movz TMP1, CARG3, TMP0 1137 | movn SFARG1HI, TISNUM, TMP0
1007 | not TMP1, TMP1 1138 | not TMP1, SFARG1HI
1008 | sll TMP1, TMP1, 3 1139 | sll TMP1, TMP1, 3
1009 | addu TMP1, CFUNC:RB, TMP1 1140 | addu TMP1, CFUNC:RB, TMP1
1010 | b ->fff_resn 1141 | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1011 |. ldc1 FRET1, CFUNC:TMP1->upvalue 1142 | b ->fff_restv
1143 |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1012 | 1144 |
1013 |//-- Base library: getters and setters --------------------------------- 1145 |//-- Base library: getters and setters ---------------------------------
1014 | 1146 |
1015 |.ffunc_1 getmetatable 1147 |.ffunc_1 getmetatable
1016 | li AT, LJ_TTAB 1148 | li AT, LJ_TTAB
1017 | bne CARG3, AT, >6 1149 | bne SFARG1HI, AT, >6
1018 |. li AT, LJ_TUDATA 1150 |. li AT, LJ_TUDATA
1019 |1: // Field metatable must be at same offset for GCtab and GCudata! 1151 |1: // Field metatable must be at same offset for GCtab and GCudata!
1020 | lw TAB:CARG1, TAB:CARG1->metatable 1152 | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1021 |2: 1153 |2:
1022 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1154 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1023 | beqz TAB:CARG1, ->fff_restv 1155 | beqz TAB:SFARG1LO, ->fff_restv
1024 |. li CARG3, LJ_TNIL 1156 |. li SFARG1HI, LJ_TNIL
1025 | lw TMP0, TAB:CARG1->hmask 1157 | lw TMP0, TAB:SFARG1LO->hmask
1026 | li CARG3, LJ_TTAB // Use metatable as default result. 1158 | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1027 | lw TMP1, STR:RC->hash 1159 | lw TMP1, STR:RC->sid
1028 | lw NODE:TMP2, TAB:CARG1->node 1160 | lw NODE:TMP2, TAB:SFARG1LO->node
1029 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1161 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
1030 | sll TMP0, TMP1, 5 1162 | sll TMP0, TMP1, 5
1031 | sll TMP1, TMP1, 3 1163 | sll TMP1, TMP1, 3
1032 | subu TMP1, TMP0, TMP1 1164 | subu TMP1, TMP0, TMP1
@@ -1037,7 +1169,7 @@ static void build_subroutines(BuildCtx *ctx)
1037 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 1169 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1038 | lw NODE:TMP3, NODE:TMP2->next 1170 | lw NODE:TMP3, NODE:TMP2->next
1039 | bne CARG4, AT, >4 1171 | bne CARG4, AT, >4
1040 |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 1172 |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1041 | beq TMP0, STR:RC, >5 1173 | beq TMP0, STR:RC, >5
1042 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) 1174 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1043 |4: 1175 |4:
@@ -1046,36 +1178,35 @@ static void build_subroutines(BuildCtx *ctx)
1046 | b <3 1178 | b <3
1047 |. nop 1179 |. nop
1048 |5: 1180 |5:
1049 | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. 1181 | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1050 |. nop 1182 |. nop
1051 | move CARG3, CARG2 // Return value of mt.__metatable. 1183 | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1052 | b ->fff_restv 1184 | b ->fff_restv
1053 |. move CARG1, TMP1 1185 |. move SFARG1LO, TMP1
1054 | 1186 |
1055 |6: 1187 |6:
1056 | beq CARG3, AT, <1 1188 | beq SFARG1HI, AT, <1
1057 |. sltiu TMP0, CARG3, LJ_TISNUM 1189 |. sltu AT, TISNUM, SFARG1HI
1058 | li TMP1, LJ_TISNUM 1190 | movz SFARG1HI, TISNUM, AT
1059 | movz TMP1, CARG3, TMP0 1191 | not TMP1, SFARG1HI
1060 | not TMP1, TMP1
1061 | sll TMP1, TMP1, 2 1192 | sll TMP1, TMP1, 2
1062 | addu TMP1, DISPATCH, TMP1 1193 | addu TMP1, DISPATCH, TMP1
1063 | b <2 1194 | b <2
1064 |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) 1195 |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1065 | 1196 |
1066 |.ffunc_2 setmetatable 1197 |.ffunc_2 setmetatable
1067 | // Fast path: no mt for table yet and not clearing the mt. 1198 | // Fast path: no mt for table yet and not clearing the mt.
// Notes below use the register names of the updated (right-hand) code.
// Every case other than (table, table) with no metatable set yet goes to ->fff_fallback.
1068 | li AT, LJ_TTAB 1199 | li AT, LJ_TTAB
1069 | bne CARG3, AT, ->fff_fallback 1200 | bne SFARG1HI, AT, ->fff_fallback
1070 |. addiu CARG4, CARG4, -LJ_TTAB 1201 |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
// SFARG2HI is now zero only if the second argument carries the LJ_TTAB tag.
1071 | lw TAB:TMP1, TAB:CARG1->metatable 1202 | lw TAB:TMP1, TAB:SFARG1LO->metatable
1072 | lbu TMP3, TAB:CARG1->marked 1203 | lbu TMP3, TAB:SFARG1LO->marked
1073 | or AT, CARG4, TAB:TMP1 1204 | or AT, SFARG2HI, TAB:TMP1
// AT == 0 only if arg 2 is a table and the metatable field of arg 1 is still NULL.
1074 | bnez AT, ->fff_fallback 1205 | bnez AT, ->fff_fallback
1075 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) 1206 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1076 | beqz AT, ->fff_restv 1207 | beqz AT, ->fff_restv
1077 |. sw TAB:CARG2, TAB:CARG1->metatable 1208 |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
// The store sits in the branch delay slot, so the metatable is written on both paths.
// SFARG1HI/SFARG1LO still hold the table, so ->fff_restv returns the first argument.
// A table with LJ_GC_BLACK set also runs barrierback (macro defined elsewhere;
// presumably ->fff_restv is its exit target -- confirm against the macro).
1078 | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv 1209 | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1079 | 1210 |
1080 |.ffunc rawget 1211 |.ffunc rawget
1081 | lw CARG4, HI(BASE) 1212 | lw CARG4, HI(BASE)
@@ -1089,90 +1220,89 @@ static void build_subroutines(BuildCtx *ctx)
1089 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1220 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1090 |. move CARG1, L 1221 |. move CARG1, L
1091 | // Returns cTValue *. 1222 | // Returns cTValue *.
1092 | b ->fff_resn 1223 | lw SFARG1HI, HI(CRET1)
1093 |. ldc1 FRET1, 0(CRET1) 1224 | b ->fff_restv
1225 |. lw SFARG1LO, LO(CRET1)
1094 | 1226 |
1095 |//-- Base library: conversions ------------------------------------------ 1227 |//-- Base library: conversions ------------------------------------------
1096 | 1228 |
1097 |.ffunc tonumber 1229 |.ffunc tonumber
1098 | // Only handles the number case inline (without a base argument). 1230 | // Only handles the number case inline (without a base argument).
// Notes below describe the updated (right-hand) code.
1099 | lw CARG1, HI(BASE) 1231 | lw CARG1, HI(BASE)
1100 | xori AT, NARGS8:RC, 8 1232 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
// AT != 0 if the argument byte count is not 8, i.e. not exactly one argument.
1101 | sltiu CARG1, CARG1, LJ_TISNUM 1233 | sltu TMP0, TISNUM, CARG1
// TMP0 = 1 if the type tag is above TISNUM (unsigned compare), i.e. the argument is not numeric.
1102 | movn CARG1, r0, AT 1234 | or AT, AT, TMP0
1103 | beqz CARG1, ->fff_fallback // Exactly one number argument. 1235 | bnez AT, ->fff_fallback
1104 |. ldc1 FRET1, 0(BASE) 1236 |. lw SFARG1HI, HI(BASE)
// The argument is returned unchanged: both words are reloaded in the two delay slots
// and handed to ->fff_restv.
1105 | b ->fff_resn 1237 | b ->fff_restv
1106 |. nop 1238 |. lw SFARG1LO, LO(BASE)
1107 | 1239 |
1108 |.ffunc_1 tostring 1240 |.ffunc_1 tostring
1109 | // Only handles the string or number case inline. 1241 | // Only handles the string or number case inline.
// Notes below describe the updated (right-hand) code.
1110 | li AT, LJ_TSTR 1242 | li AT, LJ_TSTR
1111 | // A __tostring method in the string base metatable is ignored. 1243 | // A __tostring method in the string base metatable is ignored.
// A string argument is returned as-is through ->fff_restv.
1112 | beq CARG3, AT, ->fff_restv // String key? 1244 | beq SFARG1HI, AT, ->fff_restv // String key?
1113 | // Handle numbers inline, unless a number base metatable is present. 1245 | // Handle numbers inline, unless a number base metatable is present.
1114 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1246 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1115 | sltiu TMP0, CARG3, LJ_TISNUM 1247 | sltu TMP0, TISNUM, SFARG1HI
1116 | sltiu TMP1, TMP1, 1 1248 | or TMP0, TMP0, TMP1
// TMP0 != 0 if the tag is above TISNUM (not numeric) or the number base metatable pointer is non-NULL.
1117 | and TMP0, TMP0, TMP1 1249 | bnez TMP0, ->fff_fallback
1118 | beqz TMP0, ->fff_fallback
1119 |. sw BASE, L->base // Add frame since C call can throw. 1250 |. sw BASE, L->base // Add frame since C call can throw.
// ffgccheck is a macro defined elsewhere; presumably it runs a GC step when needed -- confirm.
1120 | ffgccheck 1251 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1252 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1253 | load_got lj_strfmt_number
1123 | move CARG1, L 1254 | move CARG1, L
// The C helper is passed L and BASE, i.e. the address of the first argument slot.
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1255 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1125 |. move CARG2, BASE 1256 |. move CARG2, BASE
1126 | // Returns GCstr *. 1257 | // Returns GCstr *.
// Build the result TValue: tag LJ_TSTR plus the returned string pointer.
1127 | li CARG3, LJ_TSTR 1258 | li SFARG1HI, LJ_TSTR
1128 | b ->fff_restv 1259 | b ->fff_restv
1129 |. move CARG1, CRET1 1260 |. move SFARG1LO, CRET1
1130 | 1261 |
1131 |//-- Base library: iterators ------------------------------------------- 1262 |//-- Base library: iterators -------------------------------------------
1132 | 1263 |
1133 |.ffunc next 1264 |.ffunc next
// Notes below describe the updated (right-hand) code, which calls
// lj_tab_next(t, key, o) and lets it write the key/value pair directly into the result slots.
1134 | lw CARG1, HI(BASE) 1265 | lw CARG2, HI(BASE)
1135 | lw TAB:CARG2, LO(BASE) 1266 | lw TAB:CARG1, LO(BASE)
1136 | beqz NARGS8:RC, ->fff_fallback 1267 | beqz NARGS8:RC, ->fff_fallback
1137 |. addu TMP2, BASE, NARGS8:RC 1268 |. addu TMP2, BASE, NARGS8:RC
// TMP2 = address of the slot just past the last passed argument.
1138 | li AT, LJ_TTAB 1269 | li AT, LJ_TTAB
1139 | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. 1270 | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil.
1140 | bne CARG1, AT, ->fff_fallback 1271 | bne CARG2, AT, ->fff_fallback
1141 |. lw PC, FRAME_PC(BASE) 1272 |. lw PC, FRAME_PC(BASE)
1142 | load_got lj_tab_next 1273 | load_got lj_tab_next
// CARG1 = table, CARG2 = BASE+8 (key slot), CARG3 = BASE-8 (output, same address as RA below).
1143 | sw BASE, L->base // Add frame since C call can throw. 1274 | addiu CARG2, BASE, 8
1144 | sw BASE, L->top // Dummy frame length is ok. 1275 | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1145 | addiu CARG3, BASE, 8 1276 |. addiu CARG3, BASE, -8
1146 | sw PC, SAVE_PC 1277 | // Returns 1=found, 0=end, -1=error.
1147 | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1278 | addiu RA, BASE, -8
// CRET1 > 0: return two results starting at RA (RD = (2+1)*8).
1148 |. move CARG1, L 1279 | bgtz CRET1, ->fff_res // Found key/value.
1149 | // Returns 0 at end of traversal. 1280 |. li RD, (2+1)*8
1150 | beqz CRET1, ->fff_restv // End of traversal: return nil. 1281 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1151 |. li CARG3, LJ_TNIL 1282 |. li SFARG1HI, LJ_TNIL
// CRET1 < 0: reload RB (function) and set RC = 2*8 (two arguments) before taking the fallback.
1152 | ldc1 f0, 8(BASE) // Copy key and value to results. 1283 | lw CFUNC:RB, FRAME_FUNC(BASE)
1153 | addiu RA, BASE, -8 1284 | b ->fff_fallback // Invalid key.
1154 | ldc1 f2, 16(BASE) 1285 |. li RC, 2*8
1155 | li RD, (2+1)*8
1156 | sdc1 f0, 0(RA)
1157 | b ->fff_res
1158 |. sdc1 f2, 8(RA)
1159 | 1286 |
1160 |.ffunc_1 pairs 1287 |.ffunc_1 pairs
// Notes below describe the updated (right-hand) code.
// Returns three results starting at RA = BASE-8: upvalue[0] of this function,
// the table argument (left in place at BASE) and nil at BASE+8.
1161 | li AT, LJ_TTAB 1288 | li AT, LJ_TTAB
1162 | bne CARG3, AT, ->fff_fallback 1289 | bne SFARG1HI, AT, ->fff_fallback
1163 |. lw PC, FRAME_PC(BASE) 1290 |. lw PC, FRAME_PC(BASE)
1164#if LJ_52
// With LJ_52 a table that has a metatable takes the fallback.
1165 | lw TAB:TMP2, TAB:CARG1->metatable 1292 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1166 | ldc1 f0, CFUNC:RB->upvalue[0] 1293 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1294 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1167 | bnez TAB:TMP2, ->fff_fallback 1295 | bnez TAB:TMP2, ->fff_fallback
1168#else
1169 | ldc1 f0, CFUNC:RB->upvalue[0] 1297 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1298 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1170#endif
// upvalue[0] is copied as two 32-bit words (TMP0 = hi, TMP1 = lo), without FP registers.
1171 |. addiu RA, BASE, -8 1300 |. addiu RA, BASE, -8
1172 | sw TISNIL, 8+HI(BASE) 1301 | sw TISNIL, 8+HI(BASE)
1173 | li RD, (3+1)*8 1302 | sw TMP0, HI(RA)
1303 | sw TMP1, LO(RA)
1174 | b ->fff_res 1304 | b ->fff_res
1175 |. sdc1 f0, 0(RA) 1305 |. li RD, (3+1)*8
1176 | 1306 |
1177 |.ffunc ipairs_aux 1307 |.ffunc ipairs_aux
1178 | sltiu AT, NARGS8:RC, 16 1308 | sltiu AT, NARGS8:RC, 16
@@ -1180,35 +1310,32 @@ static void build_subroutines(BuildCtx *ctx)
1180 | lw TAB:CARG1, LO(BASE) 1310 | lw TAB:CARG1, LO(BASE)
1181 | lw CARG4, 8+HI(BASE) 1311 | lw CARG4, 8+HI(BASE)
1182 | bnez AT, ->fff_fallback 1312 | bnez AT, ->fff_fallback
1183 |. ldc1 FARG2, 8(BASE) 1313 |. addiu CARG3, CARG3, -LJ_TTAB
1184 | addiu CARG3, CARG3, -LJ_TTAB 1314 | xor CARG4, CARG4, TISNUM
1185 | sltiu AT, CARG4, LJ_TISNUM 1315 | and AT, CARG3, CARG4
1186 | li TMP0, 1 1316 | bnez AT, ->fff_fallback
1187 | movn AT, r0, CARG3
1188 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1317 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1318 | lw TMP2, 8+LO(BASE)
1192 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1319 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1320 | lw TMP1, TAB:CARG1->array
1195 | mfc1 TMP2, FRET1
1196 | addiu RA, BASE, -8
1197 | add.d FARG2, FARG2, FARG1
1198 | addiu TMP2, TMP2, 1 1321 | addiu TMP2, TMP2, 1
1322 | sw TISNUM, -8+HI(BASE)
1199 | sltu AT, TMP2, TMP0 1323 | sltu AT, TMP2, TMP0
1324 | sw TMP2, -8+LO(BASE)
1325 | beqz AT, >2 // Not in array part?
1326 |. addiu RA, BASE, -8
1200 | sll TMP3, TMP2, 3 1327 | sll TMP3, TMP2, 3
1201 | addu TMP3, TMP1, TMP3 1328 | addu TMP3, TMP1, TMP3
1202 | beqz AT, >2 // Not in array part? 1329 | lw TMP1, HI(TMP3)
1203 |. sdc1 FARG2, 0(RA) 1330 | lw TMP2, LO(TMP3)
1204 | lw TMP2, HI(TMP3)
1205 | ldc1 f0, 0(TMP3)
1206 |1: 1331 |1:
1207 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. 1332 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1208 |. li RD, (0+1)*8 1333 |. li RD, (0+1)*8
1209 | li RD, (2+1)*8 1334 | sw TMP1, 8+HI(RA)
1335 | sw TMP2, 8+LO(RA)
1210 | b ->fff_res 1336 | b ->fff_res
1211 |. sdc1 f0, 8(RA) 1337 |. li RD, (2+1)*8
1338 |
1212 |2: // Check for empty hash part first. Otherwise call C function. 1339 |2: // Check for empty hash part first. Otherwise call C function.
1213 | lw TMP0, TAB:CARG1->hmask 1340 | lw TMP0, TAB:CARG1->hmask
1214 | load_got lj_tab_getinth 1341 | load_got lj_tab_getinth
@@ -1219,27 +1346,30 @@ static void build_subroutines(BuildCtx *ctx)
1219 | // Returns cTValue * or NULL. 1346 | // Returns cTValue * or NULL.
1220 | beqz CRET1, ->fff_res 1347 | beqz CRET1, ->fff_res
1221 |. li RD, (0+1)*8 1348 |. li RD, (0+1)*8
1222 | lw TMP2, HI(CRET1) 1349 | lw TMP1, HI(CRET1)
1223 | b <1 1350 | b <1
1224 |. ldc1 f0, 0(CRET1) 1351 |. lw TMP2, LO(CRET1)
1225 | 1352 |
1226 |.ffunc_1 ipairs 1353 |.ffunc_1 ipairs
// Notes below describe the updated (right-hand) code.
// Returns three results starting at RA = BASE-8: upvalue[0] of this function,
// the table argument (left in place at BASE) and an initial control value at BASE+8.
1227 | li AT, LJ_TTAB 1354 | li AT, LJ_TTAB
1228 | bne CARG3, AT, ->fff_fallback 1355 | bne SFARG1HI, AT, ->fff_fallback
1229 |. lw PC, FRAME_PC(BASE) 1356 |. lw PC, FRAME_PC(BASE)
1230#if LJ_52
// With LJ_52 a table that has a metatable takes the fallback.
1231 | lw TAB:TMP2, TAB:CARG1->metatable 1358 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1232 | ldc1 f0, CFUNC:RB->upvalue[0] 1359 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1360 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1233 | bnez TAB:TMP2, ->fff_fallback 1361 | bnez TAB:TMP2, ->fff_fallback
1234#else
1235 | ldc1 f0, CFUNC:RB->upvalue[0] 1363 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1364 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1236#endif
1237 |. addiu RA, BASE, -8 1366 |. addiu RA, BASE, -8
// Initial control value: tag word TISNUM with a zero payload word.
1238 | sw r0, 8+HI(BASE) 1367 | sw TISNUM, 8+HI(BASE)
1239 | sw r0, 8+LO(BASE) 1368 | sw r0, 8+LO(BASE)
1240 | li RD, (3+1)*8 1369 | sw TMP0, HI(RA)
1370 | sw TMP1, LO(RA)
1241 | b ->fff_res 1371 | b ->fff_res
1242 |. sdc1 f0, 0(RA) 1372 |. li RD, (3+1)*8
1243 | 1373 |
1244 |//-- Base library: catch errors ---------------------------------------- 1374 |//-- Base library: catch errors ----------------------------------------
1245 | 1375 |
@@ -1259,8 +1389,9 @@ static void build_subroutines(BuildCtx *ctx)
1259 | sltiu AT, NARGS8:RC, 16 1389 | sltiu AT, NARGS8:RC, 16
1260 | lw CARG4, 8+HI(BASE) 1390 | lw CARG4, 8+HI(BASE)
1261 | bnez AT, ->fff_fallback 1391 | bnez AT, ->fff_fallback
1262 |. ldc1 FARG2, 8(BASE) 1392 |. lw CARG3, 8+LO(BASE)
1263 | ldc1 FARG1, 0(BASE) 1393 | lw CARG1, LO(BASE)
1394 | lw CARG2, HI(BASE)
1264 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1395 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1265 | li AT, LJ_TFUNC 1396 | li AT, LJ_TFUNC
1266 | move TMP2, BASE 1397 | move TMP2, BASE
@@ -1268,9 +1399,11 @@ static void build_subroutines(BuildCtx *ctx)
1268 | addiu BASE, BASE, 16 1399 | addiu BASE, BASE, 16
1269 | // Remember active hook before pcall. 1400 | // Remember active hook before pcall.
1270 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT 1401 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1271 | sdc1 FARG2, 0(TMP2) // Swap function and traceback. 1402 | sw CARG3, LO(TMP2) // Swap function and traceback.
1403 | sw CARG4, HI(TMP2)
1272 | andi TMP3, TMP3, 1 1404 | andi TMP3, TMP3, 1
1273 | sdc1 FARG1, 8(TMP2) 1405 | sw CARG1, 8+LO(TMP2)
1406 | sw CARG2, 8+HI(TMP2)
1274 | addiu PC, TMP3, 16+FRAME_PCALL 1407 | addiu PC, TMP3, 16+FRAME_PCALL
1275 | b ->vm_call_dispatch 1408 | b ->vm_call_dispatch
1276 |. addiu NARGS8:RC, NARGS8:RC, -16 1409 |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1279,7 +1412,10 @@ static void build_subroutines(BuildCtx *ctx)
1279 | 1412 |
1280 |.macro coroutine_resume_wrap, resume 1413 |.macro coroutine_resume_wrap, resume
1281 |.if resume 1414 |.if resume
1282 |.ffunc_1 coroutine_resume 1415 |.ffunc coroutine_resume
1416 | lw CARG3, HI(BASE)
1417 | beqz NARGS8:RC, ->fff_fallback
1418 |. lw CARG1, LO(BASE)
1283 | li AT, LJ_TTHREAD 1419 | li AT, LJ_TTHREAD
1284 | bne CARG3, AT, ->fff_fallback 1420 | bne CARG3, AT, ->fff_fallback
1285 |.else 1421 |.else
@@ -1314,11 +1450,13 @@ static void build_subroutines(BuildCtx *ctx)
1314 | move CARG3, CARG2 1450 | move CARG3, CARG2
1315 | sw BASE, L->top 1451 | sw BASE, L->top
1316 |2: // Move args to coroutine. 1452 |2: // Move args to coroutine.
1317 | ldc1 f0, 0(BASE) 1453 | lw SFRETHI, HI(BASE)
1454 | lw SFRETLO, LO(BASE)
1318 | sltu AT, BASE, TMP1 1455 | sltu AT, BASE, TMP1
1319 | beqz AT, >3 1456 | beqz AT, >3
1320 |. addiu BASE, BASE, 8 1457 |. addiu BASE, BASE, 8
1321 | sdc1 f0, 0(CARG3) 1458 | sw SFRETHI, HI(CARG3)
1459 | sw SFRETLO, LO(CARG3)
1322 | b <2 1460 | b <2
1323 |. addiu CARG3, CARG3, 8 1461 |. addiu CARG3, CARG3, 8
1324 |3: 1462 |3:
@@ -1331,6 +1469,7 @@ static void build_subroutines(BuildCtx *ctx)
1331 | lw TMP3, L:RA->top 1469 | lw TMP3, L:RA->top
1332 | li_vmstate INTERP 1470 | li_vmstate INTERP
1333 | lw BASE, L->base 1471 | lw BASE, L->base
1472 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
1334 | st_vmstate 1473 | st_vmstate
1335 | beqz AT, >8 1474 | beqz AT, >8
1336 |. subu RD, TMP3, TMP2 1475 |. subu RD, TMP3, TMP2
@@ -1343,10 +1482,12 @@ static void build_subroutines(BuildCtx *ctx)
1343 | sw TMP2, L:RA->top // Clear coroutine stack. 1482 | sw TMP2, L:RA->top // Clear coroutine stack.
1344 | move TMP1, BASE 1483 | move TMP1, BASE
1345 |5: // Move results from coroutine. 1484 |5: // Move results from coroutine.
1346 | ldc1 f0, 0(TMP2) 1485 | lw SFRETHI, HI(TMP2)
1486 | lw SFRETLO, LO(TMP2)
1347 | addiu TMP2, TMP2, 8 1487 | addiu TMP2, TMP2, 8
1348 | sltu AT, TMP2, TMP3 1488 | sltu AT, TMP2, TMP3
1349 | sdc1 f0, 0(TMP1) 1489 | sw SFRETHI, HI(TMP1)
1490 | sw SFRETLO, LO(TMP1)
1350 | bnez AT, <5 1491 | bnez AT, <5
1351 |. addiu TMP1, TMP1, 8 1492 |. addiu TMP1, TMP1, 8
1352 |6: 1493 |6:
@@ -1371,12 +1512,14 @@ static void build_subroutines(BuildCtx *ctx)
1371 |.if resume 1512 |.if resume
1372 | addiu TMP3, TMP3, -8 1513 | addiu TMP3, TMP3, -8
1373 | li TMP1, LJ_TFALSE 1514 | li TMP1, LJ_TFALSE
1374 | ldc1 f0, 0(TMP3) 1515 | lw SFRETHI, HI(TMP3)
1516 | lw SFRETLO, LO(TMP3)
1375 | sw TMP3, L:RA->top // Remove error from coroutine stack. 1517 | sw TMP3, L:RA->top // Remove error from coroutine stack.
1376 | li RD, (2+1)*8 1518 | li RD, (2+1)*8
1377 | sw TMP1, -8+HI(BASE) // Prepend false to results. 1519 | sw TMP1, -8+HI(BASE) // Prepend false to results.
1378 | addiu RA, BASE, -8 1520 | addiu RA, BASE, -8
1379 | sdc1 f0, 0(BASE) // Copy error message. 1521 | sw SFRETHI, HI(BASE) // Copy error message.
1522 | sw SFRETLO, LO(BASE)
1380 | b <7 1523 | b <7
1381 |. andi TMP0, PC, FRAME_TYPE 1524 |. andi TMP0, PC, FRAME_TYPE
1382 |.else 1525 |.else
@@ -1412,20 +1555,29 @@ static void build_subroutines(BuildCtx *ctx)
1412 | 1555 |
1413 |//-- Math library ------------------------------------------------------- 1556 |//-- Math library -------------------------------------------------------
1414 | 1557 |
1415 |.ffunc_n math_abs 1558 |.ffunc_1 math_abs
// Notes below describe the updated (right-hand) code, which works on the
// hi/lo words of the argument in integer registers.
// Integer path (tag == TISNUM): TMP0 = x >> 31 is the sign mask, and (x ^ TMP0) - TMP0 is |x|.
1416 |. abs.d FRET1, FARG1 1559 | bne SFARG1HI, TISNUM, >1
1417 |->fff_resn: 1560 |. sra TMP0, SFARG1LO, 31
1418 | lw PC, FRAME_PC(BASE) 1561 | xor TMP1, SFARG1LO, TMP0
1419 | addiu RA, BASE, -8 1562 | subu SFARG1LO, TMP1, TMP0
// A result that is still negative means the input was -2^31; it is then returned
// as the double 2^31 (hi word 0x41e00000, lo word 0).
1420 | b ->fff_res1 1563 | bgez SFARG1LO, ->fff_restv
1421 |. sdc1 FRET1, -8(BASE) 1564 |. nop
1565 | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1566 | b ->fff_restv
1567 |. li SFARG1LO, 0
1568 |1:
// Number path: tags at or above LJ_TISNUM take the fallback; otherwise the sign bit
// of the hi word is cleared by shifting left, then right, by one bit.
1569 | sltiu AT, SFARG1HI, LJ_TISNUM
1570 | beqz AT, ->fff_fallback
1571 |. sll SFARG1HI, SFARG1HI, 1
1572 | srl SFARG1HI, SFARG1HI, 1
1573 |// fallthrough
1422 | 1574 |
1423 |->fff_restv: 1575 |->fff_restv:
1424 | // CARG3/CARG1 = TValue result. 1576 | // SFARG1LO/SFARG1HI = TValue result.
1425 | lw PC, FRAME_PC(BASE) 1577 | lw PC, FRAME_PC(BASE)
1426 | sw CARG3, -8+HI(BASE) 1578 | sw SFARG1HI, -8+HI(BASE)
1427 | addiu RA, BASE, -8 1579 | addiu RA, BASE, -8
1428 | sw CARG1, -8+LO(BASE) 1580 | sw SFARG1LO, -8+LO(BASE)
1429 |->fff_res1: 1581 |->fff_res1:
1430 | // RA = results, PC = return. 1582 | // RA = results, PC = return.
1431 | li RD, (1+1)*8 1583 | li RD, (1+1)*8
@@ -1454,15 +1606,19 @@ static void build_subroutines(BuildCtx *ctx)
1454 |. sw TISNIL, -8+HI(TMP1) 1606 |. sw TISNIL, -8+HI(TMP1)
1455 | 1607 |
1456 |.macro math_extern, func 1608 |.macro math_extern, func
// Fast-function wrapper that calls the one-argument external function `func`.
// Notes below describe the updated (right-hand) code.
1457 |->ff_math_ .. func: 1609 | .ffunc math_ .. func
1458 | lw CARG3, HI(BASE) 1610 | lw SFARG1HI, HI(BASE)
1459 | beqz NARGS8:RC, ->fff_fallback 1611 | beqz NARGS8:RC, ->fff_fallback
1460 |. load_got func 1612 |. load_got func
// Only tags below LJ_TISNUM are handled inline; every other tag, including
// LJ_TISNUM itself, takes the fallback.
1461 | sltiu AT, CARG3, LJ_TISNUM 1613 | sltiu AT, SFARG1HI, LJ_TISNUM
1462 | beqz AT, ->fff_fallback 1614 | beqz AT, ->fff_fallback
// Delay slot: load the argument into FARG1 (FPU) or complete the SFARG1HI/SFARG1LO pair (no FPU).
1463 |. nop 1615 |.if FPU
1464 | call_extern
1465 |. ldc1 FARG1, 0(BASE) 1616 |. ldc1 FARG1, 0(BASE)
1617 |.else
1618 |. lw SFARG1LO, LO(BASE)
1619 |.endif
1620 | call_extern
1621 |. nop
// ->fff_resn stores the returned number as the single result.
1466 | b ->fff_resn 1622 | b ->fff_resn
1467 |. nop 1623 |. nop
1468 |.endmacro 1624 |.endmacro
@@ -1476,10 +1632,22 @@ static void build_subroutines(BuildCtx *ctx)
1476 |. nop 1632 |. nop
1477 |.endmacro 1633 |.endmacro
1478 | 1634 |
1635 |// TODO: Return integer type if result is integer (own sf implementation).
1479 |.macro math_round, func 1636 |.macro math_round, func
1480 | .ffunc_n math_ .. func 1637 |->ff_math_ .. func:
1481 |. nop 1638 | lw SFARG1HI, HI(BASE)
1639 | beqz NARGS8:RC, ->fff_fallback
1640 |. lw SFARG1LO, LO(BASE)
1641 | beq SFARG1HI, TISNUM, ->fff_restv
1642 |. sltu AT, SFARG1HI, TISNUM
1643 | beqz AT, ->fff_fallback
1644 |.if FPU
1645 |. ldc1 FARG1, 0(BASE)
1482 | bal ->vm_ .. func 1646 | bal ->vm_ .. func
1647 |.else
1648 |. load_got func
1649 | call_extern
1650 |.endif
1483 |. nop 1651 |. nop
1484 | b ->fff_resn 1652 | b ->fff_resn
1485 |. nop 1653 |. nop
@@ -1489,15 +1657,19 @@ static void build_subroutines(BuildCtx *ctx)
1489 | math_round ceil 1657 | math_round ceil
1490 | 1658 |
1491 |.ffunc math_log 1659 |.ffunc math_log
// Notes below describe the updated (right-hand) code.
// Inline path only for exactly one argument with a tag below LJ_TISNUM; otherwise fallback.
1492 | lw CARG3, HI(BASE)
1493 | li AT, 8 1660 | li AT, 8
1494 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1661 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1495 |. load_got log 1662 |. lw SFARG1HI, HI(BASE)
1496 | sltiu AT, CARG3, LJ_TISNUM 1663 | sltiu AT, SFARG1HI, LJ_TISNUM
1497 | beqz AT, ->fff_fallback 1664 | beqz AT, ->fff_fallback
1498 |. nop 1665 |. load_got log
// The argument load sits in the delay slot of the call: FARG1 with an FPU,
// otherwise the lo word that completes SFARG1HI/SFARG1LO.
1666 |.if FPU
1499 | call_extern 1667 | call_extern
1500 |. ldc1 FARG1, 0(BASE) 1668 |. ldc1 FARG1, 0(BASE)
1669 |.else
1670 | call_extern
1671 |. lw SFARG1LO, LO(BASE)
1672 |.endif
1501 | b ->fff_resn 1673 | b ->fff_resn
1502 |. nop 1674 |. nop
1503 | 1675 |
@@ -1516,23 +1688,43 @@ static void build_subroutines(BuildCtx *ctx)
1516 | math_extern2 atan2 1688 | math_extern2 atan2
1517 | math_extern2 fmod 1689 | math_extern2 fmod
1518 | 1690 |
1691 |.if FPU
// Notes below describe the updated (right-hand) code.
// With an FPU, sqrt.d is emitted inline and execution falls through into ->fff_resn.
// Without one, sqrt goes through the math_extern wrapper, which branches to ->fff_resn itself.
1519 |.ffunc_n math_sqrt 1692 |.ffunc_n math_sqrt
1520 |. sqrt.d FRET1, FARG1 1693 |. sqrt.d FRET1, FARG1
1521 | b ->fff_resn 1694 |// fallthrough to ->fff_resn
1522 |. nop 1695 |.else
1696 | math_extern sqrt
1697 |.endif
1698 |
// ->fff_resn: store a number result at BASE-8 and continue at ->fff_res1.
// The value comes from FRET1 with an FPU, otherwise from the SFRETHI/SFRETLO pair.
1699 |->fff_resn:
1700 | lw PC, FRAME_PC(BASE)
1701 | addiu RA, BASE, -8
1702 |.if FPU
1703 | b ->fff_res1
1704 |. sdc1 FRET1, -8(BASE)
1705 |.else
1706 | sw SFRETHI, -8+HI(BASE)
1707 | b ->fff_res1
1708 |. sw SFRETLO, -8+LO(BASE)
1709 |.endif
1523 | 1710 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 | 1711 |
1530 |.ffunc_nn math_ldexp 1712 |.ffunc math_ldexp
// Notes below describe the updated (right-hand) code.
// Inline path needs at least two arguments: arg 2 must carry exactly the TISNUM tag
// and arg 1 a tag below TISNUM; anything else goes to ->fff_fallback.
1531 | cvt.w.d FARG2, FARG2 1713 | sltiu AT, NARGS8:RC, 16
1714 | lw SFARG1HI, HI(BASE)
1715 | bnez AT, ->fff_fallback
1716 |. lw CARG4, 8+HI(BASE)
1717 | bne CARG4, TISNUM, ->fff_fallback
1532 | load_got ldexp 1718 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1719 |. sltu AT, SFARG1HI, TISNUM
1720 | beqz AT, ->fff_fallback
// Delay slot: load arg 1 into FARG1 (FPU) or complete the SFARG1HI/SFARG1LO pair (no FPU).
1721 |.if FPU
1722 |. ldc1 FARG1, 0(BASE)
1723 |.else
1724 |. lw SFARG1LO, LO(BASE)
1725 |.endif
// The lo word of arg 2 is loaded into CARG3 in the delay slot of the call.
1534 | call_extern 1726 | call_extern
1535 |. nop 1727 |. lw CARG3, 8+LO(BASE)
1536 | b ->fff_resn 1728 | b ->fff_resn
1537 |. nop 1729 |. nop
1538 | 1730 |
@@ -1543,10 +1735,17 @@ static void build_subroutines(BuildCtx *ctx)
1543 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 1735 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1544 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1736 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1545 | addiu RA, BASE, -8 1737 | addiu RA, BASE, -8
1738 |.if FPU
1546 | mtc1 TMP1, FARG2 1739 | mtc1 TMP1, FARG2
1547 | sdc1 FRET1, 0(RA) 1740 | sdc1 FRET1, 0(RA)
1548 | cvt.d.w FARG2, FARG2 1741 | cvt.d.w FARG2, FARG2
1549 | sdc1 FARG2, 8(RA) 1742 | sdc1 FARG2, 8(RA)
1743 |.else
1744 | sw SFRETLO, LO(RA)
1745 | sw SFRETHI, HI(RA)
1746 | sw TMP1, 8+LO(RA)
1747 | sw TISNUM, 8+HI(RA)
1748 |.endif
1550 | b ->fff_res 1749 | b ->fff_res
1551 |. li RD, (2+1)*8 1750 |. li RD, (2+1)*8
1552 | 1751 |
@@ -1556,49 +1755,109 @@ static void build_subroutines(BuildCtx *ctx)
1556 | call_extern 1755 | call_extern
1557 |. addiu CARG3, BASE, -8 1756 |. addiu CARG3, BASE, -8
1558 | addiu RA, BASE, -8 1757 | addiu RA, BASE, -8
1758 |.if FPU
1559 | sdc1 FRET1, 0(BASE) 1759 | sdc1 FRET1, 0(BASE)
1760 |.else
1761 | sw SFRETLO, LO(BASE)
1762 | sw SFRETHI, HI(BASE)
1763 |.endif
1560 | b ->fff_res 1764 | b ->fff_res
1561 |. li RD, (2+1)*8 1765 |. li RD, (2+1)*8
1562 | 1766 |
1563 |.macro math_minmax, name, ismax 1767 |.macro math_minmax, name, intins, ismax
// Notes below describe the updated (right-hand) code.
// Generates math.min / math.max. `intins` is the conditional move used on the integer
// path (invoked with movz for min and movn for max); `ismax` selects the number comparison.
// The running result lives in SFARG1HI/SFARG1LO (and in FRET1 on the FPU number path).
// TMP2 walks the argument slots; TMP3 = BASE + NARGS8 marks the end.
1564 |->ff_ .. name: 1768 | .ffunc_1 name
1565 | lw CARG3, HI(BASE) 1769 | addu TMP3, BASE, NARGS8:RC
1566 | beqz NARGS8:RC, ->fff_fallback 1770 | bne SFARG1HI, TISNUM, >5
1567 |. ldc1 FRET1, 0(BASE) 1771 |. addiu TMP2, BASE, 8
1568 | sltiu AT, CARG3, LJ_TISNUM 1772 |1: // Handle integers.
// Integer loop: stop at the end of the arguments and return the running result via ->fff_restv.
1773 |. lw SFARG2HI, HI(TMP2)
1774 | beq TMP2, TMP3, ->fff_restv
1775 |. lw SFARG2LO, LO(TMP2)
1776 | bne SFARG2HI, TISNUM, >3
// AT = (result < next), signed. With movz the result is replaced when AT == 0,
// with movn when AT != 0.
1777 |. slt AT, SFARG1LO, SFARG2LO
1778 | intins SFARG1LO, SFARG2LO, AT
1779 | b <1
1780 |. addiu TMP2, TMP2, 8
1781 |
1782 |3: // Convert intermediate result to number and continue with number loop.
// The next argument must have a tag below LJ_TISNUM, otherwise fallback.
1783 | sltiu AT, SFARG2HI, LJ_TISNUM
1569 | beqz AT, ->fff_fallback 1784 | beqz AT, ->fff_fallback
1570 |. addu TMP2, BASE, NARGS8:RC 1785 |.if FPU
1571 | addiu TMP1, BASE, 8 1786 |. mtc1 SFARG1LO, FRET1
1572 | beq TMP1, TMP2, ->fff_resn 1787 | cvt.d.w FRET1, FRET1
1573 |1: 1788 | b >7
1574 |. lw CARG3, HI(TMP1) 1789 |. ldc1 FARG1, 0(TMP2)
1575 | ldc1 FARG1, 0(TMP1) 1790 |.else
1576 | addiu TMP1, TMP1, 8 1791 |. nop
// vm_sfi2d_1 is defined elsewhere; presumably it converts the integer in SFARG1 to a
// soft-float double in place -- confirm.
1577 | sltiu AT, CARG3, LJ_TISNUM 1792 | bal ->vm_sfi2d_1
1793 |. nop
1794 | b >7
1795 |. nop
1796 |.endif
1797 |
// Entry when the first argument is not an integer: its tag must be below LJ_TISNUM.
1798 |5:
1799 |. sltiu AT, SFARG1HI, LJ_TISNUM
1578 | beqz AT, ->fff_fallback 1800 | beqz AT, ->fff_fallback
1801 |.if FPU
1802 |. ldc1 FRET1, 0(BASE)
1803 |.endif
1804 |
1805 |6: // Handle numbers.
// Number loop: at the end of the arguments the result is returned from FRET1
// (->fff_resn) with an FPU, or from SFARG1HI/SFARG1LO (->fff_restv) without one.
1806 |. lw SFARG2HI, HI(TMP2)
1807 |.if FPU
1808 | beq TMP2, TMP3, ->fff_resn
1809 |.else
1810 | beq TMP2, TMP3, ->fff_restv
1811 |.endif
1812 |. sltiu AT, SFARG2HI, LJ_TISNUM
1813 | beqz AT, >8
1814 |.if FPU
1815 |. ldc1 FARG1, 0(TMP2)
1816 |.else
1817 |. lw SFARG2LO, LO(TMP2)
1818 |.endif
1819 |7:
// FPU: movf.d replaces the result with the next argument when the c.olt.d condition is false.
1820 |.if FPU
1579 |.if ismax 1821 |.if ismax
1580 |. c.olt.d FARG1, FRET1 1822 | c.olt.d FARG1, FRET1
1581 |.else 1823 |.else
1582 |. c.olt.d FRET1, FARG1 1824 | c.olt.d FRET1, FARG1
1825 |.endif
1826 | movf.d FRET1, FARG1
1827 |.else
// No FPU: the helpers vm_sfcmpogt / vm_sfcmpolt are defined elsewhere (presumably ordered
// greater-than / less-than on SFARG1 vs SFARG2 -- confirm). The result is replaced
// when the helper returns 0 in CRET1.
1828 |.if ismax
1829 | bal ->vm_sfcmpogt
1830 |.else
1831 | bal ->vm_sfcmpolt
1583 |.endif 1832 |.endif
1584 | bne TMP1, TMP2, <1
1585 |. movf.d FRET1, FARG1
1586 | b ->fff_resn
1587 |. nop 1833 |. nop
1834 | movz SFARG1LO, SFARG2LO, CRET1
1835 | movz SFARG1HI, SFARG2HI, CRET1
1836 |.endif
1837 | b <6
1838 |. addiu TMP2, TMP2, 8
1839 |
1840 |8: // Convert integer to number and continue with number loop.
// Only an argument tagged exactly TISNUM is accepted here; any other tag takes the fallback.
1841 | bne SFARG2HI, TISNUM, ->fff_fallback
1842 |.if FPU
1843 |. lwc1 FARG1, LO(TMP2)
1844 | b <7
1845 |. cvt.d.w FARG1, FARG1
1846 |.else
1847 |. nop
// vm_sfi2d_2 is defined elsewhere; presumably it converts the integer in SFARG2 to a
// soft-float double -- confirm.
1848 | bal ->vm_sfi2d_2
1849 |. nop
1850 | b <7
1851 |. nop
1852 |.endif
1853 |
1588 |.endmacro 1854 |.endmacro
1589 | 1855 |
1590 | math_minmax math_min, 0 1856 | math_minmax math_min, movz, 0
1591 | math_minmax math_max, 1 1857 | math_minmax math_max, movn, 1
1592 | 1858 |
1593 |//-- String library ----------------------------------------------------- 1859 |//-- String library -----------------------------------------------------
1594 | 1860 |
1595 |.ffunc_1 string_len
1596 | li AT, LJ_TSTR
1597 | bne CARG3, AT, ->fff_fallback
1598 |. nop
1599 | b ->fff_resi
1600 |. lw CRET1, STR:CARG1->len
1601 |
1602 |.ffunc string_byte // Only handle the 1-arg case here. 1861 |.ffunc string_byte // Only handle the 1-arg case here.
1603 | lw CARG3, HI(BASE) 1862 | lw CARG3, HI(BASE)
1604 | lw STR:CARG1, LO(BASE) 1863 | lw STR:CARG1, LO(BASE)
@@ -1608,33 +1867,31 @@ static void build_subroutines(BuildCtx *ctx)
1608 | bnez AT, ->fff_fallback // Need exactly 1 string argument. 1867 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1609 |. nop 1868 |. nop
1610 | lw TMP0, STR:CARG1->len 1869 | lw TMP0, STR:CARG1->len
1611 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1612 | addiu RA, BASE, -8 1870 | addiu RA, BASE, -8
1871 | lw PC, FRAME_PC(BASE)
1613 | sltu RD, r0, TMP0 1872 | sltu RD, r0, TMP0
1614 | mtc1 TMP1, f0 1873 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1615 | addiu RD, RD, 1 1874 | addiu RD, RD, 1
1616 | cvt.d.w f0, f0
1617 | lw PC, FRAME_PC(BASE)
1618 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 1875 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1876 | sw TISNUM, HI(RA)
1619 | b ->fff_res 1877 | b ->fff_res
1620 |. sdc1 f0, 0(RA) 1878 |. sw TMP1, LO(RA)
1621 | 1879 |
1622 |.ffunc string_char // Only handle the 1-arg case here. 1880 |.ffunc string_char // Only handle the 1-arg case here.
1623 | ffgccheck 1881 | ffgccheck
1624 |. nop 1882 |. nop
1625 | lw CARG3, HI(BASE) 1883 | lw CARG3, HI(BASE)
1626 | ldc1 FARG1, 0(BASE) 1884 | lw CARG1, LO(BASE)
1627 | li AT, 8 1885 | li TMP1, 255
1628 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1886 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1629 |. sltiu AT, CARG3, LJ_TISNUM 1887 | xor TMP0, CARG3, TISNUM // Integer.
1630 | beqz AT, ->fff_fallback 1888 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1889 | or AT, AT, TMP0
1890 | or AT, AT, TMP1
1891 | bnez AT, ->fff_fallback
1631 |. li CARG3, 1 1892 |. li CARG3, 1
1632 | cvt.w.d FARG1, FARG1
1633 | addiu CARG2, sp, ARG5_OFS 1893 | addiu CARG2, sp, ARG5_OFS
1634 | sltiu AT, TMP0, 256 1894 | sb CARG1, ARG5
1635 | mfc1 TMP0, FARG1
1636 | beqz AT, ->fff_fallback
1637 |. sw TMP0, ARG5
1638 |->fff_newstr: 1895 |->fff_newstr:
1639 | load_got lj_str_new 1896 | load_got lj_str_new
1640 | sw BASE, L->base 1897 | sw BASE, L->base
@@ -1643,35 +1900,30 @@ static void build_subroutines(BuildCtx *ctx)
1643 |. move CARG1, L 1900 |. move CARG1, L
1644 | // Returns GCstr *. 1901 | // Returns GCstr *.
1645 | lw BASE, L->base 1902 | lw BASE, L->base
1646 | move CARG1, CRET1 1903 |->fff_resstr:
1904 | move SFARG1LO, CRET1
1647 | b ->fff_restv 1905 | b ->fff_restv
1648 |. li CARG3, LJ_TSTR 1906 |. li SFARG1HI, LJ_TSTR
1649 | 1907 |
1650 |.ffunc string_sub 1908 |.ffunc string_sub
1651 | ffgccheck 1909 | ffgccheck
1652 |. nop 1910 |. nop
1653 | addiu AT, NARGS8:RC, -16 1911 | addiu AT, NARGS8:RC, -16
1654 | lw CARG3, 16+HI(BASE) 1912 | lw CARG3, 16+HI(BASE)
1655 | ldc1 f0, 16(BASE)
1656 | lw TMP0, HI(BASE) 1913 | lw TMP0, HI(BASE)
1657 | lw STR:CARG1, LO(BASE) 1914 | lw STR:CARG1, LO(BASE)
1658 | bltz AT, ->fff_fallback 1915 | bltz AT, ->fff_fallback
1659 | lw CARG2, 8+HI(BASE) 1916 |. lw CARG2, 8+HI(BASE)
1660 | ldc1 f2, 8(BASE)
1661 | beqz AT, >1 1917 | beqz AT, >1
1662 |. li CARG4, -1 1918 |. li CARG4, -1
1663 | cvt.w.d f0, f0 1919 | bne CARG3, TISNUM, ->fff_fallback
1664 | sltiu AT, CARG3, LJ_TISNUM 1920 |. lw CARG4, 16+LO(BASE)
1665 | beqz AT, ->fff_fallback
1666 |. mfc1 CARG4, f0
1667 |1: 1921 |1:
1668 | sltiu AT, CARG2, LJ_TISNUM 1922 | bne CARG2, TISNUM, ->fff_fallback
1669 | beqz AT, ->fff_fallback
1670 |. li AT, LJ_TSTR 1923 |. li AT, LJ_TSTR
1671 | cvt.w.d f2, f2
1672 | bne TMP0, AT, ->fff_fallback 1924 | bne TMP0, AT, ->fff_fallback
1673 |. lw CARG2, STR:CARG1->len 1925 |. lw CARG3, 8+LO(BASE)
1674 | mfc1 CARG3, f2 1926 | lw CARG2, STR:CARG1->len
1675 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end 1927 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1676 | slt AT, CARG4, r0 1928 | slt AT, CARG4, r0
1677 | addiu TMP0, CARG2, 1 1929 | addiu TMP0, CARG2, 1
@@ -1693,139 +1945,130 @@ static void build_subroutines(BuildCtx *ctx)
1693 | bgez CARG3, ->fff_newstr 1945 | bgez CARG3, ->fff_newstr
1694 |. addiu CARG3, CARG3, 1 // len++ 1946 |. addiu CARG3, CARG3, 1 // len++
1695 |->fff_emptystr: // Return empty string. 1947 |->fff_emptystr: // Return empty string.
1696 | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) 1948 | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) // SFARG1LO = address of the global strempty object (DISPATCH-relative).
1697 | b ->fff_restv 1949 | b ->fff_restv
1698 |. li CARG3, LJ_TSTR 1950 |. li SFARG1HI, LJ_TSTR // Delay slot: SFARG1HI = LJ_TSTR type tag for the result.
1699 | 1951 |
1700 |.ffunc string_rep // Only handle the 1-char case inline. 1952 |.macro ffstring_op, name // New side: macro that emits the string_<name> fast function.
1701 | ffgccheck 1953 | .ffunc string_ .. name
1702 |. nop
1703 | lw TMP0, HI(BASE)
1704 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1705 | lw CARG4, 8+HI(BASE)
1706 | lw STR:CARG1, LO(BASE)
1707 | addiu TMP0, TMP0, -LJ_TSTR
1708 | ldc1 f0, 8(BASE)
1709 | or AT, AT, TMP0
1710 | bnez AT, ->fff_fallback
1711 |. sltiu AT, CARG4, LJ_TISNUM
1712 | cvt.w.d f0, f0
1713 | beqz AT, ->fff_fallback
1714 |. lw TMP0, STR:CARG1->len
1715 | mfc1 CARG3, f0
1716 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1717 | li AT, 1
1718 | blez CARG3, ->fff_emptystr // Count <= 0?
1719 |. sltu AT, AT, TMP0
1720 | beqz TMP0, ->fff_emptystr // Zero length string?
1721 |. sltu TMP0, TMP1, CARG3
1722 | or AT, AT, TMP0
1723 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1724 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1725 |. lbu TMP0, STR:CARG1[1]
1726 | addu TMP2, CARG2, CARG3
1727 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1728 | addiu TMP2, TMP2, -1
1729 | sltu AT, CARG2, TMP2
1730 | bnez AT, <1
1731 |. sb TMP0, 0(TMP2)
1732 | b ->fff_newstr
1733 |. nop
1734 |
1735 |.ffunc string_reverse
1736 | ffgccheck 1954 | ffgccheck
1737 |. nop 1955 |. nop
1738 | lw CARG3, HI(BASE) 1956 | lw CARG3, HI(BASE)
1739 | lw STR:CARG1, LO(BASE) 1957 | lw STR:CARG2, LO(BASE)
1740 | beqz NARGS8:RC, ->fff_fallback 1958 | beqz NARGS8:RC, ->fff_fallback // NARGS8 == 0 (no arguments): fall back.
1741 |. li AT, LJ_TSTR 1959 |. li AT, LJ_TSTR
1742 | bne CARG3, AT, ->fff_fallback 1960 | bne CARG3, AT, ->fff_fallback // First argument's tag is not LJ_TSTR: fall back.
1743 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1961 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) // Delay slot: SBUF:CARG1 = address of the global tmpbuf.
1744 | lw CARG3, STR:CARG1->len 1962 | load_got lj_buf_putstr_ .. name
1745 | addiu CARG1, STR:CARG1, #STR 1963 | lw TMP0, SBUF:CARG1->b
1746 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1964 | sw L, SBUF:CARG1->L
1747 | sltu AT, TMP1, CARG3 1965 | sw BASE, L->base
1748 | bnez AT, ->fff_fallback 1966 | sw TMP0, SBUF:CARG1->w // w = b (presumably resets the buffer to empty; confirm against the SBuf definition).
1749 |. addu TMP3, CARG1, CARG3 1967 | call_intern extern lj_buf_putstr_ .. name
1750 | addu CARG4, CARG2, CARG3 1968 |. sw PC, SAVE_PC // Delay slot: store PC in SAVE_PC before the call.
1751 |1: // Reverse string copy. 1969 | load_got lj_buf_tostr
1752 | lbu TMP1, 0(CARG1) 1970 | call_intern lj_buf_tostr
1753 | sltu AT, CARG1, TMP3 1971 |. move SBUF:CARG1, SBUF:CRET1 // Delay slot: pass the value returned in CRET1 as the first argument.
1754 | beqz AT, ->fff_newstr 1972 | b ->fff_resstr
1755 |. addiu CARG1, CARG1, 1 1973 |. lw BASE, L->base // Delay slot: reload BASE from L->base.
1756 | addiu CARG4, CARG4, -1
1757 | b <1
1758 | sb TMP1, 0(CARG4)
1759 |
1760 |.macro ffstring_case, name, lo
1761 | .ffunc name
1762 | ffgccheck
1763 |. nop
1764 | lw CARG3, HI(BASE)
1765 | lw STR:CARG1, LO(BASE)
1766 | beqz NARGS8:RC, ->fff_fallback
1767 |. li AT, LJ_TSTR
1768 | bne CARG3, AT, ->fff_fallback
1769 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1770 | lw CARG3, STR:CARG1->len
1771 | addiu CARG1, STR:CARG1, #STR
1772 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1773 | sltu AT, TMP1, CARG3
1774 | bnez AT, ->fff_fallback
1775 |. addu TMP3, CARG1, CARG3
1776 | move CARG4, CARG2
1777 |1: // ASCII case conversion.
1778 | lbu TMP1, 0(CARG1)
1779 | sltu AT, CARG1, TMP3
1780 | beqz AT, ->fff_newstr
1781 |. addiu TMP0, TMP1, -lo
1782 | xori TMP2, TMP1, 0x20
1783 | sltiu AT, TMP0, 26
1784 | movn TMP1, TMP2, AT
1785 | addiu CARG1, CARG1, 1
1786 | sb TMP1, 0(CARG4)
1787 | b <1
1788 |. addiu CARG4, CARG4, 1
1789 |.endmacro 1974 |.endmacro
1790 | 1975 |
1791 |ffstring_case string_lower, 65 1976 |ffstring_op reverse
1792 |ffstring_case string_upper, 97 1977 |ffstring_op lower
1978 |ffstring_op upper
1793 | 1979 |
1794 |//-- Table library ------------------------------------------------------ 1980 |//-- Bit library --------------------------------------------------------
1795 | 1981 |
1796 |.ffunc_1 table_getn 1982 |->vm_tobit_fb: // Callers set TMP1 = (SFARG1HI < TISNUM) in the delay slot of their bal.
1797 | li AT, LJ_TTAB 1983 | beqz TMP1, ->fff_fallback // TMP1 == 0: type tag is not below TISNUM, fall back.
1798 | bne CARG3, AT, ->fff_fallback 1984 |.if FPU
1799 |. load_got lj_tab_len 1985 |. ldc1 FARG1, 0(BASE)
1800 | call_intern lj_tab_len // (GCtab *t) 1986 | add.d FARG1, FARG1, TOBIT // Add the TOBIT bias before extracting the word with mfc1.
1801 |. nop 1987 | jr ra
1802 | // Returns uint32_t (but less than 2^31). 1988 |. mfc1 CRET1, FARG1
1803 | b ->fff_resi 1989 |.else
1990 |// FP number to bit conversion for soft-float.
1991 |->vm_tobit:
1992 | sll TMP0, SFARG1HI, 1 // TMP0 = high word with the sign bit shifted out.
1993 | lui AT, 0x0020
1994 | addu TMP0, TMP0, AT // TMP0 += 1 << 21.
1995 | slt AT, TMP0, r0 // AT = 1 if bit 31 of TMP0 is now set.
1996 | movz SFARG1LO, r0, AT // AT == 0: the result is 0.
1997 | beqz AT, >2
1998 |. li TMP1, 0x3e0
1999 | not TMP1, TMP1 // TMP1 = ~0x3e0.
2000 | sra TMP0, TMP0, 21
2001 | subu TMP0, TMP1, TMP0 // TMP0 = ~0x3e0 - (TMP0 >> 21): right-shift count used below.
2002 | slt AT, TMP0, r0
2003 | bnez AT, >1 // Negative shift count: handled at 1: below.
2004 |. sll TMP1, SFARG1HI, 11 // Delay slot: TMP1 = high word << 11.
2005 | lui AT, 0x8000
2006 | or TMP1, TMP1, AT // Set bit 31 (presumably the implicit leading 1 of the mantissa).
2007 | srl AT, SFARG1LO, 21
2008 | or TMP1, TMP1, AT // Merge in the top 11 bits of the low word.
2009 | slt AT, SFARG1HI, r0
2010 | beqz AT, >2
2011 |. srlv SFARG1LO, TMP1, TMP0 // Delay slot: SFARG1LO = TMP1 >> TMP0 (logical).
2012 | subu SFARG1LO, r0, SFARG1LO // High word negative: negate the result.
2013 |2:
2014 | jr ra
2015 |. move CRET1, SFARG1LO // Delay slot: return SFARG1LO in CRET1.
2016 |1:
2017 | addiu TMP0, TMP0, 21
2018 | srlv TMP1, SFARG1LO, TMP0
2019 | li AT, 20
2020 | subu TMP0, AT, TMP0
2021 | sll SFARG1LO, SFARG1HI, 12
2022 | sllv AT, SFARG1LO, TMP0
2023 | or SFARG1LO, TMP1, AT // Combine the bits taken from the low and high words.
2024 | slt AT, SFARG1HI, r0
2025 | beqz AT, <2 // High word non-negative: return SFARG1LO via 2: above.
1804 |. nop 2026 |. nop
1805 | 2027 | jr ra
1806 |//-- Bit library -------------------------------------------------------- 2028 |. subu CRET1, r0, SFARG1LO // Delay slot: negative input, return -SFARG1LO.
2029 |.endif
1807 | 2030 |
1808 |.macro .ffunc_bit, name 2031 |.macro .ffunc_bit, name // Prologue for bit_<name>: leaves the first argument as a 32-bit value in CRET1.
1809 | .ffunc_n bit_..name 2032 | .ffunc_1 bit_..name
1810 |. add.d FARG1, FARG1, TOBIT 2033 | beq SFARG1HI, TISNUM, >6 // Type tag equals TISNUM: skip the conversion.
1811 | mfc1 CRET1, FARG1 2034 |. move CRET1, SFARG1LO // Delay slot: CRET1 = low word of the argument.
2035 | bal ->vm_tobit_fb // Otherwise convert via vm_tobit_fb, which returns in CRET1.
2036 |. sltu TMP1, SFARG1HI, TISNUM // Delay slot: TMP1 = (SFARG1HI < TISNUM), tested by vm_tobit_fb.
2037 |6:
1812 |.endmacro 2038 |.endmacro
1813 | 2039 |
1814 |.macro .ffunc_bit_op, name, ins 2040 |.macro .ffunc_bit_op, name, ins // Folds every argument into CRET1 using instruction `ins`.
1815 | .ffunc_bit name 2041 | .ffunc_bit name
1816 | addiu TMP1, BASE, 8 2042 | addiu TMP2, BASE, 8 // TMP2 = address of the second argument slot.
1817 | addu TMP2, BASE, NARGS8:RC 2043 | addu TMP3, BASE, NARGS8:RC // TMP3 = BASE + NARGS8, the end of the argument slots.
1818 |1: 2044 |1:
1819 | lw CARG4, HI(TMP1) 2045 | lw SFARG1HI, HI(TMP2)
1820 | beq TMP1, TMP2, ->fff_resi 2046 | beq TMP2, TMP3, ->fff_resi // All argument slots consumed: return CRET1.
1821 |. ldc1 FARG1, 0(TMP1) 2047 |. lw SFARG1LO, LO(TMP2)
1822 | sltiu AT, CARG4, LJ_TISNUM 2048 |.if FPU
1823 | beqz AT, ->fff_fallback 2049 | bne SFARG1HI, TISNUM, >2 // Tag differs from TISNUM: convert at 2: below.
1824 | add.d FARG1, FARG1, TOBIT 2050 |. addiu TMP2, TMP2, 8 // Delay slot: advance to the next slot (hence the -8 offset at 2:).
1825 | mfc1 CARG2, FARG1
1826 | ins CRET1, CRET1, CARG2
1827 | b <1 2051 | b <1
1828 |. addiu TMP1, TMP1, 8 2052 |. ins CRET1, CRET1, SFARG1LO
2053 |2:
2054 | ldc1 FARG1, -8(TMP2)
2055 | sltu TMP1, SFARG1HI, TISNUM
2056 | beqz TMP1, ->fff_fallback // Tag is not below TISNUM: fall back.
2057 |. add.d FARG1, FARG1, TOBIT
2058 | mfc1 SFARG1LO, FARG1
2059 | b <1
2060 |. ins CRET1, CRET1, SFARG1LO
2061 |.else
2062 | beq SFARG1HI, TISNUM, >2
2063 |. move CRET2, CRET1 // Delay slot: keep the accumulated result in CRET2.
2064 | bal ->vm_tobit_fb // Returns the converted operand in CRET1.
2065 |. sltu TMP1, SFARG1HI, TISNUM
2066 | move SFARG1LO, CRET2 // SFARG1LO = accumulated result, CRET1 = converted operand.
2067 |2:
2068 | ins CRET1, CRET1, SFARG1LO
2069 | b <1
2070 |. addiu TMP2, TMP2, 8
2071 |.endif
1829 |.endmacro 2072 |.endmacro
1830 | 2073 |
1831 |.ffunc_bit_op band, and 2074 |.ffunc_bit_op band, and
@@ -1849,24 +2092,28 @@ static void build_subroutines(BuildCtx *ctx)
1849 |. not CRET1, CRET1 2092 |. not CRET1, CRET1
1850 | 2093 |
1851 |.macro .ffunc_bit_sh, name, ins, shmod 2094 |.macro .ffunc_bit_sh, name, ins, shmod // Two-argument bit_<name>; shmod selects the pre-processing below.
1852 | .ffunc_nn bit_..name 2095 | .ffunc_2 bit_..name
1853 |. add.d FARG1, FARG1, TOBIT 2096 | beq SFARG1HI, TISNUM, >1 // First argument's tag equals TISNUM: skip the conversion.
1854 | add.d FARG2, FARG2, TOBIT 2097 |. nop
1855 | mfc1 CARG1, FARG1 2098 | bal ->vm_tobit_fb
1856 | mfc1 CARG2, FARG2 2099 |. sltu TMP1, SFARG1HI, TISNUM // Delay slot: TMP1 = (SFARG1HI < TISNUM), tested by vm_tobit_fb.
2100 | move SFARG1LO, CRET1 // Use the converted value as the first operand.
2101 |1:
2102 | bne SFARG2HI, TISNUM, ->fff_fallback // Second argument's tag must equal TISNUM, otherwise fall back.
2103 |. nop
1857 |.if shmod == 1 2104 |.if shmod == 1 // x << n and x >> (32-n), combined by `ins` below (presumably a rotate left; invocations are outside this view).
1858 | li AT, 32 2105 | li AT, 32
1859 | subu TMP0, AT, CARG2 2106 | subu TMP0, AT, SFARG2LO
1860 | sllv CARG2, CARG1, CARG2 2107 | sllv SFARG2LO, SFARG1LO, SFARG2LO
1861 | srlv CARG1, CARG1, TMP0 2108 | srlv SFARG1LO, SFARG1LO, TMP0
1862 |.elif shmod == 2 2109 |.elif shmod == 2 // x >> n and x << (32-n), combined by `ins` below (presumably a rotate right).
1863 | li AT, 32 2110 | li AT, 32
1864 | subu TMP0, AT, CARG2 2111 | subu TMP0, AT, SFARG2LO
1865 | srlv CARG2, CARG1, CARG2 2112 | srlv SFARG2LO, SFARG1LO, SFARG2LO
1866 | sllv CARG1, CARG1, TMP0 2113 | sllv SFARG1LO, SFARG1LO, TMP0
1867 |.endif 2114 |.endif
1868 | b ->fff_resi 2115 | b ->fff_resi
1869 |. ins CRET1, CARG1, CARG2 2116 |. ins CRET1, SFARG1LO, SFARG2LO // Delay slot: compute the result into CRET1.
1870 |.endmacro 2117 |.endmacro
1871 | 2118 |
1872 |.ffunc_bit_sh lshift, sllv, 0 2119 |.ffunc_bit_sh lshift, sllv, 0
@@ -1878,9 +2125,11 @@ static void build_subroutines(BuildCtx *ctx)
1878 | 2125 |
1879 |.ffunc_bit tobit 2126 |.ffunc_bit tobit
1880 |->fff_resi: 2127 |->fff_resi: // New side: store CRET1 with tag TISNUM in the slot at BASE-8, then continue at fff_res1.
1881 | mtc1 CRET1, FRET1 2128 | lw PC, FRAME_PC(BASE)
1882 | b ->fff_resn 2129 | addiu RA, BASE, -8 // RA = address of the result slot.
1883 |. cvt.d.w FRET1, FRET1 2130 | sw TISNUM, -8+HI(BASE) // High word of the result = TISNUM tag.
2131 | b ->fff_res1
2132 |. sw CRET1, -8+LO(BASE) // Delay slot: low word of the result = CRET1.
1884 | 2133 |
1885 |//----------------------------------------------------------------------- 2134 |//-----------------------------------------------------------------------
1886 | 2135 |
@@ -2067,19 +2316,96 @@ static void build_subroutines(BuildCtx *ctx)
2067 | jr CRET1 2316 | jr CRET1
2068 |. lw INS, -4(PC) 2317 |. lw INS, -4(PC)
2069 | 2318 |
2319 |->cont_stitch: // Trace stitching.
2320 |.if JIT
2321 | // RA = resultptr, RB = meta base
2322 | lw INS, -4(PC)
2323 | lw TMP2, -24+LO(RB) // Save previous trace.
2324 | decode_RA8a RC, INS
2325 | addiu AT, MULTRES, -8 // AT = MULTRES - 8; zero means there is nothing to move.
2326 | decode_RA8b RC
2327 | beqz AT, >2
2328 |. addu RC, BASE, RC // Call base.
2329 |1: // Move results down.
2330 | lw SFRETHI, HI(RA)
2331 | lw SFRETLO, LO(RA)
2332 | addiu AT, AT, -8 // One 8-byte slot fewer to copy.
2333 | addiu RA, RA, 8
2334 | sw SFRETHI, HI(RC)
2335 | sw SFRETLO, LO(RC)
2336 | bnez AT, <1
2337 |. addiu RC, RC, 8
2338 |2:
2339 | decode_RA8a RA, INS
2340 | decode_RB8a RB, INS
2341 | decode_RA8b RA
2342 | decode_RB8b RB
2343 | addu RA, RA, RB
2344 | addu RA, BASE, RA // RA = BASE + decoded A + decoded B of INS; compared against RC below.
2345 |3:
2346 | sltu AT, RC, RA
2347 | bnez AT, >9 // More results wanted?
2348 |. nop
2349 |
2350 | lhu TMP3, TRACE:TMP2->traceno
2351 | lhu RD, TRACE:TMP2->link
2352 | beq RD, TMP3, ->cont_nop // Blacklisted (link == traceno).
2353 |. load_got lj_dispatch_stitch
2354 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2355 |. sll RD, RD, 3
2356 |
2357 | // Stitch a new trace to the previous trace.
2358 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2359 | sw L, DISPATCH_J(L)(DISPATCH)
2360 | sw BASE, L->base
2361 | addiu CARG1, DISPATCH, GG_DISP2J
2362 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2363 |. move CARG2, PC
2364 | b ->cont_nop
2365 |. lw BASE, L->base // Delay slot: reload BASE from L->base after the call.
2366 |
2367 |9:
2368 | sw TISNIL, HI(RC) // Store the nil tag in the slot at RC.
2369 | b <3
2370 |. addiu RC, RC, 8
2371 |.endif
2372 |
2373 |->vm_profhook: // Dispatch target for profiler hook.
2374#if LJ_HASPROFILE
2375 | load_got lj_dispatch_profile
2376 | sw MULTRES, SAVE_MULTRES // Store MULTRES in SAVE_MULTRES before the call.
2377 | move CARG2, PC
2378 | sw BASE, L->base
2379 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2380 |. move CARG1, L
2381 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2382 | addiu PC, PC, -4 // Step PC back by one 4-byte instruction.
2383 | b ->cont_nop
2384 |. lw BASE, L->base // Delay slot: reload BASE from L->base after the call.
2385#endif
2386 |
2070 |//----------------------------------------------------------------------- 2387 |//-----------------------------------------------------------------------
2071 |//-- Trace exit handler ------------------------------------------------- 2388 |//-- Trace exit handler -------------------------------------------------
2072 |//----------------------------------------------------------------------- 2389 |//-----------------------------------------------------------------------
2073 | 2390 |
2074 |.macro savex_, a, b 2391 |.macro savex_, a, b // Store registers numbered a and b into the save area above sp.
2392 |.if FPU
2075 | sdc1 f..a, 16+a*8(sp) 2393 | sdc1 f..a, 16+a*8(sp) // FPR a at 16 + a*8; only f..a is stored, not f..b.
2076 | sw r..a, 16+32*8+a*4(sp) 2394 | sw r..a, 16+32*8+a*4(sp) // GPRs are placed after the 32*8-byte FPR area.
2077 | sw r..b, 16+32*8+b*4(sp) 2395 | sw r..b, 16+32*8+b*4(sp)
2396 |.else
2397 | sw r..a, 16+a*4(sp) // Without FPU the GPR area starts directly at offset 16.
2398 | sw r..b, 16+b*4(sp)
2399 |.endif
2078 |.endmacro 2400 |.endmacro
2079 | 2401 |
2080 |->vm_exit_handler: 2402 |->vm_exit_handler:
2081 |.if JIT 2403 |.if JIT
2404 |.if FPU
2082 | addiu sp, sp, -(16+32*8+32*4) 2405 | addiu sp, sp, -(16+32*8+32*4)
2406 |.else
2407 | addiu sp, sp, -(16+32*4)
2408 |.endif
2083 | savex_ 0, 1 2409 | savex_ 0, 1
2084 | savex_ 2, 3 2410 | savex_ 2, 3
2085 | savex_ 4, 5 2411 | savex_ 4, 5
@@ -2094,25 +2420,34 @@ static void build_subroutines(BuildCtx *ctx)
2094 | savex_ 22, 23 2420 | savex_ 22, 23
2095 | savex_ 24, 25 2421 | savex_ 24, 25
2096 | savex_ 26, 27 2422 | savex_ 26, 27
2423 |.if FPU
2097 | sdc1 f28, 16+28*8(sp) 2424 | sdc1 f28, 16+28*8(sp)
2098 | sw r28, 16+32*8+28*4(sp)
2099 | sdc1 f30, 16+30*8(sp) 2425 | sdc1 f30, 16+30*8(sp)
2426 | sw r28, 16+32*8+28*4(sp)
2100 | sw r30, 16+32*8+30*4(sp) 2427 | sw r30, 16+32*8+30*4(sp)
2101 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. 2428 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2429 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2430 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2431 |.else
2432 | sw r28, 16+28*4(sp)
2433 | sw r30, 16+30*4(sp)
2434 | sw r0, 16+31*4(sp) // Clear RID_TMP.
2435 | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2436 | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2437 |.endif
2102 | li_vmstate EXIT 2438 | li_vmstate EXIT
2103 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2104 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2439 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2105 | lw TMP1, 0(TMP2) // Load exit number. 2440 | lw TMP1, 0(TMP2) // Load exit number.
2106 | st_vmstate 2441 | st_vmstate
2107 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. 2442 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2108 | lw L, DISPATCH_GL(jit_L)(DISPATCH) 2443 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2109 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2110 | load_got lj_trace_exit 2444 | load_got lj_trace_exit
2111 | sw L, DISPATCH_J(L)(DISPATCH) 2445 | sw L, DISPATCH_J(L)(DISPATCH)
2112 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. 2446 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2447 | sw BASE, L->base
2113 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 2448 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2114 | addiu CARG1, DISPATCH, GG_DISP2J 2449 | addiu CARG1, DISPATCH, GG_DISP2J
2115 | sw BASE, L->base 2450 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2116 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) 2451 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2117 |. addiu CARG2, sp, 16 2452 |. addiu CARG2, sp, 16
2118 | // Returns MULTRES (unscaled) or negated error code. 2453 | // Returns MULTRES (unscaled) or negated error code.
@@ -2128,19 +2463,21 @@ static void build_subroutines(BuildCtx *ctx)
2128 |.if JIT 2463 |.if JIT
2129 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. 2464 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2130 | lw L, SAVE_L 2465 | lw L, SAVE_L
2131 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2466 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2467 | sw BASE, L->base
2132 |1: 2468 |1:
2133 | bltz CRET1, >3 // Check for error from exit. 2469 | bltz CRET1, >9 // Check for error from exit.
2134 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2470 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2135 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2471 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2136 | sll MULTRES, CRET1, 3 2472 | sll MULTRES, CRET1, 3
2137 | li TISNIL, LJ_TNIL 2473 | li TISNIL, LJ_TNIL
2474 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2138 | sw MULTRES, SAVE_MULTRES 2475 | sw MULTRES, SAVE_MULTRES
2139 | mtc1 TMP3, TOBIT 2476 | .FPU mtc1 TMP3, TOBIT
2140 | lw TMP1, LFUNC:TMP1->pc 2477 | lw TMP1, LFUNC:RB->pc
2141 | sw r0, DISPATCH_GL(jit_L)(DISPATCH) 2478 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2142 | lw KBASE, PC2PROTO(k)(TMP1) 2479 | lw KBASE, PC2PROTO(k)(TMP1)
2143 | cvt.d.s TOBIT, TOBIT 2480 | .FPU cvt.d.s TOBIT, TOBIT
2144 | // Modified copy of ins_next which handles function header dispatch, too. 2481 | // Modified copy of ins_next which handles function header dispatch, too.
2145 | lw INS, 0(PC) 2482 | lw INS, 0(PC)
2146 | addiu PC, PC, 4 2483 | addiu PC, PC, 4
@@ -2148,7 +2485,7 @@ static void build_subroutines(BuildCtx *ctx)
2148 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2485 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2149 | decode_OP4a TMP1, INS 2486 | decode_OP4a TMP1, INS
2150 | decode_OP4b TMP1 2487 | decode_OP4b TMP1
2151 | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? 2488 | sltiu TMP2, TMP1, BC_FUNCF*4
2152 | addu TMP0, DISPATCH, TMP1 2489 | addu TMP0, DISPATCH, TMP1
2153 | decode_RD8a RD, INS 2490 | decode_RD8a RD, INS
2154 | lw AT, 0(TMP0) 2491 | lw AT, 0(TMP0)
@@ -2158,13 +2495,30 @@ static void build_subroutines(BuildCtx *ctx)
2158 | jr AT 2495 | jr AT
2159 |. decode_RD8b RD 2496 |. decode_RD8b RD
2160 |2: 2497 |2:
2498 | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
2499 | bnez TMP2, >3
2500 |. lw TMP1, FRAME_PC(BASE)
2501 | // Check frame below fast function.
2502 | andi TMP0, TMP1, FRAME_TYPE
2503 | bnez TMP0, >3 // Trace stitching continuation?
2504 |. nop
2505 | // Otherwise set KBASE for Lua function below fast function.
2506 | lw TMP2, -4(TMP1)
2507 | decode_RA8a TMP0, TMP2
2508 | decode_RA8b TMP0
2509 | subu TMP1, BASE, TMP0
2510 | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
2511 | lw TMP1, LFUNC:TMP2->pc
2512 | lw KBASE, PC2PROTO(k)(TMP1)
2513 |3:
2161 | addiu RC, MULTRES, -8 2514 | addiu RC, MULTRES, -8
2162 | jr AT 2515 | jr AT
2163 |. addu RA, RA, BASE 2516 |. addu RA, RA, BASE
2164 | 2517 |
2165 |3: // Rethrow error from the right C frame. 2518 |9: // Rethrow error from the right C frame.
2166 | load_got lj_err_run 2519 | load_got lj_err_trace
2167 | call_intern lj_err_run // (lua_State *L) 2520 | sub CARG2, r0, CRET1
2521 | call_intern lj_err_trace // (lua_State *L, int errcode)
2168 |. move CARG1, L 2522 |. move CARG1, L
2169 |.endif 2523 |.endif
2170 | 2524 |
@@ -2172,8 +2526,9 @@ static void build_subroutines(BuildCtx *ctx)
2172 |//-- Math helper functions ---------------------------------------------- 2526 |//-- Math helper functions ----------------------------------------------
2173 |//----------------------------------------------------------------------- 2527 |//-----------------------------------------------------------------------
2174 | 2528 |
2529 |// Hard-float round to integer.
2175 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2530 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2176 |.macro vm_round, func 2531 |.macro vm_round_hf, func
2177 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2532 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2178 | mtc1 r0, f4 2533 | mtc1 r0, f4
2179 | mtc1 TMP0, f5 2534 | mtc1 TMP0, f5
@@ -2215,6 +2570,12 @@ static void build_subroutines(BuildCtx *ctx)
2215 |. mov.d FRET1, FARG1 2570 |. mov.d FRET1, FARG1
2216 |.endmacro 2571 |.endmacro
2217 | 2572 |
2573 |.macro vm_round, func // Wrapper: expands to vm_round_hf when FPU is set, otherwise to nothing.
2574 |.if FPU
2575 | vm_round_hf, func
2576 |.endif
2577 |.endmacro
2578 |
2218 |->vm_floor: 2579 |->vm_floor:
2219 | vm_round floor 2580 | vm_round floor
2220 |->vm_ceil: 2581 |->vm_ceil:
@@ -2224,10 +2585,286 @@ static void build_subroutines(BuildCtx *ctx)
2224 | vm_round trunc 2585 | vm_round trunc
2225 |.endif 2586 |.endif
2226 | 2587 |
2588 |// Soft-float integer to number conversion.
2589 |.macro sfi2d, AHI, ALO // Expands to nothing when FPU is set; the body ends with its own jr ra.
2590 |.if not FPU
2591 | beqz ALO, >9 // Handle zero first.
2592 |. sra TMP0, ALO, 31 // Delay slot: TMP0 = all ones if ALO is negative, else 0.
2593 | xor TMP1, ALO, TMP0
2594 | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2595 | clz AHI, TMP1
2596 | andi TMP0, TMP0, 0x800 // Mask sign bit.
2597 | li AT, 0x3ff+31-1
2598 | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2599 | subu AHI, AT, AHI // Exponent - 1 in AHI.
2600 | sll ALO, TMP1, 21 // Low word = low 11 bits of the aligned mantissa, moved to the top.
2601 | or AHI, AHI, TMP0 // Sign | Exponent.
2602 | srl TMP1, TMP1, 11 // High mantissa bits, with the leading 1 at bit 20.
2603 | sll AHI, AHI, 20 // Align left.
2604 | jr ra
2605 |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
2606 |9:
2607 | jr ra
2608 |. li AHI, 0 // Delay slot: zero input gives high word 0 (ALO is already 0).
2609 |.endif
2610 |.endmacro
2611 |
2612 |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
2613 |->vm_sfi2d_1:
2614 | sfi2d SFARG1HI, SFARG1LO
2615 |
2616 |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2617 |->vm_sfi2d_2:
2618 | sfi2d SFARG2HI, SFARG2LO
2619 |
2620 |// Soft-float comparison. Equivalent to c.eq.d.
2621 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2622 |->vm_sfcmpeq:
2623 |.if not FPU
2624 | sll AT, SFARG1HI, 1 // AT = arg1 high word with the sign bit shifted out.
2625 | sll TMP0, SFARG2HI, 1 // TMP0 = the same for arg2.
2626 | or CRET1, SFARG1LO, SFARG2LO
2627 | or TMP1, AT, TMP0
2628 | or TMP1, TMP1, CRET1 // TMP1 == 0 iff all non-sign bits of both args are zero.
2629 | beqz TMP1, >8 // Both args +-0: return 1.
2630 |. sltu CRET1, r0, SFARG1LO // Delay slot: CRET1 = (SFARG1LO != 0).
2631 | lui TMP1, 0xffe0 // TMP1 = 0xffe00000, the threshold for the NaN test.
2632 | addu AT, AT, CRET1
2633 | sltu CRET1, r0, SFARG2LO
2634 | sltu AT, TMP1, AT // AT = 1 if (SFARG1HI << 1) + (SFARG1LO != 0) > 0xffe00000 (unsigned).
2635 | addu TMP0, TMP0, CRET1
2636 | sltu TMP0, TMP1, TMP0 // The same test for arg2.
2637 | or TMP1, AT, TMP0
2638 | bnez TMP1, >9 // Either arg is NaN: return 0;
2639 |. xor TMP0, SFARG1HI, SFARG2HI
2640 | xor TMP1, SFARG1LO, SFARG2LO
2641 | or AT, TMP0, TMP1 // AT == 0 iff both words are identical.
2642 | jr ra
2643 |. sltiu CRET1, AT, 1 // Same values: return 1.
2644 |8:
2645 | jr ra
2646 |. li CRET1, 1
2647 |9:
2648 | jr ra
2649 |. li CRET1, 0
2650 |.endif
2651 |
2652 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2653 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2654 |->vm_sfcmpult:
2655 |.if not FPU
2656 | b >1 // Share the body of vm_sfcmpolt below.
2657 |. li CRET2, 1 // Delay slot: CRET2 = value returned when either arg is NaN.
2658 |.endif
2659 |
2660 |->vm_sfcmpolt:
2661 |.if not FPU
2662 | li CRET2, 0 // Value returned when either arg is NaN.
2663 |1:
2664 | sll AT, SFARG1HI, 1 // AT = arg1 high word with the sign bit shifted out.
2665 | sll TMP0, SFARG2HI, 1 // TMP0 = the same for arg2.
2666 | or CRET1, SFARG1LO, SFARG2LO
2667 | or TMP1, AT, TMP0
2668 | or TMP1, TMP1, CRET1
2669 | beqz TMP1, >8 // Both args +-0: return 0.
2670 |. sltu CRET1, r0, SFARG1LO // Delay slot: CRET1 = (SFARG1LO != 0); this is 0 when the branch is taken.
2671 | lui TMP1, 0xffe0
2672 | addu AT, AT, CRET1
2673 | sltu CRET1, r0, SFARG2LO
2674 | sltu AT, TMP1, AT // AT = 1 if (SFARG1HI << 1) + (SFARG1LO != 0) > 0xffe00000 (unsigned).
2675 | addu TMP0, TMP0, CRET1
2676 | sltu TMP0, TMP1, TMP0 // The same test for arg2.
2677 | or TMP1, AT, TMP0
2678 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2679 |. and AT, SFARG1HI, SFARG2HI // Delay slot: bit 31 of AT is set iff both sign bits are set.
2680 | bltz AT, >5 // Both args negative?
2681 |. nop
2682 | beq SFARG1HI, SFARG2HI, >8 // Equal high words: the low words decide.
2683 |. sltu CRET1, SFARG1LO, SFARG2LO // Delay slot: unsigned compare of the low words.
2684 | jr ra
2685 |. slt CRET1, SFARG1HI, SFARG2HI // Delay slot: signed compare of the high words.
2686 |5: // Swap conditions if both operands are negative.
2687 | beq SFARG1HI, SFARG2HI, >8
2688 |. sltu CRET1, SFARG2LO, SFARG1LO
2689 | jr ra
2690 |. slt CRET1, SFARG2HI, SFARG1HI
2691 |8:
2692 | jr ra // Return CRET1 as set in the preceding delay slot.
2693 |. nop
2694 |9:
2695 | jr ra
2696 |. move CRET1, CRET2 // Delay slot: NaN case returns the value preset in CRET2.
2697 |.endif
2698 |
2699 |->vm_sfcmpogt: // Same instruction sequence as vm_sfcmpolt with the two arguments exchanged; NaN returns 0.
2700 |.if not FPU
2701 | sll AT, SFARG2HI, 1 // AT = arg2 high word with the sign bit shifted out.
2702 | sll TMP0, SFARG1HI, 1 // TMP0 = the same for arg1.
2703 | or CRET1, SFARG2LO, SFARG1LO
2704 | or TMP1, AT, TMP0
2705 | or TMP1, TMP1, CRET1
2706 | beqz TMP1, >8 // Both args +-0: return 0.
2707 |. sltu CRET1, r0, SFARG2LO // Delay slot: CRET1 = (SFARG2LO != 0); this is 0 when the branch is taken.
2708 | lui TMP1, 0xffe0
2709 | addu AT, AT, CRET1
2710 | sltu CRET1, r0, SFARG1LO
2711 | sltu AT, TMP1, AT // AT = 1 if (SFARG2HI << 1) + (SFARG2LO != 0) > 0xffe00000 (unsigned).
2712 | addu TMP0, TMP0, CRET1
2713 | sltu TMP0, TMP1, TMP0 // The same test for arg1.
2714 | or TMP1, AT, TMP0
2715 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2716 |. and AT, SFARG2HI, SFARG1HI // Delay slot: bit 31 of AT is set iff both sign bits are set.
2717 | bltz AT, >5 // Both args negative?
2718 |. nop
2719 | beq SFARG2HI, SFARG1HI, >8 // Equal high words: the low words decide.
2720 |. sltu CRET1, SFARG2LO, SFARG1LO // Delay slot: unsigned compare of the low words.
2721 | jr ra
2722 |. slt CRET1, SFARG2HI, SFARG1HI // Delay slot: signed compare of the high words.
2723 |5: // Swap conditions if both operands are negative.
2724 | beq SFARG2HI, SFARG1HI, >8
2725 |. sltu CRET1, SFARG1LO, SFARG2LO
2726 | jr ra
2727 |. slt CRET1, SFARG1HI, SFARG2HI
2728 |8:
2729 | jr ra // Return CRET1 as set in the preceding delay slot.
2730 |. nop
2731 |9:
2732 | jr ra
2733 |. li CRET1, 0 // Delay slot: the NaN case always returns 0 here.
2734 |.endif
2735 |
2736 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2737 |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2738 |->vm_sfcmpolex:
2739 |.if not FPU
2740 | sll AT, SFARG1HI, 1 // AT = arg1 high word with the sign bit shifted out.
2741 | sll TMP0, SFARG2HI, 1 // TMP0 = the same for arg2.
2742 | or CRET1, SFARG1LO, SFARG2LO
2743 | or TMP1, AT, TMP0
2744 | or TMP1, TMP1, CRET1
2745 | beqz TMP1, >8 // Both args +-0: return 1.
2746 |. sltu CRET1, r0, SFARG1LO // Delay slot: CRET1 = (SFARG1LO != 0).
2747 | lui TMP1, 0xffe0
2748 | addu AT, AT, CRET1
2749 | sltu CRET1, r0, SFARG2LO
2750 | sltu AT, TMP1, AT // AT = 1 if (SFARG1HI << 1) + (SFARG1LO != 0) > 0xffe00000 (unsigned).
2751 | addu TMP0, TMP0, CRET1
2752 | sltu TMP0, TMP1, TMP0 // The same test for arg2.
2753 | or TMP1, AT, TMP0
2754 | bnez TMP1, >9 // Either arg is NaN: return 0;
2755 |. and AT, SFARG1HI, SFARG2HI // Delay slot: bit 31 of AT is set iff both sign bits are set.
2756 | xor AT, AT, TMP3 // Bit 31 of the caller-supplied TMP3 inverts the sign test below.
2757 | bltz AT, >5 // Both args negative?
2758 |. nop
2759 | beq SFARG1HI, SFARG2HI, >6 // Equal high words: the low words decide.
2760 |. sltu CRET1, SFARG2LO, SFARG1LO // Delay slot: unsigned compare of the low words.
2761 | jr ra
2762 |. slt CRET1, SFARG2HI, SFARG1HI // Delay slot: signed compare of the high words.
2763 |5: // Swap conditions if both operands are negative.
2764 | beq SFARG1HI, SFARG2HI, >6
2765 |. sltu CRET1, SFARG1LO, SFARG2LO
2766 | slt CRET1, SFARG1HI, SFARG2HI // High words differ: overwrite CRET1, then fall through to the return at 6:.
2767 |6:
2768 | jr ra
2769 |. nop
2770 |8:
2771 | jr ra
2772 |. li CRET1, 1
2773 |9:
2774 | jr ra
2775 |. li CRET1, 0
2776 |.endif
2777 |
2778 |.macro sfmin_max, name, fpcall // Emits ->vm_sf<name>: picks arg1 or arg2 depending on the result of fpcall.
2779 |->vm_sf .. name:
2780 |.if JIT and not FPU
2781 | move TMP2, ra // Save ra, since the bal below overwrites it.
2782 | bal ->fpcall
2783 |. nop
2784 | move TMP0, CRET1 // TMP0 = comparison result.
2785 | move SFRETHI, SFARG1HI // Default result: arg1.
2786 | move SFRETLO, SFARG1LO
2787 | move ra, TMP2 // Restore the return address.
2788 | movz SFRETHI, SFARG2HI, TMP0 // Comparison returned 0: result is arg2 instead.
2789 | jr ra
2790 |. movz SFRETLO, SFARG2LO, TMP0
2791 |.endif
2792 |.endmacro
2793 |
2794 | sfmin_max min, vm_sfcmpolt
2795 | sfmin_max max, vm_sfcmpogt
2796 |
2227 |//----------------------------------------------------------------------- 2797 |//-----------------------------------------------------------------------
2228 |//-- Miscellaneous functions -------------------------------------------- 2798 |//-- Miscellaneous functions --------------------------------------------
2229 |//----------------------------------------------------------------------- 2799 |//-----------------------------------------------------------------------
2230 | 2800 |
2801 |.define NEXT_TAB, TAB:CARG1
2802 |.define NEXT_IDX, CARG2
2803 |.define NEXT_ASIZE, CARG3
2804 |.define NEXT_NIL, CARG4
2805 |.define NEXT_TMP0, r12
2806 |.define NEXT_TMP1, r13
2807 |.define NEXT_TMP2, r14
2808 |.define NEXT_RES_VK, CRET1
2809 |.define NEXT_RES_IDX, CRET2
2810 |.define NEXT_RES_PTR, sp // Array-part results are written to the four words at sp.
2811 |.define NEXT_RES_VAL_I, 0(sp)
2812 |.define NEXT_RES_VAL_IT, 4(sp)
2813 |.define NEXT_RES_KEY_I, 8(sp)
2814 |.define NEXT_RES_KEY_IT, 12(sp)
2815 |
2816 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2817 |// Next idx returned in CRET2.
2818 |->vm_next:
2819 |.if JIT and ENDIAN_LE
2820 | lw NEXT_ASIZE, NEXT_TAB->asize
2821 | lw NEXT_TMP0, NEXT_TAB->array // NEXT_TMP0 = array base pointer.
2822 | li NEXT_NIL, LJ_TNIL
2823 |1: // Traverse array part.
2824 | sltu AT, NEXT_IDX, NEXT_ASIZE // AT = (idx < asize).
2825 | sll NEXT_TMP1, NEXT_IDX, 3
2826 | beqz AT, >5 // idx >= asize: continue with the hash part.
2827 |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 // Delay slot: NEXT_TMP1 = array + idx*8.
2828 | lw NEXT_TMP2, 4(NEXT_TMP1) // Word at offset 4: compared with LJ_TNIL and stored as the value's type word below.
2829 | sw NEXT_IDX, NEXT_RES_KEY_I // Store the pre-increment index as the key's integer word.
2830 | beq NEXT_TMP2, NEXT_NIL, <1 // Nil slot: skip it and retry.
2831 |. addiu NEXT_IDX, NEXT_IDX, 1 // Delay slot: idx++ whether or not the branch is taken.
2832 | lw NEXT_TMP0, 0(NEXT_TMP1) // NEXT_TMP0 is reused for the word at offset 0 of the slot.
2833 | li AT, LJ_TISNUM
2834 | sw NEXT_TMP2, NEXT_RES_VAL_IT
2835 | sw AT, NEXT_RES_KEY_IT // The key's type word is LJ_TISNUM.
2836 | sw NEXT_TMP0, NEXT_RES_VAL_I
2837 | move NEXT_RES_VK, NEXT_RES_PTR // Return a pointer to the result area at sp.
2838 | jr ra
2839 |. move NEXT_RES_IDX, NEXT_IDX // Delay slot: return the next index in CRET2.
2840 |
2841 |5: // Traverse hash part.
2842 | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE // Hash slot index = idx - asize.
2843 | lw NODE:NEXT_RES_VK, NEXT_TAB->node
2844 | sll NEXT_TMP2, NEXT_RES_IDX, 5
2845 | lw NEXT_TMP0, NEXT_TAB->hmask
2846 | sll AT, NEXT_RES_IDX, 3
2847 | subu AT, NEXT_TMP2, AT // AT = index*32 - index*8 = index*24 (presumably sizeof(Node); confirm).
2848 | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
2849 |6:
2850 | sltu AT, NEXT_TMP0, NEXT_RES_IDX // AT = (hmask < index): past the last hash slot.
2851 | bnez AT, >8
2852 |. nop
2853 | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it
2854 | bne NEXT_TMP2, NEXT_NIL, >9 // Non-nil value: return the node pointer in CRET1.
2855 |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 // Delay slot: index++ on both paths.
2856 | // Skip holes in hash part.
2857 | b <6
2858 |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
2859 |
2860 |8: // End of iteration. Set the key to nil (not the value).
2861 | sw NEXT_NIL, NEXT_RES_KEY_IT
2862 | move NEXT_RES_VK, NEXT_RES_PTR
2863 |9:
2864 | jr ra
2865 |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE // Delay slot: convert back to an absolute index by adding asize.
2866 |.endif
2867 |
2231 |//----------------------------------------------------------------------- 2868 |//-----------------------------------------------------------------------
2232 |//-- FFI helper functions ----------------------------------------------- 2869 |//-- FFI helper functions -----------------------------------------------
2233 |//----------------------------------------------------------------------- 2870 |//-----------------------------------------------------------------------
@@ -2243,10 +2880,10 @@ static void build_subroutines(BuildCtx *ctx)
2243 | sw r1, CTSTATE->cb.slot 2880 | sw r1, CTSTATE->cb.slot
2244 | sw CARG1, CTSTATE->cb.gpr[0] 2881 | sw CARG1, CTSTATE->cb.gpr[0]
2245 | sw CARG2, CTSTATE->cb.gpr[1] 2882 | sw CARG2, CTSTATE->cb.gpr[1]
2246 | sdc1 FARG1, CTSTATE->cb.fpr[0] 2883 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2247 | sw CARG3, CTSTATE->cb.gpr[2] 2884 | sw CARG3, CTSTATE->cb.gpr[2]
2248 | sw CARG4, CTSTATE->cb.gpr[3] 2885 | sw CARG4, CTSTATE->cb.gpr[3]
2249 | sdc1 FARG2, CTSTATE->cb.fpr[1] 2886 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2250 | addiu TMP0, sp, CFRAME_SPACE+16 2887 | addiu TMP0, sp, CFRAME_SPACE+16
2251 | sw TMP0, CTSTATE->cb.stack 2888 | sw TMP0, CTSTATE->cb.stack
2252 | sw r0, SAVE_PC // Any value outside of bytecode is ok. 2889 | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2256,15 +2893,16 @@ static void build_subroutines(BuildCtx *ctx)
2256 | // Returns lua_State *. 2893 | // Returns lua_State *.
2257 | lw BASE, L:CRET1->base 2894 | lw BASE, L:CRET1->base
2258 | lw RC, L:CRET1->top 2895 | lw RC, L:CRET1->top
2896 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2259 | move L, CRET1 2897 | move L, CRET1
2260 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2898 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2261 | lw LFUNC:RB, FRAME_FUNC(BASE) 2899 | lw LFUNC:RB, FRAME_FUNC(BASE)
2262 | mtc1 TMP3, TOBIT 2900 | .FPU mtc1 TMP3, TOBIT
2263 | li_vmstate INTERP 2901 | li_vmstate INTERP
2264 | li TISNIL, LJ_TNIL 2902 | li TISNIL, LJ_TNIL
2265 | subu RC, RC, BASE 2903 | subu RC, RC, BASE
2266 | st_vmstate 2904 | st_vmstate
2267 | cvt.d.s TOBIT, TOBIT 2905 | .FPU cvt.d.s TOBIT, TOBIT
2268 | ins_callt 2906 | ins_callt
2269 |.endif 2907 |.endif
2270 | 2908 |
@@ -2278,11 +2916,11 @@ static void build_subroutines(BuildCtx *ctx)
2278 | move CARG2, RA 2916 | move CARG2, RA
2279 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) 2917 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2280 |. move CARG1, CTSTATE 2918 |. move CARG1, CTSTATE
2919 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2281 | lw CRET1, CTSTATE->cb.gpr[0] 2920 | lw CRET1, CTSTATE->cb.gpr[0]
2282 | ldc1 FRET1, CTSTATE->cb.fpr[0] 2921 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2283 | lw CRET2, CTSTATE->cb.gpr[1]
2284 | b ->vm_leave_unw 2922 | b ->vm_leave_unw
2285 |. ldc1 FRET2, CTSTATE->cb.fpr[1] 2923 |. lw CRET2, CTSTATE->cb.gpr[1]
2286 |.endif 2924 |.endif
2287 | 2925 |
2288 |->vm_ffi_call: // Call C function via FFI. 2926 |->vm_ffi_call: // Call C function via FFI.
@@ -2314,8 +2952,8 @@ static void build_subroutines(BuildCtx *ctx)
2314 | lw CARG2, CCSTATE->gpr[1] 2952 | lw CARG2, CCSTATE->gpr[1]
2315 | lw CARG3, CCSTATE->gpr[2] 2953 | lw CARG3, CCSTATE->gpr[2]
2316 | lw CARG4, CCSTATE->gpr[3] 2954 | lw CARG4, CCSTATE->gpr[3]
2317 | ldc1 FARG1, CCSTATE->fpr[0] 2955 | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2318 | ldc1 FARG2, CCSTATE->fpr[1] 2956 | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2319 | jalr CFUNCADDR 2957 | jalr CFUNCADDR
2320 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2958 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2321 | lw CCSTATE:TMP1, -12(r16) 2959 | lw CCSTATE:TMP1, -12(r16)
@@ -2323,8 +2961,13 @@ static void build_subroutines(BuildCtx *ctx)
2323 | lw ra, -4(r16) 2961 | lw ra, -4(r16)
2324 | sw CRET1, CCSTATE:TMP1->gpr[0] 2962 | sw CRET1, CCSTATE:TMP1->gpr[0]
2325 | sw CRET2, CCSTATE:TMP1->gpr[1] 2963 | sw CRET2, CCSTATE:TMP1->gpr[1]
2964 |.if FPU
2326 | sdc1 FRET1, CCSTATE:TMP1->fpr[0] 2965 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2327 | sdc1 FRET2, CCSTATE:TMP1->fpr[1] 2966 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2967 |.else
2968 | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2969 | sw CARG2, CCSTATE:TMP1->gpr[3]
2970 |.endif
2328 | move sp, r16 2971 | move sp, r16
2329 | jr ra 2972 | jr ra
2330 |. move r16, TMP2 2973 |. move r16, TMP2
@@ -2348,82 +2991,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2348 2991
// Ordered comparisons (ISLT/ISGE/ISLE/ISGT), each followed by a conditional jump.
// NOTE(review): this listing interleaves the pre-change (left) and post-change
// (right) columns of a diff. The comments added here describe the right-hand column.
//
// The right-hand column wraps the handler in the bc_comp macro:
//   FRA, FRD      FP registers that receive the RA / RD operands (FPU builds).
//   RAHI, RALO    registers that receive the HI / LO words of the RA operand.
//   RDHI, RDLO    registers that receive the HI / LO words of the RD operand.
//   movop         conditional move that zeroes the jump offset from a GPR flag
//                 (integer path and soft-float path).
//   fmovop        conditional move that zeroes the jump offset from the FP
//                 condition flag (FPU path).
//   fcomp         FP compare instruction (FPU path).
//   sfcomp        soft-float compare routine reached via bal (non-FPU path).
// Lines marked "|." follow a branch; presumably they fill its delay slot.
2349 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2992 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2350 | // RA = src1*8, RD = src2*8, JMP with RD = target 2993 | // RA = src1*8, RD = src2*8, JMP with RD = target
2351 | addu CARG2, BASE, RA 2994 |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2352 | addu CARG3, BASE, RD 2995 | addu RA, BASE, RA
2353 | lw TMP0, HI(CARG2) 2996 | addu RD, BASE, RD
2354 | lw TMP1, HI(CARG3) 2997 | lw RAHI, HI(RA)
2355 | ldc1 f0, 0(CARG2) 2998 | lw RDHI, HI(RD)
2356 | ldc1 f2, 0(CARG3)
2357 | sltiu TMP0, TMP0, LJ_TISNUM
2358 | sltiu TMP1, TMP1, LJ_TISNUM
2359 | lhu TMP2, OFS_RD(PC) 2999 | lhu TMP2, OFS_RD(PC)
2360 | and TMP0, TMP0, TMP1
2361 | addiu PC, PC, 4 3000 | addiu PC, PC, 4
// Right column: leave the fast path as soon as either HI word differs from TISNUM.
2362 | beqz TMP0, ->vmeta_comp 3001 | bne RAHI, TISNUM, >2
2363 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) 3002 |. lw RALO, LO(RA)
2364 | decode_RD4b TMP2 3003 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2365 | addu TMP2, TMP2, TMP1 3004 | lw RDLO, LO(RD)
2366 if (op == BC_ISLT || op == BC_ISGE) { 3005 | bne RDHI, TISNUM, >5
2367 | c.olt.d f0, f2 3006 |. decode_RD4b TMP2
// Right column: the integer compare is hard-wired to SFARG1LO < SFARG2LO. The
// ISLE/ISGT expansions below bind RA to SFARG2* and RD to SFARG1*, so for those
// two opcodes this instruction evaluates RD < RA.
2368 } else { 3007 | slt AT, SFARG1LO, SFARG2LO
2369 | c.ole.d f0, f2 3008 | addu TMP2, TMP2, TMP3
2370 } 3009 | movop TMP2, r0, AT
2371 if (op == BC_ISLT || op == BC_ISLE) {
2372 | movf TMP2, r0
2373 } else {
2374 | movt TMP2, r0
2375 }
2376 | addu PC, PC, TMP2
// Common exit: add the (possibly zeroed) jump offset to PC and dispatch.
2377 |1: 3010 |1:
3011 | addu PC, PC, TMP2
2378 | ins_next 3012 | ins_next
3013 |
3014 |2: // RA is not an integer.
3015 | sltiu AT, RAHI, LJ_TISNUM
3016 | beqz AT, ->vmeta_comp
3017 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3018 | sltiu AT, RDHI, LJ_TISNUM
3019 |.if FPU
3020 | ldc1 FRA, 0(RA)
3021 | ldc1 FRD, 0(RD)
3022 |.else
3023 | lw RDLO, LO(RD)
3024 |.endif
3025 | beqz AT, >4
3026 |. decode_RD4b TMP2
3027 |3: // RA and RD are both numbers.
3028 |.if FPU
// The FP compare is hard-wired to f20, f22 regardless of the FRA/FRD binding,
// mirroring the fixed SFARG1LO/SFARG2LO order of the integer compare above.
3029 | fcomp f20, f22
3030 | addu TMP2, TMP2, TMP3
3031 | b <1
3032 |. fmovop TMP2, r0
3033 |.else
// Soft-float: the compare routine's result is read from CRET1 by movop.
3034 | bal sfcomp
3035 |. addu TMP2, TMP2, TMP3
3036 | b <1
3037 |. movop TMP2, r0, CRET1
3038 |.endif
3039 |
3040 |4: // RA is a number, RD is not a number.
3041 | bne RDHI, TISNUM, ->vmeta_comp
3042 | // RA is a number, RD is an integer. Convert RD to a number.
3043 |.if FPU
3044 |. lwc1 FRD, LO(RD)
3045 | b <3
3046 |. cvt.d.w FRD, FRD
3047 |.else
3048 |. nop
// Pick the conversion helper that matches the register pair RD was bound to.
// NOTE(review): vm_sfi2d_1 / vm_sfi2d_2 are defined outside this chunk.
3049 |.if "RDHI" == "SFARG1HI"
3050 | bal ->vm_sfi2d_1
3051 |.else
3052 | bal ->vm_sfi2d_2
3053 |.endif
3054 |. nop
3055 | b <3
3056 |. nop
3057 |.endif
3058 |
3059 |5: // RA is an integer, RD is not an integer
3060 | sltiu AT, RDHI, LJ_TISNUM
3061 | beqz AT, ->vmeta_comp
3062 | // RA is an integer, RD is a number. Convert RA to a number.
3063 |.if FPU
3064 |. mtc1 RALO, FRA
3065 | ldc1 FRD, 0(RD)
3066 | b <3
3067 | cvt.d.w FRA, FRA
3068 |.else
3069 |. nop
3070 |.if "RAHI" == "SFARG1HI"
3071 | bal ->vm_sfi2d_1
3072 |.else
3073 | bal ->vm_sfi2d_2
3074 |.endif
3075 |. nop
3076 | b <3
3077 |. nop
3078 |.endif
3079 |.endmacro
3080 |
// Per-opcode expansions. ISLT/ISGE keep RA in f20/SFARG1* and use an ordered
// less-than compare; ISLE/ISGT swap the operand registers and use an unordered
// less-than compare. The two opcodes of each pair differ only in the
// conditional-move variants (movz/movf versus movn/movt).
3081 if (op == BC_ISLT) {
3082 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
3083 } else if (op == BC_ISGE) {
3084 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
3085 } else if (op == BC_ISLE) {
3086 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
3087 } else {
3088 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
3089 }
2379 break; 3090 break;
2380 3091
2381 case BC_ISEQV: case BC_ISNEV: 3092 case BC_ISEQV: case BC_ISNEV:
2382 vk = op == BC_ISEQV; 3093 vk = op == BC_ISEQV;
2383 | // RA = src1*8, RD = src2*8, JMP with RD = target 3094 | // RA = src1*8, RD = src2*8, JMP with RD = target
2384 | addu RA, BASE, RA 3095 | addu RA, BASE, RA
2385 | addiu PC, PC, 4 3096 | addiu PC, PC, 4
2386 | lw TMP0, HI(RA)
2387 | ldc1 f0, 0(RA)
2388 | addu RD, BASE, RD 3097 | addu RD, BASE, RD
3098 | lw SFARG1HI, HI(RA)
2389 | lhu TMP2, -4+OFS_RD(PC) 3099 | lhu TMP2, -4+OFS_RD(PC)
2390 | lw TMP1, HI(RD) 3100 | lw SFARG2HI, HI(RD)
2391 | ldc1 f2, 0(RD)
2392 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3101 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2393 | sltiu AT, TMP0, LJ_TISNUM 3102 | sltu AT, TISNUM, SFARG1HI
2394 | sltiu CARG1, TMP1, LJ_TISNUM 3103 | sltu TMP0, TISNUM, SFARG2HI
2395 | decode_RD4b TMP2 3104 | or AT, AT, TMP0
2396 | and AT, AT, CARG1
2397 | beqz AT, >5
2398 |. addu TMP2, TMP2, TMP3
2399 | c.eq.d f0, f2
2400 if (vk) { 3105 if (vk) {
2401 | movf TMP2, r0 3106 | beqz AT, ->BC_ISEQN_Z
2402 } else { 3107 } else {
2403 | movt TMP2, r0 3108 | beqz AT, ->BC_ISNEN_Z
2404 } 3109 }
2405 |1: 3110 |. decode_RD4b TMP2
2406 | addu PC, PC, TMP2 3111 | // Either or both types are not numbers.
2407 | ins_next 3112 | lw SFARG1LO, LO(RA)
2408 |5: // Either or both types are not numbers. 3113 | lw SFARG2LO, LO(RD)
2409 | lw CARG2, LO(RA) 3114 | addu TMP2, TMP2, TMP3
2410 | lw CARG3, LO(RD)
2411 |.if FFI 3115 |.if FFI
2412 | li TMP3, LJ_TCDATA 3116 | li TMP3, LJ_TCDATA
2413 | beq TMP0, TMP3, ->vmeta_equal_cd 3117 | beq SFARG1HI, TMP3, ->vmeta_equal_cd
2414 |.endif 3118 |.endif
2415 |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? 3119 |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
2416 |.if FFI 3120 |.if FFI
2417 | beq TMP1, TMP3, ->vmeta_equal_cd 3121 | beq SFARG2HI, TMP3, ->vmeta_equal_cd
2418 |.endif 3122 |.endif
2419 |. xor TMP3, CARG2, CARG3 // Same tv? 3123 |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
2420 | xor TMP1, TMP1, TMP0 // Same type? 3124 | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
2421 | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? 3125 | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
2422 | movz TMP3, r0, AT // Ignore tv if primitive. 3126 | movz TMP3, r0, AT // Ignore tv if primitive.
2423 | movn CARG1, r0, TMP1 // Tab/ud and same type? 3127 | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
2424 | or AT, TMP1, TMP3 // Same type && (pri||same tv). 3128 | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
2425 | movz CARG1, r0, AT 3129 | movz TMP0, r0, AT
2426 | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. 3130 | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
2427 if (vk) { 3131 if (vk) {
2428 |. movn TMP2, r0, AT 3132 |. movn TMP2, r0, AT
2429 } else { 3133 } else {
@@ -2431,15 +3135,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2431 } 3135 }
2432 | // Different tables or userdatas. Need to check __eq metamethod. 3136 | // Different tables or userdatas. Need to check __eq metamethod.
2433 | // Field metatable must be at same offset for GCtab and GCudata! 3137 | // Field metatable must be at same offset for GCtab and GCudata!
2434 | lw TAB:TMP1, TAB:CARG2->metatable 3138 | lw TAB:TMP1, TAB:SFARG1LO->metatable
2435 | beqz TAB:TMP1, <1 // No metatable? 3139 | beqz TAB:TMP1, >1 // No metatable?
2436 |. nop 3140 |. nop
2437 | lbu TMP1, TAB:TMP1->nomm 3141 | lbu TMP1, TAB:TMP1->nomm
2438 | andi TMP1, TMP1, 1<<MM_eq 3142 | andi TMP1, TMP1, 1<<MM_eq
2439 | bnez TMP1, <1 // Or 'no __eq' flag set? 3143 | bnez TMP1, >1 // Or 'no __eq' flag set?
2440 |. nop 3144 |. nop
2441 | b ->vmeta_equal // Handle __eq metamethod. 3145 | b ->vmeta_equal // Handle __eq metamethod.
2442 |. li CARG4, 1-vk // ne = 0 or 1. 3146 |. li TMP0, 1-vk // ne = 0 or 1.
3147 |1:
3148 | addu PC, PC, TMP2
3149 | ins_next
2443 break; 3150 break;
2444 3151
2445 case BC_ISEQS: case BC_ISNES: 3152 case BC_ISEQS: case BC_ISNES:
@@ -2476,38 +3183,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2476 vk = op == BC_ISEQN; 3183 vk = op == BC_ISEQN;
2477 | // RA = src*8, RD = num_const*8, JMP with RD = target 3184 | // RA = src*8, RD = num_const*8, JMP with RD = target
2478 | addu RA, BASE, RA 3185 | addu RA, BASE, RA
2479 | addiu PC, PC, 4 3186 | addu RD, KBASE, RD
2480 | lw TMP0, HI(RA) 3187 | lw SFARG1HI, HI(RA)
2481 | ldc1 f0, 0(RA) 3188 | lw SFARG2HI, HI(RD)
2482 | addu RD, KBASE, RD 3189 | lhu TMP2, OFS_RD(PC)
2483 | lhu TMP2, -4+OFS_RD(PC) 3190 | addiu PC, PC, 4
2484 | ldc1 f2, 0(RD)
2485 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3191 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2486 | sltiu AT, TMP0, LJ_TISNUM
2487 | decode_RD4b TMP2 3192 | decode_RD4b TMP2
2488 |.if FFI
2489 | beqz AT, >5
2490 |.else
2491 | beqz AT, >1
2492 |.endif
2493 |. addu TMP2, TMP2, TMP3
2494 | c.eq.d f0, f2
2495 if (vk) { 3193 if (vk) {
2496 | movf TMP2, r0 3194 |->BC_ISEQN_Z:
2497 | addu PC, PC, TMP2 3195 } else {
3196 |->BC_ISNEN_Z:
3197 }
3198 | bne SFARG1HI, TISNUM, >3
3199 |. lw SFARG1LO, LO(RA)
3200 | lw SFARG2LO, LO(RD)
3201 | addu TMP2, TMP2, TMP3
3202 | bne SFARG2HI, TISNUM, >6
3203 |. xor AT, SFARG1LO, SFARG2LO
3204 if (vk) {
3205 | movn TMP2, r0, AT
2498 |1: 3206 |1:
3207 | addu PC, PC, TMP2
3208 |2:
2499 } else { 3209 } else {
2500 | movt TMP2, r0 3210 | movz TMP2, r0, AT
2501 |1: 3211 |1:
3212 |2:
2502 | addu PC, PC, TMP2 3213 | addu PC, PC, TMP2
2503 } 3214 }
2504 | ins_next 3215 | ins_next
3216 |
3217 |3: // RA is not an integer.
3218 | sltiu AT, SFARG1HI, LJ_TISNUM
2505 |.if FFI 3219 |.if FFI
2506 |5: 3220 | beqz AT, >8
2507 | li AT, LJ_TCDATA 3221 |.else
2508 | beq TMP0, AT, ->vmeta_equal_cd 3222 | beqz AT, <2
3223 |.endif
3224 |. addu TMP2, TMP2, TMP3
3225 | sltiu AT, SFARG2HI, LJ_TISNUM
3226 |.if FPU
3227 | ldc1 f20, 0(RA)
3228 | ldc1 f22, 0(RD)
3229 |.endif
3230 | beqz AT, >5
3231 |. lw SFARG2LO, LO(RD)
3232 |4: // RA and RD are both numbers.
3233 |.if FPU
3234 | c.eq.d f20, f22
3235 | b <1
3236 if (vk) {
3237 |. movf TMP2, r0
3238 } else {
3239 |. movt TMP2, r0
3240 }
3241 |.else
3242 | bal ->vm_sfcmpeq
2509 |. nop 3243 |. nop
2510 | b <1 3244 | b <1
3245 if (vk) {
3246 |. movz TMP2, r0, CRET1
3247 } else {
3248 |. movn TMP2, r0, CRET1
3249 }
3250 |.endif
3251 |
3252 |5: // RA is a number, RD is not a number.
3253 |.if FFI
3254 | bne SFARG2HI, TISNUM, >9
3255 |.else
3256 | bne SFARG2HI, TISNUM, <2
3257 |.endif
3258 | // RA is a number, RD is an integer. Convert RD to a number.
3259 |.if FPU
3260 |. lwc1 f22, LO(RD)
3261 | b <4
3262 |. cvt.d.w f22, f22
3263 |.else
3264 |. nop
3265 | bal ->vm_sfi2d_2
3266 |. nop
3267 | b <4
3268 |. nop
3269 |.endif
3270 |
3271 |6: // RA is an integer, RD is not an integer
3272 | sltiu AT, SFARG2HI, LJ_TISNUM
3273 |.if FFI
3274 | beqz AT, >9
3275 |.else
3276 | beqz AT, <2
3277 |.endif
3278 | // RA is an integer, RD is a number. Convert RA to a number.
3279 |.if FPU
3280 |. mtc1 SFARG1LO, f20
3281 | ldc1 f22, 0(RD)
3282 | b <4
3283 | cvt.d.w f20, f20
3284 |.else
3285 |. nop
3286 | bal ->vm_sfi2d_1
3287 |. nop
3288 | b <4
3289 |. nop
3290 |.endif
3291 |
3292 |.if FFI
3293 |8:
3294 | li AT, LJ_TCDATA
3295 | bne SFARG1HI, AT, <2
3296 |. nop
3297 | b ->vmeta_equal_cd
3298 |. nop
3299 |9:
3300 | li AT, LJ_TCDATA
3301 | bne SFARG2HI, AT, <2
3302 |. nop
3303 | b ->vmeta_equal_cd
2511 |. nop 3304 |. nop
2512 |.endif 3305 |.endif
2513 break; 3306 break;
@@ -2559,7 +3352,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2559 | addu PC, PC, TMP2 3352 | addu PC, PC, TMP2
2560 } else { 3353 } else {
2561 | sltiu TMP0, TMP0, LJ_TISTRUECOND 3354 | sltiu TMP0, TMP0, LJ_TISTRUECOND
2562 | ldc1 f0, 0(RD) 3355 | lw SFRETHI, HI(RD)
3356 | lw SFRETLO, LO(RD)
2563 if (op == BC_ISTC) { 3357 if (op == BC_ISTC) {
2564 | beqz TMP0, >1 3358 | beqz TMP0, >1
2565 } else { 3359 } else {
@@ -2569,22 +3363,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2569 | decode_RD4b TMP2 3363 | decode_RD4b TMP2
2570 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3364 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2571 | addu TMP2, TMP2, TMP3 3365 | addu TMP2, TMP2, TMP3
2572 | sdc1 f0, 0(RA) 3366 | sw SFRETHI, HI(RA)
3367 | sw SFRETLO, LO(RA)
2573 | addu PC, PC, TMP2 3368 | addu PC, PC, TMP2
2574 |1: 3369 |1:
2575 } 3370 }
2576 | ins_next 3371 | ins_next
2577 break; 3372 break;
2578 3373
// Type check on a stack slot. Loads the HI word of the slot at BASE+RA, adds
// the RD operand shifted right by 3 (RD = -type*8 per the operand comment), and
// branches to ->vmeta_istype (defined outside this chunk) when the sum is
// non-zero. A zero sum falls through to the next instruction.
3374 case BC_ISTYPE:
3375 | // RA = src*8, RD = -type*8
3376 | addu TMP2, BASE, RA
3377 | srl TMP1, RD, 3
3378 | lw TMP0, HI(TMP2)
3379 | ins_next1
3380 | addu AT, TMP0, TMP1
3381 | bnez AT, ->vmeta_istype
3382 |. ins_next2
3383 break;
// Number check on a stack slot. Loads the HI word of the slot at BASE+RA and
// branches to ->vmeta_istype (defined outside this chunk) unless that word is
// below LJ_TISNUM in an unsigned compare. The RD operand is not read here.
3384 case BC_ISNUM:
3385 | // RA = src*8, RD = -(TISNUM-1)*8
3386 | addu TMP2, BASE, RA
3387 | lw TMP0, HI(TMP2)
3388 | ins_next1
3389 | sltiu AT, TMP0, LJ_TISNUM
3390 | beqz AT, ->vmeta_istype
3391 |. ins_next2
3392 break;
3393
2579 /* -- Unary ops --------------------------------------------------------- */ 3394 /* -- Unary ops --------------------------------------------------------- */
2580 3395
// Slot-to-slot copy: the 8-byte value at BASE+RD is written to BASE+RA.
// Left (old) column: one 64-bit FP load/store through f0.
// Right (new) column: two 32-bit word loads/stores (HI and LO) through
// SFRETHI/SFRETLO, so no FP register is used.
2581 case BC_MOV: 3396 case BC_MOV:
2582 | // RA = dst*8, RD = src*8 3397 | // RA = dst*8, RD = src*8
2583 | addu RD, BASE, RD 3398 | addu RD, BASE, RD
2584 | addu RA, BASE, RA 3399 | addu RA, BASE, RA
2585 | ldc1 f0, 0(RD) 3400 | lw SFRETHI, HI(RD)
3401 | lw SFRETLO, LO(RD)
2586 | ins_next1 3402 | ins_next1
2587 | sdc1 f0, 0(RA) 3403 | sw SFRETHI, HI(RA)
3404 | sw SFRETLO, LO(RA)
2588 | ins_next2 3405 | ins_next2
2589 break; 3406 break;
2590 case BC_NOT: 3407 case BC_NOT:
@@ -2601,16 +3418,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2601 break; 3418 break;
// Unary minus. The comments below describe the right-hand (new) column.
//  - HI word equal to TISNUM: integer path. The LO word is negated with negu,
//    except that a LO word of 0x80000000 is sent to ->vmeta_unm first.
//  - Otherwise (label 2): HI words not below LJ_TISNUM go to ->vmeta_unm; for
//    the rest the top bit of the HI word is flipped with xor 0x80000000 and the
//    LO word is stored unchanged.
// The left-hand (old) column handled only the FP case with neg.d.
2602 case BC_UNM: 3419 case BC_UNM:
2603 | // RA = dst*8, RD = src*8 3420 | // RA = dst*8, RD = src*8
2604 | addu CARG3, BASE, RD 3421 | addu RB, BASE, RD
3422 | lw SFARG1HI, HI(RB)
2605 | addu RA, BASE, RA 3423 | addu RA, BASE, RA
2606 | lw TMP0, HI(CARG3) 3424 | bne SFARG1HI, TISNUM, >2
2607 | ldc1 f0, 0(CARG3) 3425 |. lw SFARG1LO, LO(RB)
2608 | sltiu AT, TMP0, LJ_TISNUM 3426 | lui TMP1, 0x8000
2609 | beqz AT, ->vmeta_unm 3427 | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
2610 |. neg.d f0, f0 3428 |. negu SFARG1LO, SFARG1LO
// Shared store/dispatch tail for both the integer and the number path.
3429 |1:
2611 | ins_next1 3430 | ins_next1
2612 | sdc1 f0, 0(RA) 3431 | sw SFARG1HI, HI(RA)
3432 | sw SFARG1LO, LO(RA)
2613 | ins_next2 3433 | ins_next2
3434 |2:
3435 | sltiu AT, SFARG1HI, LJ_TISNUM
3436 | beqz AT, ->vmeta_unm
3437 |. lui TMP1, 0x8000
3438 | b <1
3439 |. xor SFARG1HI, SFARG1HI, TMP1
2614 break; 3440 break;
2615 case BC_LEN: 3441 case BC_LEN:
2616 | // RA = dst*8, RD = src*8 3442 | // RA = dst*8, RD = src*8
@@ -2621,12 +3447,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2621 | li AT, LJ_TSTR 3447 | li AT, LJ_TSTR
2622 | bne TMP0, AT, >2 3448 | bne TMP0, AT, >2
2623 |. li AT, LJ_TTAB 3449 |. li AT, LJ_TTAB
2624 | lw CRET1, STR:CARG1->len 3450 | lw CRET1, STR:CARG1->len
2625 |1: 3451 |1:
2626 | mtc1 CRET1, f0
2627 | cvt.d.w f0, f0
2628 | ins_next1 3452 | ins_next1
2629 | sdc1 f0, 0(RA) 3453 | sw TISNUM, HI(RA)
3454 | sw CRET1, LO(RA)
2630 | ins_next2 3455 | ins_next2
2631 |2: 3456 |2:
2632 | bne TMP0, AT, ->vmeta_len 3457 | bne TMP0, AT, ->vmeta_len
@@ -2657,104 +3482,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2657 3482
2658 /* -- Binary ops -------------------------------------------------------- */ 3483 /* -- Binary ops -------------------------------------------------------- */
2659 3484
// Right column: FPU modulo macro, a = b - floor(b/c)*c.
//   a     destination FP register
//   b, c  dividend and divisor FP registers
// The quotient b/c is placed in FARG1 on the line following the bal to
// ->vm_floor (presumably its delay slot); the floored value is read from FRET1.
// NOTE(review): ->vm_floor is defined outside this chunk.
2660 |.macro ins_arithpre 3485 |.macro fpmod, a, b, c
3486 | bal ->vm_floor // floor(b/c)
3487 |. div.d FARG1, b, c
3488 | mul.d a, FRET1, c
3489 | sub.d a, b, a // b - floor(b/c)*c
3490 |.endmacro
3491
// Soft-float modulo macro: computes b - floor(b/c)*c with library calls, where
// b arrives in SFARG1HI/SFARG1LO and c in SFARG2HI/SFARG2LO.
// Sequence: __divdf3(b, c) -> floor(q) -> __muldf3(f, c) -> __subdf3(b, p).
// Both operands are spilled to a 16-byte stack area (b at sp+0, c at sp+8) so
// they can be reloaded after the calls. Each intermediate result is taken from
// SFRETHI/SFRETLO.
// NOTE(review): load_got and call_extern are macros defined outside this chunk;
// presumably they fetch the routine's address and perform the call.
3492 |.macro sfpmod
3493 | addiu sp, sp, -16
3494 |
// q = b / c; the operands are saved while the call is being set up.
3495 | load_got __divdf3
3496 | sw SFARG1HI, HI(sp)
3497 | sw SFARG1LO, LO(sp)
3498 | sw SFARG2HI, 8+HI(sp)
3499 | call_extern
3500 |. sw SFARG2LO, 8+LO(sp)
3501 |
// f = floor(q)
3502 | load_got floor
3503 | move SFARG1HI, SFRETHI
3504 | call_extern
3505 |. move SFARG1LO, SFRETLO
3506 |
// p = f * c, with c reloaded from the stack.
3507 | load_got __muldf3
3508 | move SFARG1HI, SFRETHI
3509 | move SFARG1LO, SFRETLO
3510 | lw SFARG2HI, 8+HI(sp)
3511 | call_extern
3512 |. lw SFARG2LO, 8+LO(sp)
3513 |
// result = b - p, with b reloaded from the stack.
3514 | load_got __subdf3
3515 | lw SFARG1HI, HI(sp)
3516 | lw SFARG1LO, LO(sp)
3517 | move SFARG2HI, SFRETHI
3518 | call_extern
3519 |. move SFARG2LO, SFRETLO
3520 |
3521 | addiu sp, sp, 16
3522 |.endmacro
3523
// Operand setup shared by the arithmetic opcodes (right-hand column).
//   label  branch target emitted after the address setup, or "none" to emit
//          no branch.
// vk selects the operand form from the opcode number:
//   0        RB -> BASE slot,  RC -> KBASE constant
//   1        RC -> BASE slot,  RB -> KBASE constant (decoded fields swapped)
//   default  RB and RC both -> BASE slots
// On exit RB and RC hold the addresses of the two operands; no value has been
// loaded or type-checked yet (the left-hand column did both here).
// When a label is given, the last addu follows the branch (presumably in its
// delay slot); with "none" it is simply the final instruction.
3524 |.macro ins_arithpre, label
2661 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3525 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2662 | decode_RB8a RB, INS
2663 | decode_RB8b RB
2664 | decode_RDtoRC8 RC, RD
2665 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3526 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2666 ||switch (vk) { 3527 ||switch (vk) {
2667 ||case 0: 3528 ||case 0:
2668 | addu CARG3, BASE, RB 3529 | decode_RB8a RB, INS
2669 | addu CARG4, KBASE, RC 3530 | decode_RB8b RB
2670 | lw TMP1, HI(CARG3) 3531 | decode_RDtoRC8 RC, RD
2671 | ldc1 f20, 0(CARG3) 3532 | // RA = dst*8, RB = src1*8, RC = num_const*8
2672 | ldc1 f22, 0(CARG4) 3533 | addu RB, BASE, RB
2673 | sltiu AT, TMP1, LJ_TISNUM 3534 |.if "label" ~= "none"
3535 | b label
3536 |.endif
3537 |. addu RC, KBASE, RC
2674 || break; 3538 || break;
2675 ||case 1: 3539 ||case 1:
2676 | addu CARG4, BASE, RB 3540 | decode_RB8a RC, INS
2677 | addu CARG3, KBASE, RC 3541 | decode_RB8b RC
2678 | lw TMP1, HI(CARG4) 3542 | decode_RDtoRC8 RB, RD
2679 | ldc1 f22, 0(CARG4) 3543 | // RA = dst*8, RB = num_const*8, RC = src1*8
2680 | ldc1 f20, 0(CARG3) 3544 | addu RC, BASE, RC
2681 | sltiu AT, TMP1, LJ_TISNUM 3545 |.if "label" ~= "none"
3546 | b label
3547 |.endif
3548 |. addu RB, KBASE, RB
2682 || break; 3549 || break;
2683 ||default: 3550 ||default:
2684 | addu CARG3, BASE, RB 3551 | decode_RB8a RB, INS
2685 | addu CARG4, BASE, RC 3552 | decode_RB8b RB
2686 | lw TMP1, HI(CARG3) 3553 | decode_RDtoRC8 RC, RD
2687 | lw TMP2, HI(CARG4) 3554 | // RA = dst*8, RB = src1*8, RC = src2*8
2688 | ldc1 f20, 0(CARG3) 3555 | addu RB, BASE, RB
2689 | ldc1 f22, 0(CARG4) 3556 |.if "label" ~= "none"
2690 | sltiu AT, TMP1, LJ_TISNUM 3557 | b label
2691 | sltiu TMP0, TMP2, LJ_TISNUM 3558 |.endif
2692 | and AT, AT, TMP0 3559 |. addu RC, BASE, RC
2693 || break; 3560 || break;
2694 ||} 3561 ||}
2695 | beqz AT, ->vmeta_arith
2696 |. addu RA, BASE, RA
2697 |.endmacro 3562 |.endmacro
2698 | 3563 |
// Generic arithmetic handler (right-hand column). The left-hand column on
// these lines shows the old fpmod and one-argument ins_arith macros.
//   intins  integer operation selector: addu, subu, mult, any other name (the
//           modulo variant calling lj_vm_modi), or "div" for no integer path.
//   fpins   FP instruction or macro used in FPU builds.
//   fpcall  soft-float routine name, or "sfpmod" to expand the sfpmod macro.
//   label   global label placed after the operand setup, or "none".
// Flow:
//   1. ins_arithpre computes the operand addresses in RB and RC.
//   2. Unless intins is "div": if both HI words equal TISNUM, do the integer
//      operation. Overflow (or, for modulo, a zero divisor) goes to
//      ->vmeta_arith. The result is stored with TISNUM as its HI word.
//   3. Label 5: both HI words must be below LJ_TISNUM, otherwise ->vmeta_arith.
//      The FP or soft-float operation is performed and the result stored.
2699 |.macro fpmod, a, b, c 3564 |.macro ins_arith, intins, fpins, fpcall, label
2700 |->BC_MODVN_Z: 3565 | ins_arithpre none
2701 | bal ->vm_floor // floor(b/c)
2702 |. div.d FARG1, b, c
2703 | mul.d a, FRET1, c
2704 | sub.d a, b, a // b - floor(b/c)*c
2705 |.endmacro
2706 | 3566 |
2707 |.macro ins_arith, ins 3567 |.if "label" ~= "none"
2708 | ins_arithpre 3568 |label:
2709 |.if "ins" == "fpmod_" 3569 |.endif
2710 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3570 |
2711 |. nop 3571 | lw SFARG1HI, HI(RB)
3572 | lw SFARG2HI, HI(RC)
3573 |
3574 |.if "intins" ~= "div"
3575 |
3576 | // Check for two integers.
3577 | lw SFARG1LO, LO(RB)
3578 | bne SFARG1HI, TISNUM, >5
3579 |. lw SFARG2LO, LO(RC)
3580 | bne SFARG2HI, TISNUM, >5
3581 |
// The first instruction of each variant is marked "|." and follows the bne
// above (presumably in its delay slot).
3582 |.if "intins" == "addu"
3583 |. intins CRET1, SFARG1LO, SFARG2LO
3584 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3585 | xor TMP2, CRET1, SFARG2LO
3586 | and TMP1, TMP1, TMP2
3587 | bltz TMP1, ->vmeta_arith
3588 |. addu RA, BASE, RA
3589 |.elif "intins" == "subu"
3590 |. intins CRET1, SFARG1LO, SFARG2LO
3591 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3592 | xor TMP2, SFARG1LO, SFARG2LO
3593 | and TMP1, TMP1, TMP2
3594 | bltz TMP1, ->vmeta_arith
3595 |. addu RA, BASE, RA
3596 |.elif "intins" == "mult"
// Multiply: the product overflows when the high half (mfhi) differs from the
// sign extension of the low half (sra by 31).
3597 |. intins SFARG1LO, SFARG2LO
3598 | mflo CRET1
3599 | mfhi TMP2
3600 | sra TMP1, CRET1, 31
3601 | bne TMP1, TMP2, ->vmeta_arith
3602 |. addu RA, BASE, RA
3603 |.else
// Remaining case (modulo): a zero divisor goes to ->vmeta_arith, otherwise
// lj_vm_modi is called with the two LO words as arguments.
3604 |. load_got lj_vm_modi
3605 | beqz SFARG2LO, ->vmeta_arith
3606 |. addu RA, BASE, RA
3607 |.if ENDIAN_BE
3608 | move CARG1, SFARG1LO
3609 |.endif
3610 | call_extern
3611 |. move CARG2, SFARG2LO
3612 |.endif
3613 |
3614 | ins_next1
3615 | sw TISNUM, HI(RA)
3616 | sw CRET1, LO(RA)
// Label 3 is the shared dispatch tail; the number path below branches back here.
3617 |3:
3618 | ins_next2
3619 |
3620 |.elif not FPU
3621 |
// "div" without FPU: there is no integer path, so the LO words are loaded here.
3622 | lw SFARG1LO, LO(RB)
3623 | lw SFARG2LO, LO(RC)
3624 |
3625 |.endif
3626 |
3627 |5: // Check for two numbers.
3628 | .FPU ldc1 f20, 0(RB)
3629 | sltiu AT, SFARG1HI, LJ_TISNUM
3630 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3631 | .FPU ldc1 f22, 0(RC)
3632 | and AT, AT, TMP0
3633 | beqz AT, ->vmeta_arith
3634 |. addu RA, BASE, RA
3635 |
3636 |.if FPU
3637 | fpins FRET1, f20, f22
3638 |.elif "fpcall" == "sfpmod"
3639 | sfpmod
3640 |.else
3641 | load_got fpcall
3642 | call_extern
3643 |. nop
3644 |.endif
3645 |
// Store the result. Non-"div" variants branch back to label 3 and put the last
// store on the line after the branch; "div" has no label 3 and ends with its
// own ins_next2.
2714 | ins_next1 3646 | ins_next1
2715 | sdc1 f0, 0(RA) 3647 |.if not FPU
3648 | sw SFRETHI, HI(RA)
3649 |.endif
3650 |.if "intins" ~= "div"
3651 | b <3
3652 |.endif
3653 |.if FPU
3654 |. sdc1 FRET1, 0(RA)
3655 |.else
3656 |. sw SFRETLO, LO(RA)
3657 |.endif
3658 |.if "intins" == "div"
2716 | ins_next2 3659 | ins_next2
2717 |.endif 3660 |.endif
3661 |
2718 |.endmacro 3662 |.endmacro
2719 3663
// Dispatch of the arithmetic opcodes onto the macros (right-hand column).
// ADD/SUB/MUL expand the full ins_arith body for all three operand forms.
// DIV and MOD expand the body once, for the VN form, and publish a global
// label (->BC_DIVVN_Z / ->BC_MODVN_Z). The NV and VV forms only run the
// operand setup (ins_arithpre) and branch to that label.
// ins_arith arguments: integer op, FP op, soft-float routine, label.
2720 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3664 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2721 | ins_arith add.d 3665 | ins_arith addu, add.d, __adddf3, none
2722 break; 3666 break;
2723 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3667 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2724 | ins_arith sub.d 3668 | ins_arith subu, sub.d, __subdf3, none
2725 break; 3669 break;
2726 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3670 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2727 | ins_arith mul.d 3671 | ins_arith mult, mul.d, __muldf3, none
3672 break;
3673 case BC_DIVVN:
3674 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
2728 break; 3675 break;
2729 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3676 case BC_DIVNV: case BC_DIVVV:
2730 | ins_arith div.d 3677 | ins_arithpre ->BC_DIVVN_Z
2731 break; 3678 break;
2732 case BC_MODVN: 3679 case BC_MODVN:
2733 | ins_arith fpmod 3680 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
2734 break; 3681 break;
2735 case BC_MODNV: case BC_MODVV: 3682 case BC_MODNV: case BC_MODVV:
2736 | ins_arith fpmod_ 3683 | ins_arithpre ->BC_MODVN_Z
2737 break; 3684 break;
// Power operator (right-hand column): calls the external pow routine.
// Both operands must have HI words below LJ_TISNUM; anything else goes to
// ->vmeta_arith. There is no integer fast path in this handler.
// FPU builds pass the operands in FARG1/FARG2 and store FRET1. Soft-float
// builds pass them in SFARG1*/SFARG2* and store SFRETHI/SFRETLO.
2738 case BC_POW: 3685 case BC_POW:
2739 | decode_RB8a RB, INS 3686 | ins_arithpre none
2740 | decode_RB8b RB 3687 | lw SFARG1HI, HI(RB)
2741 | decode_RDtoRC8 RC, RD 3688 | lw SFARG2HI, HI(RC)
2742 | addu CARG3, BASE, RB 3689 | sltiu AT, SFARG1HI, LJ_TISNUM
2743 | addu CARG4, BASE, RC 3690 | sltiu TMP0, SFARG2HI, LJ_TISNUM
2744 | lw TMP1, HI(CARG3)
2745 | lw TMP2, HI(CARG4)
2746 | ldc1 FARG1, 0(CARG3)
2747 | ldc1 FARG2, 0(CARG4)
2748 | sltiu AT, TMP1, LJ_TISNUM
2749 | sltiu TMP0, TMP2, LJ_TISNUM
2750 | and AT, AT, TMP0 3691 | and AT, AT, TMP0
2751 | load_got pow 3692 | load_got pow
2752 | beqz AT, ->vmeta_arith 3693 | beqz AT, ->vmeta_arith
2753 |. addu RA, BASE, RA 3694 |. addu RA, BASE, RA
// Right column: the operand values are loaded only after the type check passes.
3695 |.if FPU
3696 | ldc1 FARG1, 0(RB)
3697 | ldc1 FARG2, 0(RC)
3698 |.else
3699 | lw SFARG1LO, LO(RB)
3700 | lw SFARG2LO, LO(RC)
3701 |.endif
2754 | call_extern 3702 | call_extern
2755 |. nop 3703 |. nop
2756 | ins_next1 3704 | ins_next1
3705 |.if FPU
2757 | sdc1 FRET1, 0(RA) 3706 | sdc1 FRET1, 0(RA)
3707 |.else
3708 | sw SFRETHI, HI(RA)
3709 | sw SFRETLO, LO(RA)
3710 |.endif
2758 | ins_next2 3711 | ins_next2
2759 break; 3712 break;
2760 3713
@@ -2777,10 +3730,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2777 | bnez CRET1, ->vmeta_binop 3730 | bnez CRET1, ->vmeta_binop
2778 |. lw BASE, L->base 3731 |. lw BASE, L->base
2779 | addu RB, BASE, MULTRES 3732 | addu RB, BASE, MULTRES
2780 | ldc1 f0, 0(RB) 3733 | lw SFRETHI, HI(RB)
3734 | lw SFRETLO, LO(RB)
2781 | addu RA, BASE, RA 3735 | addu RA, BASE, RA
2782 | ins_next1 3736 | ins_next1
2783 | sdc1 f0, 0(RA) // Copy result from RB to RA. 3737 | sw SFRETHI, HI(RA)
3738 | sw SFRETLO, LO(RA)
2784 | ins_next2 3739 | ins_next2
2785 break; 3740 break;
2786 3741
@@ -2815,20 +3770,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
// Load a 16-bit signed literal into a stack slot.
// The literal is taken from the upper half of INS with an arithmetic shift.
// Left (old) column: converts it to a double (mtc1 + cvt.d.w) and stores 8 bytes.
// Right (new) column: stores TISNUM as the HI word and the literal as the LO
// word, with no FP conversion.
2815 case BC_KSHORT: 3770 case BC_KSHORT:
2816 | // RA = dst*8, RD = int16_literal*8 3771 | // RA = dst*8, RD = int16_literal*8
2817 | sra RD, INS, 16 3772 | sra RD, INS, 16
2818 | mtc1 RD, f0
2819 | addu RA, BASE, RA 3773 | addu RA, BASE, RA
2820 | cvt.d.w f0, f0
2821 | ins_next1 3774 | ins_next1
2822 | sdc1 f0, 0(RA) 3775 | sw TISNUM, HI(RA)
3776 | sw RD, LO(RA)
2823 | ins_next2 3777 | ins_next2
2824 break; 3778 break;
// Load a number constant: copies the 8-byte value at KBASE+RD to BASE+RA.
// Left (old) column: one 64-bit FP load/store through f0.
// Right (new) column: two 32-bit word loads/stores through SFRETHI/SFRETLO.
2825 case BC_KNUM: 3779 case BC_KNUM:
2826 | // RA = dst*8, RD = num_const*8 3780 | // RA = dst*8, RD = num_const*8
2827 | addu RD, KBASE, RD 3781 | addu RD, KBASE, RD
2828 | addu RA, BASE, RA 3782 | addu RA, BASE, RA
2829 | ldc1 f0, 0(RD) 3783 | lw SFRETHI, HI(RD)
3784 | lw SFRETLO, LO(RD)
2830 | ins_next1 3785 | ins_next1
2831 | sdc1 f0, 0(RA) 3786 | sw SFRETHI, HI(RA)
3787 | sw SFRETLO, LO(RA)
2832 | ins_next2 3788 | ins_next2
2833 break; 3789 break;
2834 case BC_KPRI: 3790 case BC_KPRI:
@@ -2864,9 +3820,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2864 | lw UPVAL:RB, LFUNC:RD->uvptr 3820 | lw UPVAL:RB, LFUNC:RD->uvptr
2865 | ins_next1 3821 | ins_next1
2866 | lw TMP1, UPVAL:RB->v 3822 | lw TMP1, UPVAL:RB->v
2867 | ldc1 f0, 0(TMP1) 3823 | lw SFRETHI, HI(TMP1)
3824 | lw SFRETLO, LO(TMP1)
2868 | addu RA, BASE, RA 3825 | addu RA, BASE, RA
2869 | sdc1 f0, 0(RA) 3826 | sw SFRETHI, HI(RA)
3827 | sw SFRETLO, LO(RA)
2870 | ins_next2 3828 | ins_next2
2871 break; 3829 break;
2872 case BC_USETV: 3830 case BC_USETV:
@@ -2875,26 +3833,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2875 | srl RA, RA, 1 3833 | srl RA, RA, 1
2876 | addu RD, BASE, RD 3834 | addu RD, BASE, RD
2877 | addu RA, RA, LFUNC:RB 3835 | addu RA, RA, LFUNC:RB
2878 | ldc1 f0, 0(RD)
2879 | lw UPVAL:RB, LFUNC:RA->uvptr 3836 | lw UPVAL:RB, LFUNC:RA->uvptr
3837 | lw SFRETHI, HI(RD)
3838 | lw SFRETLO, LO(RD)
2880 | lbu TMP3, UPVAL:RB->marked 3839 | lbu TMP3, UPVAL:RB->marked
2881 | lw CARG2, UPVAL:RB->v 3840 | lw CARG2, UPVAL:RB->v
2882 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3841 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2883 | lbu TMP0, UPVAL:RB->closed 3842 | lbu TMP0, UPVAL:RB->closed
2884 | lw TMP2, HI(RD) 3843 | sw SFRETHI, HI(CARG2)
2885 | sdc1 f0, 0(CARG2) 3844 | sw SFRETLO, LO(CARG2)
2886 | li AT, LJ_GC_BLACK|1 3845 | li AT, LJ_GC_BLACK|1
2887 | or TMP3, TMP3, TMP0 3846 | or TMP3, TMP3, TMP0
2888 | beq TMP3, AT, >2 // Upvalue is closed and black? 3847 | beq TMP3, AT, >2 // Upvalue is closed and black?
2889 |. addiu TMP2, TMP2, -(LJ_TNUMX+1) 3848 |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
2890 |1: 3849 |1:
2891 | ins_next 3850 | ins_next
2892 | 3851 |
2893 |2: // Check if new value is collectable. 3852 |2: // Check if new value is collectable.
2894 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3853 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
2895 | beqz AT, <1 // tvisgcv(v) 3854 | beqz AT, <1 // tvisgcv(v)
2896 |. lw TMP1, LO(RD) 3855 |. nop
2897 | lbu TMP3, GCOBJ:TMP1->gch.marked 3856 | lbu TMP3, GCOBJ:SFRETLO->gch.marked
2898 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3857 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2899 | beqz TMP3, <1 3858 | beqz TMP3, <1
2900 |. load_got lj_gc_barrieruv 3859 |. load_got lj_gc_barrieruv
@@ -2942,11 +3901,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2942 | srl RA, RA, 1 3901 | srl RA, RA, 1
2943 | addu RD, KBASE, RD 3902 | addu RD, KBASE, RD
2944 | addu RA, RA, LFUNC:RB 3903 | addu RA, RA, LFUNC:RB
2945 | ldc1 f0, 0(RD) 3904 | lw UPVAL:RB, LFUNC:RA->uvptr
2946 | lw UPVAL:RB, LFUNC:RA->uvptr 3905 | lw SFRETHI, HI(RD)
3906 | lw SFRETLO, LO(RD)
3907 | lw TMP1, UPVAL:RB->v
2947 | ins_next1 3908 | ins_next1
2948 | lw TMP1, UPVAL:RB->v 3909 | sw SFRETHI, HI(TMP1)
2949 | sdc1 f0, 0(TMP1) 3910 | sw SFRETLO, LO(TMP1)
2950 | ins_next2 3911 | ins_next2
2951 break; 3912 break;
2952 case BC_USETP: 3913 case BC_USETP:
@@ -2956,10 +3917,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2956 | srl TMP0, RD, 3 3917 | srl TMP0, RD, 3
2957 | addu RA, RA, LFUNC:RB 3918 | addu RA, RA, LFUNC:RB
2958 | not TMP0, TMP0 3919 | not TMP0, TMP0
2959 | lw UPVAL:RB, LFUNC:RA->uvptr 3920 | lw UPVAL:RB, LFUNC:RA->uvptr
2960 | ins_next1 3921 | ins_next1
2961 | lw TMP1, UPVAL:RB->v 3922 | lw TMP1, UPVAL:RB->v
2962 | sw TMP0, HI(TMP1) 3923 | sw TMP0, HI(TMP1)
2963 | ins_next2 3924 | ins_next2
2964 break; 3925 break;
2965 3926
@@ -2995,8 +3956,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2995 | li TMP0, LJ_TFUNC 3956 | li TMP0, LJ_TFUNC
2996 | ins_next1 3957 | ins_next1
2997 | addu RA, BASE, RA 3958 | addu RA, BASE, RA
2998 | sw TMP0, HI(RA)
2999 | sw LFUNC:CRET1, LO(RA) 3959 | sw LFUNC:CRET1, LO(RA)
3960 | sw TMP0, HI(RA)
3000 | ins_next2 3961 | ins_next2
3001 break; 3962 break;
3002 3963
@@ -3077,31 +4038,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3077 | lw TMP2, HI(CARG3) 4038 | lw TMP2, HI(CARG3)
3078 | lw TAB:RB, LO(CARG2) 4039 | lw TAB:RB, LO(CARG2)
3079 | li AT, LJ_TTAB 4040 | li AT, LJ_TTAB
3080 | ldc1 f0, 0(CARG3)
3081 | bne TMP1, AT, ->vmeta_tgetv 4041 | bne TMP1, AT, ->vmeta_tgetv
3082 |. addu RA, BASE, RA 4042 |. addu RA, BASE, RA
3083 | sltiu AT, TMP2, LJ_TISNUM 4043 | bne TMP2, TISNUM, >5
3084 | beqz AT, >5 4044 |. lw RC, LO(CARG3)
3085 |. li AT, LJ_TSTR 4045 | lw TMP0, TAB:RB->asize
3086 |
3087 | // Convert number key to integer, check for integerness and range.
3088 | cvt.w.d f2, f0
3089 | lw TMP0, TAB:RB->asize
3090 | mfc1 TMP2, f2
3091 | cvt.d.w f4, f2
3092 | lw TMP1, TAB:RB->array 4046 | lw TMP1, TAB:RB->array
3093 | c.eq.d f0, f4 4047 | sltu AT, RC, TMP0
3094 | sltu AT, TMP2, TMP0 4048 | sll TMP2, RC, 3
3095 | movf AT, r0
3096 | sll TMP2, TMP2, 3
3097 | beqz AT, ->vmeta_tgetv // Integer key and in array part? 4049 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3098 |. addu TMP2, TMP1, TMP2 4050 |. addu TMP2, TMP1, TMP2
3099 | lw TMP0, HI(TMP2) 4051 | lw SFRETHI, HI(TMP2)
3100 | beq TMP0, TISNIL, >2 4052 | beq SFRETHI, TISNIL, >2
3101 |. ldc1 f0, 0(TMP2) 4053 |. lw SFRETLO, LO(TMP2)
3102 |1: 4054 |1:
3103 | ins_next1 4055 | ins_next1
3104 | sdc1 f0, 0(RA) 4056 | sw SFRETHI, HI(RA)
4057 | sw SFRETLO, LO(RA)
3105 | ins_next2 4058 | ins_next2
3106 | 4059 |
3107 |2: // Check for __index if table value is nil. 4060 |2: // Check for __index if table value is nil.
@@ -3116,8 +4069,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3116 |. nop 4069 |. nop
3117 | 4070 |
3118 |5: 4071 |5:
4072 | li AT, LJ_TSTR
3119 | bne TMP2, AT, ->vmeta_tgetv 4073 | bne TMP2, AT, ->vmeta_tgetv
3120 |. lw STR:RC, LO(CARG3) 4074 |. nop
3121 | b ->BC_TGETS_Z // String key? 4075 | b ->BC_TGETS_Z // String key?
3122 |. nop 4076 |. nop
3123 break; 4077 break;
@@ -3138,9 +4092,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3138 |->BC_TGETS_Z: 4092 |->BC_TGETS_Z:
3139 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 4093 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
3140 | lw TMP0, TAB:RB->hmask 4094 | lw TMP0, TAB:RB->hmask
3141 | lw TMP1, STR:RC->hash 4095 | lw TMP1, STR:RC->sid
3142 | lw NODE:TMP2, TAB:RB->node 4096 | lw NODE:TMP2, TAB:RB->node
3143 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4097 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3144 | sll TMP0, TMP1, 5 4098 | sll TMP0, TMP1, 5
3145 | sll TMP1, TMP1, 3 4099 | sll TMP1, TMP1, 3
3146 | subu TMP1, TMP0, TMP1 4100 | subu TMP1, TMP0, TMP1
@@ -3149,18 +4103,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3149 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4103 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3150 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4104 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3151 | lw NODE:TMP1, NODE:TMP2->next 4105 | lw NODE:TMP1, NODE:TMP2->next
3152 | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 4106 | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3153 | addiu CARG1, CARG1, -LJ_TSTR 4107 | addiu CARG1, CARG1, -LJ_TSTR
3154 | xor TMP0, TMP0, STR:RC 4108 | xor TMP0, TMP0, STR:RC
3155 | or AT, CARG1, TMP0 4109 | or AT, CARG1, TMP0
3156 | bnez AT, >4 4110 | bnez AT, >4
3157 |. lw TAB:TMP3, TAB:RB->metatable 4111 |. lw TAB:TMP3, TAB:RB->metatable
3158 | beq CARG2, TISNIL, >5 // Key found, but nil value? 4112 | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
3159 |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) 4113 |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3160 |3: 4114 |3:
3161 | ins_next1 4115 | ins_next1
3162 | sw CARG2, HI(RA) 4116 | sw SFRETHI, HI(RA)
3163 | sw CARG1, LO(RA) 4117 | sw SFRETLO, LO(RA)
3164 | ins_next2 4118 | ins_next2
3165 | 4119 |
3166 |4: // Follow hash chain. 4120 |4: // Follow hash chain.
@@ -3170,7 +4124,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3170 | 4124 |
3171 |5: // Check for __index if table value is nil. 4125 |5: // Check for __index if table value is nil.
3172 | beqz TAB:TMP3, <3 // No metatable: done. 4126 | beqz TAB:TMP3, <3 // No metatable: done.
3173 |. li CARG2, LJ_TNIL 4127 |. li SFRETHI, LJ_TNIL
3174 | lbu TMP0, TAB:TMP3->nomm 4128 | lbu TMP0, TAB:TMP3->nomm
3175 | andi TMP0, TMP0, 1<<MM_index 4129 | andi TMP0, TMP0, 1<<MM_index
3176 | bnez TMP0, <3 // 'no __index' flag set: done. 4130 | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3195,12 +4149,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3195 | sltu AT, TMP0, TMP1 4149 | sltu AT, TMP0, TMP1
3196 | beqz AT, ->vmeta_tgetb 4150 | beqz AT, ->vmeta_tgetb
3197 |. addu RC, TMP2, RC 4151 |. addu RC, TMP2, RC
3198 | lw TMP1, HI(RC) 4152 | lw SFRETHI, HI(RC)
3199 | beq TMP1, TISNIL, >5 4153 | beq SFRETHI, TISNIL, >5
3200 |. ldc1 f0, 0(RC) 4154 |. lw SFRETLO, LO(RC)
3201 |1: 4155 |1:
3202 | ins_next1 4156 | ins_next1
3203 | sdc1 f0, 0(RA) 4157 | sw SFRETHI, HI(RA)
4158 | sw SFRETLO, LO(RA)
3204 | ins_next2 4159 | ins_next2
3205 | 4160 |
3206 |5: // Check for __index if table value is nil. 4161 |5: // Check for __index if table value is nil.
@@ -3211,9 +4166,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3211 | andi TMP1, TMP1, 1<<MM_index 4166 | andi TMP1, TMP1, 1<<MM_index
3212 | bnez TMP1, <1 // 'no __index' flag set: done. 4167 | bnez TMP1, <1 // 'no __index' flag set: done.
3213 |. nop 4168 |. nop
3214 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4169 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3215 |. nop 4170 |. nop
3216 break; 4171 break;
4172 case BC_TGETR:
4173 | // Fetch table[key] from the array part into the dst slot. Only the LO words
4173 | // of the table and key slots are loaded; no type tags are checked here.
4173 | // RA = dst*8, RB = table*8, RC = key*8
4174 | decode_RB8a RB, INS
4175 | decode_RB8b RB
4176 | decode_RDtoRC8 RC, RD
4177 | addu RB, BASE, RB // RB = address of the table slot.
4178 | addu RC, BASE, RC // RC = address of the key slot.
4179 | lw TAB:CARG1, LO(RB) // Table pointer.
4180 | lw CARG2, LO(RC) // Key, used directly as the array index.
4181 | addu RA, BASE, RA // RA = address of the dst slot.
4182 | lw TMP0, TAB:CARG1->asize
4183 | lw TMP1, TAB:CARG1->array
4184 | sltu AT, CARG2, TMP0 // AT = (key < asize), unsigned compare.
4185 | sll TMP2, CARG2, 3 // TMP2 = key*8 (byte offset into the array).
4186 | beqz AT, ->vmeta_tgetr // In array part?
4187 |. addu CRET1, TMP1, TMP2 // Delay slot: CRET1 = &array[key].
4188 | lw SFARG2HI, HI(CRET1)
4189 | lw SFARG2LO, LO(CRET1)
4190 |->BC_TGETR_Z: // NOTE(review): presumably re-entered from vmeta_tgetr with the value in SFARG2HI/LO -- confirm.
4191 | ins_next1
4192 | sw SFARG2HI, HI(RA) // Store both value words into the dst slot.
4193 | sw SFARG2LO, LO(RA)
4194 | ins_next2
4195 break;
3217 4196
3218 case BC_TSETV: 4197 case BC_TSETV:
3219 | // RA = src*8, RB = table*8, RC = key*8 4198 | // RA = src*8, RB = table*8, RC = key*8
@@ -3226,33 +4205,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3226 | lw TMP2, HI(CARG3) 4205 | lw TMP2, HI(CARG3)
3227 | lw TAB:RB, LO(CARG2) 4206 | lw TAB:RB, LO(CARG2)
3228 | li AT, LJ_TTAB 4207 | li AT, LJ_TTAB
3229 | ldc1 f0, 0(CARG3)
3230 | bne TMP1, AT, ->vmeta_tsetv 4208 | bne TMP1, AT, ->vmeta_tsetv
3231 |. addu RA, BASE, RA 4209 |. addu RA, BASE, RA
3232 | sltiu AT, TMP2, LJ_TISNUM 4210 | bne TMP2, TISNUM, >5
3233 | beqz AT, >5 4211 |. lw RC, LO(CARG3)
3234 |. li AT, LJ_TSTR 4212 | lw TMP0, TAB:RB->asize
3235 |
3236 | // Convert number key to integer, check for integerness and range.
3237 | cvt.w.d f2, f0
3238 | lw TMP0, TAB:RB->asize
3239 | mfc1 TMP2, f2
3240 | cvt.d.w f4, f2
3241 | lw TMP1, TAB:RB->array 4213 | lw TMP1, TAB:RB->array
3242 | c.eq.d f0, f4 4214 | sltu AT, RC, TMP0
3243 | sltu AT, TMP2, TMP0 4215 | sll TMP2, RC, 3
3244 | movf AT, r0
3245 | sll TMP2, TMP2, 3
3246 | beqz AT, ->vmeta_tsetv // Integer key and in array part? 4216 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
3247 |. addu TMP1, TMP1, TMP2 4217 |. addu TMP1, TMP1, TMP2
3248 | lbu TMP3, TAB:RB->marked
3249 | lw TMP0, HI(TMP1) 4218 | lw TMP0, HI(TMP1)
4219 | lbu TMP3, TAB:RB->marked
4220 | lw SFRETHI, HI(RA)
3250 | beq TMP0, TISNIL, >3 4221 | beq TMP0, TISNIL, >3
3251 |. ldc1 f0, 0(RA) 4222 |. lw SFRETLO, LO(RA)
3252 |1: 4223 |1:
3253 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4224 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3254 | bnez AT, >7 4225 | sw SFRETHI, HI(TMP1)
3255 |. sdc1 f0, 0(TMP1) 4226 | bnez AT, >7
4227 |. sw SFRETLO, LO(TMP1)
3256 |2: 4228 |2:
3257 | ins_next 4229 | ins_next
3258 | 4230 |
@@ -3268,8 +4240,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3268 |. nop 4240 |. nop
3269 | 4241 |
3270 |5: 4242 |5:
4243 | li AT, LJ_TSTR
3271 | bne TMP2, AT, ->vmeta_tsetv 4244 | bne TMP2, AT, ->vmeta_tsetv
3272 |. lw STR:RC, LO(CARG3) 4245 |. nop
3273 | b ->BC_TSETS_Z // String key? 4246 | b ->BC_TSETS_Z // String key?
3274 |. nop 4247 |. nop
3275 | 4248 |
@@ -3293,15 +4266,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3293 |->BC_TSETS_Z: 4266 |->BC_TSETS_Z:
3294 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 4267 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
3295 | lw TMP0, TAB:RB->hmask 4268 | lw TMP0, TAB:RB->hmask
3296 | lw TMP1, STR:RC->hash 4269 | lw TMP1, STR:RC->sid
3297 | lw NODE:TMP2, TAB:RB->node 4270 | lw NODE:TMP2, TAB:RB->node
3298 | sb r0, TAB:RB->nomm // Clear metamethod cache. 4271 | sb r0, TAB:RB->nomm // Clear metamethod cache.
3299 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4272 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3300 | sll TMP0, TMP1, 5 4273 | sll TMP0, TMP1, 5
3301 | sll TMP1, TMP1, 3 4274 | sll TMP1, TMP1, 3
3302 | subu TMP1, TMP0, TMP1 4275 | subu TMP1, TMP0, TMP1
3303 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4276 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4277 |.if FPU
3304 | ldc1 f20, 0(RA) 4278 | ldc1 f20, 0(RA)
4279 |.else
4280 | lw SFRETHI, HI(RA)
4281 | lw SFRETLO, LO(RA)
4282 |.endif
3305 |1: 4283 |1:
3306 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4284 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3307 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4285 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3315,8 +4293,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3315 |. lw TAB:TMP0, TAB:RB->metatable 4293 |. lw TAB:TMP0, TAB:RB->metatable
3316 |2: 4294 |2:
3317 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4295 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4296 |.if FPU
3318 | bnez AT, >7 4297 | bnez AT, >7
3319 |. sdc1 f20, NODE:TMP2->val 4298 |. sdc1 f20, NODE:TMP2->val
4299 |.else
4300 | sw SFRETHI, NODE:TMP2->val.u32.hi
4301 | bnez AT, >7
4302 |. sw SFRETLO, NODE:TMP2->val.u32.lo
4303 |.endif
3320 |3: 4304 |3:
3321 | ins_next 4305 | ins_next
3322 | 4306 |
@@ -3354,8 +4338,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3354 |. move CARG1, L 4338 |. move CARG1, L
3355 | // Returns TValue *. 4339 | // Returns TValue *.
3356 | lw BASE, L->base 4340 | lw BASE, L->base
4341 |.if FPU
3357 | b <3 // No 2nd write barrier needed. 4342 | b <3 // No 2nd write barrier needed.
3358 |. sdc1 f20, 0(CRET1) 4343 |. sdc1 f20, 0(CRET1)
4344 |.else
4345 | lw SFARG1HI, HI(RA)
4346 | lw SFARG1LO, LO(RA)
4347 | sw SFARG1HI, HI(CRET1)
4348 | b <3 // No 2nd write barrier needed.
4349 |. sw SFARG1LO, LO(CRET1)
4350 |.endif
3359 | 4351 |
3360 |7: // Possible table write barrier for the value. Skip valiswhite check. 4352 |7: // Possible table write barrier for the value. Skip valiswhite check.
3361 | barrierback TAB:RB, TMP3, TMP0, <3 4353 | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3380,11 +4372,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3380 | lw TMP1, HI(RC) 4372 | lw TMP1, HI(RC)
3381 | lbu TMP3, TAB:RB->marked 4373 | lbu TMP3, TAB:RB->marked
3382 | beq TMP1, TISNIL, >5 4374 | beq TMP1, TISNIL, >5
3383 |. ldc1 f0, 0(RA)
3384 |1: 4375 |1:
4376 |. lw SFRETHI, HI(RA)
4377 | lw SFRETLO, LO(RA)
3385 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4378 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4379 | sw SFRETHI, HI(RC)
3386 | bnez AT, >7 4380 | bnez AT, >7
3387 |. sdc1 f0, 0(RC) 4381 |. sw SFRETLO, LO(RC)
3388 |2: 4382 |2:
3389 | ins_next 4383 | ins_next
3390 | 4384 |
@@ -3396,12 +4390,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3396 | andi TMP1, TMP1, 1<<MM_newindex 4390 | andi TMP1, TMP1, 1<<MM_newindex
3397 | bnez TMP1, <1 // 'no __newindex' flag set: done. 4391 | bnez TMP1, <1 // 'no __newindex' flag set: done.
3398 |. nop 4392 |. nop
3399 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4393 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
3400 |. nop 4394 |. nop
3401 | 4395 |
3402 |7: // Possible table write barrier for the value. Skip valiswhite check. 4396 |7: // Possible table write barrier for the value. Skip valiswhite check.
3403 | barrierback TAB:RB, TMP3, TMP0, <2 4397 | barrierback TAB:RB, TMP3, TMP0, <2
3404 break; 4398 break;
4399 case BC_TSETR:
4400 | // Store the value from the src slot into table[key] in the array part. Only
4400 | // the LO words of the table and key slots are loaded; no type tags are checked.
4400 | // RA = src*8, RB = table*8, RC = key*8
4401 | decode_RB8a RB, INS
4402 | decode_RB8b RB
4403 | decode_RDtoRC8 RC, RD
4404 | addu CARG1, BASE, RB // CARG1 = address of the table slot.
4405 | addu CARG3, BASE, RC // CARG3 = address of the key slot.
4406 | lw TAB:CARG2, LO(CARG1) // Table pointer.
4407 | lw CARG3, LO(CARG3) // Key, used directly as the array index.
4408 | lbu TMP3, TAB:CARG2->marked
4409 | lw TMP0, TAB:CARG2->asize
4410 | lw TMP1, TAB:CARG2->array
4411 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4412 | bnez AT, >7
4413 |. addu RA, BASE, RA // Delay slot: RA = address of the src slot.
4414 |2:
4415 | sltu AT, CARG3, TMP0 // AT = (key < asize), unsigned compare.
4416 | sll TMP2, CARG3, 3 // TMP2 = key*8 (byte offset into the array).
4417 | beqz AT, ->vmeta_tsetr // In array part?
4418 |. addu CRET1, TMP1, TMP2 // Delay slot: CRET1 = &array[key].
4419 |->BC_TSETR_Z: // NOTE(review): presumably re-entered from vmeta_tsetr with the target slot in CRET1 -- confirm.
4420 | lw SFARG1HI, HI(RA) // Load both words of the value to store.
4421 | lw SFARG1LO, LO(RA)
4422 | ins_next1
4423 | sw SFARG1HI, HI(CRET1)
4424 | sw SFARG1LO, LO(CRET1)
4425 | ins_next2
4426 |
4427 |7: // Possible table write barrier for the value. Skip valiswhite check.
4428 | barrierback TAB:CARG2, TMP3, CRET1, <2 // CRET1 is passed as a scratch operand here; label 2 recomputes it afterwards.
4429 break;
3405 4430
3406 case BC_TSETM: 4431 case BC_TSETM:
3407 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4432 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3424,10 +4449,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3424 | addu TMP1, TMP1, CARG1 4449 | addu TMP1, TMP1, CARG1
3425 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4450 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3426 |3: // Copy result slots to table. 4451 |3: // Copy result slots to table.
3427 | ldc1 f0, 0(RA) 4452 | lw SFRETHI, HI(RA)
4453 | lw SFRETLO, LO(RA)
3428 | addiu RA, RA, 8 4454 | addiu RA, RA, 8
3429 | sltu AT, RA, TMP2 4455 | sltu AT, RA, TMP2
3430 | sdc1 f0, 0(TMP1) 4456 | sw SFRETHI, HI(TMP1)
4457 | sw SFRETLO, LO(TMP1)
3431 | bnez AT, <3 4458 | bnez AT, <3
3432 |. addiu TMP1, TMP1, 8 4459 |. addiu TMP1, TMP1, 8
3433 | bnez TMP0, >7 4460 | bnez TMP0, >7
@@ -3502,10 +4529,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3502 | beqz NARGS8:RC, >3 4529 | beqz NARGS8:RC, >3
3503 |. move TMP3, NARGS8:RC 4530 |. move TMP3, NARGS8:RC
3504 |2: 4531 |2:
3505 | ldc1 f0, 0(RA) 4532 | lw SFRETHI, HI(RA)
4533 | lw SFRETLO, LO(RA)
3506 | addiu RA, RA, 8 4534 | addiu RA, RA, 8
3507 | addiu TMP3, TMP3, -8 4535 | addiu TMP3, TMP3, -8
3508 | sdc1 f0, 0(TMP2) 4536 | sw SFRETHI, HI(TMP2)
4537 | sw SFRETLO, LO(TMP2)
3509 | bnez TMP3, <2 4538 | bnez TMP3, <2
3510 |. addiu TMP2, TMP2, 8 4539 |. addiu TMP2, TMP2, 8
3511 |3: 4540 |3:
@@ -3542,12 +4571,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3542 | li AT, LJ_TFUNC 4571 | li AT, LJ_TFUNC
3543 | lw TMP1, -24+HI(BASE) 4572 | lw TMP1, -24+HI(BASE)
3544 | lw LFUNC:RB, -24+LO(BASE) 4573 | lw LFUNC:RB, -24+LO(BASE)
3545 | ldc1 f2, -8(BASE) 4574 | lw SFARG1HI, -16+HI(BASE)
3546 | ldc1 f0, -16(BASE) 4575 | lw SFARG1LO, -16+LO(BASE)
4576 | lw SFARG2HI, -8+HI(BASE)
4577 | lw SFARG2LO, -8+LO(BASE)
3547 | sw TMP1, HI(BASE) // Copy callable. 4578 | sw TMP1, HI(BASE) // Copy callable.
3548 | sw LFUNC:RB, LO(BASE) 4579 | sw LFUNC:RB, LO(BASE)
3549 | sdc1 f2, 16(BASE) // Copy control var. 4580 | sw SFARG1HI, 8+HI(BASE) // Copy state.
3550 | sdc1 f0, 8(BASE) // Copy state. 4581 | sw SFARG1LO, 8+LO(BASE)
4582 | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4583 | sw SFARG2LO, 16+LO(BASE)
3551 | addiu BASE, BASE, 8 4584 | addiu BASE, BASE, 8
3552 | bne TMP1, AT, ->vmeta_call 4585 | bne TMP1, AT, ->vmeta_call
3553 |. li NARGS8:RC, 16 // Iterators get 2 arguments. 4586 |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3555,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3555 break; 4588 break;
3556 4589
3557 case BC_ITERN: 4590 case BC_ITERN:
3558 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) 4591 |.if JIT and ENDIAN_LE
3559 |.if JIT 4592 | hotloop
3560 | // NYI: add hotloop, record BC_ITERN.
3561 |.endif 4593 |.endif
4594 |->vm_IITERN:
4595 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
3562 | addu RA, BASE, RA 4596 | addu RA, BASE, RA
3563 | lw TAB:RB, -16+LO(RA) 4597 | lw TAB:RB, -16+LO(RA)
3564 | lw RC, -8+LO(RA) // Get index from control var. 4598 | lw RC, -8+LO(RA) // Get index from control var.
@@ -3570,20 +4604,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3570 | beqz AT, >5 // Index points after array part? 4604 | beqz AT, >5 // Index points after array part?
3571 |. sll TMP3, RC, 3 4605 |. sll TMP3, RC, 3
3572 | addu TMP3, TMP1, TMP3 4606 | addu TMP3, TMP1, TMP3
3573 | lw TMP2, HI(TMP3) 4607 | lw SFARG1HI, HI(TMP3)
3574 | ldc1 f0, 0(TMP3) 4608 | lw SFARG1LO, LO(TMP3)
3575 | mtc1 RC, f2
3576 | lhu RD, -4+OFS_RD(PC) 4609 | lhu RD, -4+OFS_RD(PC)
3577 | beq TMP2, TISNIL, <1 // Skip holes in array part. 4610 | sw TISNUM, HI(RA)
4611 | sw RC, LO(RA)
4612 | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
3578 |. addiu RC, RC, 1 4613 |. addiu RC, RC, 1
3579 | cvt.d.w f2, f2 4614 | sw SFARG1HI, 8+HI(RA)
4615 | sw SFARG1LO, 8+LO(RA)
3580 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4616 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3581 | sdc1 f0, 8(RA)
3582 | decode_RD4b RD 4617 | decode_RD4b RD
3583 | addu RD, RD, TMP3 4618 | addu RD, RD, TMP3
3584 | sw RC, -8+LO(RA) // Update control var. 4619 | sw RC, -8+LO(RA) // Update control var.
3585 | addu PC, PC, RD 4620 | addu PC, PC, RD
3586 | sdc1 f2, 0(RA)
3587 |3: 4621 |3:
3588 | ins_next 4622 | ins_next
3589 | 4623 |
@@ -3598,18 +4632,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3598 | sll RB, RC, 3 4632 | sll RB, RC, 3
3599 | subu TMP3, TMP3, RB 4633 | subu TMP3, TMP3, RB
3600 | addu NODE:TMP3, TMP3, TMP2 4634 | addu NODE:TMP3, TMP3, TMP2
3601 | lw RB, HI(NODE:TMP3) 4635 | lw SFARG1HI, NODE:TMP3->val.u32.hi
3602 | ldc1 f0, 0(NODE:TMP3) 4636 | lw SFARG1LO, NODE:TMP3->val.u32.lo
3603 | lhu RD, -4+OFS_RD(PC) 4637 | lhu RD, -4+OFS_RD(PC)
3604 | beq RB, TISNIL, <6 // Skip holes in hash part. 4638 | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
3605 |. addiu RC, RC, 1 4639 |. addiu RC, RC, 1
3606 | ldc1 f2, NODE:TMP3->key 4640 | lw SFARG2HI, NODE:TMP3->key.u32.hi
4641 | lw SFARG2LO, NODE:TMP3->key.u32.lo
3607 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4642 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3608 | sdc1 f0, 8(RA) 4643 | sw SFARG1HI, 8+HI(RA)
4644 | sw SFARG1LO, 8+LO(RA)
3609 | addu RC, RC, TMP0 4645 | addu RC, RC, TMP0
3610 | decode_RD4b RD 4646 | decode_RD4b RD
3611 | addu RD, RD, TMP3 4647 | addu RD, RD, TMP3
3612 | sdc1 f2, 0(RA) 4648 | sw SFARG2HI, HI(RA)
4649 | sw SFARG2LO, LO(RA)
3613 | addu PC, PC, RD 4650 | addu PC, PC, RD
3614 | b <3 4651 | b <3
3615 |. sw RC, -8+LO(RA) // Update control var. 4652 |. sw RC, -8+LO(RA) // Update control var.
@@ -3634,9 +4671,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3634 | addiu CARG2, CARG2, -FF_next_N 4671 | addiu CARG2, CARG2, -FF_next_N
3635 | or CARG2, CARG2, CARG3 4672 | or CARG2, CARG2, CARG3
3636 | bnez CARG2, >5 4673 | bnez CARG2, >5
3637 |. lui TMP1, 0xfffe 4674 |. lui TMP1, (LJ_KEYINDEX >> 16)
3638 | addu PC, TMP0, TMP2 4675 | addu PC, TMP0, TMP2
3639 | ori TMP1, TMP1, 0x7fff 4676 | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
3640 | sw r0, -8+LO(RA) // Initialize control var. 4677 | sw r0, -8+LO(RA) // Initialize control var.
3641 | sw TMP1, -8+HI(RA) 4678 | sw TMP1, -8+HI(RA)
3642 |1: 4679 |1:
@@ -3645,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3645 | li TMP3, BC_JMP 4682 | li TMP3, BC_JMP
3646 | li TMP1, BC_ITERC 4683 | li TMP1, BC_ITERC
3647 | sb TMP3, -4+OFS_OP(PC) 4684 | sb TMP3, -4+OFS_OP(PC)
3648 | addu PC, TMP0, TMP2 4685 | addu PC, TMP0, TMP2
4686 |.if JIT
4687 | lb TMP0, OFS_OP(PC)
4688 | li AT, BC_ITERN
4689 | bne TMP0, AT, >6
4690 |. lhu TMP2, OFS_RD(PC)
4691 |.endif
3649 | b <1 4692 | b <1
3650 |. sb TMP1, OFS_OP(PC) 4693 |. sb TMP1, OFS_OP(PC)
4694 |.if JIT
4695 |6: // Unpatch JLOOP.
4696 | lw TMP0, DISPATCH_J(trace)(DISPATCH)
4697 | sll TMP2, TMP2, 2
4698 | addu TMP0, TMP0, TMP2
4699 | lw TRACE:TMP2, 0(TMP0)
4700 | lw TMP0, TRACE:TMP2->startins
4701 | li AT, -256
4702 | and TMP0, TMP0, AT
4703 | or TMP0, TMP0, TMP1
4704 | b <1
4705 |. sw TMP0, 0(PC)
4706 |.endif
3651 break; 4707 break;
3652 4708
3653 case BC_VARG: 4709 case BC_VARG:
@@ -3689,9 +4745,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3689 | bnez AT, >7 4745 | bnez AT, >7
3690 |. addiu MULTRES, TMP1, 8 4746 |. addiu MULTRES, TMP1, 8
3691 |6: 4747 |6:
3692 | ldc1 f0, 0(RC) 4748 | lw SFRETHI, HI(RC)
4749 | lw SFRETLO, LO(RC)
3693 | addiu RC, RC, 8 4750 | addiu RC, RC, 8
3694 | sdc1 f0, 0(RA) 4751 | sw SFRETHI, HI(RA)
4752 | sw SFRETLO, LO(RA)
3695 | sltu AT, RC, TMP3 4753 | sltu AT, RC, TMP3
3696 | bnez AT, <6 // More vararg slots? 4754 | bnez AT, <6 // More vararg slots?
3697 |. addiu RA, RA, 8 4755 |. addiu RA, RA, 8
@@ -3747,10 +4805,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3747 | beqz RC, >3 4805 | beqz RC, >3
3748 |. subu BASE, TMP2, TMP0 4806 |. subu BASE, TMP2, TMP0
3749 |2: 4807 |2:
3750 | ldc1 f0, 0(RA) 4808 | lw SFRETHI, HI(RA)
4809 | lw SFRETLO, LO(RA)
3751 | addiu RA, RA, 8 4810 | addiu RA, RA, 8
3752 | addiu RC, RC, -8 4811 | addiu RC, RC, -8
3753 | sdc1 f0, 0(TMP2) 4812 | sw SFRETHI, HI(TMP2)
4813 | sw SFRETLO, LO(TMP2)
3754 | bnez RC, <2 4814 | bnez RC, <2
3755 |. addiu TMP2, TMP2, 8 4815 |. addiu TMP2, TMP2, 8
3756 |3: 4816 |3:
@@ -3791,14 +4851,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3791 | lw INS, -4(PC) 4851 | lw INS, -4(PC)
3792 | addiu TMP2, BASE, -8 4852 | addiu TMP2, BASE, -8
3793 if (op == BC_RET1) { 4853 if (op == BC_RET1) {
3794 | ldc1 f0, 0(RA) 4854 | lw SFRETHI, HI(RA)
4855 | lw SFRETLO, LO(RA)
3795 } 4856 }
3796 | decode_RB8a RB, INS 4857 | decode_RB8a RB, INS
3797 | decode_RA8a RA, INS 4858 | decode_RA8a RA, INS
3798 | decode_RB8b RB 4859 | decode_RB8b RB
3799 | decode_RA8b RA 4860 | decode_RA8b RA
3800 if (op == BC_RET1) { 4861 if (op == BC_RET1) {
3801 | sdc1 f0, 0(TMP2) 4862 | sw SFRETHI, HI(TMP2)
4863 | sw SFRETLO, LO(TMP2)
3802 } 4864 }
3803 | subu BASE, TMP2, RA 4865 | subu BASE, TMP2, RA
3804 |5: 4866 |5:
@@ -3840,69 +4902,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3840 | // RA = base*8, RD = target (after end of loop or start of loop) 4902 | // RA = base*8, RD = target (after end of loop or start of loop)
3841 vk = (op == BC_IFORL || op == BC_JFORL); 4903 vk = (op == BC_IFORL || op == BC_JFORL);
3842 | addu RA, BASE, RA 4904 | addu RA, BASE, RA
3843 if (vk) { 4905 | lw SFARG1HI, FORL_IDX*8+HI(RA)
3844 | ldc1 f0, FORL_IDX*8(RA) 4906 | lw SFARG1LO, FORL_IDX*8+LO(RA)
3845 | ldc1 f4, FORL_STEP*8(RA)
3846 | ldc1 f2, FORL_STOP*8(RA)
3847 | lw TMP3, FORL_STEP*8+HI(RA)
3848 | add.d f0, f0, f4
3849 | sdc1 f0, FORL_IDX*8(RA)
3850 } else {
3851 | lw TMP1, FORL_IDX*8+HI(RA)
3852 | lw TMP3, FORL_STEP*8+HI(RA)
3853 | lw TMP2, FORL_STOP*8+HI(RA)
3854 | sltiu TMP1, TMP1, LJ_TISNUM
3855 | sltiu TMP0, TMP3, LJ_TISNUM
3856 | sltiu TMP2, TMP2, LJ_TISNUM
3857 | and TMP1, TMP1, TMP0
3858 | and TMP1, TMP1, TMP2
3859 | ldc1 f0, FORL_IDX*8(RA)
3860 | beqz TMP1, ->vmeta_for
3861 |. ldc1 f2, FORL_STOP*8(RA)
3862 }
3863 if (op != BC_JFORL) { 4907 if (op != BC_JFORL) {
3864 | srl RD, RD, 1 4908 | srl RD, RD, 1
3865 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) 4909 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4910 | addu TMP2, RD, TMP2
4911 }
4912 if (!vk) {
4913 | lw SFARG2HI, FORL_STOP*8+HI(RA)
4914 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4915 | bne SFARG1HI, TISNUM, >5
4916 |. lw SFRETHI, FORL_STEP*8+HI(RA)
4917 | xor AT, SFARG2HI, TISNUM
4918 | lw SFRETLO, FORL_STEP*8+LO(RA)
4919 | xor TMP0, SFRETHI, TISNUM
4920 | or AT, AT, TMP0
4921 | bnez AT, ->vmeta_for
4922 |. slt AT, SFRETLO, r0
4923 | slt CRET1, SFARG2LO, SFARG1LO
4924 | slt TMP1, SFARG1LO, SFARG2LO
4925 | movn CRET1, TMP1, AT
4926 } else {
4927 | bne SFARG1HI, TISNUM, >5
4928 |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4929 | lw SFRETLO, FORL_STOP*8+LO(RA)
4930 | move TMP3, SFARG1LO
4931 | addu SFARG1LO, SFARG1LO, SFARG2LO
4932 | xor TMP0, SFARG1LO, TMP3
4933 | xor TMP1, SFARG1LO, SFARG2LO
4934 | and TMP0, TMP0, TMP1
4935 | slt TMP1, SFARG1LO, SFRETLO
4936 | slt CRET1, SFRETLO, SFARG1LO
4937 | slt AT, SFARG2LO, r0
4938 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4939 | movn CRET1, TMP1, AT
4940 | or CRET1, CRET1, TMP0
4941 }
4942 |1:
4943 if (op == BC_FORI) {
4944 | movz TMP2, r0, CRET1
4945 | addu PC, PC, TMP2
4946 } else if (op == BC_JFORI) {
4947 | addu PC, PC, TMP2
4948 | lhu RD, -4+OFS_RD(PC)
4949 } else if (op == BC_IFORL) {
4950 | movn TMP2, r0, CRET1
4951 | addu PC, PC, TMP2
4952 }
4953 if (vk) {
4954 | sw SFARG1HI, FORL_IDX*8+HI(RA)
4955 | sw SFARG1LO, FORL_IDX*8+LO(RA)
3866 } 4956 }
3867 | c.le.d 0, f0, f2 4957 | ins_next1
3868 | c.le.d 1, f2, f0 4958 | sw SFARG1HI, FORL_EXT*8+HI(RA)
3869 | sdc1 f0, FORL_EXT*8(RA) 4959 | sw SFARG1LO, FORL_EXT*8+LO(RA)
4960 |2:
3870 if (op == BC_JFORI) { 4961 if (op == BC_JFORI) {
3871 | li TMP1, 1 4962 | beqz CRET1, =>BC_JLOOP
3872 | li TMP2, 1
3873 | addu TMP0, RD, TMP0
3874 | slt TMP3, TMP3, r0
3875 | movf TMP1, r0, 0
3876 | addu PC, PC, TMP0
3877 | movf TMP2, r0, 1
3878 | lhu RD, -4+OFS_RD(PC)
3879 | movn TMP1, TMP2, TMP3
3880 | bnez TMP1, =>BC_JLOOP
3881 |. decode_RD8b RD 4963 |. decode_RD8b RD
3882 } else if (op == BC_JFORL) { 4964 } else if (op == BC_JFORL) {
3883 | li TMP1, 1 4965 | beqz CRET1, =>BC_JLOOP
3884 | li TMP2, 1 4966 }
3885 | slt TMP3, TMP3, r0 4967 | ins_next2
3886 | movf TMP1, r0, 0 4968 |
3887 | movf TMP2, r0, 1 4969 |5: // FP loop.
3888 | movn TMP1, TMP2, TMP3 4970 |.if FPU
3889 | bnez TMP1, =>BC_JLOOP 4971 if (!vk) {
4972 | ldc1 f0, FORL_IDX*8(RA)
4973 | ldc1 f2, FORL_STOP*8(RA)
4974 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4975 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4976 | sltiu AT, SFRETHI, LJ_TISNUM
4977 | and TMP0, TMP0, TMP1
4978 | and AT, AT, TMP0
4979 | beqz AT, ->vmeta_for
4980 |. slt TMP3, SFRETHI, r0
4981 | c.ole.d 0, f0, f2
4982 | c.ole.d 1, f2, f0
4983 | li CRET1, 1
4984 | movt CRET1, r0, 0
4985 | movt AT, r0, 1
4986 | b <1
4987 |. movn CRET1, AT, TMP3
4988 } else {
4989 | ldc1 f0, FORL_IDX*8(RA)
4990 | ldc1 f4, FORL_STEP*8(RA)
4991 | ldc1 f2, FORL_STOP*8(RA)
4992 | lw SFARG2HI, FORL_STEP*8+HI(RA)
4993 | add.d f0, f0, f4
4994 | c.ole.d 0, f0, f2
4995 | c.ole.d 1, f2, f0
4996 | slt TMP3, SFARG2HI, r0
4997 | li CRET1, 1
4998 | li AT, 1
4999 | movt CRET1, r0, 0
5000 | movt AT, r0, 1
5001 | movn CRET1, AT, TMP3
5002 if (op == BC_IFORL) {
5003 | movn TMP2, r0, CRET1
5004 | addu PC, PC, TMP2
5005 }
5006 | sdc1 f0, FORL_IDX*8(RA)
5007 | ins_next1
5008 | b <2
5009 |. sdc1 f0, FORL_EXT*8(RA)
5010 }
5011 |.else
5012 if (!vk) {
5013 | sltiu TMP0, SFARG1HI, LJ_TISNUM
5014 | sltiu TMP1, SFARG2HI, LJ_TISNUM
5015 | sltiu AT, SFRETHI, LJ_TISNUM
5016 | and TMP0, TMP0, TMP1
5017 | and AT, AT, TMP0
5018 | beqz AT, ->vmeta_for
5019 |. nop
5020 | bal ->vm_sfcmpolex
5021 |. move TMP3, SFRETHI
5022 | b <1
3890 |. nop 5023 |. nop
3891 } else { 5024 } else {
3892 | addu TMP1, RD, TMP0 5025 | lw SFARG2HI, FORL_STEP*8+HI(RA)
3893 | slt TMP3, TMP3, r0 5026 | load_got __adddf3
3894 | move TMP2, TMP1 5027 | call_extern
3895 if (op == BC_FORI) { 5028 |. sw TMP2, ARG5
3896 | movt TMP1, r0, 0 5029 | lw SFARG2HI, FORL_STOP*8+HI(RA)
3897 | movt TMP2, r0, 1 5030 | lw SFARG2LO, FORL_STOP*8+LO(RA)
5031 | move SFARG1HI, SFRETHI
5032 | move SFARG1LO, SFRETLO
5033 | bal ->vm_sfcmpolex
5034 |. lw TMP3, FORL_STEP*8+HI(RA)
5035 if ( op == BC_JFORL ) {
5036 | lhu RD, -4+OFS_RD(PC)
5037 | lw TMP2, ARG5
5038 | b <1
5039 |. decode_RD8b RD
3898 } else { 5040 } else {
3899 | movf TMP1, r0, 0 5041 | b <1
3900 | movf TMP2, r0, 1 5042 |. lw TMP2, ARG5
3901 } 5043 }
3902 | movn TMP1, TMP2, TMP3
3903 | addu PC, PC, TMP1
3904 } 5044 }
3905 | ins_next 5045 |.endif
3906 break; 5046 break;
3907 5047
3908 case BC_ITERL: 5048 case BC_ITERL:
@@ -3961,8 +5101,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3961 | sw AT, DISPATCH_GL(vmstate)(DISPATCH) 5101 | sw AT, DISPATCH_GL(vmstate)(DISPATCH)
3962 | lw TRACE:TMP2, 0(TMP1) 5102 | lw TRACE:TMP2, 0(TMP1)
3963 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5103 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3964 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3965 | lw TMP2, TRACE:TMP2->mcode 5104 | lw TMP2, TRACE:TMP2->mcode
5105 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3966 | jr TMP2 5106 | jr TMP2
3967 |. addiu JGL, DISPATCH, GG_DISP2G+32768 5107 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3968 |.endif 5108 |.endif
@@ -4088,6 +5228,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4088 | li_vmstate INTERP 5228 | li_vmstate INTERP
4089 | lw PC, FRAME_PC(BASE) // Fetch PC of caller. 5229 | lw PC, FRAME_PC(BASE) // Fetch PC of caller.
4090 | subu RA, TMP1, RD // RA = L->top - nresults*8 5230 | subu RA, TMP1, RD // RA = L->top - nresults*8
5231 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
4091 | b ->vm_returnc 5232 | b ->vm_returnc
4092 |. st_vmstate 5233 |. st_vmstate
4093 break; 5234 break;
@@ -4150,8 +5291,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4150 fcofs, CFRAME_SIZE); 5291 fcofs, CFRAME_SIZE);
4151 for (i = 23; i >= 16; i--) 5292 for (i = 23; i >= 16; i--)
4152 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5293 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5294#if !LJ_SOFTFP
4153 for (i = 30; i >= 20; i -= 2) 5295 for (i = 30; i >= 20; i -= 2)
4154 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5296 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5297#endif
4155 fprintf(ctx->fp, 5298 fprintf(ctx->fp,
4156 "\t.align 2\n" 5299 "\t.align 2\n"
4157 ".LEFDE0:\n\n"); 5300 ".LEFDE0:\n\n");
@@ -4203,8 +5346,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4203 fcofs, CFRAME_SIZE); 5346 fcofs, CFRAME_SIZE);
4204 for (i = 23; i >= 16; i--) 5347 for (i = 23; i >= 16; i--)
4205 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5348 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5349#if !LJ_SOFTFP
4206 for (i = 30; i >= 20; i -= 2) 5350 for (i = 30; i >= 20; i -= 2)
4207 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5351 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5352#endif
4208 fprintf(ctx->fp, 5353 fprintf(ctx->fp,
4209 "\t.align 2\n" 5354 "\t.align 2\n"
4210 ".LEFDE2:\n\n"); 5355 ".LEFDE2:\n\n");