about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mike Pall <mike> 2015-12-17 22:42:20 +0100
committer Mike Pall <mike> 2015-12-17 22:42:20 +0100
commit 3f5c72421e282a2a4d8614064f13097678b80be1 (patch)
tree ca2ddfad89bad2085b2c0660d5eed67a8d218c8d
parent 126e55d416ad10dc9265593b73b9f322dbf9d658 (diff)
download luajit-3f5c72421e282a2a4d8614064f13097678b80be1.tar.gz
luajit-3f5c72421e282a2a4d8614064f13097678b80be1.tar.bz2
luajit-3f5c72421e282a2a4d8614064f13097678b80be1.zip
MIPS soft-float, part 1: Add soft-float support to interpreter.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. Sponsored by Cisco Systems, Inc.
-rw-r--r-- src/lj_arch.h 10
-rw-r--r-- src/lj_dispatch.h 18
-rw-r--r-- src/lj_frame.h 11
-rw-r--r-- src/lj_ircall.h 16
-rw-r--r-- src/lj_vm.h 2
-rw-r--r-- src/vm_mips.dasc 1356
6 files changed, 1189 insertions, 224 deletions
diff --git a/src/lj_arch.h b/src/lj_arch.h
index c66a11c8..a114bdda 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -304,6 +304,13 @@
304#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 304#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
305#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE 305#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
306 306
307#if !defined(LJ_ARCH_HASFPU) && defined(__mips_soft_float)
308#define LJ_ARCH_HASFPU 0
309#endif
310#if !defined(LJ_ABI_SOFTFP) && defined(__mips_soft_float)
311#define LJ_ABI_SOFTFP 1
312#endif
313
307#if _MIPS_ARCH_MIPS32R2 314#if _MIPS_ARCH_MIPS32R2
308#define LJ_ARCH_VERSION 20 315#define LJ_ARCH_VERSION 20
309#else 316#else
@@ -386,9 +393,6 @@
386#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" 393#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
387#endif 394#endif
388#elif LJ_TARGET_MIPS 395#elif LJ_TARGET_MIPS
389#if defined(__mips_soft_float)
390#error "No support for MIPS CPUs without FPU"
391#endif
392#if defined(_LP64) 396#if defined(_LP64)
393#error "No support for MIPS64" 397#error "No support for MIPS64"
394#endif 398#endif
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 1e247e38..73d00ec0 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -14,6 +14,21 @@
14 14
15#if LJ_TARGET_MIPS 15#if LJ_TARGET_MIPS
16/* Need our own global offset table for the dreaded MIPS calling conventions. */ 16/* Need our own global offset table for the dreaded MIPS calling conventions. */
17#if LJ_SOFTFP
18extern double __adddf3(double a, double b);
19extern double __subdf3(double a, double b);
20extern double __muldf3(double a, double b);
21extern double __divdf3(double a, double b);
22extern void __ledf2(double a, double b);
23extern double __floatsidf(int32_t a);
24extern int32_t __fixdfsi(double a);
25
26#define SFGOTDEF(_) \
27 _(lj_num2bit) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) _(__ledf2) \
28 _(__floatsidf) _(__fixdfsi)
29#else
30#define SFGOTDEF(_)
31#endif
17#if LJ_HASJIT 32#if LJ_HASJIT
18#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) 33#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot)
19#else 34#else
@@ -39,7 +54,8 @@
39 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ 54 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
40 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 55 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
41 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ 56 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
42 _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_) 57 _(lj_buf_putstr_upper) _(lj_buf_tostr) \
58 JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_)
43 59
44enum { 60enum {
45#define GOTENUM(name) LJ_GOT_##name, 61#define GOTENUM(name) LJ_GOT_##name,
diff --git a/src/lj_frame.h b/src/lj_frame.h
index a86c36be..aa3ab20b 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -218,6 +218,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
218#define CFRAME_SHIFT_MULTRES 3 218#define CFRAME_SHIFT_MULTRES 3
219#endif 219#endif
220#elif LJ_TARGET_MIPS 220#elif LJ_TARGET_MIPS
221#if LJ_ARCH_HASFPU
221#define CFRAME_OFS_ERRF 124 222#define CFRAME_OFS_ERRF 124
222#define CFRAME_OFS_NRES 120 223#define CFRAME_OFS_NRES 120
223#define CFRAME_OFS_PREV 116 224#define CFRAME_OFS_PREV 116
@@ -227,6 +228,16 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
227#define CFRAME_SIZE 112 228#define CFRAME_SIZE 112
228#define CFRAME_SHIFT_MULTRES 3 229#define CFRAME_SHIFT_MULTRES 3
229#else 230#else
231#define CFRAME_OFS_ERRF 100
232#define CFRAME_OFS_NRES 96
233#define CFRAME_OFS_PREV 92
234#define CFRAME_OFS_L 88
235#define CFRAME_OFS_PC 44
236#define CFRAME_OFS_MULTRES 16
237#define CFRAME_SIZE 88
238#define CFRAME_SHIFT_MULTRES 3
239#endif
240#else
230#error "Missing CFRAME_* definitions for this architecture" 241#error "Missing CFRAME_* definitions for this architecture"
231#endif 242#endif
232 243
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 84e41ecf..1f44b03d 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -270,6 +270,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
270#define fp64_f2l __aeabi_f2lz 270#define fp64_f2l __aeabi_f2lz
271#define fp64_f2ul __aeabi_f2ulz 271#define fp64_f2ul __aeabi_f2ulz
272#endif 272#endif
273#elif LJ_TARGET_MIPS
274#define softfp_add __adddf3
275#define softfp_sub __subdf3
276#define softfp_mul __muldf3
277#define softfp_div __divdf3
278#define softfp_cmp __ledf2
279#define softfp_i2d __floatsidf
280#define softfp_d2i __fixdfsi
281#define softfp_ui2d __floatunsidf
282#define softfp_f2d __extendsfdf2
283#define softfp_d2ui __fixunsdfsi
284#define softfp_d2f __truncdfsf2
285#define softfp_i2f __floatsisf
286#define softfp_ui2f __floatunsisf
287#define softfp_f2i __fixsfsi
288#define softfp_f2ui __fixunssfsi
273#else 289#else
274#error "Missing soft-float definitions for target architecture" 290#error "Missing soft-float definitions for target architecture"
275#endif 291#endif
diff --git a/src/lj_vm.h b/src/lj_vm.h
index b31e22f7..cb76d7a7 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -50,7 +50,7 @@ LJ_ASMF void lj_vm_exit_handler(void);
50LJ_ASMF void lj_vm_exit_interp(void); 50LJ_ASMF void lj_vm_exit_interp(void);
51 51
52/* Internal math helper functions. */ 52/* Internal math helper functions. */
53#if LJ_TARGET_PPC || LJ_TARGET_ARM64 53#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
54#define lj_vm_floor floor 54#define lj_vm_floor floor
55#define lj_vm_ceil ceil 55#define lj_vm_ceil ceil
56#else 56#else
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 134ed569..0dba1293 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,9 @@
1|// Low-level VM code for MIPS CPUs. 1|// Low-level VM code for MIPS CPUs.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// MIPS soft-float support contributed by Djordje Kovacevic and
6|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4| 7|
5|.arch mips 8|.arch mips
6|.section code_op, code_sub 9|.section code_op, code_sub
@@ -18,6 +21,12 @@
18|// Fixed register assignments for the interpreter. 21|// Fixed register assignments for the interpreter.
19|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra 22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20| 23|
24|.macro .FPU, a, b
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
21|// The following must be C callee-save (but BASE is often refetched). 30|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r16 // Base of current Lua stack frame. 31|.define BASE, r16 // Base of current Lua stack frame.
23|.define KBASE, r17 // Constants of current Lua function. 32|.define KBASE, r17 // Constants of current Lua function.
@@ -31,7 +40,9 @@
31| 40|
32|// Constants for type-comparisons, stores and conversions. C callee-save. 41|// Constants for type-comparisons, stores and conversions. C callee-save.
33|.define TISNIL, r30 42|.define TISNIL, r30
43|.if FPU
34|.define TOBIT, f30 // 2^52 + 2^51. 44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
35| 46|
36|// The following temporaries are not saved across C calls, except for RA. 47|// The following temporaries are not saved across C calls, except for RA.
37|.define RA, r23 // Callee-save. 48|.define RA, r23 // Callee-save.
@@ -46,6 +57,13 @@
46|.define TMP2, r14 57|.define TMP2, r14
47|.define TMP3, r15 58|.define TMP3, r15
48| 59|
60|.if not FPU
61|.define SFT1, r2
62|.define SFT2, r3
63|.define SFT3, r4
64|.define SFT4, r5
65|.endif
66|
49|// Calling conventions. 67|// Calling conventions.
50|.define CFUNCADDR, r25 68|.define CFUNCADDR, r25
51|.define CARG1, r4 69|.define CARG1, r4
@@ -56,13 +74,16 @@
56|.define CRET1, r2 74|.define CRET1, r2
57|.define CRET2, r3 75|.define CRET2, r3
58| 76|
77|.if FPU
59|.define FARG1, f12 78|.define FARG1, f12
60|.define FARG2, f14 79|.define FARG2, f14
61| 80|
62|.define FRET1, f0 81|.define FRET1, f0
63|.define FRET2, f2 82|.define FRET2, f2
83|.endif
64| 84|
65|// Stack layout while in interpreter. Must match with lj_frame.h. 85|// Stack layout while in interpreter. Must match with lj_frame.h.
86|.if FPU // MIPS32 hard-float.
66|.define CFRAME_SPACE, 112 // Delta for sp. 87|.define CFRAME_SPACE, 112 // Delta for sp.
67| 88|
68|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. 89|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -83,43 +104,76 @@
83|.define ARG5_OFS, 16 104|.define ARG5_OFS, 16
84|.define SAVE_MULTRES, ARG5 105|.define SAVE_MULTRES, ARG5
85| 106|
107|//-----------------------------------------------------------------------
108|.else // MIPS32 soft-float.
109|
110|.define CFRAME_SPACE, 88 // Delta for sp.
111|
112|.define SAVE_ERRF, 100(sp) // 32 bit C frame info.
113|.define SAVE_NRES, 96(sp)
114|.define SAVE_CFRAME, 92(sp)
115|.define SAVE_L, 88(sp)
116|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
117|.define SAVE_GPR_, 48 // .. 48+10*4: 32 bit GPR saves.
118|.define SAVE_PC, 44(sp)
119|.define TEMP_SAVE_6, 40(sp)
120|.define TEMP_SAVE_5, 36(sp)
121|.define TEMP_SAVE_4, 32(sp)
122|.define TEMP_SAVE_3, 28(sp)
123|.define TEMP_SAVE_2, 24(sp)
124|.define TEMP_SAVE_1, 20(sp)
125|//----- 8 byte aligned, ^^^^ 24 byte register save area, owned by caller.
126|.define ARG5, 16(sp)
127|.define CSAVE_4, 12(sp)
128|.define CSAVE_3, 8(sp)
129|.define CSAVE_2, 4(sp)
130|.define CSAVE_1, 0(sp)
131|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee.
132|
133|.define ARG5_OFS, 16
134|.define SAVE_MULTRES, ARG5
135|
136|.endif
137|
138|//-----------------------------------------------------------------------
139|
86|.macro saveregs 140|.macro saveregs
87| addiu sp, sp, -CFRAME_SPACE 141| addiu sp, sp, -CFRAME_SPACE
88| sw ra, SAVE_GPR_+9*4(sp) 142| sw ra, SAVE_GPR_+9*4(sp)
89| sw r30, SAVE_GPR_+8*4(sp) 143| sw r30, SAVE_GPR_+8*4(sp)
90| sdc1 f30, SAVE_FPR_+5*8(sp) 144| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91| sw r23, SAVE_GPR_+7*4(sp) 145| sw r23, SAVE_GPR_+7*4(sp)
92| sw r22, SAVE_GPR_+6*4(sp) 146| sw r22, SAVE_GPR_+6*4(sp)
93| sdc1 f28, SAVE_FPR_+4*8(sp) 147| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94| sw r21, SAVE_GPR_+5*4(sp) 148| sw r21, SAVE_GPR_+5*4(sp)
95| sw r20, SAVE_GPR_+4*4(sp) 149| sw r20, SAVE_GPR_+4*4(sp)
96| sdc1 f26, SAVE_FPR_+3*8(sp) 150| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97| sw r19, SAVE_GPR_+3*4(sp) 151| sw r19, SAVE_GPR_+3*4(sp)
98| sw r18, SAVE_GPR_+2*4(sp) 152| sw r18, SAVE_GPR_+2*4(sp)
99| sdc1 f24, SAVE_FPR_+2*8(sp) 153| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100| sw r17, SAVE_GPR_+1*4(sp) 154| sw r17, SAVE_GPR_+1*4(sp)
101| sw r16, SAVE_GPR_+0*4(sp) 155| sw r16, SAVE_GPR_+0*4(sp)
102| sdc1 f22, SAVE_FPR_+1*8(sp) 156| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
103| sdc1 f20, SAVE_FPR_+0*8(sp) 157| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104|.endmacro 158|.endmacro
105| 159|
106|.macro restoreregs_ret 160|.macro restoreregs_ret
107| lw ra, SAVE_GPR_+9*4(sp) 161| lw ra, SAVE_GPR_+9*4(sp)
108| lw r30, SAVE_GPR_+8*4(sp) 162| lw r30, SAVE_GPR_+8*4(sp)
109| ldc1 f30, SAVE_FPR_+5*8(sp) 163| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110| lw r23, SAVE_GPR_+7*4(sp) 164| lw r23, SAVE_GPR_+7*4(sp)
111| lw r22, SAVE_GPR_+6*4(sp) 165| lw r22, SAVE_GPR_+6*4(sp)
112| ldc1 f28, SAVE_FPR_+4*8(sp) 166| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113| lw r21, SAVE_GPR_+5*4(sp) 167| lw r21, SAVE_GPR_+5*4(sp)
114| lw r20, SAVE_GPR_+4*4(sp) 168| lw r20, SAVE_GPR_+4*4(sp)
115| ldc1 f26, SAVE_FPR_+3*8(sp) 169| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116| lw r19, SAVE_GPR_+3*4(sp) 170| lw r19, SAVE_GPR_+3*4(sp)
117| lw r18, SAVE_GPR_+2*4(sp) 171| lw r18, SAVE_GPR_+2*4(sp)
118| ldc1 f24, SAVE_FPR_+2*8(sp) 172| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119| lw r17, SAVE_GPR_+1*4(sp) 173| lw r17, SAVE_GPR_+1*4(sp)
120| lw r16, SAVE_GPR_+0*4(sp) 174| lw r16, SAVE_GPR_+0*4(sp)
121| ldc1 f22, SAVE_FPR_+1*8(sp) 175| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
122| ldc1 f20, SAVE_FPR_+0*8(sp) 176| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123| jr ra 177| jr ra
124| addiu sp, sp, CFRAME_SPACE 178| addiu sp, sp, CFRAME_SPACE
125|.endmacro 179|.endmacro
@@ -270,6 +324,61 @@
270|.macro call_extern; jalr CFUNCADDR; .endmacro 324|.macro call_extern; jalr CFUNCADDR; .endmacro
271|.macro jmp_extern; jr CFUNCADDR; .endmacro 325|.macro jmp_extern; jr CFUNCADDR; .endmacro
272| 326|
327|// Converts int from given reg to double, result in CRET1 and CRET2 regs.
328|.if not FPU
329|.macro cvti2d, arg
330| load_got __floatsidf
331| call_extern
332|. move CARG1, arg
333|.endmacro
334|.endif
335|
336|// Loads a double-word floating-point value.
337|.macro load_double, fpr, gpr1, gpr2, src
338|.if FPU
339| ldc1 fpr, src
340|.else
341| lw gpr1, src
342| lw gpr2, 4+src
343|.endif
344|.endmacro
345|
346|// Stores a double-word floating-point value.
347|.macro store_double, fpr, gpr1, gpr2, dst
348|.if FPU
349| sdc1 fpr, dst
350|.else
351| sw gpr1, dst
352| sw gpr2, 4+dst
353|.endif
354|.endmacro
355|
356|// Loads the first double-word floating-point argument.
357|.macro load_farg1, src
358| load_double FARG1, CARG1, CARG2, src
359|.endmacro
360|
361|// Loads the second double-word floating-point argument.
362|.macro load_farg2, src
363| load_double FARG2, CARG3, CARG4, src
364|.endmacro
365|
366|.macro load_double1, src
367| load_double f0, SFT1, SFT2, src
368|.endmacro
369|
370|.macro store_double1, dst
371| store_double f0, SFT1, SFT2, dst
372|.endmacro
373|
374|.macro load_double2, src
375| load_double f2, SFT3, SFT4, src
376|.endmacro
377|
378|.macro store_double2, dst
379| store_double f2, SFT3, SFT4, dst
380|.endmacro
381|
273|.macro hotcheck, delta, target 382|.macro hotcheck, delta, target
274| srl TMP1, PC, 1 383| srl TMP1, PC, 1
275| andi TMP1, TMP1, 126 384| andi TMP1, TMP1, 126
@@ -354,9 +463,9 @@ static void build_subroutines(BuildCtx *ctx)
354 |. sll TMP2, TMP2, 3 463 |. sll TMP2, TMP2, 3
355 |1: 464 |1:
356 | addiu TMP1, TMP1, -8 465 | addiu TMP1, TMP1, -8
357 | ldc1 f0, 0(RA) 466 | load_double1 0(RA)
358 | addiu RA, RA, 8 467 | addiu RA, RA, 8
359 | sdc1 f0, 0(BASE) 468 | store_double1 0(BASE)
360 | bnez TMP1, <1 469 | bnez TMP1, <1
361 |. addiu BASE, BASE, 8 470 |. addiu BASE, BASE, 8
362 | 471 |
@@ -425,15 +534,15 @@ static void build_subroutines(BuildCtx *ctx)
425 | and sp, CARG1, AT 534 | and sp, CARG1, AT
426 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 535 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
427 | lw L, SAVE_L 536 | lw L, SAVE_L
428 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 537 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
429 | li TISNIL, LJ_TNIL 538 | li TISNIL, LJ_TNIL
430 | lw BASE, L->base 539 | lw BASE, L->base
431 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 540 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
432 | mtc1 TMP3, TOBIT 541 | .FPU mtc1 TMP3, TOBIT
433 | li TMP1, LJ_TFALSE 542 | li TMP1, LJ_TFALSE
434 | li_vmstate INTERP 543 | li_vmstate INTERP
435 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. 544 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
436 | cvt.d.s TOBIT, TOBIT 545 | .FPU cvt.d.s TOBIT, TOBIT
437 | addiu RA, BASE, -8 // Results start at BASE-8. 546 | addiu RA, BASE, -8 // Results start at BASE-8.
438 | addiu DISPATCH, DISPATCH, GG_G2DISP 547 | addiu DISPATCH, DISPATCH, GG_G2DISP
439 | sw TMP1, HI(RA) // Prepend false to error message. 548 | sw TMP1, HI(RA) // Prepend false to error message.
@@ -498,11 +607,11 @@ static void build_subroutines(BuildCtx *ctx)
498 | lw BASE, L->base 607 | lw BASE, L->base
499 | lw TMP1, L->top 608 | lw TMP1, L->top
500 | lw PC, FRAME_PC(BASE) 609 | lw PC, FRAME_PC(BASE)
501 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 610 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
502 | subu RD, TMP1, BASE 611 | subu RD, TMP1, BASE
503 | mtc1 TMP3, TOBIT 612 | .FPU mtc1 TMP3, TOBIT
504 | sb r0, L->status 613 | sb r0, L->status
505 | cvt.d.s TOBIT, TOBIT 614 | .FPU cvt.d.s TOBIT, TOBIT
506 | li_vmstate INTERP 615 | li_vmstate INTERP
507 | addiu RD, RD, 8 616 | addiu RD, RD, 8
508 | st_vmstate 617 | st_vmstate
@@ -540,13 +649,13 @@ static void build_subroutines(BuildCtx *ctx)
540 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 649 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
541 | sw L, DISPATCH_GL(cur_L)(DISPATCH) 650 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
542 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 651 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
543 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 652 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
544 | lw TMP1, L->top 653 | lw TMP1, L->top
545 | mtc1 TMP3, TOBIT 654 | .FPU mtc1 TMP3, TOBIT
546 | addu PC, PC, BASE 655 | addu PC, PC, BASE
547 | subu NARGS8:RC, TMP1, BASE 656 | subu NARGS8:RC, TMP1, BASE
548 | subu PC, PC, TMP2 // PC = frame delta + frame type 657 | subu PC, PC, TMP2 // PC = frame delta + frame type
549 | cvt.d.s TOBIT, TOBIT 658 | .FPU cvt.d.s TOBIT, TOBIT
550 | li_vmstate INTERP 659 | li_vmstate INTERP
551 | li TISNIL, LJ_TNIL 660 | li TISNIL, LJ_TNIL
552 | st_vmstate 661 | st_vmstate
@@ -628,7 +737,7 @@ static void build_subroutines(BuildCtx *ctx)
628 |->cont_cat: // RA = resultptr, RB = meta base 737 |->cont_cat: // RA = resultptr, RB = meta base
629 | lw INS, -4(PC) 738 | lw INS, -4(PC)
630 | addiu CARG2, RB, -16 739 | addiu CARG2, RB, -16
631 | ldc1 f0, 0(RA) 740 | load_double1 0(RA)
632 | decode_RB8a MULTRES, INS 741 | decode_RB8a MULTRES, INS
633 | decode_RA8a RA, INS 742 | decode_RA8a RA, INS
634 | decode_RB8b MULTRES 743 | decode_RB8b MULTRES
@@ -636,11 +745,21 @@ static void build_subroutines(BuildCtx *ctx)
636 | addu TMP1, BASE, MULTRES 745 | addu TMP1, BASE, MULTRES
637 | sw BASE, L->base 746 | sw BASE, L->base
638 | subu CARG3, CARG2, TMP1 747 | subu CARG3, CARG2, TMP1
748 |.if FPU
639 | bne TMP1, CARG2, ->BC_CAT_Z 749 | bne TMP1, CARG2, ->BC_CAT_Z
640 |. sdc1 f0, 0(CARG2) 750 |. sdc1 f0, 0(CARG2)
641 | addu RA, BASE, RA 751 | addu RA, BASE, RA
642 | b ->cont_nop 752 | b ->cont_nop
643 |. sdc1 f0, 0(RA) 753 |. sdc1 f0, 0(RA)
754 |.else
755 | sw SFT1, 0(CARG2)
756 | bne TMP1, CARG2, ->BC_CAT_Z
757 |. sw SFT2, 4(CARG2)
758 | addu RA, BASE, RA
759 | sw SFT1, 0(RA)
760 | b ->cont_nop
761 |. sw SFT2, 4(RA)
762 |.endif
644 | 763 |
645 |//-- Table indexing metamethods ----------------------------------------- 764 |//-- Table indexing metamethods -----------------------------------------
646 | 765 |
@@ -663,10 +782,19 @@ static void build_subroutines(BuildCtx *ctx)
663 |. sw TMP1, HI(CARG3) 782 |. sw TMP1, HI(CARG3)
664 | 783 |
665 |->vmeta_tgetb: // TMP0 = index 784 |->vmeta_tgetb: // TMP0 = index
785 |.if FPU
666 | mtc1 TMP0, f0 786 | mtc1 TMP0, f0
667 | cvt.d.w f0, f0 787 | cvt.d.w f0, f0
668 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 788 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
669 | sdc1 f0, 0(CARG3) 789 | sdc1 f0, 0(CARG3)
790 |.else
791 | sw CARG2, TEMP_SAVE_1 //needed to be saved because it's used later in lj_meta_tget
792 | cvti2d TMP0
793 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
794 | sw CRET1, 0(CARG3)
795 | sw CRET2, 4(CARG3)
796 | lw CARG2, TEMP_SAVE_1
797 |.endif
670 | 798 |
671 |->vmeta_tgetv: 799 |->vmeta_tgetv:
672 |1: 800 |1:
@@ -678,9 +806,9 @@ static void build_subroutines(BuildCtx *ctx)
678 | // Returns TValue * (finished) or NULL (metamethod). 806 | // Returns TValue * (finished) or NULL (metamethod).
679 | beqz CRET1, >3 807 | beqz CRET1, >3
680 |. addiu TMP1, BASE, -FRAME_CONT 808 |. addiu TMP1, BASE, -FRAME_CONT
681 | ldc1 f0, 0(CRET1) 809 | load_double2 0(CRET1)
682 | ins_next1 810 | ins_next1
683 | sdc1 f0, 0(RA) 811 | store_double2 0(RA)
684 | ins_next2 812 | ins_next2
685 | 813 |
686 |3: // Call __index metamethod. 814 |3: // Call __index metamethod.
@@ -699,8 +827,14 @@ static void build_subroutines(BuildCtx *ctx)
699 | // Returns cTValue * or NULL. 827 | // Returns cTValue * or NULL.
700 | beqz CRET1, >1 828 | beqz CRET1, >1
701 |. nop 829 |. nop
830 |.if FPU
702 | b ->BC_TGETR_Z 831 | b ->BC_TGETR_Z
703 |. ldc1 f0, 0(CRET1) 832 |. ldc1 f0, 0(CRET1)
833 |.else
834 | lw SFT1, 0(CRET1)
835 | b ->BC_TGETR_Z
836 |. lw SFT2, 4(CRET1)
837 |.endif
704 | 838 |
705 |//----------------------------------------------------------------------- 839 |//-----------------------------------------------------------------------
706 | 840 |
@@ -723,10 +857,19 @@ static void build_subroutines(BuildCtx *ctx)
723 |. sw TMP1, HI(CARG3) 857 |. sw TMP1, HI(CARG3)
724 | 858 |
725 |->vmeta_tsetb: // TMP0 = index 859 |->vmeta_tsetb: // TMP0 = index
860 |.if FPU
726 | mtc1 TMP0, f0 861 | mtc1 TMP0, f0
727 | cvt.d.w f0, f0 862 | cvt.d.w f0, f0
728 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 863 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
729 | sdc1 f0, 0(CARG3) 864 | sdc1 f0, 0(CARG3)
865 |.else
866 | sw CARG2, TEMP_SAVE_1
867 | cvti2d TMP0
868 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
869 | sw CRET1, 0(CARG3)
870 | sw CRET2, 4(CARG3)
871 | lw CARG2, TEMP_SAVE_1
872 |.endif
730 | 873 |
731 |->vmeta_tsetv: 874 |->vmeta_tsetv:
732 |1: 875 |1:
@@ -736,11 +879,17 @@ static void build_subroutines(BuildCtx *ctx)
736 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 879 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
737 |. move CARG1, L 880 |. move CARG1, L
738 | // Returns TValue * (finished) or NULL (metamethod). 881 | // Returns TValue * (finished) or NULL (metamethod).
882 |.if FPU
739 | beqz CRET1, >3 883 | beqz CRET1, >3
740 |. ldc1 f0, 0(RA) 884 |. ldc1 f2, 0(RA)
885 |.else
886 | lw SFT3, 0(RA)
887 | beqz CRET1, >3
888 |. lw SFT4, 4(RA)
889 |.endif
741 | // NOBARRIER: lj_meta_tset ensures the table is not black. 890 | // NOBARRIER: lj_meta_tset ensures the table is not black.
742 | ins_next1 891 | ins_next1
743 | sdc1 f0, 0(CRET1) 892 | store_double2 0(CRET1)
744 | ins_next2 893 | ins_next2
745 | 894 |
746 |3: // Call __newindex metamethod. 895 |3: // Call __newindex metamethod.
@@ -750,7 +899,7 @@ static void build_subroutines(BuildCtx *ctx)
750 | sw PC, -16+HI(BASE) // [cont|PC] 899 | sw PC, -16+HI(BASE) // [cont|PC]
751 | subu PC, BASE, TMP1 900 | subu PC, BASE, TMP1
752 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 901 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
753 | sdc1 f0, 16(BASE) // Copy value to third argument. 902 | store_double2 16(BASE) // Copy value to third argument.
754 | b ->vm_call_dispatch_f 903 | b ->vm_call_dispatch_f
755 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 904 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
756 | 905 |
@@ -793,11 +942,17 @@ static void build_subroutines(BuildCtx *ctx)
793 | 942 |
794 |->cont_ra: // RA = resultptr 943 |->cont_ra: // RA = resultptr
795 | lbu TMP1, -4+OFS_RA(PC) 944 | lbu TMP1, -4+OFS_RA(PC)
796 | ldc1 f0, 0(RA) 945 | load_double1 0(RA)
797 | sll TMP1, TMP1, 3 946 | sll TMP1, TMP1, 3
798 | addu TMP1, BASE, TMP1 947 | addu TMP1, BASE, TMP1
948 |.if FPU
799 | b ->cont_nop 949 | b ->cont_nop
800 |. sdc1 f0, 0(TMP1) 950 |. sdc1 f0, 0(TMP1)
951 |.else
952 | sw SFT1, 0(TMP1)
953 | b ->cont_nop
954 |. sw SFT2, 4(TMP1)
955 |.endif
801 | 956 |
802 |->cont_condt: // RA = resultptr 957 |->cont_condt: // RA = resultptr
803 | lw TMP0, HI(RA) 958 | lw TMP0, HI(RA)
@@ -852,7 +1007,22 @@ static void build_subroutines(BuildCtx *ctx)
852 |//-- Arithmetic metamethods --------------------------------------------- 1007 |//-- Arithmetic metamethods ---------------------------------------------
853 | 1008 |
854 |->vmeta_unm: 1009 |->vmeta_unm:
855 | move CARG4, CARG3 1010 | b ->vmeta_arith
1011 |. move CARG4, CARG3
1012 |
1013 |->vmeta_arith_vn:
1014 | addu CARG3, BASE, RB
1015 | b ->vmeta_arith
1016 |. addu CARG4, KBASE, RC
1017 |
1018 |->vmeta_arith_nv:
1019 | addu CARG4, BASE, RB
1020 | b ->vmeta_arith
1021 |. addu CARG3, KBASE, RC
1022 |
1023 |->vmeta_arith_vv:
1024 | addu CARG3, BASE, RB
1025 | addu CARG4, BASE, RC
856 | 1026 |
857 |->vmeta_arith: 1027 |->vmeta_arith:
858 | load_got lj_meta_arith 1028 | load_got lj_meta_arith
@@ -985,9 +1155,9 @@ static void build_subroutines(BuildCtx *ctx)
985 |.macro .ffunc_n, name // Caveat: has delay slot! 1155 |.macro .ffunc_n, name // Caveat: has delay slot!
986 |->ff_ .. name: 1156 |->ff_ .. name:
987 | lw CARG3, HI(BASE) 1157 | lw CARG3, HI(BASE)
1158 | load_farg1 0(BASE)
988 | beqz NARGS8:RC, ->fff_fallback 1159 | beqz NARGS8:RC, ->fff_fallback
989 |. ldc1 FARG1, 0(BASE) 1160 |. sltiu AT, CARG3, LJ_TISNUM
990 | sltiu AT, CARG3, LJ_TISNUM
991 | beqz AT, ->fff_fallback 1161 | beqz AT, ->fff_fallback
992 |.endmacro 1162 |.endmacro
993 | 1163 |
@@ -997,10 +1167,10 @@ static void build_subroutines(BuildCtx *ctx)
997 | lw CARG3, HI(BASE) 1167 | lw CARG3, HI(BASE)
998 | bnez AT, ->fff_fallback 1168 | bnez AT, ->fff_fallback
999 |. lw CARG4, 8+HI(BASE) 1169 |. lw CARG4, 8+HI(BASE)
1000 | ldc1 FARG1, 0(BASE)
1001 | ldc1 FARG2, 8(BASE)
1002 | sltiu TMP0, CARG3, LJ_TISNUM 1170 | sltiu TMP0, CARG3, LJ_TISNUM
1003 | sltiu TMP1, CARG4, LJ_TISNUM 1171 | sltiu TMP1, CARG4, LJ_TISNUM
1172 | load_farg1 0(BASE)
1173 | load_farg2 8(BASE)
1004 | and TMP0, TMP0, TMP1 1174 | and TMP0, TMP0, TMP1
1005 | beqz TMP0, ->fff_fallback 1175 | beqz TMP0, ->fff_fallback
1006 |.endmacro 1176 |.endmacro
@@ -1027,8 +1197,8 @@ static void build_subroutines(BuildCtx *ctx)
1027 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 1197 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
1028 |. sw CARG1, LO(RA) 1198 |. sw CARG1, LO(RA)
1029 |1: 1199 |1:
1030 | ldc1 f0, 0(TMP1) 1200 | load_double1 0(TMP1)
1031 | sdc1 f0, -8(TMP1) 1201 | store_double1 -8(TMP1)
1032 | bne TMP1, TMP2, <1 1202 | bne TMP1, TMP2, <1
1033 |. addiu TMP1, TMP1, 8 1203 |. addiu TMP1, TMP1, 8
1034 | b ->fff_res 1204 | b ->fff_res
@@ -1043,8 +1213,14 @@ static void build_subroutines(BuildCtx *ctx)
1043 | not TMP1, TMP1 1213 | not TMP1, TMP1
1044 | sll TMP1, TMP1, 3 1214 | sll TMP1, TMP1, 3
1045 | addu TMP1, CFUNC:RB, TMP1 1215 | addu TMP1, CFUNC:RB, TMP1
1216 |.if HFABI
1046 | b ->fff_resn 1217 | b ->fff_resn
1047 |. ldc1 FRET1, CFUNC:TMP1->upvalue 1218 |. ldc1 FRET1, CFUNC:TMP1->upvalue
1219 |.else
1220 | lw CRET1, CFUNC:TMP1->upvalue[0].u32.hi
1221 | b ->fff_resn
1222 |. lw CRET2, CFUNC:TMP1->upvalue[0].u32.lo
1223 |.endif
1048 | 1224 |
1049 |//-- Base library: getters and setters --------------------------------- 1225 |//-- Base library: getters and setters ---------------------------------
1050 | 1226 |
@@ -1125,8 +1301,14 @@ static void build_subroutines(BuildCtx *ctx)
1125 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1301 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1126 |. move CARG1, L 1302 |. move CARG1, L
1127 | // Returns cTValue *. 1303 | // Returns cTValue *.
1304 |.if HFABI
1128 | b ->fff_resn 1305 | b ->fff_resn
1129 |. ldc1 FRET1, 0(CRET1) 1306 |. ldc1 FRET1, 0(CRET1)
1307 |.else
1308 | lw CRET2, 4(CRET1)
1309 | b ->fff_resn
1310 |. lw CRET1, 0(CRET1)
1311 |.endif
1130 | 1312 |
1131 |//-- Base library: conversions ------------------------------------------ 1313 |//-- Base library: conversions ------------------------------------------
1132 | 1314 |
@@ -1136,8 +1318,14 @@ static void build_subroutines(BuildCtx *ctx)
1136 | xori AT, NARGS8:RC, 8 1318 | xori AT, NARGS8:RC, 8
1137 | sltiu CARG1, CARG1, LJ_TISNUM 1319 | sltiu CARG1, CARG1, LJ_TISNUM
1138 | movn CARG1, r0, AT 1320 | movn CARG1, r0, AT
1321 |.if HFABI
1139 | beqz CARG1, ->fff_fallback // Exactly one number argument. 1322 | beqz CARG1, ->fff_fallback // Exactly one number argument.
1140 |. ldc1 FRET1, 0(BASE) 1323 |. ldc1 FRET1, 0(BASE)
1324 |.else
1325 | lw CRET1, 0(BASE)
1326 | beqz CARG1, ->fff_fallback // Exactly one number argument.
1327 |. lw CRET2, 4(BASE)
1328 |.endif
1141 | b ->fff_resn 1329 | b ->fff_resn
1142 |. nop 1330 |. nop
1143 | 1331 |
@@ -1185,13 +1373,13 @@ static void build_subroutines(BuildCtx *ctx)
1185 | // Returns 0 at end of traversal. 1373 | // Returns 0 at end of traversal.
1186 | beqz CRET1, ->fff_restv // End of traversal: return nil. 1374 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1187 |. li CARG3, LJ_TNIL 1375 |. li CARG3, LJ_TNIL
1188 | ldc1 f0, 8(BASE) // Copy key and value to results. 1376 | load_double1 8(BASE)
1189 | addiu RA, BASE, -8 1377 | addiu RA, BASE, -8
1190 | ldc1 f2, 16(BASE) 1378 | load_double2 16(BASE)
1191 | li RD, (2+1)*8 1379 | store_double1 0(RA)
1192 | sdc1 f0, 0(RA) 1380 | store_double2 8(RA)
1193 | b ->fff_res 1381 | b ->fff_res
1194 |. sdc1 f2, 8(RA) 1382 |. li RD, (2+1)*8
1195 | 1383 |
1196 |.ffunc_1 pairs 1384 |.ffunc_1 pairs
1197 | li AT, LJ_TTAB 1385 | li AT, LJ_TTAB
@@ -1199,16 +1387,32 @@ static void build_subroutines(BuildCtx *ctx)
1199 |. lw PC, FRAME_PC(BASE) 1387 |. lw PC, FRAME_PC(BASE)
1200#if LJ_52 1388#if LJ_52
1201 | lw TAB:TMP2, TAB:CARG1->metatable 1389 | lw TAB:TMP2, TAB:CARG1->metatable
1390 |.if FPU
1202 | ldc1 f0, CFUNC:RB->upvalue[0] 1391 | ldc1 f0, CFUNC:RB->upvalue[0]
1392 |.else
1393 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1394 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1395 |.endif
1203 | bnez TAB:TMP2, ->fff_fallback 1396 | bnez TAB:TMP2, ->fff_fallback
1204#else 1397#else
1398 |.if FPU
1205 | ldc1 f0, CFUNC:RB->upvalue[0] 1399 | ldc1 f0, CFUNC:RB->upvalue[0]
1400 |.else
1401 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1402 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1403 |.endif
1206#endif 1404#endif
1207 |. addiu RA, BASE, -8 1405 |. addiu RA, BASE, -8
1208 | sw TISNIL, 8+HI(BASE) 1406 | sw TISNIL, 8+HI(BASE)
1209 | li RD, (3+1)*8 1407 | li RD, (3+1)*8
1408 |.if FPU
1210 | b ->fff_res 1409 | b ->fff_res
1211 |. sdc1 f0, 0(RA) 1410 |. sdc1 f0, 0(RA)
1411 |.else
1412 | sw SFT1, 0(RA)
1413 | b ->fff_res
1414 |. sw SFT2, 4(RA)
1415 |.endif
1212 | 1416 |
1213 |.ffunc ipairs_aux 1417 |.ffunc ipairs_aux
1214 | sltiu AT, NARGS8:RC, 16 1418 | sltiu AT, NARGS8:RC, 16
@@ -1216,35 +1420,55 @@ static void build_subroutines(BuildCtx *ctx)
1216 | lw TAB:CARG1, LO(BASE) 1420 | lw TAB:CARG1, LO(BASE)
1217 | lw CARG4, 8+HI(BASE) 1421 | lw CARG4, 8+HI(BASE)
1218 | bnez AT, ->fff_fallback 1422 | bnez AT, ->fff_fallback
1219 |. ldc1 FARG2, 8(BASE) 1423 |. addiu CARG3, CARG3, -LJ_TTAB
1220 | addiu CARG3, CARG3, -LJ_TTAB
1221 | sltiu AT, CARG4, LJ_TISNUM 1424 | sltiu AT, CARG4, LJ_TISNUM
1222 | li TMP0, 1 1425 | li TMP0, 1
1223 | movn AT, r0, CARG3 1426 | movn AT, r0, CARG3
1224 | mtc1 TMP0, FARG1
1225 | beqz AT, ->fff_fallback 1427 | beqz AT, ->fff_fallback
1226 |. lw PC, FRAME_PC(BASE) 1428 |. lw PC, FRAME_PC(BASE)
1429 |.if FPU
1430 | ldc1 FARG2, 8(BASE)
1431 | mtc1 TMP0, FARG1
1227 | trunc.w.d FRET1, FARG2 1432 | trunc.w.d FRET1, FARG2
1228 | cvt.d.w FARG1, FARG1 1433 | cvt.d.w FARG1, FARG1
1229 | lw TMP0, TAB:CARG1->asize
1230 | lw TMP1, TAB:CARG1->array
1231 | mfc1 TMP2, FRET1 1434 | mfc1 TMP2, FRET1
1232 | addiu RA, BASE, -8
1233 | add.d FARG2, FARG2, FARG1 1435 | add.d FARG2, FARG2, FARG1
1436 |.else
1437 | sw CARG1, TEMP_SAVE_1
1438 | cvti2d TMP0
1439 | sw CRET1, TEMP_SAVE_2 // Store result CRET1/CRET2=1 (double).
1440 | sw CRET2, TEMP_SAVE_3
1441 | lw CARG2, 8+4(BASE)
1442 | load_got __fixdfsi
1443 | call_extern
1444 |. lw CARG1, 8(BASE)
1445 | sw CRET1, TEMP_SAVE_4
1446 | load_got __adddf3
1447 | lw CARG2, TEMP_SAVE_3
1448 | lw CARG3, 8(BASE)
1449 | lw CARG4, 8+4(BASE)
1450 | call_extern
1451 |. lw CARG1, TEMP_SAVE_2
1452 | lw TMP2, TEMP_SAVE_4
1453 | lw CARG1, TEMP_SAVE_1
1454 |.endif
1455 | lw TMP0, TAB:CARG1->asize
1456 | lw TMP1, TAB:CARG1->array
1234 | addiu TMP2, TMP2, 1 1457 | addiu TMP2, TMP2, 1
1235 | sltu AT, TMP2, TMP0 1458 | sltu AT, TMP2, TMP0
1459 | beqz AT, >2 // Not in array part?
1460 |. addiu RA, BASE, -8
1461 | store_double FARG2, CRET1, CRET2, 0(RA)
1236 | sll TMP3, TMP2, 3 1462 | sll TMP3, TMP2, 3
1237 | addu TMP3, TMP1, TMP3 1463 | addu TMP3, TMP1, TMP3
1238 | beqz AT, >2 // Not in array part?
1239 |. sdc1 FARG2, 0(RA)
1240 | lw TMP2, HI(TMP3) 1464 | lw TMP2, HI(TMP3)
1241 | ldc1 f0, 0(TMP3) 1465 | load_double1 0(TMP3)
1242 |1: 1466 |1:
1243 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. 1467 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results.
1244 |. li RD, (0+1)*8 1468 |. li RD, (0+1)*8
1245 | li RD, (2+1)*8 1469 | store_double1 8(RA)
1246 | b ->fff_res 1470 | b ->fff_res
1247 |. sdc1 f0, 8(RA) 1471 |. li RD, (2+1)*8
1248 |2: // Check for empty hash part first. Otherwise call C function. 1472 |2: // Check for empty hash part first. Otherwise call C function.
1249 | lw TMP0, TAB:CARG1->hmask 1473 | lw TMP0, TAB:CARG1->hmask
1250 | load_got lj_tab_getinth 1474 | load_got lj_tab_getinth
@@ -1256,8 +1480,14 @@ static void build_subroutines(BuildCtx *ctx)
1256 | beqz CRET1, ->fff_res 1480 | beqz CRET1, ->fff_res
1257 |. li RD, (0+1)*8 1481 |. li RD, (0+1)*8
1258 | lw TMP2, HI(CRET1) 1482 | lw TMP2, HI(CRET1)
1483 |.if FPU
1259 | b <1 1484 | b <1
1260 |. ldc1 f0, 0(CRET1) 1485 |. ldc1 f0, 0(CRET1)
1486 |.else
1487 | lw SFT2, 4(CRET1)
1488 | b <1
1489 |. lw SFT1, 0(CRET1)
1490 |.endif
1261 | 1491 |
1262 |.ffunc_1 ipairs 1492 |.ffunc_1 ipairs
1263 | li AT, LJ_TTAB 1493 | li AT, LJ_TTAB
@@ -1265,17 +1495,33 @@ static void build_subroutines(BuildCtx *ctx)
1265 |. lw PC, FRAME_PC(BASE) 1495 |. lw PC, FRAME_PC(BASE)
1266#if LJ_52 1496#if LJ_52
1267 | lw TAB:TMP2, TAB:CARG1->metatable 1497 | lw TAB:TMP2, TAB:CARG1->metatable
1498 |.if FPU
1268 | ldc1 f0, CFUNC:RB->upvalue[0] 1499 | ldc1 f0, CFUNC:RB->upvalue[0]
1500 |.else
1501 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1502 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1503 |.endif
1269 | bnez TAB:TMP2, ->fff_fallback 1504 | bnez TAB:TMP2, ->fff_fallback
1270#else 1505#else
1506 |.if FPU
1271 | ldc1 f0, CFUNC:RB->upvalue[0] 1507 | ldc1 f0, CFUNC:RB->upvalue[0]
1508 |.else
1509 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1510 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1511 |.endif
1272#endif 1512#endif
1273 |. addiu RA, BASE, -8 1513 |. addiu RA, BASE, -8
1274 | sw r0, 8+HI(BASE) 1514 | sw r0, 8+HI(BASE)
1275 | sw r0, 8+LO(BASE) 1515 | sw r0, 8+LO(BASE)
1276 | li RD, (3+1)*8 1516 | li RD, (3+1)*8
1517 |.if FPU
1277 | b ->fff_res 1518 | b ->fff_res
1278 |. sdc1 f0, 0(RA) 1519 |. sdc1 f0, 0(RA)
1520 |.else
1521 | sw SFT1, 0(RA)
1522 | b ->fff_res
1523 |. sw SFT2, 4(RA)
1524 |.endif
1279 | 1525 |
1280 |//-- Base library: catch errors ---------------------------------------- 1526 |//-- Base library: catch errors ----------------------------------------
1281 | 1527 |
@@ -1295,8 +1541,12 @@ static void build_subroutines(BuildCtx *ctx)
1295 | sltiu AT, NARGS8:RC, 16 1541 | sltiu AT, NARGS8:RC, 16
1296 | lw CARG4, 8+HI(BASE) 1542 | lw CARG4, 8+HI(BASE)
1297 | bnez AT, ->fff_fallback 1543 | bnez AT, ->fff_fallback
1544 |.if FPU
1298 |. ldc1 FARG2, 8(BASE) 1545 |. ldc1 FARG2, 8(BASE)
1299 | ldc1 FARG1, 0(BASE) 1546 |.else
1547 |. lw CARG3, 8+LO(BASE)
1548 |.endif
1549 | load_double FARG1, CARG1, CARG2, 0(BASE)
1300 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1550 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1301 | li AT, LJ_TFUNC 1551 | li AT, LJ_TFUNC
1302 | move TMP2, BASE 1552 | move TMP2, BASE
@@ -1304,9 +1554,14 @@ static void build_subroutines(BuildCtx *ctx)
1304 | addiu BASE, BASE, 16 1554 | addiu BASE, BASE, 16
1305 | // Remember active hook before pcall. 1555 | // Remember active hook before pcall.
1306 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT 1556 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1557 |.if FPU
1307 | sdc1 FARG2, 0(TMP2) // Swap function and traceback. 1558 | sdc1 FARG2, 0(TMP2) // Swap function and traceback.
1559 |.else
1560 | sw CARG3, LO(TMP2)
1561 | sw CARG4, HI(TMP2)
1562 |.endif
1308 | andi TMP3, TMP3, 1 1563 | andi TMP3, TMP3, 1
1309 | sdc1 FARG1, 8(TMP2) 1564 | store_double FARG1, CARG1, CARG2, 8(TMP2)
1310 | addiu PC, TMP3, 16+FRAME_PCALL 1565 | addiu PC, TMP3, 16+FRAME_PCALL
1311 | b ->vm_call_dispatch 1566 | b ->vm_call_dispatch
1312 |. addiu NARGS8:RC, NARGS8:RC, -16 1567 |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1350,11 +1605,11 @@ static void build_subroutines(BuildCtx *ctx)
1350 | move CARG3, CARG2 1605 | move CARG3, CARG2
1351 | sw BASE, L->top 1606 | sw BASE, L->top
1352 |2: // Move args to coroutine. 1607 |2: // Move args to coroutine.
1353 | ldc1 f0, 0(BASE) 1608 | load_double1 0(BASE)
1354 | sltu AT, BASE, TMP1 1609 | sltu AT, BASE, TMP1
1355 | beqz AT, >3 1610 | beqz AT, >3
1356 |. addiu BASE, BASE, 8 1611 |. addiu BASE, BASE, 8
1357 | sdc1 f0, 0(CARG3) 1612 | store_double1 0(CARG3)
1358 | b <2 1613 | b <2
1359 |. addiu CARG3, CARG3, 8 1614 |. addiu CARG3, CARG3, 8
1360 |3: 1615 |3:
@@ -1380,10 +1635,10 @@ static void build_subroutines(BuildCtx *ctx)
1380 | sw TMP2, L:RA->top // Clear coroutine stack. 1635 | sw TMP2, L:RA->top // Clear coroutine stack.
1381 | move TMP1, BASE 1636 | move TMP1, BASE
1382 |5: // Move results from coroutine. 1637 |5: // Move results from coroutine.
1383 | ldc1 f0, 0(TMP2) 1638 | load_double1 0(TMP2)
1384 | addiu TMP2, TMP2, 8 1639 | addiu TMP2, TMP2, 8
1385 | sltu AT, TMP2, TMP3 1640 | sltu AT, TMP2, TMP3
1386 | sdc1 f0, 0(TMP1) 1641 | store_double1 0(TMP1)
1387 | bnez AT, <5 1642 | bnez AT, <5
1388 |. addiu TMP1, TMP1, 8 1643 |. addiu TMP1, TMP1, 8
1389 |6: 1644 |6:
@@ -1408,12 +1663,12 @@ static void build_subroutines(BuildCtx *ctx)
1408 |.if resume 1663 |.if resume
1409 | addiu TMP3, TMP3, -8 1664 | addiu TMP3, TMP3, -8
1410 | li TMP1, LJ_TFALSE 1665 | li TMP1, LJ_TFALSE
1411 | ldc1 f0, 0(TMP3) 1666 | load_double1 0(TMP3)
1412 | sw TMP3, L:RA->top // Remove error from coroutine stack. 1667 | sw TMP3, L:RA->top // Remove error from coroutine stack.
1413 | li RD, (2+1)*8 1668 | li RD, (2+1)*8
1414 | sw TMP1, -8+HI(BASE) // Prepend false to results. 1669 | sw TMP1, -8+HI(BASE) // Prepend false to results.
1415 | addiu RA, BASE, -8 1670 | addiu RA, BASE, -8
1416 | sdc1 f0, 0(BASE) // Copy error message. 1671 | store_double1 0(BASE) // Copy error message.
1417 | b <7 1672 | b <7
1418 |. andi TMP0, PC, FRAME_TYPE 1673 |. andi TMP0, PC, FRAME_TYPE
1419 |.else 1674 |.else
@@ -1449,13 +1704,33 @@ static void build_subroutines(BuildCtx *ctx)
1449 | 1704 |
1450 |//-- Math library ------------------------------------------------------- 1705 |//-- Math library -------------------------------------------------------
1451 | 1706 |
1452 |.ffunc_n math_abs 1707 |.ffunc_1 math_abs
1708 | load_farg1 0(BASE)
1709 | sltiu AT, CARG3, LJ_TISNUM
1710 | beqz AT, ->fff_fallback
1711 |. nop
1712 |.if FPU
1453 |. abs.d FRET1, FARG1 1713 |. abs.d FRET1, FARG1
1714 |.else
1715 |. lui TMP1, 0x8000
1716 | and AT, CARG1, TMP1
1717 | move CRET2, CARG2
1718 | beqz AT, ->fff_resn
1719 |. move CRET1, CARG1
1720 | xor CRET1, CARG1, TMP1
1721 |.endif
1722 |
1454 |->fff_resn: 1723 |->fff_resn:
1455 | lw PC, FRAME_PC(BASE) 1724 | lw PC, FRAME_PC(BASE)
1456 | addiu RA, BASE, -8 1725 | addiu RA, BASE, -8
1726 |.if HFABI
1457 | b ->fff_res1 1727 | b ->fff_res1
1458 |. sdc1 FRET1, -8(BASE) 1728 |. sdc1 FRET1, -8(BASE)
1729 |.else
1730 | sw CRET1, -8(BASE)
1731 | b ->fff_res1
1732 |. sw CRET2, -8+4(BASE)
1733 |.endif
1459 | 1734 |
1460 |->fff_restv: 1735 |->fff_restv:
1461 | // CARG3/CARG1 = TValue result. 1736 | // CARG3/CARG1 = TValue result.
@@ -1498,8 +1773,14 @@ static void build_subroutines(BuildCtx *ctx)
1498 | sltiu AT, CARG3, LJ_TISNUM 1773 | sltiu AT, CARG3, LJ_TISNUM
1499 | beqz AT, ->fff_fallback 1774 | beqz AT, ->fff_fallback
1500 |. nop 1775 |. nop
1776 |.if HFABI
1501 | call_extern 1777 | call_extern
1502 |. ldc1 FARG1, 0(BASE) 1778 |. ldc1 FARG1, 0(BASE)
1779 |.else
1780 | lw CARG1, 0(BASE)
1781 | call_extern
1782 |. lw CARG2, 4(BASE)
1783 |.endif
1503 | b ->fff_resn 1784 | b ->fff_resn
1504 |. nop 1785 |. nop
1505 |.endmacro 1786 |.endmacro
@@ -1526,15 +1807,20 @@ static void build_subroutines(BuildCtx *ctx)
1526 | math_round ceil 1807 | math_round ceil
1527 | 1808 |
1528 |.ffunc math_log 1809 |.ffunc math_log
1529 | lw CARG3, HI(BASE)
1530 | li AT, 8 1810 | li AT, 8
1531 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1811 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1532 |. load_got log 1812 |. lw CARG3, HI(BASE)
1533 | sltiu AT, CARG3, LJ_TISNUM 1813 | sltiu AT, CARG3, LJ_TISNUM
1534 | beqz AT, ->fff_fallback 1814 | beqz AT, ->fff_fallback
1535 |. nop 1815 |. load_got log
1816 |.if HFABI
1536 | call_extern 1817 | call_extern
1537 |. ldc1 FARG1, 0(BASE) 1818 |. ldc1 FARG1, 0(BASE)
1819 |.else
1820 | lw CARG1, 0(BASE)
1821 | call_extern
1822 |. lw CARG2, 4(BASE)
1823 |.endif
1538 | b ->fff_resn 1824 | b ->fff_resn
1539 |. nop 1825 |. nop
1540 | 1826 |
@@ -1553,17 +1839,40 @@ static void build_subroutines(BuildCtx *ctx)
1553 | math_extern2 atan2 1839 | math_extern2 atan2
1554 | math_extern2 fmod 1840 | math_extern2 fmod
1555 | 1841 |
1842 |.if FPU
1556 |.ffunc_n math_sqrt 1843 |.ffunc_n math_sqrt
1557 |. sqrt.d FRET1, FARG1 1844 |. sqrt.d FRET1, FARG1
1558 | b ->fff_resn 1845 | b ->fff_resn
1559 |. nop 1846 |. nop
1847 |.else
1848 | math_extern sqrt
1849 |.endif
1560 | 1850 |
1561 |.ffunc_nn math_ldexp 1851 |.ffunc_2 math_ldexp
1852 | sltiu TMP0, CARG3, LJ_TISNUM
1853 | sltiu TMP1, CARG4, LJ_TISNUM
1854 | load_farg1 0(BASE)
1855 | load_farg2 8(BASE)
1856 | and TMP0, TMP0, TMP1
1857 | beqz TMP0, ->fff_fallback
1858 |.if FPU
1859 | load_got ldexp
1562 | trunc.w.d FARG2, FARG2 1860 | trunc.w.d FARG2, FARG2
1861 | call_extern
1862 |. mfc1 CARG3, FARG2
1863 |.else
1864 | sw CARG1, TEMP_SAVE_1
1865 | sw CARG2, TEMP_SAVE_2
1866 | load_got __fixdfsi
1867 | move CARG1, CARG3
1868 | call_extern
1869 |. move CARG2, CARG4
1870 | lw CARG1, TEMP_SAVE_1
1563 | load_got ldexp 1871 | load_got ldexp
1564 | mfc1 CARG3, FARG2 1872 | lw CARG2, TEMP_SAVE_2
1565 | call_extern 1873 | call_extern
1566 |. nop 1874 |. move CARG3, CRET1
1875 |.endif
1567 | b ->fff_resn 1876 | b ->fff_resn
1568 |. nop 1877 |. nop
1569 | 1878 |
@@ -1574,10 +1883,14 @@ static void build_subroutines(BuildCtx *ctx)
1574 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 1883 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1575 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1884 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1576 | addiu RA, BASE, -8 1885 | addiu RA, BASE, -8
1886 | store_double FRET1, CRET1, CRET2, 0(RA)
1887 |.if FPU
1577 | mtc1 TMP1, FARG2 1888 | mtc1 TMP1, FARG2
1578 | sdc1 FRET1, 0(RA)
1579 | cvt.d.w FARG2, FARG2 1889 | cvt.d.w FARG2, FARG2
1580 | sdc1 FARG2, 8(RA) 1890 |.else
1891 | cvti2d TMP1
1892 |.endif
1893 | store_double FARG2, CRET1, CRET2, 8(RA)
1581 | b ->fff_res 1894 | b ->fff_res
1582 |. li RD, (2+1)*8 1895 |. li RD, (2+1)*8
1583 | 1896 |
@@ -1587,7 +1900,12 @@ static void build_subroutines(BuildCtx *ctx)
1587 | call_extern 1900 | call_extern
1588 |. addiu CARG3, BASE, -8 1901 |. addiu CARG3, BASE, -8
1589 | addiu RA, BASE, -8 1902 | addiu RA, BASE, -8
1903 |.if HFABI
1590 | sdc1 FRET1, 0(BASE) 1904 | sdc1 FRET1, 0(BASE)
1905 |.else
1906 | sw CRET1, 0(BASE)
1907 | sw CRET2, 4(BASE)
1908 |.endif
1591 | b ->fff_res 1909 | b ->fff_res
1592 |. li RD, (2+1)*8 1910 |. li RD, (2+1)*8
1593 | 1911 |
@@ -1595,25 +1913,73 @@ static void build_subroutines(BuildCtx *ctx)
1595 |->ff_ .. name: 1913 |->ff_ .. name:
1596 | lw CARG3, HI(BASE) 1914 | lw CARG3, HI(BASE)
1597 | beqz NARGS8:RC, ->fff_fallback 1915 | beqz NARGS8:RC, ->fff_fallback
1598 |. ldc1 FRET1, 0(BASE) 1916 |. sltiu AT, CARG3, LJ_TISNUM
1599 | sltiu AT, CARG3, LJ_TISNUM
1600 | beqz AT, ->fff_fallback 1917 | beqz AT, ->fff_fallback
1601 |. addu TMP2, BASE, NARGS8:RC 1918 |. addu TMP2, BASE, NARGS8:RC
1602 | addiu TMP1, BASE, 8 1919 | addiu TMP1, BASE, 8
1920 |.if HFABI
1921 | ldc1 FRET1, 0(BASE)
1603 | beq TMP1, TMP2, ->fff_resn 1922 | beq TMP1, TMP2, ->fff_resn
1923 |.else
1924 | lw CRET1, 0(BASE)
1925 | lw CRET2, 4(BASE)
1926 | beq TMP1, TMP2, ->fff_resn
1927 |.endif
1604 |1: 1928 |1:
1605 |. lw CARG3, HI(TMP1) 1929 |. lw CARG3, HI(TMP1)
1930 |.if HFABI
1606 | ldc1 FARG1, 0(TMP1) 1931 | ldc1 FARG1, 0(TMP1)
1607 | addiu TMP1, TMP1, 8 1932 |.else
1933 | lw CARG1, 0(TMP1)
1934 | lw CARG2, 4(TMP1)
1935 |.endif
1608 | sltiu AT, CARG3, LJ_TISNUM 1936 | sltiu AT, CARG3, LJ_TISNUM
1609 | beqz AT, ->fff_fallback 1937 | beqz AT, ->fff_fallback
1938 |. addiu TMP1, TMP1, 8
1939 |.if FPU
1610 |.if ismax 1940 |.if ismax
1611 |. c.olt.d FARG1, FRET1 1941 | c.olt.d FARG1, FRET1
1612 |.else 1942 |.else
1613 |. c.olt.d FRET1, FARG1 1943 | c.olt.d FRET1, FARG1
1614 |.endif 1944 |.endif
1615 | bne TMP1, TMP2, <1 1945 | bne TMP1, TMP2, <1
1616 |. movf.d FRET1, FARG1 1946 |. movf.d FRET1, FARG1
1947 |.else
1948 | load_got __ledf2
1949 | sw TMP1, TEMP_SAVE_1
1950 | sw TMP2, TEMP_SAVE_2
1951 | sw CARG1, TEMP_SAVE_3
1952 | sw CARG2, TEMP_SAVE_4
1953 | sw CRET1, TEMP_SAVE_5
1954 | sw CRET2, TEMP_SAVE_6
1955 | move CARG3, CRET1
1956 | call_extern
1957 |. move CARG4, CRET2
1958 | lw CARG4, TEMP_SAVE_6
1959 | lw CARG3, TEMP_SAVE_5
1960 | lw CARG2, TEMP_SAVE_4
1961 | lw CARG1, TEMP_SAVE_3
1962 | lw TMP2, TEMP_SAVE_2
1963 | lw TMP1, TEMP_SAVE_1
1964 |.if ismax
1965 | beqz CRET1, >2 // farg1==fret1
1966 |. li TMP3, 1
1967 | beq CRET1, TMP3, >2 // farg1>fret1
1968 |. nop
1969 |.else
1970 | blez CRET1, >2
1971 |. nop
1972 |.endif
1973 | move CRET1, CARG3 // Keep the value.
1974 | b >3
1975 |. move CRET2, CARG4
1976 |2:
1977 | move CRET1, CARG1 // Set new value.
1978 | move CRET2, CARG2
1979 |3:
1980 | bne TMP1, TMP2, <1
1981 |. nop
1982 |.endif
1617 | b ->fff_resn 1983 | b ->fff_resn
1618 |. nop 1984 |. nop
1619 |.endmacro 1985 |.endmacro
@@ -1632,32 +1998,52 @@ static void build_subroutines(BuildCtx *ctx)
1632 | bnez AT, ->fff_fallback // Need exactly 1 string argument. 1998 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1633 |. nop 1999 |. nop
1634 | lw TMP0, STR:CARG1->len 2000 | lw TMP0, STR:CARG1->len
1635 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1636 | addiu RA, BASE, -8 2001 | addiu RA, BASE, -8
1637 | sltu RD, r0, TMP0 2002 | sltu RD, r0, TMP0
1638 | mtc1 TMP1, f0 2003 | lw PC, FRAME_PC(BASE)
1639 | addiu RD, RD, 1 2004 | addiu RD, RD, 1
2005 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
2006 |.if FPU
2007 | mtc1 TMP1, f0
1640 | cvt.d.w f0, f0 2008 | cvt.d.w f0, f0
1641 | lw PC, FRAME_PC(BASE) 2009 | sdc1 f0, 0(RA)
1642 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 2010 |.else
2011 | sw RD, TEMP_SAVE_1
2012 | cvti2d TMP1
2013 | sw CRET1, 0(RA)
2014 | sw CRET2, 4(RA)
2015 | lw RD, TEMP_SAVE_1
2016 |.endif
1643 | b ->fff_res 2017 | b ->fff_res
1644 |. sdc1 f0, 0(RA) 2018 |. sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1645 | 2019 |
1646 |.ffunc string_char // Only handle the 1-arg case here. 2020 |.ffunc string_char // Only handle the 1-arg case here.
1647 | ffgccheck 2021 | ffgccheck
1648 | lw CARG3, HI(BASE) 2022 | lw CARG3, HI(BASE)
1649 | ldc1 FARG1, 0(BASE)
1650 | li AT, 8 2023 | li AT, 8
1651 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 2024 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1652 |. sltiu AT, CARG3, LJ_TISNUM 2025 |. sltiu AT, CARG3, LJ_TISNUM
1653 | beqz AT, ->fff_fallback 2026 | beqz AT, ->fff_fallback
1654 |. li CARG3, 1 2027 |. li CARG3, 1
1655 | trunc.w.d FARG1, FARG1
1656 | addiu CARG2, sp, ARG5_OFS
1657 | sltiu AT, TMP0, 256 2028 | sltiu AT, TMP0, 256
1658 | mfc1 TMP0, FARG1
1659 | beqz AT, ->fff_fallback 2029 | beqz AT, ->fff_fallback
1660 |. sw TMP0, ARG5 2030 | load_farg1 0(BASE)
2031 |.if FPU
2032 | trunc.w.d FARG1, FARG1
2033 | mfc1 TMP0, FARG1
2034 |.else
2035 | load_got __fixdfsi
2036 | sw RB, TEMP_SAVE_1
2037 | sw RC, TEMP_SAVE_2
2038 | call_extern
2039 |. sw CARG3, TEMP_SAVE_3
2040 | lw CARG3, TEMP_SAVE_3
2041 | lw RC, TEMP_SAVE_2
2042 | lw RB, TEMP_SAVE_1
2043 | move TMP0, CRET1
2044 |.endif
2045 | addiu CARG2, sp, ARG5_OFS
2046 | sw TMP0, ARG5
1661 |->fff_newstr: 2047 |->fff_newstr:
1662 | load_got lj_str_new 2048 | load_got lj_str_new
1663 | sw BASE, L->base 2049 | sw BASE, L->base
@@ -1674,27 +2060,52 @@ static void build_subroutines(BuildCtx *ctx)
1674 |.ffunc string_sub 2060 |.ffunc string_sub
1675 | ffgccheck 2061 | ffgccheck
1676 | addiu AT, NARGS8:RC, -16 2062 | addiu AT, NARGS8:RC, -16
2063 |.if FPU
2064 | ldc1 f0, 16(BASE)
2065 | trunc.w.d f0, f0
2066 |.else
2067 | lw CARG1, 16(BASE)
2068 | load_got __fixdfsi
2069 | sw AT, TEMP_SAVE_1
2070 | call_extern
2071 |. lw CARG2, 16+4(BASE)
2072 | lw AT, TEMP_SAVE_1
2073 |.endif
1677 | lw CARG3, 16+HI(BASE) 2074 | lw CARG3, 16+HI(BASE)
1678 | ldc1 f0, 16(BASE)
1679 | lw TMP0, HI(BASE) 2075 | lw TMP0, HI(BASE)
1680 | lw STR:CARG1, LO(BASE) 2076 | lw STR:CARG1, LO(BASE)
1681 | bltz AT, ->fff_fallback 2077 | bltz AT, ->fff_fallback
1682 | lw CARG2, 8+HI(BASE) 2078 |. lw CARG2, 8+HI(BASE)
1683 | ldc1 f2, 8(BASE)
1684 | beqz AT, >1 2079 | beqz AT, >1
1685 |. li CARG4, -1 2080 |. li CARG4, -1
1686 | trunc.w.d f0, f0
1687 | sltiu AT, CARG3, LJ_TISNUM 2081 | sltiu AT, CARG3, LJ_TISNUM
1688 | beqz AT, ->fff_fallback 2082 | beqz AT, ->fff_fallback
2083 |.if FPU
1689 |. mfc1 CARG4, f0 2084 |. mfc1 CARG4, f0
2085 |.else
2086 |. move CARG4, CRET1
2087 |.endif
1690 |1: 2088 |1:
1691 | sltiu AT, CARG2, LJ_TISNUM 2089 | sltiu AT, CARG2, LJ_TISNUM
1692 | beqz AT, ->fff_fallback 2090 | beqz AT, ->fff_fallback
1693 |. li AT, LJ_TSTR 2091 |. li AT, LJ_TSTR
1694 | trunc.w.d f2, f2
1695 | bne TMP0, AT, ->fff_fallback 2092 | bne TMP0, AT, ->fff_fallback
1696 |. lw CARG2, STR:CARG1->len 2093 |.if FPU
2094 |. ldc1 f2, 8(BASE)
2095 | trunc.w.d f2, f2
1697 | mfc1 CARG3, f2 2096 | mfc1 CARG3, f2
2097 |.else
2098 |. sw CARG1, TEMP_SAVE_1
2099 | sw CARG4, TEMP_SAVE_2
2100 | lw CARG2, 8+4(BASE)
2101 | load_got __fixdfsi
2102 | call_extern
2103 |. lw CARG1, 8(BASE)
2104 | lw CARG1, TEMP_SAVE_1
2105 | lw CARG4, TEMP_SAVE_2
2106 | move CARG3, CRET1
2107 |.endif
2108 | lw CARG2, STR:CARG1->len
1698 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end 2109 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1699 | slt AT, CARG4, r0 2110 | slt AT, CARG4, r0
1700 | addiu TMP0, CARG2, 1 2111 | addiu TMP0, CARG2, 1
@@ -1749,10 +2160,58 @@ static void build_subroutines(BuildCtx *ctx)
1749 | 2160 |
1750 |//-- Bit library -------------------------------------------------------- 2161 |//-- Bit library --------------------------------------------------------
1751 | 2162 |
2163 |.if not FPU
2164 |// FP number to bit conversion for soft-float.
2165 |->vm_tobit:
2166 | sll TMP0, CARG1, 1
2167 | lui TMP3, 0x0020
2168 | addu TMP0, TMP0, TMP3
2169 | slt TMP3, TMP0, r0
2170 | movz CARG2, r0, TMP3
2171 | beqz TMP3, >2
2172 |. li CARG4, 0x3e0
2173 | not CARG4, CARG4
2174 | sra TMP0, TMP0, 21
2175 | subu TMP0, CARG4, TMP0
2176 | slt TMP3, TMP0, r0
2177 | bnez TMP3, >1
2178 |. sll CARG4, CARG1, 11
2179 | lui TMP3, 0x8000
2180 | or CARG4, CARG4, TMP3
2181 | srl TMP3, CARG2, 21
2182 | or CARG4, CARG4, TMP3
2183 | slt TMP3, CARG1, r0
2184 | beqz TMP3, >2
2185 |. srlv CARG2, CARG4, TMP0
2186 | subu CARG2, r0, CARG2
2187 |2:
2188 | jr ra
2189 |. move CRET1, CARG2
2190 |1:
2191 | addiu TMP0, TMP0, 21
2192 | srlv CARG4, CARG2, TMP0
2193 | li TMP3, 20
2194 | subu TMP0, TMP3, TMP0
2195 | sll CARG2, CARG1, 12
2196 | sllv TMP3, CARG2, TMP0
2197 | or CARG2, CARG4, TMP3
2198 | slt TMP3, CARG1, r0
2199 | beqz TMP3, <2
2200 |. nop
2201 | jr ra
2202 |. subu CRET1, r0, CARG2
2203 |.endif
2204 |
1752 |.macro .ffunc_bit, name 2205 |.macro .ffunc_bit, name
1753 | .ffunc_n bit_..name 2206 | .ffunc_n bit_..name
2207 |.if FPU
1754 |. add.d FARG1, FARG1, TOBIT 2208 |. add.d FARG1, FARG1, TOBIT
1755 | mfc1 CRET1, FARG1 2209 | mfc1 CRET1, FARG1
2210 |.else
2211 |. nop
2212 | bal ->vm_tobit
2213 |. nop
2214 |.endif
1756 |.endmacro 2215 |.endmacro
1757 | 2216 |
1758 |.macro .ffunc_bit_op, name, ins 2217 |.macro .ffunc_bit_op, name, ins
@@ -1760,14 +2219,27 @@ static void build_subroutines(BuildCtx *ctx)
1760 | addiu TMP1, BASE, 8 2219 | addiu TMP1, BASE, 8
1761 | addu TMP2, BASE, NARGS8:RC 2220 | addu TMP2, BASE, NARGS8:RC
1762 |1: 2221 |1:
2222 | move CRET2, CRET1
1763 | lw CARG4, HI(TMP1) 2223 | lw CARG4, HI(TMP1)
2224 |.if FPU
1764 | beq TMP1, TMP2, ->fff_resi 2225 | beq TMP1, TMP2, ->fff_resi
1765 |. ldc1 FARG1, 0(TMP1) 2226 |. ldc1 FARG1, 0(TMP1)
2227 |.else
2228 | lw CARG1, 0(TMP1)
2229 | beq TMP1, TMP2, ->fff_resi
2230 |. lw CARG2, 4(TMP1)
2231 |.endif
1766 | sltiu AT, CARG4, LJ_TISNUM 2232 | sltiu AT, CARG4, LJ_TISNUM
1767 | beqz AT, ->fff_fallback 2233 | beqz AT, ->fff_fallback
1768 | add.d FARG1, FARG1, TOBIT 2234 |.if FPU
1769 | mfc1 CARG2, FARG1 2235 |. add.d FARG1, FARG1, TOBIT
1770 | ins CRET1, CRET1, CARG2 2236 | mfc1 CRET1, FARG1
2237 |.else
2238 |. nop
2239 | bal ->vm_tobit
2240 |. nop
2241 |.endif
2242 | ins CRET1, CRET2, CRET1
1771 | b <1 2243 | b <1
1772 |. addiu TMP1, TMP1, 8 2244 |. addiu TMP1, TMP1, 8
1773 |.endmacro 2245 |.endmacro
@@ -1794,10 +2266,22 @@ static void build_subroutines(BuildCtx *ctx)
1794 | 2266 |
1795 |.macro .ffunc_bit_sh, name, ins, shmod 2267 |.macro .ffunc_bit_sh, name, ins, shmod
1796 | .ffunc_nn bit_..name 2268 | .ffunc_nn bit_..name
2269 |.if FPU
1797 |. add.d FARG1, FARG1, TOBIT 2270 |. add.d FARG1, FARG1, TOBIT
1798 | add.d FARG2, FARG2, TOBIT 2271 | add.d FARG2, FARG2, TOBIT
1799 | mfc1 CARG1, FARG1 2272 | mfc1 CARG1, FARG1
1800 | mfc1 CARG2, FARG2 2273 | mfc1 CARG2, FARG2
2274 |.else
2275 |. sw CARG4, TEMP_SAVE_1
2276 | bal ->vm_tobit
2277 |. nop
2278 | move CRET2, CRET1
2279 | lw CARG2, TEMP_SAVE_1
2280 | bal ->vm_tobit
2281 |. move CARG1, CARG3
2282 | move CARG2, CRET1
2283 | move CARG1, CRET2
2284 |.endif
1801 |.if shmod == 1 2285 |.if shmod == 1
1802 | li AT, 32 2286 | li AT, 32
1803 | subu TMP0, AT, CARG2 2287 | subu TMP0, AT, CARG2
@@ -1822,9 +2306,19 @@ static void build_subroutines(BuildCtx *ctx)
1822 | 2306 |
1823 |.ffunc_bit tobit 2307 |.ffunc_bit tobit
1824 |->fff_resi: 2308 |->fff_resi:
2309 | lw PC, FRAME_PC(BASE)
2310 | addiu RA, BASE, -8
2311 |.if HFABI
1825 | mtc1 CRET1, FRET1 2312 | mtc1 CRET1, FRET1
1826 | b ->fff_resn 2313 | cvt.d.w FRET1, FRET1
1827 |. cvt.d.w FRET1, FRET1 2314 | b ->fff_res1
2315 |. sdc1 FRET1, -8(BASE)
2316 |.else // Result already in CRET1.
2317 | cvti2d CRET1
2318 | sw CRET1, -8(BASE)
2319 | b ->fff_res1
2320 |. sw CRET2, -8+4(BASE)
2321 |.endif
1828 | 2322 |
1829 |//----------------------------------------------------------------------- 2323 |//-----------------------------------------------------------------------
1830 | 2324 |
@@ -2082,14 +2576,23 @@ static void build_subroutines(BuildCtx *ctx)
2082 |//----------------------------------------------------------------------- 2576 |//-----------------------------------------------------------------------
2083 | 2577 |
2084 |.macro savex_, a, b 2578 |.macro savex_, a, b
2579 |.if FPU
2085 | sdc1 f..a, 16+a*8(sp) 2580 | sdc1 f..a, 16+a*8(sp)
2086 | sw r..a, 16+32*8+a*4(sp) 2581 | sw r..a, 16+32*8+a*4(sp)
2087 | sw r..b, 16+32*8+b*4(sp) 2582 | sw r..b, 16+32*8+b*4(sp)
2583 |.else
2584 | sw r..a, 16+a*4(sp)
2585 | sw r..b, 16+b*4(sp)
2586 |.endif
2088 |.endmacro 2587 |.endmacro
2089 | 2588 |
2090 |->vm_exit_handler: 2589 |->vm_exit_handler:
2091 |.if JIT 2590 |.if JIT
2591 |.if FPU
2092 | addiu sp, sp, -(16+32*8+32*4) 2592 | addiu sp, sp, -(16+32*8+32*4)
2593 |.else
2594 | addiu sp, sp, -(16+32*4)
2595 |.endif
2093 | savex_ 0, 1 2596 | savex_ 0, 1
2094 | savex_ 2, 3 2597 | savex_ 2, 3
2095 | savex_ 4, 5 2598 | savex_ 4, 5
@@ -2104,17 +2607,25 @@ static void build_subroutines(BuildCtx *ctx)
2104 | savex_ 22, 23 2607 | savex_ 22, 23
2105 | savex_ 24, 25 2608 | savex_ 24, 25
2106 | savex_ 26, 27 2609 | savex_ 26, 27
2610 |.if FPU
2107 | sdc1 f28, 16+28*8(sp) 2611 | sdc1 f28, 16+28*8(sp)
2108 | sw r28, 16+32*8+28*4(sp)
2109 | sdc1 f30, 16+30*8(sp) 2612 | sdc1 f30, 16+30*8(sp)
2613 | sw r28, 16+32*8+28*4(sp)
2110 | sw r30, 16+32*8+30*4(sp) 2614 | sw r30, 16+32*8+30*4(sp)
2111 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. 2615 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2616 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2617 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2618 |.else
2619 | sw r28, 16+28*4(sp)
2620 | sw r30, 16+30*4(sp)
2621 | sw r0, 16+31*4(sp) // Clear RID_TMP.
2622 | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2623 | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2624 |.endif
2112 | li_vmstate EXIT 2625 | li_vmstate EXIT
2113 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2114 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2626 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2115 | lw TMP1, 0(TMP2) // Load exit number. 2627 | lw TMP1, 0(TMP2) // Load exit number.
2116 | st_vmstate 2628 | st_vmstate
2117 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
2118 | lw L, DISPATCH_GL(cur_L)(DISPATCH) 2629 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2119 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) 2630 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2120 | load_got lj_trace_exit 2631 | load_got lj_trace_exit
@@ -2144,15 +2655,15 @@ static void build_subroutines(BuildCtx *ctx)
2144 |1: 2655 |1:
2145 | bltz CRET1, >9 // Check for error from exit. 2656 | bltz CRET1, >9 // Check for error from exit.
2146 |. lw LFUNC:RB, FRAME_FUNC(BASE) 2657 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2147 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2658 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2148 | sll MULTRES, CRET1, 3 2659 | sll MULTRES, CRET1, 3
2149 | li TISNIL, LJ_TNIL 2660 | li TISNIL, LJ_TNIL
2150 | sw MULTRES, SAVE_MULTRES 2661 | sw MULTRES, SAVE_MULTRES
2151 | mtc1 TMP3, TOBIT 2662 | .FPU mtc1 TMP3, TOBIT
2152 | lw TMP1, LFUNC:RB->pc 2663 | lw TMP1, LFUNC:RB->pc
2153 | sw r0, DISPATCH_GL(jit_base)(DISPATCH) 2664 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2154 | lw KBASE, PC2PROTO(k)(TMP1) 2665 | lw KBASE, PC2PROTO(k)(TMP1)
2155 | cvt.d.s TOBIT, TOBIT 2666 | .FPU cvt.d.s TOBIT, TOBIT
2156 | // Modified copy of ins_next which handles function header dispatch, too. 2667 | // Modified copy of ins_next which handles function header dispatch, too.
2157 | lw INS, 0(PC) 2668 | lw INS, 0(PC)
2158 | addiu PC, PC, 4 2669 | addiu PC, PC, 4
@@ -2160,7 +2671,7 @@ static void build_subroutines(BuildCtx *ctx)
2160 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2671 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2161 | decode_OP4a TMP1, INS 2672 | decode_OP4a TMP1, INS
2162 | decode_OP4b TMP1 2673 | decode_OP4b TMP1
2163 | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? 2674 | sltiu TMP2, TMP1, BC_FUNCF*4
2164 | addu TMP0, DISPATCH, TMP1 2675 | addu TMP0, DISPATCH, TMP1
2165 | decode_RD8a RD, INS 2676 | decode_RD8a RD, INS
2166 | lw AT, 0(TMP0) 2677 | lw AT, 0(TMP0)
@@ -2202,7 +2713,7 @@ static void build_subroutines(BuildCtx *ctx)
2202 |//----------------------------------------------------------------------- 2713 |//-----------------------------------------------------------------------
2203 | 2714 |
2204 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2715 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2205 |.macro vm_round, func 2716 |.macro vm_round_hf, func
2206 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2717 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2207 | mtc1 r0, f4 2718 | mtc1 r0, f4
2208 | mtc1 TMP0, f5 2719 | mtc1 TMP0, f5
@@ -2244,6 +2755,25 @@ static void build_subroutines(BuildCtx *ctx)
2244 |. mov.d FRET1, FARG1 2755 |. mov.d FRET1, FARG1
2245 |.endmacro 2756 |.endmacro
2246 | 2757 |
2758 |.macro vm_round_sf, func
2759 | addiu sp, sp, -8
2760 | load_got func
2761 | sw ra, 0(sp)
2762 | call_extern
2763 |. nop
2764 | lw ra, 0(sp)
2765 | jr ra
2766 |. addiu sp, sp, 8
2767 |.endmacro
2768 |
2769 |.macro vm_round, func
2770 |.if FPU
2771 | vm_round_hf, func
2772 |.else
2773 | vm_round_sf, func
2774 |.endif
2775 |.endmacro
2776 |
2247 |->vm_floor: 2777 |->vm_floor:
2248 | vm_round floor 2778 | vm_round floor
2249 |->vm_ceil: 2779 |->vm_ceil:
@@ -2272,10 +2802,10 @@ static void build_subroutines(BuildCtx *ctx)
2272 | sw r1, CTSTATE->cb.slot 2802 | sw r1, CTSTATE->cb.slot
2273 | sw CARG1, CTSTATE->cb.gpr[0] 2803 | sw CARG1, CTSTATE->cb.gpr[0]
2274 | sw CARG2, CTSTATE->cb.gpr[1] 2804 | sw CARG2, CTSTATE->cb.gpr[1]
2275 | sdc1 FARG1, CTSTATE->cb.fpr[0] 2805 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2276 | sw CARG3, CTSTATE->cb.gpr[2] 2806 | sw CARG3, CTSTATE->cb.gpr[2]
2277 | sw CARG4, CTSTATE->cb.gpr[3] 2807 | sw CARG4, CTSTATE->cb.gpr[3]
2278 | sdc1 FARG2, CTSTATE->cb.fpr[1] 2808 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2279 | addiu TMP0, sp, CFRAME_SPACE+16 2809 | addiu TMP0, sp, CFRAME_SPACE+16
2280 | sw TMP0, CTSTATE->cb.stack 2810 | sw TMP0, CTSTATE->cb.stack
2281 | sw r0, SAVE_PC // Any value outside of bytecode is ok. 2811 | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2286,14 +2816,14 @@ static void build_subroutines(BuildCtx *ctx)
2286 | lw BASE, L:CRET1->base 2816 | lw BASE, L:CRET1->base
2287 | lw RC, L:CRET1->top 2817 | lw RC, L:CRET1->top
2288 | move L, CRET1 2818 | move L, CRET1
2289 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2819 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2290 | lw LFUNC:RB, FRAME_FUNC(BASE) 2820 | lw LFUNC:RB, FRAME_FUNC(BASE)
2291 | mtc1 TMP3, TOBIT 2821 | .FPU mtc1 TMP3, TOBIT
2292 | li_vmstate INTERP 2822 | li_vmstate INTERP
2293 | li TISNIL, LJ_TNIL 2823 | li TISNIL, LJ_TNIL
2294 | subu RC, RC, BASE 2824 | subu RC, RC, BASE
2295 | st_vmstate 2825 | st_vmstate
2296 | cvt.d.s TOBIT, TOBIT 2826 | .FPU cvt.d.s TOBIT, TOBIT
2297 | ins_callt 2827 | ins_callt
2298 |.endif 2828 |.endif
2299 | 2829 |
@@ -2307,11 +2837,11 @@ static void build_subroutines(BuildCtx *ctx)
2307 | move CARG2, RA 2837 | move CARG2, RA
2308 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) 2838 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2309 |. move CARG1, CTSTATE 2839 |. move CARG1, CTSTATE
2840 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2310 | lw CRET1, CTSTATE->cb.gpr[0] 2841 | lw CRET1, CTSTATE->cb.gpr[0]
2311 | ldc1 FRET1, CTSTATE->cb.fpr[0] 2842 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2312 | lw CRET2, CTSTATE->cb.gpr[1]
2313 | b ->vm_leave_unw 2843 | b ->vm_leave_unw
2314 |. ldc1 FRET2, CTSTATE->cb.fpr[1] 2844 |. lw CRET2, CTSTATE->cb.gpr[1]
2315 |.endif 2845 |.endif
2316 | 2846 |
2317 |->vm_ffi_call: // Call C function via FFI. 2847 |->vm_ffi_call: // Call C function via FFI.
@@ -2343,8 +2873,8 @@ static void build_subroutines(BuildCtx *ctx)
2343 | lw CARG2, CCSTATE->gpr[1] 2873 | lw CARG2, CCSTATE->gpr[1]
2344 | lw CARG3, CCSTATE->gpr[2] 2874 | lw CARG3, CCSTATE->gpr[2]
2345 | lw CARG4, CCSTATE->gpr[3] 2875 | lw CARG4, CCSTATE->gpr[3]
2346 | ldc1 FARG1, CCSTATE->fpr[0] 2876 | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2347 | ldc1 FARG2, CCSTATE->fpr[1] 2877 | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2348 | jalr CFUNCADDR 2878 | jalr CFUNCADDR
2349 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2879 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2350 | lw CCSTATE:TMP1, -12(r16) 2880 | lw CCSTATE:TMP1, -12(r16)
@@ -2352,8 +2882,10 @@ static void build_subroutines(BuildCtx *ctx)
2352 | lw ra, -4(r16) 2882 | lw ra, -4(r16)
2353 | sw CRET1, CCSTATE:TMP1->gpr[0] 2883 | sw CRET1, CCSTATE:TMP1->gpr[0]
2354 | sw CRET2, CCSTATE:TMP1->gpr[1] 2884 | sw CRET2, CCSTATE:TMP1->gpr[1]
2355 | sdc1 FRET1, CCSTATE:TMP1->fpr[0] 2885 | .FPU sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2356 | sdc1 FRET2, CCSTATE:TMP1->fpr[1] 2886 | .FPU sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2887 | sw CARG1, CCSTATE:TMP1->gpr[2] // MIPS32 soft-float.
2888 | sw CARG2, CCSTATE:TMP1->gpr[3] // Complex doubles are returned in v0, v1, a0, a1.
2357 | move sp, r16 2889 | move sp, r16
2358 | jr ra 2890 | jr ra
2359 |. move r16, TMP2 2891 |. move r16, TMP2
@@ -2381,8 +2913,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2381 | addu CARG3, BASE, RD 2913 | addu CARG3, BASE, RD
2382 | lw TMP0, HI(CARG2) 2914 | lw TMP0, HI(CARG2)
2383 | lw TMP1, HI(CARG3) 2915 | lw TMP1, HI(CARG3)
2384 | ldc1 f0, 0(CARG2)
2385 | ldc1 f2, 0(CARG3)
2386 | sltiu TMP0, TMP0, LJ_TISNUM 2916 | sltiu TMP0, TMP0, LJ_TISNUM
2387 | sltiu TMP1, TMP1, LJ_TISNUM 2917 | sltiu TMP1, TMP1, LJ_TISNUM
2388 | lhu TMP2, OFS_RD(PC) 2918 | lhu TMP2, OFS_RD(PC)
@@ -2390,8 +2920,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2390 | addiu PC, PC, 4 2920 | addiu PC, PC, 4
2391 | beqz TMP0, ->vmeta_comp 2921 | beqz TMP0, ->vmeta_comp
2392 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) 2922 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
2923 | load_double f0, CARG1, CARG2, 0(CARG2)
2924 |.if FPU
2925 | ldc1 f2, 0(CARG3)
2926 |.else
2927 | lw CARG4, 4(CARG3)
2928 | lw CARG3, 0(CARG3)
2929 |.endif
2393 | decode_RD4b TMP2 2930 | decode_RD4b TMP2
2394 | addu TMP2, TMP2, TMP1 2931 | addu TMP2, TMP2, TMP1
2932 |.if FPU
2395 if (op == BC_ISLT || op == BC_ISGE) { 2933 if (op == BC_ISLT || op == BC_ISGE) {
2396 | c.olt.d f0, f2 2934 | c.olt.d f0, f2
2397 } else { 2935 } else {
@@ -2402,8 +2940,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2402 } else { 2940 } else {
2403 | movt TMP2, r0 2941 | movt TMP2, r0
2404 } 2942 }
2405 | addu PC, PC, TMP2 2943 |.else
2944 | load_got __ledf2
2945 | sw RD, TEMP_SAVE_1
2946 | sw TMP1, TEMP_SAVE_2
2947 | call_extern //CRET1 = f0<=f2
2948 |. sw TMP2, TEMP_SAVE_3
2949 | lw TMP2, TEMP_SAVE_3
2950 | lw TMP1, TEMP_SAVE_2
2951 if (op == BC_ISLT) {
2952 | bltz CRET1, >1
2953 } else if (op == BC_ISLE) {
2954 | blez CRET1, >1
2955 } else if (op == BC_ISGT) {
2956 | bgtz CRET1, >1
2957 } else {
2958 | bgez CRET1, >1
2959 }
2960 |. lw RD, TEMP_SAVE_1
2961 | move TMP2, r0
2406 |1: 2962 |1:
2963 |.endif
2964 | addu PC, PC, TMP2
2407 | ins_next 2965 | ins_next
2408 break; 2966 break;
2409 2967
@@ -2413,24 +2971,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2413 | addu RA, BASE, RA 2971 | addu RA, BASE, RA
2414 | addiu PC, PC, 4 2972 | addiu PC, PC, 4
2415 | lw TMP0, HI(RA) 2973 | lw TMP0, HI(RA)
2416 | ldc1 f0, 0(RA)
2417 | addu RD, BASE, RD 2974 | addu RD, BASE, RD
2418 | lhu TMP2, -4+OFS_RD(PC) 2975 | lhu TMP2, -4+OFS_RD(PC)
2419 | lw TMP1, HI(RD)
2420 | ldc1 f2, 0(RD)
2421 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 2976 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2977 | lw TMP1, HI(RD)
2978 | decode_RD4b TMP2
2422 | sltiu AT, TMP0, LJ_TISNUM 2979 | sltiu AT, TMP0, LJ_TISNUM
2423 | sltiu CARG1, TMP1, LJ_TISNUM 2980 | sltiu CARG1, TMP1, LJ_TISNUM
2424 | decode_RD4b TMP2 2981 | load_double f2, CARG3, CARG4, 0(RD)
2982 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2425 | and AT, AT, CARG1 2983 | and AT, AT, CARG1
2984 | load_double f0, CARG1, CARG2, 0(RA)
2426 | beqz AT, >5 2985 | beqz AT, >5
2427 |. addu TMP2, TMP2, TMP3 2986 |. addu TMP2, TMP2, TMP3
2987 |.if FPU
2428 | c.eq.d f0, f2 2988 | c.eq.d f0, f2
2429 if (vk) { 2989 if (vk) {
2430 | movf TMP2, r0 2990 | movf TMP2, r0
2431 } else { 2991 } else {
2432 | movt TMP2, r0 2992 | movt TMP2, r0
2433 } 2993 }
2994 |.else
2995 | load_got __ledf2
2996 | sw RD, TEMP_SAVE_1
2997 | call_extern
2998 |. sw TMP2, TEMP_SAVE_2
2999 | lw RD, TEMP_SAVE_1
3000 | lw TMP2, TEMP_SAVE_2
3001 if (vk) {
3002 | beqz CRET1, >4
3003 |. nop
3004 } else {
3005 | bnez CRET1, >4
3006 |. nop
3007 }
3008 | move TMP2, r0
3009 |4:
3010 |.endif
2434 |1: 3011 |1:
2435 | addu PC, PC, TMP2 3012 | addu PC, PC, TMP2
2436 | ins_next 3013 | ins_next
@@ -2507,10 +3084,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2507 | addu RA, BASE, RA 3084 | addu RA, BASE, RA
2508 | addiu PC, PC, 4 3085 | addiu PC, PC, 4
2509 | lw TMP0, HI(RA) 3086 | lw TMP0, HI(RA)
2510 | ldc1 f0, 0(RA) 3087 | load_double f0, CARG1, CARG2, 0(RA)
2511 | addu RD, KBASE, RD 3088 | addu RD, KBASE, RD
2512 | lhu TMP2, -4+OFS_RD(PC) 3089 | lhu TMP2, -4+OFS_RD(PC)
2513 | ldc1 f2, 0(RD) 3090 | load_double f2, CARG3, CARG4, 0(RD)
2514 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3091 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2515 | sltiu AT, TMP0, LJ_TISNUM 3092 | sltiu AT, TMP0, LJ_TISNUM
2516 | decode_RD4b TMP2 3093 | decode_RD4b TMP2
@@ -2520,6 +3097,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2520 | beqz AT, >1 3097 | beqz AT, >1
2521 |.endif 3098 |.endif
2522 |. addu TMP2, TMP2, TMP3 3099 |. addu TMP2, TMP2, TMP3
3100 |.if FPU
2523 | c.eq.d f0, f2 3101 | c.eq.d f0, f2
2524 if (vk) { 3102 if (vk) {
2525 | movf TMP2, r0 3103 | movf TMP2, r0
@@ -2530,6 +3108,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2530 |1: 3108 |1:
2531 | addu PC, PC, TMP2 3109 | addu PC, PC, TMP2
2532 } 3110 }
3111 |.else
3112 | load_got __ledf2
3113 | sw RD, TEMP_SAVE_1
3114 | call_extern
3115 |. sw TMP2, TEMP_SAVE_2
3116 | lw RD, TEMP_SAVE_1
3117 | lw TMP2, TEMP_SAVE_2
3118 if (vk) {
3119 | beqz CRET1, >4
3120 |. nop
3121 | move TMP2, r0
3122 |4:
3123 | addu PC, PC, TMP2
3124 |1:
3125 } else {
3126 | bnez CRET1, >1
3127 |. nop
3128 | move TMP2, r0
3129 |1:
3130 | addu PC, PC, TMP2
3131 }
3132 |.endif
2533 | ins_next 3133 | ins_next
2534 |.if FFI 3134 |.if FFI
2535 |5: 3135 |5:
@@ -2588,7 +3188,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2588 | addu PC, PC, TMP2 3188 | addu PC, PC, TMP2
2589 } else { 3189 } else {
2590 | sltiu TMP0, TMP0, LJ_TISTRUECOND 3190 | sltiu TMP0, TMP0, LJ_TISTRUECOND
2591 | ldc1 f0, 0(RD) 3191 | load_double1 0(RD)
2592 if (op == BC_ISTC) { 3192 if (op == BC_ISTC) {
2593 | beqz TMP0, >1 3193 | beqz TMP0, >1
2594 } else { 3194 } else {
@@ -2598,7 +3198,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2598 | decode_RD4b TMP2 3198 | decode_RD4b TMP2
2599 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3199 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2600 | addu TMP2, TMP2, TMP3 3200 | addu TMP2, TMP2, TMP3
2601 | sdc1 f0, 0(RA) 3201 | store_double1 0(RA)
2602 | addu PC, PC, TMP2 3202 | addu PC, PC, TMP2
2603 |1: 3203 |1:
2604 } 3204 }
@@ -2631,9 +3231,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2631 | // RA = dst*8, RD = src*8 3231 | // RA = dst*8, RD = src*8
2632 | addu RD, BASE, RD 3232 | addu RD, BASE, RD
2633 | addu RA, BASE, RA 3233 | addu RA, BASE, RA
2634 | ldc1 f0, 0(RD) 3234 | load_double1 0(RD)
2635 | ins_next1 3235 | ins_next1
2636 | sdc1 f0, 0(RA) 3236 | store_double1 0(RA)
2637 | ins_next2 3237 | ins_next2
2638 break; 3238 break;
2639 case BC_NOT: 3239 case BC_NOT:
@@ -2653,12 +3253,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2653 | addu CARG3, BASE, RD 3253 | addu CARG3, BASE, RD
2654 | addu RA, BASE, RA 3254 | addu RA, BASE, RA
2655 | lw TMP0, HI(CARG3) 3255 | lw TMP0, HI(CARG3)
2656 | ldc1 f0, 0(CARG3)
2657 | sltiu AT, TMP0, LJ_TISNUM 3256 | sltiu AT, TMP0, LJ_TISNUM
3257 | load_double f0, CARG1, CARG2, 0(CARG3)
3258 |.if FPU
2658 | beqz AT, ->vmeta_unm 3259 | beqz AT, ->vmeta_unm
2659 |. neg.d f0, f0 3260 |. neg.d f0, f0
3261 |.else
3262 | lui TMP1, 0x8000
3263 | xor CRET1, TMP1, CARG1
3264 | beqz AT, ->vmeta_unm
3265 |. move CRET2, CARG2
3266 |.endif
2660 | ins_next1 3267 | ins_next1
2661 | sdc1 f0, 0(RA) 3268 | store_double f0, CRET1, CRET2, 0(RA)
2662 | ins_next2 3269 | ins_next2
2663 break; 3270 break;
2664 case BC_LEN: 3271 case BC_LEN:
@@ -2672,10 +3279,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2672 |. li AT, LJ_TTAB 3279 |. li AT, LJ_TTAB
2673 | lw CRET1, STR:CARG1->len 3280 | lw CRET1, STR:CARG1->len
2674 |1: 3281 |1:
3282 |.if FPU
2675 | mtc1 CRET1, f0 3283 | mtc1 CRET1, f0
2676 | cvt.d.w f0, f0 3284 | cvt.d.w f0, f0
3285 |.else
3286 | cvti2d CRET1
3287 |.endif
2677 | ins_next1 3288 | ins_next1
2678 | sdc1 f0, 0(RA) 3289 | store_double f0, CRET1, CRET2, 0(RA)
2679 | ins_next2 3290 | ins_next2
2680 |2: 3291 |2:
2681 | bne TMP0, AT, ->vmeta_len 3292 | bne TMP0, AT, ->vmeta_len
@@ -2717,72 +3328,142 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2717 | addu CARG3, BASE, RB 3328 | addu CARG3, BASE, RB
2718 | addu CARG4, KBASE, RC 3329 | addu CARG4, KBASE, RC
2719 | lw TMP1, HI(CARG3) 3330 | lw TMP1, HI(CARG3)
2720 | ldc1 f20, 0(CARG3) 3331 | sltiu AT, TMP1, LJ_TISNUM
2721 | ldc1 f22, 0(CARG4) 3332 | load_double f20, CARG1, CARG2, 0(CARG3)
2722 | sltiu AT, TMP1, LJ_TISNUM 3333 | load_double f22, CARG3, CARG4, 0(CARG4)
3334 |.if FPU
3335 | beqz AT, ->vmeta_arith
3336 |.else
3337 | beqz AT, ->vmeta_arith_vn
3338 |.endif
3339 |. addu RA, BASE, RA
2723 || break; 3340 || break;
2724 ||case 1: 3341 ||case 1:
2725 | addu CARG4, BASE, RB 3342 | addu CARG4, BASE, RB
2726 | addu CARG3, KBASE, RC 3343 | addu CARG3, KBASE, RC
2727 | lw TMP1, HI(CARG4) 3344 | lw TMP1, HI(CARG4)
2728 | ldc1 f22, 0(CARG4) 3345 | sltiu AT, TMP1, LJ_TISNUM
2729 | ldc1 f20, 0(CARG3) 3346 | load_double f20, CARG1, CARG2, 0(CARG3)
2730 | sltiu AT, TMP1, LJ_TISNUM 3347 | load_double f22, CARG3, CARG4, 0(CARG4)
3348 |.if FPU
3349 | beqz AT, ->vmeta_arith
3350 |.else
3351 | beqz AT, ->vmeta_arith_nv
3352 |.endif
3353 |. addu RA, BASE, RA
2731 || break; 3354 || break;
2732 ||default: 3355 ||default:
2733 | addu CARG3, BASE, RB 3356 | addu CARG3, BASE, RB
2734 | addu CARG4, BASE, RC 3357 | addu CARG4, BASE, RC
2735 | lw TMP1, HI(CARG3) 3358 | lw TMP1, HI(CARG3)
2736 | lw TMP2, HI(CARG4) 3359 | lw TMP2, HI(CARG4)
2737 | ldc1 f20, 0(CARG3) 3360 | sltiu AT, TMP1, LJ_TISNUM
2738 | ldc1 f22, 0(CARG4) 3361 | sltiu TMP0, TMP2, LJ_TISNUM
2739 | sltiu AT, TMP1, LJ_TISNUM 3362 | and AT, AT, TMP0
2740 | sltiu TMP0, TMP2, LJ_TISNUM 3363 | load_double f20, CARG1, CARG2, 0(CARG3)
2741 | and AT, AT, TMP0 3364 | load_double f22, CARG3, CARG4, 0(CARG4)
3365 |.if FPU
3366 | beqz AT, ->vmeta_arith
3367 |.else
3368 | beqz AT, ->vmeta_arith_vv
3369 |.endif
3370 |. addu RA, BASE, RA
2742 || break; 3371 || break;
2743 ||} 3372 ||}
2744 | beqz AT, ->vmeta_arith
2745 |. addu RA, BASE, RA
2746 |.endmacro 3373 |.endmacro
2747 | 3374 |
3375 |.macro ins_arithfallback
3376 ||switch (vk) {
3377 ||case 0:
3378 | b ->vmeta_arith_vn
3379 |. nop
3380 || break;
3381 ||case 1:
3382 | b ->vmeta_arith_nv
3383 |. nop
3384 || break;
3385 ||default:
3386 | b ->vmeta_arith_vv
3387 |. nop
3388 || break;
3389 ||}
3390 |.endmacro
3391 |
3392 |.if FPU
2748 |.macro fpmod, a, b, c 3393 |.macro fpmod, a, b, c
2749 |->BC_MODVN_Z: 3394 |->BC_MODVN_Z:
2750 | bal ->vm_floor // floor(b/c) 3395 | bal ->vm_floor // floor(b/c)
2751 |. div.d FARG1, b, c 3396 |. div.d FARG1, b, c
2752 | mul.d a, FRET1, c 3397 | mul.d a, FRET1, c
2753 | sub.d a, b, a // b - floor(b/c)*c 3398 | sub.d a, b, a // b - floor(b/c)*c
2754 |.endmacro 3399 |.endmacro
3400 |.else
2755 | 3401 |
2756 |.macro ins_arith, ins 3402 |.macro sfpmod
3403 |->BC_MODVN_Z:
3404 | load_got __divdf3
3405 | sw CARG1, TEMP_SAVE_1
3406 | sw CARG2, TEMP_SAVE_2
3407 | sw CARG3, TEMP_SAVE_3
3408 | call_extern
3409 |. sw CARG4, TEMP_SAVE_4
3410 | move CARG1, CRET1
3411 | bal ->vm_floor
3412 |. move CARG2, CRET2
3413 | load_got __muldf3
3414 | move CARG1, CRET1
3415 | move CARG2, CRET2
3416 | lw CARG3, TEMP_SAVE_3
3417 | call_extern
3418 |. lw CARG4, TEMP_SAVE_4
3419 | load_got __subdf3
3420 | lw CARG1, TEMP_SAVE_1
3421 | lw CARG2, TEMP_SAVE_2
3422 | move CARG3, CRET1
3423 | call_extern
3424 |. move CARG4, CRET2
3425 |.endmacro
3426 |.endif
3427 |
3428 |.macro ins_arith, intins, fpins, fpcall
2757 | ins_arithpre 3429 | ins_arithpre
2758 |.if "ins" == "fpmod_" 3430 |.if "fpins" == "fpmod_"
2759 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3431 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2760 |. nop 3432 |. nop
2761 |.else 3433 |.else
2762 | ins f0, f20, f22 3434 |.if FPU
3435 | fpins f0, f20, f22
3436 |.else
3437 |.if "fpcall" == "sfpmod"
3438 | sfpmod
3439 |.else
3440 | load_got fpcall
3441 | call_extern
3442 |. nop
3443 |.endif
3444 |.endif
2763 | ins_next1 3445 | ins_next1
2764 | sdc1 f0, 0(RA) 3446 | store_double1 0(RA)
2765 | ins_next2 3447 | ins_next2
2766 |.endif 3448 |.endif
2767 |.endmacro 3449 |.endmacro
2768 3450
2769 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3451 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2770 | ins_arith add.d 3452 | ins_arith addu, add.d, __adddf3
2771 break; 3453 break;
2772 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3454 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2773 | ins_arith sub.d 3455 | ins_arith subu, sub.d, __subdf3
2774 break; 3456 break;
2775 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3457 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2776 | ins_arith mul.d 3458 | ins_arith mult, mul.d, __muldf3
2777 break; 3459 break;
2778 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3460 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2779 | ins_arith div.d 3461 | ins_arith div, div.d, __divdf3
2780 break; 3462 break;
2781 case BC_MODVN: 3463 case BC_MODVN:
2782 | ins_arith fpmod 3464 | ins_arith modi, fpmod, sfpmod
2783 break;
2784 case BC_MODNV: case BC_MODVV: 3465 case BC_MODNV: case BC_MODVV:
2785 | ins_arith fpmod_ 3466 | ins_arith modi, fpmod_, sfpmod
2786 break; 3467 break;
2787 case BC_POW: 3468 case BC_POW:
2788 | decode_RB8a RB, INS 3469 | decode_RB8a RB, INS
@@ -2792,18 +3473,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2792 | addu CARG4, BASE, RC 3473 | addu CARG4, BASE, RC
2793 | lw TMP1, HI(CARG3) 3474 | lw TMP1, HI(CARG3)
2794 | lw TMP2, HI(CARG4) 3475 | lw TMP2, HI(CARG4)
2795 | ldc1 FARG1, 0(CARG3)
2796 | ldc1 FARG2, 0(CARG4)
2797 | sltiu AT, TMP1, LJ_TISNUM 3476 | sltiu AT, TMP1, LJ_TISNUM
2798 | sltiu TMP0, TMP2, LJ_TISNUM 3477 | sltiu TMP0, TMP2, LJ_TISNUM
2799 | and AT, AT, TMP0 3478 | and AT, AT, TMP0
2800 | load_got pow 3479 | load_got pow
2801 | beqz AT, ->vmeta_arith 3480 | beqz AT, ->vmeta_arith
2802 |. addu RA, BASE, RA 3481 |. addu RA, BASE, RA
3482 | load_farg1 0(CARG3)
3483 | load_farg2 0(CARG4)
2803 | call_extern 3484 | call_extern
2804 |. nop 3485 |. nop
2805 | ins_next1 3486 | ins_next1
3487 |.if HFABI
2806 | sdc1 FRET1, 0(RA) 3488 | sdc1 FRET1, 0(RA)
3489 |.else
3490 | sw CRET1, 0(RA)
3491 | sw CRET2, 4(RA)
3492 |.endif
2807 | ins_next2 3493 | ins_next2
2808 break; 3494 break;
2809 3495
@@ -2826,10 +3512,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2826 | bnez CRET1, ->vmeta_binop 3512 | bnez CRET1, ->vmeta_binop
2827 |. lw BASE, L->base 3513 |. lw BASE, L->base
2828 | addu RB, BASE, MULTRES 3514 | addu RB, BASE, MULTRES
2829 | ldc1 f0, 0(RB) 3515 | load_double1 0(RB)
2830 | addu RA, BASE, RA 3516 | addu RA, BASE, RA
2831 | ins_next1 3517 | ins_next1
2832 | sdc1 f0, 0(RA) // Copy result from RB to RA. 3518 | store_double1 0(RA)
2833 | ins_next2 3519 | ins_next2
2834 break; 3520 break;
2835 3521
@@ -2864,20 +3550,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2864 case BC_KSHORT: 3550 case BC_KSHORT:
2865 | // RA = dst*8, RD = int16_literal*8 3551 | // RA = dst*8, RD = int16_literal*8
2866 | sra RD, INS, 16 3552 | sra RD, INS, 16
2867 | mtc1 RD, f0
2868 | addu RA, BASE, RA 3553 | addu RA, BASE, RA
3554 |.if FPU
3555 | mtc1 RD, f0
2869 | cvt.d.w f0, f0 3556 | cvt.d.w f0, f0
3557 |.else
3558 | cvti2d RD
3559 |.endif
2870 | ins_next1 3560 | ins_next1
2871 | sdc1 f0, 0(RA) 3561 | store_double f0, CRET1, CRET2, 0(RA)
2872 | ins_next2 3562 | ins_next2
2873 break; 3563 break;
2874 case BC_KNUM: 3564 case BC_KNUM:
2875 | // RA = dst*8, RD = num_const*8 3565 | // RA = dst*8, RD = num_const*8
2876 | addu RD, KBASE, RD 3566 | addu RD, KBASE, RD
2877 | addu RA, BASE, RA 3567 | addu RA, BASE, RA
2878 | ldc1 f0, 0(RD) 3568 | load_double1 0(RD)
2879 | ins_next1 3569 | ins_next1
2880 | sdc1 f0, 0(RA) 3570 | store_double1 0(RA)
2881 | ins_next2 3571 | ins_next2
2882 break; 3572 break;
2883 case BC_KPRI: 3573 case BC_KPRI:
@@ -2913,9 +3603,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2913 | lw UPVAL:RB, LFUNC:RD->uvptr 3603 | lw UPVAL:RB, LFUNC:RD->uvptr
2914 | ins_next1 3604 | ins_next1
2915 | lw TMP1, UPVAL:RB->v 3605 | lw TMP1, UPVAL:RB->v
2916 | ldc1 f0, 0(TMP1) 3606 | load_double1 0(TMP1)
2917 | addu RA, BASE, RA 3607 | addu RA, BASE, RA
2918 | sdc1 f0, 0(RA) 3608 | store_double1 0(RA)
2919 | ins_next2 3609 | ins_next2
2920 break; 3610 break;
2921 case BC_USETV: 3611 case BC_USETV:
@@ -2924,14 +3614,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2924 | srl RA, RA, 1 3614 | srl RA, RA, 1
2925 | addu RD, BASE, RD 3615 | addu RD, BASE, RD
2926 | addu RA, RA, LFUNC:RB 3616 | addu RA, RA, LFUNC:RB
2927 | ldc1 f0, 0(RD) 3617 | load_double1 0(RD)
2928 | lw UPVAL:RB, LFUNC:RA->uvptr 3618 | lw UPVAL:RB, LFUNC:RA->uvptr
2929 | lbu TMP3, UPVAL:RB->marked 3619 | lbu TMP3, UPVAL:RB->marked
2930 | lw CARG2, UPVAL:RB->v 3620 | lw CARG2, UPVAL:RB->v
2931 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3621 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2932 | lbu TMP0, UPVAL:RB->closed 3622 | lbu TMP0, UPVAL:RB->closed
2933 | lw TMP2, HI(RD) 3623 | lw TMP2, HI(RD)
2934 | sdc1 f0, 0(CARG2) 3624 | store_double1 0(CARG2)
2935 | li AT, LJ_GC_BLACK|1 3625 | li AT, LJ_GC_BLACK|1
2936 | or TMP3, TMP3, TMP0 3626 | or TMP3, TMP3, TMP0
2937 | beq TMP3, AT, >2 // Upvalue is closed and black? 3627 | beq TMP3, AT, >2 // Upvalue is closed and black?
@@ -2991,11 +3681,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2991 | srl RA, RA, 1 3681 | srl RA, RA, 1
2992 | addu RD, KBASE, RD 3682 | addu RD, KBASE, RD
2993 | addu RA, RA, LFUNC:RB 3683 | addu RA, RA, LFUNC:RB
2994 | ldc1 f0, 0(RD) 3684 | load_double1 0(RD)
2995 | lw UPVAL:RB, LFUNC:RA->uvptr 3685 | lw UPVAL:RB, LFUNC:RA->uvptr
2996 | ins_next1 3686 | ins_next1
2997 | lw TMP1, UPVAL:RB->v 3687 | lw TMP1, UPVAL:RB->v
2998 | sdc1 f0, 0(TMP1) 3688 | store_double1 0(TMP1)
2999 | ins_next2 3689 | ins_next2
3000 break; 3690 break;
3001 case BC_USETP: 3691 case BC_USETP:
@@ -3126,13 +3816,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3126 | lw TMP2, HI(CARG3) 3816 | lw TMP2, HI(CARG3)
3127 | lw TAB:RB, LO(CARG2) 3817 | lw TAB:RB, LO(CARG2)
3128 | li AT, LJ_TTAB 3818 | li AT, LJ_TTAB
3129 | ldc1 f0, 0(CARG3)
3130 | bne TMP1, AT, ->vmeta_tgetv 3819 | bne TMP1, AT, ->vmeta_tgetv
3131 |. addu RA, BASE, RA 3820 |. addu RA, BASE, RA
3132 | sltiu AT, TMP2, LJ_TISNUM 3821 | sltiu AT, TMP2, LJ_TISNUM
3133 | beqz AT, >5 3822 | beqz AT, >5
3134 |. li AT, LJ_TSTR 3823 |. li AT, LJ_TSTR
3135 | 3824 |.if FPU
3825 | ldc1 f0, 0(CARG3)
3136 | // Convert number key to integer, check for integerness and range. 3826 | // Convert number key to integer, check for integerness and range.
3137 | cvt.w.d f2, f0 3827 | cvt.w.d f2, f0
3138 | lw TMP0, TAB:RB->asize 3828 | lw TMP0, TAB:RB->asize
@@ -3148,9 +3838,51 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3148 | lw TMP0, HI(TMP2) 3838 | lw TMP0, HI(TMP2)
3149 | beq TMP0, TISNIL, >2 3839 | beq TMP0, TISNIL, >2
3150 |. ldc1 f0, 0(TMP2) 3840 |. ldc1 f0, 0(TMP2)
3841 |.else
3842 | sw RB, TEMP_SAVE_1
3843 | sw CARG2, TEMP_SAVE_3
3844 | load_got __fixdfsi
3845 | lw CARG1, 0(CARG3)
3846 | lw CARG2, 4(CARG3)
3847 | call_extern // cvt.w.d f2, f0
3848 |. sw RC, TEMP_SAVE_2
3849 | sw CRET1, TEMP_SAVE_4
3850 | cvti2d CRET1 // cvt.d.w f4, f2
3851 | load_got __ledf2
3852 | lw RC, TEMP_SAVE_2
3853 | addu CARG3, BASE, RC
3854 | lw CARG1, 0(CARG3)
3855 | lw CARG2, 4(CARG3)
3856 | move CARG3, CRET1
3857 | move CARG4, CRET2
3858 | call_extern // c.eq.d f0, f4
3859 |. nop
3860 | lw CARG3, TEMP_SAVE_3
3861 | lw RC, TEMP_SAVE_2
3862 | lw RB, TEMP_SAVE_1
3863 | lw TMP0, TAB:RB->asize
3864 | lw TMP1, TAB:RB->array
3865 | lw TMP2, TEMP_SAVE_4
3866 | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3.
3867 | addu CARG3, BASE, RC
3868 | bnez CRET1, >3
3869 |. sltu AT, TMP2, TMP0
3870 | b >4
3871 |. nop
3872 |3:
3873 | move AT, r0
3874 |4:
3875 | sll TMP2, TMP2, 3
3876 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3877 |. addu TMP2, TMP1, TMP2
3878 | lw TMP0, HI(TMP2)
3879 | lw SFT2, 4(TMP2)
3880 | beq TMP0, TISNIL, >2
3881 |. lw SFT1, 0(TMP2)
3882 |.endif
3151 |1: 3883 |1:
3152 | ins_next1 3884 | ins_next1
3153 | sdc1 f0, 0(RA) 3885 | store_double1 0(RA)
3154 | ins_next2 3886 | ins_next2
3155 | 3887 |
3156 |2: // Check for __index if table value is nil. 3888 |2: // Check for __index if table value is nil.
@@ -3246,10 +3978,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3246 |. addu RC, TMP2, RC 3978 |. addu RC, TMP2, RC
3247 | lw TMP1, HI(RC) 3979 | lw TMP1, HI(RC)
3248 | beq TMP1, TISNIL, >5 3980 | beq TMP1, TISNIL, >5
3249 |. ldc1 f0, 0(RC) 3981 |. nop
3250 |1: 3982 |1:
3983 | load_double1 0(RC)
3251 | ins_next1 3984 | ins_next1
3252 | sdc1 f0, 0(RA) 3985 | store_double1 0(RA)
3253 | ins_next2 3986 | ins_next2
3254 | 3987 |
3255 |5: // Check for __index if table value is nil. 3988 |5: // Check for __index if table value is nil.
@@ -3271,20 +4004,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3271 | addu CARG2, BASE, RB 4004 | addu CARG2, BASE, RB
3272 | addu CARG3, BASE, RC 4005 | addu CARG3, BASE, RC
3273 | lw TAB:CARG1, LO(CARG2) 4006 | lw TAB:CARG1, LO(CARG2)
4007 | lw TMP0, TAB:CARG1->asize
4008 | lw TMP1, TAB:CARG1->array
4009 |.if FPU
3274 | ldc1 f0, 0(CARG3) 4010 | ldc1 f0, 0(CARG3)
3275 | trunc.w.d f2, f0 4011 | trunc.w.d f2, f0
3276 | lw TMP0, TAB:CARG1->asize
3277 | mfc1 CARG2, f2 4012 | mfc1 CARG2, f2
3278 | lw TMP1, TAB:CARG1->array 4013 |.else
4014 | load_got __fixdfsi
4015 | lw CARG1, 0(CARG3)
4016 | call_extern
4017 |. lw CARG2, 4(CARG3)
4018 | move CARG2, CRET1
4019 |.endif
3279 | sltu AT, CARG2, TMP0 4020 | sltu AT, CARG2, TMP0
3280 | sll TMP2, CARG2, 3 4021 | sll TMP2, CARG2, 3
3281 | beqz AT, ->vmeta_tgetr // In array part? 4022 | beqz AT, ->vmeta_tgetr // In array part?
3282 |. addu TMP2, TMP1, TMP2 4023 |. addu TMP2, TMP1, TMP2
3283 | ldc1 f0, 0(TMP2) 4024 | load_double1 0(TMP2)
3284 |->BC_TGETR_Z: 4025 |->BC_TGETR_Z:
3285 | addu RA, BASE, RA 4026 | addu RA, BASE, RA
3286 | ins_next1 4027 | ins_next1
3287 | sdc1 f0, 0(RA) 4028 | store_double1 0(RA)
3288 | ins_next2 4029 | ins_next2
3289 break; 4030 break;
3290 4031
@@ -3299,13 +4040,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3299 | lw TMP2, HI(CARG3) 4040 | lw TMP2, HI(CARG3)
3300 | lw TAB:RB, LO(CARG2) 4041 | lw TAB:RB, LO(CARG2)
3301 | li AT, LJ_TTAB 4042 | li AT, LJ_TTAB
3302 | ldc1 f0, 0(CARG3)
3303 | bne TMP1, AT, ->vmeta_tsetv 4043 | bne TMP1, AT, ->vmeta_tsetv
3304 |. addu RA, BASE, RA 4044 |. addu RA, BASE, RA
3305 | sltiu AT, TMP2, LJ_TISNUM 4045 | sltiu AT, TMP2, LJ_TISNUM
3306 | beqz AT, >5 4046 | beqz AT, >5
3307 |. li AT, LJ_TSTR 4047 |. li AT, LJ_TSTR
3308 | 4048 |.if FPU
4049 | ldc1 f0, 0(CARG3)
3309 | // Convert number key to integer, check for integerness and range. 4050 | // Convert number key to integer, check for integerness and range.
3310 | cvt.w.d f2, f0 4051 | cvt.w.d f2, f0
3311 | lw TMP0, TAB:RB->asize 4052 | lw TMP0, TAB:RB->asize
@@ -3326,6 +4067,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3326 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4067 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3327 | bnez AT, >7 4068 | bnez AT, >7
3328 |. sdc1 f0, 0(TMP1) 4069 |. sdc1 f0, 0(TMP1)
4070 |.else
4071 | sw RB, TEMP_SAVE_1
4072 | sw RC, TEMP_SAVE_2
4073 | sw CARG2, TEMP_SAVE_3
4074 | load_got __fixdfsi
4075 | lw CARG1, 0(CARG3)
4076 | call_extern // cvt.w.d f2, f0
4077 |. lw CARG2, 4(CARG3)
4078 | sw CRET1, TEMP_SAVE_4
4079 | cvti2d CRET1 // cvt.d.w f4, f2
4080 | load_got __ledf2
4081 | lw RC, TEMP_SAVE_2
4082 | addu CARG3, BASE, RC
4083 | lw CARG1, 0(CARG3)
4084 | lw CARG2, 4(CARG3)
4085 | move CARG3, CRET1
4086 | call_extern // c.eq.d f0, f4
4087 |. move CARG4, CRET2
4088 | lw RC, TEMP_SAVE_2
4089 | lw RB, TEMP_SAVE_1
4090 | lw TMP0, TAB:RB->asize
4091 | lw TMP1, TAB:RB->array
4092 | lw TMP2, TEMP_SAVE_4
4093 | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3.
4094 | addu CARG3, BASE, RC
4095 | bnez CRET1, >4 // NaN?
4096 |. sltu AT, TMP2, TMP0
4097 | b >6
4098 |. nop
4099 |4:
4100 | move AT, r0
4101 |6:
4102 | sll TMP2, TMP2, 3
4103 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
4104 |. addu TMP1, TMP1, TMP2
4105 | lbu TMP3, TAB:RB->marked
4106 | lw TMP0, HI(TMP1)
4107 | lw SFT1, 0(RA)
4108 | beq TMP0, TISNIL, >3
4109 |. lw SFT2, 4(RA)
4110 |1:
4111 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4112 | sw SFT1, 0(TMP1)
4113 | bnez AT, >7
4114 |. sw SFT2, 4(TMP1)
4115 |.endif
3329 |2: 4116 |2:
3330 | ins_next 4117 | ins_next
3331 | 4118 |
@@ -3374,7 +4161,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3374 | sll TMP1, TMP1, 3 4161 | sll TMP1, TMP1, 3
3375 | subu TMP1, TMP0, TMP1 4162 | subu TMP1, TMP0, TMP1
3376 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4163 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
3377 | ldc1 f20, 0(RA) 4164 | load_double f20, SFT1, SFT2, 0(RA)
3378 |1: 4165 |1:
3379 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4166 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3380 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4167 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3388,8 +4175,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3388 |. lw TAB:TMP0, TAB:RB->metatable 4175 |. lw TAB:TMP0, TAB:RB->metatable
3389 |2: 4176 |2:
3390 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4177 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4178 |.if FPU
3391 | bnez AT, >7 4179 | bnez AT, >7
3392 |. sdc1 f20, NODE:TMP2->val 4180 |. sdc1 f20, NODE:TMP2->val
4181 |.else
4182 | sw SFT1, NODE:TMP2->val.u32.hi
4183 | bnez AT, >7
4184 |. sw SFT2, NODE:TMP2->val.u32.lo
4185 |.endif
3393 |3: 4186 |3:
3394 | ins_next 4187 | ins_next
3395 | 4188 |
@@ -3417,6 +4210,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3417 | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. 4210 | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3418 |. li AT, LJ_TSTR 4211 |. li AT, LJ_TSTR
3419 |6: 4212 |6:
4213 |.if not FPU
4214 | sw SFT1, TEMP_SAVE_1
4215 | sw SFT2, TEMP_SAVE_2
4216 |.endif
3420 | load_got lj_tab_newkey 4217 | load_got lj_tab_newkey
3421 | sw STR:RC, LO(CARG3) 4218 | sw STR:RC, LO(CARG3)
3422 | sw AT, HI(CARG3) 4219 | sw AT, HI(CARG3)
@@ -3427,8 +4224,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3427 |. move CARG1, L 4224 |. move CARG1, L
3428 | // Returns TValue *. 4225 | // Returns TValue *.
3429 | lw BASE, L->base 4226 | lw BASE, L->base
4227 |.if FPU
3430 | b <3 // No 2nd write barrier needed. 4228 | b <3 // No 2nd write barrier needed.
3431 |. sdc1 f20, 0(CRET1) 4229 |. sdc1 f20, 0(CRET1)
4230 |.else
4231 | lw SFT2, TEMP_SAVE_1
4232 | lw SFT3, TEMP_SAVE_2
4233 | sw SFT2, 0(CRET1)
4234 | b <3
4235 |. sw SFT3, 4(CRET1)
4236 |.endif
3432 | 4237 |
3433 |7: // Possible table write barrier for the value. Skip valiswhite check. 4238 |7: // Possible table write barrier for the value. Skip valiswhite check.
3434 | barrierback TAB:RB, TMP3, TMP0, <3 4239 | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3453,11 +4258,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3453 | lw TMP1, HI(RC) 4258 | lw TMP1, HI(RC)
3454 | lbu TMP3, TAB:RB->marked 4259 | lbu TMP3, TAB:RB->marked
3455 | beq TMP1, TISNIL, >5 4260 | beq TMP1, TISNIL, >5
3456 |. ldc1 f0, 0(RA)
3457 |1: 4261 |1:
3458 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4262 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4263 | load_double1 0(RA)
4264 |.if FPU
3459 | bnez AT, >7 4265 | bnez AT, >7
3460 |. sdc1 f0, 0(RC) 4266 |. sdc1 f0, 0(RC)
4267 |.else
4268 | sw SFT1, 0(RC)
4269 | bnez AT, >7
4270 |. sw SFT2, 4(RC)
4271 |.endif
3461 |2: 4272 |2:
3462 | ins_next 4273 | ins_next
3463 | 4274 |
@@ -3482,12 +4293,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3482 | decode_RDtoRC8 RC, RD 4293 | decode_RDtoRC8 RC, RD
3483 | addu CARG1, BASE, RB 4294 | addu CARG1, BASE, RB
3484 | addu CARG3, BASE, RC 4295 | addu CARG3, BASE, RC
3485 | lw TAB:CARG2, LO(CARG1) 4296 |.if FPU
3486 | ldc1 f0, 0(CARG3) 4297 | ldc1 f0, 0(CARG3)
3487 | trunc.w.d f2, f0 4298 | trunc.w.d f2, f0
4299 | mfc1 CARG3, f2
4300 |.else
4301 | load_got __fixdfsi
4302 | sw CARG1, TEMP_SAVE_1
4303 | lw CARG1, 0(CARG3)
4304 | call_extern
4305 |. lw CARG2, 4(CARG3)
4306 | lw CARG1, TEMP_SAVE_1
4307 | move CARG3, CRET1
4308 |.endif
4309 | lw TAB:CARG2, LO(CARG1)
3488 | lbu TMP3, TAB:CARG2->marked 4310 | lbu TMP3, TAB:CARG2->marked
3489 | lw TMP0, TAB:CARG2->asize 4311 | lw TMP0, TAB:CARG2->asize
3490 | mfc1 CARG3, f2
3491 | lw TMP1, TAB:CARG2->array 4312 | lw TMP1, TAB:CARG2->array
3492 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4313 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3493 | bnez AT, >7 4314 | bnez AT, >7
@@ -3495,12 +4316,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3495 |2: 4316 |2:
3496 | sltu AT, CARG3, TMP0 4317 | sltu AT, CARG3, TMP0
3497 | sll TMP2, CARG3, 3 4318 | sll TMP2, CARG3, 3
4319 |.if FPU
3498 | beqz AT, ->vmeta_tsetr // In array part? 4320 | beqz AT, ->vmeta_tsetr // In array part?
3499 |. ldc1 f20, 0(RA) 4321 |. ldc1 f20, 0(RA)
3500 | addu CRET1, TMP1, TMP2 4322 | addu CRET1, TMP1, TMP2
3501 |->BC_TSETR_Z: 4323 |->BC_TSETR_Z:
4324 |.else
4325 | lw TMP0, 0(RA)
4326 | lw TMP3, 4(RA)
4327 | sw TMP0, TEMP_SAVE_1
4328 | beqz AT, ->vmeta_tsetr // In array part?
4329 |. sw TMP3, TEMP_SAVE_2
4330 | addu CRET1, TMP1, TMP2
4331 |->BC_TSETR_Z:
4332 | lw TMP0, TEMP_SAVE_1
4333 | lw TMP3, TEMP_SAVE_2
4334 |.endif
3502 | ins_next1 4335 | ins_next1
3503 | sdc1 f20, 0(CRET1) 4336 | store_double f20, TMP0, TMP3, 0(CRET1)
3504 | ins_next2 4337 | ins_next2
3505 | 4338 |
3506 |7: // Possible table write barrier for the value. Skip valiswhite check. 4339 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -3529,10 +4362,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3529 | addu TMP1, TMP1, CARG1 4362 | addu TMP1, TMP1, CARG1
3530 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4363 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3531 |3: // Copy result slots to table. 4364 |3: // Copy result slots to table.
3532 | ldc1 f0, 0(RA) 4365 | load_double1 0(RA)
3533 | addiu RA, RA, 8 4366 | addiu RA, RA, 8
3534 | sltu AT, RA, TMP2 4367 | sltu AT, RA, TMP2
3535 | sdc1 f0, 0(TMP1) 4368 | store_double1 0(TMP1)
3536 | bnez AT, <3 4369 | bnez AT, <3
3537 |. addiu TMP1, TMP1, 8 4370 |. addiu TMP1, TMP1, 8
3538 | bnez TMP0, >7 4371 | bnez TMP0, >7
@@ -3607,10 +4440,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3607 | beqz NARGS8:RC, >3 4440 | beqz NARGS8:RC, >3
3608 |. move TMP3, NARGS8:RC 4441 |. move TMP3, NARGS8:RC
3609 |2: 4442 |2:
3610 | ldc1 f0, 0(RA) 4443 | load_double1 0(RA)
3611 | addiu RA, RA, 8 4444 | addiu RA, RA, 8
3612 | addiu TMP3, TMP3, -8 4445 | addiu TMP3, TMP3, -8
3613 | sdc1 f0, 0(TMP2) 4446 | store_double1 0(TMP2)
3614 | bnez TMP3, <2 4447 | bnez TMP3, <2
3615 |. addiu TMP2, TMP2, 8 4448 |. addiu TMP2, TMP2, 8
3616 |3: 4449 |3:
@@ -3647,12 +4480,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3647 | li AT, LJ_TFUNC 4480 | li AT, LJ_TFUNC
3648 | lw TMP1, -24+HI(BASE) 4481 | lw TMP1, -24+HI(BASE)
3649 | lw LFUNC:RB, -24+LO(BASE) 4482 | lw LFUNC:RB, -24+LO(BASE)
3650 | ldc1 f2, -8(BASE) 4483 | load_double1 -8(BASE)
3651 | ldc1 f0, -16(BASE) 4484 | load_double2 -16(BASE)
3652 | sw TMP1, HI(BASE) // Copy callable. 4485 | sw TMP1, HI(BASE) // Copy callable.
3653 | sw LFUNC:RB, LO(BASE) 4486 | sw LFUNC:RB, LO(BASE)
3654 | sdc1 f2, 16(BASE) // Copy control var. 4487 | store_double1 16(BASE) // Copy control var.
3655 | sdc1 f0, 8(BASE) // Copy state. 4488 | store_double2 8(BASE) // Copy state.
3656 | addiu BASE, BASE, 8 4489 | addiu BASE, BASE, 8
3657 | bne TMP1, AT, ->vmeta_call 4490 | bne TMP1, AT, ->vmeta_call
3658 |. li NARGS8:RC, 16 // Iterators get 2 arguments. 4491 |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3676,19 +4509,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3676 |. sll TMP3, RC, 3 4509 |. sll TMP3, RC, 3
3677 | addu TMP3, TMP1, TMP3 4510 | addu TMP3, TMP1, TMP3
3678 | lw TMP2, HI(TMP3) 4511 | lw TMP2, HI(TMP3)
3679 | ldc1 f0, 0(TMP3) 4512 | load_double1 0(TMP3)
4513 |.if FPU
3680 | mtc1 RC, f2 4514 | mtc1 RC, f2
4515 |.else
4516 | move CARG1, RC
4517 |.endif
3681 | lhu RD, -4+OFS_RD(PC) 4518 | lhu RD, -4+OFS_RD(PC)
3682 | beq TMP2, TISNIL, <1 // Skip holes in array part. 4519 | beq TMP2, TISNIL, <1 // Skip holes in array part.
3683 |. addiu RC, RC, 1 4520 |. addiu RC, RC, 1
4521 | store_double1 8(RA)
4522 |.if FPU
3684 | cvt.d.w f2, f2 4523 | cvt.d.w f2, f2
4524 |.else
4525 | load_got __floatsidf
4526 | call_extern
4527 |. nop
4528 |.endif
3685 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4529 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3686 | sdc1 f0, 8(RA) 4530 | store_double f2, CRET1, CRET2, 0(RA)
3687 | decode_RD4b RD 4531 | decode_RD4b RD
3688 | addu RD, RD, TMP3 4532 | addu RD, RD, TMP3
3689 | sw RC, -8+LO(RA) // Update control var. 4533 | sw RC, -8+LO(RA) // Update control var.
3690 | addu PC, PC, RD 4534 | addu PC, PC, RD
3691 | sdc1 f2, 0(RA)
3692 |3: 4535 |3:
3693 | ins_next 4536 | ins_next
3694 | 4537 |
@@ -3704,17 +4547,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3704 | subu TMP3, TMP3, RB 4547 | subu TMP3, TMP3, RB
3705 | addu NODE:TMP3, TMP3, TMP2 4548 | addu NODE:TMP3, TMP3, TMP2
3706 | lw RB, HI(NODE:TMP3) 4549 | lw RB, HI(NODE:TMP3)
3707 | ldc1 f0, 0(NODE:TMP3) 4550 | load_double1 0(NODE:TMP3)
3708 | lhu RD, -4+OFS_RD(PC) 4551 | lhu RD, -4+OFS_RD(PC)
3709 | beq RB, TISNIL, <6 // Skip holes in hash part. 4552 | beq RB, TISNIL, <6 // Skip holes in hash part.
3710 |. addiu RC, RC, 1 4553 |. addiu RC, RC, 1
4554 |.if FPU
3711 | ldc1 f2, NODE:TMP3->key 4555 | ldc1 f2, NODE:TMP3->key
4556 |.else
4557 | lw SFT3, NODE:TMP3->key.u32.hi
4558 | lw SFT4, NODE:TMP3->key.u32.lo
4559 |.endif
3712 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4560 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3713 | sdc1 f0, 8(RA) 4561 | store_double1 8(RA)
3714 | addu RC, RC, TMP0 4562 | addu RC, RC, TMP0
3715 | decode_RD4b RD 4563 | decode_RD4b RD
3716 | addu RD, RD, TMP3 4564 | addu RD, RD, TMP3
3717 | sdc1 f2, 0(RA) 4565 | store_double2 0(RA)
3718 | addu PC, PC, RD 4566 | addu PC, PC, RD
3719 | b <3 4567 | b <3
3720 |. sw RC, -8+LO(RA) // Update control var. 4568 |. sw RC, -8+LO(RA) // Update control var.
@@ -3794,9 +4642,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3794 | bnez AT, >7 4642 | bnez AT, >7
3795 |. addiu MULTRES, TMP1, 8 4643 |. addiu MULTRES, TMP1, 8
3796 |6: 4644 |6:
3797 | ldc1 f0, 0(RC) 4645 | load_double1 0(RC)
3798 | addiu RC, RC, 8 4646 | addiu RC, RC, 8
3799 | sdc1 f0, 0(RA) 4647 | store_double1 0(RA)
3800 | sltu AT, RC, TMP3 4648 | sltu AT, RC, TMP3
3801 | bnez AT, <6 // More vararg slots? 4649 | bnez AT, <6 // More vararg slots?
3802 |. addiu RA, RA, 8 4650 |. addiu RA, RA, 8
@@ -3852,10 +4700,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3852 | beqz RC, >3 4700 | beqz RC, >3
3853 |. subu BASE, TMP2, TMP0 4701 |. subu BASE, TMP2, TMP0
3854 |2: 4702 |2:
3855 | ldc1 f0, 0(RA) 4703 | load_double1 0(RA)
3856 | addiu RA, RA, 8 4704 | addiu RA, RA, 8
3857 | addiu RC, RC, -8 4705 | addiu RC, RC, -8
3858 | sdc1 f0, 0(TMP2) 4706 | store_double1 0(TMP2)
3859 | bnez RC, <2 4707 | bnez RC, <2
3860 |. addiu TMP2, TMP2, 8 4708 |. addiu TMP2, TMP2, 8
3861 |3: 4709 |3:
@@ -3896,14 +4744,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3896 | lw INS, -4(PC) 4744 | lw INS, -4(PC)
3897 | addiu TMP2, BASE, -8 4745 | addiu TMP2, BASE, -8
3898 if (op == BC_RET1) { 4746 if (op == BC_RET1) {
3899 | ldc1 f0, 0(RA) 4747 | load_double1 0(RA)
3900 } 4748 }
3901 | decode_RB8a RB, INS 4749 | decode_RB8a RB, INS
3902 | decode_RA8a RA, INS 4750 | decode_RA8a RA, INS
3903 | decode_RB8b RB 4751 | decode_RB8b RB
3904 | decode_RA8b RA 4752 | decode_RA8b RA
3905 if (op == BC_RET1) { 4753 if (op == BC_RET1) {
3906 | sdc1 f0, 0(TMP2) 4754 | store_double1 0(TMP2)
3907 } 4755 }
3908 | subu BASE, TMP2, RA 4756 | subu BASE, TMP2, RA
3909 |5: 4757 |5:
@@ -3928,6 +4776,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3928 4776
3929 /* -- Loops and branches ------------------------------------------------ */ 4777 /* -- Loops and branches ------------------------------------------------ */
3930 4778
4779 |.macro cmp_res, gt
4780 |.if gt == 1
4781 |.if FPU
4782 | movf TMP1, r0, 0 // f0>f2: TMP1=0
4783 | movf TMP2, r0, 1 // f2>f0: TMP2=0
4784 |.else
4785 | li SFT2, 1
4786 | bne CRET1, SFT2, >1
4787 |. nop
4788 | b >2
4789 |. move TMP1, r0
4790 |1:
4791 | li SFT2, -1
4792 | bne CRET1, SFT2, >2
4793 |. nop
4794 | move TMP2, r0
4795 |2:
4796 |.endif
4797 |.else
4798 |.if FPU
4799 | movt TMP1, r0, 0 // f0<=f2: TMP1=0
4800 | movt TMP2, r0, 1 // f2<=f0: TMP2=0
4801 |.else
4802 | bltz CRET1, >3 // f0<f2: TMP1=0
4803 |. nop
4804 | beqz CRET1, >2 // f0==f2: TMP1=TMP2=0
4805 |. li SFT2, 1
4806 | bne SFT2, CRET1, >4 // f0>f2: TMP2=0
4807 |. nop
4808 | b >4
4809 |2:
4810 |. move TMP2, r0
4811 |3:
4812 | move TMP1, r0
4813 |4:
4814 |.endif
4815 |.endif
4816 |.endmacro
4817
3931 case BC_FORL: 4818 case BC_FORL:
3932 |.if JIT 4819 |.if JIT
3933 | hotloop 4820 | hotloop
@@ -3946,12 +4833,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3946 vk = (op == BC_IFORL || op == BC_JFORL); 4833 vk = (op == BC_IFORL || op == BC_JFORL);
3947 | addu RA, BASE, RA 4834 | addu RA, BASE, RA
3948 if (vk) { 4835 if (vk) {
4836 |.if FPU
3949 | ldc1 f0, FORL_IDX*8(RA) 4837 | ldc1 f0, FORL_IDX*8(RA)
3950 | ldc1 f4, FORL_STEP*8(RA) 4838 | ldc1 f4, FORL_STEP*8(RA)
3951 | ldc1 f2, FORL_STOP*8(RA) 4839 | ldc1 f2, FORL_STOP*8(RA)
3952 | lw TMP3, FORL_STEP*8+HI(RA) 4840 | lw TMP3, FORL_STEP*8+HI(RA)
3953 | add.d f0, f0, f4 4841 | add.d f0, f0, f4
3954 | sdc1 f0, FORL_IDX*8(RA) 4842 | sdc1 f0, FORL_IDX*8(RA)
4843 |.else
4844 | load_got __adddf3
4845 | load_farg1 FORL_IDX*8(RA)
4846 | load_farg2 FORL_STEP*8(RA)
4847 | call_extern
4848 |. sw RD, TEMP_SAVE_1 //save RD
4849 | sw CRET1, FORL_IDX*8(RA)
4850 | sw CRET2, FORL_IDX*8+4(RA)
4851 | load_farg1 FORL_IDX*8(RA)
4852 | load_farg2 FORL_STOP*8(RA) // f0 and f2
4853 | lw TMP3, FORL_STEP*8+HI(RA)
4854 | lw RD, TEMP_SAVE_1
4855 |.endif
3955 } else { 4856 } else {
3956 | lw TMP1, FORL_IDX*8+HI(RA) 4857 | lw TMP1, FORL_IDX*8+HI(RA)
3957 | lw TMP3, FORL_STEP*8+HI(RA) 4858 | lw TMP3, FORL_STEP*8+HI(RA)
@@ -3961,25 +4862,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3961 | sltiu TMP2, TMP2, LJ_TISNUM 4862 | sltiu TMP2, TMP2, LJ_TISNUM
3962 | and TMP1, TMP1, TMP0 4863 | and TMP1, TMP1, TMP0
3963 | and TMP1, TMP1, TMP2 4864 | and TMP1, TMP1, TMP2
4865 |.if FPU
3964 | ldc1 f0, FORL_IDX*8(RA) 4866 | ldc1 f0, FORL_IDX*8(RA)
3965 | beqz TMP1, ->vmeta_for 4867 | beqz TMP1, ->vmeta_for
3966 |. ldc1 f2, FORL_STOP*8(RA) 4868 |. ldc1 f2, FORL_STOP*8(RA)
4869 |.else
4870 | beqz TMP1, ->vmeta_for
4871 | load_farg1 FORL_IDX*8(RA)
4872 | load_farg2 FORL_STOP*8(RA)
4873 |.endif
3967 } 4874 }
3968 if (op != BC_JFORL) { 4875 if (op != BC_JFORL) {
3969 | srl RD, RD, 1 4876 | srl RD, RD, 1
3970 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) 4877 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535)
3971 } 4878 }
4879 | store_double f0, CARG1, CARG2, FORL_EXT*8(RA)
4880 |.if FPU
3972 | c.le.d 0, f0, f2 4881 | c.le.d 0, f0, f2
3973 | c.le.d 1, f2, f0 4882 | c.le.d 1, f2, f0
3974 | sdc1 f0, FORL_EXT*8(RA) 4883 |.else
4884 | sw RD, TEMP_SAVE_1
4885 | load_got __ledf2 // f0<=f2
4886 | call_extern
4887 |. sw TMP0, TEMP_SAVE_2
4888 | lw TMP0, TEMP_SAVE_2
4889 | lw RD, TEMP_SAVE_1
4890 | lw TMP3, FORL_STEP*8+HI(RA) // Restored step.
4891 |.endif
4892 |
3975 if (op == BC_JFORI) { 4893 if (op == BC_JFORI) {
3976 | li TMP1, 1 4894 | li TMP1, 1
3977 | li TMP2, 1 4895 | li TMP2, 1
3978 | addu TMP0, RD, TMP0 4896 | addu TMP0, RD, TMP0
3979 | slt TMP3, TMP3, r0 4897 | slt TMP3, TMP3, r0
3980 | movf TMP1, r0, 0 4898 | cmp_res 1
3981 | addu PC, PC, TMP0 4899 | addu PC, PC, TMP0
3982 | movf TMP2, r0, 1
3983 | lhu RD, -4+OFS_RD(PC) 4900 | lhu RD, -4+OFS_RD(PC)
3984 | movn TMP1, TMP2, TMP3 4901 | movn TMP1, TMP2, TMP3
3985 | bnez TMP1, =>BC_JLOOP 4902 | bnez TMP1, =>BC_JLOOP
@@ -3988,8 +4905,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3988 | li TMP1, 1 4905 | li TMP1, 1
3989 | li TMP2, 1 4906 | li TMP2, 1
3990 | slt TMP3, TMP3, r0 4907 | slt TMP3, TMP3, r0
3991 | movf TMP1, r0, 0 4908 | cmp_res 1
3992 | movf TMP2, r0, 1
3993 | movn TMP1, TMP2, TMP3 4909 | movn TMP1, TMP2, TMP3
3994 | bnez TMP1, =>BC_JLOOP 4910 | bnez TMP1, =>BC_JLOOP
3995 |. nop 4911 |. nop
@@ -3998,11 +4914,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3998 | slt TMP3, TMP3, r0 4914 | slt TMP3, TMP3, r0
3999 | move TMP2, TMP1 4915 | move TMP2, TMP1
4000 if (op == BC_FORI) { 4916 if (op == BC_FORI) {
4001 | movt TMP1, r0, 0 4917 | cmp_res 0
4002 | movt TMP2, r0, 1
4003 } else { 4918 } else {
4004 | movf TMP1, r0, 0 4919 | cmp_res 1
4005 | movf TMP2, r0, 1
4006 } 4920 }
4007 | movn TMP1, TMP2, TMP3 4921 | movn TMP1, TMP2, TMP3
4008 | addu PC, PC, TMP1 4922 | addu PC, PC, TMP1
@@ -4256,8 +5170,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4256 fcofs, CFRAME_SIZE); 5170 fcofs, CFRAME_SIZE);
4257 for (i = 23; i >= 16; i--) 5171 for (i = 23; i >= 16; i--)
4258 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5172 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5173#if !LJ_SOFTFP
4259 for (i = 30; i >= 20; i -= 2) 5174 for (i = 30; i >= 20; i -= 2)
4260 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5175 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5176#endif
4261 fprintf(ctx->fp, 5177 fprintf(ctx->fp,
4262 "\t.align 2\n" 5178 "\t.align 2\n"
4263 ".LEFDE0:\n\n"); 5179 ".LEFDE0:\n\n");
@@ -4275,6 +5191,7 @@ static void emit_asm_debug(BuildCtx *ctx)
4275 "\t.align 2\n" 5191 "\t.align 2\n"
4276 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5192 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4277#endif 5193#endif
5194#if !LJ_NO_UNWIND
4278 fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); 5195 fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
4279 fprintf(ctx->fp, 5196 fprintf(ctx->fp,
4280 "\t.globl lj_err_unwind_dwarf\n" 5197 "\t.globl lj_err_unwind_dwarf\n"
@@ -4343,6 +5260,7 @@ static void emit_asm_debug(BuildCtx *ctx)
4343 "\t.align 2\n" 5260 "\t.align 2\n"
4344 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); 5261 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4345#endif 5262#endif
5263#endif
4346 break; 5264 break;
4347 default: 5265 default:
4348 break; 5266 break;