diff options
author | Mike Pall <mike> | 2015-12-17 22:42:20 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2015-12-17 22:42:20 +0100 |
commit | 3f5c72421e282a2a4d8614064f13097678b80be1 (patch) | |
tree | ca2ddfad89bad2085b2c0660d5eed67a8d218c8d | |
parent | 126e55d416ad10dc9265593b73b9f322dbf9d658 (diff) | |
download | luajit-3f5c72421e282a2a4d8614064f13097678b80be1.tar.gz luajit-3f5c72421e282a2a4d8614064f13097678b80be1.tar.bz2 luajit-3f5c72421e282a2a4d8614064f13097678b80be1.zip |
MIPS soft-float, part 1: Add soft-float support to interpreter.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
-rw-r--r-- | src/lj_arch.h | 10 | ||||
-rw-r--r-- | src/lj_dispatch.h | 18 | ||||
-rw-r--r-- | src/lj_frame.h | 11 | ||||
-rw-r--r-- | src/lj_ircall.h | 16 | ||||
-rw-r--r-- | src/lj_vm.h | 2 | ||||
-rw-r--r-- | src/vm_mips.dasc | 1356 |
6 files changed, 1189 insertions, 224 deletions
diff --git a/src/lj_arch.h b/src/lj_arch.h index c66a11c8..a114bdda 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -304,6 +304,13 @@ | |||
304 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 304 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
305 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE | 305 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE |
306 | 306 | ||
307 | #if !defined(LJ_ARCH_HASFPU) && defined(__mips_soft_float) | ||
308 | #define LJ_ARCH_HASFPU 0 | ||
309 | #endif | ||
310 | #if !defined(LJ_ABI_SOFTFP) && defined(__mips_soft_float) | ||
311 | #define LJ_ABI_SOFTFP 1 | ||
312 | #endif | ||
313 | |||
307 | #if _MIPS_ARCH_MIPS32R2 | 314 | #if _MIPS_ARCH_MIPS32R2 |
308 | #define LJ_ARCH_VERSION 20 | 315 | #define LJ_ARCH_VERSION 20 |
309 | #else | 316 | #else |
@@ -386,9 +393,6 @@ | |||
386 | #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" | 393 | #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" |
387 | #endif | 394 | #endif |
388 | #elif LJ_TARGET_MIPS | 395 | #elif LJ_TARGET_MIPS |
389 | #if defined(__mips_soft_float) | ||
390 | #error "No support for MIPS CPUs without FPU" | ||
391 | #endif | ||
392 | #if defined(_LP64) | 396 | #if defined(_LP64) |
393 | #error "No support for MIPS64" | 397 | #error "No support for MIPS64" |
394 | #endif | 398 | #endif |
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 1e247e38..73d00ec0 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h | |||
@@ -14,6 +14,21 @@ | |||
14 | 14 | ||
15 | #if LJ_TARGET_MIPS | 15 | #if LJ_TARGET_MIPS |
16 | /* Need our own global offset table for the dreaded MIPS calling conventions. */ | 16 | /* Need our own global offset table for the dreaded MIPS calling conventions. */ |
17 | #if LJ_SOFTFP | ||
18 | extern double __adddf3(double a, double b); | ||
19 | extern double __subdf3(double a, double b); | ||
20 | extern double __muldf3(double a, double b); | ||
21 | extern double __divdf3(double a, double b); | ||
22 | extern void __ledf2(double a, double b); | ||
23 | extern double __floatsidf(int32_t a); | ||
24 | extern int32_t __fixdfsi(double a); | ||
25 | |||
26 | #define SFGOTDEF(_) \ | ||
27 | _(lj_num2bit) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) _(__ledf2) \ | ||
28 | _(__floatsidf) _(__fixdfsi) | ||
29 | #else | ||
30 | #define SFGOTDEF(_) | ||
31 | #endif | ||
17 | #if LJ_HASJIT | 32 | #if LJ_HASJIT |
18 | #define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) | 33 | #define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) |
19 | #else | 34 | #else |
@@ -39,7 +54,8 @@ | |||
39 | _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ | 54 | _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ |
40 | _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ | 55 | _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ |
41 | _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ | 56 | _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ |
42 | _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_) | 57 | _(lj_buf_putstr_upper) _(lj_buf_tostr) \ |
58 | JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_) | ||
43 | 59 | ||
44 | enum { | 60 | enum { |
45 | #define GOTENUM(name) LJ_GOT_##name, | 61 | #define GOTENUM(name) LJ_GOT_##name, |
diff --git a/src/lj_frame.h b/src/lj_frame.h index a86c36be..aa3ab20b 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h | |||
@@ -218,6 +218,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ | |||
218 | #define CFRAME_SHIFT_MULTRES 3 | 218 | #define CFRAME_SHIFT_MULTRES 3 |
219 | #endif | 219 | #endif |
220 | #elif LJ_TARGET_MIPS | 220 | #elif LJ_TARGET_MIPS |
221 | #if LJ_ARCH_HASFPU | ||
221 | #define CFRAME_OFS_ERRF 124 | 222 | #define CFRAME_OFS_ERRF 124 |
222 | #define CFRAME_OFS_NRES 120 | 223 | #define CFRAME_OFS_NRES 120 |
223 | #define CFRAME_OFS_PREV 116 | 224 | #define CFRAME_OFS_PREV 116 |
@@ -227,6 +228,16 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ | |||
227 | #define CFRAME_SIZE 112 | 228 | #define CFRAME_SIZE 112 |
228 | #define CFRAME_SHIFT_MULTRES 3 | 229 | #define CFRAME_SHIFT_MULTRES 3 |
229 | #else | 230 | #else |
231 | #define CFRAME_OFS_ERRF 100 | ||
232 | #define CFRAME_OFS_NRES 96 | ||
233 | #define CFRAME_OFS_PREV 92 | ||
234 | #define CFRAME_OFS_L 88 | ||
235 | #define CFRAME_OFS_PC 44 | ||
236 | #define CFRAME_OFS_MULTRES 16 | ||
237 | #define CFRAME_SIZE 88 | ||
238 | #define CFRAME_SHIFT_MULTRES 3 | ||
239 | #endif | ||
240 | #else | ||
230 | #error "Missing CFRAME_* definitions for this architecture" | 241 | #error "Missing CFRAME_* definitions for this architecture" |
231 | #endif | 242 | #endif |
232 | 243 | ||
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 84e41ecf..1f44b03d 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -270,6 +270,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; | |||
270 | #define fp64_f2l __aeabi_f2lz | 270 | #define fp64_f2l __aeabi_f2lz |
271 | #define fp64_f2ul __aeabi_f2ulz | 271 | #define fp64_f2ul __aeabi_f2ulz |
272 | #endif | 272 | #endif |
273 | #elif LJ_TARGET_MIPS | ||
274 | #define softfp_add __adddf3 | ||
275 | #define softfp_sub __subdf3 | ||
276 | #define softfp_mul __muldf3 | ||
277 | #define softfp_div __divdf3 | ||
278 | #define softfp_cmp __ledf2 | ||
279 | #define softfp_i2d __floatsidf | ||
280 | #define softfp_d2i __fixdfsi | ||
281 | #define softfp_ui2d __floatunsidf | ||
282 | #define softfp_f2d __extendsfdf2 | ||
283 | #define softfp_d2ui __fixunsdfsi | ||
284 | #define softfp_d2f __truncdfsf2 | ||
285 | #define softfp_i2f __floatsisf | ||
286 | #define softfp_ui2f __floatunsisf | ||
287 | #define softfp_f2i __fixsfsi | ||
288 | #define softfp_f2ui __fixunssfsi | ||
273 | #else | 289 | #else |
274 | #error "Missing soft-float definitions for target architecture" | 290 | #error "Missing soft-float definitions for target architecture" |
275 | #endif | 291 | #endif |
diff --git a/src/lj_vm.h b/src/lj_vm.h index b31e22f7..cb76d7a7 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
@@ -50,7 +50,7 @@ LJ_ASMF void lj_vm_exit_handler(void); | |||
50 | LJ_ASMF void lj_vm_exit_interp(void); | 50 | LJ_ASMF void lj_vm_exit_interp(void); |
51 | 51 | ||
52 | /* Internal math helper functions. */ | 52 | /* Internal math helper functions. */ |
53 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 | 53 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) |
54 | #define lj_vm_floor floor | 54 | #define lj_vm_floor floor |
55 | #define lj_vm_ceil ceil | 55 | #define lj_vm_ceil ceil |
56 | #else | 56 | #else |
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 134ed569..0dba1293 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc | |||
@@ -1,6 +1,9 @@ | |||
1 | |// Low-level VM code for MIPS CPUs. | 1 | |// Low-level VM code for MIPS CPUs. |
2 | |// Bytecode interpreter, fast functions and helper functions. | 2 | |// Bytecode interpreter, fast functions and helper functions. |
3 | |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h | 3 | |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h |
4 | |// | ||
5 | |// MIPS soft-float support contributed by Djordje Kovacevic and | ||
6 | |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. | ||
4 | | | 7 | | |
5 | |.arch mips | 8 | |.arch mips |
6 | |.section code_op, code_sub | 9 | |.section code_op, code_sub |
@@ -18,6 +21,12 @@ | |||
18 | |// Fixed register assignments for the interpreter. | 21 | |// Fixed register assignments for the interpreter. |
19 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra | 22 | |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra |
20 | | | 23 | | |
24 | |.macro .FPU, a, b | ||
25 | |.if FPU | ||
26 | | a, b | ||
27 | |.endif | ||
28 | |.endmacro | ||
29 | | | ||
21 | |// The following must be C callee-save (but BASE is often refetched). | 30 | |// The following must be C callee-save (but BASE is often refetched). |
22 | |.define BASE, r16 // Base of current Lua stack frame. | 31 | |.define BASE, r16 // Base of current Lua stack frame. |
23 | |.define KBASE, r17 // Constants of current Lua function. | 32 | |.define KBASE, r17 // Constants of current Lua function. |
@@ -31,7 +40,9 @@ | |||
31 | | | 40 | | |
32 | |// Constants for type-comparisons, stores and conversions. C callee-save. | 41 | |// Constants for type-comparisons, stores and conversions. C callee-save. |
33 | |.define TISNIL, r30 | 42 | |.define TISNIL, r30 |
43 | |.if FPU | ||
34 | |.define TOBIT, f30 // 2^52 + 2^51. | 44 | |.define TOBIT, f30 // 2^52 + 2^51. |
45 | |.endif | ||
35 | | | 46 | | |
36 | |// The following temporaries are not saved across C calls, except for RA. | 47 | |// The following temporaries are not saved across C calls, except for RA. |
37 | |.define RA, r23 // Callee-save. | 48 | |.define RA, r23 // Callee-save. |
@@ -46,6 +57,13 @@ | |||
46 | |.define TMP2, r14 | 57 | |.define TMP2, r14 |
47 | |.define TMP3, r15 | 58 | |.define TMP3, r15 |
48 | | | 59 | | |
60 | |.if not FPU | ||
61 | |.define SFT1, r2 | ||
62 | |.define SFT2, r3 | ||
63 | |.define SFT3, r4 | ||
64 | |.define SFT4, r5 | ||
65 | |.endif | ||
66 | | | ||
49 | |// Calling conventions. | 67 | |// Calling conventions. |
50 | |.define CFUNCADDR, r25 | 68 | |.define CFUNCADDR, r25 |
51 | |.define CARG1, r4 | 69 | |.define CARG1, r4 |
@@ -56,13 +74,16 @@ | |||
56 | |.define CRET1, r2 | 74 | |.define CRET1, r2 |
57 | |.define CRET2, r3 | 75 | |.define CRET2, r3 |
58 | | | 76 | | |
77 | |.if FPU | ||
59 | |.define FARG1, f12 | 78 | |.define FARG1, f12 |
60 | |.define FARG2, f14 | 79 | |.define FARG2, f14 |
61 | | | 80 | | |
62 | |.define FRET1, f0 | 81 | |.define FRET1, f0 |
63 | |.define FRET2, f2 | 82 | |.define FRET2, f2 |
83 | |.endif | ||
64 | | | 84 | | |
65 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 85 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
86 | |.if FPU // MIPS32 hard-float. | ||
66 | |.define CFRAME_SPACE, 112 // Delta for sp. | 87 | |.define CFRAME_SPACE, 112 // Delta for sp. |
67 | | | 88 | | |
68 | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. | 89 | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. |
@@ -83,43 +104,76 @@ | |||
83 | |.define ARG5_OFS, 16 | 104 | |.define ARG5_OFS, 16 |
84 | |.define SAVE_MULTRES, ARG5 | 105 | |.define SAVE_MULTRES, ARG5 |
85 | | | 106 | | |
107 | |//----------------------------------------------------------------------- | ||
108 | |.else // MIPS32 soft-float. | ||
109 | | | ||
110 | |.define CFRAME_SPACE, 88 // Delta for sp. | ||
111 | | | ||
112 | |.define SAVE_ERRF, 100(sp) // 32 bit C frame info. | ||
113 | |.define SAVE_NRES, 96(sp) | ||
114 | |.define SAVE_CFRAME, 92(sp) | ||
115 | |.define SAVE_L, 88(sp) | ||
116 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. | ||
117 | |.define SAVE_GPR_, 48 // .. 48+10*4: 32 bit GPR saves. | ||
118 | |.define SAVE_PC, 44(sp) | ||
119 | |.define TEMP_SAVE_6, 40(sp) | ||
120 | |.define TEMP_SAVE_5, 36(sp) | ||
121 | |.define TEMP_SAVE_4, 32(sp) | ||
122 | |.define TEMP_SAVE_3, 28(sp) | ||
123 | |.define TEMP_SAVE_2, 24(sp) | ||
124 | |.define TEMP_SAVE_1, 20(sp) | ||
125 | |//----- 8 byte aligned, ^^^^ 24 byte register save area, owned by caller. | ||
126 | |.define ARG5, 16(sp) | ||
127 | |.define CSAVE_4, 12(sp) | ||
128 | |.define CSAVE_3, 8(sp) | ||
129 | |.define CSAVE_2, 4(sp) | ||
130 | |.define CSAVE_1, 0(sp) | ||
131 | |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee. | ||
132 | | | ||
133 | |.define ARG5_OFS, 16 | ||
134 | |.define SAVE_MULTRES, ARG5 | ||
135 | | | ||
136 | |.endif | ||
137 | | | ||
138 | |//----------------------------------------------------------------------- | ||
139 | | | ||
86 | |.macro saveregs | 140 | |.macro saveregs |
87 | | addiu sp, sp, -CFRAME_SPACE | 141 | | addiu sp, sp, -CFRAME_SPACE |
88 | | sw ra, SAVE_GPR_+9*4(sp) | 142 | | sw ra, SAVE_GPR_+9*4(sp) |
89 | | sw r30, SAVE_GPR_+8*4(sp) | 143 | | sw r30, SAVE_GPR_+8*4(sp) |
90 | | sdc1 f30, SAVE_FPR_+5*8(sp) | 144 | | .FPU sdc1 f30, SAVE_FPR_+5*8(sp) |
91 | | sw r23, SAVE_GPR_+7*4(sp) | 145 | | sw r23, SAVE_GPR_+7*4(sp) |
92 | | sw r22, SAVE_GPR_+6*4(sp) | 146 | | sw r22, SAVE_GPR_+6*4(sp) |
93 | | sdc1 f28, SAVE_FPR_+4*8(sp) | 147 | | .FPU sdc1 f28, SAVE_FPR_+4*8(sp) |
94 | | sw r21, SAVE_GPR_+5*4(sp) | 148 | | sw r21, SAVE_GPR_+5*4(sp) |
95 | | sw r20, SAVE_GPR_+4*4(sp) | 149 | | sw r20, SAVE_GPR_+4*4(sp) |
96 | | sdc1 f26, SAVE_FPR_+3*8(sp) | 150 | | .FPU sdc1 f26, SAVE_FPR_+3*8(sp) |
97 | | sw r19, SAVE_GPR_+3*4(sp) | 151 | | sw r19, SAVE_GPR_+3*4(sp) |
98 | | sw r18, SAVE_GPR_+2*4(sp) | 152 | | sw r18, SAVE_GPR_+2*4(sp) |
99 | | sdc1 f24, SAVE_FPR_+2*8(sp) | 153 | | .FPU sdc1 f24, SAVE_FPR_+2*8(sp) |
100 | | sw r17, SAVE_GPR_+1*4(sp) | 154 | | sw r17, SAVE_GPR_+1*4(sp) |
101 | | sw r16, SAVE_GPR_+0*4(sp) | 155 | | sw r16, SAVE_GPR_+0*4(sp) |
102 | | sdc1 f22, SAVE_FPR_+1*8(sp) | 156 | | .FPU sdc1 f22, SAVE_FPR_+1*8(sp) |
103 | | sdc1 f20, SAVE_FPR_+0*8(sp) | 157 | | .FPU sdc1 f20, SAVE_FPR_+0*8(sp) |
104 | |.endmacro | 158 | |.endmacro |
105 | | | 159 | | |
106 | |.macro restoreregs_ret | 160 | |.macro restoreregs_ret |
107 | | lw ra, SAVE_GPR_+9*4(sp) | 161 | | lw ra, SAVE_GPR_+9*4(sp) |
108 | | lw r30, SAVE_GPR_+8*4(sp) | 162 | | lw r30, SAVE_GPR_+8*4(sp) |
109 | | ldc1 f30, SAVE_FPR_+5*8(sp) | 163 | | .FPU ldc1 f30, SAVE_FPR_+5*8(sp) |
110 | | lw r23, SAVE_GPR_+7*4(sp) | 164 | | lw r23, SAVE_GPR_+7*4(sp) |
111 | | lw r22, SAVE_GPR_+6*4(sp) | 165 | | lw r22, SAVE_GPR_+6*4(sp) |
112 | | ldc1 f28, SAVE_FPR_+4*8(sp) | 166 | | .FPU ldc1 f28, SAVE_FPR_+4*8(sp) |
113 | | lw r21, SAVE_GPR_+5*4(sp) | 167 | | lw r21, SAVE_GPR_+5*4(sp) |
114 | | lw r20, SAVE_GPR_+4*4(sp) | 168 | | lw r20, SAVE_GPR_+4*4(sp) |
115 | | ldc1 f26, SAVE_FPR_+3*8(sp) | 169 | | .FPU ldc1 f26, SAVE_FPR_+3*8(sp) |
116 | | lw r19, SAVE_GPR_+3*4(sp) | 170 | | lw r19, SAVE_GPR_+3*4(sp) |
117 | | lw r18, SAVE_GPR_+2*4(sp) | 171 | | lw r18, SAVE_GPR_+2*4(sp) |
118 | | ldc1 f24, SAVE_FPR_+2*8(sp) | 172 | | .FPU ldc1 f24, SAVE_FPR_+2*8(sp) |
119 | | lw r17, SAVE_GPR_+1*4(sp) | 173 | | lw r17, SAVE_GPR_+1*4(sp) |
120 | | lw r16, SAVE_GPR_+0*4(sp) | 174 | | lw r16, SAVE_GPR_+0*4(sp) |
121 | | ldc1 f22, SAVE_FPR_+1*8(sp) | 175 | | .FPU ldc1 f22, SAVE_FPR_+1*8(sp) |
122 | | ldc1 f20, SAVE_FPR_+0*8(sp) | 176 | | .FPU ldc1 f20, SAVE_FPR_+0*8(sp) |
123 | | jr ra | 177 | | jr ra |
124 | | addiu sp, sp, CFRAME_SPACE | 178 | | addiu sp, sp, CFRAME_SPACE |
125 | |.endmacro | 179 | |.endmacro |
@@ -270,6 +324,61 @@ | |||
270 | |.macro call_extern; jalr CFUNCADDR; .endmacro | 324 | |.macro call_extern; jalr CFUNCADDR; .endmacro |
271 | |.macro jmp_extern; jr CFUNCADDR; .endmacro | 325 | |.macro jmp_extern; jr CFUNCADDR; .endmacro |
272 | | | 326 | | |
327 | |// Converts int from given reg to double, result in CRET1 and CRET2 regs. | ||
328 | |.if not FPU | ||
329 | |.macro cvti2d, arg | ||
330 | | load_got __floatsidf | ||
331 | | call_extern | ||
332 | |. move CARG1, arg | ||
333 | |.endmacro | ||
334 | |.endif | ||
335 | | | ||
336 | |// Loads a double-word floating-point value. | ||
337 | |.macro load_double, fpr, gpr1, gpr2, src | ||
338 | |.if FPU | ||
339 | | ldc1 fpr, src | ||
340 | |.else | ||
341 | | lw gpr1, src | ||
342 | | lw gpr2, 4+src | ||
343 | |.endif | ||
344 | |.endmacro | ||
345 | | | ||
346 | |// Stores a double-word floating-point value. | ||
347 | |.macro store_double, fpr, gpr1, gpr2, dst | ||
348 | |.if FPU | ||
349 | | sdc1 fpr, dst | ||
350 | |.else | ||
351 | | sw gpr1, dst | ||
352 | | sw gpr2, 4+dst | ||
353 | |.endif | ||
354 | |.endmacro | ||
355 | | | ||
356 | |// Loads the first double-word floating-point argument. | ||
357 | |.macro load_farg1, src | ||
358 | | load_double FARG1, CARG1, CARG2, src | ||
359 | |.endmacro | ||
360 | | | ||
361 | |// Loads the second double-word floating-point argument. | ||
362 | |.macro load_farg2, src | ||
363 | | load_double FARG2, CARG3, CARG4, src | ||
364 | |.endmacro | ||
365 | | | ||
366 | |.macro load_double1, src | ||
367 | | load_double f0, SFT1, SFT2, src | ||
368 | |.endmacro | ||
369 | | | ||
370 | |.macro store_double1, dst | ||
371 | | store_double f0, SFT1, SFT2, dst | ||
372 | |.endmacro | ||
373 | | | ||
374 | |.macro load_double2, src | ||
375 | | load_double f2, SFT3, SFT4, src | ||
376 | |.endmacro | ||
377 | | | ||
378 | |.macro store_double2, dst | ||
379 | | store_double f2, SFT3, SFT4, dst | ||
380 | |.endmacro | ||
381 | | | ||
273 | |.macro hotcheck, delta, target | 382 | |.macro hotcheck, delta, target |
274 | | srl TMP1, PC, 1 | 383 | | srl TMP1, PC, 1 |
275 | | andi TMP1, TMP1, 126 | 384 | | andi TMP1, TMP1, 126 |
@@ -354,9 +463,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
354 | |. sll TMP2, TMP2, 3 | 463 | |. sll TMP2, TMP2, 3 |
355 | |1: | 464 | |1: |
356 | | addiu TMP1, TMP1, -8 | 465 | | addiu TMP1, TMP1, -8 |
357 | | ldc1 f0, 0(RA) | 466 | | load_double1 0(RA) |
358 | | addiu RA, RA, 8 | 467 | | addiu RA, RA, 8 |
359 | | sdc1 f0, 0(BASE) | 468 | | store_double1 0(BASE) |
360 | | bnez TMP1, <1 | 469 | | bnez TMP1, <1 |
361 | |. addiu BASE, BASE, 8 | 470 | |. addiu BASE, BASE, 8 |
362 | | | 471 | | |
@@ -425,15 +534,15 @@ static void build_subroutines(BuildCtx *ctx) | |||
425 | | and sp, CARG1, AT | 534 | | and sp, CARG1, AT |
426 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. | 535 | |->vm_unwind_ff_eh: // Landing pad for external unwinder. |
427 | | lw L, SAVE_L | 536 | | lw L, SAVE_L |
428 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 537 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
429 | | li TISNIL, LJ_TNIL | 538 | | li TISNIL, LJ_TNIL |
430 | | lw BASE, L->base | 539 | | lw BASE, L->base |
431 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. | 540 | | lw DISPATCH, L->glref // Setup pointer to dispatch table. |
432 | | mtc1 TMP3, TOBIT | 541 | | .FPU mtc1 TMP3, TOBIT |
433 | | li TMP1, LJ_TFALSE | 542 | | li TMP1, LJ_TFALSE |
434 | | li_vmstate INTERP | 543 | | li_vmstate INTERP |
435 | | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. | 544 | | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. |
436 | | cvt.d.s TOBIT, TOBIT | 545 | | .FPU cvt.d.s TOBIT, TOBIT |
437 | | addiu RA, BASE, -8 // Results start at BASE-8. | 546 | | addiu RA, BASE, -8 // Results start at BASE-8. |
438 | | addiu DISPATCH, DISPATCH, GG_G2DISP | 547 | | addiu DISPATCH, DISPATCH, GG_G2DISP |
439 | | sw TMP1, HI(RA) // Prepend false to error message. | 548 | | sw TMP1, HI(RA) // Prepend false to error message. |
@@ -498,11 +607,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
498 | | lw BASE, L->base | 607 | | lw BASE, L->base |
499 | | lw TMP1, L->top | 608 | | lw TMP1, L->top |
500 | | lw PC, FRAME_PC(BASE) | 609 | | lw PC, FRAME_PC(BASE) |
501 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 610 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
502 | | subu RD, TMP1, BASE | 611 | | subu RD, TMP1, BASE |
503 | | mtc1 TMP3, TOBIT | 612 | | .FPU mtc1 TMP3, TOBIT |
504 | | sb r0, L->status | 613 | | sb r0, L->status |
505 | | cvt.d.s TOBIT, TOBIT | 614 | | .FPU cvt.d.s TOBIT, TOBIT |
506 | | li_vmstate INTERP | 615 | | li_vmstate INTERP |
507 | | addiu RD, RD, 8 | 616 | | addiu RD, RD, 8 |
508 | | st_vmstate | 617 | | st_vmstate |
@@ -540,13 +649,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
540 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). | 649 | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). |
541 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) | 650 | | sw L, DISPATCH_GL(cur_L)(DISPATCH) |
542 | | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). | 651 | | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). |
543 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 652 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
544 | | lw TMP1, L->top | 653 | | lw TMP1, L->top |
545 | | mtc1 TMP3, TOBIT | 654 | | .FPU mtc1 TMP3, TOBIT |
546 | | addu PC, PC, BASE | 655 | | addu PC, PC, BASE |
547 | | subu NARGS8:RC, TMP1, BASE | 656 | | subu NARGS8:RC, TMP1, BASE |
548 | | subu PC, PC, TMP2 // PC = frame delta + frame type | 657 | | subu PC, PC, TMP2 // PC = frame delta + frame type |
549 | | cvt.d.s TOBIT, TOBIT | 658 | | .FPU cvt.d.s TOBIT, TOBIT |
550 | | li_vmstate INTERP | 659 | | li_vmstate INTERP |
551 | | li TISNIL, LJ_TNIL | 660 | | li TISNIL, LJ_TNIL |
552 | | st_vmstate | 661 | | st_vmstate |
@@ -628,7 +737,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
628 | |->cont_cat: // RA = resultptr, RB = meta base | 737 | |->cont_cat: // RA = resultptr, RB = meta base |
629 | | lw INS, -4(PC) | 738 | | lw INS, -4(PC) |
630 | | addiu CARG2, RB, -16 | 739 | | addiu CARG2, RB, -16 |
631 | | ldc1 f0, 0(RA) | 740 | | load_double1 0(RA) |
632 | | decode_RB8a MULTRES, INS | 741 | | decode_RB8a MULTRES, INS |
633 | | decode_RA8a RA, INS | 742 | | decode_RA8a RA, INS |
634 | | decode_RB8b MULTRES | 743 | | decode_RB8b MULTRES |
@@ -636,11 +745,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
636 | | addu TMP1, BASE, MULTRES | 745 | | addu TMP1, BASE, MULTRES |
637 | | sw BASE, L->base | 746 | | sw BASE, L->base |
638 | | subu CARG3, CARG2, TMP1 | 747 | | subu CARG3, CARG2, TMP1 |
748 | |.if FPU | ||
639 | | bne TMP1, CARG2, ->BC_CAT_Z | 749 | | bne TMP1, CARG2, ->BC_CAT_Z |
640 | |. sdc1 f0, 0(CARG2) | 750 | |. sdc1 f0, 0(CARG2) |
641 | | addu RA, BASE, RA | 751 | | addu RA, BASE, RA |
642 | | b ->cont_nop | 752 | | b ->cont_nop |
643 | |. sdc1 f0, 0(RA) | 753 | |. sdc1 f0, 0(RA) |
754 | |.else | ||
755 | | sw SFT1, 0(CARG2) | ||
756 | | bne TMP1, CARG2, ->BC_CAT_Z | ||
757 | |. sw SFT2, 4(CARG2) | ||
758 | | addu RA, BASE, RA | ||
759 | | sw SFT1, 0(RA) | ||
760 | | b ->cont_nop | ||
761 | |. sw SFT2, 4(RA) | ||
762 | |.endif | ||
644 | | | 763 | | |
645 | |//-- Table indexing metamethods ----------------------------------------- | 764 | |//-- Table indexing metamethods ----------------------------------------- |
646 | | | 765 | | |
@@ -663,10 +782,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
663 | |. sw TMP1, HI(CARG3) | 782 | |. sw TMP1, HI(CARG3) |
664 | | | 783 | | |
665 | |->vmeta_tgetb: // TMP0 = index | 784 | |->vmeta_tgetb: // TMP0 = index |
785 | |.if FPU | ||
666 | | mtc1 TMP0, f0 | 786 | | mtc1 TMP0, f0 |
667 | | cvt.d.w f0, f0 | 787 | | cvt.d.w f0, f0 |
668 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 788 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
669 | | sdc1 f0, 0(CARG3) | 789 | | sdc1 f0, 0(CARG3) |
790 | |.else | ||
791 | | sw CARG2, TEMP_SAVE_1 //needed to be saved because it's used later in lj_meta_tget | ||
792 | | cvti2d TMP0 | ||
793 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
794 | | sw CRET1, 0(CARG3) | ||
795 | | sw CRET2, 4(CARG3) | ||
796 | | lw CARG2, TEMP_SAVE_1 | ||
797 | |.endif | ||
670 | | | 798 | | |
671 | |->vmeta_tgetv: | 799 | |->vmeta_tgetv: |
672 | |1: | 800 | |1: |
@@ -678,9 +806,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
678 | | // Returns TValue * (finished) or NULL (metamethod). | 806 | | // Returns TValue * (finished) or NULL (metamethod). |
679 | | beqz CRET1, >3 | 807 | | beqz CRET1, >3 |
680 | |. addiu TMP1, BASE, -FRAME_CONT | 808 | |. addiu TMP1, BASE, -FRAME_CONT |
681 | | ldc1 f0, 0(CRET1) | 809 | | load_double2 0(CRET1) |
682 | | ins_next1 | 810 | | ins_next1 |
683 | | sdc1 f0, 0(RA) | 811 | | store_double2 0(RA) |
684 | | ins_next2 | 812 | | ins_next2 |
685 | | | 813 | | |
686 | |3: // Call __index metamethod. | 814 | |3: // Call __index metamethod. |
@@ -699,8 +827,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
699 | | // Returns cTValue * or NULL. | 827 | | // Returns cTValue * or NULL. |
700 | | beqz CRET1, >1 | 828 | | beqz CRET1, >1 |
701 | |. nop | 829 | |. nop |
830 | |.if FPU | ||
702 | | b ->BC_TGETR_Z | 831 | | b ->BC_TGETR_Z |
703 | |. ldc1 f0, 0(CRET1) | 832 | |. ldc1 f0, 0(CRET1) |
833 | |.else | ||
834 | | lw SFT1, 0(CRET1) | ||
835 | | b ->BC_TGETR_Z | ||
836 | |. lw SFT2, 4(CRET1) | ||
837 | |.endif | ||
704 | | | 838 | | |
705 | |//----------------------------------------------------------------------- | 839 | |//----------------------------------------------------------------------- |
706 | | | 840 | | |
@@ -723,10 +857,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
723 | |. sw TMP1, HI(CARG3) | 857 | |. sw TMP1, HI(CARG3) |
724 | | | 858 | | |
725 | |->vmeta_tsetb: // TMP0 = index | 859 | |->vmeta_tsetb: // TMP0 = index |
860 | |.if FPU | ||
726 | | mtc1 TMP0, f0 | 861 | | mtc1 TMP0, f0 |
727 | | cvt.d.w f0, f0 | 862 | | cvt.d.w f0, f0 |
728 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 863 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
729 | | sdc1 f0, 0(CARG3) | 864 | | sdc1 f0, 0(CARG3) |
865 | |.else | ||
866 | | sw CARG2, TEMP_SAVE_1 | ||
867 | | cvti2d TMP0 | ||
868 | | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | ||
869 | | sw CRET1, 0(CARG3) | ||
870 | | sw CRET2, 4(CARG3) | ||
871 | | lw CARG2, TEMP_SAVE_1 | ||
872 | |.endif | ||
730 | | | 873 | | |
731 | |->vmeta_tsetv: | 874 | |->vmeta_tsetv: |
732 | |1: | 875 | |1: |
@@ -736,11 +879,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
736 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | 879 | | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) |
737 | |. move CARG1, L | 880 | |. move CARG1, L |
738 | | // Returns TValue * (finished) or NULL (metamethod). | 881 | | // Returns TValue * (finished) or NULL (metamethod). |
882 | |.if FPU | ||
739 | | beqz CRET1, >3 | 883 | | beqz CRET1, >3 |
740 | |. ldc1 f0, 0(RA) | 884 | |. ldc1 f2, 0(RA) |
885 | |.else | ||
886 | | lw SFT3, 0(RA) | ||
887 | | beqz CRET1, >3 | ||
888 | |. lw SFT4, 4(RA) | ||
889 | |.endif | ||
741 | | // NOBARRIER: lj_meta_tset ensures the table is not black. | 890 | | // NOBARRIER: lj_meta_tset ensures the table is not black. |
742 | | ins_next1 | 891 | | ins_next1 |
743 | | sdc1 f0, 0(CRET1) | 892 | | store_double2 0(CRET1) |
744 | | ins_next2 | 893 | | ins_next2 |
745 | | | 894 | | |
746 | |3: // Call __newindex metamethod. | 895 | |3: // Call __newindex metamethod. |
@@ -750,7 +899,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
750 | | sw PC, -16+HI(BASE) // [cont|PC] | 899 | | sw PC, -16+HI(BASE) // [cont|PC] |
751 | | subu PC, BASE, TMP1 | 900 | | subu PC, BASE, TMP1 |
752 | | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | 901 | | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. |
753 | | sdc1 f0, 16(BASE) // Copy value to third argument. | 902 | | store_double2 16(BASE) // Copy value to third argument. |
754 | | b ->vm_call_dispatch_f | 903 | | b ->vm_call_dispatch_f |
755 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) | 904 | |. li NARGS8:RC, 24 // 3 args for func(t, k, v) |
756 | | | 905 | | |
@@ -793,11 +942,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
793 | | | 942 | | |
794 | |->cont_ra: // RA = resultptr | 943 | |->cont_ra: // RA = resultptr |
795 | | lbu TMP1, -4+OFS_RA(PC) | 944 | | lbu TMP1, -4+OFS_RA(PC) |
796 | | ldc1 f0, 0(RA) | 945 | | load_double1 0(RA) |
797 | | sll TMP1, TMP1, 3 | 946 | | sll TMP1, TMP1, 3 |
798 | | addu TMP1, BASE, TMP1 | 947 | | addu TMP1, BASE, TMP1 |
948 | |.if FPU | ||
799 | | b ->cont_nop | 949 | | b ->cont_nop |
800 | |. sdc1 f0, 0(TMP1) | 950 | |. sdc1 f0, 0(TMP1) |
951 | |.else | ||
952 | | sw SFT1, 0(TMP1) | ||
953 | | b ->cont_nop | ||
954 | |. sw SFT2, 4(TMP1) | ||
955 | |.endif | ||
801 | | | 956 | | |
802 | |->cont_condt: // RA = resultptr | 957 | |->cont_condt: // RA = resultptr |
803 | | lw TMP0, HI(RA) | 958 | | lw TMP0, HI(RA) |
@@ -852,7 +1007,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
852 | |//-- Arithmetic metamethods --------------------------------------------- | 1007 | |//-- Arithmetic metamethods --------------------------------------------- |
853 | | | 1008 | | |
854 | |->vmeta_unm: | 1009 | |->vmeta_unm: |
855 | | move CARG4, CARG3 | 1010 | | b ->vmeta_arith |
1011 | |. move CARG4, CARG3 | ||
1012 | | | ||
1013 | |->vmeta_arith_vn: | ||
1014 | | addu CARG3, BASE, RB | ||
1015 | | b ->vmeta_arith | ||
1016 | |. addu CARG4, KBASE, RC | ||
1017 | | | ||
1018 | |->vmeta_arith_nv: | ||
1019 | | addu CARG4, BASE, RB | ||
1020 | | b ->vmeta_arith | ||
1021 | |. addu CARG3, KBASE, RC | ||
1022 | | | ||
1023 | |->vmeta_arith_vv: | ||
1024 | | addu CARG3, BASE, RB | ||
1025 | | addu CARG4, BASE, RC | ||
856 | | | 1026 | | |
857 | |->vmeta_arith: | 1027 | |->vmeta_arith: |
858 | | load_got lj_meta_arith | 1028 | | load_got lj_meta_arith |
@@ -985,9 +1155,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
985 | |.macro .ffunc_n, name // Caveat: has delay slot! | 1155 | |.macro .ffunc_n, name // Caveat: has delay slot! |
986 | |->ff_ .. name: | 1156 | |->ff_ .. name: |
987 | | lw CARG3, HI(BASE) | 1157 | | lw CARG3, HI(BASE) |
1158 | | load_farg1 0(BASE) | ||
988 | | beqz NARGS8:RC, ->fff_fallback | 1159 | | beqz NARGS8:RC, ->fff_fallback |
989 | |. ldc1 FARG1, 0(BASE) | 1160 | |. sltiu AT, CARG3, LJ_TISNUM |
990 | | sltiu AT, CARG3, LJ_TISNUM | ||
991 | | beqz AT, ->fff_fallback | 1161 | | beqz AT, ->fff_fallback |
992 | |.endmacro | 1162 | |.endmacro |
993 | | | 1163 | | |
@@ -997,10 +1167,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
997 | | lw CARG3, HI(BASE) | 1167 | | lw CARG3, HI(BASE) |
998 | | bnez AT, ->fff_fallback | 1168 | | bnez AT, ->fff_fallback |
999 | |. lw CARG4, 8+HI(BASE) | 1169 | |. lw CARG4, 8+HI(BASE) |
1000 | | ldc1 FARG1, 0(BASE) | ||
1001 | | ldc1 FARG2, 8(BASE) | ||
1002 | | sltiu TMP0, CARG3, LJ_TISNUM | 1170 | | sltiu TMP0, CARG3, LJ_TISNUM |
1003 | | sltiu TMP1, CARG4, LJ_TISNUM | 1171 | | sltiu TMP1, CARG4, LJ_TISNUM |
1172 | | load_farg1 0(BASE) | ||
1173 | | load_farg2 8(BASE) | ||
1004 | | and TMP0, TMP0, TMP1 | 1174 | | and TMP0, TMP0, TMP1 |
1005 | | beqz TMP0, ->fff_fallback | 1175 | | beqz TMP0, ->fff_fallback |
1006 | |.endmacro | 1176 | |.endmacro |
@@ -1027,8 +1197,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
1027 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. | 1197 | | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. |
1028 | |. sw CARG1, LO(RA) | 1198 | |. sw CARG1, LO(RA) |
1029 | |1: | 1199 | |1: |
1030 | | ldc1 f0, 0(TMP1) | 1200 | | load_double1 0(TMP1) |
1031 | | sdc1 f0, -8(TMP1) | 1201 | | store_double1 -8(TMP1) |
1032 | | bne TMP1, TMP2, <1 | 1202 | | bne TMP1, TMP2, <1 |
1033 | |. addiu TMP1, TMP1, 8 | 1203 | |. addiu TMP1, TMP1, 8 |
1034 | | b ->fff_res | 1204 | | b ->fff_res |
@@ -1043,8 +1213,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1043 | | not TMP1, TMP1 | 1213 | | not TMP1, TMP1 |
1044 | | sll TMP1, TMP1, 3 | 1214 | | sll TMP1, TMP1, 3 |
1045 | | addu TMP1, CFUNC:RB, TMP1 | 1215 | | addu TMP1, CFUNC:RB, TMP1 |
1216 | |.if HFABI | ||
1046 | | b ->fff_resn | 1217 | | b ->fff_resn |
1047 | |. ldc1 FRET1, CFUNC:TMP1->upvalue | 1218 | |. ldc1 FRET1, CFUNC:TMP1->upvalue |
1219 | |.else | ||
1220 | | lw CRET1, CFUNC:TMP1->upvalue[0].u32.hi | ||
1221 | | b ->fff_resn | ||
1222 | |. lw CRET2, CFUNC:TMP1->upvalue[0].u32.lo | ||
1223 | |.endif | ||
1048 | | | 1224 | | |
1049 | |//-- Base library: getters and setters --------------------------------- | 1225 | |//-- Base library: getters and setters --------------------------------- |
1050 | | | 1226 | | |
@@ -1125,8 +1301,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1125 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | 1301 | | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |
1126 | |. move CARG1, L | 1302 | |. move CARG1, L |
1127 | | // Returns cTValue *. | 1303 | | // Returns cTValue *. |
1304 | |.if HFABI | ||
1128 | | b ->fff_resn | 1305 | | b ->fff_resn |
1129 | |. ldc1 FRET1, 0(CRET1) | 1306 | |. ldc1 FRET1, 0(CRET1) |
1307 | |.else | ||
1308 | | lw CRET2, 4(CRET1) | ||
1309 | | b ->fff_resn | ||
1310 | |. lw CRET1, 0(CRET1) | ||
1311 | |.endif | ||
1130 | | | 1312 | | |
1131 | |//-- Base library: conversions ------------------------------------------ | 1313 | |//-- Base library: conversions ------------------------------------------ |
1132 | | | 1314 | | |
@@ -1136,8 +1318,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1136 | | xori AT, NARGS8:RC, 8 | 1318 | | xori AT, NARGS8:RC, 8 |
1137 | | sltiu CARG1, CARG1, LJ_TISNUM | 1319 | | sltiu CARG1, CARG1, LJ_TISNUM |
1138 | | movn CARG1, r0, AT | 1320 | | movn CARG1, r0, AT |
1321 | |.if HFABI | ||
1139 | | beqz CARG1, ->fff_fallback // Exactly one number argument. | 1322 | | beqz CARG1, ->fff_fallback // Exactly one number argument. |
1140 | |. ldc1 FRET1, 0(BASE) | 1323 | |. ldc1 FRET1, 0(BASE) |
1324 | |.else | ||
1325 | | lw CRET1, 0(BASE) | ||
1326 | | beqz CARG1, ->fff_fallback // Exactly one number argument. | ||
1327 | |. lw CRET2, 4(BASE) | ||
1328 | |.endif | ||
1141 | | b ->fff_resn | 1329 | | b ->fff_resn |
1142 | |. nop | 1330 | |. nop |
1143 | | | 1331 | | |
@@ -1185,13 +1373,13 @@ static void build_subroutines(BuildCtx *ctx) | |||
1185 | | // Returns 0 at end of traversal. | 1373 | | // Returns 0 at end of traversal. |
1186 | | beqz CRET1, ->fff_restv // End of traversal: return nil. | 1374 | | beqz CRET1, ->fff_restv // End of traversal: return nil. |
1187 | |. li CARG3, LJ_TNIL | 1375 | |. li CARG3, LJ_TNIL |
1188 | | ldc1 f0, 8(BASE) // Copy key and value to results. | 1376 | | load_double1 8(BASE) |
1189 | | addiu RA, BASE, -8 | 1377 | | addiu RA, BASE, -8 |
1190 | | ldc1 f2, 16(BASE) | 1378 | | load_double2 16(BASE) |
1191 | | li RD, (2+1)*8 | 1379 | | store_double1 0(RA) |
1192 | | sdc1 f0, 0(RA) | 1380 | | store_double2 8(RA) |
1193 | | b ->fff_res | 1381 | | b ->fff_res |
1194 | |. sdc1 f2, 8(RA) | 1382 | |. li RD, (2+1)*8 |
1195 | | | 1383 | | |
1196 | |.ffunc_1 pairs | 1384 | |.ffunc_1 pairs |
1197 | | li AT, LJ_TTAB | 1385 | | li AT, LJ_TTAB |
@@ -1199,16 +1387,32 @@ static void build_subroutines(BuildCtx *ctx) | |||
1199 | |. lw PC, FRAME_PC(BASE) | 1387 | |. lw PC, FRAME_PC(BASE) |
1200 | #if LJ_52 | 1388 | #if LJ_52 |
1201 | | lw TAB:TMP2, TAB:CARG1->metatable | 1389 | | lw TAB:TMP2, TAB:CARG1->metatable |
1390 | |.if FPU | ||
1202 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1391 | | ldc1 f0, CFUNC:RB->upvalue[0] |
1392 | |.else | ||
1393 | | lw SFT1, CFUNC:RB->upvalue[0].u32.hi | ||
1394 | | lw SFT2, CFUNC:RB->upvalue[0].u32.lo | ||
1395 | |.endif | ||
1203 | | bnez TAB:TMP2, ->fff_fallback | 1396 | | bnez TAB:TMP2, ->fff_fallback |
1204 | #else | 1397 | #else |
1398 | |.if FPU | ||
1205 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1399 | | ldc1 f0, CFUNC:RB->upvalue[0] |
1400 | |.else | ||
1401 | | lw SFT1, CFUNC:RB->upvalue[0].u32.hi | ||
1402 | | lw SFT2, CFUNC:RB->upvalue[0].u32.lo | ||
1403 | |.endif | ||
1206 | #endif | 1404 | #endif |
1207 | |. addiu RA, BASE, -8 | 1405 | |. addiu RA, BASE, -8 |
1208 | | sw TISNIL, 8+HI(BASE) | 1406 | | sw TISNIL, 8+HI(BASE) |
1209 | | li RD, (3+1)*8 | 1407 | | li RD, (3+1)*8 |
1408 | |.if FPU | ||
1210 | | b ->fff_res | 1409 | | b ->fff_res |
1211 | |. sdc1 f0, 0(RA) | 1410 | |. sdc1 f0, 0(RA) |
1411 | |.else | ||
1412 | | sw SFT1, 0(RA) | ||
1413 | | b ->fff_res | ||
1414 | |. sw SFT2, 4(RA) | ||
1415 | |.endif | ||
1212 | | | 1416 | | |
1213 | |.ffunc ipairs_aux | 1417 | |.ffunc ipairs_aux |
1214 | | sltiu AT, NARGS8:RC, 16 | 1418 | | sltiu AT, NARGS8:RC, 16 |
@@ -1216,35 +1420,55 @@ static void build_subroutines(BuildCtx *ctx) | |||
1216 | | lw TAB:CARG1, LO(BASE) | 1420 | | lw TAB:CARG1, LO(BASE) |
1217 | | lw CARG4, 8+HI(BASE) | 1421 | | lw CARG4, 8+HI(BASE) |
1218 | | bnez AT, ->fff_fallback | 1422 | | bnez AT, ->fff_fallback |
1219 | |. ldc1 FARG2, 8(BASE) | 1423 | |. addiu CARG3, CARG3, -LJ_TTAB |
1220 | | addiu CARG3, CARG3, -LJ_TTAB | ||
1221 | | sltiu AT, CARG4, LJ_TISNUM | 1424 | | sltiu AT, CARG4, LJ_TISNUM |
1222 | | li TMP0, 1 | 1425 | | li TMP0, 1 |
1223 | | movn AT, r0, CARG3 | 1426 | | movn AT, r0, CARG3 |
1224 | | mtc1 TMP0, FARG1 | ||
1225 | | beqz AT, ->fff_fallback | 1427 | | beqz AT, ->fff_fallback |
1226 | |. lw PC, FRAME_PC(BASE) | 1428 | |. lw PC, FRAME_PC(BASE) |
1429 | |.if FPU | ||
1430 | | ldc1 FARG2, 8(BASE) | ||
1431 | | mtc1 TMP0, FARG1 | ||
1227 | | trunc.w.d FRET1, FARG2 | 1432 | | trunc.w.d FRET1, FARG2 |
1228 | | cvt.d.w FARG1, FARG1 | 1433 | | cvt.d.w FARG1, FARG1 |
1229 | | lw TMP0, TAB:CARG1->asize | ||
1230 | | lw TMP1, TAB:CARG1->array | ||
1231 | | mfc1 TMP2, FRET1 | 1434 | | mfc1 TMP2, FRET1 |
1232 | | addiu RA, BASE, -8 | ||
1233 | | add.d FARG2, FARG2, FARG1 | 1435 | | add.d FARG2, FARG2, FARG1 |
1436 | |.else | ||
1437 | | sw CARG1, TEMP_SAVE_1 | ||
1438 | | cvti2d TMP0 | ||
1439 | | sw CRET1, TEMP_SAVE_2 // Store result CRET1/CRET2=1 (double). | ||
1440 | | sw CRET2, TEMP_SAVE_3 | ||
1441 | | lw CARG2, 8+4(BASE) | ||
1442 | | load_got __fixdfsi | ||
1443 | | call_extern | ||
1444 | |. lw CARG1, 8(BASE) | ||
1445 | | sw CRET1, TEMP_SAVE_4 | ||
1446 | | load_got __adddf3 | ||
1447 | | lw CARG2, TEMP_SAVE_3 | ||
1448 | | lw CARG3, 8(BASE) | ||
1449 | | lw CARG4, 8+4(BASE) | ||
1450 | | call_extern | ||
1451 | |. lw CARG1, TEMP_SAVE_2 | ||
1452 | | lw TMP2, TEMP_SAVE_4 | ||
1453 | | lw CARG1, TEMP_SAVE_1 | ||
1454 | |.endif | ||
1455 | | lw TMP0, TAB:CARG1->asize | ||
1456 | | lw TMP1, TAB:CARG1->array | ||
1234 | | addiu TMP2, TMP2, 1 | 1457 | | addiu TMP2, TMP2, 1 |
1235 | | sltu AT, TMP2, TMP0 | 1458 | | sltu AT, TMP2, TMP0 |
1459 | | beqz AT, >2 // Not in array part? | ||
1460 | |. addiu RA, BASE, -8 | ||
1461 | | store_double FARG2, CRET1, CRET2, 0(RA) | ||
1236 | | sll TMP3, TMP2, 3 | 1462 | | sll TMP3, TMP2, 3 |
1237 | | addu TMP3, TMP1, TMP3 | 1463 | | addu TMP3, TMP1, TMP3 |
1238 | | beqz AT, >2 // Not in array part? | ||
1239 | |. sdc1 FARG2, 0(RA) | ||
1240 | | lw TMP2, HI(TMP3) | 1464 | | lw TMP2, HI(TMP3) |
1241 | | ldc1 f0, 0(TMP3) | 1465 | | load_double1 0(TMP3) |
1242 | |1: | 1466 | |1: |
1243 | | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. | 1467 | | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. |
1244 | |. li RD, (0+1)*8 | 1468 | |. li RD, (0+1)*8 |
1245 | | li RD, (2+1)*8 | 1469 | | store_double1 8(RA) |
1246 | | b ->fff_res | 1470 | | b ->fff_res |
1247 | |. sdc1 f0, 8(RA) | 1471 | |. li RD, (2+1)*8 |
1248 | |2: // Check for empty hash part first. Otherwise call C function. | 1472 | |2: // Check for empty hash part first. Otherwise call C function. |
1249 | | lw TMP0, TAB:CARG1->hmask | 1473 | | lw TMP0, TAB:CARG1->hmask |
1250 | | load_got lj_tab_getinth | 1474 | | load_got lj_tab_getinth |
@@ -1256,8 +1480,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1256 | | beqz CRET1, ->fff_res | 1480 | | beqz CRET1, ->fff_res |
1257 | |. li RD, (0+1)*8 | 1481 | |. li RD, (0+1)*8 |
1258 | | lw TMP2, HI(CRET1) | 1482 | | lw TMP2, HI(CRET1) |
1483 | |.if FPU | ||
1259 | | b <1 | 1484 | | b <1 |
1260 | |. ldc1 f0, 0(CRET1) | 1485 | |. ldc1 f0, 0(CRET1) |
1486 | |.else | ||
1487 | | lw SFT2, 4(CRET1) | ||
1488 | | b <1 | ||
1489 | |. lw SFT1, 0(CRET1) | ||
1490 | |.endif | ||
1261 | | | 1491 | | |
1262 | |.ffunc_1 ipairs | 1492 | |.ffunc_1 ipairs |
1263 | | li AT, LJ_TTAB | 1493 | | li AT, LJ_TTAB |
@@ -1265,17 +1495,33 @@ static void build_subroutines(BuildCtx *ctx) | |||
1265 | |. lw PC, FRAME_PC(BASE) | 1495 | |. lw PC, FRAME_PC(BASE) |
1266 | #if LJ_52 | 1496 | #if LJ_52 |
1267 | | lw TAB:TMP2, TAB:CARG1->metatable | 1497 | | lw TAB:TMP2, TAB:CARG1->metatable |
1498 | |.if FPU | ||
1268 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1499 | | ldc1 f0, CFUNC:RB->upvalue[0] |
1500 | |.else | ||
1501 | | lw SFT1, CFUNC:RB->upvalue[0].u32.hi | ||
1502 | | lw SFT2, CFUNC:RB->upvalue[0].u32.lo | ||
1503 | |.endif | ||
1269 | | bnez TAB:TMP2, ->fff_fallback | 1504 | | bnez TAB:TMP2, ->fff_fallback |
1270 | #else | 1505 | #else |
1506 | |.if FPU | ||
1271 | | ldc1 f0, CFUNC:RB->upvalue[0] | 1507 | | ldc1 f0, CFUNC:RB->upvalue[0] |
1508 | |.else | ||
1509 | | lw SFT1, CFUNC:RB->upvalue[0].u32.hi | ||
1510 | | lw SFT2, CFUNC:RB->upvalue[0].u32.lo | ||
1511 | |.endif | ||
1272 | #endif | 1512 | #endif |
1273 | |. addiu RA, BASE, -8 | 1513 | |. addiu RA, BASE, -8 |
1274 | | sw r0, 8+HI(BASE) | 1514 | | sw r0, 8+HI(BASE) |
1275 | | sw r0, 8+LO(BASE) | 1515 | | sw r0, 8+LO(BASE) |
1276 | | li RD, (3+1)*8 | 1516 | | li RD, (3+1)*8 |
1517 | |.if FPU | ||
1277 | | b ->fff_res | 1518 | | b ->fff_res |
1278 | |. sdc1 f0, 0(RA) | 1519 | |. sdc1 f0, 0(RA) |
1520 | |.else | ||
1521 | | sw SFT1, 0(RA) | ||
1522 | | b ->fff_res | ||
1523 | |. sw SFT2, 4(RA) | ||
1524 | |.endif | ||
1279 | | | 1525 | | |
1280 | |//-- Base library: catch errors ---------------------------------------- | 1526 | |//-- Base library: catch errors ---------------------------------------- |
1281 | | | 1527 | | |
@@ -1295,8 +1541,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1295 | | sltiu AT, NARGS8:RC, 16 | 1541 | | sltiu AT, NARGS8:RC, 16 |
1296 | | lw CARG4, 8+HI(BASE) | 1542 | | lw CARG4, 8+HI(BASE) |
1297 | | bnez AT, ->fff_fallback | 1543 | | bnez AT, ->fff_fallback |
1544 | |.if FPU | ||
1298 | |. ldc1 FARG2, 8(BASE) | 1545 | |. ldc1 FARG2, 8(BASE) |
1299 | | ldc1 FARG1, 0(BASE) | 1546 | |.else |
1547 | |. lw CARG3, 8+LO(BASE) | ||
1548 | |.endif | ||
1549 | | load_double FARG1, CARG1, CARG2, 0(BASE) | ||
1300 | | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | 1550 | | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) |
1301 | | li AT, LJ_TFUNC | 1551 | | li AT, LJ_TFUNC |
1302 | | move TMP2, BASE | 1552 | | move TMP2, BASE |
@@ -1304,9 +1554,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1304 | | addiu BASE, BASE, 16 | 1554 | | addiu BASE, BASE, 16 |
1305 | | // Remember active hook before pcall. | 1555 | | // Remember active hook before pcall. |
1306 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT | 1556 | | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT |
1557 | |.if FPU | ||
1307 | | sdc1 FARG2, 0(TMP2) // Swap function and traceback. | 1558 | | sdc1 FARG2, 0(TMP2) // Swap function and traceback. |
1559 | |.else | ||
1560 | | sw CARG3, LO(TMP2) | ||
1561 | | sw CARG4, HI(TMP2) | ||
1562 | |.endif | ||
1308 | | andi TMP3, TMP3, 1 | 1563 | | andi TMP3, TMP3, 1 |
1309 | | sdc1 FARG1, 8(TMP2) | 1564 | | store_double FARG1, CARG1, CARG2, 8(TMP2) |
1310 | | addiu PC, TMP3, 16+FRAME_PCALL | 1565 | | addiu PC, TMP3, 16+FRAME_PCALL |
1311 | | b ->vm_call_dispatch | 1566 | | b ->vm_call_dispatch |
1312 | |. addiu NARGS8:RC, NARGS8:RC, -16 | 1567 | |. addiu NARGS8:RC, NARGS8:RC, -16 |
@@ -1350,11 +1605,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1350 | | move CARG3, CARG2 | 1605 | | move CARG3, CARG2 |
1351 | | sw BASE, L->top | 1606 | | sw BASE, L->top |
1352 | |2: // Move args to coroutine. | 1607 | |2: // Move args to coroutine. |
1353 | | ldc1 f0, 0(BASE) | 1608 | | load_double1 0(BASE) |
1354 | | sltu AT, BASE, TMP1 | 1609 | | sltu AT, BASE, TMP1 |
1355 | | beqz AT, >3 | 1610 | | beqz AT, >3 |
1356 | |. addiu BASE, BASE, 8 | 1611 | |. addiu BASE, BASE, 8 |
1357 | | sdc1 f0, 0(CARG3) | 1612 | | store_double1 0(CARG3) |
1358 | | b <2 | 1613 | | b <2 |
1359 | |. addiu CARG3, CARG3, 8 | 1614 | |. addiu CARG3, CARG3, 8 |
1360 | |3: | 1615 | |3: |
@@ -1380,10 +1635,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
1380 | | sw TMP2, L:RA->top // Clear coroutine stack. | 1635 | | sw TMP2, L:RA->top // Clear coroutine stack. |
1381 | | move TMP1, BASE | 1636 | | move TMP1, BASE |
1382 | |5: // Move results from coroutine. | 1637 | |5: // Move results from coroutine. |
1383 | | ldc1 f0, 0(TMP2) | 1638 | | load_double1 0(TMP2) |
1384 | | addiu TMP2, TMP2, 8 | 1639 | | addiu TMP2, TMP2, 8 |
1385 | | sltu AT, TMP2, TMP3 | 1640 | | sltu AT, TMP2, TMP3 |
1386 | | sdc1 f0, 0(TMP1) | 1641 | | store_double1 0(TMP1) |
1387 | | bnez AT, <5 | 1642 | | bnez AT, <5 |
1388 | |. addiu TMP1, TMP1, 8 | 1643 | |. addiu TMP1, TMP1, 8 |
1389 | |6: | 1644 | |6: |
@@ -1408,12 +1663,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1408 | |.if resume | 1663 | |.if resume |
1409 | | addiu TMP3, TMP3, -8 | 1664 | | addiu TMP3, TMP3, -8 |
1410 | | li TMP1, LJ_TFALSE | 1665 | | li TMP1, LJ_TFALSE |
1411 | | ldc1 f0, 0(TMP3) | 1666 | | load_double1 0(TMP3) |
1412 | | sw TMP3, L:RA->top // Remove error from coroutine stack. | 1667 | | sw TMP3, L:RA->top // Remove error from coroutine stack. |
1413 | | li RD, (2+1)*8 | 1668 | | li RD, (2+1)*8 |
1414 | | sw TMP1, -8+HI(BASE) // Prepend false to results. | 1669 | | sw TMP1, -8+HI(BASE) // Prepend false to results. |
1415 | | addiu RA, BASE, -8 | 1670 | | addiu RA, BASE, -8 |
1416 | | sdc1 f0, 0(BASE) // Copy error message. | 1671 | | store_double1 0(BASE) // Copy error message. |
1417 | | b <7 | 1672 | | b <7 |
1418 | |. andi TMP0, PC, FRAME_TYPE | 1673 | |. andi TMP0, PC, FRAME_TYPE |
1419 | |.else | 1674 | |.else |
@@ -1449,13 +1704,33 @@ static void build_subroutines(BuildCtx *ctx) | |||
1449 | | | 1704 | | |
1450 | |//-- Math library ------------------------------------------------------- | 1705 | |//-- Math library ------------------------------------------------------- |
1451 | | | 1706 | | |
1452 | |.ffunc_n math_abs | 1707 | |.ffunc_1 math_abs |
1708 | | load_farg1 0(BASE) | ||
1709 | | sltiu AT, CARG3, LJ_TISNUM | ||
1710 | | beqz AT, ->fff_fallback | ||
1711 | |. nop | ||
1712 | |.if FPU | ||
1453 | |. abs.d FRET1, FARG1 | 1713 | |. abs.d FRET1, FARG1 |
1714 | |.else | ||
1715 | |. lui TMP1, 0x8000 | ||
1716 | | and AT, CARG1, TMP1 | ||
1717 | | move CRET2, CARG2 | ||
1718 | | beqz AT, ->fff_resn | ||
1719 | |. move CRET1, CARG1 | ||
1720 | | xor CRET1, CARG1, TMP1 | ||
1721 | |.endif | ||
1722 | | | ||
1454 | |->fff_resn: | 1723 | |->fff_resn: |
1455 | | lw PC, FRAME_PC(BASE) | 1724 | | lw PC, FRAME_PC(BASE) |
1456 | | addiu RA, BASE, -8 | 1725 | | addiu RA, BASE, -8 |
1726 | |.if HFABI | ||
1457 | | b ->fff_res1 | 1727 | | b ->fff_res1 |
1458 | |. sdc1 FRET1, -8(BASE) | 1728 | |. sdc1 FRET1, -8(BASE) |
1729 | |.else | ||
1730 | | sw CRET1, -8(BASE) | ||
1731 | | b ->fff_res1 | ||
1732 | |. sw CRET2, -8+4(BASE) | ||
1733 | |.endif | ||
1459 | | | 1734 | | |
1460 | |->fff_restv: | 1735 | |->fff_restv: |
1461 | | // CARG3/CARG1 = TValue result. | 1736 | | // CARG3/CARG1 = TValue result. |
@@ -1498,8 +1773,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1498 | | sltiu AT, CARG3, LJ_TISNUM | 1773 | | sltiu AT, CARG3, LJ_TISNUM |
1499 | | beqz AT, ->fff_fallback | 1774 | | beqz AT, ->fff_fallback |
1500 | |. nop | 1775 | |. nop |
1776 | |.if HFABI | ||
1501 | | call_extern | 1777 | | call_extern |
1502 | |. ldc1 FARG1, 0(BASE) | 1778 | |. ldc1 FARG1, 0(BASE) |
1779 | |.else | ||
1780 | | lw CARG1, 0(BASE) | ||
1781 | | call_extern | ||
1782 | |. lw CARG2, 4(BASE) | ||
1783 | |.endif | ||
1503 | | b ->fff_resn | 1784 | | b ->fff_resn |
1504 | |. nop | 1785 | |. nop |
1505 | |.endmacro | 1786 | |.endmacro |
@@ -1526,15 +1807,20 @@ static void build_subroutines(BuildCtx *ctx) | |||
1526 | | math_round ceil | 1807 | | math_round ceil |
1527 | | | 1808 | | |
1528 | |.ffunc math_log | 1809 | |.ffunc math_log |
1529 | | lw CARG3, HI(BASE) | ||
1530 | | li AT, 8 | 1810 | | li AT, 8 |
1531 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | 1811 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. |
1532 | |. load_got log | 1812 | |. lw CARG3, HI(BASE) |
1533 | | sltiu AT, CARG3, LJ_TISNUM | 1813 | | sltiu AT, CARG3, LJ_TISNUM |
1534 | | beqz AT, ->fff_fallback | 1814 | | beqz AT, ->fff_fallback |
1535 | |. nop | 1815 | |. load_got log |
1816 | |.if HFABI | ||
1536 | | call_extern | 1817 | | call_extern |
1537 | |. ldc1 FARG1, 0(BASE) | 1818 | |. ldc1 FARG1, 0(BASE) |
1819 | |.else | ||
1820 | | lw CARG1, 0(BASE) | ||
1821 | | call_extern | ||
1822 | |. lw CARG2, 4(BASE) | ||
1823 | |.endif | ||
1538 | | b ->fff_resn | 1824 | | b ->fff_resn |
1539 | |. nop | 1825 | |. nop |
1540 | | | 1826 | | |
@@ -1553,17 +1839,40 @@ static void build_subroutines(BuildCtx *ctx) | |||
1553 | | math_extern2 atan2 | 1839 | | math_extern2 atan2 |
1554 | | math_extern2 fmod | 1840 | | math_extern2 fmod |
1555 | | | 1841 | | |
1842 | |.if FPU | ||
1556 | |.ffunc_n math_sqrt | 1843 | |.ffunc_n math_sqrt |
1557 | |. sqrt.d FRET1, FARG1 | 1844 | |. sqrt.d FRET1, FARG1 |
1558 | | b ->fff_resn | 1845 | | b ->fff_resn |
1559 | |. nop | 1846 | |. nop |
1847 | |.else | ||
1848 | | math_extern sqrt | ||
1849 | |.endif | ||
1560 | | | 1850 | | |
1561 | |.ffunc_nn math_ldexp | 1851 | |.ffunc_2 math_ldexp |
1852 | | sltiu TMP0, CARG3, LJ_TISNUM | ||
1853 | | sltiu TMP1, CARG4, LJ_TISNUM | ||
1854 | | load_farg1 0(BASE) | ||
1855 | | load_farg2 8(BASE) | ||
1856 | | and TMP0, TMP0, TMP1 | ||
1857 | | beqz TMP0, ->fff_fallback | ||
1858 | |.if FPU | ||
1859 | | load_got ldexp | ||
1562 | | trunc.w.d FARG2, FARG2 | 1860 | | trunc.w.d FARG2, FARG2 |
1861 | | call_extern | ||
1862 | |. mfc1 CARG3, FARG2 | ||
1863 | |.else | ||
1864 | | sw CARG1, TEMP_SAVE_1 | ||
1865 | | sw CARG2, TEMP_SAVE_2 | ||
1866 | | load_got __fixdfsi | ||
1867 | | move CARG1, CARG3 | ||
1868 | | call_extern | ||
1869 | |. move CARG2, CARG4 | ||
1870 | | lw CARG1, TEMP_SAVE_1 | ||
1563 | | load_got ldexp | 1871 | | load_got ldexp |
1564 | | mfc1 CARG3, FARG2 | 1872 | | lw CARG2, TEMP_SAVE_2 |
1565 | | call_extern | 1873 | | call_extern |
1566 | |. nop | 1874 | |. move CARG3, CRET1 |
1875 | |.endif | ||
1567 | | b ->fff_resn | 1876 | | b ->fff_resn |
1568 | |. nop | 1877 | |. nop |
1569 | | | 1878 | | |
@@ -1574,10 +1883,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1574 | |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | 1883 | |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) |
1575 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) | 1884 | | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) |
1576 | | addiu RA, BASE, -8 | 1885 | | addiu RA, BASE, -8 |
1886 | | store_double FRET1, CRET1, CRET2, 0(RA) | ||
1887 | |.if FPU | ||
1577 | | mtc1 TMP1, FARG2 | 1888 | | mtc1 TMP1, FARG2 |
1578 | | sdc1 FRET1, 0(RA) | ||
1579 | | cvt.d.w FARG2, FARG2 | 1889 | | cvt.d.w FARG2, FARG2 |
1580 | | sdc1 FARG2, 8(RA) | 1890 | |.else |
1891 | | cvti2d TMP1 | ||
1892 | |.endif | ||
1893 | | store_double FARG2, CRET1, CRET2, 8(RA) | ||
1581 | | b ->fff_res | 1894 | | b ->fff_res |
1582 | |. li RD, (2+1)*8 | 1895 | |. li RD, (2+1)*8 |
1583 | | | 1896 | | |
@@ -1587,7 +1900,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1587 | | call_extern | 1900 | | call_extern |
1588 | |. addiu CARG3, BASE, -8 | 1901 | |. addiu CARG3, BASE, -8 |
1589 | | addiu RA, BASE, -8 | 1902 | | addiu RA, BASE, -8 |
1903 | |.if HFABI | ||
1590 | | sdc1 FRET1, 0(BASE) | 1904 | | sdc1 FRET1, 0(BASE) |
1905 | |.else | ||
1906 | | sw CRET1, 0(BASE) | ||
1907 | | sw CRET2, 4(BASE) | ||
1908 | |.endif | ||
1591 | | b ->fff_res | 1909 | | b ->fff_res |
1592 | |. li RD, (2+1)*8 | 1910 | |. li RD, (2+1)*8 |
1593 | | | 1911 | | |
@@ -1595,25 +1913,73 @@ static void build_subroutines(BuildCtx *ctx) | |||
1595 | |->ff_ .. name: | 1913 | |->ff_ .. name: |
1596 | | lw CARG3, HI(BASE) | 1914 | | lw CARG3, HI(BASE) |
1597 | | beqz NARGS8:RC, ->fff_fallback | 1915 | | beqz NARGS8:RC, ->fff_fallback |
1598 | |. ldc1 FRET1, 0(BASE) | 1916 | |. sltiu AT, CARG3, LJ_TISNUM |
1599 | | sltiu AT, CARG3, LJ_TISNUM | ||
1600 | | beqz AT, ->fff_fallback | 1917 | | beqz AT, ->fff_fallback |
1601 | |. addu TMP2, BASE, NARGS8:RC | 1918 | |. addu TMP2, BASE, NARGS8:RC |
1602 | | addiu TMP1, BASE, 8 | 1919 | | addiu TMP1, BASE, 8 |
1920 | |.if HFABI | ||
1921 | | ldc1 FRET1, 0(BASE) | ||
1603 | | beq TMP1, TMP2, ->fff_resn | 1922 | | beq TMP1, TMP2, ->fff_resn |
1923 | |.else | ||
1924 | | lw CRET1, 0(BASE) | ||
1925 | | lw CRET2, 4(BASE) | ||
1926 | | beq TMP1, TMP2, ->fff_resn | ||
1927 | |.endif | ||
1604 | |1: | 1928 | |1: |
1605 | |. lw CARG3, HI(TMP1) | 1929 | |. lw CARG3, HI(TMP1) |
1930 | |.if HFABI | ||
1606 | | ldc1 FARG1, 0(TMP1) | 1931 | | ldc1 FARG1, 0(TMP1) |
1607 | | addiu TMP1, TMP1, 8 | 1932 | |.else |
1933 | | lw CARG1, 0(TMP1) | ||
1934 | | lw CARG2, 4(TMP1) | ||
1935 | |.endif | ||
1608 | | sltiu AT, CARG3, LJ_TISNUM | 1936 | | sltiu AT, CARG3, LJ_TISNUM |
1609 | | beqz AT, ->fff_fallback | 1937 | | beqz AT, ->fff_fallback |
1938 | |. addiu TMP1, TMP1, 8 | ||
1939 | |.if FPU | ||
1610 | |.if ismax | 1940 | |.if ismax |
1611 | |. c.olt.d FARG1, FRET1 | 1941 | | c.olt.d FARG1, FRET1 |
1612 | |.else | 1942 | |.else |
1613 | |. c.olt.d FRET1, FARG1 | 1943 | | c.olt.d FRET1, FARG1 |
1614 | |.endif | 1944 | |.endif |
1615 | | bne TMP1, TMP2, <1 | 1945 | | bne TMP1, TMP2, <1 |
1616 | |. movf.d FRET1, FARG1 | 1946 | |. movf.d FRET1, FARG1 |
1947 | |.else | ||
1948 | | load_got __ledf2 | ||
1949 | | sw TMP1, TEMP_SAVE_1 | ||
1950 | | sw TMP2, TEMP_SAVE_2 | ||
1951 | | sw CARG1, TEMP_SAVE_3 | ||
1952 | | sw CARG2, TEMP_SAVE_4 | ||
1953 | | sw CRET1, TEMP_SAVE_5 | ||
1954 | | sw CRET2, TEMP_SAVE_6 | ||
1955 | | move CARG3, CRET1 | ||
1956 | | call_extern | ||
1957 | |. move CARG4, CRET2 | ||
1958 | | lw CARG4, TEMP_SAVE_6 | ||
1959 | | lw CARG3, TEMP_SAVE_5 | ||
1960 | | lw CARG2, TEMP_SAVE_4 | ||
1961 | | lw CARG1, TEMP_SAVE_3 | ||
1962 | | lw TMP2, TEMP_SAVE_2 | ||
1963 | | lw TMP1, TEMP_SAVE_1 | ||
1964 | |.if ismax | ||
1965 | | beqz CRET1, >2 // farg1==fret1 | ||
1966 | |. li TMP3, 1 | ||
1967 | | beq CRET1, TMP3, >2 // farg1>fret1 | ||
1968 | |. nop | ||
1969 | |.else | ||
1970 | | blez CRET1, >2 | ||
1971 | |. nop | ||
1972 | |.endif | ||
1973 | | move CRET1, CARG3 // Keep the value. | ||
1974 | | b >3 | ||
1975 | |. move CRET2, CARG4 | ||
1976 | |2: | ||
1977 | | move CRET1, CARG1 // Set new value. | ||
1978 | | move CRET2, CARG2 | ||
1979 | |3: | ||
1980 | | bne TMP1, TMP2, <1 | ||
1981 | |. nop | ||
1982 | |.endif | ||
1617 | | b ->fff_resn | 1983 | | b ->fff_resn |
1618 | |. nop | 1984 | |. nop |
1619 | |.endmacro | 1985 | |.endmacro |
@@ -1632,32 +1998,52 @@ static void build_subroutines(BuildCtx *ctx) | |||
1632 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. | 1998 | | bnez AT, ->fff_fallback // Need exactly 1 string argument. |
1633 | |. nop | 1999 | |. nop |
1634 | | lw TMP0, STR:CARG1->len | 2000 | | lw TMP0, STR:CARG1->len |
1635 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | ||
1636 | | addiu RA, BASE, -8 | 2001 | | addiu RA, BASE, -8 |
1637 | | sltu RD, r0, TMP0 | 2002 | | sltu RD, r0, TMP0 |
1638 | | mtc1 TMP1, f0 | 2003 | | lw PC, FRAME_PC(BASE) |
1639 | | addiu RD, RD, 1 | 2004 | | addiu RD, RD, 1 |
2005 | | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | ||
2006 | |.if FPU | ||
2007 | | mtc1 TMP1, f0 | ||
1640 | | cvt.d.w f0, f0 | 2008 | | cvt.d.w f0, f0 |
1641 | | lw PC, FRAME_PC(BASE) | 2009 | | sdc1 f0, 0(RA) |
1642 | | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 | 2010 | |.else |
2011 | | sw RD, TEMP_SAVE_1 | ||
2012 | | cvti2d TMP1 | ||
2013 | | sw CRET1, 0(RA) | ||
2014 | | sw CRET2, 4(RA) | ||
2015 | | lw RD, TEMP_SAVE_1 | ||
2016 | |.endif | ||
1643 | | b ->fff_res | 2017 | | b ->fff_res |
1644 | |. sdc1 f0, 0(RA) | 2018 | |. sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 |
1645 | | | 2019 | | |
1646 | |.ffunc string_char // Only handle the 1-arg case here. | 2020 | |.ffunc string_char // Only handle the 1-arg case here. |
1647 | | ffgccheck | 2021 | | ffgccheck |
1648 | | lw CARG3, HI(BASE) | 2022 | | lw CARG3, HI(BASE) |
1649 | | ldc1 FARG1, 0(BASE) | ||
1650 | | li AT, 8 | 2023 | | li AT, 8 |
1651 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. | 2024 | | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. |
1652 | |. sltiu AT, CARG3, LJ_TISNUM | 2025 | |. sltiu AT, CARG3, LJ_TISNUM |
1653 | | beqz AT, ->fff_fallback | 2026 | | beqz AT, ->fff_fallback |
1654 | |. li CARG3, 1 | 2027 | |. li CARG3, 1 |
1655 | | trunc.w.d FARG1, FARG1 | ||
1656 | | addiu CARG2, sp, ARG5_OFS | ||
1657 | | sltiu AT, TMP0, 256 | 2028 | | sltiu AT, TMP0, 256 |
1658 | | mfc1 TMP0, FARG1 | ||
1659 | | beqz AT, ->fff_fallback | 2029 | | beqz AT, ->fff_fallback |
1660 | |. sw TMP0, ARG5 | 2030 | | load_farg1 0(BASE) |
2031 | |.if FPU | ||
2032 | | trunc.w.d FARG1, FARG1 | ||
2033 | | mfc1 TMP0, FARG1 | ||
2034 | |.else | ||
2035 | | load_got __fixdfsi | ||
2036 | | sw RB, TEMP_SAVE_1 | ||
2037 | | sw RC, TEMP_SAVE_2 | ||
2038 | | call_extern | ||
2039 | |. sw CARG3, TEMP_SAVE_3 | ||
2040 | | lw CARG3, TEMP_SAVE_3 | ||
2041 | | lw RC, TEMP_SAVE_2 | ||
2042 | | lw RB, TEMP_SAVE_1 | ||
2043 | | move TMP0, CRET1 | ||
2044 | |.endif | ||
2045 | | addiu CARG2, sp, ARG5_OFS | ||
2046 | | sw TMP0, ARG5 | ||
1661 | |->fff_newstr: | 2047 | |->fff_newstr: |
1662 | | load_got lj_str_new | 2048 | | load_got lj_str_new |
1663 | | sw BASE, L->base | 2049 | | sw BASE, L->base |
@@ -1674,27 +2060,52 @@ static void build_subroutines(BuildCtx *ctx) | |||
1674 | |.ffunc string_sub | 2060 | |.ffunc string_sub |
1675 | | ffgccheck | 2061 | | ffgccheck |
1676 | | addiu AT, NARGS8:RC, -16 | 2062 | | addiu AT, NARGS8:RC, -16 |
2063 | |.if FPU | ||
2064 | | ldc1 f0, 16(BASE) | ||
2065 | | trunc.w.d f0, f0 | ||
2066 | |.else | ||
2067 | | lw CARG1, 16(BASE) | ||
2068 | | load_got __fixdfsi | ||
2069 | | sw AT, TEMP_SAVE_1 | ||
2070 | | call_extern | ||
2071 | |. lw CARG2, 16+4(BASE) | ||
2072 | | lw AT, TEMP_SAVE_1 | ||
2073 | |.endif | ||
1677 | | lw CARG3, 16+HI(BASE) | 2074 | | lw CARG3, 16+HI(BASE) |
1678 | | ldc1 f0, 16(BASE) | ||
1679 | | lw TMP0, HI(BASE) | 2075 | | lw TMP0, HI(BASE) |
1680 | | lw STR:CARG1, LO(BASE) | 2076 | | lw STR:CARG1, LO(BASE) |
1681 | | bltz AT, ->fff_fallback | 2077 | | bltz AT, ->fff_fallback |
1682 | | lw CARG2, 8+HI(BASE) | 2078 | |. lw CARG2, 8+HI(BASE) |
1683 | | ldc1 f2, 8(BASE) | ||
1684 | | beqz AT, >1 | 2079 | | beqz AT, >1 |
1685 | |. li CARG4, -1 | 2080 | |. li CARG4, -1 |
1686 | | trunc.w.d f0, f0 | ||
1687 | | sltiu AT, CARG3, LJ_TISNUM | 2081 | | sltiu AT, CARG3, LJ_TISNUM |
1688 | | beqz AT, ->fff_fallback | 2082 | | beqz AT, ->fff_fallback |
2083 | |.if FPU | ||
1689 | |. mfc1 CARG4, f0 | 2084 | |. mfc1 CARG4, f0 |
2085 | |.else | ||
2086 | |. move CARG4, CRET1 | ||
2087 | |.endif | ||
1690 | |1: | 2088 | |1: |
1691 | | sltiu AT, CARG2, LJ_TISNUM | 2089 | | sltiu AT, CARG2, LJ_TISNUM |
1692 | | beqz AT, ->fff_fallback | 2090 | | beqz AT, ->fff_fallback |
1693 | |. li AT, LJ_TSTR | 2091 | |. li AT, LJ_TSTR |
1694 | | trunc.w.d f2, f2 | ||
1695 | | bne TMP0, AT, ->fff_fallback | 2092 | | bne TMP0, AT, ->fff_fallback |
1696 | |. lw CARG2, STR:CARG1->len | 2093 | |.if FPU |
2094 | |. ldc1 f2, 8(BASE) | ||
2095 | | trunc.w.d f2, f2 | ||
1697 | | mfc1 CARG3, f2 | 2096 | | mfc1 CARG3, f2 |
2097 | |.else | ||
2098 | |. sw CARG1, TEMP_SAVE_1 | ||
2099 | | sw CARG4, TEMP_SAVE_2 | ||
2100 | | lw CARG2, 8+4(BASE) | ||
2101 | | load_got __fixdfsi | ||
2102 | | call_extern | ||
2103 | |. lw CARG1, 8(BASE) | ||
2104 | | lw CARG1, TEMP_SAVE_1 | ||
2105 | | lw CARG4, TEMP_SAVE_2 | ||
2106 | | move CARG3, CRET1 | ||
2107 | |.endif | ||
2108 | | lw CARG2, STR:CARG1->len | ||
1698 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | 2109 | | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end |
1699 | | slt AT, CARG4, r0 | 2110 | | slt AT, CARG4, r0 |
1700 | | addiu TMP0, CARG2, 1 | 2111 | | addiu TMP0, CARG2, 1 |
@@ -1749,10 +2160,58 @@ static void build_subroutines(BuildCtx *ctx) | |||
1749 | | | 2160 | | |
1750 | |//-- Bit library -------------------------------------------------------- | 2161 | |//-- Bit library -------------------------------------------------------- |
1751 | | | 2162 | | |
2163 | |.if not FPU | ||
2164 | |// FP number to bit conversion for soft-float. | ||
2165 | |->vm_tobit: | ||
2166 | | sll TMP0, CARG1, 1 | ||
2167 | | lui TMP3, 0x0020 | ||
2168 | | addu TMP0, TMP0, TMP3 | ||
2169 | | slt TMP3, TMP0, r0 | ||
2170 | | movz CARG2, r0, TMP3 | ||
2171 | | beqz TMP3, >2 | ||
2172 | |. li CARG4, 0x3e0 | ||
2173 | | not CARG4, CARG4 | ||
2174 | | sra TMP0, TMP0, 21 | ||
2175 | | subu TMP0, CARG4, TMP0 | ||
2176 | | slt TMP3, TMP0, r0 | ||
2177 | | bnez TMP3, >1 | ||
2178 | |. sll CARG4, CARG1, 11 | ||
2179 | | lui TMP3, 0x8000 | ||
2180 | | or CARG4, CARG4, TMP3 | ||
2181 | | srl TMP3, CARG2, 21 | ||
2182 | | or CARG4, CARG4, TMP3 | ||
2183 | | slt TMP3, CARG1, r0 | ||
2184 | | beqz TMP3, >2 | ||
2185 | |. srlv CARG2, CARG4, TMP0 | ||
2186 | | subu CARG2, r0, CARG2 | ||
2187 | |2: | ||
2188 | | jr ra | ||
2189 | |. move CRET1, CARG2 | ||
2190 | |1: | ||
2191 | | addiu TMP0, TMP0, 21 | ||
2192 | | srlv CARG4, CARG2, TMP0 | ||
2193 | | li TMP3, 20 | ||
2194 | | subu TMP0, TMP3, TMP0 | ||
2195 | | sll CARG2, CARG1, 12 | ||
2196 | | sllv TMP3, CARG2, TMP0 | ||
2197 | | or CARG2, CARG4, TMP3 | ||
2198 | | slt TMP3, CARG1, r0 | ||
2199 | | beqz TMP3, <2 | ||
2200 | |. nop | ||
2201 | | jr ra | ||
2202 | |. subu CRET1, r0, CARG2 | ||
2203 | |.endif | ||
2204 | | | ||
1752 | |.macro .ffunc_bit, name | 2205 | |.macro .ffunc_bit, name |
1753 | | .ffunc_n bit_..name | 2206 | | .ffunc_n bit_..name |
2207 | |.if FPU | ||
1754 | |. add.d FARG1, FARG1, TOBIT | 2208 | |. add.d FARG1, FARG1, TOBIT |
1755 | | mfc1 CRET1, FARG1 | 2209 | | mfc1 CRET1, FARG1 |
2210 | |.else | ||
2211 | |. nop | ||
2212 | | bal ->vm_tobit | ||
2213 | |. nop | ||
2214 | |.endif | ||
1756 | |.endmacro | 2215 | |.endmacro |
1757 | | | 2216 | | |
1758 | |.macro .ffunc_bit_op, name, ins | 2217 | |.macro .ffunc_bit_op, name, ins |
@@ -1760,14 +2219,27 @@ static void build_subroutines(BuildCtx *ctx) | |||
1760 | | addiu TMP1, BASE, 8 | 2219 | | addiu TMP1, BASE, 8 |
1761 | | addu TMP2, BASE, NARGS8:RC | 2220 | | addu TMP2, BASE, NARGS8:RC |
1762 | |1: | 2221 | |1: |
2222 | | move CRET2, CRET1 | ||
1763 | | lw CARG4, HI(TMP1) | 2223 | | lw CARG4, HI(TMP1) |
2224 | |.if FPU | ||
1764 | | beq TMP1, TMP2, ->fff_resi | 2225 | | beq TMP1, TMP2, ->fff_resi |
1765 | |. ldc1 FARG1, 0(TMP1) | 2226 | |. ldc1 FARG1, 0(TMP1) |
2227 | |.else | ||
2228 | | lw CARG1, 0(TMP1) | ||
2229 | | beq TMP1, TMP2, ->fff_resi | ||
2230 | |. lw CARG2, 4(TMP1) | ||
2231 | |.endif | ||
1766 | | sltiu AT, CARG4, LJ_TISNUM | 2232 | | sltiu AT, CARG4, LJ_TISNUM |
1767 | | beqz AT, ->fff_fallback | 2233 | | beqz AT, ->fff_fallback |
1768 | | add.d FARG1, FARG1, TOBIT | 2234 | |.if FPU |
1769 | | mfc1 CARG2, FARG1 | 2235 | |. add.d FARG1, FARG1, TOBIT |
1770 | | ins CRET1, CRET1, CARG2 | 2236 | | mfc1 CRET1, FARG1 |
2237 | |.else | ||
2238 | |. nop | ||
2239 | | bal ->vm_tobit | ||
2240 | |. nop | ||
2241 | |.endif | ||
2242 | | ins CRET1, CRET2, CRET1 | ||
1771 | | b <1 | 2243 | | b <1 |
1772 | |. addiu TMP1, TMP1, 8 | 2244 | |. addiu TMP1, TMP1, 8 |
1773 | |.endmacro | 2245 | |.endmacro |
@@ -1794,10 +2266,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1794 | | | 2266 | | |
1795 | |.macro .ffunc_bit_sh, name, ins, shmod | 2267 | |.macro .ffunc_bit_sh, name, ins, shmod |
1796 | | .ffunc_nn bit_..name | 2268 | | .ffunc_nn bit_..name |
2269 | |.if FPU | ||
1797 | |. add.d FARG1, FARG1, TOBIT | 2270 | |. add.d FARG1, FARG1, TOBIT |
1798 | | add.d FARG2, FARG2, TOBIT | 2271 | | add.d FARG2, FARG2, TOBIT |
1799 | | mfc1 CARG1, FARG1 | 2272 | | mfc1 CARG1, FARG1 |
1800 | | mfc1 CARG2, FARG2 | 2273 | | mfc1 CARG2, FARG2 |
2274 | |.else | ||
2275 | |. sw CARG4, TEMP_SAVE_1 | ||
2276 | | bal ->vm_tobit | ||
2277 | |. nop | ||
2278 | | move CRET2, CRET1 | ||
2279 | | lw CARG2, TEMP_SAVE_1 | ||
2280 | | bal ->vm_tobit | ||
2281 | |. move CARG1, CARG3 | ||
2282 | | move CARG2, CRET1 | ||
2283 | | move CARG1, CRET2 | ||
2284 | |.endif | ||
1801 | |.if shmod == 1 | 2285 | |.if shmod == 1 |
1802 | | li AT, 32 | 2286 | | li AT, 32 |
1803 | | subu TMP0, AT, CARG2 | 2287 | | subu TMP0, AT, CARG2 |
@@ -1822,9 +2306,19 @@ static void build_subroutines(BuildCtx *ctx) | |||
1822 | | | 2306 | | |
1823 | |.ffunc_bit tobit | 2307 | |.ffunc_bit tobit |
1824 | |->fff_resi: | 2308 | |->fff_resi: |
2309 | | lw PC, FRAME_PC(BASE) | ||
2310 | | addiu RA, BASE, -8 | ||
2311 | |.if HFABI | ||
1825 | | mtc1 CRET1, FRET1 | 2312 | | mtc1 CRET1, FRET1 |
1826 | | b ->fff_resn | 2313 | | cvt.d.w FRET1, FRET1 |
1827 | |. cvt.d.w FRET1, FRET1 | 2314 | | b ->fff_res1 |
2315 | |. sdc1 FRET1, -8(BASE) | ||
2316 | |.else // Result already in CRET1. | ||
2317 | | cvti2d CRET1 | ||
2318 | | sw CRET1, -8(BASE) | ||
2319 | | b ->fff_res1 | ||
2320 | |. sw CRET2, -8+4(BASE) | ||
2321 | |.endif | ||
1828 | | | 2322 | | |
1829 | |//----------------------------------------------------------------------- | 2323 | |//----------------------------------------------------------------------- |
1830 | | | 2324 | | |
@@ -2082,14 +2576,23 @@ static void build_subroutines(BuildCtx *ctx) | |||
2082 | |//----------------------------------------------------------------------- | 2576 | |//----------------------------------------------------------------------- |
2083 | | | 2577 | | |
2084 | |.macro savex_, a, b | 2578 | |.macro savex_, a, b |
2579 | |.if FPU | ||
2085 | | sdc1 f..a, 16+a*8(sp) | 2580 | | sdc1 f..a, 16+a*8(sp) |
2086 | | sw r..a, 16+32*8+a*4(sp) | 2581 | | sw r..a, 16+32*8+a*4(sp) |
2087 | | sw r..b, 16+32*8+b*4(sp) | 2582 | | sw r..b, 16+32*8+b*4(sp) |
2583 | |.else | ||
2584 | | sw r..a, 16+a*4(sp) | ||
2585 | | sw r..b, 16+b*4(sp) | ||
2586 | |.endif | ||
2088 | |.endmacro | 2587 | |.endmacro |
2089 | | | 2588 | | |
2090 | |->vm_exit_handler: | 2589 | |->vm_exit_handler: |
2091 | |.if JIT | 2590 | |.if JIT |
2591 | |.if FPU | ||
2092 | | addiu sp, sp, -(16+32*8+32*4) | 2592 | | addiu sp, sp, -(16+32*8+32*4) |
2593 | |.else | ||
2594 | | addiu sp, sp, -(16+32*4) | ||
2595 | |.endif | ||
2093 | | savex_ 0, 1 | 2596 | | savex_ 0, 1 |
2094 | | savex_ 2, 3 | 2597 | | savex_ 2, 3 |
2095 | | savex_ 4, 5 | 2598 | | savex_ 4, 5 |
@@ -2104,17 +2607,25 @@ static void build_subroutines(BuildCtx *ctx) | |||
2104 | | savex_ 22, 23 | 2607 | | savex_ 22, 23 |
2105 | | savex_ 24, 25 | 2608 | | savex_ 24, 25 |
2106 | | savex_ 26, 27 | 2609 | | savex_ 26, 27 |
2610 | |.if FPU | ||
2107 | | sdc1 f28, 16+28*8(sp) | 2611 | | sdc1 f28, 16+28*8(sp) |
2108 | | sw r28, 16+32*8+28*4(sp) | ||
2109 | | sdc1 f30, 16+30*8(sp) | 2612 | | sdc1 f30, 16+30*8(sp) |
2613 | | sw r28, 16+32*8+28*4(sp) | ||
2110 | | sw r30, 16+32*8+30*4(sp) | 2614 | | sw r30, 16+32*8+30*4(sp) |
2111 | | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. | 2615 | | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. |
2616 | | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | ||
2617 | | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP | ||
2618 | |.else | ||
2619 | | sw r28, 16+28*4(sp) | ||
2620 | | sw r30, 16+30*4(sp) | ||
2621 | | sw r0, 16+31*4(sp) // Clear RID_TMP. | ||
2622 | | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. | ||
2623 | | sw TMP2, 16+29*4(sp) // Store sp in RID_SP | ||
2624 | |.endif | ||
2112 | | li_vmstate EXIT | 2625 | | li_vmstate EXIT |
2113 | | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | ||
2114 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 | 2626 | | addiu DISPATCH, JGL, -GG_DISP2G-32768 |
2115 | | lw TMP1, 0(TMP2) // Load exit number. | 2627 | | lw TMP1, 0(TMP2) // Load exit number. |
2116 | | st_vmstate | 2628 | | st_vmstate |
2117 | | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. | ||
2118 | | lw L, DISPATCH_GL(cur_L)(DISPATCH) | 2629 | | lw L, DISPATCH_GL(cur_L)(DISPATCH) |
2119 | | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) | 2630 | | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) |
2120 | | load_got lj_trace_exit | 2631 | | load_got lj_trace_exit |
@@ -2144,15 +2655,15 @@ static void build_subroutines(BuildCtx *ctx) | |||
2144 | |1: | 2655 | |1: |
2145 | | bltz CRET1, >9 // Check for error from exit. | 2656 | | bltz CRET1, >9 // Check for error from exit. |
2146 | |. lw LFUNC:RB, FRAME_FUNC(BASE) | 2657 | |. lw LFUNC:RB, FRAME_FUNC(BASE) |
2147 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 2658 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2148 | | sll MULTRES, CRET1, 3 | 2659 | | sll MULTRES, CRET1, 3 |
2149 | | li TISNIL, LJ_TNIL | 2660 | | li TISNIL, LJ_TNIL |
2150 | | sw MULTRES, SAVE_MULTRES | 2661 | | sw MULTRES, SAVE_MULTRES |
2151 | | mtc1 TMP3, TOBIT | 2662 | | .FPU mtc1 TMP3, TOBIT |
2152 | | lw TMP1, LFUNC:RB->pc | 2663 | | lw TMP1, LFUNC:RB->pc |
2153 | | sw r0, DISPATCH_GL(jit_base)(DISPATCH) | 2664 | | sw r0, DISPATCH_GL(jit_base)(DISPATCH) |
2154 | | lw KBASE, PC2PROTO(k)(TMP1) | 2665 | | lw KBASE, PC2PROTO(k)(TMP1) |
2155 | | cvt.d.s TOBIT, TOBIT | 2666 | | .FPU cvt.d.s TOBIT, TOBIT |
2156 | | // Modified copy of ins_next which handles function header dispatch, too. | 2667 | | // Modified copy of ins_next which handles function header dispatch, too. |
2157 | | lw INS, 0(PC) | 2668 | | lw INS, 0(PC) |
2158 | | addiu PC, PC, 4 | 2669 | | addiu PC, PC, 4 |
@@ -2160,7 +2671,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2160 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) | 2671 | | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) |
2161 | | decode_OP4a TMP1, INS | 2672 | | decode_OP4a TMP1, INS |
2162 | | decode_OP4b TMP1 | 2673 | | decode_OP4b TMP1 |
2163 | | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? | 2674 | | sltiu TMP2, TMP1, BC_FUNCF*4 |
2164 | | addu TMP0, DISPATCH, TMP1 | 2675 | | addu TMP0, DISPATCH, TMP1 |
2165 | | decode_RD8a RD, INS | 2676 | | decode_RD8a RD, INS |
2166 | | lw AT, 0(TMP0) | 2677 | | lw AT, 0(TMP0) |
@@ -2202,7 +2713,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2202 | |//----------------------------------------------------------------------- | 2713 | |//----------------------------------------------------------------------- |
2203 | | | 2714 | | |
2204 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. | 2715 | |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. |
2205 | |.macro vm_round, func | 2716 | |.macro vm_round_hf, func |
2206 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). | 2717 | | lui TMP0, 0x4330 // Hiword of 2^52 (double). |
2207 | | mtc1 r0, f4 | 2718 | | mtc1 r0, f4 |
2208 | | mtc1 TMP0, f5 | 2719 | | mtc1 TMP0, f5 |
@@ -2244,6 +2755,25 @@ static void build_subroutines(BuildCtx *ctx) | |||
2244 | |. mov.d FRET1, FARG1 | 2755 | |. mov.d FRET1, FARG1 |
2245 | |.endmacro | 2756 | |.endmacro |
2246 | | | 2757 | | |
2758 | |.macro vm_round_sf, func | ||
2759 | | addiu sp, sp, -8 | ||
2760 | | load_got func | ||
2761 | | sw ra, 0(sp) | ||
2762 | | call_extern | ||
2763 | |. nop | ||
2764 | | lw ra, 0(sp) | ||
2765 | | jr ra | ||
2766 | |. addiu sp, sp, 8 | ||
2767 | |.endmacro | ||
2768 | | | ||
2769 | |.macro vm_round, func | ||
2770 | |.if FPU | ||
2771 | | vm_round_hf, func | ||
2772 | |.else | ||
2773 | | vm_round_sf, func | ||
2774 | |.endif | ||
2775 | |.endmacro | ||
2776 | | | ||
2247 | |->vm_floor: | 2777 | |->vm_floor: |
2248 | | vm_round floor | 2778 | | vm_round floor |
2249 | |->vm_ceil: | 2779 | |->vm_ceil: |
@@ -2272,10 +2802,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2272 | | sw r1, CTSTATE->cb.slot | 2802 | | sw r1, CTSTATE->cb.slot |
2273 | | sw CARG1, CTSTATE->cb.gpr[0] | 2803 | | sw CARG1, CTSTATE->cb.gpr[0] |
2274 | | sw CARG2, CTSTATE->cb.gpr[1] | 2804 | | sw CARG2, CTSTATE->cb.gpr[1] |
2275 | | sdc1 FARG1, CTSTATE->cb.fpr[0] | 2805 | | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] |
2276 | | sw CARG3, CTSTATE->cb.gpr[2] | 2806 | | sw CARG3, CTSTATE->cb.gpr[2] |
2277 | | sw CARG4, CTSTATE->cb.gpr[3] | 2807 | | sw CARG4, CTSTATE->cb.gpr[3] |
2278 | | sdc1 FARG2, CTSTATE->cb.fpr[1] | 2808 | | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] |
2279 | | addiu TMP0, sp, CFRAME_SPACE+16 | 2809 | | addiu TMP0, sp, CFRAME_SPACE+16 |
2280 | | sw TMP0, CTSTATE->cb.stack | 2810 | | sw TMP0, CTSTATE->cb.stack |
2281 | | sw r0, SAVE_PC // Any value outside of bytecode is ok. | 2811 | | sw r0, SAVE_PC // Any value outside of bytecode is ok. |
@@ -2286,14 +2816,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
2286 | | lw BASE, L:CRET1->base | 2816 | | lw BASE, L:CRET1->base |
2287 | | lw RC, L:CRET1->top | 2817 | | lw RC, L:CRET1->top |
2288 | | move L, CRET1 | 2818 | | move L, CRET1 |
2289 | | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | 2819 | | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). |
2290 | | lw LFUNC:RB, FRAME_FUNC(BASE) | 2820 | | lw LFUNC:RB, FRAME_FUNC(BASE) |
2291 | | mtc1 TMP3, TOBIT | 2821 | | .FPU mtc1 TMP3, TOBIT |
2292 | | li_vmstate INTERP | 2822 | | li_vmstate INTERP |
2293 | | li TISNIL, LJ_TNIL | 2823 | | li TISNIL, LJ_TNIL |
2294 | | subu RC, RC, BASE | 2824 | | subu RC, RC, BASE |
2295 | | st_vmstate | 2825 | | st_vmstate |
2296 | | cvt.d.s TOBIT, TOBIT | 2826 | | .FPU cvt.d.s TOBIT, TOBIT |
2297 | | ins_callt | 2827 | | ins_callt |
2298 | |.endif | 2828 | |.endif |
2299 | | | 2829 | | |
@@ -2307,11 +2837,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
2307 | | move CARG2, RA | 2837 | | move CARG2, RA |
2308 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) | 2838 | | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) |
2309 | |. move CARG1, CTSTATE | 2839 | |. move CARG1, CTSTATE |
2840 | | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] | ||
2310 | | lw CRET1, CTSTATE->cb.gpr[0] | 2841 | | lw CRET1, CTSTATE->cb.gpr[0] |
2311 | | ldc1 FRET1, CTSTATE->cb.fpr[0] | 2842 | | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] |
2312 | | lw CRET2, CTSTATE->cb.gpr[1] | ||
2313 | | b ->vm_leave_unw | 2843 | | b ->vm_leave_unw |
2314 | |. ldc1 FRET2, CTSTATE->cb.fpr[1] | 2844 | |. lw CRET2, CTSTATE->cb.gpr[1] |
2315 | |.endif | 2845 | |.endif |
2316 | | | 2846 | | |
2317 | |->vm_ffi_call: // Call C function via FFI. | 2847 | |->vm_ffi_call: // Call C function via FFI. |
@@ -2343,8 +2873,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
2343 | | lw CARG2, CCSTATE->gpr[1] | 2873 | | lw CARG2, CCSTATE->gpr[1] |
2344 | | lw CARG3, CCSTATE->gpr[2] | 2874 | | lw CARG3, CCSTATE->gpr[2] |
2345 | | lw CARG4, CCSTATE->gpr[3] | 2875 | | lw CARG4, CCSTATE->gpr[3] |
2346 | | ldc1 FARG1, CCSTATE->fpr[0] | 2876 | | .FPU ldc1 FARG1, CCSTATE->fpr[0] |
2347 | | ldc1 FARG2, CCSTATE->fpr[1] | 2877 | | .FPU ldc1 FARG2, CCSTATE->fpr[1] |
2348 | | jalr CFUNCADDR | 2878 | | jalr CFUNCADDR |
2349 | |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. | 2879 | |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. |
2350 | | lw CCSTATE:TMP1, -12(r16) | 2880 | | lw CCSTATE:TMP1, -12(r16) |
@@ -2352,8 +2882,10 @@ static void build_subroutines(BuildCtx *ctx) | |||
2352 | | lw ra, -4(r16) | 2882 | | lw ra, -4(r16) |
2353 | | sw CRET1, CCSTATE:TMP1->gpr[0] | 2883 | | sw CRET1, CCSTATE:TMP1->gpr[0] |
2354 | | sw CRET2, CCSTATE:TMP1->gpr[1] | 2884 | | sw CRET2, CCSTATE:TMP1->gpr[1] |
2355 | | sdc1 FRET1, CCSTATE:TMP1->fpr[0] | 2885 | | .FPU sdc1 FRET1, CCSTATE:TMP1->fpr[0] |
2356 | | sdc1 FRET2, CCSTATE:TMP1->fpr[1] | 2886 | | .FPU sdc1 FRET2, CCSTATE:TMP1->fpr[1] |
2887 | | sw CARG1, CCSTATE:TMP1->gpr[2] // MIPS32 soft-float. | ||
2888 | | sw CARG2, CCSTATE:TMP1->gpr[3] // Complex doubles are returned in v0, v1, a0, a1. | ||
2357 | | move sp, r16 | 2889 | | move sp, r16 |
2358 | | jr ra | 2890 | | jr ra |
2359 | |. move r16, TMP2 | 2891 | |. move r16, TMP2 |
@@ -2381,8 +2913,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2381 | | addu CARG3, BASE, RD | 2913 | | addu CARG3, BASE, RD |
2382 | | lw TMP0, HI(CARG2) | 2914 | | lw TMP0, HI(CARG2) |
2383 | | lw TMP1, HI(CARG3) | 2915 | | lw TMP1, HI(CARG3) |
2384 | | ldc1 f0, 0(CARG2) | ||
2385 | | ldc1 f2, 0(CARG3) | ||
2386 | | sltiu TMP0, TMP0, LJ_TISNUM | 2916 | | sltiu TMP0, TMP0, LJ_TISNUM |
2387 | | sltiu TMP1, TMP1, LJ_TISNUM | 2917 | | sltiu TMP1, TMP1, LJ_TISNUM |
2388 | | lhu TMP2, OFS_RD(PC) | 2918 | | lhu TMP2, OFS_RD(PC) |
@@ -2390,8 +2920,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2390 | | addiu PC, PC, 4 | 2920 | | addiu PC, PC, 4 |
2391 | | beqz TMP0, ->vmeta_comp | 2921 | | beqz TMP0, ->vmeta_comp |
2392 | |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) | 2922 | |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) |
2923 | | load_double f0, CARG1, CARG2, 0(CARG2) | ||
2924 | |.if FPU | ||
2925 | | ldc1 f2, 0(CARG3) | ||
2926 | |.else | ||
2927 | | lw CARG4, 4(CARG3) | ||
2928 | | lw CARG3, 0(CARG3) | ||
2929 | |.endif | ||
2393 | | decode_RD4b TMP2 | 2930 | | decode_RD4b TMP2 |
2394 | | addu TMP2, TMP2, TMP1 | 2931 | | addu TMP2, TMP2, TMP1 |
2932 | |.if FPU | ||
2395 | if (op == BC_ISLT || op == BC_ISGE) { | 2933 | if (op == BC_ISLT || op == BC_ISGE) { |
2396 | | c.olt.d f0, f2 | 2934 | | c.olt.d f0, f2 |
2397 | } else { | 2935 | } else { |
@@ -2402,8 +2940,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2402 | } else { | 2940 | } else { |
2403 | | movt TMP2, r0 | 2941 | | movt TMP2, r0 |
2404 | } | 2942 | } |
2405 | | addu PC, PC, TMP2 | 2943 | |.else |
2944 | | load_got __ledf2 | ||
2945 | | sw RD, TEMP_SAVE_1 | ||
2946 | | sw TMP1, TEMP_SAVE_2 | ||
2947 | | call_extern //CRET1 = f0<=f2 | ||
2948 | |. sw TMP2, TEMP_SAVE_3 | ||
2949 | | lw TMP2, TEMP_SAVE_3 | ||
2950 | | lw TMP1, TEMP_SAVE_2 | ||
2951 | if (op == BC_ISLT) { | ||
2952 | | bltz CRET1, >1 | ||
2953 | } else if (op == BC_ISLE) { | ||
2954 | | blez CRET1, >1 | ||
2955 | } else if (op == BC_ISGT) { | ||
2956 | | bgtz CRET1, >1 | ||
2957 | } else { | ||
2958 | | bgez CRET1, >1 | ||
2959 | } | ||
2960 | |. lw RD, TEMP_SAVE_1 | ||
2961 | | move TMP2, r0 | ||
2406 | |1: | 2962 | |1: |
2963 | |.endif | ||
2964 | | addu PC, PC, TMP2 | ||
2407 | | ins_next | 2965 | | ins_next |
2408 | break; | 2966 | break; |
2409 | 2967 | ||
@@ -2413,24 +2971,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2413 | | addu RA, BASE, RA | 2971 | | addu RA, BASE, RA |
2414 | | addiu PC, PC, 4 | 2972 | | addiu PC, PC, 4 |
2415 | | lw TMP0, HI(RA) | 2973 | | lw TMP0, HI(RA) |
2416 | | ldc1 f0, 0(RA) | ||
2417 | | addu RD, BASE, RD | 2974 | | addu RD, BASE, RD |
2418 | | lhu TMP2, -4+OFS_RD(PC) | 2975 | | lhu TMP2, -4+OFS_RD(PC) |
2419 | | lw TMP1, HI(RD) | ||
2420 | | ldc1 f2, 0(RD) | ||
2421 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 2976 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2977 | | lw TMP1, HI(RD) | ||
2978 | | decode_RD4b TMP2 | ||
2422 | | sltiu AT, TMP0, LJ_TISNUM | 2979 | | sltiu AT, TMP0, LJ_TISNUM |
2423 | | sltiu CARG1, TMP1, LJ_TISNUM | 2980 | | sltiu CARG1, TMP1, LJ_TISNUM |
2424 | | decode_RD4b TMP2 | 2981 | | load_double f2, CARG3, CARG4, 0(RD) |
2982 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | ||
2425 | | and AT, AT, CARG1 | 2983 | | and AT, AT, CARG1 |
2984 | | load_double f0, CARG1, CARG2, 0(RA) | ||
2426 | | beqz AT, >5 | 2985 | | beqz AT, >5 |
2427 | |. addu TMP2, TMP2, TMP3 | 2986 | |. addu TMP2, TMP2, TMP3 |
2987 | |.if FPU | ||
2428 | | c.eq.d f0, f2 | 2988 | | c.eq.d f0, f2 |
2429 | if (vk) { | 2989 | if (vk) { |
2430 | | movf TMP2, r0 | 2990 | | movf TMP2, r0 |
2431 | } else { | 2991 | } else { |
2432 | | movt TMP2, r0 | 2992 | | movt TMP2, r0 |
2433 | } | 2993 | } |
2994 | |.else | ||
2995 | | load_got __ledf2 | ||
2996 | | sw RD, TEMP_SAVE_1 | ||
2997 | | call_extern | ||
2998 | |. sw TMP2, TEMP_SAVE_2 | ||
2999 | | lw RD, TEMP_SAVE_1 | ||
3000 | | lw TMP2, TEMP_SAVE_2 | ||
3001 | if (vk) { | ||
3002 | | beqz CRET1, >4 | ||
3003 | |. nop | ||
3004 | } else { | ||
3005 | | bnez CRET1, >4 | ||
3006 | |. nop | ||
3007 | } | ||
3008 | | move TMP2, r0 | ||
3009 | |4: | ||
3010 | |.endif | ||
2434 | |1: | 3011 | |1: |
2435 | | addu PC, PC, TMP2 | 3012 | | addu PC, PC, TMP2 |
2436 | | ins_next | 3013 | | ins_next |
@@ -2507,10 +3084,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2507 | | addu RA, BASE, RA | 3084 | | addu RA, BASE, RA |
2508 | | addiu PC, PC, 4 | 3085 | | addiu PC, PC, 4 |
2509 | | lw TMP0, HI(RA) | 3086 | | lw TMP0, HI(RA) |
2510 | | ldc1 f0, 0(RA) | 3087 | | load_double f0, CARG1, CARG2, 0(RA) |
2511 | | addu RD, KBASE, RD | 3088 | | addu RD, KBASE, RD |
2512 | | lhu TMP2, -4+OFS_RD(PC) | 3089 | | lhu TMP2, -4+OFS_RD(PC) |
2513 | | ldc1 f2, 0(RD) | 3090 | | load_double f2, CARG3, CARG4, 0(RD) |
2514 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 3091 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2515 | | sltiu AT, TMP0, LJ_TISNUM | 3092 | | sltiu AT, TMP0, LJ_TISNUM |
2516 | | decode_RD4b TMP2 | 3093 | | decode_RD4b TMP2 |
@@ -2520,6 +3097,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2520 | | beqz AT, >1 | 3097 | | beqz AT, >1 |
2521 | |.endif | 3098 | |.endif |
2522 | |. addu TMP2, TMP2, TMP3 | 3099 | |. addu TMP2, TMP2, TMP3 |
3100 | |.if FPU | ||
2523 | | c.eq.d f0, f2 | 3101 | | c.eq.d f0, f2 |
2524 | if (vk) { | 3102 | if (vk) { |
2525 | | movf TMP2, r0 | 3103 | | movf TMP2, r0 |
@@ -2530,6 +3108,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2530 | |1: | 3108 | |1: |
2531 | | addu PC, PC, TMP2 | 3109 | | addu PC, PC, TMP2 |
2532 | } | 3110 | } |
3111 | |.else | ||
3112 | | load_got __ledf2 | ||
3113 | | sw RD, TEMP_SAVE_1 | ||
3114 | | call_extern | ||
3115 | |. sw TMP2, TEMP_SAVE_2 | ||
3116 | | lw RD, TEMP_SAVE_1 | ||
3117 | | lw TMP2, TEMP_SAVE_2 | ||
3118 | if (vk) { | ||
3119 | | beqz CRET1, >4 | ||
3120 | |. nop | ||
3121 | | move TMP2, r0 | ||
3122 | |4: | ||
3123 | | addu PC, PC, TMP2 | ||
3124 | |1: | ||
3125 | } else { | ||
3126 | | bnez CRET1, >1 | ||
3127 | |. nop | ||
3128 | | move TMP2, r0 | ||
3129 | |1: | ||
3130 | | addu PC, PC, TMP2 | ||
3131 | } | ||
3132 | |.endif | ||
2533 | | ins_next | 3133 | | ins_next |
2534 | |.if FFI | 3134 | |.if FFI |
2535 | |5: | 3135 | |5: |
@@ -2588,7 +3188,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2588 | | addu PC, PC, TMP2 | 3188 | | addu PC, PC, TMP2 |
2589 | } else { | 3189 | } else { |
2590 | | sltiu TMP0, TMP0, LJ_TISTRUECOND | 3190 | | sltiu TMP0, TMP0, LJ_TISTRUECOND |
2591 | | ldc1 f0, 0(RD) | 3191 | | load_double1 0(RD) |
2592 | if (op == BC_ISTC) { | 3192 | if (op == BC_ISTC) { |
2593 | | beqz TMP0, >1 | 3193 | | beqz TMP0, >1 |
2594 | } else { | 3194 | } else { |
@@ -2598,7 +3198,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2598 | | decode_RD4b TMP2 | 3198 | | decode_RD4b TMP2 |
2599 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 3199 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
2600 | | addu TMP2, TMP2, TMP3 | 3200 | | addu TMP2, TMP2, TMP3 |
2601 | | sdc1 f0, 0(RA) | 3201 | | store_double1 0(RA) |
2602 | | addu PC, PC, TMP2 | 3202 | | addu PC, PC, TMP2 |
2603 | |1: | 3203 | |1: |
2604 | } | 3204 | } |
@@ -2631,9 +3231,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2631 | | // RA = dst*8, RD = src*8 | 3231 | | // RA = dst*8, RD = src*8 |
2632 | | addu RD, BASE, RD | 3232 | | addu RD, BASE, RD |
2633 | | addu RA, BASE, RA | 3233 | | addu RA, BASE, RA |
2634 | | ldc1 f0, 0(RD) | 3234 | | load_double1 0(RD) |
2635 | | ins_next1 | 3235 | | ins_next1 |
2636 | | sdc1 f0, 0(RA) | 3236 | | store_double1 0(RA) |
2637 | | ins_next2 | 3237 | | ins_next2 |
2638 | break; | 3238 | break; |
2639 | case BC_NOT: | 3239 | case BC_NOT: |
@@ -2653,12 +3253,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2653 | | addu CARG3, BASE, RD | 3253 | | addu CARG3, BASE, RD |
2654 | | addu RA, BASE, RA | 3254 | | addu RA, BASE, RA |
2655 | | lw TMP0, HI(CARG3) | 3255 | | lw TMP0, HI(CARG3) |
2656 | | ldc1 f0, 0(CARG3) | ||
2657 | | sltiu AT, TMP0, LJ_TISNUM | 3256 | | sltiu AT, TMP0, LJ_TISNUM |
3257 | | load_double f0, CARG1, CARG2, 0(CARG3) | ||
3258 | |.if FPU | ||
2658 | | beqz AT, ->vmeta_unm | 3259 | | beqz AT, ->vmeta_unm |
2659 | |. neg.d f0, f0 | 3260 | |. neg.d f0, f0 |
3261 | |.else | ||
3262 | | lui TMP1, 0x8000 | ||
3263 | | xor CRET1, TMP1, CARG1 | ||
3264 | | beqz AT, ->vmeta_unm | ||
3265 | |. move CRET2, CARG2 | ||
3266 | |.endif | ||
2660 | | ins_next1 | 3267 | | ins_next1 |
2661 | | sdc1 f0, 0(RA) | 3268 | | store_double f0, CRET1, CRET2, 0(RA) |
2662 | | ins_next2 | 3269 | | ins_next2 |
2663 | break; | 3270 | break; |
2664 | case BC_LEN: | 3271 | case BC_LEN: |
@@ -2672,10 +3279,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2672 | |. li AT, LJ_TTAB | 3279 | |. li AT, LJ_TTAB |
2673 | | lw CRET1, STR:CARG1->len | 3280 | | lw CRET1, STR:CARG1->len |
2674 | |1: | 3281 | |1: |
3282 | |.if FPU | ||
2675 | | mtc1 CRET1, f0 | 3283 | | mtc1 CRET1, f0 |
2676 | | cvt.d.w f0, f0 | 3284 | | cvt.d.w f0, f0 |
3285 | |.else | ||
3286 | | cvti2d CRET1 | ||
3287 | |.endif | ||
2677 | | ins_next1 | 3288 | | ins_next1 |
2678 | | sdc1 f0, 0(RA) | 3289 | | store_double f0, CRET1, CRET2, 0(RA) |
2679 | | ins_next2 | 3290 | | ins_next2 |
2680 | |2: | 3291 | |2: |
2681 | | bne TMP0, AT, ->vmeta_len | 3292 | | bne TMP0, AT, ->vmeta_len |
@@ -2717,72 +3328,142 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2717 | | addu CARG3, BASE, RB | 3328 | | addu CARG3, BASE, RB |
2718 | | addu CARG4, KBASE, RC | 3329 | | addu CARG4, KBASE, RC |
2719 | | lw TMP1, HI(CARG3) | 3330 | | lw TMP1, HI(CARG3) |
2720 | | ldc1 f20, 0(CARG3) | 3331 | | sltiu AT, TMP1, LJ_TISNUM |
2721 | | ldc1 f22, 0(CARG4) | 3332 | | load_double f20, CARG1, CARG2, 0(CARG3) |
2722 | | sltiu AT, TMP1, LJ_TISNUM | 3333 | | load_double f22, CARG3, CARG4, 0(CARG4) |
3334 | |.if FPU | ||
3335 | | beqz AT, ->vmeta_arith | ||
3336 | |.else | ||
3337 | | beqz AT, ->vmeta_arith_vn | ||
3338 | |.endif | ||
3339 | |. addu RA, BASE, RA | ||
2723 | || break; | 3340 | || break; |
2724 | ||case 1: | 3341 | ||case 1: |
2725 | | addu CARG4, BASE, RB | 3342 | | addu CARG4, BASE, RB |
2726 | | addu CARG3, KBASE, RC | 3343 | | addu CARG3, KBASE, RC |
2727 | | lw TMP1, HI(CARG4) | 3344 | | lw TMP1, HI(CARG4) |
2728 | | ldc1 f22, 0(CARG4) | 3345 | | sltiu AT, TMP1, LJ_TISNUM |
2729 | | ldc1 f20, 0(CARG3) | 3346 | | load_double f20, CARG1, CARG2, 0(CARG3) |
2730 | | sltiu AT, TMP1, LJ_TISNUM | 3347 | | load_double f22, CARG3, CARG4, 0(CARG4) |
3348 | |.if FPU | ||
3349 | | beqz AT, ->vmeta_arith | ||
3350 | |.else | ||
3351 | | beqz AT, ->vmeta_arith_nv | ||
3352 | |.endif | ||
3353 | |. addu RA, BASE, RA | ||
2731 | || break; | 3354 | || break; |
2732 | ||default: | 3355 | ||default: |
2733 | | addu CARG3, BASE, RB | 3356 | | addu CARG3, BASE, RB |
2734 | | addu CARG4, BASE, RC | 3357 | | addu CARG4, BASE, RC |
2735 | | lw TMP1, HI(CARG3) | 3358 | | lw TMP1, HI(CARG3) |
2736 | | lw TMP2, HI(CARG4) | 3359 | | lw TMP2, HI(CARG4) |
2737 | | ldc1 f20, 0(CARG3) | 3360 | | sltiu AT, TMP1, LJ_TISNUM |
2738 | | ldc1 f22, 0(CARG4) | 3361 | | sltiu TMP0, TMP2, LJ_TISNUM |
2739 | | sltiu AT, TMP1, LJ_TISNUM | 3362 | | and AT, AT, TMP0 |
2740 | | sltiu TMP0, TMP2, LJ_TISNUM | 3363 | | load_double f20, CARG1, CARG2, 0(CARG3) |
2741 | | and AT, AT, TMP0 | 3364 | | load_double f22, CARG3, CARG4, 0(CARG4) |
3365 | |.if FPU | ||
3366 | | beqz AT, ->vmeta_arith | ||
3367 | |.else | ||
3368 | | beqz AT, ->vmeta_arith_vv | ||
3369 | |.endif | ||
3370 | |. addu RA, BASE, RA | ||
2742 | || break; | 3371 | || break; |
2743 | ||} | 3372 | ||} |
2744 | | beqz AT, ->vmeta_arith | ||
2745 | |. addu RA, BASE, RA | ||
2746 | |.endmacro | 3373 | |.endmacro |
2747 | | | 3374 | | |
3375 | |.macro ins_arithfallback | ||
3376 | ||switch (vk) { | ||
3377 | ||case 0: | ||
3378 | | b ->vmeta_arith_vn | ||
3379 | |. nop | ||
3380 | || break; | ||
3381 | ||case 1: | ||
3382 | | b ->vmeta_arith_nv | ||
3383 | |. nop | ||
3384 | || break; | ||
3385 | ||default: | ||
3386 | | b ->vmeta_arith_vv | ||
3387 | |. nop | ||
3388 | || break; | ||
3389 | ||} | ||
3390 | |.endmacro | ||
3391 | | | ||
3392 | |.if FPU | ||
2748 | |.macro fpmod, a, b, c | 3393 | |.macro fpmod, a, b, c |
2749 | |->BC_MODVN_Z: | 3394 | |->BC_MODVN_Z: |
2750 | | bal ->vm_floor // floor(b/c) | 3395 | | bal ->vm_floor // floor(b/c) |
2751 | |. div.d FARG1, b, c | 3396 | |. div.d FARG1, b, c |
2752 | | mul.d a, FRET1, c | 3397 | | mul.d a, FRET1, c |
2753 | | sub.d a, b, a // b - floor(b/c)*c | 3398 | | sub.d a, b, a // b - floor(b/c)*c |
2754 | |.endmacro | 3399 | |.endmacro |
3400 | |.else | ||
2755 | | | 3401 | | |
2756 | |.macro ins_arith, ins | 3402 | |.macro sfpmod |
3403 | |->BC_MODVN_Z: | ||
3404 | | load_got __divdf3 | ||
3405 | | sw CARG1, TEMP_SAVE_1 | ||
3406 | | sw CARG2, TEMP_SAVE_2 | ||
3407 | | sw CARG3, TEMP_SAVE_3 | ||
3408 | | call_extern | ||
3409 | |. sw CARG4, TEMP_SAVE_4 | ||
3410 | | move CARG1, CRET1 | ||
3411 | | bal ->vm_floor | ||
3412 | |. move CARG2, CRET2 | ||
3413 | | load_got __muldf3 | ||
3414 | | move CARG1, CRET1 | ||
3415 | | move CARG2, CRET2 | ||
3416 | | lw CARG3, TEMP_SAVE_3 | ||
3417 | | call_extern | ||
3418 | |. lw CARG4, TEMP_SAVE_4 | ||
3419 | | load_got __subdf3 | ||
3420 | | lw CARG1, TEMP_SAVE_1 | ||
3421 | | lw CARG2, TEMP_SAVE_2 | ||
3422 | | move CARG3, CRET1 | ||
3423 | | call_extern | ||
3424 | |. move CARG4, CRET2 | ||
3425 | |.endmacro | ||
3426 | |.endif | ||
3427 | | | ||
3428 | |.macro ins_arith, intins, fpins, fpcall | ||
2757 | | ins_arithpre | 3429 | | ins_arithpre |
2758 | |.if "ins" == "fpmod_" | 3430 | |.if "fpins" == "fpmod_" |
2759 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | 3431 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |
2760 | |. nop | 3432 | |. nop |
2761 | |.else | 3433 | |.else |
2762 | | ins f0, f20, f22 | 3434 | |.if FPU |
3435 | | fpins f0, f20, f22 | ||
3436 | |.else | ||
3437 | |.if "fpcall" == "sfpmod" | ||
3438 | | sfpmod | ||
3439 | |.else | ||
3440 | | load_got fpcall | ||
3441 | | call_extern | ||
3442 | |. nop | ||
3443 | |.endif | ||
3444 | |.endif | ||
2763 | | ins_next1 | 3445 | | ins_next1 |
2764 | | sdc1 f0, 0(RA) | 3446 | | store_double1 0(RA) |
2765 | | ins_next2 | 3447 | | ins_next2 |
2766 | |.endif | 3448 | |.endif |
2767 | |.endmacro | 3449 | |.endmacro |
2768 | 3450 | ||
2769 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3451 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
2770 | | ins_arith add.d | 3452 | | ins_arith addu, add.d, __adddf3 |
2771 | break; | 3453 | break; |
2772 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3454 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
2773 | | ins_arith sub.d | 3455 | | ins_arith subu, sub.d, __subdf3 |
2774 | break; | 3456 | break; |
2775 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3457 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
2776 | | ins_arith mul.d | 3458 | | ins_arith mult, mul.d, __muldf3 |
2777 | break; | 3459 | break; |
2778 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3460 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
2779 | | ins_arith div.d | 3461 | | ins_arith div, div.d, __divdf3 |
2780 | break; | 3462 | break; |
2781 | case BC_MODVN: | 3463 | case BC_MODVN: |
2782 | | ins_arith fpmod | 3464 | | ins_arith modi, fpmod, sfpmod |
2783 | break; | ||
2784 | case BC_MODNV: case BC_MODVV: | 3465 | case BC_MODNV: case BC_MODVV: |
2785 | | ins_arith fpmod_ | 3466 | | ins_arith modi, fpmod_, sfpmod |
2786 | break; | 3467 | break; |
2787 | case BC_POW: | 3468 | case BC_POW: |
2788 | | decode_RB8a RB, INS | 3469 | | decode_RB8a RB, INS |
@@ -2792,18 +3473,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2792 | | addu CARG4, BASE, RC | 3473 | | addu CARG4, BASE, RC |
2793 | | lw TMP1, HI(CARG3) | 3474 | | lw TMP1, HI(CARG3) |
2794 | | lw TMP2, HI(CARG4) | 3475 | | lw TMP2, HI(CARG4) |
2795 | | ldc1 FARG1, 0(CARG3) | ||
2796 | | ldc1 FARG2, 0(CARG4) | ||
2797 | | sltiu AT, TMP1, LJ_TISNUM | 3476 | | sltiu AT, TMP1, LJ_TISNUM |
2798 | | sltiu TMP0, TMP2, LJ_TISNUM | 3477 | | sltiu TMP0, TMP2, LJ_TISNUM |
2799 | | and AT, AT, TMP0 | 3478 | | and AT, AT, TMP0 |
2800 | | load_got pow | 3479 | | load_got pow |
2801 | | beqz AT, ->vmeta_arith | 3480 | | beqz AT, ->vmeta_arith |
2802 | |. addu RA, BASE, RA | 3481 | |. addu RA, BASE, RA |
3482 | | load_farg1 0(CARG3) | ||
3483 | | load_farg2 0(CARG4) | ||
2803 | | call_extern | 3484 | | call_extern |
2804 | |. nop | 3485 | |. nop |
2805 | | ins_next1 | 3486 | | ins_next1 |
3487 | |.if HFABI | ||
2806 | | sdc1 FRET1, 0(RA) | 3488 | | sdc1 FRET1, 0(RA) |
3489 | |.else | ||
3490 | | sw CRET1, 0(RA) | ||
3491 | | sw CRET2, 4(RA) | ||
3492 | |.endif | ||
2807 | | ins_next2 | 3493 | | ins_next2 |
2808 | break; | 3494 | break; |
2809 | 3495 | ||
@@ -2826,10 +3512,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2826 | | bnez CRET1, ->vmeta_binop | 3512 | | bnez CRET1, ->vmeta_binop |
2827 | |. lw BASE, L->base | 3513 | |. lw BASE, L->base |
2828 | | addu RB, BASE, MULTRES | 3514 | | addu RB, BASE, MULTRES |
2829 | | ldc1 f0, 0(RB) | 3515 | | load_double1 0(RB) |
2830 | | addu RA, BASE, RA | 3516 | | addu RA, BASE, RA |
2831 | | ins_next1 | 3517 | | ins_next1 |
2832 | | sdc1 f0, 0(RA) // Copy result from RB to RA. | 3518 | | store_double1 0(RA) |
2833 | | ins_next2 | 3519 | | ins_next2 |
2834 | break; | 3520 | break; |
2835 | 3521 | ||
@@ -2864,20 +3550,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2864 | case BC_KSHORT: | 3550 | case BC_KSHORT: |
2865 | | // RA = dst*8, RD = int16_literal*8 | 3551 | | // RA = dst*8, RD = int16_literal*8 |
2866 | | sra RD, INS, 16 | 3552 | | sra RD, INS, 16 |
2867 | | mtc1 RD, f0 | ||
2868 | | addu RA, BASE, RA | 3553 | | addu RA, BASE, RA |
3554 | |.if FPU | ||
3555 | | mtc1 RD, f0 | ||
2869 | | cvt.d.w f0, f0 | 3556 | | cvt.d.w f0, f0 |
3557 | |.else | ||
3558 | | cvti2d RD | ||
3559 | |.endif | ||
2870 | | ins_next1 | 3560 | | ins_next1 |
2871 | | sdc1 f0, 0(RA) | 3561 | | store_double f0, CRET1, CRET2, 0(RA) |
2872 | | ins_next2 | 3562 | | ins_next2 |
2873 | break; | 3563 | break; |
2874 | case BC_KNUM: | 3564 | case BC_KNUM: |
2875 | | // RA = dst*8, RD = num_const*8 | 3565 | | // RA = dst*8, RD = num_const*8 |
2876 | | addu RD, KBASE, RD | 3566 | | addu RD, KBASE, RD |
2877 | | addu RA, BASE, RA | 3567 | | addu RA, BASE, RA |
2878 | | ldc1 f0, 0(RD) | 3568 | | load_double1 0(RD) |
2879 | | ins_next1 | 3569 | | ins_next1 |
2880 | | sdc1 f0, 0(RA) | 3570 | | store_double1 0(RA) |
2881 | | ins_next2 | 3571 | | ins_next2 |
2882 | break; | 3572 | break; |
2883 | case BC_KPRI: | 3573 | case BC_KPRI: |
@@ -2913,9 +3603,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2913 | | lw UPVAL:RB, LFUNC:RD->uvptr | 3603 | | lw UPVAL:RB, LFUNC:RD->uvptr |
2914 | | ins_next1 | 3604 | | ins_next1 |
2915 | | lw TMP1, UPVAL:RB->v | 3605 | | lw TMP1, UPVAL:RB->v |
2916 | | ldc1 f0, 0(TMP1) | 3606 | | load_double1 0(TMP1) |
2917 | | addu RA, BASE, RA | 3607 | | addu RA, BASE, RA |
2918 | | sdc1 f0, 0(RA) | 3608 | | store_double1 0(RA) |
2919 | | ins_next2 | 3609 | | ins_next2 |
2920 | break; | 3610 | break; |
2921 | case BC_USETV: | 3611 | case BC_USETV: |
@@ -2924,14 +3614,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2924 | | srl RA, RA, 1 | 3614 | | srl RA, RA, 1 |
2925 | | addu RD, BASE, RD | 3615 | | addu RD, BASE, RD |
2926 | | addu RA, RA, LFUNC:RB | 3616 | | addu RA, RA, LFUNC:RB |
2927 | | ldc1 f0, 0(RD) | 3617 | | load_double1 0(RD) |
2928 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3618 | | lw UPVAL:RB, LFUNC:RA->uvptr |
2929 | | lbu TMP3, UPVAL:RB->marked | 3619 | | lbu TMP3, UPVAL:RB->marked |
2930 | | lw CARG2, UPVAL:RB->v | 3620 | | lw CARG2, UPVAL:RB->v |
2931 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | 3621 | | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) |
2932 | | lbu TMP0, UPVAL:RB->closed | 3622 | | lbu TMP0, UPVAL:RB->closed |
2933 | | lw TMP2, HI(RD) | 3623 | | lw TMP2, HI(RD) |
2934 | | sdc1 f0, 0(CARG2) | 3624 | | store_double1 0(CARG2) |
2935 | | li AT, LJ_GC_BLACK|1 | 3625 | | li AT, LJ_GC_BLACK|1 |
2936 | | or TMP3, TMP3, TMP0 | 3626 | | or TMP3, TMP3, TMP0 |
2937 | | beq TMP3, AT, >2 // Upvalue is closed and black? | 3627 | | beq TMP3, AT, >2 // Upvalue is closed and black? |
@@ -2991,11 +3681,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2991 | | srl RA, RA, 1 | 3681 | | srl RA, RA, 1 |
2992 | | addu RD, KBASE, RD | 3682 | | addu RD, KBASE, RD |
2993 | | addu RA, RA, LFUNC:RB | 3683 | | addu RA, RA, LFUNC:RB |
2994 | | ldc1 f0, 0(RD) | 3684 | | load_double1 0(RD) |
2995 | | lw UPVAL:RB, LFUNC:RA->uvptr | 3685 | | lw UPVAL:RB, LFUNC:RA->uvptr |
2996 | | ins_next1 | 3686 | | ins_next1 |
2997 | | lw TMP1, UPVAL:RB->v | 3687 | | lw TMP1, UPVAL:RB->v |
2998 | | sdc1 f0, 0(TMP1) | 3688 | | store_double1 0(TMP1) |
2999 | | ins_next2 | 3689 | | ins_next2 |
3000 | break; | 3690 | break; |
3001 | case BC_USETP: | 3691 | case BC_USETP: |
@@ -3126,13 +3816,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3126 | | lw TMP2, HI(CARG3) | 3816 | | lw TMP2, HI(CARG3) |
3127 | | lw TAB:RB, LO(CARG2) | 3817 | | lw TAB:RB, LO(CARG2) |
3128 | | li AT, LJ_TTAB | 3818 | | li AT, LJ_TTAB |
3129 | | ldc1 f0, 0(CARG3) | ||
3130 | | bne TMP1, AT, ->vmeta_tgetv | 3819 | | bne TMP1, AT, ->vmeta_tgetv |
3131 | |. addu RA, BASE, RA | 3820 | |. addu RA, BASE, RA |
3132 | | sltiu AT, TMP2, LJ_TISNUM | 3821 | | sltiu AT, TMP2, LJ_TISNUM |
3133 | | beqz AT, >5 | 3822 | | beqz AT, >5 |
3134 | |. li AT, LJ_TSTR | 3823 | |. li AT, LJ_TSTR |
3135 | | | 3824 | |.if FPU |
3825 | | ldc1 f0, 0(CARG3) | ||
3136 | | // Convert number key to integer, check for integerness and range. | 3826 | | // Convert number key to integer, check for integerness and range. |
3137 | | cvt.w.d f2, f0 | 3827 | | cvt.w.d f2, f0 |
3138 | | lw TMP0, TAB:RB->asize | 3828 | | lw TMP0, TAB:RB->asize |
@@ -3148,9 +3838,51 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3148 | | lw TMP0, HI(TMP2) | 3838 | | lw TMP0, HI(TMP2) |
3149 | | beq TMP0, TISNIL, >2 | 3839 | | beq TMP0, TISNIL, >2 |
3150 | |. ldc1 f0, 0(TMP2) | 3840 | |. ldc1 f0, 0(TMP2) |
3841 | |.else | ||
3842 | | sw RB, TEMP_SAVE_1 | ||
3843 | | sw CARG2, TEMP_SAVE_3 | ||
3844 | | load_got __fixdfsi | ||
3845 | | lw CARG1, 0(CARG3) | ||
3846 | | lw CARG2, 4(CARG3) | ||
3847 | | call_extern // cvt.w.d f2, f0 | ||
3848 | |. sw RC, TEMP_SAVE_2 | ||
3849 | | sw CRET1, TEMP_SAVE_4 | ||
3850 | | cvti2d CRET1 // cvt.d.w f4, f2 | ||
3851 | | load_got __ledf2 | ||
3852 | | lw RC, TEMP_SAVE_2 | ||
3853 | | addu CARG3, BASE, RC | ||
3854 | | lw CARG1, 0(CARG3) | ||
3855 | | lw CARG2, 4(CARG3) | ||
3856 | | move CARG3, CRET1 | ||
3857 | | move CARG4, CRET2 | ||
3858 | | call_extern // c.eq.d f0, f4 | ||
3859 | |. nop | ||
3860 | | lw CARG3, TEMP_SAVE_3 | ||
3861 | | lw RC, TEMP_SAVE_2 | ||
3862 | | lw RB, TEMP_SAVE_1 | ||
3863 | | lw TMP0, TAB:RB->asize | ||
3864 | | lw TMP1, TAB:RB->array | ||
3865 | | lw TMP2, TEMP_SAVE_4 | ||
3866 | | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3. | ||
3867 | | addu CARG3, BASE, RC | ||
3868 | | bnez CRET1, >3 | ||
3869 | |. sltu AT, TMP2, TMP0 | ||
3870 | | b >4 | ||
3871 | |. nop | ||
3872 | |3: | ||
3873 | | move AT, r0 | ||
3874 | |4: | ||
3875 | | sll TMP2, TMP2, 3 | ||
3876 | | beqz AT, ->vmeta_tgetv // Integer key and in array part? | ||
3877 | |. addu TMP2, TMP1, TMP2 | ||
3878 | | lw TMP0, HI(TMP2) | ||
3879 | | lw SFT2, 4(TMP2) | ||
3880 | | beq TMP0, TISNIL, >2 | ||
3881 | |. lw SFT1, 0(TMP2) | ||
3882 | |.endif | ||
3151 | |1: | 3883 | |1: |
3152 | | ins_next1 | 3884 | | ins_next1 |
3153 | | sdc1 f0, 0(RA) | 3885 | | store_double1 0(RA) |
3154 | | ins_next2 | 3886 | | ins_next2 |
3155 | | | 3887 | | |
3156 | |2: // Check for __index if table value is nil. | 3888 | |2: // Check for __index if table value is nil. |
@@ -3246,10 +3978,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3246 | |. addu RC, TMP2, RC | 3978 | |. addu RC, TMP2, RC |
3247 | | lw TMP1, HI(RC) | 3979 | | lw TMP1, HI(RC) |
3248 | | beq TMP1, TISNIL, >5 | 3980 | | beq TMP1, TISNIL, >5 |
3249 | |. ldc1 f0, 0(RC) | 3981 | |. nop |
3250 | |1: | 3982 | |1: |
3983 | | load_double1 0(RC) | ||
3251 | | ins_next1 | 3984 | | ins_next1 |
3252 | | sdc1 f0, 0(RA) | 3985 | | store_double1 0(RA) |
3253 | | ins_next2 | 3986 | | ins_next2 |
3254 | | | 3987 | | |
3255 | |5: // Check for __index if table value is nil. | 3988 | |5: // Check for __index if table value is nil. |
@@ -3271,20 +4004,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3271 | | addu CARG2, BASE, RB | 4004 | | addu CARG2, BASE, RB |
3272 | | addu CARG3, BASE, RC | 4005 | | addu CARG3, BASE, RC |
3273 | | lw TAB:CARG1, LO(CARG2) | 4006 | | lw TAB:CARG1, LO(CARG2) |
4007 | | lw TMP0, TAB:CARG1->asize | ||
4008 | | lw TMP1, TAB:CARG1->array | ||
4009 | |.if FPU | ||
3274 | | ldc1 f0, 0(CARG3) | 4010 | | ldc1 f0, 0(CARG3) |
3275 | | trunc.w.d f2, f0 | 4011 | | trunc.w.d f2, f0 |
3276 | | lw TMP0, TAB:CARG1->asize | ||
3277 | | mfc1 CARG2, f2 | 4012 | | mfc1 CARG2, f2 |
3278 | | lw TMP1, TAB:CARG1->array | 4013 | |.else |
4014 | | load_got __fixdfsi | ||
4015 | | lw CARG1, 0(CARG3) | ||
4016 | | call_extern | ||
4017 | |. lw CARG2, 4(CARG3) | ||
4018 | | move CARG2, CRET1 | ||
4019 | |.endif | ||
3279 | | sltu AT, CARG2, TMP0 | 4020 | | sltu AT, CARG2, TMP0 |
3280 | | sll TMP2, CARG2, 3 | 4021 | | sll TMP2, CARG2, 3 |
3281 | | beqz AT, ->vmeta_tgetr // In array part? | 4022 | | beqz AT, ->vmeta_tgetr // In array part? |
3282 | |. addu TMP2, TMP1, TMP2 | 4023 | |. addu TMP2, TMP1, TMP2 |
3283 | | ldc1 f0, 0(TMP2) | 4024 | | load_double1 0(TMP2) |
3284 | |->BC_TGETR_Z: | 4025 | |->BC_TGETR_Z: |
3285 | | addu RA, BASE, RA | 4026 | | addu RA, BASE, RA |
3286 | | ins_next1 | 4027 | | ins_next1 |
3287 | | sdc1 f0, 0(RA) | 4028 | | store_double1 0(RA) |
3288 | | ins_next2 | 4029 | | ins_next2 |
3289 | break; | 4030 | break; |
3290 | 4031 | ||
@@ -3299,13 +4040,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3299 | | lw TMP2, HI(CARG3) | 4040 | | lw TMP2, HI(CARG3) |
3300 | | lw TAB:RB, LO(CARG2) | 4041 | | lw TAB:RB, LO(CARG2) |
3301 | | li AT, LJ_TTAB | 4042 | | li AT, LJ_TTAB |
3302 | | ldc1 f0, 0(CARG3) | ||
3303 | | bne TMP1, AT, ->vmeta_tsetv | 4043 | | bne TMP1, AT, ->vmeta_tsetv |
3304 | |. addu RA, BASE, RA | 4044 | |. addu RA, BASE, RA |
3305 | | sltiu AT, TMP2, LJ_TISNUM | 4045 | | sltiu AT, TMP2, LJ_TISNUM |
3306 | | beqz AT, >5 | 4046 | | beqz AT, >5 |
3307 | |. li AT, LJ_TSTR | 4047 | |. li AT, LJ_TSTR |
3308 | | | 4048 | |.if FPU |
4049 | | ldc1 f0, 0(CARG3) | ||
3309 | | // Convert number key to integer, check for integerness and range. | 4050 | | // Convert number key to integer, check for integerness and range. |
3310 | | cvt.w.d f2, f0 | 4051 | | cvt.w.d f2, f0 |
3311 | | lw TMP0, TAB:RB->asize | 4052 | | lw TMP0, TAB:RB->asize |
@@ -3326,6 +4067,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3326 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4067 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
3327 | | bnez AT, >7 | 4068 | | bnez AT, >7 |
3328 | |. sdc1 f0, 0(TMP1) | 4069 | |. sdc1 f0, 0(TMP1) |
4070 | |.else | ||
4071 | | sw RB, TEMP_SAVE_1 | ||
4072 | | sw RC, TEMP_SAVE_2 | ||
4073 | | sw CARG2, TEMP_SAVE_3 | ||
4074 | | load_got __fixdfsi | ||
4075 | | lw CARG1, 0(CARG3) | ||
4076 | | call_extern // cvt.w.d f2, f0 | ||
4077 | |. lw CARG2, 4(CARG3) | ||
4078 | | sw CRET1, TEMP_SAVE_4 | ||
4079 | | cvti2d CRET1 // cvt.d.w f4, f2 | ||
4080 | | load_got __ledf2 | ||
4081 | | lw RC, TEMP_SAVE_2 | ||
4082 | | addu CARG3, BASE, RC | ||
4083 | | lw CARG1, 0(CARG3) | ||
4084 | | lw CARG2, 4(CARG3) | ||
4085 | | move CARG3, CRET1 | ||
4086 | | call_extern // c.eq.d f0, f4 | ||
4087 | |. move CARG4, CRET2 | ||
4088 | | lw RC, TEMP_SAVE_2 | ||
4089 | | lw RB, TEMP_SAVE_1 | ||
4090 | | lw TMP0, TAB:RB->asize | ||
4091 | | lw TMP1, TAB:RB->array | ||
4092 | | lw TMP2, TEMP_SAVE_4 | ||
4093 | | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3. | ||
4094 | | addu CARG3, BASE, RC | ||
4095 | | bnez CRET1, >4 // NaN? | ||
4096 | |. sltu AT, TMP2, TMP0 | ||
4097 | | b >6 | ||
4098 | |. nop | ||
4099 | |4: | ||
4100 | | move AT, r0 | ||
4101 | |6: | ||
4102 | | sll TMP2, TMP2, 3 | ||
4103 | | beqz AT, ->vmeta_tsetv // Integer key and in array part? | ||
4104 | |. addu TMP1, TMP1, TMP2 | ||
4105 | | lbu TMP3, TAB:RB->marked | ||
4106 | | lw TMP0, HI(TMP1) | ||
4107 | | lw SFT1, 0(RA) | ||
4108 | | beq TMP0, TISNIL, >3 | ||
4109 | |. lw SFT2, 4(RA) | ||
4110 | |1: | ||
4111 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | ||
4112 | | sw SFT1, 0(TMP1) | ||
4113 | | bnez AT, >7 | ||
4114 | |. sw SFT2, 4(TMP1) | ||
4115 | |.endif | ||
3329 | |2: | 4116 | |2: |
3330 | | ins_next | 4117 | | ins_next |
3331 | | | 4118 | | |
@@ -3374,7 +4161,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3374 | | sll TMP1, TMP1, 3 | 4161 | | sll TMP1, TMP1, 3 |
3375 | | subu TMP1, TMP0, TMP1 | 4162 | | subu TMP1, TMP0, TMP1 |
3376 | | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | 4163 | | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) |
3377 | | ldc1 f20, 0(RA) | 4164 | | load_double f20, SFT1, SFT2, 0(RA) |
3378 | |1: | 4165 | |1: |
3379 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | 4166 | | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) |
3380 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | 4167 | | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) |
@@ -3388,8 +4175,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3388 | |. lw TAB:TMP0, TAB:RB->metatable | 4175 | |. lw TAB:TMP0, TAB:RB->metatable |
3389 | |2: | 4176 | |2: |
3390 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4177 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
4178 | |.if FPU | ||
3391 | | bnez AT, >7 | 4179 | | bnez AT, >7 |
3392 | |. sdc1 f20, NODE:TMP2->val | 4180 | |. sdc1 f20, NODE:TMP2->val |
4181 | |.else | ||
4182 | | sw SFT1, NODE:TMP2->val.u32.hi | ||
4183 | | bnez AT, >7 | ||
4184 | |. sw SFT2, NODE:TMP2->val.u32.lo | ||
4185 | |.endif | ||
3393 | |3: | 4186 | |3: |
3394 | | ins_next | 4187 | | ins_next |
3395 | | | 4188 | | |
@@ -3417,6 +4210,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3417 | | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. | 4210 | | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. |
3418 | |. li AT, LJ_TSTR | 4211 | |. li AT, LJ_TSTR |
3419 | |6: | 4212 | |6: |
4213 | |.if not FPU | ||
4214 | | sw SFT1, TEMP_SAVE_1 | ||
4215 | | sw SFT2, TEMP_SAVE_2 | ||
4216 | |.endif | ||
3420 | | load_got lj_tab_newkey | 4217 | | load_got lj_tab_newkey |
3421 | | sw STR:RC, LO(CARG3) | 4218 | | sw STR:RC, LO(CARG3) |
3422 | | sw AT, HI(CARG3) | 4219 | | sw AT, HI(CARG3) |
@@ -3427,8 +4224,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3427 | |. move CARG1, L | 4224 | |. move CARG1, L |
3428 | | // Returns TValue *. | 4225 | | // Returns TValue *. |
3429 | | lw BASE, L->base | 4226 | | lw BASE, L->base |
4227 | |.if FPU | ||
3430 | | b <3 // No 2nd write barrier needed. | 4228 | | b <3 // No 2nd write barrier needed. |
3431 | |. sdc1 f20, 0(CRET1) | 4229 | |. sdc1 f20, 0(CRET1) |
4230 | |.else | ||
4231 | | lw SFT2, TEMP_SAVE_1 | ||
4232 | | lw SFT3, TEMP_SAVE_2 | ||
4233 | | sw SFT2, 0(CRET1) | ||
4234 | | b <3 | ||
4235 | |. sw SFT3, 4(CRET1) | ||
4236 | |.endif | ||
3432 | | | 4237 | | |
3433 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4238 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
3434 | | barrierback TAB:RB, TMP3, TMP0, <3 | 4239 | | barrierback TAB:RB, TMP3, TMP0, <3 |
@@ -3453,11 +4258,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3453 | | lw TMP1, HI(RC) | 4258 | | lw TMP1, HI(RC) |
3454 | | lbu TMP3, TAB:RB->marked | 4259 | | lbu TMP3, TAB:RB->marked |
3455 | | beq TMP1, TISNIL, >5 | 4260 | | beq TMP1, TISNIL, >5 |
3456 | |. ldc1 f0, 0(RA) | ||
3457 | |1: | 4261 | |1: |
3458 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4262 | |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
4263 | | load_double1 0(RA) | ||
4264 | |.if FPU | ||
3459 | | bnez AT, >7 | 4265 | | bnez AT, >7 |
3460 | |. sdc1 f0, 0(RC) | 4266 | |. sdc1 f0, 0(RC) |
4267 | |.else | ||
4268 | | sw SFT1, 0(RC) | ||
4269 | | bnez AT, >7 | ||
4270 | |. sw SFT2, 4(RC) | ||
4271 | |.endif | ||
3461 | |2: | 4272 | |2: |
3462 | | ins_next | 4273 | | ins_next |
3463 | | | 4274 | | |
@@ -3482,12 +4293,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3482 | | decode_RDtoRC8 RC, RD | 4293 | | decode_RDtoRC8 RC, RD |
3483 | | addu CARG1, BASE, RB | 4294 | | addu CARG1, BASE, RB |
3484 | | addu CARG3, BASE, RC | 4295 | | addu CARG3, BASE, RC |
3485 | | lw TAB:CARG2, LO(CARG1) | 4296 | |.if FPU |
3486 | | ldc1 f0, 0(CARG3) | 4297 | | ldc1 f0, 0(CARG3) |
3487 | | trunc.w.d f2, f0 | 4298 | | trunc.w.d f2, f0 |
4299 | | mfc1 CARG3, f2 | ||
4300 | |.else | ||
4301 | | load_got __fixdfsi | ||
4302 | | sw CARG1, TEMP_SAVE_1 | ||
4303 | | lw CARG1, 0(CARG3) | ||
4304 | | call_extern | ||
4305 | |. lw CARG2, 4(CARG3) | ||
4306 | | lw CARG1, TEMP_SAVE_1 | ||
4307 | | move CARG3, CRET1 | ||
4308 | |.endif | ||
4309 | | lw TAB:CARG2, LO(CARG1) | ||
3488 | | lbu TMP3, TAB:CARG2->marked | 4310 | | lbu TMP3, TAB:CARG2->marked |
3489 | | lw TMP0, TAB:CARG2->asize | 4311 | | lw TMP0, TAB:CARG2->asize |
3490 | | mfc1 CARG3, f2 | ||
3491 | | lw TMP1, TAB:CARG2->array | 4312 | | lw TMP1, TAB:CARG2->array |
3492 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | 4313 | | andi AT, TMP3, LJ_GC_BLACK // isblack(table) |
3493 | | bnez AT, >7 | 4314 | | bnez AT, >7 |
@@ -3495,12 +4316,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3495 | |2: | 4316 | |2: |
3496 | | sltu AT, CARG3, TMP0 | 4317 | | sltu AT, CARG3, TMP0 |
3497 | | sll TMP2, CARG3, 3 | 4318 | | sll TMP2, CARG3, 3 |
4319 | |.if FPU | ||
3498 | | beqz AT, ->vmeta_tsetr // In array part? | 4320 | | beqz AT, ->vmeta_tsetr // In array part? |
3499 | |. ldc1 f20, 0(RA) | 4321 | |. ldc1 f20, 0(RA) |
3500 | | addu CRET1, TMP1, TMP2 | 4322 | | addu CRET1, TMP1, TMP2 |
3501 | |->BC_TSETR_Z: | 4323 | |->BC_TSETR_Z: |
4324 | |.else | ||
4325 | | lw TMP0, 0(RA) | ||
4326 | | lw TMP3, 4(RA) | ||
4327 | | sw TMP0, TEMP_SAVE_1 | ||
4328 | | beqz AT, ->vmeta_tsetr // In array part? | ||
4329 | |. sw TMP3, TEMP_SAVE_2 | ||
4330 | | addu CRET1, TMP1, TMP2 | ||
4331 | |->BC_TSETR_Z: | ||
4332 | | lw TMP0, TEMP_SAVE_1 | ||
4333 | | lw TMP3, TEMP_SAVE_2 | ||
4334 | |.endif | ||
3502 | | ins_next1 | 4335 | | ins_next1 |
3503 | | sdc1 f20, 0(CRET1) | 4336 | | store_double f20, TMP0, TMP3, 0(CRET1) |
3504 | | ins_next2 | 4337 | | ins_next2 |
3505 | | | 4338 | | |
3506 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 4339 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
@@ -3529,10 +4362,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3529 | | addu TMP1, TMP1, CARG1 | 4362 | | addu TMP1, TMP1, CARG1 |
3530 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) | 4363 | | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) |
3531 | |3: // Copy result slots to table. | 4364 | |3: // Copy result slots to table. |
3532 | | ldc1 f0, 0(RA) | 4365 | | load_double1 0(RA) |
3533 | | addiu RA, RA, 8 | 4366 | | addiu RA, RA, 8 |
3534 | | sltu AT, RA, TMP2 | 4367 | | sltu AT, RA, TMP2 |
3535 | | sdc1 f0, 0(TMP1) | 4368 | | store_double1 0(TMP1) |
3536 | | bnez AT, <3 | 4369 | | bnez AT, <3 |
3537 | |. addiu TMP1, TMP1, 8 | 4370 | |. addiu TMP1, TMP1, 8 |
3538 | | bnez TMP0, >7 | 4371 | | bnez TMP0, >7 |
@@ -3607,10 +4440,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3607 | | beqz NARGS8:RC, >3 | 4440 | | beqz NARGS8:RC, >3 |
3608 | |. move TMP3, NARGS8:RC | 4441 | |. move TMP3, NARGS8:RC |
3609 | |2: | 4442 | |2: |
3610 | | ldc1 f0, 0(RA) | 4443 | | load_double1 0(RA) |
3611 | | addiu RA, RA, 8 | 4444 | | addiu RA, RA, 8 |
3612 | | addiu TMP3, TMP3, -8 | 4445 | | addiu TMP3, TMP3, -8 |
3613 | | sdc1 f0, 0(TMP2) | 4446 | | store_double1 0(TMP2) |
3614 | | bnez TMP3, <2 | 4447 | | bnez TMP3, <2 |
3615 | |. addiu TMP2, TMP2, 8 | 4448 | |. addiu TMP2, TMP2, 8 |
3616 | |3: | 4449 | |3: |
@@ -3647,12 +4480,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3647 | | li AT, LJ_TFUNC | 4480 | | li AT, LJ_TFUNC |
3648 | | lw TMP1, -24+HI(BASE) | 4481 | | lw TMP1, -24+HI(BASE) |
3649 | | lw LFUNC:RB, -24+LO(BASE) | 4482 | | lw LFUNC:RB, -24+LO(BASE) |
3650 | | ldc1 f2, -8(BASE) | 4483 | | load_double1 -8(BASE) |
3651 | | ldc1 f0, -16(BASE) | 4484 | | load_double2 -16(BASE) |
3652 | | sw TMP1, HI(BASE) // Copy callable. | 4485 | | sw TMP1, HI(BASE) // Copy callable. |
3653 | | sw LFUNC:RB, LO(BASE) | 4486 | | sw LFUNC:RB, LO(BASE) |
3654 | | sdc1 f2, 16(BASE) // Copy control var. | 4487 | | store_double1 16(BASE) // Copy control var. |
3655 | | sdc1 f0, 8(BASE) // Copy state. | 4488 | | store_double2 8(BASE) // Copy state. |
3656 | | addiu BASE, BASE, 8 | 4489 | | addiu BASE, BASE, 8 |
3657 | | bne TMP1, AT, ->vmeta_call | 4490 | | bne TMP1, AT, ->vmeta_call |
3658 | |. li NARGS8:RC, 16 // Iterators get 2 arguments. | 4491 | |. li NARGS8:RC, 16 // Iterators get 2 arguments. |
@@ -3676,19 +4509,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3676 | |. sll TMP3, RC, 3 | 4509 | |. sll TMP3, RC, 3 |
3677 | | addu TMP3, TMP1, TMP3 | 4510 | | addu TMP3, TMP1, TMP3 |
3678 | | lw TMP2, HI(TMP3) | 4511 | | lw TMP2, HI(TMP3) |
3679 | | ldc1 f0, 0(TMP3) | 4512 | | load_double1 0(TMP3) |
4513 | |.if FPU | ||
3680 | | mtc1 RC, f2 | 4514 | | mtc1 RC, f2 |
4515 | |.else | ||
4516 | | move CARG1, RC | ||
4517 | |.endif | ||
3681 | | lhu RD, -4+OFS_RD(PC) | 4518 | | lhu RD, -4+OFS_RD(PC) |
3682 | | beq TMP2, TISNIL, <1 // Skip holes in array part. | 4519 | | beq TMP2, TISNIL, <1 // Skip holes in array part. |
3683 | |. addiu RC, RC, 1 | 4520 | |. addiu RC, RC, 1 |
4521 | | store_double1 8(RA) | ||
4522 | |.if FPU | ||
3684 | | cvt.d.w f2, f2 | 4523 | | cvt.d.w f2, f2 |
4524 | |.else | ||
4525 | | load_got __floatsidf | ||
4526 | | call_extern | ||
4527 | |. nop | ||
4528 | |.endif | ||
3685 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 4529 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
3686 | | sdc1 f0, 8(RA) | 4530 | | store_double f2, CRET1, CRET2, 0(RA) |
3687 | | decode_RD4b RD | 4531 | | decode_RD4b RD |
3688 | | addu RD, RD, TMP3 | 4532 | | addu RD, RD, TMP3 |
3689 | | sw RC, -8+LO(RA) // Update control var. | 4533 | | sw RC, -8+LO(RA) // Update control var. |
3690 | | addu PC, PC, RD | 4534 | | addu PC, PC, RD |
3691 | | sdc1 f2, 0(RA) | ||
3692 | |3: | 4535 | |3: |
3693 | | ins_next | 4536 | | ins_next |
3694 | | | 4537 | | |
@@ -3704,17 +4547,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3704 | | subu TMP3, TMP3, RB | 4547 | | subu TMP3, TMP3, RB |
3705 | | addu NODE:TMP3, TMP3, TMP2 | 4548 | | addu NODE:TMP3, TMP3, TMP2 |
3706 | | lw RB, HI(NODE:TMP3) | 4549 | | lw RB, HI(NODE:TMP3) |
3707 | | ldc1 f0, 0(NODE:TMP3) | 4550 | | load_double1 0(NODE:TMP3) |
3708 | | lhu RD, -4+OFS_RD(PC) | 4551 | | lhu RD, -4+OFS_RD(PC) |
3709 | | beq RB, TISNIL, <6 // Skip holes in hash part. | 4552 | | beq RB, TISNIL, <6 // Skip holes in hash part. |
3710 | |. addiu RC, RC, 1 | 4553 | |. addiu RC, RC, 1 |
4554 | |.if FPU | ||
3711 | | ldc1 f2, NODE:TMP3->key | 4555 | | ldc1 f2, NODE:TMP3->key |
4556 | |.else | ||
4557 | | lw SFT3, NODE:TMP3->key.u32.hi | ||
4558 | | lw SFT4, NODE:TMP3->key.u32.lo | ||
4559 | |.endif | ||
3712 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | 4560 | | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) |
3713 | | sdc1 f0, 8(RA) | 4561 | | store_double1 8(RA) |
3714 | | addu RC, RC, TMP0 | 4562 | | addu RC, RC, TMP0 |
3715 | | decode_RD4b RD | 4563 | | decode_RD4b RD |
3716 | | addu RD, RD, TMP3 | 4564 | | addu RD, RD, TMP3 |
3717 | | sdc1 f2, 0(RA) | 4565 | | store_double2 0(RA) |
3718 | | addu PC, PC, RD | 4566 | | addu PC, PC, RD |
3719 | | b <3 | 4567 | | b <3 |
3720 | |. sw RC, -8+LO(RA) // Update control var. | 4568 | |. sw RC, -8+LO(RA) // Update control var. |
@@ -3794,9 +4642,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3794 | | bnez AT, >7 | 4642 | | bnez AT, >7 |
3795 | |. addiu MULTRES, TMP1, 8 | 4643 | |. addiu MULTRES, TMP1, 8 |
3796 | |6: | 4644 | |6: |
3797 | | ldc1 f0, 0(RC) | 4645 | | load_double1 0(RC) |
3798 | | addiu RC, RC, 8 | 4646 | | addiu RC, RC, 8 |
3799 | | sdc1 f0, 0(RA) | 4647 | | store_double1 0(RA) |
3800 | | sltu AT, RC, TMP3 | 4648 | | sltu AT, RC, TMP3 |
3801 | | bnez AT, <6 // More vararg slots? | 4649 | | bnez AT, <6 // More vararg slots? |
3802 | |. addiu RA, RA, 8 | 4650 | |. addiu RA, RA, 8 |
@@ -3852,10 +4700,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3852 | | beqz RC, >3 | 4700 | | beqz RC, >3 |
3853 | |. subu BASE, TMP2, TMP0 | 4701 | |. subu BASE, TMP2, TMP0 |
3854 | |2: | 4702 | |2: |
3855 | | ldc1 f0, 0(RA) | 4703 | | load_double1 0(RA) |
3856 | | addiu RA, RA, 8 | 4704 | | addiu RA, RA, 8 |
3857 | | addiu RC, RC, -8 | 4705 | | addiu RC, RC, -8 |
3858 | | sdc1 f0, 0(TMP2) | 4706 | | store_double1 0(TMP2) |
3859 | | bnez RC, <2 | 4707 | | bnez RC, <2 |
3860 | |. addiu TMP2, TMP2, 8 | 4708 | |. addiu TMP2, TMP2, 8 |
3861 | |3: | 4709 | |3: |
@@ -3896,14 +4744,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3896 | | lw INS, -4(PC) | 4744 | | lw INS, -4(PC) |
3897 | | addiu TMP2, BASE, -8 | 4745 | | addiu TMP2, BASE, -8 |
3898 | if (op == BC_RET1) { | 4746 | if (op == BC_RET1) { |
3899 | | ldc1 f0, 0(RA) | 4747 | | load_double1 0(RA) |
3900 | } | 4748 | } |
3901 | | decode_RB8a RB, INS | 4749 | | decode_RB8a RB, INS |
3902 | | decode_RA8a RA, INS | 4750 | | decode_RA8a RA, INS |
3903 | | decode_RB8b RB | 4751 | | decode_RB8b RB |
3904 | | decode_RA8b RA | 4752 | | decode_RA8b RA |
3905 | if (op == BC_RET1) { | 4753 | if (op == BC_RET1) { |
3906 | | sdc1 f0, 0(TMP2) | 4754 | | store_double1 0(TMP2) |
3907 | } | 4755 | } |
3908 | | subu BASE, TMP2, RA | 4756 | | subu BASE, TMP2, RA |
3909 | |5: | 4757 | |5: |
@@ -3928,6 +4776,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3928 | 4776 | ||
3929 | /* -- Loops and branches ------------------------------------------------ */ | 4777 | /* -- Loops and branches ------------------------------------------------ */ |
3930 | 4778 | ||
4779 | |.macro cmp_res, gt | ||
4780 | |.if gt == 1 | ||
4781 | |.if FPU | ||
4782 | | movf TMP1, r0, 0 // f0>f2: TMP1=0 | ||
4783 | | movf TMP2, r0, 1 // f2>f0: TMP2=0 | ||
4784 | |.else | ||
4785 | | li SFT2, 1 | ||
4786 | | bne CRET1, SFT2, >1 | ||
4787 | |. nop | ||
4788 | | b >2 | ||
4789 | |. move TMP1, r0 | ||
4790 | |1: | ||
4791 | | li SFT2, -1 | ||
4792 | | bne CRET1, SFT2, >2 | ||
4793 | |. nop | ||
4794 | | move TMP2, r0 | ||
4795 | |2: | ||
4796 | |.endif | ||
4797 | |.else | ||
4798 | |.if FPU | ||
4799 | | movt TMP1, r0, 0 // f0<=f2: TMP1=0 | ||
4800 | | movt TMP2, r0, 1 // f2<=f0: TMP2=0 | ||
4801 | |.else | ||
4802 | | bltz CRET1, >3 // f0<f2: TMP1=0 | ||
4803 | |. nop | ||
4804 | | beqz CRET1, >2 // f0==f2: TMP1=TMP2=0 | ||
4805 | |. li SFT2, 1 | ||
4806 | | bne SFT2, CRET1, >4 // f0>f2: TMP2=0 | ||
4807 | |. nop | ||
4808 | | b >4 | ||
4809 | |2: | ||
4810 | |. move TMP2, r0 | ||
4811 | |3: | ||
4812 | | move TMP1, r0 | ||
4813 | |4: | ||
4814 | |.endif | ||
4815 | |.endif | ||
4816 | |.endmacro | ||
4817 | |||
3931 | case BC_FORL: | 4818 | case BC_FORL: |
3932 | |.if JIT | 4819 | |.if JIT |
3933 | | hotloop | 4820 | | hotloop |
@@ -3946,12 +4833,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3946 | vk = (op == BC_IFORL || op == BC_JFORL); | 4833 | vk = (op == BC_IFORL || op == BC_JFORL); |
3947 | | addu RA, BASE, RA | 4834 | | addu RA, BASE, RA |
3948 | if (vk) { | 4835 | if (vk) { |
4836 | |.if FPU | ||
3949 | | ldc1 f0, FORL_IDX*8(RA) | 4837 | | ldc1 f0, FORL_IDX*8(RA) |
3950 | | ldc1 f4, FORL_STEP*8(RA) | 4838 | | ldc1 f4, FORL_STEP*8(RA) |
3951 | | ldc1 f2, FORL_STOP*8(RA) | 4839 | | ldc1 f2, FORL_STOP*8(RA) |
3952 | | lw TMP3, FORL_STEP*8+HI(RA) | 4840 | | lw TMP3, FORL_STEP*8+HI(RA) |
3953 | | add.d f0, f0, f4 | 4841 | | add.d f0, f0, f4 |
3954 | | sdc1 f0, FORL_IDX*8(RA) | 4842 | | sdc1 f0, FORL_IDX*8(RA) |
4843 | |.else | ||
4844 | | load_got __adddf3 | ||
4845 | | load_farg1 FORL_IDX*8(RA) | ||
4846 | | load_farg2 FORL_STEP*8(RA) | ||
4847 | | call_extern | ||
4848 | |. sw RD, TEMP_SAVE_1 //save RD | ||
4849 | | sw CRET1, FORL_IDX*8(RA) | ||
4850 | | sw CRET2, FORL_IDX*8+4(RA) | ||
4851 | | load_farg1 FORL_IDX*8(RA) | ||
4852 | | load_farg2 FORL_STOP*8(RA) // f0 and f2 | ||
4853 | | lw TMP3, FORL_STEP*8+HI(RA) | ||
4854 | | lw RD, TEMP_SAVE_1 | ||
4855 | |.endif | ||
3955 | } else { | 4856 | } else { |
3956 | | lw TMP1, FORL_IDX*8+HI(RA) | 4857 | | lw TMP1, FORL_IDX*8+HI(RA) |
3957 | | lw TMP3, FORL_STEP*8+HI(RA) | 4858 | | lw TMP3, FORL_STEP*8+HI(RA) |
@@ -3961,25 +4862,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3961 | | sltiu TMP2, TMP2, LJ_TISNUM | 4862 | | sltiu TMP2, TMP2, LJ_TISNUM |
3962 | | and TMP1, TMP1, TMP0 | 4863 | | and TMP1, TMP1, TMP0 |
3963 | | and TMP1, TMP1, TMP2 | 4864 | | and TMP1, TMP1, TMP2 |
4865 | |.if FPU | ||
3964 | | ldc1 f0, FORL_IDX*8(RA) | 4866 | | ldc1 f0, FORL_IDX*8(RA) |
3965 | | beqz TMP1, ->vmeta_for | 4867 | | beqz TMP1, ->vmeta_for |
3966 | |. ldc1 f2, FORL_STOP*8(RA) | 4868 | |. ldc1 f2, FORL_STOP*8(RA) |
4869 | |.else | ||
4870 | | beqz TMP1, ->vmeta_for | ||
4871 | | load_farg1 FORL_IDX*8(RA) | ||
4872 | | load_farg2 FORL_STOP*8(RA) | ||
4873 | |.endif | ||
3967 | } | 4874 | } |
3968 | if (op != BC_JFORL) { | 4875 | if (op != BC_JFORL) { |
3969 | | srl RD, RD, 1 | 4876 | | srl RD, RD, 1 |
3970 | | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) | 4877 | | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) |
3971 | } | 4878 | } |
4879 | | store_double f0, CARG1, CARG2, FORL_EXT*8(RA) | ||
4880 | |.if FPU | ||
3972 | | c.le.d 0, f0, f2 | 4881 | | c.le.d 0, f0, f2 |
3973 | | c.le.d 1, f2, f0 | 4882 | | c.le.d 1, f2, f0 |
3974 | | sdc1 f0, FORL_EXT*8(RA) | 4883 | |.else |
4884 | | sw RD, TEMP_SAVE_1 | ||
4885 | | load_got __ledf2 // f0<=f2 | ||
4886 | | call_extern | ||
4887 | |. sw TMP0, TEMP_SAVE_2 | ||
4888 | | lw TMP0, TEMP_SAVE_2 | ||
4889 | | lw RD, TEMP_SAVE_1 | ||
4890 | | lw TMP3, FORL_STEP*8+HI(RA) // Restored step. | ||
4891 | |.endif | ||
4892 | | | ||
3975 | if (op == BC_JFORI) { | 4893 | if (op == BC_JFORI) { |
3976 | | li TMP1, 1 | 4894 | | li TMP1, 1 |
3977 | | li TMP2, 1 | 4895 | | li TMP2, 1 |
3978 | | addu TMP0, RD, TMP0 | 4896 | | addu TMP0, RD, TMP0 |
3979 | | slt TMP3, TMP3, r0 | 4897 | | slt TMP3, TMP3, r0 |
3980 | | movf TMP1, r0, 0 | 4898 | | cmp_res 1 |
3981 | | addu PC, PC, TMP0 | 4899 | | addu PC, PC, TMP0 |
3982 | | movf TMP2, r0, 1 | ||
3983 | | lhu RD, -4+OFS_RD(PC) | 4900 | | lhu RD, -4+OFS_RD(PC) |
3984 | | movn TMP1, TMP2, TMP3 | 4901 | | movn TMP1, TMP2, TMP3 |
3985 | | bnez TMP1, =>BC_JLOOP | 4902 | | bnez TMP1, =>BC_JLOOP |
@@ -3988,8 +4905,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3988 | | li TMP1, 1 | 4905 | | li TMP1, 1 |
3989 | | li TMP2, 1 | 4906 | | li TMP2, 1 |
3990 | | slt TMP3, TMP3, r0 | 4907 | | slt TMP3, TMP3, r0 |
3991 | | movf TMP1, r0, 0 | 4908 | | cmp_res 1 |
3992 | | movf TMP2, r0, 1 | ||
3993 | | movn TMP1, TMP2, TMP3 | 4909 | | movn TMP1, TMP2, TMP3 |
3994 | | bnez TMP1, =>BC_JLOOP | 4910 | | bnez TMP1, =>BC_JLOOP |
3995 | |. nop | 4911 | |. nop |
@@ -3998,11 +4914,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3998 | | slt TMP3, TMP3, r0 | 4914 | | slt TMP3, TMP3, r0 |
3999 | | move TMP2, TMP1 | 4915 | | move TMP2, TMP1 |
4000 | if (op == BC_FORI) { | 4916 | if (op == BC_FORI) { |
4001 | | movt TMP1, r0, 0 | 4917 | | cmp_res 0 |
4002 | | movt TMP2, r0, 1 | ||
4003 | } else { | 4918 | } else { |
4004 | | movf TMP1, r0, 0 | 4919 | | cmp_res 1 |
4005 | | movf TMP2, r0, 1 | ||
4006 | } | 4920 | } |
4007 | | movn TMP1, TMP2, TMP3 | 4921 | | movn TMP1, TMP2, TMP3 |
4008 | | addu PC, PC, TMP1 | 4922 | | addu PC, PC, TMP1 |
@@ -4256,8 +5170,10 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4256 | fcofs, CFRAME_SIZE); | 5170 | fcofs, CFRAME_SIZE); |
4257 | for (i = 23; i >= 16; i--) | 5171 | for (i = 23; i >= 16; i--) |
4258 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); | 5172 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); |
5173 | #if !LJ_SOFTFP | ||
4259 | for (i = 30; i >= 20; i -= 2) | 5174 | for (i = 30; i >= 20; i -= 2) |
4260 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); | 5175 | fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); |
5176 | #endif | ||
4261 | fprintf(ctx->fp, | 5177 | fprintf(ctx->fp, |
4262 | "\t.align 2\n" | 5178 | "\t.align 2\n" |
4263 | ".LEFDE0:\n\n"); | 5179 | ".LEFDE0:\n\n"); |
@@ -4275,6 +5191,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4275 | "\t.align 2\n" | 5191 | "\t.align 2\n" |
4276 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | 5192 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); |
4277 | #endif | 5193 | #endif |
5194 | #if !LJ_NO_UNWIND | ||
4278 | fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); | 5195 | fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); |
4279 | fprintf(ctx->fp, | 5196 | fprintf(ctx->fp, |
4280 | "\t.globl lj_err_unwind_dwarf\n" | 5197 | "\t.globl lj_err_unwind_dwarf\n" |
@@ -4343,6 +5260,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
4343 | "\t.align 2\n" | 5260 | "\t.align 2\n" |
4344 | ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); | 5261 | ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); |
4345 | #endif | 5262 | #endif |
5263 | #endif | ||
4346 | break; | 5264 | break; |
4347 | default: | 5265 | default: |
4348 | break; | 5266 | break; |