about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2016-01-29 07:03:36 +0100
committer: Mike Pall <mike> 2016-01-29 07:03:36 +0100
commit: 60de2f3d510b87f403c8dee01ed4caee1971894d (patch)
tree: 914c34dfddb2347bc75c9359b242ba0f95925d83
parent: 2f6b2967c7312d867890df158fe6e0988fda3854 (diff)
download: luajit-60de2f3d510b87f403c8dee01ed4caee1971894d.tar.gz
download: luajit-60de2f3d510b87f403c8dee01ed4caee1971894d.tar.bz2
download: luajit-60de2f3d510b87f403c8dee01ed4caee1971894d.zip
MIPS: Switch to dual-number mode. Fix soft-float interpreter.
-rw-r--r--  src/Makefile       5
-rw-r--r--  src/lj_arch.h      34
-rw-r--r--  src/lj_dispatch.h  19
-rw-r--r--  src/lj_frame.h     12
-rw-r--r--  src/lj_vm.h        4
-rw-r--r--  src/lj_vmmath.c    28
-rw-r--r--  src/vm_mips.dasc   2742
7 files changed, 1418 insertions, 1426 deletions
diff --git a/src/Makefile b/src/Makefile
index 6d9a1053..1df39dc1 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -388,6 +388,11 @@ DASM_XFLAGS=
388DASM_AFLAGS= 388DASM_AFLAGS=
389DASM_ARCH= $(TARGET_LJARCH) 389DASM_ARCH= $(TARGET_LJARCH)
390 390
391ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
392 DASM_AFLAGS+= -D ENDIAN_LE
393else
394 DASM_AFLAGS+= -D ENDIAN_BE
395endif
391ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) 396ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
392 DASM_AFLAGS+= -D P64 397 DASM_AFLAGS+= -D P64
393endif 398endif
diff --git a/src/lj_arch.h b/src/lj_arch.h
index a114bdda..7096ad5e 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -295,6 +295,31 @@
295#define LJ_ARCH_NAME "mips" 295#define LJ_ARCH_NAME "mips"
296#define LJ_ARCH_ENDIAN LUAJIT_BE 296#define LJ_ARCH_ENDIAN LUAJIT_BE
297#endif 297#endif
298
299#if !defined(LJ_ARCH_HASFPU)
300#ifdef __mips_soft_float
301#define LJ_ARCH_HASFPU 0
302#else
303#define LJ_ARCH_HASFPU 1
304#endif
305#endif
306
307/* Temporarily disable features until the code has been merged. */
308#if !LJ_ARCH_HASFPU
309#define LJ_ARCH_NOJIT 1
310#endif
311#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
312#define LUAJIT_NO_UNWIND 1
313#endif
314
315#if !defined(LJ_ABI_SOFTFP)
316#ifdef __mips_soft_float
317#define LJ_ABI_SOFTFP 1
318#else
319#define LJ_ABI_SOFTFP 0
320#endif
321#endif
322
298#define LJ_ARCH_BITS 32 323#define LJ_ARCH_BITS 32
299#define LJ_TARGET_MIPS 1 324#define LJ_TARGET_MIPS 1
300#define LJ_TARGET_EHRETREG 4 325#define LJ_TARGET_EHRETREG 4
@@ -302,14 +327,7 @@
302#define LJ_TARGET_MASKSHIFT 1 327#define LJ_TARGET_MASKSHIFT 1
303#define LJ_TARGET_MASKROT 1 328#define LJ_TARGET_MASKROT 1
304#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 329#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
305#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE 330#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
306
307#if !defined(LJ_ARCH_HASFPU) && defined(__mips_soft_float)
308#define LJ_ARCH_HASFPU 0
309#endif
310#if !defined(LJ_ABI_SOFTFP) && defined(__mips_soft_float)
311#define LJ_ABI_SOFTFP 1
312#endif
313 331
314#if _MIPS_ARCH_MIPS32R2 332#if _MIPS_ARCH_MIPS32R2
315#define LJ_ARCH_VERSION 20 333#define LJ_ARCH_VERSION 20
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 73d00ec0..5844115b 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -14,18 +14,19 @@
14 14
15#if LJ_TARGET_MIPS 15#if LJ_TARGET_MIPS
16/* Need our own global offset table for the dreaded MIPS calling conventions. */ 16/* Need our own global offset table for the dreaded MIPS calling conventions. */
17
18#ifndef _LJ_VM_H
19LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b);
20#endif
21
17#if LJ_SOFTFP 22#if LJ_SOFTFP
23#ifndef _LJ_IRCALL_H
18extern double __adddf3(double a, double b); 24extern double __adddf3(double a, double b);
19extern double __subdf3(double a, double b); 25extern double __subdf3(double a, double b);
20extern double __muldf3(double a, double b); 26extern double __muldf3(double a, double b);
21extern double __divdf3(double a, double b); 27extern double __divdf3(double a, double b);
22extern void __ledf2(double a, double b); 28#endif
23extern double __floatsidf(int32_t a); 29#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3)
24extern int32_t __fixdfsi(double a);
25
26#define SFGOTDEF(_) \
27 _(lj_num2bit) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) _(__ledf2) \
28 _(__floatsidf) _(__fixdfsi)
29#else 30#else
30#define SFGOTDEF(_) 31#define SFGOTDEF(_)
31#endif 32#endif
@@ -43,14 +44,14 @@ extern int32_t __fixdfsi(double a);
43#define GOTDEF(_) \ 44#define GOTDEF(_) \
44 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 45 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
45 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 46 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
46 _(pow) _(fmod) _(ldexp) \ 47 _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
47 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ 48 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
48 _(lj_dispatch_profile) _(lj_err_throw) \ 49 _(lj_dispatch_profile) _(lj_err_throw) \
49 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 50 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
50 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 51 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
51 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 52 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
52 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \ 53 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
53 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_num) \ 54 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \
54 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ 55 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
55 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 56 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
56 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ 57 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
diff --git a/src/lj_frame.h b/src/lj_frame.h
index aa3ab20b..07b36cdf 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -228,13 +228,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
228#define CFRAME_SIZE 112 228#define CFRAME_SIZE 112
229#define CFRAME_SHIFT_MULTRES 3 229#define CFRAME_SHIFT_MULTRES 3
230#else 230#else
231#define CFRAME_OFS_ERRF 100 231#define CFRAME_OFS_ERRF 76
232#define CFRAME_OFS_NRES 96 232#define CFRAME_OFS_NRES 72
233#define CFRAME_OFS_PREV 92 233#define CFRAME_OFS_PREV 68
234#define CFRAME_OFS_L 88 234#define CFRAME_OFS_L 64
235#define CFRAME_OFS_PC 44 235#define CFRAME_OFS_PC 20
236#define CFRAME_OFS_MULTRES 16 236#define CFRAME_OFS_MULTRES 16
237#define CFRAME_SIZE 88 237#define CFRAME_SIZE 64
238#define CFRAME_SHIFT_MULTRES 3 238#define CFRAME_SHIFT_MULTRES 3
239#endif 239#endif
240#else 240#else
diff --git a/src/lj_vm.h b/src/lj_vm.h
index cb76d7a7..9afb53ab 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -66,6 +66,9 @@ LJ_ASMF double lj_vm_log2(double);
66#else 66#else
67#define lj_vm_log2 log2 67#define lj_vm_log2 log2
68#endif 68#endif
69#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS)
70LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
71#endif
69 72
70#if LJ_HASJIT 73#if LJ_HASJIT
71#if LJ_TARGET_X86ORX64 74#if LJ_TARGET_X86ORX64
@@ -90,7 +93,6 @@ LJ_ASMF double lj_vm_exp2(double);
90#else 93#else
91#define lj_vm_exp2 exp2 94#define lj_vm_exp2 exp2
92#endif 95#endif
93LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
94#if LJ_HASFFI 96#if LJ_HASFFI
95LJ_ASMF int lj_vm_errno(void); 97LJ_ASMF int lj_vm_errno(void);
96#endif 98#endif
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index ecad2950..90d8a78e 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -57,6 +57,20 @@ double lj_vm_foldarith(double x, double y, int op)
57 } 57 }
58} 58}
59 59
60#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
61int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
62{
63 uint32_t y, ua, ub;
64 lua_assert(b != 0); /* This must be checked before using this function. */
65 ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
66 ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
67 y = ua % ub;
68 if (y != 0 && (a^b) < 0) y = y - ub;
69 if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y;
70 return (int32_t)y;
71}
72#endif
73
60#if LJ_HASJIT 74#if LJ_HASJIT
61 75
62#ifdef LUAJIT_NO_LOG2 76#ifdef LUAJIT_NO_LOG2
@@ -73,20 +87,6 @@ double lj_vm_exp2(double a)
73} 87}
74#endif 88#endif
75 89
76#if !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)
77int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
78{
79 uint32_t y, ua, ub;
80 lua_assert(b != 0); /* This must be checked before using this function. */
81 ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
82 ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
83 y = ua % ub;
84 if (y != 0 && (a^b) < 0) y = y - ub;
85 if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y;
86 return (int32_t)y;
87}
88#endif
89
90#if !LJ_TARGET_X86ORX64 90#if !LJ_TARGET_X86ORX64
91/* Unsigned x^k. */ 91/* Unsigned x^k. */
92static double lj_vm_powui(double x, uint32_t k) 92static double lj_vm_powui(double x, uint32_t k)
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 0dba1293..8c307d8d 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -34,11 +34,11 @@
34|.define DISPATCH, r19 // Opcode dispatch table. 34|.define DISPATCH, r19 // Opcode dispatch table.
35|.define LREG, r20 // Register holding lua_State (also in SAVE_L). 35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. 36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
37|// NYI: r22 currently unused.
38| 37|
39|.define JGL, r30 // On-trace: global_State + 32768. 38|.define JGL, r30 // On-trace: global_State + 32768.
40| 39|
41|// Constants for type-comparisons, stores and conversions. C callee-save. 40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNUM, r22
42|.define TISNIL, r30 42|.define TISNIL, r30
43|.if FPU 43|.if FPU
44|.define TOBIT, f30 // 2^52 + 2^51. 44|.define TOBIT, f30 // 2^52 + 2^51.
@@ -57,13 +57,6 @@
57|.define TMP2, r14 57|.define TMP2, r14
58|.define TMP3, r15 58|.define TMP3, r15
59| 59|
60|.if not FPU
61|.define SFT1, r2
62|.define SFT2, r3
63|.define SFT3, r4
64|.define SFT4, r5
65|.endif
66|
67|// Calling conventions. 60|// Calling conventions.
68|.define CFUNCADDR, r25 61|.define CFUNCADDR, r25
69|.define CARG1, r4 62|.define CARG1, r4
@@ -74,6 +67,22 @@
74|.define CRET1, r2 67|.define CRET1, r2
75|.define CRET2, r3 68|.define CRET2, r3
76| 69|
70|.if ENDIAN_LE
71|.define SFRETLO, CRET1
72|.define SFRETHI, CRET2
73|.define SFARG1LO, CARG1
74|.define SFARG1HI, CARG2
75|.define SFARG2LO, CARG3
76|.define SFARG2HI, CARG4
77|.else
78|.define SFRETLO, CRET2
79|.define SFRETHI, CRET1
80|.define SFARG1LO, CARG2
81|.define SFARG1HI, CARG1
82|.define SFARG2LO, CARG4
83|.define SFARG2HI, CARG3
84|.endif
85|
77|.if FPU 86|.if FPU
78|.define FARG1, f12 87|.define FARG1, f12
79|.define FARG2, f14 88|.define FARG2, f14
@@ -84,6 +93,7 @@
84| 93|
85|// Stack layout while in interpreter. Must match with lj_frame.h. 94|// Stack layout while in interpreter. Must match with lj_frame.h.
86|.if FPU // MIPS32 hard-float. 95|.if FPU // MIPS32 hard-float.
96|
87|.define CFRAME_SPACE, 112 // Delta for sp. 97|.define CFRAME_SPACE, 112 // Delta for sp.
88| 98|
89|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. 99|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -93,36 +103,21 @@
93|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. 103|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
94|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. 104|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
95|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. 105|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
96|.define SAVE_PC, 20(sp)
97|.define ARG5, 16(sp)
98|.define CSAVE_4, 12(sp)
99|.define CSAVE_3, 8(sp)
100|.define CSAVE_2, 4(sp)
101|.define CSAVE_1, 0(sp)
102|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee.
103| 106|
104|.define ARG5_OFS, 16 107|.else // MIPS32 soft-float
105|.define SAVE_MULTRES, ARG5
106|
107|//-----------------------------------------------------------------------
108|.else // MIPS32 soft-float.
109| 108|
110|.define CFRAME_SPACE, 88 // Delta for sp. 109|.define CFRAME_SPACE, 64 // Delta for sp.
111| 110|
112|.define SAVE_ERRF, 100(sp) // 32 bit C frame info. 111|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
113|.define SAVE_NRES, 96(sp) 112|.define SAVE_NRES, 72(sp)
114|.define SAVE_CFRAME, 92(sp) 113|.define SAVE_CFRAME, 68(sp)
115|.define SAVE_L, 88(sp) 114|.define SAVE_L, 64(sp)
116|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. 115|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
117|.define SAVE_GPR_, 48 // .. 48+10*4: 32 bit GPR saves. 116|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
118|.define SAVE_PC, 44(sp) 117|
119|.define TEMP_SAVE_6, 40(sp) 118|.endif
120|.define TEMP_SAVE_5, 36(sp) 119|
121|.define TEMP_SAVE_4, 32(sp) 120|.define SAVE_PC, 20(sp)
122|.define TEMP_SAVE_3, 28(sp)
123|.define TEMP_SAVE_2, 24(sp)
124|.define TEMP_SAVE_1, 20(sp)
125|//----- 8 byte aligned, ^^^^ 24 byte register save area, owned by caller.
126|.define ARG5, 16(sp) 121|.define ARG5, 16(sp)
127|.define CSAVE_4, 12(sp) 122|.define CSAVE_4, 12(sp)
128|.define CSAVE_3, 8(sp) 123|.define CSAVE_3, 8(sp)
@@ -133,8 +128,6 @@
133|.define ARG5_OFS, 16 128|.define ARG5_OFS, 16
134|.define SAVE_MULTRES, ARG5 129|.define SAVE_MULTRES, ARG5
135| 130|
136|.endif
137|
138|//----------------------------------------------------------------------- 131|//-----------------------------------------------------------------------
139| 132|
140|.macro saveregs 133|.macro saveregs
@@ -207,13 +200,23 @@
207|//----------------------------------------------------------------------- 200|//-----------------------------------------------------------------------
208| 201|
209|// Endian-specific defines. 202|// Endian-specific defines.
210|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) 203|.if ENDIAN_LE
211|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) 204|.define FRAME_PC, -4
212|.define HI, LJ_ENDIAN_SELECT(4,0) 205|.define FRAME_FUNC, -8
213|.define LO, LJ_ENDIAN_SELECT(0,4) 206|.define HI, 4
214|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) 207|.define LO, 0
215|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) 208|.define OFS_RD, 2
216|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) 209|.define OFS_RA, 1
210|.define OFS_OP, 0
211|.else
212|.define FRAME_PC, -8
213|.define FRAME_FUNC, -4
214|.define HI, 0
215|.define LO, 4
216|.define OFS_RD, 0
217|.define OFS_RA, 2
218|.define OFS_OP, 3
219|.endif
217| 220|
218|// Instruction decode. 221|// Instruction decode.
219|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro 222|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -324,61 +327,6 @@
324|.macro call_extern; jalr CFUNCADDR; .endmacro 327|.macro call_extern; jalr CFUNCADDR; .endmacro
325|.macro jmp_extern; jr CFUNCADDR; .endmacro 328|.macro jmp_extern; jr CFUNCADDR; .endmacro
326| 329|
327|// Converts int from given reg to double, result in CRET1 and CRET2 regs.
328|.if not FPU
329|.macro cvti2d, arg
330| load_got __floatsidf
331| call_extern
332|. move CARG1, arg
333|.endmacro
334|.endif
335|
336|// Loads a double-word floating-point value.
337|.macro load_double, fpr, gpr1, gpr2, src
338|.if FPU
339| ldc1 fpr, src
340|.else
341| lw gpr1, src
342| lw gpr2, 4+src
343|.endif
344|.endmacro
345|
346|// Stores a double-word floating-point value.
347|.macro store_double, fpr, gpr1, gpr2, dst
348|.if FPU
349| sdc1 fpr, dst
350|.else
351| sw gpr1, dst
352| sw gpr2, 4+dst
353|.endif
354|.endmacro
355|
356|// Loads the first double-word floating-point argument.
357|.macro load_farg1, src
358| load_double FARG1, CARG1, CARG2, src
359|.endmacro
360|
361|// Loads the second double-word floating-point argument.
362|.macro load_farg2, src
363| load_double FARG2, CARG3, CARG4, src
364|.endmacro
365|
366|.macro load_double1, src
367| load_double f0, SFT1, SFT2, src
368|.endmacro
369|
370|.macro store_double1, dst
371| store_double f0, SFT1, SFT2, dst
372|.endmacro
373|
374|.macro load_double2, src
375| load_double f2, SFT3, SFT4, src
376|.endmacro
377|
378|.macro store_double2, dst
379| store_double f2, SFT3, SFT4, dst
380|.endmacro
381|
382|.macro hotcheck, delta, target 330|.macro hotcheck, delta, target
383| srl TMP1, PC, 1 331| srl TMP1, PC, 1
384| andi TMP1, TMP1, 126 332| andi TMP1, TMP1, 126
@@ -463,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
463 |. sll TMP2, TMP2, 3 411 |. sll TMP2, TMP2, 3
464 |1: 412 |1:
465 | addiu TMP1, TMP1, -8 413 | addiu TMP1, TMP1, -8
466 | load_double1 0(RA) 414 | lw SFRETHI, HI(RA)
415 | lw SFRETLO, LO(RA)
467 | addiu RA, RA, 8 416 | addiu RA, RA, 8
468 | store_double1 0(BASE) 417 | sw SFRETHI, HI(BASE)
418 | sw SFRETLO, LO(BASE)
469 | bnez TMP1, <1 419 | bnez TMP1, <1
470 |. addiu BASE, BASE, 8 420 |. addiu BASE, BASE, 8
471 | 421 |
@@ -535,6 +485,7 @@ static void build_subroutines(BuildCtx *ctx)
535 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 485 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
536 | lw L, SAVE_L 486 | lw L, SAVE_L
537 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 487 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
538 | li TISNIL, LJ_TNIL 489 | li TISNIL, LJ_TNIL
539 | lw BASE, L->base 490 | lw BASE, L->base
540 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 491 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
@@ -605,6 +556,7 @@ static void build_subroutines(BuildCtx *ctx)
605 | sw L, DISPATCH_GL(cur_L)(DISPATCH) 556 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
606 | move RA, BASE 557 | move RA, BASE
607 | lw BASE, L->base 558 | lw BASE, L->base
559 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
608 | lw TMP1, L->top 560 | lw TMP1, L->top
609 | lw PC, FRAME_PC(BASE) 561 | lw PC, FRAME_PC(BASE)
610 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 562 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
@@ -649,6 +601,7 @@ static void build_subroutines(BuildCtx *ctx)
649 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 601 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
650 | sw L, DISPATCH_GL(cur_L)(DISPATCH) 602 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
651 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 603 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
604 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
652 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 605 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
653 | lw TMP1, L->top 606 | lw TMP1, L->top
654 | .FPU mtc1 TMP3, TOBIT 607 | .FPU mtc1 TMP3, TOBIT
@@ -737,7 +690,8 @@ static void build_subroutines(BuildCtx *ctx)
737 |->cont_cat: // RA = resultptr, RB = meta base 690 |->cont_cat: // RA = resultptr, RB = meta base
738 | lw INS, -4(PC) 691 | lw INS, -4(PC)
739 | addiu CARG2, RB, -16 692 | addiu CARG2, RB, -16
740 | load_double1 0(RA) 693 | lw SFRETHI, HI(RA)
694 | lw SFRETLO, LO(RA)
741 | decode_RB8a MULTRES, INS 695 | decode_RB8a MULTRES, INS
742 | decode_RA8a RA, INS 696 | decode_RA8a RA, INS
743 | decode_RB8b MULTRES 697 | decode_RB8b MULTRES
@@ -745,21 +699,13 @@ static void build_subroutines(BuildCtx *ctx)
745 | addu TMP1, BASE, MULTRES 699 | addu TMP1, BASE, MULTRES
746 | sw BASE, L->base 700 | sw BASE, L->base
747 | subu CARG3, CARG2, TMP1 701 | subu CARG3, CARG2, TMP1
748 |.if FPU 702 | sw SFRETHI, HI(CARG2)
749 | bne TMP1, CARG2, ->BC_CAT_Z
750 |. sdc1 f0, 0(CARG2)
751 | addu RA, BASE, RA
752 | b ->cont_nop
753 |. sdc1 f0, 0(RA)
754 |.else
755 | sw SFT1, 0(CARG2)
756 | bne TMP1, CARG2, ->BC_CAT_Z 703 | bne TMP1, CARG2, ->BC_CAT_Z
757 |. sw SFT2, 4(CARG2) 704 |. sw SFRETLO, LO(CARG2)
758 | addu RA, BASE, RA 705 | addu RA, BASE, RA
759 | sw SFT1, 0(RA) 706 | sw SFRETHI, HI(RA)
760 | b ->cont_nop 707 | b ->cont_nop
761 |. sw SFT2, 4(RA) 708 |. sw SFRETLO, LO(RA)
762 |.endif
763 | 709 |
764 |//-- Table indexing metamethods ----------------------------------------- 710 |//-- Table indexing metamethods -----------------------------------------
765 | 711 |
@@ -782,19 +728,9 @@ static void build_subroutines(BuildCtx *ctx)
782 |. sw TMP1, HI(CARG3) 728 |. sw TMP1, HI(CARG3)
783 | 729 |
784 |->vmeta_tgetb: // TMP0 = index 730 |->vmeta_tgetb: // TMP0 = index
785 |.if FPU
786 | mtc1 TMP0, f0
787 | cvt.d.w f0, f0
788 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
789 | sdc1 f0, 0(CARG3)
790 |.else
791 | sw CARG2, TEMP_SAVE_1 //needed to be saved because it's used later in lj_meta_tget
792 | cvti2d TMP0
793 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 731 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
794 | sw CRET1, 0(CARG3) 732 | sw TMP0, LO(CARG3)
795 | sw CRET2, 4(CARG3) 733 | sw TISNUM, HI(CARG3)
796 | lw CARG2, TEMP_SAVE_1
797 |.endif
798 | 734 |
799 |->vmeta_tgetv: 735 |->vmeta_tgetv:
800 |1: 736 |1:
@@ -806,9 +742,11 @@ static void build_subroutines(BuildCtx *ctx)
806 | // Returns TValue * (finished) or NULL (metamethod). 742 | // Returns TValue * (finished) or NULL (metamethod).
807 | beqz CRET1, >3 743 | beqz CRET1, >3
808 |. addiu TMP1, BASE, -FRAME_CONT 744 |. addiu TMP1, BASE, -FRAME_CONT
809 | load_double2 0(CRET1) 745 | lw SFARG1HI, HI(CRET1)
746 | lw SFARG2HI, LO(CRET1)
810 | ins_next1 747 | ins_next1
811 | store_double2 0(RA) 748 | sw SFARG1HI, HI(RA)
749 | sw SFARG2HI, LO(RA)
812 | ins_next2 750 | ins_next2
813 | 751 |
814 |3: // Call __index metamethod. 752 |3: // Call __index metamethod.
@@ -825,16 +763,11 @@ static void build_subroutines(BuildCtx *ctx)
825 | call_intern lj_tab_getinth // (GCtab *t, int32_t key) 763 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
826 |. nop 764 |. nop
827 | // Returns cTValue * or NULL. 765 | // Returns cTValue * or NULL.
828 | beqz CRET1, >1 766 | beqz CRET1, ->BC_TGETR_Z
829 |. nop 767 |. move SFARG2HI, TISNIL
830 |.if FPU 768 | lw SFARG2HI, HI(CRET1)
831 | b ->BC_TGETR_Z 769 | b ->BC_TGETR_Z
832 |. ldc1 f0, 0(CRET1) 770 |. lw SFARG2LO, LO(CRET1)
833 |.else
834 | lw SFT1, 0(CRET1)
835 | b ->BC_TGETR_Z
836 |. lw SFT2, 4(CRET1)
837 |.endif
838 | 771 |
839 |//----------------------------------------------------------------------- 772 |//-----------------------------------------------------------------------
840 | 773 |
@@ -857,19 +790,9 @@ static void build_subroutines(BuildCtx *ctx)
857 |. sw TMP1, HI(CARG3) 790 |. sw TMP1, HI(CARG3)
858 | 791 |
859 |->vmeta_tsetb: // TMP0 = index 792 |->vmeta_tsetb: // TMP0 = index
860 |.if FPU
861 | mtc1 TMP0, f0
862 | cvt.d.w f0, f0
863 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 793 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
864 | sdc1 f0, 0(CARG3) 794 | sw TMP0, LO(CARG3)
865 |.else 795 | sw TISNUM, HI(CARG3)
866 | sw CARG2, TEMP_SAVE_1
867 | cvti2d TMP0
868 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
869 | sw CRET1, 0(CARG3)
870 | sw CRET2, 4(CARG3)
871 | lw CARG2, TEMP_SAVE_1
872 |.endif
873 | 796 |
874 |->vmeta_tsetv: 797 |->vmeta_tsetv:
875 |1: 798 |1:
@@ -879,17 +802,13 @@ static void build_subroutines(BuildCtx *ctx)
879 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 802 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
880 |. move CARG1, L 803 |. move CARG1, L
881 | // Returns TValue * (finished) or NULL (metamethod). 804 | // Returns TValue * (finished) or NULL (metamethod).
882 |.if FPU 805 | lw SFARG1HI, HI(RA)
883 | beqz CRET1, >3 806 | beqz CRET1, >3
884 |. ldc1 f2, 0(RA) 807 |. lw SFARG1LO, LO(RA)
885 |.else
886 | lw SFT3, 0(RA)
887 | beqz CRET1, >3
888 |. lw SFT4, 4(RA)
889 |.endif
890 | // NOBARRIER: lj_meta_tset ensures the table is not black. 808 | // NOBARRIER: lj_meta_tset ensures the table is not black.
891 | ins_next1 809 | ins_next1
892 | store_double2 0(CRET1) 810 | sw SFARG1HI, HI(CRET1)
811 | sw SFARG1LO, LO(CRET1)
893 | ins_next2 812 | ins_next2
894 | 813 |
895 |3: // Call __newindex metamethod. 814 |3: // Call __newindex metamethod.
@@ -899,7 +818,8 @@ static void build_subroutines(BuildCtx *ctx)
899 | sw PC, -16+HI(BASE) // [cont|PC] 818 | sw PC, -16+HI(BASE) // [cont|PC]
900 | subu PC, BASE, TMP1 819 | subu PC, BASE, TMP1
901 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 820 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
902 | store_double2 16(BASE) // Copy value to third argument. 821 | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
822 | sw SFARG1LO, 16+LO(BASE)
903 | b ->vm_call_dispatch_f 823 | b ->vm_call_dispatch_f
904 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 824 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
905 | 825 |
@@ -916,7 +836,9 @@ static void build_subroutines(BuildCtx *ctx)
916 |//-- Comparison metamethods --------------------------------------------- 836 |//-- Comparison metamethods ---------------------------------------------
917 | 837 |
918 |->vmeta_comp: 838 |->vmeta_comp:
919 | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. 839 | // RA/RD point to o1/o2.
840 | move CARG2, RA
841 | move CARG3, RD
920 | load_got lj_meta_comp 842 | load_got lj_meta_comp
921 | addiu PC, PC, -4 843 | addiu PC, PC, -4
922 | sw BASE, L->base 844 | sw BASE, L->base
@@ -942,17 +864,13 @@ static void build_subroutines(BuildCtx *ctx)
942 | 864 |
943 |->cont_ra: // RA = resultptr 865 |->cont_ra: // RA = resultptr
944 | lbu TMP1, -4+OFS_RA(PC) 866 | lbu TMP1, -4+OFS_RA(PC)
945 | load_double1 0(RA) 867 | lw SFRETHI, HI(RA)
868 | lw SFRETLO, LO(RA)
946 | sll TMP1, TMP1, 3 869 | sll TMP1, TMP1, 3
947 | addu TMP1, BASE, TMP1 870 | addu TMP1, BASE, TMP1
948 |.if FPU 871 | sw SFRETHI, HI(TMP1)
949 | b ->cont_nop
950 |. sdc1 f0, 0(TMP1)
951 |.else
952 | sw SFT1, 0(TMP1)
953 | b ->cont_nop 872 | b ->cont_nop
954 |. sw SFT2, 4(TMP1) 873 |. sw SFRETLO, LO(TMP1)
955 |.endif
956 | 874 |
957 |->cont_condt: // RA = resultptr 875 |->cont_condt: // RA = resultptr
958 | lw TMP0, HI(RA) 876 | lw TMP0, HI(RA)
@@ -967,8 +885,11 @@ static void build_subroutines(BuildCtx *ctx)
967 |. addiu TMP2, AT, -1 // Branch if result is false. 885 |. addiu TMP2, AT, -1 // Branch if result is false.
968 | 886 |
969 |->vmeta_equal: 887 |->vmeta_equal:
970 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. 888 | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
971 | load_got lj_meta_equal 889 | load_got lj_meta_equal
890 | move CARG2, SFARG1LO
891 | move CARG3, SFARG2LO
892 | move CARG4, TMP0
972 | addiu PC, PC, -4 893 | addiu PC, PC, -4
973 | sw BASE, L->base 894 | sw BASE, L->base
974 | sw PC, SAVE_PC 895 | sw PC, SAVE_PC
@@ -1007,29 +928,16 @@ static void build_subroutines(BuildCtx *ctx)
1007 |//-- Arithmetic metamethods --------------------------------------------- 928 |//-- Arithmetic metamethods ---------------------------------------------
1008 | 929 |
1009 |->vmeta_unm: 930 |->vmeta_unm:
1010 | b ->vmeta_arith 931 | move RC, RB
1011 |. move CARG4, CARG3
1012 |
1013 |->vmeta_arith_vn:
1014 | addu CARG3, BASE, RB
1015 | b ->vmeta_arith
1016 |. addu CARG4, KBASE, RC
1017 |
1018 |->vmeta_arith_nv:
1019 | addu CARG4, BASE, RB
1020 | b ->vmeta_arith
1021 |. addu CARG3, KBASE, RC
1022 |
1023 |->vmeta_arith_vv:
1024 | addu CARG3, BASE, RB
1025 | addu CARG4, BASE, RC
1026 | 932 |
1027 |->vmeta_arith: 933 |->vmeta_arith:
1028 | load_got lj_meta_arith 934 | load_got lj_meta_arith
1029 | decode_OP1 TMP0, INS 935 | decode_OP1 TMP0, INS
1030 | sw BASE, L->base 936 | sw BASE, L->base
1031 | sw PC, SAVE_PC
1032 | move CARG2, RA 937 | move CARG2, RA
938 | sw PC, SAVE_PC
939 | move CARG3, RB
940 | move CARG4, RC
1033 | sw TMP0, ARG5 941 | sw TMP0, ARG5
1034 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 942 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1035 |. move CARG1, L 943 |. move CARG1, L
@@ -1137,40 +1045,52 @@ static void build_subroutines(BuildCtx *ctx)
1137 | 1045 |
1138 |.macro .ffunc_1, name 1046 |.macro .ffunc_1, name
1139 |->ff_ .. name: 1047 |->ff_ .. name:
1048 | lw SFARG1HI, HI(BASE)
1140 | beqz NARGS8:RC, ->fff_fallback 1049 | beqz NARGS8:RC, ->fff_fallback
1141 |. lw CARG3, HI(BASE) 1050 |. lw SFARG1LO, LO(BASE)
1142 | lw CARG1, LO(BASE)
1143 |.endmacro 1051 |.endmacro
1144 | 1052 |
1145 |.macro .ffunc_2, name 1053 |.macro .ffunc_2, name
1146 |->ff_ .. name: 1054 |->ff_ .. name:
1147 | sltiu AT, NARGS8:RC, 16 1055 | sltiu AT, NARGS8:RC, 16
1148 | lw CARG3, HI(BASE) 1056 | lw SFARG1HI, HI(BASE)
1149 | bnez AT, ->fff_fallback 1057 | bnez AT, ->fff_fallback
1150 |. lw CARG4, 8+HI(BASE) 1058 |. lw SFARG2HI, 8+HI(BASE)
1151 | lw CARG1, LO(BASE) 1059 | lw SFARG1LO, LO(BASE)
1152 | lw CARG2, 8+LO(BASE) 1060 | lw SFARG2LO, 8+LO(BASE)
1153 |.endmacro 1061 |.endmacro
1154 | 1062 |
1155 |.macro .ffunc_n, name // Caveat: has delay slot! 1063 |.macro .ffunc_n, name // Caveat: has delay slot!
1156 |->ff_ .. name: 1064 |->ff_ .. name:
1157 | lw CARG3, HI(BASE) 1065 | lw SFARG1HI, HI(BASE)
1158 | load_farg1 0(BASE) 1066 |.if FPU
1067 | ldc1 FARG1, 0(BASE)
1068 |.else
1069 | lw SFARG1LO, LO(BASE)
1070 |.endif
1159 | beqz NARGS8:RC, ->fff_fallback 1071 | beqz NARGS8:RC, ->fff_fallback
1160 |. sltiu AT, CARG3, LJ_TISNUM 1072 |. sltiu AT, SFARG1HI, LJ_TISNUM
1161 | beqz AT, ->fff_fallback 1073 | beqz AT, ->fff_fallback
1162 |.endmacro 1074 |.endmacro
1163 | 1075 |
1164 |.macro .ffunc_nn, name // Caveat: has delay slot! 1076 |.macro .ffunc_nn, name // Caveat: has delay slot!
1165 |->ff_ .. name: 1077 |->ff_ .. name:
1166 | sltiu AT, NARGS8:RC, 16 1078 | sltiu AT, NARGS8:RC, 16
1167 | lw CARG3, HI(BASE) 1079 | lw SFARG1HI, HI(BASE)
1168 | bnez AT, ->fff_fallback 1080 | bnez AT, ->fff_fallback
1169 |. lw CARG4, 8+HI(BASE) 1081 |. lw SFARG2HI, 8+HI(BASE)
1170 | sltiu TMP0, CARG3, LJ_TISNUM 1082 | sltiu TMP0, SFARG1HI, LJ_TISNUM
1171 | sltiu TMP1, CARG4, LJ_TISNUM 1083 |.if FPU
1172 | load_farg1 0(BASE) 1084 | ldc1 FARG1, 0(BASE)
1173 | load_farg2 8(BASE) 1085 |.else
1086 | lw SFARG1LO, LO(BASE)
1087 |.endif
1088 | sltiu TMP1, SFARG2HI, LJ_TISNUM
1089 |.if FPU
1090 | ldc1 FARG2, 8(BASE)
1091 |.else
1092 | lw SFARG2LO, 8+LO(BASE)
1093 |.endif
1174 | and TMP0, TMP0, TMP1 1094 | and TMP0, TMP0, TMP1
1175 | beqz TMP0, ->fff_fallback 1095 | beqz TMP0, ->fff_fallback
1176 |.endmacro 1096 |.endmacro
@@ -1186,58 +1106,54 @@ static void build_subroutines(BuildCtx *ctx)
1186 |//-- Base library: checks ----------------------------------------------- 1106 |//-- Base library: checks -----------------------------------------------
1187 | 1107 |
1188 |.ffunc_1 assert 1108 |.ffunc_1 assert
1189 | sltiu AT, CARG3, LJ_TISTRUECOND 1109 | sltiu AT, SFARG1HI, LJ_TISTRUECOND
1190 | beqz AT, ->fff_fallback 1110 | beqz AT, ->fff_fallback
1191 |. addiu RA, BASE, -8 1111 |. addiu RA, BASE, -8
1192 | lw PC, FRAME_PC(BASE) 1112 | lw PC, FRAME_PC(BASE)
1193 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1113 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1194 | addu TMP2, RA, NARGS8:RC 1114 | addu TMP2, RA, NARGS8:RC
1195 | sw CARG3, HI(RA) 1115 | sw SFARG1HI, HI(RA)
1196 | addiu TMP1, BASE, 8 1116 | addiu TMP1, BASE, 8
1197 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 1117 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
1198 |. sw CARG1, LO(RA) 1118 |. sw SFARG1LO, LO(RA)
1199 |1: 1119 |1:
1200 | load_double1 0(TMP1) 1120 | lw SFRETHI, HI(TMP1)
1201 | store_double1 -8(TMP1) 1121 | lw SFRETLO, LO(TMP1)
1122 | sw SFRETHI, -8+HI(TMP1)
1123 | sw SFRETLO, -8+LO(TMP1)
1202 | bne TMP1, TMP2, <1 1124 | bne TMP1, TMP2, <1
1203 |. addiu TMP1, TMP1, 8 1125 |. addiu TMP1, TMP1, 8
1204 | b ->fff_res 1126 | b ->fff_res
1205 |. nop 1127 |. nop
1206 | 1128 |
1207 |.ffunc type 1129 |.ffunc type
1208 | lw CARG3, HI(BASE) 1130 | lw SFARG1HI, HI(BASE)
1209 | li TMP1, LJ_TISNUM
1210 | beqz NARGS8:RC, ->fff_fallback 1131 | beqz NARGS8:RC, ->fff_fallback
1211 |. sltiu TMP0, CARG3, LJ_TISNUM 1132 |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1212 | movz TMP1, CARG3, TMP0 1133 | movn SFARG1HI, TISNUM, TMP0
1213 | not TMP1, TMP1 1134 | not TMP1, SFARG1HI
1214 | sll TMP1, TMP1, 3 1135 | sll TMP1, TMP1, 3
1215 | addu TMP1, CFUNC:RB, TMP1 1136 | addu TMP1, CFUNC:RB, TMP1
1216 |.if HFABI 1137 | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1217 | b ->fff_resn 1138 | b ->fff_restv
1218 |. ldc1 FRET1, CFUNC:TMP1->upvalue 1139 |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1219 |.else
1220 | lw CRET1, CFUNC:TMP1->upvalue[0].u32.hi
1221 | b ->fff_resn
1222 |. lw CRET2, CFUNC:TMP1->upvalue[0].u32.lo
1223 |.endif
1224 | 1140 |
1225 |//-- Base library: getters and setters --------------------------------- 1141 |//-- Base library: getters and setters ---------------------------------
1226 | 1142 |
1227 |.ffunc_1 getmetatable 1143 |.ffunc_1 getmetatable
1228 | li AT, LJ_TTAB 1144 | li AT, LJ_TTAB
1229 | bne CARG3, AT, >6 1145 | bne SFARG1HI, AT, >6
1230 |. li AT, LJ_TUDATA 1146 |. li AT, LJ_TUDATA
1231 |1: // Field metatable must be at same offset for GCtab and GCudata! 1147 |1: // Field metatable must be at same offset for GCtab and GCudata!
1232 | lw TAB:CARG1, TAB:CARG1->metatable 1148 | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1233 |2: 1149 |2:
1234 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1150 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1235 | beqz TAB:CARG1, ->fff_restv 1151 | beqz TAB:SFARG1LO, ->fff_restv
1236 |. li CARG3, LJ_TNIL 1152 |. li SFARG1HI, LJ_TNIL
1237 | lw TMP0, TAB:CARG1->hmask 1153 | lw TMP0, TAB:SFARG1LO->hmask
1238 | li CARG3, LJ_TTAB // Use metatable as default result. 1154 | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1239 | lw TMP1, STR:RC->hash 1155 | lw TMP1, STR:RC->hash
1240 | lw NODE:TMP2, TAB:CARG1->node 1156 | lw NODE:TMP2, TAB:SFARG1LO->node
1241 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1157 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
1242 | sll TMP0, TMP1, 5 1158 | sll TMP0, TMP1, 5
1243 | sll TMP1, TMP1, 3 1159 | sll TMP1, TMP1, 3
@@ -1249,7 +1165,7 @@ static void build_subroutines(BuildCtx *ctx)
1249 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 1165 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1250 | lw NODE:TMP3, NODE:TMP2->next 1166 | lw NODE:TMP3, NODE:TMP2->next
1251 | bne CARG4, AT, >4 1167 | bne CARG4, AT, >4
1252 |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 1168 |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1253 | beq TMP0, STR:RC, >5 1169 | beq TMP0, STR:RC, >5
1254 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) 1170 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1255 |4: 1171 |4:
@@ -1258,36 +1174,35 @@ static void build_subroutines(BuildCtx *ctx)
1258 | b <3 1174 | b <3
1259 |. nop 1175 |. nop
1260 |5: 1176 |5:
1261 | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. 1177 | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1262 |. nop 1178 |. nop
1263 | move CARG3, CARG2 // Return value of mt.__metatable. 1179 | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1264 | b ->fff_restv 1180 | b ->fff_restv
1265 |. move CARG1, TMP1 1181 |. move SFARG1LO, TMP1
1266 | 1182 |
1267 |6: 1183 |6:
1268 | beq CARG3, AT, <1 1184 | beq SFARG1HI, AT, <1
1269 |. sltiu TMP0, CARG3, LJ_TISNUM 1185 |. sltu AT, TISNUM, SFARG1HI
1270 | li TMP1, LJ_TISNUM 1186 | movz SFARG1HI, TISNUM, AT
1271 | movz TMP1, CARG3, TMP0 1187 | not TMP1, SFARG1HI
1272 | not TMP1, TMP1
1273 | sll TMP1, TMP1, 2 1188 | sll TMP1, TMP1, 2
1274 | addu TMP1, DISPATCH, TMP1 1189 | addu TMP1, DISPATCH, TMP1
1275 | b <2 1190 | b <2
1276 |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) 1191 |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1277 | 1192 |
1278 |.ffunc_2 setmetatable 1193 |.ffunc_2 setmetatable
1279 | // Fast path: no mt for table yet and not clearing the mt. 1194 | // Fast path: no mt for table yet and not clearing the mt.
1280 | li AT, LJ_TTAB 1195 | li AT, LJ_TTAB
1281 | bne CARG3, AT, ->fff_fallback 1196 | bne SFARG1HI, AT, ->fff_fallback
1282 |. addiu CARG4, CARG4, -LJ_TTAB 1197 |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
1283 | lw TAB:TMP1, TAB:CARG1->metatable 1198 | lw TAB:TMP1, TAB:SFARG1LO->metatable
1284 | lbu TMP3, TAB:CARG1->marked 1199 | lbu TMP3, TAB:SFARG1LO->marked
1285 | or AT, CARG4, TAB:TMP1 1200 | or AT, SFARG2HI, TAB:TMP1
1286 | bnez AT, ->fff_fallback 1201 | bnez AT, ->fff_fallback
1287 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) 1202 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1288 | beqz AT, ->fff_restv 1203 | beqz AT, ->fff_restv
1289 |. sw TAB:CARG2, TAB:CARG1->metatable 1204 |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
1290 | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv 1205 | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1291 | 1206 |
1292 |.ffunc rawget 1207 |.ffunc rawget
1293 | lw CARG4, HI(BASE) 1208 | lw CARG4, HI(BASE)
@@ -1301,56 +1216,44 @@ static void build_subroutines(BuildCtx *ctx)
1301 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1216 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1302 |. move CARG1, L 1217 |. move CARG1, L
1303 | // Returns cTValue *. 1218 | // Returns cTValue *.
1304 |.if HFABI 1219 | lw SFARG1HI, HI(CRET1)
1305 | b ->fff_resn 1220 | b ->fff_restv
1306 |. ldc1 FRET1, 0(CRET1) 1221 |. lw SFARG1LO, LO(CRET1)
1307 |.else
1308 | lw CRET2, 4(CRET1)
1309 | b ->fff_resn
1310 |. lw CRET1, 0(CRET1)
1311 |.endif
1312 | 1222 |
1313 |//-- Base library: conversions ------------------------------------------ 1223 |//-- Base library: conversions ------------------------------------------
1314 | 1224 |
1315 |.ffunc tonumber 1225 |.ffunc tonumber
1316 | // Only handles the number case inline (without a base argument). 1226 | // Only handles the number case inline (without a base argument).
1317 | lw CARG1, HI(BASE) 1227 | lw CARG1, HI(BASE)
1318 | xori AT, NARGS8:RC, 8 1228 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1319 | sltiu CARG1, CARG1, LJ_TISNUM 1229 | sltu TMP0, TISNUM, CARG1
1320 | movn CARG1, r0, AT 1230 | or AT, AT, TMP0
1321 |.if HFABI 1231 | bnez AT, ->fff_fallback
1322 | beqz CARG1, ->fff_fallback // Exactly one number argument. 1232 |. lw SFARG1HI, HI(BASE)
1323 |. ldc1 FRET1, 0(BASE) 1233 | b ->fff_restv
1324 |.else 1234 |. lw SFARG1LO, LO(BASE)
1325 | lw CRET1, 0(BASE)
1326 | beqz CARG1, ->fff_fallback // Exactly one number argument.
1327 |. lw CRET2, 4(BASE)
1328 |.endif
1329 | b ->fff_resn
1330 |. nop
1331 | 1235 |
1332 |.ffunc_1 tostring 1236 |.ffunc_1 tostring
1333 | // Only handles the string or number case inline. 1237 | // Only handles the string or number case inline.
1334 | li AT, LJ_TSTR 1238 | li AT, LJ_TSTR
1335 | // A __tostring method in the string base metatable is ignored. 1239 | // A __tostring method in the string base metatable is ignored.
1336 | beq CARG3, AT, ->fff_restv // String key? 1240 | beq SFARG1HI, AT, ->fff_restv // String key?
1337 | // Handle numbers inline, unless a number base metatable is present. 1241 | // Handle numbers inline, unless a number base metatable is present.
1338 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1242 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1339 | sltiu TMP0, CARG3, LJ_TISNUM 1243 | sltu TMP0, TISNUM, SFARG1HI
1340 | sltiu TMP1, TMP1, 1 1244 | or TMP0, TMP0, TMP1
1341 | and TMP0, TMP0, TMP1 1245 | bnez TMP0, ->fff_fallback
1342 | beqz TMP0, ->fff_fallback
1343 |. sw BASE, L->base // Add frame since C call can throw. 1246 |. sw BASE, L->base // Add frame since C call can throw.
1344 | ffgccheck 1247 | ffgccheck
1345 |. sw PC, SAVE_PC // Redundant (but a defined value). 1248 |. sw PC, SAVE_PC // Redundant (but a defined value).
1346 | load_got lj_strfmt_num 1249 | load_got lj_strfmt_number
1347 | move CARG1, L 1250 | move CARG1, L
1348 | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np) 1251 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1349 |. move CARG2, BASE 1252 |. move CARG2, BASE
1350 | // Returns GCstr *. 1253 | // Returns GCstr *.
1351 | li CARG3, LJ_TSTR 1254 | li SFARG1HI, LJ_TSTR
1352 | b ->fff_restv 1255 | b ->fff_restv
1353 |. move CARG1, CRET1 1256 |. move SFARG1LO, CRET1
1354 | 1257 |
1355 |//-- Base library: iterators ------------------------------------------- 1258 |//-- Base library: iterators -------------------------------------------
1356 | 1259 |
@@ -1372,47 +1275,38 @@ static void build_subroutines(BuildCtx *ctx)
1372 |. move CARG1, L 1275 |. move CARG1, L
1373 | // Returns 0 at end of traversal. 1276 | // Returns 0 at end of traversal.
1374 | beqz CRET1, ->fff_restv // End of traversal: return nil. 1277 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1375 |. li CARG3, LJ_TNIL 1278 |. li SFARG1HI, LJ_TNIL
1376 | load_double1 8(BASE) 1279 | lw TMP0, 8+HI(BASE)
1280 | lw TMP1, 8+LO(BASE)
1377 | addiu RA, BASE, -8 1281 | addiu RA, BASE, -8
1378 | load_double2 16(BASE) 1282 | lw TMP2, 16+HI(BASE)
1379 | store_double1 0(RA) 1283 | lw TMP3, 16+LO(BASE)
1380 | store_double2 8(RA) 1284 | sw TMP0, HI(RA)
1285 | sw TMP1, LO(RA)
1286 | sw TMP2, 8+HI(RA)
1287 | sw TMP3, 8+LO(RA)
1381 | b ->fff_res 1288 | b ->fff_res
1382 |. li RD, (2+1)*8 1289 |. li RD, (2+1)*8
1383 | 1290 |
1384 |.ffunc_1 pairs 1291 |.ffunc_1 pairs
1385 | li AT, LJ_TTAB 1292 | li AT, LJ_TTAB
1386 | bne CARG3, AT, ->fff_fallback 1293 | bne SFARG1HI, AT, ->fff_fallback
1387 |. lw PC, FRAME_PC(BASE) 1294 |. lw PC, FRAME_PC(BASE)
1388#if LJ_52 1295#if LJ_52
1389 | lw TAB:TMP2, TAB:CARG1->metatable 1296 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1390 |.if FPU 1297 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1391 | ldc1 f0, CFUNC:RB->upvalue[0] 1298 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1392 |.else
1393 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1394 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1395 |.endif
1396 | bnez TAB:TMP2, ->fff_fallback 1299 | bnez TAB:TMP2, ->fff_fallback
1397#else 1300#else
1398 |.if FPU 1301 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1399 | ldc1 f0, CFUNC:RB->upvalue[0] 1302 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1400 |.else
1401 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1402 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1403 |.endif
1404#endif 1303#endif
1405 |. addiu RA, BASE, -8 1304 |. addiu RA, BASE, -8
1406 | sw TISNIL, 8+HI(BASE) 1305 | sw TISNIL, 8+HI(BASE)
1407 | li RD, (3+1)*8 1306 | sw TMP0, HI(RA)
1408 |.if FPU 1307 | sw TMP1, LO(RA)
1409 | b ->fff_res
1410 |. sdc1 f0, 0(RA)
1411 |.else
1412 | sw SFT1, 0(RA)
1413 | b ->fff_res 1308 | b ->fff_res
1414 |. sw SFT2, 4(RA) 1309 |. li RD, (3+1)*8
1415 |.endif
1416 | 1310 |
1417 |.ffunc ipairs_aux 1311 |.ffunc ipairs_aux
1418 | sltiu AT, NARGS8:RC, 16 1312 | sltiu AT, NARGS8:RC, 16
@@ -1421,54 +1315,31 @@ static void build_subroutines(BuildCtx *ctx)
1421 | lw CARG4, 8+HI(BASE) 1315 | lw CARG4, 8+HI(BASE)
1422 | bnez AT, ->fff_fallback 1316 | bnez AT, ->fff_fallback
1423 |. addiu CARG3, CARG3, -LJ_TTAB 1317 |. addiu CARG3, CARG3, -LJ_TTAB
1424 | sltiu AT, CARG4, LJ_TISNUM 1318 | xor CARG4, CARG4, TISNUM
1425 | li TMP0, 1 1319 | and AT, CARG3, CARG4
1426 | movn AT, r0, CARG3 1320 | bnez AT, ->fff_fallback
1427 | beqz AT, ->fff_fallback
1428 |. lw PC, FRAME_PC(BASE) 1321 |. lw PC, FRAME_PC(BASE)
1429 |.if FPU 1322 | lw TMP2, 8+LO(BASE)
1430 | ldc1 FARG2, 8(BASE)
1431 | mtc1 TMP0, FARG1
1432 | trunc.w.d FRET1, FARG2
1433 | cvt.d.w FARG1, FARG1
1434 | mfc1 TMP2, FRET1
1435 | add.d FARG2, FARG2, FARG1
1436 |.else
1437 | sw CARG1, TEMP_SAVE_1
1438 | cvti2d TMP0
1439 | sw CRET1, TEMP_SAVE_2 // Store result CRET1/CRET2=1 (double).
1440 | sw CRET2, TEMP_SAVE_3
1441 | lw CARG2, 8+4(BASE)
1442 | load_got __fixdfsi
1443 | call_extern
1444 |. lw CARG1, 8(BASE)
1445 | sw CRET1, TEMP_SAVE_4
1446 | load_got __adddf3
1447 | lw CARG2, TEMP_SAVE_3
1448 | lw CARG3, 8(BASE)
1449 | lw CARG4, 8+4(BASE)
1450 | call_extern
1451 |. lw CARG1, TEMP_SAVE_2
1452 | lw TMP2, TEMP_SAVE_4
1453 | lw CARG1, TEMP_SAVE_1
1454 |.endif
1455 | lw TMP0, TAB:CARG1->asize 1323 | lw TMP0, TAB:CARG1->asize
1456 | lw TMP1, TAB:CARG1->array 1324 | lw TMP1, TAB:CARG1->array
1457 | addiu TMP2, TMP2, 1 1325 | addiu TMP2, TMP2, 1
1326 | sw TISNUM, -8+HI(BASE)
1458 | sltu AT, TMP2, TMP0 1327 | sltu AT, TMP2, TMP0
1328 | sw TMP2, -8+LO(BASE)
1459 | beqz AT, >2 // Not in array part? 1329 | beqz AT, >2 // Not in array part?
1460 |. addiu RA, BASE, -8 1330 |. addiu RA, BASE, -8
1461 | store_double FARG2, CRET1, CRET2, 0(RA)
1462 | sll TMP3, TMP2, 3 1331 | sll TMP3, TMP2, 3
1463 | addu TMP3, TMP1, TMP3 1332 | addu TMP3, TMP1, TMP3
1464 | lw TMP2, HI(TMP3) 1333 | lw TMP1, HI(TMP3)
1465 | load_double1 0(TMP3) 1334 | lw TMP2, LO(TMP3)
1466 |1: 1335 |1:
1467 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. 1336 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1468 |. li RD, (0+1)*8 1337 |. li RD, (0+1)*8
1469 | store_double1 8(RA) 1338 | sw TMP1, 8+HI(RA)
1339 | sw TMP2, 8+LO(RA)
1470 | b ->fff_res 1340 | b ->fff_res
1471 |. li RD, (2+1)*8 1341 |. li RD, (2+1)*8
1342 |
1472 |2: // Check for empty hash part first. Otherwise call C function. 1343 |2: // Check for empty hash part first. Otherwise call C function.
1473 | lw TMP0, TAB:CARG1->hmask 1344 | lw TMP0, TAB:CARG1->hmask
1474 | load_got lj_tab_getinth 1345 | load_got lj_tab_getinth
@@ -1479,49 +1350,30 @@ static void build_subroutines(BuildCtx *ctx)
1479 | // Returns cTValue * or NULL. 1350 | // Returns cTValue * or NULL.
1480 | beqz CRET1, ->fff_res 1351 | beqz CRET1, ->fff_res
1481 |. li RD, (0+1)*8 1352 |. li RD, (0+1)*8
1482 | lw TMP2, HI(CRET1) 1353 | lw TMP1, HI(CRET1)
1483 |.if FPU
1484 | b <1
1485 |. ldc1 f0, 0(CRET1)
1486 |.else
1487 | lw SFT2, 4(CRET1)
1488 | b <1 1354 | b <1
1489 |. lw SFT1, 0(CRET1) 1355 |. lw TMP2, LO(CRET1)
1490 |.endif
1491 | 1356 |
1492 |.ffunc_1 ipairs 1357 |.ffunc_1 ipairs
1493 | li AT, LJ_TTAB 1358 | li AT, LJ_TTAB
1494 | bne CARG3, AT, ->fff_fallback 1359 | bne SFARG1HI, AT, ->fff_fallback
1495 |. lw PC, FRAME_PC(BASE) 1360 |. lw PC, FRAME_PC(BASE)
1496#if LJ_52 1361#if LJ_52
1497 | lw TAB:TMP2, TAB:CARG1->metatable 1362 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1498 |.if FPU 1363 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1499 | ldc1 f0, CFUNC:RB->upvalue[0] 1364 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1500 |.else
1501 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1502 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1503 |.endif
1504 | bnez TAB:TMP2, ->fff_fallback 1365 | bnez TAB:TMP2, ->fff_fallback
1505#else 1366#else
1506 |.if FPU 1367 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1507 | ldc1 f0, CFUNC:RB->upvalue[0] 1368 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1508 |.else
1509 | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
1510 | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
1511 |.endif
1512#endif 1369#endif
1513 |. addiu RA, BASE, -8 1370 |. addiu RA, BASE, -8
1514 | sw r0, 8+HI(BASE) 1371 | sw TISNUM, 8+HI(BASE)
1515 | sw r0, 8+LO(BASE) 1372 | sw r0, 8+LO(BASE)
1516 | li RD, (3+1)*8 1373 | sw TMP0, HI(RA)
1517 |.if FPU 1374 | sw TMP1, LO(RA)
1518 | b ->fff_res 1375 | b ->fff_res
1519 |. sdc1 f0, 0(RA) 1376 |. li RD, (3+1)*8
1520 |.else
1521 | sw SFT1, 0(RA)
1522 | b ->fff_res
1523 |. sw SFT2, 4(RA)
1524 |.endif
1525 | 1377 |
1526 |//-- Base library: catch errors ---------------------------------------- 1378 |//-- Base library: catch errors ----------------------------------------
1527 | 1379 |
@@ -1541,12 +1393,9 @@ static void build_subroutines(BuildCtx *ctx)
1541 | sltiu AT, NARGS8:RC, 16 1393 | sltiu AT, NARGS8:RC, 16
1542 | lw CARG4, 8+HI(BASE) 1394 | lw CARG4, 8+HI(BASE)
1543 | bnez AT, ->fff_fallback 1395 | bnez AT, ->fff_fallback
1544 |.if FPU
1545 |. ldc1 FARG2, 8(BASE)
1546 |.else
1547 |. lw CARG3, 8+LO(BASE) 1396 |. lw CARG3, 8+LO(BASE)
1548 |.endif 1397 | lw CARG1, LO(BASE)
1549 | load_double FARG1, CARG1, CARG2, 0(BASE) 1398 | lw CARG2, HI(BASE)
1550 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1399 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1551 | li AT, LJ_TFUNC 1400 | li AT, LJ_TFUNC
1552 | move TMP2, BASE 1401 | move TMP2, BASE
@@ -1554,14 +1403,11 @@ static void build_subroutines(BuildCtx *ctx)
1554 | addiu BASE, BASE, 16 1403 | addiu BASE, BASE, 16
1555 | // Remember active hook before pcall. 1404 | // Remember active hook before pcall.
1556 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT 1405 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1557 |.if FPU 1406 | sw CARG3, LO(TMP2) // Swap function and traceback.
1558 | sdc1 FARG2, 0(TMP2) // Swap function and traceback.
1559 |.else
1560 | sw CARG3, LO(TMP2)
1561 | sw CARG4, HI(TMP2) 1407 | sw CARG4, HI(TMP2)
1562 |.endif
1563 | andi TMP3, TMP3, 1 1408 | andi TMP3, TMP3, 1
1564 | store_double FARG1, CARG1, CARG2, 8(TMP2) 1409 | sw CARG1, 8+LO(TMP2)
1410 | sw CARG2, 8+HI(TMP2)
1565 | addiu PC, TMP3, 16+FRAME_PCALL 1411 | addiu PC, TMP3, 16+FRAME_PCALL
1566 | b ->vm_call_dispatch 1412 | b ->vm_call_dispatch
1567 |. addiu NARGS8:RC, NARGS8:RC, -16 1413 |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1570,7 +1416,10 @@ static void build_subroutines(BuildCtx *ctx)
1570 | 1416 |
1571 |.macro coroutine_resume_wrap, resume 1417 |.macro coroutine_resume_wrap, resume
1572 |.if resume 1418 |.if resume
1573 |.ffunc_1 coroutine_resume 1419 |.ffunc coroutine_resume
1420 | lw CARG3, HI(BASE)
1421 | beqz NARGS8:RC, ->fff_fallback
1422 |. lw CARG1, LO(BASE)
1574 | li AT, LJ_TTHREAD 1423 | li AT, LJ_TTHREAD
1575 | bne CARG3, AT, ->fff_fallback 1424 | bne CARG3, AT, ->fff_fallback
1576 |.else 1425 |.else
@@ -1605,11 +1454,13 @@ static void build_subroutines(BuildCtx *ctx)
1605 | move CARG3, CARG2 1454 | move CARG3, CARG2
1606 | sw BASE, L->top 1455 | sw BASE, L->top
1607 |2: // Move args to coroutine. 1456 |2: // Move args to coroutine.
1608 | load_double1 0(BASE) 1457 | lw SFRETHI, HI(BASE)
1458 | lw SFRETLO, LO(BASE)
1609 | sltu AT, BASE, TMP1 1459 | sltu AT, BASE, TMP1
1610 | beqz AT, >3 1460 | beqz AT, >3
1611 |. addiu BASE, BASE, 8 1461 |. addiu BASE, BASE, 8
1612 | store_double1 0(CARG3) 1462 | sw SFRETHI, HI(CARG3)
1463 | sw SFRETLO, LO(CARG3)
1613 | b <2 1464 | b <2
1614 |. addiu CARG3, CARG3, 8 1465 |. addiu CARG3, CARG3, 8
1615 |3: 1466 |3:
@@ -1635,10 +1486,12 @@ static void build_subroutines(BuildCtx *ctx)
1635 | sw TMP2, L:RA->top // Clear coroutine stack. 1486 | sw TMP2, L:RA->top // Clear coroutine stack.
1636 | move TMP1, BASE 1487 | move TMP1, BASE
1637 |5: // Move results from coroutine. 1488 |5: // Move results from coroutine.
1638 | load_double1 0(TMP2) 1489 | lw SFRETHI, HI(TMP2)
1490 | lw SFRETLO, LO(TMP2)
1639 | addiu TMP2, TMP2, 8 1491 | addiu TMP2, TMP2, 8
1640 | sltu AT, TMP2, TMP3 1492 | sltu AT, TMP2, TMP3
1641 | store_double1 0(TMP1) 1493 | sw SFRETHI, HI(TMP1)
1494 | sw SFRETLO, LO(TMP1)
1642 | bnez AT, <5 1495 | bnez AT, <5
1643 |. addiu TMP1, TMP1, 8 1496 |. addiu TMP1, TMP1, 8
1644 |6: 1497 |6:
@@ -1663,12 +1516,14 @@ static void build_subroutines(BuildCtx *ctx)
1663 |.if resume 1516 |.if resume
1664 | addiu TMP3, TMP3, -8 1517 | addiu TMP3, TMP3, -8
1665 | li TMP1, LJ_TFALSE 1518 | li TMP1, LJ_TFALSE
1666 | load_double1 0(TMP3) 1519 | lw SFRETHI, HI(TMP3)
1520 | lw SFRETLO, LO(TMP3)
1667 | sw TMP3, L:RA->top // Remove error from coroutine stack. 1521 | sw TMP3, L:RA->top // Remove error from coroutine stack.
1668 | li RD, (2+1)*8 1522 | li RD, (2+1)*8
1669 | sw TMP1, -8+HI(BASE) // Prepend false to results. 1523 | sw TMP1, -8+HI(BASE) // Prepend false to results.
1670 | addiu RA, BASE, -8 1524 | addiu RA, BASE, -8
1671 | store_double1 0(BASE) // Copy error message. 1525 | sw SFRETHI, HI(BASE) // Copy error message.
1526 | sw SFRETLO, LO(BASE)
1672 | b <7 1527 | b <7
1673 |. andi TMP0, PC, FRAME_TYPE 1528 |. andi TMP0, PC, FRAME_TYPE
1674 |.else 1529 |.else
@@ -1705,39 +1560,28 @@ static void build_subroutines(BuildCtx *ctx)
1705 |//-- Math library ------------------------------------------------------- 1560 |//-- Math library -------------------------------------------------------
1706 | 1561 |
1707 |.ffunc_1 math_abs 1562 |.ffunc_1 math_abs
1708 | load_farg1 0(BASE) 1563 | bne SFARG1HI, TISNUM, >1
1709 | sltiu AT, CARG3, LJ_TISNUM 1564 |. sra TMP0, SFARG1LO, 31
1710 | beqz AT, ->fff_fallback 1565 | xor TMP1, SFARG1LO, TMP0
1566 | subu SFARG1LO, TMP1, TMP0
1567 | bgez SFARG1LO, ->fff_restv
1711 |. nop 1568 |. nop
1712 |.if FPU 1569 | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1713 |. abs.d FRET1, FARG1 1570 | b ->fff_restv
1714 |.else 1571 |. li SFARG1LO, 0
1715 |. lui TMP1, 0x8000 1572 |1:
1716 | and AT, CARG1, TMP1 1573 | sltiu AT, SFARG1HI, LJ_TISNUM
1717 | move CRET2, CARG2 1574 | beqz AT, ->fff_fallback
1718 | beqz AT, ->fff_resn 1575 |. sll SFARG1HI, SFARG1HI, 1
1719 |. move CRET1, CARG1 1576 | srl SFARG1HI, SFARG1HI, 1
1720 | xor CRET1, CARG1, TMP1 1577 |// fallthrough
1721 |.endif
1722 |
1723 |->fff_resn:
1724 | lw PC, FRAME_PC(BASE)
1725 | addiu RA, BASE, -8
1726 |.if HFABI
1727 | b ->fff_res1
1728 |. sdc1 FRET1, -8(BASE)
1729 |.else
1730 | sw CRET1, -8(BASE)
1731 | b ->fff_res1
1732 |. sw CRET2, -8+4(BASE)
1733 |.endif
1734 | 1578 |
1735 |->fff_restv: 1579 |->fff_restv:
1736 | // CARG3/CARG1 = TValue result. 1580 | // SFARG1LO/SFARG1HI = TValue result.
1737 | lw PC, FRAME_PC(BASE) 1581 | lw PC, FRAME_PC(BASE)
1738 | sw CARG3, -8+HI(BASE) 1582 | sw SFARG1HI, -8+HI(BASE)
1739 | addiu RA, BASE, -8 1583 | addiu RA, BASE, -8
1740 | sw CARG1, -8+LO(BASE) 1584 | sw SFARG1LO, -8+LO(BASE)
1741 |->fff_res1: 1585 |->fff_res1:
1742 | // RA = results, PC = return. 1586 | // RA = results, PC = return.
1743 | li RD, (1+1)*8 1587 | li RD, (1+1)*8
@@ -1766,21 +1610,19 @@ static void build_subroutines(BuildCtx *ctx)
1766 |. sw TISNIL, -8+HI(TMP1) 1610 |. sw TISNIL, -8+HI(TMP1)
1767 | 1611 |
1768 |.macro math_extern, func 1612 |.macro math_extern, func
1769 |->ff_math_ .. func: 1613 | .ffunc math_ .. func
1770 | lw CARG3, HI(BASE) 1614 | lw SFARG1HI, HI(BASE)
1771 | beqz NARGS8:RC, ->fff_fallback 1615 | beqz NARGS8:RC, ->fff_fallback
1772 |. load_got func 1616 |. load_got func
1773 | sltiu AT, CARG3, LJ_TISNUM 1617 | sltiu AT, SFARG1HI, LJ_TISNUM
1774 | beqz AT, ->fff_fallback 1618 | beqz AT, ->fff_fallback
1775 |. nop 1619 |.if FPU
1776 |.if HFABI
1777 | call_extern
1778 |. ldc1 FARG1, 0(BASE) 1620 |. ldc1 FARG1, 0(BASE)
1779 |.else 1621 |.else
1780 | lw CARG1, 0(BASE) 1622 |. lw SFARG1LO, LO(BASE)
1781 | call_extern
1782 |. lw CARG2, 4(BASE)
1783 |.endif 1623 |.endif
1624 | call_extern
1625 |. nop
1784 | b ->fff_resn 1626 | b ->fff_resn
1785 |. nop 1627 |. nop
1786 |.endmacro 1628 |.endmacro
@@ -1794,10 +1636,22 @@ static void build_subroutines(BuildCtx *ctx)
1794 |. nop 1636 |. nop
1795 |.endmacro 1637 |.endmacro
1796 | 1638 |
1639 |// TODO: Return integer type if result is integer (own sf implementation).
1797 |.macro math_round, func 1640 |.macro math_round, func
1798 | .ffunc_n math_ .. func 1641 |->ff_math_ .. func:
1799 |. nop 1642 | lw SFARG1HI, HI(BASE)
1643 | beqz NARGS8:RC, ->fff_fallback
1644 |. lw SFARG1LO, LO(BASE)
1645 | beq SFARG1HI, TISNUM, ->fff_restv
1646 |. sltu AT, SFARG1HI, TISNUM
1647 | beqz AT, ->fff_fallback
1648 |.if FPU
1649 |. ldc1 FARG1, 0(BASE)
1800 | bal ->vm_ .. func 1650 | bal ->vm_ .. func
1651 |.else
1652 |. load_got func
1653 | call_extern
1654 |.endif
1801 |. nop 1655 |. nop
1802 | b ->fff_resn 1656 | b ->fff_resn
1803 |. nop 1657 |. nop
@@ -1809,17 +1663,16 @@ static void build_subroutines(BuildCtx *ctx)
1809 |.ffunc math_log 1663 |.ffunc math_log
1810 | li AT, 8 1664 | li AT, 8
1811 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1665 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1812 |. lw CARG3, HI(BASE) 1666 |. lw SFARG1HI, HI(BASE)
1813 | sltiu AT, CARG3, LJ_TISNUM 1667 | sltiu AT, SFARG1HI, LJ_TISNUM
1814 | beqz AT, ->fff_fallback 1668 | beqz AT, ->fff_fallback
1815 |. load_got log 1669 |. load_got log
1816 |.if HFABI 1670 |.if FPU
1817 | call_extern 1671 | call_extern
1818 |. ldc1 FARG1, 0(BASE) 1672 |. ldc1 FARG1, 0(BASE)
1819 |.else 1673 |.else
1820 | lw CARG1, 0(BASE)
1821 | call_extern 1674 | call_extern
1822 |. lw CARG2, 4(BASE) 1675 |. lw SFARG1LO, LO(BASE)
1823 |.endif 1676 |.endif
1824 | b ->fff_resn 1677 | b ->fff_resn
1825 |. nop 1678 |. nop
@@ -1842,37 +1695,40 @@ static void build_subroutines(BuildCtx *ctx)
1842 |.if FPU 1695 |.if FPU
1843 |.ffunc_n math_sqrt 1696 |.ffunc_n math_sqrt
1844 |. sqrt.d FRET1, FARG1 1697 |. sqrt.d FRET1, FARG1
1845 | b ->fff_resn 1698 |// fallthrough to ->fff_resn
1846 |. nop
1847 |.else 1699 |.else
1848 | math_extern sqrt 1700 | math_extern sqrt
1849 |.endif 1701 |.endif
1850 | 1702 |
1851 |.ffunc_2 math_ldexp 1703 |->fff_resn:
1852 | sltiu TMP0, CARG3, LJ_TISNUM 1704 | lw PC, FRAME_PC(BASE)
1853 | sltiu TMP1, CARG4, LJ_TISNUM 1705 | addiu RA, BASE, -8
1854 | load_farg1 0(BASE)
1855 | load_farg2 8(BASE)
1856 | and TMP0, TMP0, TMP1
1857 | beqz TMP0, ->fff_fallback
1858 |.if FPU 1706 |.if FPU
1859 | load_got ldexp 1707 | b ->fff_res1
1860 | trunc.w.d FARG2, FARG2 1708 |. sdc1 FRET1, -8(BASE)
1861 | call_extern
1862 |. mfc1 CARG3, FARG2
1863 |.else 1709 |.else
1864 | sw CARG1, TEMP_SAVE_1 1710 | sw SFRETHI, -8+HI(BASE)
1865 | sw CARG2, TEMP_SAVE_2 1711 | b ->fff_res1
1866 | load_got __fixdfsi 1712 |. sw SFRETLO, -8+LO(BASE)
1867 | move CARG1, CARG3 1713 |.endif
1868 | call_extern 1714 |
1869 |. move CARG2, CARG4 1715 |
1870 | lw CARG1, TEMP_SAVE_1 1716 |.ffunc math_ldexp
1717 | sltiu AT, NARGS8:RC, 16
1718 | lw SFARG1HI, HI(BASE)
1719 | bnez AT, ->fff_fallback
1720 |. lw CARG4, 8+HI(BASE)
1721 | bne CARG4, TISNUM, ->fff_fallback
1871 | load_got ldexp 1722 | load_got ldexp
1872 | lw CARG2, TEMP_SAVE_2 1723 |. sltu AT, SFARG1HI, TISNUM
1873 | call_extern 1724 | beqz AT, ->fff_fallback
1874 |. move CARG3, CRET1 1725 |.if FPU
1726 |. ldc1 FARG1, 0(BASE)
1727 |.else
1728 |. lw SFARG1LO, LO(BASE)
1875 |.endif 1729 |.endif
1730 | call_extern
1731 |. lw CARG3, 8+LO(BASE)
1876 | b ->fff_resn 1732 | b ->fff_resn
1877 |. nop 1733 |. nop
1878 | 1734 |
@@ -1883,14 +1739,17 @@ static void build_subroutines(BuildCtx *ctx)
1883 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 1739 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1884 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1740 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1885 | addiu RA, BASE, -8 1741 | addiu RA, BASE, -8
1886 | store_double FRET1, CRET1, CRET2, 0(RA)
1887 |.if FPU 1742 |.if FPU
1888 | mtc1 TMP1, FARG2 1743 | mtc1 TMP1, FARG2
1744 | sdc1 FRET1, 0(RA)
1889 | cvt.d.w FARG2, FARG2 1745 | cvt.d.w FARG2, FARG2
1746 | sdc1 FARG2, 8(RA)
1890 |.else 1747 |.else
1891 | cvti2d TMP1 1748 | sw SFRETLO, LO(RA)
1749 | sw SFRETHI, HI(RA)
1750 | sw TMP1, 8+LO(RA)
1751 | sw TISNUM, 8+HI(RA)
1892 |.endif 1752 |.endif
1893 | store_double FARG2, CRET1, CRET2, 8(RA)
1894 | b ->fff_res 1753 | b ->fff_res
1895 |. li RD, (2+1)*8 1754 |. li RD, (2+1)*8
1896 | 1755 |
@@ -1900,92 +1759,98 @@ static void build_subroutines(BuildCtx *ctx)
1900 | call_extern 1759 | call_extern
1901 |. addiu CARG3, BASE, -8 1760 |. addiu CARG3, BASE, -8
1902 | addiu RA, BASE, -8 1761 | addiu RA, BASE, -8
1903 |.if HFABI 1762 |.if FPU
1904 | sdc1 FRET1, 0(BASE) 1763 | sdc1 FRET1, 0(BASE)
1905 |.else 1764 |.else
1906 | sw CRET1, 0(BASE) 1765 | sw SFRETLO, LO(BASE)
1907 | sw CRET2, 4(BASE) 1766 | sw SFRETHI, HI(BASE)
1908 |.endif 1767 |.endif
1909 | b ->fff_res 1768 | b ->fff_res
1910 |. li RD, (2+1)*8 1769 |. li RD, (2+1)*8
1911 | 1770 |
1912 |.macro math_minmax, name, ismax 1771 |.macro math_minmax, name, intins, fpins
1913 |->ff_ .. name: 1772 | .ffunc_1 name
1914 | lw CARG3, HI(BASE) 1773 | addu TMP3, BASE, NARGS8:RC
1915 | beqz NARGS8:RC, ->fff_fallback 1774 | bne SFARG1HI, TISNUM, >5
1916 |. sltiu AT, CARG3, LJ_TISNUM 1775 |. addiu TMP2, BASE, 8
1776 |1: // Handle integers.
1777 |. lw SFARG2HI, HI(TMP2)
1778 | beq TMP2, TMP3, ->fff_restv
1779 |. lw SFARG2LO, LO(TMP2)
1780 | bne SFARG2HI, TISNUM, >3
1781 |. slt AT, SFARG1LO, SFARG2LO
1782 | intins SFARG1LO, SFARG2LO, AT
1783 | b <1
1784 |. addiu TMP2, TMP2, 8
1785 |
1786 |3: // Convert intermediate result to number and continue with number loop.
1787 | sltiu AT, SFARG2HI, LJ_TISNUM
1917 | beqz AT, ->fff_fallback 1788 | beqz AT, ->fff_fallback
1918 |. addu TMP2, BASE, NARGS8:RC 1789 |.if FPU
1919 | addiu TMP1, BASE, 8 1790 |. mtc1 SFARG1LO, FRET1
1920 |.if HFABI 1791 | cvt.d.w FRET1, FRET1
1921 | ldc1 FRET1, 0(BASE) 1792 | b >7
1922 | beq TMP1, TMP2, ->fff_resn 1793 |. ldc1 FARG1, 0(TMP2)
1923 |.else 1794 |.else
1924 | lw CRET1, 0(BASE) 1795 |. nop
1925 | lw CRET2, 4(BASE) 1796 | bal ->vm_sfi2d_1
1926 | beq TMP1, TMP2, ->fff_resn 1797 |. nop
1798 | b >7
1799 |. nop
1927 |.endif 1800 |.endif
1928 |1: 1801 |
1929 |. lw CARG3, HI(TMP1) 1802 |5:
1930 |.if HFABI 1803 |. sltiu AT, SFARG1HI, LJ_TISNUM
1931 | ldc1 FARG1, 0(TMP1) 1804 | beqz AT, ->fff_fallback
1805 |.if FPU
1806 |. ldc1 FRET1, 0(BASE)
1807 |.endif
1808 |
1809 |6: // Handle numbers.
1810 |. lw SFARG2HI, HI(TMP2)
1811 |.if FPU
1812 | beq TMP2, TMP3, ->fff_resn
1932 |.else 1813 |.else
1933 | lw CARG1, 0(TMP1) 1814 | beq TMP2, TMP3, ->fff_restv
1934 | lw CARG2, 4(TMP1)
1935 |.endif 1815 |.endif
1936 | sltiu AT, CARG3, LJ_TISNUM 1816 |. sltiu AT, SFARG2HI, LJ_TISNUM
1937 | beqz AT, ->fff_fallback 1817 | beqz AT, >8
1938 |. addiu TMP1, TMP1, 8
1939 |.if FPU 1818 |.if FPU
1940 |.if ismax 1819 |. ldc1 FARG1, 0(TMP2)
1941 | c.olt.d FARG1, FRET1
1942 |.else 1820 |.else
1943 | c.olt.d FRET1, FARG1 1821 |. lw SFARG2LO, LO(TMP2)
1944 |.endif 1822 |.endif
1945 | bne TMP1, TMP2, <1 1823 |7:
1946 |. movf.d FRET1, FARG1 1824 |.if FPU
1825 | c.olt.d FRET1, FARG1
1826 | fpins FRET1, FARG1
1947 |.else 1827 |.else
1948 | load_got __ledf2 1828 | bal ->vm_sfcmpolt
1949 | sw TMP1, TEMP_SAVE_1
1950 | sw TMP2, TEMP_SAVE_2
1951 | sw CARG1, TEMP_SAVE_3
1952 | sw CARG2, TEMP_SAVE_4
1953 | sw CRET1, TEMP_SAVE_5
1954 | sw CRET2, TEMP_SAVE_6
1955 | move CARG3, CRET1
1956 | call_extern
1957 |. move CARG4, CRET2
1958 | lw CARG4, TEMP_SAVE_6
1959 | lw CARG3, TEMP_SAVE_5
1960 | lw CARG2, TEMP_SAVE_4
1961 | lw CARG1, TEMP_SAVE_3
1962 | lw TMP2, TEMP_SAVE_2
1963 | lw TMP1, TEMP_SAVE_1
1964 |.if ismax
1965 | beqz CRET1, >2 // farg1==fret1
1966 |. li TMP3, 1
1967 | beq CRET1, TMP3, >2 // farg1>fret1
1968 |. nop 1829 |. nop
1830 | intins SFARG1LO, SFARG2LO, CRET1
1831 | intins SFARG1HI, SFARG2HI, CRET1
1832 |.endif
1833 | b <6
1834 |. addiu TMP2, TMP2, 8
1835 |
1836 |8: // Convert integer to number and continue with number loop.
1837 | bne SFARG2HI, TISNUM, ->fff_fallback
1838 |.if FPU
1839 |. lwc1 FARG1, LO(TMP2)
1840 | b <7
1841 |. cvt.d.w FARG1, FARG1
1969 |.else 1842 |.else
1970 | blez CRET1, >2
1971 |. nop 1843 |. nop
1972 |.endif 1844 | bal ->vm_sfi2d_2
1973 | move CRET1, CARG3 // Keep the value.
1974 | b >3
1975 |. move CRET2, CARG4
1976 |2:
1977 | move CRET1, CARG1 // Set new value.
1978 | move CRET2, CARG2
1979 |3:
1980 | bne TMP1, TMP2, <1
1981 |. nop 1845 |. nop
1982 |.endif 1846 | b <7
1983 | b ->fff_resn
1984 |. nop 1847 |. nop
1848 |.endif
1849 |
1985 |.endmacro 1850 |.endmacro
1986 | 1851 |
1987 | math_minmax math_min, 0 1852 | math_minmax math_min, movz, movf.d
1988 | math_minmax math_max, 1 1853 | math_minmax math_max, movn, movt.d
1989 | 1854 |
1990 |//-- String library ----------------------------------------------------- 1855 |//-- String library -----------------------------------------------------
1991 | 1856 |
@@ -1999,51 +1864,29 @@ static void build_subroutines(BuildCtx *ctx)
1999 |. nop 1864 |. nop
2000 | lw TMP0, STR:CARG1->len 1865 | lw TMP0, STR:CARG1->len
2001 | addiu RA, BASE, -8 1866 | addiu RA, BASE, -8
1867 | lw PC, FRAME_PC(BASE)
2002 | sltu RD, r0, TMP0 1868 | sltu RD, r0, TMP0
2003 | lw PC, FRAME_PC(BASE)
2004 | addiu RD, RD, 1
2005 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). 1869 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
2006 |.if FPU 1870 | addiu RD, RD, 1
2007 | mtc1 TMP1, f0 1871 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
2008 | cvt.d.w f0, f0 1872 | sw TISNUM, HI(RA)
2009 | sdc1 f0, 0(RA)
2010 |.else
2011 | sw RD, TEMP_SAVE_1
2012 | cvti2d TMP1
2013 | sw CRET1, 0(RA)
2014 | sw CRET2, 4(RA)
2015 | lw RD, TEMP_SAVE_1
2016 |.endif
2017 | b ->fff_res 1873 | b ->fff_res
2018 |. sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 1874 |. sw TMP1, LO(RA)
2019 | 1875 |
2020 |.ffunc string_char // Only handle the 1-arg case here. 1876 |.ffunc string_char // Only handle the 1-arg case here.
2021 | ffgccheck 1877 | ffgccheck
2022 | lw CARG3, HI(BASE) 1878 |. lw CARG3, HI(BASE)
2023 | li AT, 8 1879 | lw CARG1, LO(BASE)
2024 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1880 | li TMP1, 255
2025 |. sltiu AT, CARG3, LJ_TISNUM 1881 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
2026 | beqz AT, ->fff_fallback 1882 | xor TMP0, CARG3, TISNUM // Integer.
1883 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1884 | or AT, AT, TMP0
1885 | or AT, AT, TMP1
1886 | bnez AT, ->fff_fallback
2027 |. li CARG3, 1 1887 |. li CARG3, 1
2028 | sltiu AT, TMP0, 256
2029 | beqz AT, ->fff_fallback
2030 | load_farg1 0(BASE)
2031 |.if FPU
2032 | trunc.w.d FARG1, FARG1
2033 | mfc1 TMP0, FARG1
2034 |.else
2035 | load_got __fixdfsi
2036 | sw RB, TEMP_SAVE_1
2037 | sw RC, TEMP_SAVE_2
2038 | call_extern
2039 |. sw CARG3, TEMP_SAVE_3
2040 | lw CARG3, TEMP_SAVE_3
2041 | lw RC, TEMP_SAVE_2
2042 | lw RB, TEMP_SAVE_1
2043 | move TMP0, CRET1
2044 |.endif
2045 | addiu CARG2, sp, ARG5_OFS 1888 | addiu CARG2, sp, ARG5_OFS
2046 | sw TMP0, ARG5 1889 | sb CARG1, ARG5
2047 |->fff_newstr: 1890 |->fff_newstr:
2048 | load_got lj_str_new 1891 | load_got lj_str_new
2049 | sw BASE, L->base 1892 | sw BASE, L->base
@@ -2053,24 +1896,13 @@ static void build_subroutines(BuildCtx *ctx)
2053 | // Returns GCstr *. 1896 | // Returns GCstr *.
2054 | lw BASE, L->base 1897 | lw BASE, L->base
2055 |->fff_resstr: 1898 |->fff_resstr:
2056 | move CARG1, CRET1 1899 | move SFARG1LO, CRET1
2057 | b ->fff_restv 1900 | b ->fff_restv
2058 |. li CARG3, LJ_TSTR 1901 |. li SFARG1HI, LJ_TSTR
2059 | 1902 |
2060 |.ffunc string_sub 1903 |.ffunc string_sub
2061 | ffgccheck 1904 | ffgccheck
2062 | addiu AT, NARGS8:RC, -16 1905 |. addiu AT, NARGS8:RC, -16
2063 |.if FPU
2064 | ldc1 f0, 16(BASE)
2065 | trunc.w.d f0, f0
2066 |.else
2067 | lw CARG1, 16(BASE)
2068 | load_got __fixdfsi
2069 | sw AT, TEMP_SAVE_1
2070 | call_extern
2071 |. lw CARG2, 16+4(BASE)
2072 | lw AT, TEMP_SAVE_1
2073 |.endif
2074 | lw CARG3, 16+HI(BASE) 1906 | lw CARG3, 16+HI(BASE)
2075 | lw TMP0, HI(BASE) 1907 | lw TMP0, HI(BASE)
2076 | lw STR:CARG1, LO(BASE) 1908 | lw STR:CARG1, LO(BASE)
@@ -2078,33 +1910,13 @@ static void build_subroutines(BuildCtx *ctx)
2078 |. lw CARG2, 8+HI(BASE) 1910 |. lw CARG2, 8+HI(BASE)
2079 | beqz AT, >1 1911 | beqz AT, >1
2080 |. li CARG4, -1 1912 |. li CARG4, -1
2081 | sltiu AT, CARG3, LJ_TISNUM 1913 | bne CARG3, TISNUM, ->fff_fallback
2082 | beqz AT, ->fff_fallback 1914 |. lw CARG4, 16+LO(BASE)
2083 |.if FPU
2084 |. mfc1 CARG4, f0
2085 |.else
2086 |. move CARG4, CRET1
2087 |.endif
2088 |1: 1915 |1:
2089 | sltiu AT, CARG2, LJ_TISNUM 1916 | bne CARG2, TISNUM, ->fff_fallback
2090 | beqz AT, ->fff_fallback
2091 |. li AT, LJ_TSTR 1917 |. li AT, LJ_TSTR
2092 | bne TMP0, AT, ->fff_fallback 1918 | bne TMP0, AT, ->fff_fallback
2093 |.if FPU 1919 |. lw CARG3, 8+LO(BASE)
2094 |. ldc1 f2, 8(BASE)
2095 | trunc.w.d f2, f2
2096 | mfc1 CARG3, f2
2097 |.else
2098 |. sw CARG1, TEMP_SAVE_1
2099 | sw CARG4, TEMP_SAVE_2
2100 | lw CARG2, 8+4(BASE)
2101 | load_got __fixdfsi
2102 | call_extern
2103 |. lw CARG1, 8(BASE)
2104 | lw CARG1, TEMP_SAVE_1
2105 | lw CARG4, TEMP_SAVE_2
2106 | move CARG3, CRET1
2107 |.endif
2108 | lw CARG2, STR:CARG1->len 1920 | lw CARG2, STR:CARG1->len
2109 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end 1921 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
2110 | slt AT, CARG4, r0 1922 | slt AT, CARG4, r0
@@ -2127,14 +1939,14 @@ static void build_subroutines(BuildCtx *ctx)
2127 | bgez CARG3, ->fff_newstr 1939 | bgez CARG3, ->fff_newstr
2128 |. addiu CARG3, CARG3, 1 // len++ 1940 |. addiu CARG3, CARG3, 1 // len++
2129 |->fff_emptystr: // Return empty string. 1941 |->fff_emptystr: // Return empty string.
2130 | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) 1942 | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
2131 | b ->fff_restv 1943 | b ->fff_restv
2132 |. li CARG3, LJ_TSTR 1944 |. li SFARG1HI, LJ_TSTR
2133 | 1945 |
2134 |.macro ffstring_op, name 1946 |.macro ffstring_op, name
2135 | .ffunc string_ .. name 1947 | .ffunc string_ .. name
2136 | ffgccheck 1948 | ffgccheck
2137 | lw CARG3, HI(BASE) 1949 |. lw CARG3, HI(BASE)
2138 | lw STR:CARG2, LO(BASE) 1950 | lw STR:CARG2, LO(BASE)
2139 | beqz NARGS8:RC, ->fff_fallback 1951 | beqz NARGS8:RC, ->fff_fallback
2140 |. li AT, LJ_TSTR 1952 |. li AT, LJ_TSTR
@@ -2160,88 +1972,96 @@ static void build_subroutines(BuildCtx *ctx)
2160 | 1972 |
2161 |//-- Bit library -------------------------------------------------------- 1973 |//-- Bit library --------------------------------------------------------
2162 | 1974 |
2163 |.if not FPU 1975 |->vm_tobit_fb:
1976 | beqz TMP1, ->fff_fallback
1977 |.if FPU
1978 |. ldc1 FARG1, 0(BASE)
1979 | add.d FARG1, FARG1, TOBIT
1980 | jr ra
1981 |. mfc1 CRET1, FARG1
1982 |.else
2164 |// FP number to bit conversion for soft-float. 1983 |// FP number to bit conversion for soft-float.
2165 |->vm_tobit: 1984 |->vm_tobit:
2166 | sll TMP0, CARG1, 1 1985 | sll TMP0, SFARG1HI, 1
2167 | lui TMP3, 0x0020 1986 | lui AT, 0x0020
2168 | addu TMP0, TMP0, TMP3 1987 | addu TMP0, TMP0, AT
2169 | slt TMP3, TMP0, r0 1988 | slt AT, TMP0, r0
2170 | movz CARG2, r0, TMP3 1989 | movz SFARG1LO, r0, AT
2171 | beqz TMP3, >2 1990 | beqz AT, >2
2172 |. li CARG4, 0x3e0 1991 |. li TMP1, 0x3e0
2173 | not CARG4, CARG4 1992 | not TMP1, TMP1
2174 | sra TMP0, TMP0, 21 1993 | sra TMP0, TMP0, 21
2175 | subu TMP0, CARG4, TMP0 1994 | subu TMP0, TMP1, TMP0
2176 | slt TMP3, TMP0, r0 1995 | slt AT, TMP0, r0
2177 | bnez TMP3, >1 1996 | bnez AT, >1
2178 |. sll CARG4, CARG1, 11 1997 |. sll TMP1, SFARG1HI, 11
2179 | lui TMP3, 0x8000 1998 | lui AT, 0x8000
2180 | or CARG4, CARG4, TMP3 1999 | or TMP1, TMP1, AT
2181 | srl TMP3, CARG2, 21 2000 | srl AT, SFARG1LO, 21
2182 | or CARG4, CARG4, TMP3 2001 | or TMP1, TMP1, AT
2183 | slt TMP3, CARG1, r0 2002 | slt AT, SFARG1HI, r0
2184 | beqz TMP3, >2 2003 | beqz AT, >2
2185 |. srlv CARG2, CARG4, TMP0 2004 |. srlv SFARG1LO, TMP1, TMP0
2186 | subu CARG2, r0, CARG2 2005 | subu SFARG1LO, r0, SFARG1LO
2187 |2: 2006 |2:
2188 | jr ra 2007 | jr ra
2189 |. move CRET1, CARG2 2008 |. move CRET1, SFARG1LO
2190 |1: 2009 |1:
2191 | addiu TMP0, TMP0, 21 2010 | addiu TMP0, TMP0, 21
2192 | srlv CARG4, CARG2, TMP0 2011 | srlv TMP1, SFARG1LO, TMP0
2193 | li TMP3, 20 2012 | li AT, 20
2194 | subu TMP0, TMP3, TMP0 2013 | subu TMP0, AT, TMP0
2195 | sll CARG2, CARG1, 12 2014 | sll SFARG1LO, SFARG1HI, 12
2196 | sllv TMP3, CARG2, TMP0 2015 | sllv AT, SFARG1LO, TMP0
2197 | or CARG2, CARG4, TMP3 2016 | or SFARG1LO, TMP1, AT
2198 | slt TMP3, CARG1, r0 2017 | slt AT, SFARG1HI, r0
2199 | beqz TMP3, <2 2018 | beqz AT, <2
2200 |. nop 2019 |. nop
2201 | jr ra 2020 | jr ra
2202 |. subu CRET1, r0, CARG2 2021 |. subu CRET1, r0, SFARG1LO
2203 |.endif 2022 |.endif
2204 | 2023 |
2205 |.macro .ffunc_bit, name 2024 |.macro .ffunc_bit, name
2206 | .ffunc_n bit_..name 2025 | .ffunc_1 bit_..name
2207 |.if FPU 2026 | beq SFARG1HI, TISNUM, >6
2208 |. add.d FARG1, FARG1, TOBIT 2027 |. move CRET1, SFARG1LO
2209 | mfc1 CRET1, FARG1 2028 | bal ->vm_tobit_fb
2210 |.else 2029 |. sltu TMP1, SFARG1HI, TISNUM
2211 |. nop 2030 |6:
2212 | bal ->vm_tobit
2213 |. nop
2214 |.endif
2215 |.endmacro 2031 |.endmacro
2216 | 2032 |
2217 |.macro .ffunc_bit_op, name, ins 2033 |.macro .ffunc_bit_op, name, ins
2218 | .ffunc_bit name 2034 | .ffunc_bit name
2219 | addiu TMP1, BASE, 8 2035 | addiu TMP2, BASE, 8
2220 | addu TMP2, BASE, NARGS8:RC 2036 | addu TMP3, BASE, NARGS8:RC
2221 |1: 2037 |1:
2222 | move CRET2, CRET1 2038 | lw SFARG1HI, HI(TMP2)
2223 | lw CARG4, HI(TMP1) 2039 | beq TMP2, TMP3, ->fff_resi
2224 |.if FPU 2040 |. lw SFARG1LO, LO(TMP2)
2225 | beq TMP1, TMP2, ->fff_resi
2226 |. ldc1 FARG1, 0(TMP1)
2227 |.else
2228 | lw CARG1, 0(TMP1)
2229 | beq TMP1, TMP2, ->fff_resi
2230 |. lw CARG2, 4(TMP1)
2231 |.endif
2232 | sltiu AT, CARG4, LJ_TISNUM
2233 | beqz AT, ->fff_fallback
2234 |.if FPU 2041 |.if FPU
2042 | bne SFARG1HI, TISNUM, >2
2043 |. addiu TMP2, TMP2, 8
2044 | b <1
2045 |. ins CRET1, CRET1, SFARG1LO
2046 |2:
2047 | ldc1 FARG1, -8(TMP2)
2048 | sltu TMP1, SFARG1HI, TISNUM
2049 | beqz TMP1, ->fff_fallback
2235 |. add.d FARG1, FARG1, TOBIT 2050 |. add.d FARG1, FARG1, TOBIT
2236 | mfc1 CRET1, FARG1 2051 | mfc1 SFARG1LO, FARG1
2052 | b <1
2053 |. ins CRET1, CRET1, SFARG1LO
2237 |.else 2054 |.else
2238 |. nop 2055 | beq SFARG1HI, TISNUM, >2
2239 | bal ->vm_tobit 2056 |. move CRET2, CRET1
2240 |. nop 2057 | bal ->vm_tobit_fb
2241 |.endif 2058 |. sltu TMP1, SFARG1HI, TISNUM
2242 | ins CRET1, CRET2, CRET1 2059 | move SFARG1LO, CRET2
2060 |2:
2061 | ins CRET1, CRET1, SFARG1LO
2243 | b <1 2062 | b <1
2244 |. addiu TMP1, TMP1, 8 2063 |. addiu TMP2, TMP2, 8
2064 |.endif
2245 |.endmacro 2065 |.endmacro
2246 | 2066 |
2247 |.ffunc_bit_op band, and 2067 |.ffunc_bit_op band, and
@@ -2265,36 +2085,28 @@ static void build_subroutines(BuildCtx *ctx)
2265 |. not CRET1, CRET1 2085 |. not CRET1, CRET1
2266 | 2086 |
2267 |.macro .ffunc_bit_sh, name, ins, shmod 2087 |.macro .ffunc_bit_sh, name, ins, shmod
2268 | .ffunc_nn bit_..name 2088 | .ffunc_2 bit_..name
2269 |.if FPU 2089 | beq SFARG1HI, TISNUM, >1
2270 |. add.d FARG1, FARG1, TOBIT 2090 |. nop
2271 | add.d FARG2, FARG2, TOBIT 2091 | bal ->vm_tobit_fb
2272 | mfc1 CARG1, FARG1 2092 |. sltu TMP1, SFARG1HI, TISNUM
2273 | mfc1 CARG2, FARG2 2093 | move SFARG1LO, CRET1
2274 |.else 2094 |1:
2275 |. sw CARG4, TEMP_SAVE_1 2095 | bne SFARG2HI, TISNUM, ->fff_fallback
2276 | bal ->vm_tobit
2277 |. nop 2096 |. nop
2278 | move CRET2, CRET1
2279 | lw CARG2, TEMP_SAVE_1
2280 | bal ->vm_tobit
2281 |. move CARG1, CARG3
2282 | move CARG2, CRET1
2283 | move CARG1, CRET2
2284 |.endif
2285 |.if shmod == 1 2097 |.if shmod == 1
2286 | li AT, 32 2098 | li AT, 32
2287 | subu TMP0, AT, CARG2 2099 | subu TMP0, AT, SFARG2LO
2288 | sllv CARG2, CARG1, CARG2 2100 | sllv SFARG2LO, SFARG1LO, SFARG2LO
2289 | srlv CARG1, CARG1, TMP0 2101 | srlv SFARG1LO, SFARG1LO, TMP0
2290 |.elif shmod == 2 2102 |.elif shmod == 2
2291 | li AT, 32 2103 | li AT, 32
2292 | subu TMP0, AT, CARG2 2104 | subu TMP0, AT, SFARG2LO
2293 | srlv CARG2, CARG1, CARG2 2105 | srlv SFARG2LO, SFARG1LO, SFARG2LO
2294 | sllv CARG1, CARG1, TMP0 2106 | sllv SFARG1LO, SFARG1LO, TMP0
2295 |.endif 2107 |.endif
2296 | b ->fff_resi 2108 | b ->fff_resi
2297 |. ins CRET1, CARG1, CARG2 2109 |. ins CRET1, SFARG1LO, SFARG2LO
2298 |.endmacro 2110 |.endmacro
2299 | 2111 |
2300 |.ffunc_bit_sh lshift, sllv, 0 2112 |.ffunc_bit_sh lshift, sllv, 0
@@ -2308,17 +2120,9 @@ static void build_subroutines(BuildCtx *ctx)
2308 |->fff_resi: 2120 |->fff_resi:
2309 | lw PC, FRAME_PC(BASE) 2121 | lw PC, FRAME_PC(BASE)
2310 | addiu RA, BASE, -8 2122 | addiu RA, BASE, -8
2311 |.if HFABI 2123 | sw TISNUM, -8+HI(BASE)
2312 | mtc1 CRET1, FRET1
2313 | cvt.d.w FRET1, FRET1
2314 | b ->fff_res1 2124 | b ->fff_res1
2315 |. sdc1 FRET1, -8(BASE) 2125 |. sw CRET1, -8+LO(BASE)
2316 |.else // Result already in CRET1.
2317 | cvti2d CRET1
2318 | sw CRET1, -8(BASE)
2319 | b ->fff_res1
2320 |. sw CRET2, -8+4(BASE)
2321 |.endif
2322 | 2126 |
2323 |//----------------------------------------------------------------------- 2127 |//-----------------------------------------------------------------------
2324 | 2128 |
@@ -2516,10 +2320,12 @@ static void build_subroutines(BuildCtx *ctx)
2516 | beqz AT, >2 2320 | beqz AT, >2
2517 |. addu RC, BASE, RC // Call base. 2321 |. addu RC, BASE, RC // Call base.
2518 |1: // Move results down. 2322 |1: // Move results down.
2519 | ldc1 f0, 0(RA) 2323 | lw SFRETHI, HI(RA)
2324 | lw SFRETLO, LO(RA)
2520 | addiu AT, AT, -8 2325 | addiu AT, AT, -8
2521 | addiu RA, RA, 8 2326 | addiu RA, RA, 8
2522 | sdc1 f0, 0(RC) 2327 | sw SFRETHI, HI(RC)
2328 | sw SFRETLO, LO(RC)
2523 | bnez AT, <1 2329 | bnez AT, <1
2524 |. addiu RC, RC, 8 2330 |. addiu RC, RC, 8
2525 |2: 2331 |2:
@@ -2658,6 +2464,7 @@ static void build_subroutines(BuildCtx *ctx)
2658 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2464 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2659 | sll MULTRES, CRET1, 3 2465 | sll MULTRES, CRET1, 3
2660 | li TISNIL, LJ_TNIL 2466 | li TISNIL, LJ_TNIL
2467 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2661 | sw MULTRES, SAVE_MULTRES 2468 | sw MULTRES, SAVE_MULTRES
2662 | .FPU mtc1 TMP3, TOBIT 2469 | .FPU mtc1 TMP3, TOBIT
2663 | lw TMP1, LFUNC:RB->pc 2470 | lw TMP1, LFUNC:RB->pc
@@ -2712,6 +2519,7 @@ static void build_subroutines(BuildCtx *ctx)
2712 |//-- Math helper functions ---------------------------------------------- 2519 |//-- Math helper functions ----------------------------------------------
2713 |//----------------------------------------------------------------------- 2520 |//-----------------------------------------------------------------------
2714 | 2521 |
2522 |// Hard-float round to integer.
2715 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2523 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2716 |.macro vm_round_hf, func 2524 |.macro vm_round_hf, func
2717 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2525 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
@@ -2755,22 +2563,9 @@ static void build_subroutines(BuildCtx *ctx)
2755 |. mov.d FRET1, FARG1 2563 |. mov.d FRET1, FARG1
2756 |.endmacro 2564 |.endmacro
2757 | 2565 |
2758 |.macro vm_round_sf, func
2759 | addiu sp, sp, -8
2760 | load_got func
2761 | sw ra, 0(sp)
2762 | call_extern
2763 |. nop
2764 | lw ra, 0(sp)
2765 | jr ra
2766 |. addiu sp, sp, 8
2767 |.endmacro
2768 |
2769 |.macro vm_round, func 2566 |.macro vm_round, func
2770 |.if FPU 2567 |.if FPU
2771 | vm_round_hf, func 2568 | vm_round_hf, func
2772 |.else
2773 | vm_round_sf, func
2774 |.endif 2569 |.endif
2775 |.endmacro 2570 |.endmacro
2776 | 2571 |
@@ -2783,6 +2578,159 @@ static void build_subroutines(BuildCtx *ctx)
2783 | vm_round trunc 2578 | vm_round trunc
2784 |.endif 2579 |.endif
2785 | 2580 |
2581 |// Soft-float integer to number conversion.
2582 |.macro sfi2d, AHI, ALO
2583 |.if not FPU
2584 | beqz ALO, >9 // Handle zero first.
2585 |. sra TMP0, ALO, 31
2586 | xor TMP1, ALO, TMP0
2587 | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2588 | clz AHI, TMP1
2589 | andi TMP0, TMP0, 0x800 // Mask sign bit.
2590 | li AT, 0x3ff+31-1
2591 | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2592 | subu AHI, AT, AHI // Exponent - 1 in AHI.
2593 | sll ALO, TMP1, 21
2594 | or AHI, AHI, TMP0 // Sign | Exponent.
2595 | srl TMP1, TMP1, 11
2596 | sll AHI, AHI, 20 // Align left.
2597 | jr ra
2598 |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
2599 |9:
2600 | jr ra
2601 |. li AHI, 0
2602 |.endif
2603 |.endmacro
2604 |
2605 |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
2606 |->vm_sfi2d_1:
2607 | sfi2d SFARG1HI, SFARG1LO
2608 |
2609 |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2610 |->vm_sfi2d_2:
2611 | sfi2d SFARG2HI, SFARG2LO
2612 |
2613 |// Soft-float comparison. Equivalent to c.eq.d.
2614 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2615 |->vm_sfcmpeq:
2616 |.if not FPU
2617 | sll AT, SFARG1HI, 1
2618 | sll TMP0, SFARG2HI, 1
2619 | or CRET1, SFARG1LO, SFARG2LO
2620 | or TMP1, AT, TMP0
2621 | or TMP1, TMP1, CRET1
2622 | beqz TMP1, >8 // Both args +-0: return 1.
2623 |. sltu CRET1, r0, SFARG1LO
2624 | lui TMP1, 0xffe0
2625 | addu AT, AT, CRET1
2626 | sltu CRET1, r0, SFARG2LO
2627 | sltu AT, TMP1, AT
2628 | addu TMP0, TMP0, CRET1
2629 | sltu TMP0, TMP1, TMP0
2630 | or TMP1, AT, TMP0
2631 | bnez TMP1, >9 // Either arg is NaN: return 0;
2632 |. xor TMP0, SFARG1HI, SFARG2HI
2633 | xor TMP1, SFARG1LO, SFARG2LO
2634 | or AT, TMP0, TMP1
2635 | jr ra
2636 |. sltiu CRET1, AT, 1 // Same values: return 1.
2637 |8:
2638 | jr ra
2639 |. li CRET1, 1
2640 |9:
2641 | jr ra
2642 |. li CRET1, 0
2643 |.endif
2644 |
2645 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2646 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2647 |->vm_sfcmpult:
2648 |.if not FPU
2649 | b >1
2650 |. li CRET2, 1
2651 |.endif
2652 |
2653 |->vm_sfcmpolt:
2654 |.if not FPU
2655 | li CRET2, 0
2656 |1:
2657 | sll AT, SFARG1HI, 1
2658 | sll TMP0, SFARG2HI, 1
2659 | or CRET1, SFARG1LO, SFARG2LO
2660 | or TMP1, AT, TMP0
2661 | or TMP1, TMP1, CRET1
2662 | beqz TMP1, >8 // Both args +-0: return 0.
2663 |. sltu CRET1, r0, SFARG1LO
2664 | lui TMP1, 0xffe0
2665 | addu AT, AT, CRET1
2666 | sltu CRET1, r0, SFARG2LO
2667 | sltu AT, TMP1, AT
2668 | addu TMP0, TMP0, CRET1
2669 | sltu TMP0, TMP1, TMP0
2670 | or TMP1, AT, TMP0
2671 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2672 |. and AT, SFARG1HI, SFARG2HI
2673 | bltz AT, >5 // Both args negative?
2674 |. nop
2675 | beq SFARG1HI, SFARG2HI, >8
2676 |. sltu CRET1, SFARG1LO, SFARG2LO
2677 | jr ra
2678 |. slt CRET1, SFARG1HI, SFARG2HI
2679 |5: // Swap conditions if both operands are negative.
2680 | beq SFARG1HI, SFARG2HI, >8
2681 |. sltu CRET1, SFARG2LO, SFARG1LO
2682 | jr ra
2683 |. slt CRET1, SFARG2HI, SFARG1HI
2684 |8:
2685 | jr ra
2686 |. nop
2687 |9:
2688 | jr ra
2689 |. move CRET1, CRET2
2690 |.endif
2691 |
2692 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2693 |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2694 |->vm_sfcmpolex:
2695 |.if not FPU
2696 | sll AT, SFARG1HI, 1
2697 | sll TMP0, SFARG2HI, 1
2698 | or CRET1, SFARG1LO, SFARG2LO
2699 | or TMP1, AT, TMP0
2700 | or TMP1, TMP1, CRET1
2701 | beqz TMP1, >8 // Both args +-0: return 1.
2702 |. sltu CRET1, r0, SFARG1LO
2703 | lui TMP1, 0xffe0
2704 | addu AT, AT, CRET1
2705 | sltu CRET1, r0, SFARG2LO
2706 | sltu AT, TMP1, AT
2707 | addu TMP0, TMP0, CRET1
2708 | sltu TMP0, TMP1, TMP0
2709 | or TMP1, AT, TMP0
2710 | bnez TMP1, >9 // Either arg is NaN: return 0;
2711 |. and AT, SFARG1HI, SFARG2HI
2712 | xor AT, AT, TMP3
2713 | bltz AT, >5 // Both args negative?
2714 |. nop
2715 | beq SFARG1HI, SFARG2HI, >6
2716 |. sltu CRET1, SFARG2LO, SFARG1LO
2717 | jr ra
2718 |. slt CRET1, SFARG2HI, SFARG1HI
2719 |5: // Swap conditions if both operands are negative.
2720 | beq SFARG1HI, SFARG2HI, >6
2721 |. sltu CRET1, SFARG1LO, SFARG2LO
2722 | slt CRET1, SFARG1HI, SFARG2HI
2723 |6:
2724 | jr ra
2725 |. nop
2726 |8:
2727 | jr ra
2728 |. li CRET1, 1
2729 |9:
2730 | jr ra
2731 |. li CRET1, 0
2732 |.endif
2733 |
2786 |//----------------------------------------------------------------------- 2734 |//-----------------------------------------------------------------------
2787 |//-- Miscellaneous functions -------------------------------------------- 2735 |//-- Miscellaneous functions --------------------------------------------
2788 |//----------------------------------------------------------------------- 2736 |//-----------------------------------------------------------------------
@@ -2815,6 +2763,7 @@ static void build_subroutines(BuildCtx *ctx)
2815 | // Returns lua_State *. 2763 | // Returns lua_State *.
2816 | lw BASE, L:CRET1->base 2764 | lw BASE, L:CRET1->base
2817 | lw RC, L:CRET1->top 2765 | lw RC, L:CRET1->top
2766 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2818 | move L, CRET1 2767 | move L, CRET1
2819 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2768 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2820 | lw LFUNC:RB, FRAME_FUNC(BASE) 2769 | lw LFUNC:RB, FRAME_FUNC(BASE)
@@ -2882,10 +2831,13 @@ static void build_subroutines(BuildCtx *ctx)
2882 | lw ra, -4(r16) 2831 | lw ra, -4(r16)
2883 | sw CRET1, CCSTATE:TMP1->gpr[0] 2832 | sw CRET1, CCSTATE:TMP1->gpr[0]
2884 | sw CRET2, CCSTATE:TMP1->gpr[1] 2833 | sw CRET2, CCSTATE:TMP1->gpr[1]
2885 | .FPU sdc1 FRET1, CCSTATE:TMP1->fpr[0] 2834 |.if FPU
2886 | .FPU sdc1 FRET2, CCSTATE:TMP1->fpr[1] 2835 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2887 | sw CARG1, CCSTATE:TMP1->gpr[2] // MIPS32 soft-float. 2836 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2888 | sw CARG2, CCSTATE:TMP1->gpr[3] // Complex doubles are returned in v0, v1, a0, a1. 2837 |.else
2838 | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2839 | sw CARG2, CCSTATE:TMP1->gpr[3]
2840 |.endif
2889 | move sp, r16 2841 | move sp, r16
2890 | jr ra 2842 | jr ra
2891 |. move r16, TMP2 2843 |. move r16, TMP2
@@ -2909,127 +2861,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2909 2861
2910 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2862 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2911 | // RA = src1*8, RD = src2*8, JMP with RD = target 2863 | // RA = src1*8, RD = src2*8, JMP with RD = target
2912 | addu CARG2, BASE, RA 2864 |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2913 | addu CARG3, BASE, RD 2865 | addu RA, BASE, RA
2914 | lw TMP0, HI(CARG2) 2866 | addu RD, BASE, RD
2915 | lw TMP1, HI(CARG3) 2867 | lw RAHI, HI(RA)
2916 | sltiu TMP0, TMP0, LJ_TISNUM 2868 | lw RDHI, HI(RD)
2917 | sltiu TMP1, TMP1, LJ_TISNUM
2918 | lhu TMP2, OFS_RD(PC) 2869 | lhu TMP2, OFS_RD(PC)
2919 | and TMP0, TMP0, TMP1
2920 | addiu PC, PC, 4 2870 | addiu PC, PC, 4
2921 | beqz TMP0, ->vmeta_comp 2871 | bne RAHI, TISNUM, >2
2922 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) 2872 |. lw RALO, LO(RA)
2923 | load_double f0, CARG1, CARG2, 0(CARG2) 2873 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2874 | lw RDLO, LO(RD)
2875 | bne RDHI, TISNUM, >5
2876 |. decode_RD4b TMP2
2877 | slt AT, SFARG1LO, SFARG2LO
2878 | addu TMP2, TMP2, TMP3
2879 | movop TMP2, r0, AT
2880 |1:
2881 | addu PC, PC, TMP2
2882 | ins_next
2883 |
2884 |2: // RA is not an integer.
2885 | sltiu AT, RAHI, LJ_TISNUM
2886 | beqz AT, ->vmeta_comp
2887 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2888 | sltiu AT, RDHI, LJ_TISNUM
2924 |.if FPU 2889 |.if FPU
2925 | ldc1 f2, 0(CARG3) 2890 | ldc1 FRA, 0(RA)
2891 | ldc1 FRD, 0(RD)
2926 |.else 2892 |.else
2927 | lw CARG4, 4(CARG3) 2893 | lw RDLO, LO(RD)
2928 | lw CARG3, 0(CARG3)
2929 |.endif 2894 |.endif
2930 | decode_RD4b TMP2 2895 | beqz AT, >4
2931 | addu TMP2, TMP2, TMP1 2896 |. decode_RD4b TMP2
2897 |3: // RA and RD are both numbers.
2932 |.if FPU 2898 |.if FPU
2933 if (op == BC_ISLT || op == BC_ISGE) { 2899 | fcomp f20, f22
2934 | c.olt.d f0, f2 2900 | addu TMP2, TMP2, TMP3
2935 } else { 2901 | b <1
2936 | c.ole.d f0, f2 2902 |. fmovop TMP2, r0
2937 }
2938 if (op == BC_ISLT || op == BC_ISLE) {
2939 | movf TMP2, r0
2940 } else {
2941 | movt TMP2, r0
2942 }
2943 |.else 2903 |.else
2944 | load_got __ledf2 2904 | bal sfcomp
2945 | sw RD, TEMP_SAVE_1 2905 |. addu TMP2, TMP2, TMP3
2946 | sw TMP1, TEMP_SAVE_2 2906 | b <1
2947 | call_extern //CRET1 = f0<=f2 2907 |. movop TMP2, r0, CRET1
2948 |. sw TMP2, TEMP_SAVE_3 2908 |.endif
2949 | lw TMP2, TEMP_SAVE_3 2909 |
2950 | lw TMP1, TEMP_SAVE_2 2910 |4: // RA is a number, RD is not a number.
2911 | bne RDHI, TISNUM, ->vmeta_comp
2912 | // RA is a number, RD is an integer. Convert RD to a number.
2913 |.if FPU
2914 |. lwc1 FRD, LO(RD)
2915 | b <3
2916 |. cvt.d.w FRD, FRD
2917 |.else
2918 |. nop
2919 |.if "RDHI" == "SFARG1HI"
2920 | bal ->vm_sfi2d_1
2921 |.else
2922 | bal ->vm_sfi2d_2
2923 |.endif
2924 |. nop
2925 | b <3
2926 |. nop
2927 |.endif
2928 |
2929 |5: // RA is an integer, RD is not an integer
2930 | sltiu AT, RDHI, LJ_TISNUM
2931 | beqz AT, ->vmeta_comp
2932 | // RA is an integer, RD is a number. Convert RA to a number.
2933 |.if FPU
2934 |. mtc1 RALO, FRA
2935 | ldc1 FRD, 0(RD)
2936 | b <3
2937 | cvt.d.w FRA, FRA
2938 |.else
2939 |. nop
2940 |.if "RAHI" == "SFARG1HI"
2941 | bal ->vm_sfi2d_1
2942 |.else
2943 | bal ->vm_sfi2d_2
2944 |.endif
2945 |. nop
2946 | b <3
2947 |. nop
2948 |.endif
2949 |.endmacro
2950 |
2951 if (op == BC_ISLT) { 2951 if (op == BC_ISLT) {
2952 | bltz CRET1, >1 2952 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
2953 } else if (op == BC_ISGE) {
2954 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
2953 } else if (op == BC_ISLE) { 2955 } else if (op == BC_ISLE) {
2954 | blez CRET1, >1 2956 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
2955 } else if (op == BC_ISGT) { 2957 } else {
2956 | bgtz CRET1, >1 2958 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
2957 } else {
2958 | bgez CRET1, >1
2959 } 2959 }
2960 |. lw RD, TEMP_SAVE_1
2961 | move TMP2, r0
2962 |1:
2963 |.endif
2964 | addu PC, PC, TMP2
2965 | ins_next
2966 break; 2960 break;
2967 2961
2968 case BC_ISEQV: case BC_ISNEV: 2962 case BC_ISEQV: case BC_ISNEV:
2969 vk = op == BC_ISEQV; 2963 vk = op == BC_ISEQV;
2970 | // RA = src1*8, RD = src2*8, JMP with RD = target 2964 | // RA = src1*8, RD = src2*8, JMP with RD = target
2971 | addu RA, BASE, RA 2965 | addu RA, BASE, RA
2972 | addiu PC, PC, 4 2966 | addiu PC, PC, 4
2973 | lw TMP0, HI(RA)
2974 | addu RD, BASE, RD 2967 | addu RD, BASE, RD
2968 | lw SFARG1HI, HI(RA)
2975 | lhu TMP2, -4+OFS_RD(PC) 2969 | lhu TMP2, -4+OFS_RD(PC)
2970 | lw SFARG2HI, HI(RD)
2976 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 2971 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2977 | lw TMP1, HI(RD) 2972 | sltu AT, TISNUM, SFARG1HI
2978 | decode_RD4b TMP2 2973 | sltu TMP0, TISNUM, SFARG2HI
2979 | sltiu AT, TMP0, LJ_TISNUM 2974 | or AT, AT, TMP0
2980 | sltiu CARG1, TMP1, LJ_TISNUM
2981 | load_double f2, CARG3, CARG4, 0(RD)
2982 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2983 | and AT, AT, CARG1
2984 | load_double f0, CARG1, CARG2, 0(RA)
2985 | beqz AT, >5
2986 |. addu TMP2, TMP2, TMP3
2987 |.if FPU
2988 | c.eq.d f0, f2
2989 if (vk) { 2975 if (vk) {
2990 | movf TMP2, r0 2976 | beqz AT, ->BC_ISEQN_Z
2991 } else { 2977 } else {
2992 | movt TMP2, r0 2978 | beqz AT, ->BC_ISNEN_Z
2993 } 2979 }
2994 |.else 2980 |. decode_RD4b TMP2
2995 | load_got __ledf2 2981 | // Either or both types are not numbers.
2996 | sw RD, TEMP_SAVE_1 2982 | lw SFARG1LO, LO(RA)
2997 | call_extern 2983 | lw SFARG2LO, LO(RD)
2998 |. sw TMP2, TEMP_SAVE_2 2984 | addu TMP2, TMP2, TMP3
2999 | lw RD, TEMP_SAVE_1
3000 | lw TMP2, TEMP_SAVE_2
3001 if (vk) {
3002 | beqz CRET1, >4
3003 |. nop
3004 } else {
3005 | bnez CRET1, >4
3006 |. nop
3007 }
3008 | move TMP2, r0
3009 |4:
3010 |.endif
3011 |1:
3012 | addu PC, PC, TMP2
3013 | ins_next
3014 |5: // Either or both types are not numbers.
3015 | lw CARG2, LO(RA)
3016 | lw CARG3, LO(RD)
3017 |.if FFI 2985 |.if FFI
3018 | li TMP3, LJ_TCDATA 2986 | li TMP3, LJ_TCDATA
3019 | beq TMP0, TMP3, ->vmeta_equal_cd 2987 | beq SFARG1HI, TMP3, ->vmeta_equal_cd
3020 |.endif 2988 |.endif
3021 |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? 2989 |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
3022 |.if FFI 2990 |.if FFI
3023 | beq TMP1, TMP3, ->vmeta_equal_cd 2991 | beq SFARG2HI, TMP3, ->vmeta_equal_cd
3024 |.endif 2992 |.endif
3025 |. xor TMP3, CARG2, CARG3 // Same tv? 2993 |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
3026 | xor TMP1, TMP1, TMP0 // Same type? 2994 | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
3027 | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? 2995 | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
3028 | movz TMP3, r0, AT // Ignore tv if primitive. 2996 | movz TMP3, r0, AT // Ignore tv if primitive.
3029 | movn CARG1, r0, TMP1 // Tab/ud and same type? 2997 | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
3030 | or AT, TMP1, TMP3 // Same type && (pri||same tv). 2998 | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
3031 | movz CARG1, r0, AT 2999 | movz TMP0, r0, AT
3032 | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. 3000 | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
3033 if (vk) { 3001 if (vk) {
3034 |. movn TMP2, r0, AT 3002 |. movn TMP2, r0, AT
3035 } else { 3003 } else {
@@ -3037,15 +3005,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3037 } 3005 }
3038 | // Different tables or userdatas. Need to check __eq metamethod. 3006 | // Different tables or userdatas. Need to check __eq metamethod.
3039 | // Field metatable must be at same offset for GCtab and GCudata! 3007 | // Field metatable must be at same offset for GCtab and GCudata!
3040 | lw TAB:TMP1, TAB:CARG2->metatable 3008 | lw TAB:TMP1, TAB:SFARG1LO->metatable
3041 | beqz TAB:TMP1, <1 // No metatable? 3009 | beqz TAB:TMP1, >1 // No metatable?
3042 |. nop 3010 |. nop
3043 | lbu TMP1, TAB:TMP1->nomm 3011 | lbu TMP1, TAB:TMP1->nomm
3044 | andi TMP1, TMP1, 1<<MM_eq 3012 | andi TMP1, TMP1, 1<<MM_eq
3045 | bnez TMP1, <1 // Or 'no __eq' flag set? 3013 | bnez TMP1, >1 // Or 'no __eq' flag set?
3046 |. nop 3014 |. nop
3047 | b ->vmeta_equal // Handle __eq metamethod. 3015 | b ->vmeta_equal // Handle __eq metamethod.
3048 |. li CARG4, 1-vk // ne = 0 or 1. 3016 |. li TMP0, 1-vk // ne = 0 or 1.
3017 |1:
3018 | addu PC, PC, TMP2
3019 | ins_next
3049 break; 3020 break;
3050 3021
3051 case BC_ISEQS: case BC_ISNES: 3022 case BC_ISEQS: case BC_ISNES:
@@ -3082,61 +3053,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3082 vk = op == BC_ISEQN; 3053 vk = op == BC_ISEQN;
3083 | // RA = src*8, RD = num_const*8, JMP with RD = target 3054 | // RA = src*8, RD = num_const*8, JMP with RD = target
3084 | addu RA, BASE, RA 3055 | addu RA, BASE, RA
3085 | addiu PC, PC, 4 3056 | addu RD, KBASE, RD
3086 | lw TMP0, HI(RA) 3057 | lw SFARG1HI, HI(RA)
3087 | load_double f0, CARG1, CARG2, 0(RA) 3058 | lw SFARG2HI, HI(RD)
3088 | addu RD, KBASE, RD 3059 | lhu TMP2, OFS_RD(PC)
3089 | lhu TMP2, -4+OFS_RD(PC) 3060 | addiu PC, PC, 4
3090 | load_double f2, CARG3, CARG4, 0(RD)
3091 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3061 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3092 | sltiu AT, TMP0, LJ_TISNUM
3093 | decode_RD4b TMP2 3062 | decode_RD4b TMP2
3063 if (vk) {
3064 |->BC_ISEQN_Z:
3065 } else {
3066 |->BC_ISNEN_Z:
3067 }
3068 | bne SFARG1HI, TISNUM, >3
3069 |. lw SFARG1LO, LO(RA)
3070 | lw SFARG2LO, LO(RD)
3071 | addu TMP2, TMP2, TMP3
3072 | bne SFARG2HI, TISNUM, >6
3073 |. xor AT, SFARG1LO, SFARG2LO
3074 if (vk) {
3075 | movn TMP2, r0, AT
3076 |1:
3077 | addu PC, PC, TMP2
3078 |2:
3079 } else {
3080 | movz TMP2, r0, AT
3081 |1:
3082 |2:
3083 | addu PC, PC, TMP2
3084 }
3085 | ins_next
3086 |
3087 |3: // RA is not an integer.
3088 | sltiu AT, SFARG1HI, LJ_TISNUM
3094 |.if FFI 3089 |.if FFI
3095 | beqz AT, >5 3090 | beqz AT, >8
3096 |.else 3091 |.else
3097 | beqz AT, >1 3092 | beqz AT, <2
3098 |.endif 3093 |.endif
3099 |. addu TMP2, TMP2, TMP3 3094 |. addu TMP2, TMP2, TMP3
3095 | sltiu AT, SFARG2HI, LJ_TISNUM
3096 |.if FPU
3097 | ldc1 f20, 0(RA)
3098 | ldc1 f22, 0(RD)
3099 |.endif
3100 | beqz AT, >5
3101 |. lw SFARG2LO, LO(RD)
3102 |4: // RA and RD are both numbers.
3100 |.if FPU 3103 |.if FPU
3101 | c.eq.d f0, f2 3104 | c.eq.d f20, f22
3105 | b <1
3102 if (vk) { 3106 if (vk) {
3103 | movf TMP2, r0 3107 |. movf TMP2, r0
3104 | addu PC, PC, TMP2
3105 |1:
3106 } else { 3108 } else {
3107 | movt TMP2, r0 3109 |. movt TMP2, r0
3108 |1:
3109 | addu PC, PC, TMP2
3110 } 3110 }
3111 |.else 3111 |.else
3112 | load_got __ledf2 3112 | bal ->vm_sfcmpeq
3113 | sw RD, TEMP_SAVE_1 3113 |. nop
3114 | call_extern 3114 | b <1
3115 |. sw TMP2, TEMP_SAVE_2
3116 | lw RD, TEMP_SAVE_1
3117 | lw TMP2, TEMP_SAVE_2
3118 if (vk) { 3115 if (vk) {
3119 | beqz CRET1, >4 3116 |. movz TMP2, r0, CRET1
3120 |. nop
3121 | move TMP2, r0
3122 |4:
3123 | addu PC, PC, TMP2
3124 |1:
3125 } else { 3117 } else {
3126 | bnez CRET1, >1 3118 |. movn TMP2, r0, CRET1
3127 |. nop
3128 | move TMP2, r0
3129 |1:
3130 | addu PC, PC, TMP2
3131 } 3119 }
3132 |.endif 3120 |.endif
3133 | ins_next 3121 |
3122 |5: // RA is a number, RD is not a number.
3134 |.if FFI 3123 |.if FFI
3135 |5: 3124 | bne SFARG2HI, TISNUM, >9
3125 |.else
3126 | bne SFARG2HI, TISNUM, <2
3127 |.endif
3128 | // RA is a number, RD is an integer. Convert RD to a number.
3129 |.if FPU
3130 |. lwc1 f22, LO(RD)
3131 | b <4
3132 |. cvt.d.w f22, f22
3133 |.else
3134 |. nop
3135 | bal ->vm_sfi2d_2
3136 |. nop
3137 | b <4
3138 |. nop
3139 |.endif
3140 |
3141 |6: // RA is an integer, RD is not an integer
3142 | sltiu AT, SFARG2HI, LJ_TISNUM
3143 |.if FFI
3144 | beqz AT, >9
3145 |.else
3146 | beqz AT, <2
3147 |.endif
3148 | // RA is an integer, RD is a number. Convert RA to a number.
3149 |.if FPU
3150 |. mtc1 SFARG1LO, f20
3151 | ldc1 f22, 0(RD)
3152 | b <4
3153 | cvt.d.w f20, f20
3154 |.else
3155 |. nop
3156 | bal ->vm_sfi2d_1
3157 |. nop
3158 | b <4
3159 |. nop
3160 |.endif
3161 |
3162 |.if FFI
3163 |8:
3136 | li AT, LJ_TCDATA 3164 | li AT, LJ_TCDATA
3137 | beq TMP0, AT, ->vmeta_equal_cd 3165 | bne SFARG1HI, AT, <2
3138 |. nop 3166 |. nop
3139 | b <1 3167 | b ->vmeta_equal_cd
3168 |. nop
3169 |9:
3170 | li AT, LJ_TCDATA
3171 | bne SFARG2HI, AT, <2
3172 |. nop
3173 | b ->vmeta_equal_cd
3140 |. nop 3174 |. nop
3141 |.endif 3175 |.endif
3142 break; 3176 break;
@@ -3188,7 +3222,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3188 | addu PC, PC, TMP2 3222 | addu PC, PC, TMP2
3189 } else { 3223 } else {
3190 | sltiu TMP0, TMP0, LJ_TISTRUECOND 3224 | sltiu TMP0, TMP0, LJ_TISTRUECOND
3191 | load_double1 0(RD) 3225 | lw SFRETHI, HI(RD)
3226 | lw SFRETLO, LO(RD)
3192 if (op == BC_ISTC) { 3227 if (op == BC_ISTC) {
3193 | beqz TMP0, >1 3228 | beqz TMP0, >1
3194 } else { 3229 } else {
@@ -3198,7 +3233,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3198 | decode_RD4b TMP2 3233 | decode_RD4b TMP2
3199 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3234 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3200 | addu TMP2, TMP2, TMP3 3235 | addu TMP2, TMP2, TMP3
3201 | store_double1 0(RA) 3236 | sw SFRETHI, HI(RA)
3237 | sw SFRETLO, LO(RA)
3202 | addu PC, PC, TMP2 3238 | addu PC, PC, TMP2
3203 |1: 3239 |1:
3204 } 3240 }
@@ -3230,10 +3266,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3230 case BC_MOV: 3266 case BC_MOV:
3231 | // RA = dst*8, RD = src*8 3267 | // RA = dst*8, RD = src*8
3232 | addu RD, BASE, RD 3268 | addu RD, BASE, RD
3233 | addu RA, BASE, RA 3269 | addu RA, BASE, RA
3234 | load_double1 0(RD) 3270 | lw SFRETHI, HI(RD)
3271 | lw SFRETLO, LO(RD)
3235 | ins_next1 3272 | ins_next1
3236 | store_double1 0(RA) 3273 | sw SFRETHI, HI(RA)
3274 | sw SFRETLO, LO(RA)
3237 | ins_next2 3275 | ins_next2
3238 break; 3276 break;
3239 case BC_NOT: 3277 case BC_NOT:
@@ -3250,23 +3288,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3250 break; 3288 break;
3251 case BC_UNM: 3289 case BC_UNM:
3252 | // RA = dst*8, RD = src*8 3290 | // RA = dst*8, RD = src*8
3253 | addu CARG3, BASE, RD 3291 | addu RB, BASE, RD
3292 | lw SFARG1HI, HI(RB)
3254 | addu RA, BASE, RA 3293 | addu RA, BASE, RA
3255 | lw TMP0, HI(CARG3) 3294 | bne SFARG1HI, TISNUM, >2
3256 | sltiu AT, TMP0, LJ_TISNUM 3295 |. lw SFARG1LO, LO(RB)
3257 | load_double f0, CARG1, CARG2, 0(CARG3) 3296 | lui TMP1, 0x8000
3258 |.if FPU 3297 | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
3259 | beqz AT, ->vmeta_unm 3298 |. negu SFARG1LO, SFARG1LO
3260 |. neg.d f0, f0 3299 |1:
3261 |.else
3262 | lui TMP1, 0x8000
3263 | xor CRET1, TMP1, CARG1
3264 | beqz AT, ->vmeta_unm
3265 |. move CRET2, CARG2
3266 |.endif
3267 | ins_next1 3300 | ins_next1
3268 | store_double f0, CRET1, CRET2, 0(RA) 3301 | sw SFARG1HI, HI(RA)
3302 | sw SFARG1LO, LO(RA)
3269 | ins_next2 3303 | ins_next2
3304 |2:
3305 | sltiu AT, SFARG1HI, LJ_TISNUM
3306 | beqz AT, ->vmeta_unm
3307 |. lui TMP1, 0x8000
3308 | b <1
3309 |. xor SFARG1HI, SFARG1HI, TMP1
3270 break; 3310 break;
3271 case BC_LEN: 3311 case BC_LEN:
3272 | // RA = dst*8, RD = src*8 3312 | // RA = dst*8, RD = src*8
@@ -3277,16 +3317,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3277 | li AT, LJ_TSTR 3317 | li AT, LJ_TSTR
3278 | bne TMP0, AT, >2 3318 | bne TMP0, AT, >2
3279 |. li AT, LJ_TTAB 3319 |. li AT, LJ_TTAB
3280 | lw CRET1, STR:CARG1->len 3320 | lw CRET1, STR:CARG1->len
3281 |1: 3321 |1:
3282 |.if FPU
3283 | mtc1 CRET1, f0
3284 | cvt.d.w f0, f0
3285 |.else
3286 | cvti2d CRET1
3287 |.endif
3288 | ins_next1 3322 | ins_next1
3289 | store_double f0, CRET1, CRET2, 0(RA) 3323 | sw TISNUM, HI(RA)
3324 | sw CRET1, LO(RA)
3290 | ins_next2 3325 | ins_next2
3291 |2: 3326 |2:
3292 | bne TMP0, AT, ->vmeta_len 3327 | bne TMP0, AT, ->vmeta_len
@@ -3317,178 +3352,231 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3317 3352
3318 /* -- Binary ops -------------------------------------------------------- */ 3353 /* -- Binary ops -------------------------------------------------------- */
3319 3354
3320 |.macro ins_arithpre 3355 |.macro fpmod, a, b, c
3356 | bal ->vm_floor // floor(b/c)
3357 |. div.d FARG1, b, c
3358 | mul.d a, FRET1, c
3359 | sub.d a, b, a // b - floor(b/c)*c
3360 |.endmacro
3361
3362 |.macro sfpmod
3363 | addiu sp, sp, -16
3364 |
3365 | load_got __divdf3
3366 | sw SFARG1HI, HI(sp)
3367 | sw SFARG1LO, LO(sp)
3368 | sw SFARG2HI, 8+HI(sp)
3369 | call_extern
3370 |. sw SFARG2LO, 8+LO(sp)
3371 |
3372 | load_got floor
3373 | move SFARG1HI, SFRETHI
3374 | call_extern
3375 |. move SFARG1LO, SFRETLO
3376 |
3377 | load_got __muldf3
3378 | move SFARG1HI, SFRETHI
3379 | move SFARG1LO, SFRETLO
3380 | lw SFARG2HI, 8+HI(sp)
3381 | call_extern
3382 |. lw SFARG2LO, 8+LO(sp)
3383 |
3384 | load_got __subdf3
3385 | lw SFARG1HI, HI(sp)
3386 | lw SFARG1LO, LO(sp)
3387 | move SFARG2HI, SFRETHI
3388 | call_extern
3389 |. move SFARG2LO, SFRETLO
3390 |
3391 | addiu sp, sp, 16
3392 |.endmacro
3393
3394 |.macro ins_arithpre, label
3321 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3395 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3322 | decode_RB8a RB, INS
3323 | decode_RB8b RB
3324 | decode_RDtoRC8 RC, RD
3325 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3396 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3326 ||switch (vk) { 3397 ||switch (vk) {
3327 ||case 0: 3398 ||case 0:
3328 | addu CARG3, BASE, RB 3399 | decode_RB8a RB, INS
3329 | addu CARG4, KBASE, RC 3400 | decode_RB8b RB
3330 | lw TMP1, HI(CARG3) 3401 | decode_RDtoRC8 RC, RD
3331 | sltiu AT, TMP1, LJ_TISNUM 3402 | // RA = dst*8, RB = src1*8, RC = num_const*8
3332 | load_double f20, CARG1, CARG2, 0(CARG3) 3403 | addu RB, BASE, RB
3333 | load_double f22, CARG3, CARG4, 0(CARG4) 3404 |.if "label" ~= "none"
3334 |.if FPU 3405 | b label
3335 | beqz AT, ->vmeta_arith
3336 |.else
3337 | beqz AT, ->vmeta_arith_vn
3338 |.endif 3406 |.endif
3339 |. addu RA, BASE, RA 3407 |. addu RC, KBASE, RC
3340 || break; 3408 || break;
3341 ||case 1: 3409 ||case 1:
3342 | addu CARG4, BASE, RB 3410 | decode_RB8a RC, INS
3343 | addu CARG3, KBASE, RC 3411 | decode_RB8b RC
3344 | lw TMP1, HI(CARG4) 3412 | decode_RDtoRC8 RB, RD
3345 | sltiu AT, TMP1, LJ_TISNUM 3413 | // RA = dst*8, RB = num_const*8, RC = src1*8
3346 | load_double f20, CARG1, CARG2, 0(CARG3) 3414 | addu RC, BASE, RC
3347 | load_double f22, CARG3, CARG4, 0(CARG4) 3415 |.if "label" ~= "none"
3348 |.if FPU 3416 | b label
3349 | beqz AT, ->vmeta_arith
3350 |.else
3351 | beqz AT, ->vmeta_arith_nv
3352 |.endif 3417 |.endif
3353 |. addu RA, BASE, RA 3418 |. addu RB, KBASE, RB
3354 || break; 3419 || break;
3355 ||default: 3420 ||default:
3356 | addu CARG3, BASE, RB 3421 | decode_RB8a RB, INS
3357 | addu CARG4, BASE, RC 3422 | decode_RB8b RB
3358 | lw TMP1, HI(CARG3) 3423 | decode_RDtoRC8 RC, RD
3359 | lw TMP2, HI(CARG4) 3424 | // RA = dst*8, RB = src1*8, RC = src2*8
3360 | sltiu AT, TMP1, LJ_TISNUM 3425 | addu RB, BASE, RB
3361 | sltiu TMP0, TMP2, LJ_TISNUM 3426 |.if "label" ~= "none"
3362 | and AT, AT, TMP0 3427 | b label
3363 | load_double f20, CARG1, CARG2, 0(CARG3)
3364 | load_double f22, CARG3, CARG4, 0(CARG4)
3365 |.if FPU
3366 | beqz AT, ->vmeta_arith
3367 |.else
3368 | beqz AT, ->vmeta_arith_vv
3369 |.endif 3428 |.endif
3370 |. addu RA, BASE, RA 3429 |. addu RC, BASE, RC
3371 || break; 3430 || break;
3372 ||} 3431 ||}
3373 |.endmacro 3432 |.endmacro
3374 | 3433 |
3375 |.macro ins_arithfallback 3434 |.macro ins_arith, intins, fpins, fpcall, label
3376 ||switch (vk) { 3435 | ins_arithpre none
3377 ||case 0:
3378 | b ->vmeta_arith_vn
3379 |. nop
3380 || break;
3381 ||case 1:
3382 | b ->vmeta_arith_nv
3383 |. nop
3384 || break;
3385 ||default:
3386 | b ->vmeta_arith_vv
3387 |. nop
3388 || break;
3389 ||}
3390 |.endmacro
3391 | 3436 |
3392 |.if FPU 3437 |.if "label" ~= "none"
3393 |.macro fpmod, a, b, c 3438 |label:
3394 |->BC_MODVN_Z: 3439 |.endif
3395 | bal ->vm_floor // floor(b/c)
3396 |. div.d FARG1, b, c
3397 | mul.d a, FRET1, c
3398 | sub.d a, b, a // b - floor(b/c)*c
3399 |.endmacro
3400 |.else
3401 | 3440 |
3402 |.macro sfpmod 3441 | lw SFARG1HI, HI(RB)
3403 |->BC_MODVN_Z: 3442 | lw SFARG2HI, HI(RC)
3404 | load_got __divdf3 3443 |
3405 | sw CARG1, TEMP_SAVE_1 3444 |.if "intins" ~= "div"
3406 | sw CARG2, TEMP_SAVE_2 3445 |
3407 | sw CARG3, TEMP_SAVE_3 3446 | // Check for two integers.
3408 | call_extern 3447 | lw SFARG1LO, LO(RB)
3409 |. sw CARG4, TEMP_SAVE_4 3448 | bne SFARG1HI, TISNUM, >5
3410 | move CARG1, CRET1 3449 |. lw SFARG2LO, LO(RC)
3411 | bal ->vm_floor 3450 | bne SFARG2HI, TISNUM, >5
3412 |. move CARG2, CRET2 3451 |
3413 | load_got __muldf3 3452 |.if "intins" == "addu"
3414 | move CARG1, CRET1 3453 |. intins CRET1, SFARG1LO, SFARG2LO
3415 | move CARG2, CRET2 3454 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3416 | lw CARG3, TEMP_SAVE_3 3455 | xor TMP2, CRET1, SFARG2LO
3417 | call_extern 3456 | and TMP1, TMP1, TMP2
3418 |. lw CARG4, TEMP_SAVE_4 3457 | bltz TMP1, ->vmeta_arith
3419 | load_got __subdf3 3458 |. addu RA, BASE, RA
3420 | lw CARG1, TEMP_SAVE_1 3459 |.elif "intins" == "subu"
3421 | lw CARG2, TEMP_SAVE_2 3460 |. intins CRET1, SFARG1LO, SFARG2LO
3422 | move CARG3, CRET1 3461 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3462 | xor TMP2, SFARG1LO, SFARG2LO
3463 | and TMP1, TMP1, TMP2
3464 | bltz TMP1, ->vmeta_arith
3465 |. addu RA, BASE, RA
3466 |.elif "intins" == "mult"
3467 |. intins SFARG1LO, SFARG2LO
3468 | mflo CRET1
3469 | mfhi TMP2
3470 | sra TMP1, CRET1, 31
3471 | bne TMP1, TMP2, ->vmeta_arith
3472 |. addu RA, BASE, RA
3473 |.else
3474 |. load_got lj_vm_modi
3475 | beqz SFARG2LO, ->vmeta_arith
3476 |. addu RA, BASE, RA
3477 |.if ENDIAN_BE
3478 | move CARG1, SFARG1LO
3479 |.endif
3423 | call_extern 3480 | call_extern
3424 |. move CARG4, CRET2 3481 |. move CARG2, SFARG2LO
3425 |.endmacro
3426 |.endif 3482 |.endif
3427 | 3483 |
3428 |.macro ins_arith, intins, fpins, fpcall 3484 | ins_next1
3429 | ins_arithpre 3485 | sw TISNUM, HI(RA)
3430 |.if "fpins" == "fpmod_" 3486 | sw CRET1, LO(RA)
3431 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3487 |3:
3432 |. nop 3488 | ins_next2
3433 |.else 3489 |
3490 |.elif not FPU
3491 |
3492 | lw SFARG1LO, LO(RB)
3493 | lw SFARG2LO, LO(RC)
3494 |
3495 |.endif
3496 |
3497 |5: // Check for two numbers.
3498 | .FPU ldc1 f20, 0(RB)
3499 | sltiu AT, SFARG1HI, LJ_TISNUM
3500 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3501 | .FPU ldc1 f22, 0(RC)
3502 | and AT, AT, TMP0
3503 | beqz AT, ->vmeta_arith
3504 |. addu RA, BASE, RA
3505 |
3434 |.if FPU 3506 |.if FPU
3435 | fpins f0, f20, f22 3507 | fpins FRET1, f20, f22
3436 |.else 3508 |.elif "fpcall" == "sfpmod"
3437 |.if "fpcall" == "sfpmod"
3438 | sfpmod 3509 | sfpmod
3439 |.else 3510 |.else
3440 | load_got fpcall 3511 | load_got fpcall
3441 | call_extern 3512 | call_extern
3442 |. nop 3513 |. nop
3443 |.endif 3514 |.endif
3444 |.endif 3515 |
3445 | ins_next1 3516 | ins_next1
3446 | store_double1 0(RA) 3517 |.if not FPU
3518 | sw SFRETHI, HI(RA)
3519 |.endif
3520 |.if "intins" ~= "div"
3521 | b <3
3522 |.endif
3523 |.if FPU
3524 |. sdc1 FRET1, 0(RA)
3525 |.else
3526 |. sw SFRETLO, LO(RA)
3527 |.endif
3528 |.if "intins" == "div"
3447 | ins_next2 3529 | ins_next2
3448 |.endif 3530 |.endif
3531 |
3449 |.endmacro 3532 |.endmacro
3450 3533
3451 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3534 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3452 | ins_arith addu, add.d, __adddf3 3535 | ins_arith addu, add.d, __adddf3, none
3453 break; 3536 break;
3454 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3537 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3455 | ins_arith subu, sub.d, __subdf3 3538 | ins_arith subu, sub.d, __subdf3, none
3456 break; 3539 break;
3457 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3540 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3458 | ins_arith mult, mul.d, __muldf3 3541 | ins_arith mult, mul.d, __muldf3, none
3542 break;
3543 case BC_DIVVN:
3544 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3459 break; 3545 break;
3460 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3546 case BC_DIVNV: case BC_DIVVV:
3461 | ins_arith div, div.d, __divdf3 3547 | ins_arithpre ->BC_DIVVN_Z
3462 break; 3548 break;
3463 case BC_MODVN: 3549 case BC_MODVN:
3464 | ins_arith modi, fpmod, sfpmod 3550 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
3551 break;
3465 case BC_MODNV: case BC_MODVV: 3552 case BC_MODNV: case BC_MODVV:
3466 | ins_arith modi, fpmod_, sfpmod 3553 | ins_arithpre ->BC_MODVN_Z
3467 break; 3554 break;
3468 case BC_POW: 3555 case BC_POW:
3469 | decode_RB8a RB, INS 3556 | ins_arithpre none
3470 | decode_RB8b RB 3557 | lw SFARG1HI, HI(RB)
3471 | decode_RDtoRC8 RC, RD 3558 | lw SFARG2HI, HI(RC)
3472 | addu CARG3, BASE, RB 3559 | sltiu AT, SFARG1HI, LJ_TISNUM
3473 | addu CARG4, BASE, RC 3560 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3474 | lw TMP1, HI(CARG3)
3475 | lw TMP2, HI(CARG4)
3476 | sltiu AT, TMP1, LJ_TISNUM
3477 | sltiu TMP0, TMP2, LJ_TISNUM
3478 | and AT, AT, TMP0 3561 | and AT, AT, TMP0
3479 | load_got pow 3562 | load_got pow
3480 | beqz AT, ->vmeta_arith 3563 | beqz AT, ->vmeta_arith
3481 |. addu RA, BASE, RA 3564 |. addu RA, BASE, RA
3482 | load_farg1 0(CARG3) 3565 |.if FPU
3483 | load_farg2 0(CARG4) 3566 | ldc1 FARG1, 0(RB)
3567 | ldc1 FARG2, 0(RC)
3568 |.else
3569 | lw SFARG1LO, LO(RB)
3570 | lw SFARG2LO, LO(RC)
3571 |.endif
3484 | call_extern 3572 | call_extern
3485 |. nop 3573 |. nop
3486 | ins_next1 3574 | ins_next1
3487 |.if HFABI 3575 |.if FPU
3488 | sdc1 FRET1, 0(RA) 3576 | sdc1 FRET1, 0(RA)
3489 |.else 3577 |.else
3490 | sw CRET1, 0(RA) 3578 | sw SFRETHI, HI(RA)
3491 | sw CRET2, 4(RA) 3579 | sw SFRETLO, LO(RA)
3492 |.endif 3580 |.endif
3493 | ins_next2 3581 | ins_next2
3494 break; 3582 break;
@@ -3512,10 +3600,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3512 | bnez CRET1, ->vmeta_binop 3600 | bnez CRET1, ->vmeta_binop
3513 |. lw BASE, L->base 3601 |. lw BASE, L->base
3514 | addu RB, BASE, MULTRES 3602 | addu RB, BASE, MULTRES
3515 | load_double1 0(RB) 3603 | lw SFRETHI, HI(RB)
3604 | lw SFRETLO, LO(RB)
3516 | addu RA, BASE, RA 3605 | addu RA, BASE, RA
3517 | ins_next1 3606 | ins_next1
3518 | store_double1 0(RA) 3607 | sw SFRETHI, HI(RA)
3608 | sw SFRETLO, LO(RA)
3519 | ins_next2 3609 | ins_next2
3520 break; 3610 break;
3521 3611
@@ -3551,23 +3641,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3551 | // RA = dst*8, RD = int16_literal*8 3641 | // RA = dst*8, RD = int16_literal*8
3552 | sra RD, INS, 16 3642 | sra RD, INS, 16
3553 | addu RA, BASE, RA 3643 | addu RA, BASE, RA
3554 |.if FPU
3555 | mtc1 RD, f0
3556 | cvt.d.w f0, f0
3557 |.else
3558 | cvti2d RD
3559 |.endif
3560 | ins_next1 3644 | ins_next1
3561 | store_double f0, CRET1, CRET2, 0(RA) 3645 | sw TISNUM, HI(RA)
3646 | sw RD, LO(RA)
3562 | ins_next2 3647 | ins_next2
3563 break; 3648 break;
3564 case BC_KNUM: 3649 case BC_KNUM:
3565 | // RA = dst*8, RD = num_const*8 3650 | // RA = dst*8, RD = num_const*8
3566 | addu RD, KBASE, RD 3651 | addu RD, KBASE, RD
3567 | addu RA, BASE, RA 3652 | addu RA, BASE, RA
3568 | load_double1 0(RD) 3653 | lw SFRETHI, HI(RD)
3654 | lw SFRETLO, LO(RD)
3569 | ins_next1 3655 | ins_next1
3570 | store_double1 0(RA) 3656 | sw SFRETHI, HI(RA)
3657 | sw SFRETLO, LO(RA)
3571 | ins_next2 3658 | ins_next2
3572 break; 3659 break;
3573 case BC_KPRI: 3660 case BC_KPRI:
@@ -3603,9 +3690,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3603 | lw UPVAL:RB, LFUNC:RD->uvptr 3690 | lw UPVAL:RB, LFUNC:RD->uvptr
3604 | ins_next1 3691 | ins_next1
3605 | lw TMP1, UPVAL:RB->v 3692 | lw TMP1, UPVAL:RB->v
3606 | load_double1 0(TMP1) 3693 | lw SFRETHI, HI(TMP1)
3694 | lw SFRETLO, LO(TMP1)
3607 | addu RA, BASE, RA 3695 | addu RA, BASE, RA
3608 | store_double1 0(RA) 3696 | sw SFRETHI, HI(RA)
3697 | sw SFRETLO, LO(RA)
3609 | ins_next2 3698 | ins_next2
3610 break; 3699 break;
3611 case BC_USETV: 3700 case BC_USETV:
@@ -3614,26 +3703,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3614 | srl RA, RA, 1 3703 | srl RA, RA, 1
3615 | addu RD, BASE, RD 3704 | addu RD, BASE, RD
3616 | addu RA, RA, LFUNC:RB 3705 | addu RA, RA, LFUNC:RB
3617 | load_double1 0(RD)
3618 | lw UPVAL:RB, LFUNC:RA->uvptr 3706 | lw UPVAL:RB, LFUNC:RA->uvptr
3707 | lw SFRETHI, HI(RD)
3708 | lw SFRETLO, LO(RD)
3619 | lbu TMP3, UPVAL:RB->marked 3709 | lbu TMP3, UPVAL:RB->marked
3620 | lw CARG2, UPVAL:RB->v 3710 | lw CARG2, UPVAL:RB->v
3621 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3711 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3622 | lbu TMP0, UPVAL:RB->closed 3712 | lbu TMP0, UPVAL:RB->closed
3623 | lw TMP2, HI(RD) 3713 | sw SFRETHI, HI(CARG2)
3624 | store_double1 0(CARG2) 3714 | sw SFRETLO, LO(CARG2)
3625 | li AT, LJ_GC_BLACK|1 3715 | li AT, LJ_GC_BLACK|1
3626 | or TMP3, TMP3, TMP0 3716 | or TMP3, TMP3, TMP0
3627 | beq TMP3, AT, >2 // Upvalue is closed and black? 3717 | beq TMP3, AT, >2 // Upvalue is closed and black?
3628 |. addiu TMP2, TMP2, -(LJ_TNUMX+1) 3718 |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
3629 |1: 3719 |1:
3630 | ins_next 3720 | ins_next
3631 | 3721 |
3632 |2: // Check if new value is collectable. 3722 |2: // Check if new value is collectable.
3633 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3723 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
3634 | beqz AT, <1 // tvisgcv(v) 3724 | beqz AT, <1 // tvisgcv(v)
3635 |. lw TMP1, LO(RD) 3725 |. nop
3636 | lbu TMP3, GCOBJ:TMP1->gch.marked 3726 | lbu TMP3, GCOBJ:SFRETLO->gch.marked
3637 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3727 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
3638 | beqz TMP3, <1 3728 | beqz TMP3, <1
3639 |. load_got lj_gc_barrieruv 3729 |. load_got lj_gc_barrieruv
@@ -3681,11 +3771,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3681 | srl RA, RA, 1 3771 | srl RA, RA, 1
3682 | addu RD, KBASE, RD 3772 | addu RD, KBASE, RD
3683 | addu RA, RA, LFUNC:RB 3773 | addu RA, RA, LFUNC:RB
3684 | load_double1 0(RD) 3774 | lw UPVAL:RB, LFUNC:RA->uvptr
3685 | lw UPVAL:RB, LFUNC:RA->uvptr 3775 | lw SFRETHI, HI(RD)
3776 | lw SFRETLO, LO(RD)
3777 | lw TMP1, UPVAL:RB->v
3686 | ins_next1 3778 | ins_next1
3687 | lw TMP1, UPVAL:RB->v 3779 | sw SFRETHI, HI(TMP1)
3688 | store_double1 0(TMP1) 3780 | sw SFRETLO, LO(TMP1)
3689 | ins_next2 3781 | ins_next2
3690 break; 3782 break;
3691 case BC_USETP: 3783 case BC_USETP:
@@ -3695,10 +3787,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3695 | srl TMP0, RD, 3 3787 | srl TMP0, RD, 3
3696 | addu RA, RA, LFUNC:RB 3788 | addu RA, RA, LFUNC:RB
3697 | not TMP0, TMP0 3789 | not TMP0, TMP0
3698 | lw UPVAL:RB, LFUNC:RA->uvptr 3790 | lw UPVAL:RB, LFUNC:RA->uvptr
3699 | ins_next1 3791 | ins_next1
3700 | lw TMP1, UPVAL:RB->v 3792 | lw TMP1, UPVAL:RB->v
3701 | sw TMP0, HI(TMP1) 3793 | sw TMP0, HI(TMP1)
3702 | ins_next2 3794 | ins_next2
3703 break; 3795 break;
3704 3796
@@ -3734,8 +3826,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3734 | li TMP0, LJ_TFUNC 3826 | li TMP0, LJ_TFUNC
3735 | ins_next1 3827 | ins_next1
3736 | addu RA, BASE, RA 3828 | addu RA, BASE, RA
3737 | sw TMP0, HI(RA)
3738 | sw LFUNC:CRET1, LO(RA) 3829 | sw LFUNC:CRET1, LO(RA)
3830 | sw TMP0, HI(RA)
3739 | ins_next2 3831 | ins_next2
3740 break; 3832 break;
3741 3833
@@ -3818,71 +3910,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3818 | li AT, LJ_TTAB 3910 | li AT, LJ_TTAB
3819 | bne TMP1, AT, ->vmeta_tgetv 3911 | bne TMP1, AT, ->vmeta_tgetv
3820 |. addu RA, BASE, RA 3912 |. addu RA, BASE, RA
3821 | sltiu AT, TMP2, LJ_TISNUM 3913 | bne TMP2, TISNUM, >5
3822 | beqz AT, >5 3914 |. lw RC, LO(CARG3)
3823 |. li AT, LJ_TSTR 3915 | lw TMP0, TAB:RB->asize
3824 |.if FPU
3825 | ldc1 f0, 0(CARG3)
3826 | // Convert number key to integer, check for integerness and range.
3827 | cvt.w.d f2, f0
3828 | lw TMP0, TAB:RB->asize
3829 | mfc1 TMP2, f2
3830 | cvt.d.w f4, f2
3831 | lw TMP1, TAB:RB->array
3832 | c.eq.d f0, f4
3833 | sltu AT, TMP2, TMP0
3834 | movf AT, r0
3835 | sll TMP2, TMP2, 3
3836 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3837 |. addu TMP2, TMP1, TMP2
3838 | lw TMP0, HI(TMP2)
3839 | beq TMP0, TISNIL, >2
3840 |. ldc1 f0, 0(TMP2)
3841 |.else
3842 | sw RB, TEMP_SAVE_1
3843 | sw CARG2, TEMP_SAVE_3
3844 | load_got __fixdfsi
3845 | lw CARG1, 0(CARG3)
3846 | lw CARG2, 4(CARG3)
3847 | call_extern // cvt.w.d f2, f0
3848 |. sw RC, TEMP_SAVE_2
3849 | sw CRET1, TEMP_SAVE_4
3850 | cvti2d CRET1 // cvt.d.w f4, f2
3851 | load_got __ledf2
3852 | lw RC, TEMP_SAVE_2
3853 | addu CARG3, BASE, RC
3854 | lw CARG1, 0(CARG3)
3855 | lw CARG2, 4(CARG3)
3856 | move CARG3, CRET1
3857 | move CARG4, CRET2
3858 | call_extern // c.eq.d f0, f4
3859 |. nop
3860 | lw CARG3, TEMP_SAVE_3
3861 | lw RC, TEMP_SAVE_2
3862 | lw RB, TEMP_SAVE_1
3863 | lw TMP0, TAB:RB->asize
3864 | lw TMP1, TAB:RB->array 3916 | lw TMP1, TAB:RB->array
3865 | lw TMP2, TEMP_SAVE_4 3917 | sltu AT, RC, TMP0
3866 | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3. 3918 | sll TMP2, RC, 3
3867 | addu CARG3, BASE, RC
3868 | bnez CRET1, >3
3869 |. sltu AT, TMP2, TMP0
3870 | b >4
3871 |. nop
3872 |3:
3873 | move AT, r0
3874 |4:
3875 | sll TMP2, TMP2, 3
3876 | beqz AT, ->vmeta_tgetv // Integer key and in array part? 3919 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3877 |. addu TMP2, TMP1, TMP2 3920 |. addu TMP2, TMP1, TMP2
3878 | lw TMP0, HI(TMP2) 3921 | lw SFRETHI, HI(TMP2)
3879 | lw SFT2, 4(TMP2) 3922 | beq SFRETHI, TISNIL, >2
3880 | beq TMP0, TISNIL, >2 3923 |. lw SFRETLO, LO(TMP2)
3881 |. lw SFT1, 0(TMP2)
3882 |.endif
3883 |1: 3924 |1:
3884 | ins_next1 3925 | ins_next1
3885 | store_double1 0(RA) 3926 | sw SFRETHI, HI(RA)
3927 | sw SFRETLO, LO(RA)
3886 | ins_next2 3928 | ins_next2
3887 | 3929 |
3888 |2: // Check for __index if table value is nil. 3930 |2: // Check for __index if table value is nil.
@@ -3897,8 +3939,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3897 |. nop 3939 |. nop
3898 | 3940 |
3899 |5: 3941 |5:
3942 | li AT, LJ_TSTR
3900 | bne TMP2, AT, ->vmeta_tgetv 3943 | bne TMP2, AT, ->vmeta_tgetv
3901 |. lw STR:RC, LO(CARG3) 3944 |. nop
3902 | b ->BC_TGETS_Z // String key? 3945 | b ->BC_TGETS_Z // String key?
3903 |. nop 3946 |. nop
3904 break; 3947 break;
@@ -3930,18 +3973,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3930 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 3973 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3931 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 3974 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3932 | lw NODE:TMP1, NODE:TMP2->next 3975 | lw NODE:TMP1, NODE:TMP2->next
3933 | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 3976 | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3934 | addiu CARG1, CARG1, -LJ_TSTR 3977 | addiu CARG1, CARG1, -LJ_TSTR
3935 | xor TMP0, TMP0, STR:RC 3978 | xor TMP0, TMP0, STR:RC
3936 | or AT, CARG1, TMP0 3979 | or AT, CARG1, TMP0
3937 | bnez AT, >4 3980 | bnez AT, >4
3938 |. lw TAB:TMP3, TAB:RB->metatable 3981 |. lw TAB:TMP3, TAB:RB->metatable
3939 | beq CARG2, TISNIL, >5 // Key found, but nil value? 3982 | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
3940 |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) 3983 |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3941 |3: 3984 |3:
3942 | ins_next1 3985 | ins_next1
3943 | sw CARG2, HI(RA) 3986 | sw SFRETHI, HI(RA)
3944 | sw CARG1, LO(RA) 3987 | sw SFRETLO, LO(RA)
3945 | ins_next2 3988 | ins_next2
3946 | 3989 |
3947 |4: // Follow hash chain. 3990 |4: // Follow hash chain.
@@ -3951,7 +3994,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3951 | 3994 |
3952 |5: // Check for __index if table value is nil. 3995 |5: // Check for __index if table value is nil.
3953 | beqz TAB:TMP3, <3 // No metatable: done. 3996 | beqz TAB:TMP3, <3 // No metatable: done.
3954 |. li CARG2, LJ_TNIL 3997 |. li SFRETHI, LJ_TNIL
3955 | lbu TMP0, TAB:TMP3->nomm 3998 | lbu TMP0, TAB:TMP3->nomm
3956 | andi TMP0, TMP0, 1<<MM_index 3999 | andi TMP0, TMP0, 1<<MM_index
3957 | bnez TMP0, <3 // 'no __index' flag set: done. 4000 | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3976,13 +4019,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3976 | sltu AT, TMP0, TMP1 4019 | sltu AT, TMP0, TMP1
3977 | beqz AT, ->vmeta_tgetb 4020 | beqz AT, ->vmeta_tgetb
3978 |. addu RC, TMP2, RC 4021 |. addu RC, TMP2, RC
3979 | lw TMP1, HI(RC) 4022 | lw SFRETHI, HI(RC)
3980 | beq TMP1, TISNIL, >5 4023 | beq SFRETHI, TISNIL, >5
3981 |. nop 4024 |. lw SFRETLO, LO(RC)
3982 |1: 4025 |1:
3983 | load_double1 0(RC)
3984 | ins_next1 4026 | ins_next1
3985 | store_double1 0(RA) 4027 | sw SFRETHI, HI(RA)
4028 | sw SFRETLO, LO(RA)
3986 | ins_next2 4029 | ins_next2
3987 | 4030 |
3988 |5: // Check for __index if table value is nil. 4031 |5: // Check for __index if table value is nil.
@@ -3993,7 +4036,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3993 | andi TMP1, TMP1, 1<<MM_index 4036 | andi TMP1, TMP1, 1<<MM_index
3994 | bnez TMP1, <1 // 'no __index' flag set: done. 4037 | bnez TMP1, <1 // 'no __index' flag set: done.
3995 |. nop 4038 |. nop
3996 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4039 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3997 |. nop 4040 |. nop
3998 break; 4041 break;
3999 case BC_TGETR: 4042 case BC_TGETR:
@@ -4001,31 +4044,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4001 | decode_RB8a RB, INS 4044 | decode_RB8a RB, INS
4002 | decode_RB8b RB 4045 | decode_RB8b RB
4003 | decode_RDtoRC8 RC, RD 4046 | decode_RDtoRC8 RC, RD
4004 | addu CARG2, BASE, RB 4047 | addu RB, BASE, RB
4005 | addu CARG3, BASE, RC 4048 | addu RC, BASE, RC
4006 | lw TAB:CARG1, LO(CARG2) 4049 | lw TAB:CARG1, LO(RB)
4007 | lw TMP0, TAB:CARG1->asize 4050 | lw CARG2, LO(RC)
4051 | addu RA, BASE, RA
4052 | lw TMP0, TAB:CARG1->asize
4008 | lw TMP1, TAB:CARG1->array 4053 | lw TMP1, TAB:CARG1->array
4009 |.if FPU
4010 | ldc1 f0, 0(CARG3)
4011 | trunc.w.d f2, f0
4012 | mfc1 CARG2, f2
4013 |.else
4014 | load_got __fixdfsi
4015 | lw CARG1, 0(CARG3)
4016 | call_extern
4017 |. lw CARG2, 4(CARG3)
4018 | move CARG2, CRET1
4019 |.endif
4020 | sltu AT, CARG2, TMP0 4054 | sltu AT, CARG2, TMP0
4021 | sll TMP2, CARG2, 3 4055 | sll TMP2, CARG2, 3
4022 | beqz AT, ->vmeta_tgetr // In array part? 4056 | beqz AT, ->vmeta_tgetr // In array part?
4023 |. addu TMP2, TMP1, TMP2 4057 |. addu CRET1, TMP1, TMP2
4024 | load_double1 0(TMP2) 4058 | lw SFARG2HI, HI(CRET1)
4059 | lw SFARG2LO, LO(CRET1)
4025 |->BC_TGETR_Z: 4060 |->BC_TGETR_Z:
4026 | addu RA, BASE, RA
4027 | ins_next1 4061 | ins_next1
4028 | store_double1 0(RA) 4062 | sw SFARG2HI, HI(RA)
4063 | sw SFARG2LO, LO(RA)
4029 | ins_next2 4064 | ins_next2
4030 break; 4065 break;
4031 4066
@@ -4042,77 +4077,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4042 | li AT, LJ_TTAB 4077 | li AT, LJ_TTAB
4043 | bne TMP1, AT, ->vmeta_tsetv 4078 | bne TMP1, AT, ->vmeta_tsetv
4044 |. addu RA, BASE, RA 4079 |. addu RA, BASE, RA
4045 | sltiu AT, TMP2, LJ_TISNUM 4080 | bne TMP2, TISNUM, >5
4046 | beqz AT, >5 4081 |. lw RC, LO(CARG3)
4047 |. li AT, LJ_TSTR 4082 | lw TMP0, TAB:RB->asize
4048 |.if FPU
4049 | ldc1 f0, 0(CARG3)
4050 | // Convert number key to integer, check for integerness and range.
4051 | cvt.w.d f2, f0
4052 | lw TMP0, TAB:RB->asize
4053 | mfc1 TMP2, f2
4054 | cvt.d.w f4, f2
4055 | lw TMP1, TAB:RB->array 4083 | lw TMP1, TAB:RB->array
4056 | c.eq.d f0, f4 4084 | sltu AT, RC, TMP0
4057 | sltu AT, TMP2, TMP0 4085 | sll TMP2, RC, 3
4058 | movf AT, r0
4059 | sll TMP2, TMP2, 3
4060 | beqz AT, ->vmeta_tsetv // Integer key and in array part? 4086 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
4061 |. addu TMP1, TMP1, TMP2 4087 |. addu TMP1, TMP1, TMP2
4062 | lbu TMP3, TAB:RB->marked
4063 | lw TMP0, HI(TMP1) 4088 | lw TMP0, HI(TMP1)
4064 | beq TMP0, TISNIL, >3
4065 |. ldc1 f0, 0(RA)
4066 |1:
4067 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4068 | bnez AT, >7
4069 |. sdc1 f0, 0(TMP1)
4070 |.else
4071 | sw RB, TEMP_SAVE_1
4072 | sw RC, TEMP_SAVE_2
4073 | sw CARG2, TEMP_SAVE_3
4074 | load_got __fixdfsi
4075 | lw CARG1, 0(CARG3)
4076 | call_extern // cvt.w.d f2, f0
4077 |. lw CARG2, 4(CARG3)
4078 | sw CRET1, TEMP_SAVE_4
4079 | cvti2d CRET1 // cvt.d.w f4, f2
4080 | load_got __ledf2
4081 | lw RC, TEMP_SAVE_2
4082 | addu CARG3, BASE, RC
4083 | lw CARG1, 0(CARG3)
4084 | lw CARG2, 4(CARG3)
4085 | move CARG3, CRET1
4086 | call_extern // c.eq.d f0, f4
4087 |. move CARG4, CRET2
4088 | lw RC, TEMP_SAVE_2
4089 | lw RB, TEMP_SAVE_1
4090 | lw TMP0, TAB:RB->asize
4091 | lw TMP1, TAB:RB->array
4092 | lw TMP2, TEMP_SAVE_4
4093 | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3.
4094 | addu CARG3, BASE, RC
4095 | bnez CRET1, >4 // NaN?
4096 |. sltu AT, TMP2, TMP0
4097 | b >6
4098 |. nop
4099 |4:
4100 | move AT, r0
4101 |6:
4102 | sll TMP2, TMP2, 3
4103 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
4104 |. addu TMP1, TMP1, TMP2
4105 | lbu TMP3, TAB:RB->marked 4089 | lbu TMP3, TAB:RB->marked
4106 | lw TMP0, HI(TMP1) 4090 | lw SFRETHI, HI(RA)
4107 | lw SFT1, 0(RA)
4108 | beq TMP0, TISNIL, >3 4091 | beq TMP0, TISNIL, >3
4109 |. lw SFT2, 4(RA) 4092 |. lw SFRETLO, LO(RA)
4110 |1: 4093 |1:
4111 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4094 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4112 | sw SFT1, 0(TMP1) 4095 | sw SFRETHI, HI(TMP1)
4113 | bnez AT, >7 4096 | bnez AT, >7
4114 |. sw SFT2, 4(TMP1) 4097 |. sw SFRETLO, LO(TMP1)
4115 |.endif
4116 |2: 4098 |2:
4117 | ins_next 4099 | ins_next
4118 | 4100 |
@@ -4128,8 +4110,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4128 |. nop 4110 |. nop
4129 | 4111 |
4130 |5: 4112 |5:
4113 | li AT, LJ_TSTR
4131 | bne TMP2, AT, ->vmeta_tsetv 4114 | bne TMP2, AT, ->vmeta_tsetv
4132 |. lw STR:RC, LO(CARG3) 4115 |. nop
4133 | b ->BC_TSETS_Z // String key? 4116 | b ->BC_TSETS_Z // String key?
4134 |. nop 4117 |. nop
4135 | 4118 |
@@ -4161,7 +4144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4161 | sll TMP1, TMP1, 3 4144 | sll TMP1, TMP1, 3
4162 | subu TMP1, TMP0, TMP1 4145 | subu TMP1, TMP0, TMP1
4163 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4146 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4164 | load_double f20, SFT1, SFT2, 0(RA) 4147 |.if FPU
4148 | ldc1 f20, 0(RA)
4149 |.else
4150 | lw SFRETHI, HI(RA)
4151 | lw SFRETLO, LO(RA)
4152 |.endif
4165 |1: 4153 |1:
4166 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4154 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
4167 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4155 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -4179,9 +4167,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4179 | bnez AT, >7 4167 | bnez AT, >7
4180 |. sdc1 f20, NODE:TMP2->val 4168 |. sdc1 f20, NODE:TMP2->val
4181 |.else 4169 |.else
4182 | sw SFT1, NODE:TMP2->val.u32.hi 4170 | sw SFRETHI, NODE:TMP2->val.u32.hi
4183 | bnez AT, >7 4171 | bnez AT, >7
4184 |. sw SFT2, NODE:TMP2->val.u32.lo 4172 |. sw SFRETLO, NODE:TMP2->val.u32.lo
4185 |.endif 4173 |.endif
4186 |3: 4174 |3:
4187 | ins_next 4175 | ins_next
@@ -4210,10 +4198,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4210 | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. 4198 | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4211 |. li AT, LJ_TSTR 4199 |. li AT, LJ_TSTR
4212 |6: 4200 |6:
4213 |.if not FPU
4214 | sw SFT1, TEMP_SAVE_1
4215 | sw SFT2, TEMP_SAVE_2
4216 |.endif
4217 | load_got lj_tab_newkey 4201 | load_got lj_tab_newkey
4218 | sw STR:RC, LO(CARG3) 4202 | sw STR:RC, LO(CARG3)
4219 | sw AT, HI(CARG3) 4203 | sw AT, HI(CARG3)
@@ -4228,11 +4212,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4228 | b <3 // No 2nd write barrier needed. 4212 | b <3 // No 2nd write barrier needed.
4229 |. sdc1 f20, 0(CRET1) 4213 |. sdc1 f20, 0(CRET1)
4230 |.else 4214 |.else
4231 | lw SFT2, TEMP_SAVE_1 4215 | lw SFARG1HI, HI(RA)
4232 | lw SFT3, TEMP_SAVE_2 4216 | lw SFARG1LO, LO(RA)
4233 | sw SFT2, 0(CRET1) 4217 | sw SFARG1HI, HI(CRET1)
4234 | b <3 4218 | b <3 // No 2nd write barrier needed.
4235 |. sw SFT3, 4(CRET1) 4219 |. sw SFARG1LO, LO(CRET1)
4236 |.endif 4220 |.endif
4237 | 4221 |
4238 |7: // Possible table write barrier for the value. Skip valiswhite check. 4222 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4259,16 +4243,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4259 | lbu TMP3, TAB:RB->marked 4243 | lbu TMP3, TAB:RB->marked
4260 | beq TMP1, TISNIL, >5 4244 | beq TMP1, TISNIL, >5
4261 |1: 4245 |1:
4262 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4246 |. lw SFRETHI, HI(RA)
4263 | load_double1 0(RA) 4247 | lw SFRETLO, LO(RA)
4264 |.if FPU 4248 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4265 | bnez AT, >7 4249 | sw SFRETHI, HI(RC)
4266 |. sdc1 f0, 0(RC)
4267 |.else
4268 | sw SFT1, 0(RC)
4269 | bnez AT, >7 4250 | bnez AT, >7
4270 |. sw SFT2, 4(RC) 4251 |. sw SFRETLO, LO(RC)
4271 |.endif
4272 |2: 4252 |2:
4273 | ins_next 4253 | ins_next
4274 | 4254 |
@@ -4280,7 +4260,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4280 | andi TMP1, TMP1, 1<<MM_newindex 4260 | andi TMP1, TMP1, 1<<MM_newindex
4281 | bnez TMP1, <1 // 'no __newindex' flag set: done. 4261 | bnez TMP1, <1 // 'no __newindex' flag set: done.
4282 |. nop 4262 |. nop
4283 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4263 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
4284 |. nop 4264 |. nop
4285 | 4265 |
4286 |7: // Possible table write barrier for the value. Skip valiswhite check. 4266 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4293,54 +4273,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4293 | decode_RDtoRC8 RC, RD 4273 | decode_RDtoRC8 RC, RD
4294 | addu CARG1, BASE, RB 4274 | addu CARG1, BASE, RB
4295 | addu CARG3, BASE, RC 4275 | addu CARG3, BASE, RC
4296 |.if FPU 4276 | lw TAB:CARG2, LO(CARG1)
4297 | ldc1 f0, 0(CARG3) 4277 | lw CARG3, LO(CARG3)
4298 | trunc.w.d f2, f0 4278 | lbu TMP3, TAB:CARG2->marked
4299 | mfc1 CARG3, f2
4300 |.else
4301 | load_got __fixdfsi
4302 | sw CARG1, TEMP_SAVE_1
4303 | lw CARG1, 0(CARG3)
4304 | call_extern
4305 |. lw CARG2, 4(CARG3)
4306 | lw CARG1, TEMP_SAVE_1
4307 | move CARG3, CRET1
4308 |.endif
4309 | lw TAB:CARG2, LO(CARG1)
4310 | lbu TMP3, TAB:CARG2->marked
4311 | lw TMP0, TAB:CARG2->asize 4279 | lw TMP0, TAB:CARG2->asize
4312 | lw TMP1, TAB:CARG2->array 4280 | lw TMP1, TAB:CARG2->array
4313 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4281 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4314 | bnez AT, >7 4282 | bnez AT, >7
4315 |. addu RA, BASE, RA 4283 |. addu RA, BASE, RA
4316 |2: 4284 |2:
4317 | sltu AT, CARG3, TMP0 4285 | sltu AT, CARG3, TMP0
4318 | sll TMP2, CARG3, 3 4286 | sll TMP2, CARG3, 3
4319 |.if FPU
4320 | beqz AT, ->vmeta_tsetr // In array part?
4321 |. ldc1 f20, 0(RA)
4322 | addu CRET1, TMP1, TMP2
4323 |->BC_TSETR_Z:
4324 |.else
4325 | lw TMP0, 0(RA)
4326 | lw TMP3, 4(RA)
4327 | sw TMP0, TEMP_SAVE_1
4328 | beqz AT, ->vmeta_tsetr // In array part? 4287 | beqz AT, ->vmeta_tsetr // In array part?
4329 |. sw TMP3, TEMP_SAVE_2 4288 |. addu CRET1, TMP1, TMP2
4330 | addu CRET1, TMP1, TMP2
4331 |->BC_TSETR_Z: 4289 |->BC_TSETR_Z:
4332 | lw TMP0, TEMP_SAVE_1 4290 | lw SFARG1HI, HI(RA)
4333 | lw TMP3, TEMP_SAVE_2 4291 | lw SFARG1LO, LO(RA)
4334 |.endif
4335 | ins_next1 4292 | ins_next1
4336 | store_double f20, TMP0, TMP3, 0(CRET1) 4293 | sw SFARG1HI, HI(CRET1)
4294 | sw SFARG1LO, LO(CRET1)
4337 | ins_next2 4295 | ins_next2
4338 | 4296 |
4339 |7: // Possible table write barrier for the value. Skip valiswhite check. 4297 |7: // Possible table write barrier for the value. Skip valiswhite check.
4340 | barrierback TAB:RB, TMP3, TMP0, <2 4298 | barrierback TAB:RB, TMP3, TMP0, <2
4341 break; 4299 break;
4342 4300
4343
4344 case BC_TSETM: 4301 case BC_TSETM:
4345 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4302 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
4346 | addu RA, BASE, RA 4303 | addu RA, BASE, RA
@@ -4362,10 +4319,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4362 | addu TMP1, TMP1, CARG1 4319 | addu TMP1, TMP1, CARG1
4363 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4320 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4364 |3: // Copy result slots to table. 4321 |3: // Copy result slots to table.
4365 | load_double1 0(RA) 4322 | lw SFRETHI, HI(RA)
4323 | lw SFRETLO, LO(RA)
4366 | addiu RA, RA, 8 4324 | addiu RA, RA, 8
4367 | sltu AT, RA, TMP2 4325 | sltu AT, RA, TMP2
4368 | store_double1 0(TMP1) 4326 | sw SFRETHI, HI(TMP1)
4327 | sw SFRETLO, LO(TMP1)
4369 | bnez AT, <3 4328 | bnez AT, <3
4370 |. addiu TMP1, TMP1, 8 4329 |. addiu TMP1, TMP1, 8
4371 | bnez TMP0, >7 4330 | bnez TMP0, >7
@@ -4440,10 +4399,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4440 | beqz NARGS8:RC, >3 4399 | beqz NARGS8:RC, >3
4441 |. move TMP3, NARGS8:RC 4400 |. move TMP3, NARGS8:RC
4442 |2: 4401 |2:
4443 | load_double1 0(RA) 4402 | lw SFRETHI, HI(RA)
4403 | lw SFRETLO, LO(RA)
4444 | addiu RA, RA, 8 4404 | addiu RA, RA, 8
4445 | addiu TMP3, TMP3, -8 4405 | addiu TMP3, TMP3, -8
4446 | store_double1 0(TMP2) 4406 | sw SFRETHI, HI(TMP2)
4407 | sw SFRETLO, LO(TMP2)
4447 | bnez TMP3, <2 4408 | bnez TMP3, <2
4448 |. addiu TMP2, TMP2, 8 4409 |. addiu TMP2, TMP2, 8
4449 |3: 4410 |3:
@@ -4480,12 +4441,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4480 | li AT, LJ_TFUNC 4441 | li AT, LJ_TFUNC
4481 | lw TMP1, -24+HI(BASE) 4442 | lw TMP1, -24+HI(BASE)
4482 | lw LFUNC:RB, -24+LO(BASE) 4443 | lw LFUNC:RB, -24+LO(BASE)
4483 | load_double1 -8(BASE) 4444 | lw SFARG1HI, -16+HI(BASE)
4484 | load_double2 -16(BASE) 4445 | lw SFARG1LO, -16+LO(BASE)
4446 | lw SFARG2HI, -8+HI(BASE)
4447 | lw SFARG2LO, -8+LO(BASE)
4485 | sw TMP1, HI(BASE) // Copy callable. 4448 | sw TMP1, HI(BASE) // Copy callable.
4486 | sw LFUNC:RB, LO(BASE) 4449 | sw LFUNC:RB, LO(BASE)
4487 | store_double1 16(BASE) // Copy control var. 4450 | sw SFARG1HI, 8+HI(BASE) // Copy state.
4488 | store_double2 8(BASE) // Copy state. 4451 | sw SFARG1LO, 8+LO(BASE)
4452 | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4453 | sw SFARG2LO, 16+LO(BASE)
4489 | addiu BASE, BASE, 8 4454 | addiu BASE, BASE, 8
4490 | bne TMP1, AT, ->vmeta_call 4455 | bne TMP1, AT, ->vmeta_call
4491 |. li NARGS8:RC, 16 // Iterators get 2 arguments. 4456 |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -4508,26 +4473,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4508 | beqz AT, >5 // Index points after array part? 4473 | beqz AT, >5 // Index points after array part?
4509 |. sll TMP3, RC, 3 4474 |. sll TMP3, RC, 3
4510 | addu TMP3, TMP1, TMP3 4475 | addu TMP3, TMP1, TMP3
4511 | lw TMP2, HI(TMP3) 4476 | lw SFARG1HI, HI(TMP3)
4512 | load_double1 0(TMP3) 4477 | lw SFARG1LO, LO(TMP3)
4513 |.if FPU
4514 | mtc1 RC, f2
4515 |.else
4516 | move CARG1, RC
4517 |.endif
4518 | lhu RD, -4+OFS_RD(PC) 4478 | lhu RD, -4+OFS_RD(PC)
4519 | beq TMP2, TISNIL, <1 // Skip holes in array part. 4479 | sw TISNUM, HI(RA)
4480 | sw RC, LO(RA)
4481 | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
4520 |. addiu RC, RC, 1 4482 |. addiu RC, RC, 1
4521 | store_double1 8(RA) 4483 | sw SFARG1HI, 8+HI(RA)
4522 |.if FPU 4484 | sw SFARG1LO, 8+LO(RA)
4523 | cvt.d.w f2, f2
4524 |.else
4525 | load_got __floatsidf
4526 | call_extern
4527 |. nop
4528 |.endif
4529 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4485 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4530 | store_double f2, CRET1, CRET2, 0(RA)
4531 | decode_RD4b RD 4486 | decode_RD4b RD
4532 | addu RD, RD, TMP3 4487 | addu RD, RD, TMP3
4533 | sw RC, -8+LO(RA) // Update control var. 4488 | sw RC, -8+LO(RA) // Update control var.
@@ -4546,23 +4501,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4546 | sll RB, RC, 3 4501 | sll RB, RC, 3
4547 | subu TMP3, TMP3, RB 4502 | subu TMP3, TMP3, RB
4548 | addu NODE:TMP3, TMP3, TMP2 4503 | addu NODE:TMP3, TMP3, TMP2
4549 | lw RB, HI(NODE:TMP3) 4504 | lw SFARG1HI, NODE:TMP3->val.u32.hi
4550 | load_double1 0(NODE:TMP3) 4505 | lw SFARG1LO, NODE:TMP3->val.u32.lo
4551 | lhu RD, -4+OFS_RD(PC) 4506 | lhu RD, -4+OFS_RD(PC)
4552 | beq RB, TISNIL, <6 // Skip holes in hash part. 4507 | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
4553 |. addiu RC, RC, 1 4508 |. addiu RC, RC, 1
4554 |.if FPU 4509 | lw SFARG2HI, NODE:TMP3->key.u32.hi
4555 | ldc1 f2, NODE:TMP3->key 4510 | lw SFARG2LO, NODE:TMP3->key.u32.lo
4556 |.else
4557 | lw SFT3, NODE:TMP3->key.u32.hi
4558 | lw SFT4, NODE:TMP3->key.u32.lo
4559 |.endif
4560 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4511 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4561 | store_double1 8(RA) 4512 | sw SFARG1HI, 8+HI(RA)
4513 | sw SFARG1LO, 8+LO(RA)
4562 | addu RC, RC, TMP0 4514 | addu RC, RC, TMP0
4563 | decode_RD4b RD 4515 | decode_RD4b RD
4564 | addu RD, RD, TMP3 4516 | addu RD, RD, TMP3
4565 | store_double2 0(RA) 4517 | sw SFARG2HI, HI(RA)
4518 | sw SFARG2LO, LO(RA)
4566 | addu PC, PC, RD 4519 | addu PC, PC, RD
4567 | b <3 4520 | b <3
4568 |. sw RC, -8+LO(RA) // Update control var. 4521 |. sw RC, -8+LO(RA) // Update control var.
@@ -4642,9 +4595,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4642 | bnez AT, >7 4595 | bnez AT, >7
4643 |. addiu MULTRES, TMP1, 8 4596 |. addiu MULTRES, TMP1, 8
4644 |6: 4597 |6:
4645 | load_double1 0(RC) 4598 | lw SFRETHI, HI(RC)
4599 | lw SFRETLO, LO(RC)
4646 | addiu RC, RC, 8 4600 | addiu RC, RC, 8
4647 | store_double1 0(RA) 4601 | sw SFRETHI, HI(RA)
4602 | sw SFRETLO, LO(RA)
4648 | sltu AT, RC, TMP3 4603 | sltu AT, RC, TMP3
4649 | bnez AT, <6 // More vararg slots? 4604 | bnez AT, <6 // More vararg slots?
4650 |. addiu RA, RA, 8 4605 |. addiu RA, RA, 8
@@ -4700,10 +4655,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4700 | beqz RC, >3 4655 | beqz RC, >3
4701 |. subu BASE, TMP2, TMP0 4656 |. subu BASE, TMP2, TMP0
4702 |2: 4657 |2:
4703 | load_double1 0(RA) 4658 | lw SFRETHI, HI(RA)
4659 | lw SFRETLO, LO(RA)
4704 | addiu RA, RA, 8 4660 | addiu RA, RA, 8
4705 | addiu RC, RC, -8 4661 | addiu RC, RC, -8
4706 | store_double1 0(TMP2) 4662 | sw SFRETHI, HI(TMP2)
4663 | sw SFRETLO, LO(TMP2)
4707 | bnez RC, <2 4664 | bnez RC, <2
4708 |. addiu TMP2, TMP2, 8 4665 |. addiu TMP2, TMP2, 8
4709 |3: 4666 |3:
@@ -4744,14 +4701,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4744 | lw INS, -4(PC) 4701 | lw INS, -4(PC)
4745 | addiu TMP2, BASE, -8 4702 | addiu TMP2, BASE, -8
4746 if (op == BC_RET1) { 4703 if (op == BC_RET1) {
4747 | load_double1 0(RA) 4704 | lw SFRETHI, HI(RA)
4705 | lw SFRETLO, LO(RA)
4748 } 4706 }
4749 | decode_RB8a RB, INS 4707 | decode_RB8a RB, INS
4750 | decode_RA8a RA, INS 4708 | decode_RA8a RA, INS
4751 | decode_RB8b RB 4709 | decode_RB8b RB
4752 | decode_RA8b RA 4710 | decode_RA8b RA
4753 if (op == BC_RET1) { 4711 if (op == BC_RET1) {
4754 | store_double1 0(TMP2) 4712 | sw SFRETHI, HI(TMP2)
4713 | sw SFRETLO, LO(TMP2)
4755 } 4714 }
4756 | subu BASE, TMP2, RA 4715 | subu BASE, TMP2, RA
4757 |5: 4716 |5:
@@ -4776,45 +4735,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4776 4735
4777 /* -- Loops and branches ------------------------------------------------ */ 4736 /* -- Loops and branches ------------------------------------------------ */
4778 4737
4779 |.macro cmp_res, gt
4780 |.if gt == 1
4781 |.if FPU
4782 | movf TMP1, r0, 0 // f0>f2: TMP1=0
4783 | movf TMP2, r0, 1 // f2>f0: TMP2=0
4784 |.else
4785 | li SFT2, 1
4786 | bne CRET1, SFT2, >1
4787 |. nop
4788 | b >2
4789 |. move TMP1, r0
4790 |1:
4791 | li SFT2, -1
4792 | bne CRET1, SFT2, >2
4793 |. nop
4794 | move TMP2, r0
4795 |2:
4796 |.endif
4797 |.else
4798 |.if FPU
4799 | movt TMP1, r0, 0 // f0<=f2: TMP1=0
4800 | movt TMP2, r0, 1 // f2<=f0: TMP2=0
4801 |.else
4802 | bltz CRET1, >3 // f0<f2: TMP1=0
4803 |. nop
4804 | beqz CRET1, >2 // f0==f2: TMP1=TMP2=0
4805 |. li SFT2, 1
4806 | bne SFT2, CRET1, >4 // f0>f2: TMP2=0
4807 |. nop
4808 | b >4
4809 |2:
4810 |. move TMP2, r0
4811 |3:
4812 | move TMP1, r0
4813 |4:
4814 |.endif
4815 |.endif
4816 |.endmacro
4817
4818 case BC_FORL: 4738 case BC_FORL:
4819 |.if JIT 4739 |.if JIT
4820 | hotloop 4740 | hotloop
@@ -4832,96 +4752,140 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4832 | // RA = base*8, RD = target (after end of loop or start of loop) 4752 | // RA = base*8, RD = target (after end of loop or start of loop)
4833 vk = (op == BC_IFORL || op == BC_JFORL); 4753 vk = (op == BC_IFORL || op == BC_JFORL);
4834 | addu RA, BASE, RA 4754 | addu RA, BASE, RA
4835 if (vk) { 4755 | lw SFARG1HI, FORL_IDX*8+HI(RA)
4836 |.if FPU 4756 | lw SFARG1LO, FORL_IDX*8+LO(RA)
4837 | ldc1 f0, FORL_IDX*8(RA)
4838 | ldc1 f4, FORL_STEP*8(RA)
4839 | ldc1 f2, FORL_STOP*8(RA)
4840 | lw TMP3, FORL_STEP*8+HI(RA)
4841 | add.d f0, f0, f4
4842 | sdc1 f0, FORL_IDX*8(RA)
4843 |.else
4844 | load_got __adddf3
4845 | load_farg1 FORL_IDX*8(RA)
4846 | load_farg2 FORL_STEP*8(RA)
4847 | call_extern
4848 |. sw RD, TEMP_SAVE_1 //save RD
4849 | sw CRET1, FORL_IDX*8(RA)
4850 | sw CRET2, FORL_IDX*8+4(RA)
4851 | load_farg1 FORL_IDX*8(RA)
4852 | load_farg2 FORL_STOP*8(RA) // f0 and f2
4853 | lw TMP3, FORL_STEP*8+HI(RA)
4854 | lw RD, TEMP_SAVE_1
4855 |.endif
4856 } else {
4857 | lw TMP1, FORL_IDX*8+HI(RA)
4858 | lw TMP3, FORL_STEP*8+HI(RA)
4859 | lw TMP2, FORL_STOP*8+HI(RA)
4860 | sltiu TMP1, TMP1, LJ_TISNUM
4861 | sltiu TMP0, TMP3, LJ_TISNUM
4862 | sltiu TMP2, TMP2, LJ_TISNUM
4863 | and TMP1, TMP1, TMP0
4864 | and TMP1, TMP1, TMP2
4865 |.if FPU
4866 | ldc1 f0, FORL_IDX*8(RA)
4867 | beqz TMP1, ->vmeta_for
4868 |. ldc1 f2, FORL_STOP*8(RA)
4869 |.else
4870 | beqz TMP1, ->vmeta_for
4871 | load_farg1 FORL_IDX*8(RA)
4872 | load_farg2 FORL_STOP*8(RA)
4873 |.endif
4874 }
4875 if (op != BC_JFORL) { 4757 if (op != BC_JFORL) {
4876 | srl RD, RD, 1 4758 | srl RD, RD, 1
4877 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) 4759 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4760 | addu TMP2, RD, TMP2
4878 } 4761 }
4879 | store_double f0, CARG1, CARG2, FORL_EXT*8(RA) 4762 if (!vk) {
4880 |.if FPU 4763 | lw SFARG2HI, FORL_STOP*8+HI(RA)
4881 | c.le.d 0, f0, f2 4764 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4882 | c.le.d 1, f2, f0 4765 | bne SFARG1HI, TISNUM, >5
4883 |.else 4766 |. lw SFRETHI, FORL_STEP*8+HI(RA)
4884 | sw RD, TEMP_SAVE_1 4767 | xor AT, SFARG2HI, TISNUM
4885 | load_got __ledf2 // f0<=f2 4768 | lw SFRETLO, FORL_STEP*8+LO(RA)
4886 | call_extern 4769 | xor TMP0, SFRETHI, TISNUM
4887 |. sw TMP0, TEMP_SAVE_2 4770 | or AT, AT, TMP0
4888 | lw TMP0, TEMP_SAVE_2 4771 | bnez AT, ->vmeta_for
4889 | lw RD, TEMP_SAVE_1 4772 |. slt AT, SFRETLO, r0
4890 | lw TMP3, FORL_STEP*8+HI(RA) // Restored step. 4773 | slt CRET1, SFARG2LO, SFARG1LO
4891 |.endif 4774 | slt TMP1, SFARG1LO, SFARG2LO
4892 | 4775 | movn CRET1, TMP1, AT
4776 } else {
4777 | bne SFARG1HI, TISNUM, >5
4778 |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4779 | lw SFRETLO, FORL_STOP*8+LO(RA)
4780 | move TMP3, SFARG1LO
4781 | addu SFARG1LO, SFARG1LO, SFARG2LO
4782 | xor TMP0, SFARG1LO, TMP3
4783 | xor TMP1, SFARG1LO, SFARG2LO
4784 | and TMP0, TMP0, TMP1
4785 | slt TMP1, SFARG1LO, SFRETLO
4786 | slt CRET1, SFRETLO, SFARG1LO
4787 | slt AT, SFARG2LO, r0
4788 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4789 | movn CRET1, TMP1, AT
4790 | or CRET1, CRET1, TMP0
4791 }
4792 |1:
4793 if (op == BC_FORI) {
4794 | movz TMP2, r0, CRET1
4795 | addu PC, PC, TMP2
4796 } else if (op == BC_JFORI) {
4797 | addu PC, PC, TMP2
4798 | lhu RD, -4+OFS_RD(PC)
4799 } else if (op == BC_IFORL) {
4800 | movn TMP2, r0, CRET1
4801 | addu PC, PC, TMP2
4802 }
4803 if (vk) {
4804 | sw SFARG1HI, FORL_IDX*8+HI(RA)
4805 | sw SFARG1LO, FORL_IDX*8+LO(RA)
4806 }
4807 | ins_next1
4808 | sw SFARG1HI, FORL_EXT*8+HI(RA)
4809 | sw SFARG1LO, FORL_EXT*8+LO(RA)
4810 |2:
4893 if (op == BC_JFORI) { 4811 if (op == BC_JFORI) {
4894 | li TMP1, 1 4812 | beqz CRET1, =>BC_JLOOP
4895 | li TMP2, 1
4896 | addu TMP0, RD, TMP0
4897 | slt TMP3, TMP3, r0
4898 | cmp_res 1
4899 | addu PC, PC, TMP0
4900 | lhu RD, -4+OFS_RD(PC)
4901 | movn TMP1, TMP2, TMP3
4902 | bnez TMP1, =>BC_JLOOP
4903 |. decode_RD8b RD 4813 |. decode_RD8b RD
4904 } else if (op == BC_JFORL) { 4814 } else if (op == BC_JFORL) {
4905 | li TMP1, 1 4815 | beqz CRET1, =>BC_JLOOP
4906 | li TMP2, 1 4816 }
4907 | slt TMP3, TMP3, r0 4817 | ins_next2
4908 | cmp_res 1 4818 |
4909 | movn TMP1, TMP2, TMP3 4819 |5: // FP loop.
4910 | bnez TMP1, =>BC_JLOOP 4820 |.if FPU
4911 |. nop 4821 if (!vk) {
4822 | ldc1 f0, FORL_IDX*8(RA)
4823 | ldc1 f2, FORL_STOP*8(RA)
4824 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4825 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4826 | sltiu AT, SFRETHI, LJ_TISNUM
4827 | and TMP0, TMP0, TMP1
4828 | and AT, AT, TMP0
4829 | beqz AT, ->vmeta_for
4830 |. slt TMP3, SFRETHI, r0
4831 | c.ole.d 0, f0, f2
4832 | c.ole.d 1, f2, f0
4833 | li CRET1, 1
4834 | movt CRET1, r0, 0
4835 | movt AT, r0, 1
4836 | b <1
4837 |. movn CRET1, AT, TMP3
4912 } else { 4838 } else {
4913 | addu TMP1, RD, TMP0 4839 | ldc1 f0, FORL_IDX*8(RA)
4914 | slt TMP3, TMP3, r0 4840 | ldc1 f4, FORL_STEP*8(RA)
4915 | move TMP2, TMP1 4841 | ldc1 f2, FORL_STOP*8(RA)
4916 if (op == BC_FORI) { 4842 | lw SFARG2HI, FORL_STEP*8+HI(RA)
4917 | cmp_res 0 4843 | add.d f0, f0, f4
4918 } else { 4844 | c.ole.d 0, f0, f2
4919 | cmp_res 1 4845 | c.ole.d 1, f2, f0
4846 | slt TMP3, SFARG2HI, r0
4847 | li CRET1, 1
4848 | li AT, 1
4849 | movt CRET1, r0, 0
4850 | movt AT, r0, 1
4851 | movn CRET1, AT, TMP3
4852 if (op == BC_IFORL) {
4853 | movn TMP2, r0, CRET1
4854 | addu PC, PC, TMP2
4920 } 4855 }
4921 | movn TMP1, TMP2, TMP3 4856 | sdc1 f0, FORL_IDX*8(RA)
4922 | addu PC, PC, TMP1 4857 | ins_next1
4858 | b <2
4859 |. sdc1 f0, FORL_EXT*8(RA)
4923 } 4860 }
4924 | ins_next 4861 |.else
4862 if (!vk) {
4863 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4864 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4865 | sltiu AT, SFRETHI, LJ_TISNUM
4866 | and TMP0, TMP0, TMP1
4867 | and AT, AT, TMP0
4868 | beqz AT, ->vmeta_for
4869 |. nop
4870 | bal ->vm_sfcmpolex
4871 |. move TMP3, SFRETHI
4872 | b <1
4873 |. nop
4874 } else {
4875 | lw SFARG2HI, FORL_STEP*8+HI(RA)
4876 | load_got __adddf3
4877 | call_extern
4878 |. sw TMP2, ARG5
4879 | lw SFARG2HI, FORL_STOP*8+HI(RA)
4880 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4881 | move SFARG1HI, SFRETHI
4882 | move SFARG1LO, SFRETLO
4883 | bal ->vm_sfcmpolex
4884 |. lw TMP3, FORL_STEP*8+HI(RA)
4885 | b <1
4886 |. lw TMP2, ARG5
4887 }
4888 |.endif
4925 break; 4889 break;
4926 4890
4927 case BC_ITERL: 4891 case BC_ITERL:
@@ -5225,8 +5189,10 @@ static void emit_asm_debug(BuildCtx *ctx)
5225 fcofs, CFRAME_SIZE); 5189 fcofs, CFRAME_SIZE);
5226 for (i = 23; i >= 16; i--) 5190 for (i = 23; i >= 16; i--)
5227 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5191 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5192#if !LJ_SOFTFP
5228 for (i = 30; i >= 20; i -= 2) 5193 for (i = 30; i >= 20; i -= 2)
5229 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5194 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5195#endif
5230 fprintf(ctx->fp, 5196 fprintf(ctx->fp,
5231 "\t.align 2\n" 5197 "\t.align 2\n"
5232 ".LEFDE2:\n\n"); 5198 ".LEFDE2:\n\n");