aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/host/buildvm_asm.c 2
-rw-r--r-- src/lj_arch.h 29
-rw-r--r-- src/lj_ccall.c 38
-rw-r--r-- src/lj_ccall.h 4
-rw-r--r-- src/lj_ccallback.c 30
-rw-r--r-- src/lj_frame.h 2
-rw-r--r-- src/lj_ircall.h 2
-rw-r--r-- src/vm_ppc.dasc 1249
8 files changed, 1101 insertions, 255 deletions
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index ffd14903..43595b31 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
338#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) 338#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
339 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); 339 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
340#endif 340#endif
341#if LJ_TARGET_PPC && !LJ_TARGET_PS3 341#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
342 /* Hard-float ABI. */ 342 /* Hard-float ABI. */
343 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); 343 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
344#endif 344#endif
diff --git a/src/lj_arch.h b/src/lj_arch.h
index b7705642..0145a7c0 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -254,6 +254,29 @@
254#else 254#else
255#define LJ_ARCH_BITS 32 255#define LJ_ARCH_BITS 32
256#define LJ_ARCH_NAME "ppc" 256#define LJ_ARCH_NAME "ppc"
257
258#if !defined(LJ_ARCH_HASFPU)
259#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
260#define LJ_ARCH_HASFPU 0
261#else
262#define LJ_ARCH_HASFPU 1
263#endif
264#endif
265
266#if !defined(LJ_ABI_SOFTFP)
267#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
268#define LJ_ABI_SOFTFP 1
269#else
270#define LJ_ABI_SOFTFP 0
271#endif
272#endif
273#endif
274
275#if LJ_ABI_SOFTFP
276#define LJ_ARCH_NOJIT 1 /* NYI */
277#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
278#else
279#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
257#endif 280#endif
258 281
259#define LJ_TARGET_PPC 1 282#define LJ_TARGET_PPC 1
@@ -262,7 +285,6 @@
262#define LJ_TARGET_MASKSHIFT 0 285#define LJ_TARGET_MASKSHIFT 0
263#define LJ_TARGET_MASKROT 1 286#define LJ_TARGET_MASKROT 1
264#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ 287#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
265#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
266 288
267#if LJ_TARGET_CONSOLE 289#if LJ_TARGET_CONSOLE
268#define LJ_ARCH_PPC32ON64 1 290#define LJ_ARCH_PPC32ON64 1
@@ -415,16 +437,13 @@
415#error "No support for ILP32 model on ARM64" 437#error "No support for ILP32 model on ARM64"
416#endif 438#endif
417#elif LJ_TARGET_PPC 439#elif LJ_TARGET_PPC
418#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
419#error "No support for PowerPC CPUs without double-precision FPU"
420#endif
421#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE 440#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
422#error "No support for little-endian PPC32" 441#error "No support for little-endian PPC32"
423#endif 442#endif
424#if LJ_ARCH_PPC64 443#if LJ_ARCH_PPC64
425#error "No support for PowerPC 64 bit mode (yet)" 444#error "No support for PowerPC 64 bit mode (yet)"
426#endif 445#endif
427#ifdef __NO_FPRS__ 446#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
428#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" 447#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
429#endif 448#endif
430#elif LJ_TARGET_MIPS32 449#elif LJ_TARGET_MIPS32
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 5c252e5b..799be487 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -387,6 +387,24 @@
387#define CCALL_HANDLE_COMPLEXARG \ 387#define CCALL_HANDLE_COMPLEXARG \
388 /* Pass complex by value in 2 or 4 GPRs. */ 388 /* Pass complex by value in 2 or 4 GPRs. */
389 389
390#define CCALL_HANDLE_GPR \
391 /* Try to pass argument in GPRs. */ \
392 if (n > 1) { \
393 lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
394 if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
395 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
396 else if (ngpr + n > maxgpr) \
397 ngpr = maxgpr; /* Prevent reordering. */ \
398 } \
399 if (ngpr + n <= maxgpr) { \
400 dp = &cc->gpr[ngpr]; \
401 ngpr += n; \
402 goto done; \
403 } \
404
405#if LJ_ABI_SOFTFP
406#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
407#else
390#define CCALL_HANDLE_REGARG \ 408#define CCALL_HANDLE_REGARG \
391 if (isfp) { /* Try to pass argument in FPRs. */ \ 409 if (isfp) { /* Try to pass argument in FPRs. */ \
392 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 410 if (nfpr + 1 <= CCALL_NARG_FPR) { \
@@ -395,24 +413,16 @@
395 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ 413 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
396 goto done; \ 414 goto done; \
397 } \ 415 } \
398 } else { /* Try to pass argument in GPRs. */ \ 416 } else { \
399 if (n > 1) { \ 417 CCALL_HANDLE_GPR \
400 lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
401 if (ctype_isinteger(d->info)) \
402 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
403 else if (ngpr + n > maxgpr) \
404 ngpr = maxgpr; /* Prevent reordering. */ \
405 } \
406 if (ngpr + n <= maxgpr) { \
407 dp = &cc->gpr[ngpr]; \
408 ngpr += n; \
409 goto done; \
410 } \
411 } 418 }
419#endif
412 420
421#if !LJ_ABI_SOFTFP
413#define CCALL_HANDLE_RET \ 422#define CCALL_HANDLE_RET \
414 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 423 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
415 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ 424 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
425#endif
416 426
417#elif LJ_TARGET_MIPS32 427#elif LJ_TARGET_MIPS32
418/* -- MIPS o32 calling conventions ---------------------------------------- */ 428/* -- MIPS o32 calling conventions ---------------------------------------- */
@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
1080 } 1090 }
1081 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ 1091 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
1082 1092
1083#if LJ_TARGET_X64 || LJ_TARGET_PPC 1093#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
1084 cc->nfpr = nfpr; /* Required for vararg functions. */ 1094 cc->nfpr = nfpr; /* Required for vararg functions. */
1085#endif 1095#endif
1086 cc->nsp = nsp; 1096 cc->nsp = nsp;
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 59f66481..6efa48c7 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -86,9 +86,9 @@ typedef union FPRArg {
86#elif LJ_TARGET_PPC 86#elif LJ_TARGET_PPC
87 87
88#define CCALL_NARG_GPR 8 88#define CCALL_NARG_GPR 8
89#define CCALL_NARG_FPR 8 89#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
90#define CCALL_NRET_GPR 4 /* For complex double. */ 90#define CCALL_NRET_GPR 4 /* For complex double. */
91#define CCALL_NRET_FPR 1 91#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
92#define CCALL_SPS_EXTRA 4 92#define CCALL_SPS_EXTRA 4
93#define CCALL_SPS_FREE 0 93#define CCALL_SPS_FREE 0
94 94
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 846827b1..03494a7a 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *cts)
419 419
420#elif LJ_TARGET_PPC 420#elif LJ_TARGET_PPC
421 421
422#define CALLBACK_HANDLE_GPR \
423 if (n > 1) { \
424 lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
425 ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
426 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
427 } \
428 if (ngpr + n <= maxgpr) { \
429 sp = &cts->cb.gpr[ngpr]; \
430 ngpr += n; \
431 goto done; \
432 }
433
434#if LJ_ABI_SOFTFP
435#define CALLBACK_HANDLE_REGARG \
436 CALLBACK_HANDLE_GPR \
437 UNUSED(isfp);
438#else
422#define CALLBACK_HANDLE_REGARG \ 439#define CALLBACK_HANDLE_REGARG \
423 if (isfp) { \ 440 if (isfp) { \
424 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 441 if (nfpr + 1 <= CCALL_NARG_FPR) { \
@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *cts)
427 goto done; \ 444 goto done; \
428 } \ 445 } \
429 } else { /* Try to pass argument in GPRs. */ \ 446 } else { /* Try to pass argument in GPRs. */ \
430 if (n > 1) { \ 447 CALLBACK_HANDLE_GPR \
431 lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
432 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
433 } \
434 if (ngpr + n <= maxgpr) { \
435 sp = &cts->cb.gpr[ngpr]; \
436 ngpr += n; \
437 goto done; \
438 } \
439 } 448 }
449#endif
440 450
451#if !LJ_ABI_SOFTFP
441#define CALLBACK_HANDLE_RET \ 452#define CALLBACK_HANDLE_RET \
442 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 453 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
443 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ 454 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
455#endif
444 456
445#elif LJ_TARGET_MIPS32 457#elif LJ_TARGET_MIPS32
446 458
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 19c49a4a..04cb5a35 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
226#define CFRAME_OFS_L 36 226#define CFRAME_OFS_L 36
227#define CFRAME_OFS_PC 32 227#define CFRAME_OFS_PC 32
228#define CFRAME_OFS_MULTRES 28 228#define CFRAME_OFS_MULTRES 28
229#define CFRAME_SIZE 272 229#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
230#define CFRAME_SHIFT_MULTRES 3 230#define CFRAME_SHIFT_MULTRES 3
231#endif 231#endif
232#elif LJ_TARGET_MIPS32 232#elif LJ_TARGET_MIPS32
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 73120065..9b3883b7 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -287,7 +287,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
287#define fp64_f2l __aeabi_f2lz 287#define fp64_f2l __aeabi_f2lz
288#define fp64_f2ul __aeabi_f2ulz 288#define fp64_f2ul __aeabi_f2ulz
289#endif 289#endif
290#elif LJ_TARGET_MIPS 290#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
291#define softfp_add __adddf3 291#define softfp_add __adddf3
292#define softfp_sub __subdf3 292#define softfp_sub __subdf3
293#define softfp_mul __muldf3 293#define softfp_mul __muldf3
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index b4260ebc..0839668c 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -103,6 +103,18 @@
103|// Fixed register assignments for the interpreter. 103|// Fixed register assignments for the interpreter.
104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) 104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
105| 105|
106|.macro .FPU, a, b
107|.if FPU
108| a, b
109|.endif
110|.endmacro
111|
112|.macro .FPU, a, b, c
113|.if FPU
114| a, b, c
115|.endif
116|.endmacro
117|
106|// The following must be C callee-save (but BASE is often refetched). 118|// The following must be C callee-save (but BASE is often refetched).
107|.define BASE, r14 // Base of current Lua stack frame. 119|.define BASE, r14 // Base of current Lua stack frame.
108|.define KBASE, r15 // Constants of current Lua function. 120|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +128,10 @@
116|.define TISNUM, r22 128|.define TISNUM, r22
117|.define TISNIL, r23 129|.define TISNIL, r23
118|.define ZERO, r24 130|.define ZERO, r24
131|.if FPU
119|.define TOBIT, f30 // 2^52 + 2^51. 132|.define TOBIT, f30 // 2^52 + 2^51.
120|.define TONUM, f31 // 2^52 + 2^51 + 2^31. 133|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
134|.endif
121| 135|
122|// The following temporaries are not saved across C calls, except for RA. 136|// The following temporaries are not saved across C calls, except for RA.
123|.define RA, r20 // Callee-save. 137|.define RA, r20 // Callee-save.
@@ -133,6 +147,7 @@
133| 147|
134|// Saved temporaries. 148|// Saved temporaries.
135|.define SAVE0, r21 149|.define SAVE0, r21
150|.define SAVE1, r25
136| 151|
137|// Calling conventions. 152|// Calling conventions.
138|.define CARG1, r3 153|.define CARG1, r3
@@ -141,8 +156,10 @@
141|.define CARG4, r6 // Overlaps TMP3. 156|.define CARG4, r6 // Overlaps TMP3.
142|.define CARG5, r7 // Overlaps INS. 157|.define CARG5, r7 // Overlaps INS.
143| 158|
159|.if FPU
144|.define FARG1, f1 160|.define FARG1, f1
145|.define FARG2, f2 161|.define FARG2, f2
162|.endif
146| 163|
147|.define CRET1, r3 164|.define CRET1, r3
148|.define CRET2, r4 165|.define CRET2, r4
@@ -213,10 +230,16 @@
213|.endif 230|.endif
214|.else 231|.else
215| 232|
233|.if FPU
216|.define SAVE_LR, 276(sp) 234|.define SAVE_LR, 276(sp)
217|.define CFRAME_SPACE, 272 // Delta for sp. 235|.define CFRAME_SPACE, 272 // Delta for sp.
218|// Back chain for sp: 272(sp) <-- sp entering interpreter 236|// Back chain for sp: 272(sp) <-- sp entering interpreter
219|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. 237|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
238|.else
239|.define SAVE_LR, 132(sp)
240|.define CFRAME_SPACE, 128 // Delta for sp.
241|// Back chain for sp: 128(sp) <-- sp entering interpreter
242|.endif
220|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. 243|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
221|.define SAVE_CR, 52(sp) // 32 bit CR save. 244|.define SAVE_CR, 52(sp) // 32 bit CR save.
222|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 245|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +249,25 @@
226|.define SAVE_PC, 32(sp) 249|.define SAVE_PC, 32(sp)
227|.define SAVE_MULTRES, 28(sp) 250|.define SAVE_MULTRES, 28(sp)
228|.define UNUSED1, 24(sp) 251|.define UNUSED1, 24(sp)
252|.if FPU
229|.define TMPD_LO, 20(sp) 253|.define TMPD_LO, 20(sp)
230|.define TMPD_HI, 16(sp) 254|.define TMPD_HI, 16(sp)
231|.define TONUM_LO, 12(sp) 255|.define TONUM_LO, 12(sp)
232|.define TONUM_HI, 8(sp) 256|.define TONUM_HI, 8(sp)
257|.else
258|.define SFSAVE_4, 20(sp)
259|.define SFSAVE_3, 16(sp)
260|.define SFSAVE_2, 12(sp)
261|.define SFSAVE_1, 8(sp)
262|.endif
233|// Next frame lr: 4(sp) 263|// Next frame lr: 4(sp)
234|// Back chain for sp: 0(sp) <-- sp while in interpreter 264|// Back chain for sp: 0(sp) <-- sp while in interpreter
235| 265|
266|.if FPU
236|.define TMPD_BLO, 23(sp) 267|.define TMPD_BLO, 23(sp)
237|.define TMPD, TMPD_HI 268|.define TMPD, TMPD_HI
238|.define TONUM_D, TONUM_HI 269|.define TONUM_D, TONUM_HI
270|.endif
239| 271|
240|.endif 272|.endif
241| 273|
@@ -245,7 +277,7 @@
245|.else 277|.else
246| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 278| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
247|.endif 279|.endif
248| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 280| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
249|.endmacro 281|.endmacro
250|.macro rest_, reg 282|.macro rest_, reg
251|.if GPR64 283|.if GPR64
@@ -253,7 +285,7 @@
253|.else 285|.else
254| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) 286| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
255|.endif 287|.endif
256| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 288| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
257|.endmacro 289|.endmacro
258| 290|
259|.macro saveregs 291|.macro saveregs
@@ -323,6 +355,7 @@
323|// Trap for not-yet-implemented parts. 355|// Trap for not-yet-implemented parts.
324|.macro NYI; tw 4, sp, sp; .endmacro 356|.macro NYI; tw 4, sp, sp; .endmacro
325| 357|
358|.if FPU
326|// int/FP conversions. 359|// int/FP conversions.
327|.macro tonum_i, freg, reg 360|.macro tonum_i, freg, reg
328| xoris reg, reg, 0x8000 361| xoris reg, reg, 0x8000
@@ -346,6 +379,7 @@
346|.macro toint, reg, freg 379|.macro toint, reg, freg
347| toint reg, freg, freg 380| toint reg, freg, freg
348|.endmacro 381|.endmacro
382|.endif
349| 383|
350|//----------------------------------------------------------------------- 384|//-----------------------------------------------------------------------
351| 385|
@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx)
533 | beq >2 567 | beq >2
534 |1: 568 |1:
535 | addic. TMP1, TMP1, -8 569 | addic. TMP1, TMP1, -8
570 |.if FPU
536 | lfd f0, 0(RA) 571 | lfd f0, 0(RA)
572 |.else
573 | lwz CARG1, 0(RA)
574 | lwz CARG2, 4(RA)
575 |.endif
537 | addi RA, RA, 8 576 | addi RA, RA, 8
577 |.if FPU
538 | stfd f0, 0(BASE) 578 | stfd f0, 0(BASE)
579 |.else
580 | stw CARG1, 0(BASE)
581 | stw CARG2, 4(BASE)
582 |.endif
539 | addi BASE, BASE, 8 583 | addi BASE, BASE, 8
540 | bney <1 584 | bney <1
541 | 585 |
@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx)
613 | .toc ld TOCREG, SAVE_TOC 657 | .toc ld TOCREG, SAVE_TOC
614 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 658 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
615 | lp BASE, L->base 659 | lp BASE, L->base
616 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 660 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
617 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 661 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
618 | li ZERO, 0 662 | li ZERO, 0
619 | stw TMP3, TMPD 663 | .FPU stw TMP3, TMPD
620 | li TMP1, LJ_TFALSE 664 | li TMP1, LJ_TFALSE
621 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 665 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
622 | li TISNIL, LJ_TNIL 666 | li TISNIL, LJ_TNIL
623 | li_vmstate INTERP 667 | li_vmstate INTERP
624 | lfs TOBIT, TMPD 668 | .FPU lfs TOBIT, TMPD
625 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 669 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
626 | la RA, -8(BASE) // Results start at BASE-8. 670 | la RA, -8(BASE) // Results start at BASE-8.
627 | stw TMP3, TMPD 671 | .FPU stw TMP3, TMPD
628 | addi DISPATCH, DISPATCH, GG_G2DISP 672 | addi DISPATCH, DISPATCH, GG_G2DISP
629 | stw TMP1, 0(RA) // Prepend false to error message. 673 | stw TMP1, 0(RA) // Prepend false to error message.
630 | li RD, 16 // 2 results: false + error message. 674 | li RD, 16 // 2 results: false + error message.
631 | st_vmstate 675 | st_vmstate
632 | lfs TONUM, TMPD 676 | .FPU lfs TONUM, TMPD
633 | b ->vm_returnc 677 | b ->vm_returnc
634 | 678 |
635 |//----------------------------------------------------------------------- 679 |//-----------------------------------------------------------------------
@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *ctx)
690 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 734 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
691 | lp TMP1, L->top 735 | lp TMP1, L->top
692 | lwz PC, FRAME_PC(BASE) 736 | lwz PC, FRAME_PC(BASE)
693 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 737 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
694 | stb CARG3, L->status 738 | stb CARG3, L->status
695 | stw TMP3, TMPD 739 | .FPU stw TMP3, TMPD
696 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 740 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
697 | lfs TOBIT, TMPD 741 | .FPU lfs TOBIT, TMPD
698 | sub RD, TMP1, BASE 742 | sub RD, TMP1, BASE
699 | stw TMP3, TMPD 743 | .FPU stw TMP3, TMPD
700 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 744 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
701 | addi RD, RD, 8 745 | addi RD, RD, 8
702 | stw TMP0, TONUM_HI 746 | .FPU stw TMP0, TONUM_HI
703 | li_vmstate INTERP 747 | li_vmstate INTERP
704 | li ZERO, 0 748 | li ZERO, 0
705 | st_vmstate 749 | st_vmstate
706 | andix. TMP0, PC, FRAME_TYPE 750 | andix. TMP0, PC, FRAME_TYPE
707 | mr MULTRES, RD 751 | mr MULTRES, RD
708 | lfs TONUM, TMPD 752 | .FPU lfs TONUM, TMPD
709 | li TISNIL, LJ_TNIL 753 | li TISNIL, LJ_TNIL
710 | beq ->BC_RET_Z 754 | beq ->BC_RET_Z
711 | b ->vm_return 755 | b ->vm_return
@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *ctx)
739 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 783 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
740 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 784 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
741 | lp TMP1, L->top 785 | lp TMP1, L->top
742 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 786 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
743 | add PC, PC, BASE 787 | add PC, PC, BASE
744 | stw TMP3, TMPD 788 | .FPU stw TMP3, TMPD
745 | li ZERO, 0 789 | li ZERO, 0
746 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 790 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
747 | lfs TOBIT, TMPD 791 | .FPU lfs TOBIT, TMPD
748 | sub PC, PC, TMP2 // PC = frame delta + frame type 792 | sub PC, PC, TMP2 // PC = frame delta + frame type
749 | stw TMP3, TMPD 793 | .FPU stw TMP3, TMPD
750 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 794 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
751 | sub NARGS8:RC, TMP1, BASE 795 | sub NARGS8:RC, TMP1, BASE
752 | stw TMP0, TONUM_HI 796 | .FPU stw TMP0, TONUM_HI
753 | li_vmstate INTERP 797 | li_vmstate INTERP
754 | lfs TONUM, TMPD 798 | .FPU lfs TONUM, TMPD
755 | li TISNIL, LJ_TNIL 799 | li TISNIL, LJ_TNIL
756 | st_vmstate 800 | st_vmstate
757 | 801 |
@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx)
839 | lwz INS, -4(PC) 883 | lwz INS, -4(PC)
840 | subi CARG2, RB, 16 884 | subi CARG2, RB, 16
841 | decode_RB8 SAVE0, INS 885 | decode_RB8 SAVE0, INS
886 |.if FPU
842 | lfd f0, 0(RA) 887 | lfd f0, 0(RA)
888 |.else
889 | lwz TMP2, 0(RA)
890 | lwz TMP3, 4(RA)
891 |.endif
843 | add TMP1, BASE, SAVE0 892 | add TMP1, BASE, SAVE0
844 | stp BASE, L->base 893 | stp BASE, L->base
845 | cmplw TMP1, CARG2 894 | cmplw TMP1, CARG2
846 | sub CARG3, CARG2, TMP1 895 | sub CARG3, CARG2, TMP1
847 | decode_RA8 RA, INS 896 | decode_RA8 RA, INS
897 |.if FPU
848 | stfd f0, 0(CARG2) 898 | stfd f0, 0(CARG2)
899 |.else
900 | stw TMP2, 0(CARG2)
901 | stw TMP3, 4(CARG2)
902 |.endif
849 | bney ->BC_CAT_Z 903 | bney ->BC_CAT_Z
904 |.if FPU
850 | stfdx f0, BASE, RA 905 | stfdx f0, BASE, RA
906 |.else
907 | stwux TMP2, RA, BASE
908 | stw TMP3, 4(RA)
909 |.endif
851 | b ->cont_nop 910 | b ->cont_nop
852 | 911 |
853 |//-- Table indexing metamethods ----------------------------------------- 912 |//-- Table indexing metamethods -----------------------------------------
@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx)
900 | // Returns TValue * (finished) or NULL (metamethod). 959 | // Returns TValue * (finished) or NULL (metamethod).
901 | cmplwi CRET1, 0 960 | cmplwi CRET1, 0
902 | beq >3 961 | beq >3
962 |.if FPU
903 | lfd f0, 0(CRET1) 963 | lfd f0, 0(CRET1)
964 |.else
965 | lwz TMP0, 0(CRET1)
966 | lwz TMP1, 4(CRET1)
967 |.endif
904 | ins_next1 968 | ins_next1
969 |.if FPU
905 | stfdx f0, BASE, RA 970 | stfdx f0, BASE, RA
971 |.else
972 | stwux TMP0, RA, BASE
973 | stw TMP1, 4(RA)
974 |.endif
906 | ins_next2 975 | ins_next2
907 | 976 |
908 |3: // Call __index metamethod. 977 |3: // Call __index metamethod.
@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *ctx)
920 | // Returns cTValue * or NULL. 989 | // Returns cTValue * or NULL.
921 | cmplwi CRET1, 0 990 | cmplwi CRET1, 0
922 | beq >1 991 | beq >1
992 |.if FPU
923 | lfd f14, 0(CRET1) 993 | lfd f14, 0(CRET1)
994 |.else
995 | lwz SAVE0, 0(CRET1)
996 | lwz SAVE1, 4(CRET1)
997 |.endif
924 | b ->BC_TGETR_Z 998 | b ->BC_TGETR_Z
925 |1: 999 |1:
926 | stwx TISNIL, BASE, RA 1000 | stwx TISNIL, BASE, RA
@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx)
975 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 1049 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
976 | // Returns TValue * (finished) or NULL (metamethod). 1050 | // Returns TValue * (finished) or NULL (metamethod).
977 | cmplwi CRET1, 0 1051 | cmplwi CRET1, 0
1052 |.if FPU
978 | lfdx f0, BASE, RA 1053 | lfdx f0, BASE, RA
1054 |.else
1055 | lwzux TMP2, RA, BASE
1056 | lwz TMP3, 4(RA)
1057 |.endif
979 | beq >3 1058 | beq >3
980 | // NOBARRIER: lj_meta_tset ensures the table is not black. 1059 | // NOBARRIER: lj_meta_tset ensures the table is not black.
981 | ins_next1 1060 | ins_next1
1061 |.if FPU
982 | stfd f0, 0(CRET1) 1062 | stfd f0, 0(CRET1)
1063 |.else
1064 | stw TMP2, 0(CRET1)
1065 | stw TMP3, 4(CRET1)
1066 |.endif
983 | ins_next2 1067 | ins_next2
984 | 1068 |
985 |3: // Call __newindex metamethod. 1069 |3: // Call __newindex metamethod.
@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *ctx)
990 | add PC, TMP1, BASE 1074 | add PC, TMP1, BASE
991 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1075 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
992 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 1076 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
1077 |.if FPU
993 | stfd f0, 16(BASE) // Copy value to third argument. 1078 | stfd f0, 16(BASE) // Copy value to third argument.
1079 |.else
1080 | stw TMP2, 16(BASE)
1081 | stw TMP3, 20(BASE)
1082 |.endif
994 | b ->vm_call_dispatch_f 1083 | b ->vm_call_dispatch_f
995 | 1084 |
996 |->vmeta_tsetr: 1085 |->vmeta_tsetr:
@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *ctx)
998 | stw PC, SAVE_PC 1087 | stw PC, SAVE_PC
999 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) 1088 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1000 | // Returns TValue *. 1089 | // Returns TValue *.
1090 |.if FPU
1001 | stfd f14, 0(CRET1) 1091 | stfd f14, 0(CRET1)
1092 |.else
1093 | stw SAVE0, 0(CRET1)
1094 | stw SAVE1, 4(CRET1)
1095 |.endif
1002 | b ->cont_nop 1096 | b ->cont_nop
1003 | 1097 |
1004 |//-- Comparison metamethods --------------------------------------------- 1098 |//-- Comparison metamethods ---------------------------------------------
@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
1037 | 1131 |
1038 |->cont_ra: // RA = resultptr 1132 |->cont_ra: // RA = resultptr
1039 | lwz INS, -4(PC) 1133 | lwz INS, -4(PC)
1134 |.if FPU
1040 | lfd f0, 0(RA) 1135 | lfd f0, 0(RA)
1136 |.else
1137 | lwz CARG1, 0(RA)
1138 | lwz CARG2, 4(RA)
1139 |.endif
1041 | decode_RA8 TMP1, INS 1140 | decode_RA8 TMP1, INS
1141 |.if FPU
1042 | stfdx f0, BASE, TMP1 1142 | stfdx f0, BASE, TMP1
1143 |.else
1144 | stwux CARG1, TMP1, BASE
1145 | stw CARG2, 4(TMP1)
1146 |.endif
1043 | b ->cont_nop 1147 | b ->cont_nop
1044 | 1148 |
1045 |->cont_condt: // RA = resultptr 1149 |->cont_condt: // RA = resultptr
@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
1245 |.macro .ffunc_n, name 1349 |.macro .ffunc_n, name
1246 |->ff_ .. name: 1350 |->ff_ .. name:
1247 | cmplwi NARGS8:RC, 8 1351 | cmplwi NARGS8:RC, 8
1248 | lwz CARG3, 0(BASE) 1352 | lwz CARG1, 0(BASE)
1353 |.if FPU
1249 | lfd FARG1, 0(BASE) 1354 | lfd FARG1, 0(BASE)
1355 |.else
1356 | lwz CARG2, 4(BASE)
1357 |.endif
1250 | blt ->fff_fallback 1358 | blt ->fff_fallback
1251 | checknum CARG3; bge ->fff_fallback 1359 | checknum CARG1; bge ->fff_fallback
1252 |.endmacro 1360 |.endmacro
1253 | 1361 |
1254 |.macro .ffunc_nn, name 1362 |.macro .ffunc_nn, name
1255 |->ff_ .. name: 1363 |->ff_ .. name:
1256 | cmplwi NARGS8:RC, 16 1364 | cmplwi NARGS8:RC, 16
1257 | lwz CARG3, 0(BASE) 1365 | lwz CARG1, 0(BASE)
1366 |.if FPU
1258 | lfd FARG1, 0(BASE) 1367 | lfd FARG1, 0(BASE)
1259 | lwz CARG4, 8(BASE) 1368 | lwz CARG3, 8(BASE)
1260 | lfd FARG2, 8(BASE) 1369 | lfd FARG2, 8(BASE)
1370 |.else
1371 | lwz CARG2, 4(BASE)
1372 | lwz CARG3, 8(BASE)
1373 | lwz CARG4, 12(BASE)
1374 |.endif
1261 | blt ->fff_fallback 1375 | blt ->fff_fallback
1376 | checknum CARG1; bge ->fff_fallback
1262 | checknum CARG3; bge ->fff_fallback 1377 | checknum CARG3; bge ->fff_fallback
1263 | checknum CARG4; bge ->fff_fallback
1264 |.endmacro 1378 |.endmacro
1265 | 1379 |
1266 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 1380 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
1281 | bge cr1, ->fff_fallback 1395 | bge cr1, ->fff_fallback
1282 | stw CARG3, 0(RA) 1396 | stw CARG3, 0(RA)
1283 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1397 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1398 | addi TMP1, BASE, 8
1399 | add TMP2, RA, NARGS8:RC
1284 | stw CARG1, 4(RA) 1400 | stw CARG1, 4(RA)
1285 | beq ->fff_res // Done if exactly 1 argument. 1401 | beq ->fff_res // Done if exactly 1 argument.
1286 | li TMP1, 8
1287 | subi RC, RC, 8
1288 |1: 1402 |1:
1289 | cmplw TMP1, RC 1403 | cmplw TMP1, TMP2
1290 | lfdx f0, BASE, TMP1 1404 |.if FPU
1291 | stfdx f0, RA, TMP1 1405 | lfd f0, 0(TMP1)
1406 | stfd f0, 0(TMP1)
1407 |.else
1408 | lwz CARG1, 0(TMP1)
1409 | lwz CARG2, 4(TMP1)
1410 | stw CARG1, -8(TMP1)
1411 | stw CARG2, -4(TMP1)
1412 |.endif
1292 | addi TMP1, TMP1, 8 1413 | addi TMP1, TMP1, 8
1293 | bney <1 1414 | bney <1
1294 | b ->fff_res 1415 | b ->fff_res
@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
1303 | orc TMP1, TMP2, TMP0 1424 | orc TMP1, TMP2, TMP0
1304 | addi TMP1, TMP1, ~LJ_TISNUM+1 1425 | addi TMP1, TMP1, ~LJ_TISNUM+1
1305 | slwi TMP1, TMP1, 3 1426 | slwi TMP1, TMP1, 3
1427 |.if FPU
1306 | la TMP2, CFUNC:RB->upvalue 1428 | la TMP2, CFUNC:RB->upvalue
1307 | lfdx FARG1, TMP2, TMP1 1429 | lfdx FARG1, TMP2, TMP1
1430 |.else
1431 | add TMP1, CFUNC:RB, TMP1
1432 | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
1433 | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
1434 |.endif
1308 | b ->fff_resn 1435 | b ->fff_resn
1309 | 1436 |
1310 |//-- Base library: getters and setters --------------------------------- 1437 |//-- Base library: getters and setters ---------------------------------
@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
1382 | mr CARG1, L 1509 | mr CARG1, L
1383 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1510 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1384 | // Returns cTValue *. 1511 | // Returns cTValue *.
1512 |.if FPU
1385 | lfd FARG1, 0(CRET1) 1513 | lfd FARG1, 0(CRET1)
1514 |.else
1515 | lwz CARG2, 4(CRET1)
1516 | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
1517 |.endif
1386 | b ->fff_resn 1518 | b ->fff_resn
1387 | 1519 |
1388 |//-- Base library: conversions ------------------------------------------ 1520 |//-- Base library: conversions ------------------------------------------
@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
1391 | // Only handles the number case inline (without a base argument). 1523 | // Only handles the number case inline (without a base argument).
1392 | cmplwi NARGS8:RC, 8 1524 | cmplwi NARGS8:RC, 8
1393 | lwz CARG1, 0(BASE) 1525 | lwz CARG1, 0(BASE)
1526 |.if FPU
1394 | lfd FARG1, 0(BASE) 1527 | lfd FARG1, 0(BASE)
1528 |.else
1529 | lwz CARG2, 4(BASE)
1530 |.endif
1395 | bne ->fff_fallback // Exactly one argument. 1531 | bne ->fff_fallback // Exactly one argument.
1396 | checknum CARG1; bgt ->fff_fallback 1532 | checknum CARG1; bgt ->fff_fallback
1397 | b ->fff_resn 1533 | b ->fff_resn
@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx)
1442 | cmplwi CRET1, 0 1578 | cmplwi CRET1, 0
1443 | li CARG3, LJ_TNIL 1579 | li CARG3, LJ_TNIL
1444 | beq ->fff_restv // End of traversal: return nil. 1580 | beq ->fff_restv // End of traversal: return nil.
1445 | lfd f0, 8(BASE) // Copy key and value to results.
1446 | la RA, -8(BASE) 1581 | la RA, -8(BASE)
1582 |.if FPU
1583 | lfd f0, 8(BASE) // Copy key and value to results.
1447 | lfd f1, 16(BASE) 1584 | lfd f1, 16(BASE)
1448 | stfd f0, 0(RA) 1585 | stfd f0, 0(RA)
1449 | li RD, (2+1)*8
1450 | stfd f1, 8(RA) 1586 | stfd f1, 8(RA)
1587 |.else
1588 | lwz CARG1, 8(BASE)
1589 | lwz CARG2, 12(BASE)
1590 | lwz CARG3, 16(BASE)
1591 | lwz CARG4, 20(BASE)
1592 | stw CARG1, 0(RA)
1593 | stw CARG2, 4(RA)
1594 | stw CARG3, 8(RA)
1595 | stw CARG4, 12(RA)
1596 |.endif
1597 | li RD, (2+1)*8
1451 | b ->fff_res 1598 | b ->fff_res
1452 | 1599 |
1453 |.ffunc_1 pairs 1600 |.ffunc_1 pairs
@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx)
1456 | bne ->fff_fallback 1603 | bne ->fff_fallback
1457#if LJ_52 1604#if LJ_52
1458 | lwz TAB:TMP2, TAB:CARG1->metatable 1605 | lwz TAB:TMP2, TAB:CARG1->metatable
1606 |.if FPU
1459 | lfd f0, CFUNC:RB->upvalue[0] 1607 | lfd f0, CFUNC:RB->upvalue[0]
1608 |.else
1609 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1610 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1611 |.endif
1460 | cmplwi TAB:TMP2, 0 1612 | cmplwi TAB:TMP2, 0
1461 | la RA, -8(BASE) 1613 | la RA, -8(BASE)
1462 | bne ->fff_fallback 1614 | bne ->fff_fallback
1463#else 1615#else
1616 |.if FPU
1464 | lfd f0, CFUNC:RB->upvalue[0] 1617 | lfd f0, CFUNC:RB->upvalue[0]
1618 |.else
1619 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1620 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1621 |.endif
1465 | la RA, -8(BASE) 1622 | la RA, -8(BASE)
1466#endif 1623#endif
1467 | stw TISNIL, 8(BASE) 1624 | stw TISNIL, 8(BASE)
1468 | li RD, (3+1)*8 1625 | li RD, (3+1)*8
1626 |.if FPU
1469 | stfd f0, 0(RA) 1627 | stfd f0, 0(RA)
1628 |.else
1629 | stw TMP0, 0(RA)
1630 | stw TMP1, 4(RA)
1631 |.endif
1470 | b ->fff_res 1632 | b ->fff_res
1471 | 1633 |
1472 |.ffunc ipairs_aux 1634 |.ffunc ipairs_aux
@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx)
1512 | stfd FARG2, 0(RA) 1674 | stfd FARG2, 0(RA)
1513 |.endif 1675 |.endif
1514 | ble >2 // Not in array part? 1676 | ble >2 // Not in array part?
1677 |.if FPU
1515 | lwzx TMP2, TMP1, TMP3 1678 | lwzx TMP2, TMP1, TMP3
1516 | lfdx f0, TMP1, TMP3 1679 | lfdx f0, TMP1, TMP3
1680 |.else
1681 | lwzux TMP2, TMP1, TMP3
1682 | lwz TMP3, 4(TMP1)
1683 |.endif
1517 |1: 1684 |1:
1518 | checknil TMP2 1685 | checknil TMP2
1519 | li RD, (0+1)*8 1686 | li RD, (0+1)*8
1520 | beq ->fff_res // End of iteration, return 0 results. 1687 | beq ->fff_res // End of iteration, return 0 results.
1521 | li RD, (2+1)*8 1688 | li RD, (2+1)*8
1689 |.if FPU
1522 | stfd f0, 8(RA) 1690 | stfd f0, 8(RA)
1691 |.else
1692 | stw TMP2, 8(RA)
1693 | stw TMP3, 12(RA)
1694 |.endif
1523 | b ->fff_res 1695 | b ->fff_res
1524 |2: // Check for empty hash part first. Otherwise call C function. 1696 |2: // Check for empty hash part first. Otherwise call C function.
1525 | lwz TMP0, TAB:CARG1->hmask 1697 | lwz TMP0, TAB:CARG1->hmask
@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx)
1533 | li RD, (0+1)*8 1705 | li RD, (0+1)*8
1534 | beq ->fff_res 1706 | beq ->fff_res
1535 | lwz TMP2, 0(CRET1) 1707 | lwz TMP2, 0(CRET1)
1708 |.if FPU
1536 | lfd f0, 0(CRET1) 1709 | lfd f0, 0(CRET1)
1710 |.else
1711 | lwz TMP3, 4(CRET1)
1712 |.endif
1537 | b <1 1713 | b <1
1538 | 1714 |
1539 |.ffunc_1 ipairs 1715 |.ffunc_1 ipairs
@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx)
1542 | bne ->fff_fallback 1718 | bne ->fff_fallback
1543#if LJ_52 1719#if LJ_52
1544 | lwz TAB:TMP2, TAB:CARG1->metatable 1720 | lwz TAB:TMP2, TAB:CARG1->metatable
1721 |.if FPU
1545 | lfd f0, CFUNC:RB->upvalue[0] 1722 | lfd f0, CFUNC:RB->upvalue[0]
1723 |.else
1724 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1725 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1726 |.endif
1546 | cmplwi TAB:TMP2, 0 1727 | cmplwi TAB:TMP2, 0
1547 | la RA, -8(BASE) 1728 | la RA, -8(BASE)
1548 | bne ->fff_fallback 1729 | bne ->fff_fallback
1549#else 1730#else
1731 |.if FPU
1550 | lfd f0, CFUNC:RB->upvalue[0] 1732 | lfd f0, CFUNC:RB->upvalue[0]
1733 |.else
1734 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1735 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1736 |.endif
1551 | la RA, -8(BASE) 1737 | la RA, -8(BASE)
1552#endif 1738#endif
1553 |.if DUALNUM 1739 |.if DUALNUM
@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx)
1557 |.endif 1743 |.endif
1558 | stw ZERO, 12(BASE) 1744 | stw ZERO, 12(BASE)
1559 | li RD, (3+1)*8 1745 | li RD, (3+1)*8
1746 |.if FPU
1560 | stfd f0, 0(RA) 1747 | stfd f0, 0(RA)
1748 |.else
1749 | stw TMP0, 0(RA)
1750 | stw TMP1, 4(RA)
1751 |.endif
1561 | b ->fff_res 1752 | b ->fff_res
1562 | 1753 |
1563 |//-- Base library: catch errors ---------------------------------------- 1754 |//-- Base library: catch errors ----------------------------------------
@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx)
1576 | 1767 |
1577 |.ffunc xpcall 1768 |.ffunc xpcall
1578 | cmplwi NARGS8:RC, 16 1769 | cmplwi NARGS8:RC, 16
1579 | lwz CARG4, 8(BASE) 1770 | lwz CARG3, 8(BASE)
1771 |.if FPU
1580 | lfd FARG2, 8(BASE) 1772 | lfd FARG2, 8(BASE)
1581 | lfd FARG1, 0(BASE) 1773 | lfd FARG1, 0(BASE)
1774 |.else
1775 | lwz CARG1, 0(BASE)
1776 | lwz CARG2, 4(BASE)
1777 | lwz CARG4, 12(BASE)
1778 |.endif
1582 | blt ->fff_fallback 1779 | blt ->fff_fallback
1583 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1780 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1584 | mr TMP2, BASE 1781 | mr TMP2, BASE
1585 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1782 | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
1586 | la BASE, 16(BASE) 1783 | la BASE, 16(BASE)
1587 | // Remember active hook before pcall. 1784 | // Remember active hook before pcall.
1588 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1785 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1786 |.if FPU
1589 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1787 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1590 | subi NARGS8:RC, NARGS8:RC, 16
1591 | stfd FARG1, 8(TMP2) 1788 | stfd FARG1, 8(TMP2)
1789 |.else
1790 | stw CARG3, 0(TMP2)
1791 | stw CARG4, 4(TMP2)
1792 | stw CARG1, 8(TMP2)
1793 | stw CARG2, 12(TMP2)
1794 |.endif
1795 | subi NARGS8:RC, NARGS8:RC, 16
1592 | addi PC, TMP1, 16+FRAME_PCALL 1796 | addi PC, TMP1, 16+FRAME_PCALL
1593 | b ->vm_call_dispatch 1797 | b ->vm_call_dispatch
1594 | 1798 |
@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx)
1631 | stp BASE, L->top 1835 | stp BASE, L->top
1632 |2: // Move args to coroutine. 1836 |2: // Move args to coroutine.
1633 | cmpw TMP1, NARGS8:RC 1837 | cmpw TMP1, NARGS8:RC
1838 |.if FPU
1634 | lfdx f0, BASE, TMP1 1839 | lfdx f0, BASE, TMP1
1840 |.else
1841 | add CARG3, BASE, TMP1
1842 | lwz TMP2, 0(CARG3)
1843 | lwz TMP3, 4(CARG3)
1844 |.endif
1635 | beq >3 1845 | beq >3
1846 |.if FPU
1636 | stfdx f0, CARG2, TMP1 1847 | stfdx f0, CARG2, TMP1
1848 |.else
1849 | add CARG3, CARG2, TMP1
1850 | stw TMP2, 0(CARG3)
1851 | stw TMP3, 4(CARG3)
1852 |.endif
1637 | addi TMP1, TMP1, 8 1853 | addi TMP1, TMP1, 8
1638 | b <2 1854 | b <2
1639 |3: 1855 |3:
@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx)
1664 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1880 | stp TMP2, L:SAVE0->top // Clear coroutine stack.
1665 |5: // Move results from coroutine. 1881 |5: // Move results from coroutine.
1666 | cmplw TMP1, TMP3 1882 | cmplw TMP1, TMP3
1883 |.if FPU
1667 | lfdx f0, TMP2, TMP1 1884 | lfdx f0, TMP2, TMP1
1668 | stfdx f0, BASE, TMP1 1885 | stfdx f0, BASE, TMP1
1886 |.else
1887 | add CARG3, TMP2, TMP1
1888 | lwz CARG1, 0(CARG3)
1889 | lwz CARG2, 4(CARG3)
1890 | add CARG3, BASE, TMP1
1891 | stw CARG1, 0(CARG3)
1892 | stw CARG2, 4(CARG3)
1893 |.endif
1669 | addi TMP1, TMP1, 8 1894 | addi TMP1, TMP1, 8
1670 | bne <5 1895 | bne <5
1671 |6: 1896 |6:
@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx)
1690 | andix. TMP0, PC, FRAME_TYPE 1915 | andix. TMP0, PC, FRAME_TYPE
1691 | la TMP3, -8(TMP3) 1916 | la TMP3, -8(TMP3)
1692 | li TMP1, LJ_TFALSE 1917 | li TMP1, LJ_TFALSE
1918 |.if FPU
1693 | lfd f0, 0(TMP3) 1919 | lfd f0, 0(TMP3)
1920 |.else
1921 | lwz CARG1, 0(TMP3)
1922 | lwz CARG2, 4(TMP3)
1923 |.endif
1694 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1924 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
1695 | li RD, (2+1)*8 1925 | li RD, (2+1)*8
1696 | stw TMP1, -8(BASE) // Prepend false to results. 1926 | stw TMP1, -8(BASE) // Prepend false to results.
1697 | la RA, -8(BASE) 1927 | la RA, -8(BASE)
1928 |.if FPU
1698 | stfd f0, 0(BASE) // Copy error message. 1929 | stfd f0, 0(BASE) // Copy error message.
1930 |.else
1931 | stw CARG1, 0(BASE) // Copy error message.
1932 | stw CARG2, 4(BASE)
1933 |.endif
1699 | b <7 1934 | b <7
1700 |.else 1935 |.else
1701 | mr CARG1, L 1936 | mr CARG1, L
@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx)
1874 | lus CARG1, 0x8000 // -(2^31). 2109 | lus CARG1, 0x8000 // -(2^31).
1875 | beqy ->fff_resi 2110 | beqy ->fff_resi
1876 |5: 2111 |5:
2112 |.if FPU
1877 | lfd FARG1, 0(BASE) 2113 | lfd FARG1, 0(BASE)
2114 |.else
2115 | lwz CARG1, 0(BASE)
2116 | lwz CARG2, 4(BASE)
2117 |.endif
1878 | blex func 2118 | blex func
1879 | b ->fff_resn 2119 | b ->fff_resn
1880 |.endmacro 2120 |.endmacro
@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx)
1898 | 2138 |
1899 |.ffunc math_log 2139 |.ffunc math_log
1900 | cmplwi NARGS8:RC, 8 2140 | cmplwi NARGS8:RC, 8
1901 | lwz CARG3, 0(BASE) 2141 | lwz CARG1, 0(BASE)
1902 | lfd FARG1, 0(BASE)
1903 | bne ->fff_fallback // Need exactly 1 argument. 2142 | bne ->fff_fallback // Need exactly 1 argument.
1904 | checknum CARG3; bge ->fff_fallback 2143 | checknum CARG1; bge ->fff_fallback
2144 |.if FPU
2145 | lfd FARG1, 0(BASE)
2146 |.else
2147 | lwz CARG2, 4(BASE)
2148 |.endif
1905 | blex log 2149 | blex log
1906 | b ->fff_resn 2150 | b ->fff_resn
1907 | 2151 |
@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *ctx)
1923 |.if DUALNUM 2167 |.if DUALNUM
1924 |.ffunc math_ldexp 2168 |.ffunc math_ldexp
1925 | cmplwi NARGS8:RC, 16 2169 | cmplwi NARGS8:RC, 16
1926 | lwz CARG3, 0(BASE) 2170 | lwz TMP0, 0(BASE)
2171 |.if FPU
1927 | lfd FARG1, 0(BASE) 2172 | lfd FARG1, 0(BASE)
1928 | lwz CARG4, 8(BASE) 2173 |.else
2174 | lwz CARG1, 0(BASE)
2175 | lwz CARG2, 4(BASE)
2176 |.endif
2177 | lwz TMP1, 8(BASE)
1929 |.if GPR64 2178 |.if GPR64
1930 | lwz CARG2, 12(BASE) 2179 | lwz CARG2, 12(BASE)
1931 |.else 2180 |.elif FPU
1932 | lwz CARG1, 12(BASE) 2181 | lwz CARG1, 12(BASE)
2182 |.else
2183 | lwz CARG3, 12(BASE)
1933 |.endif 2184 |.endif
1934 | blt ->fff_fallback 2185 | blt ->fff_fallback
1935 | checknum CARG3; bge ->fff_fallback 2186 | checknum TMP0; bge ->fff_fallback
1936 | checknum CARG4; bne ->fff_fallback 2187 | checknum TMP1; bne ->fff_fallback
1937 |.else 2188 |.else
1938 |.ffunc_nn math_ldexp 2189 |.ffunc_nn math_ldexp
1939 |.if GPR64 2190 |.if GPR64
@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx)
1948 |.ffunc_n math_frexp 2199 |.ffunc_n math_frexp
1949 |.if GPR64 2200 |.if GPR64
1950 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 2201 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1951 |.else 2202 |.elif FPU
1952 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 2203 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
2204 |.else
2205 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1953 |.endif 2206 |.endif
1954 | lwz PC, FRAME_PC(BASE) 2207 | lwz PC, FRAME_PC(BASE)
1955 | blex frexp 2208 | blex frexp
@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx)
1958 |.if not DUALNUM 2211 |.if not DUALNUM
1959 | tonum_i FARG2, TMP1 2212 | tonum_i FARG2, TMP1
1960 |.endif 2213 |.endif
2214 |.if FPU
1961 | stfd FARG1, 0(RA) 2215 | stfd FARG1, 0(RA)
2216 |.else
2217 | stw CRET1, 0(RA)
2218 | stw CRET2, 4(RA)
2219 |.endif
1962 | li RD, (2+1)*8 2220 | li RD, (2+1)*8
1963 |.if DUALNUM 2221 |.if DUALNUM
1964 | stw TISNUM, 8(RA) 2222 | stw TISNUM, 8(RA)
@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx)
1971 |.ffunc_n math_modf 2229 |.ffunc_n math_modf
1972 |.if GPR64 2230 |.if GPR64
1973 | la CARG2, -8(BASE) 2231 | la CARG2, -8(BASE)
1974 |.else 2232 |.elif FPU
1975 | la CARG1, -8(BASE) 2233 | la CARG1, -8(BASE)
2234 |.else
2235 | la CARG3, -8(BASE)
1976 |.endif 2236 |.endif
1977 | lwz PC, FRAME_PC(BASE) 2237 | lwz PC, FRAME_PC(BASE)
1978 | blex modf 2238 | blex modf
1979 | la RA, -8(BASE) 2239 | la RA, -8(BASE)
2240 |.if FPU
1980 | stfd FARG1, 0(BASE) 2241 | stfd FARG1, 0(BASE)
2242 |.else
2243 | stw CRET1, 0(BASE)
2244 | stw CRET2, 4(BASE)
2245 |.endif
1981 | li RD, (2+1)*8 2246 | li RD, (2+1)*8
1982 | b ->fff_res 2247 | b ->fff_res
1983 | 2248 |
@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx)
1985 |.if DUALNUM 2250 |.if DUALNUM
1986 | .ffunc_1 name 2251 | .ffunc_1 name
1987 | checknum CARG3 2252 | checknum CARG3
1988 | addi TMP1, BASE, 8 2253 | addi SAVE0, BASE, 8
1989 | add TMP2, BASE, NARGS8:RC 2254 | add SAVE1, BASE, NARGS8:RC
1990 | bne >4 2255 | bne >4
1991 |1: // Handle integers. 2256 |1: // Handle integers.
1992 | lwz CARG4, 0(TMP1) 2257 | lwz CARG4, 0(SAVE0)
1993 | cmplw cr1, TMP1, TMP2 2258 | cmplw cr1, SAVE0, SAVE1
1994 | lwz CARG2, 4(TMP1) 2259 | lwz CARG2, 4(SAVE0)
1995 | bge cr1, ->fff_resi 2260 | bge cr1, ->fff_resi
1996 | checknum CARG4 2261 | checknum CARG4
1997 | xoris TMP0, CARG1, 0x8000 2262 | xoris TMP0, CARG1, 0x8000
@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx)
2008 |.if GPR64 2273 |.if GPR64
2009 | rldicl CARG1, CARG1, 0, 32 2274 | rldicl CARG1, CARG1, 0, 32
2010 |.endif 2275 |.endif
2011 | addi TMP1, TMP1, 8 2276 | addi SAVE0, SAVE0, 8
2012 | b <1 2277 | b <1
2013 |3: 2278 |3:
2014 | bge ->fff_fallback 2279 | bge ->fff_fallback
2015 | // Convert intermediate result to number and continue below. 2280 | // Convert intermediate result to number and continue below.
2281 |.if FPU
2016 | tonum_i FARG1, CARG1 2282 | tonum_i FARG1, CARG1
2017 | lfd FARG2, 0(TMP1) 2283 | lfd FARG2, 0(SAVE0)
2284 |.else
2285 | mr CARG2, CARG1
2286 | bl ->vm_sfi2d_1
2287 | lwz CARG3, 0(SAVE0)
2288 | lwz CARG4, 4(SAVE0)
2289 |.endif
2018 | b >6 2290 | b >6
2019 |4: 2291 |4:
2292 |.if FPU
2020 | lfd FARG1, 0(BASE) 2293 | lfd FARG1, 0(BASE)
2294 |.else
2295 | lwz CARG1, 0(BASE)
2296 | lwz CARG2, 4(BASE)
2297 |.endif
2021 | bge ->fff_fallback 2298 | bge ->fff_fallback
2022 |5: // Handle numbers. 2299 |5: // Handle numbers.
2023 | lwz CARG4, 0(TMP1) 2300 | lwz CARG3, 0(SAVE0)
2024 | cmplw cr1, TMP1, TMP2 2301 | cmplw cr1, SAVE0, SAVE1
2025 | lfd FARG2, 0(TMP1) 2302 |.if FPU
2303 | lfd FARG2, 0(SAVE0)
2304 |.else
2305 | lwz CARG4, 4(SAVE0)
2306 |.endif
2026 | bge cr1, ->fff_resn 2307 | bge cr1, ->fff_resn
2027 | checknum CARG4; bge >7 2308 | checknum CARG3; bge >7
2028 |6: 2309 |6:
2310 | addi SAVE0, SAVE0, 8
2311 |.if FPU
2029 | fsub f0, FARG1, FARG2 2312 | fsub f0, FARG1, FARG2
2030 | addi TMP1, TMP1, 8
2031 |.if ismax 2313 |.if ismax
2032 | fsel FARG1, f0, FARG1, FARG2 2314 | fsel FARG1, f0, FARG1, FARG2
2033 |.else 2315 |.else
2034 | fsel FARG1, f0, FARG2, FARG1 2316 | fsel FARG1, f0, FARG2, FARG1
2035 |.endif 2317 |.endif
2318 |.else
2319 | stw CARG1, SFSAVE_1
2320 | stw CARG2, SFSAVE_2
2321 | stw CARG3, SFSAVE_3
2322 | stw CARG4, SFSAVE_4
2323 | blex __ledf2
2324 | cmpwi CRET1, 0
2325 |.if ismax
2326 | blt >8
2327 |.else
2328 | bge >8
2329 |.endif
2330 | lwz CARG1, SFSAVE_1
2331 | lwz CARG2, SFSAVE_2
2332 | b <5
2333 |8:
2334 | lwz CARG1, SFSAVE_3
2335 | lwz CARG2, SFSAVE_4
2336 |.endif
2036 | b <5 2337 | b <5
2037 |7: // Convert integer to number and continue above. 2338 |7: // Convert integer to number and continue above.
2038 | lwz CARG2, 4(TMP1) 2339 | lwz CARG3, 4(SAVE0)
2039 | bne ->fff_fallback 2340 | bne ->fff_fallback
2040 | tonum_i FARG2, CARG2 2341 |.if FPU
2342 | tonum_i FARG2, CARG3
2343 |.else
2344 | bl ->vm_sfi2d_2
2345 |.endif
2041 | b <6 2346 | b <6
2042 |.else 2347 |.else
2043 | .ffunc_n name 2348 | .ffunc_n name
@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx)
2237 | 2542 |
2238 |.macro .ffunc_bit_op, name, ins 2543 |.macro .ffunc_bit_op, name, ins
2239 | .ffunc_bit name 2544 | .ffunc_bit name
2240 | addi TMP1, BASE, 8 2545 | addi SAVE0, BASE, 8
2241 | add TMP2, BASE, NARGS8:RC 2546 | add SAVE1, BASE, NARGS8:RC
2242 |1: 2547 |1:
2243 | lwz CARG4, 0(TMP1) 2548 | lwz CARG4, 0(SAVE0)
2244 | cmplw cr1, TMP1, TMP2 2549 | cmplw cr1, SAVE0, SAVE1
2245 |.if DUALNUM 2550 |.if DUALNUM
2246 | lwz CARG2, 4(TMP1) 2551 | lwz CARG2, 4(SAVE0)
2247 |.else 2552 |.else
2248 | lfd FARG1, 0(TMP1) 2553 | lfd FARG1, 0(SAVE0)
2249 |.endif 2554 |.endif
2250 | bgey cr1, ->fff_resi 2555 | bgey cr1, ->fff_resi
2251 | checknum CARG4 2556 | checknum CARG4
2252 |.if DUALNUM 2557 |.if DUALNUM
2558 |.if FPU
2253 | bnel ->fff_bitop_fb 2559 | bnel ->fff_bitop_fb
2254 |.else 2560 |.else
2561 | beq >3
2562 | stw CARG1, SFSAVE_1
2563 | bl ->fff_bitop_fb
2564 | mr CARG2, CARG1
2565 | lwz CARG1, SFSAVE_1
2566 |3:
2567 |.endif
2568 |.else
2255 | fadd FARG1, FARG1, TOBIT 2569 | fadd FARG1, FARG1, TOBIT
2256 | bge ->fff_fallback 2570 | bge ->fff_fallback
2257 | stfd FARG1, TMPD 2571 | stfd FARG1, TMPD
2258 | lwz CARG2, TMPD_LO 2572 | lwz CARG2, TMPD_LO
2259 |.endif 2573 |.endif
2260 | ins CARG1, CARG1, CARG2 2574 | ins CARG1, CARG1, CARG2
2261 | addi TMP1, TMP1, 8 2575 | addi SAVE0, SAVE0, 8
2262 | b <1 2576 | b <1
2263 |.endmacro 2577 |.endmacro
2264 | 2578 |
@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx)
2280 |.macro .ffunc_bit_sh, name, ins, shmod 2594 |.macro .ffunc_bit_sh, name, ins, shmod
2281 |.if DUALNUM 2595 |.if DUALNUM
2282 | .ffunc_2 bit_..name 2596 | .ffunc_2 bit_..name
2597 |.if FPU
2283 | checknum CARG3; bnel ->fff_tobit_fb 2598 | checknum CARG3; bnel ->fff_tobit_fb
2599 |.else
2600 | checknum CARG3; beq >1
2601 | bl ->fff_tobit_fb
2602 | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
2603 |1:
2604 |.endif
2284 | // Note: no inline conversion from number for 2nd argument! 2605 | // Note: no inline conversion from number for 2nd argument!
2285 | checknum CARG4; bne ->fff_fallback 2606 | checknum CARG4; bne ->fff_fallback
2286 |.else 2607 |.else
@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx)
2317 |->fff_resn: 2638 |->fff_resn:
2318 | lwz PC, FRAME_PC(BASE) 2639 | lwz PC, FRAME_PC(BASE)
2319 | la RA, -8(BASE) 2640 | la RA, -8(BASE)
2641 |.if FPU
2320 | stfd FARG1, -8(BASE) 2642 | stfd FARG1, -8(BASE)
2643 |.else
2644 | stw CARG1, -8(BASE)
2645 | stw CARG2, -4(BASE)
2646 |.endif
2321 | b ->fff_res1 2647 | b ->fff_res1
2322 | 2648 |
2323 |// Fallback FP number to bit conversion. 2649 |// Fallback FP number to bit conversion.
2324 |->fff_tobit_fb: 2650 |->fff_tobit_fb:
2325 |.if DUALNUM 2651 |.if DUALNUM
2652 |.if FPU
2326 | lfd FARG1, 0(BASE) 2653 | lfd FARG1, 0(BASE)
2327 | bgt ->fff_fallback 2654 | bgt ->fff_fallback
2328 | fadd FARG1, FARG1, TOBIT 2655 | fadd FARG1, FARG1, TOBIT
2329 | stfd FARG1, TMPD 2656 | stfd FARG1, TMPD
2330 | lwz CARG1, TMPD_LO 2657 | lwz CARG1, TMPD_LO
2331 | blr 2658 | blr
2659 |.else
2660 | bgt ->fff_fallback
2661 | mr CARG2, CARG1
2662 | mr CARG1, CARG3
2663 |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
2664 |->vm_tobit: // Soft-float number -> int32 bit conversion; CARG1 is treated as the hi word, CARG2 as the lo word, result in CARG1.
2665 | slwi TMP2, CARG1, 1 // Shift out the sign bit.
2666 | addis TMP2, TMP2, 0x0020 // Add 1<<21, i.e. 1 to the exponent field (assuming IEEE double hi word layout).
2667 | cmpwi TMP2, 0
2668 | bge >2 // Top bit still clear after the add: result is 0.
2669 | li TMP1, 0x3e0
2670 | srawi TMP2, TMP2, 21 // Sign-extended upper 11 bits.
2671 | not TMP1, TMP1 // TMP1 = ~0x3e0.
2672 | sub. TMP2, TMP1, TMP2 // Shift count in TMP2, cr0 set from it.
2673 | cmpwi cr7, CARG1, 0 // Keep the sign of the input in cr7.
2674 | blt >1 // Negative shift count (cr0): use the alternate path.
2675 | slwi TMP1, CARG1, 11 // Move the hi word bits up, just below bit 31.
2676 | srwi TMP0, CARG2, 21 // Top 11 bits of the lo word.
2677 | oris TMP1, TMP1, 0x8000 // Set bit 31 (presumably the implicit leading 1).
2678 | or TMP1, TMP1, TMP0 // Merge hi and lo parts.
2679 | srw CARG1, TMP1, TMP2 // Shift right by the computed count.
2680 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2681 | neg CARG1, CARG1 // Input was negative: negate the result.
2682 | blr
2683 |1: // Shift count was negative.
2684 | addi TMP2, TMP2, 21
2685 | srw TMP1, CARG2, TMP2 // Contribution of the lo word.
2686 | slwi CARG2, CARG1, 12 // Drop the upper 12 bits of the hi word.
2687 | subfic TMP2, TMP2, 20 // TMP2 = 20 - TMP2.
2688 | slw TMP0, CARG2, TMP2 // Contribution of the hi word.
2689 | or CARG1, TMP1, TMP0 // Combine both contributions.
2690 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2691 | neg CARG1, CARG1 // Input was negative: negate the result.
2692 | blr
2693 |2: // Early exit taken from the exponent check above.
2694 | li CARG1, 0 // Result is 0.
2695 | blr
2696 |.endif
2332 |.endif 2697 |.endif
2333 |->fff_bitop_fb: 2698 |->fff_bitop_fb:
2334 |.if DUALNUM 2699 |.if DUALNUM
2335 | lfd FARG1, 0(TMP1) 2700 |.if FPU
2701 | lfd FARG1, 0(SAVE0)
2336 | bgt ->fff_fallback 2702 | bgt ->fff_fallback
2337 | fadd FARG1, FARG1, TOBIT 2703 | fadd FARG1, FARG1, TOBIT
2338 | stfd FARG1, TMPD 2704 | stfd FARG1, TMPD
2339 | lwz CARG2, TMPD_LO 2705 | lwz CARG2, TMPD_LO
2340 | blr 2706 | blr
2707 |.else
2708 | bgt ->fff_fallback
2709 | mr CARG1, CARG4
2710 | b ->vm_tobit
2711 |.endif
2341 |.endif 2712 |.endif
2342 | 2713 |
2343 |//----------------------------------------------------------------------- 2714 |//-----------------------------------------------------------------------
@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *ctx)
2530 | decode_RA8 RC, INS // Call base. 2901 | decode_RA8 RC, INS // Call base.
2531 | beq >2 2902 | beq >2
2532 |1: // Move results down. 2903 |1: // Move results down.
2904 |.if FPU
2533 | lfd f0, 0(RA) 2905 | lfd f0, 0(RA)
2906 |.else
2907 | lwz CARG1, 0(RA)
2908 | lwz CARG2, 4(RA)
2909 |.endif
2534 | addic. TMP1, TMP1, -8 2910 | addic. TMP1, TMP1, -8
2535 | addi RA, RA, 8 2911 | addi RA, RA, 8
2912 |.if FPU
2536 | stfdx f0, BASE, RC 2913 | stfdx f0, BASE, RC
2914 |.else
2915 | add CARG3, BASE, RC
2916 | stw CARG1, 0(CARG3)
2917 | stw CARG2, 4(CARG3)
2918 |.endif
2537 | addi RC, RC, 8 2919 | addi RC, RC, 8
2538 | bne <1 2920 | bne <1
2539 |2: 2921 |2:
@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *ctx)
2586 |//----------------------------------------------------------------------- 2968 |//-----------------------------------------------------------------------
2587 | 2969 |
2588 |.macro savex_, a, b, c, d 2970 |.macro savex_, a, b, c, d
2971 |.if FPU
2589 | stfd f..a, 16+a*8(sp) 2972 | stfd f..a, 16+a*8(sp)
2590 | stfd f..b, 16+b*8(sp) 2973 | stfd f..b, 16+b*8(sp)
2591 | stfd f..c, 16+c*8(sp) 2974 | stfd f..c, 16+c*8(sp)
2592 | stfd f..d, 16+d*8(sp) 2975 | stfd f..d, 16+d*8(sp)
2976 |.endif
2593 |.endmacro 2977 |.endmacro
2594 | 2978 |
2595 |->vm_exit_handler: 2979 |->vm_exit_handler:
@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *ctx)
2661 | lwz KBASE, PC2PROTO(k)(TMP1) 3045 | lwz KBASE, PC2PROTO(k)(TMP1)
2662 | // Setup type comparison constants. 3046 | // Setup type comparison constants.
2663 | li TISNUM, LJ_TISNUM 3047 | li TISNUM, LJ_TISNUM
2664 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3048 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2665 | stw TMP3, TMPD 3049 | .FPU stw TMP3, TMPD
2666 | li ZERO, 0 3050 | li ZERO, 0
2667 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3051 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2668 | lfs TOBIT, TMPD 3052 | .FPU lfs TOBIT, TMPD
2669 | stw TMP3, TMPD 3053 | .FPU stw TMP3, TMPD
2670 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3054 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2671 | li TISNIL, LJ_TNIL 3055 | li TISNIL, LJ_TNIL
2672 | stw TMP0, TONUM_HI 3056 | .FPU stw TMP0, TONUM_HI
2673 | lfs TONUM, TMPD 3057 | .FPU lfs TONUM, TMPD
2674 | // Modified copy of ins_next which handles function header dispatch, too. 3058 | // Modified copy of ins_next which handles function header dispatch, too.
2675 | lwz INS, 0(PC) 3059 | lwz INS, 0(PC)
2676 | addi PC, PC, 4 3060 | addi PC, PC, 4
@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx)
2715 |//-- Math helper functions ---------------------------------------------- 3099 |//-- Math helper functions ----------------------------------------------
2716 |//----------------------------------------------------------------------- 3100 |//-----------------------------------------------------------------------
2717 | 3101 |
2718 |// NYI: Use internal implementations of floor, ceil, trunc. 3102 |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
3103 |
3104 |.macro sfi2d, AHI, ALO // Soft-float int32 -> double: integer in ALO, result in AHI (hi word) and ALO (lo word).
3105 |.if not FPU // Body is only emitted when FPU is not set.
3106 | mr. AHI, ALO // Copy the input to AHI and set cr0 from it.
3107 | bclr 12, 2 // Handle zero first.
3108 | srawi TMP0, ALO, 31 // TMP0 = 0 or -1, depending on the sign.
3109 | xor TMP1, ALO, TMP0
3110 | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
3111 | cntlzw AHI, TMP1 // Count leading zero bits.
3112 | andix. TMP0, TMP0, 0x800 // Mask sign bit.
3113 | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
3114 | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
3115 | slwi ALO, TMP1, 21 // Low 11 bits of the aligned value go to the top of the lo word.
3116 | or AHI, AHI, TMP0 // Sign | Exponent.
3117 | srwi TMP1, TMP1, 11 // Remaining upper bits; the leading 1 is now at bit 20.
3118 | slwi AHI, AHI, 20 // Align left.
3119 | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
3120 | blr // Return to the caller from inside the macro body.
3121 |.endif
3122 |.endmacro
3123 |
3124 |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
3125 |->vm_sfi2d_1:
3126 | sfi2d CARG1, CARG2
3127 |
3128 |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
3129 |->vm_sfi2d_2:
3130 | sfi2d CARG3, CARG4
2719 | 3131 |
2720 |->vm_modi: 3132 |->vm_modi:
2721 | divwo. TMP0, CARG1, CARG2 3133 | divwo. TMP0, CARG1, CARG2
@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx)
2783 | addi DISPATCH, r12, GG_G2DISP 3195 | addi DISPATCH, r12, GG_G2DISP
2784 | stw r11, CTSTATE->cb.slot 3196 | stw r11, CTSTATE->cb.slot
2785 | stw r3, CTSTATE->cb.gpr[0] 3197 | stw r3, CTSTATE->cb.gpr[0]
2786 | stfd f1, CTSTATE->cb.fpr[0] 3198 | .FPU stfd f1, CTSTATE->cb.fpr[0]
2787 | stw r4, CTSTATE->cb.gpr[1] 3199 | stw r4, CTSTATE->cb.gpr[1]
2788 | stfd f2, CTSTATE->cb.fpr[1] 3200 | .FPU stfd f2, CTSTATE->cb.fpr[1]
2789 | stw r5, CTSTATE->cb.gpr[2] 3201 | stw r5, CTSTATE->cb.gpr[2]
2790 | stfd f3, CTSTATE->cb.fpr[2] 3202 | .FPU stfd f3, CTSTATE->cb.fpr[2]
2791 | stw r6, CTSTATE->cb.gpr[3] 3203 | stw r6, CTSTATE->cb.gpr[3]
2792 | stfd f4, CTSTATE->cb.fpr[3] 3204 | .FPU stfd f4, CTSTATE->cb.fpr[3]
2793 | stw r7, CTSTATE->cb.gpr[4] 3205 | stw r7, CTSTATE->cb.gpr[4]
2794 | stfd f5, CTSTATE->cb.fpr[4] 3206 | .FPU stfd f5, CTSTATE->cb.fpr[4]
2795 | stw r8, CTSTATE->cb.gpr[5] 3207 | stw r8, CTSTATE->cb.gpr[5]
2796 | stfd f6, CTSTATE->cb.fpr[5] 3208 | .FPU stfd f6, CTSTATE->cb.fpr[5]
2797 | stw r9, CTSTATE->cb.gpr[6] 3209 | stw r9, CTSTATE->cb.gpr[6]
2798 | stfd f7, CTSTATE->cb.fpr[6] 3210 | .FPU stfd f7, CTSTATE->cb.fpr[6]
2799 | stw r10, CTSTATE->cb.gpr[7] 3211 | stw r10, CTSTATE->cb.gpr[7]
2800 | stfd f8, CTSTATE->cb.fpr[7] 3212 | .FPU stfd f8, CTSTATE->cb.fpr[7]
2801 | addi TMP0, sp, CFRAME_SPACE+8 3213 | addi TMP0, sp, CFRAME_SPACE+8
2802 | stw TMP0, CTSTATE->cb.stack 3214 | stw TMP0, CTSTATE->cb.stack
2803 | mr CARG1, CTSTATE 3215 | mr CARG1, CTSTATE
@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx)
2808 | lp BASE, L:CRET1->base 3220 | lp BASE, L:CRET1->base
2809 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 3221 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2810 | lp RC, L:CRET1->top 3222 | lp RC, L:CRET1->top
2811 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3223 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2812 | li ZERO, 0 3224 | li ZERO, 0
2813 | mr L, CRET1 3225 | mr L, CRET1
2814 | stw TMP3, TMPD 3226 | .FPU stw TMP3, TMPD
2815 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3227 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2816 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3228 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2817 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3229 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2818 | stw TMP0, TONUM_HI 3230 | .FPU stw TMP0, TONUM_HI
2819 | li TISNIL, LJ_TNIL 3231 | li TISNIL, LJ_TNIL
2820 | li_vmstate INTERP 3232 | li_vmstate INTERP
2821 | lfs TOBIT, TMPD 3233 | .FPU lfs TOBIT, TMPD
2822 | stw TMP3, TMPD 3234 | .FPU stw TMP3, TMPD
2823 | sub RC, RC, BASE 3235 | sub RC, RC, BASE
2824 | st_vmstate 3236 | st_vmstate
2825 | lfs TONUM, TMPD 3237 | .FPU lfs TONUM, TMPD
2826 | ins_callt 3238 | ins_callt
2827 |.endif 3239 |.endif
2828 | 3240 |
@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx)
2836 | mr CARG2, RA 3248 | mr CARG2, RA
2837 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 3249 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2838 | lwz CRET1, CTSTATE->cb.gpr[0] 3250 | lwz CRET1, CTSTATE->cb.gpr[0]
2839 | lfd FARG1, CTSTATE->cb.fpr[0] 3251 | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
2840 | lwz CRET2, CTSTATE->cb.gpr[1] 3252 | lwz CRET2, CTSTATE->cb.gpr[1]
2841 | b ->vm_leave_unw 3253 | b ->vm_leave_unw
2842 |.endif 3254 |.endif
@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx)
2870 | bge <1 3282 | bge <1
2871 |2: 3283 |2:
2872 | bney cr1, >3 3284 | bney cr1, >3
2873 | lfd f1, CCSTATE->fpr[0] 3285 | .FPU lfd f1, CCSTATE->fpr[0]
2874 | lfd f2, CCSTATE->fpr[1] 3286 | .FPU lfd f2, CCSTATE->fpr[1]
2875 | lfd f3, CCSTATE->fpr[2] 3287 | .FPU lfd f3, CCSTATE->fpr[2]
2876 | lfd f4, CCSTATE->fpr[3] 3288 | .FPU lfd f4, CCSTATE->fpr[3]
2877 | lfd f5, CCSTATE->fpr[4] 3289 | .FPU lfd f5, CCSTATE->fpr[4]
2878 | lfd f6, CCSTATE->fpr[5] 3290 | .FPU lfd f6, CCSTATE->fpr[5]
2879 | lfd f7, CCSTATE->fpr[6] 3291 | .FPU lfd f7, CCSTATE->fpr[6]
2880 | lfd f8, CCSTATE->fpr[7] 3292 | .FPU lfd f8, CCSTATE->fpr[7]
2881 |3: 3293 |3:
2882 | lp TMP0, CCSTATE->func 3294 | lp TMP0, CCSTATE->func
2883 | lwz CARG2, CCSTATE->gpr[1] 3295 | lwz CARG2, CCSTATE->gpr[1]
@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx)
2894 | lwz TMP2, -4(r14) 3306 | lwz TMP2, -4(r14)
2895 | lwz TMP0, 4(r14) 3307 | lwz TMP0, 4(r14)
2896 | stw CARG1, CCSTATE:TMP1->gpr[0] 3308 | stw CARG1, CCSTATE:TMP1->gpr[0]
2897 | stfd FARG1, CCSTATE:TMP1->fpr[0] 3309 | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
2898 | stw CARG2, CCSTATE:TMP1->gpr[1] 3310 | stw CARG2, CCSTATE:TMP1->gpr[1]
2899 | mtlr TMP0 3311 | mtlr TMP0
2900 | stw CARG3, CCSTATE:TMP1->gpr[2] 3312 | stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2923 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3335 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2924 | // RA = src1*8, RD = src2*8, JMP with RD = target 3336 | // RA = src1*8, RD = src2*8, JMP with RD = target
2925 |.if DUALNUM 3337 |.if DUALNUM
2926 | lwzux TMP0, RA, BASE 3338 | lwzux CARG1, RA, BASE
2927 | addi PC, PC, 4 3339 | addi PC, PC, 4
2928 | lwz CARG2, 4(RA) 3340 | lwz CARG2, 4(RA)
2929 | lwzux TMP1, RD, BASE 3341 | lwzux CARG3, RD, BASE
2930 | lwz TMP2, -4(PC) 3342 | lwz TMP2, -4(PC)
2931 | checknum cr0, TMP0 3343 | checknum cr0, CARG1
2932 | lwz CARG3, 4(RD) 3344 | lwz CARG4, 4(RD)
2933 | decode_RD4 TMP2, TMP2 3345 | decode_RD4 TMP2, TMP2
2934 | checknum cr1, TMP1 3346 | checknum cr1, CARG3
2935 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3347 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
2936 | bne cr0, >7 3348 | bne cr0, >7
2937 | bne cr1, >8 3349 | bne cr1, >8
2938 | cmpw CARG2, CARG3 3350 | cmpw CARG2, CARG4
2939 if (op == BC_ISLT) { 3351 if (op == BC_ISLT) {
2940 | bge >2 3352 | bge >2
2941 } else if (op == BC_ISGE) { 3353 } else if (op == BC_ISGE) {
@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2946 | ble >2 3358 | ble >2
2947 } 3359 }
2948 |1: 3360 |1:
2949 | add PC, PC, TMP2 3361 | add PC, PC, SAVE0
2950 |2: 3362 |2:
2951 | ins_next 3363 | ins_next
2952 | 3364 |
2953 |7: // RA is not an integer. 3365 |7: // RA is not an integer.
2954 | bgt cr0, ->vmeta_comp 3366 | bgt cr0, ->vmeta_comp
2955 | // RA is a number. 3367 | // RA is a number.
2956 | lfd f0, 0(RA) 3368 | .FPU lfd f0, 0(RA)
2957 | bgt cr1, ->vmeta_comp 3369 | bgt cr1, ->vmeta_comp
2958 | blt cr1, >4 3370 | blt cr1, >4
2959 | // RA is a number, RD is an integer. 3371 | // RA is a number, RD is an integer.
2960 | tonum_i f1, CARG3 3372 |.if FPU
3373 | tonum_i f1, CARG4
3374 |.else
3375 | bl ->vm_sfi2d_2
3376 |.endif
2961 | b >5 3377 | b >5
2962 | 3378 |
2963 |8: // RA is an integer, RD is not an integer. 3379 |8: // RA is an integer, RD is not an integer.
2964 | bgt cr1, ->vmeta_comp 3380 | bgt cr1, ->vmeta_comp
2965 | // RA is an integer, RD is a number. 3381 | // RA is an integer, RD is a number.
3382 |.if FPU
2966 | tonum_i f0, CARG2 3383 | tonum_i f0, CARG2
3384 |.else
3385 | bl ->vm_sfi2d_1
3386 |.endif
2967 |4: 3387 |4:
2968 | lfd f1, 0(RD) 3388 | .FPU lfd f1, 0(RD)
2969 |5: 3389 |5:
3390 |.if FPU
2970 | fcmpu cr0, f0, f1 3391 | fcmpu cr0, f0, f1
3392 |.else
3393 | blex __ledf2
3394 | cmpwi CRET1, 0
3395 |.endif
2971 if (op == BC_ISLT) { 3396 if (op == BC_ISLT) {
2972 | bge <2 3397 | bge <2
2973 } else if (op == BC_ISGE) { 3398 } else if (op == BC_ISGE) {
@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3015 vk = op == BC_ISEQV; 3440 vk = op == BC_ISEQV;
3016 | // RA = src1*8, RD = src2*8, JMP with RD = target 3441 | // RA = src1*8, RD = src2*8, JMP with RD = target
3017 |.if DUALNUM 3442 |.if DUALNUM
3018 | lwzux TMP0, RA, BASE 3443 | lwzux CARG1, RA, BASE
3019 | addi PC, PC, 4 3444 | addi PC, PC, 4
3020 | lwz CARG2, 4(RA) 3445 | lwz CARG2, 4(RA)
3021 | lwzux TMP1, RD, BASE 3446 | lwzux CARG3, RD, BASE
3022 | checknum cr0, TMP0 3447 | checknum cr0, CARG1
3023 | lwz TMP2, -4(PC) 3448 | lwz SAVE0, -4(PC)
3024 | checknum cr1, TMP1 3449 | checknum cr1, CARG3
3025 | decode_RD4 TMP2, TMP2 3450 | decode_RD4 SAVE0, SAVE0
3026 | lwz CARG3, 4(RD) 3451 | lwz CARG4, 4(RD)
3027 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3452 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
3028 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3453 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3029 if (vk) { 3454 if (vk) {
3030 | ble cr7, ->BC_ISEQN_Z 3455 | ble cr7, ->BC_ISEQN_Z
3031 } else { 3456 } else {
3032 | ble cr7, ->BC_ISNEN_Z 3457 | ble cr7, ->BC_ISNEN_Z
3033 } 3458 }
3034 |.else 3459 |.else
3035 | lwzux TMP0, RA, BASE 3460 | lwzux CARG1, RA, BASE
3036 | lwz TMP2, 0(PC) 3461 | lwz SAVE0, 0(PC)
3037 | lfd f0, 0(RA) 3462 | lfd f0, 0(RA)
3038 | addi PC, PC, 4 3463 | addi PC, PC, 4
3039 | lwzux TMP1, RD, BASE 3464 | lwzux CARG3, RD, BASE
3040 | checknum cr0, TMP0 3465 | checknum cr0, CARG1
3041 | decode_RD4 TMP2, TMP2 3466 | decode_RD4 SAVE0, SAVE0
3042 | lfd f1, 0(RD) 3467 | lfd f1, 0(RD)
3043 | checknum cr1, TMP1 3468 | checknum cr1, CARG3
3044 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3469 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3045 | bge cr0, >5 3470 | bge cr0, >5
3046 | bge cr1, >5 3471 | bge cr1, >5
3047 | fcmpu cr0, f0, f1 3472 | fcmpu cr0, f0, f1
3048 if (vk) { 3473 if (vk) {
3049 | bne >1 3474 | bne >1
3050 | add PC, PC, TMP2 3475 | add PC, PC, SAVE0
3051 } else { 3476 } else {
3052 | beq >1 3477 | beq >1
3053 | add PC, PC, TMP2 3478 | add PC, PC, SAVE0
3054 } 3479 }
3055 |1: 3480 |1:
3056 | ins_next 3481 | ins_next
@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3058 |5: // Either or both types are not numbers. 3483 |5: // Either or both types are not numbers.
3059 |.if not DUALNUM 3484 |.if not DUALNUM
3060 | lwz CARG2, 4(RA) 3485 | lwz CARG2, 4(RA)
3061 | lwz CARG3, 4(RD) 3486 | lwz CARG4, 4(RD)
3062 |.endif 3487 |.endif
3063 |.if FFI 3488 |.if FFI
3064 | cmpwi cr7, TMP0, LJ_TCDATA 3489 | cmpwi cr7, CARG1, LJ_TCDATA
3065 | cmpwi cr5, TMP1, LJ_TCDATA 3490 | cmpwi cr5, CARG3, LJ_TCDATA
3066 |.endif 3491 |.endif
3067 | not TMP3, TMP0 3492 | not TMP2, CARG1
3068 | cmplw TMP0, TMP1 3493 | cmplw CARG1, CARG3
3069 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3494 | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
3070 |.if FFI 3495 |.if FFI
3071 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3496 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
3072 |.endif 3497 |.endif
3073 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3498 | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
3074 |.if FFI 3499 |.if FFI
3075 | beq cr7, ->vmeta_equal_cd 3500 | beq cr7, ->vmeta_equal_cd
3076 |.endif 3501 |.endif
3077 | cmplw cr5, CARG2, CARG3 3502 | cmplw cr5, CARG2, CARG4
3078 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3503 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
3079 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3504 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
3080 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3505 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
3081 | mr SAVE0, PC 3506 | mr SAVE1, PC
3082 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3507 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
3083 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3508 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
3084 if (vk) { 3509 if (vk) {
3085 | bne cr0, >6 3510 | bne cr0, >6
3086 | add PC, PC, TMP2 3511 | add PC, PC, SAVE0
3087 |6: 3512 |6:
3088 } else { 3513 } else {
3089 | beq cr0, >6 3514 | beq cr0, >6
3090 | add PC, PC, TMP2 3515 | add PC, PC, SAVE0
3091 |6: 3516 |6:
3092 } 3517 }
3093 |.if DUALNUM 3518 |.if DUALNUM
@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3102 | 3527 |
3103 | // Different tables or userdatas. Need to check __eq metamethod. 3528 | // Different tables or userdatas. Need to check __eq metamethod.
3104 | // Field metatable must be at same offset for GCtab and GCudata! 3529 | // Field metatable must be at same offset for GCtab and GCudata!
3530 | mr CARG3, CARG4
3105 | lwz TAB:TMP2, TAB:CARG2->metatable 3531 | lwz TAB:TMP2, TAB:CARG2->metatable
3106 | li CARG4, 1-vk // ne = 0 or 1. 3532 | li CARG4, 1-vk // ne = 0 or 1.
3107 | cmplwi TAB:TMP2, 0 3533 | cmplwi TAB:TMP2, 0
@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3109 | lbz TMP2, TAB:TMP2->nomm 3535 | lbz TMP2, TAB:TMP2->nomm
3110 | andix. TMP2, TMP2, 1<<MM_eq 3536 | andix. TMP2, TMP2, 1<<MM_eq
3111 | bne <1 // Or 'no __eq' flag set? 3537 | bne <1 // Or 'no __eq' flag set?
3112 | mr PC, SAVE0 // Restore old PC. 3538 | mr PC, SAVE1 // Restore old PC.
3113 | b ->vmeta_equal // Handle __eq metamethod. 3539 | b ->vmeta_equal // Handle __eq metamethod.
3114 break; 3540 break;
3115 3541
@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3150 vk = op == BC_ISEQN; 3576 vk = op == BC_ISEQN;
3151 | // RA = src*8, RD = num_const*8, JMP with RD = target 3577 | // RA = src*8, RD = num_const*8, JMP with RD = target
3152 |.if DUALNUM 3578 |.if DUALNUM
3153 | lwzux TMP0, RA, BASE 3579 | lwzux CARG1, RA, BASE
3154 | addi PC, PC, 4 3580 | addi PC, PC, 4
3155 | lwz CARG2, 4(RA) 3581 | lwz CARG2, 4(RA)
3156 | lwzux TMP1, RD, KBASE 3582 | lwzux CARG3, RD, KBASE
3157 | checknum cr0, TMP0 3583 | checknum cr0, CARG1
3158 | lwz TMP2, -4(PC) 3584 | lwz SAVE0, -4(PC)
3159 | checknum cr1, TMP1 3585 | checknum cr1, CARG3
3160 | decode_RD4 TMP2, TMP2 3586 | decode_RD4 SAVE0, SAVE0
3161 | lwz CARG3, 4(RD) 3587 | lwz CARG4, 4(RD)
3162 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3588 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3163 if (vk) { 3589 if (vk) {
3164 |->BC_ISEQN_Z: 3590 |->BC_ISEQN_Z:
3165 } else { 3591 } else {
@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3167 } 3593 }
3168 | bne cr0, >7 3594 | bne cr0, >7
3169 | bne cr1, >8 3595 | bne cr1, >8
3170 | cmpw CARG2, CARG3 3596 | cmpw CARG2, CARG4
3171 |4: 3597 |4:
3172 |.else 3598 |.else
3173 if (vk) { 3599 if (vk) {
@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3175 } else { 3601 } else {
3176 |->BC_ISNEN_Z: // Dummy label. 3602 |->BC_ISNEN_Z: // Dummy label.
3177 } 3603 }
3178 | lwzx TMP0, BASE, RA 3604 | lwzx CARG1, BASE, RA
3179 | addi PC, PC, 4 3605 | addi PC, PC, 4
3180 | lfdx f0, BASE, RA 3606 | lfdx f0, BASE, RA
3181 | lwz TMP2, -4(PC) 3607 | lwz SAVE0, -4(PC)
3182 | lfdx f1, KBASE, RD 3608 | lfdx f1, KBASE, RD
3183 | decode_RD4 TMP2, TMP2 3609 | decode_RD4 SAVE0, SAVE0
3184 | checknum TMP0 3610 | checknum CARG1
3185 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3611 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3186 | bge >3 3612 | bge >3
3187 | fcmpu cr0, f0, f1 3613 | fcmpu cr0, f0, f1
3188 |.endif 3614 |.endif
3189 if (vk) { 3615 if (vk) {
3190 | bne >1 3616 | bne >1
3191 | add PC, PC, TMP2 3617 | add PC, PC, SAVE0
3192 |1: 3618 |1:
3193 |.if not FFI 3619 |.if not FFI
3194 |3: 3620 |3:
@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3199 |.if not FFI 3625 |.if not FFI
3200 |3: 3626 |3:
3201 |.endif 3627 |.endif
3202 | add PC, PC, TMP2 3628 | add PC, PC, SAVE0
3203 |2: 3629 |2:
3204 } 3630 }
3205 | ins_next 3631 | ins_next
3206 |.if FFI 3632 |.if FFI
3207 |3: 3633 |3:
3208 | cmpwi TMP0, LJ_TCDATA 3634 | cmpwi CARG1, LJ_TCDATA
3209 | beq ->vmeta_equal_cd 3635 | beq ->vmeta_equal_cd
3210 | b <1 3636 | b <1
3211 |.endif 3637 |.endif
@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3213 |7: // RA is not an integer. 3639 |7: // RA is not an integer.
3214 | bge cr0, <3 3640 | bge cr0, <3
3215 | // RA is a number. 3641 | // RA is a number.
3216 | lfd f0, 0(RA) 3642 | .FPU lfd f0, 0(RA)
3217 | blt cr1, >1 3643 | blt cr1, >1
3218 | // RA is a number, RD is an integer. 3644 | // RA is a number, RD is an integer.
3219 | tonum_i f1, CARG3 3645 |.if FPU
3646 | tonum_i f1, CARG4
3647 |.else
3648 | bl ->vm_sfi2d_2
3649 |.endif
3220 | b >2 3650 | b >2
3221 | 3651 |
3222 |8: // RA is an integer, RD is a number. 3652 |8: // RA is an integer, RD is a number.
3653 |.if FPU
3223 | tonum_i f0, CARG2 3654 | tonum_i f0, CARG2
3655 |.else
3656 | bl ->vm_sfi2d_1
3657 |.endif
3224 |1: 3658 |1:
3225 | lfd f1, 0(RD) 3659 | .FPU lfd f1, 0(RD)
3226 |2: 3660 |2:
3661 |.if FPU
3227 | fcmpu cr0, f0, f1 3662 | fcmpu cr0, f0, f1
3663 |.else
3664 | blex __ledf2
3665 | cmpwi CRET1, 0
3666 |.endif
3228 | b <4 3667 | b <4
3229 |.endif 3668 |.endif
3230 break; 3669 break;
@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3279 | add PC, PC, TMP2 3718 | add PC, PC, TMP2
3280 } else { 3719 } else {
3281 | li TMP1, LJ_TFALSE 3720 | li TMP1, LJ_TFALSE
3721 |.if FPU
3282 | lfdx f0, BASE, RD 3722 | lfdx f0, BASE, RD
3723 |.else
3724 | lwzux CARG1, RD, BASE
3725 | lwz CARG2, 4(RD)
3726 |.endif
3283 | cmplw TMP0, TMP1 3727 | cmplw TMP0, TMP1
3284 if (op == BC_ISTC) { 3728 if (op == BC_ISTC) {
3285 | bge >1 3729 | bge >1
@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3288 } 3732 }
3289 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3733 | addis PC, PC, -(BCBIAS_J*4 >> 16)
3290 | decode_RD4 TMP2, INS 3734 | decode_RD4 TMP2, INS
3735 |.if FPU
3291 | stfdx f0, BASE, RA 3736 | stfdx f0, BASE, RA
3737 |.else
3738 | stwux CARG1, RA, BASE
3739 | stw CARG2, 4(RA)
3740 |.endif
3292 | add PC, PC, TMP2 3741 | add PC, PC, TMP2
3293 |1: 3742 |1:
3294 } 3743 }
@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3323 case BC_MOV: 3772 case BC_MOV:
3324 | // RA = dst*8, RD = src*8 3773 | // RA = dst*8, RD = src*8
3325 | ins_next1 3774 | ins_next1
3775 |.if FPU
3326 | lfdx f0, BASE, RD 3776 | lfdx f0, BASE, RD
3327 | stfdx f0, BASE, RA 3777 | stfdx f0, BASE, RA
3778 |.else
3779 | lwzux TMP0, RD, BASE
3780 | lwz TMP1, 4(RD)
3781 | stwux TMP0, RA, BASE
3782 | stw TMP1, 4(RA)
3783 |.endif
3328 | ins_next2 3784 | ins_next2
3329 break; 3785 break;
3330 case BC_NOT: 3786 case BC_NOT:
@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3426 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3882 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3427 ||switch (vk) { 3883 ||switch (vk) {
3428 ||case 0: 3884 ||case 0:
3429 | lwzx TMP1, BASE, RB 3885 | lwzx CARG1, BASE, RB
3430 | .if DUALNUM 3886 | .if DUALNUM
3431 | lwzx TMP2, KBASE, RC 3887 | lwzx CARG3, KBASE, RC
3432 | .endif 3888 | .endif
3889 | .if FPU
3433 | lfdx f14, BASE, RB 3890 | lfdx f14, BASE, RB
3434 | lfdx f15, KBASE, RC 3891 | lfdx f15, KBASE, RC
3892 | .else
3893 | add TMP1, BASE, RB
3894 | add TMP2, KBASE, RC
3895 | lwz CARG2, 4(TMP1)
3896 | lwz CARG4, 4(TMP2)
3897 | .endif
3435 | .if DUALNUM 3898 | .if DUALNUM
3436 | checknum cr0, TMP1 3899 | checknum cr0, CARG1
3437 | checknum cr1, TMP2 3900 | checknum cr1, CARG3
3438 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3901 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3439 | bge ->vmeta_arith_vn 3902 | bge ->vmeta_arith_vn
3440 | .else 3903 | .else
3441 | checknum TMP1; bge ->vmeta_arith_vn 3904 | checknum CARG1; bge ->vmeta_arith_vn
3442 | .endif 3905 | .endif
3443 || break; 3906 || break;
3444 ||case 1: 3907 ||case 1:
3445 | lwzx TMP1, BASE, RB 3908 | lwzx CARG1, BASE, RB
3446 | .if DUALNUM 3909 | .if DUALNUM
3447 | lwzx TMP2, KBASE, RC 3910 | lwzx CARG3, KBASE, RC
3448 | .endif 3911 | .endif
3912 | .if FPU
3449 | lfdx f15, BASE, RB 3913 | lfdx f15, BASE, RB
3450 | lfdx f14, KBASE, RC 3914 | lfdx f14, KBASE, RC
3915 | .else
3916 | add TMP1, BASE, RB
3917 | add TMP2, KBASE, RC
3918 | lwz CARG2, 4(TMP1)
3919 | lwz CARG4, 4(TMP2)
3920 | .endif
3451 | .if DUALNUM 3921 | .if DUALNUM
3452 | checknum cr0, TMP1 3922 | checknum cr0, CARG1
3453 | checknum cr1, TMP2 3923 | checknum cr1, CARG3
3454 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3924 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3455 | bge ->vmeta_arith_nv 3925 | bge ->vmeta_arith_nv
3456 | .else 3926 | .else
3457 | checknum TMP1; bge ->vmeta_arith_nv 3927 | checknum CARG1; bge ->vmeta_arith_nv
3458 | .endif 3928 | .endif
3459 || break; 3929 || break;
3460 ||default: 3930 ||default:
3461 | lwzx TMP1, BASE, RB 3931 | lwzx CARG1, BASE, RB
3462 | lwzx TMP2, BASE, RC 3932 | lwzx CARG3, BASE, RC
3933 | .if FPU
3463 | lfdx f14, BASE, RB 3934 | lfdx f14, BASE, RB
3464 | lfdx f15, BASE, RC 3935 | lfdx f15, BASE, RC
3465 | checknum cr0, TMP1 3936 | .else
3466 | checknum cr1, TMP2 3937 | add TMP1, BASE, RB
3938 | add TMP2, BASE, RC
3939 | lwz CARG2, 4(TMP1)
3940 | lwz CARG4, 4(TMP2)
3941 | .endif
3942 | checknum cr0, CARG1
3943 | checknum cr1, CARG3
3467 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3944 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3468 | bge ->vmeta_arith_vv 3945 | bge ->vmeta_arith_vv
3469 || break; 3946 || break;
@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3497 | fsub a, b, a // b - floor(b/c)*c 3974 | fsub a, b, a // b - floor(b/c)*c
3498 |.endmacro 3975 |.endmacro
3499 | 3976 |
3977 |.macro sfpmod
3978 |->BC_MODVN_Z:
3979 | stw CARG1, SFSAVE_1
3980 | stw CARG2, SFSAVE_2
3981 | mr SAVE0, CARG3
3982 | mr SAVE1, CARG4
3983 | blex __divdf3
3984 | blex floor
3985 | mr CARG3, SAVE0
3986 | mr CARG4, SAVE1
3987 | blex __muldf3
3988 | mr CARG3, CRET1
3989 | mr CARG4, CRET2
3990 | lwz CARG1, SFSAVE_1
3991 | lwz CARG2, SFSAVE_2
3992 | blex __subdf3
3993 |.endmacro
3994 |
3500 |.macro ins_arithfp, fpins 3995 |.macro ins_arithfp, fpins
3501 | ins_arithpre 3996 | ins_arithpre
3502 |.if "fpins" == "fpmod_" 3997 |.if "fpins" == "fpmod_"
3503 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3998 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3504 |.else 3999 |.elif FPU
3505 | fpins f0, f14, f15 4000 | fpins f0, f14, f15
3506 | ins_next1 4001 | ins_next1
3507 | stfdx f0, BASE, RA 4002 | stfdx f0, BASE, RA
3508 | ins_next2 4003 | ins_next2
4004 |.else
4005 | blex __divdf3 // Only soft-float div uses this macro.
4006 | ins_next1
4007 | stwux CRET1, RA, BASE
4008 | stw CRET2, 4(RA)
4009 | ins_next2
3509 |.endif 4010 |.endif
3510 |.endmacro 4011 |.endmacro
3511 | 4012 |
3512 |.macro ins_arithdn, intins, fpins 4013 |.macro ins_arithdn, intins, fpins, fpcall
3513 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 4014 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3514 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4015 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3515 ||switch (vk) { 4016 ||switch (vk) {
3516 ||case 0: 4017 ||case 0:
3517 | lwzux TMP1, RB, BASE 4018 | lwzux CARG1, RB, BASE
3518 | lwzux TMP2, RC, KBASE 4019 | lwzux CARG3, RC, KBASE
3519 | lwz CARG1, 4(RB) 4020 | lwz CARG2, 4(RB)
3520 | checknum cr0, TMP1 4021 | checknum cr0, CARG1
3521 | lwz CARG2, 4(RC) 4022 | lwz CARG4, 4(RC)
4023 | checknum cr1, CARG3
3522 || break; 4024 || break;
3523 ||case 1: 4025 ||case 1:
3524 | lwzux TMP1, RB, BASE 4026 | lwzux CARG3, RB, BASE
3525 | lwzux TMP2, RC, KBASE 4027 | lwzux CARG1, RC, KBASE
3526 | lwz CARG2, 4(RB) 4028 | lwz CARG4, 4(RB)
3527 | checknum cr0, TMP1 4029 | checknum cr0, CARG3
3528 | lwz CARG1, 4(RC) 4030 | lwz CARG2, 4(RC)
4031 | checknum cr1, CARG1
3529 || break; 4032 || break;
3530 ||default: 4033 ||default:
3531 | lwzux TMP1, RB, BASE 4034 | lwzux CARG1, RB, BASE
3532 | lwzux TMP2, RC, BASE 4035 | lwzux CARG3, RC, BASE
3533 | lwz CARG1, 4(RB) 4036 | lwz CARG2, 4(RB)
3534 | checknum cr0, TMP1 4037 | checknum cr0, CARG1
3535 | lwz CARG2, 4(RC) 4038 | lwz CARG4, 4(RC)
4039 | checknum cr1, CARG3
3536 || break; 4040 || break;
3537 ||} 4041 ||}
3538 | checknum cr1, TMP2
3539 | bne >5 4042 | bne >5
3540 | bne cr1, >5 4043 | bne cr1, >5
3541 | intins CARG1, CARG1, CARG2 4044 |.if "intins" == "intmod"
4045 | mr CARG1, CARG2
4046 | mr CARG2, CARG4
4047 |.endif
4048 | intins CARG1, CARG2, CARG4
3542 | bso >4 4049 | bso >4
3543 |1: 4050 |1:
3544 | ins_next1 4051 | ins_next1
@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3550 | checkov TMP0, <1 // Ignore unrelated overflow. 4057 | checkov TMP0, <1 // Ignore unrelated overflow.
3551 | ins_arithfallback b 4058 | ins_arithfallback b
3552 |5: // FP variant. 4059 |5: // FP variant.
4060 |.if FPU
3553 ||if (vk == 1) { 4061 ||if (vk == 1) {
3554 | lfd f15, 0(RB) 4062 | lfd f15, 0(RB)
3555 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3556 | lfd f14, 0(RC) 4063 | lfd f14, 0(RC)
3557 ||} else { 4064 ||} else {
3558 | lfd f14, 0(RB) 4065 | lfd f14, 0(RB)
3559 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3560 | lfd f15, 0(RC) 4066 | lfd f15, 0(RC)
3561 ||} 4067 ||}
4068 |.endif
4069 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3562 | ins_arithfallback bge 4070 | ins_arithfallback bge
3563 |.if "fpins" == "fpmod_" 4071 |.if "fpins" == "fpmod_"
3564 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4072 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3565 |.else 4073 |.else
4074 |.if FPU
3566 | fpins f0, f14, f15 4075 | fpins f0, f14, f15
3567 | ins_next1
3568 | stfdx f0, BASE, RA 4076 | stfdx f0, BASE, RA
4077 |.else
4078 |.if "fpcall" == "sfpmod"
4079 | sfpmod
4080 |.else
4081 | blex fpcall
4082 |.endif
4083 | stwux CRET1, RA, BASE
4084 | stw CRET2, 4(RA)
4085 |.endif
4086 | ins_next1
3569 | b <2 4087 | b <2
3570 |.endif 4088 |.endif
3571 |.endmacro 4089 |.endmacro
3572 | 4090 |
3573 |.macro ins_arith, intins, fpins 4091 |.macro ins_arith, intins, fpins, fpcall
3574 |.if DUALNUM 4092 |.if DUALNUM
3575 | ins_arithdn intins, fpins 4093 | ins_arithdn intins, fpins, fpcall
3576 |.else 4094 |.else
3577 | ins_arithfp fpins 4095 | ins_arithfp fpins
3578 |.endif 4096 |.endif
@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3587 | addo. TMP0, TMP0, TMP3 4105 | addo. TMP0, TMP0, TMP3
3588 | add y, a, b 4106 | add y, a, b
3589 |.endmacro 4107 |.endmacro
3590 | ins_arith addo32., fadd 4108 | ins_arith addo32., fadd, __adddf3
3591 |.else 4109 |.else
3592 | ins_arith addo., fadd 4110 | ins_arith addo., fadd, __adddf3
3593 |.endif 4111 |.endif
3594 break; 4112 break;
3595 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4113 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3601 | subo. TMP0, TMP0, TMP3 4119 | subo. TMP0, TMP0, TMP3
3602 | sub y, a, b 4120 | sub y, a, b
3603 |.endmacro 4121 |.endmacro
3604 | ins_arith subo32., fsub 4122 | ins_arith subo32., fsub, __subdf3
3605 |.else 4123 |.else
3606 | ins_arith subo., fsub 4124 | ins_arith subo., fsub, __subdf3
3607 |.endif 4125 |.endif
3608 break; 4126 break;
3609 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4127 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3610 | ins_arith mullwo., fmul 4128 | ins_arith mullwo., fmul, __muldf3
3611 break; 4129 break;
3612 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4130 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3613 | ins_arithfp fdiv 4131 | ins_arithfp fdiv
3614 break; 4132 break;
3615 case BC_MODVN: 4133 case BC_MODVN:
3616 | ins_arith intmod, fpmod 4134 | ins_arith intmod, fpmod, sfpmod
3617 break; 4135 break;
3618 case BC_MODNV: case BC_MODVV: 4136 case BC_MODNV: case BC_MODVV:
3619 | ins_arith intmod, fpmod_ 4137 | ins_arith intmod, fpmod_, sfpmod
3620 break; 4138 break;
3621 case BC_POW: 4139 case BC_POW:
3622 | // NYI: (partial) integer arithmetic. 4140 | // NYI: (partial) integer arithmetic.
3623 | lwzx TMP1, BASE, RB 4141 | lwzx CARG1, BASE, RB
4142 | lwzx CARG3, BASE, RC
4143 |.if FPU
3624 | lfdx FARG1, BASE, RB 4144 | lfdx FARG1, BASE, RB
3625 | lwzx TMP2, BASE, RC
3626 | lfdx FARG2, BASE, RC 4145 | lfdx FARG2, BASE, RC
3627 | checknum cr0, TMP1 4146 |.else
3628 | checknum cr1, TMP2 4147 | add TMP1, BASE, RB
4148 | add TMP2, BASE, RC
4149 | lwz CARG2, 4(TMP1)
4150 | lwz CARG4, 4(TMP2)
4151 |.endif
4152 | checknum cr0, CARG1
4153 | checknum cr1, CARG3
3629 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4154 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3630 | bge ->vmeta_arith_vv 4155 | bge ->vmeta_arith_vv
3631 | blex pow 4156 | blex pow
3632 | ins_next1 4157 | ins_next1
4158 |.if FPU
3633 | stfdx FARG1, BASE, RA 4159 | stfdx FARG1, BASE, RA
4160 |.else
4161 | stwux CARG1, RA, BASE
4162 | stw CARG2, 4(RA)
4163 |.endif
3634 | ins_next2 4164 | ins_next2
3635 break; 4165 break;
3636 4166
@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3650 | lp BASE, L->base 4180 | lp BASE, L->base
3651 | bne ->vmeta_binop 4181 | bne ->vmeta_binop
3652 | ins_next1 4182 | ins_next1
4183 |.if FPU
3653 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 4184 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
3654 | stfdx f0, BASE, RA 4185 | stfdx f0, BASE, RA
4186 |.else
4187 | lwzux TMP0, SAVE0, BASE
4188 | lwz TMP1, 4(SAVE0)
4189 | stwux TMP0, RA, BASE
4190 | stw TMP1, 4(RA)
4191 |.endif
3655 | ins_next2 4192 | ins_next2
3656 break; 4193 break;
3657 4194
@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3714 case BC_KNUM: 4251 case BC_KNUM:
3715 | // RA = dst*8, RD = num_const*8 4252 | // RA = dst*8, RD = num_const*8
3716 | ins_next1 4253 | ins_next1
4254 |.if FPU
3717 | lfdx f0, KBASE, RD 4255 | lfdx f0, KBASE, RD
3718 | stfdx f0, BASE, RA 4256 | stfdx f0, BASE, RA
4257 |.else
4258 | lwzux TMP0, RD, KBASE
4259 | lwz TMP1, 4(RD)
4260 | stwux TMP0, RA, BASE
4261 | stw TMP1, 4(RA)
4262 |.endif
3719 | ins_next2 4263 | ins_next2
3720 break; 4264 break;
3721 case BC_KPRI: 4265 case BC_KPRI:
@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3748 | lwzx UPVAL:RB, LFUNC:RB, RD 4292 | lwzx UPVAL:RB, LFUNC:RB, RD
3749 | ins_next1 4293 | ins_next1
3750 | lwz TMP1, UPVAL:RB->v 4294 | lwz TMP1, UPVAL:RB->v
4295 |.if FPU
3751 | lfd f0, 0(TMP1) 4296 | lfd f0, 0(TMP1)
3752 | stfdx f0, BASE, RA 4297 | stfdx f0, BASE, RA
4298 |.else
4299 | lwz TMP2, 0(TMP1)
4300 | lwz TMP3, 4(TMP1)
4301 | stwux TMP2, RA, BASE
4302 | stw TMP3, 4(RA)
4303 |.endif
3753 | ins_next2 4304 | ins_next2
3754 break; 4305 break;
3755 case BC_USETV: 4306 case BC_USETV:
@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3757 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4308 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3758 | srwi RA, RA, 1 4309 | srwi RA, RA, 1
3759 | addi RA, RA, offsetof(GCfuncL, uvptr) 4310 | addi RA, RA, offsetof(GCfuncL, uvptr)
4311 |.if FPU
3760 | lfdux f0, RD, BASE 4312 | lfdux f0, RD, BASE
4313 |.else
4314 | lwzux CARG1, RD, BASE
4315 | lwz CARG3, 4(RD)
4316 |.endif
3761 | lwzx UPVAL:RB, LFUNC:RB, RA 4317 | lwzx UPVAL:RB, LFUNC:RB, RA
3762 | lbz TMP3, UPVAL:RB->marked 4318 | lbz TMP3, UPVAL:RB->marked
3763 | lwz CARG2, UPVAL:RB->v 4319 | lwz CARG2, UPVAL:RB->v
3764 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 4320 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3765 | lbz TMP0, UPVAL:RB->closed 4321 | lbz TMP0, UPVAL:RB->closed
3766 | lwz TMP2, 0(RD) 4322 | lwz TMP2, 0(RD)
4323 |.if FPU
3767 | stfd f0, 0(CARG2) 4324 | stfd f0, 0(CARG2)
4325 |.else
4326 | stw CARG1, 0(CARG2)
4327 | stw CARG3, 4(CARG2)
4328 |.endif
3768 | cmplwi cr1, TMP0, 0 4329 | cmplwi cr1, TMP0, 0
3769 | lwz TMP1, 4(RD) 4330 | lwz TMP1, 4(RD)
3770 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4331 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3820 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4381 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3821 | srwi RA, RA, 1 4382 | srwi RA, RA, 1
3822 | addi RA, RA, offsetof(GCfuncL, uvptr) 4383 | addi RA, RA, offsetof(GCfuncL, uvptr)
4384 |.if FPU
3823 | lfdx f0, KBASE, RD 4385 | lfdx f0, KBASE, RD
4386 |.else
4387 | lwzux TMP2, RD, KBASE
4388 | lwz TMP3, 4(RD)
4389 |.endif
3824 | lwzx UPVAL:RB, LFUNC:RB, RA 4390 | lwzx UPVAL:RB, LFUNC:RB, RA
3825 | ins_next1 4391 | ins_next1
3826 | lwz TMP1, UPVAL:RB->v 4392 | lwz TMP1, UPVAL:RB->v
4393 |.if FPU
3827 | stfd f0, 0(TMP1) 4394 | stfd f0, 0(TMP1)
4395 |.else
4396 | stw TMP2, 0(TMP1)
4397 | stw TMP3, 4(TMP1)
4398 |.endif
3828 | ins_next2 4399 | ins_next2
3829 break; 4400 break;
3830 case BC_USETP: 4401 case BC_USETP:
@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3972 |.endif 4543 |.endif
3973 | ble ->vmeta_tgetv // Integer key and in array part? 4544 | ble ->vmeta_tgetv // Integer key and in array part?
3974 | lwzx TMP0, TMP1, TMP2 4545 | lwzx TMP0, TMP1, TMP2
4546 |.if FPU
3975 | lfdx f14, TMP1, TMP2 4547 | lfdx f14, TMP1, TMP2
4548 |.else
4549 | lwzux SAVE0, TMP1, TMP2
4550 | lwz SAVE1, 4(TMP1)
4551 |.endif
3976 | checknil TMP0; beq >2 4552 | checknil TMP0; beq >2
3977 |1: 4553 |1:
3978 | ins_next1 4554 | ins_next1
4555 |.if FPU
3979 | stfdx f14, BASE, RA 4556 | stfdx f14, BASE, RA
4557 |.else
4558 | stwux SAVE0, RA, BASE
4559 | stw SAVE1, 4(RA)
4560 |.endif
3980 | ins_next2 4561 | ins_next2
3981 | 4562 |
3982 |2: // Check for __index if table value is nil. 4563 |2: // Check for __index if table value is nil.
@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4052 | lwz TMP1, TAB:RB->asize 4633 | lwz TMP1, TAB:RB->asize
4053 | lwz TMP2, TAB:RB->array 4634 | lwz TMP2, TAB:RB->array
4054 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4635 | cmplw TMP0, TMP1; bge ->vmeta_tgetb
4636 |.if FPU
4055 | lwzx TMP1, TMP2, RC 4637 | lwzx TMP1, TMP2, RC
4056 | lfdx f0, TMP2, RC 4638 | lfdx f0, TMP2, RC
4639 |.else
4640 | lwzux TMP1, TMP2, RC
4641 | lwz TMP3, 4(TMP2)
4642 |.endif
4057 | checknil TMP1; beq >5 4643 | checknil TMP1; beq >5
4058 |1: 4644 |1:
4059 | ins_next1 4645 | ins_next1
4646 |.if FPU
4060 | stfdx f0, BASE, RA 4647 | stfdx f0, BASE, RA
4648 |.else
4649 | stwux TMP1, RA, BASE
4650 | stw TMP3, 4(RA)
4651 |.endif
4061 | ins_next2 4652 | ins_next2
4062 | 4653 |
4063 |5: // Check for __index if table value is nil. 4654 |5: // Check for __index if table value is nil.
@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4087 | cmplw TMP0, CARG2 4678 | cmplw TMP0, CARG2
4088 | slwi TMP2, CARG2, 3 4679 | slwi TMP2, CARG2, 3
4089 | ble ->vmeta_tgetr // In array part? 4680 | ble ->vmeta_tgetr // In array part?
4681 |.if FPU
4090 | lfdx f14, TMP1, TMP2 4682 | lfdx f14, TMP1, TMP2
4683 |.else
4684 | lwzux SAVE0, TMP2, TMP1
4685 | lwz SAVE1, 4(TMP2)
4686 |.endif
4091 |->BC_TGETR_Z: 4687 |->BC_TGETR_Z:
4092 | ins_next1 4688 | ins_next1
4689 |.if FPU
4093 | stfdx f14, BASE, RA 4690 | stfdx f14, BASE, RA
4691 |.else
4692 | stwux SAVE0, RA, BASE
4693 | stw SAVE1, 4(RA)
4694 |.endif
4094 | ins_next2 4695 | ins_next2
4095 break; 4696 break;
4096 4697
@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4131 | ble ->vmeta_tsetv // Integer key and in array part? 4732 | ble ->vmeta_tsetv // Integer key and in array part?
4132 | lwzx TMP2, TMP1, TMP0 4733 | lwzx TMP2, TMP1, TMP0
4133 | lbz TMP3, TAB:RB->marked 4734 | lbz TMP3, TAB:RB->marked
4735 |.if FPU
4134 | lfdx f14, BASE, RA 4736 | lfdx f14, BASE, RA
4737 |.else
4738 | add SAVE1, BASE, RA
4739 | lwz SAVE0, 0(SAVE1)
4740 | lwz SAVE1, 4(SAVE1)
4741 |.endif
4135 | checknil TMP2; beq >3 4742 | checknil TMP2; beq >3
4136 |1: 4743 |1:
4137 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4744 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4745 |.if FPU
4138 | stfdx f14, TMP1, TMP0 4746 | stfdx f14, TMP1, TMP0
4747 |.else
4748 | stwux SAVE0, TMP1, TMP0
4749 | stw SAVE1, 4(TMP1)
4750 |.endif
4139 | bne >7 4751 | bne >7
4140 |2: 4752 |2:
4141 | ins_next 4753 | ins_next
@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4176 | lwz NODE:TMP2, TAB:RB->node 4788 | lwz NODE:TMP2, TAB:RB->node
4177 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4789 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
4178 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4790 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
4791 |.if FPU
4179 | lfdx f14, BASE, RA 4792 | lfdx f14, BASE, RA
4793 |.else
4794 | add CARG2, BASE, RA
4795 | lwz SAVE0, 0(CARG2)
4796 | lwz SAVE1, 4(CARG2)
4797 |.endif
4180 | slwi TMP0, TMP1, 5 4798 | slwi TMP0, TMP1, 5
4181 | slwi TMP1, TMP1, 3 4799 | slwi TMP1, TMP1, 3
4182 | sub TMP1, TMP0, TMP1 4800 | sub TMP1, TMP0, TMP1
@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4192 | checknil CARG2; beq >4 // Key found, but nil value? 4810 | checknil CARG2; beq >4 // Key found, but nil value?
4193 |2: 4811 |2:
4194 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4812 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4813 |.if FPU
4195 | stfd f14, NODE:TMP2->val 4814 | stfd f14, NODE:TMP2->val
4815 |.else
4816 | stw SAVE0, NODE:TMP2->val.u32.hi
4817 | stw SAVE1, NODE:TMP2->val.u32.lo
4818 |.endif
4196 | bne >7 4819 | bne >7
4197 |3: 4820 |3:
4198 | ins_next 4821 | ins_next
@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4231 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4854 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4232 | // Returns TValue *. 4855 | // Returns TValue *.
4233 | lp BASE, L->base 4856 | lp BASE, L->base
4857 |.if FPU
4234 | stfd f14, 0(CRET1) 4858 | stfd f14, 0(CRET1)
4859 |.else
4860 | stw SAVE0, 0(CRET1)
4861 | stw SAVE1, 4(CRET1)
4862 |.endif
4235 | b <3 // No 2nd write barrier needed. 4863 | b <3 // No 2nd write barrier needed.
4236 | 4864 |
4237 |7: // Possible table write barrier for the value. Skip valiswhite check. 4865 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4248 | lwz TMP2, TAB:RB->array 4876 | lwz TMP2, TAB:RB->array
4249 | lbz TMP3, TAB:RB->marked 4877 | lbz TMP3, TAB:RB->marked
4250 | cmplw TMP0, TMP1 4878 | cmplw TMP0, TMP1
4879 |.if FPU
4251 | lfdx f14, BASE, RA 4880 | lfdx f14, BASE, RA
4881 |.else
4882 | add CARG2, BASE, RA
4883 | lwz SAVE0, 0(CARG2)
4884 | lwz SAVE1, 4(CARG2)
4885 |.endif
4252 | bge ->vmeta_tsetb 4886 | bge ->vmeta_tsetb
4253 | lwzx TMP1, TMP2, RC 4887 | lwzx TMP1, TMP2, RC
4254 | checknil TMP1; beq >5 4888 | checknil TMP1; beq >5
4255 |1: 4889 |1:
4256 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4890 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4891 |.if FPU
4257 | stfdx f14, TMP2, RC 4892 | stfdx f14, TMP2, RC
4893 |.else
4894 | stwux SAVE0, RC, TMP2
4895 | stw SAVE1, 4(RC)
4896 |.endif
4258 | bne >7 4897 | bne >7
4259 |2: 4898 |2:
4260 | ins_next 4899 | ins_next
@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4294 |2: 4933 |2:
4295 | cmplw TMP0, CARG3 4934 | cmplw TMP0, CARG3
4296 | slwi TMP2, CARG3, 3 4935 | slwi TMP2, CARG3, 3
4936 |.if FPU
4297 | lfdx f14, BASE, RA 4937 | lfdx f14, BASE, RA
4938 |.else
4939 | lwzux SAVE0, RA, BASE
4940 | lwz SAVE1, 4(RA)
4941 |.endif
4298 | ble ->vmeta_tsetr // In array part? 4942 | ble ->vmeta_tsetr // In array part?
4299 | ins_next1 4943 | ins_next1
4944 |.if FPU
4300 | stfdx f14, TMP1, TMP2 4945 | stfdx f14, TMP1, TMP2
4946 |.else
4947 | stwux SAVE0, TMP1, TMP2
4948 | stw SAVE1, 4(TMP1)
4949 |.endif
4301 | ins_next2 4950 | ins_next2
4302 | 4951 |
4303 |7: // Possible table write barrier for the value. Skip valiswhite check. 4952 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4327 | add TMP1, TMP1, TMP0 4976 | add TMP1, TMP1, TMP0
4328 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4977 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4329 |3: // Copy result slots to table. 4978 |3: // Copy result slots to table.
4979 |.if FPU
4330 | lfd f0, 0(RA) 4980 | lfd f0, 0(RA)
4981 |.else
4982 | lwz SAVE0, 0(RA)
4983 | lwz SAVE1, 4(RA)
4984 |.endif
4331 | addi RA, RA, 8 4985 | addi RA, RA, 8
4332 | cmpw cr1, RA, TMP2 4986 | cmpw cr1, RA, TMP2
4987 |.if FPU
4333 | stfd f0, 0(TMP1) 4988 | stfd f0, 0(TMP1)
4989 |.else
4990 | stw SAVE0, 0(TMP1)
4991 | stw SAVE1, 4(TMP1)
4992 |.endif
4334 | addi TMP1, TMP1, 8 4993 | addi TMP1, TMP1, 8
4335 | blt cr1, <3 4994 | blt cr1, <3
4336 | bne >7 4995 | bne >7
@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4397 | beq cr1, >3 5056 | beq cr1, >3
4398 |2: 5057 |2:
4399 | addi TMP3, TMP2, 8 5058 | addi TMP3, TMP2, 8
5059 |.if FPU
4400 | lfdx f0, RA, TMP2 5060 | lfdx f0, RA, TMP2
5061 |.else
5062 | add CARG3, RA, TMP2
5063 | lwz CARG1, 0(CARG3)
5064 | lwz CARG2, 4(CARG3)
5065 |.endif
4401 | cmplw cr1, TMP3, NARGS8:RC 5066 | cmplw cr1, TMP3, NARGS8:RC
5067 |.if FPU
4402 | stfdx f0, BASE, TMP2 5068 | stfdx f0, BASE, TMP2
5069 |.else
5070 | stwux CARG1, TMP2, BASE
5071 | stw CARG2, 4(TMP2)
5072 |.endif
4403 | mr TMP2, TMP3 5073 | mr TMP2, TMP3
4404 | bne cr1, <2 5074 | bne cr1, <2
4405 |3: 5075 |3:
@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4432 | add BASE, BASE, RA 5102 | add BASE, BASE, RA
4433 | lwz TMP1, -24(BASE) 5103 | lwz TMP1, -24(BASE)
4434 | lwz LFUNC:RB, -20(BASE) 5104 | lwz LFUNC:RB, -20(BASE)
5105 |.if FPU
4435 | lfd f1, -8(BASE) 5106 | lfd f1, -8(BASE)
4436 | lfd f0, -16(BASE) 5107 | lfd f0, -16(BASE)
5108 |.else
5109 | lwz CARG1, -8(BASE)
5110 | lwz CARG2, -4(BASE)
5111 | lwz CARG3, -16(BASE)
5112 | lwz CARG4, -12(BASE)
5113 |.endif
4437 | stw TMP1, 0(BASE) // Copy callable. 5114 | stw TMP1, 0(BASE) // Copy callable.
4438 | stw LFUNC:RB, 4(BASE) 5115 | stw LFUNC:RB, 4(BASE)
4439 | checkfunc TMP1 5116 | checkfunc TMP1
4440 | stfd f1, 16(BASE) // Copy control var.
4441 | li NARGS8:RC, 16 // Iterators get 2 arguments. 5117 | li NARGS8:RC, 16 // Iterators get 2 arguments.
5118 |.if FPU
5119 | stfd f1, 16(BASE) // Copy control var.
4442 | stfdu f0, 8(BASE) // Copy state. 5120 | stfdu f0, 8(BASE) // Copy state.
5121 |.else
5122 | stw CARG1, 16(BASE) // Copy control var.
5123 | stw CARG2, 20(BASE)
5124 | stwu CARG3, 8(BASE) // Copy state.
5125 | stw CARG4, 4(BASE)
5126 |.endif
4443 | bne ->vmeta_call 5127 | bne ->vmeta_call
4444 | ins_call 5128 | ins_call
4445 break; 5129 break;
@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4460 | slwi TMP3, RC, 3 5144 | slwi TMP3, RC, 3
4461 | bge >5 // Index points after array part? 5145 | bge >5 // Index points after array part?
4462 | lwzx TMP2, TMP1, TMP3 5146 | lwzx TMP2, TMP1, TMP3
5147 |.if FPU
4463 | lfdx f0, TMP1, TMP3 5148 | lfdx f0, TMP1, TMP3
5149 |.else
5150 | lwzux CARG1, TMP3, TMP1
5151 | lwz CARG2, 4(TMP3)
5152 |.endif
4464 | checknil TMP2 5153 | checknil TMP2
4465 | lwz INS, -4(PC) 5154 | lwz INS, -4(PC)
4466 | beq >4 5155 | beq >4
@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4472 |.endif 5161 |.endif
4473 | addi RC, RC, 1 5162 | addi RC, RC, 1
4474 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 5163 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
5164 |.if FPU
4475 | stfd f0, 8(RA) 5165 | stfd f0, 8(RA)
5166 |.else
5167 | stw CARG1, 8(RA)
5168 | stw CARG2, 12(RA)
5169 |.endif
4476 | decode_RD4 TMP1, INS 5170 | decode_RD4 TMP1, INS
4477 | stw RC, -4(RA) // Update control var. 5171 | stw RC, -4(RA) // Update control var.
4478 | add PC, TMP1, TMP3 5172 | add PC, TMP1, TMP3
@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4497 | slwi RB, RC, 3 5191 | slwi RB, RC, 3
4498 | sub TMP3, TMP3, RB 5192 | sub TMP3, TMP3, RB
4499 | lwzx RB, TMP2, TMP3 5193 | lwzx RB, TMP2, TMP3
5194 |.if FPU
4500 | lfdx f0, TMP2, TMP3 5195 | lfdx f0, TMP2, TMP3
5196 |.else
5197 | add CARG3, TMP2, TMP3
5198 | lwz CARG1, 0(CARG3)
5199 | lwz CARG2, 4(CARG3)
5200 |.endif
4501 | add NODE:TMP3, TMP2, TMP3 5201 | add NODE:TMP3, TMP2, TMP3
4502 | checknil RB 5202 | checknil RB
4503 | lwz INS, -4(PC) 5203 | lwz INS, -4(PC)
4504 | beq >7 5204 | beq >7
5205 |.if FPU
4505 | lfd f1, NODE:TMP3->key 5206 | lfd f1, NODE:TMP3->key
5207 |.else
5208 | lwz CARG3, NODE:TMP3->key.u32.hi
5209 | lwz CARG4, NODE:TMP3->key.u32.lo
5210 |.endif
4506 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 5211 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
5212 |.if FPU
4507 | stfd f0, 8(RA) 5213 | stfd f0, 8(RA)
5214 |.else
5215 | stw CARG1, 8(RA)
5216 | stw CARG2, 12(RA)
5217 |.endif
4508 | add RC, RC, TMP0 5218 | add RC, RC, TMP0
4509 | decode_RD4 TMP1, INS 5219 | decode_RD4 TMP1, INS
5220 |.if FPU
4510 | stfd f1, 0(RA) 5221 | stfd f1, 0(RA)
5222 |.else
5223 | stw CARG3, 0(RA)
5224 | stw CARG4, 4(RA)
5225 |.endif
4511 | addi RC, RC, 1 5226 | addi RC, RC, 1
4512 | add PC, TMP1, TMP2 5227 | add PC, TMP1, TMP2
4513 | stw RC, -4(RA) // Update control var. 5228 | stw RC, -4(RA) // Update control var.
@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4573 | subi TMP2, TMP2, 16 5288 | subi TMP2, TMP2, 16
4574 | ble >2 // No vararg slots? 5289 | ble >2 // No vararg slots?
4575 |1: // Copy vararg slots to destination slots. 5290 |1: // Copy vararg slots to destination slots.
5291 |.if FPU
4576 | lfd f0, 0(RC) 5292 | lfd f0, 0(RC)
5293 |.else
5294 | lwz CARG1, 0(RC)
5295 | lwz CARG2, 4(RC)
5296 |.endif
4577 | addi RC, RC, 8 5297 | addi RC, RC, 8
5298 |.if FPU
4578 | stfd f0, 0(RA) 5299 | stfd f0, 0(RA)
5300 |.else
5301 | stw CARG1, 0(RA)
5302 | stw CARG2, 4(RA)
5303 |.endif
4579 | cmplw RA, TMP2 5304 | cmplw RA, TMP2
4580 | cmplw cr1, RC, TMP3 5305 | cmplw cr1, RC, TMP3
4581 | bge >3 // All destination slots filled? 5306 | bge >3 // All destination slots filled?
@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4598 | addi MULTRES, TMP1, 8 5323 | addi MULTRES, TMP1, 8
4599 | bgt >7 5324 | bgt >7
4600 |6: 5325 |6:
5326 |.if FPU
4601 | lfd f0, 0(RC) 5327 | lfd f0, 0(RC)
5328 |.else
5329 | lwz CARG1, 0(RC)
5330 | lwz CARG2, 4(RC)
5331 |.endif
4602 | addi RC, RC, 8 5332 | addi RC, RC, 8
5333 |.if FPU
4603 | stfd f0, 0(RA) 5334 | stfd f0, 0(RA)
5335 |.else
5336 | stw CARG1, 0(RA)
5337 | stw CARG2, 4(RA)
5338 |.endif
4604 | cmplw RC, TMP3 5339 | cmplw RC, TMP3
4605 | addi RA, RA, 8 5340 | addi RA, RA, 8
4606 | blt <6 // More vararg slots? 5341 | blt <6 // More vararg slots?
@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4651 | li TMP1, 0 5386 | li TMP1, 0
4652 |2: 5387 |2:
4653 | addi TMP3, TMP1, 8 5388 | addi TMP3, TMP1, 8
5389 |.if FPU
4654 | lfdx f0, RA, TMP1 5390 | lfdx f0, RA, TMP1
5391 |.else
5392 | add CARG3, RA, TMP1
5393 | lwz CARG1, 0(CARG3)
5394 | lwz CARG2, 4(CARG3)
5395 |.endif
4655 | cmpw TMP3, RC 5396 | cmpw TMP3, RC
5397 |.if FPU
4656 | stfdx f0, TMP2, TMP1 5398 | stfdx f0, TMP2, TMP1
5399 |.else
5400 | add CARG3, TMP2, TMP1
5401 | stw CARG1, 0(CARG3)
5402 | stw CARG2, 4(CARG3)
5403 |.endif
4657 | beq >3 5404 | beq >3
4658 | addi TMP1, TMP3, 8 5405 | addi TMP1, TMP3, 8
5406 |.if FPU
4659 | lfdx f1, RA, TMP3 5407 | lfdx f1, RA, TMP3
5408 |.else
5409 | add CARG3, RA, TMP3
5410 | lwz CARG1, 0(CARG3)
5411 | lwz CARG2, 4(CARG3)
5412 |.endif
4660 | cmpw TMP1, RC 5413 | cmpw TMP1, RC
5414 |.if FPU
4661 | stfdx f1, TMP2, TMP3 5415 | stfdx f1, TMP2, TMP3
5416 |.else
5417 | add CARG3, TMP2, TMP3
5418 | stw CARG1, 0(CARG3)
5419 | stw CARG2, 4(CARG3)
5420 |.endif
4662 | bne <2 5421 | bne <2
4663 |3: 5422 |3:
4664 |5: 5423 |5:
@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4700 | subi TMP2, BASE, 8 5459 | subi TMP2, BASE, 8
4701 | decode_RB8 RB, INS 5460 | decode_RB8 RB, INS
4702 if (op == BC_RET1) { 5461 if (op == BC_RET1) {
5462 |.if FPU
4703 | lfd f0, 0(RA) 5463 | lfd f0, 0(RA)
4704 | stfd f0, 0(TMP2) 5464 | stfd f0, 0(TMP2)
5465 |.else
5466 | lwz CARG1, 0(RA)
5467 | lwz CARG2, 4(RA)
5468 | stw CARG1, 0(TMP2)
5469 | stw CARG2, 4(TMP2)
5470 |.endif
4705 } 5471 }
4706 |5: 5472 |5:
4707 | cmplw RB, RD 5473 | cmplw RB, RD
@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4762 |4: 5528 |4:
4763 | stw CARG1, FORL_IDX*8+4(RA) 5529 | stw CARG1, FORL_IDX*8+4(RA)
4764 } else { 5530 } else {
4765 | lwz TMP3, FORL_STEP*8(RA) 5531 | lwz SAVE0, FORL_STEP*8(RA)
4766 | lwz CARG3, FORL_STEP*8+4(RA) 5532 | lwz CARG3, FORL_STEP*8+4(RA)
4767 | lwz TMP2, FORL_STOP*8(RA) 5533 | lwz TMP2, FORL_STOP*8(RA)
4768 | lwz CARG2, FORL_STOP*8+4(RA) 5534 | lwz CARG2, FORL_STOP*8+4(RA)
4769 | cmplw cr7, TMP3, TISNUM 5535 | cmplw cr7, SAVE0, TISNUM
4770 | cmplw cr1, TMP2, TISNUM 5536 | cmplw cr1, TMP2, TISNUM
4771 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 5537 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4772 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 5538 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4809 if (vk) { 5575 if (vk) {
4810 |.if DUALNUM 5576 |.if DUALNUM
4811 |9: // FP loop. 5577 |9: // FP loop.
5578 |.if FPU
4812 | lfd f1, FORL_IDX*8(RA) 5579 | lfd f1, FORL_IDX*8(RA)
4813 |.else 5580 |.else
5581 | lwz CARG1, FORL_IDX*8(RA)
5582 | lwz CARG2, FORL_IDX*8+4(RA)
5583 |.endif
5584 |.else
4814 | lfdux f1, RA, BASE 5585 | lfdux f1, RA, BASE
4815 |.endif 5586 |.endif
5587 |.if FPU
4816 | lfd f3, FORL_STEP*8(RA) 5588 | lfd f3, FORL_STEP*8(RA)
4817 | lfd f2, FORL_STOP*8(RA) 5589 | lfd f2, FORL_STOP*8(RA)
4818 | lwz TMP3, FORL_STEP*8(RA)
4819 | fadd f1, f1, f3 5590 | fadd f1, f1, f3
4820 | stfd f1, FORL_IDX*8(RA) 5591 | stfd f1, FORL_IDX*8(RA)
5592 |.else
5593 | lwz CARG3, FORL_STEP*8(RA)
5594 | lwz CARG4, FORL_STEP*8+4(RA)
5595 | mr SAVE1, RD
5596 | blex __adddf3
5597 | mr RD, SAVE1
5598 | stw CRET1, FORL_IDX*8(RA)
5599 | stw CRET2, FORL_IDX*8+4(RA)
5600 | lwz CARG3, FORL_STOP*8(RA)
5601 | lwz CARG4, FORL_STOP*8+4(RA)
5602 |.endif
5603 | lwz SAVE0, FORL_STEP*8(RA)
4821 } else { 5604 } else {
4822 |.if DUALNUM 5605 |.if DUALNUM
4823 |9: // FP loop. 5606 |9: // FP loop.
4824 |.else 5607 |.else
4825 | lwzux TMP1, RA, BASE 5608 | lwzux TMP1, RA, BASE
4826 | lwz TMP3, FORL_STEP*8(RA) 5609 | lwz SAVE0, FORL_STEP*8(RA)
4827 | lwz TMP2, FORL_STOP*8(RA) 5610 | lwz TMP2, FORL_STOP*8(RA)
4828 | cmplw cr0, TMP1, TISNUM 5611 | cmplw cr0, TMP1, TISNUM
4829 | cmplw cr7, TMP3, TISNUM 5612 | cmplw cr7, SAVE0, TISNUM
4830 | cmplw cr1, TMP2, TISNUM 5613 | cmplw cr1, TMP2, TISNUM
4831 |.endif 5614 |.endif
5615 |.if FPU
4832 | lfd f1, FORL_IDX*8(RA) 5616 | lfd f1, FORL_IDX*8(RA)
5617 |.else
5618 | lwz CARG1, FORL_IDX*8(RA)
5619 | lwz CARG2, FORL_IDX*8+4(RA)
5620 |.endif
4833 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 5621 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
4834 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 5622 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
5623 |.if FPU
4835 | lfd f2, FORL_STOP*8(RA) 5624 | lfd f2, FORL_STOP*8(RA)
5625 |.else
5626 | lwz CARG3, FORL_STOP*8(RA)
5627 | lwz CARG4, FORL_STOP*8+4(RA)
5628 |.endif
4836 | bge ->vmeta_for 5629 | bge ->vmeta_for
4837 } 5630 }
4838 | cmpwi cr6, TMP3, 0 5631 | cmpwi cr6, SAVE0, 0
4839 if (op != BC_JFORL) { 5632 if (op != BC_JFORL) {
4840 | srwi RD, RD, 1 5633 | srwi RD, RD, 1
4841 } 5634 }
5635 |.if FPU
4842 | stfd f1, FORL_EXT*8(RA) 5636 | stfd f1, FORL_EXT*8(RA)
5637 |.else
5638 | stw CARG1, FORL_EXT*8(RA)
5639 | stw CARG2, FORL_EXT*8+4(RA)
5640 |.endif
4843 if (op != BC_JFORL) { 5641 if (op != BC_JFORL) {
4844 | add RD, PC, RD 5642 | add RD, PC, RD
4845 } 5643 }
5644 |.if FPU
4846 | fcmpu cr0, f1, f2 5645 | fcmpu cr0, f1, f2
5646 |.else
5647 | mr SAVE1, RD
5648 | blex __ledf2
5649 | cmpwi CRET1, 0
5650 | mr RD, SAVE1
5651 |.endif
4847 if (op == BC_JFORI) { 5652 if (op == BC_JFORI) {
4848 | addis PC, RD, -(BCBIAS_J*4 >> 16) 5653 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4849 } 5654 }