about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Pall <mike>, 2017-03-30 11:17:15 +0200
committer: Mike Pall <mike>, 2017-03-30 11:30:01 +0200
commit: 3143b218946395834f0bfef741061ac6ef3f5b56 (patch)
tree: 07721c6a94616eda13799a6027b00f3e3fc999a4
parent: 78f5f1cef19502289604299e4e6d00e14411f764 (diff)
download: luajit-3143b218946395834f0bfef741061ac6ef3f5b56.tar.gz
luajit-3143b218946395834f0bfef741061ac6ef3f5b56.tar.bz2
luajit-3143b218946395834f0bfef741061ac6ef3f5b56.zip
ARM64: Add big-endian support.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. Sponsored by Cisco Systems, Inc.
-rw-r--r-- Makefile 4
-rw-r--r-- src/Makefile 3
-rw-r--r-- src/host/buildvm_asm.c 8
-rw-r--r-- src/jit/bcsave.lua 8
-rw-r--r-- src/jit/dis_arm64be.lua 12
-rw-r--r-- src/lj_arch.h 10
-rw-r--r-- src/lj_asm.c 3
-rw-r--r-- src/lj_asm_arm64.h 42
-rw-r--r-- src/lj_ccall.c 20
-rw-r--r-- src/lj_ccall.h 4
-rw-r--r-- src/lj_ccallback.c 18
-rw-r--r-- src/lj_emit_arm64.h 2
-rw-r--r-- src/lj_target_arm64.h 9
-rw-r--r-- src/vm_arm64.dasc 69
14 files changed, 149 insertions, 63 deletions
diff --git a/Makefile b/Makefile
index 489d7e75..e6472e0b 100644
--- a/Makefile
+++ b/Makefile
@@ -87,8 +87,8 @@ FILE_PC= luajit.pc
87FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h 87FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
88FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ 88FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
89 dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ 89 dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
90 dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \ 90 dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
91 dis_mips64el.lua vmdef.lua 91 dis_mips64.lua dis_mips64el.lua vmdef.lua
92 92
93ifeq (,$(findstring Windows,$(OS))) 93ifeq (,$(findstring Windows,$(OS)))
94 HOST_SYS:= $(shell uname -s) 94 HOST_SYS:= $(shell uname -s)
diff --git a/src/Makefile b/src/Makefile
index 7cb4c14a..f56465d1 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -242,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
242 TARGET_LJARCH= arm 242 TARGET_LJARCH= arm
243else 243else
244ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) 244ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
245 ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
246 TARGET_ARCH= -D__AARCH64EB__=1
247 endif
245 TARGET_LJARCH= arm64 248 TARGET_LJARCH= arm64
246else 249else
247ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) 250ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index addf281f..1a633602 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
93{ 93{
94 int i; 94 int i;
95 for (i = 0; i < n; i += 4) { 95 for (i = 0; i < n; i += 4) {
96 uint32_t ins = *(uint32_t *)(p+i);
97#if LJ_TARGET_ARM64 && LJ_BE
98 ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
99#endif
96 if ((i & 15) == 0) 100 if ((i & 15) == 0)
97 fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); 101 fprintf(ctx->fp, "\t.long 0x%08x", ins);
98 else 102 else
99 fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); 103 fprintf(ctx->fp, ",0x%08x", ins);
100 if ((i & 15) == 12) putc('\n', ctx->fp); 104 if ((i & 15) == 12) putc('\n', ctx->fp);
101 } 105 }
102 if ((n & 15) != 0) putc('\n', ctx->fp); 106 if ((n & 15) != 0) putc('\n', ctx->fp);
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 9ee22a01..c17c88e0 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -63,8 +63,8 @@ local map_type = {
63} 63}
64 64
65local map_arch = { 65local map_arch = {
66 x86 = true, x64 = true, arm = true, arm64 = true, ppc = true, 66 x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
67 mips = true, mipsel = true, 67 ppc = true, mips = true, mipsel = true,
68} 68}
69 69
70local map_os = { 70local map_os = {
@@ -200,7 +200,7 @@ typedef struct {
200]] 200]]
201 local symname = LJBC_PREFIX..ctx.modname 201 local symname = LJBC_PREFIX..ctx.modname
202 local is64, isbe = false, false 202 local is64, isbe = false, false
203 if ctx.arch == "x64" or ctx.arch == "arm64" then 203 if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
204 is64 = true 204 is64 = true
205 elseif ctx.arch == "ppc" or ctx.arch == "mips" then 205 elseif ctx.arch == "ppc" or ctx.arch == "mips" then
206 isbe = true 206 isbe = true
@@ -237,7 +237,7 @@ typedef struct {
237 hdr.eendian = isbe and 2 or 1 237 hdr.eendian = isbe and 2 or 1
238 hdr.eversion = 1 238 hdr.eversion = 1
239 hdr.type = f16(1) 239 hdr.type = f16(1)
240 hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) 240 hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
241 if ctx.arch == "mips" or ctx.arch == "mipsel" then 241 if ctx.arch == "mips" or ctx.arch == "mipsel" then
242 hdr.flags = f32(0x50001006) 242 hdr.flags = f32(0x50001006)
243 end 243 end
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
new file mode 100644
index 00000000..7eb389e2
--- /dev/null
+++ b/src/jit/dis_arm64be.lua
@@ -0,0 +1,12 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64BE disassembler wrapper module.
3--
4-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- ARM64 instructions are always little-endian. So just forward to the
8-- common ARM64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11return require((string.match(..., ".*%.") or "").."dis_arm64")
12
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 40509607..fe558157 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -215,9 +215,14 @@
215 215
216#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 216#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
217 217
218#define LJ_ARCH_NAME "arm64"
219#define LJ_ARCH_BITS 64 218#define LJ_ARCH_BITS 64
219#if defined(__AARCH64EB__)
220#define LJ_ARCH_NAME "arm64be"
221#define LJ_ARCH_ENDIAN LUAJIT_BE
222#else
223#define LJ_ARCH_NAME "arm64"
220#define LJ_ARCH_ENDIAN LUAJIT_LE 224#define LJ_ARCH_ENDIAN LUAJIT_LE
225#endif
221#define LJ_TARGET_ARM64 1 226#define LJ_TARGET_ARM64 1
222#define LJ_TARGET_EHRETREG 0 227#define LJ_TARGET_EHRETREG 0
223#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ 228#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
@@ -409,9 +414,6 @@
409#error "Only ARM EABI or iOS 3.0+ ABI is supported" 414#error "Only ARM EABI or iOS 3.0+ ABI is supported"
410#endif 415#endif
411#elif LJ_TARGET_ARM64 416#elif LJ_TARGET_ARM64
412#if defined(__AARCH64EB__)
413#error "No support for big-endian ARM64"
414#endif
415#if defined(_ILP32) 417#if defined(_ILP32)
416#error "No support for ILP32 model on ARM64" 418#error "No support for ILP32 model on ARM64"
417#endif 419#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 7c09dd9f..c2cf5a95 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2393,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
2393 if (!as->loopref) 2393 if (!as->loopref)
2394 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ 2394 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
2395 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); 2395 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
2396#if LJ_TARGET_MCODE_FIXUP
2397 asm_mcode_fixup(T->mcode, T->szmcode);
2398#endif
2396 lj_mcode_sync(T->mcode, origtop); 2399 lj_mcode_sync(T->mcode, origtop);
2397} 2400}
2398 2401
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index b58ab3a1..8fd92e76 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
56 asm_mclimit(as); 56 asm_mclimit(as);
57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ 57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
58 for (i = nexits-1; (int32_t)i >= 0; i--) 58 for (i = nexits-1; (int32_t)i >= 0; i--)
59 *--mxp = A64I_BL|((-3-i)&0x03ffffffu); 59 *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
60 *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); 60 *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
61 mxp--; 61 mxp--;
62 *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); 62 *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
63 *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); 63 *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
64 as->mctop = mxp; 64 as->mctop = mxp;
65} 65}
66 66
@@ -431,7 +431,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
431 fpr++; 431 fpr++;
432 } else { 432 } else {
433 Reg r = ra_alloc1(as, ref, RSET_FPR); 433 Reg r = ra_alloc1(as, ref, RSET_FPR);
434 emit_spstore(as, ir, r, ofs); 434 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
435 ofs += 8; 435 ofs += 8;
436 } 436 }
437 } else { 437 } else {
@@ -441,7 +441,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
441 gpr++; 441 gpr++;
442 } else { 442 } else {
443 Reg r = ra_alloc1(as, ref, RSET_GPR); 443 Reg r = ra_alloc1(as, ref, RSET_GPR);
444 emit_spstore(as, ir, r, ofs); 444 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
445 ofs += 8; 445 ofs += 8;
446 } 446 }
447 } 447 }
@@ -1082,7 +1082,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1082 src = ra_alloc1(as, ir->op2, allow); 1082 src = ra_alloc1(as, ir->op2, allow);
1083 rset_clear(allow, src); 1083 rset_clear(allow, src);
1084 if (irt_isinteger(ir->t)) 1084 if (irt_isinteger(ir->t))
1085 type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); 1085 type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
1086 else 1086 else
1087 type = ra_allock(as, irt_toitype(ir->t), allow); 1087 type = ra_allock(as, irt_toitype(ir->t), allow);
1088 } else { 1088 } else {
@@ -1179,7 +1179,8 @@ dotypecheck:
1179 } 1179 }
1180 if (ra_hasreg(dest)) { 1180 if (ra_hasreg(dest)) {
1181 emit_lso(as, irt_isnum(t) ? A64I_LDRd : 1181 emit_lso(as, irt_isnum(t) ? A64I_LDRd :
1182 (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs); 1182 (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
1183 ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
1183 } 1184 }
1184} 1185}
1185 1186
@@ -1909,7 +1910,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1909 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ 1910 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1910 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); 1911 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
1911 if (spadj == 0) { 1912 if (spadj == 0) {
1912 *--p = A64I_NOP; 1913 *--p = A64I_LE(A64I_NOP);
1913 as->mctop = p; 1914 as->mctop = p;
1914 } else { 1915 } else {
1915 /* Patch stack adjustment. */ 1916 /* Patch stack adjustment. */
@@ -1962,6 +1963,19 @@ static void asm_setup_target(ASMState *as)
1962 asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); 1963 asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
1963} 1964}
1964 1965
1966#if LJ_BE
1967/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
1968static void asm_mcode_fixup(MCode *mcode, MSize size)
1969{
1970 MCode *pe = (MCode *)((char *)mcode + size);
1971 while (mcode < pe) {
1972 MCode ins = *mcode;
1973 *mcode++ = lj_bswap(ins);
1974 }
1975}
1976#define LJ_TARGET_MCODE_FIXUP 1
1977#endif
1978
1965/* -- Trace patching ------------------------------------------------------ */ 1979/* -- Trace patching ------------------------------------------------------ */
1966 1980
1967/* Patch exit jumps of existing machine code to a new target. */ 1981/* Patch exit jumps of existing machine code to a new target. */
@@ -1974,29 +1988,29 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1974 MCode *px = exitstub_trace_addr(T, exitno); 1988 MCode *px = exitstub_trace_addr(T, exitno);
1975 for (; p < pe; p++) { 1989 for (; p < pe; p++) {
1976 /* Look for exitstub branch, replace with branch to target. */ 1990 /* Look for exitstub branch, replace with branch to target. */
1977 uint32_t ins = *p; 1991 MCode ins = A64I_LE(*p);
1978 if ((ins & 0xff000000u) == 0x54000000u && 1992 if ((ins & 0xff000000u) == 0x54000000u &&
1979 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { 1993 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
1980 /* Patch bcc exitstub. */ 1994 /* Patch bcc exitstub. */
1981 *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); 1995 *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
1982 cend = p+1; 1996 cend = p+1;
1983 if (!cstart) cstart = p; 1997 if (!cstart) cstart = p;
1984 } else if ((ins & 0xfc000000u) == 0x14000000u && 1998 } else if ((ins & 0xfc000000u) == 0x14000000u &&
1985 ((ins ^ (px-p)) & 0x03ffffffu) == 0) { 1999 ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
1986 /* Patch b exitstub. */ 2000 /* Patch b exitstub. */
1987 *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); 2001 *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
1988 cend = p+1; 2002 cend = p+1;
1989 if (!cstart) cstart = p; 2003 if (!cstart) cstart = p;
1990 } else if ((ins & 0x7e000000u) == 0x34000000u && 2004 } else if ((ins & 0x7e000000u) == 0x34000000u &&
1991 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { 2005 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
1992 /* Patch cbz/cbnz exitstub. */ 2006 /* Patch cbz/cbnz exitstub. */
1993 *p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u); 2007 *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
1994 cend = p+1; 2008 cend = p+1;
1995 if (!cstart) cstart = p; 2009 if (!cstart) cstart = p;
1996 } else if ((ins & 0x7e000000u) == 0x36000000u && 2010 } else if ((ins & 0x7e000000u) == 0x36000000u &&
1997 ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { 2011 ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
1998 /* Patch tbz/tbnz exitstub. */ 2012 /* Patch tbz/tbnz exitstub. */
1999 *p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u); 2013 *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
2000 cend = p+1; 2014 cend = p+1;
2001 if (!cstart) cstart = p; 2015 if (!cstart) cstart = p;
2002 } 2016 }
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 2b7ca364..5c252e5b 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -301,7 +301,7 @@
301 unsigned int cl = ccall_classify_struct(cts, ctr); \ 301 unsigned int cl = ccall_classify_struct(cts, ctr); \
302 if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ 302 if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
303 CTSize i = (cl >> 8) - 1; \ 303 CTSize i = (cl >> 8) - 1; \
304 do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ 304 do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
305 } else { \ 305 } else { \
306 if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ 306 if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
307 memcpy(dp, sp, ctr->size); \ 307 memcpy(dp, sp, ctr->size); \
@@ -359,6 +359,13 @@
359 } \ 359 } \
360 } 360 }
361 361
362#if LJ_BE
363#define CCALL_HANDLE_RET \
364 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
365 sp = (uint8_t *)&cc->fpr[0].f;
366#endif
367
368
362#elif LJ_TARGET_PPC 369#elif LJ_TARGET_PPC
363/* -- PPC calling conventions --------------------------------------------- */ 370/* -- PPC calling conventions --------------------------------------------- */
364 371
@@ -1033,9 +1040,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
1033 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : 1040 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
1034 (int32_t)*(int16_t *)dp; 1041 (int32_t)*(int16_t *)dp;
1035 } 1042 }
1043#if LJ_TARGET_ARM64 && LJ_BE
1044 if (isfp && d->size == sizeof(float))
1045 ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
1046#endif
1047#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
1048 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
1036#if LJ_TARGET_MIPS64 1049#if LJ_TARGET_MIPS64
1037 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || 1050 || (isfp && nsp == 0)
1038 (isfp && nsp == 0)) && d->size <= 4) { 1051#endif
1052 ) && d->size <= 4) {
1039 *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ 1053 *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
1040 } 1054 }
1041#endif 1055#endif
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 34e800cc..59f66481 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -79,8 +79,8 @@ typedef union FPRArg {
79typedef intptr_t GPRArg; 79typedef intptr_t GPRArg;
80typedef union FPRArg { 80typedef union FPRArg {
81 double d; 81 double d;
82 float f; 82 struct { LJ_ENDIAN_LOHI(float f; , float g;) };
83 uint32_t u32; 83 struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
84} FPRArg; 84} FPRArg;
85 85
86#elif LJ_TARGET_PPC 86#elif LJ_TARGET_PPC
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index fce6a3ed..846827b1 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -173,16 +173,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
173 uint32_t *p = page; 173 uint32_t *p = page;
174 void *target = (void *)lj_vm_ffi_callback; 174 void *target = (void *)lj_vm_ffi_callback;
175 MSize slot; 175 MSize slot;
176 *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); 176 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
177 *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); 177 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
178 *p++ = A64I_BR | A64F_N(RID_X11); 178 *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
179 *p++ = A64I_NOP; 179 *p++ = A64I_LE(A64I_NOP);
180 ((void **)p)[0] = target; 180 ((void **)p)[0] = target;
181 ((void **)p)[1] = g; 181 ((void **)p)[1] = g;
182 p += 4; 182 p += 4;
183 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { 183 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
184 *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); 184 *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
185 *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); 185 *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
186 p++; 186 p++;
187 } 187 }
188 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 188 lua_assert(p - page <= CALLBACK_MCODE_SIZE);
@@ -624,6 +624,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
624 if (ctype_isfp(ctr->info)) 624 if (ctype_isfp(ctr->info))
625 dp = (uint8_t *)&cts->cb.fpr[0]; 625 dp = (uint8_t *)&cts->cb.fpr[0];
626#endif 626#endif
627#if LJ_TARGET_ARM64 && LJ_BE
628 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
629 dp = (uint8_t *)&cts->cb.fpr[0].f[1];
630#endif
627 lj_cconv_ct_tv(cts, ctr, dp, o, 0); 631 lj_cconv_ct_tv(cts, ctr, dp, o, 0);
628#ifdef CALLBACK_HANDLE_RET 632#ifdef CALLBACK_HANDLE_RET
629 CALLBACK_HANDLE_RET 633 CALLBACK_HANDLE_RET
@@ -637,7 +641,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
637 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : 641 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
638 (int32_t)*(int16_t *)dp; 642 (int32_t)*(int16_t *)dp;
639 } 643 }
640#if LJ_TARGET_MIPS64 644#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
641 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ 645 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
642 if (ctr->size <= 4 && 646 if (ctr->size <= 4 &&
643 (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) 647 (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index cfa18c83..6da4c7d4 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -140,7 +140,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
140 } else { 140 } else {
141 goto nopair; 141 goto nopair;
142 } 142 }
143 if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) { 143 if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
144 *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | 144 *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
145 (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); 145 (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
146 return; 146 return;
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
index 3f6bb39b..520023ae 100644
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -107,7 +107,7 @@ typedef struct {
107/* Return the address of a per-trace exit stub. */ 107/* Return the address of a per-trace exit stub. */
108static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) 108static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
109{ 109{
110 while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */ 110 while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
111 return p + 3 + exitno; 111 return p + 3 + exitno;
112} 112}
113/* Avoid dependence on lj_jit.h if only including lj_target.h. */ 113/* Avoid dependence on lj_jit.h if only including lj_target.h. */
@@ -116,6 +116,13 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
116 116
117/* -- Instructions -------------------------------------------------------- */ 117/* -- Instructions -------------------------------------------------------- */
118 118
119/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
120#if LJ_BE
121#define A64I_LE(x) (lj_bswap(x))
122#else
123#define A64I_LE(x) (x)
124#endif
125
119/* Instruction fields. */ 126/* Instruction fields. */
120#define A64F_D(r) (r) 127#define A64F_D(r) (r)
121#define A64F_N(r) ((r) << 5) 128#define A64F_N(r) ((r) << 5)
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index bb2496ab..cfbc61a1 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -151,6 +151,21 @@
151|.define FRAME_FUNC, #-16 151|.define FRAME_FUNC, #-16
152|.define FRAME_PC, #-8 152|.define FRAME_PC, #-8
153| 153|
154|// Endian-specific defines.
155|.if ENDIAN_LE
156|.define LO, 0
157|.define OFS_RD, 2
158|.define OFS_RB, 3
159|.define OFS_RA, 1
160|.define OFS_OP, 0
161|.else
162|.define LO, 4
163|.define OFS_RD, 0
164|.define OFS_RB, 0
165|.define OFS_RA, 2
166|.define OFS_OP, 3
167|.endif
168|
154|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro 169|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
155|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro 170|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
156|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro 171|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
@@ -717,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx)
717 | cmp CRET1, #1 732 | cmp CRET1, #1
718 | bhi ->vmeta_binop 733 | bhi ->vmeta_binop
719 |4: 734 |4:
720 | ldrh RBw, [PC, #2] 735 | ldrh RBw, [PC, # OFS_RD]
721 | add PC, PC, #4 736 | add PC, PC, #4
722 | add RB, PC, RB, lsl #2 737 | add RB, PC, RB, lsl #2
723 | sub RB, RB, #0x20000 738 | sub RB, RB, #0x20000
@@ -1500,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx)
1500 | bne ->fff_fallback 1515 | bne ->fff_fallback
1501 | checkint CARG1, ->fff_fallback 1516 | checkint CARG1, ->fff_fallback
1502 | mov CARG3, #1 1517 | mov CARG3, #1
1503 | mov CARG2, BASE // Points to stack. Little-endian. 1518 | // Point to the char inside the integer in the stack slot.
1519 |.if ENDIAN_LE
1520 | mov CARG2, BASE
1521 |.else
1522 | add CARG2, BASE, #7
1523 |.endif
1504 |->fff_newstr: 1524 |->fff_newstr:
1505 | // CARG2 = str, CARG3 = len. 1525 | // CARG2 = str, CARG3 = len.
1506 | str BASE, L->base 1526 | str BASE, L->base
@@ -1703,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx)
1703 | ands TMP0, PC, #FRAME_TYPE 1723 | ands TMP0, PC, #FRAME_TYPE
1704 | and TMP1, PC, #~FRAME_TYPEP 1724 | and TMP1, PC, #~FRAME_TYPEP
1705 | bne >3 1725 | bne >3
1706 | ldrb RAw, [PC, #-3] 1726 | ldrb RAw, [PC, #-4+OFS_RA]
1707 | lsl RA, RA, #3 1727 | lsl RA, RA, #3
1708 | add TMP1, RA, #16 1728 | add TMP1, RA, #16
1709 |3: 1729 |3:
@@ -1838,7 +1858,7 @@ static void build_subroutines(BuildCtx *ctx)
1838 |->cont_stitch: // Trace stitching. 1858 |->cont_stitch: // Trace stitching.
1839 |.if JIT 1859 |.if JIT
1840 | // RA = resultptr, CARG4 = meta base 1860 | // RA = resultptr, CARG4 = meta base
1841 | ldr RB, SAVE_MULTRES 1861 | ldr RBw, SAVE_MULTRES
1842 | ldr INSw, [PC, #-4] 1862 | ldr INSw, [PC, #-4]
1843 | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. 1863 | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
1844 | subs RB, RB, #8 1864 | subs RB, RB, #8
@@ -1869,7 +1889,7 @@ static void build_subroutines(BuildCtx *ctx)
1869 | 1889 |
1870 | // Stitch a new trace to the previous trace. 1890 | // Stitch a new trace to the previous trace.
1871 | mov CARG1, #GL_J(exitno) 1891 | mov CARG1, #GL_J(exitno)
1872 | str RA, [GL, CARG1] 1892 | str RAw, [GL, CARG1]
1873 | mov CARG1, #GL_J(L) 1893 | mov CARG1, #GL_J(L)
1874 | str L, [GL, CARG1] 1894 | str L, [GL, CARG1]
1875 | str BASE, L->base 1895 | str BASE, L->base
@@ -1936,6 +1956,9 @@ static void build_subroutines(BuildCtx *ctx)
1936 | sub CARG1, CARG1, #2 1956 | sub CARG1, CARG1, #2
1937 | ldr CARG2w, [lr] // Load trace number. 1957 | ldr CARG2w, [lr] // Load trace number.
1938 | st_vmstate CARG4 1958 | st_vmstate CARG4
1959 |.if ENDIAN_BE
1960 | rev32 CARG2, CARG2
1961 |.endif
1939 | str BASE, L->base 1962 | str BASE, L->base
1940 | ubfx CARG2w, CARG2w, #5, #16 1963 | ubfx CARG2w, CARG2w, #5, #16
1941 | str CARG1w, [GL, #GL_J(exitno)] 1964 | str CARG1w, [GL, #GL_J(exitno)]
@@ -1967,14 +1990,14 @@ static void build_subroutines(BuildCtx *ctx)
1967 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 1990 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
1968 | movn TISNIL, #0 1991 | movn TISNIL, #0
1969 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK 1992 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
1970 | str RC, SAVE_MULTRES 1993 | str RCw, SAVE_MULTRES
1971 | str BASE, L->base 1994 | str BASE, L->base
1972 | ldr CARG2, LFUNC:CARG2->pc 1995 | ldr CARG2, LFUNC:CARG2->pc
1973 | str xzr, GL->jit_base 1996 | str xzr, GL->jit_base
1974 | mv_vmstate CARG4, INTERP 1997 | mv_vmstate CARG4, INTERP
1975 | ldr KBASE, [CARG2, #PC2PROTO(k)] 1998 | ldr KBASE, [CARG2, #PC2PROTO(k)]
1976 | // Modified copy of ins_next which handles function header dispatch, too. 1999 | // Modified copy of ins_next which handles function header dispatch, too.
1977 | ldrb RBw, [PC] 2000 | ldrb RBw, [PC, # OFS_OP]
1978 | ldr INSw, [PC], #4 2001 | ldr INSw, [PC], #4
1979 | st_vmstate CARG4 2002 | st_vmstate CARG4
1980 | cmp RBw, #BC_FUNCC+2 // Fast function? 2003 | cmp RBw, #BC_FUNCC+2 // Fast function?
@@ -2000,7 +2023,7 @@ static void build_subroutines(BuildCtx *ctx)
2000 | ands CARG2, CARG1, #FRAME_TYPE 2023 | ands CARG2, CARG1, #FRAME_TYPE
2001 | bne <2 // Trace stitching continuation? 2024 | bne <2 // Trace stitching continuation?
2002 | // Otherwise set KBASE for Lua function below fast function. 2025 | // Otherwise set KBASE for Lua function below fast function.
2003 | ldr CARG3, [CARG1, #-4] 2026 | ldr CARG3w, [CARG1, #-4]
2004 | decode_RA CARG1, CARG3 2027 | decode_RA CARG1, CARG3
2005 | sub CARG2, BASE, CARG1, lsl #3 2028 | sub CARG2, BASE, CARG1, lsl #3
2006 | ldr LFUNC:CARG3, [CARG2, #-32] 2029 | ldr LFUNC:CARG3, [CARG2, #-32]
@@ -2153,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2153 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2176 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2154 | // RA = src1, RC = src2, JMP with RC = target 2177 | // RA = src1, RC = src2, JMP with RC = target
2155 | ldr CARG1, [BASE, RA, lsl #3] 2178 | ldr CARG1, [BASE, RA, lsl #3]
2156 | ldrh RBw, [PC, #2] 2179 | ldrh RBw, [PC, # OFS_RD]
2157 | ldr CARG2, [BASE, RC, lsl #3] 2180 | ldr CARG2, [BASE, RC, lsl #3]
2158 | add PC, PC, #4 2181 | add PC, PC, #4
2159 | add RB, PC, RB, lsl #2 2182 | add RB, PC, RB, lsl #2
@@ -2210,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2210 | // RA = src1, RC = src2, JMP with RC = target 2233 | // RA = src1, RC = src2, JMP with RC = target
2211 | ldr CARG1, [BASE, RA, lsl #3] 2234 | ldr CARG1, [BASE, RA, lsl #3]
2212 | add RC, BASE, RC, lsl #3 2235 | add RC, BASE, RC, lsl #3
2213 | ldrh RBw, [PC, #2] 2236 | ldrh RBw, [PC, # OFS_RD]
2214 | ldr CARG3, [RC] 2237 | ldr CARG3, [RC]
2215 | add PC, PC, #4 2238 | add PC, PC, #4
2216 | add RB, PC, RB, lsl #2 2239 | add RB, PC, RB, lsl #2
@@ -2271,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2271 | // RA = src, RC = str_const (~), JMP with RC = target 2294 | // RA = src, RC = str_const (~), JMP with RC = target
2272 | ldr CARG1, [BASE, RA, lsl #3] 2295 | ldr CARG1, [BASE, RA, lsl #3]
2273 | mvn RC, RC 2296 | mvn RC, RC
2274 | ldrh RBw, [PC, #2] 2297 | ldrh RBw, [PC, # OFS_RD]
2275 | ldr CARG2, [KBASE, RC, lsl #3] 2298 | ldr CARG2, [KBASE, RC, lsl #3]
2276 | add PC, PC, #4 2299 | add PC, PC, #4
2277 | movn TMP0, #~LJ_TSTR 2300 | movn TMP0, #~LJ_TSTR
@@ -2299,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2299 | // RA = src, RC = num_const (~), JMP with RC = target 2322 | // RA = src, RC = num_const (~), JMP with RC = target
2300 | ldr CARG1, [BASE, RA, lsl #3] 2323 | ldr CARG1, [BASE, RA, lsl #3]
2301 | add RC, KBASE, RC, lsl #3 2324 | add RC, KBASE, RC, lsl #3
2302 | ldrh RBw, [PC, #2] 2325 | ldrh RBw, [PC, # OFS_RD]
2303 | ldr CARG3, [RC] 2326 | ldr CARG3, [RC]
2304 | add PC, PC, #4 2327 | add PC, PC, #4
2305 | add RB, PC, RB, lsl #2 2328 | add RB, PC, RB, lsl #2
@@ -2359,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2359 vk = op == BC_ISEQP; 2382 vk = op == BC_ISEQP;
2360 | // RA = src, RC = primitive_type (~), JMP with RC = target 2383 | // RA = src, RC = primitive_type (~), JMP with RC = target
2361 | ldr TMP0, [BASE, RA, lsl #3] 2384 | ldr TMP0, [BASE, RA, lsl #3]
2362 | ldrh RBw, [PC, #2] 2385 | ldrh RBw, [PC, # OFS_RD]
2363 | add PC, PC, #4 2386 | add PC, PC, #4
2364 | add RC, RC, #1 2387 | add RC, RC, #1
2365 | add RB, PC, RB, lsl #2 2388 | add RB, PC, RB, lsl #2
@@ -2384,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2384 2407
2385 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: 2408 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2386 | // RA = dst or unused, RC = src, JMP with RC = target 2409 | // RA = dst or unused, RC = src, JMP with RC = target
2387 | ldrh RBw, [PC, #2] 2410 | ldrh RBw, [PC, # OFS_RD]
2388 | ldr TMP0, [BASE, RC, lsl #3] 2411 | ldr TMP0, [BASE, RC, lsl #3]
2389 | add PC, PC, #4 2412 | add PC, PC, #4
2390 | mov_false TMP1 2413 | mov_false TMP1
@@ -2631,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2631 | str PC, SAVE_PC 2654 | str PC, SAVE_PC
2632 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) 2655 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2633 | // Returns NULL (finished) or TValue * (metamethod). 2656 | // Returns NULL (finished) or TValue * (metamethod).
2634 | ldrb RBw, [PC, #-1] 2657 | ldrb RBw, [PC, #-4+OFS_RB]
2635 | ldr BASE, L->base 2658 | ldr BASE, L->base
2636 | cbnz CRET1, ->vmeta_binop 2659 | cbnz CRET1, ->vmeta_binop
2637 | ldr TMP0, [BASE, RB, lsl #3] 2660 | ldr TMP0, [BASE, RB, lsl #3]
@@ -3262,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3262 | ins_callt 3285 | ins_callt
3263 | 3286 |
3264 |5: // Tailcall to a fast function with a Lua frame below. 3287 |5: // Tailcall to a fast function with a Lua frame below.
3265 | ldrb RAw, [PC, #-3] 3288 | ldrb RAw, [PC, #-4+OFS_RA]
3266 | sub CARG1, BASE, RA, lsl #3 3289 | sub CARG1, BASE, RA, lsl #3
3267 | ldr LFUNC:CARG1, [CARG1, #-32] 3290 | ldr LFUNC:CARG1, [CARG1, #-32]
3268 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK 3291 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@@ -3303,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3303 |.endif 3326 |.endif
3304 | add RA, BASE, RA, lsl #3 3327 | add RA, BASE, RA, lsl #3
3305 | ldr TAB:RB, [RA, #-16] 3328 | ldr TAB:RB, [RA, #-16]
3306 | ldrh TMP3w, [PC, #2] 3329 | ldrh TMP3w, [PC, # OFS_RD]
3307 | ldr CARG1w, [RA, #-8] // Get index from control var. 3330 | ldr CARG1w, [RA, #-8+LO] // Get index from control var.
3308 | add PC, PC, #4 3331 | add PC, PC, #4
3309 | add TMP3, PC, TMP3, lsl #2 3332 | add TMP3, PC, TMP3, lsl #2
3310 | and TAB:RB, RB, #LJ_GCVMASK 3333 | and TAB:RB, RB, #LJ_GCVMASK
@@ -3323,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3323 | stp CARG1, TMP0, [RA] 3346 | stp CARG1, TMP0, [RA]
3324 | add CARG1, CARG1, #1 3347 | add CARG1, CARG1, #1
3325 |3: 3348 |3:
3326 | str CARG1w, [RA, #-8] // Update control var. 3349 | str CARG1w, [RA, #-8+LO] // Update control var.
3327 | mov PC, TMP3 3350 | mov PC, TMP3
3328 |4: 3351 |4:
3329 | ins_next 3352 | ins_next
@@ -3369,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3369 |5: // Despecialize bytecode if any of the checks fail. 3392 |5: // Despecialize bytecode if any of the checks fail.
3370 | mov TMP0, #BC_JMP 3393 | mov TMP0, #BC_JMP
3371 | mov TMP1, #BC_ITERC 3394 | mov TMP1, #BC_ITERC
3372 | strb TMP0w, [PC, #-4] 3395 | strb TMP0w, [PC, #-4+OFS_OP]
3373 | strb TMP1w, [RC] 3396 | strb TMP1w, [RC, # OFS_OP]
3374 | b <1 3397 | b <1
3375 break; 3398 break;
3376 3399
@@ -3576,7 +3599,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3576 | csel PC, RC, PC, gt 3599 | csel PC, RC, PC, gt
3577 } else if (op == BC_JFORI) { 3600 } else if (op == BC_JFORI) {
3578 | mov PC, RC 3601 | mov PC, RC
3579 | ldrh RCw, [RC, #-2] 3602 | ldrh RCw, [RC, #-4+OFS_RD]
3580 } else if (op == BC_IFORL) { 3603 } else if (op == BC_IFORL) {
3581 | csel PC, RC, PC, le 3604 | csel PC, RC, PC, le
3582 } 3605 }
@@ -3617,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3617 if (op == BC_FORI) { 3640 if (op == BC_FORI) {
3618 | csel PC, RC, PC, hi 3641 | csel PC, RC, PC, hi
3619 } else if (op == BC_JFORI) { 3642 } else if (op == BC_JFORI) {
3620 | ldrh RCw, [RC, #-2] 3643 | ldrh RCw, [RC, #-4+OFS_RD]
3621 | bls =>BC_JLOOP 3644 | bls =>BC_JLOOP
3622 } else if (op == BC_IFORL) { 3645 } else if (op == BC_IFORL) {
3623 | csel PC, RC, PC, ls 3646 | csel PC, RC, PC, ls