diff options
author | Mike Pall <mike> | 2017-03-30 11:17:15 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2017-03-30 11:30:01 +0200 |
commit | 3143b218946395834f0bfef741061ac6ef3f5b56 (patch) | |
tree | 07721c6a94616eda13799a6027b00f3e3fc999a4 | |
parent | 78f5f1cef19502289604299e4e6d00e14411f764 (diff) | |
download | luajit-3143b218946395834f0bfef741061ac6ef3f5b56.tar.gz luajit-3143b218946395834f0bfef741061ac6ef3f5b56.tar.bz2 luajit-3143b218946395834f0bfef741061ac6ef3f5b56.zip |
ARM64: Add big-endian support.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | src/Makefile | 3 | ||||
-rw-r--r-- | src/host/buildvm_asm.c | 8 | ||||
-rw-r--r-- | src/jit/bcsave.lua | 8 | ||||
-rw-r--r-- | src/jit/dis_arm64be.lua | 12 | ||||
-rw-r--r-- | src/lj_arch.h | 10 | ||||
-rw-r--r-- | src/lj_asm.c | 3 | ||||
-rw-r--r-- | src/lj_asm_arm64.h | 42 | ||||
-rw-r--r-- | src/lj_ccall.c | 20 | ||||
-rw-r--r-- | src/lj_ccall.h | 4 | ||||
-rw-r--r-- | src/lj_ccallback.c | 18 | ||||
-rw-r--r-- | src/lj_emit_arm64.h | 2 | ||||
-rw-r--r-- | src/lj_target_arm64.h | 9 | ||||
-rw-r--r-- | src/vm_arm64.dasc | 69 |
14 files changed, 149 insertions, 63 deletions
diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile | |||
@@ -87,8 +87,8 @@ FILE_PC= luajit.pc | |||
87 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h | 87 | FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h |
88 | FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ | 88 | FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ |
89 | dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ | 89 | dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ |
90 | dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \ | 90 | dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ |
91 | dis_mips64el.lua vmdef.lua | 91 | dis_mips64.lua dis_mips64el.lua vmdef.lua |
92 | 92 | ||
93 | ifeq (,$(findstring Windows,$(OS))) | 93 | ifeq (,$(findstring Windows,$(OS))) |
94 | HOST_SYS:= $(shell uname -s) | 94 | HOST_SYS:= $(shell uname -s) |
diff --git a/src/Makefile b/src/Makefile index 7cb4c14a..f56465d1 100644 --- a/src/Makefile +++ b/src/Makefile | |||
@@ -242,6 +242,9 @@ ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) | |||
242 | TARGET_LJARCH= arm | 242 | TARGET_LJARCH= arm |
243 | else | 243 | else |
244 | ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) | 244 | ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) |
245 | ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) | ||
246 | TARGET_ARCH= -D__AARCH64EB__=1 | ||
247 | endif | ||
245 | TARGET_LJARCH= arm64 | 248 | TARGET_LJARCH= arm64 |
246 | else | 249 | else |
247 | ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) | 250 | ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) |
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index addf281f..1a633602 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c | |||
@@ -93,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) | |||
93 | { | 93 | { |
94 | int i; | 94 | int i; |
95 | for (i = 0; i < n; i += 4) { | 95 | for (i = 0; i < n; i += 4) { |
96 | uint32_t ins = *(uint32_t *)(p+i); | ||
97 | #if LJ_TARGET_ARM64 && LJ_BE | ||
98 | ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ | ||
99 | #endif | ||
96 | if ((i & 15) == 0) | 100 | if ((i & 15) == 0) |
97 | fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); | 101 | fprintf(ctx->fp, "\t.long 0x%08x", ins); |
98 | else | 102 | else |
99 | fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); | 103 | fprintf(ctx->fp, ",0x%08x", ins); |
100 | if ((i & 15) == 12) putc('\n', ctx->fp); | 104 | if ((i & 15) == 12) putc('\n', ctx->fp); |
101 | } | 105 | } |
102 | if ((n & 15) != 0) putc('\n', ctx->fp); | 106 | if ((n & 15) != 0) putc('\n', ctx->fp); |
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 9ee22a01..c17c88e0 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua | |||
@@ -63,8 +63,8 @@ local map_type = { | |||
63 | } | 63 | } |
64 | 64 | ||
65 | local map_arch = { | 65 | local map_arch = { |
66 | x86 = true, x64 = true, arm = true, arm64 = true, ppc = true, | 66 | x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, |
67 | mips = true, mipsel = true, | 67 | ppc = true, mips = true, mipsel = true, |
68 | } | 68 | } |
69 | 69 | ||
70 | local map_os = { | 70 | local map_os = { |
@@ -200,7 +200,7 @@ typedef struct { | |||
200 | ]] | 200 | ]] |
201 | local symname = LJBC_PREFIX..ctx.modname | 201 | local symname = LJBC_PREFIX..ctx.modname |
202 | local is64, isbe = false, false | 202 | local is64, isbe = false, false |
203 | if ctx.arch == "x64" or ctx.arch == "arm64" then | 203 | if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then |
204 | is64 = true | 204 | is64 = true |
205 | elseif ctx.arch == "ppc" or ctx.arch == "mips" then | 205 | elseif ctx.arch == "ppc" or ctx.arch == "mips" then |
206 | isbe = true | 206 | isbe = true |
@@ -237,7 +237,7 @@ typedef struct { | |||
237 | hdr.eendian = isbe and 2 or 1 | 237 | hdr.eendian = isbe and 2 or 1 |
238 | hdr.eversion = 1 | 238 | hdr.eversion = 1 |
239 | hdr.type = f16(1) | 239 | hdr.type = f16(1) |
240 | hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) | 240 | hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) |
241 | if ctx.arch == "mips" or ctx.arch == "mipsel" then | 241 | if ctx.arch == "mips" or ctx.arch == "mipsel" then |
242 | hdr.flags = f32(0x50001006) | 242 | hdr.flags = f32(0x50001006) |
243 | end | 243 | end |
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua new file mode 100644 index 00000000..7eb389e2 --- /dev/null +++ b/src/jit/dis_arm64be.lua | |||
@@ -0,0 +1,12 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT ARM64BE disassembler wrapper module. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2017 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- ARM64 instructions are always little-endian. So just forward to the | ||
8 | -- common ARM64 disassembler module. All the interesting stuff is there. | ||
9 | ------------------------------------------------------------------------------ | ||
10 | |||
11 | return require((string.match(..., ".*%.") or "").."dis_arm64") | ||
12 | |||
diff --git a/src/lj_arch.h b/src/lj_arch.h index 40509607..fe558157 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -215,9 +215,14 @@ | |||
215 | 215 | ||
216 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 | 216 | #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 |
217 | 217 | ||
218 | #define LJ_ARCH_NAME "arm64" | ||
219 | #define LJ_ARCH_BITS 64 | 218 | #define LJ_ARCH_BITS 64 |
219 | #if defined(__AARCH64EB__) | ||
220 | #define LJ_ARCH_NAME "arm64be" | ||
221 | #define LJ_ARCH_ENDIAN LUAJIT_BE | ||
222 | #else | ||
223 | #define LJ_ARCH_NAME "arm64" | ||
220 | #define LJ_ARCH_ENDIAN LUAJIT_LE | 224 | #define LJ_ARCH_ENDIAN LUAJIT_LE |
225 | #endif | ||
221 | #define LJ_TARGET_ARM64 1 | 226 | #define LJ_TARGET_ARM64 1 |
222 | #define LJ_TARGET_EHRETREG 0 | 227 | #define LJ_TARGET_EHRETREG 0 |
223 | #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ | 228 | #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ |
@@ -409,9 +414,6 @@ | |||
409 | #error "Only ARM EABI or iOS 3.0+ ABI is supported" | 414 | #error "Only ARM EABI or iOS 3.0+ ABI is supported" |
410 | #endif | 415 | #endif |
411 | #elif LJ_TARGET_ARM64 | 416 | #elif LJ_TARGET_ARM64 |
412 | #if defined(__AARCH64EB__) | ||
413 | #error "No support for big-endian ARM64" | ||
414 | #endif | ||
415 | #if defined(_ILP32) | 417 | #if defined(_ILP32) |
416 | #error "No support for ILP32 model on ARM64" | 418 | #error "No support for ILP32 model on ARM64" |
417 | #endif | 419 | #endif |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 7c09dd9f..c2cf5a95 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -2393,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
2393 | if (!as->loopref) | 2393 | if (!as->loopref) |
2394 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ | 2394 | asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ |
2395 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); | 2395 | T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); |
2396 | #if LJ_TARGET_MCODE_FIXUP | ||
2397 | asm_mcode_fixup(T->mcode, T->szmcode); | ||
2398 | #endif | ||
2396 | lj_mcode_sync(T->mcode, origtop); | 2399 | lj_mcode_sync(T->mcode, origtop); |
2397 | } | 2400 | } |
2398 | 2401 | ||
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index b58ab3a1..8fd92e76 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | |||
56 | asm_mclimit(as); | 56 | asm_mclimit(as); |
57 | /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ | 57 | /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ |
58 | for (i = nexits-1; (int32_t)i >= 0; i--) | 58 | for (i = nexits-1; (int32_t)i >= 0; i--) |
59 | *--mxp = A64I_BL|((-3-i)&0x03ffffffu); | 59 | *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu)); |
60 | *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); | 60 | *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno)); |
61 | mxp--; | 61 | mxp--; |
62 | *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); | 62 | *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu)); |
63 | *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); | 63 | *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP)); |
64 | as->mctop = mxp; | 64 | as->mctop = mxp; |
65 | } | 65 | } |
66 | 66 | ||
@@ -431,7 +431,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
431 | fpr++; | 431 | fpr++; |
432 | } else { | 432 | } else { |
433 | Reg r = ra_alloc1(as, ref, RSET_FPR); | 433 | Reg r = ra_alloc1(as, ref, RSET_FPR); |
434 | emit_spstore(as, ir, r, ofs); | 434 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); |
435 | ofs += 8; | 435 | ofs += 8; |
436 | } | 436 | } |
437 | } else { | 437 | } else { |
@@ -441,7 +441,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
441 | gpr++; | 441 | gpr++; |
442 | } else { | 442 | } else { |
443 | Reg r = ra_alloc1(as, ref, RSET_GPR); | 443 | Reg r = ra_alloc1(as, ref, RSET_GPR); |
444 | emit_spstore(as, ir, r, ofs); | 444 | emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); |
445 | ofs += 8; | 445 | ofs += 8; |
446 | } | 446 | } |
447 | } | 447 | } |
@@ -1082,7 +1082,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
1082 | src = ra_alloc1(as, ir->op2, allow); | 1082 | src = ra_alloc1(as, ir->op2, allow); |
1083 | rset_clear(allow, src); | 1083 | rset_clear(allow, src); |
1084 | if (irt_isinteger(ir->t)) | 1084 | if (irt_isinteger(ir->t)) |
1085 | type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); | 1085 | type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow); |
1086 | else | 1086 | else |
1087 | type = ra_allock(as, irt_toitype(ir->t), allow); | 1087 | type = ra_allock(as, irt_toitype(ir->t), allow); |
1088 | } else { | 1088 | } else { |
@@ -1179,7 +1179,8 @@ dotypecheck: | |||
1179 | } | 1179 | } |
1180 | if (ra_hasreg(dest)) { | 1180 | if (ra_hasreg(dest)) { |
1181 | emit_lso(as, irt_isnum(t) ? A64I_LDRd : | 1181 | emit_lso(as, irt_isnum(t) ? A64I_LDRd : |
1182 | (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs); | 1182 | (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, |
1183 | ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0))); | ||
1183 | } | 1184 | } |
1184 | } | 1185 | } |
1185 | 1186 | ||
@@ -1909,7 +1910,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
1909 | /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ | 1910 | /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ |
1910 | int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); | 1911 | int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); |
1911 | if (spadj == 0) { | 1912 | if (spadj == 0) { |
1912 | *--p = A64I_NOP; | 1913 | *--p = A64I_LE(A64I_NOP); |
1913 | as->mctop = p; | 1914 | as->mctop = p; |
1914 | } else { | 1915 | } else { |
1915 | /* Patch stack adjustment. */ | 1916 | /* Patch stack adjustment. */ |
@@ -1962,6 +1963,19 @@ static void asm_setup_target(ASMState *as) | |||
1962 | asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); | 1963 | asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); |
1963 | } | 1964 | } |
1964 | 1965 | ||
1966 | #if LJ_BE | ||
1967 | /* ARM64 instructions are always little-endian. Swap for ARM64BE. */ | ||
1968 | static void asm_mcode_fixup(MCode *mcode, MSize size) | ||
1969 | { | ||
1970 | MCode *pe = (MCode *)((char *)mcode + size); | ||
1971 | while (mcode < pe) { | ||
1972 | MCode ins = *mcode; | ||
1973 | *mcode++ = lj_bswap(ins); | ||
1974 | } | ||
1975 | } | ||
1976 | #define LJ_TARGET_MCODE_FIXUP 1 | ||
1977 | #endif | ||
1978 | |||
1965 | /* -- Trace patching ------------------------------------------------------ */ | 1979 | /* -- Trace patching ------------------------------------------------------ */ |
1966 | 1980 | ||
1967 | /* Patch exit jumps of existing machine code to a new target. */ | 1981 | /* Patch exit jumps of existing machine code to a new target. */ |
@@ -1974,29 +1988,29 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
1974 | MCode *px = exitstub_trace_addr(T, exitno); | 1988 | MCode *px = exitstub_trace_addr(T, exitno); |
1975 | for (; p < pe; p++) { | 1989 | for (; p < pe; p++) { |
1976 | /* Look for exitstub branch, replace with branch to target. */ | 1990 | /* Look for exitstub branch, replace with branch to target. */ |
1977 | uint32_t ins = *p; | 1991 | MCode ins = A64I_LE(*p); |
1978 | if ((ins & 0xff000000u) == 0x54000000u && | 1992 | if ((ins & 0xff000000u) == 0x54000000u && |
1979 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { | 1993 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { |
1980 | /* Patch bcc exitstub. */ | 1994 | /* Patch bcc exitstub. */ |
1981 | *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); | 1995 | *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u)); |
1982 | cend = p+1; | 1996 | cend = p+1; |
1983 | if (!cstart) cstart = p; | 1997 | if (!cstart) cstart = p; |
1984 | } else if ((ins & 0xfc000000u) == 0x14000000u && | 1998 | } else if ((ins & 0xfc000000u) == 0x14000000u && |
1985 | ((ins ^ (px-p)) & 0x03ffffffu) == 0) { | 1999 | ((ins ^ (px-p)) & 0x03ffffffu) == 0) { |
1986 | /* Patch b exitstub. */ | 2000 | /* Patch b exitstub. */ |
1987 | *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); | 2001 | *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu)); |
1988 | cend = p+1; | 2002 | cend = p+1; |
1989 | if (!cstart) cstart = p; | 2003 | if (!cstart) cstart = p; |
1990 | } else if ((ins & 0x7e000000u) == 0x34000000u && | 2004 | } else if ((ins & 0x7e000000u) == 0x34000000u && |
1991 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { | 2005 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { |
1992 | /* Patch cbz/cbnz exitstub. */ | 2006 | /* Patch cbz/cbnz exitstub. */ |
1993 | *p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u); | 2007 | *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u)); |
1994 | cend = p+1; | 2008 | cend = p+1; |
1995 | if (!cstart) cstart = p; | 2009 | if (!cstart) cstart = p; |
1996 | } else if ((ins & 0x7e000000u) == 0x36000000u && | 2010 | } else if ((ins & 0x7e000000u) == 0x36000000u && |
1997 | ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { | 2011 | ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { |
1998 | /* Patch tbz/tbnz exitstub. */ | 2012 | /* Patch tbz/tbnz exitstub. */ |
1999 | *p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u); | 2013 | *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u)); |
2000 | cend = p+1; | 2014 | cend = p+1; |
2001 | if (!cstart) cstart = p; | 2015 | if (!cstart) cstart = p; |
2002 | } | 2016 | } |
diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 2b7ca364..5c252e5b 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c | |||
@@ -301,7 +301,7 @@ | |||
301 | unsigned int cl = ccall_classify_struct(cts, ctr); \ | 301 | unsigned int cl = ccall_classify_struct(cts, ctr); \ |
302 | if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ | 302 | if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ |
303 | CTSize i = (cl >> 8) - 1; \ | 303 | CTSize i = (cl >> 8) - 1; \ |
304 | do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ | 304 | do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \ |
305 | } else { \ | 305 | } else { \ |
306 | if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ | 306 | if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ |
307 | memcpy(dp, sp, ctr->size); \ | 307 | memcpy(dp, sp, ctr->size); \ |
@@ -359,6 +359,13 @@ | |||
359 | } \ | 359 | } \ |
360 | } | 360 | } |
361 | 361 | ||
362 | #if LJ_BE | ||
363 | #define CCALL_HANDLE_RET \ | ||
364 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ | ||
365 | sp = (uint8_t *)&cc->fpr[0].f; | ||
366 | #endif | ||
367 | |||
368 | |||
362 | #elif LJ_TARGET_PPC | 369 | #elif LJ_TARGET_PPC |
363 | /* -- PPC calling conventions --------------------------------------------- */ | 370 | /* -- PPC calling conventions --------------------------------------------- */ |
364 | 371 | ||
@@ -1033,9 +1040,16 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, | |||
1033 | *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : | 1040 | *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : |
1034 | (int32_t)*(int16_t *)dp; | 1041 | (int32_t)*(int16_t *)dp; |
1035 | } | 1042 | } |
1043 | #if LJ_TARGET_ARM64 && LJ_BE | ||
1044 | if (isfp && d->size == sizeof(float)) | ||
1045 | ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ | ||
1046 | #endif | ||
1047 | #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) | ||
1048 | if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) | ||
1036 | #if LJ_TARGET_MIPS64 | 1049 | #if LJ_TARGET_MIPS64 |
1037 | if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || | 1050 | || (isfp && nsp == 0) |
1038 | (isfp && nsp == 0)) && d->size <= 4) { | 1051 | #endif |
1052 | ) && d->size <= 4) { | ||
1039 | *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ | 1053 | *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ |
1040 | } | 1054 | } |
1041 | #endif | 1055 | #endif |
diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 34e800cc..59f66481 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h | |||
@@ -79,8 +79,8 @@ typedef union FPRArg { | |||
79 | typedef intptr_t GPRArg; | 79 | typedef intptr_t GPRArg; |
80 | typedef union FPRArg { | 80 | typedef union FPRArg { |
81 | double d; | 81 | double d; |
82 | float f; | 82 | struct { LJ_ENDIAN_LOHI(float f; , float g;) }; |
83 | uint32_t u32; | 83 | struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; |
84 | } FPRArg; | 84 | } FPRArg; |
85 | 85 | ||
86 | #elif LJ_TARGET_PPC | 86 | #elif LJ_TARGET_PPC |
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index fce6a3ed..846827b1 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c | |||
@@ -173,16 +173,16 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
173 | uint32_t *p = page; | 173 | uint32_t *p = page; |
174 | void *target = (void *)lj_vm_ffi_callback; | 174 | void *target = (void *)lj_vm_ffi_callback; |
175 | MSize slot; | 175 | MSize slot; |
176 | *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); | 176 | *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); |
177 | *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); | 177 | *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); |
178 | *p++ = A64I_BR | A64F_N(RID_X11); | 178 | *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); |
179 | *p++ = A64I_NOP; | 179 | *p++ = A64I_LE(A64I_NOP); |
180 | ((void **)p)[0] = target; | 180 | ((void **)p)[0] = target; |
181 | ((void **)p)[1] = g; | 181 | ((void **)p)[1] = g; |
182 | p += 4; | 182 | p += 4; |
183 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { | 183 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { |
184 | *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); | 184 | *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); |
185 | *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); | 185 | *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); |
186 | p++; | 186 | p++; |
187 | } | 187 | } |
188 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | 188 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); |
@@ -624,6 +624,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | |||
624 | if (ctype_isfp(ctr->info)) | 624 | if (ctype_isfp(ctr->info)) |
625 | dp = (uint8_t *)&cts->cb.fpr[0]; | 625 | dp = (uint8_t *)&cts->cb.fpr[0]; |
626 | #endif | 626 | #endif |
627 | #if LJ_TARGET_ARM64 && LJ_BE | ||
628 | if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) | ||
629 | dp = (uint8_t *)&cts->cb.fpr[0].f[1]; | ||
630 | #endif | ||
627 | lj_cconv_ct_tv(cts, ctr, dp, o, 0); | 631 | lj_cconv_ct_tv(cts, ctr, dp, o, 0); |
628 | #ifdef CALLBACK_HANDLE_RET | 632 | #ifdef CALLBACK_HANDLE_RET |
629 | CALLBACK_HANDLE_RET | 633 | CALLBACK_HANDLE_RET |
@@ -637,7 +641,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) | |||
637 | *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : | 641 | *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : |
638 | (int32_t)*(int16_t *)dp; | 642 | (int32_t)*(int16_t *)dp; |
639 | } | 643 | } |
640 | #if LJ_TARGET_MIPS64 | 644 | #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) |
641 | /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ | 645 | /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ |
642 | if (ctr->size <= 4 && | 646 | if (ctr->size <= 4 && |
643 | (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) | 647 | (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index cfa18c83..6da4c7d4 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
@@ -140,7 +140,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) | |||
140 | } else { | 140 | } else { |
141 | goto nopair; | 141 | goto nopair; |
142 | } | 142 | } |
143 | if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) { | 143 | if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { |
144 | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | | 144 | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | |
145 | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); | 145 | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); |
146 | return; | 146 | return; |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 3f6bb39b..520023ae 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h | |||
@@ -107,7 +107,7 @@ typedef struct { | |||
107 | /* Return the address of a per-trace exit stub. */ | 107 | /* Return the address of a per-trace exit stub. */ |
108 | static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) | 108 | static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) |
109 | { | 109 | { |
110 | while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */ | 110 | while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ |
111 | return p + 3 + exitno; | 111 | return p + 3 + exitno; |
112 | } | 112 | } |
113 | /* Avoid dependence on lj_jit.h if only including lj_target.h. */ | 113 | /* Avoid dependence on lj_jit.h if only including lj_target.h. */ |
@@ -116,6 +116,13 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) | |||
116 | 116 | ||
117 | /* -- Instructions -------------------------------------------------------- */ | 117 | /* -- Instructions -------------------------------------------------------- */ |
118 | 118 | ||
119 | /* ARM64 instructions are always little-endian. Swap for ARM64BE. */ | ||
120 | #if LJ_BE | ||
121 | #define A64I_LE(x) (lj_bswap(x)) | ||
122 | #else | ||
123 | #define A64I_LE(x) (x) | ||
124 | #endif | ||
125 | |||
119 | /* Instruction fields. */ | 126 | /* Instruction fields. */ |
120 | #define A64F_D(r) (r) | 127 | #define A64F_D(r) (r) |
121 | #define A64F_N(r) ((r) << 5) | 128 | #define A64F_N(r) ((r) << 5) |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index bb2496ab..cfbc61a1 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
@@ -151,6 +151,21 @@ | |||
151 | |.define FRAME_FUNC, #-16 | 151 | |.define FRAME_FUNC, #-16 |
152 | |.define FRAME_PC, #-8 | 152 | |.define FRAME_PC, #-8 |
153 | | | 153 | | |
154 | |// Endian-specific defines. | ||
155 | |.if ENDIAN_LE | ||
156 | |.define LO, 0 | ||
157 | |.define OFS_RD, 2 | ||
158 | |.define OFS_RB, 3 | ||
159 | |.define OFS_RA, 1 | ||
160 | |.define OFS_OP, 0 | ||
161 | |.else | ||
162 | |.define LO, 4 | ||
163 | |.define OFS_RD, 0 | ||
164 | |.define OFS_RB, 0 | ||
165 | |.define OFS_RA, 2 | ||
166 | |.define OFS_OP, 3 | ||
167 | |.endif | ||
168 | | | ||
154 | |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro | 169 | |.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro |
155 | |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro | 170 | |.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro |
156 | |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro | 171 | |.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro |
@@ -717,7 +732,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
717 | | cmp CRET1, #1 | 732 | | cmp CRET1, #1 |
718 | | bhi ->vmeta_binop | 733 | | bhi ->vmeta_binop |
719 | |4: | 734 | |4: |
720 | | ldrh RBw, [PC, #2] | 735 | | ldrh RBw, [PC, # OFS_RD] |
721 | | add PC, PC, #4 | 736 | | add PC, PC, #4 |
722 | | add RB, PC, RB, lsl #2 | 737 | | add RB, PC, RB, lsl #2 |
723 | | sub RB, RB, #0x20000 | 738 | | sub RB, RB, #0x20000 |
@@ -1500,7 +1515,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1500 | | bne ->fff_fallback | 1515 | | bne ->fff_fallback |
1501 | | checkint CARG1, ->fff_fallback | 1516 | | checkint CARG1, ->fff_fallback |
1502 | | mov CARG3, #1 | 1517 | | mov CARG3, #1 |
1503 | | mov CARG2, BASE // Points to stack. Little-endian. | 1518 | | // Point to the char inside the integer in the stack slot. |
1519 | |.if ENDIAN_LE | ||
1520 | | mov CARG2, BASE | ||
1521 | |.else | ||
1522 | | add CARG2, BASE, #7 | ||
1523 | |.endif | ||
1504 | |->fff_newstr: | 1524 | |->fff_newstr: |
1505 | | // CARG2 = str, CARG3 = len. | 1525 | | // CARG2 = str, CARG3 = len. |
1506 | | str BASE, L->base | 1526 | | str BASE, L->base |
@@ -1703,7 +1723,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1703 | | ands TMP0, PC, #FRAME_TYPE | 1723 | | ands TMP0, PC, #FRAME_TYPE |
1704 | | and TMP1, PC, #~FRAME_TYPEP | 1724 | | and TMP1, PC, #~FRAME_TYPEP |
1705 | | bne >3 | 1725 | | bne >3 |
1706 | | ldrb RAw, [PC, #-3] | 1726 | | ldrb RAw, [PC, #-4+OFS_RA] |
1707 | | lsl RA, RA, #3 | 1727 | | lsl RA, RA, #3 |
1708 | | add TMP1, RA, #16 | 1728 | | add TMP1, RA, #16 |
1709 | |3: | 1729 | |3: |
@@ -1838,7 +1858,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1838 | |->cont_stitch: // Trace stitching. | 1858 | |->cont_stitch: // Trace stitching. |
1839 | |.if JIT | 1859 | |.if JIT |
1840 | | // RA = resultptr, CARG4 = meta base | 1860 | | // RA = resultptr, CARG4 = meta base |
1841 | | ldr RB, SAVE_MULTRES | 1861 | | ldr RBw, SAVE_MULTRES |
1842 | | ldr INSw, [PC, #-4] | 1862 | | ldr INSw, [PC, #-4] |
1843 | | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | 1863 | | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. |
1844 | | subs RB, RB, #8 | 1864 | | subs RB, RB, #8 |
@@ -1869,7 +1889,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1869 | | | 1889 | | |
1870 | | // Stitch a new trace to the previous trace. | 1890 | | // Stitch a new trace to the previous trace. |
1871 | | mov CARG1, #GL_J(exitno) | 1891 | | mov CARG1, #GL_J(exitno) |
1872 | | str RA, [GL, CARG1] | 1892 | | str RAw, [GL, CARG1] |
1873 | | mov CARG1, #GL_J(L) | 1893 | | mov CARG1, #GL_J(L) |
1874 | | str L, [GL, CARG1] | 1894 | | str L, [GL, CARG1] |
1875 | | str BASE, L->base | 1895 | | str BASE, L->base |
@@ -1936,6 +1956,9 @@ static void build_subroutines(BuildCtx *ctx) | |||
1936 | | sub CARG1, CARG1, #2 | 1956 | | sub CARG1, CARG1, #2 |
1937 | | ldr CARG2w, [lr] // Load trace number. | 1957 | | ldr CARG2w, [lr] // Load trace number. |
1938 | | st_vmstate CARG4 | 1958 | | st_vmstate CARG4 |
1959 | |.if ENDIAN_BE | ||
1960 | | rev32 CARG2, CARG2 | ||
1961 | |.endif | ||
1939 | | str BASE, L->base | 1962 | | str BASE, L->base |
1940 | | ubfx CARG2w, CARG2w, #5, #16 | 1963 | | ubfx CARG2w, CARG2w, #5, #16 |
1941 | | str CARG1w, [GL, #GL_J(exitno)] | 1964 | | str CARG1w, [GL, #GL_J(exitno)] |
@@ -1967,14 +1990,14 @@ static void build_subroutines(BuildCtx *ctx) | |||
1967 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | 1990 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 |
1968 | | movn TISNIL, #0 | 1991 | | movn TISNIL, #0 |
1969 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | 1992 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK |
1970 | | str RC, SAVE_MULTRES | 1993 | | str RCw, SAVE_MULTRES |
1971 | | str BASE, L->base | 1994 | | str BASE, L->base |
1972 | | ldr CARG2, LFUNC:CARG2->pc | 1995 | | ldr CARG2, LFUNC:CARG2->pc |
1973 | | str xzr, GL->jit_base | 1996 | | str xzr, GL->jit_base |
1974 | | mv_vmstate CARG4, INTERP | 1997 | | mv_vmstate CARG4, INTERP |
1975 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | 1998 | | ldr KBASE, [CARG2, #PC2PROTO(k)] |
1976 | | // Modified copy of ins_next which handles function header dispatch, too. | 1999 | | // Modified copy of ins_next which handles function header dispatch, too. |
1977 | | ldrb RBw, [PC] | 2000 | | ldrb RBw, [PC, # OFS_OP] |
1978 | | ldr INSw, [PC], #4 | 2001 | | ldr INSw, [PC], #4 |
1979 | | st_vmstate CARG4 | 2002 | | st_vmstate CARG4 |
1980 | | cmp RBw, #BC_FUNCC+2 // Fast function? | 2003 | | cmp RBw, #BC_FUNCC+2 // Fast function? |
@@ -2000,7 +2023,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
2000 | | ands CARG2, CARG1, #FRAME_TYPE | 2023 | | ands CARG2, CARG1, #FRAME_TYPE |
2001 | | bne <2 // Trace stitching continuation? | 2024 | | bne <2 // Trace stitching continuation? |
2002 | | // Otherwise set KBASE for Lua function below fast function. | 2025 | | // Otherwise set KBASE for Lua function below fast function. |
2003 | | ldr CARG3, [CARG1, #-4] | 2026 | | ldr CARG3w, [CARG1, #-4] |
2004 | | decode_RA CARG1, CARG3 | 2027 | | decode_RA CARG1, CARG3 |
2005 | | sub CARG2, BASE, CARG1, lsl #3 | 2028 | | sub CARG2, BASE, CARG1, lsl #3 |
2006 | | ldr LFUNC:CARG3, [CARG2, #-32] | 2029 | | ldr LFUNC:CARG3, [CARG2, #-32] |
@@ -2153,7 +2176,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2153 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 2176 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
2154 | | // RA = src1, RC = src2, JMP with RC = target | 2177 | | // RA = src1, RC = src2, JMP with RC = target |
2155 | | ldr CARG1, [BASE, RA, lsl #3] | 2178 | | ldr CARG1, [BASE, RA, lsl #3] |
2156 | | ldrh RBw, [PC, #2] | 2179 | | ldrh RBw, [PC, # OFS_RD] |
2157 | | ldr CARG2, [BASE, RC, lsl #3] | 2180 | | ldr CARG2, [BASE, RC, lsl #3] |
2158 | | add PC, PC, #4 | 2181 | | add PC, PC, #4 |
2159 | | add RB, PC, RB, lsl #2 | 2182 | | add RB, PC, RB, lsl #2 |
@@ -2210,7 +2233,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2210 | | // RA = src1, RC = src2, JMP with RC = target | 2233 | | // RA = src1, RC = src2, JMP with RC = target |
2211 | | ldr CARG1, [BASE, RA, lsl #3] | 2234 | | ldr CARG1, [BASE, RA, lsl #3] |
2212 | | add RC, BASE, RC, lsl #3 | 2235 | | add RC, BASE, RC, lsl #3 |
2213 | | ldrh RBw, [PC, #2] | 2236 | | ldrh RBw, [PC, # OFS_RD] |
2214 | | ldr CARG3, [RC] | 2237 | | ldr CARG3, [RC] |
2215 | | add PC, PC, #4 | 2238 | | add PC, PC, #4 |
2216 | | add RB, PC, RB, lsl #2 | 2239 | | add RB, PC, RB, lsl #2 |
@@ -2271,7 +2294,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2271 | | // RA = src, RC = str_const (~), JMP with RC = target | 2294 | | // RA = src, RC = str_const (~), JMP with RC = target |
2272 | | ldr CARG1, [BASE, RA, lsl #3] | 2295 | | ldr CARG1, [BASE, RA, lsl #3] |
2273 | | mvn RC, RC | 2296 | | mvn RC, RC |
2274 | | ldrh RBw, [PC, #2] | 2297 | | ldrh RBw, [PC, # OFS_RD] |
2275 | | ldr CARG2, [KBASE, RC, lsl #3] | 2298 | | ldr CARG2, [KBASE, RC, lsl #3] |
2276 | | add PC, PC, #4 | 2299 | | add PC, PC, #4 |
2277 | | movn TMP0, #~LJ_TSTR | 2300 | | movn TMP0, #~LJ_TSTR |
@@ -2299,7 +2322,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2299 | | // RA = src, RC = num_const (~), JMP with RC = target | 2322 | | // RA = src, RC = num_const (~), JMP with RC = target |
2300 | | ldr CARG1, [BASE, RA, lsl #3] | 2323 | | ldr CARG1, [BASE, RA, lsl #3] |
2301 | | add RC, KBASE, RC, lsl #3 | 2324 | | add RC, KBASE, RC, lsl #3 |
2302 | | ldrh RBw, [PC, #2] | 2325 | | ldrh RBw, [PC, # OFS_RD] |
2303 | | ldr CARG3, [RC] | 2326 | | ldr CARG3, [RC] |
2304 | | add PC, PC, #4 | 2327 | | add PC, PC, #4 |
2305 | | add RB, PC, RB, lsl #2 | 2328 | | add RB, PC, RB, lsl #2 |
@@ -2359,7 +2382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2359 | vk = op == BC_ISEQP; | 2382 | vk = op == BC_ISEQP; |
2360 | | // RA = src, RC = primitive_type (~), JMP with RC = target | 2383 | | // RA = src, RC = primitive_type (~), JMP with RC = target |
2361 | | ldr TMP0, [BASE, RA, lsl #3] | 2384 | | ldr TMP0, [BASE, RA, lsl #3] |
2362 | | ldrh RBw, [PC, #2] | 2385 | | ldrh RBw, [PC, # OFS_RD] |
2363 | | add PC, PC, #4 | 2386 | | add PC, PC, #4 |
2364 | | add RC, RC, #1 | 2387 | | add RC, RC, #1 |
2365 | | add RB, PC, RB, lsl #2 | 2388 | | add RB, PC, RB, lsl #2 |
@@ -2384,7 +2407,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2384 | 2407 | ||
2385 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: | 2408 | case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: |
2386 | | // RA = dst or unused, RC = src, JMP with RC = target | 2409 | | // RA = dst or unused, RC = src, JMP with RC = target |
2387 | | ldrh RBw, [PC, #2] | 2410 | | ldrh RBw, [PC, # OFS_RD] |
2388 | | ldr TMP0, [BASE, RC, lsl #3] | 2411 | | ldr TMP0, [BASE, RC, lsl #3] |
2389 | | add PC, PC, #4 | 2412 | | add PC, PC, #4 |
2390 | | mov_false TMP1 | 2413 | | mov_false TMP1 |
@@ -2631,7 +2654,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2631 | | str PC, SAVE_PC | 2654 | | str PC, SAVE_PC |
2632 | | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) | 2655 | | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) |
2633 | | // Returns NULL (finished) or TValue * (metamethod). | 2656 | | // Returns NULL (finished) or TValue * (metamethod). |
2634 | | ldrb RBw, [PC, #-1] | 2657 | | ldrb RBw, [PC, #-4+OFS_RB] |
2635 | | ldr BASE, L->base | 2658 | | ldr BASE, L->base |
2636 | | cbnz CRET1, ->vmeta_binop | 2659 | | cbnz CRET1, ->vmeta_binop |
2637 | | ldr TMP0, [BASE, RB, lsl #3] | 2660 | | ldr TMP0, [BASE, RB, lsl #3] |
@@ -3262,7 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3262 | | ins_callt | 3285 | | ins_callt |
3263 | | | 3286 | | |
3264 | |5: // Tailcall to a fast function with a Lua frame below. | 3287 | |5: // Tailcall to a fast function with a Lua frame below. |
3265 | | ldrb RAw, [PC, #-3] | 3288 | | ldrb RAw, [PC, #-4+OFS_RA] |
3266 | | sub CARG1, BASE, RA, lsl #3 | 3289 | | sub CARG1, BASE, RA, lsl #3 |
3267 | | ldr LFUNC:CARG1, [CARG1, #-32] | 3290 | | ldr LFUNC:CARG1, [CARG1, #-32] |
3268 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK | 3291 | | and LFUNC:CARG1, CARG1, #LJ_GCVMASK |
@@ -3303,8 +3326,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3303 | |.endif | 3326 | |.endif |
3304 | | add RA, BASE, RA, lsl #3 | 3327 | | add RA, BASE, RA, lsl #3 |
3305 | | ldr TAB:RB, [RA, #-16] | 3328 | | ldr TAB:RB, [RA, #-16] |
3306 | | ldrh TMP3w, [PC, #2] | 3329 | | ldrh TMP3w, [PC, # OFS_RD] |
3307 | | ldr CARG1w, [RA, #-8] // Get index from control var. | 3330 | | ldr CARG1w, [RA, #-8+LO] // Get index from control var. |
3308 | | add PC, PC, #4 | 3331 | | add PC, PC, #4 |
3309 | | add TMP3, PC, TMP3, lsl #2 | 3332 | | add TMP3, PC, TMP3, lsl #2 |
3310 | | and TAB:RB, RB, #LJ_GCVMASK | 3333 | | and TAB:RB, RB, #LJ_GCVMASK |
@@ -3323,7 +3346,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3323 | | stp CARG1, TMP0, [RA] | 3346 | | stp CARG1, TMP0, [RA] |
3324 | | add CARG1, CARG1, #1 | 3347 | | add CARG1, CARG1, #1 |
3325 | |3: | 3348 | |3: |
3326 | | str CARG1w, [RA, #-8] // Update control var. | 3349 | | str CARG1w, [RA, #-8+LO] // Update control var. |
3327 | | mov PC, TMP3 | 3350 | | mov PC, TMP3 |
3328 | |4: | 3351 | |4: |
3329 | | ins_next | 3352 | | ins_next |
@@ -3369,8 +3392,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3369 | |5: // Despecialize bytecode if any of the checks fail. | 3392 | |5: // Despecialize bytecode if any of the checks fail. |
3370 | | mov TMP0, #BC_JMP | 3393 | | mov TMP0, #BC_JMP |
3371 | | mov TMP1, #BC_ITERC | 3394 | | mov TMP1, #BC_ITERC |
3372 | | strb TMP0w, [PC, #-4] | 3395 | | strb TMP0w, [PC, #-4+OFS_OP] |
3373 | | strb TMP1w, [RC] | 3396 | | strb TMP1w, [RC, # OFS_OP] |
3374 | | b <1 | 3397 | | b <1 |
3375 | break; | 3398 | break; |
3376 | 3399 | ||
@@ -3576,7 +3599,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3576 | | csel PC, RC, PC, gt | 3599 | | csel PC, RC, PC, gt |
3577 | } else if (op == BC_JFORI) { | 3600 | } else if (op == BC_JFORI) { |
3578 | | mov PC, RC | 3601 | | mov PC, RC |
3579 | | ldrh RCw, [RC, #-2] | 3602 | | ldrh RCw, [RC, #-4+OFS_RD] |
3580 | } else if (op == BC_IFORL) { | 3603 | } else if (op == BC_IFORL) { |
3581 | | csel PC, RC, PC, le | 3604 | | csel PC, RC, PC, le |
3582 | } | 3605 | } |
@@ -3617,7 +3640,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3617 | if (op == BC_FORI) { | 3640 | if (op == BC_FORI) { |
3618 | | csel PC, RC, PC, hi | 3641 | | csel PC, RC, PC, hi |
3619 | } else if (op == BC_JFORI) { | 3642 | } else if (op == BC_JFORI) { |
3620 | | ldrh RCw, [RC, #-2] | 3643 | | ldrh RCw, [RC, #-4+OFS_RD] |
3621 | | bls =>BC_JLOOP | 3644 | | bls =>BC_JLOOP |
3622 | } else if (op == BC_IFORL) { | 3645 | } else if (op == BC_IFORL) { |
3623 | | csel PC, RC, PC, ls | 3646 | | csel PC, RC, PC, ls |