From 94d0b53004a5fa368defa4307a17edcdb87fe727 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 20 Jan 2020 22:15:45 +0100 Subject: MIPS: Add MIPS64 R6 port. Contributed by Hua Zhang, YunQiang Su from Wave Computing, and Radovan Birdic from RT-RK. Sponsored by Wave Computing. --- src/Makefile | 3 + src/jit/bcsave.lua | 84 +++++----- src/jit/dis_mips.lua | 293 ++++++++++++++++++++++++++++++++--- src/jit/dis_mips64r6.lua | 17 +++ src/jit/dis_mips64r6el.lua | 17 +++ src/lj_arch.h | 29 +++- src/lj_asm.c | 2 +- src/lj_asm_mips.h | 114 ++++++++++++-- src/lj_emit_mips.h | 15 +- src/lj_jit.h | 8 + src/lj_target_mips.h | 52 ++++++- src/vm_mips64.dasc | 370 +++++++++++++++++++++++++++++++++++++++++---- 12 files changed, 889 insertions(+), 115 deletions(-) create mode 100644 src/jit/dis_mips64r6.lua create mode 100644 src/jit/dis_mips64r6el.lua (limited to 'src') diff --git a/src/Makefile b/src/Makefile index ad80642b..386f279f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -445,6 +445,9 @@ ifeq (arm,$(TARGET_LJARCH)) DASM_AFLAGS+= -D IOS endif else +ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D MIPSR6 +endif ifeq (ppc,$(TARGET_LJARCH)) ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D SQRT diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 2553d97e..41081184 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -17,6 +17,10 @@ local bit = require("bit") -- Symbol name prefix for LuaJIT bytecode. local LJBC_PREFIX = "luaJIT_BC_" +local type, assert = type, assert +local format = string.format +local tremove, tconcat = table.remove, table.concat + ------------------------------------------------------------------------------ local function usage() @@ -63,8 +67,18 @@ local map_type = { } local map_arch = { - x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, - ppc = true, mips = true, mipsel = true, + x86 = { e = "le", b = 32, m = 3, p = 0x14c, }, + x64 = { e = "le", b = 64, m = 62, p = 0x8664, }, + arm = { e = "le", b = 32, m = 40, p = 0x1c0, }, + arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, }, + arm64be = { e = "be", b = 64, m = 183, }, + ppc = { e = "be", b = 32, m = 20, }, + mips = { e = "be", b = 32, m = 8, f = 0x50001006, }, + mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, }, + mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, }, + mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, + mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, + mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, } local map_os = { @@ -73,33 +87,33 @@ local map_os = { } local function checkarg(str, map, err) - str = string.lower(str) + str = str:lower() local s = check(map[str], "unknown ", err) - return s == true and str or s + return type(s) == "string" and s or str end local function detecttype(str) - local ext = string.match(string.lower(str), "%.(%a+)$") + local ext = str:lower():match("%.(%a+)$") return map_type[ext] or "raw" end local function checkmodname(str) - check(string.match(str, "^[%w_.%-]+$"), "bad module name") - return string.gsub(str, "[%.%-]", "_") + check(str:match("^[%w_.%-]+$"), "bad module name") + return str:gsub("[%.%-]", "_") end local function detectmodname(str) if type(str) == "string" then - local tail = string.match(str, "[^/\\]+$") + local tail = str:match("[^/\\]+$") if tail then str = tail end - local head = string.match(str, "^(.*)%.[^.]*$") + local head = str:match("^(.*)%.[^.]*$") if head then str = head end - str = string.match(str, "^[%w_.%-]+") + str = str:match("^[%w_.%-]+") else str = nil end check(str, "cannot derive module name, use -n name") - return string.gsub(str, "[%.%-]", "_") + return str:gsub("[%.%-]", "_") end ------------------------------------------------------------------------------ @@ -118,7 +132,7 @@ end local function bcsave_c(ctx, output, s) local fp = savefile(output, "w") if ctx.type == "c" then - fp:write(string.format([[ + fp:write(format([[ #ifdef _cplusplus extern "C" #endif @@ -128,7 +142,7 @@ __declspec(dllexport) const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname)) else - fp:write(string.format([[ + fp:write(format([[ #define %s%s_SIZE %d static const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) @@ -138,13 +152,13 @@ static const unsigned char %s%s[] = { local b = tostring(string.byte(s, i)) m = m + #b + 1 if m > 78 then - fp:write(table.concat(t, ",", 1, n), ",\n") + fp:write(tconcat(t, ",", 1, n), ",\n") n, m = 0, #b + 1 end n = n + 1 t[n] = b end - bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") + bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n") end local function bcsave_elfobj(ctx, output, s, ffi) @@ -199,12 +213,8 @@ typedef struct { } ELF64obj; ]] local symname = LJBC_PREFIX..ctx.modname - local is64, isbe = false, false - if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then - is64 = true - elseif ctx.arch == "ppc" or ctx.arch == "mips" then - isbe = true - end + local ai = assert(map_arch[ctx.arch]) + local is64, isbe = ai.b == 64, ai.e == "be" -- Handle different host/target endianess. local function f32(x) return x end @@ -237,10 +247,8 @@ typedef struct { hdr.eendian = isbe and 2 or 1 hdr.eversion = 1 hdr.type = f16(1) - hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) - if ctx.arch == "mips" or ctx.arch == "mipsel" then - hdr.flags = f32(0x50001006) - end + hdr.machine = f16(ai.m) + hdr.flags = f32(ai.f or 0) hdr.version = f32(1) hdr.shofs = fofs(ffi.offsetof(o, "sect")) hdr.ehsize = f16(ffi.sizeof(hdr)) @@ -336,12 +344,8 @@ typedef struct { } PEobj; ]] local symname = LJBC_PREFIX..ctx.modname - local is64 = false - if ctx.arch == "x86" then - symname = "_"..symname - elseif ctx.arch == "x64" then - is64 = true - end + local ai = assert(map_arch[ctx.arch]) + local is64 = ai.b == 64 local symexport = " /EXPORT:"..symname..",DATA " -- The file format is always little-endian. Swap if the host is big-endian. @@ -355,7 +359,7 @@ typedef struct { -- Create PE object and fill in header. local o = ffi.new("PEobj") local hdr = o.hdr - hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) + hdr.arch = f16(assert(ai.p)) hdr.nsects = f16(2) hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) hdr.nsyms = f32(6) @@ -605,16 +609,16 @@ local function docmd(...) local n = 1 local list = false local ctx = { - strip = true, arch = jit.arch, os = string.lower(jit.os), + strip = true, arch = jit.arch, os = jit.os:lower(), type = false, modname = false, } while n <= #arg do local a = arg[n] - if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then - table.remove(arg, n) + if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then + tremove(arg, n) if a == "--" then break end for m=2,#a do - local opt = string.sub(a, m, m) + local opt = a:sub(m, m) if opt == "l" then list = true elseif opt == "s" then @@ -627,13 +631,13 @@ local function docmd(...) if n ~= 1 then usage() end arg[1] = check(loadstring(arg[1])) elseif opt == "n" then - ctx.modname = checkmodname(table.remove(arg, n)) + ctx.modname = checkmodname(tremove(arg, n)) elseif opt == "t" then - ctx.type = checkarg(table.remove(arg, n), map_type, "file type") + ctx.type = checkarg(tremove(arg, n), map_type, "file type") elseif opt == "a" then - ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") + ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture") elseif opt == "o" then - ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") + ctx.os = checkarg(tremove(arg, n), map_os, "OS name") else usage() end diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua index a12b8e62..c003b984 100644 --- a/src/jit/dis_mips.lua +++ b/src/jit/dis_mips.lua @@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ------------------------------------------------------------------------------ --- Primary and extended opcode maps +-- Extended opcode maps common to all MIPS releases ------------------------------------------------------------------------------ -local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } +local map_cop0 = { + shift = 25, mask = 1, + [0] = { + shift = 21, mask = 15, + [0] = "mfc0TDW", [4] = "mtc0TDW", + [10] = "rdpgprDT", + [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, + [14] = "wrpgprDT", + }, { + shift = 0, mask = 63, + [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", + [24] = "eret", [31] = "deret", + [32] = "wait", + }, +} + +------------------------------------------------------------------------------ +-- Primary and extended opcode maps for MIPS R1-R5 +------------------------------------------------------------------------------ + +local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } + local map_special = { shift = 0, mask = 63, [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, @@ -87,22 +108,6 @@ local map_regimm = { false, false, false, "synciSO", } -local map_cop0 = { - shift = 25, mask = 1, - [0] = { - shift = 21, mask = 15, - [0] = "mfc0TDW", [4] = "mtc0TDW", - [10] = "rdpgprDT", - [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, - [14] = "wrpgprDT", - }, { - shift = 0, mask = 63, - [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", - [24] = "eret", [31] = "deret", - [32] = "wait", - }, -} - local map_cop1s = { shift = 0, mask = 63, [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", @@ -233,6 +238,208 @@ local map_pri = { false, "sdc1HSO", "sdc2TSO", "sdTSO", } +------------------------------------------------------------------------------ +-- Primary and extended opcode maps for MIPS R6 +------------------------------------------------------------------------------ + +local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" } +local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" } +local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" } +local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" } +local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" } +local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" } +local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" } +local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" } + +local map_special_r6 = { + shift = 0, mask = 63, + [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, + false, map_srl, "sraDTA", + "sllvDTS", false, map_srlv, "sravDTS", + "jrS", "jalrD1S", false, false, + "syscallY", "breakY", false, "sync", + "clzDS", "cloDS", "dclzDS", "dcloDS", + "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST", + map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6, + map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6, + "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", + "andDST", "or|moveDST0", "xorDST", "nor|notDST0", + false, false, "sltDST", "sltuDST", + "daddDST", "dadduDST", "dsubDST", "dsubuDST", + "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", + "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST", + "dsllDTA", false, "dsrlDTA", "dsraDTA", + "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", +} + +local map_bshfl_r6 = { + shift = 9, mask = 3, + [1] = "alignDSTa", + _ = { + shift = 6, mask = 31, + [0] = "bitswapDT", + [2] = "wsbhDT", + [16] = "sebDT", + [24] = "sehDT", + } +} + +local map_dbshfl_r6 = { + shift = 9, mask = 3, + [1] = "dalignDSTa", + _ = { + shift = 6, mask = 31, + [0] = "dbitswapDT", + [2] = "dsbhDT", + [5] = "dshdDT", + } +} + +local map_special3_r6 = { + shift = 0, mask = 63, + [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", + [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", + [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD", +} + +local map_regimm_r6 = { + shift = 16, mask = 31, + [0] = "bltzSB", [1] = "bgezSB", + [6] = "dahiSI", [30] = "datiSI", + [23] = "sigrieI", [31] = "synciSO", +} + +local map_pcrel_r6 = { + shift = 19, mask = 3, + [0] = "addiupcS2", "lwpcS2", "lwupcS2", { + shift = 18, mask = 1, + [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" } + } +} + +local map_cop1s_r6 = { + shift = 0, mask = 63, + [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", + "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG", + "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG", + "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG", + "sel.sFGH", false, false, false, + "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH", + "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG", + "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH", + false, "cvt.d.sFG", false, false, + "cvt.w.sFG", "cvt.l.sFG", +} + +local map_cop1d_r6 = { + shift = 0, mask = 63, + [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH", + "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG", + "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG", + "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG", + "sel.dFGH", false, false, false, + "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH", + "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG", + "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH", + "cvt.s.dFG", false, false, false, + "cvt.w.dFG", "cvt.l.dFG", +} + +local map_cop1w_r6 = { + shift = 0, mask = 63, + [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH", + "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH", + "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH", + "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH", + false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH", + false, false, false, false, + false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH", + false, false, false, false, + "cvt.s.wFG", "cvt.d.wFG", +} + +local map_cop1l_r6 = { + shift = 0, mask = 63, + [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH", + "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH", + "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH", + "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH", + false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH", + false, false, false, false, + false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH", + false, false, false, false, + "cvt.s.lFG", "cvt.d.lFG", +} + +local map_cop1_r6 = { + shift = 21, mask = 31, + [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", + "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", + false, "bc1eqzHB", false, false, + false, "bc1nezHB", false, false, + map_cop1s_r6, map_cop1d_r6, false, false, + map_cop1w_r6, map_cop1l_r6, +} + +local function maprs_popTS(rs, rt) + if rt == 0 then return 0 elseif rs == 0 then return 1 + elseif rs == rt then return 2 else return 3 end +end + +local map_pop06_r6 = { + maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB" +} +local map_pop07_r6 = { + maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB" +} +local map_pop26_r6 = { + maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB" +} +local map_pop27_r6 = { + maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB" +} + +local function maprs_popS(rs, rt) + if rs == 0 then return 0 else return 1 end +end + +local map_pop66_r6 = { + maprs = maprs_popS, [0] = "jicTI", "beqzcSb" +} +local map_pop76_r6 = { + maprs = maprs_popS, [0] = "jialcTI", "bnezcSb" +} + +local function maprs_popST(rs, rt) + if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end +end + +local map_pop10_r6 = { + maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB" +} +local map_pop30_r6 = { + maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB" +} + +local map_pri_r6 = { + [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ", + "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6, + map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI", + "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U", + map_cop0, map_cop1_r6, false, false, + false, false, map_pop26_r6, map_pop27_r6, + map_pop30_r6, "daddiuTSI", false, false, + false, "dauiTSI", false, map_special3_r6, + "lbTSO", "lhTSO", false, "lwTSO", + "lbuTSO", "lhuTSO", false, false, + "sbTSO", "shTSO", false, "swTSO", + false, false, false, false, + false, "lwc1HSO", "bc#", false, + false, "ldc1HSO", map_pop66_r6, "ldTSO", + false, "swc1HSO", "balc#", map_pcrel_r6, + false, "sdc1HSO", map_pop76_r6, "sdTSO", +} + ------------------------------------------------------------------------------ local map_gpr = { @@ -287,10 +494,14 @@ local function disass_ins(ctx) ctx.op = op ctx.rel = nil - local opat = map_pri[rshift(op, 26)] + local opat = ctx.map_pri[rshift(op, 26)] while type(opat) ~= "string" do if not opat then return unknown(ctx) end - opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + if opat.maprs then + opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))] + else + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end end local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") @@ -314,6 +525,8 @@ local function disass_ins(ctx) x = "f"..band(rshift(op, 21), 31) elseif p == "A" then x = band(rshift(op, 6), 31) + elseif p == "a" then + x = band(rshift(op, 6), 7) elseif p == "E" then x = band(rshift(op, 6), 31) + 32 elseif p == "M" then @@ -333,6 +546,10 @@ local function disass_ins(ctx) x = band(rshift(op, 11), 31) - last + 33 elseif p == "I" then x = arshift(lshift(op, 16), 16) + elseif p == "2" then + x = arshift(lshift(op, 13), 11) + elseif p == "3" then + x = arshift(lshift(op, 14), 11) elseif p == "U" then x = band(op, 0xffff) elseif p == "O" then @@ -342,7 +559,15 @@ local function disass_ins(ctx) local index = map_gpr[band(rshift(op, 16), 31)] operands[#operands] = format("%s(%s)", index, last) elseif p == "B" then - x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 + x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4 + ctx.rel = x + x = format("0x%08x", x) + elseif p == "b" then + x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4 + ctx.rel = x + x = format("0x%08x", x) + elseif p == "#" then + x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4 ctx.rel = x x = format("0x%08x", x) elseif p == "J" then @@ -408,6 +633,7 @@ local function create(code, addr, out) ctx.disass = disass_block ctx.hexdump = 8 ctx.get = get_be + ctx.map_pri = map_pri return ctx end @@ -417,6 +643,19 @@ local function create_el(code, addr, out) return ctx end +local function create_r6(code, addr, out) + local ctx = create(code, addr, out) + ctx.map_pri = map_pri_r6 + return ctx +end + +local function create_r6_el(code, addr, out) + local ctx = create(code, addr, out) + ctx.get = get_le + ctx.map_pri = map_pri_r6 + return ctx +end + -- Simple API: disassemble code (a string) at address and output via out. local function disass(code, addr, out) create(code, addr, out):disass() @@ -426,6 +665,14 @@ local function disass_el(code, addr, out) create_el(code, addr, out):disass() end +local function disass_r6(code, addr, out) + create_r6(code, addr, out):disass() +end + +local function disass_r6_el(code, addr, out) + create_r6_el(code, addr, out):disass() +end + -- Return register name for RID. local function regname(r) if r < 32 then return map_gpr[r] end @@ -436,8 +683,12 @@ end return { create = create, create_el = create_el, + create_r6 = create_r6, + create_r6_el = create_r6_el, disass = disass, disass_el = disass_el, + disass_r6 = disass_r6, + disass_r6_el = disass_r6_el, regname = regname } diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua new file mode 100644 index 00000000..023c05ab --- /dev/null +++ b/src/jit/dis_mips64r6.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64R6 disassembler wrapper module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the r6 big-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_r6, + disass = dis_mips.disass_r6, + regname = dis_mips.regname +} + diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua new file mode 100644 index 00000000..f2988339 --- /dev/null +++ b/src/jit/dis_mips64r6el.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64R6EL disassembler wrapper module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the r6 little-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_r6_el, + disass = dis_mips.disass_r6_el, + regname = dis_mips.regname +} + diff --git a/src/lj_arch.h b/src/lj_arch.h index 903d6c64..cd1a0568 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -330,18 +330,38 @@ #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) +#if __mips_isa_rev >= 6 +#define LJ_TARGET_MIPSR6 1 +#define LJ_TARGET_UNALIGNED 1 +#endif #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips32r6el" +#else #define LJ_ARCH_NAME "mipsel" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips64r6el" #else #define LJ_ARCH_NAME "mips64el" #endif +#endif #define LJ_ARCH_ENDIAN LUAJIT_LE #else #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips32r6" +#else #define LJ_ARCH_NAME "mips" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips64r6" #else #define LJ_ARCH_NAME "mips64" #endif +#endif #define LJ_ARCH_ENDIAN LUAJIT_BE #endif @@ -377,7 +397,9 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_VERSION 60 +#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 #define LJ_ARCH_VERSION 20 #else #define LJ_ARCH_VERSION 10 @@ -453,8 +475,13 @@ #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) #error "Only o32 ABI supported for MIPS32" #endif +#if LJ_TARGET_MIPSR6 +/* Not that useful, since most available r6 CPUs are 64 bit. */ +#error "No support for MIPS32R6" +#endif #elif LJ_TARGET_MIPS64 #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) +/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ #error "Only n64 ABI supported for MIPS64" #endif #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index c4c5dfdd..4f171edd 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2112,8 +2112,8 @@ static void asm_setup_regsp(ASMState *as) ir->prev = REGSP_HINT(RID_FPRET); continue; } - /* fallthrough */ #endif + /* fallthrough */ case IR_CALLN: case IR_CALLXS: #if LJ_SOFTFP case IR_MIN: case IR_MAX: diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 3a4679b8..3dbe836d 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -101,7 +101,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) as->invmcp = NULL; as->loopinv = 1; as->mcp = p+1; +#if !LJ_TARGET_MIPSR6 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ +#else + mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : + (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */ +#endif target = p; /* Patch target later in asm_loop_fixup. */ } emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); @@ -410,7 +415,11 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); + RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) +#if LJ_TARGET_MIPSR6 + |RID2RSET(RID_F21) +#endif + ; if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); ra_destreg(as, ir, RID_FPRET); @@ -444,8 +453,13 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) { Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg dest = ra_dest(as, ir, RSET_GPR); +#if !LJ_TARGET_MIPSR6 asm_guard(as, MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); +#else + asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left); +#endif emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, MIPSI_CVT_W_D, tmp, left); @@ -599,8 +613,13 @@ static void asm_conv(ASMState *as, IRIns *ir) (void *)&as->J->k64[LJ_K64_M2P64], rset_exclude(RSET_GPR, dest)); emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); +#if !LJ_TARGET_MIPSR6 + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp); +#endif emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], rset_exclude(RSET_GPR, dest)); @@ -611,8 +630,13 @@ static void asm_conv(ASMState *as, IRIns *ir) (void *)&as->J->k32[LJ_K32_M2P64], rset_exclude(RSET_GPR, dest)); emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); +#if !LJ_TARGET_MIPSR6 + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp); +#endif emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], rset_exclude(RSET_GPR, dest)); @@ -840,8 +864,12 @@ static void asm_aref(ASMState *as, IRIns *ir) } base = ra_alloc1(as, ir->op1, RSET_GPR); idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); +#if !LJ_TARGET_MIPSR6 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); +#else + emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base); +#endif } /* Inlined hash lookup. Specialized for key type and for const keys. @@ -944,8 +972,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) l_end = asm_exitstub_addr(as); } if (!LJ_SOFTFP && irt_isnum(kt)) { +#if !LJ_TARGET_MIPSR6 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end); + emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key); +#endif *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); @@ -1196,7 +1229,9 @@ static MIPSIns asm_fxloadins(IRIns *ir) case IRT_I16: return MIPSI_LH; case IRT_U16: return MIPSI_LHU; case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1; + /* fallthrough */ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; + /* fallthrough */ default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; } } @@ -1207,7 +1242,9 @@ static MIPSIns asm_fxstoreins(IRIns *ir) case IRT_I8: case IRT_U8: return MIPSI_SB; case IRT_I16: case IRT_U16: return MIPSI_SH; case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1; + /* fallthrough */ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; + /* fallthrough */ default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; } } @@ -1253,7 +1290,7 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + lua_assert(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED)); asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); } @@ -1544,7 +1581,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) ofs -= 4; if (LJ_BE) ir++; else ir--; } #else - emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), + emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow), RID_RET, sizeof(GCcdata)); #endif lua_assert(sz == 4 || sz == 8); @@ -1672,6 +1709,7 @@ static void asm_add(ASMState *as, IRIns *ir) } else #endif { + /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */ Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { @@ -1716,8 +1754,12 @@ static void asm_mul(ASMState *as, IRIns *ir) Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; if (LJ_64 && irt_is64(ir->t)) { +#if !LJ_TARGET_MIPSR6 emit_dst(as, MIPSI_MFLO, dest, 0, 0); emit_dst(as, MIPSI_DMULT, 0, left, right); +#else + emit_dst(as, MIPSI_DMUL, dest, left, right); +#endif } else { emit_dst(as, MIPSI_MUL, dest, left, right); } @@ -1801,6 +1843,7 @@ static void asm_abs(ASMState *as, IRIns *ir) static void asm_arithov(ASMState *as, IRIns *ir) { + /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); lua_assert(!irt_is64(ir->t)); if (irref_isk(ir->op2)) { @@ -1845,9 +1888,14 @@ static void asm_mulov(ASMState *as, IRIns *ir) right), dest)); asm_guard(as, MIPSI_BNE, RID_TMP, tmp); emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); +#if !LJ_TARGET_MIPSR6 emit_dst(as, MIPSI_MFHI, tmp, 0, 0); emit_dst(as, MIPSI_MFLO, dest, 0, 0); emit_dst(as, MIPSI_MULT, 0, left, right); +#else + emit_dst(as, MIPSI_MUL, dest, left, right); + emit_dst(as, MIPSI_MUH, tmp, left, right); +#endif } #if LJ_32 && LJ_HASFFI @@ -2071,6 +2119,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) Reg dest = ra_dest(as, ir, RSET_FPR); Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; +#if !LJ_TARGET_MIPSR6 if (dest == left) { emit_fg(as, MIPSI_MOVT_D, dest, right); } else { @@ -2078,19 +2127,37 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); } emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); +#else + emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); +#endif #endif } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - if (dest == left) { - emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); + if (left == right) { + if (dest != left) emit_move(as, dest, left); } else { - emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); - if (dest != right) emit_move(as, dest, right); +#if !LJ_TARGET_MIPSR6 + if (dest == left) { + emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); + } else { + emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); + if (dest != right) emit_move(as, dest, right); + } +#else + emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); + if (dest != right) { + emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP); + emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP); + } else { + emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP); + emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP); + } +#endif + emit_dst(as, MIPSI_SLT, RID_TMP, + ismax ? left : right, ismax ? right : left); } - emit_dst(as, MIPSI_SLT, RID_TMP, - ismax ? left : right, ismax ? right : left); } } @@ -2174,10 +2241,18 @@ static void asm_comp(ASMState *as, IRIns *ir) #if LJ_SOFTFP asm_sfpcomp(as, ir); #else +#if !LJ_TARGET_MIPSR6 Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); +#else + Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); + asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); +#endif #endif } else { Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); @@ -2213,9 +2288,13 @@ static void asm_equal(ASMState *as, IRIns *ir) if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { #if LJ_SOFTFP asm_sfpcomp(as, ir); -#else +#elif !LJ_TARGET_MIPSR6 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); +#else + Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); + asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right); #endif } else { asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); @@ -2618,7 +2697,12 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && ((p[-1] & 0xf0000000u) == MIPSI_BEQ || (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || - (p[-1] & 0xffe00000u) == MIPSI_BC1F)) { +#if !LJ_TARGET_MIPSR6 + (p[-1] & 0xffe00000u) == MIPSI_BC1F +#else + (p[-1] & 0xff600000u) == MIPSI_BC1EQZ +#endif + )) { ptrdiff_t delta = target - p; if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ patchbranch: diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index bb6593ae..313d030a 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -138,6 +138,7 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) } else if (emit_kdelta1(as, r, (intptr_t)u64)) { return; } else { + /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */ if ((u64 & 0xffff)) { emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); } @@ -236,10 +237,22 @@ static void emit_jmp(ASMState *as, MCode *target) static void emit_call(ASMState *as, void *target, int needcfa) { MCode *p = as->mcp; - *--p = MIPSI_NOP; +#if LJ_TARGET_MIPSR6 + ptrdiff_t delta = (char *)target - (char *)p; + if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */ + *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu); + as->mcp = p; + return; + } +#endif + *--p = MIPSI_NOP; /* Delay slot. */ if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { +#if !LJ_TARGET_MIPSR6 *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | (((uintptr_t)target >>2) & 0x03ffffffu); +#else + *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); +#endif } else { /* Target out of range: need indirect call. */ *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); needcfa = 1; diff --git a/src/lj_jit.h b/src/lj_jit.h index 5d41ef4b..0d9a9afe 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -51,10 +51,18 @@ /* Names for the CPU-specific flags. Must match the order above. */ #define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 #if LJ_TARGET_MIPS32 +#if LJ_TARGET_MIPSR6 +#define JIT_F_CPUSTRING "\010MIPS32R6" +#else #define JIT_F_CPUSTRING "\010MIPS32R2" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define JIT_F_CPUSTRING "\010MIPS64R6" #else #define JIT_F_CPUSTRING "\010MIPS64R2" #endif +#endif #else #define JIT_F_CPU_FIRST 0 #define JIT_F_CPUSTRING "" diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index 740687b3..84db6012 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h @@ -223,6 +223,8 @@ typedef enum MIPSIns { MIPSI_ADDIU = 0x24000000, MIPSI_SUB = 0x00000022, MIPSI_SUBU = 0x00000023, + +#if !LJ_TARGET_MIPSR6 MIPSI_MUL = 0x70000002, MIPSI_DIV = 0x0000001a, MIPSI_DIVU = 0x0000001b, @@ -232,6 +234,15 @@ typedef enum MIPSIns { MIPSI_MFHI = 0x00000010, MIPSI_MFLO = 0x00000012, MIPSI_MULT = 0x00000018, +#else + MIPSI_MUL = 0x00000098, + MIPSI_MUH = 0x000000d8, + MIPSI_DIV = 0x0000009a, + MIPSI_DIVU = 0x0000009b, + + MIPSI_SELEQZ = 0x00000035, + MIPSI_SELNEZ = 0x00000037, +#endif MIPSI_SLL = 0x00000000, MIPSI_SRL = 0x00000002, @@ -253,8 +264,13 @@ typedef enum MIPSIns { MIPSI_B = 0x10000000, MIPSI_J = 0x08000000, MIPSI_JAL = 0x0c000000, +#if !LJ_TARGET_MIPSR6 MIPSI_JALX = 0x74000000, MIPSI_JR = 0x00000008, +#else + MIPSI_JR = 0x00000009, + MIPSI_BALC = 0xe8000000, +#endif MIPSI_JALR = 0x0000f809, MIPSI_BEQ = 0x10000000, @@ -282,15 +298,23 @@ typedef enum MIPSIns { /* MIPS64 instructions. */ MIPSI_DADD = 0x0000002c, - MIPSI_DADDI = 0x60000000, MIPSI_DADDU = 0x0000002d, MIPSI_DADDIU = 0x64000000, MIPSI_DSUB = 0x0000002e, MIPSI_DSUBU = 0x0000002f, +#if !LJ_TARGET_MIPSR6 MIPSI_DDIV = 0x0000001e, MIPSI_DDIVU = 0x0000001f, MIPSI_DMULT = 0x0000001c, MIPSI_DMULTU = 0x0000001d, +#else + MIPSI_DDIV = 0x0000009e, + MIPSI_DMOD = 0x000000de, + MIPSI_DDIVU = 0x0000009f, + MIPSI_DMODU = 0x000000df, + MIPSI_DMUL = 0x0000009c, + MIPSI_DMUH = 0x000000dc, +#endif MIPSI_DSLL = 0x00000038, MIPSI_DSRL = 0x0000003a, @@ -308,6 +332,11 @@ typedef enum MIPSIns { MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, +#if LJ_TARGET_MIPSR6 + MIPSI_LSA = 0x00000005, + MIPSI_DLSA = 0x00000015, + MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA, +#endif /* Extract/insert instructions. */ MIPSI_DEXTM = 0x7c000001, @@ -317,18 +346,19 @@ typedef enum MIPSIns { MIPSI_DINSU = 0x7c000006, MIPSI_DINS = 0x7c000007, - MIPSI_RINT_D = 0x4620001a, - MIPSI_RINT_S = 0x4600001a, - MIPSI_RINT = 0x4400001a, MIPSI_FLOOR_D = 0x4620000b, - MIPSI_CEIL_D = 0x4620000a, - MIPSI_ROUND_D = 0x46200008, /* FP instructions. */ MIPSI_MOV_S = 0x46000006, MIPSI_MOV_D = 0x46200006, +#if !LJ_TARGET_MIPSR6 MIPSI_MOVT_D = 0x46210011, MIPSI_MOVF_D = 0x46200011, +#else + MIPSI_MIN_D = 0x4620001C, + MIPSI_MAX_D = 0x4620001E, + MIPSI_SEL_D = 0x46200010, +#endif MIPSI_ABS_D = 0x46200005, MIPSI_NEG_D = 0x46200007, @@ -363,15 +393,23 @@ typedef enum MIPSIns { MIPSI_DMTC1 = 0x44a00000, MIPSI_DMFC1 = 0x44200000, +#if !LJ_TARGET_MIPSR6 MIPSI_BC1F = 0x45000000, MIPSI_BC1T = 0x45010000, - MIPSI_C_EQ_D = 0x46200032, MIPSI_C_OLT_S = 0x46000034, MIPSI_C_OLT_D = 0x46200034, MIPSI_C_ULT_D = 0x46200035, MIPSI_C_OLE_D = 0x46200036, MIPSI_C_ULE_D = 0x46200037, +#else + MIPSI_BC1EQZ = 0x45200000, + MIPSI_BC1NEZ = 0x45a00000, + MIPSI_CMP_EQ_D = 0x46a00002, + MIPSI_CMP_LT_S = 0x46800004, + MIPSI_CMP_LT_D = 0x46a00004, +#endif + } MIPSIns; #endif diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 1682c81e..91c12216 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -83,6 +83,10 @@ | |.define FRET1, f0 |.define FRET2, f2 +| +|.define FTMP0, f20 +|.define FTMP1, f21 +|.define FTMP2, f22 |.endif | |// Stack layout while in interpreter. Must match with lj_frame.h. @@ -310,10 +314,10 @@ |.endmacro | |// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) +#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | @@ -492,8 +496,15 @@ static void build_subroutines(BuildCtx *ctx) |7: // Less results wanted. | subu TMP0, RD, TMP2 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. + |.if MIPSR6 + | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? + | seleqz BASE, BASE, TMP2 + | b <3 + |. or BASE, BASE, TMP0 + |.else | b <3 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? + |.endif | |8: // Corner case: need to grow stack for filling up results. | // This can happen if: @@ -1121,11 +1132,16 @@ static void build_subroutines(BuildCtx *ctx) |.endmacro | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! + |// MIPSR6: no delay slot, but a forbidden slot. |.macro ffgccheck | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) | dsubu AT, TMP0, TMP1 + |.if MIPSR6 + | bgezalc AT, ->fff_gcstep + |.else | bgezal AT, ->fff_gcstep + |.endif |.endmacro | |//-- Base library: checks ----------------------------------------------- @@ -1153,7 +1169,13 @@ static void build_subroutines(BuildCtx *ctx) | sltu TMP1, TISNUM, TMP0 | not TMP2, TMP0 | li TMP3, ~LJ_TISNUM + |.if MIPSR6 + | selnez TMP2, TMP2, TMP1 + | seleqz TMP3, TMP3, TMP1 + | or TMP2, TMP2, TMP3 + |.else | movz TMP2, TMP3, TMP1 + |.endif | dsll TMP2, TMP2, 3 | daddu TMP2, CFUNC:RB, TMP2 | b ->fff_restv @@ -1165,7 +1187,11 @@ static void build_subroutines(BuildCtx *ctx) | gettp TMP2, CARG1 | daddiu TMP0, TMP2, -LJ_TTAB | daddiu TMP1, TMP2, -LJ_TUDATA + |.if MIPSR6 + | selnez TMP0, TMP1, TMP0 + |.else | movn TMP0, TMP1, TMP0 + |.endif | bnez TMP0, >6 |. cleartp TAB:CARG1 |1: // Field metatable must be at same offset for GCtab and GCudata! @@ -1204,7 +1230,13 @@ static void build_subroutines(BuildCtx *ctx) | |6: | sltiu AT, TMP2, LJ_TISNUM + |.if MIPSR6 + | selnez TMP0, TISNUM, AT + | seleqz AT, TMP2, AT + | or TMP2, TMP0, AT + |.else | movn TMP2, TISNUM, AT + |.endif | dsll TMP2, TMP2, 3 | dsubu TMP0, DISPATCH, TMP2 | b <2 @@ -1266,8 +1298,13 @@ static void build_subroutines(BuildCtx *ctx) | or TMP0, TMP0, TMP1 | bnez TMP0, ->fff_fallback |. sd BASE, L->base // Add frame since C call can throw. + |.if MIPSR6 + | sd PC, SAVE_PC // Redundant (but a defined value). + | ffgccheck + |.else | ffgccheck |. sd PC, SAVE_PC // Redundant (but a defined value). + |.endif | load_got lj_strfmt_number | move CARG1, L | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) @@ -1438,8 +1475,15 @@ static void build_subroutines(BuildCtx *ctx) | addiu AT, TMP0, -LUA_YIELD | daddu CARG3, CARG2, TMP0 | daddiu TMP3, CARG2, 8 + |.if MIPSR6 + | seleqz CARG2, CARG2, AT + | selnez TMP3, TMP3, AT + | bgtz AT, ->fff_fallback // st > LUA_YIELD? + |. or CARG2, TMP3, CARG2 + |.else | bgtz AT, ->fff_fallback // st > LUA_YIELD? |. movn CARG2, TMP3, AT + |.endif | xor TMP2, TMP2, CARG3 | bnez TMP1, ->fff_fallback // cframe != 0? |. or AT, TMP2, TMP0 @@ -1751,7 +1795,7 @@ static void build_subroutines(BuildCtx *ctx) | b ->fff_res |. li RD, (2+1)*8 | - |.macro math_minmax, name, intins, fpins + |.macro math_minmax, name, intins, intinsc, fpins | .ffunc_1 name | daddu TMP3, BASE, NARGS8:RC | checkint CARG1, >5 @@ -1763,7 +1807,13 @@ static void build_subroutines(BuildCtx *ctx) |. sextw CARG1, CARG1 | lw CARG2, LO(TMP2) |. slt AT, CARG1, CARG2 + |.if MIPSR6 + | intins TMP1, CARG2, AT + | intinsc CARG1, CARG1, AT + | or CARG1, CARG1, TMP1 + |.else | intins CARG1, CARG2, AT + |.endif | daddiu TMP2, TMP2, 8 | zextw CARG1, CARG1 | b <1 @@ -1799,13 +1849,23 @@ static void build_subroutines(BuildCtx *ctx) |. nop |7: |.if FPU + |.if MIPSR6 + | fpins FRET1, FRET1, FARG1 + |.else | c.olt.d FRET1, FARG1 | fpins FRET1, FARG1 + |.endif |.else | bal ->vm_sfcmpolt |. nop + |.if MIPSR6 + | intins AT, CARG2, CRET1 + | intinsc CARG1, CARG1, CRET1 + | or CARG1, CARG1, AT + |.else | intins CARG1, CARG2, CRET1 |.endif + |.endif | b <6 |. daddiu TMP2, TMP2, 8 | @@ -1825,8 +1885,13 @@ static void build_subroutines(BuildCtx *ctx) | |.endmacro | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d + |.if MIPSR6 + | math_minmax math_min, seleqz, selnez, min.d + | math_minmax math_max, selnez, seleqz, max.d + |.else + | math_minmax math_min, movz, _, movf.d + | math_minmax math_max, movn, _, movt.d + |.endif | |//-- String library ----------------------------------------------------- | @@ -1851,7 +1916,9 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc string_char // Only handle the 1-arg case here. | ffgccheck + |.if not MIPSR6 |. nop + |.endif | ld CARG1, 0(BASE) | gettp TMP0, CARG1 | xori AT, NARGS8:RC, 8 // Exactly 1 argument. @@ -1881,7 +1948,9 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc string_sub | ffgccheck + |.if not MIPSR6 |. nop + |.endif | addiu AT, NARGS8:RC, -16 | ld TMP0, 0(BASE) | bltz AT, ->fff_fallback @@ -1904,8 +1973,30 @@ static void build_subroutines(BuildCtx *ctx) | addiu TMP0, CARG2, 1 | addu TMP1, CARG4, TMP0 | slt TMP3, CARG3, r0 + |.if MIPSR6 + | seleqz CARG4, CARG4, AT + | selnez TMP1, TMP1, AT + | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 + |.else | movn CARG4, TMP1, AT // if (end < 0) end += len+1 + |.endif | addu TMP1, CARG3, TMP0 + |.if MIPSR6 + | selnez TMP1, TMP1, TMP3 + | seleqz CARG3, CARG3, TMP3 + | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 + | li TMP2, 1 + | slt AT, CARG4, r0 + | slt TMP3, r0, CARG3 + | seleqz CARG4, CARG4, AT // if (end < 0) end = 0 + | selnez CARG3, CARG3, TMP3 + | seleqz TMP2, TMP2, TMP3 + | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 + | slt AT, CARG2, CARG4 + | seleqz CARG4, CARG4, AT + | selnez CARG2, CARG2, AT + | or CARG4, CARG2, CARG4 // if (end > len) end = len + |.else | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 | li TMP2, 1 | slt AT, CARG4, r0 @@ -1914,6 +2005,7 @@ static void build_subroutines(BuildCtx *ctx) | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 | slt AT, CARG2, CARG4 | movn CARG4, CARG2, AT // if (end > len) end = len + |.endif | daddu CARG2, STR:CARG1, CARG3 | subu CARG3, CARG4, CARG3 // len = end - start | daddiu CARG2, CARG2, sizeof(GCstr)-1 @@ -1975,7 +2067,13 @@ static void build_subroutines(BuildCtx *ctx) | slt AT, CARG1, r0 | dsrlv CRET1, TMP0, CARG3 | dsubu TMP0, r0, CRET1 + |.if MIPSR6 + | selnez TMP0, TMP0, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP0 + |.else | movn CRET1, TMP0, AT + |.endif | jr ra |. zextw CRET1, CRET1 |1: @@ -1998,14 +2096,28 @@ static void build_subroutines(BuildCtx *ctx) | slt AT, CARG1, r0 | dsrlv CRET1, CRET2, TMP0 | dsubu CARG1, r0, CRET1 + |.if MIPSR6 + | seleqz CRET1, CRET1, AT + | selnez CARG1, CARG1, AT + | or CRET1, CRET1, CARG1 + |.else | movn CRET1, CARG1, AT + |.endif | li CARG1, 64 | subu TMP0, CARG1, TMP0 | dsllv CRET2, CRET2, TMP0 // Integer check. | sextw AT, CRET1 | xor AT, CRET1, AT // Range check. | jr ra + |.if MIPSR6 + | seleqz AT, AT, CRET2 + | selnez CRET2, CRET2, CRET2 + | jr ra + |. or CRET2, AT, CRET2 + |.else + | jr ra |. movz CRET2, AT, CRET2 + |.endif |1: | jr ra |. li CRET2, 1 @@ -2515,15 +2627,22 @@ static void build_subroutines(BuildCtx *ctx) | |// Hard-float round to integer. |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. + |// MIPSR6: Modifies FTMP1, too. |.macro vm_round_hf, func | lui TMP0, 0x4330 // Hiword of 2^52 (double). | dsll TMP0, TMP0, 32 | dmtc1 TMP0, f4 | abs.d FRET2, FARG1 // |x| | dmfc1 AT, FARG1 + |.if MIPSR6 + | cmp.lt.d FTMP1, FRET2, f4 + | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 + | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52. + |.else | c.olt.d 0, FRET2, f4 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | bc1f 0, >1 // Truncate only if |x| < 2^52. + |.endif |. sub.d FRET1, FRET1, f4 | slt AT, AT, r0 |.if "func" == "ceil" @@ -2534,16 +2653,38 @@ static void build_subroutines(BuildCtx *ctx) |.if "func" == "trunc" | dsll TMP0, TMP0, 32 | dmtc1 TMP0, f4 + |.if MIPSR6 + | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result? + | sub.d FRET2, FRET1, f4 + | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. + | dmtc1 AT, FRET1 + | neg.d FRET2, FTMP1 + | jr ra + |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in. + |.else | c.olt.d 0, FRET2, FRET1 // |x| < result? | sub.d FRET2, FRET1, f4 | movt.d FRET1, FRET2, 0 // If yes, subtract +1. | neg.d FRET2, FRET1 | jr ra |. movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.endif |.else | neg.d FRET2, FRET1 | dsll TMP0, TMP0, 32 | dmtc1 TMP0, f4 + |.if MIPSR6 + | dmtc1 AT, FTMP1 + | sel.d FTMP1, FRET1, FRET2 + |.if "func" == "ceil" + | cmp.lt.d FRET1, FTMP1, FARG1 // x > result? + |.else + | cmp.lt.d FRET1, FARG1, FTMP1 // x < result? + |.endif + | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1. + | jr ra + |. sel.d FRET1, FTMP1, FRET2 + |.else | movn.d FRET1, FRET2, AT // Merge sign bit back in. |.if "func" == "ceil" | c.olt.d 0, FRET1, FARG1 // x > result? @@ -2554,6 +2695,7 @@ static void build_subroutines(BuildCtx *ctx) | jr ra |. movt.d FRET1, FRET2, 0 |.endif + |.endif |1: | jr ra |. mov.d FRET1, FARG1 @@ -2698,7 +2840,7 @@ static void build_subroutines(BuildCtx *ctx) |. li CRET1, 0 |.endif | - |.macro sfmin_max, name, intins + |.macro sfmin_max, name, intins, intinsc |->vm_sf .. name: |.if JIT and not FPU | move TMP2, ra @@ -2707,13 +2849,25 @@ static void build_subroutines(BuildCtx *ctx) | move ra, TMP2 | move TMP0, CRET1 | move CRET1, CARG1 + |.if MIPSR6 + | intins CRET1, CRET1, TMP0 + | intinsc TMP0, CARG2, TMP0 + | jr ra + |. or CRET1, CRET1, TMP0 + |.else | jr ra |. intins CRET1, CARG2, TMP0 |.endif + |.endif |.endmacro | - | sfmin_max min, movz - | sfmin_max max, movn + |.if MIPSR6 + | sfmin_max min, selnez, seleqz + | sfmin_max max, seleqz, selnez + |.else + | sfmin_max min, movz, _ + | sfmin_max max, movn, _ + |.endif | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- @@ -2882,7 +3036,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | slt AT, CARG1, CARG2 | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + | movop TMP2, TMP2, AT + |.else | movop TMP2, r0, AT + |.endif |1: | daddu PC, PC, TMP2 | ins_next @@ -2900,16 +3058,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |3: // RA and RD are both numbers. |.if FPU - | fcomp f20, f22 + |.if MIPSR6 + | fcomp FTMP0, FTMP0, FTMP2 + | addu TMP2, TMP2, TMP3 + | mfc1 TMP3, FTMP0 + | b <1 + |. fmovop TMP2, TMP2, TMP3 + |.else + | fcomp FTMP0, FTMP2 | addu TMP2, TMP2, TMP3 | b <1 |. fmovop TMP2, r0 + |.endif |.else | bal sfcomp |. addu TMP2, TMP2, TMP3 | b <1 + |.if MIPSR6 + |. movop TMP2, TMP2, CRET1 + |.else |. movop TMP2, r0, CRET1 |.endif + |.endif | |4: // RA is a number, RD is not a number. | bne CARG4, TISNUM, ->vmeta_comp @@ -2956,15 +3126,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |.endmacro | + |.if MIPSR6 + if (op == BC_ISLT) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult + } else { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult + } + |.else if (op == BC_ISLT) { - | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt + | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt } else if (op == BC_ISGE) { - | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt + | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt } else if (op == BC_ISLE) { - | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult + | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult } else { - | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult + | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult } + |.endif break; case BC_ISEQV: case BC_ISNEV: @@ -3010,7 +3192,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: // Check if the tags are the same and it's a table or userdata. | xor AT, CARG3, CARG4 // Same type? | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? + |.if MIPSR6 + | seleqz TMP0, TMP0, AT + |.else | movn TMP0, r0, AT + |.endif if (vk) { | beqz TMP0, <1 } else { @@ -3060,11 +3246,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | xor TMP1, CARG1, CARG2 | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, TMP1 + } else { + | selnez TMP2, TMP2, TMP1 + } + |.else if (vk) { | movn TMP2, r0, TMP1 } else { | movz TMP2, r0, TMP1 } + |.endif | daddu PC, PC, TMP2 | ins_next break; @@ -3091,6 +3285,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne CARG4, TISNUM, >6 |. addu TMP2, TMP2, TMP3 | xor AT, CARG1, CARG2 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, AT + |1: + | daddu PC, PC, TMP2 + |2: + } else { + | selnez TMP2, TMP2, AT + |1: + |2: + | daddu PC, PC, TMP2 + } + |.else if (vk) { | movn TMP2, r0, AT |1: @@ -3102,6 +3309,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: | daddu PC, PC, TMP2 } + |.endif | ins_next | |3: // RA is not an integer. @@ -3114,30 +3322,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. addu TMP2, TMP2, TMP3 | sltu AT, CARG4, TISNUM |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) + | ldc1 FTMP0, 0(RA) + | ldc1 FTMP2, 0(RD) |.endif | beqz AT, >5 |. nop |4: // RA and RD are both numbers. |.if FPU - | c.eq.d f20, f22 + |.if MIPSR6 + | cmp.eq.d FTMP0, FTMP0, FTMP2 + | dmfc1 TMP1, FTMP0 + | b <1 + if (vk) { + |. selnez TMP2, TMP2, TMP1 + } else { + |. seleqz TMP2, TMP2, TMP1 + } + |.else + | c.eq.d FTMP0, FTMP2 | b <1 if (vk) { |. movf TMP2, r0 } else { |. movt TMP2, r0 } + |.endif |.else | bal ->vm_sfcmpeq |. nop | b <1 + |.if MIPSR6 + if (vk) { + |. selnez TMP2, TMP2, CRET1 + } else { + |. seleqz TMP2, TMP2, CRET1 + } + |.else if (vk) { |. movz TMP2, r0, CRET1 } else { |. movn TMP2, r0, CRET1 } |.endif + |.endif | |5: // RA is a number, RD is not a number. |.if FFI @@ -3147,9 +3374,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | // RA is a number, RD is an integer. Convert RD to a number. |.if FPU - |. lwc1 f22, LO(RD) + |. lwc1 FTMP2, LO(RD) | b <4 - |. cvt.d.w f22, f22 + |. cvt.d.w FTMP2, FTMP2 |.else |. sextw CARG2, CARG2 | bal ->vm_sfi2d_2 @@ -3167,10 +3394,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | // RA is an integer, RD is a number. Convert RA to a number. |.if FPU - |. lwc1 f20, LO(RA) - | ldc1 f22, 0(RD) + |. lwc1 FTMP0, LO(RA) + | ldc1 FTMP2, 0(RD) | b <4 - | cvt.d.w f20, f20 + | cvt.d.w FTMP0, FTMP0 |.else |. sextw CARG1, CARG1 | bal ->vm_sfi2d_1 @@ -3213,11 +3440,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RD4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, TMP0 + } else { + | selnez TMP2, TMP2, TMP0 + } + |.else if (vk) { | movn TMP2, r0, TMP0 } else { | movz TMP2, r0, TMP0 } + |.endif | daddu PC, PC, TMP2 | ins_next break; @@ -3236,11 +3471,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RD4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (op == BC_IST) { + | selnez TMP2, TMP2, TMP0; + } else { + | seleqz TMP2, TMP2, TMP0; + } + |.else if (op == BC_IST) { | movz TMP2, r0, TMP0 } else { | movn TMP2, r0, TMP0 } + |.endif | daddu PC, PC, TMP2 } else { | ld CRET1, 0(RD) @@ -3483,9 +3726,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bltz TMP1, ->vmeta_arith |. daddu RA, BASE, RA |.elif "intins" == "mult" + |.if MIPSR6 + |. nop + | mul CRET1, CARG3, CARG4 + | muh TMP2, CARG3, CARG4 + |.else |. intins CARG3, CARG4 | mflo CRET1 | mfhi TMP2 + |.endif | sra TMP1, CRET1, 31 | bne TMP1, TMP2, ->vmeta_arith |. daddu RA, BASE, RA @@ -3508,16 +3757,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | |5: // Check for two numbers. - | .FPU ldc1 f20, 0(RB) + | .FPU ldc1 FTMP0, 0(RB) | sltu AT, TMP0, TISNUM | sltu TMP0, TMP1, TISNUM - | .FPU ldc1 f22, 0(RC) + | .FPU ldc1 FTMP2, 0(RC) | and AT, AT, TMP0 | beqz AT, ->vmeta_arith |. daddu RA, BASE, RA | |.if FPU - | fpins FRET1, f20, f22 + | fpins FRET1, FTMP0, FTMP2 |.elif "fpcall" == "sfpmod" | sfpmod |.else @@ -3847,7 +4096,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP0, 0x801 | addiu AT, CARG2, -0x7ff | srl CARG3, RD, 14 + |.if MIPSR6 + | seleqz TMP0, TMP0, AT + | selnez CARG2, CARG2, AT + | or CARG2, CARG2, TMP0 + |.else | movz CARG2, TMP0, AT + |.endif | // (lua_State *L, int32_t asize, uint32_t hbits) | call_intern lj_tab_new |. move CARG1, L @@ -4128,7 +4383,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) | settp STR:RC, TMP3 // Tagged key to look for. |.if FPU - | ldc1 f20, 0(RA) + | ldc1 FTMP0, 0(RA) |.else | ld CRET1, 0(RA) |.endif @@ -4144,7 +4399,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | andi AT, TMP3, LJ_GC_BLACK // isblack(table) | bnez AT, >7 |.if FPU - |. sdc1 f20, NODE:TMP2->val + |. sdc1 FTMP0, NODE:TMP2->val |.else |. sd CRET1, NODE:TMP2->val |.endif @@ -4185,7 +4440,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld BASE, L->base |.if FPU | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) + |. sdc1 FTMP0, 0(CRET1) |.else | ld CARG1, 0(RA) | b <3 // No 2nd write barrier needed. @@ -4528,7 +4783,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld CARG1, 0(RC) | sltu AT, RC, TMP3 | daddiu RC, RC, 8 + |.if MIPSR6 + | selnez CARG1, CARG1, AT + | seleqz AT, TISNIL, AT + | or CARG1, CARG1, AT + |.else | movz CARG1, TISNIL, AT + |.endif | sd CARG1, 0(RA) | sltu AT, RA, TMP2 | bnez AT, <1 @@ -4717,7 +4978,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | dext AT, CRET1, 31, 0 | slt CRET1, CARG2, CARG3 | slt TMP1, CARG3, CARG2 + |.if MIPSR6 + | selnez TMP1, TMP1, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP1 + |.else | movn CRET1, TMP1, AT + |.endif } else { | bne CARG3, TISNUM, >5 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type @@ -4733,20 +5000,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slt CRET1, CRET1, CARG1 | slt AT, CARG2, r0 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. + |.if MIPSR6 + | selnez TMP1, TMP1, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP1 + |.else | movn CRET1, TMP1, AT + |.endif | or CRET1, CRET1, TMP0 | zextw CARG1, CARG1 | settp CARG1, TISNUM } |1: if (op == BC_FORI) { + |.if MIPSR6 + | selnez TMP2, TMP2, CRET1 + |.else | movz TMP2, r0, CRET1 + |.endif | daddu PC, PC, TMP2 } else if (op == BC_JFORI) { | daddu PC, PC, TMP2 | lhu RD, -4+OFS_RD(PC) } else if (op == BC_IFORL) { + |.if MIPSR6 + | seleqz TMP2, TMP2, CRET1 + |.else | movn TMP2, r0, CRET1 + |.endif | daddu PC, PC, TMP2 } if (vk) { @@ -4776,6 +5057,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | and AT, AT, TMP0 | beqz AT, ->vmeta_for |. slt TMP3, TMP3, r0 + |.if MIPSR6 + | dmtc1 TMP3, FTMP2 + | cmp.lt.d FTMP0, f0, f2 + | cmp.lt.d FTMP1, f2, f0 + | sel.d FTMP2, FTMP1, FTMP0 + | b <1 + |. dmfc1 CRET1, FTMP2 + |.else | c.ole.d 0, f0, f2 | c.ole.d 1, f2, f0 | li CRET1, 1 @@ -4783,12 +5072,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movt AT, r0, 1 | b <1 |. movn CRET1, AT, TMP3 + |.endif } else { | ldc1 f0, FORL_IDX*8(RA) | ldc1 f4, FORL_STEP*8(RA) | ldc1 f2, FORL_STOP*8(RA) | ld TMP3, FORL_STEP*8(RA) | add.d f0, f0, f4 + |.if MIPSR6 + | slt TMP3, TMP3, r0 + | dmtc1 TMP3, FTMP2 + | cmp.lt.d FTMP0, f0, f2 + | cmp.lt.d FTMP1, f2, f0 + | sel.d FTMP2, FTMP1, FTMP0 + | dmfc1 CRET1, FTMP2 + if (op == BC_IFORL) { + | seleqz TMP2, TMP2, CRET1 + | daddu PC, PC, TMP2 + } + |.else | c.ole.d 0, f0, f2 | c.ole.d 1, f2, f0 | slt TMP3, TMP3, r0 @@ -4801,6 +5103,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movn TMP2, r0, CRET1 | daddu PC, PC, TMP2 } + |.endif | sdc1 f0, FORL_IDX*8(RA) | ins_next1 | b <2 @@ -4976,8 +5279,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld TMP0, 0(RA) | sltu AT, RA, RC // Less args than parameters? | move CARG1, TMP0 + |.if MIPSR6 + | selnez TMP0, TMP0, AT + | seleqz TMP3, TISNIL, AT + | or TMP0, TMP0, TMP3 + | seleqz TMP3, CARG1, AT + | selnez CARG1, TISNIL, AT + | or CARG1, CARG1, TMP3 + |.else | movz TMP0, TISNIL, AT // Clear missing parameters. | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). + |.endif | addiu TMP2, TMP2, -1 | sd TMP0, 16(TMP1) | daddiu TMP1, TMP1, 8 -- cgit v1.2.3-55-g6feb