aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Mike Pall <mike> 2017-02-20 03:43:10 +0100
committer Mike Pall <mike> 2017-02-20 03:43:10 +0100
commit a25c0b99b84558887887b8e298409dcf8605e5e3 (patch)
tree 8cb7b1db3cb0cd4f6cdd59540d39d986b502e471 /src
parent 4416e885d28c0f49d2c7bb3f9630ab23c22fbc9a (diff)
download luajit-a25c0b99b84558887887b8e298409dcf8605e5e3.tar.gz
luajit-a25c0b99b84558887887b8e298409dcf8605e5e3.tar.bz2
luajit-a25c0b99b84558887887b8e298409dcf8605e5e3.zip
MIPS64, part 2: Add MIPS64 hard-float JIT compiler backend.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. Sponsored by Cisco Systems, Inc.
Diffstat (limited to 'src')
-rw-r--r-- src/jit/dis_mips.lua 45
-rw-r--r-- src/jit/dis_mips64.lua 17
-rw-r--r-- src/jit/dis_mips64el.lua 17
-rw-r--r-- src/jit/dump.lua 2
-rw-r--r-- src/lj_arch.h 4
-rw-r--r-- src/lj_asm_mips.h 673
-rw-r--r-- src/lj_emit_mips.h 107
-rw-r--r-- src/lj_jit.h 8
-rw-r--r-- src/lj_mcode.c 6
-rw-r--r-- src/lj_snap.c 3
-rw-r--r-- src/lj_target_mips.h 90
-rw-r--r-- src/lj_trace.c 11
-rw-r--r-- src/vm_mips64.dasc 237
13 files changed, 1022 insertions, 198 deletions
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index fdd3d353..a12b8e62 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -34,15 +34,17 @@ local map_special = {
34 "jrS", "jalrD1S", "movzDST", "movnDST", 34 "jrS", "jalrD1S", "movzDST", "movnDST",
35 "syscallY", "breakY", false, "sync", 35 "syscallY", "breakY", false, "sync",
36 "mfhiD", "mthiS", "mfloD", "mtloS", 36 "mfhiD", "mthiS", "mfloD", "mtloS",
37 false, false, false, false, 37 "dsllvDST", false, "dsrlvDST", "dsravDST",
38 "multST", "multuST", "divST", "divuST", 38 "multST", "multuST", "divST", "divuST",
39 false, false, false, false, 39 "dmultST", "dmultuST", "ddivST", "ddivuST",
40 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", 40 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
41 "andDST", "or|moveDST0", "xorDST", "nor|notDST0", 41 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
42 false, false, "sltDST", "sltuDST", 42 false, false, "sltDST", "sltuDST",
43 false, false, false, false, 43 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
44 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", 44 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
45 "teqSTZ", false, "tneSTZ", 45 "teqSTZ", false, "tneSTZ", false,
46 "dsllDTA", false, "dsrlDTA", "dsraDTA",
47 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
46} 48}
47 49
48local map_special2 = { 50local map_special2 = {
@@ -60,11 +62,17 @@ local map_bshfl = {
60 [24] = "sehDT", 62 [24] = "sehDT",
61} 63}
62 64
65local map_dbshfl = {
66 shift = 6, mask = 31,
67 [2] = "dsbhDT",
68 [5] = "dshdDT",
69}
70
63local map_special3 = { 71local map_special3 = {
64 shift = 0, mask = 63, 72 shift = 0, mask = 63,
65 [0] = "extTSAK", [4] = "insTSAL", 73 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
66 [32] = map_bshfl, 74 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
67 [59] = "rdhwrTD", 75 [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD",
68} 76}
69 77
70local map_regimm = { 78local map_regimm = {
@@ -178,8 +186,8 @@ local map_cop1bc = {
178 186
179local map_cop1 = { 187local map_cop1 = {
180 shift = 21, mask = 31, 188 shift = 21, mask = 31,
181 [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", 189 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
182 "mtc1TG", false, "ctc1TG", "mthc1TG", 190 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
183 map_cop1bc, false, false, false, 191 map_cop1bc, false, false, false,
184 false, false, false, false, 192 false, false, false, false,
185 map_cop1s, map_cop1d, false, false, 193 map_cop1s, map_cop1d, false, false,
@@ -213,16 +221,16 @@ local map_pri = {
213 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", 221 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU",
214 map_cop0, map_cop1, false, map_cop1x, 222 map_cop0, map_cop1, false, map_cop1x,
215 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", 223 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
216 false, false, false, false, 224 "daddiTSI", "daddiuTSI", false, false,
217 map_special2, "jalxJ", false, map_special3, 225 map_special2, "jalxJ", false, map_special3,
218 "lbTSO", "lhTSO", "lwlTSO", "lwTSO", 226 "lbTSO", "lhTSO", "lwlTSO", "lwTSO",
219 "lbuTSO", "lhuTSO", "lwrTSO", false, 227 "lbuTSO", "lhuTSO", "lwrTSO", false,
220 "sbTSO", "shTSO", "swlTSO", "swTSO", 228 "sbTSO", "shTSO", "swlTSO", "swTSO",
221 false, false, "swrTSO", "cacheNSO", 229 false, false, "swrTSO", "cacheNSO",
222 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", 230 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO",
223 false, "ldc1HSO", "ldc2TSO", false, 231 false, "ldc1HSO", "ldc2TSO", "ldTSO",
224 "scTSO", "swc1HSO", "swc2TSO", false, 232 "scTSO", "swc1HSO", "swc2TSO", false,
225 false, "sdc1HSO", "sdc2TSO", false, 233 false, "sdc1HSO", "sdc2TSO", "sdTSO",
226} 234}
227 235
228------------------------------------------------------------------------------ 236------------------------------------------------------------------------------
@@ -306,6 +314,8 @@ local function disass_ins(ctx)
306 x = "f"..band(rshift(op, 21), 31) 314 x = "f"..band(rshift(op, 21), 31)
307 elseif p == "A" then 315 elseif p == "A" then
308 x = band(rshift(op, 6), 31) 316 x = band(rshift(op, 6), 31)
317 elseif p == "E" then
318 x = band(rshift(op, 6), 31) + 32
309 elseif p == "M" then 319 elseif p == "M" then
310 x = band(rshift(op, 11), 31) 320 x = band(rshift(op, 11), 31)
311 elseif p == "N" then 321 elseif p == "N" then
@@ -315,8 +325,12 @@ local function disass_ins(ctx)
315 if x == 0 then x = nil end 325 if x == 0 then x = nil end
316 elseif p == "K" then 326 elseif p == "K" then
317 x = band(rshift(op, 11), 31) + 1 327 x = band(rshift(op, 11), 31) + 1
328 elseif p == "P" then
329 x = band(rshift(op, 11), 31) + 33
318 elseif p == "L" then 330 elseif p == "L" then
319 x = band(rshift(op, 11), 31) - last + 1 331 x = band(rshift(op, 11), 31) - last + 1
332 elseif p == "Q" then
333 x = band(rshift(op, 11), 31) - last + 33
320 elseif p == "I" then 334 elseif p == "I" then
321 x = arshift(lshift(op, 16), 16) 335 x = arshift(lshift(op, 16), 16)
322 elseif p == "U" then 336 elseif p == "U" then
@@ -330,11 +344,12 @@ local function disass_ins(ctx)
330 elseif p == "B" then 344 elseif p == "B" then
331 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 345 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
332 ctx.rel = x 346 ctx.rel = x
333 x = "0x"..tohex(x) 347 x = format("0x%08x", x)
334 elseif p == "J" then 348 elseif p == "J" then
335 x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 349 local a = ctx.addr + ctx.pos
350 x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
336 ctx.rel = x 351 ctx.rel = x
337 x = "0x"..tohex(x) 352 x = format("0x%08x", x)
338 elseif p == "V" then 353 elseif p == "V" then
339 x = band(rshift(op, 8), 7) 354 x = band(rshift(op, 8), 7)
340 if x == 0 then x = nil end 355 if x == 0 then x = nil end
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
new file mode 100644
index 00000000..c4374928
--- /dev/null
+++ b/src/jit/dis_mips64.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create,
14 disass = dis_mips.disass,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
new file mode 100644
index 00000000..2b1470af
--- /dev/null
+++ b/src/jit/dis_mips64el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_el,
14 disass = dis_mips.disass_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 898ce9a1..2bea652b 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -85,7 +85,7 @@ local nexitsym = 0
85local function fillsymtab_tr(tr, nexit) 85local function fillsymtab_tr(tr, nexit)
86 local t = {} 86 local t = {}
87 symtabmt.__index = t 87 symtabmt.__index = t
88 if jit.arch == "mips" or jit.arch == "mipsel" then 88 if jit.arch:sub(1, 4) == "mips" then
89 t[traceexitstub(tr, 0)] = "exit" 89 t[traceexitstub(tr, 0)] = "exit"
90 return 90 return
91 end 91 end
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 001111da..9bf6f481 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -332,10 +332,12 @@
332#define LJ_ARCH_BITS 32 332#define LJ_ARCH_BITS 32
333#define LJ_TARGET_MIPS32 1 333#define LJ_TARGET_MIPS32 1
334#else 334#else
335#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU
336#define LJ_ARCH_NOJIT 1 /* NYI */
337#endif
335#define LJ_ARCH_BITS 64 338#define LJ_ARCH_BITS 64
336#define LJ_TARGET_MIPS64 1 339#define LJ_TARGET_MIPS64 1
337#define LJ_TARGET_GC64 1 340#define LJ_TARGET_GC64 1
338#define LJ_ARCH_NOJIT 1 /* NYI */
339#endif 341#endif
340#define LJ_TARGET_MIPS 1 342#define LJ_TARGET_MIPS 1
341#define LJ_TARGET_EHRETREG 4 343#define LJ_TARGET_EHRETREG 4
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index dd821c70..affe7d89 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
23{ 23{
24 Reg r = IR(ref)->r; 24 Reg r = IR(ref)->r;
25 if (ra_noreg(r)) { 25 if (ra_noreg(r)) {
26 if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) 26 if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0)
27 return RID_ZERO; 27 return RID_ZERO;
28 r = ra_allocref(as, ref, allow); 28 r = ra_allocref(as, ref, allow);
29 } else { 29 } else {
@@ -166,9 +166,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
166 } else if (ir->o == IR_UREFC) { 166 } else if (ir->o == IR_UREFC) {
167 if (irref_isk(ir->op1)) { 167 if (irref_isk(ir->op1)) {
168 GCfunc *fn = ir_kfunc(IR(ir->op1)); 168 GCfunc *fn = ir_kfunc(IR(ir->op1));
169 int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); 169 intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv;
170 int32_t jgl = (intptr_t)J2G(as->J); 170 intptr_t jgl = (intptr_t)J2G(as->J);
171 if ((uint32_t)(ofs-jgl) < 65536) { 171 if ((uintptr_t)(ofs-jgl) < 65536) {
172 *ofsp = ofs-jgl-32768; 172 *ofsp = ofs-jgl-32768;
173 return RID_JGL; 173 return RID_JGL;
174 } else { 174 } else {
@@ -190,20 +190,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
190 Reg base; 190 Reg base;
191 if (ra_noreg(ir->r) && canfuse(as, ir)) { 191 if (ra_noreg(ir->r) && canfuse(as, ir)) {
192 if (ir->o == IR_ADD) { 192 if (ir->o == IR_ADD) {
193 int32_t ofs2; 193 intptr_t ofs2;
194 if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { 194 if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)),
195 checki16(ofs2))) {
195 ref = ir->op1; 196 ref = ir->op1;
196 ofs = ofs2; 197 ofs = (int32_t)ofs2;
197 } 198 }
198 } else if (ir->o == IR_STRREF) { 199 } else if (ir->o == IR_STRREF) {
199 int32_t ofs2 = 65536; 200 intptr_t ofs2 = 65536;
200 lua_assert(ofs == 0); 201 lua_assert(ofs == 0);
201 ofs = (int32_t)sizeof(GCstr); 202 ofs = (int32_t)sizeof(GCstr);
202 if (irref_isk(ir->op2)) { 203 if (irref_isk(ir->op2)) {
203 ofs2 = ofs + IR(ir->op2)->i; 204 ofs2 = ofs + get_kval(IR(ir->op2));
204 ref = ir->op1; 205 ref = ir->op1;
205 } else if (irref_isk(ir->op1)) { 206 } else if (irref_isk(ir->op1)) {
206 ofs2 = ofs + IR(ir->op1)->i; 207 ofs2 = ofs + get_kval(IR(ir->op1));
207 ref = ir->op2; 208 ref = ir->op2;
208 } 209 }
209 if (!checki16(ofs2)) { 210 if (!checki16(ofs2)) {
@@ -211,7 +212,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
211 Reg right, left = ra_alloc2(as, ir, allow); 212 Reg right, left = ra_alloc2(as, ir, allow);
212 right = (left >> 8); left &= 255; 213 right = (left >> 8); left &= 255;
213 emit_hsi(as, mi, rt, RID_TMP, ofs); 214 emit_hsi(as, mi, rt, RID_TMP, ofs);
214 emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); 215 emit_dst(as, MIPSI_AADDU, RID_TMP, left, right);
215 return; 216 return;
216 } 217 }
217 ofs = ofs2; 218 ofs = ofs2;
@@ -227,7 +228,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
227static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 228static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
228{ 229{
229 uint32_t n, nargs = CCI_XNARGS(ci); 230 uint32_t n, nargs = CCI_XNARGS(ci);
230 int32_t ofs = 16; 231 int32_t ofs = LJ_32 ? 16 : 0;
231#if LJ_SOFTFP 232#if LJ_SOFTFP
232 Reg gpr = REGARG_FIRSTGPR; 233 Reg gpr = REGARG_FIRSTGPR;
233#else 234#else
@@ -249,15 +250,15 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
249 !(ci->flags & CCI_VARARG)) { 250 !(ci->flags & CCI_VARARG)) {
250 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ 251 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
251 ra_leftov(as, fpr, ref); 252 ra_leftov(as, fpr, ref);
252 fpr += 2; 253 fpr += LJ_32 ? 2 : 1;
253 gpr += irt_isnum(ir->t) ? 2 : 1; 254 gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1;
254 } else 255 } else
255#endif 256#endif
256 { 257 {
257#if !LJ_SOFTFP 258#if LJ_32 && !LJ_SOFTFP
258 fpr = REGARG_LASTFPR+1; 259 fpr = REGARG_LASTFPR+1;
259#endif 260#endif
260 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; 261 if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
261 if (gpr <= REGARG_LASTGPR) { 262 if (gpr <= REGARG_LASTGPR) {
262 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ 263 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
263#if !LJ_SOFTFP 264#if !LJ_SOFTFP
@@ -269,35 +270,55 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
269 r = ra_alloc1(as, ref, RSET_FPR); 270 r = ra_alloc1(as, ref, RSET_FPR);
270 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 271 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
271 if (irt_isnum(ir->t)) { 272 if (irt_isnum(ir->t)) {
273#if LJ_32
272 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); 274 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
273 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); 275 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
274 lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ 276 lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */
275 gpr += 2; 277 gpr += 2;
278#else
279 emit_tg(as, MIPSI_DMFC1, gpr, r);
280 gpr++; fpr++;
281#endif
276 } else if (irt_isfloat(ir->t)) { 282 } else if (irt_isfloat(ir->t)) {
277 emit_tg(as, MIPSI_MFC1, gpr, r); 283 emit_tg(as, MIPSI_MFC1, gpr, r);
278 gpr++; 284 gpr++;
285#if LJ_64
286 fpr++;
287#endif
279 } 288 }
280 } else 289 } else
281#endif 290#endif
282 { 291 {
283 ra_leftov(as, gpr, ref); 292 ra_leftov(as, gpr, ref);
284 gpr++; 293 gpr++;
294#if LJ_64
295 fpr++;
296#endif
285 } 297 }
286 } else { 298 } else {
287 Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 299 Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
300#if LJ_32
288 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; 301 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
289 emit_spstore(as, ir, r, ofs); 302 emit_spstore(as, ir, r, ofs);
290 ofs += irt_isnum(ir->t) ? 8 : 4; 303 ofs += irt_isnum(ir->t) ? 8 : 4;
304#else
305 emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0));
306 ofs += 8;
307#endif
291 } 308 }
292 } 309 }
293 } else { 310 } else {
294#if !LJ_SOFTFP 311#if !LJ_SOFTFP
295 fpr = REGARG_LASTFPR+1; 312 fpr = REGARG_LASTFPR+1;
296#endif 313#endif
297 if (gpr <= REGARG_LASTGPR) 314 if (gpr <= REGARG_LASTGPR) {
298 gpr++; 315 gpr++;
299 else 316#if LJ_64
300 ofs += 4; 317 fpr++;
318#endif
319 } else {
320 ofs += LJ_32 ? 4 : 8;
321 }
301 } 322 }
302 checkmclim(as); 323 checkmclim(as);
303 } 324 }
@@ -307,15 +328,19 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
307static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) 328static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
308{ 329{
309 RegSet drop = RSET_SCRATCH; 330 RegSet drop = RSET_SCRATCH;
331#if LJ_32
310 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 332 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
333#endif
311#if !LJ_SOFTFP 334#if !LJ_SOFTFP
312 if ((ci->flags & CCI_NOFPRCLOBBER)) 335 if ((ci->flags & CCI_NOFPRCLOBBER))
313 drop &= ~RSET_FPR; 336 drop &= ~RSET_FPR;
314#endif 337#endif
315 if (ra_hasreg(ir->r)) 338 if (ra_hasreg(ir->r))
316 rset_clear(drop, ir->r); /* Dest reg handled below. */ 339 rset_clear(drop, ir->r); /* Dest reg handled below. */
340#if LJ_32
317 if (hiop && ra_hasreg((ir+1)->r)) 341 if (hiop && ra_hasreg((ir+1)->r))
318 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 342 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
343#endif
319 ra_evictset(as, drop); /* Evictions must be performed first. */ 344 ra_evictset(as, drop); /* Evictions must be performed first. */
320 if (ra_used(ir)) { 345 if (ra_used(ir)) {
321 lua_assert(!irt_ispri(ir->t)); 346 lua_assert(!irt_ispri(ir->t));
@@ -326,18 +351,28 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
326 if (ra_hasreg(dest)) { 351 if (ra_hasreg(dest)) {
327 ra_free(as, dest); 352 ra_free(as, dest);
328 ra_modified(as, dest); 353 ra_modified(as, dest);
354#if LJ_32
329 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); 355 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
330 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); 356 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
357#else
358 emit_tg(as, MIPSI_DMTC1, RID_RET, dest);
359#endif
331 } 360 }
332 if (ofs) { 361 if (ofs) {
362#if LJ_32
333 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); 363 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
334 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); 364 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
365#else
366 emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs);
367#endif
335 } 368 }
336 } else { 369 } else {
337 ra_destreg(as, ir, RID_FPRET); 370 ra_destreg(as, ir, RID_FPRET);
338 } 371 }
372#if LJ_32
339 } else if (hiop) { 373 } else if (hiop) {
340 ra_destpair(as, ir); 374 ra_destpair(as, ir);
375#endif
341 } else { 376 } else {
342 ra_destreg(as, ir, RID_RET); 377 ra_destreg(as, ir, RID_RET);
343 } 378 }
@@ -356,7 +391,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
356 func = ir->op2; irf = IR(func); 391 func = ir->op2; irf = IR(func);
357 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 392 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
358 if (irref_isk(func)) { /* Call to constant address. */ 393 if (irref_isk(func)) { /* Call to constant address. */
359 ci.func = (ASMFunction)(void *)(irf->i); 394 ci.func = (ASMFunction)(void *)get_kval(irf);
360 } else { /* Need specific register for indirect calls. */ 395 } else { /* Need specific register for indirect calls. */
361 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); 396 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
362 MCode *p = as->mcp; 397 MCode *p = as->mcp;
@@ -399,8 +434,8 @@ static void asm_retf(ASMState *as, IRIns *ir)
399 emit_setgl(as, base, jit_base); 434 emit_setgl(as, base, jit_base);
400 emit_addptr(as, base, -8*delta); 435 emit_addptr(as, base, -8*delta);
401 asm_guard(as, MIPSI_BNE, RID_TMP, 436 asm_guard(as, MIPSI_BNE, RID_TMP,
402 ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); 437 ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
403 emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); 438 emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
404} 439}
405 440
406/* -- Type conversions ---------------------------------------------------- */ 441/* -- Type conversions ---------------------------------------------------- */
@@ -435,10 +470,15 @@ static void asm_conv(ASMState *as, IRIns *ir)
435#if !LJ_SOFTFP 470#if !LJ_SOFTFP
436 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 471 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
437#endif 472#endif
473#if LJ_64
474 int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
475#endif
438 IRRef lref = ir->op1; 476 IRRef lref = ir->op1;
477#if LJ_32
439 lua_assert(!(irt_isint64(ir->t) || 478 lua_assert(!(irt_isint64(ir->t) ||
440 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ 479 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
441#if LJ_SOFTFP 480#endif
481#if LJ_32 && LJ_SOFTFP
442 /* FP conversions are handled by SPLIT. */ 482 /* FP conversions are handled by SPLIT. */
443 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); 483 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
444 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ 484 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
@@ -463,11 +503,40 @@ static void asm_conv(ASMState *as, IRIns *ir)
463 emit_tg(as, MIPSI_MTC1, RID_TMP, dest); 503 emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
464 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); 504 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
465 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 505 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
506#if LJ_64
507 } else if(st == IRT_U64) { /* U64 to FP conversion. */
508 /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
509 Reg left = ra_alloc1(as, lref, RSET_GPR);
510 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
511 MCLabel l_end = emit_label(as);
512 if (irt_isfloat(ir->t)) {
513 emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp);
514 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63],
515 rset_exclude(RSET_GPR, left));
516 emit_fg(as, MIPSI_CVT_S_L, dest, dest);
517 } else {
518 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
519 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63],
520 rset_exclude(RSET_GPR, left));
521 emit_fg(as, MIPSI_CVT_D_L, dest, dest);
522 }
523 emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end);
524 emit_tg(as, MIPSI_DMTC1, RID_TMP, dest);
525 emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0);
526#endif
466 } else { /* Integer to FP conversion. */ 527 } else { /* Integer to FP conversion. */
467 Reg left = ra_alloc1(as, lref, RSET_GPR); 528 Reg left = ra_alloc1(as, lref, RSET_GPR);
529#if LJ_32
468 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, 530 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
469 dest, dest); 531 dest, dest);
470 emit_tg(as, MIPSI_MTC1, left, dest); 532 emit_tg(as, MIPSI_MTC1, left, dest);
533#else
534 MIPSIns mi = irt_isfloat(ir->t) ?
535 (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) :
536 (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W);
537 emit_fg(as, mi, dest, dest);
538 emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest);
539#endif
471 } 540 }
472 } else if (stfp) { /* FP to integer conversion. */ 541 } else if (stfp) { /* FP to integer conversion. */
473 if (irt_isguard(ir->t)) { 542 if (irt_isguard(ir->t)) {
@@ -478,7 +547,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
478 Reg dest = ra_dest(as, ir, RSET_GPR); 547 Reg dest = ra_dest(as, ir, RSET_GPR);
479 Reg left = ra_alloc1(as, lref, RSET_FPR); 548 Reg left = ra_alloc1(as, lref, RSET_FPR);
480 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 549 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
481 if (irt_isu32(ir->t)) { 550 if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
482 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ 551 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
483 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); 552 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
484 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 553 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
@@ -493,10 +562,50 @@ static void asm_conv(ASMState *as, IRIns *ir)
493 else 562 else
494 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 563 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
495 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); 564 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
565#if LJ_64
566 } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
567 MCLabel l_end;
568 emit_tg(as, MIPSI_DMFC1, dest, tmp);
569 l_end = emit_label(as);
570 /* For inputs >= 2^63 add -2^64 and convert again. */
571 if (st == IRT_NUM) {
572 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
573 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
574 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
575 (void *)&as->J->k64[LJ_K64_M2P64],
576 rset_exclude(RSET_GPR, dest));
577 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
578 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
579 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
580 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
581 (void *)&as->J->k64[LJ_K64_2P63],
582 rset_exclude(RSET_GPR, dest));
583 } else {
584 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
585 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
586 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
587 (void *)&as->J->k32[LJ_K32_M2P64],
588 rset_exclude(RSET_GPR, dest));
589 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
590 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
591 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
592 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
593 (void *)&as->J->k32[LJ_K32_2P63],
594 rset_exclude(RSET_GPR, dest));
595 }
596#endif
496 } else { 597 } else {
598#if LJ_32
497 emit_tg(as, MIPSI_MFC1, dest, tmp); 599 emit_tg(as, MIPSI_MFC1, dest, tmp);
498 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, 600 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
499 tmp, left); 601 tmp, left);
602#else
603 MIPSIns mi = irt_is64(ir->t) ?
604 (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
605 (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
606 emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
607 emit_fg(as, mi, left, left);
608#endif
500 } 609 }
501 } 610 }
502 } else 611 } else
@@ -507,7 +616,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
507 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 616 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
508 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 617 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
509 if ((ir->op2 & IRCONV_SEXT)) { 618 if ((ir->op2 & IRCONV_SEXT)) {
510 if ((as->flags & JIT_F_MIPSXXR2)) { 619 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
511 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); 620 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
512 } else { 621 } else {
513 uint32_t shift = st == IRT_I8 ? 24 : 16; 622 uint32_t shift = st == IRT_I8 ? 24 : 16;
@@ -519,8 +628,35 @@ static void asm_conv(ASMState *as, IRIns *ir)
519 (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); 628 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
520 } 629 }
521 } else { /* 32/64 bit integer conversions. */ 630 } else { /* 32/64 bit integer conversions. */
631#if LJ_32
522 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ 632 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
523 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ 633 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
634#else
635 if (irt_is64(ir->t)) {
636 if (st64) {
637 /* 64/64 bit no-op (cast)*/
638 ra_leftov(as, dest, lref);
639 } else {
640 Reg left = ra_alloc1(as, lref, RSET_GPR);
641 if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
642 emit_dta(as, MIPSI_SLL, dest, left, 0);
643 } else { /* 32 to 64 bit zero extension. */
644 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
645 }
646 }
647 } else {
648 if (st64) {
649 /* This is either a 32 bit reg/reg mov which zeroes the hiword
650 ** or a load of the loword from a 64 bit address.
651 */
652 Reg left = ra_alloc1(as, lref, RSET_GPR);
653 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
654 } else { /* 32/32 bit no-op (cast). */
655 /* Do nothing, but may need to move regs. */
656 ra_leftov(as, dest, lref);
657 }
658 }
659#endif
524 } 660 }
525 } 661 }
526} 662}
@@ -563,23 +699,50 @@ static void asm_strto(ASMState *as, IRIns *ir)
563 args[1] = ASMREF_TMP1; /* TValue *n */ 699 args[1] = ASMREF_TMP1; /* TValue *n */
564 asm_gencall(as, ci, args); 700 asm_gencall(as, ci, args);
565 /* Store the result to the spill slot or temp slots. */ 701 /* Store the result to the spill slot or temp slots. */
566 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), 702 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1),
567 RID_SP, ofs); 703 RID_SP, ofs);
568} 704}
569 705
570/* -- Memory references --------------------------------------------------- */ 706/* -- Memory references --------------------------------------------------- */
571 707
708#if LJ_64
709/* Store tagged value for ref at base+ofs. */
710static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
711{
712 RegSet allow = rset_exclude(RSET_GPR, base);
713 IRIns *ir = IR(ref);
714 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
715 if (irref_isk(ref)) {
716 TValue k;
717 lj_ir_kvalue(as->J->L, &k, ir);
718 emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
719 } else {
720 Reg src = ra_alloc1(as, ref, allow);
721 Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47,
722 rset_exclude(allow, src));
723 emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs);
724 if (irt_isinteger(ir->t)) {
725 emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type);
726 emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0);
727 } else {
728 emit_dst(as, MIPSI_DADDU, RID_TMP, src, type);
729 }
730 }
731}
732#endif
733
572/* Get pointer to TValue. */ 734/* Get pointer to TValue. */
573static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 735static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
574{ 736{
575 IRIns *ir = IR(ref); 737 IRIns *ir = IR(ref);
576 if (irt_isnum(ir->t)) { 738 if (irt_isnum(ir->t)) {
577 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ 739 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */
578 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 740 ra_allockreg(as, igcptr(ir_knum(ir)), dest);
579 else /* Otherwise force a spill and use the spill slot. */ 741 else /* Otherwise force a spill and use the spill slot. */
580 emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); 742 emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
581 } else { 743 } else {
582 /* Otherwise use g->tmptv to hold the TValue. */ 744 /* Otherwise use g->tmptv to hold the TValue. */
745#if LJ_32
583 RegSet allow = rset_exclude(RSET_GPR, dest); 746 RegSet allow = rset_exclude(RSET_GPR, dest);
584 Reg type; 747 Reg type;
585 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); 748 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768));
@@ -592,6 +755,11 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
592 else 755 else
593 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 756 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
594 emit_setgl(as, type, tmptv.it); 757 emit_setgl(as, type, tmptv.it);
758#else
759 asm_tvstore64(as, dest, 0, ref);
760 emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL,
761 (int32_t)(offsetof(global_State, tmptv)-32768));
762#endif
595 } 763 }
596} 764}
597 765
@@ -606,13 +774,13 @@ static void asm_aref(ASMState *as, IRIns *ir)
606 ofs += 8*IR(ir->op2)->i; 774 ofs += 8*IR(ir->op2)->i;
607 if (checki16(ofs)) { 775 if (checki16(ofs)) {
608 base = ra_alloc1(as, refa, RSET_GPR); 776 base = ra_alloc1(as, refa, RSET_GPR);
609 emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); 777 emit_tsi(as, MIPSI_AADDIU, dest, base, ofs);
610 return; 778 return;
611 } 779 }
612 } 780 }
613 base = ra_alloc1(as, ir->op1, RSET_GPR); 781 base = ra_alloc1(as, ir->op1, RSET_GPR);
614 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); 782 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
615 emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); 783 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base);
616 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); 784 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
617} 785}
618 786
@@ -633,13 +801,14 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
633 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; 801 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
634 IRRef refkey = ir->op2; 802 IRRef refkey = ir->op2;
635 IRIns *irkey = IR(refkey); 803 IRIns *irkey = IR(refkey);
804 int isk = irref_isk(refkey);
636 IRType1 kt = irkey->t; 805 IRType1 kt = irkey->t;
637 uint32_t khash; 806 uint32_t khash;
638 MCLabel l_end, l_loop, l_next; 807 MCLabel l_end, l_loop, l_next;
639 808
640 rset_clear(allow, tab); 809 rset_clear(allow, tab);
641#if LJ_SOFTFP 810#if LJ_32 && LJ_SOFTFP
642 if (!irref_isk(refkey)) { 811 if (!isk) {
643 key = ra_alloc1(as, refkey, allow); 812 key = ra_alloc1(as, refkey, allow);
644 rset_clear(allow, key); 813 rset_clear(allow, key);
645 if (irkey[1].o == IR_HIOP) { 814 if (irkey[1].o == IR_HIOP) {
@@ -664,8 +833,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
664 } else if (!irt_ispri(kt)) { 833 } else if (!irt_ispri(kt)) {
665 key = ra_alloc1(as, refkey, allow); 834 key = ra_alloc1(as, refkey, allow);
666 rset_clear(allow, key); 835 rset_clear(allow, key);
836#if LJ_32
667 type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); 837 type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
668 rset_clear(allow, type); 838 rset_clear(allow, type);
839#endif
669 } 840 }
670#endif 841#endif
671 tmp2 = ra_scratch(as, allow); 842 tmp2 = ra_scratch(as, allow);
@@ -679,9 +850,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
679 else if (destused) 850 else if (destused)
680 emit_loada(as, dest, niltvg(J2G(as->J))); 851 emit_loada(as, dest, niltvg(J2G(as->J)));
681 /* Follow hash chain until the end. */ 852 /* Follow hash chain until the end. */
682 emit_move(as, dest, tmp2); 853 emit_move(as, dest, tmp1);
683 l_loop = --as->mcp; 854 l_loop = --as->mcp;
684 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next)); 855 emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next));
685 l_next = emit_label(as); 856 l_next = emit_label(as);
686 857
687 /* Type and value comparison. */ 858 /* Type and value comparison. */
@@ -693,38 +864,66 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
693 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 864 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
694 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 865 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
695 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ 866 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
696 emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next); 867 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
697 emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM); 868 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
869#if LJ_32
698 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); 870 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
699 } else { 871 } else {
700 if (irt_ispri(kt)) { 872 if (irt_ispri(kt)) {
701 emit_branch(as, MIPSI_BEQ, tmp2, type, l_end); 873 emit_branch(as, MIPSI_BEQ, tmp1, type, l_end);
702 } else { 874 } else {
703 emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); 875 emit_branch(as, MIPSI_BEQ, tmp2, key, l_end);
704 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr)); 876 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
705 emit_branch(as, MIPSI_BNE, tmp2, type, l_next); 877 emit_branch(as, MIPSI_BNE, tmp1, type, l_next);
878 }
879 }
880 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
881 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
882#else
883 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
884 emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
885 emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
886 } else if (irt_isaddr(kt)) {
887 Reg refk = tmp2;
888 if (isk) {
889 int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
890 refk = ra_allock(as, k, allow);
891 rset_clear(allow, refk);
706 } 892 }
893 emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end);
894 emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
895 } else {
896 Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
897 rset_clear(allow, pri);
898 lua_assert(irt_ispri(kt) && !irt_isnil(kt));
899 emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end);
900 emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
901 }
902 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
903 if (!isk && irt_isaddr(kt)) {
904 type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
905 emit_dst(as, MIPSI_DADDU, tmp2, key, type);
906 rset_clear(allow, type);
707 } 907 }
708 emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it)); 908#endif
709 *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);
710 909
711 /* Load main position relative to tab->node into dest. */ 910 /* Load main position relative to tab->node into dest. */
712 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 911 khash = isk ? ir_khash(irkey) : 1;
713 if (khash == 0) { 912 if (khash == 0) {
714 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 913 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
715 } else { 914 } else {
716 Reg tmphash = tmp1; 915 Reg tmphash = tmp1;
717 if (irref_isk(refkey)) 916 if (isk)
718 tmphash = ra_allock(as, khash, allow); 917 tmphash = ra_allock(as, khash, allow);
719 emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); 918 emit_dst(as, MIPSI_AADDU, dest, dest, tmp1);
720 lua_assert(sizeof(Node) == 24); 919 lua_assert(sizeof(Node) == 24);
721 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); 920 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
722 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); 921 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
723 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); 922 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
724 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); 923 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
725 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 924 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
726 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); 925 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
727 if (irref_isk(refkey)) { 926 if (isk) {
728 /* Nothing to do. */ 927 /* Nothing to do. */
729 } else if (irt_isstr(kt)) { 928 } else if (irt_isstr(kt)) {
730 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); 929 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
@@ -734,6 +933,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
734 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); 933 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
735 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); 934 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
736 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); 935 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
936#if LJ_32
737 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { 937 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
738 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); 938 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
739 if ((as->flags & JIT_F_MIPSXXR2)) { 939 if ((as->flags & JIT_F_MIPSXXR2)) {
@@ -756,6 +956,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
756 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); 956 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
757 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); 957 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
758 } 958 }
959#else
960 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
961 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
962 if (irt_isnum(kt)) {
963 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
964 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
965 emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
966#if !LJ_SOFTFP
967 emit_tg(as, MIPSI_DMFC1, tmp1, key);
968#endif
969 } else {
970 checkmclim(as);
971 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
972 emit_dta(as, MIPSI_SLL, tmp2, key, 0);
973 emit_dst(as, MIPSI_DADDU, tmp1, key, type);
974 }
975#endif
759 } 976 }
760 } 977 }
761} 978}
@@ -768,17 +985,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
768 int32_t kofs = ofs + (int32_t)offsetof(Node, key); 985 int32_t kofs = ofs + (int32_t)offsetof(Node, key);
769 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; 986 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
770 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 987 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
771 Reg key = RID_NONE, type = RID_TMP, idx = node;
772 RegSet allow = rset_exclude(RSET_GPR, node); 988 RegSet allow = rset_exclude(RSET_GPR, node);
989 Reg idx = node;
990#if LJ_32
991 Reg key = RID_NONE, type = RID_TMP;
773 int32_t lo, hi; 992 int32_t lo, hi;
993#else
994 Reg key = ra_scratch(as, allow);
995 int64_t k;
996#endif
774 lua_assert(ofs % sizeof(Node) == 0); 997 lua_assert(ofs % sizeof(Node) == 0);
775 if (ofs > 32736) { 998 if (ofs > 32736) {
776 idx = dest; 999 idx = dest;
777 rset_clear(allow, dest); 1000 rset_clear(allow, dest);
778 kofs = (int32_t)offsetof(Node, key); 1001 kofs = (int32_t)offsetof(Node, key);
779 } else if (ra_hasreg(dest)) { 1002 } else if (ra_hasreg(dest)) {
780 emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); 1003 emit_tsi(as, MIPSI_AADDIU, dest, node, ofs);
781 } 1004 }
1005#if LJ_32
782 if (!irt_ispri(irkey->t)) { 1006 if (!irt_ispri(irkey->t)) {
783 key = ra_scratch(as, allow); 1007 key = ra_scratch(as, allow);
784 rset_clear(allow, key); 1008 rset_clear(allow, key);
@@ -797,8 +1021,20 @@ nolo:
797 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); 1021 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
798 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); 1022 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
799 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); 1023 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
1024#else
1025 if (irt_ispri(irkey->t)) {
1026 lua_assert(!irt_isnil(irkey->t));
1027 k = ~((int64_t)~irt_toitype(irkey->t) << 47);
1028 } else if (irt_isnum(irkey->t)) {
1029 k = (int64_t)ir_knum(irkey)->u64;
1030 } else {
1031 k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
1032 }
1033 asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow));
1034 emit_tsi(as, MIPSI_LD, key, idx, kofs);
1035#endif
800 if (ofs > 32736) 1036 if (ofs > 32736)
801 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); 1037 emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow));
802} 1038}
803 1039
804static void asm_uref(ASMState *as, IRIns *ir) 1040static void asm_uref(ASMState *as, IRIns *ir)
@@ -807,19 +1043,19 @@ static void asm_uref(ASMState *as, IRIns *ir)
807 if (irref_isk(ir->op1)) { 1043 if (irref_isk(ir->op1)) {
808 GCfunc *fn = ir_kfunc(IR(ir->op1)); 1044 GCfunc *fn = ir_kfunc(IR(ir->op1));
809 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 1045 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
810 emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); 1046 emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
811 } else { 1047 } else {
812 Reg uv = ra_scratch(as, RSET_GPR); 1048 Reg uv = ra_scratch(as, RSET_GPR);
813 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 1049 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
814 if (ir->o == IR_UREFC) { 1050 if (ir->o == IR_UREFC) {
815 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1051 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
816 emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); 1052 emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
817 emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); 1053 emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
818 } else { 1054 } else {
819 emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); 1055 emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
820 } 1056 }
821 emit_tsi(as, MIPSI_LW, uv, func, 1057 emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
822 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); 1058 (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
823 } 1059 }
824} 1060}
825 1061
@@ -831,6 +1067,7 @@ static void asm_fref(ASMState *as, IRIns *ir)
831 1067
832static void asm_strref(ASMState *as, IRIns *ir) 1068static void asm_strref(ASMState *as, IRIns *ir)
833{ 1069{
1070#if LJ_32
834 Reg dest = ra_dest(as, ir, RSET_GPR); 1071 Reg dest = ra_dest(as, ir, RSET_GPR);
835 IRRef ref = ir->op2, refk = ir->op1; 1072 IRRef ref = ir->op2, refk = ir->op1;
836 int32_t ofs = (int32_t)sizeof(GCstr); 1073 int32_t ofs = (int32_t)sizeof(GCstr);
@@ -862,6 +1099,20 @@ static void asm_strref(ASMState *as, IRIns *ir)
862 else 1099 else
863 emit_dst(as, MIPSI_ADDU, dest, r, 1100 emit_dst(as, MIPSI_ADDU, dest, r,
864 ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); 1101 ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
1102#else
1103 RegSet allow = RSET_GPR;
1104 Reg dest = ra_dest(as, ir, allow);
1105 Reg base = ra_alloc1(as, ir->op1, allow);
1106 IRIns *irr = IR(ir->op2);
1107 int32_t ofs = sizeof(GCstr);
1108 rset_clear(allow, base);
1109 if (irref_isk(ir->op2) && checki16(ofs + irr->i)) {
1110 emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i);
1111 } else {
1112 emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs);
1113 emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow));
1114 }
1115#endif
865} 1116}
866 1117
867/* -- Loads and stores ---------------------------------------------------- */ 1118/* -- Loads and stores ---------------------------------------------------- */
@@ -875,7 +1126,7 @@ static MIPSIns asm_fxloadins(IRIns *ir)
875 case IRT_U16: return MIPSI_LHU; 1126 case IRT_U16: return MIPSI_LHU;
876 case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; 1127 case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1;
877 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; 1128 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
878 default: return MIPSI_LW; 1129 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
879 } 1130 }
880} 1131}
881 1132
@@ -886,7 +1137,7 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
886 case IRT_I16: case IRT_U16: return MIPSI_SH; 1137 case IRT_I16: case IRT_U16: return MIPSI_SH;
887 case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; 1138 case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1;
888 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; 1139 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
889 default: return MIPSI_SW; 1140 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
890 } 1141 }
891} 1142}
892 1143
@@ -898,13 +1149,13 @@ static void asm_fload(ASMState *as, IRIns *ir)
898 int32_t ofs; 1149 int32_t ofs;
899 if (ir->op1 == REF_NIL) { 1150 if (ir->op1 == REF_NIL) {
900 idx = RID_JGL; 1151 idx = RID_JGL;
901 ofs = (ir->op2 << 2) - 32768; 1152 ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
902 } else { 1153 } else {
903 idx = ra_alloc1(as, ir->op1, RSET_GPR); 1154 idx = ra_alloc1(as, ir->op1, RSET_GPR);
904 if (ir->op2 == IRFL_TAB_ARRAY) { 1155 if (ir->op2 == IRFL_TAB_ARRAY) {
905 ofs = asm_fuseabase(as, ir->op1); 1156 ofs = asm_fuseabase(as, ir->op1);
906 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1157 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
907 emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); 1158 emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs);
908 return; 1159 return;
909 } 1160 }
910 } 1161 }
@@ -949,36 +1200,59 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
949 1200
950static void asm_ahuvload(ASMState *as, IRIns *ir) 1201static void asm_ahuvload(ASMState *as, IRIns *ir)
951{ 1202{
952 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1203 int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
953 IRType t = hiop ? IRT_NUM : irt_type(ir->t);
954 Reg dest = RID_NONE, type = RID_TMP, idx; 1204 Reg dest = RID_NONE, type = RID_TMP, idx;
955 RegSet allow = RSET_GPR; 1205 RegSet allow = RSET_GPR;
956 int32_t ofs = 0; 1206 int32_t ofs = 0;
957 if (hiop && ra_used(ir+1)) { 1207 IRType1 t = ir->t;
958 type = ra_dest(as, ir+1, allow); 1208 if (hiop) {
959 rset_clear(allow, type); 1209 t.irt = IRT_NUM;
1210 if (ra_used(ir+1)) {
1211 type = ra_dest(as, ir+1, allow);
1212 rset_clear(allow, type);
1213 }
960 } 1214 }
961 if (ra_used(ir)) { 1215 if (ra_used(ir)) {
962 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1216 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
963 irt_isint(ir->t) || irt_isaddr(ir->t)); 1217 irt_isint(ir->t) || irt_isaddr(ir->t));
964 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1218 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
965 rset_clear(allow, dest); 1219 rset_clear(allow, dest);
1220#if LJ_64
1221 if (irt_isaddr(t))
1222 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1223 else if (irt_isint(t))
1224 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1225#endif
966 } 1226 }
967 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1227 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
968 rset_clear(allow, idx); 1228 rset_clear(allow, idx);
969 if (t == IRT_NUM) { 1229 if (irt_isnum(t)) {
970 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1230 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
971 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); 1231 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
972 } else { 1232 } else {
973 asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype_(t), allow)); 1233 asm_guard(as, MIPSI_BNE, type,
1234 ra_allock(as, (int32_t)irt_toitype(t), allow));
974 } 1235 }
1236#if LJ_32
975 if (ra_hasreg(dest)) { 1237 if (ra_hasreg(dest)) {
976 if (!LJ_SOFTFP && t == IRT_NUM) 1238 if (!LJ_SOFTFP && irt_isnum(t))
977 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); 1239 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
978 else 1240 else
979 emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); 1241 emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
980 } 1242 }
981 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); 1243 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
1244#else
1245 if (ra_hasreg(dest)) {
1246 if (!LJ_SOFTFP && irt_isnum(t)) {
1247 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
1248 dest = type;
1249 }
1250 } else {
1251 dest = type;
1252 }
1253 emit_dta(as, MIPSI_DSRA32, type, dest, 15);
1254 emit_tsi(as, MIPSI_LD, dest, idx, ofs);
1255#endif
982} 1256}
983 1257
984static void asm_ahustore(ASMState *as, IRIns *ir) 1258static void asm_ahustore(ASMState *as, IRIns *ir)
@@ -990,103 +1264,159 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
990 return; 1264 return;
991 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 1265 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
992 src = ra_alloc1(as, ir->op2, RSET_FPR); 1266 src = ra_alloc1(as, ir->op2, RSET_FPR);
1267 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1268 emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
993 } else { 1269 } else {
994 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1270#if LJ_32
995 if (!irt_ispri(ir->t)) { 1271 if (!irt_ispri(ir->t)) {
996 src = ra_alloc1(as, ir->op2, allow); 1272 src = ra_alloc1(as, ir->op2, allow);
997 rset_clear(allow, src); 1273 rset_clear(allow, src);
998 } 1274 }
999 if (hiop) 1275 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
1000 type = ra_alloc1(as, (ir+1)->op2, allow); 1276 type = ra_alloc1(as, (ir+1)->op2, allow);
1001 else 1277 else
1002 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1278 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1003 rset_clear(allow, type); 1279 rset_clear(allow, type);
1004 } 1280 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1005 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1006 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1007 emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
1008 } else {
1009 if (ra_hasreg(src)) 1281 if (ra_hasreg(src))
1010 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); 1282 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
1011 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); 1283 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
1284#else
1285 Reg tmp = RID_TMP;
1286 if (irt_ispri(ir->t)) {
1287 tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
1288 rset_clear(allow, tmp);
1289 } else {
1290 src = ra_alloc1(as, ir->op2, allow);
1291 rset_clear(allow, src);
1292 type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
1293 rset_clear(allow, type);
1294 }
1295 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1296 emit_tsi(as, MIPSI_SD, tmp, idx, ofs);
1297 if (ra_hasreg(src)) {
1298 if (irt_isinteger(ir->t)) {
1299 emit_dst(as, MIPSI_DADDU, tmp, tmp, type);
1300 emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0);
1301 } else {
1302 emit_dst(as, MIPSI_DADDU, tmp, src, type);
1303 }
1304 }
1305#endif
1012 } 1306 }
1013} 1307}
1014 1308
1015static void asm_sload(ASMState *as, IRIns *ir) 1309static void asm_sload(ASMState *as, IRIns *ir)
1016{ 1310{
1017 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1018 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
1019 IRType t = hiop ? IRT_NUM : irt_type(ir->t);
1020 Reg dest = RID_NONE, type = RID_NONE, base; 1311 Reg dest = RID_NONE, type = RID_NONE, base;
1021 RegSet allow = RSET_GPR; 1312 RegSet allow = RSET_GPR;
1313 IRType1 t = ir->t;
1314#if LJ_32
1315 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1316 int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
1317 if (hiop)
1318 t.irt = IRT_NUM;
1319#else
1320 int32_t ofs = 8*((int32_t)ir->op1-2);
1321#endif
1022 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1322 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
1023 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1323 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
1024#if LJ_SOFTFP 1324#if LJ_32 && LJ_SOFTFP
1025 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ 1325 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
1026 if (hiop && ra_used(ir+1)) { 1326 if (hiop && ra_used(ir+1)) {
1027 type = ra_dest(as, ir+1, allow); 1327 type = ra_dest(as, ir+1, allow);
1028 rset_clear(allow, type); 1328 rset_clear(allow, type);
1029 } 1329 }
1030#else 1330#else
1031 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { 1331 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1032 dest = ra_scratch(as, RSET_FPR); 1332 dest = ra_scratch(as, RSET_FPR);
1033 asm_tointg(as, ir, dest); 1333 asm_tointg(as, ir, dest);
1034 t = IRT_NUM; /* Continue with a regular number type check. */ 1334 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1035 } else 1335 } else
1036#endif 1336#endif
1037 if (ra_used(ir)) { 1337 if (ra_used(ir)) {
1038 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1338 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
1039 irt_isint(ir->t) || irt_isaddr(ir->t)); 1339 irt_isint(ir->t) || irt_isaddr(ir->t));
1040 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1340 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
1041 rset_clear(allow, dest); 1341 rset_clear(allow, dest);
1042 base = ra_alloc1(as, REF_BASE, allow); 1342 base = ra_alloc1(as, REF_BASE, allow);
1043 rset_clear(allow, base); 1343 rset_clear(allow, base);
1044 if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { 1344 if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
1045 if (t == IRT_INT) { 1345 if (irt_isint(t)) {
1046 Reg tmp = ra_scratch(as, RSET_FPR); 1346 Reg tmp = ra_scratch(as, RSET_FPR);
1047 emit_tg(as, MIPSI_MFC1, dest, tmp); 1347 emit_tg(as, MIPSI_MFC1, dest, tmp);
1048 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); 1348 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1049 dest = tmp; 1349 dest = tmp;
1050 t = IRT_NUM; /* Check for original type. */ 1350 t.irt = IRT_NUM; /* Check for original type. */
1051 } else { 1351 } else {
1052 Reg tmp = ra_scratch(as, RSET_GPR); 1352 Reg tmp = ra_scratch(as, RSET_GPR);
1053 emit_fg(as, MIPSI_CVT_D_W, dest, dest); 1353 emit_fg(as, MIPSI_CVT_D_W, dest, dest);
1054 emit_tg(as, MIPSI_MTC1, tmp, dest); 1354 emit_tg(as, MIPSI_MTC1, tmp, dest);
1055 dest = tmp; 1355 dest = tmp;
1056 t = IRT_INT; /* Check for original type. */ 1356 t.irt = IRT_INT; /* Check for original type. */
1057 } 1357 }
1058 } 1358 }
1359#if LJ_64
1360 else if (irt_isaddr(t)) {
1361 /* Clear type from pointers. */
1362 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1363 } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
1364 /* Sign-extend integers. */
1365 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1366 }
1367#endif
1059 goto dotypecheck; 1368 goto dotypecheck;
1060 } 1369 }
1061 base = ra_alloc1(as, REF_BASE, allow); 1370 base = ra_alloc1(as, REF_BASE, allow);
1062 rset_clear(allow, base); 1371 rset_clear(allow, base);
1063dotypecheck: 1372dotypecheck:
1373#if LJ_32
1064 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1374 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1065 if (ra_noreg(type)) { 1375 if (ra_noreg(type))
1066 if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && 1376 type = RID_TMP;
1067 rset_test((as->freeset & allow), dest+1)) { 1377 if (irt_isnum(t)) {
1068 type = dest+1;
1069 ra_modified(as, type);
1070 } else {
1071 type = RID_TMP;
1072 }
1073 }
1074 if (t == IRT_NUM) {
1075 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1378 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1076 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); 1379 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
1077 } else { 1380 } else {
1078 Reg ktype = ra_allock(as, irt_toitype_(t), allow); 1381 Reg ktype = ra_allock(as, irt_toitype(t), allow);
1079 asm_guard(as, MIPSI_BNE, type, ktype); 1382 asm_guard(as, MIPSI_BNE, type, ktype);
1080 } 1383 }
1081 } 1384 }
1082 if (ra_hasreg(dest)) { 1385 if (ra_hasreg(dest)) {
1083 if (!LJ_SOFTFP && t == IRT_NUM) 1386 if (!LJ_SOFTFP && irt_isnum(t))
1084 emit_hsi(as, MIPSI_LDC1, dest, base, ofs); 1387 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1085 else 1388 else
1086 emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); 1389 emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
1087 } 1390 }
1088 if (ra_hasreg(type)) 1391 if (ra_hasreg(type))
1089 emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); 1392 emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
1393#else
1394 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1395 type = dest < RID_MAX_GPR ? dest : RID_TMP;
1396 if (irt_ispri(t)) {
1397 asm_guard(as, MIPSI_BNE, type,
1398 ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
1399 } else {
1400 if (irt_isnum(t)) {
1401 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1402 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
1403 if (ra_hasreg(dest))
1404 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1405 } else {
1406 asm_guard(as, MIPSI_BNE, RID_TMP,
1407 ra_allock(as, (int32_t)irt_toitype(t), allow));
1408 }
1409 emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15);
1410 }
1411 emit_tsi(as, MIPSI_LD, type, base, ofs);
1412 } else if (ra_hasreg(dest)) {
1413 if (irt_isnum(t))
1414 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1415 else
1416 emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
1417 ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0));
1418 }
1419#endif
1090} 1420}
1091 1421
1092/* -- Allocations --------------------------------------------------------- */ 1422/* -- Allocations --------------------------------------------------------- */
@@ -1113,8 +1443,8 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1113 /* Initialize immutable cdata object. */ 1443 /* Initialize immutable cdata object. */
1114 if (ir->o == IR_CNEWI) { 1444 if (ir->o == IR_CNEWI) {
1115 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1445 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1446#if LJ_32
1116 int32_t ofs = sizeof(GCcdata); 1447 int32_t ofs = sizeof(GCcdata);
1117 lua_assert(sz == 4 || sz == 8);
1118 if (sz == 8) { 1448 if (sz == 8) {
1119 ofs += 4; 1449 ofs += 4;
1120 lua_assert((ir+1)->o == IR_HIOP); 1450 lua_assert((ir+1)->o == IR_HIOP);
@@ -1127,6 +1457,11 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1127 if (ofs == sizeof(GCcdata)) break; 1457 if (ofs == sizeof(GCcdata)) break;
1128 ofs -= 4; if (LJ_BE) ir++; else ir--; 1458 ofs -= 4; if (LJ_BE) ir++; else ir--;
1129 } 1459 }
1460#else
1461 emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow),
1462 RID_RET, sizeof(GCcdata));
1463#endif
1464 lua_assert(sz == 4 || sz == 8);
1130 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ 1465 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1131 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; 1466 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1132 args[0] = ASMREF_L; /* lua_State *L */ 1467 args[0] = ASMREF_L; /* lua_State *L */
@@ -1161,7 +1496,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1161 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1496 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1162 Reg link = RID_TMP; 1497 Reg link = RID_TMP;
1163 MCLabel l_end = emit_label(as); 1498 MCLabel l_end = emit_label(as);
1164 emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); 1499 emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist));
1165 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1500 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
1166 emit_setgl(as, tab, gc.grayagain); 1501 emit_setgl(as, tab, gc.grayagain);
1167 emit_getgl(as, link, gc.grayagain); 1502 emit_getgl(as, link, gc.grayagain);
@@ -1184,7 +1519,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1184 args[0] = ASMREF_TMP1; /* global_State *g */ 1519 args[0] = ASMREF_TMP1; /* global_State *g */
1185 args[1] = ir->op1; /* TValue *tv */ 1520 args[1] = ir->op1; /* TValue *tv */
1186 asm_gencall(as, ci, args); 1521 asm_gencall(as, ci, args);
1187 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 1522 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1188 obj = IR(ir->op1)->r; 1523 obj = IR(ir->op1)->r;
1189 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); 1524 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
1190 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); 1525 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
@@ -1230,8 +1565,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1230 1565
1231static void asm_add(ASMState *as, IRIns *ir) 1566static void asm_add(ASMState *as, IRIns *ir)
1232{ 1567{
1568 IRType1 t = ir->t;
1233#if !LJ_SOFTFP 1569#if !LJ_SOFTFP
1234 if (irt_isnum(ir->t)) { 1570 if (irt_isnum(t)) {
1235 asm_fparith(as, ir, MIPSI_ADD_D); 1571 asm_fparith(as, ir, MIPSI_ADD_D);
1236 } else 1572 } else
1237#endif 1573#endif
@@ -1239,14 +1575,16 @@ static void asm_add(ASMState *as, IRIns *ir)
1239 Reg dest = ra_dest(as, ir, RSET_GPR); 1575 Reg dest = ra_dest(as, ir, RSET_GPR);
1240 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1576 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1241 if (irref_isk(ir->op2)) { 1577 if (irref_isk(ir->op2)) {
1242 int32_t k = IR(ir->op2)->i; 1578 intptr_t k = get_kval(IR(ir->op2));
1243 if (checki16(k)) { 1579 if (checki16(k)) {
1244 emit_tsi(as, MIPSI_ADDIU, dest, left, k); 1580 emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest,
1581 left, k);
1245 return; 1582 return;
1246 } 1583 }
1247 } 1584 }
1248 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); 1585 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1249 emit_dst(as, MIPSI_ADDU, dest, left, right); 1586 emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest,
1587 left, right);
1250 } 1588 }
1251} 1589}
1252 1590
@@ -1261,7 +1599,8 @@ static void asm_sub(ASMState *as, IRIns *ir)
1261 Reg dest = ra_dest(as, ir, RSET_GPR); 1599 Reg dest = ra_dest(as, ir, RSET_GPR);
1262 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1600 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1263 right = (left >> 8); left &= 255; 1601 right = (left >> 8); left &= 255;
1264 emit_dst(as, MIPSI_SUBU, dest, left, right); 1602 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1603 left, right);
1265 } 1604 }
1266} 1605}
1267 1606
@@ -1276,13 +1615,49 @@ static void asm_mul(ASMState *as, IRIns *ir)
1276 Reg dest = ra_dest(as, ir, RSET_GPR); 1615 Reg dest = ra_dest(as, ir, RSET_GPR);
1277 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1616 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1278 right = (left >> 8); left &= 255; 1617 right = (left >> 8); left &= 255;
1279 emit_dst(as, MIPSI_MUL, dest, left, right); 1618 if (LJ_64 && irt_is64(ir->t)) {
1619 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1620 emit_dst(as, MIPSI_DMULT, 0, left, right);
1621 } else {
1622 emit_dst(as, MIPSI_MUL, dest, left, right);
1623 }
1280 } 1624 }
1281} 1625}
1282 1626
1283#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D) 1627static void asm_mod(ASMState *as, IRIns *ir)
1284#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) 1628{
1285#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) 1629#if LJ_64 && LJ_HASFFI
1630 if (!irt_isint(ir->t))
1631 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1632 IRCALL_lj_carith_modu64);
1633 else
1634#endif
1635 asm_callid(as, ir, IRCALL_lj_vm_modi);
1636}
1637
1638#if !LJ_SOFTFP
1639static void asm_pow(ASMState *as, IRIns *ir)
1640{
1641#if LJ_64 && LJ_HASFFI
1642 if (!irt_isnum(ir->t))
1643 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1644 IRCALL_lj_carith_powu64);
1645 else
1646#endif
1647 asm_callid(as, ir, IRCALL_lj_vm_powi);
1648}
1649
1650static void asm_div(ASMState *as, IRIns *ir)
1651{
1652#if LJ_64 && LJ_HASFFI
1653 if (!irt_isnum(ir->t))
1654 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1655 IRCALL_lj_carith_divu64);
1656 else
1657#endif
1658 asm_fparith(as, ir, MIPSI_DIV_D);
1659}
1660#endif
1286 1661
1287static void asm_neg(ASMState *as, IRIns *ir) 1662static void asm_neg(ASMState *as, IRIns *ir)
1288{ 1663{
@@ -1294,7 +1669,8 @@ static void asm_neg(ASMState *as, IRIns *ir)
1294 { 1669 {
1295 Reg dest = ra_dest(as, ir, RSET_GPR); 1670 Reg dest = ra_dest(as, ir, RSET_GPR);
1296 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1671 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1297 emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); 1672 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1673 RID_ZERO, left);
1298 } 1674 }
1299} 1675}
1300 1676
@@ -1305,6 +1681,7 @@ static void asm_neg(ASMState *as, IRIns *ir)
1305static void asm_arithov(ASMState *as, IRIns *ir) 1681static void asm_arithov(ASMState *as, IRIns *ir)
1306{ 1682{
1307 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1683 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
1684 lua_assert(!irt_is64(ir->t));
1308 if (irref_isk(ir->op2)) { 1685 if (irref_isk(ir->op2)) {
1309 int k = IR(ir->op2)->i; 1686 int k = IR(ir->op2)->i;
1310 if (ir->o == IR_SUBOV) k = -k; 1687 if (ir->o == IR_SUBOV) k = -k;
@@ -1352,7 +1729,7 @@ static void asm_mulov(ASMState *as, IRIns *ir)
1352 emit_dst(as, MIPSI_MULT, 0, left, right); 1729 emit_dst(as, MIPSI_MULT, 0, left, right);
1353} 1730}
1354 1731
1355#if LJ_HASFFI 1732#if LJ_32 && LJ_HASFFI
1356static void asm_add64(ASMState *as, IRIns *ir) 1733static void asm_add64(ASMState *as, IRIns *ir)
1357{ 1734{
1358 Reg dest = ra_dest(as, ir, RSET_GPR); 1735 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1454,6 +1831,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
1454{ 1831{
1455 Reg dest = ra_dest(as, ir, RSET_GPR); 1832 Reg dest = ra_dest(as, ir, RSET_GPR);
1456 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1833 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1834#if LJ_32
1457 if ((as->flags & JIT_F_MIPSXXR2)) { 1835 if ((as->flags & JIT_F_MIPSXXR2)) {
1458 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); 1836 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
1459 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); 1837 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
@@ -1469,6 +1847,15 @@ static void asm_bswap(ASMState *as, IRIns *ir)
1469 emit_dta(as, MIPSI_SRL, tmp, left, 24); 1847 emit_dta(as, MIPSI_SRL, tmp, left, 24);
1470 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); 1848 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
1471 } 1849 }
1850#else
1851 if (irt_is64(ir->t)) {
1852 emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP);
1853 emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left);
1854 } else {
1855 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
1856 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
1857 }
1858#endif
1472} 1859}
1473 1860
1474static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 1861static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
@@ -1476,7 +1863,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1476 Reg dest = ra_dest(as, ir, RSET_GPR); 1863 Reg dest = ra_dest(as, ir, RSET_GPR);
1477 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1864 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1478 if (irref_isk(ir->op2)) { 1865 if (irref_isk(ir->op2)) {
1479 int32_t k = IR(ir->op2)->i; 1866 intptr_t k = get_kval(IR(ir->op2));
1480 if (checku16(k)) { 1867 if (checku16(k)) {
1481 emit_tsi(as, mik, dest, left, k); 1868 emit_tsi(as, mik, dest, left, k);
1482 return; 1869 return;
@@ -1494,11 +1881,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1494{ 1881{
1495 Reg dest = ra_dest(as, ir, RSET_GPR); 1882 Reg dest = ra_dest(as, ir, RSET_GPR);
1496 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1883 if (irref_isk(ir->op2)) { /* Constant shifts. */
1497 uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); 1884 uint32_t shift = (uint32_t)IR(ir->op2)->i;
1498 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); 1885 if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D;
1886 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR),
1887 (shift & 31));
1499 } else { 1888 } else {
1500 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1889 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1501 right = (left >> 8); left &= 255; 1890 right = (left >> 8); left &= 255;
1891 if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV;
1502 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ 1892 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */
1503 } 1893 }
1504} 1894}
@@ -1510,7 +1900,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1510 1900
1511static void asm_bror(ASMState *as, IRIns *ir) 1901static void asm_bror(ASMState *as, IRIns *ir)
1512{ 1902{
1513 if ((as->flags & JIT_F_MIPSXXR2)) { 1903 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
1514 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 1904 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
1515 } else { 1905 } else {
1516 Reg dest = ra_dest(as, ir, RSET_GPR); 1906 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1529,7 +1919,7 @@ static void asm_bror(ASMState *as, IRIns *ir)
1529 } 1919 }
1530} 1920}
1531 1921
1532#if LJ_SOFTFP 1922#if LJ_32 && LJ_SOFTFP
1533static void asm_sfpmin_max(ASMState *as, IRIns *ir) 1923static void asm_sfpmin_max(ASMState *as, IRIns *ir)
1534{ 1924{
1535 CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; 1925 CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
@@ -1578,7 +1968,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1578 1968
1579/* -- Comparisons --------------------------------------------------------- */ 1969/* -- Comparisons --------------------------------------------------------- */
1580 1970
1581#if LJ_SOFTFP 1971#if LJ_32 && LJ_SOFTFP
1582/* SFP comparisons. */ 1972/* SFP comparisons. */
1583static void asm_sfpcomp(ASMState *as, IRIns *ir) 1973static void asm_sfpcomp(ASMState *as, IRIns *ir)
1584{ 1974{
@@ -1651,13 +2041,13 @@ static void asm_comp(ASMState *as, IRIns *ir)
1651 } else { 2041 } else {
1652 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); 2042 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
1653 if (op == IR_ABC) op = IR_UGT; 2043 if (op == IR_ABC) op = IR_UGT;
1654 if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { 2044 if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) {
1655 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : 2045 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
1656 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); 2046 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
1657 asm_guard(as, mi, left, 0); 2047 asm_guard(as, mi, left, 0);
1658 } else { 2048 } else {
1659 if (irref_isk(ir->op2)) { 2049 if (irref_isk(ir->op2)) {
1660 int32_t k = IR(ir->op2)->i; 2050 intptr_t k = get_kval(IR(ir->op2));
1661 if ((op&2)) k++; 2051 if ((op&2)) k++;
1662 if (checki16(k)) { 2052 if (checki16(k)) {
1663 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); 2053 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -1676,7 +2066,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
1676 2066
1677static void asm_equal(ASMState *as, IRIns *ir) 2067static void asm_equal(ASMState *as, IRIns *ir)
1678{ 2068{
1679 Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR); 2069 Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
2070 RSET_FPR : RSET_GPR);
1680 right = (left >> 8); left &= 255; 2071 right = (left >> 8); left &= 255;
1681 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2072 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1682 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2073 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
@@ -1686,7 +2077,7 @@ static void asm_equal(ASMState *as, IRIns *ir)
1686 } 2077 }
1687} 2078}
1688 2079
1689#if LJ_HASFFI 2080#if LJ_32 && LJ_HASFFI
1690/* 64 bit integer comparisons. */ 2081/* 64 bit integer comparisons. */
1691static void asm_comp64(ASMState *as, IRIns *ir) 2082static void asm_comp64(ASMState *as, IRIns *ir)
1692{ 2083{
@@ -1728,7 +2119,7 @@ static void asm_comp64eq(ASMState *as, IRIns *ir)
1728/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 2119/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1729static void asm_hiop(ASMState *as, IRIns *ir) 2120static void asm_hiop(ASMState *as, IRIns *ir)
1730{ 2121{
1731#if LJ_HASFFI || LJ_SOFTFP 2122#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
1732 /* HIOP is marked as a store because it needs its own DCE logic. */ 2123 /* HIOP is marked as a store because it needs its own DCE logic. */
1733 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2124 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1734 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2125 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
@@ -1832,36 +2223,42 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1832 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; 2223 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
1833 ExitNo oldsnap = as->snapno; 2224 ExitNo oldsnap = as->snapno;
1834 rset_clear(allow, pbase); 2225 rset_clear(allow, pbase);
2226#if LJ_32
1835 tmp = allow ? rset_pickbot(allow) : 2227 tmp = allow ? rset_pickbot(allow) :
1836 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); 2228 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
2229#else
2230 tmp = allow ? rset_pickbot(allow) : RID_RET;
2231#endif
1837 as->snapno = exitno; 2232 as->snapno = exitno;
1838 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); 2233 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1839 as->snapno = oldsnap; 2234 as->snapno = oldsnap;
1840 if (allow == RSET_EMPTY) /* Restore temp. register. */ 2235 if (allow == RSET_EMPTY) /* Restore temp. register. */
1841 emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); 2236 emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0);
1842 else 2237 else
1843 ra_modified(as, tmp); 2238 ra_modified(as, tmp);
1844 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); 2239 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
1845 emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); 2240 emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase);
1846 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); 2241 emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack));
1847 if (pbase == RID_TMP) 2242 if (pbase == RID_TMP)
1848 emit_getgl(as, RID_TMP, jit_base); 2243 emit_getgl(as, RID_TMP, jit_base);
1849 emit_getgl(as, tmp, cur_L); 2244 emit_getgl(as, tmp, cur_L);
1850 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2245 if (allow == RSET_EMPTY) /* Spill temp. register. */
1851 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); 2246 emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0);
1852} 2247}
1853 2248
1854/* Restore Lua stack from on-trace state. */ 2249/* Restore Lua stack from on-trace state. */
1855static void asm_stack_restore(ASMState *as, SnapShot *snap) 2250static void asm_stack_restore(ASMState *as, SnapShot *snap)
1856{ 2251{
1857 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2252 SnapEntry *map = &as->T->snapmap[snap->mapofs];
1858 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2253#if LJ_32 || defined(LUA_USE_ASSERT)
2254 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2255#endif
1859 MSize n, nent = snap->nent; 2256 MSize n, nent = snap->nent;
1860 /* Store the value of all modified slots to the Lua stack. */ 2257 /* Store the value of all modified slots to the Lua stack. */
1861 for (n = 0; n < nent; n++) { 2258 for (n = 0; n < nent; n++) {
1862 SnapEntry sn = map[n]; 2259 SnapEntry sn = map[n];
1863 BCReg s = snap_slot(sn); 2260 BCReg s = snap_slot(sn);
1864 int32_t ofs = 8*((int32_t)s-1); 2261 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1865 IRRef ref = snap_ref(sn); 2262 IRRef ref = snap_ref(sn);
1866 IRIns *ir = IR(ref); 2263 IRIns *ir = IR(ref);
1867 if ((sn & SNAP_NORESTORE)) 2264 if ((sn & SNAP_NORESTORE))
@@ -1881,8 +2278,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1881 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); 2278 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
1882#endif 2279#endif
1883 } else { 2280 } else {
1884 Reg type; 2281#if LJ_32
1885 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2282 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2283 Reg type;
1886 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2284 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
1887 if (!irt_ispri(ir->t)) { 2285 if (!irt_ispri(ir->t)) {
1888 Reg src = ra_alloc1(as, ref, allow); 2286 Reg src = ra_alloc1(as, ref, allow);
@@ -1900,6 +2298,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1900 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2298 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1901 } 2299 }
1902 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); 2300 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
2301#else
2302 asm_tvstore64(as, RID_BASE, ofs, ref);
2303#endif
1903 } 2304 }
1904 checkmclim(as); 2305 checkmclim(as);
1905 } 2306 }
@@ -1923,7 +2324,7 @@ static void asm_gc_check(ASMState *as)
1923 args[0] = ASMREF_TMP1; /* global_State *g */ 2324 args[0] = ASMREF_TMP1; /* global_State *g */
1924 args[1] = ASMREF_TMP2; /* MSize steps */ 2325 args[1] = ASMREF_TMP2; /* MSize steps */
1925 asm_gencall(as, ci, args); 2326 asm_gencall(as, ci, args);
1926 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 2327 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1927 tmp = ra_releasetmp(as, ASMREF_TMP2); 2328 tmp = ra_releasetmp(as, ASMREF_TMP2);
1928 emit_loadi(as, tmp, as->gcsteps); 2329 emit_loadi(as, tmp, as->gcsteps);
1929 /* Jump around GC step if GC total < GC threshold. */ 2330 /* Jump around GC step if GC total < GC threshold. */
@@ -1998,7 +2399,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1998 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; 2399 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
1999 int32_t spadj = as->T->spadjust; 2400 int32_t spadj = as->T->spadjust;
2000 MCode *p = as->mctop-1; 2401 MCode *p = as->mctop-1;
2001 *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; 2402 *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
2002 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); 2403 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
2003} 2404}
2004 2405
@@ -2016,9 +2417,14 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2016{ 2417{
2017 IRRef args[CCI_NARGS_MAX*2]; 2418 IRRef args[CCI_NARGS_MAX*2];
2018 uint32_t i, nargs = CCI_XNARGS(ci); 2419 uint32_t i, nargs = CCI_XNARGS(ci);
2420#if LJ_32
2019 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2421 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2422#else
2423 int nslots = 0, ngpr = REGARG_NUMGPR;
2424#endif
2020 asm_collectargs(as, ir, ci, args); 2425 asm_collectargs(as, ir, ci, args);
2021 for (i = 0; i < nargs; i++) { 2426 for (i = 0; i < nargs; i++) {
2427#if LJ_32
2022 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && 2428 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) &&
2023 nfpr > 0 && !(ci->flags & CCI_VARARG)) { 2429 nfpr > 0 && !(ci->flags & CCI_VARARG)) {
2024 nfpr--; 2430 nfpr--;
@@ -2031,6 +2437,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2031 nfpr = 0; 2437 nfpr = 0;
2032 if (ngpr > 0) ngpr--; else nslots++; 2438 if (ngpr > 0) ngpr--; else nslots++;
2033 } 2439 }
2440#else
2441 if (ngpr > 0) ngpr--; else nslots += 2;
2442#endif
2034 } 2443 }
2035 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2444 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2036 as->evenspill = nslots; 2445 as->evenspill = nslots;
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index 20ecb7a2..8a9ee24d 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -3,6 +3,28 @@
3** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4*/ 4*/
5 5
6#if LJ_64
7static intptr_t get_k64val(IRIns *ir)
8{
9 if (ir->o == IR_KINT64) {
10 return (intptr_t)ir_kint64(ir)->u64;
11 } else if (ir->o == IR_KGC) {
12 return (intptr_t)ir_kgc(ir);
13 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
14 return (intptr_t)ir_kptr(ir);
15 } else {
16 lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
17 return ir->i; /* Sign-extended. */
18 }
19}
20#endif
21
22#if LJ_64
23#define get_kval(ir) get_k64val(ir)
24#else
25#define get_kval(ir) ((ir)->i)
26#endif
27
6/* -- Emit basic instructions --------------------------------------------- */ 28/* -- Emit basic instructions --------------------------------------------- */
7 29
8static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) 30static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt)
@@ -35,7 +57,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
35 57
36static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) 58static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
37{ 59{
38 if ((as->flags & JIT_F_MIPSXXR2)) { 60 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
39 emit_dta(as, MIPSI_ROTR, dest, src, shift); 61 emit_dta(as, MIPSI_ROTR, dest, src, shift);
40 } else { 62 } else {
41 emit_dst(as, MIPSI_OR, dest, dest, tmp); 63 emit_dst(as, MIPSI_OR, dest, dest, tmp);
@@ -44,13 +66,21 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
44 } 66 }
45} 67}
46 68
69#if LJ_64
70static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
71 uint32_t lsb)
72{
73 *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb);
74}
75#endif
76
47/* -- Emit loads/stores --------------------------------------------------- */ 77/* -- Emit loads/stores --------------------------------------------------- */
48 78
49/* Prefer rematerialization of BASE/L from global_State over spills. */ 79/* Prefer rematerialization of BASE/L from global_State over spills. */
50#define emit_canremat(ref) ((ref) <= REF_BASE) 80#define emit_canremat(ref) ((ref) <= REF_BASE)
51 81
52/* Try to find a one step delta relative to another constant. */ 82/* Try to find a one step delta relative to another constant. */
53static int emit_kdelta1(ASMState *as, Reg t, int32_t i) 83static int emit_kdelta1(ASMState *as, Reg t, intptr_t i)
54{ 84{
55 RegSet work = ~as->freeset & RSET_GPR; 85 RegSet work = ~as->freeset & RSET_GPR;
56 while (work) { 86 while (work) {
@@ -58,9 +88,10 @@ static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
58 IRRef ref = regcost_ref(as->cost[r]); 88 IRRef ref = regcost_ref(as->cost[r]);
59 lua_assert(r != t); 89 lua_assert(r != t);
60 if (ref < ASMREF_L) { 90 if (ref < ASMREF_L) {
61 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 91 intptr_t delta = (intptr_t)((uintptr_t)i -
92 (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref))));
62 if (checki16(delta)) { 93 if (checki16(delta)) {
63 emit_tsi(as, MIPSI_ADDIU, t, r, delta); 94 emit_tsi(as, MIPSI_AADDIU, t, r, delta);
64 return 1; 95 return 1;
65 } 96 }
66 } 97 }
@@ -76,8 +107,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
76 emit_ti(as, MIPSI_LI, r, i); 107 emit_ti(as, MIPSI_LI, r, i);
77 } else { 108 } else {
78 if ((i & 0xffff)) { 109 if ((i & 0xffff)) {
79 int32_t jgl = i32ptr(J2G(as->J)); 110 intptr_t jgl = (intptr_t)(void *)J2G(as->J);
80 if ((uint32_t)(i-jgl) < 65536) { 111 if ((uintptr_t)(i-jgl) < 65536) {
81 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); 112 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768);
82 return; 113 return;
83 } else if (emit_kdelta1(as, r, i)) { 114 } else if (emit_kdelta1(as, r, i)) {
@@ -92,7 +123,39 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
92 } 123 }
93} 124}
94 125
126#if LJ_64
127/* Load a 64 bit constant into a GPR. */
128static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
129{
130 if (checki32((int64_t)u64)) {
131 emit_loadi(as, r, (int32_t)u64);
132 } else {
133 uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J);
134 if (delta < 65536) {
135 emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768));
136 } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
137 return;
138 } else {
139 if ((u64 & 0xffff)) {
140 emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
141 }
142 if (((u64 >> 16) & 0xffff)) {
143 emit_dta(as, MIPSI_DSLL, r, r, 16);
144 emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff);
145 emit_dta(as, MIPSI_DSLL, r, r, 16);
146 } else {
147 emit_dta(as, MIPSI_DSLL32, r, r, 0);
148 }
149 emit_loadi(as, r, (int32_t)(u64 >> 32));
150 }
151 /* TODO: There are probably more optimization opportunities. */
152 }
153}
154
155#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
156#else
95#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 157#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
158#endif
96 159
97static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); 160static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
98static void ra_allockreg(ASMState *as, intptr_t k, Reg r); 161static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
@@ -100,8 +163,8 @@ static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
100/* Get/set from constant pointer. */ 163/* Get/set from constant pointer. */
101static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) 164static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
102{ 165{
103 int32_t jgl = i32ptr(J2G(as->J)); 166 intptr_t jgl = (intptr_t)(J2G(as->J));
104 int32_t i = i32ptr(p); 167 intptr_t i = (intptr_t)(p);
105 Reg base; 168 Reg base;
106 if ((uint32_t)(i-jgl) < 65536) { 169 if ((uint32_t)(i-jgl) < 65536) {
107 i = i-jgl-32768; 170 i = i-jgl-32768;
@@ -112,8 +175,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
112 emit_tsi(as, mi, r, base, i); 175 emit_tsi(as, mi, r, base, i);
113} 176}
114 177
178#if LJ_64
179static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
180{
181 const uint64_t *k = &ir_k64(ir)->u64;
182 Reg r64 = r;
183 if (rset_test(RSET_FPR, r)) {
184 r64 = RID_TMP;
185 emit_tg(as, MIPSI_DMTC1, r64, r);
186 }
187 if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536)
188 emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0);
189 else
190 emit_loadu64(as, r64, *k);
191}
192#else
115#define emit_loadk64(as, r, ir) \ 193#define emit_loadk64(as, r, ir) \
116 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) 194 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
195#endif
117 196
118/* Get/set global_State fields. */ 197/* Get/set global_State fields. */
119static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) 198static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@@ -122,9 +201,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
122} 201}
123 202
124#define emit_getgl(as, r, field) \ 203#define emit_getgl(as, r, field) \
125 emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) 204 emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field))
126#define emit_setgl(as, r, field) \ 205#define emit_setgl(as, r, field) \
127 emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) 206 emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field))
128 207
129/* Trace number is determined from per-trace exit stubs. */ 208/* Trace number is determined from per-trace exit stubs. */
130#define emit_setvmstate(as, i) UNUSED(i) 209#define emit_setvmstate(as, i) UNUSED(i)
@@ -164,7 +243,7 @@ static void emit_call(ASMState *as, void *target, int needcfa)
164 needcfa = 1; 243 needcfa = 1;
165 } 244 }
166 as->mcp = p; 245 as->mcp = p;
167 if (needcfa) ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); 246 if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
168} 247}
169 248
170/* -- Emit generic operations --------------------------------------------- */ 249/* -- Emit generic operations --------------------------------------------- */
@@ -185,7 +264,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
185static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) 264static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
186{ 265{
187 if (r < RID_MAX_GPR) 266 if (r < RID_MAX_GPR)
188 emit_tsi(as, MIPSI_LW, r, base, ofs); 267 emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs);
189 else 268 else
190 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, 269 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
191 (r & 31), base, ofs); 270 (r & 31), base, ofs);
@@ -195,7 +274,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
195static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) 274static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
196{ 275{
197 if (r < RID_MAX_GPR) 276 if (r < RID_MAX_GPR)
198 emit_tsi(as, MIPSI_SW, r, base, ofs); 277 emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs);
199 else 278 else
200 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, 279 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
201 (r&31), base, ofs); 280 (r&31), base, ofs);
@@ -206,7 +285,7 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
206{ 285{
207 if (ofs) { 286 if (ofs) {
208 lua_assert(checki16(ofs)); 287 lua_assert(checki16(ofs));
209 emit_tsi(as, MIPSI_ADDIU, r, r, ofs); 288 emit_tsi(as, MIPSI_AADDIU, r, r, ofs);
210 } 289 }
211} 290}
212 291
diff --git a/src/lj_jit.h b/src/lj_jit.h
index ddcb576c..92054e3d 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -337,6 +337,10 @@ enum {
337#endif 337#endif
338#if LJ_TARGET_MIPS 338#if LJ_TARGET_MIPS
339 LJ_K64_2P31, /* 2^31 */ 339 LJ_K64_2P31, /* 2^31 */
340#if LJ_64
341 LJ_K64_2P63, /* 2^63 */
342 LJ_K64_M2P64, /* -2^64 */
343#endif
340#endif 344#endif
341 LJ_K64__MAX, 345 LJ_K64__MAX,
342}; 346};
@@ -352,6 +356,10 @@ enum {
352#if LJ_TARGET_PPC || LJ_TARGET_MIPS 356#if LJ_TARGET_PPC || LJ_TARGET_MIPS
353 LJ_K32_2P31, /* 2^31 */ 357 LJ_K32_2P31, /* 2^31 */
354#endif 358#endif
359#if LJ_TARGET_MIPS64
360 LJ_K32_2P63, /* 2^63 */
361 LJ_K32_M2P64, /* -2^64 */
362#endif
355 LJ_K32__MAX 363 LJ_K32__MAX
356}; 364};
357 365
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index a33a4c5d..0f29a3ce 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -206,7 +206,7 @@ static void mcode_protect(jit_State *J, int prot)
206 206
207#if LJ_TARGET_X64 207#if LJ_TARGET_X64
208#define mcode_validptr(p) ((p) && (uintptr_t)(p) < (uintptr_t)1<<47) 208#define mcode_validptr(p) ((p) && (uintptr_t)(p) < (uintptr_t)1<<47)
209#elif LJ_TARGET_ARM64 209#elif LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
210/* We have no clue about the valid VA range. It could be 39 - 52 bits. */ 210/* We have no clue about the valid VA range. It could be 39 - 52 bits. */
211#define mcode_validptr(p) (p) 211#define mcode_validptr(p) (p)
212#else 212#else
@@ -224,8 +224,8 @@ static void *mcode_alloc(jit_State *J, size_t sz)
224 */ 224 */
225#if LJ_TARGET_MIPS 225#if LJ_TARGET_MIPS
226 /* Use the middle of the 256MB-aligned region. */ 226 /* Use the middle of the 256MB-aligned region. */
227 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + 227 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
228 0x08000000u; 228 ~(uintptr_t)0x0fffffffu) + 0x08000000u;
229#else 229#else
230 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; 230 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
231#endif 231#endif
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 2eb40a8d..bb063c2b 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -723,8 +723,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
723#else 723#else
724 if (LJ_BE && sz == 4) src++; 724 if (LJ_BE && sz == 4) src++;
725#endif 725#endif
726 } 726 } else
727#endif 727#endif
728 if (LJ_64 && LJ_BE && sz == 4) src++;
728 } 729 }
729 } 730 }
730 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 731 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 1b061943..740687b3 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -81,7 +81,7 @@ enum {
81 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) 81 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP))
82#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) 82#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
83#if LJ_SOFTFP 83#if LJ_SOFTFP
84#define RSET_FPR 0 84#define RSET_FPR 0
85#else 85#else
86#if LJ_32 86#if LJ_32
87#define RSET_FPR \ 87#define RSET_FPR \
@@ -90,11 +90,11 @@ enum {
90 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ 90 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
91 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) 91 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
92#else 92#else
93#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) 93#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
94#endif 94#endif
95#endif 95#endif
96#define RSET_ALL (RSET_GPR|RSET_FPR) 96#define RSET_ALL (RSET_GPR|RSET_FPR)
97#define RSET_INIT RSET_ALL 97#define RSET_INIT RSET_ALL
98 98
99#define RSET_SCRATCH_GPR \ 99#define RSET_SCRATCH_GPR \
100 (RSET_RANGE(RID_R1, RID_R15+1)|\ 100 (RSET_RANGE(RID_R1, RID_R15+1)|\
@@ -192,8 +192,12 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
192#define MIPSF_F(r) ((r) << 6) 192#define MIPSF_F(r) ((r) << 6)
193#define MIPSF_A(n) ((n) << 6) 193#define MIPSF_A(n) ((n) << 6)
194#define MIPSF_M(n) ((n) << 11) 194#define MIPSF_M(n) ((n) << 11)
195#define MIPSF_L(n) ((n) << 6)
195 196
196typedef enum MIPSIns { 197typedef enum MIPSIns {
198 MIPSI_D = 0x38,
199 MIPSI_DV = 0x10,
200 MIPSI_D32 = 0x3c,
197 /* Integer instructions. */ 201 /* Integer instructions. */
198 MIPSI_MOVE = 0x00000025, 202 MIPSI_MOVE = 0x00000025,
199 MIPSI_NOP = 0x00000000, 203 MIPSI_NOP = 0x00000000,
@@ -202,22 +206,27 @@ typedef enum MIPSIns {
202 MIPSI_LU = 0x34000000, 206 MIPSI_LU = 0x34000000,
203 MIPSI_LUI = 0x3c000000, 207 MIPSI_LUI = 0x3c000000,
204 208
205 MIPSI_ADDIU = 0x24000000, 209 MIPSI_AND = 0x00000024,
206 MIPSI_ANDI = 0x30000000, 210 MIPSI_ANDI = 0x30000000,
211 MIPSI_OR = 0x00000025,
207 MIPSI_ORI = 0x34000000, 212 MIPSI_ORI = 0x34000000,
213 MIPSI_XOR = 0x00000026,
208 MIPSI_XORI = 0x38000000, 214 MIPSI_XORI = 0x38000000,
215 MIPSI_NOR = 0x00000027,
216
217 MIPSI_SLT = 0x0000002a,
218 MIPSI_SLTU = 0x0000002b,
209 MIPSI_SLTI = 0x28000000, 219 MIPSI_SLTI = 0x28000000,
210 MIPSI_SLTIU = 0x2c000000, 220 MIPSI_SLTIU = 0x2c000000,
211 221
212 MIPSI_ADDU = 0x00000021, 222 MIPSI_ADDU = 0x00000021,
223 MIPSI_ADDIU = 0x24000000,
224 MIPSI_SUB = 0x00000022,
213 MIPSI_SUBU = 0x00000023, 225 MIPSI_SUBU = 0x00000023,
214 MIPSI_MUL = 0x70000002, 226 MIPSI_MUL = 0x70000002,
215 MIPSI_AND = 0x00000024, 227 MIPSI_DIV = 0x0000001a,
216 MIPSI_OR = 0x00000025, 228 MIPSI_DIVU = 0x0000001b,
217 MIPSI_XOR = 0x00000026, 229
218 MIPSI_NOR = 0x00000027,
219 MIPSI_SLT = 0x0000002a,
220 MIPSI_SLTU = 0x0000002b,
221 MIPSI_MOVZ = 0x0000000a, 230 MIPSI_MOVZ = 0x0000000a,
222 MIPSI_MOVN = 0x0000000b, 231 MIPSI_MOVN = 0x0000000b,
223 MIPSI_MFHI = 0x00000010, 232 MIPSI_MFHI = 0x00000010,
@@ -228,14 +237,18 @@ typedef enum MIPSIns {
228 MIPSI_SRL = 0x00000002, 237 MIPSI_SRL = 0x00000002,
229 MIPSI_SRA = 0x00000003, 238 MIPSI_SRA = 0x00000003,
230 MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ 239 MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
240 MIPSI_DROTR = 0x0020003a,
241 MIPSI_DROTR32 = 0x0020003e,
231 MIPSI_SLLV = 0x00000004, 242 MIPSI_SLLV = 0x00000004,
232 MIPSI_SRLV = 0x00000006, 243 MIPSI_SRLV = 0x00000006,
233 MIPSI_SRAV = 0x00000007, 244 MIPSI_SRAV = 0x00000007,
234 MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ 245 MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
246 MIPSI_DROTRV = 0x00000056,
235 247
236 MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ 248 MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
237 MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ 249 MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
238 MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ 250 MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
251 MIPSI_DSBH = 0x7c0000a4,
239 252
240 MIPSI_B = 0x10000000, 253 MIPSI_B = 0x10000000,
241 MIPSI_J = 0x08000000, 254 MIPSI_J = 0x08000000,
@@ -253,7 +266,9 @@ typedef enum MIPSIns {
253 266
254 /* Load/store instructions. */ 267 /* Load/store instructions. */
255 MIPSI_LW = 0x8c000000, 268 MIPSI_LW = 0x8c000000,
269 MIPSI_LD = 0xdc000000,
256 MIPSI_SW = 0xac000000, 270 MIPSI_SW = 0xac000000,
271 MIPSI_SD = 0xfc000000,
257 MIPSI_LB = 0x80000000, 272 MIPSI_LB = 0x80000000,
258 MIPSI_SB = 0xa0000000, 273 MIPSI_SB = 0xa0000000,
259 MIPSI_LH = 0x84000000, 274 MIPSI_LH = 0x84000000,
@@ -266,13 +281,48 @@ typedef enum MIPSIns {
266 MIPSI_SDC1 = 0xf4000000, 281 MIPSI_SDC1 = 0xf4000000,
267 282
268 /* MIPS64 instructions. */ 283 /* MIPS64 instructions. */
269 MIPSI_DSLL = 0x00000038, 284 MIPSI_DADD = 0x0000002c,
270 MIPSI_LD = 0xdc000000, 285 MIPSI_DADDI = 0x60000000,
286 MIPSI_DADDU = 0x0000002d,
271 MIPSI_DADDIU = 0x64000000, 287 MIPSI_DADDIU = 0x64000000,
272 MIPSI_SD = 0xfc000000, 288 MIPSI_DSUB = 0x0000002e,
273 MIPSI_DMFC1 = 0x44200000, 289 MIPSI_DSUBU = 0x0000002f,
290 MIPSI_DDIV = 0x0000001e,
291 MIPSI_DDIVU = 0x0000001f,
292 MIPSI_DMULT = 0x0000001c,
293 MIPSI_DMULTU = 0x0000001d,
294
295 MIPSI_DSLL = 0x00000038,
296 MIPSI_DSRL = 0x0000003a,
297 MIPSI_DSLLV = 0x00000014,
298 MIPSI_DSRLV = 0x00000016,
299 MIPSI_DSRA = 0x0000003b,
300 MIPSI_DSRAV = 0x00000017,
274 MIPSI_DSRA32 = 0x0000003f, 301 MIPSI_DSRA32 = 0x0000003f,
275 MIPSI_MFHC1 = 0x44600000, 302 MIPSI_DSLL32 = 0x0000003c,
303 MIPSI_DSRL32 = 0x0000003e,
304 MIPSI_DSHD = 0x7c000164,
305
306 MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU,
307 MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU,
308 MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
309 MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
310 MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
311
312 /* Extract/insert instructions. */
313 MIPSI_DEXTM = 0x7c000001,
314 MIPSI_DEXTU = 0x7c000002,
315 MIPSI_DEXT = 0x7c000003,
316 MIPSI_DINSM = 0x7c000005,
317 MIPSI_DINSU = 0x7c000006,
318 MIPSI_DINS = 0x7c000007,
319
320 MIPSI_RINT_D = 0x4620001a,
321 MIPSI_RINT_S = 0x4600001a,
322 MIPSI_RINT = 0x4400001a,
323 MIPSI_FLOOR_D = 0x4620000b,
324 MIPSI_CEIL_D = 0x4620000a,
325 MIPSI_ROUND_D = 0x46200008,
276 326
277 /* FP instructions. */ 327 /* FP instructions. */
278 MIPSI_MOV_S = 0x46000006, 328 MIPSI_MOV_S = 0x46000006,
@@ -298,24 +348,30 @@ typedef enum MIPSIns {
298 MIPSI_CVT_W_D = 0x46200024, 348 MIPSI_CVT_W_D = 0x46200024,
299 MIPSI_CVT_S_W = 0x46800020, 349 MIPSI_CVT_S_W = 0x46800020,
300 MIPSI_CVT_D_W = 0x46800021, 350 MIPSI_CVT_D_W = 0x46800021,
351 MIPSI_CVT_S_L = 0x46a00020,
352 MIPSI_CVT_D_L = 0x46a00021,
301 353
302 MIPSI_TRUNC_W_S = 0x4600000d, 354 MIPSI_TRUNC_W_S = 0x4600000d,
303 MIPSI_TRUNC_W_D = 0x4620000d, 355 MIPSI_TRUNC_W_D = 0x4620000d,
356 MIPSI_TRUNC_L_S = 0x46000009,
357 MIPSI_TRUNC_L_D = 0x46200009,
304 MIPSI_FLOOR_W_S = 0x4600000f, 358 MIPSI_FLOOR_W_S = 0x4600000f,
305 MIPSI_FLOOR_W_D = 0x4620000f, 359 MIPSI_FLOOR_W_D = 0x4620000f,
306 360
307 MIPSI_MFC1 = 0x44000000, 361 MIPSI_MFC1 = 0x44000000,
308 MIPSI_MTC1 = 0x44800000, 362 MIPSI_MTC1 = 0x44800000,
363 MIPSI_DMTC1 = 0x44a00000,
364 MIPSI_DMFC1 = 0x44200000,
309 365
310 MIPSI_BC1F = 0x45000000, 366 MIPSI_BC1F = 0x45000000,
311 MIPSI_BC1T = 0x45010000, 367 MIPSI_BC1T = 0x45010000,
312 368
313 MIPSI_C_EQ_D = 0x46200032, 369 MIPSI_C_EQ_D = 0x46200032,
370 MIPSI_C_OLT_S = 0x46000034,
314 MIPSI_C_OLT_D = 0x46200034, 371 MIPSI_C_OLT_D = 0x46200034,
315 MIPSI_C_ULT_D = 0x46200035, 372 MIPSI_C_ULT_D = 0x46200035,
316 MIPSI_C_OLE_D = 0x46200036, 373 MIPSI_C_OLE_D = 0x46200036,
317 MIPSI_C_ULE_D = 0x46200037, 374 MIPSI_C_ULE_D = 0x46200037,
318
319} MIPSIns; 375} MIPSIns;
320 376
321#endif 377#endif
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 4cd925ed..80a7f024 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -319,13 +319,15 @@ void lj_trace_initstate(global_State *g)
319 /* Initialize 32/64 bit constants. */ 319 /* Initialize 32/64 bit constants. */
320#if LJ_TARGET_X86ORX64 320#if LJ_TARGET_X86ORX64
321 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); 321 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
322 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
323 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
324#if LJ_32 322#if LJ_32
325 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); 323 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
326#endif 324#endif
325 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
327 J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; 326 J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
328#endif 327#endif
328#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
329 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
330#endif
329#if LJ_TARGET_PPC 331#if LJ_TARGET_PPC
330 J->k32[LJ_K32_2P52_2P31] = 0x59800004; 332 J->k32[LJ_K32_2P52_2P31] = 0x59800004;
331 J->k32[LJ_K32_2P52] = 0x59800000; 333 J->k32[LJ_K32_2P52] = 0x59800000;
@@ -335,6 +337,11 @@ void lj_trace_initstate(global_State *g)
335#endif 337#endif
336#if LJ_TARGET_MIPS 338#if LJ_TARGET_MIPS
337 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); 339 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
340#if LJ_64
341 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
342 J->k32[LJ_K32_2P63] = 0x5f000000;
343 J->k32[LJ_K32_M2P64] = 0xdf800000;
344#endif
338#endif 345#endif
339} 346}
340 347
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index c518c306..f0c22a74 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -327,7 +327,13 @@
327|.macro jmp_extern; jr CFUNCADDR; .endmacro 327|.macro jmp_extern; jr CFUNCADDR; .endmacro
328| 328|
329|.macro hotcheck, delta, target 329|.macro hotcheck, delta, target
330| NYI 330| dsrl TMP1, PC, 1
331| andi TMP1, TMP1, 126
332| daddu TMP1, TMP1, DISPATCH
333| lhu TMP2, GG_DISP2HOT(TMP1)
334| addiu TMP2, TMP2, -delta
335| bltz TMP2, target
336|. sh TMP2, GG_DISP2HOT(TMP1)
331|.endmacro 337|.endmacro
332| 338|
333|.macro hotloop 339|.macro hotloop
@@ -2150,7 +2156,21 @@ static void build_subroutines(BuildCtx *ctx)
2150 |//----------------------------------------------------------------------- 2156 |//-----------------------------------------------------------------------
2151 | 2157 |
2152 |->vm_record: // Dispatch target for recording phase. 2158 |->vm_record: // Dispatch target for recording phase.
2153 | NYI 2159 |.if JIT
2160 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2161 | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent.
2162 | bnez AT, >5
2163 | // Decrement the hookcount for consistency, but always do the call.
2164 |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2165 | andi AT, TMP3, HOOK_ACTIVE
2166 | bnez AT, >1
2167 |. addiu TMP2, TMP2, -1
2168 | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2169 | beqz AT, >1
2170 |. nop
2171 | b >1
2172 |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2173 |.endif
2154 | 2174 |
2155 |->vm_rethook: // Dispatch target for return hooks. 2175 |->vm_rethook: // Dispatch target for return hooks.
2156 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2176 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
@@ -2201,7 +2221,25 @@ static void build_subroutines(BuildCtx *ctx)
2201 |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. 2221 |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins.
2202 | 2222 |
2203 |->vm_hotloop: // Hot loop counter underflow. 2223 |->vm_hotloop: // Hot loop counter underflow.
2204 | NYI 2224 |.if JIT
2225 | ld LFUNC:TMP1, FRAME_FUNC(BASE)
2226 | daddiu CARG1, DISPATCH, GG_DISP2J
2227 | cleartp LFUNC:TMP1
2228 | sd PC, SAVE_PC
2229 | ld TMP1, LFUNC:TMP1->pc
2230 | move CARG2, PC
2231 | sd L, DISPATCH_J(L)(DISPATCH)
2232 | lbu TMP1, PC2PROTO(framesize)(TMP1)
2233 | load_got lj_trace_hot
2234 | sd BASE, L->base
2235 | dsll TMP1, TMP1, 3
2236 | daddu TMP1, BASE, TMP1
2237 | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc)
2238 |. sd TMP1, L->top
2239 | b <3
2240 |. nop
2241 |.endif
2242 |
2205 | 2243 |
2206 |->vm_callhook: // Dispatch target for call hooks. 2244 |->vm_callhook: // Dispatch target for call hooks.
2207 |.if JIT 2245 |.if JIT
@@ -2235,21 +2273,69 @@ static void build_subroutines(BuildCtx *ctx)
2235 | 2273 |
2236 |->cont_stitch: // Trace stitching. 2274 |->cont_stitch: // Trace stitching.
2237 |.if JIT 2275 |.if JIT
2238 | NYI 2276 | // RA = resultptr, RB = meta base
2277 | lw INS, -4(PC)
2278 | ld TRACE:TMP2, -40(RB) // Save previous trace.
2279 | decode_RA8a RC, INS
2280 | daddiu AT, MULTRES, -8
2281 | cleartp TRACE:TMP2
2282 | decode_RA8b RC
2283 | beqz AT, >2
2284 |. daddu RC, BASE, RC // Call base.
2285 |1: // Move results down.
2286 | ld CARG1, 0(RA)
2287 | daddiu AT, AT, -8
2288 | daddiu RA, RA, 8
2289 | sd CARG1, 0(RC)
2290 | bnez AT, <1
2291 |. daddiu RC, RC, 8
2292 |2:
2293 | decode_RA8a RA, INS
2294 | decode_RB8a RB, INS
2295 | decode_RA8b RA
2296 | decode_RB8b RB
2297 | daddu RA, RA, RB
2298 | daddu RA, BASE, RA
2299 |3:
2300 | sltu AT, RC, RA
2301 | bnez AT, >9 // More results wanted?
2302 |. nop
2303 |
2304 | lhu TMP3, TRACE:TMP2->traceno
2305 | lhu RD, TRACE:TMP2->link
2306 | beq RD, TMP3, ->cont_nop // Blacklisted.
2307 |. load_got lj_dispatch_stitch
2308 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2309 |. sll RD, RD, 3
2310 |
2311 | // Stitch a new trace to the previous trace.
2312 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2313 | sd L, DISPATCH_J(L)(DISPATCH)
2314 | sd BASE, L->base
2315 | daddiu CARG1, DISPATCH, GG_DISP2J
2316 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2317 |. move CARG2, PC
2318 | b ->cont_nop
2319 |. ld BASE, L->base
2320 |
2321 |9:
2322 | sd TISNIL, 0(RC)
2323 | b <3
2324 |. daddiu RC, RC, 8
2239 |.endif 2325 |.endif
2240 | 2326 |
2241 |->vm_profhook: // Dispatch target for profiler hook. 2327 |->vm_profhook: // Dispatch target for profiler hook.
2242#if LJ_HASPROFILE 2328#if LJ_HASPROFILE
2243 | load_got lj_dispatch_profile 2329 | load_got lj_dispatch_profile
2244 | sw MULTRES, SAVE_MULTRES 2330 | sd MULTRES, SAVE_MULTRES
2245 | move CARG2, PC 2331 | move CARG2, PC
2246 | sw BASE, L->base 2332 | sd BASE, L->base
2247 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) 2333 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2248 |. move CARG1, L 2334 |. move CARG1, L
2249 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 2335 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2250 | daddiu PC, PC, -4 2336 | daddiu PC, PC, -4
2251 | b ->cont_nop 2337 | b ->cont_nop
2252 |. lw BASE, L->base 2338 |. ld BASE, L->base
2253#endif 2339#endif
2254 | 2340 |
2255 |//----------------------------------------------------------------------- 2341 |//-----------------------------------------------------------------------
@@ -2259,6 +2345,7 @@ static void build_subroutines(BuildCtx *ctx)
2259 |.macro savex_, a, b 2345 |.macro savex_, a, b
2260 |.if FPU 2346 |.if FPU
2261 | sdc1 f..a, a*8(sp) 2347 | sdc1 f..a, a*8(sp)
2348 | sdc1 f..b, b*8(sp)
2262 | sd r..a, 32*8+a*8(sp) 2349 | sd r..a, 32*8+a*8(sp)
2263 | sd r..b, 32*8+b*8(sp) 2350 | sd r..b, 32*8+b*8(sp)
2264 |.else 2351 |.else
@@ -2269,11 +2356,124 @@ static void build_subroutines(BuildCtx *ctx)
2269 | 2356 |
2270 |->vm_exit_handler: 2357 |->vm_exit_handler:
2271 |.if JIT 2358 |.if JIT
2272 | NYI 2359 |.if FPU
2360 | daddiu sp, sp, -(32*8+32*8)
2361 |.else
2362 | daddiu sp, sp, -(32*8)
2363 |.endif
2364 | savex_ 0, 1
2365 | savex_ 2, 3
2366 | savex_ 4, 5
2367 | savex_ 6, 7
2368 | savex_ 8, 9
2369 | savex_ 10, 11
2370 | savex_ 12, 13
2371 | savex_ 14, 15
2372 | savex_ 16, 17
2373 | savex_ 18, 19
2374 | savex_ 20, 21
2375 | savex_ 22, 23
2376 | savex_ 24, 25
2377 | savex_ 26, 27
2378 | savex_ 28, 30
2379 |.if FPU
2380 | sdc1 f29, 29*8(sp)
2381 | sdc1 f31, 31*8(sp)
2382 | sd r0, 32*8+31*8(sp) // Clear RID_TMP.
2383 | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp.
2384 | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP
2385 |.else
2386 | sd r0, 31*8(sp) // Clear RID_TMP.
2387 | daddiu TMP2, sp, 32*8 // Recompute original value of sp.
2388 | sd TMP2, 29*8(sp) // Store sp in RID_SP
2389 |.endif
2390 | li_vmstate EXIT
2391 | daddiu DISPATCH, JGL, -GG_DISP2G-32768
2392 | lw TMP1, 0(TMP2) // Load exit number.
2393 | st_vmstate
2394 | ld L, DISPATCH_GL(cur_L)(DISPATCH)
2395 | ld BASE, DISPATCH_GL(jit_base)(DISPATCH)
2396 | load_got lj_trace_exit
2397 | sd L, DISPATCH_J(L)(DISPATCH)
2398 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2399 | sd BASE, L->base
2400 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2401 | daddiu CARG1, DISPATCH, GG_DISP2J
2402 | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2403 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2404 |. move CARG2, sp
2405 | // Returns MULTRES (unscaled) or negated error code.
2406 | ld TMP1, L->cframe
2407 | li AT, -4
2408 | ld BASE, L->base
2409 | and sp, TMP1, AT
2410 | ld PC, SAVE_PC // Get SAVE_PC.
2411 | b >1
2412 |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield).
2273 |.endif 2413 |.endif
2274 |->vm_exit_interp: 2414 |->vm_exit_interp:
2275 |.if JIT 2415 |.if JIT
2276 | NYI 2416 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2417 | ld L, SAVE_L
2418 | daddiu DISPATCH, JGL, -GG_DISP2G-32768
2419 | sd BASE, L->base
2420 |1:
2421 | bltz CRET1, >9 // Check for error from exit.
2422 |. ld LFUNC:RB, FRAME_FUNC(BASE)
2423 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2424 | dsll MULTRES, CRET1, 3
2425 | cleartp LFUNC:RB
2426 | sd MULTRES, SAVE_MULTRES
2427 | li TISNIL, LJ_TNIL
2428 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2429 | .FPU mtc1 TMP3, TOBIT
2430 | ld TMP1, LFUNC:RB->pc
2431 | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2432 | ld KBASE, PC2PROTO(k)(TMP1)
2433 | .FPU cvt.d.s TOBIT, TOBIT
2434 | // Modified copy of ins_next which handles function header dispatch, too.
2435 | lw INS, 0(PC)
2436 | daddiu PC, PC, 4
2437 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
2438 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2439 | decode_OP8a TMP1, INS
2440 | decode_OP8b TMP1
2441 | sltiu TMP2, TMP1, BC_FUNCF*8
2442 | daddu TMP0, DISPATCH, TMP1
2443 | decode_RD8a RD, INS
2444 | ld AT, 0(TMP0)
2445 | decode_RA8a RA, INS
2446 | beqz TMP2, >2
2447 |. decode_RA8b RA
2448 | jr AT
2449 |. decode_RD8b RD
2450 |2:
2451 | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
2452 | bnez TMP2, >3
2453 |. ld TMP1, FRAME_PC(BASE)
2454 | // Check frame below fast function.
2455 | andi TMP0, TMP1, FRAME_TYPE
2456 | bnez TMP0, >3 // Trace stitching continuation?
2457 |. nop
2458 | // Otherwise set KBASE for Lua function below fast function.
2459 | lw TMP2, -4(TMP1)
2460 | decode_RA8a TMP0, TMP2
2461 | decode_RA8b TMP0
2462 | dsubu TMP1, BASE, TMP0
2463 | ld LFUNC:TMP2, -32(TMP1)
2464 | cleartp LFUNC:TMP2
2465 | ld TMP1, LFUNC:TMP2->pc
2466 | ld KBASE, PC2PROTO(k)(TMP1)
2467 |3:
2468 | daddiu RC, MULTRES, -8
2469 | jr AT
2470 |. daddu RA, RA, BASE
2471 |
2472 |9: // Rethrow error from the right C frame.
2473 | load_got lj_err_throw
2474 | negu CARG2, CRET1
2475 | call_intern lj_err_throw // (lua_State *L, int errcode)
2476 |. move CARG1, L
2277 |.endif 2477 |.endif
2278 | 2478 |
2279 |//----------------------------------------------------------------------- 2479 |//-----------------------------------------------------------------------
@@ -4013,7 +4213,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4013 | ins_next2 4213 | ins_next2
4014 | 4214 |
4015 |7: // Possible table write barrier for the value. Skip valiswhite check. 4215 |7: // Possible table write barrier for the value. Skip valiswhite check.
4016 | barrierback TAB:RB, TMP3, TMP0, <2 4216 | barrierback TAB:CARG2, TMP3, TMP0, <2
4017 break; 4217 break;
4018 4218
4019 case BC_TSETM: 4219 case BC_TSETM:
@@ -4632,7 +4832,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4632 4832
4633 case BC_JLOOP: 4833 case BC_JLOOP:
4634 |.if JIT 4834 |.if JIT
4635 | NYI 4835 | // RA = base*8 (ignored), RD = traceno*8
4836 | ld TMP1, DISPATCH_J(trace)(DISPATCH)
4837 | li AT, 0
4838 | daddu TMP1, TMP1, RD
4839 | // Traces on MIPS don't store the trace number, so use 0.
4840 | sd AT, DISPATCH_GL(vmstate)(DISPATCH)
4841 | ld TRACE:TMP2, 0(TMP1)
4842 | sd BASE, DISPATCH_GL(jit_base)(DISPATCH)
4843 | ld TMP2, TRACE:TMP2->mcode
4844 | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4845 | jr TMP2
4846 |. daddiu JGL, DISPATCH, GG_DISP2G+32768
4636 |.endif 4847 |.endif
4637 break; 4848 break;
4638 4849
@@ -4694,10 +4905,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4694 4905
4695 case BC_IFUNCV: 4906 case BC_IFUNCV:
4696 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4907 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
4908 | li TMP0, LJ_TFUNC
4697 | daddu TMP1, BASE, RC 4909 | daddu TMP1, BASE, RC
4698 | ld TMP2, L->maxstack 4910 | ld TMP2, L->maxstack
4911 | settp LFUNC:RB, TMP0
4699 | daddu TMP0, RA, RC 4912 | daddu TMP0, RA, RC
4700 | sd LFUNC:RB, 0(TMP1) // Store (untagged) copy of LFUNC. 4913 | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
4701 | daddiu TMP3, RC, 16+FRAME_VARG 4914 | daddiu TMP3, RC, 16+FRAME_VARG
4702 | sltu AT, TMP0, TMP2 4915 | sltu AT, TMP0, TMP2
4703 | ld KBASE, -4+PC2PROTO(k)(PC) 4916 | ld KBASE, -4+PC2PROTO(k)(PC)